001package eu.righettod;
002
003
004import com.auth0.jwt.interfaces.DecodedJWT;
005import org.apache.batik.anim.dom.SAXSVGDocumentFactory;
006import org.apache.batik.util.XMLResourceDescriptor;
007import org.apache.commons.csv.CSVFormat;
008import org.apache.commons.csv.CSVRecord;
009import org.apache.commons.imaging.ImageInfo;
010import org.apache.commons.imaging.Imaging;
011import org.apache.commons.imaging.common.ImageMetadata;
012import org.apache.commons.validator.routines.CreditCardValidator;
013import org.apache.commons.validator.routines.EmailValidator;
014import org.apache.commons.validator.routines.InetAddressValidator;
015import org.apache.pdfbox.Loader;
016import org.apache.pdfbox.pdmodel.PDDocument;
017import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
018import org.apache.pdfbox.pdmodel.PDDocumentInformation;
019import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
020import org.apache.pdfbox.pdmodel.common.PDMetadata;
021import org.apache.pdfbox.pdmodel.interactive.action.*;
022import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
023import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
024import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
025import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
026import org.apache.poi.poifs.filesystem.DirectoryEntry;
027import org.apache.poi.poifs.filesystem.POIFSFileSystem;
028import org.apache.poi.poifs.macros.VBAMacroReader;
029import org.apache.tika.detect.DefaultDetector;
030import org.apache.tika.detect.Detector;
031import org.apache.tika.io.TemporaryResources;
032import org.apache.tika.io.TikaInputStream;
033import org.apache.tika.metadata.Metadata;
034import org.apache.tika.mime.MediaType;
035import org.apache.tika.mime.MimeTypes;
036import org.iban4j.IbanUtil;
037import org.owasp.html.HtmlPolicyBuilder;
038import org.owasp.html.PolicyFactory;
039import org.w3c.dom.Document;
040import org.w3c.dom.svg.SVGDocument;
041import org.xml.sax.EntityResolver;
042import org.xml.sax.InputSource;
043import org.xml.sax.SAXException;
044
045import javax.crypto.Mac;
046import javax.crypto.spec.SecretKeySpec;
047import javax.imageio.ImageIO;
048import javax.json.Json;
049import javax.json.JsonReader;
050import javax.xml.XMLConstants;
051import javax.xml.parsers.DocumentBuilder;
052import javax.xml.parsers.DocumentBuilderFactory;
053import javax.xml.parsers.ParserConfigurationException;
054import javax.xml.stream.XMLInputFactory;
055import javax.xml.stream.XMLStreamReader;
056import javax.xml.stream.events.XMLEvent;
057import javax.xml.validation.Schema;
058import javax.xml.validation.SchemaFactory;
059import java.awt.*;
060import java.awt.image.BufferedImage;
061import java.io.*;
062import java.net.*;
063import java.net.http.HttpClient;
064import java.net.http.HttpRequest;
065import java.net.http.HttpResponse;
066import java.nio.ByteBuffer;
067import java.nio.charset.Charset;
068import java.nio.charset.StandardCharsets;
069import java.nio.file.Files;
070import java.nio.file.Paths;
071import java.security.MessageDigest;
072import java.security.SecureRandom;
073import java.time.Duration;
074import java.time.LocalDate;
075import java.time.YearMonth;
076import java.time.ZoneId;
077import java.util.*;
078import java.util.List;
079import java.util.concurrent.*;
080import java.util.concurrent.atomic.AtomicInteger;
081import java.util.regex.Matcher;
082import java.util.regex.Pattern;
083import java.util.zip.GZIPInputStream;
084import java.util.zip.ZipEntry;
085import java.util.zip.ZipFile;
086
087/**
 * Provides various utility methods to apply processing from a security perspective.<br>
 * These code snippets:
090 * <ul>
091 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
092 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
093 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
094 * </ul>
095 * <br>
096 * <a href="https://github.com/righettod/code-snippets-security-utils">GitHub repository</a>.<br><br>
097 * <a href="https://github.com/righettod/code-snippets-security-utils/blob/main/src/main/java/eu/righettod/SecurityUtils.java">Source code of the class</a>.
098 */
099public class SecurityUtils {
100    /**
101     * Default constructor: Not needed as the class only provides static methods.
102     */
103    private SecurityUtils() {
104    }
105
106    /**
     * Apply a collection of validations to verify if a provided PIN code is considered weak (easy to guess) or not.<br>
     * This method assumes that the format of the PIN code is [0-9]{6,}<br>
109     * Rule to consider a PIN code as weak:
110     * <ul>
111     * <li>Length is inferior to 6 positions.</li>
112     * <li>Contain only the same number or only a sequence of zero.</li>
113     * <li>Contain sequence of following incremental or decremental numbers.</li>
114     * </ul>
115     *
116     * @param pinCode PIN code to verify.
117     * @return True only if the PIN is considered as weak.
118     */
119    public static boolean isWeakPINCode(String pinCode) {
120        boolean isWeak = true;
121        //Length is inferior to 6 positions
122        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
123        //and to ensure that the PIN is not only a sequence of zero
124        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
125            //Contain only the same number
126            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
127            if (!Pattern.matches(regex, pinCode)) {
128                //Contain sequence of following incremental or decremental numbers
129                char previousChar = 'X';
130                boolean containSequence = false;
131                for (char c : pinCode.toCharArray()) {
132                    if (previousChar != 'X') {
133                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
134                        int currentNbr = Integer.parseInt(String.valueOf(c));
135                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
136                            containSequence = true;
137                            break;
138                        }
139                    }
140                    previousChar = c;
141                }
142                if (!containSequence) {
143                    isWeak = false;
144                }
145            }
146        }
147        return isWeak;
148    }
149
150    /**
151     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
152     * <ul>
153     * <li>Real Microsoft Word 97-2003 document file.</li>
154     * <li>No VBA Macro.<br></li>
155     * <li>No embedded objects.</li>
156     * </ul>
157     *
158     * @param wordFilePath Filename of the Word document file to check.
159     * @return True only if the file pass all validations.
160     * @see "https://poi.apache.org/components/"
161     * @see "https://poi.apache.org/components/document/"
162     * @see "https://poi.apache.org/components/poifs/how-to.html"
163     * @see "https://poi.apache.org/components/poifs/embeded.html"
164     * @see "https://poi.apache.org/"
165     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
166     */
167    public static boolean isWord972003DocumentSafe(String wordFilePath) {
168        boolean isSafe = false;
169        try {
170            File wordFile = new File(wordFilePath);
171            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
172                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
173                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
174                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
175                    VBAMacroReader macroReader = new VBAMacroReader(fs);
176                    Map<String, String> macros = macroReader.readMacros();
177                    if (macros == null || macros.isEmpty()) {
178                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
179                        //From POI documentation:
180                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
181                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
182                        final List<String> embeddedObjectFound = new ArrayList<>();
183                        DirectoryEntry root = fs.getRoot();
184                        if (root.getEntryCount() > 0) {
185                            root.iterator().forEachRemaining(entry -> {
186                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
187                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
188                                    if (objPoolDirectory.getEntryCount() > 0) {
189                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
190                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
191                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
192                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
193                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
194                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
195                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
196                                                        }
197                                                    });
198                                                }
199                                            }
200                                        });
201                                    }
202                                }
203                            });
204                        }
205                        isSafe = embeddedObjectFound.isEmpty();
206                    }
207                }
208            }
209        } catch (Exception e) {
210            isSafe = false;
211        }
212        return isSafe;
213    }
214
215    /**
216     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
217     *
218     * @param xmlFilePath Filename of the XML file to check.
219     * @return True only if the file pass all validations.
220     * @see "https://portswigger.net/web-security/xxe"
221     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
222     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
223     * @see "https://www.w3.org/TR/xinclude-11/"
224     * @see "https://en.wikipedia.org/wiki/XInclude"
225     */
226    public static boolean isXMLSafe(String xmlFilePath) {
227        boolean isSafe = false;
228        try {
229            File xmlFile = new File(xmlFilePath);
230            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
231                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
232                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
233                if (!containXInclude) {
234                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
235                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
236                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
237                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
238                    //Xerces 2 only
239                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
240                    dbfInstance.setXIncludeAware(false);
241                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
242                    //Parse the document
243                    Document doc = builder.parse(xmlFile);
244                    isSafe = (doc != null && doc.getDocumentElement() != null);
245                }
246            }
247        } catch (Exception e) {
248            isSafe = false;
249        }
250        return isSafe;
251    }
252
253
254    /**
255     * Extract all URL links from a PDF file provided.<br>
256     * This can be used to apply validation on a PDF against contained links.
257     *
     * @param pdfFilePath Filename of the PDF file to process.
259     * @return A List of URL objects that is empty if no links is found.
260     * @throws Exception If any error occurs during the processing of the PDF file.
261     * @see "https://www.gushiciku.cn/pl/21KQ"
262     * @see "https://pdfbox.apache.org/"
263     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
264     */
265    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
266        final List<URL> links = new ArrayList<>();
267        File pdfFile = new File(pdfFilePath);
268        try (PDDocument document = Loader.loadPDF(pdfFile)) {
269            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
270            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
271                @Override
272                public boolean accept(PDAnnotation annotation) {
273                    boolean keep = false;
274                    if (annotation instanceof PDAnnotationLink) {
275                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
276                    }
277                    return keep;
278                }
279            };
280            documentCatalog.getPages().forEach(page -> {
281                try {
282                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
283                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
284                        try {
285                            URL urlObj = new URL(linkAnnotation.getURI());
286                            if (!links.contains(urlObj)) {
287                                links.add(urlObj);
288                            }
289                        } catch (MalformedURLException e) {
290                            throw new RuntimeException(e);
291                        }
292                    });
293                } catch (Exception e) {
294                    throw new RuntimeException(e);
295                }
296            });
297        }
298        return links;
299    }
300
301    /**
302     * Apply a collection of validations on a PDF file provided:
303     * <ul>
304     * <li>Real PDF file.</li>
305     * <li>No attachments.</li>
306     * <li>No Javascript code.</li>
307     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
308     * <li>No XFA forms in order to prevent exposure to XXE/SSRF like CVE-2025-54988.</li>
309     * </ul>
310     *
311     * @param pdfFilePath Filename of the PDF file to check.
312     * @return True only if the file pass all validations.
313     * @see "https://stackoverflow.com/a/36161267"
314     * @see "https://www.gushiciku.cn/pl/21KQ"
315     * @see "https://github.com/jonaslejon/malicious-pdf"
316     * @see "https://pdfbox.apache.org/"
317     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
318     * @see "https://nvd.nist.gov/vuln/detail/CVE-2025-54988"
319     * @see "https://github.com/mgthuramoemyint/POC-CVE-2025-54988"
320     * @see "https://en.wikipedia.org/wiki/XFA"
321     */
322    public static boolean isPDFSafe(String pdfFilePath) {
323        boolean isSafe = false;
324        try {
325            File pdfFile = new File(pdfFilePath);
326            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
327                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
328                try (PDDocument document = Loader.loadPDF(pdfFile)) {
329                    //Step 2: Check if the file contains attached files, in our case is not allowed
330                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
331                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
332                    if (namesDictionary.getEmbeddedFiles() == null) {
333                        //Step 3: Check if the file contains any XFA forms
334                        PDAcroForm acroForm = documentCatalog.getAcroForm();
335                        boolean hasForm = (acroForm != null && acroForm.getXFA() != null);
336                        if (!hasForm) {
337                            //Step 4: Check if the file contains Javascript code, in our case is not allowed
338                            if (namesDictionary.getJavaScript() == null) {
339                                //Step 5: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
340                                final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
341                                AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
342                                    @Override
343                                    public boolean accept(PDAnnotation annotation) {
344                                        boolean keep = false;
345                                        if (annotation instanceof PDAnnotationLink) {
346                                            PDAnnotationLink link = (PDAnnotationLink) annotation;
347                                            PDAction action = link.getAction();
348                                            if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
349                                                keep = true;
350                                            }
351                                        }
352                                        return keep;
353                                    }
354                                };
355                                documentCatalog.getPages().forEach(page -> {
356                                    try {
357                                        notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
358                                    } catch (IOException e) {
359                                        throw new RuntimeException(e);
360                                    }
361                                });
362                                if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
363                                    isSafe = true;
364                                }
365                            }
366                        }
367                    }
368                }
369            }
370        } catch (Exception e) {
371            isSafe = false;
372        }
373        return isSafe;
374    }
375
376    /**
377     * Remove as much as possible metadata from the provided PDF document object.
378     *
379     * @param document PDFBox PDF document object on which metadata must be removed.
380     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
381     * @see "https://pdfbox.apache.org/"
382     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
383     */
384    public static void clearPDFMetadata(PDDocument document) {
385        if (document != null) {
386            PDDocumentInformation infoEmpty = new PDDocumentInformation();
387            document.setDocumentInformation(infoEmpty);
388            PDMetadata newMetadataEmpty = new PDMetadata(document);
389            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
390        }
391    }
392
393
394    /**
395     * Validate that the URL provided is really a relative URL.
396     *
397     * @param targetUrl URL to validate.
     * @return True only if the URL passes all validations.
399     * @see "https://portswigger.net/web-security/ssrf"
400     * @see "https://stackoverflow.com/q/6785442"
401     */
402    public static boolean isRelativeURL(String targetUrl) {
403        boolean isValid = false;
404        String work = targetUrl;
405        Pattern startingPrefix = Pattern.compile("^[/a-zA-Z0-9\\-_].*");
406        //Reject any URL no starting with a slash, letter, number, dash, or underscore
407        if (startingPrefix.matcher(work).find()) {
408            //Reject any URL encoded content and URL starting with a double slash
409            if (!work.startsWith("//") && !work.contains("%")) {
410                //Try to create en URI object
411                try {
412                    URI u = new URI(work);
413                    //Scheme must be null
414                    if (u.getScheme() == null) {
415                        isValid = (!u.isAbsolute());
416                    }
417                } catch (URISyntaxException mf) {
418                    isValid = false;
419                }
420            }
421        }
422
423        return isValid;
424    }
425
426    /**
427     * Apply a collection of validations on a ZIP file provided:
428     * <ul>
429     * <li>Real ZIP file.</li>
430     * <li>Contain less than a specified level of deepness.</li>
431     * <li>Do not contain Zip-Slip entry path.</li>
432     * </ul>
433     *
434     * @param zipFilePath       Filename of the ZIP file to check.
435     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
436     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
437     * @return True only if the file pass all validations.
438     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
439     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
440     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
441     * @see "https://github.com/ptoomey3/evilarc"
442     * @see "https://github.com/abdulfatir/ZipBomb"
443     * @see "https://www.baeldung.com/cs/zip-bomb"
444     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
445     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
446     */
447    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
448        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
449        boolean isSafe = false;
450        try {
451            File zipFile = new File(zipFilePath);
452            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
453                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
454                try (ZipFile zipArch = new ZipFile(zipFile)) {
455                    //Step 2: Parse entries
456                    long deepness = 0;
457                    ZipEntry zipEntry;
458                    String entryExtension;
459                    String zipEntryName;
460                    boolean validationsFailed = false;
461                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
462                    while (entries.hasMoreElements()) {
463                        zipEntry = entries.nextElement();
464                        zipEntryName = zipEntry.getName();
465                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
466                        //Step 2a: Check if the current entry is an archive file
467                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
468                            validationsFailed = true;
469                            break;
470                        }
471                        //Step 2b: Check that level of deepness is inferior to the threshold specified
472                        if (zipEntryName.contains("/")) {
473                            //Determine deepness by inspecting the entry name.
474                            //Indeed, folder will be represented like this: folder/folder/folder/
475                            //So we can count the number of "/" to identify the deepness of the entry
476                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
477                            if (deepness > maxLevelDeepness) {
478                                validationsFailed = true;
479                                break;
480                            }
481                        }
482                        //Step 2c: Check if any entries match pattern of zip slip payload
483                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
484                            validationsFailed = true;
485                            break;
486                        }
487                    }
488                    if (!validationsFailed) {
489                        isSafe = true;
490                    }
491                }
492            }
493        } catch (Exception e) {
494            isSafe = false;
495        }
496        return isSafe;
497    }
498
499    /**
500     * Identify the mime type of the content specified (array of bytes).<br>
501     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
502     *
503     * @param content The content as an array of bytes.
504     * @return The mime type in lower case or null if it cannot be identified.
505     * @see "https://twitter.com/righettod/status/1595824709186519041"
506     * @see "https://tika.apache.org/"
507     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
508     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
509     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
510     */
511    public static String identifyMimeType(byte[] content) {
512        String mimeType = null;
513        if (content != null && content.length > 0) {
514            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
515            Metadata metadata = new Metadata();
516            try {
517                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
518                    MediaType mt = detector.detect(tikaInputStream, metadata);
519                    if (mt != null) {
520                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
521                    }
522                }
523            } catch (IOException ioe) {
524                mimeType = null;
525            }
526        }
527        return mimeType;
528    }
529
530    /**
531     * Apply a collection of validations on a string expected to be an public IP address:
532     * <ul>
533     * <li>Is a valid IP v4 or v6 address.</li>
534     * <li>Is public from an Internet perspective.</li>
535     * </ul>
536     * <br>
537     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
538     * <br><br>
539     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
540     *
541     * @param ip String expected to be a valid IP address.
542     * @return True only if the string pass all validations.
543     * @see "https://commons.apache.org/proper/commons-validator/"
544     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
545     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
546     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
547     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
548     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
549     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
550     * @see "https://ipcisco.com/lesson/ipv6-address/"
551     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
552     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
553     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
554     * @see "https://en.wikipedia.org/wiki/Multicast_address"
555     * @see "https://stackoverflow.com/a/5619409"
556     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
557     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
558     * @see "https://developer.android.com/reference/java/net/Inet6Address"
559     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
560     */
561    public static boolean isPublicIPAddress(String ip) {
562        boolean isValid = false;
563        try {
564            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
565            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
566                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
567                if (InetAddressValidator.getInstance().isValid(ip)) {
568                    //If OK then validate that is an public IP address
569                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
570                    InetAddress addr = InetAddress.getByName(ip);
571                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
572                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
573                    //For the prefix map, the start of the key indicates if the value is a regex or a string
574                    if (isValid && (addr instanceof Inet6Address)) {
575                        Map<String, String> prefixes = new HashMap<>();
576                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
577                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
578                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
579                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
580                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
581                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
582                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
583                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
584                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
585                        final List<Boolean> results = new ArrayList<>();
586                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
587                        prefixes.forEach((addressType, expr) -> {
588                            String exprLower = expr.trim().toLowerCase();
589                            if (addressType.startsWith("STRING_")) {
590                                results.add(ipLower.startsWith(exprLower));
591                            } else {
592                                results.add(Pattern.matches(exprLower, ipLower));
593                            }
594                        });
595                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
596                    }
597                }
598            }
599        } catch (Exception e) {
600            isValid = false;
601        }
602        return isValid;
603    }
604
605    /**
606     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
     * This method takes care to build the source string in a way that prevents it from being abused through the boundaries of the different parts composing it.<br><br>
608     * <p>
609     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equal to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
611     * </p>
     * This method ensures that both hashes above will be different.<br><br>
613     *
     * <b>Note:</b> The character <code>|</code> is used as the separator between parts, so a part is not allowed to contain this character.
615     *
616     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
617     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
618     * @throws Exception If any exception occurs
619     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
620     * @see "https://pentesterlab.com/badges/codereview"
621     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
622     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
623     */
624    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
625        byte[] hash = null;
626        String separator = "|";
627        if (parts != null && !parts.isEmpty()) {
628            //Ensure that not part is null
629            if (parts.stream().anyMatch(Objects::isNull)) {
630                throw new IllegalArgumentException("No part must be null!");
631            }
632            //Ensure that the separator is absent from every part
633            if (parts.stream().anyMatch(part -> part.contains(separator))) {
634                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
635            }
636            MessageDigest digest = MessageDigest.getInstance("SHA-256");
637            final StringBuilder buffer = new StringBuilder(separator);
638            parts.forEach(p -> {
639                buffer.append(p).append(separator);
640            });
641            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
642        }
643        return hash;
644    }
645
646    /**
647     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
648     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
     * The method also ensures that no Public Identifier is used, to prevent potential bypasses of the validations.
650     *
651     * @param xmlFilePath              Filename of the XML file to check.
652     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
653     * @return True only if the file pass all validations.
654     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
655     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
656     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
657     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
658     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
659     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
660     */
661    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
662        boolean isSafe = false;
663        final String errorTemplate = "Non allowed %s ID detected!";
664        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
665        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
666
667        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
668            throw new IllegalArgumentException("At least one SID must be specified!");
669        }
670        File xmlFile = new File(xmlFilePath);
671        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
672            try {
673                EntityResolver resolverValidator = (publicId, systemId) -> {
674                    if (publicId != null) {
675                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
676                    }
677                    if (!allowedSystemIdentifiers.contains(systemId)) {
678                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
679                    }
680                    //If it is OK then return a empty DTD/XSD
681                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
682                };
683                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
684                dbfInstance.setIgnoringElementContentWhitespace(true);
685                dbfInstance.setXIncludeAware(false);
686                dbfInstance.setValidating(false);
687                dbfInstance.setCoalescing(true);
688                dbfInstance.setIgnoringComments(false);
689                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
690                builder.setEntityResolver(resolverValidator);
691                Document doc = builder.parse(xmlFile);
692                isSafe = (doc != null);
693            } catch (SAXException | IOException | ParserConfigurationException e) {
694                isSafe = false;
695            }
696        }
697
698        return isSafe;
699    }
700
701    /**
702     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
703     * <ul>
704     * <li>Real CSV file.</li>
     * <li>Does not contain any payload related to CSV injection.</li>
706     * </ul>
707     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
708     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
709     *
710     * @param csvFilePath Filename of the CSV file to check.
711     * @return True only if the file pass all validations.
712     * @see "https://commons.apache.org/proper/commons-csv/"
713     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
714     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
715     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
716     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
717     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
718     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
719     * @see "https://cwe.mitre.org/data/definitions/1236.html"
720     */
721    public static boolean isExcelCSVSafe(String csvFilePath) {
722        boolean isSafe;
723        final AtomicInteger recordCount = new AtomicInteger();
724        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
725
726        try {
727            final List<String> payloadsIdentified = new ArrayList<>();
728            try (Reader in = new FileReader(csvFilePath)) {
729                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
730                records.forEach(record -> {
731                    record.forEach(recordValue -> {
732                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
733                            payloadsIdentified.add(recordValue);
734                        }
735                        recordCount.getAndIncrement();
736                    });
737                });
738            }
739            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
740        } catch (Exception e) {
741            isSafe = false;
742        }
743
744        return isSafe;
745    }
746
747    /**
748     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
749     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
750     *
751     * @param processingModeType Define the mode of processing i.e. protect or validate. ({@link ProcessingModeType})
     * @param input              When the processing mode is "protect" then the expected input (string) is a Java serialized object encoded in Base64, otherwise (processing mode is "validate") the expected input is the output of this method when the "protect" mode was used.
753     * @param secret             Secret to use to compute the SHA256 HMAC.
754     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
755     * @throws Exception If any exception occurs.
756     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
757     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
758     * @see "https://portswigger.net/web-security/deserialization"
759     * @see "https://www.baeldung.com/java-serialization-approaches"
760     * @see "https://www.baeldung.com/java-serialization"
761     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
762     * @see "https://en.wikipedia.org/wiki/HMAC"
763     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
764     */
765    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingModeType processingModeType, String input, byte[] secret) throws Exception {
766        Map<String, Object> results;
767        String resultFormatTemplate = "%s:%s";
768        //Verify input provided to be consistent
769        if (processingModeType == null) {
770            throw new IllegalArgumentException("The processing mode is mandatory!");
771        }
772        if (input == null || input.trim().isEmpty()) {
773            throw new IllegalArgumentException("Input data is mandatory!");
774        }
775        if (secret == null || secret.length == 0) {
776            throw new IllegalArgumentException("The HMAC secret is mandatory!");
777        }
778        if (processingModeType.equals(ProcessingModeType.VALIDATE) && input.split(":").length != 2) {
779            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
780        }
781        //Processing
782        Base64.Decoder b64Decoder = Base64.getDecoder();
783        Base64.Encoder b64Encoder = Base64.getEncoder();
784        String hmacAlgorithm = "HmacSHA256";
785        Mac mac = Mac.getInstance(hmacAlgorithm);
786        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
787        mac.init(key);
788        results = new HashMap<>();
789        results.put("PROCESSING_MODE", processingModeType.toString());
790        switch (processingModeType) {
791            case PROTECT -> {
792                byte[] objectBytes = b64Decoder.decode(input);
793                byte[] hmac = mac.doFinal(objectBytes);
794                String encodedHmac = b64Encoder.encodeToString(hmac);
795                results.put("STATUS", Boolean.TRUE);
796                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
797            }
798            case VALIDATE -> {
799                String[] parts = input.split(":");
800                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
801                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
802                byte[] hmacComputed = mac.doFinal(objectBytes);
803                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
804                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
805                results.put("STATUS", hmacIsValid);
806                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
807            }
808            default -> throw new IllegalArgumentException("Not supported processing mode!");
809        }
810        return results;
811    }
812
813    /**
814     * Apply a collection of validations on a JSON string provided:
815     * <ul>
816     * <li>Real JSON structure.</li>
817     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
818     * <li>Contain less than a specified number of items in any arrays.</li>
819     * </ul>
820     * <br>
821     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
822     * I used the following assumption:
823     * <ul>
824     *      <li>The character <code>{</code> identify the beginning of an object.</li>
825     *      <li>The character <code>}</code> identify the end of an object.</li>
826     *      <li>The character <code>[</code> identify the beginning of an array.</li>
827     *      <li>The character <code>]</code> identify the end of an array.</li>
828     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
829     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
830     * </ul>
831     *
832     * @param json                  String containing the JSON data to validate.
833     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
834     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
835     * @return True only if the string pass all validations.
836     * @see "https://javaee.github.io/jsonp/"
837     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
838     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
839     */
840    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
841        boolean isSafe = false;
842
843        try {
844            //Step 1: Analyse the JSON string
845            int currentDeepness = 0;
846            int currentArrayItemsCount = 0;
847            int maxDeepnessReached = 0;
848            int maxArrayItemsCountReached = 0;
849            boolean currentlyInArray = false;
850            boolean currentlyInString = false;
851            int currentNestedArrayLevel = 0;
852            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
853            String work = json.replace(jsonEscapedDoubleQuote, "'");
854            for (char c : work.toCharArray()) {
855                switch (c) {
856                    case '{': {
857                        if (!currentlyInString) {
858                            currentDeepness++;
859                        }
860                        break;
861                    }
862                    case '}': {
863                        if (!currentlyInString) {
864                            currentDeepness--;
865                        }
866                        break;
867                    }
868                    case '[': {
869                        if (!currentlyInString) {
870                            currentDeepness++;
871                            if (currentlyInArray) {
872                                currentNestedArrayLevel++;
873                            }
874                            currentlyInArray = true;
875                        }
876                        break;
877                    }
878                    case ']': {
879                        if (!currentlyInString) {
880                            currentDeepness--;
881                            currentArrayItemsCount = 0;
882                            if (currentNestedArrayLevel > 0) {
883                                currentNestedArrayLevel--;
884                            }
885                            if (currentNestedArrayLevel == 0) {
886                                currentlyInArray = false;
887                            }
888                        }
889                        break;
890                    }
891                    case '"': {
892                        currentlyInString = !currentlyInString;
893                        break;
894                    }
895                    case ',': {
896                        if (!currentlyInString && currentlyInArray) {
897                            currentArrayItemsCount++;
898                        }
899                        break;
900                    }
901                }
902                if (currentDeepness > maxDeepnessReached) {
903                    maxDeepnessReached = currentDeepness;
904                }
905                if (currentArrayItemsCount > maxArrayItemsCountReached) {
906                    maxArrayItemsCountReached = currentArrayItemsCount;
907                }
908            }
909            //Step 2: Apply validation against the value specified as limits
910            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
911
912            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
913            if (isSafe) {
914                JsonReader reader = Json.createReader(new StringReader(json));
915                isSafe = (reader.read() != null);
916            }
917
918        } catch (Exception e) {
919            isSafe = false;
920        }
921        return isSafe;
922    }
923
924    /**
925     * Apply a collection of validations on a image file provided:
926     * <ul>
927     * <li>Real image file.</li>
928     * <li>Its mime type is into the list of allowed mime types.</li>
     * <li>Its metadata fields do not contain any characters related to malicious payloads.</li>
930     * </ul>
931     * <br>
932     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
933     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
934     *
935     * @param imageFilePath         Filename of the image file to check.
936     * @param imageAllowedMimeTypes List of image mime types allowed.
937     * @return True only if the file pass all validations.
938     * @see "https://commons.apache.org/proper/commons-imaging/"
939     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
940     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
941     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
942     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
943     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
944     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
945     * @see "https://exiftool.org/examples.html"
946     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
947     * @see "https://hexed.it/"
948     * @see "https://github.com/sighook/pixload"
949     */
950    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
951        boolean isSafe = false;
952        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
953        try {
954            File imgFile = new File(imageFilePath);
955            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
956                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
957                //Step 1: Check the mime type of the file against the allowed ones
958                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
959                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
960                    //Step 2: Load the image into an object using the Image API
961                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
962                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
963                        //Step 3: Check the metadata if the image format support it - Highly experimental
964                        List<String> metadataWithPayloads = new ArrayList<>();
965                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
966                        if (imgMetadata != null) {
967                            imgMetadata.getItems().forEach(item -> {
968                                String metadata = item.toString();
969                                if (payloadDetectionRegex.matcher(metadata).find()) {
970                                    metadataWithPayloads.add(metadata);
971                                }
972                            });
973                        }
974                        isSafe = metadataWithPayloads.isEmpty();
975                    }
976                }
977            }
978        } catch (Exception e) {
979            isSafe = false;
980        }
981        return isSafe;
982    }
983
984    /**
985     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
986     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
987     *
988     * @param inputFilePath Filename of the file to clean up.
989     * @param inputFileType Type of the file provided.
990     * @return A array of bytes with the cleaned file.
991     * @throws IllegalArgumentException If an invalid parameter is passed
992     * @throws Exception                If any technical error during the cleaning processing
993     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
994     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
995     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
996     * @see "https://stackoverflow.com/a/13605411"
997     */
998    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
999        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
1000        File inputFile = new File(inputFilePath);
1001        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
1002            throw new IllegalArgumentException("Cannot read the content of the input file!");
1003        }
1004        switch (inputFileType) {
1005            case PDF -> {
1006                try (PDDocument document = Loader.loadPDF(inputFile)) {
1007                    document.save(sanitizedContent);
1008                }
1009            }
1010            case IMAGE -> {
1011                // Load the original image
1012                BufferedImage originalImage = ImageIO.read(inputFile);
1013                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
1014                // Check that image has been successfully loaded
1015                if (originalImage == null) {
1016                    throw new IOException("Cannot load the original image !");
1017                }
1018                // Get current Width and Height of the image
1019                int originalWidth = originalImage.getWidth(null);
1020                int originalHeight = originalImage.getHeight(null);
1021                // Resize the image by removing 1px on Width and Height
1022                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
1023                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
1024                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
1025                // Save image to a bytes buffer
1026                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
1027                //Sometimes for BMP, the format detected is "bmp; format=compressed"
1028                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat) || originalFormat.startsWith("bmp;")) {
1029                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
1030                }
1031                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
1032                Graphics2D drawer = sanitizedImage.createGraphics();
1033                drawer.drawImage(initialSizedImage, 0, 0, null);
1034                drawer.dispose();
1035                //Handle "bmp; format=compressed" case
1036                String formatToUse = originalFormat;
1037                if (formatToUse.startsWith("bmp;")) {
1038                    formatToUse = formatToUse.split(";")[0].trim();
1039                }
1040                ImageIO.write(sanitizedImage, formatToUse, sanitizedContent);
1041            }
1042            default -> throw new IllegalArgumentException("Type of file not supported !");
1043        }
1044        if (sanitizedContent.size() == 0) {
1045            throw new IOException("An error occur during the rewrite operation!");
1046        }
1047        return sanitizedContent.toByteArray();
1048    }
1049
1050    /**
1051     * Apply a collection of validations on a string expected to be an email address:
1052     * <ul>
1053     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1054     * <li>Is not using "Encoded-word" format.</li>
1055     * <li>Is not using comment format.</li>
1056     * <li>Is not using "Punycode" format.</li>
1057     * <li>Is not using UUCP style addresses.</li>
1058     * <li>Is not using address literals.</li>
1059     * <li>Is not using source routes.</li>
1060     * <li>Is not using the "percent hack".</li>
1061     * <li>Does not contain newline or carriage-return characters (CRLF injection prevention).</li>
1062     * <li>The domain part contains at least one dot (reject single-label domains such as localhost or internal hostnames).</li>
1063     * <li>The local part is not a quoted string (i.e. not wrapped in double quotes).</li>
1064     * <li>Respect the RFC 5321 length limits: local part ≤ 64 characters, domain ≤ 255 characters, total address ≤ 320 characters.</li>
1065     * </ul><br>
1066     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1067     *
1068     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
1069     *
1070     * @param addr String expected to be a valid email address.
1071     * @return True only if the string pass all validations.
1072     * @see "https://commons.apache.org/proper/commons-validator/"
1073     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1074     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1075     * @see "https://portswigger.net/research/splitting-the-email-atom"
1076     * @see "https://www.jochentopf.com/email/address.html"
1077     * @see "https://en.wikipedia.org/wiki/Email_address"
1078     */
1079    public static boolean isEmailAddress(String addr) {
1080        boolean isValid = false;
1081        String work = addr.toLowerCase(Locale.ROOT);
1082        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1083        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;\"\n\r]+", Pattern.CASE_INSENSITIVE);
1084        try {
1085            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1086            if (EmailValidator.getInstance(true, true).isValid(work)) {
1087                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1088                if (!encodedWordRegex.matcher(work).find()) {
1089                    //If OK then validate it does not contains punycode
1090                    if (!work.contains("xn--")) {
1091                        //If OK then validate it does not use:
1092                        // UUCP style addresses,
1093                        // Comment format,
1094                        // Address literals,
1095                        // Source routes,
1096                        // The percent hack.
1097                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1098                            //If OK ensure that the domain part contains at least one dot
1099                            long arobaseCount = addr.chars().filter(c -> c == '@').count();
1100                            if (arobaseCount == 1) {
1101                                String[] parts = addr.split("@");
1102                                String localPart = parts[0];
1103                                String domainPart = parts[1];
1104                                if (domainPart.contains(".")) {
1105                                    //If OK the check the respect to the RFC 5321 length limits:
1106                                    // local part ≤ 64 characters, domain ≤ 255 characters, total address ≤ 320 characters.
1107                                    if (localPart.length() <= 64 && domainPart.length() <= 255 && addr.length() <= 320) {
1108                                        isValid = true;
1109                                    }
1110                                }
1111                            }
1112                        }
1113                    }
1114                }
1115
1116            }
1117        } catch (Exception e) {
1118            isValid = false;
1119        }
1120        return isValid;
1121    }
1122
1123    /**
1124     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1125     * <br>
1126     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1127     * <br>
1128     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1129     * <br>
1130     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1131     * <br>
1132     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1133     *
1134     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1135     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1136     * @see "https://www.stet.eu/en/psd2/"
1137     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1138     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1139     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1140     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1141     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1142     * @see "https://portswigger.net/web-security/ssrf"
1143     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1144     */
1145    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1146        boolean isValid = false;
1147        long connectionTimeoutInSeconds = 10;
1148        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1149        try {
1150            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1151            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1152                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1153                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1154                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1155                    URI uri = URI.create(certificateUrl);
1156                    //3. Require usage of HTTPS and reject any url containing query parameters
1157                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1158                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1159                        //and limit the interest to use the SSRF because to pass the check the url need to:
1160                        //- Do not having any query parameters.
1161                        //- Use HTTPS protocol.
1162                        //- End with a string having the format "_[0-9a-f]{64}".
1163                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1164                        HttpResponse<String> response;
1165                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1166                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1167                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1168                                    .build();
1169                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1170                            if (response.statusCode() == 200) {
1171                                //5. Ensure that the response content type is "text/plain"
1172                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1173                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1174                            }
1175                        }
1176                    }
1177                }
1178            }
1179        } catch (Exception e) {
1180            isValid = false;
1181        }
1182        return isValid;
1183    }
1184
1185    /**
1186     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1187     *
1188     * @param encodedData            URL encoded data.
1189     * @param decodingRoundThreshold Threshold above which decoding will fail.
1190     * @return The decoded data.
1191     * @throws SecurityException If the threshold is reached.
1192     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1193     * @see "https://owasp.org/www-community/Double_Encoding"
1194     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1195     * @see "https://capec.mitre.org/data/definitions/120.html"
1196     */
1197    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1198        if (decodingRoundThreshold < 1) {
1199            throw new IllegalArgumentException("Threshold must be a positive number !");
1200        }
1201        if (encodedData == null) {
1202            throw new IllegalArgumentException("Data provided must not be null !");
1203        }
1204        Charset charset = StandardCharsets.UTF_8;
1205        int currentDecodingRound = 0;
1206        boolean isFinished = false;
1207        String currentRoundData = encodedData;
1208        String previousRoundData = encodedData;
1209        while (!isFinished) {
1210            if (currentDecodingRound > decodingRoundThreshold) {
1211                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1212            }
1213            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1214            isFinished = currentRoundData.equals(previousRoundData);
1215            previousRoundData = currentRoundData;
1216            currentDecodingRound++;
1217        }
1218        return currentRoundData;
1219    }
1220
1221    /**
     * Apply a collection of validations on a string expected to be a system file/folder path:
1223     * <ul>
1224     * <li>Does not contains path traversal payload.</li>
1225     * <li>The canonical path is equals to the absolute path.</li>
1226     * </ul><br>
1227     *
1228     * @param path String expected to be a valid system file/folder path.
1229     * @return True only if the string pass all validations.
1230     * @see "https://portswigger.net/web-security/file-path-traversal"
1231     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1232     * @see "https://capec.mitre.org/data/definitions/126.html"
1233     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1234     */
1235    public static boolean isPathSafe(String path) {
1236        boolean isSafe = false;
1237        int decodingRoundThreshold = 3;
1238        try {
1239            if (path != null && !path.isEmpty()) {
1240                //URL decode the path if case of data coming from a web context
1241                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1242                //Ensure that no path traversal expression is present
1243                if (!decodedPath.contains("..")) {
1244                    File f = new File(decodedPath);
1245                    String canonicalPath = f.getCanonicalPath();
1246                    String absolutePath = f.getAbsolutePath();
1247                    isSafe = canonicalPath.equals(absolutePath);
1248                }
1249            }
1250        } catch (Exception e) {
1251            isSafe = false;
1252        }
1253        return isSafe;
1254    }
1255
1256    /**
1257     * Identify if an XML contains any XML comments or have any XSL processing instructions.<br>
1258     * Stream reader based parsing is used to support large XML tree.
1259     *
1260     * @param xmlFilePath Filename of the XML file to check.
1261     * @return True only if XML comments or XSL processing instructions are identified.
1262     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1263     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1264     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1265     * @see "https://www.w3.org/Style/styling-XML.en.html"
1266     */
1267    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1268        boolean itemsDetected = false;
1269        try {
1270            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1271            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1272            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1273            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1274            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1275            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1276
1277            //Parse file
1278            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1279                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1280                int eventType;
1281                while (reader.hasNext() && !itemsDetected) {
1282                    eventType = reader.next();
1283                    if (eventType == XMLEvent.COMMENT) {
1284                        itemsDetected = true;
1285                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1286                        itemsDetected = true;
1287                    }
1288                }
1289            }
1290        } catch (Exception e) {
1291            //In case of error then assume that the check failed
1292            itemsDetected = true;
1293        }
1294        return itemsDetected;
1295    }
1296
1297
1298    /**
1299     * Perform a set of additional validations against a JWT token:
1300     * <ul>
1301     *     <li>Do not use the <b>NONE</b> signature algorithm.</li>
1302     *     <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li>
1303     *     <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li>
1304     *     <li>Match the expected type of token: ACCESS or ID or REFRESH.</li>
1305     * </ul>
1306     *
1307     * @param token               JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied.
1308     * @param expectedTokenType   The type of expected token using the enumeration provided.
1309     * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to.
     * @return True only if the token passes all the validations.
1311     * @see "https://www.iana.org/assignments/jwt/jwt.xhtml"
1312     * @see "https://auth0.com/docs/secure/tokens/access-tokens"
1313     * @see "https://auth0.com/docs/secure/tokens/id-tokens"
1314     * @see "https://auth0.com/docs/secure/tokens/refresh-tokens"
1315     * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/"
1316     * @see "https://jwt.io/libraries?language=Java"
1317     * @see "https://pentesterlab.com/blog/secure-jwt-library-design"
1318     * @see "https://github.com/auth0/java-jwt"
1319     */
1320    public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) {
1321        boolean isValid = false;
1322        TokenType tokenType;
1323        try {
1324            if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) {
1325                if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) {
1326                    String jti = token.getId();
1327                    if (jti != null && !jti.trim().isEmpty()) {
1328                        boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase);
1329                        if (!jtiIsRevoked) {
1330                            //Determine the token type based on the presence of specifics claims
1331                            if (!token.getClaim("scope").isMissing()) {
1332                                tokenType = TokenType.ACCESS;
1333                            } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) {
1334                                tokenType = TokenType.ID;
1335                            } else {
1336                                tokenType = TokenType.REFRESH;
1337                            }
1338                            isValid = (tokenType.equals(expectedTokenType));
1339                        }
1340                    }
1341                }
1342            }
1343
1344        } catch (Exception e) {
1345            //In case of error then assume that the check failed
1346            isValid = false;
1347        }
1348        return isValid;
1349    }
1350
1351    /**
     * Apply validations on a regular expression to ensure that it is not prone to the ReDoS attack.
1353     * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b>
1354     * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool.
1355     *
1356     * @param regex                       String expected to be a valid regular expression (regex).
1357     * @param data                        Test data on which the regular expression is executed for the test.
1358     * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified).
1359     * @return True only if the string pass all validations.
1360     * @see "https://github.blog/security/how-to-fix-a-redos/"
1361     * @see "https://learn.snyk.io/lesson/redos"
1362     * @see "https://rules.sonarsource.com/java/RSPEC-2631/"
1363     * @see "https://github.com/doyensec/regexploit"
1364     * @see "https://github.com/makenowjust-labs/recheck"
1365     * @see "https://github.com/tjenkinson/redos-detector"
1366     * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf"
1367     * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS"
1368     */
1369    public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) {
1370        Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value.");
1371        Objects.requireNonNull(data, "A sample data is needed to perform the test.");
1372        Objects.requireNonNull(regex, "A regular expression is needed to perform the test.");
1373        boolean isSafe = false;
1374        int executionTimeout = maximumRunningTimeInSeconds.orElse(4);
1375        ExecutorService executor = Executors.newSingleThreadExecutor();
1376        try {
1377            Callable<Boolean> task = () -> {
1378                Pattern pattern = Pattern.compile(regex);
1379                return pattern.matcher(data).matches();
1380            };
1381            List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS);
1382            if (!tasks.getFirst().isCancelled()) {
1383                isSafe = true;
1384            }
1385        } catch (Exception e) {
1386            isSafe = false;
1387        } finally {
1388            executor.shutdownNow();
1389        }
1390        return isSafe;
1391    }
1392
1393    /**
1394     * Compute a UUID version 7 without using any external dependency.<br><br>
1395     * <b>Below are my personal point of view and perhaps I'm totally wrong!</b>
1396     * <br><br>
1397     * Why such method?
1398     * <ul>
1399     * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li>
1400     * <li>Import a library just to generate such value is overkill for me.</li>
1401     * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li>
1402     * </ul>
1403     * <br>
1404     * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b>
1405     * <br><br>
1406     * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly:
1407     * <ul>
1408     *     <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li>
1409     *     <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li>
1410     * </ul>
1411     *
1412     * @return A UUID object representing the UUID v7.
1413     * @see "https://uuid7.com/"
1414     * @see "https://antonz.org/uuidv7/"
1415     * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns"
1416     * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7"
1417     * @see "https://www.baeldung.com/java-generating-time-based-uuids"
1418     * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier"
1419     * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/"
1420     */
1421    public static UUID computeUUIDv7() {
1422        SecureRandom secureRandom = new SecureRandom();
1423        // Generate truly random bytes
1424        byte[] value = new byte[16];
1425        secureRandom.nextBytes(value);
1426        // Get current timestamp in milliseconds
1427        ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES);
1428        timestamp.putLong(System.currentTimeMillis());
1429        // Create the TIMESTAMP part of the UUID
1430        System.arraycopy(timestamp.array(), 2, value, 0, 6);
1431        // Create the VERSION and the VARIANT parts of the UUID
1432        value[6] = (byte) ((value[6] & 0x0F) | 0x70);
1433        value[8] = (byte) ((value[8] & 0x3F) | 0x80);
1434        //Create the HIGH and LOW parts of the UUID
1435        ByteBuffer buf = ByteBuffer.wrap(value);
1436        long high = buf.getLong();
1437        long low = buf.getLong();
1438        //Create and return the UUID object
1439        UUID uuidv7 = new UUID(high, low);
1440        return uuidv7;
1441    }
1442
1443    /**
1444     * Ensure that an XSD file does not contain any include/import/redefine instruction (prevent exposure to SSRF).
1445     *
1446     * @param xsdFilePath Filename of the XSD file to check.
1447     * @return True only if the file pass all validations.
1448     * @see "https://portswigger.net/web-security/ssrf"
1449     * @see "https://www.w3schools.com/Xml/el_import.asp"
1450     * @see "https://www.w3schools.com/xml/el_include.asp"
1451     * @see "https://www.linkedin.com/posts/righettod_appsec-appsecurity-java-activity-7344048434326188053-6Ru9"
1452     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/validation/SchemaFactory.html#setProperty(java.lang.String,java.lang.Object)"
1453     */
1454    public static boolean isXSDSafe(String xsdFilePath) {
1455        boolean isSafe = false;
1456        try {
1457            File xsdFile = new File(xsdFilePath);
1458            if (xsdFile.exists() && xsdFile.canRead() && xsdFile.isFile()) {
1459                //Parse the XSD file, if an exception occur then it's imply that the XSD specified is not a valid ones
1460                //Create an schema factory throwing Exception if a external schema is specified
1461                SchemaFactory schemaFactory = SchemaFactory.newDefaultInstance();
1462                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1463                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
1464                //Parse the schema
1465                Schema schema = schemaFactory.newSchema(xsdFile);
1466                isSafe = (schema != null);
1467            }
1468        } catch (Exception e) {
1469            isSafe = false;
1470        }
1471        return isSafe;
1472    }
1473
1474
1475    /**
1476     * Extract all sensitive information from a string provided.<br>
1477     * This can be used to identify any sensitive information into a <a href="https://cwe.mitre.org/data/definitions/532.html">message expected to be written in a log</a> and then replace every sensitive values by an obfuscated ones.<br><br>
1478     * For the luxembourg national identification number, this method focus on detecting identifiers for a physical entity (people) and not a moral one (company).<br><br>
1479     * I delegated the validation of the IBAN to a dedicated library (<a href="https://github.com/arturmkrtchyan/iban4j">iban4j</a>) to not "reinvent the wheel" and then introduce buggy validation myself. I used <b>iban4j</b> over the <b><a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/IBANValidator.html">IBANValidator</a></b> class from the <a href="https://commons.apache.org/proper/commons-validator/"><b>Apache Commons Validator</b></a> library because <b>iban4j</b> perform a full official IBAN specification validation so its reduce risks of false-positives by ensuring that an IBAN detected is a real IBAN.<br><br>
1480     * Same thing and reason regarding the validation of the bank card PAN using the  class <a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html">CreditCardValidator</a> from the <b>Apache Commons Validator</b> library.
1481     *
1482     * @param content String in which sensitive information must be searched.
1483     * @return A map with the collection of identified sensitive information gathered by sensitive information type. If nothing is found then the map is empty. A type of sensitive information is only present if there is at least one item found. A set is used to not store duplicates occurrence of the same sensitive information.
1484     * @throws Exception If any error occurs during the processing.
1485     * @see "https://guichet.public.lu/en/citoyens/citoyennete/registre-national/identification/demande-numero-rnpp.html"
1486     * @see "https://cnpd.public.lu/fr/decisions-avis/2009/identifiant-unique.html"
1487     * @see "https://cnpd.public.lu/content/dam/cnpd/fr/decisions-avis/2009/identifiant-unique/48_2009.pdf"
1488     * @see "https://en.wikipedia.org/wiki/International_Bank_Account_Number"
1489     * @see "https://www.iban.com/structure"
1490     * @see "https://github.com/arturmkrtchyan/iban4j"
1491     * @see "https://cwe.mitre.org/data/definitions/532.html"
1492     * @see "https://www.baeldung.com/logback-mask-sensitive-data"
1493     * @see "https://en.wikipedia.org/wiki/Payment_card_number"
1494     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html"
1495     * @see "https://commons.apache.org/proper/commons-validator/"
1496     */
1497    public static Map<SensitiveInformationType, Set<String>> extractAllSensitiveInformation(String content) throws Exception {
1498        CreditCardValidator creditCardValidator = CreditCardValidator.genericCreditCardValidator();
1499        Pattern nationalIdentifierRegex = Pattern.compile("([0-9]{13})");
1500        Pattern ibanNonHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}[A-Z0-9]{11,30})", Pattern.CASE_INSENSITIVE);
1501        Pattern ibanHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}(?:\\s[A-Z0-9]{4}){2,7}\\s[A-Z0-9]{1,4})", Pattern.CASE_INSENSITIVE);
1502        Pattern panRegex = Pattern.compile("((?:\\d[ -]*?){13,19})");
1503        Map<SensitiveInformationType, Set<String>> data = new HashMap<>();
1504        data.put(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER, new HashSet<>());
1505        data.put(SensitiveInformationType.IBAN, new HashSet<>());
1506        data.put(SensitiveInformationType.BANK_CARD_PAN, new HashSet<>());
1507
1508        if (content != null && !content.isBlank()) {
1509            /* Step 1: Search for LU national identifier */
1510            //A national identifier have the following structure: [BIRTHDATE_YEAR_YYYY][BIRTHDATE_MONTH_MM][BIRTHDATE_DAY_DD][FIVE_INTEGER]
1511            //Define minimal and maximal birth year base on current year
1512            //Assume people live less than 120 years
1513            int maxBirthYear = LocalDate.now(ZoneId.of("Europe/Luxembourg")).getYear();
1514            int minBirthYear = maxBirthYear - 120;
1515            Matcher matcher = nationalIdentifierRegex.matcher(content);
1516            String nationalIdentierFull;
1517            int nationalIdentierYear, nationalIdentierMonth, nationalIdentierDay;
1518            while (matcher.find()) {
1519                nationalIdentierFull = matcher.group(1);
1520                //Check that the string is a valid national identifier and if yes then add it
1521                nationalIdentierYear = Integer.parseInt(nationalIdentierFull.substring(0, 4));
1522                nationalIdentierMonth = Integer.parseInt(nationalIdentierFull.substring(4, 6));
1523                nationalIdentierDay = Integer.parseInt(nationalIdentierFull.substring(6, 8));
1524                if (nationalIdentierYear >= minBirthYear && nationalIdentierYear <= maxBirthYear) {
1525                    if (nationalIdentierMonth >= 1 && nationalIdentierMonth <= 12) {
1526                        if (YearMonth.of(nationalIdentierYear, nationalIdentierMonth).isValidDay(nationalIdentierDay)) {
1527                            data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).add(nationalIdentierFull);
1528                        }
1529                    }
1530                }
1531            }
1532
1533            /* Step 2a: Search for IBAN that are non human formatted */
1534            matcher = ibanNonHumanFormattedRegex.matcher(content);
1535            String iban, ibanUpperCased;
1536            while (matcher.find()) {
1537                iban = matcher.group(1);
1538                ibanUpperCased = iban.toUpperCase(Locale.ROOT);
1539                //Check that the string is a valid IBAN and if yes then add it
1540                if (IbanUtil.isValid(ibanUpperCased)) {
1541                    data.get(SensitiveInformationType.IBAN).add(iban);
1542                }
1543            }
1544
1545            /* Step 2b: Search for IBAN that are human formatted */
1546            matcher = ibanHumanFormattedRegex.matcher(content);
1547            String ibanUpperCasedNoSpace;
1548            while (matcher.find()) {
1549                iban = matcher.group(1);
1550                ibanUpperCasedNoSpace = iban.toUpperCase(Locale.ROOT).replace(" ", "");
1551                //Check that the string is a valid IBAN and if yes then add it
1552                if (IbanUtil.isValid(ibanUpperCasedNoSpace)) {
1553                    data.get(SensitiveInformationType.IBAN).add(iban);
1554                }
1555            }
1556
1557            /* Step 3: Search for bank card PAN */
1558            matcher = panRegex.matcher(content);
1559            String pan, panNoSeparator;
1560            while (matcher.find()) {
1561                pan = matcher.group(1);
1562                panNoSeparator = pan.toUpperCase(Locale.ROOT).replace(" ", "").replace("-", "");
1563                //Check that the string is a valid PAN and if yes then add it
1564                if (creditCardValidator.isValid(panNoSeparator)) {
1565                    data.get(SensitiveInformationType.BANK_CARD_PAN).add(pan);
1566                }
1567            }
1568
1569        }
1570
1571        //Cleanup if a set is empty
1572        if (data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).isEmpty()) {
1573            data.remove(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER);
1574        }
1575        if (data.get(SensitiveInformationType.IBAN).isEmpty()) {
1576            data.remove(SensitiveInformationType.IBAN);
1577        }
1578        if (data.get(SensitiveInformationType.BANK_CARD_PAN).isEmpty()) {
1579            data.remove(SensitiveInformationType.BANK_CARD_PAN);
1580        }
1581
1582        return data;
1583    }
1584
1585    /**
1586     * Apply a collection of validations on a bytes array provided representing GZIP compressed data:
1587     * <ul>
1588     * <li>Are valid GZIP compressed data.</li>
1589     * <li>The number of bytes once decompressed is under the specified limit.</li>
1590     * </ul>
1591     * <br><b>Note:</b> The value <code>Integer.MAX_VALUE - 8</code> was chosen because during my tests on Java 25 (JDK 64 bits on Windows 11 Pro), it was possible to decompress such amount of data with the default JVM settings without causing an <a href="https://docs.oracle.com/en/java/javase/25/docs/api//java.base/java/lang/OutOfMemoryError.html">Out Of Memory error</a>.
1592     *
1593     * @param compressedBytes                    Array of bytes containing the GZIP compressed data to check.
1594     * @param maxCountOfDecompressedBytesAllowed Maximum number of decompressed bytes allowed. Default to 10 MB if the specified value is inferior to 1 or superior to Integer.MAX_VALUE - 8.
1595     * @return True only if the file pass all validations.
1596     * @see "https://en.wikipedia.org/wiki/Gzip"
1597     * @see "https://www.rapid7.com/db/modules/auxiliary/dos/http/gzip_bomb_dos/"
1598     */
1599    public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxCountOfDecompressedBytesAllowed) {
1600        boolean isSafe = false;
1601
1602        try {
1603            long limit = maxCountOfDecompressedBytesAllowed;
1604            long totalRead = 0L;
1605            byte[] buffer = new byte[8 * 1024];
1606            int read;
1607            if (limit < 1 || limit > (Integer.MAX_VALUE - 8)) {
1608                limit = 10_000_000;
1609            }
1610            try (ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); GZIPInputStream gzipInputStream = new GZIPInputStream(new BufferedInputStream(bis))) {
1611                while ((read = gzipInputStream.read(buffer)) != -1) {
1612                    totalRead += read;
1613                    if (totalRead > limit) {
1614                        throw new Exception();
1615                    }
1616                }
1617            }
1618            isSafe = true;
1619        } catch (Exception e) {
1620            isSafe = false;
1621        }
1622
1623        return isSafe;
1624    }
1625
1626    /**
1627     * Process a string, intended to be written in a log, to remove as much as possible information that can lead to an exposure to a log injection vulnerability.<br><br>
1628     * <b>Log injection</b> is also called <b>log forging</b>.<br><br>
1629     * The following information are removed:
1630     * <ul>
1631     *     <li>Characters: Carriage Return (CR), Linefeed (LF) and Tabulation (TAB).</li>
1632     *     <li>Characters: Unicode LINE SEPARATOR and Unicode PARAGRAPH SEPARATOR.</li>
1633     *     <li>Characters: CSI sequences and bare ESC.</li>
1634     *     <li>Leading and trailing spaces.</li>
1635     *     <li>Any HTML tags.</li>
1636     * </ul><br>
1637     * A parameter is also used to limit the maximum length of the sanitized message.
1638     * To remove any HTML tags, the OWASP project <a href="https://owasp.org/www-project-java-html-sanitizer/">Java HTML Sanitizer</a> is leveraged.<br>
1639     * I delegated such removal to a dedicated library to prevent missing of edge cases as well as potential bypasses.
1640     *
1641     * @param message          The original string message intended to be written in a log.
1642     * @param maxMessageLength The maximum number of characters after which the sanitized message must be truncated. If inferior to 1 then default to the value of 500.
1643     * @return The string message cleaned.
1644     * @see "https://www.wallarm.com/what/log-forging-attack"
1645     * @see "https://www.invicti.com/learn/crlf-injection"
1646     * @see "https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_injection_vulnerability.html"
1647     * @see "https://capec.mitre.org/data/definitions/93.html"
1648     * @see "https://codeql.github.com/codeql-query-help/javascript/js-log-injection/"
1649     * @see "https://owasp.org/www-project-java-html-sanitizer/"
1650     * @see "https://github.com/OWASP/java-html-sanitizer"
1651     */
1652    public static String sanitizeLogMessage(String message, int maxMessageLength) {
1653        String sanitized = message;
1654        int maxSanitizedMessageLength = maxMessageLength;
1655
1656        if (sanitized != null && !sanitized.isBlank()) {
1657            if (maxSanitizedMessageLength < 1) {
1658                maxSanitizedMessageLength = 500;
1659            }
1660            //Step 1: Remove any CR/LR/TAB characters as well as leading and trailing spaces
1661            sanitized = sanitized.replaceAll("[\\n\\r\\t]", "").trim();
1662            //Step 2: Remove any Unicode LINE SEPARATOR or Unicode PARAGRAPH SEPARATOR as well as leading and trailing spaces
1663            sanitized = sanitized.replace("\u2028", "").replace("\u2029", "").trim();
1664            //Step 3: Remove ANSI escape sequences as well as leading and trailing spaces
1665            sanitized = sanitized.replaceAll("\u001B\\[[\\d;]*[a-zA-Z]", "").replace("\u001B", "").trim();
1666            //Step 4: Remove any HTML tags
1667            PolicyFactory htmlSanitizerPolicy = new HtmlPolicyBuilder().toFactory();
1668            sanitized = htmlSanitizerPolicy.sanitize(sanitized);
1669            //Step 5: Truncate the string in case of need
1670            if (sanitized.length() > maxSanitizedMessageLength) {
1671                sanitized = sanitized.substring(0, maxSanitizedMessageLength);
1672            }
1673        }
1674
1675        return sanitized;
1676    }
1677
1678    /**
1679     * Identify if an XML is an SVG image.<br>
1680     * The goal of this method is to prevent to leverage SVG, as an vector, to achieve a XSS when XML format is accepted.<br>
1681     * Leverage <a href="https://xmlgraphics.apache.org/batik/">Apache Batik</a> to delegate the parsing and support for the SVG format.<br><br>
1682     * <b>Due to the intended usage of the method, the following choice were made:</b>
1683     * <ul>
1684     * <li>Raise an exception when a non SVG related external references is identified.</li>
1685     * <li>Throw any exception that can occur if the provided content is invalid like for example an invalid XML file or a non existing file.</li>
1686     * <li>Explicitly check the XML prior to pass it to Batik even if Batik seems not prone to XXE/SSRF classes of vulnerability.</li>
1687     * </ul>
1688     *
1689     * @param xmlFilePath Filename of the XML file to check.
1690     * @return True only if XML is an valid SVG image.
1691     * @throws SecurityException If a non SVG external references is detected into the XML content.
1692     * @throws Exception         If a error occur due to an invalid content provided.
1693     * @see "https://developer.mozilla.org/en-US/docs/Web/SVG"
1694     * @see "https://www.fortinet.com/blog/threat-research/scalable-vector-graphics-attack-surface-anatomy"
1695     * @see "https://portswigger.net/web-security/cross-site-scripting"
1696     * @see "https://xmlgraphics.apache.org/batik/"
1697     * @see "https://github.com/apache/xmlgraphics-batik/blob/main/batik-dom/src/main/java/org/apache/batik/dom/util/SAXDocumentFactory.java#L420"
1698     * @see "https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-dom"
1699     * @see "https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-anim"
1700     * @see "https://portswigger.net/web-security/xxe"
1701     * @see "https://portswigger.net/web-security/ssrf"
1702     */
1703    public static boolean isXMLSVGImage(String xmlFilePath) throws Exception {
1704        boolean isSvg = true;
1705        List<String> svgValidSystemIDs = List.of("http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd", "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd", "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd", "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
1706
1707        //Load the XML content into a reader
1708        String xmlContent = Files.readString(Paths.get(xmlFilePath));
1709        //Then ensure that the XML document does not contains any non SVG external references
1710        try (Reader reader = StringReader.of(xmlContent)) {
1711            DocumentBuilderFactory xmlFactory = DocumentBuilderFactory.newInstance();
1712            DocumentBuilder docBuilder = xmlFactory.newDocumentBuilder();
1713            docBuilder.setEntityResolver((publicId, systemId) -> {
1714                if (systemId != null && !svgValidSystemIDs.contains(systemId)) {
1715                    throw new SecurityException("External references detected: " + systemId);
1716                }
1717                return new InputSource(new ByteArrayInputStream("".getBytes()));
1718            });
1719            docBuilder.parse(new InputSource(reader));
1720        }
1721        //Then parse the XML with Apache Batik
1722        try (Reader reader = StringReader.of(xmlContent)) {
1723            //Method SAXDocumentFactory.createDocument() do not load external DTD or entities.
1724            String parserClassName = XMLResourceDescriptor.getXMLParserClassName();
1725            SAXSVGDocumentFactory svgFactory = new SAXSVGDocumentFactory(parserClassName);
1726            //Method svgFactory.createSVGDocument() raise an IO exception if the XML is not a valid SVG image
1727            try {
1728                SVGDocument doc = svgFactory.createSVGDocument(null, reader);
1729                isSvg = (doc != null && doc.getRootElement() != null);
1730            } catch (IOException e) {
1731                isSvg = false;
1732            }
1733        }
1734
1735        return isSvg;
1736    }
1737}