001package eu.righettod;
002
003
004import com.auth0.jwt.interfaces.DecodedJWT;
005import org.apache.batik.anim.dom.SAXSVGDocumentFactory;
006import org.apache.batik.util.XMLResourceDescriptor;
007import org.apache.commons.csv.CSVFormat;
008import org.apache.commons.csv.CSVRecord;
009import org.apache.commons.imaging.ImageInfo;
010import org.apache.commons.imaging.Imaging;
011import org.apache.commons.imaging.common.ImageMetadata;
012import org.apache.commons.validator.routines.CreditCardValidator;
013import org.apache.commons.validator.routines.EmailValidator;
014import org.apache.commons.validator.routines.InetAddressValidator;
015import org.apache.pdfbox.Loader;
016import org.apache.pdfbox.pdmodel.PDDocument;
017import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
018import org.apache.pdfbox.pdmodel.PDDocumentInformation;
019import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
020import org.apache.pdfbox.pdmodel.common.PDMetadata;
021import org.apache.pdfbox.pdmodel.interactive.action.*;
022import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
023import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
024import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
025import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
026import org.apache.poi.poifs.filesystem.DirectoryEntry;
027import org.apache.poi.poifs.filesystem.POIFSFileSystem;
028import org.apache.poi.poifs.macros.VBAMacroReader;
029import org.apache.tika.detect.DefaultDetector;
030import org.apache.tika.detect.Detector;
031import org.apache.tika.io.TemporaryResources;
032import org.apache.tika.io.TikaInputStream;
033import org.apache.tika.metadata.Metadata;
034import org.apache.tika.mime.MediaType;
035import org.apache.tika.mime.MimeTypes;
036import org.iban4j.IbanUtil;
037import org.owasp.html.HtmlPolicyBuilder;
038import org.owasp.html.PolicyFactory;
039import org.w3c.dom.Document;
040import org.w3c.dom.svg.SVGDocument;
041import org.xml.sax.EntityResolver;
042import org.xml.sax.InputSource;
043import org.xml.sax.SAXException;
044
045import javax.crypto.Mac;
046import javax.crypto.spec.SecretKeySpec;
047import javax.imageio.ImageIO;
048import javax.json.Json;
049import javax.json.JsonReader;
050import javax.xml.XMLConstants;
051import javax.xml.parsers.DocumentBuilder;
052import javax.xml.parsers.DocumentBuilderFactory;
053import javax.xml.parsers.ParserConfigurationException;
054import javax.xml.stream.XMLInputFactory;
055import javax.xml.stream.XMLStreamReader;
056import javax.xml.stream.events.XMLEvent;
057import javax.xml.validation.Schema;
058import javax.xml.validation.SchemaFactory;
059import java.awt.*;
060import java.awt.image.BufferedImage;
061import java.io.*;
062import java.net.*;
063import java.net.http.HttpClient;
064import java.net.http.HttpRequest;
065import java.net.http.HttpResponse;
066import java.nio.ByteBuffer;
067import java.nio.charset.Charset;
068import java.nio.charset.StandardCharsets;
069import java.nio.file.Files;
070import java.nio.file.Paths;
071import java.security.MessageDigest;
072import java.security.SecureRandom;
073import java.time.Duration;
074import java.time.LocalDate;
075import java.time.YearMonth;
076import java.time.ZoneId;
077import java.util.*;
078import java.util.List;
079import java.util.concurrent.*;
080import java.util.concurrent.atomic.AtomicInteger;
081import java.util.regex.Matcher;
082import java.util.regex.Pattern;
083import java.util.zip.GZIPInputStream;
084import java.util.zip.ZipEntry;
085import java.util.zip.ZipFile;
086
087/**
088 * Provides different utilities methods to apply processing from a security perspective.<br>
 * These code snippets:
090 * <ul>
091 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
092 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
093 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
094 * </ul>
095 * <br>
096 * <a href="https://github.com/righettod/code-snippets-security-utils">GitHub repository</a>.<br><br>
097 * <a href="https://github.com/righettod/code-snippets-security-utils/blob/main/src/main/java/eu/righettod/SecurityUtils.java">Source code of the class</a>.
098 */
099public class SecurityUtils {
100    /**
101     * Default constructor: Not needed as the class only provides static methods.
102     */
103    private SecurityUtils() {
104    }
105
106    /**
     * Apply a collection of validations to verify whether a provided PIN code is considered weak (easy to guess) or not.<br>
108     * This method consider that format of the PIN code is [0-9]{6,}<br>
109     * Rule to consider a PIN code as weak:
110     * <ul>
111     * <li>Length is inferior to 6 positions.</li>
112     * <li>Contain only the same number or only a sequence of zero.</li>
113     * <li>Contain sequence of following incremental or decremental numbers.</li>
114     * </ul>
115     *
116     * @param pinCode PIN code to verify.
117     * @return True only if the PIN is considered as weak.
118     */
119    public static boolean isWeakPINCode(String pinCode) {
120        boolean isWeak = true;
121        //Length is inferior to 6 positions
122        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
123        //and to ensure that the PIN is not only a sequence of zero
124        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
125            //Contain only the same number
126            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
127            if (!Pattern.matches(regex, pinCode)) {
128                //Contain sequence of following incremental or decremental numbers
129                char previousChar = 'X';
130                boolean containSequence = false;
131                for (char c : pinCode.toCharArray()) {
132                    if (previousChar != 'X') {
133                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
134                        int currentNbr = Integer.parseInt(String.valueOf(c));
135                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
136                            containSequence = true;
137                            break;
138                        }
139                    }
140                    previousChar = c;
141                }
142                if (!containSequence) {
143                    isWeak = false;
144                }
145            }
146        }
147        return isWeak;
148    }
149
150    /**
151     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
152     * <ul>
153     * <li>Real Microsoft Word 97-2003 document file.</li>
154     * <li>No VBA Macro.<br></li>
155     * <li>No embedded objects.</li>
156     * </ul>
157     *
158     * @param wordFilePath Filename of the Word document file to check.
159     * @return True only if the file pass all validations.
160     * @see "https://poi.apache.org/components/"
161     * @see "https://poi.apache.org/components/document/"
162     * @see "https://poi.apache.org/components/poifs/how-to.html"
163     * @see "https://poi.apache.org/components/poifs/embeded.html"
164     * @see "https://poi.apache.org/"
165     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
166     */
167    public static boolean isWord972003DocumentSafe(String wordFilePath) {
168        boolean isSafe = false;
169        try {
170            File wordFile = new File(wordFilePath);
171            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
172                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
173                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
174                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
175                    VBAMacroReader macroReader = new VBAMacroReader(fs);
176                    Map<String, String> macros = macroReader.readMacros();
177                    if (macros == null || macros.isEmpty()) {
178                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
179                        //From POI documentation:
180                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
181                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
182                        final List<String> embeddedObjectFound = new ArrayList<>();
183                        DirectoryEntry root = fs.getRoot();
184                        if (root.getEntryCount() > 0) {
185                            root.iterator().forEachRemaining(entry -> {
186                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
187                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
188                                    if (objPoolDirectory.getEntryCount() > 0) {
189                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
190                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
191                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
192                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
193                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
194                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
195                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
196                                                        }
197                                                    });
198                                                }
199                                            }
200                                        });
201                                    }
202                                }
203                            });
204                        }
205                        isSafe = embeddedObjectFound.isEmpty();
206                    }
207                }
208            }
209        } catch (Exception e) {
210            isSafe = false;
211        }
212        return isSafe;
213    }
214
215    /**
216     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
217     *
218     * @param xmlFilePath Filename of the XML file to check.
219     * @return True only if the file pass all validations.
220     * @see "https://portswigger.net/web-security/xxe"
221     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
222     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
223     * @see "https://www.w3.org/TR/xinclude-11/"
224     * @see "https://en.wikipedia.org/wiki/XInclude"
225     */
226    public static boolean isXMLSafe(String xmlFilePath) {
227        boolean isSafe = false;
228        try {
229            File xmlFile = new File(xmlFilePath);
230            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
231                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
232                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
233                if (!containXInclude) {
234                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
235                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
236                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
237                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
238                    //Xerces 2 only
239                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
240                    dbfInstance.setXIncludeAware(false);
241                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
242                    //Parse the document
243                    Document doc = builder.parse(xmlFile);
244                    isSafe = (doc != null && doc.getDocumentElement() != null);
245                }
246            }
247        } catch (Exception e) {
248            isSafe = false;
249        }
250        return isSafe;
251    }
252
253
254    /**
255     * Extract all URL links from a PDF file provided.<br>
256     * This can be used to apply validation on a PDF against contained links.
257     *
     * @param pdfFilePath Filename of the PDF file to process.
259     * @return A List of URL objects that is empty if no links is found.
260     * @throws Exception If any error occurs during the processing of the PDF file.
261     * @see "https://www.gushiciku.cn/pl/21KQ"
262     * @see "https://pdfbox.apache.org/"
263     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
264     */
265    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
266        final List<URL> links = new ArrayList<>();
267        File pdfFile = new File(pdfFilePath);
268        try (PDDocument document = Loader.loadPDF(pdfFile)) {
269            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
270            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
271                @Override
272                public boolean accept(PDAnnotation annotation) {
273                    boolean keep = false;
274                    if (annotation instanceof PDAnnotationLink) {
275                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
276                    }
277                    return keep;
278                }
279            };
280            documentCatalog.getPages().forEach(page -> {
281                try {
282                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
283                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
284                        try {
285                            URL urlObj = new URL(linkAnnotation.getURI());
286                            if (!links.contains(urlObj)) {
287                                links.add(urlObj);
288                            }
289                        } catch (MalformedURLException e) {
290                            throw new RuntimeException(e);
291                        }
292                    });
293                } catch (Exception e) {
294                    throw new RuntimeException(e);
295                }
296            });
297        }
298        return links;
299    }
300
301    /**
302     * Apply a collection of validations on a PDF file provided:
303     * <ul>
304     * <li>Real PDF file.</li>
305     * <li>No attachments.</li>
306     * <li>No Javascript code.</li>
307     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
308     * <li>No XFA forms in order to prevent exposure to XXE/SSRF like CVE-2025-54988.</li>
309     * </ul>
310     *
311     * @param pdfFilePath Filename of the PDF file to check.
312     * @return True only if the file pass all validations.
313     * @see "https://stackoverflow.com/a/36161267"
314     * @see "https://www.gushiciku.cn/pl/21KQ"
315     * @see "https://github.com/jonaslejon/malicious-pdf"
316     * @see "https://pdfbox.apache.org/"
317     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
318     * @see "https://nvd.nist.gov/vuln/detail/CVE-2025-54988"
319     * @see "https://github.com/mgthuramoemyint/POC-CVE-2025-54988"
320     * @see "https://en.wikipedia.org/wiki/XFA"
321     */
322    public static boolean isPDFSafe(String pdfFilePath) {
323        boolean isSafe = false;
324        try {
325            File pdfFile = new File(pdfFilePath);
326            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
327                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
328                try (PDDocument document = Loader.loadPDF(pdfFile)) {
329                    //Step 2: Check if the file contains attached files, in our case is not allowed
330                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
331                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
332                    if (namesDictionary.getEmbeddedFiles() == null) {
333                        //Step 3: Check if the file contains any XFA forms
334                        PDAcroForm acroForm = documentCatalog.getAcroForm();
335                        boolean hasForm = (acroForm != null && acroForm.getXFA() != null);
336                        if (!hasForm) {
337                            //Step 4: Check if the file contains Javascript code, in our case is not allowed
338                            if (namesDictionary.getJavaScript() == null) {
339                                //Step 5: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
340                                final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
341                                AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
342                                    @Override
343                                    public boolean accept(PDAnnotation annotation) {
344                                        boolean keep = false;
345                                        if (annotation instanceof PDAnnotationLink) {
346                                            PDAnnotationLink link = (PDAnnotationLink) annotation;
347                                            PDAction action = link.getAction();
348                                            if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
349                                                keep = true;
350                                            }
351                                        }
352                                        return keep;
353                                    }
354                                };
355                                documentCatalog.getPages().forEach(page -> {
356                                    try {
357                                        notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
358                                    } catch (IOException e) {
359                                        throw new RuntimeException(e);
360                                    }
361                                });
362                                if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
363                                    isSafe = true;
364                                }
365                            }
366                        }
367                    }
368                }
369            }
370        } catch (Exception e) {
371            isSafe = false;
372        }
373        return isSafe;
374    }
375
376    /**
377     * Remove as much as possible metadata from the provided PDF document object.
378     *
379     * @param document PDFBox PDF document object on which metadata must be removed.
380     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
381     * @see "https://pdfbox.apache.org/"
382     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
383     */
384    public static void clearPDFMetadata(PDDocument document) {
385        if (document != null) {
386            PDDocumentInformation infoEmpty = new PDDocumentInformation();
387            document.setDocumentInformation(infoEmpty);
388            PDMetadata newMetadataEmpty = new PDMetadata(document);
389            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
390        }
391    }
392
393
394    /**
395     * Validate that the URL provided is really a relative URL.
396     *
397     * @param targetUrl URL to validate.
     * @return True only if the URL passes all validations.
399     * @see "https://portswigger.net/web-security/ssrf"
400     * @see "https://stackoverflow.com/q/6785442"
401     */
402    public static boolean isRelativeURL(String targetUrl) {
403        boolean isValid = false;
404        //Reject any URL encoded content and URL starting with a double slash
405        //Reject any URL contains credentials or fragment to prevent potential bypasses
406        String work = targetUrl;
407        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
408            //Creation of a URL object must fail
409            try {
410                new URL(work);
411                isValid = false;
412            } catch (MalformedURLException mf) {
413                //Last check to be sure (for prod usage compile the pattern one time)
414                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
415            }
416        }
417        return isValid;
418    }
419
420    /**
421     * Apply a collection of validations on a ZIP file provided:
422     * <ul>
423     * <li>Real ZIP file.</li>
424     * <li>Contain less than a specified level of deepness.</li>
425     * <li>Do not contain Zip-Slip entry path.</li>
426     * </ul>
427     *
428     * @param zipFilePath       Filename of the ZIP file to check.
429     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
430     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
431     * @return True only if the file pass all validations.
432     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
433     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
434     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
435     * @see "https://github.com/ptoomey3/evilarc"
436     * @see "https://github.com/abdulfatir/ZipBomb"
437     * @see "https://www.baeldung.com/cs/zip-bomb"
438     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
439     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
440     */
441    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
442        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
443        boolean isSafe = false;
444        try {
445            File zipFile = new File(zipFilePath);
446            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
447                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
448                try (ZipFile zipArch = new ZipFile(zipFile)) {
449                    //Step 2: Parse entries
450                    long deepness = 0;
451                    ZipEntry zipEntry;
452                    String entryExtension;
453                    String zipEntryName;
454                    boolean validationsFailed = false;
455                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
456                    while (entries.hasMoreElements()) {
457                        zipEntry = entries.nextElement();
458                        zipEntryName = zipEntry.getName();
459                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
460                        //Step 2a: Check if the current entry is an archive file
461                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
462                            validationsFailed = true;
463                            break;
464                        }
465                        //Step 2b: Check that level of deepness is inferior to the threshold specified
466                        if (zipEntryName.contains("/")) {
467                            //Determine deepness by inspecting the entry name.
468                            //Indeed, folder will be represented like this: folder/folder/folder/
469                            //So we can count the number of "/" to identify the deepness of the entry
470                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
471                            if (deepness > maxLevelDeepness) {
472                                validationsFailed = true;
473                                break;
474                            }
475                        }
476                        //Step 2c: Check if any entries match pattern of zip slip payload
477                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
478                            validationsFailed = true;
479                            break;
480                        }
481                    }
482                    if (!validationsFailed) {
483                        isSafe = true;
484                    }
485                }
486            }
487        } catch (Exception e) {
488            isSafe = false;
489        }
490        return isSafe;
491    }
492
493    /**
494     * Identify the mime type of the content specified (array of bytes).<br>
495     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
496     *
497     * @param content The content as an array of bytes.
498     * @return The mime type in lower case or null if it cannot be identified.
499     * @see "https://twitter.com/righettod/status/1595824709186519041"
500     * @see "https://tika.apache.org/"
501     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
502     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
503     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
504     */
505    public static String identifyMimeType(byte[] content) {
506        String mimeType = null;
507        if (content != null && content.length > 0) {
508            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
509            Metadata metadata = new Metadata();
510            try {
511                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
512                    MediaType mt = detector.detect(tikaInputStream, metadata);
513                    if (mt != null) {
514                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
515                    }
516                }
517            } catch (IOException ioe) {
518                mimeType = null;
519            }
520        }
521        return mimeType;
522    }
523
524    /**
     * Apply a collection of validations on a string expected to be a public IP address:
526     * <ul>
527     * <li>Is a valid IP v4 or v6 address.</li>
528     * <li>Is public from an Internet perspective.</li>
529     * </ul>
530     * <br>
531     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
532     * <br><br>
533     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
534     *
535     * @param ip String expected to be a valid IP address.
536     * @return True only if the string pass all validations.
537     * @see "https://commons.apache.org/proper/commons-validator/"
538     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
539     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
540     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
541     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
542     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
543     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
544     * @see "https://ipcisco.com/lesson/ipv6-address/"
545     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
546     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
547     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
548     * @see "https://en.wikipedia.org/wiki/Multicast_address"
549     * @see "https://stackoverflow.com/a/5619409"
550     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
551     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
552     * @see "https://developer.android.com/reference/java/net/Inet6Address"
553     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
554     */
555    public static boolean isPublicIPAddress(String ip) {
556        boolean isValid = false;
557        try {
558            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
559            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
560                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
561                if (InetAddressValidator.getInstance().isValid(ip)) {
562                    //If OK then validate that is an public IP address
563                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
564                    InetAddress addr = InetAddress.getByName(ip);
565                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
566                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
567                    //For the prefix map, the start of the key indicates if the value is a regex or a string
568                    if (isValid && (addr instanceof Inet6Address)) {
569                        Map<String, String> prefixes = new HashMap<>();
570                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
571                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
572                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
573                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
574                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
575                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
576                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
577                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
578                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
579                        final List<Boolean> results = new ArrayList<>();
580                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
581                        prefixes.forEach((addressType, expr) -> {
582                            String exprLower = expr.trim().toLowerCase();
583                            if (addressType.startsWith("STRING_")) {
584                                results.add(ipLower.startsWith(exprLower));
585                            } else {
586                                results.add(Pattern.matches(exprLower, ipLower));
587                            }
588                        });
589                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
590                    }
591                }
592            }
593        } catch (Exception e) {
594            isValid = false;
595        }
596        return isValid;
597    }
598
599    /**
600     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
     * This method takes care to build the source string in a way that prevents it from being prone to abuse targeting the different parts composing it.<br><br>
     * <p>
     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equal to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
     * </p>
     * This method ensures that both hashes above will be different.<br><br>
     *
     * <b>Note:</b> The character <code>|</code> is used as the separator between parts, so a part is not allowed to contain this character.
609     *
610     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
611     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
612     * @throws Exception If any exception occurs
613     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
614     * @see "https://pentesterlab.com/badges/codereview"
615     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
616     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
617     */
618    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
619        byte[] hash = null;
620        String separator = "|";
621        if (parts != null && !parts.isEmpty()) {
622            //Ensure that not part is null
623            if (parts.stream().anyMatch(Objects::isNull)) {
624                throw new IllegalArgumentException("No part must be null!");
625            }
626            //Ensure that the separator is absent from every part
627            if (parts.stream().anyMatch(part -> part.contains(separator))) {
628                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
629            }
630            MessageDigest digest = MessageDigest.getInstance("SHA-256");
631            final StringBuilder buffer = new StringBuilder(separator);
632            parts.forEach(p -> {
633                buffer.append(p).append(separator);
634            });
635            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
636        }
637        return hash;
638    }
639
640    /**
641     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
642     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
643     * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations.
644     *
645     * @param xmlFilePath              Filename of the XML file to check.
646     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
647     * @return True only if the file pass all validations.
648     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
649     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
650     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
651     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
652     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
653     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
654     */
655    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
656        boolean isSafe = false;
657        final String errorTemplate = "Non allowed %s ID detected!";
658        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
659        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
660
661        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
662            throw new IllegalArgumentException("At least one SID must be specified!");
663        }
664        File xmlFile = new File(xmlFilePath);
665        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
666            try {
667                EntityResolver resolverValidator = (publicId, systemId) -> {
668                    if (publicId != null) {
669                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
670                    }
671                    if (!allowedSystemIdentifiers.contains(systemId)) {
672                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
673                    }
674                    //If it is OK then return a empty DTD/XSD
675                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
676                };
677                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
678                dbfInstance.setIgnoringElementContentWhitespace(true);
679                dbfInstance.setXIncludeAware(false);
680                dbfInstance.setValidating(false);
681                dbfInstance.setCoalescing(true);
682                dbfInstance.setIgnoringComments(false);
683                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
684                builder.setEntityResolver(resolverValidator);
685                Document doc = builder.parse(xmlFile);
686                isSafe = (doc != null);
687            } catch (SAXException | IOException | ParserConfigurationException e) {
688                isSafe = false;
689            }
690        }
691
692        return isSafe;
693    }
694
695    /**
696     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
697     * <ul>
698     * <li>Real CSV file.</li>
699     * <li>Do not contains any payload related to a CSV injections.</li>
700     * </ul>
701     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
702     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
703     *
704     * @param csvFilePath Filename of the CSV file to check.
705     * @return True only if the file pass all validations.
706     * @see "https://commons.apache.org/proper/commons-csv/"
707     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
708     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
709     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
710     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
711     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
712     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
713     * @see "https://cwe.mitre.org/data/definitions/1236.html"
714     */
715    public static boolean isExcelCSVSafe(String csvFilePath) {
716        boolean isSafe;
717        final AtomicInteger recordCount = new AtomicInteger();
718        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
719
720        try {
721            final List<String> payloadsIdentified = new ArrayList<>();
722            try (Reader in = new FileReader(csvFilePath)) {
723                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
724                records.forEach(record -> {
725                    record.forEach(recordValue -> {
726                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
727                            payloadsIdentified.add(recordValue);
728                        }
729                        recordCount.getAndIncrement();
730                    });
731                });
732            }
733            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
734        } catch (Exception e) {
735            isSafe = false;
736        }
737
738        return isSafe;
739    }
740
741    /**
742     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
743     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
744     *
745     * @param processingModeType Define the mode of processing i.e. protect or validate. ({@link ProcessingModeType})
     * @param input              When the processing mode is "protect" then the expected input (string) is a java serialized object encoded in Base64, otherwise (processing mode is "validate") the expected input is the output of this method when the "protect" mode was used.
747     * @param secret             Secret to use to compute the SHA256 HMAC.
748     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
749     * @throws Exception If any exception occurs.
750     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
751     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
752     * @see "https://portswigger.net/web-security/deserialization"
753     * @see "https://www.baeldung.com/java-serialization-approaches"
754     * @see "https://www.baeldung.com/java-serialization"
755     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
756     * @see "https://en.wikipedia.org/wiki/HMAC"
757     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
758     */
759    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingModeType processingModeType, String input, byte[] secret) throws Exception {
760        Map<String, Object> results;
761        String resultFormatTemplate = "%s:%s";
762        //Verify input provided to be consistent
763        if (processingModeType == null) {
764            throw new IllegalArgumentException("The processing mode is mandatory!");
765        }
766        if (input == null || input.trim().isEmpty()) {
767            throw new IllegalArgumentException("Input data is mandatory!");
768        }
769        if (secret == null || secret.length == 0) {
770            throw new IllegalArgumentException("The HMAC secret is mandatory!");
771        }
772        if (processingModeType.equals(ProcessingModeType.VALIDATE) && input.split(":").length != 2) {
773            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
774        }
775        //Processing
776        Base64.Decoder b64Decoder = Base64.getDecoder();
777        Base64.Encoder b64Encoder = Base64.getEncoder();
778        String hmacAlgorithm = "HmacSHA256";
779        Mac mac = Mac.getInstance(hmacAlgorithm);
780        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
781        mac.init(key);
782        results = new HashMap<>();
783        results.put("PROCESSING_MODE", processingModeType.toString());
784        switch (processingModeType) {
785            case PROTECT -> {
786                byte[] objectBytes = b64Decoder.decode(input);
787                byte[] hmac = mac.doFinal(objectBytes);
788                String encodedHmac = b64Encoder.encodeToString(hmac);
789                results.put("STATUS", Boolean.TRUE);
790                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
791            }
792            case VALIDATE -> {
793                String[] parts = input.split(":");
794                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
795                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
796                byte[] hmacComputed = mac.doFinal(objectBytes);
797                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
798                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
799                results.put("STATUS", hmacIsValid);
800                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
801            }
802            default -> throw new IllegalArgumentException("Not supported processing mode!");
803        }
804        return results;
805    }
806
807    /**
808     * Apply a collection of validations on a JSON string provided:
809     * <ul>
810     * <li>Real JSON structure.</li>
811     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
812     * <li>Contain less than a specified number of items in any arrays.</li>
813     * </ul>
814     * <br>
815     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
816     * I used the following assumption:
817     * <ul>
818     *      <li>The character <code>{</code> identify the beginning of an object.</li>
819     *      <li>The character <code>}</code> identify the end of an object.</li>
820     *      <li>The character <code>[</code> identify the beginning of an array.</li>
821     *      <li>The character <code>]</code> identify the end of an array.</li>
822     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
823     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
824     * </ul>
825     *
826     * @param json                  String containing the JSON data to validate.
827     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
828     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
829     * @return True only if the string pass all validations.
830     * @see "https://javaee.github.io/jsonp/"
831     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
832     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
833     */
834    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
835        boolean isSafe = false;
836
837        try {
838            //Step 1: Analyse the JSON string
839            int currentDeepness = 0;
840            int currentArrayItemsCount = 0;
841            int maxDeepnessReached = 0;
842            int maxArrayItemsCountReached = 0;
843            boolean currentlyInArray = false;
844            boolean currentlyInString = false;
845            int currentNestedArrayLevel = 0;
846            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
847            String work = json.replace(jsonEscapedDoubleQuote, "'");
848            for (char c : work.toCharArray()) {
849                switch (c) {
850                    case '{': {
851                        if (!currentlyInString) {
852                            currentDeepness++;
853                        }
854                        break;
855                    }
856                    case '}': {
857                        if (!currentlyInString) {
858                            currentDeepness--;
859                        }
860                        break;
861                    }
862                    case '[': {
863                        if (!currentlyInString) {
864                            currentDeepness++;
865                            if (currentlyInArray) {
866                                currentNestedArrayLevel++;
867                            }
868                            currentlyInArray = true;
869                        }
870                        break;
871                    }
872                    case ']': {
873                        if (!currentlyInString) {
874                            currentDeepness--;
875                            currentArrayItemsCount = 0;
876                            if (currentNestedArrayLevel > 0) {
877                                currentNestedArrayLevel--;
878                            }
879                            if (currentNestedArrayLevel == 0) {
880                                currentlyInArray = false;
881                            }
882                        }
883                        break;
884                    }
885                    case '"': {
886                        currentlyInString = !currentlyInString;
887                        break;
888                    }
889                    case ',': {
890                        if (!currentlyInString && currentlyInArray) {
891                            currentArrayItemsCount++;
892                        }
893                        break;
894                    }
895                }
896                if (currentDeepness > maxDeepnessReached) {
897                    maxDeepnessReached = currentDeepness;
898                }
899                if (currentArrayItemsCount > maxArrayItemsCountReached) {
900                    maxArrayItemsCountReached = currentArrayItemsCount;
901                }
902            }
903            //Step 2: Apply validation against the value specified as limits
904            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
905
906            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
907            if (isSafe) {
908                JsonReader reader = Json.createReader(new StringReader(json));
909                isSafe = (reader.read() != null);
910            }
911
912        } catch (Exception e) {
913            isSafe = false;
914        }
915        return isSafe;
916    }
917
918    /**
919     * Apply a collection of validations on a image file provided:
920     * <ul>
921     * <li>Real image file.</li>
922     * <li>Its mime type is into the list of allowed mime types.</li>
923     * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li>
924     * </ul>
925     * <br>
926     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
927     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
928     *
929     * @param imageFilePath         Filename of the image file to check.
930     * @param imageAllowedMimeTypes List of image mime types allowed.
931     * @return True only if the file pass all validations.
932     * @see "https://commons.apache.org/proper/commons-imaging/"
933     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
934     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
935     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
936     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
937     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
938     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
939     * @see "https://exiftool.org/examples.html"
940     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
941     * @see "https://hexed.it/"
942     * @see "https://github.com/sighook/pixload"
943     */
944    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
945        boolean isSafe = false;
946        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
947        try {
948            File imgFile = new File(imageFilePath);
949            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
950                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
951                //Step 1: Check the mime type of the file against the allowed ones
952                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
953                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
954                    //Step 2: Load the image into an object using the Image API
955                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
956                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
957                        //Step 3: Check the metadata if the image format support it - Highly experimental
958                        List<String> metadataWithPayloads = new ArrayList<>();
959                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
960                        if (imgMetadata != null) {
961                            imgMetadata.getItems().forEach(item -> {
962                                String metadata = item.toString();
963                                if (payloadDetectionRegex.matcher(metadata).find()) {
964                                    metadataWithPayloads.add(metadata);
965                                }
966                            });
967                        }
968                        isSafe = metadataWithPayloads.isEmpty();
969                    }
970                }
971            }
972        } catch (Exception e) {
973            isSafe = false;
974        }
975        return isSafe;
976    }
977
978    /**
979     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
980     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
981     *
982     * @param inputFilePath Filename of the file to clean up.
983     * @param inputFileType Type of the file provided.
984     * @return A array of bytes with the cleaned file.
985     * @throws IllegalArgumentException If an invalid parameter is passed
986     * @throws Exception                If any technical error during the cleaning processing
987     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
988     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
989     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
990     * @see "https://stackoverflow.com/a/13605411"
991     */
992    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
993        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
994        File inputFile = new File(inputFilePath);
995        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
996            throw new IllegalArgumentException("Cannot read the content of the input file!");
997        }
998        switch (inputFileType) {
999            case PDF -> {
1000                try (PDDocument document = Loader.loadPDF(inputFile)) {
1001                    document.save(sanitizedContent);
1002                }
1003            }
1004            case IMAGE -> {
1005                // Load the original image
1006                BufferedImage originalImage = ImageIO.read(inputFile);
1007                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
1008                // Check that image has been successfully loaded
1009                if (originalImage == null) {
1010                    throw new IOException("Cannot load the original image !");
1011                }
1012                // Get current Width and Height of the image
1013                int originalWidth = originalImage.getWidth(null);
1014                int originalHeight = originalImage.getHeight(null);
1015                // Resize the image by removing 1px on Width and Height
1016                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
1017                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
1018                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
1019                // Save image to a bytes buffer
1020                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
1021                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
1022                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
1023                }
1024                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
1025                Graphics2D drawer = sanitizedImage.createGraphics();
1026                drawer.drawImage(initialSizedImage, 0, 0, null);
1027                drawer.dispose();
1028                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
1029            }
1030            default -> throw new IllegalArgumentException("Type of file not supported !");
1031        }
1032        if (sanitizedContent.size() == 0) {
1033            throw new IOException("An error occur during the rewrite operation!");
1034        }
1035        return sanitizedContent.toByteArray();
1036    }
1037
1038    /**
1039     * Apply a collection of validations on a string expected to be an email address:
1040     * <ul>
1041     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1042     * <li>Is not using "Encoded-word" format.</li>
1043     * <li>Is not using comment format.</li>
1044     * <li>Is not using "Punycode" format.</li>
1045     * <li>Is not using UUCP style addresses.</li>
1046     * <li>Is not using address literals.</li>
1047     * <li>Is not using source routes.</li>
1048     * <li>Is not using the "percent hack".</li>
1049     * </ul><br>
1050     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1051     *
     * <b>Note:</b> The notion of "valid", here, must be understood from the perspective of a secure usage of the data.
1053     *
1054     * @param addr String expected to be a valid email address.
1055     * @return True only if the string pass all validations.
1056     * @see "https://commons.apache.org/proper/commons-validator/"
1057     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1058     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1059     * @see "https://portswigger.net/research/splitting-the-email-atom"
1060     * @see "https://www.jochentopf.com/email/address.html"
1061     * @see "https://en.wikipedia.org/wiki/Email_address"
1062     */
1063    public static boolean isEmailAddress(String addr) {
1064        boolean isValid = false;
1065        String work = addr.toLowerCase(Locale.ROOT);
1066        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1067        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1068        try {
1069            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1070            if (EmailValidator.getInstance(true, true).isValid(work)) {
1071                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1072                if (!encodedWordRegex.matcher(work).find()) {
1073                    //If OK then validate it does not contains punycode
1074                    if (!work.contains("xn--")) {
1075                        //If OK then validate it does not use:
1076                        // UUCP style addresses,
1077                        // Comment format,
1078                        // Address literals,
1079                        // Source routes,
1080                        // The percent hack.
1081                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1082                            isValid = true;
1083                        }
1084                    }
1085                }
1086            }
1087        } catch (Exception e) {
1088            isValid = false;
1089        }
1090        return isValid;
1091    }
1092
1093    /**
1094     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1095     * <br>
1096     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1097     * <br>
1098     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1099     * <br>
1100     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1101     * <br>
1102     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1103     *
1104     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1105     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1106     * @see "https://www.stet.eu/en/psd2/"
1107     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1108     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1109     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1110     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1111     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1112     * @see "https://portswigger.net/web-security/ssrf"
1113     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1114     */
1115    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1116        boolean isValid = false;
1117        long connectionTimeoutInSeconds = 10;
1118        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1119        try {
1120            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1121            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1122                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1123                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1124                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1125                    URI uri = URI.create(certificateUrl);
1126                    //3. Require usage of HTTPS and reject any url containing query parameters
1127                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1128                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1129                        //and limit the interest to use the SSRF because to pass the check the url need to:
1130                        //- Do not having any query parameters.
1131                        //- Use HTTPS protocol.
1132                        //- End with a string having the format "_[0-9a-f]{64}".
1133                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1134                        HttpResponse<String> response;
1135                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1136                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1137                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1138                                    .build();
1139                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1140                            if (response.statusCode() == 200) {
1141                                //5. Ensure that the response content type is "text/plain"
1142                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1143                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1144                            }
1145                        }
1146                    }
1147                }
1148            }
1149        } catch (Exception e) {
1150            isValid = false;
1151        }
1152        return isValid;
1153    }
1154
1155    /**
1156     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1157     *
1158     * @param encodedData            URL encoded data.
1159     * @param decodingRoundThreshold Threshold above which decoding will fail.
1160     * @return The decoded data.
1161     * @throws SecurityException If the threshold is reached.
1162     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1163     * @see "https://owasp.org/www-community/Double_Encoding"
1164     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1165     * @see "https://capec.mitre.org/data/definitions/120.html"
1166     */
1167    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1168        if (decodingRoundThreshold < 1) {
1169            throw new IllegalArgumentException("Threshold must be a positive number !");
1170        }
1171        if (encodedData == null) {
1172            throw new IllegalArgumentException("Data provided must not be null !");
1173        }
1174        Charset charset = StandardCharsets.UTF_8;
1175        int currentDecodingRound = 0;
1176        boolean isFinished = false;
1177        String currentRoundData = encodedData;
1178        String previousRoundData = encodedData;
1179        while (!isFinished) {
1180            if (currentDecodingRound > decodingRoundThreshold) {
1181                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1182            }
1183            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1184            isFinished = currentRoundData.equals(previousRoundData);
1185            previousRoundData = currentRoundData;
1186            currentDecodingRound++;
1187        }
1188        return currentRoundData;
1189    }
1190
1191    /**
     * Apply a collection of validations on a string expected to be a system file/folder path:
1193     * <ul>
1194     * <li>Does not contains path traversal payload.</li>
1195     * <li>The canonical path is equals to the absolute path.</li>
1196     * </ul><br>
1197     *
1198     * @param path String expected to be a valid system file/folder path.
1199     * @return True only if the string pass all validations.
1200     * @see "https://portswigger.net/web-security/file-path-traversal"
1201     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1202     * @see "https://capec.mitre.org/data/definitions/126.html"
1203     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1204     */
1205    public static boolean isPathSafe(String path) {
1206        boolean isSafe = false;
1207        int decodingRoundThreshold = 3;
1208        try {
1209            if (path != null && !path.isEmpty()) {
1210                //URL decode the path if case of data coming from a web context
1211                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1212                //Ensure that no path traversal expression is present
1213                if (!decodedPath.contains("..")) {
1214                    File f = new File(decodedPath);
1215                    String canonicalPath = f.getCanonicalPath();
1216                    String absolutePath = f.getAbsolutePath();
1217                    isSafe = canonicalPath.equals(absolutePath);
1218                }
1219            }
1220        } catch (Exception e) {
1221            isSafe = false;
1222        }
1223        return isSafe;
1224    }
1225
1226    /**
     * Identify whether an XML document contains any XML comments or has any XSL processing instructions.<br>
1228     * Stream reader based parsing is used to support large XML tree.
1229     *
1230     * @param xmlFilePath Filename of the XML file to check.
1231     * @return True only if XML comments or XSL processing instructions are identified.
1232     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1233     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1234     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1235     * @see "https://www.w3.org/Style/styling-XML.en.html"
1236     */
1237    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1238        boolean itemsDetected = false;
1239        try {
1240            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1241            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1242            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1243            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1244            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1245            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1246
1247            //Parse file
1248            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1249                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1250                int eventType;
1251                while (reader.hasNext() && !itemsDetected) {
1252                    eventType = reader.next();
1253                    if (eventType == XMLEvent.COMMENT) {
1254                        itemsDetected = true;
1255                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1256                        itemsDetected = true;
1257                    }
1258                }
1259            }
1260        } catch (Exception e) {
1261            //In case of error then assume that the check failed
1262            itemsDetected = true;
1263        }
1264        return itemsDetected;
1265    }
1266
1267
1268    /**
1269     * Perform a set of additional validations against a JWT token:
1270     * <ul>
1271     *     <li>Do not use the <b>NONE</b> signature algorithm.</li>
1272     *     <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li>
1273     *     <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li>
1274     *     <li>Match the expected type of token: ACCESS or ID or REFRESH.</li>
1275     * </ul>
1276     *
1277     * @param token               JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied.
1278     * @param expectedTokenType   The type of expected token using the enumeration provided.
1279     * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to.
     * @return True only if the token passes all the validations.
1281     * @see "https://www.iana.org/assignments/jwt/jwt.xhtml"
1282     * @see "https://auth0.com/docs/secure/tokens/access-tokens"
1283     * @see "https://auth0.com/docs/secure/tokens/id-tokens"
1284     * @see "https://auth0.com/docs/secure/tokens/refresh-tokens"
1285     * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/"
1286     * @see "https://jwt.io/libraries?language=Java"
1287     * @see "https://pentesterlab.com/blog/secure-jwt-library-design"
1288     * @see "https://github.com/auth0/java-jwt"
1289     */
1290    public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) {
1291        boolean isValid = false;
1292        TokenType tokenType;
1293        try {
1294            if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) {
1295                if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) {
1296                    String jti = token.getId();
1297                    if (jti != null && !jti.trim().isEmpty()) {
1298                        boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase);
1299                        if (!jtiIsRevoked) {
1300                            //Determine the token type based on the presence of specifics claims
1301                            if (!token.getClaim("scope").isMissing()) {
1302                                tokenType = TokenType.ACCESS;
1303                            } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) {
1304                                tokenType = TokenType.ID;
1305                            } else {
1306                                tokenType = TokenType.REFRESH;
1307                            }
1308                            isValid = (tokenType.equals(expectedTokenType));
1309                        }
1310                    }
1311                }
1312            }
1313
1314        } catch (Exception e) {
1315            //In case of error then assume that the check failed
1316            isValid = false;
1317        }
1318        return isValid;
1319    }
1320
1321    /**
1322     * Apply a validations on a regular expression to ensure that is not prone to the ReDOS attack.
1323     * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b>
1324     * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool.
1325     *
1326     * @param regex                       String expected to be a valid regular expression (regex).
1327     * @param data                        Test data on which the regular expression is executed for the test.
1328     * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified).
1329     * @return True only if the string pass all validations.
1330     * @see "https://github.blog/security/how-to-fix-a-redos/"
1331     * @see "https://learn.snyk.io/lesson/redos"
1332     * @see "https://rules.sonarsource.com/java/RSPEC-2631/"
1333     * @see "https://github.com/doyensec/regexploit"
1334     * @see "https://github.com/makenowjust-labs/recheck"
1335     * @see "https://github.com/tjenkinson/redos-detector"
1336     * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf"
1337     * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS"
1338     */
1339    public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) {
1340        Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value.");
1341        Objects.requireNonNull(data, "A sample data is needed to perform the test.");
1342        Objects.requireNonNull(regex, "A regular expression is needed to perform the test.");
1343        boolean isSafe = false;
1344        int executionTimeout = maximumRunningTimeInSeconds.orElse(4);
1345        ExecutorService executor = Executors.newSingleThreadExecutor();
1346        try {
1347            Callable<Boolean> task = () -> {
1348                Pattern pattern = Pattern.compile(regex);
1349                return pattern.matcher(data).matches();
1350            };
1351            List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS);
1352            if (!tasks.getFirst().isCancelled()) {
1353                isSafe = true;
1354            }
1355        } catch (Exception e) {
1356            isSafe = false;
1357        } finally {
1358            executor.shutdownNow();
1359        }
1360        return isSafe;
1361    }
1362
1363    /**
1364     * Compute a UUID version 7 without using any external dependency.<br><br>
1365     * <b>Below are my personal point of view and perhaps I'm totally wrong!</b>
1366     * <br><br>
1367     * Why such method?
1368     * <ul>
1369     * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li>
1370     * <li>Import a library just to generate such value is overkill for me.</li>
1371     * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li>
1372     * </ul>
1373     * <br>
1374     * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b>
1375     * <br><br>
1376     * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly:
1377     * <ul>
1378     *     <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li>
1379     *     <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li>
1380     * </ul>
1381     *
1382     * @return A UUID object representing the UUID v7.
1383     * @see "https://uuid7.com/"
1384     * @see "https://antonz.org/uuidv7/"
1385     * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns"
1386     * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7"
1387     * @see "https://www.baeldung.com/java-generating-time-based-uuids"
1388     * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier"
1389     * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/"
1390     */
1391    public static UUID computeUUIDv7() {
1392        SecureRandom secureRandom = new SecureRandom();
1393        // Generate truly random bytes
1394        byte[] value = new byte[16];
1395        secureRandom.nextBytes(value);
1396        // Get current timestamp in milliseconds
1397        ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES);
1398        timestamp.putLong(System.currentTimeMillis());
1399        // Create the TIMESTAMP part of the UUID
1400        System.arraycopy(timestamp.array(), 2, value, 0, 6);
1401        // Create the VERSION and the VARIANT parts of the UUID
1402        value[6] = (byte) ((value[6] & 0x0F) | 0x70);
1403        value[8] = (byte) ((value[8] & 0x3F) | 0x80);
1404        //Create the HIGH and LOW parts of the UUID
1405        ByteBuffer buf = ByteBuffer.wrap(value);
1406        long high = buf.getLong();
1407        long low = buf.getLong();
1408        //Create and return the UUID object
1409        UUID uuidv7 = new UUID(high, low);
1410        return uuidv7;
1411    }
1412
1413    /**
1414     * Ensure that an XSD file does not contain any include/import/redefine instruction (prevent exposure to SSRF).
1415     *
1416     * @param xsdFilePath Filename of the XSD file to check.
1417     * @return True only if the file pass all validations.
1418     * @see "https://portswigger.net/web-security/ssrf"
1419     * @see "https://www.w3schools.com/Xml/el_import.asp"
1420     * @see "https://www.w3schools.com/xml/el_include.asp"
1421     * @see "https://www.linkedin.com/posts/righettod_appsec-appsecurity-java-activity-7344048434326188053-6Ru9"
1422     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/validation/SchemaFactory.html#setProperty(java.lang.String,java.lang.Object)"
1423     */
1424    public static boolean isXSDSafe(String xsdFilePath) {
1425        boolean isSafe = false;
1426        try {
1427            File xsdFile = new File(xsdFilePath);
1428            if (xsdFile.exists() && xsdFile.canRead() && xsdFile.isFile()) {
1429                //Parse the XSD file, if an exception occur then it's imply that the XSD specified is not a valid ones
1430                //Create an schema factory throwing Exception if a external schema is specified
1431                SchemaFactory schemaFactory = SchemaFactory.newDefaultInstance();
1432                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1433                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
1434                //Parse the schema
1435                Schema schema = schemaFactory.newSchema(xsdFile);
1436                isSafe = (schema != null);
1437            }
1438        } catch (Exception e) {
1439            isSafe = false;
1440        }
1441        return isSafe;
1442    }
1443
1444
1445    /**
1446     * Extract all sensitive information from a string provided.<br>
1447     * This can be used to identify any sensitive information into a <a href="https://cwe.mitre.org/data/definitions/532.html">message expected to be written in a log</a> and then replace every sensitive values by an obfuscated ones.<br><br>
1448     * For the luxembourg national identification number, this method focus on detecting identifiers for a physical entity (people) and not a moral one (company).<br><br>
1449     * I delegated the validation of the IBAN to a dedicated library (<a href="https://github.com/arturmkrtchyan/iban4j">iban4j</a>) to not "reinvent the wheel" and then introduce buggy validation myself. I used <b>iban4j</b> over the <b><a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/IBANValidator.html">IBANValidator</a></b> class from the <a href="https://commons.apache.org/proper/commons-validator/"><b>Apache Commons Validator</b></a> library because <b>iban4j</b> perform a full official IBAN specification validation so its reduce risks of false-positives by ensuring that an IBAN detected is a real IBAN.<br><br>
1450     * Same thing and reason regarding the validation of the bank card PAN using the  class <a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html">CreditCardValidator</a> from the <b>Apache Commons Validator</b> library.
1451     *
1452     * @param content String in which sensitive information must be searched.
1453     * @return A map with the collection of identified sensitive information gathered by sensitive information type. If nothing is found then the map is empty. A type of sensitive information is only present if there is at least one item found. A set is used to not store duplicates occurrence of the same sensitive information.
1454     * @throws Exception If any error occurs during the processing.
1455     * @see "https://guichet.public.lu/en/citoyens/citoyennete/registre-national/identification/demande-numero-rnpp.html"
1456     * @see "https://cnpd.public.lu/fr/decisions-avis/2009/identifiant-unique.html"
1457     * @see "https://cnpd.public.lu/content/dam/cnpd/fr/decisions-avis/2009/identifiant-unique/48_2009.pdf"
1458     * @see "https://en.wikipedia.org/wiki/International_Bank_Account_Number"
1459     * @see "https://www.iban.com/structure"
1460     * @see "https://github.com/arturmkrtchyan/iban4j"
1461     * @see "https://cwe.mitre.org/data/definitions/532.html"
1462     * @see "https://www.baeldung.com/logback-mask-sensitive-data"
1463     * @see "https://en.wikipedia.org/wiki/Payment_card_number"
1464     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html"
1465     * @see "https://commons.apache.org/proper/commons-validator/"
1466     */
1467    public static Map<SensitiveInformationType, Set<String>> extractAllSensitiveInformation(String content) throws Exception {
1468        CreditCardValidator creditCardValidator = CreditCardValidator.genericCreditCardValidator();
1469        Pattern nationalIdentifierRegex = Pattern.compile("([0-9]{13})");
1470        Pattern ibanNonHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}[A-Z0-9]{11,30})", Pattern.CASE_INSENSITIVE);
1471        Pattern ibanHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}(?:\\s[A-Z0-9]{4}){2,7}\\s[A-Z0-9]{1,4})", Pattern.CASE_INSENSITIVE);
1472        Pattern panRegex = Pattern.compile("((?:\\d[ -]*?){13,19})");
1473        Map<SensitiveInformationType, Set<String>> data = new HashMap<>();
1474        data.put(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER, new HashSet<>());
1475        data.put(SensitiveInformationType.IBAN, new HashSet<>());
1476        data.put(SensitiveInformationType.BANK_CARD_PAN, new HashSet<>());
1477
1478        if (content != null && !content.isBlank()) {
1479            /* Step 1: Search for LU national identifier */
1480            //A national identifier have the following structure: [BIRTHDATE_YEAR_YYYY][BIRTHDATE_MONTH_MM][BIRTHDATE_DAY_DD][FIVE_INTEGER]
1481            //Define minimal and maximal birth year base on current year
1482            //Assume people live less than 120 years
1483            int maxBirthYear = LocalDate.now(ZoneId.of("Europe/Luxembourg")).getYear();
1484            int minBirthYear = maxBirthYear - 120;
1485            Matcher matcher = nationalIdentifierRegex.matcher(content);
1486            String nationalIdentierFull;
1487            int nationalIdentierYear, nationalIdentierMonth, nationalIdentierDay;
1488            while (matcher.find()) {
1489                nationalIdentierFull = matcher.group(1);
1490                //Check that the string is a valid national identifier and if yes then add it
1491                nationalIdentierYear = Integer.parseInt(nationalIdentierFull.substring(0, 4));
1492                nationalIdentierMonth = Integer.parseInt(nationalIdentierFull.substring(4, 6));
1493                nationalIdentierDay = Integer.parseInt(nationalIdentierFull.substring(6, 8));
1494                if (nationalIdentierYear >= minBirthYear && nationalIdentierYear <= maxBirthYear) {
1495                    if (nationalIdentierMonth >= 1 && nationalIdentierMonth <= 12) {
1496                        if (YearMonth.of(nationalIdentierYear, nationalIdentierMonth).isValidDay(nationalIdentierDay)) {
1497                            data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).add(nationalIdentierFull);
1498                        }
1499                    }
1500                }
1501            }
1502
1503            /* Step 2a: Search for IBAN that are non human formatted */
1504            matcher = ibanNonHumanFormattedRegex.matcher(content);
1505            String iban, ibanUpperCased;
1506            while (matcher.find()) {
1507                iban = matcher.group(1);
1508                ibanUpperCased = iban.toUpperCase(Locale.ROOT);
1509                //Check that the string is a valid IBAN and if yes then add it
1510                if (IbanUtil.isValid(ibanUpperCased)) {
1511                    data.get(SensitiveInformationType.IBAN).add(iban);
1512                }
1513            }
1514
1515            /* Step 2b: Search for IBAN that are human formatted */
1516            matcher = ibanHumanFormattedRegex.matcher(content);
1517            String ibanUpperCasedNoSpace;
1518            while (matcher.find()) {
1519                iban = matcher.group(1);
1520                ibanUpperCasedNoSpace = iban.toUpperCase(Locale.ROOT).replace(" ", "");
1521                //Check that the string is a valid IBAN and if yes then add it
1522                if (IbanUtil.isValid(ibanUpperCasedNoSpace)) {
1523                    data.get(SensitiveInformationType.IBAN).add(iban);
1524                }
1525            }
1526
1527            /* Step 3: Search for bank card PAN */
1528            matcher = panRegex.matcher(content);
1529            String pan, panNoSeparator;
1530            while (matcher.find()) {
1531                pan = matcher.group(1);
1532                panNoSeparator = pan.toUpperCase(Locale.ROOT).replace(" ", "").replace("-", "");
1533                //Check that the string is a valid PAN and if yes then add it
1534                if (creditCardValidator.isValid(panNoSeparator)) {
1535                    data.get(SensitiveInformationType.BANK_CARD_PAN).add(pan);
1536                }
1537            }
1538
1539        }
1540
1541        //Cleanup if a set is empty
1542        if (data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).isEmpty()) {
1543            data.remove(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER);
1544        }
1545        if (data.get(SensitiveInformationType.IBAN).isEmpty()) {
1546            data.remove(SensitiveInformationType.IBAN);
1547        }
1548        if (data.get(SensitiveInformationType.BANK_CARD_PAN).isEmpty()) {
1549            data.remove(SensitiveInformationType.BANK_CARD_PAN);
1550        }
1551
1552        return data;
1553    }
1554
1555    /**
1556     * Apply a collection of validations on a bytes array provided representing GZIP compressed data:
1557     * <ul>
1558     * <li>Are valid GZIP compressed data.</li>
1559     * <li>The number of bytes once decompressed is under the specified limit.</li>
1560     * </ul>
1561     * <br><b>Note:</b> The value <code>Integer.MAX_VALUE - 8</code> was chosen because during my tests on Java 25 (JDK 64 bits on Windows 11 Pro), it was possible to decompress such amount of data with the default JVM settings without causing an <a href="https://docs.oracle.com/en/java/javase/25/docs/api//java.base/java/lang/OutOfMemoryError.html">Out Of Memory error</a>.
1562     *
1563     * @param compressedBytes                    Array of bytes containing the GZIP compressed data to check.
1564     * @param maxCountOfDecompressedBytesAllowed Maximum number of decompressed bytes allowed. Default to 10 MB if the specified value is inferior to 1 or superior to Integer.MAX_VALUE - 8.
     * @return True only if the compressed data passes all validations.
1566     * @see "https://en.wikipedia.org/wiki/Gzip"
1567     * @see "https://www.rapid7.com/db/modules/auxiliary/dos/http/gzip_bomb_dos/"
1568     */
1569    public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxCountOfDecompressedBytesAllowed) {
1570        boolean isSafe = false;
1571
1572        try {
1573            long limit = maxCountOfDecompressedBytesAllowed;
1574            long totalRead = 0L;
1575            byte[] buffer = new byte[8 * 1024];
1576            int read;
1577            if (limit < 1 || limit > (Integer.MAX_VALUE - 8)) {
1578                limit = 10_000_000;
1579            }
1580            try (ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); GZIPInputStream gzipInputStream = new GZIPInputStream(new BufferedInputStream(bis))) {
1581                while ((read = gzipInputStream.read(buffer)) != -1) {
1582                    totalRead += read;
1583                    if (totalRead > limit) {
1584                        throw new Exception();
1585                    }
1586                }
1587            }
1588            isSafe = true;
1589        } catch (Exception e) {
1590            isSafe = false;
1591        }
1592
1593        return isSafe;
1594    }
1595
1596    /**
1597     * Process a string, intended to be written in a log, to remove as much as possible information that can lead to an exposure to a log injection vulnerability.<br><br>
1598     * <b>Log injection</b> is also called <b>log forging</b>.<br><br>
1599     * The following information are removed:
1600     * <ul>
1601     *     <li>Characters: Carriage Return (CR), Linefeed (LF) and Tabulation (TAB).</li>
1602     *     <li>Leading and trailing spaces.</li>
1603     *     <li>Any HTML tags.</li>
1604     * </ul><br>
1605     * A parameter is also used to limit the maximum length of the sanitized message.
1606     * To remove any HTML tags, the OWASP project <a href="https://owasp.org/www-project-java-html-sanitizer/">Java HTML Sanitizer</a> is leveraged.<br>
1607     * I delegated such removal to a dedicated library to prevent missing of edge cases as well as potential bypasses.
1608     *
1609     * @param message          The original string message intended to be written in a log.
1610     * @param maxMessageLength The maximum number of characters after which the sanitized message must be truncated. If inferior to 1 then default to the value of 500.
1611     * @return The string message cleaned.
1612     * @see "https://www.wallarm.com/what/log-forging-attack"
1613     * @see "https://www.invicti.com/learn/crlf-injection"
1614     * @see "https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_injection_vulnerability.html"
1615     * @see "https://capec.mitre.org/data/definitions/93.html"
1616     * @see "https://codeql.github.com/codeql-query-help/javascript/js-log-injection/"
1617     * @see "https://owasp.org/www-project-java-html-sanitizer/"
1618     * @see "https://github.com/OWASP/java-html-sanitizer"
1619     */
1620    public static String sanitizeLogMessage(String message, int maxMessageLength) {
1621        String sanitized = message;
1622        int maxSanitizedMessageLength = maxMessageLength;
1623
1624        if (sanitized != null && !sanitized.isBlank()) {
1625            if (maxSanitizedMessageLength < 1) {
1626                maxSanitizedMessageLength = 500;
1627            }
1628            //Step 1: Remove any CR/LR/TAB characters as well as leading and trailing spaces
1629            sanitized = sanitized.replaceAll("[\\n\\r\\t]", "").trim();
1630            //Step 2: Remove any HTML tags
1631            PolicyFactory htmlSanitizerPolicy = new HtmlPolicyBuilder().toFactory();
1632            sanitized = htmlSanitizerPolicy.sanitize(sanitized);
1633            //Step 3: Truncate the string in case of need
1634            if (sanitized.length() > maxSanitizedMessageLength) {
1635                sanitized = sanitized.substring(0, maxSanitizedMessageLength);
1636            }
1637        }
1638
1639        return sanitized;
1640    }
1641
1642    /**
1643     * Identify if an XML is an SVG image.<br>
1644     * The goal of this method is to prevent to leverage SVG, as an vector, to achieve a XSS when XML format is accepted.<br>
1645     * Leverage <a href="https://xmlgraphics.apache.org/batik/">Apache Batik</a> to delegate the parsing and support for the SVG format.<br><br>
1646     * <b>Due to the intended usage of the method, the following choice were made:</b>
1647     * <ul>
1648     * <li>Raise an exception when a non SVG related external references is identified.</li>
1649     * <li>Throw any exception that can occur if the provided content is invalid like for example an invalid XML file or a non existing file.</li>
1650     * <li>Explicitly check the XML prior to pass it to Batik even if Batik seems not prone to XXE/SSRF classes of vulnerability.</li>
1651     * </ul>
1652     *
1653     * @param xmlFilePath Filename of the XML file to check.
1654     * @return True only if XML is an valid SVG image.
1655     * @throws SecurityException If a non SVG external references is detected into the XML content.
1656     * @throws Exception         If a error occur due to an invalid content provided.
1657     * @see "https://developer.mozilla.org/en-US/docs/Web/SVG"
1658     * @see "https://www.fortinet.com/blog/threat-research/scalable-vector-graphics-attack-surface-anatomy"
1659     * @see "https://portswigger.net/web-security/cross-site-scripting"
1660     * @see "https://xmlgraphics.apache.org/batik/"
1661     * @see "https://github.com/apache/xmlgraphics-batik/blob/main/batik-dom/src/main/java/org/apache/batik/dom/util/SAXDocumentFactory.java#L420"
1662     * @see "https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-dom"
1663     * @see "https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-anim"
1664     * @see "https://portswigger.net/web-security/xxe"
1665     * @see "https://portswigger.net/web-security/ssrf"
1666     */
1667    public static boolean isXMLSVGImage(String xmlFilePath) throws Exception {
1668        boolean isSvg = true;
1669        List<String> svgValidSystemIDs = List.of("http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd", "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd", "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd", "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
1670
1671        //Load the XML content into a reader
1672        String xmlContent = Files.readString(Paths.get(xmlFilePath));
1673        //Then ensure that the XML document does not contains any non SVG external references
1674        try (Reader reader = StringReader.of(xmlContent)) {
1675            DocumentBuilderFactory xmlFactory = DocumentBuilderFactory.newInstance();
1676            DocumentBuilder docBuilder = xmlFactory.newDocumentBuilder();
1677            docBuilder.setEntityResolver((publicId, systemId) -> {
1678                if (systemId != null && !svgValidSystemIDs.contains(systemId)) {
1679                    throw new SecurityException("External references detected: " + systemId);
1680                }
1681                return new InputSource(new ByteArrayInputStream("".getBytes()));
1682            });
1683            docBuilder.parse(new InputSource(reader));
1684        }
1685        //Then parse the XML with Apache Batik
1686        try (Reader reader = StringReader.of(xmlContent)) {
1687            //Method SAXDocumentFactory.createDocument() do not load external DTD or entities.
1688            String parserClassName = XMLResourceDescriptor.getXMLParserClassName();
1689            SAXSVGDocumentFactory svgFactory = new SAXSVGDocumentFactory(parserClassName);
1690            //Method svgFactory.createSVGDocument() raise an IO exception if the XML is not a valid SVG image
1691            try {
1692                SVGDocument doc = svgFactory.createSVGDocument(null, reader);
1693                isSvg = (doc != null && doc.getRootElement() != null);
1694            } catch (IOException e) {
1695                isSvg = false;
1696            }
1697        }
1698
1699        return isSvg;
1700    }
1701}