001package eu.righettod;
002
003
004import com.auth0.jwt.interfaces.DecodedJWT;
005import org.apache.commons.csv.CSVFormat;
006import org.apache.commons.csv.CSVRecord;
007import org.apache.commons.imaging.ImageInfo;
008import org.apache.commons.imaging.Imaging;
009import org.apache.commons.imaging.common.ImageMetadata;
010import org.apache.commons.validator.routines.CreditCardValidator;
011import org.apache.commons.validator.routines.EmailValidator;
012import org.apache.commons.validator.routines.InetAddressValidator;
013import org.apache.pdfbox.Loader;
014import org.apache.pdfbox.pdmodel.PDDocument;
015import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
016import org.apache.pdfbox.pdmodel.PDDocumentInformation;
017import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
018import org.apache.pdfbox.pdmodel.common.PDMetadata;
019import org.apache.pdfbox.pdmodel.interactive.action.*;
020import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
022import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
023import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
024import org.apache.poi.poifs.filesystem.DirectoryEntry;
025import org.apache.poi.poifs.filesystem.POIFSFileSystem;
026import org.apache.poi.poifs.macros.VBAMacroReader;
027import org.apache.tika.detect.DefaultDetector;
028import org.apache.tika.detect.Detector;
029import org.apache.tika.io.TemporaryResources;
030import org.apache.tika.io.TikaInputStream;
031import org.apache.tika.metadata.Metadata;
032import org.apache.tika.mime.MediaType;
033import org.apache.tika.mime.MimeTypes;
034import org.iban4j.IbanUtil;
035import org.owasp.html.HtmlPolicyBuilder;
036import org.owasp.html.PolicyFactory;
037import org.w3c.dom.Document;
038import org.xml.sax.EntityResolver;
039import org.xml.sax.InputSource;
040import org.xml.sax.SAXException;
041
042import javax.crypto.Mac;
043import javax.crypto.spec.SecretKeySpec;
044import javax.imageio.ImageIO;
045import javax.json.Json;
046import javax.json.JsonReader;
047import javax.xml.XMLConstants;
048import javax.xml.parsers.DocumentBuilder;
049import javax.xml.parsers.DocumentBuilderFactory;
050import javax.xml.parsers.ParserConfigurationException;
051import javax.xml.stream.XMLInputFactory;
052import javax.xml.stream.XMLStreamReader;
053import javax.xml.stream.events.XMLEvent;
054import javax.xml.validation.Schema;
055import javax.xml.validation.SchemaFactory;
056import java.awt.*;
057import java.awt.image.BufferedImage;
058import java.io.*;
059import java.net.*;
060import java.net.http.HttpClient;
061import java.net.http.HttpRequest;
062import java.net.http.HttpResponse;
063import java.nio.ByteBuffer;
064import java.nio.charset.Charset;
065import java.nio.charset.StandardCharsets;
066import java.nio.file.Files;
067import java.security.MessageDigest;
068import java.security.SecureRandom;
069import java.time.Duration;
070import java.time.LocalDate;
071import java.time.YearMonth;
072import java.time.ZoneId;
073import java.util.*;
074import java.util.List;
075import java.util.concurrent.*;
076import java.util.concurrent.atomic.AtomicInteger;
077import java.util.regex.Matcher;
078import java.util.regex.Pattern;
079import java.util.zip.GZIPInputStream;
080import java.util.zip.ZipEntry;
081import java.util.zip.ZipFile;
082
083/**
084 * Provides different utility methods to apply processing from a security perspective.<br>
085 * These code snippets:
086 * <ul>
087 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
088 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
089 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
090 * </ul>
091 * <br>
092 * <a href="https://github.com/righettod/code-snippets-security-utils">GitHub repository</a>.<br><br>
093 * <a href="https://github.com/righettod/code-snippets-security-utils/blob/main/src/main/java/eu/righettod/SecurityUtils.java">Source code of the class</a>.
094 */
095public class SecurityUtils {
096    /**
097     * Private constructor: prevents instantiation, as the class only provides static methods.
098     */
099    private SecurityUtils() {
100    }
101
102    /**
103     * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br>
104     * This method consider that format of the PIN code is [0-9]{6,}<br>
105     * Rule to consider a PIN code as weak:
106     * <ul>
107     * <li>Length is inferior to 6 positions.</li>
108     * <li>Contain only the same number or only a sequence of zero.</li>
109     * <li>Contain sequence of following incremental or decremental numbers.</li>
110     * </ul>
111     *
112     * @param pinCode PIN code to verify.
113     * @return True only if the PIN is considered as weak.
114     */
115    public static boolean isWeakPINCode(String pinCode) {
116        boolean isWeak = true;
117        //Length is inferior to 6 positions
118        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
119        //and to ensure that the PIN is not only a sequence of zero
120        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
121            //Contain only the same number
122            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
123            if (!Pattern.matches(regex, pinCode)) {
124                //Contain sequence of following incremental or decremental numbers
125                char previousChar = 'X';
126                boolean containSequence = false;
127                for (char c : pinCode.toCharArray()) {
128                    if (previousChar != 'X') {
129                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
130                        int currentNbr = Integer.parseInt(String.valueOf(c));
131                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
132                            containSequence = true;
133                            break;
134                        }
135                    }
136                    previousChar = c;
137                }
138                if (!containSequence) {
139                    isWeak = false;
140                }
141            }
142        }
143        return isWeak;
144    }
145
146    /**
147     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
148     * <ul>
149     * <li>Real Microsoft Word 97-2003 document file.</li>
150     * <li>No VBA Macro.<br></li>
151     * <li>No embedded objects.</li>
152     * </ul>
153     *
154     * @param wordFilePath Filename of the Word document file to check.
155     * @return True only if the file pass all validations.
156     * @see "https://poi.apache.org/components/"
157     * @see "https://poi.apache.org/components/document/"
158     * @see "https://poi.apache.org/components/poifs/how-to.html"
159     * @see "https://poi.apache.org/components/poifs/embeded.html"
160     * @see "https://poi.apache.org/"
161     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
162     */
163    public static boolean isWord972003DocumentSafe(String wordFilePath) {
164        boolean isSafe = false;
165        try {
166            File wordFile = new File(wordFilePath);
167            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
168                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
169                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
170                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
171                    VBAMacroReader macroReader = new VBAMacroReader(fs);
172                    Map<String, String> macros = macroReader.readMacros();
173                    if (macros == null || macros.isEmpty()) {
174                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
175                        //From POI documentation:
176                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
177                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
178                        final List<String> embeddedObjectFound = new ArrayList<>();
179                        DirectoryEntry root = fs.getRoot();
180                        if (root.getEntryCount() > 0) {
181                            root.iterator().forEachRemaining(entry -> {
182                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
183                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
184                                    if (objPoolDirectory.getEntryCount() > 0) {
185                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
186                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
187                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
188                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
189                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
190                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
191                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
192                                                        }
193                                                    });
194                                                }
195                                            }
196                                        });
197                                    }
198                                }
199                            });
200                        }
201                        isSafe = embeddedObjectFound.isEmpty();
202                    }
203                }
204            }
205        } catch (Exception e) {
206            isSafe = false;
207        }
208        return isSafe;
209    }
210
211    /**
212     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
213     *
214     * @param xmlFilePath Filename of the XML file to check.
215     * @return True only if the file pass all validations.
216     * @see "https://portswigger.net/web-security/xxe"
217     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
218     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
219     * @see "https://www.w3.org/TR/xinclude-11/"
220     * @see "https://en.wikipedia.org/wiki/XInclude"
221     */
222    public static boolean isXMLSafe(String xmlFilePath) {
223        boolean isSafe = false;
224        try {
225            File xmlFile = new File(xmlFilePath);
226            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
227                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
228                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
229                if (!containXInclude) {
230                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
231                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
232                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
233                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
234                    //Xerces 2 only
235                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
236                    dbfInstance.setXIncludeAware(false);
237                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
238                    //Parse the document
239                    Document doc = builder.parse(xmlFile);
240                    isSafe = (doc != null && doc.getDocumentElement() != null);
241                }
242            }
243        } catch (Exception e) {
244            isSafe = false;
245        }
246        return isSafe;
247    }
248
249
250    /**
251     * Extract all URL links from a PDF file provided.<br>
252     * This can be used to apply validation on a PDF against contained links.
253     *
254     * @param pdfFilePath pdfFilePath Filename of the PDF file to process.
255     * @return A List of URL objects that is empty if no links is found.
256     * @throws Exception If any error occurs during the processing of the PDF file.
257     * @see "https://www.gushiciku.cn/pl/21KQ"
258     * @see "https://pdfbox.apache.org/"
259     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
260     */
261    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
262        final List<URL> links = new ArrayList<>();
263        File pdfFile = new File(pdfFilePath);
264        try (PDDocument document = Loader.loadPDF(pdfFile)) {
265            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
266            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
267                @Override
268                public boolean accept(PDAnnotation annotation) {
269                    boolean keep = false;
270                    if (annotation instanceof PDAnnotationLink) {
271                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
272                    }
273                    return keep;
274                }
275            };
276            documentCatalog.getPages().forEach(page -> {
277                try {
278                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
279                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
280                        try {
281                            URL urlObj = new URL(linkAnnotation.getURI());
282                            if (!links.contains(urlObj)) {
283                                links.add(urlObj);
284                            }
285                        } catch (MalformedURLException e) {
286                            throw new RuntimeException(e);
287                        }
288                    });
289                } catch (Exception e) {
290                    throw new RuntimeException(e);
291                }
292            });
293        }
294        return links;
295    }
296
297    /**
298     * Apply a collection of validations on a PDF file provided:
299     * <ul>
300     * <li>Real PDF file.</li>
301     * <li>No attachments.</li>
302     * <li>No Javascript code.</li>
303     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
304     * <li>No XFA forms in order to prevent exposure to XXE/SSRF like CVE-2025-54988.</li>
305     * </ul>
306     *
307     * @param pdfFilePath Filename of the PDF file to check.
308     * @return True only if the file pass all validations.
309     * @see "https://stackoverflow.com/a/36161267"
310     * @see "https://www.gushiciku.cn/pl/21KQ"
311     * @see "https://github.com/jonaslejon/malicious-pdf"
312     * @see "https://pdfbox.apache.org/"
313     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
314     * @see "https://nvd.nist.gov/vuln/detail/CVE-2025-54988"
315     * @see "https://github.com/mgthuramoemyint/POC-CVE-2025-54988"
316     * @see "https://en.wikipedia.org/wiki/XFA"
317     */
318    public static boolean isPDFSafe(String pdfFilePath) {
319        boolean isSafe = false;
320        try {
321            File pdfFile = new File(pdfFilePath);
322            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
323                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
324                try (PDDocument document = Loader.loadPDF(pdfFile)) {
325                    //Step 2: Check if the file contains attached files, in our case is not allowed
326                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
327                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
328                    if (namesDictionary.getEmbeddedFiles() == null) {
329                        //Step 3: Check if the file contains any XFA forms
330                        PDAcroForm acroForm = documentCatalog.getAcroForm();
331                        boolean hasForm = (acroForm != null && acroForm.getXFA() != null);
332                        if (!hasForm) {
333                            //Step 4: Check if the file contains Javascript code, in our case is not allowed
334                            if (namesDictionary.getJavaScript() == null) {
335                                //Step 5: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
336                                final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
337                                AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
338                                    @Override
339                                    public boolean accept(PDAnnotation annotation) {
340                                        boolean keep = false;
341                                        if (annotation instanceof PDAnnotationLink) {
342                                            PDAnnotationLink link = (PDAnnotationLink) annotation;
343                                            PDAction action = link.getAction();
344                                            if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
345                                                keep = true;
346                                            }
347                                        }
348                                        return keep;
349                                    }
350                                };
351                                documentCatalog.getPages().forEach(page -> {
352                                    try {
353                                        notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
354                                    } catch (IOException e) {
355                                        throw new RuntimeException(e);
356                                    }
357                                });
358                                if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
359                                    isSafe = true;
360                                }
361                            }
362                        }
363                    }
364                }
365            }
366        } catch (Exception e) {
367            isSafe = false;
368        }
369        return isSafe;
370    }
371
372    /**
373     * Remove as much as possible metadata from the provided PDF document object.
374     *
375     * @param document PDFBox PDF document object on which metadata must be removed.
376     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
377     * @see "https://pdfbox.apache.org/"
378     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
379     */
380    public static void clearPDFMetadata(PDDocument document) {
381        if (document != null) {
382            PDDocumentInformation infoEmpty = new PDDocumentInformation();
383            document.setDocumentInformation(infoEmpty);
384            PDMetadata newMetadataEmpty = new PDMetadata(document);
385            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
386        }
387    }
388
389
390    /**
391     * Validate that the URL provided is really a relative URL.
392     *
393     * @param targetUrl URL to validate.
394     * @return True only if the file pass all validations.
395     * @see "https://portswigger.net/web-security/ssrf"
396     * @see "https://stackoverflow.com/q/6785442"
397     */
398    public static boolean isRelativeURL(String targetUrl) {
399        boolean isValid = false;
400        //Reject any URL encoded content and URL starting with a double slash
401        //Reject any URL contains credentials or fragment to prevent potential bypasses
402        String work = targetUrl;
403        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
404            //Creation of a URL object must fail
405            try {
406                new URL(work);
407                isValid = false;
408            } catch (MalformedURLException mf) {
409                //Last check to be sure (for prod usage compile the pattern one time)
410                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
411            }
412        }
413        return isValid;
414    }
415
416    /**
417     * Apply a collection of validations on a ZIP file provided:
418     * <ul>
419     * <li>Real ZIP file.</li>
420     * <li>Contain less than a specified level of deepness.</li>
421     * <li>Do not contain Zip-Slip entry path.</li>
422     * </ul>
423     *
424     * @param zipFilePath       Filename of the ZIP file to check.
425     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
426     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
427     * @return True only if the file pass all validations.
428     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
429     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
430     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
431     * @see "https://github.com/ptoomey3/evilarc"
432     * @see "https://github.com/abdulfatir/ZipBomb"
433     * @see "https://www.baeldung.com/cs/zip-bomb"
434     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
435     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
436     */
437    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
438        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
439        boolean isSafe = false;
440        try {
441            File zipFile = new File(zipFilePath);
442            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
443                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
444                try (ZipFile zipArch = new ZipFile(zipFile)) {
445                    //Step 2: Parse entries
446                    long deepness = 0;
447                    ZipEntry zipEntry;
448                    String entryExtension;
449                    String zipEntryName;
450                    boolean validationsFailed = false;
451                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
452                    while (entries.hasMoreElements()) {
453                        zipEntry = entries.nextElement();
454                        zipEntryName = zipEntry.getName();
455                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
456                        //Step 2a: Check if the current entry is an archive file
457                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
458                            validationsFailed = true;
459                            break;
460                        }
461                        //Step 2b: Check that level of deepness is inferior to the threshold specified
462                        if (zipEntryName.contains("/")) {
463                            //Determine deepness by inspecting the entry name.
464                            //Indeed, folder will be represented like this: folder/folder/folder/
465                            //So we can count the number of "/" to identify the deepness of the entry
466                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
467                            if (deepness > maxLevelDeepness) {
468                                validationsFailed = true;
469                                break;
470                            }
471                        }
472                        //Step 2c: Check if any entries match pattern of zip slip payload
473                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
474                            validationsFailed = true;
475                            break;
476                        }
477                    }
478                    if (!validationsFailed) {
479                        isSafe = true;
480                    }
481                }
482            }
483        } catch (Exception e) {
484            isSafe = false;
485        }
486        return isSafe;
487    }
488
489    /**
490     * Identify the mime type of the content specified (array of bytes).<br>
491     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
492     *
493     * @param content The content as an array of bytes.
494     * @return The mime type in lower case or null if it cannot be identified.
495     * @see "https://twitter.com/righettod/status/1595824709186519041"
496     * @see "https://tika.apache.org/"
497     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
498     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
499     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
500     */
501    public static String identifyMimeType(byte[] content) {
502        String mimeType = null;
503        if (content != null && content.length > 0) {
504            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
505            Metadata metadata = new Metadata();
506            try {
507                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
508                    MediaType mt = detector.detect(tikaInputStream, metadata);
509                    if (mt != null) {
510                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
511                    }
512                }
513            } catch (IOException ioe) {
514                mimeType = null;
515            }
516        }
517        return mimeType;
518    }
519
520    /**
521     * Apply a collection of validations on a string expected to be an public IP address:
522     * <ul>
523     * <li>Is a valid IP v4 or v6 address.</li>
524     * <li>Is public from an Internet perspective.</li>
525     * </ul>
526     * <br>
527     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
528     * <br><br>
529     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
530     *
531     * @param ip String expected to be a valid IP address.
532     * @return True only if the string pass all validations.
533     * @see "https://commons.apache.org/proper/commons-validator/"
534     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
535     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
536     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
537     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
538     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
539     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
540     * @see "https://ipcisco.com/lesson/ipv6-address/"
541     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
542     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
543     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
544     * @see "https://en.wikipedia.org/wiki/Multicast_address"
545     * @see "https://stackoverflow.com/a/5619409"
546     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
547     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
548     * @see "https://developer.android.com/reference/java/net/Inet6Address"
549     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
550     */
551    public static boolean isPublicIPAddress(String ip) {
552        boolean isValid = false;
553        try {
554            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
555            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
556                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
557                if (InetAddressValidator.getInstance().isValid(ip)) {
558                    //If OK then validate that is an public IP address
559                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
560                    InetAddress addr = InetAddress.getByName(ip);
561                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
562                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
563                    //For the prefix map, the start of the key indicates if the value is a regex or a string
564                    if (isValid && (addr instanceof Inet6Address)) {
565                        Map<String, String> prefixes = new HashMap<>();
566                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
567                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
568                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
569                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
570                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
571                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
572                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
573                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
574                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
575                        final List<Boolean> results = new ArrayList<>();
576                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
577                        prefixes.forEach((addressType, expr) -> {
578                            String exprLower = expr.trim().toLowerCase();
579                            if (addressType.startsWith("STRING_")) {
580                                results.add(ipLower.startsWith(exprLower));
581                            } else {
582                                results.add(Pattern.matches(exprLower, ipLower));
583                            }
584                        });
585                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
586                    }
587                }
588            }
589        } catch (Exception e) {
590            isValid = false;
591        }
592        return isValid;
593    }
594
595    /**
596     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
     * This method takes care to build the source string in a way that prevents it from being prone to abuse targeting the different parts composing it.<br><br>
598     * <p>
599     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
600     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
601     * </p>
602     * This method ensure that both hash above will be different.<br><br>
603     *
     * <b>Note:</b> The character <code>|</code> is used as the separator between parts, so a part is not allowed to contain this character.
605     *
606     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
607     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
608     * @throws Exception If any exception occurs
609     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
610     * @see "https://pentesterlab.com/badges/codereview"
611     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
612     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
613     */
614    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
615        byte[] hash = null;
616        String separator = "|";
617        if (parts != null && !parts.isEmpty()) {
618            //Ensure that not part is null
619            if (parts.stream().anyMatch(Objects::isNull)) {
620                throw new IllegalArgumentException("No part must be null!");
621            }
622            //Ensure that the separator is absent from every part
623            if (parts.stream().anyMatch(part -> part.contains(separator))) {
624                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
625            }
626            MessageDigest digest = MessageDigest.getInstance("SHA-256");
627            final StringBuilder buffer = new StringBuilder(separator);
628            parts.forEach(p -> {
629                buffer.append(p).append(separator);
630            });
631            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
632        }
633        return hash;
634    }
635
636    /**
637     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
638     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
639     * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations.
640     *
641     * @param xmlFilePath              Filename of the XML file to check.
642     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
643     * @return True only if the file pass all validations.
644     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
645     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
646     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
647     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
648     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
649     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
650     */
651    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
652        boolean isSafe = false;
653        final String errorTemplate = "Non allowed %s ID detected!";
654        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
655        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
656
657        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
658            throw new IllegalArgumentException("At least one SID must be specified!");
659        }
660        File xmlFile = new File(xmlFilePath);
661        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
662            try {
663                EntityResolver resolverValidator = (publicId, systemId) -> {
664                    if (publicId != null) {
665                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
666                    }
667                    if (!allowedSystemIdentifiers.contains(systemId)) {
668                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
669                    }
670                    //If it is OK then return a empty DTD/XSD
671                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
672                };
673                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
674                dbfInstance.setIgnoringElementContentWhitespace(true);
675                dbfInstance.setXIncludeAware(false);
676                dbfInstance.setValidating(false);
677                dbfInstance.setCoalescing(true);
678                dbfInstance.setIgnoringComments(false);
679                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
680                builder.setEntityResolver(resolverValidator);
681                Document doc = builder.parse(xmlFile);
682                isSafe = (doc != null);
683            } catch (SAXException | IOException | ParserConfigurationException e) {
684                isSafe = false;
685            }
686        }
687
688        return isSafe;
689    }
690
691    /**
692     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
693     * <ul>
694     * <li>Real CSV file.</li>
695     * <li>Do not contains any payload related to a CSV injections.</li>
696     * </ul>
     * If Apache Commons CSV does not find any record, then the file is considered as NOT safe (to prevent potential bypasses).<br><br>
698     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
699     *
700     * @param csvFilePath Filename of the CSV file to check.
701     * @return True only if the file pass all validations.
702     * @see "https://commons.apache.org/proper/commons-csv/"
703     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
704     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
705     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
706     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
707     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
708     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
709     * @see "https://cwe.mitre.org/data/definitions/1236.html"
710     */
711    public static boolean isExcelCSVSafe(String csvFilePath) {
712        boolean isSafe;
713        final AtomicInteger recordCount = new AtomicInteger();
714        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
715
716        try {
717            final List<String> payloadsIdentified = new ArrayList<>();
718            try (Reader in = new FileReader(csvFilePath)) {
719                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
720                records.forEach(record -> {
721                    record.forEach(recordValue -> {
722                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
723                            payloadsIdentified.add(recordValue);
724                        }
725                        recordCount.getAndIncrement();
726                    });
727                });
728            }
729            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
730        } catch (Exception e) {
731            isSafe = false;
732        }
733
734        return isSafe;
735    }
736
737    /**
738     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
739     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
740     *
741     * @param processingModeType Define the mode of processing i.e. protect or validate. ({@link ProcessingModeType})
     * @param input              When the processing mode is "protect" then the expected input (string) is a java serialized object encoded in Base64, otherwise (processing mode is "validate") the expected input is the output of this method when the "protect" mode was used.
743     * @param secret             Secret to use to compute the SHA256 HMAC.
744     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
745     * @throws Exception If any exception occurs.
746     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
747     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
748     * @see "https://portswigger.net/web-security/deserialization"
749     * @see "https://www.baeldung.com/java-serialization-approaches"
750     * @see "https://www.baeldung.com/java-serialization"
751     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
752     * @see "https://en.wikipedia.org/wiki/HMAC"
753     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
754     */
755    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingModeType processingModeType, String input, byte[] secret) throws Exception {
756        Map<String, Object> results;
757        String resultFormatTemplate = "%s:%s";
758        //Verify input provided to be consistent
759        if (processingModeType == null) {
760            throw new IllegalArgumentException("The processing mode is mandatory!");
761        }
762        if (input == null || input.trim().isEmpty()) {
763            throw new IllegalArgumentException("Input data is mandatory!");
764        }
765        if (secret == null || secret.length == 0) {
766            throw new IllegalArgumentException("The HMAC secret is mandatory!");
767        }
768        if (processingModeType.equals(ProcessingModeType.VALIDATE) && input.split(":").length != 2) {
769            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
770        }
771        //Processing
772        Base64.Decoder b64Decoder = Base64.getDecoder();
773        Base64.Encoder b64Encoder = Base64.getEncoder();
774        String hmacAlgorithm = "HmacSHA256";
775        Mac mac = Mac.getInstance(hmacAlgorithm);
776        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
777        mac.init(key);
778        results = new HashMap<>();
779        results.put("PROCESSING_MODE", processingModeType.toString());
780        switch (processingModeType) {
781            case PROTECT -> {
782                byte[] objectBytes = b64Decoder.decode(input);
783                byte[] hmac = mac.doFinal(objectBytes);
784                String encodedHmac = b64Encoder.encodeToString(hmac);
785                results.put("STATUS", Boolean.TRUE);
786                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
787            }
788            case VALIDATE -> {
789                String[] parts = input.split(":");
790                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
791                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
792                byte[] hmacComputed = mac.doFinal(objectBytes);
793                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
794                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
795                results.put("STATUS", hmacIsValid);
796                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
797            }
798            default -> throw new IllegalArgumentException("Not supported processing mode!");
799        }
800        return results;
801    }
802
803    /**
804     * Apply a collection of validations on a JSON string provided:
805     * <ul>
806     * <li>Real JSON structure.</li>
807     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
808     * <li>Contain less than a specified number of items in any arrays.</li>
809     * </ul>
810     * <br>
811     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
812     * I used the following assumption:
813     * <ul>
814     *      <li>The character <code>{</code> identify the beginning of an object.</li>
815     *      <li>The character <code>}</code> identify the end of an object.</li>
816     *      <li>The character <code>[</code> identify the beginning of an array.</li>
817     *      <li>The character <code>]</code> identify the end of an array.</li>
818     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
819     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
820     * </ul>
821     *
822     * @param json                  String containing the JSON data to validate.
823     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
824     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
825     * @return True only if the string pass all validations.
826     * @see "https://javaee.github.io/jsonp/"
827     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
828     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
829     */
830    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
831        boolean isSafe = false;
832
833        try {
834            //Step 1: Analyse the JSON string
835            int currentDeepness = 0;
836            int currentArrayItemsCount = 0;
837            int maxDeepnessReached = 0;
838            int maxArrayItemsCountReached = 0;
839            boolean currentlyInArray = false;
840            boolean currentlyInString = false;
841            int currentNestedArrayLevel = 0;
842            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
843            String work = json.replace(jsonEscapedDoubleQuote, "'");
844            for (char c : work.toCharArray()) {
845                switch (c) {
846                    case '{': {
847                        if (!currentlyInString) {
848                            currentDeepness++;
849                        }
850                        break;
851                    }
852                    case '}': {
853                        if (!currentlyInString) {
854                            currentDeepness--;
855                        }
856                        break;
857                    }
858                    case '[': {
859                        if (!currentlyInString) {
860                            currentDeepness++;
861                            if (currentlyInArray) {
862                                currentNestedArrayLevel++;
863                            }
864                            currentlyInArray = true;
865                        }
866                        break;
867                    }
868                    case ']': {
869                        if (!currentlyInString) {
870                            currentDeepness--;
871                            currentArrayItemsCount = 0;
872                            if (currentNestedArrayLevel > 0) {
873                                currentNestedArrayLevel--;
874                            }
875                            if (currentNestedArrayLevel == 0) {
876                                currentlyInArray = false;
877                            }
878                        }
879                        break;
880                    }
881                    case '"': {
882                        currentlyInString = !currentlyInString;
883                        break;
884                    }
885                    case ',': {
886                        if (!currentlyInString && currentlyInArray) {
887                            currentArrayItemsCount++;
888                        }
889                        break;
890                    }
891                }
892                if (currentDeepness > maxDeepnessReached) {
893                    maxDeepnessReached = currentDeepness;
894                }
895                if (currentArrayItemsCount > maxArrayItemsCountReached) {
896                    maxArrayItemsCountReached = currentArrayItemsCount;
897                }
898            }
899            //Step 2: Apply validation against the value specified as limits
900            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
901
902            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
903            if (isSafe) {
904                JsonReader reader = Json.createReader(new StringReader(json));
905                isSafe = (reader.read() != null);
906            }
907
908        } catch (Exception e) {
909            isSafe = false;
910        }
911        return isSafe;
912    }
913
914    /**
915     * Apply a collection of validations on a image file provided:
916     * <ul>
917     * <li>Real image file.</li>
918     * <li>Its mime type is into the list of allowed mime types.</li>
919     * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li>
920     * </ul>
921     * <br>
922     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
923     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
924     *
925     * @param imageFilePath         Filename of the image file to check.
926     * @param imageAllowedMimeTypes List of image mime types allowed.
927     * @return True only if the file pass all validations.
928     * @see "https://commons.apache.org/proper/commons-imaging/"
929     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
930     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
931     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
932     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
933     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
934     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
935     * @see "https://exiftool.org/examples.html"
936     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
937     * @see "https://hexed.it/"
938     * @see "https://github.com/sighook/pixload"
939     */
940    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
941        boolean isSafe = false;
942        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
943        try {
944            File imgFile = new File(imageFilePath);
945            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
946                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
947                //Step 1: Check the mime type of the file against the allowed ones
948                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
949                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
950                    //Step 2: Load the image into an object using the Image API
951                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
952                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
953                        //Step 3: Check the metadata if the image format support it - Highly experimental
954                        List<String> metadataWithPayloads = new ArrayList<>();
955                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
956                        if (imgMetadata != null) {
957                            imgMetadata.getItems().forEach(item -> {
958                                String metadata = item.toString();
959                                if (payloadDetectionRegex.matcher(metadata).find()) {
960                                    metadataWithPayloads.add(metadata);
961                                }
962                            });
963                        }
964                        isSafe = metadataWithPayloads.isEmpty();
965                    }
966                }
967            }
968        } catch (Exception e) {
969            isSafe = false;
970        }
971        return isSafe;
972    }
973
974    /**
975     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
976     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
977     *
978     * @param inputFilePath Filename of the file to clean up.
979     * @param inputFileType Type of the file provided.
980     * @return A array of bytes with the cleaned file.
981     * @throws IllegalArgumentException If an invalid parameter is passed
982     * @throws Exception                If any technical error during the cleaning processing
983     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
984     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
985     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
986     * @see "https://stackoverflow.com/a/13605411"
987     */
988    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
989        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
990        File inputFile = new File(inputFilePath);
991        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
992            throw new IllegalArgumentException("Cannot read the content of the input file!");
993        }
994        switch (inputFileType) {
995            case PDF -> {
996                try (PDDocument document = Loader.loadPDF(inputFile)) {
997                    document.save(sanitizedContent);
998                }
999            }
1000            case IMAGE -> {
1001                // Load the original image
1002                BufferedImage originalImage = ImageIO.read(inputFile);
1003                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
1004                // Check that image has been successfully loaded
1005                if (originalImage == null) {
1006                    throw new IOException("Cannot load the original image !");
1007                }
1008                // Get current Width and Height of the image
1009                int originalWidth = originalImage.getWidth(null);
1010                int originalHeight = originalImage.getHeight(null);
1011                // Resize the image by removing 1px on Width and Height
1012                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
1013                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
1014                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
1015                // Save image to a bytes buffer
1016                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
1017                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
1018                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
1019                }
1020                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
1021                Graphics2D drawer = sanitizedImage.createGraphics();
1022                drawer.drawImage(initialSizedImage, 0, 0, null);
1023                drawer.dispose();
1024                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
1025            }
1026            default -> throw new IllegalArgumentException("Type of file not supported !");
1027        }
1028        if (sanitizedContent.size() == 0) {
1029            throw new IOException("An error occur during the rewrite operation!");
1030        }
1031        return sanitizedContent.toByteArray();
1032    }
1033
1034    /**
1035     * Apply a collection of validations on a string expected to be an email address:
1036     * <ul>
1037     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1038     * <li>Is not using "Encoded-word" format.</li>
1039     * <li>Is not using comment format.</li>
1040     * <li>Is not using "Punycode" format.</li>
1041     * <li>Is not using UUCP style addresses.</li>
1042     * <li>Is not using address literals.</li>
1043     * <li>Is not using source routes.</li>
1044     * <li>Is not using the "percent hack".</li>
1045     * </ul><br>
1046     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1047     *
1048     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
1049     *
1050     * @param addr String expected to be a valid email address.
1051     * @return True only if the string pass all validations.
1052     * @see "https://commons.apache.org/proper/commons-validator/"
1053     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1054     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1055     * @see "https://portswigger.net/research/splitting-the-email-atom"
1056     * @see "https://www.jochentopf.com/email/address.html"
1057     * @see "https://en.wikipedia.org/wiki/Email_address"
1058     */
1059    public static boolean isEmailAddress(String addr) {
1060        boolean isValid = false;
1061        String work = addr.toLowerCase(Locale.ROOT);
1062        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1063        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1064        try {
1065            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1066            if (EmailValidator.getInstance(true, true).isValid(work)) {
1067                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1068                if (!encodedWordRegex.matcher(work).find()) {
1069                    //If OK then validate it does not contains punycode
1070                    if (!work.contains("xn--")) {
1071                        //If OK then validate it does not use:
1072                        // UUCP style addresses,
1073                        // Comment format,
1074                        // Address literals,
1075                        // Source routes,
1076                        // The percent hack.
1077                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1078                            isValid = true;
1079                        }
1080                    }
1081                }
1082            }
1083        } catch (Exception e) {
1084            isValid = false;
1085        }
1086        return isValid;
1087    }
1088
1089    /**
1090     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1091     * <br>
1092     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1093     * <br>
1094     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1095     * <br>
1096     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1097     * <br>
1098     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1099     *
1100     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1101     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1102     * @see "https://www.stet.eu/en/psd2/"
1103     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1104     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1105     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1106     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1107     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1108     * @see "https://portswigger.net/web-security/ssrf"
1109     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1110     */
1111    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1112        boolean isValid = false;
1113        long connectionTimeoutInSeconds = 10;
1114        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1115        try {
1116            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1117            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1118                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1119                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1120                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1121                    URI uri = URI.create(certificateUrl);
1122                    //3. Require usage of HTTPS and reject any url containing query parameters
1123                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1124                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1125                        //and limit the interest to use the SSRF because to pass the check the url need to:
1126                        //- Do not having any query parameters.
1127                        //- Use HTTPS protocol.
1128                        //- End with a string having the format "_[0-9a-f]{64}".
1129                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1130                        HttpResponse<String> response;
1131                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1132                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1133                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1134                                    .build();
1135                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1136                            if (response.statusCode() == 200) {
1137                                //5. Ensure that the response content type is "text/plain"
1138                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1139                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1140                            }
1141                        }
1142                    }
1143                }
1144            }
1145        } catch (Exception e) {
1146            isValid = false;
1147        }
1148        return isValid;
1149    }
1150
1151    /**
1152     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1153     *
1154     * @param encodedData            URL encoded data.
1155     * @param decodingRoundThreshold Threshold above which decoding will fail.
1156     * @return The decoded data.
1157     * @throws SecurityException If the threshold is reached.
1158     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1159     * @see "https://owasp.org/www-community/Double_Encoding"
1160     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1161     * @see "https://capec.mitre.org/data/definitions/120.html"
1162     */
1163    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1164        if (decodingRoundThreshold < 1) {
1165            throw new IllegalArgumentException("Threshold must be a positive number !");
1166        }
1167        if (encodedData == null) {
1168            throw new IllegalArgumentException("Data provided must not be null !");
1169        }
1170        Charset charset = StandardCharsets.UTF_8;
1171        int currentDecodingRound = 0;
1172        boolean isFinished = false;
1173        String currentRoundData = encodedData;
1174        String previousRoundData = encodedData;
1175        while (!isFinished) {
1176            if (currentDecodingRound > decodingRoundThreshold) {
1177                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1178            }
1179            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1180            isFinished = currentRoundData.equals(previousRoundData);
1181            previousRoundData = currentRoundData;
1182            currentDecodingRound++;
1183        }
1184        return currentRoundData;
1185    }
1186
1187    /**
     * Apply a collection of validations on a string expected to be a system file/folder path:
1189     * <ul>
1190     * <li>Does not contains path traversal payload.</li>
1191     * <li>The canonical path is equals to the absolute path.</li>
1192     * </ul><br>
1193     *
1194     * @param path String expected to be a valid system file/folder path.
1195     * @return True only if the string pass all validations.
1196     * @see "https://portswigger.net/web-security/file-path-traversal"
1197     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1198     * @see "https://capec.mitre.org/data/definitions/126.html"
1199     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1200     */
1201    public static boolean isPathSafe(String path) {
1202        boolean isSafe = false;
1203        int decodingRoundThreshold = 3;
1204        try {
1205            if (path != null && !path.isEmpty()) {
1206                //URL decode the path if case of data coming from a web context
1207                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1208                //Ensure that no path traversal expression is present
1209                if (!decodedPath.contains("..")) {
1210                    File f = new File(decodedPath);
1211                    String canonicalPath = f.getCanonicalPath();
1212                    String absolutePath = f.getAbsolutePath();
1213                    isSafe = canonicalPath.equals(absolutePath);
1214                }
1215            }
1216        } catch (Exception e) {
1217            isSafe = false;
1218        }
1219        return isSafe;
1220    }
1221
1222    /**
1223     * Identify if an XML contains any XML comments or have any XSL processing instructions.<br>
1224     * Stream reader based parsing is used to support large XML tree.
1225     *
1226     * @param xmlFilePath Filename of the XML file to check.
1227     * @return True only if XML comments or XSL processing instructions are identified.
1228     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1229     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1230     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1231     * @see "https://www.w3.org/Style/styling-XML.en.html"
1232     */
1233    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1234        boolean itemsDetected = false;
1235        try {
1236            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1237            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1238            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1239            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1240            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1241            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1242
1243            //Parse file
1244            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1245                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1246                int eventType;
1247                while (reader.hasNext() && !itemsDetected) {
1248                    eventType = reader.next();
1249                    if (eventType == XMLEvent.COMMENT) {
1250                        itemsDetected = true;
1251                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1252                        itemsDetected = true;
1253                    }
1254                }
1255            }
1256        } catch (Exception e) {
1257            //In case of error then assume that the check failed
1258            itemsDetected = true;
1259        }
1260        return itemsDetected;
1261    }
1262
1263
1264    /**
1265     * Perform a set of additional validations against a JWT token:
1266     * <ul>
1267     *     <li>Do not use the <b>NONE</b> signature algorithm.</li>
1268     *     <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li>
1269     *     <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li>
1270     *     <li>Match the expected type of token: ACCESS or ID or REFRESH.</li>
1271     * </ul>
1272     *
1273     * @param token               JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied.
1274     * @param expectedTokenType   The type of expected token using the enumeration provided.
1275     * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to.
     * @return True only if the token passes all the validations.
1277     * @see "https://www.iana.org/assignments/jwt/jwt.xhtml"
1278     * @see "https://auth0.com/docs/secure/tokens/access-tokens"
1279     * @see "https://auth0.com/docs/secure/tokens/id-tokens"
1280     * @see "https://auth0.com/docs/secure/tokens/refresh-tokens"
1281     * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/"
1282     * @see "https://jwt.io/libraries?language=Java"
1283     * @see "https://pentesterlab.com/blog/secure-jwt-library-design"
1284     * @see "https://github.com/auth0/java-jwt"
1285     */
1286    public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) {
1287        boolean isValid = false;
1288        TokenType tokenType;
1289        try {
1290            if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) {
1291                if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) {
1292                    String jti = token.getId();
1293                    if (jti != null && !jti.trim().isEmpty()) {
1294                        boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase);
1295                        if (!jtiIsRevoked) {
1296                            //Determine the token type based on the presence of specifics claims
1297                            if (!token.getClaim("scope").isMissing()) {
1298                                tokenType = TokenType.ACCESS;
1299                            } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) {
1300                                tokenType = TokenType.ID;
1301                            } else {
1302                                tokenType = TokenType.REFRESH;
1303                            }
1304                            isValid = (tokenType.equals(expectedTokenType));
1305                        }
1306                    }
1307                }
1308            }
1309
1310        } catch (Exception e) {
1311            //In case of error then assume that the check failed
1312            isValid = false;
1313        }
1314        return isValid;
1315    }
1316
1317    /**
     * Apply validations on a regular expression to ensure that it is not prone to a ReDoS attack.
1319     * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b>
1320     * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool.
1321     *
1322     * @param regex                       String expected to be a valid regular expression (regex).
1323     * @param data                        Test data on which the regular expression is executed for the test.
1324     * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified).
1325     * @return True only if the string pass all validations.
1326     * @see "https://github.blog/security/how-to-fix-a-redos/"
1327     * @see "https://learn.snyk.io/lesson/redos"
1328     * @see "https://rules.sonarsource.com/java/RSPEC-2631/"
1329     * @see "https://github.com/doyensec/regexploit"
1330     * @see "https://github.com/makenowjust-labs/recheck"
1331     * @see "https://github.com/tjenkinson/redos-detector"
1332     * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf"
1333     * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS"
1334     */
1335    public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) {
1336        Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value.");
1337        Objects.requireNonNull(data, "A sample data is needed to perform the test.");
1338        Objects.requireNonNull(regex, "A regular expression is needed to perform the test.");
1339        boolean isSafe = false;
1340        int executionTimeout = maximumRunningTimeInSeconds.orElse(4);
1341        ExecutorService executor = Executors.newSingleThreadExecutor();
1342        try {
1343            Callable<Boolean> task = () -> {
1344                Pattern pattern = Pattern.compile(regex);
1345                return pattern.matcher(data).matches();
1346            };
1347            List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS);
1348            if (!tasks.getFirst().isCancelled()) {
1349                isSafe = true;
1350            }
1351        } catch (Exception e) {
1352            isSafe = false;
1353        } finally {
1354            executor.shutdownNow();
1355        }
1356        return isSafe;
1357    }
1358
1359    /**
1360     * Compute a UUID version 7 without using any external dependency.<br><br>
1361     * <b>Below are my personal point of view and perhaps I'm totally wrong!</b>
1362     * <br><br>
1363     * Why such method?
1364     * <ul>
1365     * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li>
1366     * <li>Import a library just to generate such value is overkill for me.</li>
1367     * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li>
1368     * </ul>
1369     * <br>
1370     * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b>
1371     * <br><br>
1372     * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly:
1373     * <ul>
1374     *     <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li>
1375     *     <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li>
1376     * </ul>
1377     *
1378     * @return A UUID object representing the UUID v7.
1379     * @see "https://uuid7.com/"
1380     * @see "https://antonz.org/uuidv7/"
1381     * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns"
1382     * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7"
1383     * @see "https://www.baeldung.com/java-generating-time-based-uuids"
1384     * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier"
1385     * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/"
1386     */
1387    public static UUID computeUUIDv7() {
1388        SecureRandom secureRandom = new SecureRandom();
1389        // Generate truly random bytes
1390        byte[] value = new byte[16];
1391        secureRandom.nextBytes(value);
1392        // Get current timestamp in milliseconds
1393        ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES);
1394        timestamp.putLong(System.currentTimeMillis());
1395        // Create the TIMESTAMP part of the UUID
1396        System.arraycopy(timestamp.array(), 2, value, 0, 6);
1397        // Create the VERSION and the VARIANT parts of the UUID
1398        value[6] = (byte) ((value[6] & 0x0F) | 0x70);
1399        value[8] = (byte) ((value[8] & 0x3F) | 0x80);
1400        //Create the HIGH and LOW parts of the UUID
1401        ByteBuffer buf = ByteBuffer.wrap(value);
1402        long high = buf.getLong();
1403        long low = buf.getLong();
1404        //Create and return the UUID object
1405        UUID uuidv7 = new UUID(high, low);
1406        return uuidv7;
1407    }
1408
1409    /**
1410     * Ensure that an XSD file does not contain any include/import/redefine instruction (prevent exposure to SSRF).
1411     *
1412     * @param xsdFilePath Filename of the XSD file to check.
1413     * @return True only if the file pass all validations.
1414     * @see "https://portswigger.net/web-security/ssrf"
1415     * @see "https://www.w3schools.com/Xml/el_import.asp"
1416     * @see "https://www.w3schools.com/xml/el_include.asp"
1417     * @see "https://www.linkedin.com/posts/righettod_appsec-appsecurity-java-activity-7344048434326188053-6Ru9"
1418     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/validation/SchemaFactory.html#setProperty(java.lang.String,java.lang.Object)"
1419     */
1420    public static boolean isXSDSafe(String xsdFilePath) {
1421        boolean isSafe = false;
1422        try {
1423            File xsdFile = new File(xsdFilePath);
1424            if (xsdFile.exists() && xsdFile.canRead() && xsdFile.isFile()) {
1425                //Parse the XSD file, if an exception occur then it's imply that the XSD specified is not a valid ones
1426                //Create an schema factory throwing Exception if a external schema is specified
1427                SchemaFactory schemaFactory = SchemaFactory.newDefaultInstance();
1428                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1429                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
1430                //Parse the schema
1431                Schema schema = schemaFactory.newSchema(xsdFile);
1432                isSafe = (schema != null);
1433            }
1434        } catch (Exception e) {
1435            isSafe = false;
1436        }
1437        return isSafe;
1438    }
1439
1440
1441    /**
1442     * Extract all sensitive information from a string provided.<br>
1443     * This can be used to identify any sensitive information into a <a href="https://cwe.mitre.org/data/definitions/532.html">message expected to be written in a log</a> and then replace every sensitive values by an obfuscated ones.<br><br>
1444     * For the luxembourg national identification number, this method focus on detecting identifiers for a physical entity (people) and not a moral one (company).<br><br>
1445     * I delegated the validation of the IBAN to a dedicated library (<a href="https://github.com/arturmkrtchyan/iban4j">iban4j</a>) to not "reinvent the wheel" and then introduce buggy validation myself. I used <b>iban4j</b> over the <b><a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/IBANValidator.html">IBANValidator</a></b> class from the <a href="https://commons.apache.org/proper/commons-validator/"><b>Apache Commons Validator</b></a> library because <b>iban4j</b> perform a full official IBAN specification validation so its reduce risks of false-positives by ensuring that an IBAN detected is a real IBAN.<br><br>
1446     * Same thing and reason regarding the validation of the bank card PAN using the  class <a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html">CreditCardValidator</a> from the <b>Apache Commons Validator</b> library.
1447     *
1448     * @param content String in which sensitive information must be searched.
1449     * @return A map with the collection of identified sensitive information gathered by sensitive information type. If nothing is found then the map is empty. A type of sensitive information is only present if there is at least one item found. A set is used to not store duplicates occurrence of the same sensitive information.
1450     * @throws Exception If any error occurs during the processing.
1451     * @see "https://guichet.public.lu/en/citoyens/citoyennete/registre-national/identification/demande-numero-rnpp.html"
1452     * @see "https://cnpd.public.lu/fr/decisions-avis/2009/identifiant-unique.html"
1453     * @see "https://cnpd.public.lu/content/dam/cnpd/fr/decisions-avis/2009/identifiant-unique/48_2009.pdf"
1454     * @see "https://en.wikipedia.org/wiki/International_Bank_Account_Number"
1455     * @see "https://www.iban.com/structure"
1456     * @see "https://github.com/arturmkrtchyan/iban4j"
1457     * @see "https://cwe.mitre.org/data/definitions/532.html"
1458     * @see "https://www.baeldung.com/logback-mask-sensitive-data"
1459     * @see "https://en.wikipedia.org/wiki/Payment_card_number"
1460     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html"
1461     * @see "https://commons.apache.org/proper/commons-validator/"
1462     */
1463    public static Map<SensitiveInformationType, Set<String>> extractAllSensitiveInformation(String content) throws Exception {
1464        CreditCardValidator creditCardValidator = CreditCardValidator.genericCreditCardValidator();
1465        Pattern nationalIdentifierRegex = Pattern.compile("([0-9]{13})");
1466        Pattern ibanNonHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}[A-Z0-9]{11,30})", Pattern.CASE_INSENSITIVE);
1467        Pattern ibanHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}(?:\\s[A-Z0-9]{4}){2,7}\\s[A-Z0-9]{1,4})", Pattern.CASE_INSENSITIVE);
1468        Pattern panRegex = Pattern.compile("((?:\\d[ -]*?){13,19})");
1469        Map<SensitiveInformationType, Set<String>> data = new HashMap<>();
1470        data.put(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER, new HashSet<>());
1471        data.put(SensitiveInformationType.IBAN, new HashSet<>());
1472        data.put(SensitiveInformationType.BANK_CARD_PAN, new HashSet<>());
1473
1474        if (content != null && !content.isBlank()) {
1475            /* Step 1: Search for LU national identifier */
1476            //A national identifier have the following structure: [BIRTHDATE_YEAR_YYYY][BIRTHDATE_MONTH_MM][BIRTHDATE_DAY_DD][FIVE_INTEGER]
1477            //Define minimal and maximal birth year base on current year
1478            //Assume people live less than 120 years
1479            int maxBirthYear = LocalDate.now(ZoneId.of("Europe/Luxembourg")).getYear();
1480            int minBirthYear = maxBirthYear - 120;
1481            Matcher matcher = nationalIdentifierRegex.matcher(content);
1482            String nationalIdentierFull;
1483            int nationalIdentierYear, nationalIdentierMonth, nationalIdentierDay;
1484            while (matcher.find()) {
1485                nationalIdentierFull = matcher.group(1);
1486                //Check that the string is a valid national identifier and if yes then add it
1487                nationalIdentierYear = Integer.parseInt(nationalIdentierFull.substring(0, 4));
1488                nationalIdentierMonth = Integer.parseInt(nationalIdentierFull.substring(4, 6));
1489                nationalIdentierDay = Integer.parseInt(nationalIdentierFull.substring(6, 8));
1490                if (nationalIdentierYear >= minBirthYear && nationalIdentierYear <= maxBirthYear) {
1491                    if (nationalIdentierMonth >= 1 && nationalIdentierMonth <= 12) {
1492                        if (YearMonth.of(nationalIdentierYear, nationalIdentierMonth).isValidDay(nationalIdentierDay)) {
1493                            data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).add(nationalIdentierFull);
1494                        }
1495                    }
1496                }
1497            }
1498
1499            /* Step 2a: Search for IBAN that are non human formatted */
1500            matcher = ibanNonHumanFormattedRegex.matcher(content);
1501            String iban, ibanUpperCased;
1502            while (matcher.find()) {
1503                iban = matcher.group(1);
1504                ibanUpperCased = iban.toUpperCase(Locale.ROOT);
1505                //Check that the string is a valid IBAN and if yes then add it
1506                if (IbanUtil.isValid(ibanUpperCased)) {
1507                    data.get(SensitiveInformationType.IBAN).add(iban);
1508                }
1509            }
1510
1511            /* Step 2b: Search for IBAN that are human formatted */
1512            matcher = ibanHumanFormattedRegex.matcher(content);
1513            String ibanUpperCasedNoSpace;
1514            while (matcher.find()) {
1515                iban = matcher.group(1);
1516                ibanUpperCasedNoSpace = iban.toUpperCase(Locale.ROOT).replace(" ", "");
1517                //Check that the string is a valid IBAN and if yes then add it
1518                if (IbanUtil.isValid(ibanUpperCasedNoSpace)) {
1519                    data.get(SensitiveInformationType.IBAN).add(iban);
1520                }
1521            }
1522
1523            /* Step 3: Search for bank card PAN */
1524            matcher = panRegex.matcher(content);
1525            String pan, panNoSeparator;
1526            while (matcher.find()) {
1527                pan = matcher.group(1);
1528                panNoSeparator = pan.toUpperCase(Locale.ROOT).replace(" ", "").replace("-", "");
1529                //Check that the string is a valid PAN and if yes then add it
1530                if (creditCardValidator.isValid(panNoSeparator)) {
1531                    data.get(SensitiveInformationType.BANK_CARD_PAN).add(pan);
1532                }
1533            }
1534
1535        }
1536
1537        //Cleanup if a set is empty
1538        if (data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).isEmpty()) {
1539            data.remove(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER);
1540        }
1541        if (data.get(SensitiveInformationType.IBAN).isEmpty()) {
1542            data.remove(SensitiveInformationType.IBAN);
1543        }
1544        if (data.get(SensitiveInformationType.BANK_CARD_PAN).isEmpty()) {
1545            data.remove(SensitiveInformationType.BANK_CARD_PAN);
1546        }
1547
1548        return data;
1549    }
1550
1551    /**
1552     * Apply a collection of validations on a bytes array provided representing GZIP compressed data:
1553     * <ul>
1554     * <li>Are valid GZIP compressed data.</li>
1555     * <li>The number of bytes once decompressed is under the specified limit.</li>
1556     * </ul>
1557     * <br><b>Note:</b> The value <code>Integer.MAX_VALUE - 8</code> was chosen because during my tests on Java 25 (JDK 64 bits on Windows 11 Pro), it was possible to decompress such amount of data with the default JVM settings without causing an <a href="https://docs.oracle.com/en/java/javase/25/docs/api//java.base/java/lang/OutOfMemoryError.html">Out Of Memory error</a>.
1558     *
1559     * @param compressedBytes                    Array of bytes containing the GZIP compressed data to check.
1560     * @param maxCountOfDecompressedBytesAllowed Maximum number of decompressed bytes allowed. Default to 10 MB if the specified value is inferior to 1 or superior to Integer.MAX_VALUE - 8.
     * @return True only if the data pass all validations.
1562     * @see "https://en.wikipedia.org/wiki/Gzip"
1563     * @see "https://www.rapid7.com/db/modules/auxiliary/dos/http/gzip_bomb_dos/"
1564     */
1565    public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxCountOfDecompressedBytesAllowed) {
1566        boolean isSafe = false;
1567
1568        try {
1569            long limit = maxCountOfDecompressedBytesAllowed;
1570            long totalRead = 0L;
1571            byte[] buffer = new byte[8 * 1024];
1572            int read;
1573            if (limit < 1 || limit > (Integer.MAX_VALUE - 8)) {
1574                limit = 10_000_000;
1575            }
1576            try (ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); GZIPInputStream gzipInputStream = new GZIPInputStream(new BufferedInputStream(bis))) {
1577                while ((read = gzipInputStream.read(buffer)) != -1) {
1578                    totalRead += read;
1579                    if (totalRead > limit) {
1580                        throw new Exception();
1581                    }
1582                }
1583            }
1584            isSafe = true;
1585        } catch (Exception e) {
1586            isSafe = false;
1587        }
1588
1589        return isSafe;
1590    }
1591
1592    /**
1593     * Process a string, intended to be written in a log, to remove as much as possible information that can lead to an exposure to a log injection vulnerability.<br><br>
1594     * <b>Log injection</b> is also called <b>log forging</b>.<br><br>
1595     * The following information are removed:
1596     * <ul>
1597     *     <li>Characters: Carriage Return (CR), Linefeed (LF) and Tabulation (TAB).</li>
1598     *     <li>Leading and trailing spaces.</li>
1599     *     <li>Any HTML tags.</li>
1600     * </ul><br>
1601     * A parameter is also used to limit the maximum length of the sanitized message.
1602     * To remove any HTML tags, the OWASP project <a href="https://owasp.org/www-project-java-html-sanitizer/">Java HTML Sanitizer</a> is leveraged.<br>
1603     * I delegated such removal to a dedicated library to prevent missing of edge cases as well as potential bypasses.
1604     *
1605     * @param message          The original string message intended to be written in a log.
1606     * @param maxMessageLength The maximum number of characters after which the sanitized message must be truncated. If inferior to 1 then default to the value of 500.
1607     * @return The string message cleaned.
1608     * @see "https://www.wallarm.com/what/log-forging-attack"
1609     * @see "https://www.invicti.com/learn/crlf-injection"
1610     * @see "https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_injection_vulnerability.html"
1611     * @see "https://capec.mitre.org/data/definitions/93.html"
1612     * @see "https://codeql.github.com/codeql-query-help/javascript/js-log-injection/"
1613     * @see "https://owasp.org/www-project-java-html-sanitizer/"
1614     * @see "https://github.com/OWASP/java-html-sanitizer"
1615     */
1616    public static String sanitizeLogMessage(String message, int maxMessageLength) {
1617        String sanitized = message;
1618        int maxSanitizedMessageLength = maxMessageLength;
1619
1620        if (sanitized != null && !sanitized.isBlank()) {
1621            if (maxSanitizedMessageLength < 1) {
1622                maxSanitizedMessageLength = 500;
1623            }
1624            //Step 1: Remove any CR/LR/TAB characters as well as leading and trailing spaces
1625            sanitized = sanitized.replaceAll("[\\n\\r\\t]", "").trim();
1626            //Step 2: Remove any HTML tags
1627            PolicyFactory htmlSanitizerPolicy = new HtmlPolicyBuilder().toFactory();
1628            sanitized = htmlSanitizerPolicy.sanitize(sanitized);
1629            //Step 3: Truncate the string in case of need
1630            if (sanitized.length() > maxSanitizedMessageLength) {
1631                sanitized = sanitized.substring(0, maxSanitizedMessageLength);
1632            }
1633        }
1634
1635        return sanitized;
1636    }
1637}