001package eu.righettod;
002
003
004import com.auth0.jwt.interfaces.DecodedJWT;
005import org.apache.commons.csv.CSVFormat;
006import org.apache.commons.csv.CSVRecord;
007import org.apache.commons.imaging.ImageInfo;
008import org.apache.commons.imaging.Imaging;
009import org.apache.commons.imaging.common.ImageMetadata;
010import org.apache.commons.validator.routines.EmailValidator;
011import org.apache.commons.validator.routines.InetAddressValidator;
012import org.apache.pdfbox.Loader;
013import org.apache.pdfbox.pdmodel.PDDocument;
014import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
015import org.apache.pdfbox.pdmodel.PDDocumentInformation;
016import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
017import org.apache.pdfbox.pdmodel.common.PDMetadata;
018import org.apache.pdfbox.pdmodel.interactive.action.*;
019import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
022import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
023import org.apache.poi.poifs.filesystem.DirectoryEntry;
024import org.apache.poi.poifs.filesystem.POIFSFileSystem;
025import org.apache.poi.poifs.macros.VBAMacroReader;
026import org.apache.tika.detect.DefaultDetector;
027import org.apache.tika.detect.Detector;
028import org.apache.tika.io.TemporaryResources;
029import org.apache.tika.io.TikaInputStream;
030import org.apache.tika.metadata.Metadata;
031import org.apache.tika.mime.MediaType;
032import org.apache.tika.mime.MimeTypes;
033import org.w3c.dom.Document;
034import org.xml.sax.EntityResolver;
035import org.xml.sax.InputSource;
036import org.xml.sax.SAXException;
037
038import javax.crypto.Mac;
039import javax.crypto.spec.SecretKeySpec;
040import javax.imageio.ImageIO;
041import javax.json.Json;
042import javax.json.JsonReader;
043import javax.xml.XMLConstants;
044import javax.xml.parsers.DocumentBuilder;
045import javax.xml.parsers.DocumentBuilderFactory;
046import javax.xml.parsers.ParserConfigurationException;
047import javax.xml.stream.XMLInputFactory;
048import javax.xml.stream.XMLStreamReader;
049import javax.xml.stream.events.XMLEvent;
050import javax.xml.validation.Schema;
051import javax.xml.validation.SchemaFactory;
052import java.awt.*;
053import java.awt.image.BufferedImage;
054import java.io.*;
055import java.net.*;
056import java.net.http.HttpClient;
057import java.net.http.HttpRequest;
058import java.net.http.HttpResponse;
059import java.nio.ByteBuffer;
060import java.nio.charset.Charset;
061import java.nio.charset.StandardCharsets;
062import java.nio.file.Files;
063import java.security.MessageDigest;
064import java.security.SecureRandom;
065import java.time.Duration;
066import java.util.*;
067import java.util.List;
068import java.util.concurrent.*;
069import java.util.concurrent.atomic.AtomicInteger;
070import java.util.regex.Pattern;
071import java.util.zip.ZipEntry;
072import java.util.zip.ZipFile;
073
074/**
075 * Provides different utilities methods to apply processing from a security perspective.<br>
 * These code snippets:
077 * <ul>
078 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
079 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
080 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
081 * </ul>
082 */
083public class SecurityUtils {
084    /**
085     * Default constructor: Not needed as the class only provides static methods.
086     */
    private SecurityUtils() {
        //Intentionally empty: the private visibility prevents instantiation from outside of this class
    }
089
090    /**
     * Apply a collection of validations to verify if a provided PIN code is considered weak (easy to guess) or not.<br>
     * This method considers that the format of the PIN code is [0-9]{6,}<br>
093     * Rule to consider a PIN code as weak:
094     * <ul>
095     * <li>Length is inferior to 6 positions.</li>
096     * <li>Contain only the same number or only a sequence of zero.</li>
097     * <li>Contain sequence of following incremental or decremental numbers.</li>
098     * </ul>
099     *
100     * @param pinCode PIN code to verify.
101     * @return True only if the PIN is considered as weak.
102     */
103    public static boolean isWeakPINCode(String pinCode) {
104        boolean isWeak = true;
105        //Length is inferior to 6 positions
106        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
107        //and to ensure that the PIN is not only a sequence of zero
108        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
109            //Contain only the same number
110            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
111            if (!Pattern.matches(regex, pinCode)) {
112                //Contain sequence of following incremental or decremental numbers
113                char previousChar = 'X';
114                boolean containSequence = false;
115                for (char c : pinCode.toCharArray()) {
116                    if (previousChar != 'X') {
117                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
118                        int currentNbr = Integer.parseInt(String.valueOf(c));
119                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
120                            containSequence = true;
121                            break;
122                        }
123                    }
124                    previousChar = c;
125                }
126                if (!containSequence) {
127                    isWeak = false;
128                }
129            }
130        }
131        return isWeak;
132    }
133
134    /**
135     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
136     * <ul>
137     * <li>Real Microsoft Word 97-2003 document file.</li>
138     * <li>No VBA Macro.<br></li>
139     * <li>No embedded objects.</li>
140     * </ul>
141     *
142     * @param wordFilePath Filename of the Word document file to check.
143     * @return True only if the file pass all validations.
144     * @see "https://poi.apache.org/components/"
145     * @see "https://poi.apache.org/components/document/"
146     * @see "https://poi.apache.org/components/poifs/how-to.html"
147     * @see "https://poi.apache.org/components/poifs/embeded.html"
148     * @see "https://poi.apache.org/"
149     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
150     */
151    public static boolean isWord972003DocumentSafe(String wordFilePath) {
152        boolean isSafe = false;
153        try {
154            File wordFile = new File(wordFilePath);
155            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
156                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
157                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
158                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
159                    VBAMacroReader macroReader = new VBAMacroReader(fs);
160                    Map<String, String> macros = macroReader.readMacros();
161                    if (macros == null || macros.isEmpty()) {
162                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
163                        //From POI documentation:
164                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
165                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
166                        final List<String> embeddedObjectFound = new ArrayList<>();
167                        DirectoryEntry root = fs.getRoot();
168                        if (root.getEntryCount() > 0) {
169                            root.iterator().forEachRemaining(entry -> {
170                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
171                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
172                                    if (objPoolDirectory.getEntryCount() > 0) {
173                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
174                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
175                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
176                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
177                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
178                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
179                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
180                                                        }
181                                                    });
182                                                }
183                                            }
184                                        });
185                                    }
186                                }
187                            });
188                        }
189                        isSafe = embeddedObjectFound.isEmpty();
190                    }
191                }
192            }
193        } catch (Exception e) {
194            isSafe = false;
195        }
196        return isSafe;
197    }
198
199    /**
200     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
201     *
202     * @param xmlFilePath Filename of the XML file to check.
203     * @return True only if the file pass all validations.
204     * @see "https://portswigger.net/web-security/xxe"
205     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
206     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
207     * @see "https://www.w3.org/TR/xinclude-11/"
208     * @see "https://en.wikipedia.org/wiki/XInclude"
209     */
210    public static boolean isXMLSafe(String xmlFilePath) {
211        boolean isSafe = false;
212        try {
213            File xmlFile = new File(xmlFilePath);
214            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
215                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
216                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
217                if (!containXInclude) {
218                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
219                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
220                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
221                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
222                    //Xerces 2 only
223                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
224                    dbfInstance.setXIncludeAware(false);
225                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
226                    //Parse the document
227                    Document doc = builder.parse(xmlFile);
228                    isSafe = (doc != null && doc.getDocumentElement() != null);
229                }
230            }
231        } catch (Exception e) {
232            isSafe = false;
233        }
234        return isSafe;
235    }
236
237
238    /**
239     * Extract all URL links from a PDF file provided.<br>
240     * This can be used to apply validation on a PDF against contained links.
241     *
242     * @param pdfFilePath pdfFilePath Filename of the PDF file to process.
243     * @return A List of URL objects that is empty if no links is found.
244     * @throws Exception If any error occurs during the processing of the PDF file.
245     * @see "https://www.gushiciku.cn/pl/21KQ"
246     * @see "https://pdfbox.apache.org/"
247     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
248     */
249    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
250        final List<URL> links = new ArrayList<>();
251        File pdfFile = new File(pdfFilePath);
252        try (PDDocument document = Loader.loadPDF(pdfFile)) {
253            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
254            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
255                @Override
256                public boolean accept(PDAnnotation annotation) {
257                    boolean keep = false;
258                    if (annotation instanceof PDAnnotationLink) {
259                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
260                    }
261                    return keep;
262                }
263            };
264            documentCatalog.getPages().forEach(page -> {
265                try {
266                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
267                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
268                        try {
269                            URL urlObj = new URL(linkAnnotation.getURI());
270                            if (!links.contains(urlObj)) {
271                                links.add(urlObj);
272                            }
273                        } catch (MalformedURLException e) {
274                            throw new RuntimeException(e);
275                        }
276                    });
277                } catch (Exception e) {
278                    throw new RuntimeException(e);
279                }
280            });
281        }
282        return links;
283    }
284
285    /**
286     * Apply a collection of validations on a PDF file provided:
287     * <ul>
288     * <li>Real PDF file.</li>
289     * <li>No attachments.</li>
290     * <li>No Javascript code.</li>
291     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
292     * <li>No XFA forms in order to prevent exposure to XXE/SSRF like CVE-2025-54988.</li>
293     * </ul>
294     *
295     * @param pdfFilePath Filename of the PDF file to check.
296     * @return True only if the file pass all validations.
297     * @see "https://stackoverflow.com/a/36161267"
298     * @see "https://www.gushiciku.cn/pl/21KQ"
299     * @see "https://github.com/jonaslejon/malicious-pdf"
300     * @see "https://pdfbox.apache.org/"
301     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
302     * @see "https://nvd.nist.gov/vuln/detail/CVE-2025-54988"
303     * @see "https://github.com/mgthuramoemyint/POC-CVE-2025-54988"
304     * @see "https://en.wikipedia.org/wiki/XFA"
305     */
306    public static boolean isPDFSafe(String pdfFilePath) {
307        boolean isSafe = false;
308        try {
309            File pdfFile = new File(pdfFilePath);
310            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
311                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
312                try (PDDocument document = Loader.loadPDF(pdfFile)) {
313                    //Step 2: Check if the file contains attached files, in our case is not allowed
314                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
315                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
316                    if (namesDictionary.getEmbeddedFiles() == null) {
317                        //Step 3: Check if the file contains any XFA forms
318                        PDAcroForm acroForm = documentCatalog.getAcroForm();
319                        boolean hasForm = (acroForm != null && acroForm.getXFA() != null);
320                        if (!hasForm) {
321                            //Step 4: Check if the file contains Javascript code, in our case is not allowed
322                            if (namesDictionary.getJavaScript() == null) {
323                                //Step 5: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
324                                final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
325                                AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
326                                    @Override
327                                    public boolean accept(PDAnnotation annotation) {
328                                        boolean keep = false;
329                                        if (annotation instanceof PDAnnotationLink) {
330                                            PDAnnotationLink link = (PDAnnotationLink) annotation;
331                                            PDAction action = link.getAction();
332                                            if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
333                                                keep = true;
334                                            }
335                                        }
336                                        return keep;
337                                    }
338                                };
339                                documentCatalog.getPages().forEach(page -> {
340                                    try {
341                                        notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
342                                    } catch (IOException e) {
343                                        throw new RuntimeException(e);
344                                    }
345                                });
346                                if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
347                                    isSafe = true;
348                                }
349                            }
350                        }
351                    }
352                }
353            }
354        } catch (Exception e) {
355            isSafe = false;
356        }
357        return isSafe;
358    }
359
360    /**
361     * Remove as much as possible metadata from the provided PDF document object.
362     *
363     * @param document PDFBox PDF document object on which metadata must be removed.
364     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
365     * @see "https://pdfbox.apache.org/"
366     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
367     */
368    public static void clearPDFMetadata(PDDocument document) {
369        if (document != null) {
370            PDDocumentInformation infoEmpty = new PDDocumentInformation();
371            document.setDocumentInformation(infoEmpty);
372            PDMetadata newMetadataEmpty = new PDMetadata(document);
373            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
374        }
375    }
376
377
378    /**
379     * Validate that the URL provided is really a relative URL.
380     *
381     * @param targetUrl URL to validate.
382     * @return True only if the file pass all validations.
383     * @see "https://portswigger.net/web-security/ssrf"
384     * @see "https://stackoverflow.com/q/6785442"
385     */
386    public static boolean isRelativeURL(String targetUrl) {
387        boolean isValid = false;
388        //Reject any URL encoded content and URL starting with a double slash
389        //Reject any URL contains credentials or fragment to prevent potential bypasses
390        String work = targetUrl;
391        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
392            //Creation of a URL object must fail
393            try {
394                new URL(work);
395                isValid = false;
396            } catch (MalformedURLException mf) {
397                //Last check to be sure (for prod usage compile the pattern one time)
398                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
399            }
400        }
401        return isValid;
402    }
403
404    /**
405     * Apply a collection of validations on a ZIP file provided:
406     * <ul>
407     * <li>Real ZIP file.</li>
408     * <li>Contain less than a specified level of deepness.</li>
409     * <li>Do not contain Zip-Slip entry path.</li>
410     * </ul>
411     *
412     * @param zipFilePath       Filename of the ZIP file to check.
413     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
414     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
415     * @return True only if the file pass all validations.
416     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
417     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
418     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
419     * @see "https://github.com/ptoomey3/evilarc"
420     * @see "https://github.com/abdulfatir/ZipBomb"
421     * @see "https://www.baeldung.com/cs/zip-bomb"
422     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
423     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
424     */
425    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
426        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
427        boolean isSafe = false;
428        try {
429            File zipFile = new File(zipFilePath);
430            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
431                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
432                try (ZipFile zipArch = new ZipFile(zipFile)) {
433                    //Step 2: Parse entries
434                    long deepness = 0;
435                    ZipEntry zipEntry;
436                    String entryExtension;
437                    String zipEntryName;
438                    boolean validationsFailed = false;
439                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
440                    while (entries.hasMoreElements()) {
441                        zipEntry = entries.nextElement();
442                        zipEntryName = zipEntry.getName();
443                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
444                        //Step 2a: Check if the current entry is an archive file
445                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
446                            validationsFailed = true;
447                            break;
448                        }
449                        //Step 2b: Check that level of deepness is inferior to the threshold specified
450                        if (zipEntryName.contains("/")) {
451                            //Determine deepness by inspecting the entry name.
452                            //Indeed, folder will be represented like this: folder/folder/folder/
453                            //So we can count the number of "/" to identify the deepness of the entry
454                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
455                            if (deepness > maxLevelDeepness) {
456                                validationsFailed = true;
457                                break;
458                            }
459                        }
460                        //Step 2c: Check if any entries match pattern of zip slip payload
461                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
462                            validationsFailed = true;
463                            break;
464                        }
465                    }
466                    if (!validationsFailed) {
467                        isSafe = true;
468                    }
469                }
470            }
471        } catch (Exception e) {
472            isSafe = false;
473        }
474        return isSafe;
475    }
476
477    /**
478     * Identify the mime type of the content specified (array of bytes).<br>
479     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
480     *
481     * @param content The content as an array of bytes.
482     * @return The mime type in lower case or null if it cannot be identified.
483     * @see "https://twitter.com/righettod/status/1595824709186519041"
484     * @see "https://tika.apache.org/"
485     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
486     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
487     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
488     */
489    public static String identifyMimeType(byte[] content) {
490        String mimeType = null;
491        if (content != null && content.length > 0) {
492            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
493            Metadata metadata = new Metadata();
494            try {
495                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
496                    MediaType mt = detector.detect(tikaInputStream, metadata);
497                    if (mt != null) {
498                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
499                    }
500                }
501            } catch (IOException ioe) {
502                mimeType = null;
503            }
504        }
505        return mimeType;
506    }
507
508    /**
509     * Apply a collection of validations on a string expected to be an public IP address:
510     * <ul>
511     * <li>Is a valid IP v4 or v6 address.</li>
512     * <li>Is public from an Internet perspective.</li>
513     * </ul>
514     * <br>
515     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
516     * <br><br>
517     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
518     *
519     * @param ip String expected to be a valid IP address.
520     * @return True only if the string pass all validations.
521     * @see "https://commons.apache.org/proper/commons-validator/"
522     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
523     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
524     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
525     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
526     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
527     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
528     * @see "https://ipcisco.com/lesson/ipv6-address/"
529     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
530     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
531     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
532     * @see "https://en.wikipedia.org/wiki/Multicast_address"
533     * @see "https://stackoverflow.com/a/5619409"
534     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
535     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
536     * @see "https://developer.android.com/reference/java/net/Inet6Address"
537     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
538     */
539    public static boolean isPublicIPAddress(String ip) {
540        boolean isValid = false;
541        try {
542            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
543            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
544                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
545                if (InetAddressValidator.getInstance().isValid(ip)) {
546                    //If OK then validate that is an public IP address
547                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
548                    InetAddress addr = InetAddress.getByName(ip);
549                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
550                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
551                    //For the prefix map, the start of the key indicates if the value is a regex or a string
552                    if (isValid && (addr instanceof Inet6Address)) {
553                        Map<String, String> prefixes = new HashMap<>();
554                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
555                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
556                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
557                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
558                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
559                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
560                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
561                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
562                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
563                        final List<Boolean> results = new ArrayList<>();
564                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
565                        prefixes.forEach((addressType, expr) -> {
566                            String exprLower = expr.trim().toLowerCase();
567                            if (addressType.startsWith("STRING_")) {
568                                results.add(ipLower.startsWith(exprLower));
569                            } else {
570                                results.add(Pattern.matches(exprLower, ipLower));
571                            }
572                        });
573                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
574                    }
575                }
576            }
577        } catch (Exception e) {
578            isValid = false;
579        }
580        return isValid;
581    }
582
583    /**
584     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
585     * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br>
586     * <p>
587     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
588     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
589     * </p>
590     * This method ensure that both hash above will be different.<br><br>
591     *
592     * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character.
593     *
594     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
595     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
596     * @throws Exception If any exception occurs
597     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
598     * @see "https://pentesterlab.com/badges/codereview"
599     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
600     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
601     */
602    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
603        byte[] hash = null;
604        String separator = "|";
605        if (parts != null && !parts.isEmpty()) {
606            //Ensure that not part is null
607            if (parts.stream().anyMatch(Objects::isNull)) {
608                throw new IllegalArgumentException("No part must be null!");
609            }
610            //Ensure that the separator is absent from every part
611            if (parts.stream().anyMatch(part -> part.contains(separator))) {
612                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
613            }
614            MessageDigest digest = MessageDigest.getInstance("SHA-256");
615            final StringBuilder buffer = new StringBuilder(separator);
616            parts.forEach(p -> {
617                buffer.append(p).append(separator);
618            });
619            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
620        }
621        return hash;
622    }
623
624    /**
625     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
626     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
627     * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations.
628     *
629     * @param xmlFilePath              Filename of the XML file to check.
630     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
631     * @return True only if the file pass all validations.
632     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
633     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
634     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
635     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
636     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
637     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
638     */
639    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
640        boolean isSafe = false;
641        final String errorTemplate = "Non allowed %s ID detected!";
642        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
643        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
644
645        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
646            throw new IllegalArgumentException("At least one SID must be specified!");
647        }
648        File xmlFile = new File(xmlFilePath);
649        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
650            try {
651                EntityResolver resolverValidator = (publicId, systemId) -> {
652                    if (publicId != null) {
653                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
654                    }
655                    if (!allowedSystemIdentifiers.contains(systemId)) {
656                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
657                    }
658                    //If it is OK then return a empty DTD/XSD
659                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
660                };
661                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
662                dbfInstance.setIgnoringElementContentWhitespace(true);
663                dbfInstance.setXIncludeAware(false);
664                dbfInstance.setValidating(false);
665                dbfInstance.setCoalescing(true);
666                dbfInstance.setIgnoringComments(false);
667                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
668                builder.setEntityResolver(resolverValidator);
669                Document doc = builder.parse(xmlFile);
670                isSafe = (doc != null);
671            } catch (SAXException | IOException | ParserConfigurationException e) {
672                isSafe = false;
673            }
674        }
675
676        return isSafe;
677    }
678
679    /**
680     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
681     * <ul>
682     * <li>Real CSV file.</li>
683     * <li>Do not contains any payload related to a CSV injections.</li>
684     * </ul>
685     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
686     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
687     *
688     * @param csvFilePath Filename of the CSV file to check.
689     * @return True only if the file pass all validations.
690     * @see "https://commons.apache.org/proper/commons-csv/"
691     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
692     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
693     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
694     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
695     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
696     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
697     * @see "https://cwe.mitre.org/data/definitions/1236.html"
698     */
699    public static boolean isExcelCSVSafe(String csvFilePath) {
700        boolean isSafe;
701        final AtomicInteger recordCount = new AtomicInteger();
702        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
703
704        try {
705            final List<String> payloadsIdentified = new ArrayList<>();
706            try (Reader in = new FileReader(csvFilePath)) {
707                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
708                records.forEach(record -> {
709                    record.forEach(recordValue -> {
710                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
711                            payloadsIdentified.add(recordValue);
712                        }
713                        recordCount.getAndIncrement();
714                    });
715                });
716            }
717            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
718        } catch (Exception e) {
719            isSafe = false;
720        }
721
722        return isSafe;
723    }
724
725    /**
726     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
727     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
728     *
729     * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode})
730     * @param input          When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used.
731     * @param secret         Secret to use to compute the SHA256 HMAC.
732     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
733     * @throws Exception If any exception occurs.
734     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
735     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
736     * @see "https://portswigger.net/web-security/deserialization"
737     * @see "https://www.baeldung.com/java-serialization-approaches"
738     * @see "https://www.baeldung.com/java-serialization"
739     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
740     * @see "https://en.wikipedia.org/wiki/HMAC"
741     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
742     */
743    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception {
744        Map<String, Object> results;
745        String resultFormatTemplate = "%s:%s";
746        //Verify input provided to be consistent
747        if (processingMode == null) {
748            throw new IllegalArgumentException("The processing mode is mandatory!");
749        }
750        if (input == null || input.trim().isEmpty()) {
751            throw new IllegalArgumentException("Input data is mandatory!");
752        }
753        if (secret == null || secret.length == 0) {
754            throw new IllegalArgumentException("The HMAC secret is mandatory!");
755        }
756        if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) {
757            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
758        }
759        //Processing
760        Base64.Decoder b64Decoder = Base64.getDecoder();
761        Base64.Encoder b64Encoder = Base64.getEncoder();
762        String hmacAlgorithm = "HmacSHA256";
763        Mac mac = Mac.getInstance(hmacAlgorithm);
764        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
765        mac.init(key);
766        results = new HashMap<>();
767        results.put("PROCESSING_MODE", processingMode.toString());
768        switch (processingMode) {
769            case PROTECT -> {
770                byte[] objectBytes = b64Decoder.decode(input);
771                byte[] hmac = mac.doFinal(objectBytes);
772                String encodedHmac = b64Encoder.encodeToString(hmac);
773                results.put("STATUS", Boolean.TRUE);
774                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
775            }
776            case VALIDATE -> {
777                String[] parts = input.split(":");
778                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
779                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
780                byte[] hmacComputed = mac.doFinal(objectBytes);
781                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
782                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
783                results.put("STATUS", hmacIsValid);
784                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
785            }
786            default -> throw new IllegalArgumentException("Not supported processing mode!");
787        }
788        return results;
789    }
790
791    /**
792     * Apply a collection of validations on a JSON string provided:
793     * <ul>
794     * <li>Real JSON structure.</li>
795     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
796     * <li>Contain less than a specified number of items in any arrays.</li>
797     * </ul>
798     * <br>
799     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
800     * I used the following assumption:
801     * <ul>
802     *      <li>The character <code>{</code> identify the beginning of an object.</li>
803     *      <li>The character <code>}</code> identify the end of an object.</li>
804     *      <li>The character <code>[</code> identify the beginning of an array.</li>
805     *      <li>The character <code>]</code> identify the end of an array.</li>
806     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
807     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
808     * </ul>
809     *
810     * @param json                  String containing the JSON data to validate.
811     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
812     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
813     * @return True only if the string pass all validations.
814     * @see "https://javaee.github.io/jsonp/"
815     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
816     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
817     */
818    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
819        boolean isSafe = false;
820
821        try {
822            //Step 1: Analyse the JSON string
823            int currentDeepness = 0;
824            int currentArrayItemsCount = 0;
825            int maxDeepnessReached = 0;
826            int maxArrayItemsCountReached = 0;
827            boolean currentlyInArray = false;
828            boolean currentlyInString = false;
829            int currentNestedArrayLevel = 0;
830            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
831            String work = json.replace(jsonEscapedDoubleQuote, "'");
832            for (char c : work.toCharArray()) {
833                switch (c) {
834                    case '{': {
835                        if (!currentlyInString) {
836                            currentDeepness++;
837                        }
838                        break;
839                    }
840                    case '}': {
841                        if (!currentlyInString) {
842                            currentDeepness--;
843                        }
844                        break;
845                    }
846                    case '[': {
847                        if (!currentlyInString) {
848                            currentDeepness++;
849                            if (currentlyInArray) {
850                                currentNestedArrayLevel++;
851                            }
852                            currentlyInArray = true;
853                        }
854                        break;
855                    }
856                    case ']': {
857                        if (!currentlyInString) {
858                            currentDeepness--;
859                            currentArrayItemsCount = 0;
860                            if (currentNestedArrayLevel > 0) {
861                                currentNestedArrayLevel--;
862                            }
863                            if (currentNestedArrayLevel == 0) {
864                                currentlyInArray = false;
865                            }
866                        }
867                        break;
868                    }
869                    case '"': {
870                        currentlyInString = !currentlyInString;
871                        break;
872                    }
873                    case ',': {
874                        if (!currentlyInString && currentlyInArray) {
875                            currentArrayItemsCount++;
876                        }
877                        break;
878                    }
879                }
880                if (currentDeepness > maxDeepnessReached) {
881                    maxDeepnessReached = currentDeepness;
882                }
883                if (currentArrayItemsCount > maxArrayItemsCountReached) {
884                    maxArrayItemsCountReached = currentArrayItemsCount;
885                }
886            }
887            //Step 2: Apply validation against the value specified as limits
888            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
889
890            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
891            if (isSafe) {
892                JsonReader reader = Json.createReader(new StringReader(json));
893                isSafe = (reader.read() != null);
894            }
895
896        } catch (Exception e) {
897            isSafe = false;
898        }
899        return isSafe;
900    }
901
902    /**
903     * Apply a collection of validations on a image file provided:
904     * <ul>
905     * <li>Real image file.</li>
906     * <li>Its mime type is into the list of allowed mime types.</li>
907     * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li>
908     * </ul>
909     * <br>
910     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
911     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
912     *
913     * @param imageFilePath         Filename of the image file to check.
914     * @param imageAllowedMimeTypes List of image mime types allowed.
915     * @return True only if the file pass all validations.
916     * @see "https://commons.apache.org/proper/commons-imaging/"
917     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
918     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
919     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
920     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
921     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
922     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
923     * @see "https://exiftool.org/examples.html"
924     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
925     * @see "https://hexed.it/"
926     * @see "https://github.com/sighook/pixload"
927     */
928    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
929        boolean isSafe = false;
930        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
931        try {
932            File imgFile = new File(imageFilePath);
933            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
934                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
935                //Step 1: Check the mime type of the file against the allowed ones
936                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
937                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
938                    //Step 2: Load the image into an object using the Image API
939                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
940                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
941                        //Step 3: Check the metadata if the image format support it - Highly experimental
942                        List<String> metadataWithPayloads = new ArrayList<>();
943                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
944                        if (imgMetadata != null) {
945                            imgMetadata.getItems().forEach(item -> {
946                                String metadata = item.toString();
947                                if (payloadDetectionRegex.matcher(metadata).find()) {
948                                    metadataWithPayloads.add(metadata);
949                                }
950                            });
951                        }
952                        isSafe = metadataWithPayloads.isEmpty();
953                    }
954                }
955            }
956        } catch (Exception e) {
957            isSafe = false;
958        }
959        return isSafe;
960    }
961
962    /**
963     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
964     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
965     *
966     * @param inputFilePath Filename of the file to clean up.
967     * @param inputFileType Type of the file provided.
968     * @return A array of bytes with the cleaned file.
969     * @throws IllegalArgumentException If an invalid parameter is passed
970     * @throws Exception                If any technical error during the cleaning processing
971     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
972     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
973     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
974     * @see "https://stackoverflow.com/a/13605411"
975     */
976    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
977        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
978        File inputFile = new File(inputFilePath);
979        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
980            throw new IllegalArgumentException("Cannot read the content of the input file!");
981        }
982        switch (inputFileType) {
983            case PDF -> {
984                try (PDDocument document = Loader.loadPDF(inputFile)) {
985                    document.save(sanitizedContent);
986                }
987            }
988            case IMAGE -> {
989                // Load the original image
990                BufferedImage originalImage = ImageIO.read(inputFile);
991                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
992                // Check that image has been successfully loaded
993                if (originalImage == null) {
994                    throw new IOException("Cannot load the original image !");
995                }
996                // Get current Width and Height of the image
997                int originalWidth = originalImage.getWidth(null);
998                int originalHeight = originalImage.getHeight(null);
999                // Resize the image by removing 1px on Width and Height
1000                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
1001                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
1002                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
1003                // Save image to a bytes buffer
1004                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
1005                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
1006                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
1007                }
1008                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
1009                Graphics2D drawer = sanitizedImage.createGraphics();
1010                drawer.drawImage(initialSizedImage, 0, 0, null);
1011                drawer.dispose();
1012                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
1013            }
1014            default -> throw new IllegalArgumentException("Type of file not supported !");
1015        }
1016        if (sanitizedContent.size() == 0) {
1017            throw new IOException("An error occur during the rewrite operation!");
1018        }
1019        return sanitizedContent.toByteArray();
1020    }
1021
1022    /**
1023     * Apply a collection of validations on a string expected to be an email address:
1024     * <ul>
1025     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1026     * <li>Is not using "Encoded-word" format.</li>
1027     * <li>Is not using comment format.</li>
1028     * <li>Is not using "Punycode" format.</li>
1029     * <li>Is not using UUCP style addresses.</li>
1030     * <li>Is not using address literals.</li>
1031     * <li>Is not using source routes.</li>
1032     * <li>Is not using the "percent hack".</li>
1033     * </ul><br>
1034     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1035     *
1036     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
1037     *
1038     * @param addr String expected to be a valid email address.
1039     * @return True only if the string pass all validations.
1040     * @see "https://commons.apache.org/proper/commons-validator/"
1041     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1042     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1043     * @see "https://portswigger.net/research/splitting-the-email-atom"
1044     * @see "https://www.jochentopf.com/email/address.html"
1045     * @see "https://en.wikipedia.org/wiki/Email_address"
1046     */
1047    public static boolean isEmailAddress(String addr) {
1048        boolean isValid = false;
1049        String work = addr.toLowerCase(Locale.ROOT);
1050        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1051        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1052        try {
1053            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1054            if (EmailValidator.getInstance(true, true).isValid(work)) {
1055                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1056                if (!encodedWordRegex.matcher(work).find()) {
1057                    //If OK then validate it does not contains punycode
1058                    if (!work.contains("xn--")) {
1059                        //If OK then validate it does not use:
1060                        // UUCP style addresses,
1061                        // Comment format,
1062                        // Address literals,
1063                        // Source routes,
1064                        // The percent hack.
1065                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1066                            isValid = true;
1067                        }
1068                    }
1069                }
1070            }
1071        } catch (Exception e) {
1072            isValid = false;
1073        }
1074        return isValid;
1075    }
1076
1077    /**
1078     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1079     * <br>
1080     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1081     * <br>
1082     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1083     * <br>
1084     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1085     * <br>
1086     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1087     *
1088     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1089     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1090     * @see "https://www.stet.eu/en/psd2/"
1091     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1092     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1093     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1094     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1095     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1096     * @see "https://portswigger.net/web-security/ssrf"
1097     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1098     */
1099    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1100        boolean isValid = false;
1101        long connectionTimeoutInSeconds = 10;
1102        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1103        try {
1104            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1105            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1106                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1107                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1108                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1109                    URI uri = URI.create(certificateUrl);
1110                    //3. Require usage of HTTPS and reject any url containing query parameters
1111                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1112                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1113                        //and limit the interest to use the SSRF because to pass the check the url need to:
1114                        //- Do not having any query parameters.
1115                        //- Use HTTPS protocol.
1116                        //- End with a string having the format "_[0-9a-f]{64}".
1117                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1118                        HttpResponse<String> response;
1119                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1120                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1121                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1122                                    .build();
1123                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1124                            if (response.statusCode() == 200) {
1125                                //5. Ensure that the response content type is "text/plain"
1126                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1127                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1128                            }
1129                        }
1130                    }
1131                }
1132            }
1133        } catch (Exception e) {
1134            isValid = false;
1135        }
1136        return isValid;
1137    }
1138
1139    /**
1140     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1141     *
1142     * @param encodedData            URL encoded data.
1143     * @param decodingRoundThreshold Threshold above which decoding will fail.
1144     * @return The decoded data.
1145     * @throws SecurityException If the threshold is reached.
1146     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1147     * @see "https://owasp.org/www-community/Double_Encoding"
1148     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1149     * @see "https://capec.mitre.org/data/definitions/120.html"
1150     */
1151    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1152        if (decodingRoundThreshold < 1) {
1153            throw new IllegalArgumentException("Threshold must be a positive number !");
1154        }
1155        if (encodedData == null) {
1156            throw new IllegalArgumentException("Data provided must not be null !");
1157        }
1158        Charset charset = StandardCharsets.UTF_8;
1159        int currentDecodingRound = 0;
1160        boolean isFinished = false;
1161        String currentRoundData = encodedData;
1162        String previousRoundData = encodedData;
1163        while (!isFinished) {
1164            if (currentDecodingRound > decodingRoundThreshold) {
1165                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1166            }
1167            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1168            isFinished = currentRoundData.equals(previousRoundData);
1169            previousRoundData = currentRoundData;
1170            currentDecodingRound++;
1171        }
1172        return currentRoundData;
1173    }
1174
1175    /**
1176     * Apply a collection of validations on a string expected to be an system file/folder path:
1177     * <ul>
1178     * <li>Does not contains path traversal payload.</li>
1179     * <li>The canonical path is equals to the absolute path.</li>
1180     * </ul><br>
1181     *
1182     * @param path String expected to be a valid system file/folder path.
1183     * @return True only if the string pass all validations.
1184     * @see "https://portswigger.net/web-security/file-path-traversal"
1185     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1186     * @see "https://capec.mitre.org/data/definitions/126.html"
1187     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1188     */
1189    public static boolean isPathSafe(String path) {
1190        boolean isSafe = false;
1191        int decodingRoundThreshold = 3;
1192        try {
1193            if (path != null && !path.isEmpty()) {
1194                //URL decode the path if case of data coming from a web context
1195                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1196                //Ensure that no path traversal expression is present
1197                if (!decodedPath.contains("..")) {
1198                    File f = new File(decodedPath);
1199                    String canonicalPath = f.getCanonicalPath();
1200                    String absolutePath = f.getAbsolutePath();
1201                    isSafe = canonicalPath.equals(absolutePath);
1202                }
1203            }
1204        } catch (Exception e) {
1205            isSafe = false;
1206        }
1207        return isSafe;
1208    }
1209
1210    /**
1211     * Identify if an XML contains any XML comments or have any XSL processing instructions.<br>
1212     * Stream reader based parsing is used to support large XML tree.
1213     *
1214     * @param xmlFilePath Filename of the XML file to check.
1215     * @return True only if XML comments or XSL processing instructions are identified.
1216     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1217     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1218     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1219     * @see "https://www.w3.org/Style/styling-XML.en.html"
1220     */
1221    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1222        boolean itemsDetected = false;
1223        try {
1224            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1225            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1226            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1227            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1228            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1229            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1230
1231            //Parse file
1232            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1233                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1234                int eventType;
1235                while (reader.hasNext() && !itemsDetected) {
1236                    eventType = reader.next();
1237                    if (eventType == XMLEvent.COMMENT) {
1238                        itemsDetected = true;
1239                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1240                        itemsDetected = true;
1241                    }
1242                }
1243            }
1244        } catch (Exception e) {
1245            //In case of error then assume that the check failed
1246            itemsDetected = true;
1247        }
1248        return itemsDetected;
1249    }
1250
1251
1252    /**
1253     * Perform a set of additional validations against a JWT token:
1254     * <ul>
1255     *     <li>Do not use the <b>NONE</b> signature algorithm.</li>
1256     *     <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li>
1257     *     <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li>
1258     *     <li>Match the expected type of token: ACCESS or ID or REFRESH.</li>
1259     * </ul>
1260     *
1261     * @param token               JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied.
1262     * @param expectedTokenType   The type of expected token using the enumeration provided.
1263     * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to.
1264     * @return True only the token pass all the validations.
1265     * @see "https://www.iana.org/assignments/jwt/jwt.xhtml"
1266     * @see "https://auth0.com/docs/secure/tokens/access-tokens"
1267     * @see "https://auth0.com/docs/secure/tokens/id-tokens"
1268     * @see "https://auth0.com/docs/secure/tokens/refresh-tokens"
1269     * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/"
1270     * @see "https://jwt.io/libraries?language=Java"
1271     * @see "https://pentesterlab.com/blog/secure-jwt-library-design"
1272     * @see "https://github.com/auth0/java-jwt"
1273     */
1274    public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) {
1275        boolean isValid = false;
1276        TokenType tokenType;
1277        try {
1278            if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) {
1279                if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) {
1280                    String jti = token.getId();
1281                    if (jti != null && !jti.trim().isEmpty()) {
1282                        boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase);
1283                        if (!jtiIsRevoked) {
1284                            //Determine the token type based on the presence of specifics claims
1285                            if (!token.getClaim("scope").isMissing()) {
1286                                tokenType = TokenType.ACCESS;
1287                            } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) {
1288                                tokenType = TokenType.ID;
1289                            } else {
1290                                tokenType = TokenType.REFRESH;
1291                            }
1292                            isValid = (tokenType.equals(expectedTokenType));
1293                        }
1294                    }
1295                }
1296            }
1297
1298        } catch (Exception e) {
1299            //In case of error then assume that the check failed
1300            isValid = false;
1301        }
1302        return isValid;
1303    }
1304
1305    /**
1306     * Apply a validations on a regular expression to ensure that is not prone to the ReDOS attack.
1307     * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b>
1308     * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool.
1309     *
1310     * @param regex                       String expected to be a valid regular expression (regex).
1311     * @param data                        Test data on which the regular expression is executed for the test.
1312     * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified).
1313     * @return True only if the string pass all validations.
1314     * @see "https://github.blog/security/how-to-fix-a-redos/"
1315     * @see "https://learn.snyk.io/lesson/redos"
1316     * @see "https://rules.sonarsource.com/java/RSPEC-2631/"
1317     * @see "https://github.com/doyensec/regexploit"
1318     * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf"
1319     * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS"
1320     */
1321    public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) {
1322        Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value.");
1323        Objects.requireNonNull(data, "A sample data is needed to perform the test.");
1324        Objects.requireNonNull(regex, "A regular expression is needed to perform the test.");
1325        boolean isSafe = false;
1326        int executionTimeout = maximumRunningTimeInSeconds.orElse(4);
1327        ExecutorService executor = Executors.newSingleThreadExecutor();
1328        try {
1329            Callable<Boolean> task = () -> {
1330                Pattern pattern = Pattern.compile(regex);
1331                return pattern.matcher(data).matches();
1332            };
1333            List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS);
1334            if (!tasks.getFirst().isCancelled()) {
1335                isSafe = true;
1336            }
1337        } catch (Exception e) {
1338            isSafe = false;
1339        } finally {
1340            executor.shutdownNow();
1341        }
1342        return isSafe;
1343    }
1344
1345    /**
1346     * Compute a UUID version 7 without using any external dependency.<br><br>
1347     * <b>Below are my personal point of view and perhaps I'm totally wrong!</b>
1348     * <br><br>
1349     * Why such method?
1350     * <ul>
1351     * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li>
1352     * <li>Import a library just to generate such value is overkill for me.</li>
1353     * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li>
1354     * </ul>
1355     * <br>
1356     * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b>
1357     * <br><br>
1358     * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly:
1359     * <ul>
1360     *     <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li>
1361     *     <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li>
1362     * </ul>
1363     *
1364     * @return A UUID object representing the UUID v7.
1365     * @see "https://uuid7.com/"
1366     * @see "https://antonz.org/uuidv7/"
1367     * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns"
1368     * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7"
1369     * @see "https://www.baeldung.com/java-generating-time-based-uuids"
1370     * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier"
1371     * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/"
1372     */
1373    public static UUID computeUUIDv7() {
1374        SecureRandom secureRandom = new SecureRandom();
1375        // Generate truly random bytes
1376        byte[] value = new byte[16];
1377        secureRandom.nextBytes(value);
1378        // Get current timestamp in milliseconds
1379        ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES);
1380        timestamp.putLong(System.currentTimeMillis());
1381        // Create the TIMESTAMP part of the UUID
1382        System.arraycopy(timestamp.array(), 2, value, 0, 6);
1383        // Create the VERSION and the VARIANT parts of the UUID
1384        value[6] = (byte) ((value[6] & 0x0F) | 0x70);
1385        value[8] = (byte) ((value[8] & 0x3F) | 0x80);
1386        //Create the HIGH and LOW parts of the UUID
1387        ByteBuffer buf = ByteBuffer.wrap(value);
1388        long high = buf.getLong();
1389        long low = buf.getLong();
1390        //Create and return the UUID object
1391        UUID uuidv7 = new UUID(high, low);
1392        return uuidv7;
1393    }
1394
1395    /**
1396     * Ensure that an XSD file does not contain any include/import/redefine instruction (prevent exposure to SSRF).
1397     *
1398     * @param xsdFilePath Filename of the XSD file to check.
1399     * @return True only if the file pass all validations.
1400     * @see "https://portswigger.net/web-security/ssrf"
1401     * @see "https://www.w3schools.com/Xml/el_import.asp"
1402     * @see "https://www.w3schools.com/xml/el_include.asp"
1403     * @see "https://www.linkedin.com/posts/righettod_appsec-appsecurity-java-activity-7344048434326188053-6Ru9"
1404     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/validation/SchemaFactory.html#setProperty(java.lang.String,java.lang.Object)"
1405     */
1406    public static boolean isXSDSafe(String xsdFilePath) {
1407        boolean isSafe = false;
1408        try {
1409            File xsdFile = new File(xsdFilePath);
1410            if (xsdFile.exists() && xsdFile.canRead() && xsdFile.isFile()) {
1411                //Parse the XSD file, if an exception occur then it's imply that the XSD specified is not a valid ones
1412                //Create an schema factory throwing Exception if a external schema is specified
1413                SchemaFactory schemaFactory = SchemaFactory.newDefaultInstance();
1414                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1415                schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
1416                //Parse the schema
1417                Schema schema = schemaFactory.newSchema(xsdFile);
1418                isSafe = (schema != null);
1419            }
1420        } catch (Exception e) {
1421            isSafe = false;
1422        }
1423        return isSafe;
1424    }
1425}