001package eu.righettod;
002
003
004import org.apache.commons.csv.CSVFormat;
005import org.apache.commons.csv.CSVRecord;
006import org.apache.commons.imaging.ImageInfo;
007import org.apache.commons.imaging.Imaging;
008import org.apache.commons.imaging.common.ImageMetadata;
009import org.apache.commons.validator.routines.EmailValidator;
010import org.apache.commons.validator.routines.InetAddressValidator;
011import org.apache.pdfbox.Loader;
012import org.apache.pdfbox.pdmodel.PDDocument;
013import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
014import org.apache.pdfbox.pdmodel.PDDocumentInformation;
015import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
016import org.apache.pdfbox.pdmodel.common.PDMetadata;
017import org.apache.pdfbox.pdmodel.interactive.action.*;
018import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
019import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
021import org.apache.poi.poifs.filesystem.DirectoryEntry;
022import org.apache.poi.poifs.filesystem.POIFSFileSystem;
023import org.apache.poi.poifs.macros.VBAMacroReader;
024import org.apache.tika.detect.DefaultDetector;
025import org.apache.tika.detect.Detector;
026import org.apache.tika.io.TemporaryResources;
027import org.apache.tika.io.TikaInputStream;
028import org.apache.tika.metadata.Metadata;
029import org.apache.tika.mime.MediaType;
030import org.apache.tika.mime.MimeTypes;
031import org.w3c.dom.Document;
032import org.xml.sax.EntityResolver;
033import org.xml.sax.InputSource;
034import org.xml.sax.SAXException;
035
036import javax.crypto.Mac;
037import javax.crypto.spec.SecretKeySpec;
038import javax.imageio.ImageIO;
039import javax.json.Json;
040import javax.json.JsonReader;
041import javax.xml.XMLConstants;
042import javax.xml.parsers.DocumentBuilder;
043import javax.xml.parsers.DocumentBuilderFactory;
044import javax.xml.parsers.ParserConfigurationException;
045import javax.xml.stream.XMLInputFactory;
046import javax.xml.stream.XMLStreamReader;
047import javax.xml.stream.events.XMLEvent;
048import java.awt.*;
049import java.awt.image.BufferedImage;
050import java.io.*;
051import java.net.*;
052import java.net.http.HttpClient;
053import java.net.http.HttpRequest;
054import java.net.http.HttpResponse;
055import java.nio.charset.Charset;
056import java.nio.charset.StandardCharsets;
057import java.nio.file.Files;
058import java.security.MessageDigest;
059import java.time.Duration;
060import java.util.List;
061import java.util.*;
062import java.util.concurrent.atomic.AtomicInteger;
063import java.util.regex.Pattern;
064import java.util.zip.ZipEntry;
065import java.util.zip.ZipFile;
066
067/**
 * Provides various utility methods to apply processing from a security perspective.<br>
 * These code snippets:
070 * <ul>
071 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
072 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
073 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
074 * </ul>
075 */
076public class SecurityUtils {
077
/**
 * Private constructor: instantiation is not needed because the class only exposes static methods.
 */
private SecurityUtils() {
    //Intentionally empty: there is no instance state to initialize
}
083
084    /**
085     * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br>
086     * This method consider that format of the PIN code is [0-9]{6,}<br>
087     * Rule to consider a PIN code as weak:
088     * <ul>
089     * <li>Length is inferior to 6 positions.</li>
090     * <li>Contain only the same number or only a sequence of zero.</li>
091     * <li>Contain sequence of following incremental or decremental numbers.</li>
092     * </ul>
093     *
094     * @param pinCode PIN code to verify.
095     * @return True only if the PIN is considered as weak.
096     */
097    public static boolean isWeakPINCode(String pinCode) {
098        boolean isWeak = true;
099        //Length is inferior to 6 positions
100        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
101        //and to ensure that the PIN is not only a sequence of zero
102        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
103            //Contain only the same number
104            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
105            if (!Pattern.matches(regex, pinCode)) {
106                //Contain sequence of following incremental or decremental numbers
107                char previousChar = 'X';
108                boolean containSequence = false;
109                for (char c : pinCode.toCharArray()) {
110                    if (previousChar != 'X') {
111                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
112                        int currentNbr = Integer.parseInt(String.valueOf(c));
113                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
114                            containSequence = true;
115                            break;
116                        }
117                    }
118                    previousChar = c;
119                }
120                if (!containSequence) {
121                    isWeak = false;
122                }
123            }
124        }
125        return isWeak;
126    }
127
128    /**
129     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
130     * <ul>
131     * <li>Real Microsoft Word 97-2003 document file.</li>
132     * <li>No VBA Macro.<br></li>
133     * <li>No embedded objects.</li>
134     * </ul>
135     *
136     * @param wordFilePath Filename of the Word document file to check.
137     * @return True only if the file pass all validations.
138     * @see "https://poi.apache.org/components/"
139     * @see "https://poi.apache.org/components/document/"
140     * @see "https://poi.apache.org/components/poifs/how-to.html"
141     * @see "https://poi.apache.org/components/poifs/embeded.html"
142     * @see "https://poi.apache.org/"
143     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
144     */
145    public static boolean isWord972003DocumentSafe(String wordFilePath) {
146        boolean isSafe = false;
147        try {
148            File wordFile = new File(wordFilePath);
149            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
150                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
151                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
152                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
153                    VBAMacroReader macroReader = new VBAMacroReader(fs);
154                    Map<String, String> macros = macroReader.readMacros();
155                    if (macros == null || macros.isEmpty()) {
156                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
157                        //From POI documentation:
158                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
159                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
160                        final List<String> embeddedObjectFound = new ArrayList<>();
161                        DirectoryEntry root = fs.getRoot();
162                        if (root.getEntryCount() > 0) {
163                            root.iterator().forEachRemaining(entry -> {
164                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
165                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
166                                    if (objPoolDirectory.getEntryCount() > 0) {
167                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
168                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
169                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
170                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
171                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
172                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
173                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
174                                                        }
175                                                    });
176                                                }
177                                            }
178                                        });
179                                    }
180                                }
181                            });
182                        }
183                        isSafe = embeddedObjectFound.isEmpty();
184                    }
185                }
186            }
187        } catch (Exception e) {
188            isSafe = false;
189        }
190        return isSafe;
191    }
192
193    /**
194     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
195     *
196     * @param xmlFilePath Filename of the XML file to check.
197     * @return True only if the file pass all validations.
198     * @see "https://portswigger.net/web-security/xxe"
199     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
200     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
201     * @see "https://www.w3.org/TR/xinclude-11/"
202     * @see "https://en.wikipedia.org/wiki/XInclude"
203     */
204    public static boolean isXMLSafe(String xmlFilePath) {
205        boolean isSafe = false;
206        try {
207            File xmlFile = new File(xmlFilePath);
208            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
209                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
210                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
211                if (!containXInclude) {
212                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
213                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
214                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
215                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
216                    //Xerces 2 only
217                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
218                    dbfInstance.setXIncludeAware(false);
219                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
220                    //Parse the document
221                    Document doc = builder.parse(xmlFile);
222                    isSafe = (doc != null && doc.getDocumentElement() != null);
223                }
224            }
225        } catch (Exception e) {
226            isSafe = false;
227        }
228        return isSafe;
229    }
230
231
232    /**
233     * Extract all URL links from a PDF file provided.<br>
234     * This can be used to apply validation on a PDF against contained links.
235     *
236     * @param pdfFilePath pdfFilePath Filename of the PDF file to process.
237     * @return A List of URL objects that is empty if no links is found.
238     * @throws Exception If any error occurs during the processing of the PDF file.
239     * @see "https://www.gushiciku.cn/pl/21KQ"
240     * @see "https://pdfbox.apache.org/"
241     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
242     */
243    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
244        final List<URL> links = new ArrayList<>();
245        File pdfFile = new File(pdfFilePath);
246        try (PDDocument document = Loader.loadPDF(pdfFile)) {
247            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
248            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
249                @Override
250                public boolean accept(PDAnnotation annotation) {
251                    boolean keep = false;
252                    if (annotation instanceof PDAnnotationLink) {
253                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
254                    }
255                    return keep;
256                }
257            };
258            documentCatalog.getPages().forEach(page -> {
259                try {
260                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
261                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
262                        try {
263                            URL urlObj = new URL(linkAnnotation.getURI());
264                            if (!links.contains(urlObj)) {
265                                links.add(urlObj);
266                            }
267                        } catch (MalformedURLException e) {
268                            throw new RuntimeException(e);
269                        }
270                    });
271                } catch (Exception e) {
272                    throw new RuntimeException(e);
273                }
274            });
275        }
276        return links;
277    }
278
279    /**
280     * Apply a collection of validations on a PDF file provided:
281     * <ul>
282     * <li>Real PDF file.</li>
283     * <li>No attachments.</li>
284     * <li>No Javascript code.</li>
285     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
286     * </ul>
287     *
288     * @param pdfFilePath Filename of the PDF file to check.
289     * @return True only if the file pass all validations.
290     * @see "https://stackoverflow.com/a/36161267"
291     * @see "https://www.gushiciku.cn/pl/21KQ"
292     * @see "https://github.com/jonaslejon/malicious-pdf"
293     * @see "https://pdfbox.apache.org/"
294     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
295     */
296    public static boolean isPDFSafe(String pdfFilePath) {
297        boolean isSafe = false;
298        try {
299            File pdfFile = new File(pdfFilePath);
300            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
301                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
302                try (PDDocument document = Loader.loadPDF(pdfFile)) {
303                    //Step 2: Check if the file contains attached files, in our case is not allowed
304                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
305                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
306                    if (namesDictionary.getEmbeddedFiles() == null) {
307                        //Step 3: Check if the file contains Javascript code, in our case is not allowed
308                        if (namesDictionary.getJavaScript() == null) {
309                            //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
310                            final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
311                            AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
312                                @Override
313                                public boolean accept(PDAnnotation annotation) {
314                                    boolean keep = false;
315                                    if (annotation instanceof PDAnnotationLink) {
316                                        PDAnnotationLink link = (PDAnnotationLink) annotation;
317                                        PDAction action = link.getAction();
318                                        if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
319                                            keep = true;
320                                        }
321                                    }
322                                    return keep;
323                                }
324                            };
325                            documentCatalog.getPages().forEach(page -> {
326                                try {
327                                    notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
328                                } catch (IOException e) {
329                                    throw new RuntimeException(e);
330                                }
331                            });
332                            if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
333                                isSafe = true;
334                            }
335                        }
336                    }
337                }
338            }
339        } catch (Exception e) {
340            isSafe = false;
341        }
342        return isSafe;
343    }
344
345    /**
346     * Remove as much as possible metadata from the provided PDF document object.
347     *
348     * @param document PDFBox PDF document object on which metadata must be removed.
349     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
350     * @see "https://pdfbox.apache.org/"
351     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
352     */
353    public static void clearPDFMetadata(PDDocument document) {
354        if (document != null) {
355            PDDocumentInformation infoEmpty = new PDDocumentInformation();
356            document.setDocumentInformation(infoEmpty);
357            PDMetadata newMetadataEmpty = new PDMetadata(document);
358            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
359        }
360    }
361
362
363    /**
364     * Validate that the URL provided is really a relative URL.
365     *
366     * @param targetUrl URL to validate.
367     * @return True only if the file pass all validations.
368     * @see "https://portswigger.net/web-security/ssrf"
369     * @see "https://stackoverflow.com/q/6785442"
370     */
371    public static boolean isRelativeURL(String targetUrl) {
372        boolean isValid = false;
373        //Reject any URL encoded content and URL starting with a double slash
374        //Reject any URL contains credentials or fragment to prevent potential bypasses
375        String work = targetUrl;
376        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
377            //Creation of a URL object must fail
378            try {
379                new URL(work);
380                isValid = false;
381            } catch (MalformedURLException mf) {
382                //Last check to be sure (for prod usage compile the pattern one time)
383                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
384            }
385        }
386        return isValid;
387    }
388
389    /**
390     * Apply a collection of validations on a ZIP file provided:
391     * <ul>
392     * <li>Real ZIP file.</li>
393     * <li>Contain less than a specified level of deepness.</li>
394     * <li>Do not contain Zip-Slip entry path.</li>
395     * </ul>
396     *
397     * @param zipFilePath       Filename of the ZIP file to check.
398     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
399     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
400     * @return True only if the file pass all validations.
401     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
402     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
403     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
404     * @see "https://github.com/ptoomey3/evilarc"
405     * @see "https://github.com/abdulfatir/ZipBomb"
406     * @see "https://www.baeldung.com/cs/zip-bomb"
407     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
408     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
409     */
410    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
411        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
412        boolean isSafe = false;
413        try {
414            File zipFile = new File(zipFilePath);
415            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
416                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
417                try (ZipFile zipArch = new ZipFile(zipFile)) {
418                    //Step 2: Parse entries
419                    long deepness = 0;
420                    ZipEntry zipEntry;
421                    String entryExtension;
422                    String zipEntryName;
423                    boolean validationsFailed = false;
424                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
425                    while (entries.hasMoreElements()) {
426                        zipEntry = entries.nextElement();
427                        zipEntryName = zipEntry.getName();
428                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
429                        //Step 2a: Check if the current entry is an archive file
430                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
431                            validationsFailed = true;
432                            break;
433                        }
434                        //Step 2b: Check that level of deepness is inferior to the threshold specified
435                        if (zipEntryName.contains("/")) {
436                            //Determine deepness by inspecting the entry name.
437                            //Indeed, folder will be represented like this: folder/folder/folder/
438                            //So we can count the number of "/" to identify the deepness of the entry
439                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
440                            if (deepness > maxLevelDeepness) {
441                                validationsFailed = true;
442                                break;
443                            }
444                        }
445                        //Step 2c: Check if any entries match pattern of zip slip payload
446                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
447                            validationsFailed = true;
448                            break;
449                        }
450                    }
451                    if (!validationsFailed) {
452                        isSafe = true;
453                    }
454                }
455            }
456        } catch (Exception e) {
457            isSafe = false;
458        }
459        return isSafe;
460    }
461
462    /**
463     * Identify the mime type of the content specified (array of bytes).<br>
464     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
465     *
466     * @param content The content as an array of bytes.
467     * @return The mime type in lower case or null if it cannot be identified.
468     * @see "https://twitter.com/righettod/status/1595824709186519041"
469     * @see "https://tika.apache.org/"
470     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
471     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
472     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
473     */
474    public static String identifyMimeType(byte[] content) {
475        String mimeType = null;
476        if (content != null && content.length > 0) {
477            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
478            Metadata metadata = new Metadata();
479            try {
480                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
481                    MediaType mt = detector.detect(tikaInputStream, metadata);
482                    if (mt != null) {
483                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
484                    }
485                }
486            } catch (IOException ioe) {
487                mimeType = null;
488            }
489        }
490        return mimeType;
491    }
492
493    /**
494     * Apply a collection of validations on a string expected to be an public IP address:
495     * <ul>
496     * <li>Is a valid IP v4 or v6 address.</li>
497     * <li>Is public from an Internet perspective.</li>
498     * </ul>
499     * <br>
500     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
501     * <br><br>
502     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
503     *
504     * @param ip String expected to be a valid IP address.
505     * @return True only if the string pass all validations.
506     * @see "https://commons.apache.org/proper/commons-validator/"
507     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
508     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
509     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
510     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
511     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
512     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
513     * @see "https://ipcisco.com/lesson/ipv6-address/"
514     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
515     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
516     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
517     * @see "https://en.wikipedia.org/wiki/Multicast_address"
518     * @see "https://stackoverflow.com/a/5619409"
519     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
520     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
521     * @see "https://developer.android.com/reference/java/net/Inet6Address"
522     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
523     */
524    public static boolean isPublicIPAddress(String ip) {
525        boolean isValid = false;
526        try {
527            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
528            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
529                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
530                if (InetAddressValidator.getInstance().isValid(ip)) {
531                    //If OK then validate that is an public IP address
532                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
533                    InetAddress addr = InetAddress.getByName(ip);
534                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
535                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
536                    //For the prefix map, the start of the key indicates if the value is a regex or a string
537                    if (isValid && (addr instanceof Inet6Address)) {
538                        Map<String, String> prefixes = new HashMap<>();
539                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
540                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
541                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
542                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
543                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
544                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
545                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
546                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
547                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
548                        final List<Boolean> results = new ArrayList<>();
549                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
550                        prefixes.forEach((addressType, expr) -> {
551                            String exprLower = expr.trim().toLowerCase();
552                            if (addressType.startsWith("STRING_")) {
553                                results.add(ipLower.startsWith(exprLower));
554                            } else {
555                                results.add(Pattern.matches(exprLower, ipLower));
556                            }
557                        });
558                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
559                    }
560                }
561            }
562        } catch (Exception e) {
563            isValid = false;
564        }
565        return isValid;
566    }
567
568    /**
569     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
570     * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br>
571     * <p>
572     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
573     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
574     * </p>
575     * This method ensure that both hash above will be different.<br><br>
576     *
577     * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character.
578     *
579     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
580     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
581     * @throws Exception If any exception occurs
582     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
583     * @see "https://pentesterlab.com/badges/codereview"
584     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
585     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
586     */
587    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
588        byte[] hash = null;
589        String separator = "|";
590        if (parts != null && !parts.isEmpty()) {
591            //Ensure that not part is null
592            if (parts.stream().anyMatch(Objects::isNull)) {
593                throw new IllegalArgumentException("No part must be null!");
594            }
595            //Ensure that the separator is absent from every part
596            if (parts.stream().anyMatch(part -> part.contains(separator))) {
597                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
598            }
599            MessageDigest digest = MessageDigest.getInstance("SHA-256");
600            final StringBuilder buffer = new StringBuilder(separator);
601            parts.forEach(p -> {
602                buffer.append(p).append(separator);
603            });
604            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
605        }
606        return hash;
607    }
608
609    /**
610     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
611     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
612     * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations.
613     *
614     * @param xmlFilePath              Filename of the XML file to check.
615     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
616     * @return True only if the file pass all validations.
617     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
618     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
619     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
620     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
621     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
622     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
623     */
624    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
625        boolean isSafe = false;
626        final String errorTemplate = "Non allowed %s ID detected!";
627        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
628        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
629
630        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
631            throw new IllegalArgumentException("At least one SID must be specified!");
632        }
633        File xmlFile = new File(xmlFilePath);
634        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
635            try {
636                EntityResolver resolverValidator = (publicId, systemId) -> {
637                    if (publicId != null) {
638                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
639                    }
640                    if (!allowedSystemIdentifiers.contains(systemId)) {
641                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
642                    }
643                    //If it is OK then return a empty DTD/XSD
644                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
645                };
646                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
647                dbfInstance.setIgnoringElementContentWhitespace(true);
648                dbfInstance.setXIncludeAware(false);
649                dbfInstance.setValidating(false);
650                dbfInstance.setCoalescing(true);
651                dbfInstance.setIgnoringComments(false);
652                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
653                builder.setEntityResolver(resolverValidator);
654                Document doc = builder.parse(xmlFile);
655                isSafe = (doc != null);
656            } catch (SAXException | IOException | ParserConfigurationException e) {
657                isSafe = false;
658            }
659        }
660
661        return isSafe;
662    }
663
664    /**
665     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
666     * <ul>
667     * <li>Real CSV file.</li>
668     * <li>Do not contains any payload related to a CSV injections.</li>
669     * </ul>
670     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
671     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
672     *
673     * @param csvFilePath Filename of the CSV file to check.
674     * @return True only if the file pass all validations.
675     * @see "https://commons.apache.org/proper/commons-csv/"
676     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
677     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
678     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
679     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
680     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
681     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
682     * @see "https://cwe.mitre.org/data/definitions/1236.html"
683     */
684    public static boolean isExcelCSVSafe(String csvFilePath) {
685        boolean isSafe;
686        final AtomicInteger recordCount = new AtomicInteger();
687        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
688
689        try {
690            final List<String> payloadsIdentified = new ArrayList<>();
691            try (Reader in = new FileReader(csvFilePath)) {
692                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
693                records.forEach(record -> {
694                    record.forEach(recordValue -> {
695                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
696                            payloadsIdentified.add(recordValue);
697                        }
698                        recordCount.getAndIncrement();
699                    });
700                });
701            }
702            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
703        } catch (Exception e) {
704            isSafe = false;
705        }
706
707        return isSafe;
708    }
709
710    /**
711     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
712     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
713     *
714     * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode})
715     * @param input          When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used.
716     * @param secret         Secret to use to compute the SHA256 HMAC.
717     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
718     * @throws Exception If any exception occurs.
719     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
720     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
721     * @see "https://portswigger.net/web-security/deserialization"
722     * @see "https://www.baeldung.com/java-serialization-approaches"
723     * @see "https://www.baeldung.com/java-serialization"
724     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
725     * @see "https://en.wikipedia.org/wiki/HMAC"
726     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
727     */
728    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception {
729        Map<String, Object> results;
730        String resultFormatTemplate = "%s:%s";
731        //Verify input provided to be consistent
732        if (processingMode == null) {
733            throw new IllegalArgumentException("The processing mode is mandatory!");
734        }
735        if (input == null || input.trim().isEmpty()) {
736            throw new IllegalArgumentException("Input data is mandatory!");
737        }
738        if (secret == null || secret.length == 0) {
739            throw new IllegalArgumentException("The HMAC secret is mandatory!");
740        }
741        if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) {
742            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
743        }
744        //Processing
745        Base64.Decoder b64Decoder = Base64.getDecoder();
746        Base64.Encoder b64Encoder = Base64.getEncoder();
747        String hmacAlgorithm = "HmacSHA256";
748        Mac mac = Mac.getInstance(hmacAlgorithm);
749        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
750        mac.init(key);
751        results = new HashMap<>();
752        results.put("PROCESSING_MODE", processingMode.toString());
753        switch (processingMode) {
754            case PROTECT -> {
755                byte[] objectBytes = b64Decoder.decode(input);
756                byte[] hmac = mac.doFinal(objectBytes);
757                String encodedHmac = b64Encoder.encodeToString(hmac);
758                results.put("STATUS", Boolean.TRUE);
759                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
760            }
761            case VALIDATE -> {
762                String[] parts = input.split(":");
763                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
764                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
765                byte[] hmacComputed = mac.doFinal(objectBytes);
766                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
767                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
768                results.put("STATUS", hmacIsValid);
769                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
770            }
771            default -> throw new IllegalArgumentException("Not supported processing mode!");
772        }
773        return results;
774    }
775
776    /**
777     * Apply a collection of validations on a JSON string provided:
778     * <ul>
779     * <li>Real JSON structure.</li>
780     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
781     * <li>Contain less than a specified number of items in any arrays.</li>
782     * </ul>
783     * <br>
784     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
785     * I used the following assumption:
786     * <ul>
787     *      <li>The character <code>{</code> identify the beginning of an object.</li>
788     *      <li>The character <code>}</code> identify the end of an object.</li>
789     *      <li>The character <code>[</code> identify the beginning of an array.</li>
790     *      <li>The character <code>]</code> identify the end of an array.</li>
791     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
792     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
793     * </ul>
794     *
795     * @param json                  String containing the JSON data to validate.
796     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
797     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
798     * @return True only if the string pass all validations.
799     * @see "https://javaee.github.io/jsonp/"
800     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
801     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
802     */
803    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
804        boolean isSafe = false;
805
806        try {
807            //Step 1: Analyse the JSON string
808            int currentDeepness = 0;
809            int currentArrayItemsCount = 0;
810            int maxDeepnessReached = 0;
811            int maxArrayItemsCountReached = 0;
812            boolean currentlyInArray = false;
813            boolean currentlyInString = false;
814            int currentNestedArrayLevel = 0;
815            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
816            String work = json.replace(jsonEscapedDoubleQuote, "'");
817            for (char c : work.toCharArray()) {
818                switch (c) {
819                    case '{': {
820                        if (!currentlyInString) {
821                            currentDeepness++;
822                        }
823                        break;
824                    }
825                    case '}': {
826                        if (!currentlyInString) {
827                            currentDeepness--;
828                        }
829                        break;
830                    }
831                    case '[': {
832                        if (!currentlyInString) {
833                            currentDeepness++;
834                            if (currentlyInArray) {
835                                currentNestedArrayLevel++;
836                            }
837                            currentlyInArray = true;
838                        }
839                        break;
840                    }
841                    case ']': {
842                        if (!currentlyInString) {
843                            currentDeepness--;
844                            currentArrayItemsCount = 0;
845                            if (currentNestedArrayLevel > 0) {
846                                currentNestedArrayLevel--;
847                            }
848                            if (currentNestedArrayLevel == 0) {
849                                currentlyInArray = false;
850                            }
851                        }
852                        break;
853                    }
854                    case '"': {
855                        currentlyInString = !currentlyInString;
856                        break;
857                    }
858                    case ',': {
859                        if (!currentlyInString && currentlyInArray) {
860                            currentArrayItemsCount++;
861                        }
862                        break;
863                    }
864                }
865                if (currentDeepness > maxDeepnessReached) {
866                    maxDeepnessReached = currentDeepness;
867                }
868                if (currentArrayItemsCount > maxArrayItemsCountReached) {
869                    maxArrayItemsCountReached = currentArrayItemsCount;
870                }
871            }
872            //Step 2: Apply validation against the value specified as limits
873            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
874
875            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
876            if (isSafe) {
877                JsonReader reader = Json.createReader(new StringReader(json));
878                isSafe = (reader.read() != null);
879            }
880
881        } catch (Exception e) {
882            isSafe = false;
883        }
884        return isSafe;
885    }
886
887    /**
888     * Apply a collection of validations on a image file provided:
889     * <ul>
890     * <li>Real image file.</li>
891     * <li>Its mime type is into the list of allowed mime types.</li>
892     * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li>
893     * </ul>
894     * <br>
895     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
896     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
897     *
898     * @param imageFilePath         Filename of the image file to check.
899     * @param imageAllowedMimeTypes List of image mime types allowed.
900     * @return True only if the file pass all validations.
901     * @see "https://commons.apache.org/proper/commons-imaging/"
902     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
903     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
904     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
905     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
906     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
907     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
908     * @see "https://exiftool.org/examples.html"
909     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
910     * @see "https://hexed.it/"
911     * @see "https://github.com/sighook/pixload"
912     */
913    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
914        boolean isSafe = false;
915        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
916        try {
917            File imgFile = new File(imageFilePath);
918            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
919                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
920                //Step 1: Check the mime type of the file against the allowed ones
921                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
922                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
923                    //Step 2: Load the image into an object using the Image API
924                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
925                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
926                        //Step 3: Check the metadata if the image format support it - Highly experimental
927                        List<String> metadataWithPayloads = new ArrayList<>();
928                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
929                        if (imgMetadata != null) {
930                            imgMetadata.getItems().forEach(item -> {
931                                String metadata = item.toString();
932                                if (payloadDetectionRegex.matcher(metadata).find()) {
933                                    metadataWithPayloads.add(metadata);
934                                }
935                            });
936                        }
937                        isSafe = metadataWithPayloads.isEmpty();
938                    }
939                }
940            }
941        } catch (Exception e) {
942            isSafe = false;
943        }
944        return isSafe;
945    }
946
947    /**
948     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
949     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
950     *
951     * @param inputFilePath Filename of the file to clean up.
952     * @param inputFileType Type of the file provided.
953     * @return A array of bytes with the cleaned file.
954     * @throws IllegalArgumentException If an invalid parameter is passed
955     * @throws Exception                If any technical error during the cleaning processing
956     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
957     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
958     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
959     * @see "https://stackoverflow.com/a/13605411"
960     */
961    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
962        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
963        File inputFile = new File(inputFilePath);
964        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
965            throw new IllegalArgumentException("Cannot read the content of the input file!");
966        }
967        switch (inputFileType) {
968            case PDF -> {
969                try (PDDocument document = Loader.loadPDF(inputFile)) {
970                    document.save(sanitizedContent);
971                }
972            }
973            case IMAGE -> {
974                // Load the original image
975                BufferedImage originalImage = ImageIO.read(inputFile);
976                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
977                // Check that image has been successfully loaded
978                if (originalImage == null) {
979                    throw new IOException("Cannot load the original image !");
980                }
981                // Get current Width and Height of the image
982                int originalWidth = originalImage.getWidth(null);
983                int originalHeight = originalImage.getHeight(null);
984                // Resize the image by removing 1px on Width and Height
985                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
986                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
987                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
988                // Save image to a bytes buffer
989                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
990                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
991                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
992                }
993                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
994                Graphics2D drawer = sanitizedImage.createGraphics();
995                drawer.drawImage(initialSizedImage, 0, 0, null);
996                drawer.dispose();
997                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
998            }
999            default -> throw new IllegalArgumentException("Type of file not supported !");
1000        }
1001        if (sanitizedContent.size() == 0) {
1002            throw new IOException("An error occur during the rewrite operation!");
1003        }
1004        return sanitizedContent.toByteArray();
1005    }
1006
1007    /**
1008     * Apply a collection of validations on a string expected to be an email address:
1009     * <ul>
1010     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1011     * <li>Is not using "Encoded-word" format.</li>
1012     * <li>Is not using comment format.</li>
1013     * <li>Is not using "Punycode" format.</li>
1014     * <li>Is not using UUCP style addresses.</li>
1015     * <li>Is not using address literals.</li>
1016     * <li>Is not using source routes.</li>
1017     * <li>Is not using the "percent hack".</li>
1018     * </ul><br>
1019     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1020     *
1021     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
1022     *
1023     * @param addr String expected to be a valid email address.
1024     * @return True only if the string pass all validations.
1025     * @see "https://commons.apache.org/proper/commons-validator/"
1026     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1027     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1028     * @see "https://portswigger.net/research/splitting-the-email-atom"
1029     * @see "https://www.jochentopf.com/email/address.html"
1030     * @see "https://en.wikipedia.org/wiki/Email_address"
1031     */
1032    public static boolean isEmailAddress(String addr) {
1033        boolean isValid = false;
1034        String work = addr.toLowerCase(Locale.ROOT);
1035        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1036        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1037        try {
1038            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1039            if (EmailValidator.getInstance(true, true).isValid(work)) {
1040                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1041                if (!encodedWordRegex.matcher(work).find()) {
1042                    //If OK then validate it does not contains punycode
1043                    if (!work.contains("xn--")) {
1044                        //If OK then validate it does not use:
1045                        // UUCP style addresses,
1046                        // Comment format,
1047                        // Address literals,
1048                        // Source routes,
1049                        // The percent hack.
1050                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1051                            isValid = true;
1052                        }
1053                    }
1054                }
1055            }
1056        } catch (Exception e) {
1057            isValid = false;
1058        }
1059        return isValid;
1060    }
1061
1062    /**
1063     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1064     * <br>
1065     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1066     * <br>
1067     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1068     * <br>
1069     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1070     * <br>
1071     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1072     *
1073     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1074     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1075     * @see "https://www.stet.eu/en/psd2/"
1076     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1077     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1078     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1079     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1080     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1081     * @see "https://portswigger.net/web-security/ssrf"
1082     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1083     */
1084    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1085        boolean isValid = false;
1086        long connectionTimeoutInSeconds = 10;
1087        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1088        try {
1089            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1090            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1091                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1092                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1093                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1094                    URI uri = URI.create(certificateUrl);
1095                    //3. Require usage of HTTPS and reject any url containing query parameters
1096                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1097                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1098                        //and limit the interest to use the SSRF because to pass the check the url need to:
1099                        //- Do not having any query parameters.
1100                        //- Use HTTPS protocol.
1101                        //- End with a string having the format "_[0-9a-f]{64}".
1102                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1103                        HttpResponse<String> response;
1104                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1105                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1106                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1107                                    .build();
1108                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1109                            if (response.statusCode() == 200) {
1110                                //5. Ensure that the response content type is "text/plain"
1111                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1112                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1113                            }
1114                        }
1115                    }
1116                }
1117            }
1118        } catch (Exception e) {
1119            isValid = false;
1120        }
1121        return isValid;
1122    }
1123
1124    /**
1125     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1126     *
1127     * @param encodedData            URL encoded data.
1128     * @param decodingRoundThreshold Threshold above which decoding will fail.
1129     * @return The decoded data.
1130     * @throws SecurityException If the threshold is reached.
1131     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1132     * @see "https://owasp.org/www-community/Double_Encoding"
1133     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1134     * @see "https://capec.mitre.org/data/definitions/120.html"
1135     */
1136    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1137        if (decodingRoundThreshold < 1) {
1138            throw new IllegalArgumentException("Threshold must be a positive number !");
1139        }
1140        if (encodedData == null) {
1141            throw new IllegalArgumentException("Data provided must not be null !");
1142        }
1143        Charset charset = StandardCharsets.UTF_8;
1144        int currentDecodingRound = 0;
1145        boolean isFinished = false;
1146        String currentRoundData = encodedData;
1147        String previousRoundData = encodedData;
1148        while (!isFinished) {
1149            if (currentDecodingRound > decodingRoundThreshold) {
1150                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1151            }
1152            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1153            isFinished = currentRoundData.equals(previousRoundData);
1154            previousRoundData = currentRoundData;
1155            currentDecodingRound++;
1156        }
1157        return currentRoundData;
1158    }
1159
1160    /**
1161     * Apply a collection of validations on a string expected to be an system file/folder path:
1162     * <ul>
1163     * <li>Does not contains path traversal payload.</li>
1164     * <li>The canonical path is equals to the absolute path.</li>
1165     * </ul><br>
1166     *
1167     * @param path String expected to be a valid system file/folder path.
1168     * @return True only if the string pass all validations.
1169     * @see "https://portswigger.net/web-security/file-path-traversal"
1170     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1171     * @see "https://capec.mitre.org/data/definitions/126.html"
1172     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1173     */
1174    public static boolean isPathSafe(String path) {
1175        boolean isSafe = false;
1176        int decodingRoundThreshold = 3;
1177        try {
1178            if (path != null && !path.isEmpty()) {
1179                //URL decode the path if case of data coming from a web context
1180                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1181                //Ensure that no path traversal expression is present
1182                if (!decodedPath.contains("..")) {
1183                    File f = new File(decodedPath);
1184                    String canonicalPath = f.getCanonicalPath();
1185                    String absolutePath = f.getAbsolutePath();
1186                    isSafe = canonicalPath.equals(absolutePath);
1187                }
1188            }
1189        } catch (Exception e) {
1190            isSafe = false;
1191        }
1192        return isSafe;
1193    }
1194
1195    /**
1196     * Identify if an XML contains any XML comments or have any XSL processing instructions.<br>
1197     * Stream reader based parsing is used to support large XML tree.
1198     *
1199     * @param xmlFilePath Filename of the XML file to check.
1200     * @return True only if XML comments or XSL processing instructions are identified.
1201     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1202     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1203     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1204     * @see "https://www.w3.org/Style/styling-XML.en.html"
1205     */
1206    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1207        boolean itemsDetected = false;
1208        try {
1209            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1210            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1211            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1212            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1213            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1214            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1215
1216            //Parse file
1217            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1218                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1219                int eventType;
1220                while (reader.hasNext() && !itemsDetected) {
1221                    eventType = reader.next();
1222                    if (eventType == XMLEvent.COMMENT) {
1223                        itemsDetected = true;
1224                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1225                        itemsDetected = true;
1226                    }
1227                }
1228            }
1229        } catch (Exception e) {
1230            //In case of error then assume that the check failed
1231            itemsDetected = true;
1232        }
1233        return itemsDetected;
1234    }
1235}