001package eu.righettod;
002
003
004import com.auth0.jwt.interfaces.DecodedJWT;
005import org.apache.commons.csv.CSVFormat;
006import org.apache.commons.csv.CSVRecord;
007import org.apache.commons.imaging.ImageInfo;
008import org.apache.commons.imaging.Imaging;
009import org.apache.commons.imaging.common.ImageMetadata;
010import org.apache.commons.validator.routines.EmailValidator;
011import org.apache.commons.validator.routines.InetAddressValidator;
012import org.apache.pdfbox.Loader;
013import org.apache.pdfbox.pdmodel.PDDocument;
014import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
015import org.apache.pdfbox.pdmodel.PDDocumentInformation;
016import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
017import org.apache.pdfbox.pdmodel.common.PDMetadata;
018import org.apache.pdfbox.pdmodel.interactive.action.*;
019import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
022import org.apache.poi.poifs.filesystem.DirectoryEntry;
023import org.apache.poi.poifs.filesystem.POIFSFileSystem;
024import org.apache.poi.poifs.macros.VBAMacroReader;
025import org.apache.tika.detect.DefaultDetector;
026import org.apache.tika.detect.Detector;
027import org.apache.tika.io.TemporaryResources;
028import org.apache.tika.io.TikaInputStream;
029import org.apache.tika.metadata.Metadata;
030import org.apache.tika.mime.MediaType;
031import org.apache.tika.mime.MimeTypes;
032import org.w3c.dom.Document;
033import org.xml.sax.EntityResolver;
034import org.xml.sax.InputSource;
035import org.xml.sax.SAXException;
036
037import javax.crypto.Mac;
038import javax.crypto.spec.SecretKeySpec;
039import javax.imageio.ImageIO;
040import javax.json.Json;
041import javax.json.JsonReader;
042import javax.xml.XMLConstants;
043import javax.xml.parsers.DocumentBuilder;
044import javax.xml.parsers.DocumentBuilderFactory;
045import javax.xml.parsers.ParserConfigurationException;
046import javax.xml.stream.XMLInputFactory;
047import javax.xml.stream.XMLStreamReader;
048import javax.xml.stream.events.XMLEvent;
049import java.awt.*;
050import java.awt.image.BufferedImage;
051import java.io.*;
052import java.net.*;
053import java.net.http.HttpClient;
054import java.net.http.HttpRequest;
055import java.net.http.HttpResponse;
056import java.nio.charset.Charset;
057import java.nio.charset.StandardCharsets;
058import java.nio.file.Files;
059import java.security.MessageDigest;
060import java.time.Duration;
061import java.util.List;
062import java.util.*;
063import java.util.concurrent.atomic.AtomicInteger;
064import java.util.regex.Pattern;
065import java.util.zip.ZipEntry;
066import java.util.zip.ZipFile;
067
068/**
 * Provides different utility methods to apply processing from a security perspective.<br>
 * These code snippets:
071 * <ul>
072 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
073 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
074 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
075 * </ul>
076 */
077public class SecurityUtils {
078
    /**
     * Private constructor preventing instantiation: the class only provides static methods.
     */
    private SecurityUtils() {
    }
084
085    /**
086     * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br>
087     * This method consider that format of the PIN code is [0-9]{6,}<br>
088     * Rule to consider a PIN code as weak:
089     * <ul>
090     * <li>Length is inferior to 6 positions.</li>
091     * <li>Contain only the same number or only a sequence of zero.</li>
092     * <li>Contain sequence of following incremental or decremental numbers.</li>
093     * </ul>
094     *
095     * @param pinCode PIN code to verify.
096     * @return True only if the PIN is considered as weak.
097     */
098    public static boolean isWeakPINCode(String pinCode) {
099        boolean isWeak = true;
100        //Length is inferior to 6 positions
101        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
102        //and to ensure that the PIN is not only a sequence of zero
103        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
104            //Contain only the same number
105            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
106            if (!Pattern.matches(regex, pinCode)) {
107                //Contain sequence of following incremental or decremental numbers
108                char previousChar = 'X';
109                boolean containSequence = false;
110                for (char c : pinCode.toCharArray()) {
111                    if (previousChar != 'X') {
112                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
113                        int currentNbr = Integer.parseInt(String.valueOf(c));
114                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
115                            containSequence = true;
116                            break;
117                        }
118                    }
119                    previousChar = c;
120                }
121                if (!containSequence) {
122                    isWeak = false;
123                }
124            }
125        }
126        return isWeak;
127    }
128
129    /**
130     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
131     * <ul>
132     * <li>Real Microsoft Word 97-2003 document file.</li>
133     * <li>No VBA Macro.<br></li>
134     * <li>No embedded objects.</li>
135     * </ul>
136     *
137     * @param wordFilePath Filename of the Word document file to check.
138     * @return True only if the file pass all validations.
139     * @see "https://poi.apache.org/components/"
140     * @see "https://poi.apache.org/components/document/"
141     * @see "https://poi.apache.org/components/poifs/how-to.html"
142     * @see "https://poi.apache.org/components/poifs/embeded.html"
143     * @see "https://poi.apache.org/"
144     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
145     */
146    public static boolean isWord972003DocumentSafe(String wordFilePath) {
147        boolean isSafe = false;
148        try {
149            File wordFile = new File(wordFilePath);
150            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
151                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
152                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
153                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
154                    VBAMacroReader macroReader = new VBAMacroReader(fs);
155                    Map<String, String> macros = macroReader.readMacros();
156                    if (macros == null || macros.isEmpty()) {
157                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
158                        //From POI documentation:
159                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
160                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
161                        final List<String> embeddedObjectFound = new ArrayList<>();
162                        DirectoryEntry root = fs.getRoot();
163                        if (root.getEntryCount() > 0) {
164                            root.iterator().forEachRemaining(entry -> {
165                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
166                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
167                                    if (objPoolDirectory.getEntryCount() > 0) {
168                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
169                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
170                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
171                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
172                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
173                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
174                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
175                                                        }
176                                                    });
177                                                }
178                                            }
179                                        });
180                                    }
181                                }
182                            });
183                        }
184                        isSafe = embeddedObjectFound.isEmpty();
185                    }
186                }
187            }
188        } catch (Exception e) {
189            isSafe = false;
190        }
191        return isSafe;
192    }
193
194    /**
195     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
196     *
197     * @param xmlFilePath Filename of the XML file to check.
198     * @return True only if the file pass all validations.
199     * @see "https://portswigger.net/web-security/xxe"
200     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
201     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
202     * @see "https://www.w3.org/TR/xinclude-11/"
203     * @see "https://en.wikipedia.org/wiki/XInclude"
204     */
205    public static boolean isXMLSafe(String xmlFilePath) {
206        boolean isSafe = false;
207        try {
208            File xmlFile = new File(xmlFilePath);
209            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
210                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
211                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
212                if (!containXInclude) {
213                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
214                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
215                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
216                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
217                    //Xerces 2 only
218                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
219                    dbfInstance.setXIncludeAware(false);
220                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
221                    //Parse the document
222                    Document doc = builder.parse(xmlFile);
223                    isSafe = (doc != null && doc.getDocumentElement() != null);
224                }
225            }
226        } catch (Exception e) {
227            isSafe = false;
228        }
229        return isSafe;
230    }
231
232
233    /**
234     * Extract all URL links from a PDF file provided.<br>
235     * This can be used to apply validation on a PDF against contained links.
236     *
237     * @param pdfFilePath pdfFilePath Filename of the PDF file to process.
238     * @return A List of URL objects that is empty if no links is found.
239     * @throws Exception If any error occurs during the processing of the PDF file.
240     * @see "https://www.gushiciku.cn/pl/21KQ"
241     * @see "https://pdfbox.apache.org/"
242     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
243     */
244    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
245        final List<URL> links = new ArrayList<>();
246        File pdfFile = new File(pdfFilePath);
247        try (PDDocument document = Loader.loadPDF(pdfFile)) {
248            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
249            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
250                @Override
251                public boolean accept(PDAnnotation annotation) {
252                    boolean keep = false;
253                    if (annotation instanceof PDAnnotationLink) {
254                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
255                    }
256                    return keep;
257                }
258            };
259            documentCatalog.getPages().forEach(page -> {
260                try {
261                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
262                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
263                        try {
264                            URL urlObj = new URL(linkAnnotation.getURI());
265                            if (!links.contains(urlObj)) {
266                                links.add(urlObj);
267                            }
268                        } catch (MalformedURLException e) {
269                            throw new RuntimeException(e);
270                        }
271                    });
272                } catch (Exception e) {
273                    throw new RuntimeException(e);
274                }
275            });
276        }
277        return links;
278    }
279
280    /**
281     * Apply a collection of validations on a PDF file provided:
282     * <ul>
283     * <li>Real PDF file.</li>
284     * <li>No attachments.</li>
285     * <li>No Javascript code.</li>
286     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
287     * </ul>
288     *
289     * @param pdfFilePath Filename of the PDF file to check.
290     * @return True only if the file pass all validations.
291     * @see "https://stackoverflow.com/a/36161267"
292     * @see "https://www.gushiciku.cn/pl/21KQ"
293     * @see "https://github.com/jonaslejon/malicious-pdf"
294     * @see "https://pdfbox.apache.org/"
295     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
296     */
297    public static boolean isPDFSafe(String pdfFilePath) {
298        boolean isSafe = false;
299        try {
300            File pdfFile = new File(pdfFilePath);
301            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
302                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
303                try (PDDocument document = Loader.loadPDF(pdfFile)) {
304                    //Step 2: Check if the file contains attached files, in our case is not allowed
305                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
306                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
307                    if (namesDictionary.getEmbeddedFiles() == null) {
308                        //Step 3: Check if the file contains Javascript code, in our case is not allowed
309                        if (namesDictionary.getJavaScript() == null) {
310                            //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
311                            final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
312                            AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
313                                @Override
314                                public boolean accept(PDAnnotation annotation) {
315                                    boolean keep = false;
316                                    if (annotation instanceof PDAnnotationLink) {
317                                        PDAnnotationLink link = (PDAnnotationLink) annotation;
318                                        PDAction action = link.getAction();
319                                        if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
320                                            keep = true;
321                                        }
322                                    }
323                                    return keep;
324                                }
325                            };
326                            documentCatalog.getPages().forEach(page -> {
327                                try {
328                                    notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
329                                } catch (IOException e) {
330                                    throw new RuntimeException(e);
331                                }
332                            });
333                            if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
334                                isSafe = true;
335                            }
336                        }
337                    }
338                }
339            }
340        } catch (Exception e) {
341            isSafe = false;
342        }
343        return isSafe;
344    }
345
346    /**
347     * Remove as much as possible metadata from the provided PDF document object.
348     *
349     * @param document PDFBox PDF document object on which metadata must be removed.
350     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
351     * @see "https://pdfbox.apache.org/"
352     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
353     */
354    public static void clearPDFMetadata(PDDocument document) {
355        if (document != null) {
356            PDDocumentInformation infoEmpty = new PDDocumentInformation();
357            document.setDocumentInformation(infoEmpty);
358            PDMetadata newMetadataEmpty = new PDMetadata(document);
359            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
360        }
361    }
362
363
364    /**
365     * Validate that the URL provided is really a relative URL.
366     *
367     * @param targetUrl URL to validate.
368     * @return True only if the file pass all validations.
369     * @see "https://portswigger.net/web-security/ssrf"
370     * @see "https://stackoverflow.com/q/6785442"
371     */
372    public static boolean isRelativeURL(String targetUrl) {
373        boolean isValid = false;
374        //Reject any URL encoded content and URL starting with a double slash
375        //Reject any URL contains credentials or fragment to prevent potential bypasses
376        String work = targetUrl;
377        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
378            //Creation of a URL object must fail
379            try {
380                new URL(work);
381                isValid = false;
382            } catch (MalformedURLException mf) {
383                //Last check to be sure (for prod usage compile the pattern one time)
384                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
385            }
386        }
387        return isValid;
388    }
389
390    /**
391     * Apply a collection of validations on a ZIP file provided:
392     * <ul>
393     * <li>Real ZIP file.</li>
394     * <li>Contain less than a specified level of deepness.</li>
395     * <li>Do not contain Zip-Slip entry path.</li>
396     * </ul>
397     *
398     * @param zipFilePath       Filename of the ZIP file to check.
399     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
400     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
401     * @return True only if the file pass all validations.
402     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
403     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
404     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
405     * @see "https://github.com/ptoomey3/evilarc"
406     * @see "https://github.com/abdulfatir/ZipBomb"
407     * @see "https://www.baeldung.com/cs/zip-bomb"
408     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
409     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
410     */
411    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
412        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
413        boolean isSafe = false;
414        try {
415            File zipFile = new File(zipFilePath);
416            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
417                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
418                try (ZipFile zipArch = new ZipFile(zipFile)) {
419                    //Step 2: Parse entries
420                    long deepness = 0;
421                    ZipEntry zipEntry;
422                    String entryExtension;
423                    String zipEntryName;
424                    boolean validationsFailed = false;
425                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
426                    while (entries.hasMoreElements()) {
427                        zipEntry = entries.nextElement();
428                        zipEntryName = zipEntry.getName();
429                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
430                        //Step 2a: Check if the current entry is an archive file
431                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
432                            validationsFailed = true;
433                            break;
434                        }
435                        //Step 2b: Check that level of deepness is inferior to the threshold specified
436                        if (zipEntryName.contains("/")) {
437                            //Determine deepness by inspecting the entry name.
438                            //Indeed, folder will be represented like this: folder/folder/folder/
439                            //So we can count the number of "/" to identify the deepness of the entry
440                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
441                            if (deepness > maxLevelDeepness) {
442                                validationsFailed = true;
443                                break;
444                            }
445                        }
446                        //Step 2c: Check if any entries match pattern of zip slip payload
447                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
448                            validationsFailed = true;
449                            break;
450                        }
451                    }
452                    if (!validationsFailed) {
453                        isSafe = true;
454                    }
455                }
456            }
457        } catch (Exception e) {
458            isSafe = false;
459        }
460        return isSafe;
461    }
462
463    /**
464     * Identify the mime type of the content specified (array of bytes).<br>
465     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
466     *
467     * @param content The content as an array of bytes.
468     * @return The mime type in lower case or null if it cannot be identified.
469     * @see "https://twitter.com/righettod/status/1595824709186519041"
470     * @see "https://tika.apache.org/"
471     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
472     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
473     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
474     */
475    public static String identifyMimeType(byte[] content) {
476        String mimeType = null;
477        if (content != null && content.length > 0) {
478            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
479            Metadata metadata = new Metadata();
480            try {
481                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
482                    MediaType mt = detector.detect(tikaInputStream, metadata);
483                    if (mt != null) {
484                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
485                    }
486                }
487            } catch (IOException ioe) {
488                mimeType = null;
489            }
490        }
491        return mimeType;
492    }
493
494    /**
495     * Apply a collection of validations on a string expected to be an public IP address:
496     * <ul>
497     * <li>Is a valid IP v4 or v6 address.</li>
498     * <li>Is public from an Internet perspective.</li>
499     * </ul>
500     * <br>
501     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
502     * <br><br>
503     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
504     *
505     * @param ip String expected to be a valid IP address.
506     * @return True only if the string pass all validations.
507     * @see "https://commons.apache.org/proper/commons-validator/"
508     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
509     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
510     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
511     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
512     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
513     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
514     * @see "https://ipcisco.com/lesson/ipv6-address/"
515     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
516     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
517     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
518     * @see "https://en.wikipedia.org/wiki/Multicast_address"
519     * @see "https://stackoverflow.com/a/5619409"
520     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
521     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
522     * @see "https://developer.android.com/reference/java/net/Inet6Address"
523     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
524     */
525    public static boolean isPublicIPAddress(String ip) {
526        boolean isValid = false;
527        try {
528            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
529            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
530                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
531                if (InetAddressValidator.getInstance().isValid(ip)) {
532                    //If OK then validate that is an public IP address
533                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
534                    InetAddress addr = InetAddress.getByName(ip);
535                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
536                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
537                    //For the prefix map, the start of the key indicates if the value is a regex or a string
538                    if (isValid && (addr instanceof Inet6Address)) {
539                        Map<String, String> prefixes = new HashMap<>();
540                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
541                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
542                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
543                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
544                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
545                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
546                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
547                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
548                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
549                        final List<Boolean> results = new ArrayList<>();
550                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
551                        prefixes.forEach((addressType, expr) -> {
552                            String exprLower = expr.trim().toLowerCase();
553                            if (addressType.startsWith("STRING_")) {
554                                results.add(ipLower.startsWith(exprLower));
555                            } else {
556                                results.add(Pattern.matches(exprLower, ipLower));
557                            }
558                        });
559                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
560                    }
561                }
562            }
563        } catch (Exception e) {
564            isValid = false;
565        }
566        return isValid;
567    }
568
569    /**
570     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
571     * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br>
572     * <p>
573     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
574     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
575     * </p>
     * This method ensures that both hashes above will be different.<br><br>
577     *
     * <b>Note:</b> The character <code>|</code> is used as the separator between parts, so a part is not allowed to contain this character.
579     *
580     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
581     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
582     * @throws Exception If any exception occurs
583     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
584     * @see "https://pentesterlab.com/badges/codereview"
585     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
586     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
587     */
588    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
589        byte[] hash = null;
590        String separator = "|";
591        if (parts != null && !parts.isEmpty()) {
592            //Ensure that not part is null
593            if (parts.stream().anyMatch(Objects::isNull)) {
594                throw new IllegalArgumentException("No part must be null!");
595            }
596            //Ensure that the separator is absent from every part
597            if (parts.stream().anyMatch(part -> part.contains(separator))) {
598                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
599            }
600            MessageDigest digest = MessageDigest.getInstance("SHA-256");
601            final StringBuilder buffer = new StringBuilder(separator);
602            parts.forEach(p -> {
603                buffer.append(p).append(separator);
604            });
605            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
606        }
607        return hash;
608    }
609
610    /**
611     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
612     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
     * The method also ensures that no Public Identifier is used, to prevent potential bypasses of the validations.
614     *
615     * @param xmlFilePath              Filename of the XML file to check.
616     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
617     * @return True only if the file pass all validations.
618     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
619     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
620     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
621     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
622     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
623     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
624     */
625    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
626        boolean isSafe = false;
627        final String errorTemplate = "Non allowed %s ID detected!";
628        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
629        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
630
631        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
632            throw new IllegalArgumentException("At least one SID must be specified!");
633        }
634        File xmlFile = new File(xmlFilePath);
635        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
636            try {
637                EntityResolver resolverValidator = (publicId, systemId) -> {
638                    if (publicId != null) {
639                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
640                    }
641                    if (!allowedSystemIdentifiers.contains(systemId)) {
642                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
643                    }
644                    //If it is OK then return a empty DTD/XSD
645                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
646                };
647                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
648                dbfInstance.setIgnoringElementContentWhitespace(true);
649                dbfInstance.setXIncludeAware(false);
650                dbfInstance.setValidating(false);
651                dbfInstance.setCoalescing(true);
652                dbfInstance.setIgnoringComments(false);
653                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
654                builder.setEntityResolver(resolverValidator);
655                Document doc = builder.parse(xmlFile);
656                isSafe = (doc != null);
657            } catch (SAXException | IOException | ParserConfigurationException e) {
658                isSafe = false;
659            }
660        }
661
662        return isSafe;
663    }
664
665    /**
666     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
667     * <ul>
668     * <li>Real CSV file.</li>
669     * <li>Do not contains any payload related to a CSV injections.</li>
670     * </ul>
671     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
672     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
673     *
674     * @param csvFilePath Filename of the CSV file to check.
675     * @return True only if the file pass all validations.
676     * @see "https://commons.apache.org/proper/commons-csv/"
677     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
678     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
679     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
680     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
681     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
682     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
683     * @see "https://cwe.mitre.org/data/definitions/1236.html"
684     */
685    public static boolean isExcelCSVSafe(String csvFilePath) {
686        boolean isSafe;
687        final AtomicInteger recordCount = new AtomicInteger();
688        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
689
690        try {
691            final List<String> payloadsIdentified = new ArrayList<>();
692            try (Reader in = new FileReader(csvFilePath)) {
693                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
694                records.forEach(record -> {
695                    record.forEach(recordValue -> {
696                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
697                            payloadsIdentified.add(recordValue);
698                        }
699                        recordCount.getAndIncrement();
700                    });
701                });
702            }
703            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
704        } catch (Exception e) {
705            isSafe = false;
706        }
707
708        return isSafe;
709    }
710
711    /**
712     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
713     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
714     *
715     * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode})
     * @param input          When the processing mode is "protect" then the expected input (string) is a Java serialized object encoded in Base64; otherwise (processing mode is "validate") the expected input is the output of this method when the "protect" mode was used.
717     * @param secret         Secret to use to compute the SHA256 HMAC.
718     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
719     * @throws Exception If any exception occurs.
720     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
721     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
722     * @see "https://portswigger.net/web-security/deserialization"
723     * @see "https://www.baeldung.com/java-serialization-approaches"
724     * @see "https://www.baeldung.com/java-serialization"
725     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
726     * @see "https://en.wikipedia.org/wiki/HMAC"
727     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
728     */
729    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception {
730        Map<String, Object> results;
731        String resultFormatTemplate = "%s:%s";
732        //Verify input provided to be consistent
733        if (processingMode == null) {
734            throw new IllegalArgumentException("The processing mode is mandatory!");
735        }
736        if (input == null || input.trim().isEmpty()) {
737            throw new IllegalArgumentException("Input data is mandatory!");
738        }
739        if (secret == null || secret.length == 0) {
740            throw new IllegalArgumentException("The HMAC secret is mandatory!");
741        }
742        if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) {
743            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
744        }
745        //Processing
746        Base64.Decoder b64Decoder = Base64.getDecoder();
747        Base64.Encoder b64Encoder = Base64.getEncoder();
748        String hmacAlgorithm = "HmacSHA256";
749        Mac mac = Mac.getInstance(hmacAlgorithm);
750        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
751        mac.init(key);
752        results = new HashMap<>();
753        results.put("PROCESSING_MODE", processingMode.toString());
754        switch (processingMode) {
755            case PROTECT -> {
756                byte[] objectBytes = b64Decoder.decode(input);
757                byte[] hmac = mac.doFinal(objectBytes);
758                String encodedHmac = b64Encoder.encodeToString(hmac);
759                results.put("STATUS", Boolean.TRUE);
760                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
761            }
762            case VALIDATE -> {
763                String[] parts = input.split(":");
764                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
765                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
766                byte[] hmacComputed = mac.doFinal(objectBytes);
767                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
768                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
769                results.put("STATUS", hmacIsValid);
770                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
771            }
772            default -> throw new IllegalArgumentException("Not supported processing mode!");
773        }
774        return results;
775    }
776
777    /**
778     * Apply a collection of validations on a JSON string provided:
779     * <ul>
780     * <li>Real JSON structure.</li>
781     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
782     * <li>Contain less than a specified number of items in any arrays.</li>
783     * </ul>
784     * <br>
785     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
786     * I used the following assumption:
787     * <ul>
788     *      <li>The character <code>{</code> identify the beginning of an object.</li>
789     *      <li>The character <code>}</code> identify the end of an object.</li>
790     *      <li>The character <code>[</code> identify the beginning of an array.</li>
791     *      <li>The character <code>]</code> identify the end of an array.</li>
792     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
793     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
794     * </ul>
795     *
796     * @param json                  String containing the JSON data to validate.
797     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
798     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
799     * @return True only if the string pass all validations.
800     * @see "https://javaee.github.io/jsonp/"
801     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
802     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
803     */
804    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
805        boolean isSafe = false;
806
807        try {
808            //Step 1: Analyse the JSON string
809            int currentDeepness = 0;
810            int currentArrayItemsCount = 0;
811            int maxDeepnessReached = 0;
812            int maxArrayItemsCountReached = 0;
813            boolean currentlyInArray = false;
814            boolean currentlyInString = false;
815            int currentNestedArrayLevel = 0;
816            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
817            String work = json.replace(jsonEscapedDoubleQuote, "'");
818            for (char c : work.toCharArray()) {
819                switch (c) {
820                    case '{': {
821                        if (!currentlyInString) {
822                            currentDeepness++;
823                        }
824                        break;
825                    }
826                    case '}': {
827                        if (!currentlyInString) {
828                            currentDeepness--;
829                        }
830                        break;
831                    }
832                    case '[': {
833                        if (!currentlyInString) {
834                            currentDeepness++;
835                            if (currentlyInArray) {
836                                currentNestedArrayLevel++;
837                            }
838                            currentlyInArray = true;
839                        }
840                        break;
841                    }
842                    case ']': {
843                        if (!currentlyInString) {
844                            currentDeepness--;
845                            currentArrayItemsCount = 0;
846                            if (currentNestedArrayLevel > 0) {
847                                currentNestedArrayLevel--;
848                            }
849                            if (currentNestedArrayLevel == 0) {
850                                currentlyInArray = false;
851                            }
852                        }
853                        break;
854                    }
855                    case '"': {
856                        currentlyInString = !currentlyInString;
857                        break;
858                    }
859                    case ',': {
860                        if (!currentlyInString && currentlyInArray) {
861                            currentArrayItemsCount++;
862                        }
863                        break;
864                    }
865                }
866                if (currentDeepness > maxDeepnessReached) {
867                    maxDeepnessReached = currentDeepness;
868                }
869                if (currentArrayItemsCount > maxArrayItemsCountReached) {
870                    maxArrayItemsCountReached = currentArrayItemsCount;
871                }
872            }
873            //Step 2: Apply validation against the value specified as limits
874            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
875
876            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
877            if (isSafe) {
878                JsonReader reader = Json.createReader(new StringReader(json));
879                isSafe = (reader.read() != null);
880            }
881
882        } catch (Exception e) {
883            isSafe = false;
884        }
885        return isSafe;
886    }
887
888    /**
889     * Apply a collection of validations on a image file provided:
890     * <ul>
891     * <li>Real image file.</li>
892     * <li>Its mime type is into the list of allowed mime types.</li>
893     * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li>
894     * </ul>
895     * <br>
896     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
897     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
898     *
899     * @param imageFilePath         Filename of the image file to check.
900     * @param imageAllowedMimeTypes List of image mime types allowed.
901     * @return True only if the file pass all validations.
902     * @see "https://commons.apache.org/proper/commons-imaging/"
903     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
904     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
905     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
906     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
907     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
908     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
909     * @see "https://exiftool.org/examples.html"
910     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
911     * @see "https://hexed.it/"
912     * @see "https://github.com/sighook/pixload"
913     */
914    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
915        boolean isSafe = false;
916        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
917        try {
918            File imgFile = new File(imageFilePath);
919            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
920                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
921                //Step 1: Check the mime type of the file against the allowed ones
922                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
923                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
924                    //Step 2: Load the image into an object using the Image API
925                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
926                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
927                        //Step 3: Check the metadata if the image format support it - Highly experimental
928                        List<String> metadataWithPayloads = new ArrayList<>();
929                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
930                        if (imgMetadata != null) {
931                            imgMetadata.getItems().forEach(item -> {
932                                String metadata = item.toString();
933                                if (payloadDetectionRegex.matcher(metadata).find()) {
934                                    metadataWithPayloads.add(metadata);
935                                }
936                            });
937                        }
938                        isSafe = metadataWithPayloads.isEmpty();
939                    }
940                }
941            }
942        } catch (Exception e) {
943            isSafe = false;
944        }
945        return isSafe;
946    }
947
948    /**
949     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
950     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
951     *
952     * @param inputFilePath Filename of the file to clean up.
953     * @param inputFileType Type of the file provided.
954     * @return A array of bytes with the cleaned file.
955     * @throws IllegalArgumentException If an invalid parameter is passed
956     * @throws Exception                If any technical error during the cleaning processing
957     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
958     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
959     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
960     * @see "https://stackoverflow.com/a/13605411"
961     */
962    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
963        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
964        File inputFile = new File(inputFilePath);
965        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
966            throw new IllegalArgumentException("Cannot read the content of the input file!");
967        }
968        switch (inputFileType) {
969            case PDF -> {
970                try (PDDocument document = Loader.loadPDF(inputFile)) {
971                    document.save(sanitizedContent);
972                }
973            }
974            case IMAGE -> {
975                // Load the original image
976                BufferedImage originalImage = ImageIO.read(inputFile);
977                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
978                // Check that image has been successfully loaded
979                if (originalImage == null) {
980                    throw new IOException("Cannot load the original image !");
981                }
982                // Get current Width and Height of the image
983                int originalWidth = originalImage.getWidth(null);
984                int originalHeight = originalImage.getHeight(null);
985                // Resize the image by removing 1px on Width and Height
986                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
987                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
988                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
989                // Save image to a bytes buffer
990                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
991                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
992                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
993                }
994                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
995                Graphics2D drawer = sanitizedImage.createGraphics();
996                drawer.drawImage(initialSizedImage, 0, 0, null);
997                drawer.dispose();
998                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
999            }
1000            default -> throw new IllegalArgumentException("Type of file not supported !");
1001        }
1002        if (sanitizedContent.size() == 0) {
1003            throw new IOException("An error occur during the rewrite operation!");
1004        }
1005        return sanitizedContent.toByteArray();
1006    }
1007
1008    /**
1009     * Apply a collection of validations on a string expected to be an email address:
1010     * <ul>
1011     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1012     * <li>Is not using "Encoded-word" format.</li>
1013     * <li>Is not using comment format.</li>
1014     * <li>Is not using "Punycode" format.</li>
1015     * <li>Is not using UUCP style addresses.</li>
1016     * <li>Is not using address literals.</li>
1017     * <li>Is not using source routes.</li>
1018     * <li>Is not using the "percent hack".</li>
1019     * </ul><br>
1020     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1021     *
1022     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
1023     *
1024     * @param addr String expected to be a valid email address.
1025     * @return True only if the string pass all validations.
1026     * @see "https://commons.apache.org/proper/commons-validator/"
1027     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1028     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1029     * @see "https://portswigger.net/research/splitting-the-email-atom"
1030     * @see "https://www.jochentopf.com/email/address.html"
1031     * @see "https://en.wikipedia.org/wiki/Email_address"
1032     */
1033    public static boolean isEmailAddress(String addr) {
1034        boolean isValid = false;
1035        String work = addr.toLowerCase(Locale.ROOT);
1036        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1037        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1038        try {
1039            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1040            if (EmailValidator.getInstance(true, true).isValid(work)) {
1041                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1042                if (!encodedWordRegex.matcher(work).find()) {
1043                    //If OK then validate it does not contains punycode
1044                    if (!work.contains("xn--")) {
1045                        //If OK then validate it does not use:
1046                        // UUCP style addresses,
1047                        // Comment format,
1048                        // Address literals,
1049                        // Source routes,
1050                        // The percent hack.
1051                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1052                            isValid = true;
1053                        }
1054                    }
1055                }
1056            }
1057        } catch (Exception e) {
1058            isValid = false;
1059        }
1060        return isValid;
1061    }
1062
1063    /**
1064     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1065     * <br>
1066     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1067     * <br>
1068     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1069     * <br>
1070     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1071     * <br>
1072     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1073     *
1074     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1075     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1076     * @see "https://www.stet.eu/en/psd2/"
1077     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1078     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1079     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1080     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1081     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1082     * @see "https://portswigger.net/web-security/ssrf"
1083     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1084     */
1085    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1086        boolean isValid = false;
1087        long connectionTimeoutInSeconds = 10;
1088        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1089        try {
1090            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1091            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1092                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1093                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1094                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1095                    URI uri = URI.create(certificateUrl);
1096                    //3. Require usage of HTTPS and reject any url containing query parameters
1097                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1098                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1099                        //and limit the interest to use the SSRF because to pass the check the url need to:
1100                        //- Do not having any query parameters.
1101                        //- Use HTTPS protocol.
1102                        //- End with a string having the format "_[0-9a-f]{64}".
1103                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1104                        HttpResponse<String> response;
1105                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1106                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1107                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1108                                    .build();
1109                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1110                            if (response.statusCode() == 200) {
1111                                //5. Ensure that the response content type is "text/plain"
1112                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1113                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1114                            }
1115                        }
1116                    }
1117                }
1118            }
1119        } catch (Exception e) {
1120            isValid = false;
1121        }
1122        return isValid;
1123    }
1124
1125    /**
1126     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1127     *
1128     * @param encodedData            URL encoded data.
1129     * @param decodingRoundThreshold Threshold above which decoding will fail.
1130     * @return The decoded data.
1131     * @throws SecurityException If the threshold is reached.
1132     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1133     * @see "https://owasp.org/www-community/Double_Encoding"
1134     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1135     * @see "https://capec.mitre.org/data/definitions/120.html"
1136     */
1137    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1138        if (decodingRoundThreshold < 1) {
1139            throw new IllegalArgumentException("Threshold must be a positive number !");
1140        }
1141        if (encodedData == null) {
1142            throw new IllegalArgumentException("Data provided must not be null !");
1143        }
1144        Charset charset = StandardCharsets.UTF_8;
1145        int currentDecodingRound = 0;
1146        boolean isFinished = false;
1147        String currentRoundData = encodedData;
1148        String previousRoundData = encodedData;
1149        while (!isFinished) {
1150            if (currentDecodingRound > decodingRoundThreshold) {
1151                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1152            }
1153            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1154            isFinished = currentRoundData.equals(previousRoundData);
1155            previousRoundData = currentRoundData;
1156            currentDecodingRound++;
1157        }
1158        return currentRoundData;
1159    }
1160
1161    /**
1162     * Apply a collection of validations on a string expected to be a system file/folder path:
1163     * <ul>
1164     * <li>Does not contain any path traversal payload.</li>
1165     * <li>The canonical path is equal to the absolute path.</li>
1166     * </ul><br>
1167     *
1168     * @param path String expected to be a valid system file/folder path.
1169     * @return True only if the string passes all validations.
1170     * @see "https://portswigger.net/web-security/file-path-traversal"
1171     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1172     * @see "https://capec.mitre.org/data/definitions/126.html"
1173     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1174     */
1175    public static boolean isPathSafe(String path) {
1176        boolean isSafe = false;
1177        int decodingRoundThreshold = 3;
1178        try {
1179            if (path != null && !path.isEmpty()) {
1180                //URL decode the path if case of data coming from a web context
1181                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1182                //Ensure that no path traversal expression is present
1183                if (!decodedPath.contains("..")) {
1184                    File f = new File(decodedPath);
1185                    String canonicalPath = f.getCanonicalPath();
1186                    String absolutePath = f.getAbsolutePath();
1187                    isSafe = canonicalPath.equals(absolutePath);
1188                }
1189            }
1190        } catch (Exception e) {
1191            isSafe = false;
1192        }
1193        return isSafe;
1194    }
1195
1196    /**
1197     * Identify if an XML contains any XML comments or has any XSL processing instructions.<br>
1198     * Stream reader based parsing is used to support large XML tree.
1199     *
1200     * @param xmlFilePath Filename of the XML file to check.
1201     * @return True only if XML comments or XSL processing instructions are identified.
1202     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1203     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1204     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1205     * @see "https://www.w3.org/Style/styling-XML.en.html"
1206     */
1207    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1208        boolean itemsDetected = false;
1209        try {
1210            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1211            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1212            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1213            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1214            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1215            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1216
1217            //Parse file
1218            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1219                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1220                int eventType;
1221                while (reader.hasNext() && !itemsDetected) {
1222                    eventType = reader.next();
1223                    if (eventType == XMLEvent.COMMENT) {
1224                        itemsDetected = true;
1225                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1226                        itemsDetected = true;
1227                    }
1228                }
1229            }
1230        } catch (Exception e) {
1231            //In case of error then assume that the check failed
1232            itemsDetected = true;
1233        }
1234        return itemsDetected;
1235    }
1236
1237
1238    /**
1239     * Perform a set of additional validations against a JWT token:
1240     * <ul>
1241     *     <li>Do not use the <b>NONE</b> signature algorithm.</li>
1242     *     <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li>
1243     *     <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li>
1244     *     <li>Match the expected type of token: ACCESS or ID or REFRESH.</li>
1245     * </ul>
1246     *
1247     * @param token               JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied.
1248     * @param expectedTokenType   The type of expected token using the enumeration provided.
1249     * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to.
1250     * @return True only if the token passes all the validations.
1251     * @see "https://www.iana.org/assignments/jwt/jwt.xhtml"
1252     * @see "https://auth0.com/docs/secure/tokens/access-tokens"
1253     * @see "https://auth0.com/docs/secure/tokens/id-tokens"
1254     * @see "https://auth0.com/docs/secure/tokens/refresh-tokens"
1255     * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/"
1256     * @see "https://jwt.io/libraries?language=Java"
1257     * @see "https://pentesterlab.com/blog/secure-jwt-library-design"
1258     * @see "https://github.com/auth0/java-jwt"
1259     */
1260    public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) {
1261        boolean isValid = false;
1262        TokenType tokenType;
1263        try {
1264            if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) {
1265                if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) {
1266                    String jti = token.getId();
1267                    if (jti != null && !jti.trim().isEmpty()) {
1268                        boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase);
1269                        if (!jtiIsRevoked) {
1270                            //Determine the token type based on the presence of specifics claims
1271                            if (!token.getClaim("scope").isMissing()) {
1272                                tokenType = TokenType.ACCESS;
1273                            } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) {
1274                                tokenType = TokenType.ID;
1275                            } else {
1276                                tokenType = TokenType.REFRESH;
1277                            }
1278                            isValid = (tokenType.equals(expectedTokenType));
1279                        }
1280                    }
1281                }
1282            }
1283
1284        } catch (Exception e) {
1285            //In case of error then assume that the check failed
1286            isValid = false;
1287        }
1288
1289        return isValid;
1290    }
1291}