001package eu.righettod;
002
003
004import com.auth0.jwt.interfaces.DecodedJWT;
005import org.apache.commons.csv.CSVFormat;
006import org.apache.commons.csv.CSVRecord;
007import org.apache.commons.imaging.ImageInfo;
008import org.apache.commons.imaging.Imaging;
009import org.apache.commons.imaging.common.ImageMetadata;
010import org.apache.commons.validator.routines.EmailValidator;
011import org.apache.commons.validator.routines.InetAddressValidator;
012import org.apache.pdfbox.Loader;
013import org.apache.pdfbox.pdmodel.PDDocument;
014import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
015import org.apache.pdfbox.pdmodel.PDDocumentInformation;
016import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
017import org.apache.pdfbox.pdmodel.common.PDMetadata;
018import org.apache.pdfbox.pdmodel.interactive.action.*;
019import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
022import org.apache.poi.poifs.filesystem.DirectoryEntry;
023import org.apache.poi.poifs.filesystem.POIFSFileSystem;
024import org.apache.poi.poifs.macros.VBAMacroReader;
025import org.apache.tika.detect.DefaultDetector;
026import org.apache.tika.detect.Detector;
027import org.apache.tika.io.TemporaryResources;
028import org.apache.tika.io.TikaInputStream;
029import org.apache.tika.metadata.Metadata;
030import org.apache.tika.mime.MediaType;
031import org.apache.tika.mime.MimeTypes;
032import org.w3c.dom.Document;
033import org.xml.sax.EntityResolver;
034import org.xml.sax.InputSource;
035import org.xml.sax.SAXException;
036
037import javax.crypto.Mac;
038import javax.crypto.spec.SecretKeySpec;
039import javax.imageio.ImageIO;
040import javax.json.Json;
041import javax.json.JsonReader;
042import javax.xml.XMLConstants;
043import javax.xml.parsers.DocumentBuilder;
044import javax.xml.parsers.DocumentBuilderFactory;
045import javax.xml.parsers.ParserConfigurationException;
046import javax.xml.stream.XMLInputFactory;
047import javax.xml.stream.XMLStreamReader;
048import javax.xml.stream.events.XMLEvent;
049import java.awt.*;
050import java.awt.image.BufferedImage;
051import java.io.*;
052import java.net.*;
053import java.net.http.HttpClient;
054import java.net.http.HttpRequest;
055import java.net.http.HttpResponse;
056import java.nio.ByteBuffer;
057import java.nio.charset.Charset;
058import java.nio.charset.StandardCharsets;
059import java.nio.file.Files;
060import java.security.MessageDigest;
061import java.security.SecureRandom;
062import java.time.Duration;
063import java.util.*;
064import java.util.List;
065import java.util.concurrent.*;
066import java.util.concurrent.atomic.AtomicInteger;
067import java.util.regex.Pattern;
068import java.util.zip.ZipEntry;
069import java.util.zip.ZipFile;
070
071/**
 * Provides various utility methods to apply processing from a security perspective.<br>
 * These code snippets:
074 * <ul>
075 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
076 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
077 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
078 * </ul>
079 */
080public class SecurityUtils {
    /**
     * Private constructor: the class only provides static methods, so no instance is ever needed.
     */
    private SecurityUtils() {
    }
086
087    /**
088     * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br>
089     * This method consider that format of the PIN code is [0-9]{6,}<br>
090     * Rule to consider a PIN code as weak:
091     * <ul>
092     * <li>Length is inferior to 6 positions.</li>
093     * <li>Contain only the same number or only a sequence of zero.</li>
094     * <li>Contain sequence of following incremental or decremental numbers.</li>
095     * </ul>
096     *
097     * @param pinCode PIN code to verify.
098     * @return True only if the PIN is considered as weak.
099     */
100    public static boolean isWeakPINCode(String pinCode) {
101        boolean isWeak = true;
102        //Length is inferior to 6 positions
103        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
104        //and to ensure that the PIN is not only a sequence of zero
105        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
106            //Contain only the same number
107            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
108            if (!Pattern.matches(regex, pinCode)) {
109                //Contain sequence of following incremental or decremental numbers
110                char previousChar = 'X';
111                boolean containSequence = false;
112                for (char c : pinCode.toCharArray()) {
113                    if (previousChar != 'X') {
114                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
115                        int currentNbr = Integer.parseInt(String.valueOf(c));
116                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
117                            containSequence = true;
118                            break;
119                        }
120                    }
121                    previousChar = c;
122                }
123                if (!containSequence) {
124                    isWeak = false;
125                }
126            }
127        }
128        return isWeak;
129    }
130
131    /**
132     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:
133     * <ul>
134     * <li>Real Microsoft Word 97-2003 document file.</li>
135     * <li>No VBA Macro.<br></li>
136     * <li>No embedded objects.</li>
137     * </ul>
138     *
139     * @param wordFilePath Filename of the Word document file to check.
140     * @return True only if the file pass all validations.
141     * @see "https://poi.apache.org/components/"
142     * @see "https://poi.apache.org/components/document/"
143     * @see "https://poi.apache.org/components/poifs/how-to.html"
144     * @see "https://poi.apache.org/components/poifs/embeded.html"
145     * @see "https://poi.apache.org/"
146     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
147     */
148    public static boolean isWord972003DocumentSafe(String wordFilePath) {
149        boolean isSafe = false;
150        try {
151            File wordFile = new File(wordFilePath);
152            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
153                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
154                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
155                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
156                    VBAMacroReader macroReader = new VBAMacroReader(fs);
157                    Map<String, String> macros = macroReader.readMacros();
158                    if (macros == null || macros.isEmpty()) {
159                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
160                        //From POI documentation:
161                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
162                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
163                        final List<String> embeddedObjectFound = new ArrayList<>();
164                        DirectoryEntry root = fs.getRoot();
165                        if (root.getEntryCount() > 0) {
166                            root.iterator().forEachRemaining(entry -> {
167                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
168                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
169                                    if (objPoolDirectory.getEntryCount() > 0) {
170                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
171                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
172                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
173                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
174                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
175                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
176                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
177                                                        }
178                                                    });
179                                                }
180                                            }
181                                        });
182                                    }
183                                }
184                            });
185                        }
186                        isSafe = embeddedObjectFound.isEmpty();
187                    }
188                }
189            }
190        } catch (Exception e) {
191            isSafe = false;
192        }
193        return isSafe;
194    }
195
196    /**
197     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
198     *
199     * @param xmlFilePath Filename of the XML file to check.
200     * @return True only if the file pass all validations.
201     * @see "https://portswigger.net/web-security/xxe"
202     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
203     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
204     * @see "https://www.w3.org/TR/xinclude-11/"
205     * @see "https://en.wikipedia.org/wiki/XInclude"
206     */
207    public static boolean isXMLSafe(String xmlFilePath) {
208        boolean isSafe = false;
209        try {
210            File xmlFile = new File(xmlFilePath);
211            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
212                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
213                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
214                if (!containXInclude) {
215                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
216                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
217                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
218                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
219                    //Xerces 2 only
220                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
221                    dbfInstance.setXIncludeAware(false);
222                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
223                    //Parse the document
224                    Document doc = builder.parse(xmlFile);
225                    isSafe = (doc != null && doc.getDocumentElement() != null);
226                }
227            }
228        } catch (Exception e) {
229            isSafe = false;
230        }
231        return isSafe;
232    }
233
234
235    /**
236     * Extract all URL links from a PDF file provided.<br>
237     * This can be used to apply validation on a PDF against contained links.
238     *
239     * @param pdfFilePath pdfFilePath Filename of the PDF file to process.
240     * @return A List of URL objects that is empty if no links is found.
241     * @throws Exception If any error occurs during the processing of the PDF file.
242     * @see "https://www.gushiciku.cn/pl/21KQ"
243     * @see "https://pdfbox.apache.org/"
244     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
245     */
246    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
247        final List<URL> links = new ArrayList<>();
248        File pdfFile = new File(pdfFilePath);
249        try (PDDocument document = Loader.loadPDF(pdfFile)) {
250            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
251            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
252                @Override
253                public boolean accept(PDAnnotation annotation) {
254                    boolean keep = false;
255                    if (annotation instanceof PDAnnotationLink) {
256                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
257                    }
258                    return keep;
259                }
260            };
261            documentCatalog.getPages().forEach(page -> {
262                try {
263                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
264                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
265                        try {
266                            URL urlObj = new URL(linkAnnotation.getURI());
267                            if (!links.contains(urlObj)) {
268                                links.add(urlObj);
269                            }
270                        } catch (MalformedURLException e) {
271                            throw new RuntimeException(e);
272                        }
273                    });
274                } catch (Exception e) {
275                    throw new RuntimeException(e);
276                }
277            });
278        }
279        return links;
280    }
281
282    /**
283     * Apply a collection of validations on a PDF file provided:
284     * <ul>
285     * <li>Real PDF file.</li>
286     * <li>No attachments.</li>
287     * <li>No Javascript code.</li>
288     * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li>
289     * </ul>
290     *
291     * @param pdfFilePath Filename of the PDF file to check.
292     * @return True only if the file pass all validations.
293     * @see "https://stackoverflow.com/a/36161267"
294     * @see "https://www.gushiciku.cn/pl/21KQ"
295     * @see "https://github.com/jonaslejon/malicious-pdf"
296     * @see "https://pdfbox.apache.org/"
297     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
298     */
299    public static boolean isPDFSafe(String pdfFilePath) {
300        boolean isSafe = false;
301        try {
302            File pdfFile = new File(pdfFilePath);
303            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
304                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
305                try (PDDocument document = Loader.loadPDF(pdfFile)) {
306                    //Step 2: Check if the file contains attached files, in our case is not allowed
307                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
308                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
309                    if (namesDictionary.getEmbeddedFiles() == null) {
310                        //Step 3: Check if the file contains Javascript code, in our case is not allowed
311                        if (namesDictionary.getJavaScript() == null) {
312                            //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
313                            final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
314                            AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
315                                @Override
316                                public boolean accept(PDAnnotation annotation) {
317                                    boolean keep = false;
318                                    if (annotation instanceof PDAnnotationLink) {
319                                        PDAnnotationLink link = (PDAnnotationLink) annotation;
320                                        PDAction action = link.getAction();
321                                        if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
322                                            keep = true;
323                                        }
324                                    }
325                                    return keep;
326                                }
327                            };
328                            documentCatalog.getPages().forEach(page -> {
329                                try {
330                                    notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
331                                } catch (IOException e) {
332                                    throw new RuntimeException(e);
333                                }
334                            });
335                            if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
336                                isSafe = true;
337                            }
338                        }
339                    }
340                }
341            }
342        } catch (Exception e) {
343            isSafe = false;
344        }
345        return isSafe;
346    }
347
348    /**
349     * Remove as much as possible metadata from the provided PDF document object.
350     *
351     * @param document PDFBox PDF document object on which metadata must be removed.
352     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
353     * @see "https://pdfbox.apache.org/"
354     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
355     */
356    public static void clearPDFMetadata(PDDocument document) {
357        if (document != null) {
358            PDDocumentInformation infoEmpty = new PDDocumentInformation();
359            document.setDocumentInformation(infoEmpty);
360            PDMetadata newMetadataEmpty = new PDMetadata(document);
361            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
362        }
363    }
364
365
366    /**
367     * Validate that the URL provided is really a relative URL.
368     *
369     * @param targetUrl URL to validate.
370     * @return True only if the file pass all validations.
371     * @see "https://portswigger.net/web-security/ssrf"
372     * @see "https://stackoverflow.com/q/6785442"
373     */
374    public static boolean isRelativeURL(String targetUrl) {
375        boolean isValid = false;
376        //Reject any URL encoded content and URL starting with a double slash
377        //Reject any URL contains credentials or fragment to prevent potential bypasses
378        String work = targetUrl;
379        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
380            //Creation of a URL object must fail
381            try {
382                new URL(work);
383                isValid = false;
384            } catch (MalformedURLException mf) {
385                //Last check to be sure (for prod usage compile the pattern one time)
386                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
387            }
388        }
389        return isValid;
390    }
391
392    /**
393     * Apply a collection of validations on a ZIP file provided:
394     * <ul>
395     * <li>Real ZIP file.</li>
396     * <li>Contain less than a specified level of deepness.</li>
397     * <li>Do not contain Zip-Slip entry path.</li>
398     * </ul>
399     *
400     * @param zipFilePath       Filename of the ZIP file to check.
401     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
402     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
403     * @return True only if the file pass all validations.
404     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
405     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
406     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
407     * @see "https://github.com/ptoomey3/evilarc"
408     * @see "https://github.com/abdulfatir/ZipBomb"
409     * @see "https://www.baeldung.com/cs/zip-bomb"
410     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
411     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
412     */
413    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
414        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
415        boolean isSafe = false;
416        try {
417            File zipFile = new File(zipFilePath);
418            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
419                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
420                try (ZipFile zipArch = new ZipFile(zipFile)) {
421                    //Step 2: Parse entries
422                    long deepness = 0;
423                    ZipEntry zipEntry;
424                    String entryExtension;
425                    String zipEntryName;
426                    boolean validationsFailed = false;
427                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
428                    while (entries.hasMoreElements()) {
429                        zipEntry = entries.nextElement();
430                        zipEntryName = zipEntry.getName();
431                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
432                        //Step 2a: Check if the current entry is an archive file
433                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
434                            validationsFailed = true;
435                            break;
436                        }
437                        //Step 2b: Check that level of deepness is inferior to the threshold specified
438                        if (zipEntryName.contains("/")) {
439                            //Determine deepness by inspecting the entry name.
440                            //Indeed, folder will be represented like this: folder/folder/folder/
441                            //So we can count the number of "/" to identify the deepness of the entry
442                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
443                            if (deepness > maxLevelDeepness) {
444                                validationsFailed = true;
445                                break;
446                            }
447                        }
448                        //Step 2c: Check if any entries match pattern of zip slip payload
449                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
450                            validationsFailed = true;
451                            break;
452                        }
453                    }
454                    if (!validationsFailed) {
455                        isSafe = true;
456                    }
457                }
458            }
459        } catch (Exception e) {
460            isSafe = false;
461        }
462        return isSafe;
463    }
464
465    /**
466     * Identify the mime type of the content specified (array of bytes).<br>
467     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
468     *
469     * @param content The content as an array of bytes.
470     * @return The mime type in lower case or null if it cannot be identified.
471     * @see "https://twitter.com/righettod/status/1595824709186519041"
472     * @see "https://tika.apache.org/"
473     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
474     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
475     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
476     */
477    public static String identifyMimeType(byte[] content) {
478        String mimeType = null;
479        if (content != null && content.length > 0) {
480            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
481            Metadata metadata = new Metadata();
482            try {
483                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
484                    MediaType mt = detector.detect(tikaInputStream, metadata);
485                    if (mt != null) {
486                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
487                    }
488                }
489            } catch (IOException ioe) {
490                mimeType = null;
491            }
492        }
493        return mimeType;
494    }
495
496    /**
497     * Apply a collection of validations on a string expected to be an public IP address:
498     * <ul>
499     * <li>Is a valid IP v4 or v6 address.</li>
500     * <li>Is public from an Internet perspective.</li>
501     * </ul>
502     * <br>
503     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
504     * <br><br>
505     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
506     *
507     * @param ip String expected to be a valid IP address.
508     * @return True only if the string pass all validations.
509     * @see "https://commons.apache.org/proper/commons-validator/"
510     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
511     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
512     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
513     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
514     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
515     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
516     * @see "https://ipcisco.com/lesson/ipv6-address/"
517     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
518     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
519     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
520     * @see "https://en.wikipedia.org/wiki/Multicast_address"
521     * @see "https://stackoverflow.com/a/5619409"
522     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
523     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
524     * @see "https://developer.android.com/reference/java/net/Inet6Address"
525     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
526     */
527    public static boolean isPublicIPAddress(String ip) {
528        boolean isValid = false;
529        try {
530            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
531            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
532                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
533                if (InetAddressValidator.getInstance().isValid(ip)) {
534                    //If OK then validate that is an public IP address
535                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
536                    InetAddress addr = InetAddress.getByName(ip);
537                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
538                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
539                    //For the prefix map, the start of the key indicates if the value is a regex or a string
540                    if (isValid && (addr instanceof Inet6Address)) {
541                        Map<String, String> prefixes = new HashMap<>();
542                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
543                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
544                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
545                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
546                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
547                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
548                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
549                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
550                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
551                        final List<Boolean> results = new ArrayList<>();
552                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
553                        prefixes.forEach((addressType, expr) -> {
554                            String exprLower = expr.trim().toLowerCase();
555                            if (addressType.startsWith("STRING_")) {
556                                results.add(ipLower.startsWith(exprLower));
557                            } else {
558                                results.add(Pattern.matches(exprLower, ipLower));
559                            }
560                        });
561                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
562                    }
563                }
564            }
565        } catch (Exception e) {
566            isValid = false;
567        }
568        return isValid;
569    }
570
571    /**
572     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
573     * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br>
574     * <p>
575     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
576     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br>
577     * </p>
578     * This method ensure that both hash above will be different.<br><br>
579     *
580     * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character.
581     *
582     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
583     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
584     * @throws Exception If any exception occurs
585     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
586     * @see "https://pentesterlab.com/badges/codereview"
587     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
588     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
589     */
590    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
591        byte[] hash = null;
592        String separator = "|";
593        if (parts != null && !parts.isEmpty()) {
594            //Ensure that not part is null
595            if (parts.stream().anyMatch(Objects::isNull)) {
596                throw new IllegalArgumentException("No part must be null!");
597            }
598            //Ensure that the separator is absent from every part
599            if (parts.stream().anyMatch(part -> part.contains(separator))) {
600                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
601            }
602            MessageDigest digest = MessageDigest.getInstance("SHA-256");
603            final StringBuilder buffer = new StringBuilder(separator);
604            parts.forEach(p -> {
605                buffer.append(p).append(separator);
606            });
607            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
608        }
609        return hash;
610    }
611
612    /**
613     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
614     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
615     * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations.
616     *
617     * @param xmlFilePath              Filename of the XML file to check.
618     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
619     * @return True only if the file pass all validations.
620     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
621     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
622     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
623     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
624     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
625     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
626     */
627    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
628        boolean isSafe = false;
629        final String errorTemplate = "Non allowed %s ID detected!";
630        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
631        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
632
633        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
634            throw new IllegalArgumentException("At least one SID must be specified!");
635        }
636        File xmlFile = new File(xmlFilePath);
637        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
638            try {
639                EntityResolver resolverValidator = (publicId, systemId) -> {
640                    if (publicId != null) {
641                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
642                    }
643                    if (!allowedSystemIdentifiers.contains(systemId)) {
644                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
645                    }
646                    //If it is OK then return a empty DTD/XSD
647                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
648                };
649                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
650                dbfInstance.setIgnoringElementContentWhitespace(true);
651                dbfInstance.setXIncludeAware(false);
652                dbfInstance.setValidating(false);
653                dbfInstance.setCoalescing(true);
654                dbfInstance.setIgnoringComments(false);
655                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
656                builder.setEntityResolver(resolverValidator);
657                Document doc = builder.parse(xmlFile);
658                isSafe = (doc != null);
659            } catch (SAXException | IOException | ParserConfigurationException e) {
660                isSafe = false;
661            }
662        }
663
664        return isSafe;
665    }
666
667    /**
668     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):
669     * <ul>
670     * <li>Real CSV file.</li>
671     * <li>Do not contains any payload related to a CSV injections.</li>
672     * </ul>
673     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
674     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
675     *
676     * @param csvFilePath Filename of the CSV file to check.
677     * @return True only if the file pass all validations.
678     * @see "https://commons.apache.org/proper/commons-csv/"
679     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
680     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
681     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
682     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
683     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
684     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
685     * @see "https://cwe.mitre.org/data/definitions/1236.html"
686     */
687    public static boolean isExcelCSVSafe(String csvFilePath) {
688        boolean isSafe;
689        final AtomicInteger recordCount = new AtomicInteger();
690        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
691
692        try {
693            final List<String> payloadsIdentified = new ArrayList<>();
694            try (Reader in = new FileReader(csvFilePath)) {
695                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
696                records.forEach(record -> {
697                    record.forEach(recordValue -> {
698                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
699                            payloadsIdentified.add(recordValue);
700                        }
701                        recordCount.getAndIncrement();
702                    });
703                });
704            }
705            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
706        } catch (Exception e) {
707            isSafe = false;
708        }
709
710        return isSafe;
711    }
712
713    /**
714     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
715     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
716     *
717     * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode})
718     * @param input          When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used.
719     * @param secret         Secret to use to compute the SHA256 HMAC.
720     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
721     * @throws Exception If any exception occurs.
722     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
723     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
724     * @see "https://portswigger.net/web-security/deserialization"
725     * @see "https://www.baeldung.com/java-serialization-approaches"
726     * @see "https://www.baeldung.com/java-serialization"
727     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
728     * @see "https://en.wikipedia.org/wiki/HMAC"
729     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
730     */
731    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception {
732        Map<String, Object> results;
733        String resultFormatTemplate = "%s:%s";
734        //Verify input provided to be consistent
735        if (processingMode == null) {
736            throw new IllegalArgumentException("The processing mode is mandatory!");
737        }
738        if (input == null || input.trim().isEmpty()) {
739            throw new IllegalArgumentException("Input data is mandatory!");
740        }
741        if (secret == null || secret.length == 0) {
742            throw new IllegalArgumentException("The HMAC secret is mandatory!");
743        }
744        if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) {
745            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
746        }
747        //Processing
748        Base64.Decoder b64Decoder = Base64.getDecoder();
749        Base64.Encoder b64Encoder = Base64.getEncoder();
750        String hmacAlgorithm = "HmacSHA256";
751        Mac mac = Mac.getInstance(hmacAlgorithm);
752        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
753        mac.init(key);
754        results = new HashMap<>();
755        results.put("PROCESSING_MODE", processingMode.toString());
756        switch (processingMode) {
757            case PROTECT -> {
758                byte[] objectBytes = b64Decoder.decode(input);
759                byte[] hmac = mac.doFinal(objectBytes);
760                String encodedHmac = b64Encoder.encodeToString(hmac);
761                results.put("STATUS", Boolean.TRUE);
762                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
763            }
764            case VALIDATE -> {
765                String[] parts = input.split(":");
766                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
767                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
768                byte[] hmacComputed = mac.doFinal(objectBytes);
769                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
770                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
771                results.put("STATUS", hmacIsValid);
772                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
773            }
774            default -> throw new IllegalArgumentException("Not supported processing mode!");
775        }
776        return results;
777    }
778
779    /**
780     * Apply a collection of validations on a JSON string provided:
781     * <ul>
782     * <li>Real JSON structure.</li>
783     * <li>Contain less than a specified number of deepness for nested objects or arrays.</li>
784     * <li>Contain less than a specified number of items in any arrays.</li>
785     * </ul>
786     * <br>
787     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
788     * I used the following assumption:
789     * <ul>
790     *      <li>The character <code>{</code> identify the beginning of an object.</li>
791     *      <li>The character <code>}</code> identify the end of an object.</li>
792     *      <li>The character <code>[</code> identify the beginning of an array.</li>
793     *      <li>The character <code>]</code> identify the end of an array.</li>
794     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
795     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
796     * </ul>
797     *
798     * @param json                  String containing the JSON data to validate.
799     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
800     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
801     * @return True only if the string pass all validations.
802     * @see "https://javaee.github.io/jsonp/"
803     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
804     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
805     */
806    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
807        boolean isSafe = false;
808
809        try {
810            //Step 1: Analyse the JSON string
811            int currentDeepness = 0;
812            int currentArrayItemsCount = 0;
813            int maxDeepnessReached = 0;
814            int maxArrayItemsCountReached = 0;
815            boolean currentlyInArray = false;
816            boolean currentlyInString = false;
817            int currentNestedArrayLevel = 0;
818            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
819            String work = json.replace(jsonEscapedDoubleQuote, "'");
820            for (char c : work.toCharArray()) {
821                switch (c) {
822                    case '{': {
823                        if (!currentlyInString) {
824                            currentDeepness++;
825                        }
826                        break;
827                    }
828                    case '}': {
829                        if (!currentlyInString) {
830                            currentDeepness--;
831                        }
832                        break;
833                    }
834                    case '[': {
835                        if (!currentlyInString) {
836                            currentDeepness++;
837                            if (currentlyInArray) {
838                                currentNestedArrayLevel++;
839                            }
840                            currentlyInArray = true;
841                        }
842                        break;
843                    }
844                    case ']': {
845                        if (!currentlyInString) {
846                            currentDeepness--;
847                            currentArrayItemsCount = 0;
848                            if (currentNestedArrayLevel > 0) {
849                                currentNestedArrayLevel--;
850                            }
851                            if (currentNestedArrayLevel == 0) {
852                                currentlyInArray = false;
853                            }
854                        }
855                        break;
856                    }
857                    case '"': {
858                        currentlyInString = !currentlyInString;
859                        break;
860                    }
861                    case ',': {
862                        if (!currentlyInString && currentlyInArray) {
863                            currentArrayItemsCount++;
864                        }
865                        break;
866                    }
867                }
868                if (currentDeepness > maxDeepnessReached) {
869                    maxDeepnessReached = currentDeepness;
870                }
871                if (currentArrayItemsCount > maxArrayItemsCountReached) {
872                    maxArrayItemsCountReached = currentArrayItemsCount;
873                }
874            }
875            //Step 2: Apply validation against the value specified as limits
876            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
877
878            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
879            if (isSafe) {
880                JsonReader reader = Json.createReader(new StringReader(json));
881                isSafe = (reader.read() != null);
882            }
883
884        } catch (Exception e) {
885            isSafe = false;
886        }
887        return isSafe;
888    }
889
890    /**
891     * Apply a collection of validations on a image file provided:
892     * <ul>
893     * <li>Real image file.</li>
894     * <li>Its mime type is into the list of allowed mime types.</li>
895     * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li>
896     * </ul>
897     * <br>
898     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
899     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
900     *
901     * @param imageFilePath         Filename of the image file to check.
902     * @param imageAllowedMimeTypes List of image mime types allowed.
903     * @return True only if the file pass all validations.
904     * @see "https://commons.apache.org/proper/commons-imaging/"
905     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
906     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
907     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
908     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
909     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
910     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
911     * @see "https://exiftool.org/examples.html"
912     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
913     * @see "https://hexed.it/"
914     * @see "https://github.com/sighook/pixload"
915     */
916    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
917        boolean isSafe = false;
918        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
919        try {
920            File imgFile = new File(imageFilePath);
921            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
922                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
923                //Step 1: Check the mime type of the file against the allowed ones
924                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
925                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
926                    //Step 2: Load the image into an object using the Image API
927                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
928                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
929                        //Step 3: Check the metadata if the image format support it - Highly experimental
930                        List<String> metadataWithPayloads = new ArrayList<>();
931                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
932                        if (imgMetadata != null) {
933                            imgMetadata.getItems().forEach(item -> {
934                                String metadata = item.toString();
935                                if (payloadDetectionRegex.matcher(metadata).find()) {
936                                    metadataWithPayloads.add(metadata);
937                                }
938                            });
939                        }
940                        isSafe = metadataWithPayloads.isEmpty();
941                    }
942                }
943            }
944        } catch (Exception e) {
945            isSafe = false;
946        }
947        return isSafe;
948    }
949
950    /**
951     * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br>
952     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
953     *
954     * @param inputFilePath Filename of the file to clean up.
955     * @param inputFileType Type of the file provided.
956     * @return A array of bytes with the cleaned file.
957     * @throws IllegalArgumentException If an invalid parameter is passed
958     * @throws Exception                If any technical error during the cleaning processing
959     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
960     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
961     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
962     * @see "https://stackoverflow.com/a/13605411"
963     */
964    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
965        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
966        File inputFile = new File(inputFilePath);
967        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
968            throw new IllegalArgumentException("Cannot read the content of the input file!");
969        }
970        switch (inputFileType) {
971            case PDF -> {
972                try (PDDocument document = Loader.loadPDF(inputFile)) {
973                    document.save(sanitizedContent);
974                }
975            }
976            case IMAGE -> {
977                // Load the original image
978                BufferedImage originalImage = ImageIO.read(inputFile);
979                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
980                // Check that image has been successfully loaded
981                if (originalImage == null) {
982                    throw new IOException("Cannot load the original image !");
983                }
984                // Get current Width and Height of the image
985                int originalWidth = originalImage.getWidth(null);
986                int originalHeight = originalImage.getHeight(null);
987                // Resize the image by removing 1px on Width and Height
988                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
989                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
990                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
991                // Save image to a bytes buffer
992                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
993                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
994                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
995                }
996                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
997                Graphics2D drawer = sanitizedImage.createGraphics();
998                drawer.drawImage(initialSizedImage, 0, 0, null);
999                drawer.dispose();
1000                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
1001            }
1002            default -> throw new IllegalArgumentException("Type of file not supported !");
1003        }
1004        if (sanitizedContent.size() == 0) {
1005            throw new IOException("An error occur during the rewrite operation!");
1006        }
1007        return sanitizedContent.toByteArray();
1008    }
1009
1010    /**
1011     * Apply a collection of validations on a string expected to be an email address:
1012     * <ul>
1013     * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li>
1014     * <li>Is not using "Encoded-word" format.</li>
1015     * <li>Is not using comment format.</li>
1016     * <li>Is not using "Punycode" format.</li>
1017     * <li>Is not using UUCP style addresses.</li>
1018     * <li>Is not using address literals.</li>
1019     * <li>Is not using source routes.</li>
1020     * <li>Is not using the "percent hack".</li>
1021     * </ul><br>
1022     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
1023     *
1024     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
1025     *
1026     * @param addr String expected to be a valid email address.
1027     * @return True only if the string pass all validations.
1028     * @see "https://commons.apache.org/proper/commons-validator/"
1029     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1030     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1031     * @see "https://portswigger.net/research/splitting-the-email-atom"
1032     * @see "https://www.jochentopf.com/email/address.html"
1033     * @see "https://en.wikipedia.org/wiki/Email_address"
1034     */
1035    public static boolean isEmailAddress(String addr) {
1036        boolean isValid = false;
1037        String work = addr.toLowerCase(Locale.ROOT);
1038        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1039        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1040        try {
1041            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1042            if (EmailValidator.getInstance(true, true).isValid(work)) {
1043                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1044                if (!encodedWordRegex.matcher(work).find()) {
1045                    //If OK then validate it does not contains punycode
1046                    if (!work.contains("xn--")) {
1047                        //If OK then validate it does not use:
1048                        // UUCP style addresses,
1049                        // Comment format,
1050                        // Address literals,
1051                        // Source routes,
1052                        // The percent hack.
1053                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1054                            isValid = true;
1055                        }
1056                    }
1057                }
1058            }
1059        } catch (Exception e) {
1060            isValid = false;
1061        }
1062        return isValid;
1063    }
1064
1065    /**
1066     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1067     * <br>
1068     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1069     * <br>
1070     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1071     * <br>
1072     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1073     * <br>
1074     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1075     *
1076     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1077     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1078     * @see "https://www.stet.eu/en/psd2/"
1079     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1080     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1081     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1082     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1083     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1084     * @see "https://portswigger.net/web-security/ssrf"
1085     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1086     */
1087    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1088        boolean isValid = false;
1089        long connectionTimeoutInSeconds = 10;
1090        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1091        try {
1092            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1093            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1094                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1095                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1096                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1097                    URI uri = URI.create(certificateUrl);
1098                    //3. Require usage of HTTPS and reject any url containing query parameters
1099                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1100                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1101                        //and limit the interest to use the SSRF because to pass the check the url need to:
1102                        //- Do not having any query parameters.
1103                        //- Use HTTPS protocol.
1104                        //- End with a string having the format "_[0-9a-f]{64}".
1105                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1106                        HttpResponse<String> response;
1107                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1108                            HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1109                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1110                                    .build();
1111                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1112                            if (response.statusCode() == 200) {
1113                                //5. Ensure that the response content type is "text/plain"
1114                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1115                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1116                            }
1117                        }
1118                    }
1119                }
1120            }
1121        } catch (Exception e) {
1122            isValid = false;
1123        }
1124        return isValid;
1125    }
1126
1127    /**
1128     * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached.
1129     *
1130     * @param encodedData            URL encoded data.
1131     * @param decodingRoundThreshold Threshold above which decoding will fail.
1132     * @return The decoded data.
1133     * @throws SecurityException If the threshold is reached.
1134     * @see "https://en.wikipedia.org/wiki/Percent-encoding"
1135     * @see "https://owasp.org/www-community/Double_Encoding"
1136     * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings"
1137     * @see "https://capec.mitre.org/data/definitions/120.html"
1138     */
1139    public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException {
1140        if (decodingRoundThreshold < 1) {
1141            throw new IllegalArgumentException("Threshold must be a positive number !");
1142        }
1143        if (encodedData == null) {
1144            throw new IllegalArgumentException("Data provided must not be null !");
1145        }
1146        Charset charset = StandardCharsets.UTF_8;
1147        int currentDecodingRound = 0;
1148        boolean isFinished = false;
1149        String currentRoundData = encodedData;
1150        String previousRoundData = encodedData;
1151        while (!isFinished) {
1152            if (currentDecodingRound > decodingRoundThreshold) {
1153                throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold));
1154            }
1155            currentRoundData = URLDecoder.decode(currentRoundData, charset);
1156            isFinished = currentRoundData.equals(previousRoundData);
1157            previousRoundData = currentRoundData;
1158            currentDecodingRound++;
1159        }
1160        return currentRoundData;
1161    }
1162
1163    /**
1164     * Apply a collection of validations on a string expected to be an system file/folder path:
1165     * <ul>
1166     * <li>Does not contains path traversal payload.</li>
1167     * <li>The canonical path is equals to the absolute path.</li>
1168     * </ul><br>
1169     *
1170     * @param path String expected to be a valid system file/folder path.
1171     * @return True only if the string pass all validations.
1172     * @see "https://portswigger.net/web-security/file-path-traversal"
1173     * @see "https://learn.snyk.io/lesson/directory-traversal/"
1174     * @see "https://capec.mitre.org/data/definitions/126.html"
1175     * @see "https://owasp.org/www-community/attacks/Path_Traversal"
1176     */
1177    public static boolean isPathSafe(String path) {
1178        boolean isSafe = false;
1179        int decodingRoundThreshold = 3;
1180        try {
1181            if (path != null && !path.isEmpty()) {
1182                //URL decode the path if case of data coming from a web context
1183                String decodedPath = applyURLDecoding(path, decodingRoundThreshold);
1184                //Ensure that no path traversal expression is present
1185                if (!decodedPath.contains("..")) {
1186                    File f = new File(decodedPath);
1187                    String canonicalPath = f.getCanonicalPath();
1188                    String absolutePath = f.getAbsolutePath();
1189                    isSafe = canonicalPath.equals(absolutePath);
1190                }
1191            }
1192        } catch (Exception e) {
1193            isSafe = false;
1194        }
1195        return isSafe;
1196    }
1197
1198    /**
1199     * Identify if an XML contains any XML comments or have any XSL processing instructions.<br>
1200     * Stream reader based parsing is used to support large XML tree.
1201     *
1202     * @param xmlFilePath Filename of the XML file to check.
1203     * @return True only if XML comments or XSL processing instructions are identified.
1204     * @see "https://www.tutorialspoint.com/xml/xml_processing.htm"
1205     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html"
1206     * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion"
1207     * @see "https://www.w3.org/Style/styling-XML.en.html"
1208     */
1209    public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) {
1210        boolean itemsDetected = false;
1211        try {
1212            //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks
1213            XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
1214            xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
1215            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
1216            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
1217            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
1218
1219            //Parse file
1220            try (FileInputStream fis = new FileInputStream(xmlFilePath)) {
1221                XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis);
1222                int eventType;
1223                while (reader.hasNext() && !itemsDetected) {
1224                    eventType = reader.next();
1225                    if (eventType == XMLEvent.COMMENT) {
1226                        itemsDetected = true;
1227                    } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) {
1228                        itemsDetected = true;
1229                    }
1230                }
1231            }
1232        } catch (Exception e) {
1233            //In case of error then assume that the check failed
1234            itemsDetected = true;
1235        }
1236        return itemsDetected;
1237    }
1238
1239
1240    /**
1241     * Perform a set of additional validations against a JWT token:
1242     * <ul>
1243     *     <li>Do not use the <b>NONE</b> signature algorithm.</li>
1244     *     <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li>
1245     *     <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li>
1246     *     <li>Match the expected type of token: ACCESS or ID or REFRESH.</li>
1247     * </ul>
1248     *
1249     * @param token               JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied.
1250     * @param expectedTokenType   The type of expected token using the enumeration provided.
1251     * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to.
1252     * @return True only the token pass all the validations.
1253     * @see "https://www.iana.org/assignments/jwt/jwt.xhtml"
1254     * @see "https://auth0.com/docs/secure/tokens/access-tokens"
1255     * @see "https://auth0.com/docs/secure/tokens/id-tokens"
1256     * @see "https://auth0.com/docs/secure/tokens/refresh-tokens"
1257     * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/"
1258     * @see "https://jwt.io/libraries?language=Java"
1259     * @see "https://pentesterlab.com/blog/secure-jwt-library-design"
1260     * @see "https://github.com/auth0/java-jwt"
1261     */
1262    public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) {
1263        boolean isValid = false;
1264        TokenType tokenType;
1265        try {
1266            if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) {
1267                if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) {
1268                    String jti = token.getId();
1269                    if (jti != null && !jti.trim().isEmpty()) {
1270                        boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase);
1271                        if (!jtiIsRevoked) {
1272                            //Determine the token type based on the presence of specifics claims
1273                            if (!token.getClaim("scope").isMissing()) {
1274                                tokenType = TokenType.ACCESS;
1275                            } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) {
1276                                tokenType = TokenType.ID;
1277                            } else {
1278                                tokenType = TokenType.REFRESH;
1279                            }
1280                            isValid = (tokenType.equals(expectedTokenType));
1281                        }
1282                    }
1283                }
1284            }
1285
1286        } catch (Exception e) {
1287            //In case of error then assume that the check failed
1288            isValid = false;
1289        }
1290        return isValid;
1291    }
1292
1293    /**
1294     * Apply a validations on a regular expression to ensure that is not prone to the ReDOS attack.
1295     * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b>
1296     * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool.
1297     *
1298     * @param regex                       String expected to be a valid regular expression (regex).
1299     * @param data                        Test data on which the regular expression is executed for the test.
1300     * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified).
1301     * @return True only if the string pass all validations.
1302     * @see "https://github.blog/security/how-to-fix-a-redos/"
1303     * @see "https://learn.snyk.io/lesson/redos"
1304     * @see "https://rules.sonarsource.com/java/RSPEC-2631/"
1305     * @see "https://github.com/doyensec/regexploit"
1306     * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf"
1307     * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS"
1308     */
1309    public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) {
1310        Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value.");
1311        Objects.requireNonNull(data, "A sample data is needed to perform the test.");
1312        Objects.requireNonNull(regex, "A regular expression is needed to perform the test.");
1313        boolean isSafe = false;
1314        int executionTimeout = maximumRunningTimeInSeconds.orElse(4);
1315        ExecutorService executor = Executors.newSingleThreadExecutor();
1316        try {
1317            Callable<Boolean> task = () -> {
1318                Pattern pattern = Pattern.compile(regex);
1319                return pattern.matcher(data).matches();
1320            };
1321            List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS);
1322            if (!tasks.getFirst().isCancelled()) {
1323                isSafe = true;
1324            }
1325        } catch (Exception e) {
1326            isSafe = false;
1327        } finally {
1328            executor.shutdownNow();
1329        }
1330        return isSafe;
1331    }
1332
1333    /**
1334     * Compute a UUID version 7 without using any external dependency.<br><br>
1335     * <b>Below are my personal point of view and perhaps I'm totally wrong!</b>
1336     * <br><br>
1337     * Why such method?
1338     * <ul>
1339     * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li>
1340     * <li>Import a library just to generate such value is overkill for me.</li>
1341     * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li>
1342     * </ul>
1343     * <br>
1344     * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b>
1345     * <br><br>
1346     * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly:
1347     * <ul>
1348     *     <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li>
1349     *     <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li>
1350     * </ul>
1351     *
1352     * @return A UUID object representing the UUID v7.
1353     * @see "https://uuid7.com/"
1354     * @see "https://antonz.org/uuidv7/"
1355     * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns"
1356     * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7"
1357     * @see "https://www.baeldung.com/java-generating-time-based-uuids"
1358     * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier"
1359     * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/"
1360     */
1361    public static UUID computeUUIDv7() {
1362        SecureRandom secureRandom = new SecureRandom();
1363        // Generate truly random bytes
1364        byte[] value = new byte[16];
1365        secureRandom.nextBytes(value);
1366        // Get current timestamp in milliseconds
1367        ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES);
1368        timestamp.putLong(System.currentTimeMillis());
1369        // Create the TIMESTAMP part of the UUID
1370        System.arraycopy(timestamp.array(), 2, value, 0, 6);
1371        // Create the VERSION and the VARIANT parts of the UUID
1372        value[6] = (byte) ((value[6] & 0x0F) | 0x70);
1373        value[8] = (byte) ((value[8] & 0x3F) | 0x80);
1374        //Create the HIGH and LOW parts of the UUID
1375        ByteBuffer buf = ByteBuffer.wrap(value);
1376        long high = buf.getLong();
1377        long low = buf.getLong();
1378        //Create and return the UUID object
1379        UUID uuidv7 = new UUID(high, low);
1380        return uuidv7;
1381    }
1382}