001package eu.righettod;
002
003
004import org.apache.commons.csv.CSVFormat;
005import org.apache.commons.csv.CSVRecord;
006import org.apache.commons.imaging.ImageInfo;
007import org.apache.commons.imaging.Imaging;
008import org.apache.commons.imaging.common.ImageMetadata;
009import org.apache.commons.validator.routines.EmailValidator;
010import org.apache.commons.validator.routines.InetAddressValidator;
011import org.apache.pdfbox.Loader;
012import org.apache.pdfbox.pdmodel.PDDocument;
013import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
014import org.apache.pdfbox.pdmodel.PDDocumentInformation;
015import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
016import org.apache.pdfbox.pdmodel.common.PDMetadata;
017import org.apache.pdfbox.pdmodel.interactive.action.*;
018import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
019import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
021import org.apache.poi.poifs.filesystem.DirectoryEntry;
022import org.apache.poi.poifs.filesystem.POIFSFileSystem;
023import org.apache.poi.poifs.macros.VBAMacroReader;
024import org.apache.tika.detect.DefaultDetector;
025import org.apache.tika.detect.Detector;
026import org.apache.tika.io.TemporaryResources;
027import org.apache.tika.io.TikaInputStream;
028import org.apache.tika.metadata.Metadata;
029import org.apache.tika.mime.MediaType;
030import org.apache.tika.mime.MimeTypes;
031import org.w3c.dom.Document;
032import org.xml.sax.EntityResolver;
033import org.xml.sax.InputSource;
034import org.xml.sax.SAXException;
035
036import javax.crypto.Mac;
037import javax.crypto.spec.SecretKeySpec;
038import javax.imageio.ImageIO;
039import javax.json.Json;
040import javax.json.JsonReader;
041import javax.xml.parsers.DocumentBuilder;
042import javax.xml.parsers.DocumentBuilderFactory;
043import javax.xml.parsers.ParserConfigurationException;
044import java.awt.*;
045import java.awt.image.BufferedImage;
046import java.io.*;
047import java.net.*;
048import java.net.http.HttpClient;
049import java.net.http.HttpRequest;
050import java.net.http.HttpResponse;
051import java.nio.charset.StandardCharsets;
052import java.nio.file.Files;
053import java.security.MessageDigest;
054import java.time.Duration;
055import java.util.List;
056import java.util.*;
057import java.util.concurrent.atomic.AtomicInteger;
058import java.util.regex.Pattern;
059import java.util.zip.ZipEntry;
060import java.util.zip.ZipFile;
061
062/**
 * Provides different utility methods to apply processing from a security perspective.<br>
 * These code snippets:
065 * <ul>
066 *     <li>Can be used, as "foundation", to customize the validation to the app context.</li>
067 *     <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li>
068 *     <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
069 * </ul>
070 */
071public class SecurityUtils {
072
073    /**
074     * Default constructor: Not needed as the class only provides static methods.
075     */
076    private SecurityUtils() {
077    }
078
079    /**
080     * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br>
081     * This method consider that format of the PIN code is [0-9]{6,}<br>
082     * Rule to consider a PIN code as weak:<br>
083     * - Length is inferior to 6 positions.<br>
084     * - Contain only the same number or only a sequence of zero.<br>
085     * - Contain sequence of following incremental or decremental numbers.<br>
086     *
087     * @param pinCode PIN code to verify.
088     * @return True only if the PIN is considered as weak.
089     */
090    public static boolean isWeakPINCode(String pinCode) {
091        boolean isWeak = true;
092        //Length is inferior to 6 positions
093        //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one
094        //and to ensure that the PIN is not only a sequence of zero
095        if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) {
096            //Contain only the same number
097            String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length());
098            if (!Pattern.matches(regex, pinCode)) {
099                //Contain sequence of following incremental or decremental numbers
100                char previousChar = 'X';
101                boolean containSequence = false;
102                for (char c : pinCode.toCharArray()) {
103                    if (previousChar != 'X') {
104                        int previousNbr = Integer.parseInt(String.valueOf(previousChar));
105                        int currentNbr = Integer.parseInt(String.valueOf(c));
106                        if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) {
107                            containSequence = true;
108                            break;
109                        }
110                    }
111                    previousChar = c;
112                }
113                if (!containSequence) {
114                    isWeak = false;
115                }
116            }
117        }
118        return isWeak;
119    }
120
121    /**
122     * Apply a collection of validations on a Word 97-2003 (binary format) document file provided:<br>
123     * - Real Microsoft Word 97-2003 document file.<br>
124     * - No VBA Macro.<br>
125     * - No embedded objects.<br>
126     *
127     * @param wordFilePath Filename of the Word document file to check.
128     * @return True only if the file pass all validations.
129     * @see "https://poi.apache.org/components/"
130     * @see "https://poi.apache.org/components/document/"
131     * @see "https://poi.apache.org/components/poifs/how-to.html"
132     * @see "https://poi.apache.org/components/poifs/embeded.html"
133     * @see "https://poi.apache.org/"
134     * @see "https://mvnrepository.com/artifact/org.apache.poi/poi"
135     */
136    public static boolean isWord972003DocumentSafe(String wordFilePath) {
137        boolean isSafe = false;
138        try {
139            File wordFile = new File(wordFilePath);
140            if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) {
141                //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file
142                try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) {
143                    //Step 2: Check if the document contains VBA macros, in our case is not allowed
144                    VBAMacroReader macroReader = new VBAMacroReader(fs);
145                    Map<String, String> macros = macroReader.readMacros();
146                    if (macros == null || macros.isEmpty()) {
147                        //Step 3: Check if the document contains any embedded objects, in our case is not allowed
148                        //From POI documentation:
149                        //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root.
150                        //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers.
151                        final List<String> embeddedObjectFound = new ArrayList<>();
152                        DirectoryEntry root = fs.getRoot();
153                        if (root.getEntryCount() > 0) {
154                            root.iterator().forEachRemaining(entry -> {
155                                if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) {
156                                    DirectoryEntry objPoolDirectory = (DirectoryEntry) entry;
157                                    if (objPoolDirectory.getEntryCount() > 0) {
158                                        objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> {
159                                            if (objPoolDirectoryEntry instanceof DirectoryEntry) {
160                                                DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry;
161                                                if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) {
162                                                    objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> {
163                                                        if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) {
164                                                            embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName());
165                                                        }
166                                                    });
167                                                }
168                                            }
169                                        });
170                                    }
171                                }
172                            });
173                        }
174                        isSafe = embeddedObjectFound.isEmpty();
175                    }
176                }
177            }
178        } catch (Exception e) {
179            isSafe = false;
180        }
181        return isSafe;
182    }
183
184    /**
185     * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions.
186     *
187     * @param xmlFilePath Filename of the XML file to check.
188     * @return True only if the file pass all validations.
189     * @see "https://portswigger.net/web-security/xxe"
190     * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java"
191     * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258"
192     * @see "https://www.w3.org/TR/xinclude-11/"
193     * @see "https://en.wikipedia.org/wiki/XInclude"
194     */
195    public static boolean isXMLSafe(String xmlFilePath) {
196        boolean isSafe = false;
197        try {
198            File xmlFile = new File(xmlFilePath);
199            if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
200                //Step 1a: Verify that the XML file content does not contain any XInclude instructions
201                boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include "));
202                if (!containXInclude) {
203                    //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones
204                    //Create an XML document builder throwing Exception if a DOCTYPE instruction is present
205                    DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
206                    dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
207                    //Xerces 2 only
208                    //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true);
209                    dbfInstance.setXIncludeAware(false);
210                    DocumentBuilder builder = dbfInstance.newDocumentBuilder();
211                    //Parse the document
212                    Document doc = builder.parse(xmlFile);
213                    isSafe = (doc != null && doc.getDocumentElement() != null);
214                }
215            }
216        } catch (Exception e) {
217            isSafe = false;
218        }
219        return isSafe;
220    }
221
222
223    /**
224     * Extract all URL links from a PDF file provided.<br>
225     * This can be used to apply validation on a PDF against contained links.
226     *
227     * @param pdfFilePath pdfFilePath Filename of the PDF file to process.
228     * @return A List of URL objects that is empty if no links is found.
229     * @throws Exception If any error occurs during the processing of the PDF file.
230     * @see "https://www.gushiciku.cn/pl/21KQ"
231     * @see "https://pdfbox.apache.org/"
232     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
233     */
234    public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception {
235        final List<URL> links = new ArrayList<>();
236        File pdfFile = new File(pdfFilePath);
237        try (PDDocument document = Loader.loadPDF(pdfFile)) {
238            PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
239            AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() {
240                @Override
241                public boolean accept(PDAnnotation annotation) {
242                    boolean keep = false;
243                    if (annotation instanceof PDAnnotationLink) {
244                        keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI);
245                    }
246                    return keep;
247                }
248            };
249            documentCatalog.getPages().forEach(page -> {
250                try {
251                    page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> {
252                        PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction();
253                        try {
254                            URL urlObj = new URL(linkAnnotation.getURI());
255                            if (!links.contains(urlObj)) {
256                                links.add(urlObj);
257                            }
258                        } catch (MalformedURLException e) {
259                            throw new RuntimeException(e);
260                        }
261                    });
262                } catch (Exception e) {
263                    throw new RuntimeException(e);
264                }
265            });
266        }
267        return links;
268    }
269
270    /**
271     * Apply a collection of validations on a PDF file provided:<br>
272     * - Real PDF file<br>
273     * - No attachments.<br>
274     * - No Javascript code.<br>
275     * - No links using action of type URI/Launch/RemoteGoTo/ImportData.<br>
276     *
277     * @param pdfFilePath Filename of the PDF file to check.
278     * @return True only if the file pass all validations.
279     * @see "https://stackoverflow.com/a/36161267"
280     * @see "https://www.gushiciku.cn/pl/21KQ"
281     * @see "https://github.com/jonaslejon/malicious-pdf"
282     * @see "https://pdfbox.apache.org/"
283     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
284     */
285    public static boolean isPDFSafe(String pdfFilePath) {
286        boolean isSafe = false;
287        try {
288            File pdfFile = new File(pdfFilePath);
289            if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) {
290                //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file
291                try (PDDocument document = Loader.loadPDF(pdfFile)) {
292                    //Step 2: Check if the file contains attached files, in our case is not allowed
293                    PDDocumentCatalog documentCatalog = document.getDocumentCatalog();
294                    PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog);
295                    if (namesDictionary.getEmbeddedFiles() == null) {
296                        //Step 3: Check if the file contains Javascript code, in our case is not allowed
297                        if (namesDictionary.getJavaScript() == null) {
298                            //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed
299                            final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>();
300                            AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() {
301                                @Override
302                                public boolean accept(PDAnnotation annotation) {
303                                    boolean keep = false;
304                                    if (annotation instanceof PDAnnotationLink) {
305                                        PDAnnotationLink link = (PDAnnotationLink) annotation;
306                                        PDAction action = link.getAction();
307                                        if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) {
308                                            keep = true;
309                                        }
310                                    }
311                                    return keep;
312                                }
313                            };
314                            documentCatalog.getPages().forEach(page -> {
315                                try {
316                                    notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size());
317                                } catch (IOException e) {
318                                    throw new RuntimeException(e);
319                                }
320                            });
321                            if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) {
322                                isSafe = true;
323                            }
324                        }
325                    }
326                }
327            }
328        } catch (Exception e) {
329            isSafe = false;
330        }
331        return isSafe;
332    }
333
334    /**
335     * Remove as much as possible metadata from the provided PDF document object.
336     *
337     * @param document PDFBox PDF document object on which metadata must be removed.
338     * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069"
339     * @see "https://pdfbox.apache.org/"
340     * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox"
341     */
342    public static void clearPDFMetadata(PDDocument document) {
343        if (document != null) {
344            PDDocumentInformation infoEmpty = new PDDocumentInformation();
345            document.setDocumentInformation(infoEmpty);
346            PDMetadata newMetadataEmpty = new PDMetadata(document);
347            document.getDocumentCatalog().setMetadata(newMetadataEmpty);
348        }
349    }
350
351
352    /**
353     * Validate that the URL provided is really a relative URL.
354     *
355     * @param targetUrl URL to validate.
356     * @return True only if the file pass all validations.
357     * @see "https://portswigger.net/web-security/ssrf"
358     * @see "https://stackoverflow.com/q/6785442"
359     */
360    public static boolean isRelativeURL(String targetUrl) {
361        boolean isValid = false;
362        //Reject any URL encoded content and URL starting with a double slash
363        //Reject any URL contains credentials or fragment to prevent potential bypasses
364        String work = targetUrl;
365        if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) {
366            //Creation of a URL object must fail
367            try {
368                new URL(work);
369                isValid = false;
370            } catch (MalformedURLException mf) {
371                //Last check to be sure (for prod usage compile the pattern one time)
372                isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find();
373            }
374        }
375        return isValid;
376    }
377
378    /**
379     * Apply a collection of validations on a ZIP file provided:<br>
380     * - Real ZIP file<br>
381     * - Contain less than a specified level of deepness.<br>
382     * - Do not contain Zip-Slip entry path.<br>
383     *
384     * @param zipFilePath       Filename of the ZIP file to check.
385     * @param maxLevelDeepness  Threshold of deepness above which a ZIP archive will be rejected.
386     * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file.
387     * @return True only if the file pass all validations.
388     * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042"
389     * @see "https://security.snyk.io/research/zip-slip-vulnerability"
390     * @see "https://en.wikipedia.org/wiki/Zip_bomb"
391     * @see "https://github.com/ptoomey3/evilarc"
392     * @see "https://github.com/abdulfatir/ZipBomb"
393     * @see "https://www.baeldung.com/cs/zip-bomb"
394     * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/"
395     * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream"
396     */
397    public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) {
398        List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz");
399        boolean isSafe = false;
400        try {
401            File zipFile = new File(zipFilePath);
402            if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) {
403                //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file
404                try (ZipFile zipArch = new ZipFile(zipFile)) {
405                    //Step 2: Parse entries
406                    long deepness = 0;
407                    ZipEntry zipEntry;
408                    String entryExtension;
409                    String zipEntryName;
410                    boolean validationsFailed = false;
411                    Enumeration<? extends ZipEntry> entries = zipArch.entries();
412                    while (entries.hasMoreElements()) {
413                        zipEntry = entries.nextElement();
414                        zipEntryName = zipEntry.getName();
415                        entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim();
416                        //Step 2a: Check if the current entry is an archive file
417                        if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) {
418                            validationsFailed = true;
419                            break;
420                        }
421                        //Step 2b: Check that level of deepness is inferior to the threshold specified
422                        if (zipEntryName.contains("/")) {
423                            //Determine deepness by inspecting the entry name.
424                            //Indeed, folder will be represented like this: folder/folder/folder/
425                            //So we can count the number of "/" to identify the deepness of the entry
426                            deepness = zipEntryName.chars().filter(ch -> ch == '/').count();
427                            if (deepness > maxLevelDeepness) {
428                                validationsFailed = true;
429                                break;
430                            }
431                        }
432                        //Step 2c: Check if any entries match pattern of zip slip payload
433                        if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) {
434                            validationsFailed = true;
435                            break;
436                        }
437                    }
438                    if (!validationsFailed) {
439                        isSafe = true;
440                    }
441                }
442            }
443        } catch (Exception e) {
444            isSafe = false;
445        }
446        return isSafe;
447    }
448
449    /**
450     * Identify the mime type of the content specified (array of bytes).<br>
451     * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required.
452     *
453     * @param content The content as an array of bytes.
454     * @return The mime type in lower case or null if it cannot be identified.
455     * @see "https://twitter.com/righettod/status/1595824709186519041"
456     * @see "https://tika.apache.org/"
457     * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core"
458     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types"
459     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml"
460     */
461    public static String identifyMimeType(byte[] content) {
462        String mimeType = null;
463        if (content != null && content.length > 0) {
464            Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
465            Metadata metadata = new Metadata();
466            try {
467                try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) {
468                    MediaType mt = detector.detect(tikaInputStream, metadata);
469                    if (mt != null) {
470                        mimeType = mt.toString().toLowerCase(Locale.ROOT);
471                    }
472                }
473            } catch (IOException ioe) {
474                mimeType = null;
475            }
476        }
477        return mimeType;
478    }
479
480    /**
481     * Apply a collection of validations on a string expected to be an public IP address:<br>
482     * - Is a valid IP v4 or v6 address.<br>
483     * - Is public from an Internet perspective.<br><br>
484     * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded".
485     * <br>
486     * <b>Note for IPv6:</b> I used documentation found so it is really experimental!
487     *
488     * @param ip String expected to be a valid IP address.
489     * @return True only if the string pass all validations.
490     * @see "https://commons.apache.org/proper/commons-validator/"
491     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html"
492     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"
493     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf"
494     * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf"
495     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For"
496     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded"
497     * @see "https://ipcisco.com/lesson/ipv6-address/"
498     * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html"
499     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)"
500     * @see "https://www.arin.net/reference/research/statistics/address_filters/"
501     * @see "https://en.wikipedia.org/wiki/Multicast_address"
502     * @see "https://stackoverflow.com/a/5619409"
503     * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf"
504     * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml"
505     * @see "https://developer.android.com/reference/java/net/Inet6Address"
506     * @see "https://en.wikipedia.org/wiki/Unique_local_address"
507     */
508    public static boolean isPublicIPAddress(String ip) {
509        boolean isValid = false;
510        try {
511            //Quick validation on the string itself based on characters used to compose an IP v4/v6 address
512            if (Pattern.matches("[0-9a-fA-F:.]+", ip)) {
513                //If OK then use the dedicated InetAddressValidator from Apache Commons Validator
514                if (InetAddressValidator.getInstance().isValid(ip)) {
515                    //If OK then validate that is an public IP address
516                    //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked.
517                    InetAddress addr = InetAddress.getByName(ip);
518                    isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress());
519                    //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP
520                    //For the prefix map, the start of the key indicates if the value is a regex or a string
521                    if (isValid && (addr instanceof Inet6Address)) {
522                        Map<String, String> prefixes = new HashMap<>();
523                        prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$");
524                        prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$");
525                        prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:");
526                        prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$");
527                        prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$");
528                        prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$");
529                        prefixes.put("STRING_DOCUMENTATION", "2001:db8:");
530                        prefixes.put("STRING_GLOBAL-UNICAST", "2000:");
531                        prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$");
532                        final List<Boolean> results = new ArrayList<>();
533                        final String ipLower = ip.trim().toLowerCase(Locale.ROOT);
534                        prefixes.forEach((addressType, expr) -> {
535                            String exprLower = expr.trim().toLowerCase();
536                            if (addressType.startsWith("STRING_")) {
537                                results.add(ipLower.startsWith(exprLower));
538                            } else {
539                                results.add(Pattern.matches(exprLower, ipLower));
540                            }
541                        });
542                        isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE));
543                    }
544                }
545            }
546        } catch (Exception e) {
547            isValid = false;
548        }
549        return isValid;
550    }
551
552    /**
553     * Compute a SHA256 hash from an input composed of a collection of strings.<br><br>
554     * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br>
555     * Example of possible abuse without precautions applied during the hash calculation logic:<br>
556     * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br><br>
557     * This method ensure that both hash above will be different.<br><br>
558     * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character.
559     *
560     * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection.
561     * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null.
562     * @throws Exception If any exception occurs
563     * @see "https://github.com/righettod/code-snippets-security-utils/issues/16"
564     * @see "https://pentesterlab.com/badges/codereview"
565     * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/"
566     * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash"
567     */
568    public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception {
569        byte[] hash = null;
570        String separator = "|";
571        if (parts != null && !parts.isEmpty()) {
572            //Ensure that not part is null
573            if (parts.stream().anyMatch(Objects::isNull)) {
574                throw new IllegalArgumentException("No part must be null!");
575            }
576            //Ensure that the separator is absent from every part
577            if (parts.stream().anyMatch(part -> part.contains(separator))) {
578                throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator));
579            }
580            MessageDigest digest = MessageDigest.getInstance("SHA-256");
581            final StringBuilder buffer = new StringBuilder(separator);
582            parts.forEach(p -> {
583                buffer.append(p).append(separator);
584            });
585            hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8));
586        }
587        return hash;
588    }
589
590    /**
591     * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br>
592     * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br>
593     * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations.
594     *
595     * @param xmlFilePath              Filename of the XML file to check.
596     * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references.
597     * @return True only if the file pass all validations.
598     * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp"
599     * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid"
600     * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html"
601     * @see "https://www.xml.com/pub/98/08/xmlqna0.html"
602     * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397"
603     * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier"
604     */
605    public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) {
606        boolean isSafe = false;
607        final String errorTemplate = "Non allowed %s ID detected!";
608        final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>";
609        final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>";
610
611        if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) {
612            throw new IllegalArgumentException("At least one SID must be specified!");
613        }
614        File xmlFile = new File(xmlFilePath);
615        if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) {
616            try {
617                EntityResolver resolverValidator = (publicId, systemId) -> {
618                    if (publicId != null) {
619                        throw new SAXException(String.format(errorTemplate, "PUBLIC"));
620                    }
621                    if (!allowedSystemIdentifiers.contains(systemId)) {
622                        throw new SAXException(String.format(errorTemplate, "SYSTEM"));
623                    }
624                    //If it is OK then return a empty DTD/XSD
625                    return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD));
626                };
627                DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance();
628                dbfInstance.setIgnoringElementContentWhitespace(true);
629                dbfInstance.setXIncludeAware(false);
630                dbfInstance.setValidating(false);
631                dbfInstance.setCoalescing(true);
632                dbfInstance.setIgnoringComments(false);
633                DocumentBuilder builder = dbfInstance.newDocumentBuilder();
634                builder.setEntityResolver(resolverValidator);
635                Document doc = builder.parse(xmlFile);
636                isSafe = (doc != null);
637            } catch (SAXException | IOException | ParserConfigurationException e) {
638                isSafe = false;
639            }
640        }
641
642        return isSafe;
643    }
644
645    /**
646     * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL):<br>
647     * - Real CSV file.<br>
648     * - Do not contains any payload related to a CSV injections.<br><br>
649     * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br>
650     * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br>
651     *
652     * @param csvFilePath Filename of the CSV file to check.
653     * @return True only if the file pass all validations.
654     * @see "https://commons.apache.org/proper/commons-csv/"
655     * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL"
656     * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection"
657     * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/"
658     * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection"
659     * @see "https://owasp.org/www-community/attacks/CSV_Injection"
660     * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/"
661     * @see "https://cwe.mitre.org/data/definitions/1236.html"
662     */
663    public static boolean isExcelCSVSafe(String csvFilePath) {
664        boolean isSafe;
665        final AtomicInteger recordCount = new AtomicInteger();
666        final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t');
667
668        try {
669            final List<String> payloadsIdentified = new ArrayList<>();
670            try (Reader in = new FileReader(csvFilePath)) {
671                Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
672                records.forEach(record -> {
673                    record.forEach(recordValue -> {
674                        if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) {
675                            payloadsIdentified.add(recordValue);
676                        }
677                        recordCount.getAndIncrement();
678                    });
679                });
680            }
681            isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0);
682        } catch (Exception e) {
683            isSafe = false;
684        }
685
686        return isSafe;
687    }
688
689    /**
690     * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br>
691     * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach.
692     *
693     * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode})
     * @param input          When the processing mode is "protect" then the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used.
695     * @param secret         Secret to use to compute the SHA256 HMAC.
696     * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul>
697     * @throws Exception If any exception occurs.
698     * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html"
699     * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization"
700     * @see "https://portswigger.net/web-security/deserialization"
701     * @see "https://www.baeldung.com/java-serialization-approaches"
702     * @see "https://www.baeldung.com/java-serialization"
703     * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation"
704     * @see "https://en.wikipedia.org/wiki/HMAC"
705     * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/"
706     */
707    public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception {
708        Map<String, Object> results;
709        String resultFormatTemplate = "%s:%s";
710        //Verify input provided to be consistent
711        if (processingMode == null) {
712            throw new IllegalArgumentException("The processing mode is mandatory!");
713        }
714        if (input == null || input.trim().isEmpty()) {
715            throw new IllegalArgumentException("Input data is mandatory!");
716        }
717        if (secret == null || secret.length == 0) {
718            throw new IllegalArgumentException("The HMAC secret is mandatory!");
719        }
720        if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) {
721            throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!");
722        }
723        //Processing
724        Base64.Decoder b64Decoder = Base64.getDecoder();
725        Base64.Encoder b64Encoder = Base64.getEncoder();
726        String hmacAlgorithm = "HmacSHA256";
727        Mac mac = Mac.getInstance(hmacAlgorithm);
728        SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm);
729        mac.init(key);
730        results = new HashMap<>();
731        results.put("PROCESSING_MODE", processingMode.toString());
732        switch (processingMode) {
733            case PROTECT -> {
734                byte[] objectBytes = b64Decoder.decode(input);
735                byte[] hmac = mac.doFinal(objectBytes);
736                String encodedHmac = b64Encoder.encodeToString(hmac);
737                results.put("STATUS", Boolean.TRUE);
738                results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac));
739            }
740            case VALIDATE -> {
741                String[] parts = input.split(":");
742                byte[] objectBytes = b64Decoder.decode(parts[0].trim());
743                byte[] hmacProvided = b64Decoder.decode(parts[1].trim());
744                byte[] hmacComputed = mac.doFinal(objectBytes);
745                String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed);
746                Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed);
747                results.put("STATUS", hmacIsValid);
748                results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed));
749            }
750            default -> throw new IllegalArgumentException("Not supported processing mode!");
751        }
752        return results;
753    }
754
755    /**
756     * Apply a collection of validations on a JSON string provided:<br>
757     * - Real JSON structure.<br>
758     * - Contain less than a specified number of deepness for nested objects or arrays.<br>
759     * - Contain less than a specified number of items in any arrays.<br><br>
760     *
761     * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br>
762     * I used the following assumption:
763     * <ul>
764     *      <li>The character <code>{</code> identify the beginning of an object.</li>
765     *      <li>The character <code>}</code> identify the end of an object.</li>
766     *      <li>The character <code>[</code> identify the beginning of an array.</li>
767     *      <li>The character <code>]</code> identify the end of an array.</li>
768     *      <li>The character <code>"</code> identify the delimiter of a string.</li>
769     *      <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li>
770     * </ul>
771     *
772     * @param json                  String containing the JSON data to validate.
773     * @param maxItemsByArraysCount Maximum number of items allowed in an array.
774     * @param maxDeepnessAllowed    Maximum number nested objects or arrays allowed.
775     * @return True only if the string pass all validations.
776     * @see "https://javaee.github.io/jsonp/"
777     * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306"
778     * @see "https://github.com/InductiveComputerScience/pbJson/issues/2"
779     */
780    public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) {
781        boolean isSafe = false;
782
783        try {
784            //Step 1: Analyse the JSON string
785            int currentDeepness = 0;
786            int currentArrayItemsCount = 0;
787            int maxDeepnessReached = 0;
788            int maxArrayItemsCountReached = 0;
789            boolean currentlyInArray = false;
790            boolean currentlyInString = false;
791            int currentNestedArrayLevel = 0;
792            String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter
793            String work = json.replace(jsonEscapedDoubleQuote, "'");
794            for (char c : work.toCharArray()) {
795                switch (c) {
796                    case '{': {
797                        if (!currentlyInString) {
798                            currentDeepness++;
799                        }
800                        break;
801                    }
802                    case '}': {
803                        if (!currentlyInString) {
804                            currentDeepness--;
805                        }
806                        break;
807                    }
808                    case '[': {
809                        if (!currentlyInString) {
810                            currentDeepness++;
811                            if (currentlyInArray) {
812                                currentNestedArrayLevel++;
813                            }
814                            currentlyInArray = true;
815                        }
816                        break;
817                    }
818                    case ']': {
819                        if (!currentlyInString) {
820                            currentDeepness--;
821                            currentArrayItemsCount = 0;
822                            if (currentNestedArrayLevel > 0) {
823                                currentNestedArrayLevel--;
824                            }
825                            if (currentNestedArrayLevel == 0) {
826                                currentlyInArray = false;
827                            }
828                        }
829                        break;
830                    }
831                    case '"': {
832                        currentlyInString = !currentlyInString;
833                        break;
834                    }
835                    case ',': {
836                        if (!currentlyInString && currentlyInArray) {
837                            currentArrayItemsCount++;
838                        }
839                        break;
840                    }
841                }
842                if (currentDeepness > maxDeepnessReached) {
843                    maxDeepnessReached = currentDeepness;
844                }
845                if (currentArrayItemsCount > maxArrayItemsCountReached) {
846                    maxArrayItemsCountReached = currentArrayItemsCount;
847                }
848            }
849            //Step 2: Apply validation against the value specified as limits
850            isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached));
851
852            //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation.
853            if (isSafe) {
854                JsonReader reader = Json.createReader(new StringReader(json));
855                isSafe = (reader.read() != null);
856            }
857
858        } catch (Exception e) {
859            isSafe = false;
860        }
861        return isSafe;
862    }
863
864    /**
865     * Apply a collection of validations on a image file provided:<br>
866     * - Real image file.<br>
867     * - Its mime type is into the list of allowed mime types.<br>
868     * - Its metadata fields do not contains any characters related to a malicious payloads.<br>
869     *
870     * <br>
871     * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team.
872     * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br>
873     *
874     * @param imageFilePath         Filename of the image file to check.
875     * @param imageAllowedMimeTypes List of image mime types allowed.
876     * @return True only if the file pass all validations.
877     * @see "https://commons.apache.org/proper/commons-imaging/"
878     * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html"
879     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types"
880     * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image"
881     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
882     * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
883     * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java"
884     * @see "https://exiftool.org/examples.html"
885     * @see "https://en.wikipedia.org/wiki/List_of_file_signatures"
886     * @see "https://hexed.it/"
887     * @see "https://github.com/sighook/pixload"
888     */
889    public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) {
890        boolean isSafe = false;
891        Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE);
892        try {
893            File imgFile = new File(imageFilePath);
894            if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) {
895                final byte[] imgBytes = Files.readAllBytes(imgFile.toPath());
896                //Step 1: Check the mime type of the file against the allowed ones
897                ImageInfo imgInfo = Imaging.getImageInfo(imgBytes);
898                if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) {
899                    //Step 2: Load the image into an object using the Image API
900                    BufferedImage imgObject = Imaging.getBufferedImage(imgBytes);
901                    if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) {
902                        //Step 3: Check the metadata if the image format support it - Highly experimental
903                        List<String> metadataWithPayloads = new ArrayList<>();
904                        final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes);
905                        if (imgMetadata != null) {
906                            imgMetadata.getItems().forEach(item -> {
907                                String metadata = item.toString();
908                                if (payloadDetectionRegex.matcher(metadata).find()) {
909                                    metadataWithPayloads.add(metadata);
910                                }
911                            });
912                        }
913                        isSafe = metadataWithPayloads.isEmpty();
914                    }
915                }
916            }
917        } catch (Exception e) {
918            isSafe = false;
919        }
920        return isSafe;
921    }
922
923    /**
     * Rewrite the input file to remove any embedded file that is not embedded using a method supported by the official format of the file.<br>
925     * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details.
926     *
927     * @param inputFilePath Filename of the file to clean up.
928     * @param inputFileType Type of the file provided.
929     * @return A array of bytes with the cleaned file.
930     * @throws IllegalArgumentException If an invalid parameter is passed
931     * @throws Exception                If any technical error during the cleaning processing
932     * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there"
933     * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc"
934     * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc"
935     * @see "https://stackoverflow.com/a/13605411"
936     */
937    public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception {
938        ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
939        File inputFile = new File(inputFilePath);
940        if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) {
941            throw new IllegalArgumentException("Cannot read the content of the input file!");
942        }
943        switch (inputFileType) {
944            case PDF -> {
945                try (PDDocument document = Loader.loadPDF(inputFile)) {
946                    document.save(sanitizedContent);
947                }
948            }
949            case IMAGE -> {
950                // Load the original image
951                BufferedImage originalImage = ImageIO.read(inputFile);
952                String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim();
953                // Check that image has been successfully loaded
954                if (originalImage == null) {
955                    throw new IOException("Cannot load the original image !");
956                }
957                // Get current Width and Height of the image
958                int originalWidth = originalImage.getWidth(null);
959                int originalHeight = originalImage.getHeight(null);
960                // Resize the image by removing 1px on Width and Height
961                Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH);
962                // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size
963                Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH);
964                // Save image to a bytes buffer
965                int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency
966                if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) {
967                    bufferedImageType = BufferedImage.TYPE_INT_RGB;
968                }
969                BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType);
970                Graphics2D drawer = sanitizedImage.createGraphics();
971                drawer.drawImage(initialSizedImage, 0, 0, null);
972                drawer.dispose();
973                ImageIO.write(sanitizedImage, originalFormat, sanitizedContent);
974            }
975            default -> throw new IllegalArgumentException("Type of file not supported !");
976        }
977        if (sanitizedContent.size() == 0) {
978            throw new IOException("An error occur during the rewrite operation!");
979        }
980        return sanitizedContent.toByteArray();
981    }
982
983    /**
984     * Apply a collection of validations on a string expected to be an email address:<br>
985     * - Is a valid email address, from a parser perspective, following RFCs on email addresses.<br>
986     * - Is not using "Encoded-word" format.<br>
987     * - Is not using comment format.<br>
988     * - Is not using "Punycode" format.<br>
989     * - Is not using UUCP style addresses.<br>
990     * - Is not using address literals.<br>
991     * - Is not using source routes.<br>
992     * - Is not using the "percent hack".<br><br>
993     * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br>
994     *
995     * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective.
996     *
997     * @param addr String expected to be a valid email address.
998     * @return True only if the string pass all validations.
999     * @see "https://commons.apache.org/proper/commons-validator/"
1000     * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html"
1001     * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2"
1002     * @see "https://portswigger.net/research/splitting-the-email-atom"
1003     * @see "https://www.jochentopf.com/email/address.html"
1004     * @see "https://en.wikipedia.org/wiki/Email_address"
1005     */
1006    public static boolean isEmailAddress(String addr) {
1007        boolean isValid = false;
1008        String work = addr.toLowerCase(Locale.ROOT);
1009        Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE);
1010        Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE);
1011        try {
1012            //Start with the use of the dedicated EmailValidator from Apache Commons Validator
1013            if (EmailValidator.getInstance(true, true).isValid(work)) {
1014                //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach
1015                if (!encodedWordRegex.matcher(work).find()) {
1016                    //If OK then validate it does not contains punycode
1017                    if (!work.contains("xn--")) {
1018                        //If OK then validate it does not use:
1019                        // UUCP style addresses,
1020                        // Comment format,
1021                        // Address literals,
1022                        // Source routes,
1023                        // The percent hack.
1024                        if (!forbiddenCharacterRegex.matcher(work).find()) {
1025                            isValid = true;
1026                        }
1027                    }
1028                }
1029            }
1030        } catch (Exception e) {
1031            isValid = false;
1032        }
1033        return isValid;
1034    }
1035
1036    /**
1037     * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>.
1038     * <br>
1039     * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>.
1040     * <br>
1041     * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF.
1042     * <br>
1043     * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>.
1044     * <br>
1045     * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker.
1046     *
1047     * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification .
1048     * @return TRUE only if the url point to a Qualified Certificate in PEM format.
1049     * @see "https://www.stet.eu/en/psd2/"
1050     * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf"
1051     * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/"
1052     * @see "https://datatracker.ietf.org/doc/rfc9421/"
1053     * @see "https://openjdk.org/groups/net/httpclient/intro.html"
1054     * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html"
1055     * @see "https://portswigger.net/web-security/ssrf"
1056     * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control"
1057     */
1058    public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) {
1059        boolean isValid = false;
1060        long connectionTimeoutInSeconds = 10;
1061        String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest";
1062        try {
1063            //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET
1064            if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) {
1065                String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1);
1066                if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) {
1067                    //2. Ensure that the URL is a valid url by creating a instance of the class URI
1068                    URI uri = URI.create(certificateUrl);
1069                    //3. Require usage of HTTPS and reject any url containing query parameters
1070                    if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) {
1071                        //4. Perform a HTTP HEAD request in order to get the content type of the remote resource
1072                        //and limit the interest to use the SSRF because to pass the check the url need to:
1073                        //- Do not having any query parameters.
1074                        //- Use HTTPS protocol.
1075                        //- End with a string having the format "_[0-9a-f]{64}".
1076                        //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters.
1077                        HttpResponse<String> response;
1078                        try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) {
1079                            HttpRequest request = HttpRequest.newBuilder()
1080                                    .uri(uri)
1081                                    .timeout(Duration.ofSeconds(connectionTimeoutInSeconds))
1082                                    .method("HEAD", HttpRequest.BodyPublishers.noBody())
1083                                    .header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request
1084                                    .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses
1085                                    .build();
1086                            response = client.send(request, HttpResponse.BodyHandlers.ofString());
1087                            if (response.statusCode() == 200) {
1088                                //5. Ensure that the response content type is "text/plain"
1089                                Optional<String> contentType = response.headers().firstValue("Content-Type");
1090                                isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain"));
1091                            }
1092                        }
1093                    }
1094                }
1095            }
1096        } catch (Exception e) {
1097            isValid = false;
1098        }
1099        return isValid;
1100    }
1101
1102
1103}