001package eu.righettod; 002 003 004import org.apache.commons.csv.CSVFormat; 005import org.apache.commons.csv.CSVRecord; 006import org.apache.commons.imaging.ImageInfo; 007import org.apache.commons.imaging.Imaging; 008import org.apache.commons.imaging.common.ImageMetadata; 009import org.apache.commons.validator.routines.EmailValidator; 010import org.apache.commons.validator.routines.InetAddressValidator; 011import org.apache.pdfbox.Loader; 012import org.apache.pdfbox.pdmodel.PDDocument; 013import org.apache.pdfbox.pdmodel.PDDocumentCatalog; 014import org.apache.pdfbox.pdmodel.PDDocumentInformation; 015import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; 016import org.apache.pdfbox.pdmodel.common.PDMetadata; 017import org.apache.pdfbox.pdmodel.interactive.action.*; 018import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; 019import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; 020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; 021import org.apache.poi.poifs.filesystem.DirectoryEntry; 022import org.apache.poi.poifs.filesystem.POIFSFileSystem; 023import org.apache.poi.poifs.macros.VBAMacroReader; 024import org.apache.tika.detect.DefaultDetector; 025import org.apache.tika.detect.Detector; 026import org.apache.tika.io.TemporaryResources; 027import org.apache.tika.io.TikaInputStream; 028import org.apache.tika.metadata.Metadata; 029import org.apache.tika.mime.MediaType; 030import org.apache.tika.mime.MimeTypes; 031import org.w3c.dom.Document; 032import org.xml.sax.EntityResolver; 033import org.xml.sax.InputSource; 034import org.xml.sax.SAXException; 035 036import javax.crypto.Mac; 037import javax.crypto.spec.SecretKeySpec; 038import javax.imageio.ImageIO; 039import javax.json.Json; 040import javax.json.JsonReader; 041import javax.xml.XMLConstants; 042import javax.xml.parsers.DocumentBuilder; 043import javax.xml.parsers.DocumentBuilderFactory; 044import javax.xml.parsers.ParserConfigurationException; 045import javax.xml.stream.XMLInputFactory; 046import javax.xml.stream.XMLStreamReader; 047import javax.xml.stream.events.XMLEvent; 048import java.awt.*; 049import java.awt.image.BufferedImage; 050import java.io.*; 051import java.net.*; 052import java.net.http.HttpClient; 053import java.net.http.HttpRequest; 054import java.net.http.HttpResponse; 055import java.nio.charset.Charset; 056import java.nio.charset.StandardCharsets; 057import java.nio.file.Files; 058import java.security.MessageDigest; 059import java.time.Duration; 060import java.util.List; 061import java.util.*; 062import java.util.concurrent.atomic.AtomicInteger; 063import java.util.regex.Pattern; 064import java.util.zip.ZipEntry; 065import java.util.zip.ZipFile; 066 067/** 068 * Provides different utilities methods to apply processing from a security perspective.<br> 069 * These code snippet: 070 * <ul> 071 * <li>Can be used, as "foundation", to customize the validation to the app context.</li> 072 * <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li> 073 * <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li> 074 * </ul> 075 */ 076public class SecurityUtils { 077 078 /** 079 * Default constructor: Not needed as the class only provides static methods. 080 */ 081 private SecurityUtils() { 082 } 083 084 /** 085 * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br> 086 * This method consider that format of the PIN code is [0-9]{6,}<br> 087 * Rule to consider a PIN code as weak: 088 * <ul> 089 * <li>Length is inferior to 6 positions.</li> 090 * <li>Contain only the same number or only a sequence of zero.</li> 091 * <li>Contain sequence of following incremental or decremental numbers.</li> 092 * </ul> 093 * 094 * @param pinCode PIN code to verify. 095 * @return True only if the PIN is considered as weak. 096 */ 097 public static boolean isWeakPINCode(String pinCode) { 098 boolean isWeak = true; 099 //Length is inferior to 6 positions 100 //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one 101 //and to ensure that the PIN is not only a sequence of zero 102 if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) { 103 //Contain only the same number 104 String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length()); 105 if (!Pattern.matches(regex, pinCode)) { 106 //Contain sequence of following incremental or decremental numbers 107 char previousChar = 'X'; 108 boolean containSequence = false; 109 for (char c : pinCode.toCharArray()) { 110 if (previousChar != 'X') { 111 int previousNbr = Integer.parseInt(String.valueOf(previousChar)); 112 int currentNbr = Integer.parseInt(String.valueOf(c)); 113 if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) { 114 containSequence = true; 115 break; 116 } 117 } 118 previousChar = c; 119 } 120 if (!containSequence) { 121 isWeak = false; 122 } 123 } 124 } 125 return isWeak; 126 } 127 128 /** 129 * Apply a collection of validations on a Word 97-2003 (binary format) document file provided: 130 * <ul> 131 * <li>Real Microsoft Word 97-2003 document file.</li> 132 * <li>No VBA Macro.<br></li> 133 * <li>No embedded objects.</li> 134 * </ul> 135 * 136 * @param wordFilePath Filename of the Word document file to check. 137 * @return True only if the file pass all validations. 138 * @see "https://poi.apache.org/components/" 139 * @see "https://poi.apache.org/components/document/" 140 * @see "https://poi.apache.org/components/poifs/how-to.html" 141 * @see "https://poi.apache.org/components/poifs/embeded.html" 142 * @see "https://poi.apache.org/" 143 * @see "https://mvnrepository.com/artifact/org.apache.poi/poi" 144 */ 145 public static boolean isWord972003DocumentSafe(String wordFilePath) { 146 boolean isSafe = false; 147 try { 148 File wordFile = new File(wordFilePath); 149 if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) { 150 //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file 151 try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) { 152 //Step 2: Check if the document contains VBA macros, in our case is not allowed 153 VBAMacroReader macroReader = new VBAMacroReader(fs); 154 Map<String, String> macros = macroReader.readMacros(); 155 if (macros == null || macros.isEmpty()) { 156 //Step 3: Check if the document contains any embedded objects, in our case is not allowed 157 //From POI documentation: 158 //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root. 159 //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers. 160 final List<String> embeddedObjectFound = new ArrayList<>(); 161 DirectoryEntry root = fs.getRoot(); 162 if (root.getEntryCount() > 0) { 163 root.iterator().forEachRemaining(entry -> { 164 if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) { 165 DirectoryEntry objPoolDirectory = (DirectoryEntry) entry; 166 if (objPoolDirectory.getEntryCount() > 0) { 167 objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> { 168 if (objPoolDirectoryEntry instanceof DirectoryEntry) { 169 DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry; 170 if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) { 171 objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> { 172 if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) { 173 embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName()); 174 } 175 }); 176 } 177 } 178 }); 179 } 180 } 181 }); 182 } 183 isSafe = embeddedObjectFound.isEmpty(); 184 } 185 } 186 } 187 } catch (Exception e) { 188 isSafe = false; 189 } 190 return isSafe; 191 } 192 193 /** 194 * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions. 195 * 196 * @param xmlFilePath Filename of the XML file to check. 197 * @return True only if the file pass all validations. 198 * @see "https://portswigger.net/web-security/xxe" 199 * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java" 200 * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258" 201 * @see "https://www.w3.org/TR/xinclude-11/" 202 * @see "https://en.wikipedia.org/wiki/XInclude" 203 */ 204 public static boolean isXMLSafe(String xmlFilePath) { 205 boolean isSafe = false; 206 try { 207 File xmlFile = new File(xmlFilePath); 208 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 209 //Step 1a: Verify that the XML file content does not contain any XInclude instructions 210 boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include ")); 211 if (!containXInclude) { 212 //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones 213 //Create an XML document builder throwing Exception if a DOCTYPE instruction is present 214 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 215 dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 216 //Xerces 2 only 217 //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true); 218 dbfInstance.setXIncludeAware(false); 219 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 220 //Parse the document 221 Document doc = builder.parse(xmlFile); 222 isSafe = (doc != null && doc.getDocumentElement() != null); 223 } 224 } 225 } catch (Exception e) { 226 isSafe = false; 227 } 228 return isSafe; 229 } 230 231 232 /** 233 * Extract all URL links from a PDF file provided.<br> 234 * This can be used to apply validation on a PDF against contained links. 235 * 236 * @param pdfFilePath pdfFilePath Filename of the PDF file to process. 237 * @return A List of URL objects that is empty if no links is found. 238 * @throws Exception If any error occurs during the processing of the PDF file. 239 * @see "https://www.gushiciku.cn/pl/21KQ" 240 * @see "https://pdfbox.apache.org/" 241 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 242 */ 243 public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception { 244 final List<URL> links = new ArrayList<>(); 245 File pdfFile = new File(pdfFilePath); 246 try (PDDocument document = Loader.loadPDF(pdfFile)) { 247 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 248 AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() { 249 @Override 250 public boolean accept(PDAnnotation annotation) { 251 boolean keep = false; 252 if (annotation instanceof PDAnnotationLink) { 253 keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI); 254 } 255 return keep; 256 } 257 }; 258 documentCatalog.getPages().forEach(page -> { 259 try { 260 page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> { 261 PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction(); 262 try { 263 URL urlObj = new URL(linkAnnotation.getURI()); 264 if (!links.contains(urlObj)) { 265 links.add(urlObj); 266 } 267 } catch (MalformedURLException e) { 268 throw new RuntimeException(e); 269 } 270 }); 271 } catch (Exception e) { 272 throw new RuntimeException(e); 273 } 274 }); 275 } 276 return links; 277 } 278 279 /** 280 * Apply a collection of validations on a PDF file provided: 281 * <ul> 282 * <li>Real PDF file.</li> 283 * <li>No attachments.</li> 284 * <li>No Javascript code.</li> 285 * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li> 286 * </ul> 287 * 288 * @param pdfFilePath Filename of the PDF file to check. 289 * @return True only if the file pass all validations. 290 * @see "https://stackoverflow.com/a/36161267" 291 * @see "https://www.gushiciku.cn/pl/21KQ" 292 * @see "https://github.com/jonaslejon/malicious-pdf" 293 * @see "https://pdfbox.apache.org/" 294 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 295 */ 296 public static boolean isPDFSafe(String pdfFilePath) { 297 boolean isSafe = false; 298 try { 299 File pdfFile = new File(pdfFilePath); 300 if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) { 301 //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file 302 try (PDDocument document = Loader.loadPDF(pdfFile)) { 303 //Step 2: Check if the file contains attached files, in our case is not allowed 304 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 305 PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog); 306 if (namesDictionary.getEmbeddedFiles() == null) { 307 //Step 3: Check if the file contains Javascript code, in our case is not allowed 308 if (namesDictionary.getJavaScript() == null) { 309 //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed 310 final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>(); 311 AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() { 312 @Override 313 public boolean accept(PDAnnotation annotation) { 314 boolean keep = false; 315 if (annotation instanceof PDAnnotationLink) { 316 PDAnnotationLink link = (PDAnnotationLink) annotation; 317 PDAction action = link.getAction(); 318 if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) { 319 keep = true; 320 } 321 } 322 return keep; 323 } 324 }; 325 documentCatalog.getPages().forEach(page -> { 326 try { 327 notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size()); 328 } catch (IOException e) { 329 throw new RuntimeException(e); 330 } 331 }); 332 if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) { 333 isSafe = true; 334 } 335 } 336 } 337 } 338 } 339 } catch (Exception e) { 340 isSafe = false; 341 } 342 return isSafe; 343 } 344 345 /** 346 * Remove as much as possible metadata from the provided PDF document object. 347 * 348 * @param document PDFBox PDF document object on which metadata must be removed. 349 * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069" 350 * @see "https://pdfbox.apache.org/" 351 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 352 */ 353 public static void clearPDFMetadata(PDDocument document) { 354 if (document != null) { 355 PDDocumentInformation infoEmpty = new PDDocumentInformation(); 356 document.setDocumentInformation(infoEmpty); 357 PDMetadata newMetadataEmpty = new PDMetadata(document); 358 document.getDocumentCatalog().setMetadata(newMetadataEmpty); 359 } 360 } 361 362 363 /** 364 * Validate that the URL provided is really a relative URL. 365 * 366 * @param targetUrl URL to validate. 367 * @return True only if the file pass all validations. 368 * @see "https://portswigger.net/web-security/ssrf" 369 * @see "https://stackoverflow.com/q/6785442" 370 */ 371 public static boolean isRelativeURL(String targetUrl) { 372 boolean isValid = false; 373 //Reject any URL encoded content and URL starting with a double slash 374 //Reject any URL contains credentials or fragment to prevent potential bypasses 375 String work = targetUrl; 376 if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) { 377 //Creation of a URL object must fail 378 try { 379 new URL(work); 380 isValid = false; 381 } catch (MalformedURLException mf) { 382 //Last check to be sure (for prod usage compile the pattern one time) 383 isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find(); 384 } 385 } 386 return isValid; 387 } 388 389 /** 390 * Apply a collection of validations on a ZIP file provided: 391 * <ul> 392 * <li>Real ZIP file.</li> 393 * <li>Contain less than a specified level of deepness.</li> 394 * <li>Do not contain Zip-Slip entry path.</li> 395 * </ul> 396 * 397 * @param zipFilePath Filename of the ZIP file to check. 398 * @param maxLevelDeepness Threshold of deepness above which a ZIP archive will be rejected. 399 * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file. 400 * @return True only if the file pass all validations. 401 * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042" 402 * @see "https://security.snyk.io/research/zip-slip-vulnerability" 403 * @see "https://en.wikipedia.org/wiki/Zip_bomb" 404 * @see "https://github.com/ptoomey3/evilarc" 405 * @see "https://github.com/abdulfatir/ZipBomb" 406 * @see "https://www.baeldung.com/cs/zip-bomb" 407 * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/" 408 * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream" 409 */ 410 public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) { 411 List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz"); 412 boolean isSafe = false; 413 try { 414 File zipFile = new File(zipFilePath); 415 if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) { 416 //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file 417 try (ZipFile zipArch = new ZipFile(zipFile)) { 418 //Step 2: Parse entries 419 long deepness = 0; 420 ZipEntry zipEntry; 421 String entryExtension; 422 String zipEntryName; 423 boolean validationsFailed = false; 424 Enumeration<? extends ZipEntry> entries = zipArch.entries(); 425 while (entries.hasMoreElements()) { 426 zipEntry = entries.nextElement(); 427 zipEntryName = zipEntry.getName(); 428 entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim(); 429 //Step 2a: Check if the current entry is an archive file 430 if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) { 431 validationsFailed = true; 432 break; 433 } 434 //Step 2b: Check that level of deepness is inferior to the threshold specified 435 if (zipEntryName.contains("/")) { 436 //Determine deepness by inspecting the entry name. 437 //Indeed, folder will be represented like this: folder/folder/folder/ 438 //So we can count the number of "/" to identify the deepness of the entry 439 deepness = zipEntryName.chars().filter(ch -> ch == '/').count(); 440 if (deepness > maxLevelDeepness) { 441 validationsFailed = true; 442 break; 443 } 444 } 445 //Step 2c: Check if any entries match pattern of zip slip payload 446 if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) { 447 validationsFailed = true; 448 break; 449 } 450 } 451 if (!validationsFailed) { 452 isSafe = true; 453 } 454 } 455 } 456 } catch (Exception e) { 457 isSafe = false; 458 } 459 return isSafe; 460 } 461 462 /** 463 * Identify the mime type of the content specified (array of bytes).<br> 464 * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required. 465 * 466 * @param content The content as an array of bytes. 467 * @return The mime type in lower case or null if it cannot be identified. 468 * @see "https://twitter.com/righettod/status/1595824709186519041" 469 * @see "https://tika.apache.org/" 470 * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core" 471 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types" 472 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml" 473 */ 474 public static String identifyMimeType(byte[] content) { 475 String mimeType = null; 476 if (content != null && content.length > 0) { 477 Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes()); 478 Metadata metadata = new Metadata(); 479 try { 480 try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) { 481 MediaType mt = detector.detect(tikaInputStream, metadata); 482 if (mt != null) { 483 mimeType = mt.toString().toLowerCase(Locale.ROOT); 484 } 485 } 486 } catch (IOException ioe) { 487 mimeType = null; 488 } 489 } 490 return mimeType; 491 } 492 493 /** 494 * Apply a collection of validations on a string expected to be an public IP address: 495 * <ul> 496 * <li>Is a valid IP v4 or v6 address.</li> 497 * <li>Is public from an Internet perspective.</li> 498 * </ul> 499 * <br> 500 * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded". 501 * <br><br> 502 * <b>Note for IPv6:</b> I used documentation found so it is really experimental! 503 * 504 * @param ip String expected to be a valid IP address. 505 * @return True only if the string pass all validations. 506 * @see "https://commons.apache.org/proper/commons-validator/" 507 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html" 508 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html" 509 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf" 510 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf" 511 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For" 512 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded" 513 * @see "https://ipcisco.com/lesson/ipv6-address/" 514 * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html" 515 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)" 516 * @see "https://www.arin.net/reference/research/statistics/address_filters/" 517 * @see "https://en.wikipedia.org/wiki/Multicast_address" 518 * @see "https://stackoverflow.com/a/5619409" 519 * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf" 520 * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml" 521 * @see "https://developer.android.com/reference/java/net/Inet6Address" 522 * @see "https://en.wikipedia.org/wiki/Unique_local_address" 523 */ 524 public static boolean isPublicIPAddress(String ip) { 525 boolean isValid = false; 526 try { 527 //Quick validation on the string itself based on characters used to compose an IP v4/v6 address 528 if (Pattern.matches("[0-9a-fA-F:.]+", ip)) { 529 //If OK then use the dedicated InetAddressValidator from Apache Commons Validator 530 if (InetAddressValidator.getInstance().isValid(ip)) { 531 //If OK then validate that is an public IP address 532 //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked. 533 InetAddress addr = InetAddress.getByName(ip); 534 isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress()); 535 //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP 536 //For the prefix map, the start of the key indicates if the value is a regex or a string 537 if (isValid && (addr instanceof Inet6Address)) { 538 Map<String, String> prefixes = new HashMap<>(); 539 prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$"); 540 prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$"); 541 prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:"); 542 prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$"); 543 prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$"); 544 prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$"); 545 prefixes.put("STRING_DOCUMENTATION", "2001:db8:"); 546 prefixes.put("STRING_GLOBAL-UNICAST", "2000:"); 547 prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$"); 548 final List<Boolean> results = new ArrayList<>(); 549 final String ipLower = ip.trim().toLowerCase(Locale.ROOT); 550 prefixes.forEach((addressType, expr) -> { 551 String exprLower = expr.trim().toLowerCase(); 552 if (addressType.startsWith("STRING_")) { 553 results.add(ipLower.startsWith(exprLower)); 554 } else { 555 results.add(Pattern.matches(exprLower, ipLower)); 556 } 557 }); 558 isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE)); 559 } 560 } 561 } 562 } catch (Exception e) { 563 isValid = false; 564 } 565 return isValid; 566 } 567 568 /** 569 * Compute a SHA256 hash from an input composed of a collection of strings.<br><br> 570 * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br> 571 * <p> 572 * Example of possible abuse without precautions applied during the hash calculation logic:<br> 573 * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br> 574 * </p> 575 * This method ensure that both hash above will be different.<br><br> 576 * 577 * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character. 578 * 579 * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection. 580 * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null. 581 * @throws Exception If any exception occurs 582 * @see "https://github.com/righettod/code-snippets-security-utils/issues/16" 583 * @see "https://pentesterlab.com/badges/codereview" 584 * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/" 585 * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash" 586 */ 587 public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception { 588 byte[] hash = null; 589 String separator = "|"; 590 if (parts != null && !parts.isEmpty()) { 591 //Ensure that not part is null 592 if (parts.stream().anyMatch(Objects::isNull)) { 593 throw new IllegalArgumentException("No part must be null!"); 594 } 595 //Ensure that the separator is absent from every part 596 if (parts.stream().anyMatch(part -> part.contains(separator))) { 597 throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator)); 598 } 599 MessageDigest digest = MessageDigest.getInstance("SHA-256"); 600 final StringBuilder buffer = new StringBuilder(separator); 601 parts.forEach(p -> { 602 buffer.append(p).append(separator); 603 }); 604 hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8)); 605 } 606 return hash; 607 } 608 609 /** 610 * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br> 611 * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br> 612 * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations. 613 * 614 * @param xmlFilePath Filename of the XML file to check. 615 * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references. 616 * @return True only if the file pass all validations. 617 * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp" 618 * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid" 619 * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html" 620 * @see "https://www.xml.com/pub/98/08/xmlqna0.html" 621 * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397" 622 * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier" 623 */ 624 public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) { 625 boolean isSafe = false; 626 final String errorTemplate = "Non allowed %s ID detected!"; 627 final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>"; 628 final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>"; 629 630 if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) { 631 throw new IllegalArgumentException("At least one SID must be specified!"); 632 } 633 File xmlFile = new File(xmlFilePath); 634 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 635 try { 636 EntityResolver resolverValidator = (publicId, systemId) -> { 637 if (publicId != null) { 638 throw new SAXException(String.format(errorTemplate, "PUBLIC")); 639 } 640 if (!allowedSystemIdentifiers.contains(systemId)) { 641 throw new SAXException(String.format(errorTemplate, "SYSTEM")); 642 } 643 //If it is OK then return a empty DTD/XSD 644 return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD)); 645 }; 646 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 647 dbfInstance.setIgnoringElementContentWhitespace(true); 648 dbfInstance.setXIncludeAware(false); 649 dbfInstance.setValidating(false); 650 dbfInstance.setCoalescing(true); 651 dbfInstance.setIgnoringComments(false); 652 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 653 builder.setEntityResolver(resolverValidator); 654 Document doc = builder.parse(xmlFile); 655 isSafe = (doc != null); 656 } catch (SAXException | IOException | ParserConfigurationException e) { 657 isSafe = false; 658 } 659 } 660 661 return isSafe; 662 } 663 664 /** 665 * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL): 666 * <ul> 667 * <li>Real CSV file.</li> 668 * <li>Do not contains any payload related to a CSV injections.</li> 669 * </ul> 670 * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br> 671 * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br> 672 * 673 * @param csvFilePath Filename of the CSV file to check. 674 * @return True only if the file pass all validations. 675 * @see "https://commons.apache.org/proper/commons-csv/" 676 * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL" 677 * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection" 678 * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/" 679 * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection" 680 * @see "https://owasp.org/www-community/attacks/CSV_Injection" 681 * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/" 682 * @see "https://cwe.mitre.org/data/definitions/1236.html" 683 */ 684 public static boolean isExcelCSVSafe(String csvFilePath) { 685 boolean isSafe; 686 final AtomicInteger recordCount = new AtomicInteger(); 687 final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t'); 688 689 try { 690 final List<String> payloadsIdentified = new ArrayList<>(); 691 try (Reader in = new FileReader(csvFilePath)) { 692 Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); 693 records.forEach(record -> { 694 record.forEach(recordValue -> { 695 if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) { 696 payloadsIdentified.add(recordValue); 697 } 698 recordCount.getAndIncrement(); 699 }); 700 }); 701 } 702 isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0); 703 } catch (Exception e) { 704 isSafe = false; 705 } 706 707 return isSafe; 708 } 709 710 /** 711 * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br> 712 * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach. 713 * 714 * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode}) 715 * @param input When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used. 716 * @param secret Secret to use to compute the SHA256 HMAC. 717 * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul> 718 * @throws Exception If any exception occurs. 719 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html" 720 * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization" 721 * @see "https://portswigger.net/web-security/deserialization" 722 * @see "https://www.baeldung.com/java-serialization-approaches" 723 * @see "https://www.baeldung.com/java-serialization" 724 * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation" 725 * @see "https://en.wikipedia.org/wiki/HMAC" 726 * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/" 727 */ 728 public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception { 729 Map<String, Object> results; 730 String resultFormatTemplate = "%s:%s"; 731 //Verify input provided to be consistent 732 if (processingMode == null) { 733 throw new IllegalArgumentException("The processing mode is mandatory!"); 734 } 735 if (input == null || input.trim().isEmpty()) { 736 throw new IllegalArgumentException("Input data is mandatory!"); 737 } 738 if (secret == null || secret.length == 0) { 739 throw new IllegalArgumentException("The HMAC secret is mandatory!"); 740 } 741 if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) { 742 throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!"); 743 } 744 //Processing 745 Base64.Decoder b64Decoder = Base64.getDecoder(); 746 Base64.Encoder b64Encoder = Base64.getEncoder(); 747 String hmacAlgorithm = "HmacSHA256"; 748 Mac mac = Mac.getInstance(hmacAlgorithm); 749 SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm); 750 mac.init(key); 751 results = new HashMap<>(); 752 results.put("PROCESSING_MODE", processingMode.toString()); 753 switch (processingMode) { 754 case PROTECT -> { 755 byte[] objectBytes = b64Decoder.decode(input); 756 byte[] hmac = mac.doFinal(objectBytes); 757 String encodedHmac = b64Encoder.encodeToString(hmac); 758 results.put("STATUS", Boolean.TRUE); 759 results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac)); 760 } 761 case VALIDATE -> { 762 String[] parts = input.split(":"); 763 byte[] objectBytes = b64Decoder.decode(parts[0].trim()); 764 byte[] hmacProvided = b64Decoder.decode(parts[1].trim()); 765 byte[] hmacComputed = mac.doFinal(objectBytes); 766 String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed); 767 Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed); 768 results.put("STATUS", hmacIsValid); 769 results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed)); 770 } 771 default -> throw new IllegalArgumentException("Not supported processing mode!"); 772 } 773 return results; 774 } 775 776 /** 777 * Apply a collection of validations on a JSON string provided: 778 * <ul> 779 * <li>Real JSON structure.</li> 780 * <li>Contain less than a specified number of deepness for nested objects or arrays.</li> 781 * <li>Contain less than a specified number of items in any arrays.</li> 782 * </ul> 783 * <br> 784 * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br> 785 * I used the following assumption: 786 * <ul> 787 * <li>The character <code>{</code> identify the beginning of an object.</li> 788 * <li>The character <code>}</code> identify the end of an object.</li> 789 * <li>The character <code>[</code> identify the beginning of an array.</li> 790 * <li>The character <code>]</code> identify the end of an array.</li> 791 * <li>The character <code>"</code> identify the delimiter of a string.</li> 792 * <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li> 793 * </ul> 794 * 795 * @param json String containing the JSON data to validate. 796 * @param maxItemsByArraysCount Maximum number of items allowed in an array. 797 * @param maxDeepnessAllowed Maximum number nested objects or arrays allowed. 798 * @return True only if the string pass all validations. 799 * @see "https://javaee.github.io/jsonp/" 800 * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306" 801 * @see "https://github.com/InductiveComputerScience/pbJson/issues/2" 802 */ 803 public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) { 804 boolean isSafe = false; 805 806 try { 807 //Step 1: Analyse the JSON string 808 int currentDeepness = 0; 809 int currentArrayItemsCount = 0; 810 int maxDeepnessReached = 0; 811 int maxArrayItemsCountReached = 0; 812 boolean currentlyInArray = false; 813 boolean currentlyInString = false; 814 int currentNestedArrayLevel = 0; 815 String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter 816 String work = json.replace(jsonEscapedDoubleQuote, "'"); 817 for (char c : work.toCharArray()) { 818 switch (c) { 819 case '{': { 820 if (!currentlyInString) { 821 currentDeepness++; 822 } 823 break; 824 } 825 case '}': { 826 if (!currentlyInString) { 827 currentDeepness--; 828 } 829 break; 830 } 831 case '[': { 832 if (!currentlyInString) { 833 currentDeepness++; 834 if (currentlyInArray) { 835 currentNestedArrayLevel++; 836 } 837 currentlyInArray = true; 838 } 839 break; 840 } 841 case ']': { 842 if (!currentlyInString) { 843 currentDeepness--; 844 currentArrayItemsCount = 0; 845 if (currentNestedArrayLevel > 0) { 846 currentNestedArrayLevel--; 847 } 848 if (currentNestedArrayLevel == 0) { 849 currentlyInArray = false; 850 } 851 } 852 break; 853 } 854 case '"': { 855 currentlyInString = !currentlyInString; 856 break; 857 } 858 case ',': { 859 if (!currentlyInString && currentlyInArray) { 860 currentArrayItemsCount++; 861 } 862 break; 863 } 864 } 865 if (currentDeepness > maxDeepnessReached) { 866 maxDeepnessReached = currentDeepness; 867 } 868 if (currentArrayItemsCount > maxArrayItemsCountReached) { 869 maxArrayItemsCountReached = currentArrayItemsCount; 870 } 871 } 872 //Step 2: Apply validation against the value specified as limits 873 isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached)); 874 875 //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation. 876 if (isSafe) { 877 JsonReader reader = Json.createReader(new StringReader(json)); 878 isSafe = (reader.read() != null); 879 } 880 881 } catch (Exception e) { 882 isSafe = false; 883 } 884 return isSafe; 885 } 886 887 /** 888 * Apply a collection of validations on a image file provided: 889 * <ul> 890 * <li>Real image file.</li> 891 * <li>Its mime type is into the list of allowed mime types.</li> 892 * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li> 893 * </ul> 894 * <br> 895 * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team. 896 * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br> 897 * 898 * @param imageFilePath Filename of the image file to check. 899 * @param imageAllowedMimeTypes List of image mime types allowed. 900 * @return True only if the file pass all validations. 901 * @see "https://commons.apache.org/proper/commons-imaging/" 902 * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html" 903 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types" 904 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image" 905 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 906 * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html" 907 * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java" 908 * @see "https://exiftool.org/examples.html" 909 * @see "https://en.wikipedia.org/wiki/List_of_file_signatures" 910 * @see "https://hexed.it/" 911 * @see "https://github.com/sighook/pixload" 912 */ 913 public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) { 914 boolean isSafe = false; 915 Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE); 916 try { 917 File imgFile = new File(imageFilePath); 918 if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) { 919 final byte[] imgBytes = Files.readAllBytes(imgFile.toPath()); 920 //Step 1: Check the mime type of the file against the allowed ones 921 ImageInfo imgInfo = Imaging.getImageInfo(imgBytes); 922 if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) { 923 //Step 2: Load the image into an object using the Image API 924 BufferedImage imgObject = Imaging.getBufferedImage(imgBytes); 925 if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) { 926 //Step 3: Check the metadata if the image format support it - Highly experimental 927 List<String> metadataWithPayloads = new ArrayList<>(); 928 final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes); 929 if (imgMetadata != null) { 930 imgMetadata.getItems().forEach(item -> { 931 String metadata = item.toString(); 932 if (payloadDetectionRegex.matcher(metadata).find()) { 933 metadataWithPayloads.add(metadata); 934 } 935 }); 936 } 937 isSafe = metadataWithPayloads.isEmpty(); 938 } 939 } 940 } 941 } catch (Exception e) { 942 isSafe = false; 943 } 944 return isSafe; 945 } 946 947 /** 948 * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br> 949 * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details. 950 * 951 * @param inputFilePath Filename of the file to clean up. 952 * @param inputFileType Type of the file provided. 953 * @return A array of bytes with the cleaned file. 954 * @throws IllegalArgumentException If an invalid parameter is passed 955 * @throws Exception If any technical error during the cleaning processing 956 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 957 * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc" 958 * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc" 959 * @see "https://stackoverflow.com/a/13605411" 960 */ 961 public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception { 962 ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream(); 963 File inputFile = new File(inputFilePath); 964 if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) { 965 throw new IllegalArgumentException("Cannot read the content of the input file!"); 966 } 967 switch (inputFileType) { 968 case PDF -> { 969 try (PDDocument document = Loader.loadPDF(inputFile)) { 970 document.save(sanitizedContent); 971 } 972 } 973 case IMAGE -> { 974 // Load the original image 975 BufferedImage originalImage = ImageIO.read(inputFile); 976 String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim(); 977 // Check that image has been successfully loaded 978 if (originalImage == null) { 979 throw new IOException("Cannot load the original image !"); 980 } 981 // Get current Width and Height of the image 982 int originalWidth = originalImage.getWidth(null); 983 int originalHeight = originalImage.getHeight(null); 984 // Resize the image by removing 1px on Width and Height 985 Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH); 986 // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size 987 Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH); 988 // Save image to a bytes buffer 989 int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency 990 if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) { 991 bufferedImageType = BufferedImage.TYPE_INT_RGB; 992 } 993 BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType); 994 Graphics2D drawer = sanitizedImage.createGraphics(); 995 drawer.drawImage(initialSizedImage, 0, 0, null); 996 drawer.dispose(); 997 ImageIO.write(sanitizedImage, originalFormat, sanitizedContent); 998 } 999 default -> throw new IllegalArgumentException("Type of file not supported !"); 1000 } 1001 if (sanitizedContent.size() == 0) { 1002 throw new IOException("An error occur during the rewrite operation!"); 1003 } 1004 return sanitizedContent.toByteArray(); 1005 } 1006 1007 /** 1008 * Apply a collection of validations on a string expected to be an email address: 1009 * <ul> 1010 * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li> 1011 * <li>Is not using "Encoded-word" format.</li> 1012 * <li>Is not using comment format.</li> 1013 * <li>Is not using "Punycode" format.</li> 1014 * <li>Is not using UUCP style addresses.</li> 1015 * <li>Is not using address literals.</li> 1016 * <li>Is not using source routes.</li> 1017 * <li>Is not using the "percent hack".</li> 1018 * </ul><br> 1019 * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br> 1020 * 1021 * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective. 1022 * 1023 * @param addr String expected to be a valid email address. 1024 * @return True only if the string pass all validations. 1025 * @see "https://commons.apache.org/proper/commons-validator/" 1026 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html" 1027 * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2" 1028 * @see "https://portswigger.net/research/splitting-the-email-atom" 1029 * @see "https://www.jochentopf.com/email/address.html" 1030 * @see "https://en.wikipedia.org/wiki/Email_address" 1031 */ 1032 public static boolean isEmailAddress(String addr) { 1033 boolean isValid = false; 1034 String work = addr.toLowerCase(Locale.ROOT); 1035 Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE); 1036 Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE); 1037 try { 1038 //Start with the use of the dedicated EmailValidator from Apache Commons Validator 1039 if (EmailValidator.getInstance(true, true).isValid(work)) { 1040 //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach 1041 if (!encodedWordRegex.matcher(work).find()) { 1042 //If OK then validate it does not contains punycode 1043 if (!work.contains("xn--")) { 1044 //If OK then validate it does not use: 1045 // UUCP style addresses, 1046 // Comment format, 1047 // Address literals, 1048 // Source routes, 1049 // The percent hack. 1050 if (!forbiddenCharacterRegex.matcher(work).find()) { 1051 isValid = true; 1052 } 1053 } 1054 } 1055 } 1056 } catch (Exception e) { 1057 isValid = false; 1058 } 1059 return isValid; 1060 } 1061 1062 /** 1063 * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>. 1064 * <br> 1065 * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>. 1066 * <br> 1067 * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF. 1068 * <br> 1069 * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>. 1070 * <br> 1071 * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker. 1072 * 1073 * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification . 1074 * @return TRUE only if the url point to a Qualified Certificate in PEM format. 1075 * @see "https://www.stet.eu/en/psd2/" 1076 * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf" 1077 * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/" 1078 * @see "https://datatracker.ietf.org/doc/rfc9421/" 1079 * @see "https://openjdk.org/groups/net/httpclient/intro.html" 1080 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html" 1081 * @see "https://portswigger.net/web-security/ssrf" 1082 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control" 1083 */ 1084 public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) { 1085 boolean isValid = false; 1086 long connectionTimeoutInSeconds = 10; 1087 String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest"; 1088 try { 1089 //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET 1090 if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) { 1091 String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1); 1092 if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) { 1093 //2. Ensure that the URL is a valid url by creating a instance of the class URI 1094 URI uri = URI.create(certificateUrl); 1095 //3. Require usage of HTTPS and reject any url containing query parameters 1096 if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) { 1097 //4. Perform a HTTP HEAD request in order to get the content type of the remote resource 1098 //and limit the interest to use the SSRF because to pass the check the url need to: 1099 //- Do not having any query parameters. 1100 //- Use HTTPS protocol. 1101 //- End with a string having the format "_[0-9a-f]{64}". 1102 //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters. 1103 HttpResponse<String> response; 1104 try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) { 1105 HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request 1106 .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses 1107 .build(); 1108 response = client.send(request, HttpResponse.BodyHandlers.ofString()); 1109 if (response.statusCode() == 200) { 1110 //5. Ensure that the response content type is "text/plain" 1111 Optional<String> contentType = response.headers().firstValue("Content-Type"); 1112 isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain")); 1113 } 1114 } 1115 } 1116 } 1117 } 1118 } catch (Exception e) { 1119 isValid = false; 1120 } 1121 return isValid; 1122 } 1123 1124 /** 1125 * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached. 1126 * 1127 * @param encodedData URL encoded data. 1128 * @param decodingRoundThreshold Threshold above which decoding will fail. 1129 * @return The decoded data. 1130 * @throws SecurityException If the threshold is reached. 1131 * @see "https://en.wikipedia.org/wiki/Percent-encoding" 1132 * @see "https://owasp.org/www-community/Double_Encoding" 1133 * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings" 1134 * @see "https://capec.mitre.org/data/definitions/120.html" 1135 */ 1136 public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException { 1137 if (decodingRoundThreshold < 1) { 1138 throw new IllegalArgumentException("Threshold must be a positive number !"); 1139 } 1140 if (encodedData == null) { 1141 throw new IllegalArgumentException("Data provided must not be null !"); 1142 } 1143 Charset charset = StandardCharsets.UTF_8; 1144 int currentDecodingRound = 0; 1145 boolean isFinished = false; 1146 String currentRoundData = encodedData; 1147 String previousRoundData = encodedData; 1148 while (!isFinished) { 1149 if (currentDecodingRound > decodingRoundThreshold) { 1150 throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold)); 1151 } 1152 currentRoundData = URLDecoder.decode(currentRoundData, charset); 1153 isFinished = currentRoundData.equals(previousRoundData); 1154 previousRoundData = currentRoundData; 1155 currentDecodingRound++; 1156 } 1157 return currentRoundData; 1158 } 1159 1160 /** 1161 * Apply a collection of validations on a string expected to be an system file/folder path: 1162 * <ul> 1163 * <li>Does not contains path traversal payload.</li> 1164 * <li>The canonical path is equals to the absolute path.</li> 1165 * </ul><br> 1166 * 1167 * @param path String expected to be a valid system file/folder path. 1168 * @return True only if the string pass all validations. 1169 * @see "https://portswigger.net/web-security/file-path-traversal" 1170 * @see "https://learn.snyk.io/lesson/directory-traversal/" 1171 * @see "https://capec.mitre.org/data/definitions/126.html" 1172 * @see "https://owasp.org/www-community/attacks/Path_Traversal" 1173 */ 1174 public static boolean isPathSafe(String path) { 1175 boolean isSafe = false; 1176 int decodingRoundThreshold = 3; 1177 try { 1178 if (path != null && !path.isEmpty()) { 1179 //URL decode the path if case of data coming from a web context 1180 String decodedPath = applyURLDecoding(path, decodingRoundThreshold); 1181 //Ensure that no path traversal expression is present 1182 if (!decodedPath.contains("..")) { 1183 File f = new File(decodedPath); 1184 String canonicalPath = f.getCanonicalPath(); 1185 String absolutePath = f.getAbsolutePath(); 1186 isSafe = canonicalPath.equals(absolutePath); 1187 } 1188 } 1189 } catch (Exception e) { 1190 isSafe = false; 1191 } 1192 return isSafe; 1193 } 1194 1195 /** 1196 * Identify if an XML contains any XML comments or have any XSL processing instructions.<br> 1197 * Stream reader based parsing is used to support large XML tree. 1198 * 1199 * @param xmlFilePath Filename of the XML file to check. 1200 * @return True only if XML comments or XSL processing instructions are identified. 1201 * @see "https://www.tutorialspoint.com/xml/xml_processing.htm" 1202 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html" 1203 * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion" 1204 * @see "https://www.w3.org/Style/styling-XML.en.html" 1205 */ 1206 public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) { 1207 boolean itemsDetected = false; 1208 try { 1209 //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks 1210 XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); 1211 xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 1212 xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1213 xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); 1214 xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); 1215 1216 //Parse file 1217 try (FileInputStream fis = new FileInputStream(xmlFilePath)) { 1218 XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis); 1219 int eventType; 1220 while (reader.hasNext() && !itemsDetected) { 1221 eventType = reader.next(); 1222 if (eventType == XMLEvent.COMMENT) { 1223 itemsDetected = true; 1224 } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) { 1225 itemsDetected = true; 1226 } 1227 } 1228 } 1229 } catch (Exception e) { 1230 //In case of error then assume that the check failed 1231 itemsDetected = true; 1232 } 1233 return itemsDetected; 1234 } 1235}