001package eu.righettod; 002 003 004import com.auth0.jwt.interfaces.DecodedJWT; 005import org.apache.batik.anim.dom.SAXSVGDocumentFactory; 006import org.apache.batik.util.XMLResourceDescriptor; 007import org.apache.commons.csv.CSVFormat; 008import org.apache.commons.csv.CSVRecord; 009import org.apache.commons.imaging.ImageInfo; 010import org.apache.commons.imaging.Imaging; 011import org.apache.commons.imaging.common.ImageMetadata; 012import org.apache.commons.validator.routines.CreditCardValidator; 013import org.apache.commons.validator.routines.EmailValidator; 014import org.apache.commons.validator.routines.InetAddressValidator; 015import org.apache.pdfbox.Loader; 016import org.apache.pdfbox.pdmodel.PDDocument; 017import org.apache.pdfbox.pdmodel.PDDocumentCatalog; 018import org.apache.pdfbox.pdmodel.PDDocumentInformation; 019import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; 020import org.apache.pdfbox.pdmodel.common.PDMetadata; 021import org.apache.pdfbox.pdmodel.interactive.action.*; 022import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; 023import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; 024import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; 025import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; 026import org.apache.poi.poifs.filesystem.DirectoryEntry; 027import org.apache.poi.poifs.filesystem.POIFSFileSystem; 028import org.apache.poi.poifs.macros.VBAMacroReader; 029import org.apache.tika.detect.DefaultDetector; 030import org.apache.tika.detect.Detector; 031import org.apache.tika.io.TemporaryResources; 032import org.apache.tika.io.TikaInputStream; 033import org.apache.tika.metadata.Metadata; 034import org.apache.tika.mime.MediaType; 035import org.apache.tika.mime.MimeTypes; 036import org.iban4j.IbanUtil; 037import org.owasp.html.HtmlPolicyBuilder; 038import org.owasp.html.PolicyFactory; 039import org.w3c.dom.Document; 040import org.w3c.dom.svg.SVGDocument; 041import 
org.xml.sax.EntityResolver; 042import org.xml.sax.InputSource; 043import org.xml.sax.SAXException; 044 045import javax.crypto.Mac; 046import javax.crypto.spec.SecretKeySpec; 047import javax.imageio.ImageIO; 048import javax.json.Json; 049import javax.json.JsonReader; 050import javax.xml.XMLConstants; 051import javax.xml.parsers.DocumentBuilder; 052import javax.xml.parsers.DocumentBuilderFactory; 053import javax.xml.parsers.ParserConfigurationException; 054import javax.xml.stream.XMLInputFactory; 055import javax.xml.stream.XMLStreamReader; 056import javax.xml.stream.events.XMLEvent; 057import javax.xml.validation.Schema; 058import javax.xml.validation.SchemaFactory; 059import java.awt.*; 060import java.awt.image.BufferedImage; 061import java.io.*; 062import java.net.*; 063import java.net.http.HttpClient; 064import java.net.http.HttpRequest; 065import java.net.http.HttpResponse; 066import java.nio.ByteBuffer; 067import java.nio.charset.Charset; 068import java.nio.charset.StandardCharsets; 069import java.nio.file.Files; 070import java.nio.file.Paths; 071import java.security.MessageDigest; 072import java.security.SecureRandom; 073import java.time.Duration; 074import java.time.LocalDate; 075import java.time.YearMonth; 076import java.time.ZoneId; 077import java.util.*; 078import java.util.List; 079import java.util.concurrent.*; 080import java.util.concurrent.atomic.AtomicInteger; 081import java.util.regex.Matcher; 082import java.util.regex.Pattern; 083import java.util.zip.GZIPInputStream; 084import java.util.zip.ZipEntry; 085import java.util.zip.ZipFile; 086 087/** 088 * Provides different utilities methods to apply processing from a security perspective.<br> 089 * These code snippet: 090 * <ul> 091 * <li>Can be used, as "foundation", to customize the validation to the app context.</li> 092 * <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li> 093 * <li>Were centralized on one class to be able to enhance them 
across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li>
 * </ul>
 * <br>
 * <a href="https://github.com/righettod/code-snippets-security-utils">GitHub repository</a>.<br><br>
 * <a href="https://github.com/righettod/code-snippets-security-utils/blob/main/src/main/java/eu/righettod/SecurityUtils.java">Source code of the class</a>.
 */
public class SecurityUtils {
    /**
     * Private constructor: instantiation is not needed as the class only provides static methods.
     */
    private SecurityUtils() {
    }

    /**
     * Apply a collection of validations to verify whether a provided PIN code is considered weak (easy to guess) or not.<br>
     * This method considers that the format of the PIN code is [0-9]{6,}<br>
     * Rules to consider a PIN code as weak:
     * <ul>
     * <li>Length is inferior to 6 positions.</li>
     * <li>Contains only the same number or only a sequence of zero.</li>
     * <li>Contains a sequence of following incremental or decremental numbers.</li>
     * </ul>
     *
     * @param pinCode PIN code to verify.
     * @return True only if the PIN is considered as weak.
118 */ 119 public static boolean isWeakPINCode(String pinCode) { 120 boolean isWeak = true; 121 //Length is inferior to 6 positions 122 //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one 123 //and to ensure that the PIN is not only a sequence of zero 124 if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) { 125 //Contain only the same number 126 String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length()); 127 if (!Pattern.matches(regex, pinCode)) { 128 //Contain sequence of following incremental or decremental numbers 129 char previousChar = 'X'; 130 boolean containSequence = false; 131 for (char c : pinCode.toCharArray()) { 132 if (previousChar != 'X') { 133 int previousNbr = Integer.parseInt(String.valueOf(previousChar)); 134 int currentNbr = Integer.parseInt(String.valueOf(c)); 135 if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) { 136 containSequence = true; 137 break; 138 } 139 } 140 previousChar = c; 141 } 142 if (!containSequence) { 143 isWeak = false; 144 } 145 } 146 } 147 return isWeak; 148 } 149 150 /** 151 * Apply a collection of validations on a Word 97-2003 (binary format) document file provided: 152 * <ul> 153 * <li>Real Microsoft Word 97-2003 document file.</li> 154 * <li>No VBA Macro.<br></li> 155 * <li>No embedded objects.</li> 156 * </ul> 157 * 158 * @param wordFilePath Filename of the Word document file to check. 159 * @return True only if the file pass all validations. 
160 * @see "https://poi.apache.org/components/" 161 * @see "https://poi.apache.org/components/document/" 162 * @see "https://poi.apache.org/components/poifs/how-to.html" 163 * @see "https://poi.apache.org/components/poifs/embeded.html" 164 * @see "https://poi.apache.org/" 165 * @see "https://mvnrepository.com/artifact/org.apache.poi/poi" 166 */ 167 public static boolean isWord972003DocumentSafe(String wordFilePath) { 168 boolean isSafe = false; 169 try { 170 File wordFile = new File(wordFilePath); 171 if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) { 172 //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file 173 try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) { 174 //Step 2: Check if the document contains VBA macros, in our case is not allowed 175 VBAMacroReader macroReader = new VBAMacroReader(fs); 176 Map<String, String> macros = macroReader.readMacros(); 177 if (macros == null || macros.isEmpty()) { 178 //Step 3: Check if the document contains any embedded objects, in our case is not allowed 179 //From POI documentation: 180 //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root. 181 //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers. 
182 final List<String> embeddedObjectFound = new ArrayList<>(); 183 DirectoryEntry root = fs.getRoot(); 184 if (root.getEntryCount() > 0) { 185 root.iterator().forEachRemaining(entry -> { 186 if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) { 187 DirectoryEntry objPoolDirectory = (DirectoryEntry) entry; 188 if (objPoolDirectory.getEntryCount() > 0) { 189 objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> { 190 if (objPoolDirectoryEntry instanceof DirectoryEntry) { 191 DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry; 192 if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) { 193 objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> { 194 if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) { 195 embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName()); 196 } 197 }); 198 } 199 } 200 }); 201 } 202 } 203 }); 204 } 205 isSafe = embeddedObjectFound.isEmpty(); 206 } 207 } 208 } 209 } catch (Exception e) { 210 isSafe = false; 211 } 212 return isSafe; 213 } 214 215 /** 216 * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions. 217 * 218 * @param xmlFilePath Filename of the XML file to check. 219 * @return True only if the file pass all validations. 
220 * @see "https://portswigger.net/web-security/xxe" 221 * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java" 222 * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258" 223 * @see "https://www.w3.org/TR/xinclude-11/" 224 * @see "https://en.wikipedia.org/wiki/XInclude" 225 */ 226 public static boolean isXMLSafe(String xmlFilePath) { 227 boolean isSafe = false; 228 try { 229 File xmlFile = new File(xmlFilePath); 230 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 231 //Step 1a: Verify that the XML file content does not contain any XInclude instructions 232 boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include ")); 233 if (!containXInclude) { 234 //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones 235 //Create an XML document builder throwing Exception if a DOCTYPE instruction is present 236 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 237 dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 238 //Xerces 2 only 239 //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true); 240 dbfInstance.setXIncludeAware(false); 241 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 242 //Parse the document 243 Document doc = builder.parse(xmlFile); 244 isSafe = (doc != null && doc.getDocumentElement() != null); 245 } 246 } 247 } catch (Exception e) { 248 isSafe = false; 249 } 250 return isSafe; 251 } 252 253 254 /** 255 * Extract all URL links from a PDF file provided.<br> 256 * This can be used to apply validation on a PDF against contained links. 257 * 258 * @param pdfFilePath pdfFilePath Filename of the PDF file to process. 
259 * @return A List of URL objects that is empty if no links is found. 260 * @throws Exception If any error occurs during the processing of the PDF file. 261 * @see "https://www.gushiciku.cn/pl/21KQ" 262 * @see "https://pdfbox.apache.org/" 263 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 264 */ 265 public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception { 266 final List<URL> links = new ArrayList<>(); 267 File pdfFile = new File(pdfFilePath); 268 try (PDDocument document = Loader.loadPDF(pdfFile)) { 269 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 270 AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() { 271 @Override 272 public boolean accept(PDAnnotation annotation) { 273 boolean keep = false; 274 if (annotation instanceof PDAnnotationLink) { 275 keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI); 276 } 277 return keep; 278 } 279 }; 280 documentCatalog.getPages().forEach(page -> { 281 try { 282 page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> { 283 PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction(); 284 try { 285 URL urlObj = new URL(linkAnnotation.getURI()); 286 if (!links.contains(urlObj)) { 287 links.add(urlObj); 288 } 289 } catch (MalformedURLException e) { 290 throw new RuntimeException(e); 291 } 292 }); 293 } catch (Exception e) { 294 throw new RuntimeException(e); 295 } 296 }); 297 } 298 return links; 299 } 300 301 /** 302 * Apply a collection of validations on a PDF file provided: 303 * <ul> 304 * <li>Real PDF file.</li> 305 * <li>No attachments.</li> 306 * <li>No Javascript code.</li> 307 * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li> 308 * <li>No XFA forms in order to prevent exposure to XXE/SSRF like CVE-2025-54988.</li> 309 * </ul> 310 * 311 * @param pdfFilePath Filename of the PDF file to check. 312 * @return True only if the file pass all validations. 
313 * @see "https://stackoverflow.com/a/36161267" 314 * @see "https://www.gushiciku.cn/pl/21KQ" 315 * @see "https://github.com/jonaslejon/malicious-pdf" 316 * @see "https://pdfbox.apache.org/" 317 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 318 * @see "https://nvd.nist.gov/vuln/detail/CVE-2025-54988" 319 * @see "https://github.com/mgthuramoemyint/POC-CVE-2025-54988" 320 * @see "https://en.wikipedia.org/wiki/XFA" 321 */ 322 public static boolean isPDFSafe(String pdfFilePath) { 323 boolean isSafe = false; 324 try { 325 File pdfFile = new File(pdfFilePath); 326 if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) { 327 //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file 328 try (PDDocument document = Loader.loadPDF(pdfFile)) { 329 //Step 2: Check if the file contains attached files, in our case is not allowed 330 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 331 PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog); 332 if (namesDictionary.getEmbeddedFiles() == null) { 333 //Step 3: Check if the file contains any XFA forms 334 PDAcroForm acroForm = documentCatalog.getAcroForm(); 335 boolean hasForm = (acroForm != null && acroForm.getXFA() != null); 336 if (!hasForm) { 337 //Step 4: Check if the file contains Javascript code, in our case is not allowed 338 if (namesDictionary.getJavaScript() == null) { 339 //Step 5: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed 340 final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>(); 341 AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() { 342 @Override 343 public boolean accept(PDAnnotation annotation) { 344 boolean keep = false; 345 if (annotation instanceof PDAnnotationLink) { 346 PDAnnotationLink link = (PDAnnotationLink) annotation; 347 PDAction action = link.getAction(); 348 if ((action instanceof 
PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) { 349 keep = true; 350 } 351 } 352 return keep; 353 } 354 }; 355 documentCatalog.getPages().forEach(page -> { 356 try { 357 notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size()); 358 } catch (IOException e) { 359 throw new RuntimeException(e); 360 } 361 }); 362 if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) { 363 isSafe = true; 364 } 365 } 366 } 367 } 368 } 369 } 370 } catch (Exception e) { 371 isSafe = false; 372 } 373 return isSafe; 374 } 375 376 /** 377 * Remove as much as possible metadata from the provided PDF document object. 378 * 379 * @param document PDFBox PDF document object on which metadata must be removed. 380 * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069" 381 * @see "https://pdfbox.apache.org/" 382 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 383 */ 384 public static void clearPDFMetadata(PDDocument document) { 385 if (document != null) { 386 PDDocumentInformation infoEmpty = new PDDocumentInformation(); 387 document.setDocumentInformation(infoEmpty); 388 PDMetadata newMetadataEmpty = new PDMetadata(document); 389 document.getDocumentCatalog().setMetadata(newMetadataEmpty); 390 } 391 } 392 393 394 /** 395 * Validate that the URL provided is really a relative URL. 396 * 397 * @param targetUrl URL to validate. 398 * @return True only if the file pass all validations. 
399 * @see "https://portswigger.net/web-security/ssrf" 400 * @see "https://stackoverflow.com/q/6785442" 401 */ 402 public static boolean isRelativeURL(String targetUrl) { 403 boolean isValid = false; 404 String work = targetUrl; 405 Pattern startingPrefix = Pattern.compile("^[/a-zA-Z0-9\\-_].*"); 406 //Reject any URL no starting with a slash, letter, number, dash, or underscore 407 if (startingPrefix.matcher(work).find()) { 408 //Reject any URL encoded content and URL starting with a double slash 409 if (!work.startsWith("//") && !work.contains("%")) { 410 //Try to create en URI object 411 try { 412 URI u = new URI(work); 413 //Scheme must be null 414 if (u.getScheme() == null) { 415 isValid = (!u.isAbsolute()); 416 } 417 } catch (URISyntaxException mf) { 418 isValid = false; 419 } 420 } 421 } 422 423 return isValid; 424 } 425 426 /** 427 * Apply a collection of validations on a ZIP file provided: 428 * <ul> 429 * <li>Real ZIP file.</li> 430 * <li>Contain less than a specified level of deepness.</li> 431 * <li>Do not contain Zip-Slip entry path.</li> 432 * </ul> 433 * 434 * @param zipFilePath Filename of the ZIP file to check. 435 * @param maxLevelDeepness Threshold of deepness above which a ZIP archive will be rejected. 436 * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file. 437 * @return True only if the file pass all validations. 
438 * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042" 439 * @see "https://security.snyk.io/research/zip-slip-vulnerability" 440 * @see "https://en.wikipedia.org/wiki/Zip_bomb" 441 * @see "https://github.com/ptoomey3/evilarc" 442 * @see "https://github.com/abdulfatir/ZipBomb" 443 * @see "https://www.baeldung.com/cs/zip-bomb" 444 * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/" 445 * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream" 446 */ 447 public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) { 448 List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz"); 449 boolean isSafe = false; 450 try { 451 File zipFile = new File(zipFilePath); 452 if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) { 453 //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file 454 try (ZipFile zipArch = new ZipFile(zipFile)) { 455 //Step 2: Parse entries 456 long deepness = 0; 457 ZipEntry zipEntry; 458 String entryExtension; 459 String zipEntryName; 460 boolean validationsFailed = false; 461 Enumeration<? extends ZipEntry> entries = zipArch.entries(); 462 while (entries.hasMoreElements()) { 463 zipEntry = entries.nextElement(); 464 zipEntryName = zipEntry.getName(); 465 entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim(); 466 //Step 2a: Check if the current entry is an archive file 467 if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) { 468 validationsFailed = true; 469 break; 470 } 471 //Step 2b: Check that level of deepness is inferior to the threshold specified 472 if (zipEntryName.contains("/")) { 473 //Determine deepness by inspecting the entry name. 
474 //Indeed, folder will be represented like this: folder/folder/folder/ 475 //So we can count the number of "/" to identify the deepness of the entry 476 deepness = zipEntryName.chars().filter(ch -> ch == '/').count(); 477 if (deepness > maxLevelDeepness) { 478 validationsFailed = true; 479 break; 480 } 481 } 482 //Step 2c: Check if any entries match pattern of zip slip payload 483 if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) { 484 validationsFailed = true; 485 break; 486 } 487 } 488 if (!validationsFailed) { 489 isSafe = true; 490 } 491 } 492 } 493 } catch (Exception e) { 494 isSafe = false; 495 } 496 return isSafe; 497 } 498 499 /** 500 * Identify the mime type of the content specified (array of bytes).<br> 501 * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required. 502 * 503 * @param content The content as an array of bytes. 504 * @return The mime type in lower case or null if it cannot be identified. 
505 * @see "https://twitter.com/righettod/status/1595824709186519041" 506 * @see "https://tika.apache.org/" 507 * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core" 508 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types" 509 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml" 510 */ 511 public static String identifyMimeType(byte[] content) { 512 String mimeType = null; 513 if (content != null && content.length > 0) { 514 Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes()); 515 Metadata metadata = new Metadata(); 516 try { 517 try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) { 518 MediaType mt = detector.detect(tikaInputStream, metadata); 519 if (mt != null) { 520 mimeType = mt.toString().toLowerCase(Locale.ROOT); 521 } 522 } 523 } catch (IOException ioe) { 524 mimeType = null; 525 } 526 } 527 return mimeType; 528 } 529 530 /** 531 * Apply a collection of validations on a string expected to be an public IP address: 532 * <ul> 533 * <li>Is a valid IP v4 or v6 address.</li> 534 * <li>Is public from an Internet perspective.</li> 535 * </ul> 536 * <br> 537 * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded". 538 * <br><br> 539 * <b>Note for IPv6:</b> I used documentation found so it is really experimental! 540 * 541 * @param ip String expected to be a valid IP address. 542 * @return True only if the string pass all validations. 
543 * @see "https://commons.apache.org/proper/commons-validator/" 544 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html" 545 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html" 546 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf" 547 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf" 548 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For" 549 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded" 550 * @see "https://ipcisco.com/lesson/ipv6-address/" 551 * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html" 552 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)" 553 * @see "https://www.arin.net/reference/research/statistics/address_filters/" 554 * @see "https://en.wikipedia.org/wiki/Multicast_address" 555 * @see "https://stackoverflow.com/a/5619409" 556 * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf" 557 * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml" 558 * @see "https://developer.android.com/reference/java/net/Inet6Address" 559 * @see "https://en.wikipedia.org/wiki/Unique_local_address" 560 */ 561 public static boolean isPublicIPAddress(String ip) { 562 boolean isValid = false; 563 try { 564 //Quick validation on the string itself based on characters used to compose an IP v4/v6 address 565 if (Pattern.matches("[0-9a-fA-F:.]+", ip)) { 566 //If OK then use the dedicated InetAddressValidator from Apache Commons Validator 567 if (InetAddressValidator.getInstance().isValid(ip)) { 568 
//If OK then validate that is an public IP address 569 //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked. 570 InetAddress addr = InetAddress.getByName(ip); 571 isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress()); 572 //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP 573 //For the prefix map, the start of the key indicates if the value is a regex or a string 574 if (isValid && (addr instanceof Inet6Address)) { 575 Map<String, String> prefixes = new HashMap<>(); 576 prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$"); 577 prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$"); 578 prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:"); 579 prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$"); 580 prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$"); 581 prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$"); 582 prefixes.put("STRING_DOCUMENTATION", "2001:db8:"); 583 prefixes.put("STRING_GLOBAL-UNICAST", "2000:"); 584 prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$"); 585 final List<Boolean> results = new ArrayList<>(); 586 final String ipLower = ip.trim().toLowerCase(Locale.ROOT); 587 prefixes.forEach((addressType, expr) -> { 588 String exprLower = expr.trim().toLowerCase(); 589 if (addressType.startsWith("STRING_")) { 590 results.add(ipLower.startsWith(exprLower)); 591 } else { 592 results.add(Pattern.matches(exprLower, ipLower)); 593 } 594 }); 595 isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE)); 596 } 597 } 598 } 599 } catch (Exception e) { 600 isValid = false; 601 } 602 return isValid; 603 } 604 605 /** 606 * Compute a SHA256 hash from an input composed of a collection of strings.<br><br> 607 * This method take care to build the source string in a way to prevent this source string 
to be prone to abuse targeting the different parts composing it.<br><br> 608 * <p> 609 * Example of possible abuse without precautions applied during the hash calculation logic:<br> 610 * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br> 611 * </p> 612 * This method ensure that both hash above will be different.<br><br> 613 * 614 * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character. 615 * 616 * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection. 617 * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null. 618 * @throws Exception If any exception occurs 619 * @see "https://github.com/righettod/code-snippets-security-utils/issues/16" 620 * @see "https://pentesterlab.com/badges/codereview" 621 * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/" 622 * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash" 623 */ 624 public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception { 625 byte[] hash = null; 626 String separator = "|"; 627 if (parts != null && !parts.isEmpty()) { 628 //Ensure that not part is null 629 if (parts.stream().anyMatch(Objects::isNull)) { 630 throw new IllegalArgumentException("No part must be null!"); 631 } 632 //Ensure that the separator is absent from every part 633 if (parts.stream().anyMatch(part -> part.contains(separator))) { 634 throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator)); 635 } 636 MessageDigest digest = 
MessageDigest.getInstance("SHA-256"); 637 final StringBuilder buffer = new StringBuilder(separator); 638 parts.forEach(p -> { 639 buffer.append(p).append(separator); 640 }); 641 hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8)); 642 } 643 return hash; 644 } 645 646 /** 647 * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br> 648 * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br> 649 * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations. 650 * 651 * @param xmlFilePath Filename of the XML file to check. 652 * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references. 653 * @return True only if the file pass all validations. 
654 * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp" 655 * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid" 656 * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html" 657 * @see "https://www.xml.com/pub/98/08/xmlqna0.html" 658 * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397" 659 * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier" 660 */ 661 public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) { 662 boolean isSafe = false; 663 final String errorTemplate = "Non allowed %s ID detected!"; 664 final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>"; 665 final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>"; 666 667 if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) { 668 throw new IllegalArgumentException("At least one SID must be specified!"); 669 } 670 File xmlFile = new File(xmlFilePath); 671 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 672 try { 673 EntityResolver resolverValidator = (publicId, systemId) -> { 674 if (publicId != null) { 675 throw new SAXException(String.format(errorTemplate, "PUBLIC")); 676 } 677 if (!allowedSystemIdentifiers.contains(systemId)) { 678 throw new SAXException(String.format(errorTemplate, "SYSTEM")); 679 } 680 //If it is OK then return a empty DTD/XSD 681 return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? 
emptyFakeDTD : emptyFakeXSD)); 682 }; 683 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 684 dbfInstance.setIgnoringElementContentWhitespace(true); 685 dbfInstance.setXIncludeAware(false); 686 dbfInstance.setValidating(false); 687 dbfInstance.setCoalescing(true); 688 dbfInstance.setIgnoringComments(false); 689 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 690 builder.setEntityResolver(resolverValidator); 691 Document doc = builder.parse(xmlFile); 692 isSafe = (doc != null); 693 } catch (SAXException | IOException | ParserConfigurationException e) { 694 isSafe = false; 695 } 696 } 697 698 return isSafe; 699 } 700 701 /** 702 * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL): 703 * <ul> 704 * <li>Real CSV file.</li> 705 * <li>Do not contains any payload related to a CSV injections.</li> 706 * </ul> 707 * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br> 708 * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br> 709 * 710 * @param csvFilePath Filename of the CSV file to check. 711 * @return True only if the file pass all validations. 
712 * @see "https://commons.apache.org/proper/commons-csv/" 713 * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL" 714 * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection" 715 * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/" 716 * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection" 717 * @see "https://owasp.org/www-community/attacks/CSV_Injection" 718 * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/" 719 * @see "https://cwe.mitre.org/data/definitions/1236.html" 720 */ 721 public static boolean isExcelCSVSafe(String csvFilePath) { 722 boolean isSafe; 723 final AtomicInteger recordCount = new AtomicInteger(); 724 final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t'); 725 726 try { 727 final List<String> payloadsIdentified = new ArrayList<>(); 728 try (Reader in = new FileReader(csvFilePath)) { 729 Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); 730 records.forEach(record -> { 731 record.forEach(recordValue -> { 732 if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) { 733 payloadsIdentified.add(recordValue); 734 } 735 recordCount.getAndIncrement(); 736 }); 737 }); 738 } 739 isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0); 740 } catch (Exception e) { 741 isSafe = false; 742 } 743 744 return isSafe; 745 } 746 747 /** 748 * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br> 749 * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach. 
750 * 751 * @param processingModeType Define the mode of processing i.e. protect or validate. ({@link ProcessingModeType}) 752 * @param input When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used. 753 * @param secret Secret to use to compute the SHA256 HMAC. 754 * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul> 755 * @throws Exception If any exception occurs. 756 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html" 757 * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization" 758 * @see "https://portswigger.net/web-security/deserialization" 759 * @see "https://www.baeldung.com/java-serialization-approaches" 760 * @see "https://www.baeldung.com/java-serialization" 761 * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation" 762 * @see "https://en.wikipedia.org/wiki/HMAC" 763 * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/" 764 */ 765 public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingModeType processingModeType, String input, byte[] secret) throws Exception { 766 Map<String, Object> results; 767 String resultFormatTemplate = "%s:%s"; 768 //Verify input provided to be consistent 769 if (processingModeType == null) { 770 throw new IllegalArgumentException("The processing mode is mandatory!"); 771 } 772 if (input == null || input.trim().isEmpty()) { 773 throw new IllegalArgumentException("Input data 
is mandatory!"); 774 } 775 if (secret == null || secret.length == 0) { 776 throw new IllegalArgumentException("The HMAC secret is mandatory!"); 777 } 778 if (processingModeType.equals(ProcessingModeType.VALIDATE) && input.split(":").length != 2) { 779 throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!"); 780 } 781 //Processing 782 Base64.Decoder b64Decoder = Base64.getDecoder(); 783 Base64.Encoder b64Encoder = Base64.getEncoder(); 784 String hmacAlgorithm = "HmacSHA256"; 785 Mac mac = Mac.getInstance(hmacAlgorithm); 786 SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm); 787 mac.init(key); 788 results = new HashMap<>(); 789 results.put("PROCESSING_MODE", processingModeType.toString()); 790 switch (processingModeType) { 791 case PROTECT -> { 792 byte[] objectBytes = b64Decoder.decode(input); 793 byte[] hmac = mac.doFinal(objectBytes); 794 String encodedHmac = b64Encoder.encodeToString(hmac); 795 results.put("STATUS", Boolean.TRUE); 796 results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac)); 797 } 798 case VALIDATE -> { 799 String[] parts = input.split(":"); 800 byte[] objectBytes = b64Decoder.decode(parts[0].trim()); 801 byte[] hmacProvided = b64Decoder.decode(parts[1].trim()); 802 byte[] hmacComputed = mac.doFinal(objectBytes); 803 String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed); 804 Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed); 805 results.put("STATUS", hmacIsValid); 806 results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed)); 807 } 808 default -> throw new IllegalArgumentException("Not supported processing mode!"); 809 } 810 return results; 811 } 812 813 /** 814 * Apply a collection of validations on a JSON string provided: 815 * <ul> 816 * <li>Real JSON structure.</li> 817 * <li>Contain less than a specified number of deepness for nested objects or arrays.</li> 818 * <li>Contain less than a 
specified number of items in any arrays.</li> 819 * </ul> 820 * <br> 821 * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br> 822 * I used the following assumption: 823 * <ul> 824 * <li>The character <code>{</code> identify the beginning of an object.</li> 825 * <li>The character <code>}</code> identify the end of an object.</li> 826 * <li>The character <code>[</code> identify the beginning of an array.</li> 827 * <li>The character <code>]</code> identify the end of an array.</li> 828 * <li>The character <code>"</code> identify the delimiter of a string.</li> 829 * <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li> 830 * </ul> 831 * 832 * @param json String containing the JSON data to validate. 833 * @param maxItemsByArraysCount Maximum number of items allowed in an array. 834 * @param maxDeepnessAllowed Maximum number nested objects or arrays allowed. 835 * @return True only if the string pass all validations. 
836 * @see "https://javaee.github.io/jsonp/" 837 * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306" 838 * @see "https://github.com/InductiveComputerScience/pbJson/issues/2" 839 */ 840 public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) { 841 boolean isSafe = false; 842 843 try { 844 //Step 1: Analyse the JSON string 845 int currentDeepness = 0; 846 int currentArrayItemsCount = 0; 847 int maxDeepnessReached = 0; 848 int maxArrayItemsCountReached = 0; 849 boolean currentlyInArray = false; 850 boolean currentlyInString = false; 851 int currentNestedArrayLevel = 0; 852 String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter 853 String work = json.replace(jsonEscapedDoubleQuote, "'"); 854 for (char c : work.toCharArray()) { 855 switch (c) { 856 case '{': { 857 if (!currentlyInString) { 858 currentDeepness++; 859 } 860 break; 861 } 862 case '}': { 863 if (!currentlyInString) { 864 currentDeepness--; 865 } 866 break; 867 } 868 case '[': { 869 if (!currentlyInString) { 870 currentDeepness++; 871 if (currentlyInArray) { 872 currentNestedArrayLevel++; 873 } 874 currentlyInArray = true; 875 } 876 break; 877 } 878 case ']': { 879 if (!currentlyInString) { 880 currentDeepness--; 881 currentArrayItemsCount = 0; 882 if (currentNestedArrayLevel > 0) { 883 currentNestedArrayLevel--; 884 } 885 if (currentNestedArrayLevel == 0) { 886 currentlyInArray = false; 887 } 888 } 889 break; 890 } 891 case '"': { 892 currentlyInString = !currentlyInString; 893 break; 894 } 895 case ',': { 896 if (!currentlyInString && currentlyInArray) { 897 currentArrayItemsCount++; 898 } 899 break; 900 } 901 } 902 if (currentDeepness > maxDeepnessReached) { 903 maxDeepnessReached = currentDeepness; 904 } 905 if (currentArrayItemsCount > maxArrayItemsCountReached) { 906 maxArrayItemsCountReached = currentArrayItemsCount; 907 } 908 } 909 //Step 2: Apply 
validation against the value specified as limits 910 isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached)); 911 912 //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation. 913 if (isSafe) { 914 JsonReader reader = Json.createReader(new StringReader(json)); 915 isSafe = (reader.read() != null); 916 } 917 918 } catch (Exception e) { 919 isSafe = false; 920 } 921 return isSafe; 922 } 923 924 /** 925 * Apply a collection of validations on a image file provided: 926 * <ul> 927 * <li>Real image file.</li> 928 * <li>Its mime type is into the list of allowed mime types.</li> 929 * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li> 930 * </ul> 931 * <br> 932 * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team. 933 * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br> 934 * 935 * @param imageFilePath Filename of the image file to check. 936 * @param imageAllowedMimeTypes List of image mime types allowed. 937 * @return True only if the file pass all validations. 
938 * @see "https://commons.apache.org/proper/commons-imaging/" 939 * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html" 940 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types" 941 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image" 942 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 943 * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html" 944 * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java" 945 * @see "https://exiftool.org/examples.html" 946 * @see "https://en.wikipedia.org/wiki/List_of_file_signatures" 947 * @see "https://hexed.it/" 948 * @see "https://github.com/sighook/pixload" 949 */ 950 public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) { 951 boolean isSafe = false; 952 Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE); 953 try { 954 File imgFile = new File(imageFilePath); 955 if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) { 956 final byte[] imgBytes = Files.readAllBytes(imgFile.toPath()); 957 //Step 1: Check the mime type of the file against the allowed ones 958 ImageInfo imgInfo = Imaging.getImageInfo(imgBytes); 959 if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) { 960 //Step 2: Load the image into an object using the Image API 961 BufferedImage imgObject = Imaging.getBufferedImage(imgBytes); 962 if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) { 963 //Step 3: Check the metadata if the image format support it - Highly experimental 964 List<String> metadataWithPayloads = new ArrayList<>(); 965 final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes); 966 if (imgMetadata 
!= null) { 967 imgMetadata.getItems().forEach(item -> { 968 String metadata = item.toString(); 969 if (payloadDetectionRegex.matcher(metadata).find()) { 970 metadataWithPayloads.add(metadata); 971 } 972 }); 973 } 974 isSafe = metadataWithPayloads.isEmpty(); 975 } 976 } 977 } 978 } catch (Exception e) { 979 isSafe = false; 980 } 981 return isSafe; 982 } 983 984 /** 985 * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br> 986 * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details. 987 * 988 * @param inputFilePath Filename of the file to clean up. 989 * @param inputFileType Type of the file provided. 990 * @return A array of bytes with the cleaned file. 991 * @throws IllegalArgumentException If an invalid parameter is passed 992 * @throws Exception If any technical error during the cleaning processing 993 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 994 * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc" 995 * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc" 996 * @see "https://stackoverflow.com/a/13605411" 997 */ 998 public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception { 999 ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream(); 1000 File inputFile = new File(inputFilePath); 1001 if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) { 1002 throw new IllegalArgumentException("Cannot read the content of the input file!"); 1003 } 1004 switch (inputFileType) { 1005 case PDF -> { 1006 try (PDDocument document = Loader.loadPDF(inputFile)) { 1007 document.save(sanitizedContent); 1008 } 1009 } 1010 case IMAGE -> { 1011 // Load the original image 1012 BufferedImage originalImage = ImageIO.read(inputFile); 
1013 String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim(); 1014 // Check that image has been successfully loaded 1015 if (originalImage == null) { 1016 throw new IOException("Cannot load the original image !"); 1017 } 1018 // Get current Width and Height of the image 1019 int originalWidth = originalImage.getWidth(null); 1020 int originalHeight = originalImage.getHeight(null); 1021 // Resize the image by removing 1px on Width and Height 1022 Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH); 1023 // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size 1024 Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH); 1025 // Save image to a bytes buffer 1026 int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency 1027 //Sometimes for BMP, the format detected is "bmp; format=compressed" 1028 if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat) || originalFormat.startsWith("bmp;")) { 1029 bufferedImageType = BufferedImage.TYPE_INT_RGB; 1030 } 1031 BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType); 1032 Graphics2D drawer = sanitizedImage.createGraphics(); 1033 drawer.drawImage(initialSizedImage, 0, 0, null); 1034 drawer.dispose(); 1035 //Handle "bmp; format=compressed" case 1036 String formatToUse = originalFormat; 1037 if (formatToUse.startsWith("bmp;")) { 1038 formatToUse = formatToUse.split(";")[0].trim(); 1039 } 1040 ImageIO.write(sanitizedImage, formatToUse, sanitizedContent); 1041 } 1042 default -> throw new IllegalArgumentException("Type of file not supported !"); 1043 } 1044 if (sanitizedContent.size() == 0) { 1045 throw new IOException("An error occur during the rewrite operation!"); 1046 } 1047 
return sanitizedContent.toByteArray(); 1048 } 1049 1050 /** 1051 * Apply a collection of validations on a string expected to be an email address: 1052 * <ul> 1053 * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li> 1054 * <li>Is not using "Encoded-word" format.</li> 1055 * <li>Is not using comment format.</li> 1056 * <li>Is not using "Punycode" format.</li> 1057 * <li>Is not using UUCP style addresses.</li> 1058 * <li>Is not using address literals.</li> 1059 * <li>Is not using source routes.</li> 1060 * <li>Is not using the "percent hack".</li> 1061 * <li>Does not contain newline or carriage-return characters (CRLF injection prevention).</li> 1062 * <li>The domain part contains at least one dot (reject single-label domains such as localhost or internal hostnames).</li> 1063 * <li>The local part is not a quoted string (i.e. not wrapped in double quotes).</li> 1064 * <li>Respect the RFC 5321 length limits: local part ≤ 64 characters, domain ≤ 255 characters, total address ≤ 320 characters.</li> 1065 * </ul><br> 1066 * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br> 1067 * 1068 * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective. 1069 * 1070 * @param addr String expected to be a valid email address. 1071 * @return True only if the string pass all validations. 
1072 * @see "https://commons.apache.org/proper/commons-validator/" 1073 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html" 1074 * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2" 1075 * @see "https://portswigger.net/research/splitting-the-email-atom" 1076 * @see "https://www.jochentopf.com/email/address.html" 1077 * @see "https://en.wikipedia.org/wiki/Email_address" 1078 */ 1079 public static boolean isEmailAddress(String addr) { 1080 boolean isValid = false; 1081 String work = addr.toLowerCase(Locale.ROOT); 1082 Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE); 1083 Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;\"\n\r]+", Pattern.CASE_INSENSITIVE); 1084 try { 1085 //Start with the use of the dedicated EmailValidator from Apache Commons Validator 1086 if (EmailValidator.getInstance(true, true).isValid(work)) { 1087 //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach 1088 if (!encodedWordRegex.matcher(work).find()) { 1089 //If OK then validate it does not contains punycode 1090 if (!work.contains("xn--")) { 1091 //If OK then validate it does not use: 1092 // UUCP style addresses, 1093 // Comment format, 1094 // Address literals, 1095 // Source routes, 1096 // The percent hack. 1097 if (!forbiddenCharacterRegex.matcher(work).find()) { 1098 //If OK ensure that the domain part contains at least one dot 1099 long arobaseCount = addr.chars().filter(c -> c == '@').count(); 1100 if (arobaseCount == 1) { 1101 String[] parts = addr.split("@"); 1102 String localPart = parts[0]; 1103 String domainPart = parts[1]; 1104 if (domainPart.contains(".")) { 1105 //If OK the check the respect to the RFC 5321 length limits: 1106 // local part ≤ 64 characters, domain ≤ 255 characters, total address ≤ 320 characters. 
1107 if (localPart.length() <= 64 && domainPart.length() <= 255 && addr.length() <= 320) { 1108 isValid = true; 1109 } 1110 } 1111 } 1112 } 1113 } 1114 } 1115 1116 } 1117 } catch (Exception e) { 1118 isValid = false; 1119 } 1120 return isValid; 1121 } 1122 1123 /** 1124 * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>. 1125 * <br> 1126 * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>. 1127 * <br> 1128 * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF. 1129 * <br> 1130 * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>. 1131 * <br> 1132 * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker. 1133 * 1134 * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification . 1135 * @return TRUE only if the url point to a Qualified Certificate in PEM format. 
1136 * @see "https://www.stet.eu/en/psd2/" 1137 * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf" 1138 * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/" 1139 * @see "https://datatracker.ietf.org/doc/rfc9421/" 1140 * @see "https://openjdk.org/groups/net/httpclient/intro.html" 1141 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html" 1142 * @see "https://portswigger.net/web-security/ssrf" 1143 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control" 1144 */ 1145 public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) { 1146 boolean isValid = false; 1147 long connectionTimeoutInSeconds = 10; 1148 String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest"; 1149 try { 1150 //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET 1151 if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) { 1152 String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1); 1153 if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) { 1154 //2. Ensure that the URL is a valid url by creating a instance of the class URI 1155 URI uri = URI.create(certificateUrl); 1156 //3. Require usage of HTTPS and reject any url containing query parameters 1157 if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) { 1158 //4. Perform a HTTP HEAD request in order to get the content type of the remote resource 1159 //and limit the interest to use the SSRF because to pass the check the url need to: 1160 //- Do not having any query parameters. 1161 //- Use HTTPS protocol. 1162 //- End with a string having the format "_[0-9a-f]{64}". 1163 //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters. 
1164 HttpResponse<String> response; 1165 try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) { 1166 HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request 1167 .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses 1168 .build(); 1169 response = client.send(request, HttpResponse.BodyHandlers.ofString()); 1170 if (response.statusCode() == 200) { 1171 //5. Ensure that the response content type is "text/plain" 1172 Optional<String> contentType = response.headers().firstValue("Content-Type"); 1173 isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain")); 1174 } 1175 } 1176 } 1177 } 1178 } 1179 } catch (Exception e) { 1180 isValid = false; 1181 } 1182 return isValid; 1183 } 1184 1185 /** 1186 * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached. 1187 * 1188 * @param encodedData URL encoded data. 1189 * @param decodingRoundThreshold Threshold above which decoding will fail. 1190 * @return The decoded data. 1191 * @throws SecurityException If the threshold is reached. 
1192 * @see "https://en.wikipedia.org/wiki/Percent-encoding" 1193 * @see "https://owasp.org/www-community/Double_Encoding" 1194 * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings" 1195 * @see "https://capec.mitre.org/data/definitions/120.html" 1196 */ 1197 public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException { 1198 if (decodingRoundThreshold < 1) { 1199 throw new IllegalArgumentException("Threshold must be a positive number !"); 1200 } 1201 if (encodedData == null) { 1202 throw new IllegalArgumentException("Data provided must not be null !"); 1203 } 1204 Charset charset = StandardCharsets.UTF_8; 1205 int currentDecodingRound = 0; 1206 boolean isFinished = false; 1207 String currentRoundData = encodedData; 1208 String previousRoundData = encodedData; 1209 while (!isFinished) { 1210 if (currentDecodingRound > decodingRoundThreshold) { 1211 throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold)); 1212 } 1213 currentRoundData = URLDecoder.decode(currentRoundData, charset); 1214 isFinished = currentRoundData.equals(previousRoundData); 1215 previousRoundData = currentRoundData; 1216 currentDecodingRound++; 1217 } 1218 return currentRoundData; 1219 } 1220 1221 /** 1222 * Apply a collection of validations on a string expected to be an system file/folder path: 1223 * <ul> 1224 * <li>Does not contains path traversal payload.</li> 1225 * <li>The canonical path is equals to the absolute path.</li> 1226 * </ul><br> 1227 * 1228 * @param path String expected to be a valid system file/folder path. 1229 * @return True only if the string pass all validations. 
1230 * @see "https://portswigger.net/web-security/file-path-traversal" 1231 * @see "https://learn.snyk.io/lesson/directory-traversal/" 1232 * @see "https://capec.mitre.org/data/definitions/126.html" 1233 * @see "https://owasp.org/www-community/attacks/Path_Traversal" 1234 */ 1235 public static boolean isPathSafe(String path) { 1236 boolean isSafe = false; 1237 int decodingRoundThreshold = 3; 1238 try { 1239 if (path != null && !path.isEmpty()) { 1240 //URL decode the path if case of data coming from a web context 1241 String decodedPath = applyURLDecoding(path, decodingRoundThreshold); 1242 //Ensure that no path traversal expression is present 1243 if (!decodedPath.contains("..")) { 1244 File f = new File(decodedPath); 1245 String canonicalPath = f.getCanonicalPath(); 1246 String absolutePath = f.getAbsolutePath(); 1247 isSafe = canonicalPath.equals(absolutePath); 1248 } 1249 } 1250 } catch (Exception e) { 1251 isSafe = false; 1252 } 1253 return isSafe; 1254 } 1255 1256 /** 1257 * Identify if an XML contains any XML comments or have any XSL processing instructions.<br> 1258 * Stream reader based parsing is used to support large XML tree. 1259 * 1260 * @param xmlFilePath Filename of the XML file to check. 1261 * @return True only if XML comments or XSL processing instructions are identified. 
1262 * @see "https://www.tutorialspoint.com/xml/xml_processing.htm" 1263 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html" 1264 * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion" 1265 * @see "https://www.w3.org/Style/styling-XML.en.html" 1266 */ 1267 public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) { 1268 boolean itemsDetected = false; 1269 try { 1270 //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks 1271 XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); 1272 xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 1273 xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1274 xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); 1275 xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); 1276 1277 //Parse file 1278 try (FileInputStream fis = new FileInputStream(xmlFilePath)) { 1279 XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis); 1280 int eventType; 1281 while (reader.hasNext() && !itemsDetected) { 1282 eventType = reader.next(); 1283 if (eventType == XMLEvent.COMMENT) { 1284 itemsDetected = true; 1285 } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) { 1286 itemsDetected = true; 1287 } 1288 } 1289 } 1290 } catch (Exception e) { 1291 //In case of error then assume that the check failed 1292 itemsDetected = true; 1293 } 1294 return itemsDetected; 1295 } 1296 1297 1298 /** 1299 * Perform a set of additional validations against a JWT token: 1300 * <ul> 1301 * <li>Do not use the <b>NONE</b> signature algorithm.</li> 1302 * <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li> 1303 * <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI 
claim</a>) is NOT part of the list of revoked token.</li> 1304 * <li>Match the expected type of token: ACCESS or ID or REFRESH.</li> 1305 * </ul> 1306 * 1307 * @param token JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied. 1308 * @param expectedTokenType The type of expected token using the enumeration provided. 1309 * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to. 1310 * @return True only the token pass all the validations. 1311 * @see "https://www.iana.org/assignments/jwt/jwt.xhtml" 1312 * @see "https://auth0.com/docs/secure/tokens/access-tokens" 1313 * @see "https://auth0.com/docs/secure/tokens/id-tokens" 1314 * @see "https://auth0.com/docs/secure/tokens/refresh-tokens" 1315 * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/" 1316 * @see "https://jwt.io/libraries?language=Java" 1317 * @see "https://pentesterlab.com/blog/secure-jwt-library-design" 1318 * @see "https://github.com/auth0/java-jwt" 1319 */ 1320 public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) { 1321 boolean isValid = false; 1322 TokenType tokenType; 1323 try { 1324 if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) { 1325 if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) { 1326 String jti = token.getId(); 1327 if (jti != null && !jti.trim().isEmpty()) { 1328 boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase); 1329 if (!jtiIsRevoked) { 1330 //Determine the token type based on the presence of specifics claims 1331 if (!token.getClaim("scope").isMissing()) { 1332 tokenType = TokenType.ACCESS; 1333 } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) { 1334 tokenType = TokenType.ID; 1335 } else { 1336 tokenType = 
TokenType.REFRESH; 1337 } 1338 isValid = (tokenType.equals(expectedTokenType)); 1339 } 1340 } 1341 } 1342 } 1343 1344 } catch (Exception e) { 1345 //In case of error then assume that the check failed 1346 isValid = false; 1347 } 1348 return isValid; 1349 } 1350 1351 /** 1352 * Apply a validations on a regular expression to ensure that is not prone to the ReDOS attack. 1353 * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b> 1354 * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool. 1355 * 1356 * @param regex String expected to be a valid regular expression (regex). 1357 * @param data Test data on which the regular expression is executed for the test. 1358 * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified). 1359 * @return True only if the string pass all validations. 
1360 * @see "https://github.blog/security/how-to-fix-a-redos/" 1361 * @see "https://learn.snyk.io/lesson/redos" 1362 * @see "https://rules.sonarsource.com/java/RSPEC-2631/" 1363 * @see "https://github.com/doyensec/regexploit" 1364 * @see "https://github.com/makenowjust-labs/recheck" 1365 * @see "https://github.com/tjenkinson/redos-detector" 1366 * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf" 1367 * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS" 1368 */ 1369 public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) { 1370 Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value."); 1371 Objects.requireNonNull(data, "A sample data is needed to perform the test."); 1372 Objects.requireNonNull(regex, "A regular expression is needed to perform the test."); 1373 boolean isSafe = false; 1374 int executionTimeout = maximumRunningTimeInSeconds.orElse(4); 1375 ExecutorService executor = Executors.newSingleThreadExecutor(); 1376 try { 1377 Callable<Boolean> task = () -> { 1378 Pattern pattern = Pattern.compile(regex); 1379 return pattern.matcher(data).matches(); 1380 }; 1381 List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS); 1382 if (!tasks.getFirst().isCancelled()) { 1383 isSafe = true; 1384 } 1385 } catch (Exception e) { 1386 isSafe = false; 1387 } finally { 1388 executor.shutdownNow(); 1389 } 1390 return isSafe; 1391 } 1392 1393 /** 1394 * Compute a UUID version 7 without using any external dependency.<br><br> 1395 * <b>Below are my personal point of view and perhaps I'm totally wrong!</b> 1396 * <br><br> 1397 * Why such method? 
1398 * <ul> 1399 * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li> 1400 * <li>Import a library just to generate such value is overkill for me.</li> 1401 * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li> 1402 * </ul> 1403 * <br> 1404 * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b> 1405 * <br><br> 1406 * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly: 1407 * <ul> 1408 * <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li> 1409 * <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li> 1410 * </ul> 1411 * 1412 * @return A UUID object representing the UUID v7. 1413 * @see "https://uuid7.com/" 1414 * @see "https://antonz.org/uuidv7/" 1415 * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns" 1416 * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7" 1417 * @see "https://www.baeldung.com/java-generating-time-based-uuids" 1418 * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier" 1419 * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/" 1420 */ 1421 public static UUID computeUUIDv7() { 1422 SecureRandom secureRandom = new SecureRandom(); 1423 // Generate truly random bytes 1424 byte[] value = new byte[16]; 1425 secureRandom.nextBytes(value); 1426 // Get current timestamp in milliseconds 1427 ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES); 1428 timestamp.putLong(System.currentTimeMillis()); 1429 // Create the TIMESTAMP part of the UUID 1430 System.arraycopy(timestamp.array(), 2, value, 0, 6); 1431 // Create the VERSION and the VARIANT parts of 
the UUID 1432 value[6] = (byte) ((value[6] & 0x0F) | 0x70); 1433 value[8] = (byte) ((value[8] & 0x3F) | 0x80); 1434 //Create the HIGH and LOW parts of the UUID 1435 ByteBuffer buf = ByteBuffer.wrap(value); 1436 long high = buf.getLong(); 1437 long low = buf.getLong(); 1438 //Create and return the UUID object 1439 UUID uuidv7 = new UUID(high, low); 1440 return uuidv7; 1441 } 1442 1443 /** 1444 * Ensure that an XSD file does not contain any include/import/redefine instruction (prevent exposure to SSRF). 1445 * 1446 * @param xsdFilePath Filename of the XSD file to check. 1447 * @return True only if the file pass all validations. 1448 * @see "https://portswigger.net/web-security/ssrf" 1449 * @see "https://www.w3schools.com/Xml/el_import.asp" 1450 * @see "https://www.w3schools.com/xml/el_include.asp" 1451 * @see "https://www.linkedin.com/posts/righettod_appsec-appsecurity-java-activity-7344048434326188053-6Ru9" 1452 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/validation/SchemaFactory.html#setProperty(java.lang.String,java.lang.Object)" 1453 */ 1454 public static boolean isXSDSafe(String xsdFilePath) { 1455 boolean isSafe = false; 1456 try { 1457 File xsdFile = new File(xsdFilePath); 1458 if (xsdFile.exists() && xsdFile.canRead() && xsdFile.isFile()) { 1459 //Parse the XSD file, if an exception occur then it's imply that the XSD specified is not a valid ones 1460 //Create an schema factory throwing Exception if a external schema is specified 1461 SchemaFactory schemaFactory = SchemaFactory.newDefaultInstance(); 1462 schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1463 schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); 1464 //Parse the schema 1465 Schema schema = schemaFactory.newSchema(xsdFile); 1466 isSafe = (schema != null); 1467 } 1468 } catch (Exception e) { 1469 isSafe = false; 1470 } 1471 return isSafe; 1472 } 1473 1474 1475 /** 1476 * Extract all sensitive information from a string provided.<br> 
1477 * This can be used to identify any sensitive information into a <a href="https://cwe.mitre.org/data/definitions/532.html">message expected to be written in a log</a> and then replace every sensitive values by an obfuscated ones.<br><br> 1478 * For the luxembourg national identification number, this method focus on detecting identifiers for a physical entity (people) and not a moral one (company).<br><br> 1479 * I delegated the validation of the IBAN to a dedicated library (<a href="https://github.com/arturmkrtchyan/iban4j">iban4j</a>) to not "reinvent the wheel" and then introduce buggy validation myself. I used <b>iban4j</b> over the <b><a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/IBANValidator.html">IBANValidator</a></b> class from the <a href="https://commons.apache.org/proper/commons-validator/"><b>Apache Commons Validator</b></a> library because <b>iban4j</b> perform a full official IBAN specification validation so its reduce risks of false-positives by ensuring that an IBAN detected is a real IBAN.<br><br> 1480 * Same thing and reason regarding the validation of the bank card PAN using the class <a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html">CreditCardValidator</a> from the <b>Apache Commons Validator</b> library. 1481 * 1482 * @param content String in which sensitive information must be searched. 1483 * @return A map with the collection of identified sensitive information gathered by sensitive information type. If nothing is found then the map is empty. A type of sensitive information is only present if there is at least one item found. A set is used to not store duplicates occurrence of the same sensitive information. 1484 * @throws Exception If any error occurs during the processing. 
1485 * @see "https://guichet.public.lu/en/citoyens/citoyennete/registre-national/identification/demande-numero-rnpp.html" 1486 * @see "https://cnpd.public.lu/fr/decisions-avis/2009/identifiant-unique.html" 1487 * @see "https://cnpd.public.lu/content/dam/cnpd/fr/decisions-avis/2009/identifiant-unique/48_2009.pdf" 1488 * @see "https://en.wikipedia.org/wiki/International_Bank_Account_Number" 1489 * @see "https://www.iban.com/structure" 1490 * @see "https://github.com/arturmkrtchyan/iban4j" 1491 * @see "https://cwe.mitre.org/data/definitions/532.html" 1492 * @see "https://www.baeldung.com/logback-mask-sensitive-data" 1493 * @see "https://en.wikipedia.org/wiki/Payment_card_number" 1494 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html" 1495 * @see "https://commons.apache.org/proper/commons-validator/" 1496 */ 1497 public static Map<SensitiveInformationType, Set<String>> extractAllSensitiveInformation(String content) throws Exception { 1498 CreditCardValidator creditCardValidator = CreditCardValidator.genericCreditCardValidator(); 1499 Pattern nationalIdentifierRegex = Pattern.compile("([0-9]{13})"); 1500 Pattern ibanNonHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}[A-Z0-9]{11,30})", Pattern.CASE_INSENSITIVE); 1501 Pattern ibanHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}(?:\\s[A-Z0-9]{4}){2,7}\\s[A-Z0-9]{1,4})", Pattern.CASE_INSENSITIVE); 1502 Pattern panRegex = Pattern.compile("((?:\\d[ -]*?){13,19})"); 1503 Map<SensitiveInformationType, Set<String>> data = new HashMap<>(); 1504 data.put(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER, new HashSet<>()); 1505 data.put(SensitiveInformationType.IBAN, new HashSet<>()); 1506 data.put(SensitiveInformationType.BANK_CARD_PAN, new HashSet<>()); 1507 1508 if (content != null && !content.isBlank()) { 1509 /* Step 1: Search for LU national identifier */ 1510 //A national identifier have the following structure: 
[BIRTHDATE_YEAR_YYYY][BIRTHDATE_MONTH_MM][BIRTHDATE_DAY_DD][FIVE_INTEGER] 1511 //Define minimal and maximal birth year base on current year 1512 //Assume people live less than 120 years 1513 int maxBirthYear = LocalDate.now(ZoneId.of("Europe/Luxembourg")).getYear(); 1514 int minBirthYear = maxBirthYear - 120; 1515 Matcher matcher = nationalIdentifierRegex.matcher(content); 1516 String nationalIdentierFull; 1517 int nationalIdentierYear, nationalIdentierMonth, nationalIdentierDay; 1518 while (matcher.find()) { 1519 nationalIdentierFull = matcher.group(1); 1520 //Check that the string is a valid national identifier and if yes then add it 1521 nationalIdentierYear = Integer.parseInt(nationalIdentierFull.substring(0, 4)); 1522 nationalIdentierMonth = Integer.parseInt(nationalIdentierFull.substring(4, 6)); 1523 nationalIdentierDay = Integer.parseInt(nationalIdentierFull.substring(6, 8)); 1524 if (nationalIdentierYear >= minBirthYear && nationalIdentierYear <= maxBirthYear) { 1525 if (nationalIdentierMonth >= 1 && nationalIdentierMonth <= 12) { 1526 if (YearMonth.of(nationalIdentierYear, nationalIdentierMonth).isValidDay(nationalIdentierDay)) { 1527 data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).add(nationalIdentierFull); 1528 } 1529 } 1530 } 1531 } 1532 1533 /* Step 2a: Search for IBAN that are non human formatted */ 1534 matcher = ibanNonHumanFormattedRegex.matcher(content); 1535 String iban, ibanUpperCased; 1536 while (matcher.find()) { 1537 iban = matcher.group(1); 1538 ibanUpperCased = iban.toUpperCase(Locale.ROOT); 1539 //Check that the string is a valid IBAN and if yes then add it 1540 if (IbanUtil.isValid(ibanUpperCased)) { 1541 data.get(SensitiveInformationType.IBAN).add(iban); 1542 } 1543 } 1544 1545 /* Step 2b: Search for IBAN that are human formatted */ 1546 matcher = ibanHumanFormattedRegex.matcher(content); 1547 String ibanUpperCasedNoSpace; 1548 while (matcher.find()) { 1549 iban = matcher.group(1); 1550 ibanUpperCasedNoSpace 
= iban.toUpperCase(Locale.ROOT).replace(" ", ""); 1551 //Check that the string is a valid IBAN and if yes then add it 1552 if (IbanUtil.isValid(ibanUpperCasedNoSpace)) { 1553 data.get(SensitiveInformationType.IBAN).add(iban); 1554 } 1555 } 1556 1557 /* Step 3: Search for bank card PAN */ 1558 matcher = panRegex.matcher(content); 1559 String pan, panNoSeparator; 1560 while (matcher.find()) { 1561 pan = matcher.group(1); 1562 panNoSeparator = pan.toUpperCase(Locale.ROOT).replace(" ", "").replace("-", ""); 1563 //Check that the string is a valid PAN and if yes then add it 1564 if (creditCardValidator.isValid(panNoSeparator)) { 1565 data.get(SensitiveInformationType.BANK_CARD_PAN).add(pan); 1566 } 1567 } 1568 1569 } 1570 1571 //Cleanup if a set is empty 1572 if (data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).isEmpty()) { 1573 data.remove(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER); 1574 } 1575 if (data.get(SensitiveInformationType.IBAN).isEmpty()) { 1576 data.remove(SensitiveInformationType.IBAN); 1577 } 1578 if (data.get(SensitiveInformationType.BANK_CARD_PAN).isEmpty()) { 1579 data.remove(SensitiveInformationType.BANK_CARD_PAN); 1580 } 1581 1582 return data; 1583 } 1584 1585 /** 1586 * Apply a collection of validations on a bytes array provided representing GZIP compressed data: 1587 * <ul> 1588 * <li>Are valid GZIP compressed data.</li> 1589 * <li>The number of bytes once decompressed is under the specified limit.</li> 1590 * </ul> 1591 * <br><b>Note:</b> The value <code>Integer.MAX_VALUE - 8</code> was chosen because during my tests on Java 25 (JDK 64 bits on Windows 11 Pro), it was possible to decompress such amount of data with the default JVM settings without causing an <a href="https://docs.oracle.com/en/java/javase/25/docs/api//java.base/java/lang/OutOfMemoryError.html">Out Of Memory error</a>. 1592 * 1593 * @param compressedBytes Array of bytes containing the GZIP compressed data to check. 
1594 * @param maxCountOfDecompressedBytesAllowed Maximum number of decompressed bytes allowed. Default to 10 MB if the specified value is inferior to 1 or superior to Integer.MAX_VALUE - 8. 1595 * @return True only if the file pass all validations. 1596 * @see "https://en.wikipedia.org/wiki/Gzip" 1597 * @see "https://www.rapid7.com/db/modules/auxiliary/dos/http/gzip_bomb_dos/" 1598 */ 1599 public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxCountOfDecompressedBytesAllowed) { 1600 boolean isSafe = false; 1601 1602 try { 1603 long limit = maxCountOfDecompressedBytesAllowed; 1604 long totalRead = 0L; 1605 byte[] buffer = new byte[8 * 1024]; 1606 int read; 1607 if (limit < 1 || limit > (Integer.MAX_VALUE - 8)) { 1608 limit = 10_000_000; 1609 } 1610 try (ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); GZIPInputStream gzipInputStream = new GZIPInputStream(new BufferedInputStream(bis))) { 1611 while ((read = gzipInputStream.read(buffer)) != -1) { 1612 totalRead += read; 1613 if (totalRead > limit) { 1614 throw new Exception(); 1615 } 1616 } 1617 } 1618 isSafe = true; 1619 } catch (Exception e) { 1620 isSafe = false; 1621 } 1622 1623 return isSafe; 1624 } 1625 1626 /** 1627 * Process a string, intended to be written in a log, to remove as much as possible information that can lead to an exposure to a log injection vulnerability.<br><br> 1628 * <b>Log injection</b> is also called <b>log forging</b>.<br><br> 1629 * The following information are removed: 1630 * <ul> 1631 * <li>Characters: Carriage Return (CR), Linefeed (LF) and Tabulation (TAB).</li> 1632 * <li>Characters: Unicode LINE SEPARATOR and Unicode PARAGRAPH SEPARATOR.</li> 1633 * <li>Characters: CSI sequences and bare ESC.</li> 1634 * <li>Leading and trailing spaces.</li> 1635 * <li>Any HTML tags.</li> 1636 * </ul><br> 1637 * A parameter is also used to limit the maximum length of the sanitized message. 
1638 * To remove any HTML tags, the OWASP project <a href="https://owasp.org/www-project-java-html-sanitizer/">Java HTML Sanitizer</a> is leveraged.<br> 1639 * I delegated such removal to a dedicated library to prevent missing of edge cases as well as potential bypasses. 1640 * 1641 * @param message The original string message intended to be written in a log. 1642 * @param maxMessageLength The maximum number of characters after which the sanitized message must be truncated. If inferior to 1 then default to the value of 500. 1643 * @return The string message cleaned. 1644 * @see "https://www.wallarm.com/what/log-forging-attack" 1645 * @see "https://www.invicti.com/learn/crlf-injection" 1646 * @see "https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_injection_vulnerability.html" 1647 * @see "https://capec.mitre.org/data/definitions/93.html" 1648 * @see "https://codeql.github.com/codeql-query-help/javascript/js-log-injection/" 1649 * @see "https://owasp.org/www-project-java-html-sanitizer/" 1650 * @see "https://github.com/OWASP/java-html-sanitizer" 1651 */ 1652 public static String sanitizeLogMessage(String message, int maxMessageLength) { 1653 String sanitized = message; 1654 int maxSanitizedMessageLength = maxMessageLength; 1655 1656 if (sanitized != null && !sanitized.isBlank()) { 1657 if (maxSanitizedMessageLength < 1) { 1658 maxSanitizedMessageLength = 500; 1659 } 1660 //Step 1: Remove any CR/LR/TAB characters as well as leading and trailing spaces 1661 sanitized = sanitized.replaceAll("[\\n\\r\\t]", "").trim(); 1662 //Step 2: Remove any Unicode LINE SEPARATOR or Unicode PARAGRAPH SEPARATOR as well as leading and trailing spaces 1663 sanitized = sanitized.replace("\u2028", "").replace("\u2029", "").trim(); 1664 //Step 3: Remove ANSI escape sequences as well as leading and trailing spaces 1665 sanitized = sanitized.replaceAll("\u001B\\[[\\d;]*[a-zA-Z]", "").replace("\u001B", "").trim(); 1666 //Step 4: Remove any HTML tags 1667 
PolicyFactory htmlSanitizerPolicy = new HtmlPolicyBuilder().toFactory(); 1668 sanitized = htmlSanitizerPolicy.sanitize(sanitized); 1669 //Step 5: Truncate the string in case of need 1670 if (sanitized.length() > maxSanitizedMessageLength) { 1671 sanitized = sanitized.substring(0, maxSanitizedMessageLength); 1672 } 1673 } 1674 1675 return sanitized; 1676 } 1677 1678 /** 1679 * Identify if an XML is an SVG image.<br> 1680 * The goal of this method is to prevent to leverage SVG, as an vector, to achieve a XSS when XML format is accepted.<br> 1681 * Leverage <a href="https://xmlgraphics.apache.org/batik/">Apache Batik</a> to delegate the parsing and support for the SVG format.<br><br> 1682 * <b>Due to the intended usage of the method, the following choice were made:</b> 1683 * <ul> 1684 * <li>Raise an exception when a non SVG related external references is identified.</li> 1685 * <li>Throw any exception that can occur if the provided content is invalid like for example an invalid XML file or a non existing file.</li> 1686 * <li>Explicitly check the XML prior to pass it to Batik even if Batik seems not prone to XXE/SSRF classes of vulnerability.</li> 1687 * </ul> 1688 * 1689 * @param xmlFilePath Filename of the XML file to check. 1690 * @return True only if XML is an valid SVG image. 1691 * @throws SecurityException If a non SVG external references is detected into the XML content. 1692 * @throws Exception If a error occur due to an invalid content provided. 
1693 * @see "https://developer.mozilla.org/en-US/docs/Web/SVG" 1694 * @see "https://www.fortinet.com/blog/threat-research/scalable-vector-graphics-attack-surface-anatomy" 1695 * @see "https://portswigger.net/web-security/cross-site-scripting" 1696 * @see "https://xmlgraphics.apache.org/batik/" 1697 * @see "https://github.com/apache/xmlgraphics-batik/blob/main/batik-dom/src/main/java/org/apache/batik/dom/util/SAXDocumentFactory.java#L420" 1698 * @see "https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-dom" 1699 * @see "https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-anim" 1700 * @see "https://portswigger.net/web-security/xxe" 1701 * @see "https://portswigger.net/web-security/ssrf" 1702 */ 1703 public static boolean isXMLSVGImage(String xmlFilePath) throws Exception { 1704 boolean isSvg = true; 1705 List<String> svgValidSystemIDs = List.of("http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd", "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd", "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd", "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"); 1706 1707 //Load the XML content into a reader 1708 String xmlContent = Files.readString(Paths.get(xmlFilePath)); 1709 //Then ensure that the XML document does not contains any non SVG external references 1710 try (Reader reader = StringReader.of(xmlContent)) { 1711 DocumentBuilderFactory xmlFactory = DocumentBuilderFactory.newInstance(); 1712 DocumentBuilder docBuilder = xmlFactory.newDocumentBuilder(); 1713 docBuilder.setEntityResolver((publicId, systemId) -> { 1714 if (systemId != null && !svgValidSystemIDs.contains(systemId)) { 1715 throw new SecurityException("External references detected: " + systemId); 1716 } 1717 return new InputSource(new ByteArrayInputStream("".getBytes())); 1718 }); 1719 docBuilder.parse(new InputSource(reader)); 1720 } 1721 //Then parse the XML with Apache Batik 1722 try (Reader reader = StringReader.of(xmlContent)) { 1723 //Method 
SAXDocumentFactory.createDocument() do not load external DTD or entities. 1724 String parserClassName = XMLResourceDescriptor.getXMLParserClassName(); 1725 SAXSVGDocumentFactory svgFactory = new SAXSVGDocumentFactory(parserClassName); 1726 //Method svgFactory.createSVGDocument() raise an IO exception if the XML is not a valid SVG image 1727 try { 1728 SVGDocument doc = svgFactory.createSVGDocument(null, reader); 1729 isSvg = (doc != null && doc.getRootElement() != null); 1730 } catch (IOException e) { 1731 isSvg = false; 1732 } 1733 } 1734 1735 return isSvg; 1736 } 1737}