001package eu.righettod; 002 003 004import com.auth0.jwt.interfaces.DecodedJWT; 005import org.apache.commons.csv.CSVFormat; 006import org.apache.commons.csv.CSVRecord; 007import org.apache.commons.imaging.ImageInfo; 008import org.apache.commons.imaging.Imaging; 009import org.apache.commons.imaging.common.ImageMetadata; 010import org.apache.commons.validator.routines.CreditCardValidator; 011import org.apache.commons.validator.routines.EmailValidator; 012import org.apache.commons.validator.routines.InetAddressValidator; 013import org.apache.pdfbox.Loader; 014import org.apache.pdfbox.pdmodel.PDDocument; 015import org.apache.pdfbox.pdmodel.PDDocumentCatalog; 016import org.apache.pdfbox.pdmodel.PDDocumentInformation; 017import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; 018import org.apache.pdfbox.pdmodel.common.PDMetadata; 019import org.apache.pdfbox.pdmodel.interactive.action.*; 020import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; 021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; 022import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; 023import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; 024import org.apache.poi.poifs.filesystem.DirectoryEntry; 025import org.apache.poi.poifs.filesystem.POIFSFileSystem; 026import org.apache.poi.poifs.macros.VBAMacroReader; 027import org.apache.tika.detect.DefaultDetector; 028import org.apache.tika.detect.Detector; 029import org.apache.tika.io.TemporaryResources; 030import org.apache.tika.io.TikaInputStream; 031import org.apache.tika.metadata.Metadata; 032import org.apache.tika.mime.MediaType; 033import org.apache.tika.mime.MimeTypes; 034import org.iban4j.IbanUtil; 035import org.owasp.html.HtmlPolicyBuilder; 036import org.owasp.html.PolicyFactory; 037import org.w3c.dom.Document; 038import org.xml.sax.EntityResolver; 039import org.xml.sax.InputSource; 040import org.xml.sax.SAXException; 041 042import javax.crypto.Mac; 043import javax.crypto.spec.SecretKeySpec; 044import javax.imageio.ImageIO; 045import javax.json.Json; 046import javax.json.JsonReader; 047import javax.xml.XMLConstants; 048import javax.xml.parsers.DocumentBuilder; 049import javax.xml.parsers.DocumentBuilderFactory; 050import javax.xml.parsers.ParserConfigurationException; 051import javax.xml.stream.XMLInputFactory; 052import javax.xml.stream.XMLStreamReader; 053import javax.xml.stream.events.XMLEvent; 054import javax.xml.validation.Schema; 055import javax.xml.validation.SchemaFactory; 056import java.awt.*; 057import java.awt.image.BufferedImage; 058import java.io.*; 059import java.net.*; 060import java.net.http.HttpClient; 061import java.net.http.HttpRequest; 062import java.net.http.HttpResponse; 063import java.nio.ByteBuffer; 064import java.nio.charset.Charset; 065import java.nio.charset.StandardCharsets; 066import java.nio.file.Files; 067import java.security.MessageDigest; 068import java.security.SecureRandom; 069import java.time.Duration; 070import java.time.LocalDate; 071import java.time.YearMonth; 072import java.time.ZoneId; 073import java.util.*; 074import java.util.List; 075import java.util.concurrent.*; 076import java.util.concurrent.atomic.AtomicInteger; 077import java.util.regex.Matcher; 078import java.util.regex.Pattern; 079import java.util.zip.GZIPInputStream; 080import java.util.zip.ZipEntry; 081import java.util.zip.ZipFile; 082 083/** 084 * Provides different utilities methods to apply processing from a security perspective.<br> 085 * These code snippet: 086 * <ul> 087 * <li>Can be used, as "foundation", to customize the validation to the app context.</li> 088 * <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li> 089 * <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li> 090 * </ul> 091 * <br> 092 * <a href="https://github.com/righettod/code-snippets-security-utils">GitHub repository</a>.<br><br> 093 * <a href="https://github.com/righettod/code-snippets-security-utils/blob/main/src/main/java/eu/righettod/SecurityUtils.java">Source code of the class</a>. 094 */ 095public class SecurityUtils { 096 /** 097 * Default constructor: Not needed as the class only provides static methods. 098 */ 099 private SecurityUtils() { 100 } 101 102 /** 103 * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br> 104 * This method consider that format of the PIN code is [0-9]{6,}<br> 105 * Rule to consider a PIN code as weak: 106 * <ul> 107 * <li>Length is inferior to 6 positions.</li> 108 * <li>Contain only the same number or only a sequence of zero.</li> 109 * <li>Contain sequence of following incremental or decremental numbers.</li> 110 * </ul> 111 * 112 * @param pinCode PIN code to verify. 113 * @return True only if the PIN is considered as weak. 114 */ 115 public static boolean isWeakPINCode(String pinCode) { 116 boolean isWeak = true; 117 //Length is inferior to 6 positions 118 //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one 119 //and to ensure that the PIN is not only a sequence of zero 120 if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) { 121 //Contain only the same number 122 String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length()); 123 if (!Pattern.matches(regex, pinCode)) { 124 //Contain sequence of following incremental or decremental numbers 125 char previousChar = 'X'; 126 boolean containSequence = false; 127 for (char c : pinCode.toCharArray()) { 128 if (previousChar != 'X') { 129 int previousNbr = Integer.parseInt(String.valueOf(previousChar)); 130 int currentNbr = Integer.parseInt(String.valueOf(c)); 131 if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) { 132 containSequence = true; 133 break; 134 } 135 } 136 previousChar = c; 137 } 138 if (!containSequence) { 139 isWeak = false; 140 } 141 } 142 } 143 return isWeak; 144 } 145 146 /** 147 * Apply a collection of validations on a Word 97-2003 (binary format) document file provided: 148 * <ul> 149 * <li>Real Microsoft Word 97-2003 document file.</li> 150 * <li>No VBA Macro.<br></li> 151 * <li>No embedded objects.</li> 152 * </ul> 153 * 154 * @param wordFilePath Filename of the Word document file to check. 155 * @return True only if the file pass all validations. 156 * @see "https://poi.apache.org/components/" 157 * @see "https://poi.apache.org/components/document/" 158 * @see "https://poi.apache.org/components/poifs/how-to.html" 159 * @see "https://poi.apache.org/components/poifs/embeded.html" 160 * @see "https://poi.apache.org/" 161 * @see "https://mvnrepository.com/artifact/org.apache.poi/poi" 162 */ 163 public static boolean isWord972003DocumentSafe(String wordFilePath) { 164 boolean isSafe = false; 165 try { 166 File wordFile = new File(wordFilePath); 167 if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) { 168 //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file 169 try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) { 170 //Step 2: Check if the document contains VBA macros, in our case is not allowed 171 VBAMacroReader macroReader = new VBAMacroReader(fs); 172 Map<String, String> macros = macroReader.readMacros(); 173 if (macros == null || macros.isEmpty()) { 174 //Step 3: Check if the document contains any embedded objects, in our case is not allowed 175 //From POI documentation: 176 //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root. 177 //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers. 178 final List<String> embeddedObjectFound = new ArrayList<>(); 179 DirectoryEntry root = fs.getRoot(); 180 if (root.getEntryCount() > 0) { 181 root.iterator().forEachRemaining(entry -> { 182 if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) { 183 DirectoryEntry objPoolDirectory = (DirectoryEntry) entry; 184 if (objPoolDirectory.getEntryCount() > 0) { 185 objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> { 186 if (objPoolDirectoryEntry instanceof DirectoryEntry) { 187 DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry; 188 if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) { 189 objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> { 190 if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) { 191 embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName()); 192 } 193 }); 194 } 195 } 196 }); 197 } 198 } 199 }); 200 } 201 isSafe = embeddedObjectFound.isEmpty(); 202 } 203 } 204 } 205 } catch (Exception e) { 206 isSafe = false; 207 } 208 return isSafe; 209 } 210 211 /** 212 * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions. 213 * 214 * @param xmlFilePath Filename of the XML file to check. 215 * @return True only if the file pass all validations. 216 * @see "https://portswigger.net/web-security/xxe" 217 * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java" 218 * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258" 219 * @see "https://www.w3.org/TR/xinclude-11/" 220 * @see "https://en.wikipedia.org/wiki/XInclude" 221 */ 222 public static boolean isXMLSafe(String xmlFilePath) { 223 boolean isSafe = false; 224 try { 225 File xmlFile = new File(xmlFilePath); 226 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 227 //Step 1a: Verify that the XML file content does not contain any XInclude instructions 228 boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include ")); 229 if (!containXInclude) { 230 //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones 231 //Create an XML document builder throwing Exception if a DOCTYPE instruction is present 232 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 233 dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 234 //Xerces 2 only 235 //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true); 236 dbfInstance.setXIncludeAware(false); 237 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 238 //Parse the document 239 Document doc = builder.parse(xmlFile); 240 isSafe = (doc != null && doc.getDocumentElement() != null); 241 } 242 } 243 } catch (Exception e) { 244 isSafe = false; 245 } 246 return isSafe; 247 } 248 249 250 /** 251 * Extract all URL links from a PDF file provided.<br> 252 * This can be used to apply validation on a PDF against contained links. 253 * 254 * @param pdfFilePath pdfFilePath Filename of the PDF file to process. 255 * @return A List of URL objects that is empty if no links is found. 256 * @throws Exception If any error occurs during the processing of the PDF file. 257 * @see "https://www.gushiciku.cn/pl/21KQ" 258 * @see "https://pdfbox.apache.org/" 259 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 260 */ 261 public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception { 262 final List<URL> links = new ArrayList<>(); 263 File pdfFile = new File(pdfFilePath); 264 try (PDDocument document = Loader.loadPDF(pdfFile)) { 265 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 266 AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() { 267 @Override 268 public boolean accept(PDAnnotation annotation) { 269 boolean keep = false; 270 if (annotation instanceof PDAnnotationLink) { 271 keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI); 272 } 273 return keep; 274 } 275 }; 276 documentCatalog.getPages().forEach(page -> { 277 try { 278 page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> { 279 PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction(); 280 try { 281 URL urlObj = new URL(linkAnnotation.getURI()); 282 if (!links.contains(urlObj)) { 283 links.add(urlObj); 284 } 285 } catch (MalformedURLException e) { 286 throw new RuntimeException(e); 287 } 288 }); 289 } catch (Exception e) { 290 throw new RuntimeException(e); 291 } 292 }); 293 } 294 return links; 295 } 296 297 /** 298 * Apply a collection of validations on a PDF file provided: 299 * <ul> 300 * <li>Real PDF file.</li> 301 * <li>No attachments.</li> 302 * <li>No Javascript code.</li> 303 * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li> 304 * <li>No XFA forms in order to prevent exposure to XXE/SSRF like CVE-2025-54988.</li> 305 * </ul> 306 * 307 * @param pdfFilePath Filename of the PDF file to check. 308 * @return True only if the file pass all validations. 309 * @see "https://stackoverflow.com/a/36161267" 310 * @see "https://www.gushiciku.cn/pl/21KQ" 311 * @see "https://github.com/jonaslejon/malicious-pdf" 312 * @see "https://pdfbox.apache.org/" 313 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 314 * @see "https://nvd.nist.gov/vuln/detail/CVE-2025-54988" 315 * @see "https://github.com/mgthuramoemyint/POC-CVE-2025-54988" 316 * @see "https://en.wikipedia.org/wiki/XFA" 317 */ 318 public static boolean isPDFSafe(String pdfFilePath) { 319 boolean isSafe = false; 320 try { 321 File pdfFile = new File(pdfFilePath); 322 if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) { 323 //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file 324 try (PDDocument document = Loader.loadPDF(pdfFile)) { 325 //Step 2: Check if the file contains attached files, in our case is not allowed 326 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 327 PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog); 328 if (namesDictionary.getEmbeddedFiles() == null) { 329 //Step 3: Check if the file contains any XFA forms 330 PDAcroForm acroForm = documentCatalog.getAcroForm(); 331 boolean hasForm = (acroForm != null && acroForm.getXFA() != null); 332 if (!hasForm) { 333 //Step 4: Check if the file contains Javascript code, in our case is not allowed 334 if (namesDictionary.getJavaScript() == null) { 335 //Step 5: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed 336 final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>(); 337 AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() { 338 @Override 339 public boolean accept(PDAnnotation annotation) { 340 boolean keep = false; 341 if (annotation instanceof PDAnnotationLink) { 342 PDAnnotationLink link = (PDAnnotationLink) annotation; 343 PDAction action = link.getAction(); 344 if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) { 345 keep = true; 346 } 347 } 348 return keep; 349 } 350 }; 351 documentCatalog.getPages().forEach(page -> { 352 try { 353 notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size()); 354 } catch (IOException e) { 355 throw new RuntimeException(e); 356 } 357 }); 358 if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) { 359 isSafe = true; 360 } 361 } 362 } 363 } 364 } 365 } 366 } catch (Exception e) { 367 isSafe = false; 368 } 369 return isSafe; 370 } 371 372 /** 373 * Remove as much as possible metadata from the provided PDF document object. 374 * 375 * @param document PDFBox PDF document object on which metadata must be removed. 376 * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069" 377 * @see "https://pdfbox.apache.org/" 378 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 379 */ 380 public static void clearPDFMetadata(PDDocument document) { 381 if (document != null) { 382 PDDocumentInformation infoEmpty = new PDDocumentInformation(); 383 document.setDocumentInformation(infoEmpty); 384 PDMetadata newMetadataEmpty = new PDMetadata(document); 385 document.getDocumentCatalog().setMetadata(newMetadataEmpty); 386 } 387 } 388 389 390 /** 391 * Validate that the URL provided is really a relative URL. 392 * 393 * @param targetUrl URL to validate. 394 * @return True only if the file pass all validations. 395 * @see "https://portswigger.net/web-security/ssrf" 396 * @see "https://stackoverflow.com/q/6785442" 397 */ 398 public static boolean isRelativeURL(String targetUrl) { 399 boolean isValid = false; 400 //Reject any URL encoded content and URL starting with a double slash 401 //Reject any URL contains credentials or fragment to prevent potential bypasses 402 String work = targetUrl; 403 if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) { 404 //Creation of a URL object must fail 405 try { 406 new URL(work); 407 isValid = false; 408 } catch (MalformedURLException mf) { 409 //Last check to be sure (for prod usage compile the pattern one time) 410 isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find(); 411 } 412 } 413 return isValid; 414 } 415 416 /** 417 * Apply a collection of validations on a ZIP file provided: 418 * <ul> 419 * <li>Real ZIP file.</li> 420 * <li>Contain less than a specified level of deepness.</li> 421 * <li>Do not contain Zip-Slip entry path.</li> 422 * </ul> 423 * 424 * @param zipFilePath Filename of the ZIP file to check. 425 * @param maxLevelDeepness Threshold of deepness above which a ZIP archive will be rejected. 426 * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file. 427 * @return True only if the file pass all validations. 428 * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042" 429 * @see "https://security.snyk.io/research/zip-slip-vulnerability" 430 * @see "https://en.wikipedia.org/wiki/Zip_bomb" 431 * @see "https://github.com/ptoomey3/evilarc" 432 * @see "https://github.com/abdulfatir/ZipBomb" 433 * @see "https://www.baeldung.com/cs/zip-bomb" 434 * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/" 435 * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream" 436 */ 437 public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) { 438 List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz"); 439 boolean isSafe = false; 440 try { 441 File zipFile = new File(zipFilePath); 442 if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) { 443 //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file 444 try (ZipFile zipArch = new ZipFile(zipFile)) { 445 //Step 2: Parse entries 446 long deepness = 0; 447 ZipEntry zipEntry; 448 String entryExtension; 449 String zipEntryName; 450 boolean validationsFailed = false; 451 Enumeration<? extends ZipEntry> entries = zipArch.entries(); 452 while (entries.hasMoreElements()) { 453 zipEntry = entries.nextElement(); 454 zipEntryName = zipEntry.getName(); 455 entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim(); 456 //Step 2a: Check if the current entry is an archive file 457 if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) { 458 validationsFailed = true; 459 break; 460 } 461 //Step 2b: Check that level of deepness is inferior to the threshold specified 462 if (zipEntryName.contains("/")) { 463 //Determine deepness by inspecting the entry name. 464 //Indeed, folder will be represented like this: folder/folder/folder/ 465 //So we can count the number of "/" to identify the deepness of the entry 466 deepness = zipEntryName.chars().filter(ch -> ch == '/').count(); 467 if (deepness > maxLevelDeepness) { 468 validationsFailed = true; 469 break; 470 } 471 } 472 //Step 2c: Check if any entries match pattern of zip slip payload 473 if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) { 474 validationsFailed = true; 475 break; 476 } 477 } 478 if (!validationsFailed) { 479 isSafe = true; 480 } 481 } 482 } 483 } catch (Exception e) { 484 isSafe = false; 485 } 486 return isSafe; 487 } 488 489 /** 490 * Identify the mime type of the content specified (array of bytes).<br> 491 * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required. 492 * 493 * @param content The content as an array of bytes. 494 * @return The mime type in lower case or null if it cannot be identified. 495 * @see "https://twitter.com/righettod/status/1595824709186519041" 496 * @see "https://tika.apache.org/" 497 * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core" 498 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types" 499 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml" 500 */ 501 public static String identifyMimeType(byte[] content) { 502 String mimeType = null; 503 if (content != null && content.length > 0) { 504 Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes()); 505 Metadata metadata = new Metadata(); 506 try { 507 try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) { 508 MediaType mt = detector.detect(tikaInputStream, metadata); 509 if (mt != null) { 510 mimeType = mt.toString().toLowerCase(Locale.ROOT); 511 } 512 } 513 } catch (IOException ioe) { 514 mimeType = null; 515 } 516 } 517 return mimeType; 518 } 519 520 /** 521 * Apply a collection of validations on a string expected to be an public IP address: 522 * <ul> 523 * <li>Is a valid IP v4 or v6 address.</li> 524 * <li>Is public from an Internet perspective.</li> 525 * </ul> 526 * <br> 527 * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded". 528 * <br><br> 529 * <b>Note for IPv6:</b> I used documentation found so it is really experimental! 530 * 531 * @param ip String expected to be a valid IP address. 532 * @return True only if the string pass all validations. 533 * @see "https://commons.apache.org/proper/commons-validator/" 534 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html" 535 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html" 536 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf" 537 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf" 538 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For" 539 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded" 540 * @see "https://ipcisco.com/lesson/ipv6-address/" 541 * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html" 542 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)" 543 * @see "https://www.arin.net/reference/research/statistics/address_filters/" 544 * @see "https://en.wikipedia.org/wiki/Multicast_address" 545 * @see "https://stackoverflow.com/a/5619409" 546 * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf" 547 * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml" 548 * @see "https://developer.android.com/reference/java/net/Inet6Address" 549 * @see "https://en.wikipedia.org/wiki/Unique_local_address" 550 */ 551 public static boolean isPublicIPAddress(String ip) { 552 boolean isValid = false; 553 try { 554 //Quick validation on the string itself based on characters used to compose an IP v4/v6 address 555 if (Pattern.matches("[0-9a-fA-F:.]+", ip)) { 556 //If OK then use the dedicated InetAddressValidator from Apache Commons Validator 557 if (InetAddressValidator.getInstance().isValid(ip)) { 558 //If OK then validate that is an public IP address 559 //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked. 560 InetAddress addr = InetAddress.getByName(ip); 561 isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress()); 562 //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP 563 //For the prefix map, the start of the key indicates if the value is a regex or a string 564 if (isValid && (addr instanceof Inet6Address)) { 565 Map<String, String> prefixes = new HashMap<>(); 566 prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$"); 567 prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$"); 568 prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:"); 569 prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$"); 570 prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$"); 571 prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$"); 572 prefixes.put("STRING_DOCUMENTATION", "2001:db8:"); 573 prefixes.put("STRING_GLOBAL-UNICAST", "2000:"); 574 prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$"); 575 final List<Boolean> results = new ArrayList<>(); 576 final String ipLower = ip.trim().toLowerCase(Locale.ROOT); 577 prefixes.forEach((addressType, expr) -> { 578 String exprLower = expr.trim().toLowerCase(); 579 if (addressType.startsWith("STRING_")) { 580 results.add(ipLower.startsWith(exprLower)); 581 } else { 582 results.add(Pattern.matches(exprLower, ipLower)); 583 } 584 }); 585 isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE)); 586 } 587 } 588 } 589 } catch (Exception e) { 590 isValid = false; 591 } 592 return isValid; 593 } 594 595 /** 596 * Compute a SHA256 hash from an input composed of a collection of strings.<br><br> 597 * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br> 598 * <p> 599 * Example of possible abuse without precautions applied during the hash calculation logic:<br> 600 * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br> 601 * </p> 602 * This method ensure that both hash above will be different.<br><br> 603 * 604 * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character. 605 * 606 * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection. 607 * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null. 608 * @throws Exception If any exception occurs 609 * @see "https://github.com/righettod/code-snippets-security-utils/issues/16" 610 * @see "https://pentesterlab.com/badges/codereview" 611 * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/" 612 * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash" 613 */ 614 public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception { 615 byte[] hash = null; 616 String separator = "|"; 617 if (parts != null && !parts.isEmpty()) { 618 //Ensure that not part is null 619 if (parts.stream().anyMatch(Objects::isNull)) { 620 throw new IllegalArgumentException("No part must be null!"); 621 } 622 //Ensure that the separator is absent from every part 623 if (parts.stream().anyMatch(part -> part.contains(separator))) { 624 throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator)); 625 } 626 MessageDigest digest = MessageDigest.getInstance("SHA-256"); 627 final StringBuilder buffer = new StringBuilder(separator); 628 parts.forEach(p -> { 629 buffer.append(p).append(separator); 630 }); 631 hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8)); 632 } 633 return hash; 634 } 635 636 /** 637 * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br> 638 * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br> 639 * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations. 640 * 641 * @param xmlFilePath Filename of the XML file to check. 642 * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references. 643 * @return True only if the file pass all validations. 644 * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp" 645 * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid" 646 * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html" 647 * @see "https://www.xml.com/pub/98/08/xmlqna0.html" 648 * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397" 649 * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier" 650 */ 651 public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) { 652 boolean isSafe = false; 653 final String errorTemplate = "Non allowed %s ID detected!"; 654 final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>"; 655 final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>"; 656 657 if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) { 658 throw new IllegalArgumentException("At least one SID must be specified!"); 659 } 660 File xmlFile = new File(xmlFilePath); 661 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 662 try { 663 EntityResolver resolverValidator = (publicId, systemId) -> { 664 if (publicId != null) { 665 throw new SAXException(String.format(errorTemplate, "PUBLIC")); 666 } 667 if (!allowedSystemIdentifiers.contains(systemId)) { 668 throw new SAXException(String.format(errorTemplate, "SYSTEM")); 669 } 670 //If it is OK then return a empty DTD/XSD 671 return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD)); 672 }; 673 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 674 dbfInstance.setIgnoringElementContentWhitespace(true); 675 dbfInstance.setXIncludeAware(false); 676 dbfInstance.setValidating(false); 677 dbfInstance.setCoalescing(true); 678 dbfInstance.setIgnoringComments(false); 679 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 680 builder.setEntityResolver(resolverValidator); 681 Document doc = builder.parse(xmlFile); 682 isSafe = (doc != null); 683 } catch (SAXException | IOException | ParserConfigurationException e) { 684 isSafe = false; 685 } 686 } 687 688 return isSafe; 689 } 690 691 /** 692 * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL): 693 * <ul> 694 * <li>Real CSV file.</li> 695 * <li>Do not contains any payload related to a CSV injections.</li> 696 * </ul> 697 * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br> 698 * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br> 699 * 700 * @param csvFilePath Filename of the CSV file to check. 701 * @return True only if the file pass all validations. 702 * @see "https://commons.apache.org/proper/commons-csv/" 703 * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL" 704 * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection" 705 * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/" 706 * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection" 707 * @see "https://owasp.org/www-community/attacks/CSV_Injection" 708 * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/" 709 * @see "https://cwe.mitre.org/data/definitions/1236.html" 710 */ 711 public static boolean isExcelCSVSafe(String csvFilePath) { 712 boolean isSafe; 713 final AtomicInteger recordCount = new AtomicInteger(); 714 final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t'); 715 716 try { 717 final List<String> payloadsIdentified = new ArrayList<>(); 718 try (Reader in = new FileReader(csvFilePath)) { 719 Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); 720 records.forEach(record -> { 721 record.forEach(recordValue -> { 722 if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) { 723 payloadsIdentified.add(recordValue); 724 } 725 recordCount.getAndIncrement(); 726 }); 727 }); 728 } 729 isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0); 730 } catch (Exception e) { 731 isSafe = false; 732 } 733 734 return isSafe; 735 } 736 737 /** 738 * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br> 739 * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach. 740 * 741 * @param processingModeType Define the mode of processing i.e. protect or validate. ({@link ProcessingModeType}) 742 * @param input When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used. 743 * @param secret Secret to use to compute the SHA256 HMAC. 744 * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul> 745 * @throws Exception If any exception occurs. 746 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html" 747 * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization" 748 * @see "https://portswigger.net/web-security/deserialization" 749 * @see "https://www.baeldung.com/java-serialization-approaches" 750 * @see "https://www.baeldung.com/java-serialization" 751 * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation" 752 * @see "https://en.wikipedia.org/wiki/HMAC" 753 * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/" 754 */ 755 public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingModeType processingModeType, String input, byte[] secret) throws Exception { 756 Map<String, Object> results; 757 String resultFormatTemplate = "%s:%s"; 758 //Verify input provided to be consistent 759 if (processingModeType == null) { 760 throw new IllegalArgumentException("The processing mode is mandatory!"); 761 } 762 if (input == null || input.trim().isEmpty()) { 763 throw new IllegalArgumentException("Input data is mandatory!"); 764 } 765 if (secret == null || secret.length == 0) { 766 throw new IllegalArgumentException("The HMAC secret is mandatory!"); 767 } 768 if (processingModeType.equals(ProcessingModeType.VALIDATE) && input.split(":").length != 2) { 769 throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!"); 770 } 771 //Processing 772 Base64.Decoder b64Decoder = Base64.getDecoder(); 773 Base64.Encoder b64Encoder = Base64.getEncoder(); 774 String hmacAlgorithm = "HmacSHA256"; 775 Mac mac = Mac.getInstance(hmacAlgorithm); 776 SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm); 777 mac.init(key); 778 results = new HashMap<>(); 779 results.put("PROCESSING_MODE", processingModeType.toString()); 780 switch (processingModeType) { 781 case PROTECT -> { 782 byte[] objectBytes = b64Decoder.decode(input); 783 byte[] hmac = mac.doFinal(objectBytes); 784 String encodedHmac = b64Encoder.encodeToString(hmac); 785 results.put("STATUS", Boolean.TRUE); 786 results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac)); 787 } 788 case VALIDATE -> { 789 String[] parts = input.split(":"); 790 byte[] objectBytes = b64Decoder.decode(parts[0].trim()); 791 byte[] hmacProvided = b64Decoder.decode(parts[1].trim()); 792 byte[] hmacComputed = mac.doFinal(objectBytes); 793 String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed); 794 Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed); 795 results.put("STATUS", hmacIsValid); 796 results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed)); 797 } 798 default -> throw new IllegalArgumentException("Not supported processing mode!"); 799 } 800 return results; 801 } 802 803 /** 804 * Apply a collection of validations on a JSON string provided: 805 * <ul> 806 * <li>Real JSON structure.</li> 807 * <li>Contain less than a specified number of deepness for nested objects or arrays.</li> 808 * <li>Contain less than a specified number of items in any arrays.</li> 809 * </ul> 810 * <br> 811 * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br> 812 * I used the following assumption: 813 * <ul> 814 * <li>The character <code>{</code> identify the beginning of an object.</li> 815 * <li>The character <code>}</code> identify the end of an object.</li> 816 * <li>The character <code>[</code> identify the beginning of an array.</li> 817 * <li>The character <code>]</code> identify the end of an array.</li> 818 * <li>The character <code>"</code> identify the delimiter of a string.</li> 819 * <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li> 820 * </ul> 821 * 822 * @param json String containing the JSON data to validate. 823 * @param maxItemsByArraysCount Maximum number of items allowed in an array. 824 * @param maxDeepnessAllowed Maximum number nested objects or arrays allowed. 825 * @return True only if the string pass all validations. 826 * @see "https://javaee.github.io/jsonp/" 827 * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306" 828 * @see "https://github.com/InductiveComputerScience/pbJson/issues/2" 829 */ 830 public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) { 831 boolean isSafe = false; 832 833 try { 834 //Step 1: Analyse the JSON string 835 int currentDeepness = 0; 836 int currentArrayItemsCount = 0; 837 int maxDeepnessReached = 0; 838 int maxArrayItemsCountReached = 0; 839 boolean currentlyInArray = false; 840 boolean currentlyInString = false; 841 int currentNestedArrayLevel = 0; 842 String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter 843 String work = json.replace(jsonEscapedDoubleQuote, "'"); 844 for (char c : work.toCharArray()) { 845 switch (c) { 846 case '{': { 847 if (!currentlyInString) { 848 currentDeepness++; 849 } 850 break; 851 } 852 case '}': { 853 if (!currentlyInString) { 854 currentDeepness--; 855 } 856 break; 857 } 858 case '[': { 859 if (!currentlyInString) { 860 currentDeepness++; 861 if (currentlyInArray) { 862 currentNestedArrayLevel++; 863 } 864 currentlyInArray = true; 865 } 866 break; 867 } 868 case ']': { 869 if (!currentlyInString) { 870 currentDeepness--; 871 currentArrayItemsCount = 0; 872 if (currentNestedArrayLevel > 0) { 873 currentNestedArrayLevel--; 874 } 875 if (currentNestedArrayLevel == 0) { 876 currentlyInArray = false; 877 } 878 } 879 break; 880 } 881 case '"': { 882 currentlyInString = !currentlyInString; 883 break; 884 } 885 case ',': { 886 if (!currentlyInString && currentlyInArray) { 887 currentArrayItemsCount++; 888 } 889 break; 890 } 891 } 892 if (currentDeepness > maxDeepnessReached) { 893 maxDeepnessReached = currentDeepness; 894 } 895 if (currentArrayItemsCount > maxArrayItemsCountReached) { 896 maxArrayItemsCountReached = currentArrayItemsCount; 897 } 898 } 899 //Step 2: Apply validation against the value specified as limits 900 isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached)); 901 902 //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation. 903 if (isSafe) { 904 JsonReader reader = Json.createReader(new StringReader(json)); 905 isSafe = (reader.read() != null); 906 } 907 908 } catch (Exception e) { 909 isSafe = false; 910 } 911 return isSafe; 912 } 913 914 /** 915 * Apply a collection of validations on a image file provided: 916 * <ul> 917 * <li>Real image file.</li> 918 * <li>Its mime type is into the list of allowed mime types.</li> 919 * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li> 920 * </ul> 921 * <br> 922 * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team. 923 * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br> 924 * 925 * @param imageFilePath Filename of the image file to check. 926 * @param imageAllowedMimeTypes List of image mime types allowed. 927 * @return True only if the file pass all validations. 928 * @see "https://commons.apache.org/proper/commons-imaging/" 929 * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html" 930 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types" 931 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image" 932 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 933 * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html" 934 * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java" 935 * @see "https://exiftool.org/examples.html" 936 * @see "https://en.wikipedia.org/wiki/List_of_file_signatures" 937 * @see "https://hexed.it/" 938 * @see "https://github.com/sighook/pixload" 939 */ 940 public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) { 941 boolean isSafe = false; 942 Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE); 943 try { 944 File imgFile = new File(imageFilePath); 945 if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) { 946 final byte[] imgBytes = Files.readAllBytes(imgFile.toPath()); 947 //Step 1: Check the mime type of the file against the allowed ones 948 ImageInfo imgInfo = Imaging.getImageInfo(imgBytes); 949 if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) { 950 //Step 2: Load the image into an object using the Image API 951 BufferedImage imgObject = Imaging.getBufferedImage(imgBytes); 952 if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) { 953 //Step 3: Check the metadata if the image format support it - Highly experimental 954 List<String> metadataWithPayloads = new ArrayList<>(); 955 final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes); 956 if (imgMetadata != null) { 957 imgMetadata.getItems().forEach(item -> { 958 String metadata = item.toString(); 959 if (payloadDetectionRegex.matcher(metadata).find()) { 960 metadataWithPayloads.add(metadata); 961 } 962 }); 963 } 964 isSafe = metadataWithPayloads.isEmpty(); 965 } 966 } 967 } 968 } catch (Exception e) { 969 isSafe = false; 970 } 971 return isSafe; 972 } 973 974 /** 975 * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br> 976 * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details. 977 * 978 * @param inputFilePath Filename of the file to clean up. 979 * @param inputFileType Type of the file provided. 980 * @return A array of bytes with the cleaned file. 981 * @throws IllegalArgumentException If an invalid parameter is passed 982 * @throws Exception If any technical error during the cleaning processing 983 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 984 * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc" 985 * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc" 986 * @see "https://stackoverflow.com/a/13605411" 987 */ 988 public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception { 989 ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream(); 990 File inputFile = new File(inputFilePath); 991 if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) { 992 throw new IllegalArgumentException("Cannot read the content of the input file!"); 993 } 994 switch (inputFileType) { 995 case PDF -> { 996 try (PDDocument document = Loader.loadPDF(inputFile)) { 997 document.save(sanitizedContent); 998 } 999 } 1000 case IMAGE -> { 1001 // Load the original image 1002 BufferedImage originalImage = ImageIO.read(inputFile); 1003 String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim(); 1004 // Check that image has been successfully loaded 1005 if (originalImage == null) { 1006 throw new IOException("Cannot load the original image !"); 1007 } 1008 // Get current Width and Height of the image 1009 int originalWidth = originalImage.getWidth(null); 1010 int originalHeight = originalImage.getHeight(null); 1011 // Resize the image by removing 1px on Width and Height 1012 Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH); 1013 // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size 1014 Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH); 1015 // Save image to a bytes buffer 1016 int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency 1017 if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) { 1018 bufferedImageType = BufferedImage.TYPE_INT_RGB; 1019 } 1020 BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType); 1021 Graphics2D drawer = sanitizedImage.createGraphics(); 1022 drawer.drawImage(initialSizedImage, 0, 0, null); 1023 drawer.dispose(); 1024 ImageIO.write(sanitizedImage, originalFormat, sanitizedContent); 1025 } 1026 default -> throw new IllegalArgumentException("Type of file not supported !"); 1027 } 1028 if (sanitizedContent.size() == 0) { 1029 throw new IOException("An error occur during the rewrite operation!"); 1030 } 1031 return sanitizedContent.toByteArray(); 1032 } 1033 1034 /** 1035 * Apply a collection of validations on a string expected to be an email address: 1036 * <ul> 1037 * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li> 1038 * <li>Is not using "Encoded-word" format.</li> 1039 * <li>Is not using comment format.</li> 1040 * <li>Is not using "Punycode" format.</li> 1041 * <li>Is not using UUCP style addresses.</li> 1042 * <li>Is not using address literals.</li> 1043 * <li>Is not using source routes.</li> 1044 * <li>Is not using the "percent hack".</li> 1045 * </ul><br> 1046 * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br> 1047 * 1048 * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective. 1049 * 1050 * @param addr String expected to be a valid email address. 1051 * @return True only if the string pass all validations. 1052 * @see "https://commons.apache.org/proper/commons-validator/" 1053 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html" 1054 * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2" 1055 * @see "https://portswigger.net/research/splitting-the-email-atom" 1056 * @see "https://www.jochentopf.com/email/address.html" 1057 * @see "https://en.wikipedia.org/wiki/Email_address" 1058 */ 1059 public static boolean isEmailAddress(String addr) { 1060 boolean isValid = false; 1061 String work = addr.toLowerCase(Locale.ROOT); 1062 Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE); 1063 Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE); 1064 try { 1065 //Start with the use of the dedicated EmailValidator from Apache Commons Validator 1066 if (EmailValidator.getInstance(true, true).isValid(work)) { 1067 //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach 1068 if (!encodedWordRegex.matcher(work).find()) { 1069 //If OK then validate it does not contains punycode 1070 if (!work.contains("xn--")) { 1071 //If OK then validate it does not use: 1072 // UUCP style addresses, 1073 // Comment format, 1074 // Address literals, 1075 // Source routes, 1076 // The percent hack. 1077 if (!forbiddenCharacterRegex.matcher(work).find()) { 1078 isValid = true; 1079 } 1080 } 1081 } 1082 } 1083 } catch (Exception e) { 1084 isValid = false; 1085 } 1086 return isValid; 1087 } 1088 1089 /** 1090 * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>. 1091 * <br> 1092 * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>. 1093 * <br> 1094 * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF. 1095 * <br> 1096 * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>. 1097 * <br> 1098 * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker. 1099 * 1100 * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification . 1101 * @return TRUE only if the url point to a Qualified Certificate in PEM format. 1102 * @see "https://www.stet.eu/en/psd2/" 1103 * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf" 1104 * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/" 1105 * @see "https://datatracker.ietf.org/doc/rfc9421/" 1106 * @see "https://openjdk.org/groups/net/httpclient/intro.html" 1107 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html" 1108 * @see "https://portswigger.net/web-security/ssrf" 1109 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control" 1110 */ 1111 public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) { 1112 boolean isValid = false; 1113 long connectionTimeoutInSeconds = 10; 1114 String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest"; 1115 try { 1116 //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET 1117 if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) { 1118 String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1); 1119 if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) { 1120 //2. Ensure that the URL is a valid url by creating a instance of the class URI 1121 URI uri = URI.create(certificateUrl); 1122 //3. Require usage of HTTPS and reject any url containing query parameters 1123 if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) { 1124 //4. Perform a HTTP HEAD request in order to get the content type of the remote resource 1125 //and limit the interest to use the SSRF because to pass the check the url need to: 1126 //- Do not having any query parameters. 1127 //- Use HTTPS protocol. 1128 //- End with a string having the format "_[0-9a-f]{64}". 1129 //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters. 1130 HttpResponse<String> response; 1131 try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) { 1132 HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request 1133 .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses 1134 .build(); 1135 response = client.send(request, HttpResponse.BodyHandlers.ofString()); 1136 if (response.statusCode() == 200) { 1137 //5. Ensure that the response content type is "text/plain" 1138 Optional<String> contentType = response.headers().firstValue("Content-Type"); 1139 isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain")); 1140 } 1141 } 1142 } 1143 } 1144 } 1145 } catch (Exception e) { 1146 isValid = false; 1147 } 1148 return isValid; 1149 } 1150 1151 /** 1152 * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached. 1153 * 1154 * @param encodedData URL encoded data. 1155 * @param decodingRoundThreshold Threshold above which decoding will fail. 1156 * @return The decoded data. 1157 * @throws SecurityException If the threshold is reached. 1158 * @see "https://en.wikipedia.org/wiki/Percent-encoding" 1159 * @see "https://owasp.org/www-community/Double_Encoding" 1160 * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings" 1161 * @see "https://capec.mitre.org/data/definitions/120.html" 1162 */ 1163 public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException { 1164 if (decodingRoundThreshold < 1) { 1165 throw new IllegalArgumentException("Threshold must be a positive number !"); 1166 } 1167 if (encodedData == null) { 1168 throw new IllegalArgumentException("Data provided must not be null !"); 1169 } 1170 Charset charset = StandardCharsets.UTF_8; 1171 int currentDecodingRound = 0; 1172 boolean isFinished = false; 1173 String currentRoundData = encodedData; 1174 String previousRoundData = encodedData; 1175 while (!isFinished) { 1176 if (currentDecodingRound > decodingRoundThreshold) { 1177 throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold)); 1178 } 1179 currentRoundData = URLDecoder.decode(currentRoundData, charset); 1180 isFinished = currentRoundData.equals(previousRoundData); 1181 previousRoundData = currentRoundData; 1182 currentDecodingRound++; 1183 } 1184 return currentRoundData; 1185 } 1186 1187 /** 1188 * Apply a collection of validations on a string expected to be an system file/folder path: 1189 * <ul> 1190 * <li>Does not contains path traversal payload.</li> 1191 * <li>The canonical path is equals to the absolute path.</li> 1192 * </ul><br> 1193 * 1194 * @param path String expected to be a valid system file/folder path. 1195 * @return True only if the string pass all validations. 1196 * @see "https://portswigger.net/web-security/file-path-traversal" 1197 * @see "https://learn.snyk.io/lesson/directory-traversal/" 1198 * @see "https://capec.mitre.org/data/definitions/126.html" 1199 * @see "https://owasp.org/www-community/attacks/Path_Traversal" 1200 */ 1201 public static boolean isPathSafe(String path) { 1202 boolean isSafe = false; 1203 int decodingRoundThreshold = 3; 1204 try { 1205 if (path != null && !path.isEmpty()) { 1206 //URL decode the path if case of data coming from a web context 1207 String decodedPath = applyURLDecoding(path, decodingRoundThreshold); 1208 //Ensure that no path traversal expression is present 1209 if (!decodedPath.contains("..")) { 1210 File f = new File(decodedPath); 1211 String canonicalPath = f.getCanonicalPath(); 1212 String absolutePath = f.getAbsolutePath(); 1213 isSafe = canonicalPath.equals(absolutePath); 1214 } 1215 } 1216 } catch (Exception e) { 1217 isSafe = false; 1218 } 1219 return isSafe; 1220 } 1221 1222 /** 1223 * Identify if an XML contains any XML comments or have any XSL processing instructions.<br> 1224 * Stream reader based parsing is used to support large XML tree. 1225 * 1226 * @param xmlFilePath Filename of the XML file to check. 1227 * @return True only if XML comments or XSL processing instructions are identified. 1228 * @see "https://www.tutorialspoint.com/xml/xml_processing.htm" 1229 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html" 1230 * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion" 1231 * @see "https://www.w3.org/Style/styling-XML.en.html" 1232 */ 1233 public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) { 1234 boolean itemsDetected = false; 1235 try { 1236 //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks 1237 XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); 1238 xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 1239 xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1240 xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); 1241 xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); 1242 1243 //Parse file 1244 try (FileInputStream fis = new FileInputStream(xmlFilePath)) { 1245 XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis); 1246 int eventType; 1247 while (reader.hasNext() && !itemsDetected) { 1248 eventType = reader.next(); 1249 if (eventType == XMLEvent.COMMENT) { 1250 itemsDetected = true; 1251 } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) { 1252 itemsDetected = true; 1253 } 1254 } 1255 } 1256 } catch (Exception e) { 1257 //In case of error then assume that the check failed 1258 itemsDetected = true; 1259 } 1260 return itemsDetected; 1261 } 1262 1263 1264 /** 1265 * Perform a set of additional validations against a JWT token: 1266 * <ul> 1267 * <li>Do not use the <b>NONE</b> signature algorithm.</li> 1268 * <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li> 1269 * <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li> 1270 * <li>Match the expected type of token: ACCESS or ID or REFRESH.</li> 1271 * </ul> 1272 * 1273 * @param token JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied. 1274 * @param expectedTokenType The type of expected token using the enumeration provided. 1275 * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to. 1276 * @return True only the token pass all the validations. 1277 * @see "https://www.iana.org/assignments/jwt/jwt.xhtml" 1278 * @see "https://auth0.com/docs/secure/tokens/access-tokens" 1279 * @see "https://auth0.com/docs/secure/tokens/id-tokens" 1280 * @see "https://auth0.com/docs/secure/tokens/refresh-tokens" 1281 * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/" 1282 * @see "https://jwt.io/libraries?language=Java" 1283 * @see "https://pentesterlab.com/blog/secure-jwt-library-design" 1284 * @see "https://github.com/auth0/java-jwt" 1285 */ 1286 public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) { 1287 boolean isValid = false; 1288 TokenType tokenType; 1289 try { 1290 if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) { 1291 if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) { 1292 String jti = token.getId(); 1293 if (jti != null && !jti.trim().isEmpty()) { 1294 boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase); 1295 if (!jtiIsRevoked) { 1296 //Determine the token type based on the presence of specifics claims 1297 if (!token.getClaim("scope").isMissing()) { 1298 tokenType = TokenType.ACCESS; 1299 } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) { 1300 tokenType = TokenType.ID; 1301 } else { 1302 tokenType = TokenType.REFRESH; 1303 } 1304 isValid = (tokenType.equals(expectedTokenType)); 1305 } 1306 } 1307 } 1308 } 1309 1310 } catch (Exception e) { 1311 //In case of error then assume that the check failed 1312 isValid = false; 1313 } 1314 return isValid; 1315 } 1316 1317 /** 1318 * Apply a validations on a regular expression to ensure that is not prone to the ReDOS attack. 1319 * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b> 1320 * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool. 1321 * 1322 * @param regex String expected to be a valid regular expression (regex). 1323 * @param data Test data on which the regular expression is executed for the test. 1324 * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified). 1325 * @return True only if the string pass all validations. 1326 * @see "https://github.blog/security/how-to-fix-a-redos/" 1327 * @see "https://learn.snyk.io/lesson/redos" 1328 * @see "https://rules.sonarsource.com/java/RSPEC-2631/" 1329 * @see "https://github.com/doyensec/regexploit" 1330 * @see "https://github.com/makenowjust-labs/recheck" 1331 * @see "https://github.com/tjenkinson/redos-detector" 1332 * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf" 1333 * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS" 1334 */ 1335 public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) { 1336 Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value."); 1337 Objects.requireNonNull(data, "A sample data is needed to perform the test."); 1338 Objects.requireNonNull(regex, "A regular expression is needed to perform the test."); 1339 boolean isSafe = false; 1340 int executionTimeout = maximumRunningTimeInSeconds.orElse(4); 1341 ExecutorService executor = Executors.newSingleThreadExecutor(); 1342 try { 1343 Callable<Boolean> task = () -> { 1344 Pattern pattern = Pattern.compile(regex); 1345 return pattern.matcher(data).matches(); 1346 }; 1347 List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS); 1348 if (!tasks.getFirst().isCancelled()) { 1349 isSafe = true; 1350 } 1351 } catch (Exception e) { 1352 isSafe = false; 1353 } finally { 1354 executor.shutdownNow(); 1355 } 1356 return isSafe; 1357 } 1358 1359 /** 1360 * Compute a UUID version 7 without using any external dependency.<br><br> 1361 * <b>Below are my personal point of view and perhaps I'm totally wrong!</b> 1362 * <br><br> 1363 * Why such method? 1364 * <ul> 1365 * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li> 1366 * <li>Import a library just to generate such value is overkill for me.</li> 1367 * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li> 1368 * </ul> 1369 * <br> 1370 * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b> 1371 * <br><br> 1372 * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly: 1373 * <ul> 1374 * <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li> 1375 * <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li> 1376 * </ul> 1377 * 1378 * @return A UUID object representing the UUID v7. 1379 * @see "https://uuid7.com/" 1380 * @see "https://antonz.org/uuidv7/" 1381 * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns" 1382 * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7" 1383 * @see "https://www.baeldung.com/java-generating-time-based-uuids" 1384 * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier" 1385 * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/" 1386 */ 1387 public static UUID computeUUIDv7() { 1388 SecureRandom secureRandom = new SecureRandom(); 1389 // Generate truly random bytes 1390 byte[] value = new byte[16]; 1391 secureRandom.nextBytes(value); 1392 // Get current timestamp in milliseconds 1393 ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES); 1394 timestamp.putLong(System.currentTimeMillis()); 1395 // Create the TIMESTAMP part of the UUID 1396 System.arraycopy(timestamp.array(), 2, value, 0, 6); 1397 // Create the VERSION and the VARIANT parts of the UUID 1398 value[6] = (byte) ((value[6] & 0x0F) | 0x70); 1399 value[8] = (byte) ((value[8] & 0x3F) | 0x80); 1400 //Create the HIGH and LOW parts of the UUID 1401 ByteBuffer buf = ByteBuffer.wrap(value); 1402 long high = buf.getLong(); 1403 long low = buf.getLong(); 1404 //Create and return the UUID object 1405 UUID uuidv7 = new UUID(high, low); 1406 return uuidv7; 1407 } 1408 1409 /** 1410 * Ensure that an XSD file does not contain any include/import/redefine instruction (prevent exposure to SSRF). 1411 * 1412 * @param xsdFilePath Filename of the XSD file to check. 1413 * @return True only if the file pass all validations. 1414 * @see "https://portswigger.net/web-security/ssrf" 1415 * @see "https://www.w3schools.com/Xml/el_import.asp" 1416 * @see "https://www.w3schools.com/xml/el_include.asp" 1417 * @see "https://www.linkedin.com/posts/righettod_appsec-appsecurity-java-activity-7344048434326188053-6Ru9" 1418 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/validation/SchemaFactory.html#setProperty(java.lang.String,java.lang.Object)" 1419 */ 1420 public static boolean isXSDSafe(String xsdFilePath) { 1421 boolean isSafe = false; 1422 try { 1423 File xsdFile = new File(xsdFilePath); 1424 if (xsdFile.exists() && xsdFile.canRead() && xsdFile.isFile()) { 1425 //Parse the XSD file, if an exception occur then it's imply that the XSD specified is not a valid ones 1426 //Create an schema factory throwing Exception if a external schema is specified 1427 SchemaFactory schemaFactory = SchemaFactory.newDefaultInstance(); 1428 schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1429 schemaFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); 1430 //Parse the schema 1431 Schema schema = schemaFactory.newSchema(xsdFile); 1432 isSafe = (schema != null); 1433 } 1434 } catch (Exception e) { 1435 isSafe = false; 1436 } 1437 return isSafe; 1438 } 1439 1440 1441 /** 1442 * Extract all sensitive information from a string provided.<br> 1443 * This can be used to identify any sensitive information into a <a href="https://cwe.mitre.org/data/definitions/532.html">message expected to be written in a log</a> and then replace every sensitive values by an obfuscated ones.<br><br> 1444 * For the luxembourg national identification number, this method focus on detecting identifiers for a physical entity (people) and not a moral one (company).<br><br> 1445 * I delegated the validation of the IBAN to a dedicated library (<a href="https://github.com/arturmkrtchyan/iban4j">iban4j</a>) to not "reinvent the wheel" and then introduce buggy validation myself. I used <b>iban4j</b> over the <b><a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/IBANValidator.html">IBANValidator</a></b> class from the <a href="https://commons.apache.org/proper/commons-validator/"><b>Apache Commons Validator</b></a> library because <b>iban4j</b> perform a full official IBAN specification validation so its reduce risks of false-positives by ensuring that an IBAN detected is a real IBAN.<br><br> 1446 * Same thing and reason regarding the validation of the bank card PAN using the class <a href="https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html">CreditCardValidator</a> from the <b>Apache Commons Validator</b> library. 1447 * 1448 * @param content String in which sensitive information must be searched. 1449 * @return A map with the collection of identified sensitive information gathered by sensitive information type. If nothing is found then the map is empty. A type of sensitive information is only present if there is at least one item found. A set is used to not store duplicates occurrence of the same sensitive information. 1450 * @throws Exception If any error occurs during the processing. 1451 * @see "https://guichet.public.lu/en/citoyens/citoyennete/registre-national/identification/demande-numero-rnpp.html" 1452 * @see "https://cnpd.public.lu/fr/decisions-avis/2009/identifiant-unique.html" 1453 * @see "https://cnpd.public.lu/content/dam/cnpd/fr/decisions-avis/2009/identifiant-unique/48_2009.pdf" 1454 * @see "https://en.wikipedia.org/wiki/International_Bank_Account_Number" 1455 * @see "https://www.iban.com/structure" 1456 * @see "https://github.com/arturmkrtchyan/iban4j" 1457 * @see "https://cwe.mitre.org/data/definitions/532.html" 1458 * @see "https://www.baeldung.com/logback-mask-sensitive-data" 1459 * @see "https://en.wikipedia.org/wiki/Payment_card_number" 1460 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/CreditCardValidator.html" 1461 * @see "https://commons.apache.org/proper/commons-validator/" 1462 */ 1463 public static Map<SensitiveInformationType, Set<String>> extractAllSensitiveInformation(String content) throws Exception { 1464 CreditCardValidator creditCardValidator = CreditCardValidator.genericCreditCardValidator(); 1465 Pattern nationalIdentifierRegex = Pattern.compile("([0-9]{13})"); 1466 Pattern ibanNonHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}[A-Z0-9]{11,30})", Pattern.CASE_INSENSITIVE); 1467 Pattern ibanHumanFormattedRegex = Pattern.compile("([A-Z]{2}[0-9]{2}(?:\\s[A-Z0-9]{4}){2,7}\\s[A-Z0-9]{1,4})", Pattern.CASE_INSENSITIVE); 1468 Pattern panRegex = Pattern.compile("((?:\\d[ -]*?){13,19})"); 1469 Map<SensitiveInformationType, Set<String>> data = new HashMap<>(); 1470 data.put(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER, new HashSet<>()); 1471 data.put(SensitiveInformationType.IBAN, new HashSet<>()); 1472 data.put(SensitiveInformationType.BANK_CARD_PAN, new HashSet<>()); 1473 1474 if (content != null && !content.isBlank()) { 1475 /* Step 1: Search for LU national identifier */ 1476 //A national identifier have the following structure: [BIRTHDATE_YEAR_YYYY][BIRTHDATE_MONTH_MM][BIRTHDATE_DAY_DD][FIVE_INTEGER] 1477 //Define minimal and maximal birth year base on current year 1478 //Assume people live less than 120 years 1479 int maxBirthYear = LocalDate.now(ZoneId.of("Europe/Luxembourg")).getYear(); 1480 int minBirthYear = maxBirthYear - 120; 1481 Matcher matcher = nationalIdentifierRegex.matcher(content); 1482 String nationalIdentierFull; 1483 int nationalIdentierYear, nationalIdentierMonth, nationalIdentierDay; 1484 while (matcher.find()) { 1485 nationalIdentierFull = matcher.group(1); 1486 //Check that the string is a valid national identifier and if yes then add it 1487 nationalIdentierYear = Integer.parseInt(nationalIdentierFull.substring(0, 4)); 1488 nationalIdentierMonth = Integer.parseInt(nationalIdentierFull.substring(4, 6)); 1489 nationalIdentierDay = Integer.parseInt(nationalIdentierFull.substring(6, 8)); 1490 if (nationalIdentierYear >= minBirthYear && nationalIdentierYear <= maxBirthYear) { 1491 if (nationalIdentierMonth >= 1 && nationalIdentierMonth <= 12) { 1492 if (YearMonth.of(nationalIdentierYear, nationalIdentierMonth).isValidDay(nationalIdentierDay)) { 1493 data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).add(nationalIdentierFull); 1494 } 1495 } 1496 } 1497 } 1498 1499 /* Step 2a: Search for IBAN that are non human formatted */ 1500 matcher = ibanNonHumanFormattedRegex.matcher(content); 1501 String iban, ibanUpperCased; 1502 while (matcher.find()) { 1503 iban = matcher.group(1); 1504 ibanUpperCased = iban.toUpperCase(Locale.ROOT); 1505 //Check that the string is a valid IBAN and if yes then add it 1506 if (IbanUtil.isValid(ibanUpperCased)) { 1507 data.get(SensitiveInformationType.IBAN).add(iban); 1508 } 1509 } 1510 1511 /* Step 2b: Search for IBAN that are human formatted */ 1512 matcher = ibanHumanFormattedRegex.matcher(content); 1513 String ibanUpperCasedNoSpace; 1514 while (matcher.find()) { 1515 iban = matcher.group(1); 1516 ibanUpperCasedNoSpace = iban.toUpperCase(Locale.ROOT).replace(" ", ""); 1517 //Check that the string is a valid IBAN and if yes then add it 1518 if (IbanUtil.isValid(ibanUpperCasedNoSpace)) { 1519 data.get(SensitiveInformationType.IBAN).add(iban); 1520 } 1521 } 1522 1523 /* Step 3: Search for bank card PAN */ 1524 matcher = panRegex.matcher(content); 1525 String pan, panNoSeparator; 1526 while (matcher.find()) { 1527 pan = matcher.group(1); 1528 panNoSeparator = pan.toUpperCase(Locale.ROOT).replace(" ", "").replace("-", ""); 1529 //Check that the string is a valid PAN and if yes then add it 1530 if (creditCardValidator.isValid(panNoSeparator)) { 1531 data.get(SensitiveInformationType.BANK_CARD_PAN).add(pan); 1532 } 1533 } 1534 1535 } 1536 1537 //Cleanup if a set is empty 1538 if (data.get(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER).isEmpty()) { 1539 data.remove(SensitiveInformationType.LUXEMBOURG_NATIONAL_IDENTIFICATION_NUMBER); 1540 } 1541 if (data.get(SensitiveInformationType.IBAN).isEmpty()) { 1542 data.remove(SensitiveInformationType.IBAN); 1543 } 1544 if (data.get(SensitiveInformationType.BANK_CARD_PAN).isEmpty()) { 1545 data.remove(SensitiveInformationType.BANK_CARD_PAN); 1546 } 1547 1548 return data; 1549 } 1550 1551 /** 1552 * Apply a collection of validations on a bytes array provided representing GZIP compressed data: 1553 * <ul> 1554 * <li>Are valid GZIP compressed data.</li> 1555 * <li>The number of bytes once decompressed is under the specified limit.</li> 1556 * </ul> 1557 * <br><b>Note:</b> The value <code>Integer.MAX_VALUE - 8</code> was chosen because during my tests on Java 25 (JDK 64 bits on Windows 11 Pro), it was possible to decompress such amount of data with the default JVM settings without causing an <a href="https://docs.oracle.com/en/java/javase/25/docs/api//java.base/java/lang/OutOfMemoryError.html">Out Of Memory error</a>. 1558 * 1559 * @param compressedBytes Array of bytes containing the GZIP compressed data to check. 1560 * @param maxCountOfDecompressedBytesAllowed Maximum number of decompressed bytes allowed. Default to 10 MB if the specified value is inferior to 1 or superior to Integer.MAX_VALUE - 8. 1561 * @return True only if the file pass all validations. 1562 * @see "https://en.wikipedia.org/wiki/Gzip" 1563 * @see "https://www.rapid7.com/db/modules/auxiliary/dos/http/gzip_bomb_dos/" 1564 */ 1565 public static boolean isGZIPCompressedDataSafe(byte[] compressedBytes, long maxCountOfDecompressedBytesAllowed) { 1566 boolean isSafe = false; 1567 1568 try { 1569 long limit = maxCountOfDecompressedBytesAllowed; 1570 long totalRead = 0L; 1571 byte[] buffer = new byte[8 * 1024]; 1572 int read; 1573 if (limit < 1 || limit > (Integer.MAX_VALUE - 8)) { 1574 limit = 10_000_000; 1575 } 1576 try (ByteArrayInputStream bis = new ByteArrayInputStream(compressedBytes); GZIPInputStream gzipInputStream = new GZIPInputStream(new BufferedInputStream(bis))) { 1577 while ((read = gzipInputStream.read(buffer)) != -1) { 1578 totalRead += read; 1579 if (totalRead > limit) { 1580 throw new Exception(); 1581 } 1582 } 1583 } 1584 isSafe = true; 1585 } catch (Exception e) { 1586 isSafe = false; 1587 } 1588 1589 return isSafe; 1590 } 1591 1592 /** 1593 * Process a string, intended to be written in a log, to remove as much as possible information that can lead to an exposure to a log injection vulnerability.<br><br> 1594 * <b>Log injection</b> is also called <b>log forging</b>.<br><br> 1595 * The following information are removed: 1596 * <ul> 1597 * <li>Characters: Carriage Return (CR), Linefeed (LF) and Tabulation (TAB).</li> 1598 * <li>Leading and trailing spaces.</li> 1599 * <li>Any HTML tags.</li> 1600 * </ul><br> 1601 * A parameter is also used to limit the maximum length of the sanitized message. 1602 * To remove any HTML tags, the OWASP project <a href="https://owasp.org/www-project-java-html-sanitizer/">Java HTML Sanitizer</a> is leveraged.<br> 1603 * I delegated such removal to a dedicated library to prevent missing of edge cases as well as potential bypasses. 1604 * 1605 * @param message The original string message intended to be written in a log. 1606 * @param maxMessageLength The maximum number of characters after which the sanitized message must be truncated. If inferior to 1 then default to the value of 500. 1607 * @return The string message cleaned. 1608 * @see "https://www.wallarm.com/what/log-forging-attack" 1609 * @see "https://www.invicti.com/learn/crlf-injection" 1610 * @see "https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_injection_vulnerability.html" 1611 * @see "https://capec.mitre.org/data/definitions/93.html" 1612 * @see "https://codeql.github.com/codeql-query-help/javascript/js-log-injection/" 1613 * @see "https://owasp.org/www-project-java-html-sanitizer/" 1614 * @see "https://github.com/OWASP/java-html-sanitizer" 1615 */ 1616 public static String sanitizeLogMessage(String message, int maxMessageLength) { 1617 String sanitized = message; 1618 int maxSanitizedMessageLength = maxMessageLength; 1619 1620 if (sanitized != null && !sanitized.isBlank()) { 1621 if (maxSanitizedMessageLength < 1) { 1622 maxSanitizedMessageLength = 500; 1623 } 1624 //Step 1: Remove any CR/LR/TAB characters as well as leading and trailing spaces 1625 sanitized = sanitized.replaceAll("[\\n\\r\\t]", "").trim(); 1626 //Step 2: Remove any HTML tags 1627 PolicyFactory htmlSanitizerPolicy = new HtmlPolicyBuilder().toFactory(); 1628 sanitized = htmlSanitizerPolicy.sanitize(sanitized); 1629 //Step 3: Truncate the string in case of need 1630 if (sanitized.length() > maxSanitizedMessageLength) { 1631 sanitized = sanitized.substring(0, maxSanitizedMessageLength); 1632 } 1633 } 1634 1635 return sanitized; 1636 } 1637}