001package eu.righettod; 002 003 004import com.auth0.jwt.interfaces.DecodedJWT; 005import org.apache.commons.csv.CSVFormat; 006import org.apache.commons.csv.CSVRecord; 007import org.apache.commons.imaging.ImageInfo; 008import org.apache.commons.imaging.Imaging; 009import org.apache.commons.imaging.common.ImageMetadata; 010import org.apache.commons.validator.routines.EmailValidator; 011import org.apache.commons.validator.routines.InetAddressValidator; 012import org.apache.pdfbox.Loader; 013import org.apache.pdfbox.pdmodel.PDDocument; 014import org.apache.pdfbox.pdmodel.PDDocumentCatalog; 015import org.apache.pdfbox.pdmodel.PDDocumentInformation; 016import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; 017import org.apache.pdfbox.pdmodel.common.PDMetadata; 018import org.apache.pdfbox.pdmodel.interactive.action.*; 019import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; 020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; 021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; 022import org.apache.poi.poifs.filesystem.DirectoryEntry; 023import org.apache.poi.poifs.filesystem.POIFSFileSystem; 024import org.apache.poi.poifs.macros.VBAMacroReader; 025import org.apache.tika.detect.DefaultDetector; 026import org.apache.tika.detect.Detector; 027import org.apache.tika.io.TemporaryResources; 028import org.apache.tika.io.TikaInputStream; 029import org.apache.tika.metadata.Metadata; 030import org.apache.tika.mime.MediaType; 031import org.apache.tika.mime.MimeTypes; 032import org.w3c.dom.Document; 033import org.xml.sax.EntityResolver; 034import org.xml.sax.InputSource; 035import org.xml.sax.SAXException; 036 037import javax.crypto.Mac; 038import javax.crypto.spec.SecretKeySpec; 039import javax.imageio.ImageIO; 040import javax.json.Json; 041import javax.json.JsonReader; 042import javax.xml.XMLConstants; 043import javax.xml.parsers.DocumentBuilder; 044import javax.xml.parsers.DocumentBuilderFactory; 045import javax.xml.parsers.ParserConfigurationException; 046import javax.xml.stream.XMLInputFactory; 047import javax.xml.stream.XMLStreamReader; 048import javax.xml.stream.events.XMLEvent; 049import java.awt.*; 050import java.awt.image.BufferedImage; 051import java.io.*; 052import java.net.*; 053import java.net.http.HttpClient; 054import java.net.http.HttpRequest; 055import java.net.http.HttpResponse; 056import java.nio.ByteBuffer; 057import java.nio.charset.Charset; 058import java.nio.charset.StandardCharsets; 059import java.nio.file.Files; 060import java.security.MessageDigest; 061import java.security.SecureRandom; 062import java.time.Duration; 063import java.util.*; 064import java.util.List; 065import java.util.concurrent.*; 066import java.util.concurrent.atomic.AtomicInteger; 067import java.util.regex.Pattern; 068import java.util.zip.ZipEntry; 069import java.util.zip.ZipFile; 070 071/** 072 * Provides different utilities methods to apply processing from a security perspective.<br> 073 * These code snippet: 074 * <ul> 075 * <li>Can be used, as "foundation", to customize the validation to the app context.</li> 076 * <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li> 077 * <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li> 078 * </ul> 079 */ 080public class SecurityUtils { 081 /** 082 * Default constructor: Not needed as the class only provides static methods. 083 */ 084 private SecurityUtils() { 085 } 086 087 /** 088 * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br> 089 * This method consider that format of the PIN code is [0-9]{6,}<br> 090 * Rule to consider a PIN code as weak: 091 * <ul> 092 * <li>Length is inferior to 6 positions.</li> 093 * <li>Contain only the same number or only a sequence of zero.</li> 094 * <li>Contain sequence of following incremental or decremental numbers.</li> 095 * </ul> 096 * 097 * @param pinCode PIN code to verify. 098 * @return True only if the PIN is considered as weak. 099 */ 100 public static boolean isWeakPINCode(String pinCode) { 101 boolean isWeak = true; 102 //Length is inferior to 6 positions 103 //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one 104 //and to ensure that the PIN is not only a sequence of zero 105 if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) { 106 //Contain only the same number 107 String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length()); 108 if (!Pattern.matches(regex, pinCode)) { 109 //Contain sequence of following incremental or decremental numbers 110 char previousChar = 'X'; 111 boolean containSequence = false; 112 for (char c : pinCode.toCharArray()) { 113 if (previousChar != 'X') { 114 int previousNbr = Integer.parseInt(String.valueOf(previousChar)); 115 int currentNbr = Integer.parseInt(String.valueOf(c)); 116 if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) { 117 containSequence = true; 118 break; 119 } 120 } 121 previousChar = c; 122 } 123 if (!containSequence) { 124 isWeak = false; 125 } 126 } 127 } 128 return isWeak; 129 } 130 131 /** 132 * Apply a collection of validations on a Word 97-2003 (binary format) document file provided: 133 * <ul> 134 * <li>Real Microsoft Word 97-2003 document file.</li> 135 * <li>No VBA Macro.<br></li> 136 * <li>No embedded objects.</li> 137 * </ul> 138 * 139 * @param wordFilePath Filename of the Word document file to check. 140 * @return True only if the file pass all validations. 141 * @see "https://poi.apache.org/components/" 142 * @see "https://poi.apache.org/components/document/" 143 * @see "https://poi.apache.org/components/poifs/how-to.html" 144 * @see "https://poi.apache.org/components/poifs/embeded.html" 145 * @see "https://poi.apache.org/" 146 * @see "https://mvnrepository.com/artifact/org.apache.poi/poi" 147 */ 148 public static boolean isWord972003DocumentSafe(String wordFilePath) { 149 boolean isSafe = false; 150 try { 151 File wordFile = new File(wordFilePath); 152 if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) { 153 //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file 154 try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) { 155 //Step 2: Check if the document contains VBA macros, in our case is not allowed 156 VBAMacroReader macroReader = new VBAMacroReader(fs); 157 Map<String, String> macros = macroReader.readMacros(); 158 if (macros == null || macros.isEmpty()) { 159 //Step 3: Check if the document contains any embedded objects, in our case is not allowed 160 //From POI documentation: 161 //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root. 162 //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers. 163 final List<String> embeddedObjectFound = new ArrayList<>(); 164 DirectoryEntry root = fs.getRoot(); 165 if (root.getEntryCount() > 0) { 166 root.iterator().forEachRemaining(entry -> { 167 if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) { 168 DirectoryEntry objPoolDirectory = (DirectoryEntry) entry; 169 if (objPoolDirectory.getEntryCount() > 0) { 170 objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> { 171 if (objPoolDirectoryEntry instanceof DirectoryEntry) { 172 DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry; 173 if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) { 174 objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> { 175 if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) { 176 embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName()); 177 } 178 }); 179 } 180 } 181 }); 182 } 183 } 184 }); 185 } 186 isSafe = embeddedObjectFound.isEmpty(); 187 } 188 } 189 } 190 } catch (Exception e) { 191 isSafe = false; 192 } 193 return isSafe; 194 } 195 196 /** 197 * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions. 198 * 199 * @param xmlFilePath Filename of the XML file to check. 200 * @return True only if the file pass all validations. 201 * @see "https://portswigger.net/web-security/xxe" 202 * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java" 203 * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258" 204 * @see "https://www.w3.org/TR/xinclude-11/" 205 * @see "https://en.wikipedia.org/wiki/XInclude" 206 */ 207 public static boolean isXMLSafe(String xmlFilePath) { 208 boolean isSafe = false; 209 try { 210 File xmlFile = new File(xmlFilePath); 211 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 212 //Step 1a: Verify that the XML file content does not contain any XInclude instructions 213 boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include ")); 214 if (!containXInclude) { 215 //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones 216 //Create an XML document builder throwing Exception if a DOCTYPE instruction is present 217 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 218 dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 219 //Xerces 2 only 220 //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true); 221 dbfInstance.setXIncludeAware(false); 222 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 223 //Parse the document 224 Document doc = builder.parse(xmlFile); 225 isSafe = (doc != null && doc.getDocumentElement() != null); 226 } 227 } 228 } catch (Exception e) { 229 isSafe = false; 230 } 231 return isSafe; 232 } 233 234 235 /** 236 * Extract all URL links from a PDF file provided.<br> 237 * This can be used to apply validation on a PDF against contained links. 238 * 239 * @param pdfFilePath pdfFilePath Filename of the PDF file to process. 240 * @return A List of URL objects that is empty if no links is found. 241 * @throws Exception If any error occurs during the processing of the PDF file. 242 * @see "https://www.gushiciku.cn/pl/21KQ" 243 * @see "https://pdfbox.apache.org/" 244 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 245 */ 246 public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception { 247 final List<URL> links = new ArrayList<>(); 248 File pdfFile = new File(pdfFilePath); 249 try (PDDocument document = Loader.loadPDF(pdfFile)) { 250 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 251 AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() { 252 @Override 253 public boolean accept(PDAnnotation annotation) { 254 boolean keep = false; 255 if (annotation instanceof PDAnnotationLink) { 256 keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI); 257 } 258 return keep; 259 } 260 }; 261 documentCatalog.getPages().forEach(page -> { 262 try { 263 page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> { 264 PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction(); 265 try { 266 URL urlObj = new URL(linkAnnotation.getURI()); 267 if (!links.contains(urlObj)) { 268 links.add(urlObj); 269 } 270 } catch (MalformedURLException e) { 271 throw new RuntimeException(e); 272 } 273 }); 274 } catch (Exception e) { 275 throw new RuntimeException(e); 276 } 277 }); 278 } 279 return links; 280 } 281 282 /** 283 * Apply a collection of validations on a PDF file provided: 284 * <ul> 285 * <li>Real PDF file.</li> 286 * <li>No attachments.</li> 287 * <li>No Javascript code.</li> 288 * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li> 289 * </ul> 290 * 291 * @param pdfFilePath Filename of the PDF file to check. 292 * @return True only if the file pass all validations. 293 * @see "https://stackoverflow.com/a/36161267" 294 * @see "https://www.gushiciku.cn/pl/21KQ" 295 * @see "https://github.com/jonaslejon/malicious-pdf" 296 * @see "https://pdfbox.apache.org/" 297 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 298 */ 299 public static boolean isPDFSafe(String pdfFilePath) { 300 boolean isSafe = false; 301 try { 302 File pdfFile = new File(pdfFilePath); 303 if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) { 304 //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file 305 try (PDDocument document = Loader.loadPDF(pdfFile)) { 306 //Step 2: Check if the file contains attached files, in our case is not allowed 307 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 308 PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog); 309 if (namesDictionary.getEmbeddedFiles() == null) { 310 //Step 3: Check if the file contains Javascript code, in our case is not allowed 311 if (namesDictionary.getJavaScript() == null) { 312 //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed 313 final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>(); 314 AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() { 315 @Override 316 public boolean accept(PDAnnotation annotation) { 317 boolean keep = false; 318 if (annotation instanceof PDAnnotationLink) { 319 PDAnnotationLink link = (PDAnnotationLink) annotation; 320 PDAction action = link.getAction(); 321 if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) { 322 keep = true; 323 } 324 } 325 return keep; 326 } 327 }; 328 documentCatalog.getPages().forEach(page -> { 329 try { 330 notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size()); 331 } catch (IOException e) { 332 throw new RuntimeException(e); 333 } 334 }); 335 if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) { 336 isSafe = true; 337 } 338 } 339 } 340 } 341 } 342 } catch (Exception e) { 343 isSafe = false; 344 } 345 return isSafe; 346 } 347 348 /** 349 * Remove as much as possible metadata from the provided PDF document object. 350 * 351 * @param document PDFBox PDF document object on which metadata must be removed. 352 * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069" 353 * @see "https://pdfbox.apache.org/" 354 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 355 */ 356 public static void clearPDFMetadata(PDDocument document) { 357 if (document != null) { 358 PDDocumentInformation infoEmpty = new PDDocumentInformation(); 359 document.setDocumentInformation(infoEmpty); 360 PDMetadata newMetadataEmpty = new PDMetadata(document); 361 document.getDocumentCatalog().setMetadata(newMetadataEmpty); 362 } 363 } 364 365 366 /** 367 * Validate that the URL provided is really a relative URL. 368 * 369 * @param targetUrl URL to validate. 370 * @return True only if the file pass all validations. 371 * @see "https://portswigger.net/web-security/ssrf" 372 * @see "https://stackoverflow.com/q/6785442" 373 */ 374 public static boolean isRelativeURL(String targetUrl) { 375 boolean isValid = false; 376 //Reject any URL encoded content and URL starting with a double slash 377 //Reject any URL contains credentials or fragment to prevent potential bypasses 378 String work = targetUrl; 379 if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) { 380 //Creation of a URL object must fail 381 try { 382 new URL(work); 383 isValid = false; 384 } catch (MalformedURLException mf) { 385 //Last check to be sure (for prod usage compile the pattern one time) 386 isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find(); 387 } 388 } 389 return isValid; 390 } 391 392 /** 393 * Apply a collection of validations on a ZIP file provided: 394 * <ul> 395 * <li>Real ZIP file.</li> 396 * <li>Contain less than a specified level of deepness.</li> 397 * <li>Do not contain Zip-Slip entry path.</li> 398 * </ul> 399 * 400 * @param zipFilePath Filename of the ZIP file to check. 401 * @param maxLevelDeepness Threshold of deepness above which a ZIP archive will be rejected. 402 * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file. 403 * @return True only if the file pass all validations. 404 * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042" 405 * @see "https://security.snyk.io/research/zip-slip-vulnerability" 406 * @see "https://en.wikipedia.org/wiki/Zip_bomb" 407 * @see "https://github.com/ptoomey3/evilarc" 408 * @see "https://github.com/abdulfatir/ZipBomb" 409 * @see "https://www.baeldung.com/cs/zip-bomb" 410 * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/" 411 * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream" 412 */ 413 public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) { 414 List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz"); 415 boolean isSafe = false; 416 try { 417 File zipFile = new File(zipFilePath); 418 if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) { 419 //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file 420 try (ZipFile zipArch = new ZipFile(zipFile)) { 421 //Step 2: Parse entries 422 long deepness = 0; 423 ZipEntry zipEntry; 424 String entryExtension; 425 String zipEntryName; 426 boolean validationsFailed = false; 427 Enumeration<? extends ZipEntry> entries = zipArch.entries(); 428 while (entries.hasMoreElements()) { 429 zipEntry = entries.nextElement(); 430 zipEntryName = zipEntry.getName(); 431 entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim(); 432 //Step 2a: Check if the current entry is an archive file 433 if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) { 434 validationsFailed = true; 435 break; 436 } 437 //Step 2b: Check that level of deepness is inferior to the threshold specified 438 if (zipEntryName.contains("/")) { 439 //Determine deepness by inspecting the entry name. 440 //Indeed, folder will be represented like this: folder/folder/folder/ 441 //So we can count the number of "/" to identify the deepness of the entry 442 deepness = zipEntryName.chars().filter(ch -> ch == '/').count(); 443 if (deepness > maxLevelDeepness) { 444 validationsFailed = true; 445 break; 446 } 447 } 448 //Step 2c: Check if any entries match pattern of zip slip payload 449 if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) { 450 validationsFailed = true; 451 break; 452 } 453 } 454 if (!validationsFailed) { 455 isSafe = true; 456 } 457 } 458 } 459 } catch (Exception e) { 460 isSafe = false; 461 } 462 return isSafe; 463 } 464 465 /** 466 * Identify the mime type of the content specified (array of bytes).<br> 467 * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required. 468 * 469 * @param content The content as an array of bytes. 470 * @return The mime type in lower case or null if it cannot be identified. 471 * @see "https://twitter.com/righettod/status/1595824709186519041" 472 * @see "https://tika.apache.org/" 473 * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core" 474 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types" 475 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml" 476 */ 477 public static String identifyMimeType(byte[] content) { 478 String mimeType = null; 479 if (content != null && content.length > 0) { 480 Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes()); 481 Metadata metadata = new Metadata(); 482 try { 483 try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) { 484 MediaType mt = detector.detect(tikaInputStream, metadata); 485 if (mt != null) { 486 mimeType = mt.toString().toLowerCase(Locale.ROOT); 487 } 488 } 489 } catch (IOException ioe) { 490 mimeType = null; 491 } 492 } 493 return mimeType; 494 } 495 496 /** 497 * Apply a collection of validations on a string expected to be an public IP address: 498 * <ul> 499 * <li>Is a valid IP v4 or v6 address.</li> 500 * <li>Is public from an Internet perspective.</li> 501 * </ul> 502 * <br> 503 * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded". 504 * <br><br> 505 * <b>Note for IPv6:</b> I used documentation found so it is really experimental! 506 * 507 * @param ip String expected to be a valid IP address. 508 * @return True only if the string pass all validations. 509 * @see "https://commons.apache.org/proper/commons-validator/" 510 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html" 511 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html" 512 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf" 513 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf" 514 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For" 515 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded" 516 * @see "https://ipcisco.com/lesson/ipv6-address/" 517 * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html" 518 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)" 519 * @see "https://www.arin.net/reference/research/statistics/address_filters/" 520 * @see "https://en.wikipedia.org/wiki/Multicast_address" 521 * @see "https://stackoverflow.com/a/5619409" 522 * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf" 523 * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml" 524 * @see "https://developer.android.com/reference/java/net/Inet6Address" 525 * @see "https://en.wikipedia.org/wiki/Unique_local_address" 526 */ 527 public static boolean isPublicIPAddress(String ip) { 528 boolean isValid = false; 529 try { 530 //Quick validation on the string itself based on characters used to compose an IP v4/v6 address 531 if (Pattern.matches("[0-9a-fA-F:.]+", ip)) { 532 //If OK then use the dedicated InetAddressValidator from Apache Commons Validator 533 if (InetAddressValidator.getInstance().isValid(ip)) { 534 //If OK then validate that is an public IP address 535 //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked. 536 InetAddress addr = InetAddress.getByName(ip); 537 isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress()); 538 //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP 539 //For the prefix map, the start of the key indicates if the value is a regex or a string 540 if (isValid && (addr instanceof Inet6Address)) { 541 Map<String, String> prefixes = new HashMap<>(); 542 prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$"); 543 prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$"); 544 prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:"); 545 prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$"); 546 prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$"); 547 prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$"); 548 prefixes.put("STRING_DOCUMENTATION", "2001:db8:"); 549 prefixes.put("STRING_GLOBAL-UNICAST", "2000:"); 550 prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$"); 551 final List<Boolean> results = new ArrayList<>(); 552 final String ipLower = ip.trim().toLowerCase(Locale.ROOT); 553 prefixes.forEach((addressType, expr) -> { 554 String exprLower = expr.trim().toLowerCase(); 555 if (addressType.startsWith("STRING_")) { 556 results.add(ipLower.startsWith(exprLower)); 557 } else { 558 results.add(Pattern.matches(exprLower, ipLower)); 559 } 560 }); 561 isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE)); 562 } 563 } 564 } 565 } catch (Exception e) { 566 isValid = false; 567 } 568 return isValid; 569 } 570 571 /** 572 * Compute a SHA256 hash from an input composed of a collection of strings.<br><br> 573 * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br> 574 * <p> 575 * Example of possible abuse without precautions applied during the hash calculation logic:<br> 576 * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br> 577 * </p> 578 * This method ensure that both hash above will be different.<br><br> 579 * 580 * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character. 581 * 582 * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection. 583 * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null. 584 * @throws Exception If any exception occurs 585 * @see "https://github.com/righettod/code-snippets-security-utils/issues/16" 586 * @see "https://pentesterlab.com/badges/codereview" 587 * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/" 588 * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash" 589 */ 590 public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception { 591 byte[] hash = null; 592 String separator = "|"; 593 if (parts != null && !parts.isEmpty()) { 594 //Ensure that not part is null 595 if (parts.stream().anyMatch(Objects::isNull)) { 596 throw new IllegalArgumentException("No part must be null!"); 597 } 598 //Ensure that the separator is absent from every part 599 if (parts.stream().anyMatch(part -> part.contains(separator))) { 600 throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator)); 601 } 602 MessageDigest digest = MessageDigest.getInstance("SHA-256"); 603 final StringBuilder buffer = new StringBuilder(separator); 604 parts.forEach(p -> { 605 buffer.append(p).append(separator); 606 }); 607 hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8)); 608 } 609 return hash; 610 } 611 612 /** 613 * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br> 614 * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br> 615 * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations. 616 * 617 * @param xmlFilePath Filename of the XML file to check. 618 * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references. 619 * @return True only if the file pass all validations. 620 * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp" 621 * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid" 622 * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html" 623 * @see "https://www.xml.com/pub/98/08/xmlqna0.html" 624 * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397" 625 * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier" 626 */ 627 public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) { 628 boolean isSafe = false; 629 final String errorTemplate = "Non allowed %s ID detected!"; 630 final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>"; 631 final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>"; 632 633 if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) { 634 throw new IllegalArgumentException("At least one SID must be specified!"); 635 } 636 File xmlFile = new File(xmlFilePath); 637 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 638 try { 639 EntityResolver resolverValidator = (publicId, systemId) -> { 640 if (publicId != null) { 641 throw new SAXException(String.format(errorTemplate, "PUBLIC")); 642 } 643 if (!allowedSystemIdentifiers.contains(systemId)) { 644 throw new SAXException(String.format(errorTemplate, "SYSTEM")); 645 } 646 //If it is OK then return a empty DTD/XSD 647 return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD)); 648 }; 649 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 650 dbfInstance.setIgnoringElementContentWhitespace(true); 651 dbfInstance.setXIncludeAware(false); 652 dbfInstance.setValidating(false); 653 dbfInstance.setCoalescing(true); 654 dbfInstance.setIgnoringComments(false); 655 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 656 builder.setEntityResolver(resolverValidator); 657 Document doc = builder.parse(xmlFile); 658 isSafe = (doc != null); 659 } catch (SAXException | IOException | ParserConfigurationException e) { 660 isSafe = false; 661 } 662 } 663 664 return isSafe; 665 } 666 667 /** 668 * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL): 669 * <ul> 670 * <li>Real CSV file.</li> 671 * <li>Do not contains any payload related to a CSV injections.</li> 672 * </ul> 673 * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br> 674 * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br> 675 * 676 * @param csvFilePath Filename of the CSV file to check. 677 * @return True only if the file pass all validations. 678 * @see "https://commons.apache.org/proper/commons-csv/" 679 * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL" 680 * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection" 681 * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/" 682 * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection" 683 * @see "https://owasp.org/www-community/attacks/CSV_Injection" 684 * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/" 685 * @see "https://cwe.mitre.org/data/definitions/1236.html" 686 */ 687 public static boolean isExcelCSVSafe(String csvFilePath) { 688 boolean isSafe; 689 final AtomicInteger recordCount = new AtomicInteger(); 690 final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t'); 691 692 try { 693 final List<String> payloadsIdentified = new ArrayList<>(); 694 try (Reader in = new FileReader(csvFilePath)) { 695 Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); 696 records.forEach(record -> { 697 record.forEach(recordValue -> { 698 if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) { 699 payloadsIdentified.add(recordValue); 700 } 701 recordCount.getAndIncrement(); 702 }); 703 }); 704 } 705 isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0); 706 } catch (Exception e) { 707 isSafe = false; 708 } 709 710 return isSafe; 711 } 712 713 /** 714 * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br> 715 * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach. 716 * 717 * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode}) 718 * @param input When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used. 719 * @param secret Secret to use to compute the SHA256 HMAC. 720 * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul> 721 * @throws Exception If any exception occurs. 722 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html" 723 * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization" 724 * @see "https://portswigger.net/web-security/deserialization" 725 * @see "https://www.baeldung.com/java-serialization-approaches" 726 * @see "https://www.baeldung.com/java-serialization" 727 * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation" 728 * @see "https://en.wikipedia.org/wiki/HMAC" 729 * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/" 730 */ 731 public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception { 732 Map<String, Object> results; 733 String resultFormatTemplate = "%s:%s"; 734 //Verify input provided to be consistent 735 if (processingMode == null) { 736 throw new IllegalArgumentException("The processing mode is mandatory!"); 737 } 738 if (input == null || input.trim().isEmpty()) { 739 throw new IllegalArgumentException("Input data is mandatory!"); 740 } 741 if (secret == null || secret.length == 0) { 742 throw new IllegalArgumentException("The HMAC secret is mandatory!"); 743 } 744 if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) { 745 throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!"); 746 } 747 //Processing 748 Base64.Decoder b64Decoder = Base64.getDecoder(); 749 Base64.Encoder b64Encoder = Base64.getEncoder(); 750 String hmacAlgorithm = "HmacSHA256"; 751 Mac mac = Mac.getInstance(hmacAlgorithm); 752 SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm); 753 mac.init(key); 754 results = new HashMap<>(); 755 results.put("PROCESSING_MODE", processingMode.toString()); 756 switch (processingMode) { 757 case PROTECT -> { 758 byte[] objectBytes = b64Decoder.decode(input); 759 byte[] hmac = mac.doFinal(objectBytes); 760 String encodedHmac = b64Encoder.encodeToString(hmac); 761 results.put("STATUS", Boolean.TRUE); 762 results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac)); 763 } 764 case VALIDATE -> { 765 String[] parts = input.split(":"); 766 byte[] objectBytes = b64Decoder.decode(parts[0].trim()); 767 byte[] hmacProvided = b64Decoder.decode(parts[1].trim()); 768 byte[] hmacComputed = mac.doFinal(objectBytes); 769 String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed); 770 Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed); 771 results.put("STATUS", hmacIsValid); 772 results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed)); 773 } 774 default -> throw new IllegalArgumentException("Not supported processing mode!"); 775 } 776 return results; 777 } 778 779 /** 780 * Apply a collection of validations on a JSON string provided: 781 * <ul> 782 * <li>Real JSON structure.</li> 783 * <li>Contain less than a specified number of deepness for nested objects or arrays.</li> 784 * <li>Contain less than a specified number of items in any arrays.</li> 785 * </ul> 786 * <br> 787 * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br> 788 * I used the following assumption: 789 * <ul> 790 * <li>The character <code>{</code> identify the beginning of an object.</li> 791 * <li>The character <code>}</code> identify the end of an object.</li> 792 * <li>The character <code>[</code> identify the beginning of an array.</li> 793 * <li>The character <code>]</code> identify the end of an array.</li> 794 * <li>The character <code>"</code> identify the delimiter of a string.</li> 795 * <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li> 796 * </ul> 797 * 798 * @param json String containing the JSON data to validate. 799 * @param maxItemsByArraysCount Maximum number of items allowed in an array. 800 * @param maxDeepnessAllowed Maximum number nested objects or arrays allowed. 801 * @return True only if the string pass all validations. 802 * @see "https://javaee.github.io/jsonp/" 803 * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306" 804 * @see "https://github.com/InductiveComputerScience/pbJson/issues/2" 805 */ 806 public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) { 807 boolean isSafe = false; 808 809 try { 810 //Step 1: Analyse the JSON string 811 int currentDeepness = 0; 812 int currentArrayItemsCount = 0; 813 int maxDeepnessReached = 0; 814 int maxArrayItemsCountReached = 0; 815 boolean currentlyInArray = false; 816 boolean currentlyInString = false; 817 int currentNestedArrayLevel = 0; 818 String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter 819 String work = json.replace(jsonEscapedDoubleQuote, "'"); 820 for (char c : work.toCharArray()) { 821 switch (c) { 822 case '{': { 823 if (!currentlyInString) { 824 currentDeepness++; 825 } 826 break; 827 } 828 case '}': { 829 if (!currentlyInString) { 830 currentDeepness--; 831 } 832 break; 833 } 834 case '[': { 835 if (!currentlyInString) { 836 currentDeepness++; 837 if (currentlyInArray) { 838 currentNestedArrayLevel++; 839 } 840 currentlyInArray = true; 841 } 842 break; 843 } 844 case ']': { 845 if (!currentlyInString) { 846 currentDeepness--; 847 currentArrayItemsCount = 0; 848 if (currentNestedArrayLevel > 0) { 849 currentNestedArrayLevel--; 850 } 851 if (currentNestedArrayLevel == 0) { 852 currentlyInArray = false; 853 } 854 } 855 break; 856 } 857 case '"': { 858 currentlyInString = !currentlyInString; 859 break; 860 } 861 case ',': { 862 if (!currentlyInString && currentlyInArray) { 863 currentArrayItemsCount++; 864 } 865 break; 866 } 867 } 868 if (currentDeepness > maxDeepnessReached) { 869 maxDeepnessReached = currentDeepness; 870 } 871 if (currentArrayItemsCount > maxArrayItemsCountReached) { 872 maxArrayItemsCountReached = currentArrayItemsCount; 873 } 874 } 875 //Step 2: Apply validation against the value specified as limits 876 isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached)); 877 878 //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation. 879 if (isSafe) { 880 JsonReader reader = Json.createReader(new StringReader(json)); 881 isSafe = (reader.read() != null); 882 } 883 884 } catch (Exception e) { 885 isSafe = false; 886 } 887 return isSafe; 888 } 889 890 /** 891 * Apply a collection of validations on a image file provided: 892 * <ul> 893 * <li>Real image file.</li> 894 * <li>Its mime type is into the list of allowed mime types.</li> 895 * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li> 896 * </ul> 897 * <br> 898 * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team. 899 * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br> 900 * 901 * @param imageFilePath Filename of the image file to check. 902 * @param imageAllowedMimeTypes List of image mime types allowed. 903 * @return True only if the file pass all validations. 904 * @see "https://commons.apache.org/proper/commons-imaging/" 905 * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html" 906 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types" 907 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image" 908 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 909 * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html" 910 * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java" 911 * @see "https://exiftool.org/examples.html" 912 * @see "https://en.wikipedia.org/wiki/List_of_file_signatures" 913 * @see "https://hexed.it/" 914 * @see "https://github.com/sighook/pixload" 915 */ 916 public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) { 917 boolean isSafe = false; 918 Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE); 919 try { 920 File imgFile = new File(imageFilePath); 921 if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) { 922 final byte[] imgBytes = Files.readAllBytes(imgFile.toPath()); 923 //Step 1: Check the mime type of the file against the allowed ones 924 ImageInfo imgInfo = Imaging.getImageInfo(imgBytes); 925 if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) { 926 //Step 2: Load the image into an object using the Image API 927 BufferedImage imgObject = Imaging.getBufferedImage(imgBytes); 928 if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) { 929 //Step 3: Check the metadata if the image format support it - Highly experimental 930 List<String> metadataWithPayloads = new ArrayList<>(); 931 final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes); 932 if (imgMetadata != null) { 933 imgMetadata.getItems().forEach(item -> { 934 String metadata = item.toString(); 935 if (payloadDetectionRegex.matcher(metadata).find()) { 936 metadataWithPayloads.add(metadata); 937 } 938 }); 939 } 940 isSafe = metadataWithPayloads.isEmpty(); 941 } 942 } 943 } 944 } catch (Exception e) { 945 isSafe = false; 946 } 947 return isSafe; 948 } 949 950 /** 951 * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br> 952 * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details. 953 * 954 * @param inputFilePath Filename of the file to clean up. 955 * @param inputFileType Type of the file provided. 956 * @return A array of bytes with the cleaned file. 957 * @throws IllegalArgumentException If an invalid parameter is passed 958 * @throws Exception If any technical error during the cleaning processing 959 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 960 * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc" 961 * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc" 962 * @see "https://stackoverflow.com/a/13605411" 963 */ 964 public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception { 965 ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream(); 966 File inputFile = new File(inputFilePath); 967 if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) { 968 throw new IllegalArgumentException("Cannot read the content of the input file!"); 969 } 970 switch (inputFileType) { 971 case PDF -> { 972 try (PDDocument document = Loader.loadPDF(inputFile)) { 973 document.save(sanitizedContent); 974 } 975 } 976 case IMAGE -> { 977 // Load the original image 978 BufferedImage originalImage = ImageIO.read(inputFile); 979 String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim(); 980 // Check that image has been successfully loaded 981 if (originalImage == null) { 982 throw new IOException("Cannot load the original image !"); 983 } 984 // Get current Width and Height of the image 985 int originalWidth = originalImage.getWidth(null); 986 int originalHeight = originalImage.getHeight(null); 987 // Resize the image by removing 1px on Width and Height 988 Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH); 989 // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size 990 Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH); 991 // Save image to a bytes buffer 992 int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency 993 if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) { 994 bufferedImageType = BufferedImage.TYPE_INT_RGB; 995 } 996 BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType); 997 Graphics2D drawer = sanitizedImage.createGraphics(); 998 drawer.drawImage(initialSizedImage, 0, 0, null); 999 drawer.dispose(); 1000 ImageIO.write(sanitizedImage, originalFormat, sanitizedContent); 1001 } 1002 default -> throw new IllegalArgumentException("Type of file not supported !"); 1003 } 1004 if (sanitizedContent.size() == 0) { 1005 throw new IOException("An error occur during the rewrite operation!"); 1006 } 1007 return sanitizedContent.toByteArray(); 1008 } 1009 1010 /** 1011 * Apply a collection of validations on a string expected to be an email address: 1012 * <ul> 1013 * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li> 1014 * <li>Is not using "Encoded-word" format.</li> 1015 * <li>Is not using comment format.</li> 1016 * <li>Is not using "Punycode" format.</li> 1017 * <li>Is not using UUCP style addresses.</li> 1018 * <li>Is not using address literals.</li> 1019 * <li>Is not using source routes.</li> 1020 * <li>Is not using the "percent hack".</li> 1021 * </ul><br> 1022 * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br> 1023 * 1024 * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective. 1025 * 1026 * @param addr String expected to be a valid email address. 1027 * @return True only if the string pass all validations. 1028 * @see "https://commons.apache.org/proper/commons-validator/" 1029 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html" 1030 * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2" 1031 * @see "https://portswigger.net/research/splitting-the-email-atom" 1032 * @see "https://www.jochentopf.com/email/address.html" 1033 * @see "https://en.wikipedia.org/wiki/Email_address" 1034 */ 1035 public static boolean isEmailAddress(String addr) { 1036 boolean isValid = false; 1037 String work = addr.toLowerCase(Locale.ROOT); 1038 Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE); 1039 Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE); 1040 try { 1041 //Start with the use of the dedicated EmailValidator from Apache Commons Validator 1042 if (EmailValidator.getInstance(true, true).isValid(work)) { 1043 //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach 1044 if (!encodedWordRegex.matcher(work).find()) { 1045 //If OK then validate it does not contains punycode 1046 if (!work.contains("xn--")) { 1047 //If OK then validate it does not use: 1048 // UUCP style addresses, 1049 // Comment format, 1050 // Address literals, 1051 // Source routes, 1052 // The percent hack. 1053 if (!forbiddenCharacterRegex.matcher(work).find()) { 1054 isValid = true; 1055 } 1056 } 1057 } 1058 } 1059 } catch (Exception e) { 1060 isValid = false; 1061 } 1062 return isValid; 1063 } 1064 1065 /** 1066 * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>. 1067 * <br> 1068 * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>. 1069 * <br> 1070 * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF. 1071 * <br> 1072 * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>. 1073 * <br> 1074 * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker. 1075 * 1076 * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification . 1077 * @return TRUE only if the url point to a Qualified Certificate in PEM format. 1078 * @see "https://www.stet.eu/en/psd2/" 1079 * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf" 1080 * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/" 1081 * @see "https://datatracker.ietf.org/doc/rfc9421/" 1082 * @see "https://openjdk.org/groups/net/httpclient/intro.html" 1083 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html" 1084 * @see "https://portswigger.net/web-security/ssrf" 1085 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control" 1086 */ 1087 public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) { 1088 boolean isValid = false; 1089 long connectionTimeoutInSeconds = 10; 1090 String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest"; 1091 try { 1092 //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET 1093 if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) { 1094 String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1); 1095 if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) { 1096 //2. Ensure that the URL is a valid url by creating a instance of the class URI 1097 URI uri = URI.create(certificateUrl); 1098 //3. Require usage of HTTPS and reject any url containing query parameters 1099 if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) { 1100 //4. Perform a HTTP HEAD request in order to get the content type of the remote resource 1101 //and limit the interest to use the SSRF because to pass the check the url need to: 1102 //- Do not having any query parameters. 1103 //- Use HTTPS protocol. 1104 //- End with a string having the format "_[0-9a-f]{64}". 1105 //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters. 1106 HttpResponse<String> response; 1107 try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) { 1108 HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request 1109 .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses 1110 .build(); 1111 response = client.send(request, HttpResponse.BodyHandlers.ofString()); 1112 if (response.statusCode() == 200) { 1113 //5. Ensure that the response content type is "text/plain" 1114 Optional<String> contentType = response.headers().firstValue("Content-Type"); 1115 isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain")); 1116 } 1117 } 1118 } 1119 } 1120 } 1121 } catch (Exception e) { 1122 isValid = false; 1123 } 1124 return isValid; 1125 } 1126 1127 /** 1128 * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached. 1129 * 1130 * @param encodedData URL encoded data. 1131 * @param decodingRoundThreshold Threshold above which decoding will fail. 1132 * @return The decoded data. 1133 * @throws SecurityException If the threshold is reached. 1134 * @see "https://en.wikipedia.org/wiki/Percent-encoding" 1135 * @see "https://owasp.org/www-community/Double_Encoding" 1136 * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings" 1137 * @see "https://capec.mitre.org/data/definitions/120.html" 1138 */ 1139 public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException { 1140 if (decodingRoundThreshold < 1) { 1141 throw new IllegalArgumentException("Threshold must be a positive number !"); 1142 } 1143 if (encodedData == null) { 1144 throw new IllegalArgumentException("Data provided must not be null !"); 1145 } 1146 Charset charset = StandardCharsets.UTF_8; 1147 int currentDecodingRound = 0; 1148 boolean isFinished = false; 1149 String currentRoundData = encodedData; 1150 String previousRoundData = encodedData; 1151 while (!isFinished) { 1152 if (currentDecodingRound > decodingRoundThreshold) { 1153 throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold)); 1154 } 1155 currentRoundData = URLDecoder.decode(currentRoundData, charset); 1156 isFinished = currentRoundData.equals(previousRoundData); 1157 previousRoundData = currentRoundData; 1158 currentDecodingRound++; 1159 } 1160 return currentRoundData; 1161 } 1162 1163 /** 1164 * Apply a collection of validations on a string expected to be an system file/folder path: 1165 * <ul> 1166 * <li>Does not contains path traversal payload.</li> 1167 * <li>The canonical path is equals to the absolute path.</li> 1168 * </ul><br> 1169 * 1170 * @param path String expected to be a valid system file/folder path. 1171 * @return True only if the string pass all validations. 1172 * @see "https://portswigger.net/web-security/file-path-traversal" 1173 * @see "https://learn.snyk.io/lesson/directory-traversal/" 1174 * @see "https://capec.mitre.org/data/definitions/126.html" 1175 * @see "https://owasp.org/www-community/attacks/Path_Traversal" 1176 */ 1177 public static boolean isPathSafe(String path) { 1178 boolean isSafe = false; 1179 int decodingRoundThreshold = 3; 1180 try { 1181 if (path != null && !path.isEmpty()) { 1182 //URL decode the path if case of data coming from a web context 1183 String decodedPath = applyURLDecoding(path, decodingRoundThreshold); 1184 //Ensure that no path traversal expression is present 1185 if (!decodedPath.contains("..")) { 1186 File f = new File(decodedPath); 1187 String canonicalPath = f.getCanonicalPath(); 1188 String absolutePath = f.getAbsolutePath(); 1189 isSafe = canonicalPath.equals(absolutePath); 1190 } 1191 } 1192 } catch (Exception e) { 1193 isSafe = false; 1194 } 1195 return isSafe; 1196 } 1197 1198 /** 1199 * Identify if an XML contains any XML comments or have any XSL processing instructions.<br> 1200 * Stream reader based parsing is used to support large XML tree. 1201 * 1202 * @param xmlFilePath Filename of the XML file to check. 1203 * @return True only if XML comments or XSL processing instructions are identified. 1204 * @see "https://www.tutorialspoint.com/xml/xml_processing.htm" 1205 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html" 1206 * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion" 1207 * @see "https://www.w3.org/Style/styling-XML.en.html" 1208 */ 1209 public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) { 1210 boolean itemsDetected = false; 1211 try { 1212 //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks 1213 XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); 1214 xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 1215 xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1216 xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); 1217 xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); 1218 1219 //Parse file 1220 try (FileInputStream fis = new FileInputStream(xmlFilePath)) { 1221 XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis); 1222 int eventType; 1223 while (reader.hasNext() && !itemsDetected) { 1224 eventType = reader.next(); 1225 if (eventType == XMLEvent.COMMENT) { 1226 itemsDetected = true; 1227 } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) { 1228 itemsDetected = true; 1229 } 1230 } 1231 } 1232 } catch (Exception e) { 1233 //In case of error then assume that the check failed 1234 itemsDetected = true; 1235 } 1236 return itemsDetected; 1237 } 1238 1239 1240 /** 1241 * Perform a set of additional validations against a JWT token: 1242 * <ul> 1243 * <li>Do not use the <b>NONE</b> signature algorithm.</li> 1244 * <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li> 1245 * <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li> 1246 * <li>Match the expected type of token: ACCESS or ID or REFRESH.</li> 1247 * </ul> 1248 * 1249 * @param token JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied. 1250 * @param expectedTokenType The type of expected token using the enumeration provided. 1251 * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to. 1252 * @return True only the token pass all the validations. 1253 * @see "https://www.iana.org/assignments/jwt/jwt.xhtml" 1254 * @see "https://auth0.com/docs/secure/tokens/access-tokens" 1255 * @see "https://auth0.com/docs/secure/tokens/id-tokens" 1256 * @see "https://auth0.com/docs/secure/tokens/refresh-tokens" 1257 * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/" 1258 * @see "https://jwt.io/libraries?language=Java" 1259 * @see "https://pentesterlab.com/blog/secure-jwt-library-design" 1260 * @see "https://github.com/auth0/java-jwt" 1261 */ 1262 public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) { 1263 boolean isValid = false; 1264 TokenType tokenType; 1265 try { 1266 if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) { 1267 if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) { 1268 String jti = token.getId(); 1269 if (jti != null && !jti.trim().isEmpty()) { 1270 boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase); 1271 if (!jtiIsRevoked) { 1272 //Determine the token type based on the presence of specifics claims 1273 if (!token.getClaim("scope").isMissing()) { 1274 tokenType = TokenType.ACCESS; 1275 } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) { 1276 tokenType = TokenType.ID; 1277 } else { 1278 tokenType = TokenType.REFRESH; 1279 } 1280 isValid = (tokenType.equals(expectedTokenType)); 1281 } 1282 } 1283 } 1284 } 1285 1286 } catch (Exception e) { 1287 //In case of error then assume that the check failed 1288 isValid = false; 1289 } 1290 return isValid; 1291 } 1292 1293 /** 1294 * Apply a validations on a regular expression to ensure that is not prone to the ReDOS attack. 1295 * <br>If your technology is supported by <a href="https://github.com/doyensec/regexploit">regexploit</a> then <b>use it instead of this method!</b> 1296 * <br>Indeed, the <a href="https://www.doyensec.com/">Doyensec</a> team has made an intensive and amazing work on this topic and created this effective tool. 1297 * 1298 * @param regex String expected to be a valid regular expression (regex). 1299 * @param data Test data on which the regular expression is executed for the test. 1300 * @param maximumRunningTimeInSeconds Optional parameter to specify a number of seconds above which a regex execution time is considered as not safe (default to 4 seconds when not specified). 1301 * @return True only if the string pass all validations. 1302 * @see "https://github.blog/security/how-to-fix-a-redos/" 1303 * @see "https://learn.snyk.io/lesson/redos" 1304 * @see "https://rules.sonarsource.com/java/RSPEC-2631/" 1305 * @see "https://github.com/doyensec/regexploit" 1306 * @see "https://wiki.owasp.org/images/2/23/OWASP_IL_2009_ReDoS.pdf" 1307 * @see "https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS" 1308 */ 1309 public static boolean isRegexSafe(String regex, String data, Optional<Integer> maximumRunningTimeInSeconds) { 1310 Objects.requireNonNull(maximumRunningTimeInSeconds, "Use 'Optional.empty()' to leverage the default value."); 1311 Objects.requireNonNull(data, "A sample data is needed to perform the test."); 1312 Objects.requireNonNull(regex, "A regular expression is needed to perform the test."); 1313 boolean isSafe = false; 1314 int executionTimeout = maximumRunningTimeInSeconds.orElse(4); 1315 ExecutorService executor = Executors.newSingleThreadExecutor(); 1316 try { 1317 Callable<Boolean> task = () -> { 1318 Pattern pattern = Pattern.compile(regex); 1319 return pattern.matcher(data).matches(); 1320 }; 1321 List<Future<Boolean>> tasks = executor.invokeAll(List.of(task), executionTimeout, TimeUnit.SECONDS); 1322 if (!tasks.getFirst().isCancelled()) { 1323 isSafe = true; 1324 } 1325 } catch (Exception e) { 1326 isSafe = false; 1327 } finally { 1328 executor.shutdownNow(); 1329 } 1330 return isSafe; 1331 } 1332 1333 /** 1334 * Compute a UUID version 7 without using any external dependency.<br><br> 1335 * <b>Below are my personal point of view and perhaps I'm totally wrong!</b> 1336 * <br><br> 1337 * Why such method? 1338 * <ul> 1339 * <li>Java inferior or equals to 21 does not supports natively the generation of an UUID version 7.</li> 1340 * <li>Import a library just to generate such value is overkill for me.</li> 1341 * <li>Library that I have found, generating such version of an UUID, are not provided by entities commonly used in the java world, such as the SPRING framework provider.</li> 1342 * </ul> 1343 * <br> 1344 * <b>Full credits for this implementation goes to the authors and contributors of the <a href="https://github.com/nalgeon/uuidv7">UUIDv7</a> project.</b> 1345 * <br><br> 1346 * Below are the java libraries that I have found but, for which, I do not trust enough the provider to use them directly: 1347 * <ul> 1348 * <li><a href="https://github.com/cowtowncoder/java-uuid-generator">java-uuid-generator</a></li> 1349 * <li><a href="https://github.com/f4b6a3/uuid-creator">uuid-creator</a></li> 1350 * </ul> 1351 * 1352 * @return A UUID object representing the UUID v7. 1353 * @see "https://uuid7.com/" 1354 * @see "https://antonz.org/uuidv7/" 1355 * @see "https://mccue.dev/pages/3-11-25-life-altering-postgresql-patterns" 1356 * @see "https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#name-uuid-version-7" 1357 * @see "https://www.baeldung.com/java-generating-time-based-uuids" 1358 * @see "https://en.wikipedia.org/wiki/Universally_unique_identifier" 1359 * @see "https://buildkite.com/resources/blog/goodbye-integers-hello-uuids/" 1360 */ 1361 public static UUID computeUUIDv7() { 1362 SecureRandom secureRandom = new SecureRandom(); 1363 // Generate truly random bytes 1364 byte[] value = new byte[16]; 1365 secureRandom.nextBytes(value); 1366 // Get current timestamp in milliseconds 1367 ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES); 1368 timestamp.putLong(System.currentTimeMillis()); 1369 // Create the TIMESTAMP part of the UUID 1370 System.arraycopy(timestamp.array(), 2, value, 0, 6); 1371 // Create the VERSION and the VARIANT parts of the UUID 1372 value[6] = (byte) ((value[6] & 0x0F) | 0x70); 1373 value[8] = (byte) ((value[8] & 0x3F) | 0x80); 1374 //Create the HIGH and LOW parts of the UUID 1375 ByteBuffer buf = ByteBuffer.wrap(value); 1376 long high = buf.getLong(); 1377 long low = buf.getLong(); 1378 //Create and return the UUID object 1379 UUID uuidv7 = new UUID(high, low); 1380 return uuidv7; 1381 } 1382}