001package eu.righettod; 002 003 004import com.auth0.jwt.interfaces.DecodedJWT; 005import org.apache.commons.csv.CSVFormat; 006import org.apache.commons.csv.CSVRecord; 007import org.apache.commons.imaging.ImageInfo; 008import org.apache.commons.imaging.Imaging; 009import org.apache.commons.imaging.common.ImageMetadata; 010import org.apache.commons.validator.routines.EmailValidator; 011import org.apache.commons.validator.routines.InetAddressValidator; 012import org.apache.pdfbox.Loader; 013import org.apache.pdfbox.pdmodel.PDDocument; 014import org.apache.pdfbox.pdmodel.PDDocumentCatalog; 015import org.apache.pdfbox.pdmodel.PDDocumentInformation; 016import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; 017import org.apache.pdfbox.pdmodel.common.PDMetadata; 018import org.apache.pdfbox.pdmodel.interactive.action.*; 019import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; 020import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; 021import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; 022import org.apache.poi.poifs.filesystem.DirectoryEntry; 023import org.apache.poi.poifs.filesystem.POIFSFileSystem; 024import org.apache.poi.poifs.macros.VBAMacroReader; 025import org.apache.tika.detect.DefaultDetector; 026import org.apache.tika.detect.Detector; 027import org.apache.tika.io.TemporaryResources; 028import org.apache.tika.io.TikaInputStream; 029import org.apache.tika.metadata.Metadata; 030import org.apache.tika.mime.MediaType; 031import org.apache.tika.mime.MimeTypes; 032import org.w3c.dom.Document; 033import org.xml.sax.EntityResolver; 034import org.xml.sax.InputSource; 035import org.xml.sax.SAXException; 036 037import javax.crypto.Mac; 038import javax.crypto.spec.SecretKeySpec; 039import javax.imageio.ImageIO; 040import javax.json.Json; 041import javax.json.JsonReader; 042import javax.xml.XMLConstants; 043import javax.xml.parsers.DocumentBuilder; 044import javax.xml.parsers.DocumentBuilderFactory; 045import javax.xml.parsers.ParserConfigurationException; 046import javax.xml.stream.XMLInputFactory; 047import javax.xml.stream.XMLStreamReader; 048import javax.xml.stream.events.XMLEvent; 049import java.awt.*; 050import java.awt.image.BufferedImage; 051import java.io.*; 052import java.net.*; 053import java.net.http.HttpClient; 054import java.net.http.HttpRequest; 055import java.net.http.HttpResponse; 056import java.nio.charset.Charset; 057import java.nio.charset.StandardCharsets; 058import java.nio.file.Files; 059import java.security.MessageDigest; 060import java.time.Duration; 061import java.util.List; 062import java.util.*; 063import java.util.concurrent.atomic.AtomicInteger; 064import java.util.regex.Pattern; 065import java.util.zip.ZipEntry; 066import java.util.zip.ZipFile; 067 068/** 069 * Provides different utilities methods to apply processing from a security perspective.<br> 070 * These code snippet: 071 * <ul> 072 * <li>Can be used, as "foundation", to customize the validation to the app context.</li> 073 * <li>Were implemented in a way to facilitate adding or removal of validations depending on usage context.</li> 074 * <li>Were centralized on one class to be able to enhance them across time as well as <a href="https://github.com/righettod/code-snippets-security-utils/issues">missing case/bug identification</a>.</li> 075 * </ul> 076 */ 077public class SecurityUtils { 078 079 /** 080 * Default constructor: Not needed as the class only provides static methods. 081 */ 082 private SecurityUtils() { 083 } 084 085 /** 086 * Apply a collection of validation to verify if a provided PIN code is considered weak (easy to guess) or none.<br> 087 * This method consider that format of the PIN code is [0-9]{6,}<br> 088 * Rule to consider a PIN code as weak: 089 * <ul> 090 * <li>Length is inferior to 6 positions.</li> 091 * <li>Contain only the same number or only a sequence of zero.</li> 092 * <li>Contain sequence of following incremental or decremental numbers.</li> 093 * </ul> 094 * 095 * @param pinCode PIN code to verify. 096 * @return True only if the PIN is considered as weak. 097 */ 098 public static boolean isWeakPINCode(String pinCode) { 099 boolean isWeak = true; 100 //Length is inferior to 6 positions 101 //Use "Long.parseLong(pinCode)" to cause a NumberFormatException if the PIN is not a numeric one 102 //and to ensure that the PIN is not only a sequence of zero 103 if (pinCode != null && Long.parseLong(pinCode) > 0 && pinCode.trim().length() > 5) { 104 //Contain only the same number 105 String regex = String.format("^[%s]{%s}$", pinCode.charAt(0), pinCode.length()); 106 if (!Pattern.matches(regex, pinCode)) { 107 //Contain sequence of following incremental or decremental numbers 108 char previousChar = 'X'; 109 boolean containSequence = false; 110 for (char c : pinCode.toCharArray()) { 111 if (previousChar != 'X') { 112 int previousNbr = Integer.parseInt(String.valueOf(previousChar)); 113 int currentNbr = Integer.parseInt(String.valueOf(c)); 114 if (currentNbr == (previousNbr - 1) || currentNbr == (previousNbr + 1)) { 115 containSequence = true; 116 break; 117 } 118 } 119 previousChar = c; 120 } 121 if (!containSequence) { 122 isWeak = false; 123 } 124 } 125 } 126 return isWeak; 127 } 128 129 /** 130 * Apply a collection of validations on a Word 97-2003 (binary format) document file provided: 131 * <ul> 132 * <li>Real Microsoft Word 97-2003 document file.</li> 133 * <li>No VBA Macro.<br></li> 134 * <li>No embedded objects.</li> 135 * </ul> 136 * 137 * @param wordFilePath Filename of the Word document file to check. 138 * @return True only if the file pass all validations. 139 * @see "https://poi.apache.org/components/" 140 * @see "https://poi.apache.org/components/document/" 141 * @see "https://poi.apache.org/components/poifs/how-to.html" 142 * @see "https://poi.apache.org/components/poifs/embeded.html" 143 * @see "https://poi.apache.org/" 144 * @see "https://mvnrepository.com/artifact/org.apache.poi/poi" 145 */ 146 public static boolean isWord972003DocumentSafe(String wordFilePath) { 147 boolean isSafe = false; 148 try { 149 File wordFile = new File(wordFilePath); 150 if (wordFile.exists() && wordFile.canRead() && wordFile.isFile()) { 151 //Step 1: Try to load the file, if its fail then it imply that is not a valid Word 97-2003 format file 152 try (POIFSFileSystem fs = new POIFSFileSystem(wordFile)) { 153 //Step 2: Check if the document contains VBA macros, in our case is not allowed 154 VBAMacroReader macroReader = new VBAMacroReader(fs); 155 Map<String, String> macros = macroReader.readMacros(); 156 if (macros == null || macros.isEmpty()) { 157 //Step 3: Check if the document contains any embedded objects, in our case is not allowed 158 //From POI documentation: 159 //Word normally stores embedded files in subdirectories of the ObjectPool directory, itself a subdirectory of the filesystem root. 160 //Typically, these subdirectories and named starting with an underscore, followed by 10 numbers. 161 final List<String> embeddedObjectFound = new ArrayList<>(); 162 DirectoryEntry root = fs.getRoot(); 163 if (root.getEntryCount() > 0) { 164 root.iterator().forEachRemaining(entry -> { 165 if ("ObjectPool".equalsIgnoreCase(entry.getName()) && entry instanceof DirectoryEntry) { 166 DirectoryEntry objPoolDirectory = (DirectoryEntry) entry; 167 if (objPoolDirectory.getEntryCount() > 0) { 168 objPoolDirectory.iterator().forEachRemaining(objPoolDirectoryEntry -> { 169 if (objPoolDirectoryEntry instanceof DirectoryEntry) { 170 DirectoryEntry objPoolDirectoryEntrySubDirectoryEntry = (DirectoryEntry) objPoolDirectoryEntry; 171 if (objPoolDirectoryEntrySubDirectoryEntry.getEntryCount() > 0) { 172 objPoolDirectoryEntrySubDirectoryEntry.forEach(objPoolDirectoryEntrySubDirectoryEntryEntry -> { 173 if (objPoolDirectoryEntrySubDirectoryEntryEntry.isDocumentEntry()) { 174 embeddedObjectFound.add(objPoolDirectoryEntrySubDirectoryEntryEntry.getName()); 175 } 176 }); 177 } 178 } 179 }); 180 } 181 } 182 }); 183 } 184 isSafe = embeddedObjectFound.isEmpty(); 185 } 186 } 187 } 188 } catch (Exception e) { 189 isSafe = false; 190 } 191 return isSafe; 192 } 193 194 /** 195 * Ensure that an XML file does not contain any External Entity, DTD or XInclude instructions. 196 * 197 * @param xmlFilePath Filename of the XML file to check. 198 * @return True only if the file pass all validations. 199 * @see "https://portswigger.net/web-security/xxe" 200 * @see "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java" 201 * @see "https://docs.oracle.com/en/java/javase/13/security/java-api-xml-processing-jaxp-security-guide.html#GUID-82F8C206-F2DF-4204-9544-F96155B1D258" 202 * @see "https://www.w3.org/TR/xinclude-11/" 203 * @see "https://en.wikipedia.org/wiki/XInclude" 204 */ 205 public static boolean isXMLSafe(String xmlFilePath) { 206 boolean isSafe = false; 207 try { 208 File xmlFile = new File(xmlFilePath); 209 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 210 //Step 1a: Verify that the XML file content does not contain any XInclude instructions 211 boolean containXInclude = Files.readAllLines(xmlFile.toPath()).stream().anyMatch(line -> line.toLowerCase(Locale.ROOT).contains(":include ")); 212 if (!containXInclude) { 213 //Step 1b: Parse the XML file, if an exception occur than it's imply that the XML specified is not a valid ones 214 //Create an XML document builder throwing Exception if a DOCTYPE instruction is present 215 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 216 dbfInstance.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 217 //Xerces 2 only 218 //dbfInstance.setFeature("http://xerces.apache.org/xerces2-j/features.html#disallow-doctype-decl",true); 219 dbfInstance.setXIncludeAware(false); 220 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 221 //Parse the document 222 Document doc = builder.parse(xmlFile); 223 isSafe = (doc != null && doc.getDocumentElement() != null); 224 } 225 } 226 } catch (Exception e) { 227 isSafe = false; 228 } 229 return isSafe; 230 } 231 232 233 /** 234 * Extract all URL links from a PDF file provided.<br> 235 * This can be used to apply validation on a PDF against contained links. 236 * 237 * @param pdfFilePath pdfFilePath Filename of the PDF file to process. 238 * @return A List of URL objects that is empty if no links is found. 239 * @throws Exception If any error occurs during the processing of the PDF file. 240 * @see "https://www.gushiciku.cn/pl/21KQ" 241 * @see "https://pdfbox.apache.org/" 242 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 243 */ 244 public static List<URL> extractAllPDFLinks(String pdfFilePath) throws Exception { 245 final List<URL> links = new ArrayList<>(); 246 File pdfFile = new File(pdfFilePath); 247 try (PDDocument document = Loader.loadPDF(pdfFile)) { 248 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 249 AnnotationFilter actionURIAnnotationFilter = new AnnotationFilter() { 250 @Override 251 public boolean accept(PDAnnotation annotation) { 252 boolean keep = false; 253 if (annotation instanceof PDAnnotationLink) { 254 keep = (((PDAnnotationLink) annotation).getAction() instanceof PDActionURI); 255 } 256 return keep; 257 } 258 }; 259 documentCatalog.getPages().forEach(page -> { 260 try { 261 page.getAnnotations(actionURIAnnotationFilter).forEach(annotation -> { 262 PDActionURI linkAnnotation = (PDActionURI) ((PDAnnotationLink) annotation).getAction(); 263 try { 264 URL urlObj = new URL(linkAnnotation.getURI()); 265 if (!links.contains(urlObj)) { 266 links.add(urlObj); 267 } 268 } catch (MalformedURLException e) { 269 throw new RuntimeException(e); 270 } 271 }); 272 } catch (Exception e) { 273 throw new RuntimeException(e); 274 } 275 }); 276 } 277 return links; 278 } 279 280 /** 281 * Apply a collection of validations on a PDF file provided: 282 * <ul> 283 * <li>Real PDF file.</li> 284 * <li>No attachments.</li> 285 * <li>No Javascript code.</li> 286 * <li>No links using action of type URI/Launch/RemoteGoTo/ImportData.</li> 287 * </ul> 288 * 289 * @param pdfFilePath Filename of the PDF file to check. 290 * @return True only if the file pass all validations. 291 * @see "https://stackoverflow.com/a/36161267" 292 * @see "https://www.gushiciku.cn/pl/21KQ" 293 * @see "https://github.com/jonaslejon/malicious-pdf" 294 * @see "https://pdfbox.apache.org/" 295 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 296 */ 297 public static boolean isPDFSafe(String pdfFilePath) { 298 boolean isSafe = false; 299 try { 300 File pdfFile = new File(pdfFilePath); 301 if (pdfFile.exists() && pdfFile.canRead() && pdfFile.isFile()) { 302 //Step 1: Try to load the file, if its fail then it imply that is not a valid PDF file 303 try (PDDocument document = Loader.loadPDF(pdfFile)) { 304 //Step 2: Check if the file contains attached files, in our case is not allowed 305 PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); 306 PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(documentCatalog); 307 if (namesDictionary.getEmbeddedFiles() == null) { 308 //Step 3: Check if the file contains Javascript code, in our case is not allowed 309 if (namesDictionary.getJavaScript() == null) { 310 //Step 4: Check if the file contains links using action of type URI/Launch/RemoteGoTo/ImportData, in our case is not allowed 311 final List<Integer> notAllowedAnnotationCounterList = new ArrayList<>(); 312 AnnotationFilter notAllowedAnnotationFilter = new AnnotationFilter() { 313 @Override 314 public boolean accept(PDAnnotation annotation) { 315 boolean keep = false; 316 if (annotation instanceof PDAnnotationLink) { 317 PDAnnotationLink link = (PDAnnotationLink) annotation; 318 PDAction action = link.getAction(); 319 if ((action instanceof PDActionURI) || (action instanceof PDActionLaunch) || (action instanceof PDActionRemoteGoTo) || (action instanceof PDActionImportData)) { 320 keep = true; 321 } 322 } 323 return keep; 324 } 325 }; 326 documentCatalog.getPages().forEach(page -> { 327 try { 328 notAllowedAnnotationCounterList.add(page.getAnnotations(notAllowedAnnotationFilter).size()); 329 } catch (IOException e) { 330 throw new RuntimeException(e); 331 } 332 }); 333 if (notAllowedAnnotationCounterList.stream().reduce(0, Integer::sum) == 0) { 334 isSafe = true; 335 } 336 } 337 } 338 } 339 } 340 } catch (Exception e) { 341 isSafe = false; 342 } 343 return isSafe; 344 } 345 346 /** 347 * Remove as much as possible metadata from the provided PDF document object. 348 * 349 * @param document PDFBox PDF document object on which metadata must be removed. 350 * @see "https://gist.github.com/righettod/d7e07443c43d393a39de741a0d920069" 351 * @see "https://pdfbox.apache.org/" 352 * @see "https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox" 353 */ 354 public static void clearPDFMetadata(PDDocument document) { 355 if (document != null) { 356 PDDocumentInformation infoEmpty = new PDDocumentInformation(); 357 document.setDocumentInformation(infoEmpty); 358 PDMetadata newMetadataEmpty = new PDMetadata(document); 359 document.getDocumentCatalog().setMetadata(newMetadataEmpty); 360 } 361 } 362 363 364 /** 365 * Validate that the URL provided is really a relative URL. 366 * 367 * @param targetUrl URL to validate. 368 * @return True only if the file pass all validations. 369 * @see "https://portswigger.net/web-security/ssrf" 370 * @see "https://stackoverflow.com/q/6785442" 371 */ 372 public static boolean isRelativeURL(String targetUrl) { 373 boolean isValid = false; 374 //Reject any URL encoded content and URL starting with a double slash 375 //Reject any URL contains credentials or fragment to prevent potential bypasses 376 String work = targetUrl; 377 if (!work.contains("%") && !work.contains("@") && !work.contains("#") && !work.startsWith("//")) { 378 //Creation of a URL object must fail 379 try { 380 new URL(work); 381 isValid = false; 382 } catch (MalformedURLException mf) { 383 //Last check to be sure (for prod usage compile the pattern one time) 384 isValid = Pattern.compile("^/[a-z0-9]+", Pattern.CASE_INSENSITIVE).matcher(work).find(); 385 } 386 } 387 return isValid; 388 } 389 390 /** 391 * Apply a collection of validations on a ZIP file provided: 392 * <ul> 393 * <li>Real ZIP file.</li> 394 * <li>Contain less than a specified level of deepness.</li> 395 * <li>Do not contain Zip-Slip entry path.</li> 396 * </ul> 397 * 398 * @param zipFilePath Filename of the ZIP file to check. 399 * @param maxLevelDeepness Threshold of deepness above which a ZIP archive will be rejected. 400 * @param rejectArchiveFile Flag to specify if presence of any archive entry will cause the rejection of the ZIP file. 401 * @return True only if the file pass all validations. 402 * @see "https://rules.sonarsource.com/java/type/Security%20Hotspot/RSPEC-5042" 403 * @see "https://security.snyk.io/research/zip-slip-vulnerability" 404 * @see "https://en.wikipedia.org/wiki/Zip_bomb" 405 * @see "https://github.com/ptoomey3/evilarc" 406 * @see "https://github.com/abdulfatir/ZipBomb" 407 * @see "https://www.baeldung.com/cs/zip-bomb" 408 * @see "https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/" 409 * @see "https://wiki.sei.cmu.edu/confluence/display/java/IDS04-J.+Safely+extract+files+from+ZipInputStream" 410 */ 411 public static boolean isZIPSafe(String zipFilePath, int maxLevelDeepness, boolean rejectArchiveFile) { 412 List<String> archiveExtensions = Arrays.asList("zip", "tar", "7z", "gz", "jar", "phar", "bz2", "tgz"); 413 boolean isSafe = false; 414 try { 415 File zipFile = new File(zipFilePath); 416 if (zipFile.exists() && zipFile.canRead() && zipFile.isFile() && maxLevelDeepness > 0) { 417 //Step 1: Try to load the file, if its fail then it imply that is not a valid ZIP file 418 try (ZipFile zipArch = new ZipFile(zipFile)) { 419 //Step 2: Parse entries 420 long deepness = 0; 421 ZipEntry zipEntry; 422 String entryExtension; 423 String zipEntryName; 424 boolean validationsFailed = false; 425 Enumeration<? extends ZipEntry> entries = zipArch.entries(); 426 while (entries.hasMoreElements()) { 427 zipEntry = entries.nextElement(); 428 zipEntryName = zipEntry.getName(); 429 entryExtension = zipEntryName.substring(zipEntryName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT).trim(); 430 //Step 2a: Check if the current entry is an archive file 431 if (rejectArchiveFile && archiveExtensions.contains(entryExtension)) { 432 validationsFailed = true; 433 break; 434 } 435 //Step 2b: Check that level of deepness is inferior to the threshold specified 436 if (zipEntryName.contains("/")) { 437 //Determine deepness by inspecting the entry name. 438 //Indeed, folder will be represented like this: folder/folder/folder/ 439 //So we can count the number of "/" to identify the deepness of the entry 440 deepness = zipEntryName.chars().filter(ch -> ch == '/').count(); 441 if (deepness > maxLevelDeepness) { 442 validationsFailed = true; 443 break; 444 } 445 } 446 //Step 2c: Check if any entries match pattern of zip slip payload 447 if (zipEntryName.contains("..\\") || zipEntryName.contains("../")) { 448 validationsFailed = true; 449 break; 450 } 451 } 452 if (!validationsFailed) { 453 isSafe = true; 454 } 455 } 456 } 457 } catch (Exception e) { 458 isSafe = false; 459 } 460 return isSafe; 461 } 462 463 /** 464 * Identify the mime type of the content specified (array of bytes).<br> 465 * Note that it cannot be fully trusted (see the tweet '1595824709186519041' referenced), so, additional validations are required. 466 * 467 * @param content The content as an array of bytes. 468 * @return The mime type in lower case or null if it cannot be identified. 469 * @see "https://twitter.com/righettod/status/1595824709186519041" 470 * @see "https://tika.apache.org/" 471 * @see "https://mvnrepository.com/artifact/org.apache.tika/tika-core" 472 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types" 473 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml" 474 */ 475 public static String identifyMimeType(byte[] content) { 476 String mimeType = null; 477 if (content != null && content.length > 0) { 478 Detector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes()); 479 Metadata metadata = new Metadata(); 480 try { 481 try (TemporaryResources temporaryResources = new TemporaryResources(); TikaInputStream tikaInputStream = TikaInputStream.get(new ByteArrayInputStream(content), temporaryResources, metadata)) { 482 MediaType mt = detector.detect(tikaInputStream, metadata); 483 if (mt != null) { 484 mimeType = mt.toString().toLowerCase(Locale.ROOT); 485 } 486 } 487 } catch (IOException ioe) { 488 mimeType = null; 489 } 490 } 491 return mimeType; 492 } 493 494 /** 495 * Apply a collection of validations on a string expected to be an public IP address: 496 * <ul> 497 * <li>Is a valid IP v4 or v6 address.</li> 498 * <li>Is public from an Internet perspective.</li> 499 * </ul> 500 * <br> 501 * <b>Note:</b> I often see missing such validation in the value read from HTTP request headers like "X-Forwarded-For" or "Forwarded". 502 * <br><br> 503 * <b>Note for IPv6:</b> I used documentation found so it is really experimental! 504 * 505 * @param ip String expected to be a valid IP address. 506 * @return True only if the string pass all validations. 507 * @see "https://commons.apache.org/proper/commons-validator/" 508 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/InetAddressValidator.html" 509 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html" 510 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_Orange_Tsai_Talk.pdf" 511 * @see "https://cheatsheetseries.owasp.org/assets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet_SSRF_Bible.pdf" 512 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For" 513 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded" 514 * @see "https://ipcisco.com/lesson/ipv6-address/" 515 * @see "https://www.juniper.net/documentation/us/en/software/junos/interfaces-security-devices/topics/topic-map/security-interface-ipv4-ipv6-protocol.html" 516 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/net/InetAddress.html#getByName(java.lang.String)" 517 * @see "https://www.arin.net/reference/research/statistics/address_filters/" 518 * @see "https://en.wikipedia.org/wiki/Multicast_address" 519 * @see "https://stackoverflow.com/a/5619409" 520 * @see "https://www.ripe.net/media/documents/ipv6-address-types.pdf" 521 * @see "https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml" 522 * @see "https://developer.android.com/reference/java/net/Inet6Address" 523 * @see "https://en.wikipedia.org/wiki/Unique_local_address" 524 */ 525 public static boolean isPublicIPAddress(String ip) { 526 boolean isValid = false; 527 try { 528 //Quick validation on the string itself based on characters used to compose an IP v4/v6 address 529 if (Pattern.matches("[0-9a-fA-F:.]+", ip)) { 530 //If OK then use the dedicated InetAddressValidator from Apache Commons Validator 531 if (InetAddressValidator.getInstance().isValid(ip)) { 532 //If OK then validate that is an public IP address 533 //From Javadoc for "InetAddress.getByName": If a literal IP address is supplied, only the validity of the address format is checked. 534 InetAddress addr = InetAddress.getByName(ip); 535 isValid = (!addr.isAnyLocalAddress() && !addr.isLinkLocalAddress() && !addr.isLoopbackAddress() && !addr.isMulticastAddress() && !addr.isSiteLocalAddress()); 536 //If OK and the IP is an V6 one then make additional validation because the built-in Java API will let pass some V6 IP 537 //For the prefix map, the start of the key indicates if the value is a regex or a string 538 if (isValid && (addr instanceof Inet6Address)) { 539 Map<String, String> prefixes = new HashMap<>(); 540 prefixes.put("REGEX_LOOPBACK", "^(0|:)+1$"); 541 prefixes.put("REGEX_UNIQUE-LOCAL-ADDRESSES", "^f(c|d)[a-f0-9]{2}:.*$"); 542 prefixes.put("STRING_LINK-LOCAL-ADDRESSES", "fe80:"); 543 prefixes.put("REGEX_TEREDO", "^2001:[0]*:.*$"); 544 prefixes.put("REGEX_BENCHMARKING", "^2001:[0]*2:.*$"); 545 prefixes.put("REGEX_ORCHID", "^2001:[0]*10:.*$"); 546 prefixes.put("STRING_DOCUMENTATION", "2001:db8:"); 547 prefixes.put("STRING_GLOBAL-UNICAST", "2000:"); 548 prefixes.put("REGEX_MULTICAST", "^ff[0-9]{2}:.*$"); 549 final List<Boolean> results = new ArrayList<>(); 550 final String ipLower = ip.trim().toLowerCase(Locale.ROOT); 551 prefixes.forEach((addressType, expr) -> { 552 String exprLower = expr.trim().toLowerCase(); 553 if (addressType.startsWith("STRING_")) { 554 results.add(ipLower.startsWith(exprLower)); 555 } else { 556 results.add(Pattern.matches(exprLower, ipLower)); 557 } 558 }); 559 isValid = ((results.size() == prefixes.size()) && !results.contains(Boolean.TRUE)); 560 } 561 } 562 } 563 } catch (Exception e) { 564 isValid = false; 565 } 566 return isValid; 567 } 568 569 /** 570 * Compute a SHA256 hash from an input composed of a collection of strings.<br><br> 571 * This method take care to build the source string in a way to prevent this source string to be prone to abuse targeting the different parts composing it.<br><br> 572 * <p> 573 * Example of possible abuse without precautions applied during the hash calculation logic:<br> 574 * Hash of <code>SHA256("Hello", "My", "World!!!")</code> will be equals to the hash of <code>SHA256("Hell", "oMyW", "orld!!!")</code>.<br> 575 * </p> 576 * This method ensure that both hash above will be different.<br><br> 577 * 578 * <b>Note:</b> The character <code>|</code> is used, as separator, of every parts so a part is not allowed to contains this character. 579 * 580 * @param parts Ordered list of strings to use to build the input string for which the hash must be computed on. No null value is accepted on object composing the collection. 581 * @return The hash, as an array of bytes, to allow caller to convert it to the final representation wanted (HEX, Base64, etc.). If the collection passed is null or empty then the method return null. 582 * @throws Exception If any exception occurs 583 * @see "https://github.com/righettod/code-snippets-security-utils/issues/16" 584 * @see "https://pentesterlab.com/badges/codereview" 585 * @see "https://blog.trailofbits.com/2024/08/21/yolo-is-not-a-valid-hash-construction/" 586 * @see "https://www.nist.gov/publications/sha-3-derived-functions-cshake-kmac-tuplehash-and-parallelhash" 587 */ 588 public static byte[] computeHashNoProneToAbuseOnParts(List<String> parts) throws Exception { 589 byte[] hash = null; 590 String separator = "|"; 591 if (parts != null && !parts.isEmpty()) { 592 //Ensure that not part is null 593 if (parts.stream().anyMatch(Objects::isNull)) { 594 throw new IllegalArgumentException("No part must be null!"); 595 } 596 //Ensure that the separator is absent from every part 597 if (parts.stream().anyMatch(part -> part.contains(separator))) { 598 throw new IllegalArgumentException(String.format("The character '%s', used as parts separator, must be absent from every parts!", separator)); 599 } 600 MessageDigest digest = MessageDigest.getInstance("SHA-256"); 601 final StringBuilder buffer = new StringBuilder(separator); 602 parts.forEach(p -> { 603 buffer.append(p).append(separator); 604 }); 605 hash = digest.digest(buffer.toString().getBytes(StandardCharsets.UTF_8)); 606 } 607 return hash; 608 } 609 610 /** 611 * Ensure that an XML file only uses DTD/XSD references (called System Identifier) present in the allowed list provided.<br><br> 612 * The code is based on the validation implemented into the OpenJDK 21, by the class <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java">java.util.prefs.XmlSupport</a></b>, in the method <b><a href="https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L240">loadPrefsDoc()</a></b>.<br><br> 613 * The method also ensure that no Public Identifier is used to prevent potential bypasses of the validations. 614 * 615 * @param xmlFilePath Filename of the XML file to check. 616 * @param allowedSystemIdentifiers List of URL allowed for System Identifier specified for any XSD/DTD references. 617 * @return True only if the file pass all validations. 618 * @see "https://www.w3schools.com/xml/prop_documenttype_systemid.asp" 619 * @see "https://www.ibm.com/docs/en/integration-bus/9.0.0?topic=doctypedecl-xml-systemid" 620 * @see "https://www.liquid-technologies.com/Reference/Glossary/XML_DocType.html" 621 * @see "https://www.xml.com/pub/98/08/xmlqna0.html" 622 * @see "https://github.com/openjdk/jdk/blob/jdk-21%2B35/src/java.prefs/share/classes/java/util/prefs/XmlSupport.java#L397" 623 * @see "https://en.wikipedia.org/wiki/Formal_Public_Identifier" 624 */ 625 public static boolean isXMLOnlyUseAllowedXSDorDTD(String xmlFilePath, final List<String> allowedSystemIdentifiers) { 626 boolean isSafe = false; 627 final String errorTemplate = "Non allowed %s ID detected!"; 628 final String emptyFakeDTD = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!ELEMENT dummy EMPTY>"; 629 final String emptyFakeXSD = "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\"> <xs:element name=\"dummy\"/></xs:schema>"; 630 631 if (allowedSystemIdentifiers == null || allowedSystemIdentifiers.isEmpty()) { 632 throw new IllegalArgumentException("At least one SID must be specified!"); 633 } 634 File xmlFile = new File(xmlFilePath); 635 if (xmlFile.exists() && xmlFile.canRead() && xmlFile.isFile()) { 636 try { 637 EntityResolver resolverValidator = (publicId, systemId) -> { 638 if (publicId != null) { 639 throw new SAXException(String.format(errorTemplate, "PUBLIC")); 640 } 641 if (!allowedSystemIdentifiers.contains(systemId)) { 642 throw new SAXException(String.format(errorTemplate, "SYSTEM")); 643 } 644 //If it is OK then return a empty DTD/XSD 645 return new InputSource(new StringReader(systemId.toLowerCase().endsWith(".dtd") ? emptyFakeDTD : emptyFakeXSD)); 646 }; 647 DocumentBuilderFactory dbfInstance = DocumentBuilderFactory.newInstance(); 648 dbfInstance.setIgnoringElementContentWhitespace(true); 649 dbfInstance.setXIncludeAware(false); 650 dbfInstance.setValidating(false); 651 dbfInstance.setCoalescing(true); 652 dbfInstance.setIgnoringComments(false); 653 DocumentBuilder builder = dbfInstance.newDocumentBuilder(); 654 builder.setEntityResolver(resolverValidator); 655 Document doc = builder.parse(xmlFile); 656 isSafe = (doc != null); 657 } catch (SAXException | IOException | ParserConfigurationException e) { 658 isSafe = false; 659 } 660 } 661 662 return isSafe; 663 } 664 665 /** 666 * Apply a collection of validations on a EXCEL CSV file provided (file was expected to be opened in Microsoft EXCEL): 667 * <ul> 668 * <li>Real CSV file.</li> 669 * <li>Do not contains any payload related to a CSV injections.</li> 670 * </ul> 671 * Ensure that, if Apache Commons CSV does not find any record then, the file will be considered as NOT safe (prevent potential bypasses).<br><br> 672 * <b>Note:</b> Record delimiter used is the <code>,</code> (comma) character. See the Apache Commons CSV reference provided for EXCEL.<br> 673 * 674 * @param csvFilePath Filename of the CSV file to check. 675 * @return True only if the file pass all validations. 676 * @see "https://commons.apache.org/proper/commons-csv/" 677 * @see "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL" 678 * @see "https://www.we45.com/post/your-excel-sheets-are-not-safe-heres-how-to-beat-csv-injection" 679 * @see "https://www.whiteoaksecurity.com/blog/2020-4-23-csv-injection-whats-the-risk/" 680 * @see "https://book.hacktricks.xyz/pentesting-web/formula-csv-doc-latex-ghostscript-injection" 681 * @see "https://owasp.org/www-community/attacks/CSV_Injection" 682 * @see "https://payatu.com/blog/csv-injection-basic-to-exploit/" 683 * @see "https://cwe.mitre.org/data/definitions/1236.html" 684 */ 685 public static boolean isExcelCSVSafe(String csvFilePath) { 686 boolean isSafe; 687 final AtomicInteger recordCount = new AtomicInteger(); 688 final List<Character> payloadDetectionCharacters = List.of('=', '+', '@', '-', '\r', '\t'); 689 690 try { 691 final List<String> payloadsIdentified = new ArrayList<>(); 692 try (Reader in = new FileReader(csvFilePath)) { 693 Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); 694 records.forEach(record -> { 695 record.forEach(recordValue -> { 696 if (recordValue != null && !recordValue.trim().isEmpty() && payloadDetectionCharacters.contains(recordValue.trim().charAt(0))) { 697 payloadsIdentified.add(recordValue); 698 } 699 recordCount.getAndIncrement(); 700 }); 701 }); 702 } 703 isSafe = (payloadsIdentified.isEmpty() && recordCount.get() > 0); 704 } catch (Exception e) { 705 isSafe = false; 706 } 707 708 return isSafe; 709 } 710 711 /** 712 * Provide a way to add an integrity marker (<a href="https://en.wikipedia.org/wiki/HMAC">HMAC</a>) to a serialized object serialized using the <a href="https://www.baeldung.com/java-serialization">java native system</a> (binary).<br> 713 * The goal is to provide <b>a temporary workaround</b> to try to prevent deserialization attacks and give time to move to a text-based serialization approach. 714 * 715 * @param processingMode Define the mode of processing i.e. protect or validate. ({@link eu.righettod.ProcessingMode}) 716 * @param input When the processing mode is "protect" than the expected input (string) is a java serialized object encoded in Base64 otherwise (processing mode is "validate") expected input is the output of this method when the "protect" mode was used. 717 * @param secret Secret to use to compute the SHA256 HMAC. 718 * @return A map with the following keys: <ul><li><b>PROCESSING_MODE</b>: Processing mode used to compute the result.</li><li><b>STATUS</b>: A boolean indicating if the processing was successful or not.</li><li><b>RESULT</b>: Always contains a string representing the protected serialized object in the format <code>[SERIALIZED_OBJECT_BASE64_ENCODED]:[SERIALIZED_OBJECT_HMAC_BASE64_ENCODED]</code>.</li></ul> 719 * @throws Exception If any exception occurs. 720 * @see "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html" 721 * @see "https://owasp.org/www-project-top-ten/2017/A8_2017-Insecure_Deserialization" 722 * @see "https://portswigger.net/web-security/deserialization" 723 * @see "https://www.baeldung.com/java-serialization-approaches" 724 * @see "https://www.baeldung.com/java-serialization" 725 * @see "https://cryptobook.nakov.com/mac-and-key-derivation/hmac-and-key-derivation" 726 * @see "https://en.wikipedia.org/wiki/HMAC" 727 * @see "https://smattme.com/posts/how-to-generate-hmac-signature-in-java/" 728 */ 729 public static Map<String, Object> ensureSerializedObjectIntegrity(ProcessingMode processingMode, String input, byte[] secret) throws Exception { 730 Map<String, Object> results; 731 String resultFormatTemplate = "%s:%s"; 732 //Verify input provided to be consistent 733 if (processingMode == null) { 734 throw new IllegalArgumentException("The processing mode is mandatory!"); 735 } 736 if (input == null || input.trim().isEmpty()) { 737 throw new IllegalArgumentException("Input data is mandatory!"); 738 } 739 if (secret == null || secret.length == 0) { 740 throw new IllegalArgumentException("The HMAC secret is mandatory!"); 741 } 742 if (processingMode.equals(ProcessingMode.VALIDATE) && input.split(":").length != 2) { 743 throw new IllegalArgumentException("Input data provided is invalid for the processing mode specified!"); 744 } 745 //Processing 746 Base64.Decoder b64Decoder = Base64.getDecoder(); 747 Base64.Encoder b64Encoder = Base64.getEncoder(); 748 String hmacAlgorithm = "HmacSHA256"; 749 Mac mac = Mac.getInstance(hmacAlgorithm); 750 SecretKeySpec key = new SecretKeySpec(secret, hmacAlgorithm); 751 mac.init(key); 752 results = new HashMap<>(); 753 results.put("PROCESSING_MODE", processingMode.toString()); 754 switch (processingMode) { 755 case PROTECT -> { 756 byte[] objectBytes = b64Decoder.decode(input); 757 byte[] hmac = mac.doFinal(objectBytes); 758 String encodedHmac = b64Encoder.encodeToString(hmac); 759 results.put("STATUS", Boolean.TRUE); 760 results.put("RESULT", String.format(resultFormatTemplate, input, encodedHmac)); 761 } 762 case VALIDATE -> { 763 String[] parts = input.split(":"); 764 byte[] objectBytes = b64Decoder.decode(parts[0].trim()); 765 byte[] hmacProvided = b64Decoder.decode(parts[1].trim()); 766 byte[] hmacComputed = mac.doFinal(objectBytes); 767 String encodedHmacComputed = b64Encoder.encodeToString(hmacComputed); 768 Boolean hmacIsValid = Arrays.equals(hmacProvided, hmacComputed); 769 results.put("STATUS", hmacIsValid); 770 results.put("RESULT", String.format(resultFormatTemplate, parts[0].trim(), encodedHmacComputed)); 771 } 772 default -> throw new IllegalArgumentException("Not supported processing mode!"); 773 } 774 return results; 775 } 776 777 /** 778 * Apply a collection of validations on a JSON string provided: 779 * <ul> 780 * <li>Real JSON structure.</li> 781 * <li>Contain less than a specified number of deepness for nested objects or arrays.</li> 782 * <li>Contain less than a specified number of items in any arrays.</li> 783 * </ul> 784 * <br> 785 * <b>Note:</b> I decided to use a parsing approach using only string processing to prevent any StackOverFlow or OutOfMemory error that can be abused.<br><br> 786 * I used the following assumption: 787 * <ul> 788 * <li>The character <code>{</code> identify the beginning of an object.</li> 789 * <li>The character <code>}</code> identify the end of an object.</li> 790 * <li>The character <code>[</code> identify the beginning of an array.</li> 791 * <li>The character <code>]</code> identify the end of an array.</li> 792 * <li>The character <code>"</code> identify the delimiter of a string.</li> 793 * <li>The character sequence <code>\"</code> identify the escaping of an double quote.</li> 794 * </ul> 795 * 796 * @param json String containing the JSON data to validate. 797 * @param maxItemsByArraysCount Maximum number of items allowed in an array. 798 * @param maxDeepnessAllowed Maximum number nested objects or arrays allowed. 799 * @return True only if the string pass all validations. 800 * @see "https://javaee.github.io/jsonp/" 801 * @see "https://community.f5.com/discussions/technicalforum/disable-buffer-overflow-in-json-parameters/124306" 802 * @see "https://github.com/InductiveComputerScience/pbJson/issues/2" 803 */ 804 public static boolean isJSONSafe(String json, int maxItemsByArraysCount, int maxDeepnessAllowed) { 805 boolean isSafe = false; 806 807 try { 808 //Step 1: Analyse the JSON string 809 int currentDeepness = 0; 810 int currentArrayItemsCount = 0; 811 int maxDeepnessReached = 0; 812 int maxArrayItemsCountReached = 0; 813 boolean currentlyInArray = false; 814 boolean currentlyInString = false; 815 int currentNestedArrayLevel = 0; 816 String jsonEscapedDoubleQuote = "\\\"";//Escaped double quote must not be considered as a string delimiter 817 String work = json.replace(jsonEscapedDoubleQuote, "'"); 818 for (char c : work.toCharArray()) { 819 switch (c) { 820 case '{': { 821 if (!currentlyInString) { 822 currentDeepness++; 823 } 824 break; 825 } 826 case '}': { 827 if (!currentlyInString) { 828 currentDeepness--; 829 } 830 break; 831 } 832 case '[': { 833 if (!currentlyInString) { 834 currentDeepness++; 835 if (currentlyInArray) { 836 currentNestedArrayLevel++; 837 } 838 currentlyInArray = true; 839 } 840 break; 841 } 842 case ']': { 843 if (!currentlyInString) { 844 currentDeepness--; 845 currentArrayItemsCount = 0; 846 if (currentNestedArrayLevel > 0) { 847 currentNestedArrayLevel--; 848 } 849 if (currentNestedArrayLevel == 0) { 850 currentlyInArray = false; 851 } 852 } 853 break; 854 } 855 case '"': { 856 currentlyInString = !currentlyInString; 857 break; 858 } 859 case ',': { 860 if (!currentlyInString && currentlyInArray) { 861 currentArrayItemsCount++; 862 } 863 break; 864 } 865 } 866 if (currentDeepness > maxDeepnessReached) { 867 maxDeepnessReached = currentDeepness; 868 } 869 if (currentArrayItemsCount > maxArrayItemsCountReached) { 870 maxArrayItemsCountReached = currentArrayItemsCount; 871 } 872 } 873 //Step 2: Apply validation against the value specified as limits 874 isSafe = ((maxItemsByArraysCount > maxArrayItemsCountReached) && (maxDeepnessAllowed > maxDeepnessReached)); 875 876 //Step 3: If the content is safe then ensure that it is valid JSON structure using the "Java API for JSON Processing" (JSR 374) parser reference implementation. 877 if (isSafe) { 878 JsonReader reader = Json.createReader(new StringReader(json)); 879 isSafe = (reader.read() != null); 880 } 881 882 } catch (Exception e) { 883 isSafe = false; 884 } 885 return isSafe; 886 } 887 888 /** 889 * Apply a collection of validations on a image file provided: 890 * <ul> 891 * <li>Real image file.</li> 892 * <li>Its mime type is into the list of allowed mime types.</li> 893 * <li>Its metadata fields do not contains any characters related to a malicious payloads.</li> 894 * </ul> 895 * <br> 896 * <b>Important note:</b> This implementation is prone to bypass using the "<b>raw insertion</b>" method documented in the <a href="https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there">blog post</a> from the Synacktiv team. 897 * To handle such case, it is recommended to resize the image to remove any non image-related content, see <a href="https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java#L54">here</a> for an example.<br> 898 * 899 * @param imageFilePath Filename of the image file to check. 900 * @param imageAllowedMimeTypes List of image mime types allowed. 901 * @return True only if the file pass all validations. 902 * @see "https://commons.apache.org/proper/commons-imaging/" 903 * @see "https://commons.apache.org/proper/commons-imaging/formatsupport.html" 904 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types" 905 * @see "https://www.iana.org/assignments/media-types/media-types.xhtml#image" 906 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 907 * @see "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html" 908 * @see "https://github.com/righettod/document-upload-protection/blob/master/src/main/java/eu/righettod/poc/sanitizer/ImageDocumentSanitizerImpl.java" 909 * @see "https://exiftool.org/examples.html" 910 * @see "https://en.wikipedia.org/wiki/List_of_file_signatures" 911 * @see "https://hexed.it/" 912 * @see "https://github.com/sighook/pixload" 913 */ 914 public static boolean isImageSafe(String imageFilePath, List<String> imageAllowedMimeTypes) { 915 boolean isSafe = false; 916 Pattern payloadDetectionRegex = Pattern.compile("[<>${}`]+", Pattern.CASE_INSENSITIVE); 917 try { 918 File imgFile = new File(imageFilePath); 919 if (imgFile.exists() && imgFile.canRead() && imgFile.isFile() && !imageAllowedMimeTypes.isEmpty()) { 920 final byte[] imgBytes = Files.readAllBytes(imgFile.toPath()); 921 //Step 1: Check the mime type of the file against the allowed ones 922 ImageInfo imgInfo = Imaging.getImageInfo(imgBytes); 923 if (imageAllowedMimeTypes.contains(imgInfo.getMimeType())) { 924 //Step 2: Load the image into an object using the Image API 925 BufferedImage imgObject = Imaging.getBufferedImage(imgBytes); 926 if (imgObject != null && imgObject.getWidth() > 0 && imgObject.getHeight() > 0) { 927 //Step 3: Check the metadata if the image format support it - Highly experimental 928 List<String> metadataWithPayloads = new ArrayList<>(); 929 final ImageMetadata imgMetadata = Imaging.getMetadata(imgBytes); 930 if (imgMetadata != null) { 931 imgMetadata.getItems().forEach(item -> { 932 String metadata = item.toString(); 933 if (payloadDetectionRegex.matcher(metadata).find()) { 934 metadataWithPayloads.add(metadata); 935 } 936 }); 937 } 938 isSafe = metadataWithPayloads.isEmpty(); 939 } 940 } 941 } 942 } catch (Exception e) { 943 isSafe = false; 944 } 945 return isSafe; 946 } 947 948 /** 949 * Rewrite the input file to remove any embedded files that is not embedded using a methods supported by the official format of the file.<br> 950 * Example: a file can be embedded by adding it to the end of the source file, see the reference provided for details. 951 * 952 * @param inputFilePath Filename of the file to clean up. 953 * @param inputFileType Type of the file provided. 954 * @return A array of bytes with the cleaned file. 955 * @throws IllegalArgumentException If an invalid parameter is passed 956 * @throws Exception If any technical error during the cleaning processing 957 * @see "https://www.synacktiv.com/en/publications/persistent-php-payloads-in-pngs-how-to-inject-php-code-in-an-image-and-keep-it-there" 958 * @see "https://github.com/righettod/toolbox-pentest-web/tree/master/misc" 959 * @see "https://github.com/righettod/toolbox-pentest-web?tab=readme-ov-file#misc" 960 * @see "https://stackoverflow.com/a/13605411" 961 */ 962 public static byte[] sanitizeFile(String inputFilePath, InputFileType inputFileType) throws Exception { 963 ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream(); 964 File inputFile = new File(inputFilePath); 965 if (!inputFile.exists() || !inputFile.canRead() || !inputFile.isFile()) { 966 throw new IllegalArgumentException("Cannot read the content of the input file!"); 967 } 968 switch (inputFileType) { 969 case PDF -> { 970 try (PDDocument document = Loader.loadPDF(inputFile)) { 971 document.save(sanitizedContent); 972 } 973 } 974 case IMAGE -> { 975 // Load the original image 976 BufferedImage originalImage = ImageIO.read(inputFile); 977 String originalFormat = identifyMimeType(Files.readAllBytes(inputFile.toPath())).split("/")[1].trim(); 978 // Check that image has been successfully loaded 979 if (originalImage == null) { 980 throw new IOException("Cannot load the original image !"); 981 } 982 // Get current Width and Height of the image 983 int originalWidth = originalImage.getWidth(null); 984 int originalHeight = originalImage.getHeight(null); 985 // Resize the image by removing 1px on Width and Height 986 Image resizedImage = originalImage.getScaledInstance(originalWidth - 1, originalHeight - 1, Image.SCALE_SMOOTH); 987 // Resize the resized image by adding 1px on Width and Height - In fact set image to is initial size 988 Image initialSizedImage = resizedImage.getScaledInstance(originalWidth, originalHeight, Image.SCALE_SMOOTH); 989 // Save image to a bytes buffer 990 int bufferedImageType = BufferedImage.TYPE_INT_ARGB;//By default use a format supporting transparency 991 if ("jpeg".equalsIgnoreCase(originalFormat) || "bmp".equalsIgnoreCase(originalFormat)) { 992 bufferedImageType = BufferedImage.TYPE_INT_RGB; 993 } 994 BufferedImage sanitizedImage = new BufferedImage(initialSizedImage.getWidth(null), initialSizedImage.getHeight(null), bufferedImageType); 995 Graphics2D drawer = sanitizedImage.createGraphics(); 996 drawer.drawImage(initialSizedImage, 0, 0, null); 997 drawer.dispose(); 998 ImageIO.write(sanitizedImage, originalFormat, sanitizedContent); 999 } 1000 default -> throw new IllegalArgumentException("Type of file not supported !"); 1001 } 1002 if (sanitizedContent.size() == 0) { 1003 throw new IOException("An error occur during the rewrite operation!"); 1004 } 1005 return sanitizedContent.toByteArray(); 1006 } 1007 1008 /** 1009 * Apply a collection of validations on a string expected to be an email address: 1010 * <ul> 1011 * <li>Is a valid email address, from a parser perspective, following RFCs on email addresses.</li> 1012 * <li>Is not using "Encoded-word" format.</li> 1013 * <li>Is not using comment format.</li> 1014 * <li>Is not using "Punycode" format.</li> 1015 * <li>Is not using UUCP style addresses.</li> 1016 * <li>Is not using address literals.</li> 1017 * <li>Is not using source routes.</li> 1018 * <li>Is not using the "percent hack".</li> 1019 * </ul><br> 1020 * This is based on the research work from <a href="https://portswigger.net/research/gareth-heyes">Gareth Heyes</a> added in references (Portswigger).<br><br> 1021 * 1022 * <b>Note:</b> The notion of valid, here, is to take from a secure usage of the data perspective. 1023 * 1024 * @param addr String expected to be a valid email address. 1025 * @return True only if the string pass all validations. 1026 * @see "https://commons.apache.org/proper/commons-validator/" 1027 * @see "https://commons.apache.org/proper/commons-validator/apidocs/org/apache/commons/validator/routines/EmailValidator.html" 1028 * @see "https://datatracker.ietf.org/doc/html/rfc2047#section-2" 1029 * @see "https://portswigger.net/research/splitting-the-email-atom" 1030 * @see "https://www.jochentopf.com/email/address.html" 1031 * @see "https://en.wikipedia.org/wiki/Email_address" 1032 */ 1033 public static boolean isEmailAddress(String addr) { 1034 boolean isValid = false; 1035 String work = addr.toLowerCase(Locale.ROOT); 1036 Pattern encodedWordRegex = Pattern.compile("[=?]+", Pattern.CASE_INSENSITIVE); 1037 Pattern forbiddenCharacterRegex = Pattern.compile("[():!%\\[\\],;]+", Pattern.CASE_INSENSITIVE); 1038 try { 1039 //Start with the use of the dedicated EmailValidator from Apache Commons Validator 1040 if (EmailValidator.getInstance(true, true).isValid(work)) { 1041 //If OK then validate it does not contains "Encoded-word" patterns using an aggressive approach 1042 if (!encodedWordRegex.matcher(work).find()) { 1043 //If OK then validate it does not contains punycode 1044 if (!work.contains("xn--")) { 1045 //If OK then validate it does not use: 1046 // UUCP style addresses, 1047 // Comment format, 1048 // Address literals, 1049 // Source routes, 1050 // The percent hack. 1051 if (!forbiddenCharacterRegex.matcher(work).find()) { 1052 isValid = true; 1053 } 1054 } 1055 } 1056 } 1057 } catch (Exception e) { 1058 isValid = false; 1059 } 1060 return isValid; 1061 } 1062 1063 /** 1064 * The <a href="https://www.stet.eu/en/psd2/">PSD2 STET</a> specification require to use <a href="https://datatracker.ietf.org/doc/draft-cavage-http-signatures/">HTTP Signature</a>. 1065 * <br> 1066 * Section <b>3.5.1.2</b> of the document <a href="https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf">Documentation Framework</a> version <b>1.6.3</b>. 1067 * <br> 1068 * The problem is that, by design, the HTTP Signature specification is prone to blind SSRF. 1069 * <br> 1070 * URL example taken from the STET specification: <code>https://path.to/myQsealCertificate_714f8154ec259ac40b8a9786c9908488b2582b68b17e865fede4636d726b709f</code>. 1071 * <br> 1072 * The objective of this code is to try to decrease the "exploitability/interest" of this SSRF for an attacker. 1073 * 1074 * @param certificateUrl Url pointing to a Qualified Certificate (QSealC) encoded in PEM format and respecting the ETSI/TS119495 technical Specification . 1075 * @return TRUE only if the url point to a Qualified Certificate in PEM format. 1076 * @see "https://www.stet.eu/en/psd2/" 1077 * @see "https://www.stet.eu/assets/files/PSD2/1-6-3/api-dsp2-stet-v1.6.3.1-part-1-framework.pdf" 1078 * @see "https://datatracker.ietf.org/doc/draft-cavage-http-signatures/" 1079 * @see "https://datatracker.ietf.org/doc/rfc9421/" 1080 * @see "https://openjdk.org/groups/net/httpclient/intro.html" 1081 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.net.http/java/net/http/package-summary.html" 1082 * @see "https://portswigger.net/web-security/ssrf" 1083 * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control" 1084 */ 1085 public static boolean isPSD2StetSafeCertificateURL(String certificateUrl) { 1086 boolean isValid = false; 1087 long connectionTimeoutInSeconds = 10; 1088 String userAgent = "PSD2-STET-HTTPSignature-CertificateRequest"; 1089 try { 1090 //1. Ensure that the URL end with the SHA-256 fingerprint encoded in HEX of the certificate like requested by STET 1091 if (certificateUrl != null && certificateUrl.lastIndexOf("_") != -1) { 1092 String digestPart = certificateUrl.substring(certificateUrl.lastIndexOf("_") + 1); 1093 if (Pattern.matches("^[0-9a-f]{64}$", digestPart)) { 1094 //2. Ensure that the URL is a valid url by creating a instance of the class URI 1095 URI uri = URI.create(certificateUrl); 1096 //3. Require usage of HTTPS and reject any url containing query parameters 1097 if ("https".equalsIgnoreCase(uri.getScheme()) && uri.getQuery() == null) { 1098 //4. Perform a HTTP HEAD request in order to get the content type of the remote resource 1099 //and limit the interest to use the SSRF because to pass the check the url need to: 1100 //- Do not having any query parameters. 1101 //- Use HTTPS protocol. 1102 //- End with a string having the format "_[0-9a-f]{64}". 1103 //- Trigger the malicious action that the attacker want but with a HTTP HEAD without any redirect and parameters. 1104 HttpResponse<String> response; 1105 try (HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NEVER).build()) { 1106 HttpRequest request = HttpRequest.newBuilder().uri(uri).timeout(Duration.ofSeconds(connectionTimeoutInSeconds)).method("HEAD", HttpRequest.BodyPublishers.noBody()).header("User-Agent", userAgent)//To provide an hint to the target about the initiator of the request 1107 .header("Cache-Control", "no-store, max-age=0")//To prevent caching issues or abuses 1108 .build(); 1109 response = client.send(request, HttpResponse.BodyHandlers.ofString()); 1110 if (response.statusCode() == 200) { 1111 //5. Ensure that the response content type is "text/plain" 1112 Optional<String> contentType = response.headers().firstValue("Content-Type"); 1113 isValid = (contentType.isPresent() && contentType.get().trim().toLowerCase(Locale.ENGLISH).startsWith("text/plain")); 1114 } 1115 } 1116 } 1117 } 1118 } 1119 } catch (Exception e) { 1120 isValid = false; 1121 } 1122 return isValid; 1123 } 1124 1125 /** 1126 * Perform sequential URL decoding operations against a URL encoded data until the data is not URL encoded anymore or if the specified threshold is reached. 1127 * 1128 * @param encodedData URL encoded data. 1129 * @param decodingRoundThreshold Threshold above which decoding will fail. 1130 * @return The decoded data. 1131 * @throws SecurityException If the threshold is reached. 1132 * @see "https://en.wikipedia.org/wiki/Percent-encoding" 1133 * @see "https://owasp.org/www-community/Double_Encoding" 1134 * @see "https://portswigger.net/web-security/essential-skills/obfuscating-attacks-using-encodings" 1135 * @see "https://capec.mitre.org/data/definitions/120.html" 1136 */ 1137 public static String applyURLDecoding(String encodedData, int decodingRoundThreshold) throws SecurityException { 1138 if (decodingRoundThreshold < 1) { 1139 throw new IllegalArgumentException("Threshold must be a positive number !"); 1140 } 1141 if (encodedData == null) { 1142 throw new IllegalArgumentException("Data provided must not be null !"); 1143 } 1144 Charset charset = StandardCharsets.UTF_8; 1145 int currentDecodingRound = 0; 1146 boolean isFinished = false; 1147 String currentRoundData = encodedData; 1148 String previousRoundData = encodedData; 1149 while (!isFinished) { 1150 if (currentDecodingRound > decodingRoundThreshold) { 1151 throw new SecurityException(String.format("Decoding round threshold of %s reached!", decodingRoundThreshold)); 1152 } 1153 currentRoundData = URLDecoder.decode(currentRoundData, charset); 1154 isFinished = currentRoundData.equals(previousRoundData); 1155 previousRoundData = currentRoundData; 1156 currentDecodingRound++; 1157 } 1158 return currentRoundData; 1159 } 1160 1161 /** 1162 * Apply a collection of validations on a string expected to be an system file/folder path: 1163 * <ul> 1164 * <li>Does not contains path traversal payload.</li> 1165 * <li>The canonical path is equals to the absolute path.</li> 1166 * </ul><br> 1167 * 1168 * @param path String expected to be a valid system file/folder path. 1169 * @return True only if the string pass all validations. 1170 * @see "https://portswigger.net/web-security/file-path-traversal" 1171 * @see "https://learn.snyk.io/lesson/directory-traversal/" 1172 * @see "https://capec.mitre.org/data/definitions/126.html" 1173 * @see "https://owasp.org/www-community/attacks/Path_Traversal" 1174 */ 1175 public static boolean isPathSafe(String path) { 1176 boolean isSafe = false; 1177 int decodingRoundThreshold = 3; 1178 try { 1179 if (path != null && !path.isEmpty()) { 1180 //URL decode the path if case of data coming from a web context 1181 String decodedPath = applyURLDecoding(path, decodingRoundThreshold); 1182 //Ensure that no path traversal expression is present 1183 if (!decodedPath.contains("..")) { 1184 File f = new File(decodedPath); 1185 String canonicalPath = f.getCanonicalPath(); 1186 String absolutePath = f.getAbsolutePath(); 1187 isSafe = canonicalPath.equals(absolutePath); 1188 } 1189 } 1190 } catch (Exception e) { 1191 isSafe = false; 1192 } 1193 return isSafe; 1194 } 1195 1196 /** 1197 * Identify if an XML contains any XML comments or have any XSL processing instructions.<br> 1198 * Stream reader based parsing is used to support large XML tree. 1199 * 1200 * @param xmlFilePath Filename of the XML file to check. 1201 * @return True only if XML comments or XSL processing instructions are identified. 1202 * @see "https://www.tutorialspoint.com/xml/xml_processing.htm" 1203 * @see "https://docs.oracle.com/en/java/javase/21/docs/api/java.xml/javax/xml/stream/XMLInputFactory.html" 1204 * @see "https://portswigger.net/kb/issues/00400700_xml-entity-expansion" 1205 * @see "https://www.w3.org/Style/styling-XML.en.html" 1206 */ 1207 public static boolean isXMLHaveCommentsOrXSLProcessingInstructions(String xmlFilePath) { 1208 boolean itemsDetected = false; 1209 try { 1210 //Ensure that the parser will not be prone XML external entity (XXE) injection or XML entity expansion (XEE) attacks 1211 XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); 1212 xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 1213 xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); 1214 xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); 1215 xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); 1216 1217 //Parse file 1218 try (FileInputStream fis = new FileInputStream(xmlFilePath)) { 1219 XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(fis); 1220 int eventType; 1221 while (reader.hasNext() && !itemsDetected) { 1222 eventType = reader.next(); 1223 if (eventType == XMLEvent.COMMENT) { 1224 itemsDetected = true; 1225 } else if (eventType == XMLEvent.PROCESSING_INSTRUCTION && "xml-stylesheet".equalsIgnoreCase(reader.getPITarget())) { 1226 itemsDetected = true; 1227 } 1228 } 1229 } 1230 } catch (Exception e) { 1231 //In case of error then assume that the check failed 1232 itemsDetected = true; 1233 } 1234 return itemsDetected; 1235 } 1236 1237 1238 /** 1239 * Perform a set of additional validations against a JWT token: 1240 * <ul> 1241 * <li>Do not use the <b>NONE</b> signature algorithm.</li> 1242 * <li>Have a <a href="https://www.iana.org/assignments/jwt/jwt.xhtml">EXP claim</a> defined.</li> 1243 * <li>The token identifier (<a href="https://www.iana.org/assignments/jwt/jwt.xhtml">JTI claim</a>) is NOT part of the list of revoked token.</li> 1244 * <li>Match the expected type of token: ACCESS or ID or REFRESH.</li> 1245 * </ul> 1246 * 1247 * @param token JWT token for which <b>signature was already validated</b> and on which a set of additional validations will be applied. 1248 * @param expectedTokenType The type of expected token using the enumeration provided. 1249 * @param revokedTokenJTIList A list of token identifier (<b>JTI</b> claim) referring to tokens that were revoked and to which the JTI claim of the token will be compared to. 1250 * @return True only the token pass all the validations. 1251 * @see "https://www.iana.org/assignments/jwt/jwt.xhtml" 1252 * @see "https://auth0.com/docs/secure/tokens/access-tokens" 1253 * @see "https://auth0.com/docs/secure/tokens/id-tokens" 1254 * @see "https://auth0.com/docs/secure/tokens/refresh-tokens" 1255 * @see "https://auth0.com/blog/id-token-access-token-what-is-the-difference/" 1256 * @see "https://jwt.io/libraries?language=Java" 1257 * @see "https://pentesterlab.com/blog/secure-jwt-library-design" 1258 * @see "https://github.com/auth0/java-jwt" 1259 */ 1260 public static boolean applyJWTExtraValidation(DecodedJWT token, TokenType expectedTokenType, List<String> revokedTokenJTIList) { 1261 boolean isValid = false; 1262 TokenType tokenType; 1263 try { 1264 if (!"none".equalsIgnoreCase(token.getAlgorithm().trim())) { 1265 if (!token.getClaim("exp").isMissing() && token.getExpiresAt() != null) { 1266 String jti = token.getId(); 1267 if (jti != null && !jti.trim().isEmpty()) { 1268 boolean jtiIsRevoked = revokedTokenJTIList.stream().anyMatch(jti::equalsIgnoreCase); 1269 if (!jtiIsRevoked) { 1270 //Determine the token type based on the presence of specifics claims 1271 if (!token.getClaim("scope").isMissing()) { 1272 tokenType = TokenType.ACCESS; 1273 } else if (!token.getClaim("name").isMissing() || !token.getClaim("email").isMissing()) { 1274 tokenType = TokenType.ID; 1275 } else { 1276 tokenType = TokenType.REFRESH; 1277 } 1278 isValid = (tokenType.equals(expectedTokenType)); 1279 } 1280 } 1281 } 1282 } 1283 1284 } catch (Exception e) { 1285 //In case of error then assume that the check failed 1286 isValid = false; 1287 } 1288 1289 return isValid; 1290 } 1291}