Regular Expression Denial of Service (ReDoS)

Medium Risk Input Validation
regexdenial-of-servicecatastrophic-backtrackingperformanceresource-exhaustionredos

What it is

Regular Expression Denial of Service (ReDoS) occurs when poorly constructed regular expressions consume excessive CPU time when processing maliciously crafted input strings. This happens due to catastrophic backtracking in regex engines, where certain patterns can cause exponential time complexity, leading to application timeouts, resource exhaustion, and denial of service attacks.

// VULNERABLE: Nested quantifiers causing exponential backtracking function validateEmail(email) { // Problematic pattern with nested quantifiers const emailPattern = /^([a-zA-Z0-9_\.-]+)+@([a-zA-Z0-9_\.-]+)+\.([a-zA-Z]{2,5})+$/; return emailPattern.test(email); } // VULNERABLE: Complex pattern for HTML parsing function extractLinks(html) { // Catastrophic backtracking in attribute matching const linkPattern = /]*\s+)?href\s*=\s*(["'])([^\"']*?)\2([^>]*)>/gi; const matches = []; let match; while ((match = linkPattern.exec(html)) !== null) { matches.push(match[3]); } return matches; } // VULNERABLE: Phone number validation with alternation function validatePhone(phone) { // Overlapping alternation patterns const phonePattern = /^(\+?1?[-\s]?)?(\(?[0-9]{3}\)?[-\s]?)?([0-9]{3}[-\s]?[0-9]{4})|(\+?[0-9]{1,3}[-\s]?[0-9]{4,14})$/; return phonePattern.test(phone); } // VULNERABLE: URL validation with nested groups function validateURL(url) { // Complex pattern with potential for backtracking const urlPattern = /^(https?:\/\/)?(([a-zA-Z0-9_\.-]+)+\.([a-zA-Z]{2,5})+)(:[0-9]{1,5})?(\/([a-zA-Z0-9_\.-])*)*\/?$/; return urlPattern.test(url); } // Malicious inputs that cause ReDoS: // validateEmail("a".repeat(50) + "@gmail.com") // Takes exponential time // extractLinks("") // validatePhone("+" + "1-".repeat(30) + "!") // validateURL("http://" + "a.".repeat(25) + "!")
// SECURE: Safe regex patterns without nested quantifiers class SafeValidators { // Simple email validation without nested quantifiers static validateEmail(email) { // Input length check first if (!email || email.length > 254) { return false; } // Simple, safe pattern const emailPattern = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/; return emailPattern.test(email); } // Safe HTML link extraction using DOM parser static extractLinks(html) { // Length limit to prevent processing huge documents if (!html || html.length > 100000) { return []; } try { // Use DOM parser instead of regex for HTML const parser = new DOMParser(); const doc = parser.parseFromString(html, 'text/html'); const links = Array.from(doc.querySelectorAll('a[href]')); return links.map(link => link.href).slice(0, 100); // Limit results } catch (error) { console.error('HTML parsing error:', error); return []; } } // Safe phone validation with simple pattern static validatePhone(phone) { if (!phone || phone.length > 20) { return false; } // Remove common separators const cleaned = phone.replace(/[\s\-\(\)\.]/g, ''); // Simple pattern for international numbers const phonePattern = /^\+?[1-9]\d{6,14}$/; return phonePattern.test(cleaned); } // Safe URL validation using URL constructor static validateURL(url) { if (!url || url.length > 2000) { return false; } try { const urlObj = new URL(url); // Check allowed protocols const allowedProtocols = ['http:', 'https:']; if (!allowedProtocols.includes(urlObj.protocol)) { return false; } // Additional simple checks return urlObj.hostname.length > 0 && urlObj.hostname.length <= 253 && urlObj.hostname.includes('.'); } catch (error) { return false; } } // Utility method to test regex performance static testRegexPerformance(pattern, testString, description = '') { const start = performance.now(); try { const result = pattern.test(testString); const end = performance.now(); const duration = end - start; if (duration > 10) { // Log operations taking more than 10ms console.warn(`Slow regex operation (${description}): ${duration.toFixed(2)}ms`); } return result; } catch (error) { console.error(`Regex error (${description}):`, error); return false; } } } // Enhanced validation with timeout protection class ValidatorWithTimeout { static async validateWithTimeout(validatorFunc, input, timeoutMs = 1000) { return new Promise((resolve, reject) => { const timeoutId = setTimeout(() => { reject(new Error('Validation timeout - possible ReDoS attack')); }, timeoutMs); try { const result = validatorFunc(input); clearTimeout(timeoutId); resolve(result); } catch (error) { clearTimeout(timeoutId); reject(error); } }); } static async safeValidateEmail(email) { try { return await this.validateWithTimeout( SafeValidators.validateEmail, email, 1000 ); } catch (error) { console.error('Email validation error:', error.message); return false; } } } // Usage examples async function validateUserInput(userData) { const results = {}; try { results.email = await ValidatorWithTimeout.safeValidateEmail(userData.email); results.phone = SafeValidators.validatePhone(userData.phone); results.website = SafeValidators.validateURL(userData.website); return results; } catch (error) { console.error('Validation batch error:', error); return { email: false, phone: false, website: false }; } } // Export for testing if (typeof module !== 'undefined' && module.exports) { module.exports = { SafeValidators, ValidatorWithTimeout }; }

💡 Why This Fix Works

The vulnerable code uses nested quantifiers and complex patterns that can cause exponential backtracking. The secure version uses simple patterns, length limits, and alternative validation methods.

import re import time # VULNERABLE: Nested quantifiers in email validation def validate_email_vulnerable(email): # Problematic pattern with nested quantifiers pattern = r'^([a-zA-Z0-9_.+-]+)+@([a-zA-Z0-9-]+)+\.([a-zA-Z0-9-.]+)+$' return re.match(pattern, email) is not None # VULNERABLE: Complex pattern for parsing user input def parse_user_data_vulnerable(data): # Catastrophic backtracking in complex parsing patterns = { 'name': r'^([A-Za-z]+\s*)+$', # Nested quantifiers 'address': r'^([0-9A-Za-z\s,.-]+)*$', # Overlapping patterns 'comment': r'^(.|\n)*$' # Dangerous .* pattern } results = {} for field, pattern in patterns.items(): if field in data: results[field] = re.match(pattern, data[field]) is not None else: results[field] = False return results # VULNERABLE: Log parsing with complex regex def parse_log_entries_vulnerable(log_content): # Complex pattern prone to catastrophic backtracking log_pattern = r'(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s+([A-Z]+)\s+(.+)\s*' matches = [] for line in log_content.split('\n'): match = re.search(log_pattern, line) if match: matches.append({ 'timestamp': match.group(1), 'level': match.group(2), 'message': match.group(3) }) return matches # VULNERABLE: HTML tag extraction def extract_html_tags_vulnerable(html): # Nested quantifiers in HTML parsing tag_pattern = r'<([a-zA-Z][a-zA-Z0-9]*)(\s+[a-zA-Z][a-zA-Z0-9]*\s*=\s*(["\'][^"\']*["\']|[^\s>]*))*\s*/?>' tags = [] for match in re.finditer(tag_pattern, html): tags.append(match.group(1)) return tags # VULNERABLE: Complex URL validation def validate_url_vulnerable(url): # Multiple overlapping alternatives url_pattern = r'^(https?://)?(www\.)?([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(/.*)?$|^(ftp://)?(ftp\.)?([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(/.*)?$' return re.match(url_pattern, url) is not None # Test with malicious inputs: # validate_email_vulnerable("a" * 50 + "@gmail.com") # Exponential time # parse_user_data_vulnerable({'name': "a " * 50 + "!"}) # extract_html_tags_vulnerable("
") # validate_url_vulnerable("http://" + "a." * 25 + "!")
import re import time import signal from contextlib import contextmanager from urllib.parse import urlparse from typing import Dict, List, Optional, Any import logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class RegexTimeoutError(Exception): """Raised when regex operation times out""" pass @contextmanager def regex_timeout(seconds): """Context manager to timeout regex operations""" def timeout_handler(signum, frame): raise RegexTimeoutError("Regex operation timed out") # Set up the timeout old_handler = signal.signal(signal.SIGALRM, timeout_handler) signal.alarm(seconds) try: yield finally: signal.alarm(0) signal.signal(signal.SIGALRM, old_handler) class SafeRegexValidator: """Safe regex validation with timeout protection and simple patterns""" # Safe patterns without nested quantifiers PATTERNS = { 'email': r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', 'name': r'^[A-Za-z\s]{1,100}$', # Simple pattern with length limit 'address': r'^[0-9A-Za-z\s,.#-]{1,200}$', # No nested quantifiers 'phone': r'^\+?[1-9]\d{6,14}$', 'url': r'^https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(?:/[^\s]*)?$', 'ipv4': r'^(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$', 'date': r'^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$' } def __init__(self, max_input_length=1000, timeout_seconds=1): self.max_input_length = max_input_length self.timeout_seconds = timeout_seconds self.compiled_patterns = {} # Pre-compile patterns for name, pattern in self.PATTERNS.items(): try: self.compiled_patterns[name] = re.compile(pattern) except re.error as e: logger.error(f"Failed to compile pattern {name}: {e}") def validate_input_length(self, text: str) -> bool: """Check if input length is within safe limits""" return text is not None and len(text) <= self.max_input_length def safe_regex_match(self, pattern_name: str, text: str) -> bool: """Safely execute regex with timeout protection""" if not self.validate_input_length(text): logger.warning(f"Input too long for pattern {pattern_name}: {len(text) if text else 0}") return False pattern = self.compiled_patterns.get(pattern_name) if not pattern: logger.error(f"Unknown pattern: {pattern_name}") return False try: with regex_timeout(self.timeout_seconds): start_time = time.time() result = pattern.match(text) is not None duration = time.time() - start_time if duration > 0.1: # Log slow operations logger.warning(f"Slow regex operation: {pattern_name} took {duration:.3f}s") return result except RegexTimeoutError: logger.error(f"Regex timeout for pattern {pattern_name} with input length {len(text)}") return False except Exception as e: logger.error(f"Regex error for pattern {pattern_name}: {e}") return False def validate_email(self, email: str) -> bool: """Safe email validation""" return self.safe_regex_match('email', email) def validate_url_safe(self, url: str) -> bool: """Safe URL validation using urlparse""" if not self.validate_input_length(url): return False try: parsed = urlparse(url) return (parsed.scheme in ['http', 'https'] and parsed.netloc and len(parsed.netloc) <= 253 and '.' in parsed.netloc) except Exception: return False def parse_user_data_safe(self, data: Dict[str, Any]) -> Dict[str, bool]: """Safe user data parsing with individual field validation""" results = {} # Validate each field individually with appropriate patterns field_patterns = { 'name': 'name', 'email': 'email', 'phone': 'phone', 'address': 'address' } for field, pattern_name in field_patterns.items(): if field in data: value = str(data[field]) if data[field] is not None else '' results[field] = self.safe_regex_match(pattern_name, value) else: results[field] = False return results def parse_log_entries_safe(self, log_content: str) -> List[Dict[str, str]]: """Safe log parsing with simple string operations""" if not self.validate_input_length(log_content): logger.warning("Log content too long for processing") return [] matches = [] lines = log_content.split('\n')[:1000] # Limit number of lines for line in lines: if not line.strip(): continue # Use simple string operations instead of complex regex parts = line.split(None, 2) # Split on whitespace, max 3 parts if len(parts) >= 3: timestamp_part = parts[0] + ' ' + parts[1] if len(parts[0]) == 10 else parts[0] level_part = parts[1] if len(parts[0]) == 10 else parts[1] message_part = parts[2] if len(parts[0]) == 10 else ' '.join(parts[2:]) # Simple validation instead of complex regex if self.safe_regex_match('date', timestamp_part.split()[0]): matches.append({ 'timestamp': timestamp_part, 'level': level_part, 'message': message_part[:500] # Limit message length }) return matches def extract_html_tags_safe(self, html: str) -> List[str]: """Safe HTML tag extraction using simple parsing""" if not self.validate_input_length(html): return [] tags = [] in_tag = False current_tag = '' # Simple state machine instead of complex regex for i, char in enumerate(html): if i > 10000: # Additional safety limit break if char == '<': in_tag = True current_tag = '' elif char == '>' and in_tag: in_tag = False # Extract tag name (first word) tag_name = current_tag.split()[0] if current_tag else '' if tag_name and tag_name.isalpha(): tags.append(tag_name) elif in_tag: current_tag += char # Prevent excessively long tags if len(current_tag) > 100: in_tag = False current_tag = '' return tags[:100] # Limit number of tags returned # Performance testing utilities def test_regex_performance(validator: SafeRegexValidator): """Test regex patterns for potential ReDoS vulnerabilities""" test_cases = { 'email': [ 'a' * 50 + '@gmail.com', 'test@' + 'a' * 50 + '.com', 'a@b.c' + 'a' * 50 ], 'name': [ 'a ' * 100, 'John ' * 50 + '!', 'A' * 200 ], 'url': [ 'http://' + 'a.' * 50 + 'com', 'https://example.com/' + 'a' * 100, 'http://sub.' * 25 + 'example.com' ] } for pattern_name, test_inputs in test_cases.items(): print(f"\nTesting pattern: {pattern_name}") for test_input in test_inputs: start_time = time.time() try: result = validator.safe_regex_match(pattern_name, test_input) duration = time.time() - start_time print(f" Input length {len(test_input)}: {duration:.4f}s - {'PASS' if result else 'FAIL'}") if duration > 0.1: print(f" WARNING: Slow operation detected!") except Exception as e: print(f" Input length {len(test_input)}: ERROR - {e}") # Usage examples def main(): validator = SafeRegexValidator(max_input_length=1000, timeout_seconds=1) # Test email validation test_emails = [ 'user@example.com', 'a' * 50 + '@gmail.com', # Potential ReDoS input 'invalid-email' ] for email in test_emails: result = validator.validate_email(email) print(f"Email '{email[:30]}...': {result}") # Test URL validation test_urls = [ 'https://example.com', 'http://' + 'a.' * 50 + 'com', # Potential ReDoS input 'invalid-url' ] for url in test_urls: result = validator.validate_url_safe(url) print(f"URL '{url[:30]}...': {result}") # Performance testing test_regex_performance(validator) if __name__ == '__main__': main()

💡 Why This Fix Works

The vulnerable Python code uses nested quantifiers and complex patterns prone to catastrophic backtracking. The secure version implements timeout protection, simple patterns, and alternative parsing methods.

import java.util.regex.Pattern; import java.util.regex.Matcher; import java.util.*; public class VulnerableValidator { // VULNERABLE: Nested quantifiers in email pattern private static final Pattern EMAIL_PATTERN = Pattern.compile( "^([a-zA-Z0-9_\\.-]+)+@([a-zA-Z0-9_\\.-]+)+\\.([a-zA-Z]{2,5})+$" ); // VULNERABLE: Complex alternation with overlapping patterns private static final Pattern PHONE_PATTERN = Pattern.compile( "^(\\+?1?[-\\s]?)?(\\(?[0-9]{3}\\)?[-\\s]?)?([0-9]{3}[-\\s]?[0-9]{4})|(\\+?[0-9]{1,3}[-\\s]?[0-9]{4,14})$" ); // VULNERABLE: Nested groups in URL validation private static final Pattern URL_PATTERN = Pattern.compile( "^(https?:\\/\\/)?(([a-zA-Z0-9_\\.-]+)+\\.([a-zA-Z]{2,5})+)(:[0-9]{1,5})?(\\/([a-zA-Z0-9_\\.-])*)*\\/?$" ); // VULNERABLE: Complex HTML parsing pattern private static final Pattern HTML_TAG_PATTERN = Pattern.compile( "<([a-zA-Z][a-zA-Z0-9]*)(\\s+[a-zA-Z][a-zA-Z0-9]*\\s*=\\s*([\"'][^\"']*[\"']|[^\\s>]*))*\\s*\\/?>" ); public boolean validateEmail(String email) { // No input validation or timeout protection return EMAIL_PATTERN.matcher(email).matches(); } public boolean validatePhone(String phone) { return PHONE_PATTERN.matcher(phone).matches(); } public boolean validateURL(String url) { return URL_PATTERN.matcher(url).matches(); } public List extractHtmlTags(String html) { List tags = new ArrayList<>(); Matcher matcher = HTML_TAG_PATTERN.matcher(html); while (matcher.find()) { tags.add(matcher.group(1)); } return tags; } // VULNERABLE: Batch validation without protection public Map validateBatch(Map data) { Map results = new HashMap<>(); for (Map.Entry entry : data.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); switch (key) { case "email": results.put(key, validateEmail(value)); break; case "phone": results.put(key, validatePhone(value)); break; case "url": results.put(key, validateURL(value)); break; default: results.put(key, false); } } return results; } } // Malicious test inputs that cause ReDoS: // validateEmail("a".repeat(50) + "@gmail.com"); // validatePhone("+" + "1-".repeat(30) + "!"); // validateURL("http://" + "a.".repeat(25) + "!"); // extractHtmlTags("
");
import java.util.regex.Pattern; import java.util.regex.Matcher; import java.util.*; import java.util.concurrent.*; import java.net.URL; import java.net.MalformedURLException; import java.time.Duration; import java.time.Instant; import java.util.logging.Logger; public class SafeValidator { private static final Logger logger = Logger.getLogger(SafeValidator.class.getName()); // Safe patterns without nested quantifiers private static final Pattern SAFE_EMAIL_PATTERN = Pattern.compile( "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$" ); private static final Pattern SAFE_PHONE_PATTERN = Pattern.compile( "^\\+?[1-9]\\d{6,14}$" ); private static final Pattern SAFE_IPV4_PATTERN = Pattern.compile( "^(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)$" ); private static final Pattern SAFE_DATE_PATTERN = Pattern.compile( "^\\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\\d|3[01])$" ); // Configuration private final int maxInputLength; private final long timeoutMillis; private final ExecutorService executor; public SafeValidator() { this(1000, 1000); // 1000 chars max, 1 second timeout } public SafeValidator(int maxInputLength, long timeoutMillis) { this.maxInputLength = maxInputLength; this.timeoutMillis = timeoutMillis; this.executor = Executors.newCachedThreadPool(); } private boolean validateInputLength(String input) { return input != null && input.length() <= maxInputLength; } private boolean executeRegexWithTimeout(Pattern pattern, String input) { if (!validateInputLength(input)) { logger.warning("Input rejected due to length: " + (input != null ? input.length() : "null")); return false; } Future future = executor.submit(() -> { Instant start = Instant.now(); try { boolean result = pattern.matcher(input).matches(); Duration duration = Duration.between(start, Instant.now()); if (duration.toMillis() > 100) { logger.warning(String.format( "Slow regex operation: %dms for pattern with input length %d", duration.toMillis(), input.length() )); } return result; } catch (Exception e) { logger.severe("Regex execution error: " + e.getMessage()); return false; } }); try { return future.get(timeoutMillis, TimeUnit.MILLISECONDS); } catch (TimeoutException e) { future.cancel(true); logger.severe(String.format( "Regex timeout detected - possible ReDoS attack. Input length: %d", input.length() )); return false; } catch (Exception e) { logger.severe("Regex validation error: " + e.getMessage()); return false; } } public boolean validateEmail(String email) { return executeRegexWithTimeout(SAFE_EMAIL_PATTERN, email); } public boolean validatePhone(String phone) { if (!validateInputLength(phone)) { return false; } // Clean phone number first String cleaned = phone.replaceAll("[\\s\\-\\(\\)\\.]", ""); return executeRegexWithTimeout(SAFE_PHONE_PATTERN, cleaned); } // Safe URL validation using URL class public boolean validateURL(String url) { if (!validateInputLength(url)) { return false; } try { URL urlObj = new URL(url); // Check protocol String protocol = urlObj.getProtocol(); if (!"http".equals(protocol) && !"https".equals(protocol)) { return false; } // Check host String host = urlObj.getHost(); return host != null && host.length() > 0 && host.length() <= 253 && host.contains("."); } catch (MalformedURLException e) { return false; } } // Safe HTML tag extraction using simple parsing public List extractHtmlTags(String html) { if (!validateInputLength(html)) { return new ArrayList<>(); } List tags = new ArrayList<>(); boolean inTag = false; StringBuilder currentTag = new StringBuilder(); // Simple state machine instead of complex regex for (int i = 0; i < html.length() && i < 10000; i++) { char c = html.charAt(i); if (c == '<') { inTag = true; currentTag.setLength(0); } else if (c == '>' && inTag) { inTag = false; String tagContent = currentTag.toString().trim(); if (!tagContent.isEmpty()) { // Extract tag name (first word) String[] parts = tagContent.split("\\s+"); if (parts.length > 0 && parts[0].matches("[a-zA-Z][a-zA-Z0-9]*")) { tags.add(parts[0]); } } } else if (inTag) { currentTag.append(c); // Prevent excessively long tags if (currentTag.length() > 100) { inTag = false; } } // Limit number of tags if (tags.size() >= 100) { break; } } return tags; } // Safe batch validation with monitoring public Map validateBatch(Map data) { Map results = new HashMap<>(); Instant start = Instant.now(); // Limit batch size if (data.size() > 100) { logger.warning("Batch size too large: " + data.size()); return results; } for (Map.Entry entry : data.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); try { switch (key) { case "email": results.put(key, validateEmail(value)); break; case "phone": results.put(key, validatePhone(value)); break; case "url": results.put(key, validateURL(value)); break; case "ipv4": results.put(key, executeRegexWithTimeout(SAFE_IPV4_PATTERN, value)); break; case "date": results.put(key, executeRegexWithTimeout(SAFE_DATE_PATTERN, value)); break; default: results.put(key, false); } } catch (Exception e) { logger.severe("Validation error for key " + key + ": " + e.getMessage()); results.put(key, false); } } Duration duration = Duration.between(start, Instant.now()); if (duration.toMillis() > 5000) { logger.warning(String.format( "Slow batch validation: %dms for %d items", duration.toMillis(), data.size() )); } return results; } // Performance testing method public void testRegexPerformance() { String[] testInputs = { "a".repeat(50) + "@gmail.com", "+" + "1-".repeat(30) + "!", "http://" + "a.".repeat(25) + "!", "
" }; System.out.println("Testing regex performance:"); for (String input : testInputs) { Instant start = Instant.now(); try { // Test email pattern (most likely to be vulnerable) boolean result = validateEmail(input); Duration duration = Duration.between(start, Instant.now()); System.out.printf( "Input length %d: %dms - %s%n", input.length(), duration.toMillis(), result ? "VALID" : "INVALID" ); if (duration.toMillis() > 100) { System.out.println(" WARNING: Slow operation detected!"); } } catch (Exception e) { System.out.printf( "Input length %d: ERROR - %s%n", input.length(), e.getMessage() ); } } } public void shutdown() { executor.shutdown(); try { if (!executor.awaitTermination(5, TimeUnit.SECONDS)) { executor.shutdownNow(); } } catch (InterruptedException e) { executor.shutdownNow(); Thread.currentThread().interrupt(); } } // Example usage public static void main(String[] args) { SafeValidator validator = new SafeValidator(); try { // Test individual validations System.out.println("Email validation: " + validator.validateEmail("user@example.com")); System.out.println("Phone validation: " + validator.validatePhone("+1234567890")); System.out.println("URL validation: " + validator.validateURL("https://example.com")); // Test batch validation Map testData = new HashMap<>(); testData.put("email", "test@example.com"); testData.put("phone", "+1234567890"); testData.put("url", "https://example.com"); Map results = validator.validateBatch(testData); System.out.println("Batch results: " + results); // Performance testing validator.testRegexPerformance(); } finally { validator.shutdown(); } } }

💡 Why This Fix Works

The vulnerable Java code uses nested quantifiers and complex patterns without timeout protection. The secure version implements timeout mechanisms, simple patterns, and alternative validation methods like URL class.

Why it happens

The most common cause of ReDoS is using nested quantifiers or alternation groups that create exponential backtracking scenarios. Patterns like (a+)+ or (a|a)* can cause the regex engine to explore an exponential number of possibilities when matching fails.

Root causes

Nested Quantifiers and Catastrophic Backtracking

The most common cause of ReDoS is using nested quantifiers or alternation groups that create exponential backtracking scenarios. Patterns like (a+)+ or (a|a)* can cause the regex engine to explore an exponential number of possibilities when matching fails.

Preview example – JAVASCRIPT
// Vulnerable regex with nested quantifiers
const emailPattern = /^([a-zA-Z0-9_\.-]+)+@([a-zA-Z0-9_\.-]+)+\.([a-zA-Z]{2,5})+$/;
// Malicious input: "a" * 50 + "!"
// Causes exponential backtracking when @ is not found

Alternation with Overlapping Patterns

Regular expressions with alternation groups that have overlapping patterns can cause catastrophic backtracking. When the regex engine tries multiple paths that could match the same input, it exponentially increases processing time.

Preview example – PYTHON
# Vulnerable regex with overlapping alternation
import re

# Problem: (a|a)* can match 'a' in multiple ways
pattern = re.compile(r'^(a|a)*b$')
# Malicious input: "a" * 30 + "c"
# Engine tries 2^30 combinations before failing

Unanchored Quantifiers with Complex Patterns

Complex regular expressions without proper anchoring that use quantifiers can be exploited to cause excessive backtracking. This is especially problematic in patterns used for input validation or parsing user data.

Preview example – JAVASCRIPT
// Vulnerable: Complex unanchored pattern
const htmlPattern = /<([a-zA-Z][a-zA-Z0-9]*)(\s+[a-zA-Z][a-zA-Z0-9]*\s*=\s*(["'][^"']*["']|[^\s>]*))*\s*\/?>/;
// Malicious input: "<" + "a " * 50 + "!"
// Causes exponential backtracking in attribute matching

Recursive Patterns and Self-Referencing Groups

Regular expressions that use recursive patterns or self-referencing groups can create infinite loops or exponential processing times when given specific input patterns designed to exploit the recursion.

Preview example – JAVA
// Vulnerable recursive pattern (in languages that support it)
// Pattern for matching nested parentheses
String pattern = "\\(([^()]++|(?R))*+\\)";
// While this example uses possessive quantifiers, 
// similar non-possessive patterns can cause ReDoS

// Example vulnerable pattern:
String badPattern = "\\(([^()]|\\([^()]*\\))*\\)";
// Malicious: "(" + "a" * 50 + "!"
// Causes exponential backtracking

Fixes

1

Use Atomic Groups and Possessive Quantifiers

Replace vulnerable quantifiers with atomic groups (?>...) or possessive quantifiers (++, *+, ?+) that prevent backtracking. These constructs ensure the regex engine doesn't try alternative matching strategies once a path is chosen.

View implementation – JAVASCRIPT
// Before: Vulnerable nested quantifiers
const vulnerablePattern = /^([a-zA-Z0-9_\.-]+)+@([a-zA-Z0-9_\.-]+)+\.([a-zA-Z]{2,5})+$/;

// After: Using atomic groups to prevent backtracking
const safePattern = /^(?>[a-zA-Z0-9_\.-]+)@(?>[a-zA-Z0-9_\.-]+)\.(?>[a-zA-Z]{2,5})$/;

// Alternative: Simplified pattern without nested quantifiers
const simplifiedPattern = /^[a-zA-Z0-9_.\-]+@[a-zA-Z0-9_.\-]+\.[a-zA-Z]{2,5}$/;

// Function to safely validate email
function validateEmail(email) {
    // Length check first to prevent long input processing
    if (!email || email.length > 254) {
        return false;
    }
    
    // Use simple, safe pattern
    return simplifiedPattern.test(email);
}
2

Implement Input Length Limits and Timeouts

Set strict limits on input length before applying regular expressions and implement timeouts for regex operations. This prevents attackers from providing extremely long strings designed to exploit regex vulnerabilities.

View implementation – PYTHON
import re
import signal
from contextlib import contextmanager

class RegexTimeoutError(Exception):
    pass

@contextmanager
def regex_timeout(seconds):
    """Context manager to timeout regex operations"""
    def timeout_handler(signum, frame):
        raise RegexTimeoutError("Regex operation timed out")
    
    old_handler = signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)

def safe_regex_match(pattern, text, max_length=1000, timeout_seconds=1):
    """Safely execute regex with length limits and timeout"""
    
    # Input length validation
    if len(text) > max_length:
        raise ValueError(f"Input too long: {len(text)} > {max_length}")
    
    # Compile pattern with timeout protection
    try:
        compiled_pattern = re.compile(pattern)
    except re.error as e:
        raise ValueError(f"Invalid regex pattern: {e}")
    
    # Execute with timeout
    try:
        with regex_timeout(timeout_seconds):
            return compiled_pattern.match(text)
    except RegexTimeoutError:
        raise ValueError("Regex operation timed out - possible ReDoS attack")

# Example usage
def validate_input_safe(user_input):
    # Safe email pattern without nested quantifiers
    email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    
    try:
        result = safe_regex_match(email_pattern, user_input, max_length=254, timeout_seconds=1)
        return result is not None
    except ValueError as e:
        print(f"Validation error: {e}")
        return False
3

Use Alternative String Processing Methods

For complex parsing tasks, consider using purpose-built parsers or simpler string methods instead of complex regular expressions. This eliminates ReDoS risks while often improving performance and maintainability.

View implementation – JAVASCRIPT
// Instead of complex regex for URL validation
function validateUrlSafe(url) {
    // Length check
    if (!url || url.length > 2000) {
        return false;
    }
    
    try {
        // Use built-in URL constructor for validation
        const urlObj = new URL(url);
        
        // Additional checks with simple string methods
        const allowedProtocols = ['http:', 'https:'];
        if (!allowedProtocols.includes(urlObj.protocol)) {
            return false;
        }
        
        // Check for reasonable hostname length
        if (urlObj.hostname.length > 253) {
            return false;
        }
        
        // Simple checks instead of complex regex
        return urlObj.hostname.includes('.') && 
               !urlObj.hostname.startsWith('.') && 
               !urlObj.hostname.endsWith('.');
               
    } catch (error) {
        return false;
    }
}

// Instead of regex for HTML tag parsing
function extractTagsSafe(html) {
    const tags = [];
    let inTag = false;
    let currentTag = '';
    
    // Simple state machine instead of regex
    for (let i = 0; i < html.length && i < 10000; i++) {  // Length limit
        const char = html[i];
        
        if (char === '<') {
            inTag = true;
            currentTag = '';
        } else if (char === '>' && inTag) {
            inTag = false;
            if (currentTag.trim()) {
                tags.push(currentTag.trim());
            }
        } else if (inTag) {
            currentTag += char;
            // Prevent excessively long tags
            if (currentTag.length > 100) {
                break;
            }
        }
    }
    
    return tags;
}

// Safe phone number validation without complex regex
function validatePhoneSafe(phone) {
    if (!phone || phone.length > 20) {
        return false;
    }
    
    // Remove common separators
    const cleaned = phone.replace(/[\s\-\(\)\.]/g, '');
    
    // Simple checks
    return /^\+?[0-9]{7,15}$/.test(cleaned);  // Simple, safe pattern
}
4

Implement Regex Security Testing and Monitoring

Test regular expressions for ReDoS vulnerabilities using automated tools and monitor regex performance in production. Implement alerting for regex operations that take longer than expected.

View implementation – JAVA
import java.util.regex.*;
import java.util.concurrent.*;
import java.time.Duration;
import java.time.Instant;
import java.util.logging.Logger;

public class SafeRegexValidator {
    private static final Logger logger = Logger.getLogger(SafeRegexValidator.class.getName());
    private static final int MAX_INPUT_LENGTH = 1000;
    private static final long REGEX_TIMEOUT_MS = 1000; // 1 second
    
    private final ExecutorService executor = Executors.newCachedThreadPool();
    
    public boolean validateWithTimeout(Pattern pattern, String input) {
        // Input length validation
        if (input == null || input.length() > MAX_INPUT_LENGTH) {
            logger.warning("Input rejected due to length: " + 
                          (input != null ? input.length() : "null"));
            return false;
        }
        
        // Create timeout-protected regex execution
        Future<Boolean> future = executor.submit(() -> {
            Instant start = Instant.now();
            try {
                boolean result = pattern.matcher(input).matches();
                
                Duration duration = Duration.between(start, Instant.now());
                if (duration.toMillis() > 100) { // Log slow operations
                    logger.warning(String.format(
                        "Slow regex operation: %dms for pattern %s with input length %d",
                        duration.toMillis(), pattern.pattern(), input.length()
                    ));
                }
                
                return result;
            } catch (Exception e) {
                logger.severe("Regex execution error: " + e.getMessage());
                return false;
            }
        });
        
        try {
            return future.get(REGEX_TIMEOUT_MS, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            future.cancel(true);
            logger.severe(String.format(
                "Regex timeout detected - possible ReDoS attack. Pattern: %s, Input length: %d",
                pattern.pattern(), input.length()
            ));
            return false;
        } catch (Exception e) {
            logger.severe("Regex validation error: " + e.getMessage());
            return false;
        }
    }
    
    // Safe email validation with simplified pattern
    private static final Pattern SAFE_EMAIL_PATTERN = Pattern.compile(
        "^[a-zA-Z0-9._%+-]{1,64}@[a-zA-Z0-9.-]{1,63}\\.[a-zA-Z]{2,}$"
    );
    
    public boolean isValidEmail(String email) {
        return validateWithTimeout(SAFE_EMAIL_PATTERN, email);
    }
    
    // Regex security testing method
    public static void testRegexSecurity(String patternString) {
        try {
            Pattern pattern = Pattern.compile(patternString);
            
            // Test with potentially problematic inputs
            String[] testInputs = {
                "a".repeat(50) + "!",          // Long repetition
                "(a".repeat(25) + "!",         // Unmatched parentheses
                "a@".repeat(25) + "!",         // Repeated special chars
                "<".repeat(20) + "a b c",      // Tag-like patterns
                "http://" + "a".repeat(100)    // Long URLs
            };
            
            for (String testInput : testInputs) {
                Instant start = Instant.now();
                try {
                    pattern.matcher(testInput).matches();
                    Duration duration = Duration.between(start, Instant.now());
                    
                    if (duration.toMillis() > 100) {
                        System.err.printf(
                            "WARNING: Regex pattern may be vulnerable to ReDoS.\n" +
                            "Pattern: %s\n" +
                            "Test input length: %d\n" +
                            "Execution time: %dms\n\n",
                            patternString, testInput.length(), duration.toMillis()
                        );
                    }
                } catch (Exception e) {
                    System.err.printf(
                        "ERROR: Regex pattern failed during testing: %s\n",
                        e.getMessage()
                    );
                }
            }
            
        } catch (PatternSyntaxException e) {
            System.err.printf("Invalid regex pattern: %s\n", e.getMessage());
        }
    }
    
    public void shutdown() {
        executor.shutdown();
        try {
            if (!executor.awaitTermination(5, TimeUnit.SECONDS)) {
                executor.shutdownNow();
            }
        } catch (InterruptedException e) {
            executor.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }
}
5

Use Pre-validated Regex Patterns and Libraries

Use well-tested regex patterns from security-focused libraries rather than writing custom complex expressions. Maintain a whitelist of approved patterns and regularly audit regex usage across the application.

View implementation – JAVASCRIPT
// Safe regex pattern library
class SafeRegexPatterns {
    // All patterns are tested for ReDoS vulnerabilities
    static patterns = {
        // Simple email validation (non-RFC compliant but safe)
        email: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
        
        // Phone number (international format)
        phone: /^\+?[1-9]\d{6,14}$/,
        
        // URL validation (basic)
        url: /^https?:\/\/[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(\/[^\s]*)?$/,
        
        // Username (alphanumeric, underscore, hyphen)
        username: /^[a-zA-Z0-9_-]{3,20}$/,
        
        // IPv4 address
        ipv4: /^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$/,
        
        // Credit card number (digits only, basic format)
        creditCard: /^\d{13,19}$/,
        
        // Date formats (YYYY-MM-DD)
        date: /^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$/,
        
        // Time format (HH:MM)
        time: /^(?:[01]\d|2[0-3]):[0-5]\d$/
    };
    
    static validate(type, input, maxLength = 100) {
        // Input length validation
        if (!input || typeof input !== 'string' || input.length > maxLength) {
            return false;
        }
        
        // Get pattern
        const pattern = this.patterns[type];
        if (!pattern) {
            throw new Error(`Unknown validation type: ${type}`);
        }
        
        try {
            return pattern.test(input);
        } catch (error) {
            console.error(`Regex validation error for type ${type}:`, error);
            return false;
        }
    }
    
    // Batch validation with performance monitoring
    static validateBatch(validations) {
        const start = Date.now();
        const results = {};
        
        for (const [key, { type, value, maxLength }] of Object.entries(validations)) {
            try {
                results[key] = this.validate(type, value, maxLength);
            } catch (error) {
                results[key] = false;
            }
        }
        
        const duration = Date.now() - start;
        if (duration > 100) {
            console.warn(`Slow batch validation: ${duration}ms for ${Object.keys(validations).length} items`);
        }
        
        return results;
    }
}

// Usage examples
function validateUserInput(userData) {
    const validations = {
        email: { type: 'email', value: userData.email, maxLength: 254 },
        username: { type: 'username', value: userData.username, maxLength: 20 },
        phone: { type: 'phone', value: userData.phone, maxLength: 20 }
    };
    
    return SafeRegexPatterns.validateBatch(validations);
}

// Alternative validation using external libraries
// Example with validator.js (which uses safe regex patterns)
const validator = require('validator');

function validateWithLibrary(input) {
    // Length limits first
    if (!input || input.length > 254) {
        return false;
    }
    
    // Use well-tested library functions
    return {
        isEmail: validator.isEmail(input),
        isURL: validator.isURL(input),
        isIP: validator.isIP(input),
        isUUID: validator.isUUID(input)
    };
}

Detect This Vulnerability in Your Code

Sourcery automatically identifies regular expression denial of service (redos) and many other security issues in your codebase.