Django Path Traversal via open() Function

High Risk Path Traversal
django python path-traversal directory-traversal file-access open-function

What it is

The Django application uses the open() function with user-controlled file paths without proper validation, enabling path traversal attacks. Attackers can manipulate file paths to access files outside the intended directory structure using sequences like '../' to traverse up the directory tree and access sensitive system files, configuration data, or other restricted content.

# Vulnerable: open() with user input in Django
# NOTE: every view below is an intentionally insecure example. Do not copy
# these patterns into production code.
from django.http import HttpResponse, JsonResponse
from django.views import View
from django.conf import settings
import os


# Dangerous: Direct file opening with user input
class FileViewerView(View):
    def get(self, request, filename):
        # CRITICAL: User controls filename, can use ../
        # os.path.join() also discards the base when `filename` is absolute.
        file_path = os.path.join(settings.MEDIA_ROOT, 'documents', filename)
        try:
            with open(file_path, 'r') as f:
                content = f.read()
            return HttpResponse(content, content_type='text/plain')
        except FileNotFoundError:
            return JsonResponse({'error': 'File not found'}, status=404)


# Another vulnerable pattern
def read_user_file(request):
    user_dir = request.GET.get('user_dir', '')
    filename = request.GET.get('file', '')
    # Dangerous: Multiple path components from user input
    file_path = f'/var/data/{user_dir}/{filename}'
    try:
        with open(file_path, 'r') as f:
            data = f.read()
        return HttpResponse(data)
    except Exception as e:
        # The exception text is echoed to the client, which can reveal
        # filesystem paths.
        return HttpResponse(f'Error: {e}', status=500)


# Configuration file access
def load_config_file(request):
    config_name = request.POST.get('config', '')
    environment = request.POST.get('env', 'production')
    # Dangerous: User-controlled config path
    # Both `environment` and `config_name` are interpolated unvalidated.
    config_path = f'/etc/myapp/{environment}/{config_name}.conf'
    try:
        with open(config_path, 'r') as f:
            config_content = f.read()
        return JsonResponse({'config': config_content})
    except Exception as e:
        # str(e) is returned to the caller as-is.
        return JsonResponse({'error': str(e)})


# Log file reading
def read_log_entries(request):
    log_type = request.GET.get('type', '')
    date = request.GET.get('date', '')
    # Dangerous: Path traversal in log access
    # Either parameter may contain '/' or '..' sequences.
    log_path = f'/var/log/myapp/{log_type}-{date}.log'
    try:
        with open(log_path, 'r') as f:
            log_entries = f.readlines()
        return JsonResponse({'entries': log_entries})
    except Exception as e:
        return JsonResponse({'error': str(e)})


# Template file reading
def get_template_content(request):
    template_path = request.GET.get('template', '')
    # Dangerous: Direct template path from user
    full_path = os.path.join(settings.BASE_DIR, 'templates', template_path)
    try:
        with open(full_path, 'r') as f:
            template_data = f.read()
        return HttpResponse(template_data, content_type='text/html')
    except Exception as e:
        return HttpResponse('Template error', status=500)
# Secure: Safe file handling in Django
from django.http import HttpResponse, JsonResponse
from django.views import View
from django.conf import settings
from django.core.exceptions import ValidationError
from pathlib import Path
import os
import re


# Safe: Validated file viewer
class SafeFileViewerView(View):
    """Serve a text document from MEDIA_ROOT/documents after validating its name."""

    def get(self, request, filename):
        """Return the file contents, or a 400 JSON error when validation fails."""
        try:
            # Validate filename
            validated_filename = self.validate_filename(filename)
            # Get safe file path
            safe_path = self.get_safe_file_path(validated_filename)
            # Read file securely
            content = self.read_file_safely(safe_path)
            return HttpResponse(content, content_type='text/plain')
        except ValidationError as e:
            # str(e) renders a list repr ("['...']"); join the messages instead.
            return JsonResponse({'error': '; '.join(e.messages)}, status=400)

    def validate_filename(self, filename):
        """Return `filename` unchanged if it is a plain, allowed file name.

        Raises:
            ValidationError: on bad length, characters, format or extension.
        """
        # Check filename format
        if not filename or len(filename) > 100:
            raise ValidationError('Invalid filename length')
        # Only allow safe characters. fullmatch() is used because
        # re.match(r'^...$') also accepts a trailing newline.
        if not re.fullmatch(r'[a-zA-Z0-9._-]+', filename):
            raise ValidationError('Filename contains invalid characters')
        # Prevent hidden files and traversal
        if filename.startswith('.') or '..' in filename:
            raise ValidationError('Invalid filename format')
        # Check file extension
        allowed_extensions = ('.txt', '.md', '.json', '.csv')
        if not filename.lower().endswith(allowed_extensions):
            raise ValidationError('File type not allowed')
        return filename

    def get_safe_file_path(self, filename):
        """Resolve `filename` under the documents directory.

        Raises:
            ValidationError: if the resolved path escapes the directory.
        """
        # Define safe base directory
        documents_dir = Path(settings.MEDIA_ROOT) / 'documents'
        # Construct absolute path
        file_path = documents_dir / filename
        # Resolve path and validate it's within documents directory
        try:
            resolved_path = file_path.resolve()
            documents_dir_resolved = documents_dir.resolve()
            # Ensure file is within the documents directory
            resolved_path.relative_to(documents_dir_resolved)
            return resolved_path
        except ValueError:
            raise ValidationError('File path outside allowed directory')

    def read_file_safely(self, file_path):
        """Read a UTF-8 text file of at most 1MB.

        Raises:
            ValidationError: if the file is missing, too large, not valid
                UTF-8, unreadable, or otherwise fails to open.
        """
        max_size = 1024 * 1024  # 1MB
        try:
            if not file_path.exists():
                raise ValidationError('File not found')
            # Check file size
            if file_path.stat().st_size > max_size:
                raise ValidationError('File too large')
            # Read file
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except ValidationError:
            # Keep the specific message; a broad handler would mask it.
            raise
        except UnicodeDecodeError:
            raise ValidationError('File encoding error')
        except PermissionError:
            raise ValidationError('Access denied')
        except OSError:
            raise ValidationError('File read error')


# Safe: User file access with validation
def safe_read_user_file(request):
    """Return a file from the requesting user's data directory.

    Responds with a 400 JSON error when any validation step fails.
    """
    user_id = request.GET.get('user_id', '')
    filename = request.GET.get('file', '')
    try:
        # Validate user access
        validated_user_id = validate_user_access(request, user_id)
        # Validate filename
        validated_filename = validate_user_filename(filename)
        # Get safe file path
        file_path = get_safe_user_file_path(validated_user_id, validated_filename)
        # Read file
        content = read_user_file_safely(file_path)
        return HttpResponse(content)
    except ValidationError as e:
        return JsonResponse({'error': '; '.join(e.messages)}, status=400)


def validate_user_access(request, user_id):
    """Return `user_id` as an int if the requester may read that user's files.

    Raises:
        ValidationError: if the ID is malformed, the requester is not
            authenticated, or is neither the owner nor staff.
    """
    # Validate user ID format. ASCII digits only: str.isdigit() also accepts
    # characters such as '²' that int() rejects. The length bound keeps the
    # conversion trivially cheap.
    if not re.fullmatch(r'[0-9]{1,18}', user_id):
        raise ValidationError('Invalid user ID')
    user_id = int(user_id)
    # Check access permissions
    if not request.user.is_authenticated:
        raise ValidationError('Authentication required')
    if request.user.id != user_id and not request.user.is_staff:
        raise ValidationError('Access denied')
    return user_id


def validate_user_filename(filename):
    """Return `filename` unchanged if it is a plain file name.

    Raises:
        ValidationError: on bad length, characters, or traversal sequences.
    """
    if not filename or len(filename) > 50:
        raise ValidationError('Invalid filename')
    # Only allow safe characters (fullmatch rejects a trailing newline)
    if not re.fullmatch(r'[a-zA-Z0-9._-]+', filename):
        raise ValidationError('Filename contains invalid characters')
    # Prevent directory traversal
    if '..' in filename or '/' in filename:
        raise ValidationError('Invalid file path')
    return filename


def get_safe_user_file_path(user_id, filename):
    """Resolve `filename` under MEDIA_ROOT/user_data/<user_id>.

    Raises:
        ValidationError: if the resolved path escapes the user's directory.
    """
    # Define safe base directory
    user_data_dir = Path(settings.MEDIA_ROOT) / 'user_data' / str(user_id)
    # Construct file path
    file_path = user_data_dir / filename
    # Validate path is within user directory
    try:
        resolved_path = file_path.resolve()
        user_data_dir_resolved = user_data_dir.resolve()
        resolved_path.relative_to(user_data_dir_resolved)
        return resolved_path
    except ValueError:
        raise ValidationError('File path outside user directory')


def read_user_file_safely(file_path):
    """Read a UTF-8 text file of at most 512KB.

    Raises:
        ValidationError: if the file is missing, too large, not valid UTF-8,
            or cannot be read.
    """
    max_size = 512 * 1024  # 512KB
    try:
        if not file_path.exists():
            raise ValidationError('File not found')
        # Check file size
        if file_path.stat().st_size > max_size:
            raise ValidationError('File too large')
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except ValidationError:
        # Keep the specific message; a broad handler would mask it.
        raise
    except UnicodeDecodeError:
        raise ValidationError('File encoding error')
    except OSError:
        raise ValidationError('File read error')


# Safe: Configuration access with allowlists
def safe_load_config_file(request):
    """Return an allowlisted configuration file as JSON, or a 400 JSON error."""
    config_name = request.POST.get('config', '')
    environment = request.POST.get('env', '')
    try:
        # Validate inputs
        validated_config = validate_config_request(config_name, environment)
        # Load configuration safely
        config_content = load_config_safely(validated_config)
        return JsonResponse({'config': config_content})
    except ValidationError as e:
        return JsonResponse({'error': '; '.join(e.messages)}, status=400)


def validate_config_request(config_name, environment):
    """Check both values against fixed allowlists.

    Returns:
        A dict with keys 'name' and 'environment'.

    Raises:
        ValidationError: if either value is not allowlisted.
    """
    # Validate config name
    allowed_configs = ['database', 'cache', 'email', 'logging']
    if config_name not in allowed_configs:
        raise ValidationError('Configuration not allowed')
    # Validate environment
    allowed_environments = ['development', 'staging', 'production']
    if environment not in allowed_environments:
        raise ValidationError('Environment not allowed')
    return {'name': config_name, 'environment': environment}


def load_config_safely(config_data):
    """Read BASE_DIR/config/<environment>/<name>.conf and return its text.

    Raises:
        ValidationError: if the path escapes the config directory, the file
            is missing, or it cannot be read.
    """
    config_name = config_data['name']
    environment = config_data['environment']
    # Construct safe config path
    config_dir = Path(settings.BASE_DIR) / 'config' / environment
    config_path = config_dir / f'{config_name}.conf'
    # Validate path is within config directory
    try:
        resolved_path = config_path.resolve()
        config_dir_resolved = config_dir.resolve()
        resolved_path.relative_to(config_dir_resolved)
    except ValueError:
        raise ValidationError('Config path outside allowed directory')
    # Read configuration
    try:
        if not resolved_path.exists():
            raise ValidationError('Configuration not found')
        with open(resolved_path, 'r', encoding='utf-8') as f:
            return f.read()
    except ValidationError:
        # Keep the "not found" message instead of a generic read error.
        raise
    except (OSError, UnicodeDecodeError):
        raise ValidationError('Configuration read error')

💡 Why This Fix Works

See fix suggestions for detailed explanation.

Why it happens

Django applications pass user-controlled data from request parameters, URL paths, or form inputs directly to Python's open() function without validation, enabling attackers to inject directory traversal sequences (../) that escape intended directory boundaries and access arbitrary files. The open(file_path, mode) function accepts any valid filesystem path without built-in security restrictions: open('../../../etc/passwd', 'r') successfully reads system files when the path resolves outside intended directories. Django file viewer endpoints: file_path = os.path.join(settings.MEDIA_ROOT, request.GET['filename']); content = open(file_path).read() allows filename = '../../etc/passwd' to traverse outside MEDIA_ROOT accessing sensitive system files. Document download views: with open(f'/var/docs/{user_id}/{doc_name}', 'r') as f: return HttpResponse(f.read()) vulnerable when doc_name contains traversal sequences like '../../../etc/shadow'. API endpoints reading configuration: config_path = f'/etc/app/{request.POST['config']}.conf'; open(config_path) enables config = '../../../var/log/auth.log' accessing unintended files. Template rendering from user paths: template_path = os.path.join(template_dir, user_template); open(template_path).read() allows user_template = '../../settings.py' reading application source code. Log file access: with open(f'/var/log/{log_type}-{date}.log') as f: logs = f.readlines() vulnerable through both log_type and date parameters containing traversal. Report generation: open(os.path.join(reports_dir, report_id, filename), 'rb') allows traversal through filename even with intermediate directories. Image or media serving: image_data = open(f'{media_root}/images/{category}/{image}', 'rb').read() exploitable through both category and image parameters. User profile data access: open(f'/data/users/{user_id}/profile.json') vulnerable when user_id contains path separators or traversal sequences.

Root causes

Using open() with Unvalidated User Input for File Paths in Django Views

Django applications pass user-controlled data from request parameters, URL paths, or form inputs directly to Python's open() function without validation, enabling attackers to inject directory traversal sequences (../) that escape intended directory boundaries and access arbitrary files. The open(file_path, mode) function accepts any valid filesystem path without built-in security restrictions: open('../../../etc/passwd', 'r') successfully reads system files when the path resolves outside intended directories. Django file viewer endpoints: file_path = os.path.join(settings.MEDIA_ROOT, request.GET['filename']); content = open(file_path).read() allows filename = '../../etc/passwd' to traverse outside MEDIA_ROOT accessing sensitive system files. Document download views: with open(f'/var/docs/{user_id}/{doc_name}', 'r') as f: return HttpResponse(f.read()) vulnerable when doc_name contains traversal sequences like '../../../etc/shadow'. API endpoints reading configuration: config_path = f'/etc/app/{request.POST['config']}.conf'; open(config_path) enables config = '../../../var/log/auth.log' accessing unintended files. Template rendering from user paths: template_path = os.path.join(template_dir, user_template); open(template_path).read() allows user_template = '../../settings.py' reading application source code. Log file access: with open(f'/var/log/{log_type}-{date}.log') as f: logs = f.readlines() vulnerable through both log_type and date parameters containing traversal. Report generation: open(os.path.join(reports_dir, report_id, filename), 'rb') allows traversal through filename even with intermediate directories. Image or media serving: image_data = open(f'{media_root}/images/{category}/{image}', 'rb').read() exploitable through both category and image parameters. User profile data access: open(f'/data/users/{user_id}/profile.json') vulnerable when user_id contains path separators or traversal sequences.

Missing Normalization of File Paths Before Validation and open() Calls

Django applications validate user-provided file paths without first normalizing them to canonical form, allowing attackers to bypass validation using encoded sequences, mixed separators, symbolic links, or Unicode variations that expand after validation but before the open() call processes them. Path validation checking for '..' sequences: if '..' not in user_path: open(file_path) fails when attackers use URL-encoded forms like '%2e%2e%2f' which decode to '../' after validation, or Unicode fullwidth periods (U+FF0E) that normalize to '.' enabling traversal. Case sensitivity bypass on case-insensitive filesystems: validation blocking '../' but Windows filesystem treating '../' and '..\' identically allows traversal through mixed separators that pass string matching but resolve equivalently during file access. Symbolic link exploitation: validation confirms path within /var/www/uploads/ but attacker creates symlink /var/www/uploads/link pointing to /etc/ then requests link/../passwd where link resolves after validation enabling traversal post-check. Double encoding bypass: validation decoding once but path processed multiple times: '%252e%252e%252f' decodes to '%2e%2e%2f' after first decode passing validation, then to '../' during final path resolution in open() call. Environment variable or special character expansion: validation checking literal paths but open() processing ~, $HOME, or shell expansions that resolve to different paths after validation. Relative vs absolute path confusion: validation assuming relative paths but open('/absolute/path/from/user') accepting absolute paths that bypass base directory restrictions entirely. Unicode normalization issues: validation checking ASCII '..' but accepting Unicode decomposed forms or combining characters that normalize to '..' after validation: 'ˑˑ/' (U+02D1) appearing as periods. 
Windows short name bypass: validation checking full paths but Windows accepting 8.3 short names like 'PROGRA~1' that resolve to 'Program Files' after validation. Null byte injection in older Python versions: path.endswith('.txt') validation bypassed by path/../secret.conf%00.txt where null byte truncates extension after validation but before file system access. Path separator normalization performed incorrectly: replacing '\' with '/' but not checking result for traversal allows '\..\/secret' to become '/../secret' after normalization.

Insufficient Filtering of Directory Traversal Sequences Before open() Calls

Django applications attempt to filter directory traversal patterns from user input using simple string replacement or incomplete regular expressions before passing paths to open(), but attackers bypass these filters through encoding, repetition, or alternative traversal techniques that survive validation. String replacement approaches: safe_path = user_path.replace('../', ''); open(safe_path) fails when attackers use '....//' where removal of the embedded '../' leaves '../', or nested patterns like '.../...//' where single-pass replacement misses embedded traversal. Regex filtering with incomplete patterns: if re.search(r'\.\./|\.\.\\', path): raise Error followed by open(path) fails when attackers use URL-encoded '%2e%2e/', Windows alternative separators, or case variations that don't match the regex pattern. Blacklist validation attempting to block common traversal: BLOCKED = ['../', '..\\', '%2e%2e']; if any(b in path for b in BLOCKED): raise Error before open() remains incomplete as attackers find variations like double-encoding '%252e', Unicode fullwidth characters, or filesystem-specific patterns. Path separator normalization without traversal removal: normalized = path.replace('\\', '/') before validation and open() normalizes separators but doesn't remove traversal sequences allowing clean '../../../etc/passwd' through normalization. Single-pass filtering applied once without recursion: safe = path.replace('../', '') then open(safe) allows nested '....//....//' to become '../../' after one replacement pass. Validation of filename component only without full path checking: if '..' in filename: raise Error but open(os.path.join(base, user_dir, filename)) where user_dir contains unvalidated traversal. Extension-based validation assuming safety: if path.endswith('.txt'): open(path) fails when path = '../../../etc/passwd.txt' having correct extension but traversing directories. 
Filter evasion through null bytes in older Python: filter removes '../' but attacker uses '..%00/' where null byte breaks filter matching but gets processed during file system access. Case-insensitive filesystem bypass: validation case-sensitive for '../' but Windows treating variations identically during open() call. Incomplete Unicode handling: filter checking ASCII patterns but accepting Unicode variations U+FF0E (fullwidth period), U+2215 (division slash), or decomposed forms that normalize to traversal sequences when open() processes the path.

Trusting User-Provided Relative Paths Without Verification Before open() Calls

Django applications accept user-provided relative file paths assuming they remain within intended directories when passed to open(), but the function's behavior with absolute paths, special directories, and filesystem features allows escaping base directories without using explicit '..' sequences. Absolute path injection: open(os.path.join('/var/www/uploads', user_path)) vulnerable when user_path = '/etc/passwd' because os.path.join() discards the base path when the second argument is absolute, resulting in open('/etc/passwd') ignoring upload directory restrictions. Root directory reference: user_path = '/' makes os.path.join(base, user_path) return '/' then open('/') or open('/etc/passwd') accesses root filesystem. UNC path injection on Windows: user_path = '\\\\network\\share\\file' causes open() to access network paths bypassing local directory restrictions and potentially accessing SMB shares or network resources. Drive letter injection on Windows: user_path = 'C:\\Windows\\System32\\config\\SAM' provides absolute path with drive letter that open() processes directly ignoring base directory. Empty base directory handling: open(os.path.join('', user_path)) returns user_path unchanged giving complete path control. User-controlled base and filename: open(os.path.join(user_base, user_filename)) gives complete filesystem access when both components come from user input. Relative paths with multiple levels appearing legitimate: user_path = 'subdirectory/../../sensitive.txt' combines valid subdirectory reference with traversal that casual validation might miss. Symlink following during path resolution: user provides path to symlink that itself points outside allowed directory, open() follows symlink after path construction enabling indirect traversal. Home directory expansion: older implementations or wrapper functions expanding ~ in paths allowing user_path = '~root/.ssh/id_rsa' accessing other users' home directories. 
Current directory manipulation: applications changing working directory based on user input then using relative paths with open() where changing directory context enables access to different filesystem locations. Mount point confusion: user provides paths to filesystem mount points or special directories that appear within allowed paths but actually reference different filesystems or devices. Special device files: user_path pointing to /dev/random, /dev/null, or other device files causing denial of service or unexpected behavior when opened. Process filesystem access on Linux: user_path = '/proc/self/environ' or '/proc/self/cmdline' enabling information disclosure about application environment when passed to open().

Concatenating User Input Directly into File Paths for open() Calls Without Safe Path Construction

Django applications construct file paths using string concatenation, f-strings, or format operations that directly embed user input, then pass these paths to open() without using safe path joining or validation, enabling trivial directory traversal. F-string path construction: file_path = f'{base_dir}/{user_filename}'; open(file_path) allows user_filename = '../../../etc/passwd' to traverse directories through direct string interpolation. String concatenation patterns: path = upload_dir + '/' + request.GET['file']; open(path, 'r') concatenates user input directly enabling traversal through any path separators or sequences in the input. Format string operations: file_path = '{}/{}/{}'.format(base, category, filename); open(file_path) vulnerable when any format parameter contains traversal or absolute paths. Multi-level concatenation: path = base + '/' + user_category + '/' + user_subcategory + '/' + user_file; open(path) creates multiple injection points where each component could contain traversal. Direct URL path usage: file_path = settings.MEDIA_ROOT + request.path; open(file_path) uses URL path directly allowing HTTP request to /../../etc/passwd accessing system files. Template-based path building: template_path = template_dir + '/' + request.POST['template'] + '.html'; open(template_path) concatenates template name enabling template = '../../../settings' to read configuration. API version paths: api_file = f'/api/{version}/{endpoint}/data.json'; open(api_file) where version comes from user allows version = '../../../var/log' accessing unintended files. User profile paths: profile_path = '/data/profiles/' + str(user_id) + '/' + profile_field + '.txt'; open(profile_path) exploitable through profile_field parameter. Log file paths: log_path = '/var/log/' + app_name + '/' + log_type + '-' + date + '.log'; open(log_path) vulnerable through multiple concatenated user parameters. 
Configuration file construction: config = base_config_dir + environment + '/' + config_name + '.conf'; open(config) allows traversal through environment or config_name. Database export paths: export_file = export_dir + table_name + '_' + timestamp + '.csv'; open(export_file, 'w') vulnerable when table_name contains path separators. Backup file access: backup = '/backups/' + backup_id + '/' + requested_file; open(backup) exploitable through both backup_id and requested_file parameters containing traversal sequences. Report generation paths: report_path = reports_base + '/' + report_type + '/' + company_id + '/' + report_name; open(report_path) creates multiple injection opportunities across concatenated components.

Fixes

1

Validate and Sanitize All File Paths Before Using with open() Function

Implement comprehensive input validation for all file paths before passing them to Python's open() function, using strict allowlists for characters, length limits, format requirements, and file extension verification to prevent directory traversal attacks. Apply regular expressions enforcing alphanumeric-only filenames with limited special characters: re.match(r'^[a-zA-Z0-9._-]+$', filename) rejecting any input containing path separators (/, \), traversal sequences (..), null bytes (\x00), or special characters that enable bypass attacks before the open() call. Validate filename length: MAX_FILENAME_LENGTH = 255; if not filename or len(filename) > MAX_FILENAME_LENGTH: raise ValidationError ensuring inputs don't exceed filesystem limits or attempt buffer-related attacks. Reject hidden files and system files: if filename.startswith('.') or filename.startswith('_'): raise ValidationError preventing access to hidden configuration files or system resources. Check for Windows reserved names: RESERVED_NAMES = {'CON', 'PRN', 'AUX', 'NUL', 'COM1', 'COM2', 'LPT1'}; if filename.upper().split('.')[0] in RESERVED_NAMES: raise ValidationError avoiding device file access on Windows. For directory components from user input, map to predefined safe values: ALLOWED_DIRECTORIES = {'reports': 'monthly_reports', 'uploads': 'user_uploads', 'documents': 'public_docs'}; safe_dir = ALLOWED_DIRECTORIES.get(user_dir) preventing arbitrary directory traversal before constructing paths for open(). Validate file extensions against allowlists: ALLOWED_EXTENSIONS = {'.txt', '.pdf', '.jpg', '.png', '.csv', '.json'}; extension = Path(filename).suffix.lower(); if extension not in ALLOWED_EXTENSIONS: raise ValidationError('File type not allowed') before attempting file access. Check for encoded traversal sequences: decoded = urllib.parse.unquote(user_input); if any(pattern in decoded for pattern in ['..', '~', '\x00', '%00']): raise ValidationError catching URL-encoding and Unicode attacks. 
For numeric identifiers (user IDs, document IDs), parse and validate as integers: try: user_id = int(request.GET['user_id']); if not 1 <= user_id <= 999999: raise ValidationError except ValueError: raise ValidationError ensuring IDs can't contain path syntax. Create reusable validation functions: def validate_safe_filename(name): if not name or len(name) > 100: raise ValidationError; if not re.match(r'^[a-zA-Z0-9._-]+$', name): raise ValidationError; if '..' in name or name.startswith('.'): raise ValidationError; return name applying consistent checks across the application. Integrate validation at Django form/serializer level: class FileRequestForm(forms.Form): filename = forms.CharField(validators=[validate_safe_filename], max_length=100) catching malicious input early before it reaches open() calls. Log validation failures: logger.warning('Path traversal attempt detected', extra={'user': request.user, 'input': filename, 'ip': get_client_ip(request)}) enabling security monitoring and incident response. Never rely on blacklisting—use allowlists defining explicitly what IS permitted rather than attempting to block all dangerous patterns.

2

Use pathlib.Path.resolve() and Verify Resolved Path Stays Within Allowed Directories Before open()

Convert all file paths to absolute canonical form using pathlib.Path.resolve() or os.path.realpath() before passing them to open(), then verify the resolved path remains within allowed directory boundaries using Path.relative_to() to prevent traversal escaping base directories. After constructing paths with validated components, resolve them to eliminate symbolic links, relative references, and traversal sequences: from pathlib import Path; base_dir = Path(settings.MEDIA_ROOT).resolve(); file_path = (base_dir / validated_filename).resolve(); with open(file_path, 'r') as f: content = f.read() where resolve() expands symlinks and normalizes '..' before the open() call. Validate the resolved path stays within allowed directory: try: resolved_path = file_path.resolve(); base_dir_resolved = base_dir.resolve(); resolved_path.relative_to(base_dir_resolved); except ValueError: raise ValidationError('File path outside allowed directory') where relative_to() raises ValueError if resolved_path is not within base_dir catching traversal attempts. This prevents attacks constructing paths like /var/www/uploads/../../../etc/passwd that resolve to /etc/passwd failing the relative_to() check before open() accesses the file. For multiple allowed base directories, check against each: ALLOWED_BASES = [Path(settings.MEDIA_ROOT).resolve(), Path(settings.STATIC_ROOT).resolve()]; if not any(resolved_path.is_relative_to(base) for base in ALLOWED_BASES): raise ValidationError ensuring file access restricted to explicitly permitted directories. Handle symlink attacks by resolving before validation: attackers creating symlinks pointing outside allowed directories are detected when the resolved symlink target fails relative_to() verification before open() follows the link. 
Use strict=True in resolve(): resolved_path = file_path.resolve(strict=True) (Python 3.6+) raising FileNotFoundError if the path doesn't exist, so non-existent targets are rejected during validation instead of at open(). Note that strict=True does not by itself prevent TOCTOU races: an attacker who can write inside the base directory can still swap in a malicious symlink between validation and file access, so keep the window between the check and open() as small as possible and do not let untrusted users create links under the base directory. For Django settings-based paths, resolve once at startup: UPLOAD_DIR_RESOLVED = Path(settings.MEDIA_ROOT).resolve() as module-level constant, then use in all validations: file_path = (UPLOAD_DIR_RESOLVED / filename).resolve(); file_path.relative_to(UPLOAD_DIR_RESOLVED); with open(file_path) as f: data = f.read() avoiding repeated resolution overhead. Create safe file access wrapper functions: def safe_open_file(base_dir, filename, mode='r'): base = Path(base_dir).resolve(); path = (base / filename).resolve(); try: path.relative_to(base); except ValueError: raise PermissionDenied('Invalid file path'); return open(path, mode) encapsulating validation logic and providing safe interface for all file operations. Check both parent directory and file: parent = file_path.parent.resolve(); parent.relative_to(base_dir_resolved); if not file_path.exists(): raise ValidationError ensuring parent within boundaries before checking file existence preventing information disclosure through existence checks. On Windows, handle case-insensitivity and alternate data streams: normalized_filename = filename.lower(); if ':' in normalized_filename: raise ValidationError preventing access to NTFS alternate data streams. Compare paths using Path methods: if base_dir_resolved not in resolved_path.parents: raise ValidationError providing additional validation layer. Use os.path.commonpath() for verification: if os.path.commonpath([str(base_dir_resolved), str(resolved_path)]) != str(base_dir_resolved): raise ValidationError ensuring paths share common base before open() call.

3

Implement Allowlists for Permitted File Names and Directories Accessed via open()

Define explicit allowlists of permitted filenames, file extensions, and directory paths that can be accessed with open(), rejecting any request that doesn't match allowlist entries rather than attempting to filter out dangerous patterns. For file reading operations, maintain dictionaries mapping safe identifiers to actual filenames: ALLOWED_FILES = {'report_2024': 'monthly_report_2024.pdf', 'user_guide': 'user_documentation.pdf', 'terms': 'terms_of_service.txt'}; actual_filename = ALLOWED_FILES.get(file_id); if not actual_filename: raise ValidationError; with open(os.path.join(base_dir, actual_filename), 'r') as f: content = f.read() where users request 'report_2024' and application maps to real filename preventing arbitrary file specification. For directory access, map user-facing categories to filesystem paths: ALLOWED_DIRS = {'user_uploads': Path(settings.MEDIA_ROOT) / 'uploads' / 'users', 'public_documents': Path(settings.MEDIA_ROOT) / 'documents' / 'public', 'temp_files': Path(settings.MEDIA_ROOT) / 'temp'}; safe_dir = ALLOWED_DIRS.get(category); if not safe_dir: raise ValidationError; with open(safe_dir / validated_filename) as f: data = f.read() rejecting unmapped categories. Implement file extension allowlists for uploads and access: ALLOWED_READ_EXTENSIONS = {'.txt', '.pdf', '.jpg', '.png', '.json', '.csv'}; extension = Path(filename).suffix.lower(); if extension not in ALLOWED_READ_EXTENSIONS: raise ValidationError('File type not allowed for reading') before open() call. For template or configuration access, enumerate specific allowed files: ALLOWED_TEMPLATES = {'email': ['welcome.html', 'password_reset.html', 'notification.html'], 'reports': ['monthly.html', 'quarterly.html', 'annual.html']}; if template_name not in ALLOWED_TEMPLATES.get(category, []): raise ValidationError; with open(template_path, 'r') as f: template = f.read() ensuring only predefined templates accessible. 
Use database-driven allowlists for dynamic file access: from django.db import models; class AllowedFile(models.Model): file_identifier = models.CharField(max_length=100, unique=True); actual_path = models.CharField(max_length=512); user = models.ForeignKey(User, on_delete=models.CASCADE); is_active = models.BooleanField(default=True); allowed_file = AllowedFile.objects.filter(file_identifier=requested_id, user=request.user, is_active=True).first(); if not allowed_file: raise PermissionDenied; with open(allowed_file.actual_path, 'r') as f: content = f.read() combining permissions and existence checking. For user-specific directories, construct paths from authenticated user IDs only: user_dir = UPLOAD_BASE / 'users' / str(request.user.id); validated_filename = validate_filename(filename); file_path = (user_dir / validated_filename).resolve(); if not file_path.is_relative_to(user_dir.resolve()): raise PermissionDenied; with open(file_path, 'r') as f: data = f.read() where the user ID comes from the authenticated session, not user input, and the resolved path is confirmed to remain inside the user's directory. Implement MIME type validation for opened files: import magic; mime = magic.Magic(mime=True); detected_mime = mime.from_file(str(file_path)); ALLOWED_MIMES = {'text/plain', 'application/pdf', 'image/jpeg', 'image/png'}; if detected_mime not in ALLOWED_MIMES: raise ValidationError checking actual file content not just extension before processing. Create Django management commands to maintain allowlists: python manage.py audit_allowed_files verifying all allowlist entries reference existing files, have correct permissions, and removing stale entries. Document allowlist maintenance, including how administrators add new permitted files through secure admin interfaces: class AllowedFileAdmin(admin.ModelAdmin): list_display = ['file_identifier', 'actual_path', 'user', 'is_active']; list_filter = ['is_active', 'user']; search_fields = ['file_identifier', 'actual_path'] providing controlled access management. 
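For public file access, maintain separate public file registry: public_files = PublicFile.objects.filter(is_public=True, file_identifier=requested_id).first(); if public_files: return FileResponse(open(public_files.file_path, 'rb')) (pass the open file directly rather than using a with block: FileResponse closes the file itself once streaming finishes, whereas a with block would close it before the response body is sent) handling public and authenticated access differently.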

4

Use Django's Secure File Handling APIs Instead of Direct open() Calls

Replace direct open() function calls with Django's built-in file storage APIs—FileSystemStorage, default_storage, and model FileField—which provide safe path handling, validation, access control abstractions, and automatic security features that prevent directory traversal. Use Django's default_storage for file operations: from django.core.files.storage import default_storage; if default_storage.exists(validated_filename): with default_storage.open(validated_filename, 'rb') as f: content = f.read().decode('utf-8') where default_storage abstracts file access, validates paths internally, and prevents traversal through configuration-based location restrictions (open in binary mode when decoding manually; in text mode 'r' read() already returns str and .decode() would fail). Configure custom FileSystemStorage with explicit security boundaries: from django.core.files.storage import FileSystemStorage; secure_storage = FileSystemStorage(location=str(Path(settings.MEDIA_ROOT) / 'secure-uploads'), base_url='/secure-files/', file_permissions_mode=0o600, directory_permissions_mode=0o700) then use secure_storage.open(validated_filename, 'r') instead of open() where FileSystemStorage contains all access within configured location and sets secure permissions. For model-based file management, use FileField avoiding manual path handling: class Document(models.Model): title = models.CharField(max_length=200); file = models.FileField(upload_to='documents/%Y/%m/%d/', storage=secure_storage); uploaded_by = models.ForeignKey(User, on_delete=models.CASCADE); uploaded_at = models.DateTimeField(auto_now_add=True) then access: document = Document.objects.get(id=doc_id, uploaded_by=request.user); with document.file.open('r') as f: content = f.read() where FileField provides path validation, access control through queryset filtering, and automatic file management. Implement custom storage backends for specialized security: class SecureStorage(FileSystemStorage): def get_valid_name(self, name): cleaned = super().get_valid_name(name); if '..' in cleaned or cleaned.startswith('.'): raise ValidationError('Invalid filename'); return cleaned; def get_available_name(self, name, max_length=None): name = re.sub(r'[^a-zA-Z0-9._-]', '', name); return super().get_available_name(name, max_length); def _save(self, name, content): if Path(name).suffix.lower() not in {'.txt', '.pdf', '.jpg'}: raise ValidationError('File type not allowed'); return super()._save(name, content) overriding validation methods for custom security policies. Use storage.path() to get filesystem paths only after validation: if default_storage.exists(validated_filename): filesystem_path = default_storage.path(validated_filename); with open(filesystem_path, 'r') as f: content = f.read() where storage backend validates filename safety before returning path. For serving files to users, use Django's FileResponse with storage: from django.http import FileResponse; file_obj = default_storage.open(validated_filename, 'rb'); response = FileResponse(file_obj, as_attachment=True, filename=safe_display_name); response['Content-Type'] = 'application/octet-stream'; return response handling content-type detection, safe headers, and proper file streaming. Implement file access views with permission checks: @login_required; def download_document(request, doc_id): document = get_object_or_404(Document, id=doc_id); if not request.user.has_perm('documents.view_document', document) and document.uploaded_by != request.user: raise PermissionDenied; return FileResponse(document.file.open('rb'), as_attachment=True) centralizing authorization before file access. Configure Django settings properly: MEDIA_ROOT = Path(BASE_DIR) / 'media'; MEDIA_URL = '/media/'; DEFAULT_FILE_STORAGE = 'django.core.files.storage.FileSystemStorage' ensuring consistent secure storage across application (DEFAULT_FILE_STORAGE was deprecated in Django 4.2 and removed in Django 5.1; on those versions configure the 'default' entry of the STORAGES setting instead). 
For cloud storage, use storage backends: DEFAULT_FILE_STORAGE = 'storages.backends.s3boto3.S3Boto3Storage'; AWS_STORAGE_BUCKET_NAME = 'my-secure-bucket'; AWS_DEFAULT_ACL = 'private' with django-storages providing consistent API preventing local path traversal entirely. Create storage-based helper functions: def safe_read_file(filename, storage=default_storage): if not storage.exists(filename): raise FileNotFoundError; with storage.open(filename, 'rb') as f: return f.read().decode('utf-8') encapsulating safe access patterns replacing direct open() calls throughout application (the file is opened in binary mode so that .decode() receives bytes, and the with block guarantees the handle is closed).

5

Employ Indirect Object References Instead of Direct File Paths in open() Calls

Replace direct file path handling in open() calls with indirect object reference patterns where users provide opaque identifiers (UUIDs, database IDs, tokens) that the application maps to actual file paths, eliminating user control over paths passed to open() entirely. Implement database-backed file management: import uuid; class UploadedFile(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False); user = models.ForeignKey(User, on_delete=models.CASCADE); original_filename = models.CharField(max_length=255); storage_path = models.CharField(max_length=512); uploaded_at = models.DateTimeField(auto_now_add=True); file_size = models.IntegerField(); mime_type = models.CharField(max_length=100) where users receive UUID like '550e8400-e29b-41d4-a716-446655440000' instead of real filename. Access files via database lookup: file_record = UploadedFile.objects.get(id=uuid_from_request, user=request.user); return FileResponse(open(file_record.storage_path, 'rb'), filename=file_record.original_filename) where application controls paths passed to open() and enforces ownership through database query (the open file is handed straight to FileResponse, which closes it after streaming; reading it first or wrapping it in a with block would leave the response with an exhausted or already-closed file). For temporary file access, generate time-limited tokens: import secrets; from django.core.cache import cache; file_token = secrets.token_urlsafe(32); cache.set(f'file_access_{file_token}', {'file_id': str(file_obj.id), 'user_id': request.user.id}, timeout=3600); download_url = reverse('download_file', kwargs={'token': file_token}) then validate: token_data = cache.get(f'file_access_{token}'); if not token_data or token_data['user_id'] != request.user.id: raise PermissionDenied; file_obj = UploadedFile.objects.get(id=token_data['file_id']); return FileResponse(open(file_obj.storage_path, 'rb')) providing secure temporary access without exposing paths. 
Implement signed URLs for downloads: from django.core.signing import TimestampSigner; signer = TimestampSigner(); signed_id = signer.sign(str(file_obj.id)); download_url = reverse('signed_download', kwargs={'signed_id': signed_id}) then verify: try: file_id = signer.unsign(signed_id, max_age=3600); except BadSignature: raise PermissionDenied; file_obj = UploadedFile.objects.get(id=file_id); return FileResponse(open(file_obj.storage_path, 'rb')) preventing ID tampering (FileResponse closes the file after streaming, so it must not be opened in a with block that would close it before the response is sent). Use content-addressed storage with cryptographic hashes: import hashlib; file_content = uploaded_file.read(); file_hash = hashlib.sha256(file_content).hexdigest(); storage_subdir = file_hash[:2]; storage_path = Path(settings.SECURE_STORAGE) / storage_subdir / file_hash[2:4] / file_hash; storage_path.parent.mkdir(parents=True, exist_ok=True); with open(storage_path, 'wb') as f: f.write(file_content); FileRecord.objects.create(user_filename=uploaded_file.name, storage_hash=file_hash, storage_path=str(storage_path)) where users reference files by database ID, application maps to hash-based paths in open() calls. For user file organization, maintain virtual directory structures: class FileNode(models.Model): name = models.CharField(max_length=255); parent = models.ForeignKey('self', null=True, on_delete=models.CASCADE); user = models.ForeignKey(User, on_delete=models.CASCADE); actual_file = models.OneToOneField(UploadedFile, null=True, on_delete=models.CASCADE); node_type = models.CharField(choices=[('file', 'File'), ('folder', 'Folder')]) allowing user-friendly paths like 'My Documents/Reports/2024.pdf' mapped to secure storage paths in open(). 
Implement file access logging: class FileAccessLog(models.Model): file = models.ForeignKey(UploadedFile, on_delete=models.CASCADE); user = models.ForeignKey(User, on_delete=models.CASCADE); accessed_at = models.DateTimeField(auto_now_add=True); action = models.CharField(max_length=20); ip_address = models.GenericIPAddressField(); def log_file_access(file_obj, user, action, ip): FileAccessLog.objects.create(file=file_obj, user=user, action=action, ip_address=ip); with open(file_obj.storage_path, 'rb') as f: content = f.read() recording all access for audit trails. Create RESTful APIs using identifiers: @api_view(['GET']); def get_file_content(request, file_uuid): file_obj = get_object_or_404(UploadedFile, id=file_uuid, user=request.user); with open(file_obj.storage_path, 'r') as f: return Response({'content': f.read(), 'metadata': {'filename': file_obj.original_filename, 'size': file_obj.file_size}}) providing clean API without path exposure. Implement bulk operations: file_ids = request.data.get('file_ids', []); files = UploadedFile.objects.filter(id__in=file_ids, user=request.user); for file_obj in files: with open(file_obj.storage_path, 'rb') as f: archive.add(f, arcname=file_obj.original_filename) safely handling multiple files through IDs.

6

Normalize Paths and Check for Traversal Patterns Before Passing to open()

Apply comprehensive path normalization and traversal pattern detection to all user input before constructing file paths for open() calls, handling various encoding schemes, separator variations, Unicode obfuscation, and filesystem-specific patterns that might bypass simple validation. Normalize path separators to consistent format: normalized_path = user_path.replace('\\', '/') converting Windows backslashes to forward slashes (or vice versa) before validation and open() call preventing mixed separator bypass attacks. Decode all URL encoding before validation: from urllib.parse import unquote; decoded_once = unquote(user_path); decoded_twice = unquote(decoded_once) checking for double-encoding where '%252e%252e%252f' decodes to '%2e%2e%2f' then to '../' catching encoded traversal before open(). Normalize Unicode representations: import unicodedata; unicode_normalized = unicodedata.normalize('NFKC', user_path) converting Unicode lookalikes (U+FF0E fullwidth full stop, U+FF0F fullwidth solidus) and combining characters to standard ASCII forms before validation preventing Unicode obfuscation attacks. Check for common traversal patterns after normalization: TRAVERSAL_PATTERNS = [re.compile(r'\.\./'), re.compile(r'\.\.\\'), re.compile(r'%2e%2e[/%5c]', re.I), re.compile(r'%252e', re.I), re.compile(r'%c0%ae', re.I), re.compile(r'%e0%80%ae', re.I), re.compile(r'\.\.'), re.compile(r'\x00')]; if any(pattern.search(normalized_path) for pattern in TRAVERSAL_PATTERNS): raise ValidationError('Traversal sequence detected') before constructing paths for open(). Detect absolute path injection: if normalized_path.startswith('/') or (len(normalized_path) > 1 and normalized_path[1] == ':'): raise ValidationError('Absolute paths not allowed') catching Unix absolute paths and Windows drive letters before open() call. 
Check for null byte injection: if '\x00' in user_path or '%00' in user_path or '\u0000' in user_path: raise ValidationError('Null bytes not allowed') preventing truncation attacks on C-based file operations underlying open(). Apply os.path.normpath() and validate result: normalized = os.path.normpath(user_path); if normalized.startswith('..') or '/..' in normalized or '\\..' in normalized: raise ValidationError('Path traversal detected') catching remaining traversal sequences. For comprehensive defense, combine all normalization steps: def normalize_and_validate_path(user_input): if not user_input: raise ValidationError('Empty path'); decoded = unquote(unquote(user_input)); unicode_norm = unicodedata.normalize('NFKC', decoded); separator_norm = unicode_norm.replace('\\', '/'); path_norm = os.path.normpath(separator_norm); checks = [path_norm.startswith('..'), '/..' in path_norm, path_norm.startswith('/'), '\x00' in path_norm, any(pattern.search(path_norm) for pattern in TRAVERSAL_PATTERNS)]; if any(checks): raise ValidationError('Invalid path detected'); return path_norm; safe_path = normalize_and_validate_path(user_input); with open(os.path.join(base_dir, safe_path), 'r') as f: content = f.read() applying defense-in-depth. Check for Windows alternative data streams: if ':' in filename and not (len(filename) > 1 and filename[1] == ':'): raise ValidationError('Alternative data streams not allowed') preventing access to hidden NTFS streams before open(). Detect path equivalence using os.path.samefile() after construction: constructed_path = os.path.join(base_dir, validated_filename); if os.path.exists(constructed_path) and os.path.exists(sensitive_resource): try: if os.path.samefile(constructed_path, sensitive_resource): raise ValidationError('Access to resource not allowed'); except FileNotFoundError: pass; with open(constructed_path, 'r') as f: content = f.read() checking if constructed path points to protected resources through symlinks. 
Implement recursive normalization until stable: prev = user_path; iterations = 0; while iterations < 10: normalized = apply_normalizations(prev); if normalized == prev: break; prev = normalized; iterations += 1; if iterations >= 10: raise ValidationError('Normalization limit exceeded') catching nested encoding while the iteration cap prevents infinite loops. Log normalization discrepancies for security monitoring: if normalized_path != original_user_input: logger.warning('Path normalization detected potential attack', extra={'user': request.user, 'original': original_user_input, 'normalized': normalized_path, 'ip': get_client_ip(request)}); final_path = os.path.join(base_dir, normalized_path); with open(final_path, 'r') as f: data = f.read() identifying attack attempts for incident response.

Detect This Vulnerability in Your Code

Sourcery automatically identifies Django path traversal via the open() function and many other security issues in your codebase.