Django Path Traversal via os.path.join()

High Risk Path Traversal
django, python, path-traversal, directory-traversal, file-access, os-path-join

What it is

The Django application uses os.path.join() with user-controlled input without proper validation, enabling path traversal attacks. Attackers can manipulate file paths to access files outside the intended directory structure using sequences like '../' to traverse up the directory tree and access sensitive system files.

# Vulnerable: os.path.join() with user input in Django
# NOTE: every view below is intentionally insecure; it illustrates the flaw
# and must not be copied into production code.
import os
from django.http import HttpResponse, JsonResponse
from django.views import View
from django.conf import settings


# Dangerous: Direct path joining with user input
class FileDownloadView(View):
    """Download view that joins the URL-supplied ``filename`` into a path unchecked."""

    def get(self, request, filename):
        """Return the file's bytes as an attachment, or a 404 JSON error if it is missing."""
        # CRITICAL: User controls filename, can use ../
        file_path = os.path.join(settings.MEDIA_ROOT, 'uploads', filename)
        try:
            with open(file_path, 'rb') as f:
                response = HttpResponse(f.read())
                response['Content-Disposition'] = f'attachment; filename={filename}'
                return response
        except FileNotFoundError:
            return JsonResponse({'error': 'File not found'}, status=404)


# Another vulnerable pattern
def serve_user_file(request):
    """Return a file's text; both ``user_id`` and ``file`` query parameters reach the path unvalidated."""
    user_id = request.GET.get('user_id', '')
    filename = request.GET.get('file', '')
    # Dangerous: Multiple path components from user input
    file_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, filename)
    if os.path.exists(file_path):
        with open(file_path, 'r') as f:
            content = f.read()
        return HttpResponse(content)
    return HttpResponse('File not found', status=404)


# Template file access
def load_custom_template(request):
    """Return a template file's text as JSON; directory and name both come from POST data."""
    template_name = request.POST.get('template', '')
    template_dir = request.POST.get('directory', 'default')
    # Dangerous: User controls template path
    template_path = os.path.join(settings.TEMPLATES[0]['DIRS'][0], template_dir, template_name)
    try:
        with open(template_path, 'r') as f:
            template_content = f.read()
        return JsonResponse({'template': template_content})
    except Exception as e:
        # The exception text is returned to the client as-is.
        return JsonResponse({'error': str(e)})


# Log file access
def view_log_file(request):
    """Return a log file as plain text; ``date`` and ``log`` query parameters build the path."""
    log_name = request.GET.get('log', '')
    date = request.GET.get('date', '')
    # Dangerous: Path traversal in log access
    log_path = os.path.join('/var/log/myapp', date, f'{log_name}.log')
    try:
        with open(log_path, 'r') as f:
            log_content = f.read()
        return HttpResponse(log_content, content_type='text/plain')
    except Exception as e:
        # The exception text is returned to the client as-is.
        return HttpResponse(f'Error: {e}', status=500)


# Configuration file access
def get_config_file(request):
    """Return a ``.conf`` file's text as JSON; ``env`` and ``config`` query parameters build the path."""
    config_name = request.GET.get('config', '')
    environment = request.GET.get('env', 'production')
    # Dangerous: User-controlled config path
    config_path = os.path.join(settings.BASE_DIR, 'config', environment, f'{config_name}.conf')
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            config_data = f.read()
        return JsonResponse({'config': config_data})
    return JsonResponse({'error': 'Config not found'})
# Secure: Safe path handling in Django
import os
import re
from pathlib import Path

from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from django.http import HttpResponse, JsonResponse
from django.views import View


# Safe: Validated file download
class SafeFileDownloadView(View):
    """Download view that validates the filename and confines access to MEDIA_ROOT/uploads."""

    def get(self, request, filename):
        """Validate ``filename``, resolve it inside the upload directory and serve it.

        Returns a 400 JSON response carrying the validation message when any
        check raises ``ValidationError``.
        """
        try:
            # Validate filename
            validated_filename = self.validate_filename(filename)
            # Construct safe file path
            safe_path = self.get_safe_file_path(validated_filename)
            # Serve file securely
            return self.serve_file_safely(safe_path, validated_filename)
        except ValidationError as e:
            return JsonResponse({'error': str(e)}, status=400)

    def validate_filename(self, filename):
        """Return ``filename`` unchanged if it passes every check.

        Raises:
            ValidationError: on empty/over-long names, characters outside
                ``[a-zA-Z0-9._-]``, a leading dot, or a disallowed extension.
        """
        # Check filename format
        if not filename or len(filename) > 255:
            raise ValidationError('Invalid filename length')
        # Only allow safe characters. fullmatch() is used because '$' with
        # re.match() would also accept a name that ends in a newline.
        if not re.fullmatch(r'[a-zA-Z0-9._-]+', filename):
            raise ValidationError('Filename contains invalid characters')
        # Prevent hidden files
        if filename.startswith('.'):
            raise ValidationError('Hidden files not allowed')
        # Check file extension
        allowed_extensions = ('.txt', '.pdf', '.jpg', '.png', '.docx')
        if not filename.lower().endswith(allowed_extensions):
            raise ValidationError('File type not allowed')
        return filename

    def get_safe_file_path(self, filename):
        """Return the resolved path of ``filename`` inside MEDIA_ROOT/uploads.

        Raises:
            ValidationError: if the resolved path is not below the resolved
                upload directory.
        """
        # Define safe base directory
        upload_dir = Path(settings.MEDIA_ROOT) / 'uploads'
        # Construct absolute path
        file_path = upload_dir / filename
        # Resolve path and validate it's within upload directory
        try:
            resolved_path = file_path.resolve()
            upload_dir_resolved = upload_dir.resolve()
            # Ensure file is within the upload directory
            resolved_path.relative_to(upload_dir_resolved)
            return resolved_path
        except ValueError:
            raise ValidationError('File path outside allowed directory')

    def serve_file_safely(self, file_path, filename):
        """Return the file as an attachment, or a JSON error (404/413/403/500)."""
        try:
            if not file_path.exists():
                return JsonResponse({'error': 'File not found'}, status=404)
            # Check file size
            max_size = 10 * 1024 * 1024  # 10MB
            if file_path.stat().st_size > max_size:
                return JsonResponse({'error': 'File too large'}, status=413)
            # Serve file
            with open(file_path, 'rb') as f:
                response = HttpResponse(f.read())
            response['Content-Disposition'] = f'attachment; filename="{filename}"'
            return response
        except PermissionError:
            return JsonResponse({'error': 'Access denied'}, status=403)
        except Exception:
            return JsonResponse({'error': 'File read error'}, status=500)


# Safe: User file access with validation
def safe_serve_user_file(request):
    """Serve a file from a user's directory after validating access, name and path."""
    user_id = request.GET.get('user_id', '')
    filename = request.GET.get('file', '')
    try:
        # Validate user access
        validated_user_id = validate_user_access(request, user_id)
        # Validate filename
        validated_filename = validate_user_filename(filename)
        # Get safe file path
        file_path = get_safe_user_file_path(validated_user_id, validated_filename)
        # Serve file
        return serve_user_file_safely(file_path)
    except ValidationError as e:
        return JsonResponse({'error': str(e)}, status=400)


def validate_user_access(request, user_id):
    """Return ``user_id`` as an int if the requester may read that user's files.

    Raises:
        ValidationError: if ``user_id`` is not a short run of ASCII digits, the
            requester is not authenticated, or the requester is neither that
            user nor staff.
    """
    # Validate user ID format. Only ASCII digits are accepted: str.isdigit()
    # is also true for characters such as '²', which int() rejects with a
    # ValueError that nothing here would catch. The length cap keeps the
    # conversion below bounded.
    if not re.fullmatch(r'[0-9]{1,18}', user_id):
        raise ValidationError('Invalid user ID')
    user_id = int(user_id)
    # Check access permissions
    if not request.user.is_authenticated:
        raise ValidationError('Authentication required')
    if request.user.id != user_id and not request.user.is_staff:
        raise ValidationError('Access denied')
    return user_id


def validate_user_filename(filename):
    """Return ``filename`` unchanged if it is a plain, safe file name.

    Raises:
        ValidationError: on empty/over-long names, characters outside
            ``[a-zA-Z0-9._-]``, or any '..', '/' or '\\' in the name.
    """
    if not filename or len(filename) > 100:
        raise ValidationError('Invalid filename')
    # Only allow alphanumeric, dots, hyphens, underscores. fullmatch() is used
    # because '$' with re.match() would let "name\n" through.
    if not re.fullmatch(r'[a-zA-Z0-9._-]+', filename):
        raise ValidationError('Filename contains invalid characters')
    # Prevent directory traversal sequences
    if '..' in filename or '/' in filename or '\\' in filename:
        raise ValidationError('Invalid file path')
    return filename


def get_safe_user_file_path(user_id, filename):
    """Return the resolved path of ``filename`` inside MEDIA_ROOT/users/<user_id>.

    Raises:
        ValidationError: if the resolved path is not below the resolved user
            directory.
    """
    # Define safe base directory
    users_dir = Path(settings.MEDIA_ROOT) / 'users' / str(user_id)
    # Construct file path
    file_path = users_dir / filename
    # Validate path is within user directory
    try:
        resolved_path = file_path.resolve()
        users_dir_resolved = users_dir.resolve()
        resolved_path.relative_to(users_dir_resolved)
        return resolved_path
    except ValueError:
        raise ValidationError('File path outside user directory')


def serve_user_file_safely(file_path):
    """Return the file's UTF-8 text, or an error response (404/413/400/500)."""
    try:
        if not file_path.exists():
            return HttpResponse('File not found', status=404)
        # Read file with size limit
        max_size = 1024 * 1024  # 1MB
        if file_path.stat().st_size > max_size:
            return HttpResponse('File too large', status=413)
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        return HttpResponse(content)
    except UnicodeDecodeError:
        return HttpResponse('File encoding error', status=400)
    except Exception:
        return HttpResponse('File read error', status=500)


# Safe: Template access with allowlists
def safe_load_custom_template(request):
    """Return an allowlisted template's text as JSON, or a 400 JSON error."""
    template_name = request.POST.get('template', '')
    template_category = request.POST.get('category', '')
    try:
        # Validate inputs
        validated_template = validate_template_request(template_name, template_category)
        # Load template safely
        template_content = load_template_safely(validated_template)
        return JsonResponse({'template': template_content})
    except ValidationError as e:
        return JsonResponse({'error': str(e)}, status=400)


def validate_template_request(template_name, category):
    """Return ``{'name': ..., 'category': ...}`` for an allowlisted template.

    Raises:
        ValidationError: if the name has characters outside ``[a-zA-Z0-9_-]``,
            the category is unknown, or the name is not listed for it.
    """
    # Validate template name
    if not re.fullmatch(r'[a-zA-Z0-9_-]+', template_name):
        raise ValidationError('Invalid template name')
    # Validate category
    allowed_categories = ['emails', 'reports', 'notifications']
    if category not in allowed_categories:
        raise ValidationError('Template category not allowed')
    # Use allowlist for template names
    allowed_templates = {
        'emails': ['welcome', 'password_reset', 'confirmation'],
        'reports': ['monthly', 'quarterly', 'annual'],
        'notifications': ['alert', 'reminder', 'update'],
    }
    if template_name not in allowed_templates.get(category, []):
        raise ValidationError('Template not allowed')
    return {'name': template_name, 'category': category}


def load_template_safely(template_data):
    """Return the text of ``custom/<category>/<name>.html`` from the first template dir.

    Raises:
        ValidationError: 'Template path outside allowed directory' if the
            resolved path escapes the category directory, 'Template not found'
            if the file does not exist, 'Template read error' on any other
            I/O or decoding failure.
    """
    template_name = template_data['name']
    category = template_data['category']
    # Construct safe template path
    template_dir = Path(settings.TEMPLATES[0]['DIRS'][0]) / 'custom' / category
    template_path = template_dir / f'{template_name}.html'
    # Validate path is within template directory
    try:
        resolved_path = template_path.resolve()
        template_dir_resolved = template_dir.resolve()
        resolved_path.relative_to(template_dir_resolved)
    except ValueError:
        raise ValidationError('Template path outside allowed directory')
    # Read template. The missing-file case gets its own handler: raising
    # 'Template not found' inside a try guarded by a blanket `except Exception`
    # would turn it into 'Template read error' before it reached the caller.
    try:
        with open(resolved_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError as exc:
        raise ValidationError('Template not found') from exc
    except (OSError, UnicodeDecodeError) as exc:
        raise ValidationError('Template read error') from exc


# Safe: Log access with restrictions
def safe_view_log_file(request):
    """Return an allowlisted log as plain text to staff users, or a 400 error."""
    log_name = request.GET.get('log', '')
    try:
        # Validate log access
        if not request.user.is_staff:
            raise ValidationError('Access denied')
        # Validate log name
        validated_log = validate_log_request(log_name)
        # Get log content
        log_content = get_log_content_safely(validated_log)
        return HttpResponse(log_content, content_type='text/plain')
    except ValidationError as e:
        return HttpResponse(f'Error: {e}', status=400)


def validate_log_request(log_name):
    """Return ``log_name`` if it is one of the allowlisted log names.

    Raises:
        ValidationError: if the name is not in the allowlist.
    """
    # Only allow specific log names
    allowed_logs = ['application', 'error', 'access', 'security']
    if log_name not in allowed_logs:
        raise ValidationError('Log file not allowed')
    return log_name


def get_log_content_safely(log_name):
    """Return the text of ``BASE_DIR/logs/<log_name>.log``, limited to its last 1MB.

    Files larger than 1MB are returned as their final 1MB, prefixed with
    '... (truncated)' and decoded with undecodable bytes dropped.

    Raises:
        ValidationError: 'Log path outside allowed directory' if the resolved
            path escapes the log directory, 'Log file not found' if the file
            does not exist, 'Log read error' on any other I/O or decoding
            failure.
    """
    # Define safe log directory
    log_dir = Path(settings.BASE_DIR) / 'logs'
    log_path = log_dir / f'{log_name}.log'
    # Validate path
    try:
        resolved_path = log_path.resolve()
        log_dir_resolved = log_dir.resolve()
        resolved_path.relative_to(log_dir_resolved)
    except ValueError:
        raise ValidationError('Log path outside allowed directory')
    # Read log with size limit. The missing-file case gets its own handler so
    # that 'Log file not found' is not rewritten to 'Log read error' by a
    # blanket exception handler.
    max_size = 1024 * 1024  # 1MB
    try:
        if resolved_path.stat().st_size > max_size:
            # Read last 1MB
            with open(resolved_path, 'rb') as f:
                f.seek(-max_size, os.SEEK_END)
                content = f.read().decode('utf-8', errors='ignore')
            return '... (truncated)\n' + content
        with open(resolved_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError as exc:
        raise ValidationError('Log file not found') from exc
    except (OSError, UnicodeDecodeError) as exc:
        raise ValidationError('Log read error') from exc

💡 Why This Fix Works

See fix suggestions for detailed explanation.

Why it happens

Django views construct file paths using os.path.join() with data from request parameters, URL paths, or form inputs without validation, enabling attackers to inject directory traversal sequences (../) that escape intended directory boundaries and access arbitrary files. The os.path.join(base_dir, user_input) function concatenates path components but doesn't prevent user_input from containing absolute paths or traversal sequences: os.path.join('/var/www', '../etc/passwd') returns '/var/www/../etc/passwd', which the operating system resolves to '/var/etc/passwd', and os.path.join('/var/www', '/etc/passwd') returns '/etc/passwd' because join() discards the base when user_input starts with '/'. Django file download endpoints: file_path = os.path.join(settings.MEDIA_ROOT, request.GET['filename']); open(file_path) allows filename = '../../etc/passwd' to traverse outside MEDIA_ROOT accessing system files. Template rendering or static file serving: os.path.join(template_dir, user_template_name) enables traversal through template paths to read application source code or configuration files. API endpoints accepting file identifiers: os.path.join(uploads_dir, file_id + '.pdf') vulnerable when file_id contains path separators or traversal sequences. User profile or avatar access: os.path.join(user_files_dir, str(user_id), filename) allows traversal even with intermediate directories through filename = '../../other_user/secret.txt'. Log file or report access: os.path.join(log_dir, date, log_name) enables traversal through both date and log_name parameters.

Root causes

Using os.path.join() with Unvalidated User Input from Django Requests

Django views construct file paths using os.path.join() with data from request parameters, URL paths, or form inputs without validation, enabling attackers to inject directory traversal sequences (../) that escape intended directory boundaries and access arbitrary files. The os.path.join(base_dir, user_input) function concatenates path components but doesn't prevent user_input from containing absolute paths or traversal sequences: os.path.join('/var/www', '../etc/passwd') returns '/var/www/../etc/passwd', which the operating system resolves to '/var/etc/passwd', and os.path.join('/var/www', '/etc/passwd') returns '/etc/passwd' because join() discards the base when user_input starts with '/'. Django file download endpoints: file_path = os.path.join(settings.MEDIA_ROOT, request.GET['filename']); open(file_path) allows filename = '../../etc/passwd' to traverse outside MEDIA_ROOT accessing system files. Template rendering or static file serving: os.path.join(template_dir, user_template_name) enables traversal through template paths to read application source code or configuration files. API endpoints accepting file identifiers: os.path.join(uploads_dir, file_id + '.pdf') vulnerable when file_id contains path separators or traversal sequences. User profile or avatar access: os.path.join(user_files_dir, str(user_id), filename) allows traversal even with intermediate directories through filename = '../../other_user/secret.txt'. Log file or report access: os.path.join(log_dir, date, log_name) enables traversal through both date and log_name parameters.

Missing Normalization of File Paths Before Validation

Django applications validate user-provided paths without first normalizing them to canonical form, allowing attackers to bypass validation using encoded sequences, mixed separators, or symbolic links that expand after validation but before file access. Path validation checking for '..' sequences: if '..' not in user_path: os.path.join(base, user_path) fails when attackers use encoded forms like '%2e%2e%2f', URL-encoded '../', or Unicode variations that decode or normalize after validation. Case sensitivity bypass on case-insensitive filesystems: validation blocking '../' but filesystem treating '../' and '..\' identically on Windows allows traversal through case variation. Symbolic link exploitation: validation confirms path within allowed directory, but path contains symlinks pointing outside: /allowed/link/../secret where 'link' symlinks to /sensitive enables traversal post-validation. Mixed path separator usage: validation checking for '/' but attackers using '\' on systems accepting both separators, or vice versa. Double encoding bypass: validation decoding once but application decoding again: '%252e%252e%252f' decodes to '%2e%2e%2f' after first decode, then to '../' after second. Validation sequence order issues: checking traversal patterns before decoding URL encoding allows encoded traversal to pass validation then decode during path construction. Relative vs absolute path confusion: os.path.join() treats absolute paths specially, validation assuming relative paths but os.path.join('/base', '/absolute/path/from/user') returns '/absolute/path/from/user' ignoring base entirely.

Insufficient Filtering of Directory Traversal Sequences

Django applications attempt to filter directory traversal patterns using simple string replacement or incomplete regular expressions that attackers bypass through encoding, repetition, or alternative traversal techniques. String replacement approaches: safe_path = user_path.replace('../', ''); os.path.join(base, safe_path) fails when attackers use '....//', where removing the embedded '../' leaves a new '../' behind because a single replace() pass never re-scans its own output; stripping only '..' instead still lets absolute paths such as '/etc/passwd' through. Regex filtering with incomplete patterns: if re.search(r'\.\.', path): raise Error fails when attackers use '%2e%2e/', '..\\', or other encoded/alternative forms. Blacklist validation attempting to block traversal: BLOCKED = ['../', '..\\', '%2e%2e']; if any(b in path for b in BLOCKED): raise Error remains incomplete as attackers find unlisted variations. Path separator normalization without traversal checking: path.replace('\\', '/') normalizes separators but doesn't remove traversal allowing normalized '../../../etc/passwd'. Single-pass filtering: applying replace() or regex once but not recursively allows nested patterns to survive: '.../...//' becomes '../' after one pass of removing '../'. Validation of filename without full path validation: checking filename for '..' but joining with other user-controlled components: os.path.join(base, user_dir, checked_filename) where user_dir contains traversal. Case-insensitive filesystem bypass: validation case-sensitive for '../' but Windows treating 'dotdot' variations identically.

Trusting User-Provided Relative Paths Without Verification

Django applications accept user-provided relative file paths assuming they remain within intended directories when os.path.join() behavior with leading slashes or specific path constructions allows escaping base directories without using '..' sequences. Absolute path injection: os.path.join('/var/www/uploads', user_path) vulnerable when user_path = '/etc/passwd'—join() returns '/etc/passwd' discarding base path when second argument is absolute. Root directory reference: user_path = '/' makes os.path.join(base, user_path) return '/' regardless of base. UNC path injection on Windows: user_path = '\\\\network\\share\\file' on Windows causes join() to return network path accessing remote files. Drive letter injection on Windows: user_path = 'C:\\Windows\\System32\\config' returns absolute path ignoring base directory. Empty base directory: os.path.join('', user_path) returns user_path verbatim allowing complete path control. User-controlled base and filename: os.path.join(user_base, user_filename) gives complete path control enabling access to any filesystem location. Relative paths with multiple levels: user_path = 'subdir/../../sensitive' combines legitimate-looking subdirectory with traversal that validation might miss. Symlink relative paths: user_path pointing to symlink that itself contains relative paths or traversal that resolves differently after join().

Concatenating User Input Directly into File Paths Without Using join()

Django applications construct file paths using string concatenation, f-strings, or format operations that directly embed user input into path strings, bypassing any safety os.path.join() might provide and enabling trivial directory traversal. String concatenation patterns: file_path = base_dir + '/' + user_filename; open(file_path) allows user_filename = '../../../etc/passwd' to traverse. F-string path construction: file_path = f'{upload_dir}/{user_id}/{filename}' enables traversal through any variable containing path separators or traversal sequences. Format string paths: file_path = '{}/{}/{}'.format(base, subdir, user_file) vulnerable when any format parameter contains traversal. Path building in loops or list comprehensions: paths = [f'{base}/{item}' for item in user_items] creates traversal opportunities through each item. Multi-level concatenation: path = base + '/' + user_category + '/' + user_subcategory + '/' + user_file gives multiple injection points. URL-based path construction: file_path = settings.MEDIA_ROOT + request.path; open(file_path) directly uses URL path enabling request to /../../etc/passwd. Template path concatenation: template_path = template_dir + '/' + request.POST['template'] + '.html' allows template = '../../../settings' to read configuration. API version paths: api_path = f'/api/{version}/{endpoint}' where version from user enables version = '../internal/admin' accessing unauthorized endpoints.

Fixes

1

Validate and Sanitize All Path Components Before Joining with os.path.join()

Implement comprehensive validation of every path component received from user input before using os.path.join() or Path operations, employing strict allowlists for characters, length limits, and format requirements. Use regular expressions to enforce alphanumeric-only filenames: re.match(r'^[a-zA-Z0-9._-]+$', filename) rejecting any input containing path separators (/, \\), traversal sequences (..), null bytes (\x00), or special characters that might enable bypass attacks. Validate filename length (typically max 255 characters) and reject empty strings, hidden files starting with '.', or Windows reserved names (CON, PRN, AUX, NUL, COM1-9, LPT1-9). For directory components, use even stricter allowlists mapping user input to predefined safe values: ALLOWED_DIRS = {'reports': 'monthly_reports', 'uploads': 'user_uploads'}; safe_dir = ALLOWED_DIRS.get(user_dir) preventing arbitrary directory specification. Check file extensions against allowlists of permitted types: ALLOWED_EXTENSIONS = {'.pdf', '.txt', '.jpg'}; if not any(filename.endswith(ext) for ext in ALLOWED_EXTENSIONS): raise ValidationError. Inspect path components for encoded traversal: URL-decode and Unicode-normalize input before validation to catch %2e%2e%2f, double-encoding, or Unicode lookalikes. For numeric identifiers (user IDs, file IDs), parse as integers and validate ranges: user_id = int(request.GET['user_id']); if not 1 <= user_id <= 999999: raise ValidationError ensuring identifiers can't contain path syntax. Create validation functions that combine multiple checks: validate_safe_filename(name) checking format, length, extension, traversal patterns, then returning sanitized value or raising ValidationError with specific error messages. Apply validation at Django form/serializer level using custom validators: forms.CharField(validators=[validate_safe_filename]) catching malicious input early. 
Log validation failures for security monitoring: logger.warning('Path traversal attempt', extra={'user': request.user, 'input': filename, 'ip': request.META['REMOTE_ADDR']}) enabling threat detection. Never rely solely on blacklisting '..' or '../'—use strict allowlists defining what IS permitted rather than attempting to enumerate all dangerous patterns.

2

Use Path.resolve() and Verify the Resolved Path Stays Within Allowed Directories

Convert all file paths to absolute canonical form using pathlib.Path.resolve() or os.path.realpath() before accessing files (os.path.abspath() only normalizes the string and does not resolve symbolic links), then verify the resolved path remains within allowed directory boundaries using Path.relative_to() to prevent traversal escaping base directories. After constructing paths with validated components, resolve them to eliminate symbolic links, relative references, and traversal sequences: from pathlib import Path; base_dir = Path('/var/www/uploads').resolve(); file_path = (base_dir / filename).resolve() where resolve() expands symlinks and normalizes '..' producing canonical absolute paths. Validate the resolved path is within the allowed directory: try: file_path.relative_to(base_dir); except ValueError: raise ValidationError('Path outside allowed directory') where relative_to() raises ValueError if file_path is not a subdirectory of base_dir. This catches traversal attempts that constructed paths like /var/www/uploads/../../../etc/passwd resolving to /etc/passwd failing the relative_to() check. For multiple allowed directories, check against each: ALLOWED_BASES = [Path('/var/www/uploads').resolve(), Path('/var/www/media').resolve()]; if not any(file_path.is_relative_to(base) for base in ALLOWED_BASES): raise ValidationError (Path.is_relative_to() requires Python 3.9+; on older versions wrap relative_to() in a small helper that catches ValueError and returns False). Handle symlink attacks by resolving before validation—symlinks pointing outside allowed directories are detected when resolved path fails relative_to() check. Use resolve(strict=True) (Python 3.6+) to raise FileNotFoundError when the path does not exist instead of silently resolving a non-existent path; note that this alone does not close time-of-check-time-of-use races where attackers create malicious symlinks between validation and access. For Django settings-based paths, resolve settings values once at startup: UPLOAD_DIR_RESOLVED = Path(settings.MEDIA_ROOT).resolve() then use resolved constant in all path operations avoiding re-resolution. 
Create safe path builder functions: def get_safe_path(base, *components): base_resolved = Path(base).resolve(); path = (base_resolved / Path(*components)).resolve(); path.relative_to(base_resolved); return path encapsulating validation logic. Check both the parent directory and final file: ensure parent exists and is within boundaries before checking file existence preventing information disclosure through existence checks. On Windows, be aware of case-insensitivity and alternative data streams (file.txt:stream)—normalize case and reject colons in filenames. Compare resolved paths using Path comparison or os.path.commonpath() to ensure relationships: if not os.path.commonpath([base_dir, file_path]) == base_dir: raise ValidationError providing defense-in-depth.

3

Implement Allowlists for Permitted File Names and Directories

Define explicit allowlists of permitted filenames, file extensions, and directory paths that users can access, rejecting any request that doesn't match allowlist entries rather than attempting to filter out dangerous patterns. For file downloads or access, maintain dictionaries mapping safe identifiers to actual filenames: ALLOWED_FILES = {'report1': 'monthly_report_2024.pdf', 'doc2': 'user_guide.pdf'}; actual_filename = ALLOWED_FILES.get(file_id) where users request 'report1' and application maps to real filename preventing arbitrary file specification. For directory access, map user-facing categories to filesystem paths: ALLOWED_DIRS = {'user_uploads': Path(MEDIA_ROOT) / 'uploads' / 'users', 'public_docs': Path(MEDIA_ROOT) / 'documents' / 'public'}; safe_dir = ALLOWED_DIRS.get(category) rejecting unmapped categories. Implement file extension allowlists for uploads: ALLOWED_UPLOAD_EXTENSIONS = {'.jpg', '.png', '.pdf', '.docx'}; ext = Path(filename).suffix.lower(); if ext not in ALLOWED_UPLOAD_EXTENSIONS: raise ValidationError('File type not allowed') preventing executable uploads or directory traversal through extensions. For template access, enumerate specific allowed templates: ALLOWED_TEMPLATES = {'email': ['welcome.html', 'reset.html', 'confirm.html'], 'reports': ['monthly.html', 'annual.html']}; if template_name not in ALLOWED_TEMPLATES.get(category, []): raise ValidationError. Use database-driven allowlists for dynamic file access: AllowedFile.objects.filter(file_id=requested_id, user=request.user).exists() checking permissions and existence in single query. For user-specific directories, construct paths from validated user IDs: user_dir = UPLOAD_BASE / 'users' / str(validated_user_id) where validated_user_id is authenticated user's integer ID from database preventing traversal through user ID manipulation. 
Implement file type validation beyond extensions: import magic; mime = magic.from_file(file_path, mime=True); if mime not in ALLOWED_MIMES: raise ValidationError checking actual file content not just extension. Create Django management commands to audit allowed files: check_allowed_files verifying all allowlist entries reference existing files and removing stale entries. Document allowlist maintenance procedures: how to add new permitted files, extensions, or directories through secure admin interfaces or deployment processes. For public file access, store allowed files in dedicated public directory with web server configuration serving only that directory: NGINX location /public-files/ serving from /var/www/public-files/ with no execution permissions. Generate allowlists from database: files = File.objects.filter(is_public=True).values_list('filename', flat=True); ALLOWED = set(files) refreshing periodically.

4

Use Django's Secure File Handling Mechanisms (FileSystemStorage, default_storage)

Leverage Django's built-in file storage APIs—FileSystemStorage, default_storage, and model FileField—which provide safe path handling, validation, and access control abstractions that avoid direct path manipulation with os.path.join(). Use Django's default_storage for file operations: from django.core.files.storage import default_storage; if default_storage.exists(filename): with default_storage.open(filename, 'rb') as f: content = f.read() where default_storage abstracts file access and prevents path traversal through its internal path validation. Configure FileSystemStorage with explicit location and base_url: from django.core.files.storage import FileSystemStorage; secure_storage = FileSystemStorage(location='/var/www/secure-uploads', base_url='/secure-files/') then use secure_storage.save(filename, file_content) and secure_storage.open(filename) where FileSystemStorage validates paths and contains files within configured location. For model-based file management, use FileField and ImageField with upload_to: class Document(models.Model): file = models.FileField(upload_to='documents/%Y/%m/%d/') where upload_to automatically organizes files and FileField provides path validation. Access files through model instances: document.file.open() rather than constructing paths manually. Implement custom storage backends for specialized security requirements: class SecureStorage(FileSystemStorage): def get_valid_name(self, name): return super().get_valid_name(name).replace('..', '') def get_available_name(self, name, max_length=None): name = re.sub(r'[^a-zA-Z0-9._-]', '', name); return super().get_available_name(name, max_length) overriding validation methods. Use storage.path(filename) to get filesystem path only after validation: validated_path = default_storage.path(filename) where storage backend has validated filename safety. 
For serving files, use Django's FileResponse: from django.http import FileResponse; return FileResponse(default_storage.open(filename, 'rb'), as_attachment=True, filename=safe_filename) handling content-type detection and safe headers. Implement file access through views that check permissions: if not request.user.has_perm('documents.view_document', document): raise PermissionDenied; return FileResponse(document.file) centralizing authorization. Configure Django's MEDIA_ROOT and MEDIA_URL properly: MEDIA_ROOT = '/var/www/media'; MEDIA_URL = '/media/' then serve media files through Django views in development or dedicated web server in production with proper access controls. Use storage backends for cloud storage: DEFAULT_FILE_STORAGE = 'storages.backends.s3boto3.S3Boto3Storage' with django-storages providing consistent API that prevents local path traversal. Enable storage options for security: FileSystemStorage(directory_permissions_mode=0o700, file_permissions_mode=0o600) restricting file access at OS level. Create helper functions wrapping storage operations: def safe_file_read(file_path): if not default_storage.exists(file_path): raise FileNotFoundError; return default_storage.open(file_path).read() encapsulating safe access patterns.

5

Employ Indirect Object References Instead of Direct File Paths

Replace direct file path handling with indirect object reference patterns where users provide opaque identifiers (UUIDs, database IDs, tokens) that the application maps to actual file paths, eliminating user control over path construction entirely. Implement database-backed file management: class UploadedFile(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4); user = models.ForeignKey(User); file = models.FileField(upload_to='secure/%Y/%m/'); filename = models.CharField(max_length=255); uploaded_at = models.DateTimeField(auto_now_add=True) where users receive UUID (e.g., '550e8400-e29b-41d4-a716-446655440000') instead of real filename. Access files via database lookup: uploaded_file = UploadedFile.objects.get(id=uuid_from_request, user=request.user); return FileResponse(uploaded_file.file) where application controls path and enforces ownership through database query. For temporary file access, generate time-limited tokens: import secrets; file_token = secrets.token_urlsafe(32); cache.set(f'file_token_{file_token}', {'file_id': file.id, 'user_id': user.id}, timeout=3600); return file_token then validate tokens: token_data = cache.get(f'file_token_{token}'); if not token_data or token_data['user_id'] != request.user.id: raise PermissionDenied; file = UploadedFile.objects.get(id=token_data['file_id']) providing secure temporary access without exposing paths. Implement signed URLs for file downloads: from django.core.signing import TimestampSigner; signer = TimestampSigner(); signed_id = signer.sign(file.id); download_url = reverse('file_download', kwargs={'signed_id': signed_id}) then verify signatures: file_id = signer.unsign(signed_id, max_age=3600) (TimestampSigner, not the plain Signer, is the class whose unsign() accepts max_age); file = UploadedFile.objects.get(id=file_id) preventing ID tampering. 
Use content-addressed storage with hashes: file_hash = hashlib.sha256(content).hexdigest(); storage_path = f'content/{file_hash[:2]}/{file_hash[2:4]}/{file_hash}'; FileRecord.objects.create(user_provided_name=filename, storage_hash=file_hash) where users reference files by database ID and application maps to hash-based storage path. For user file organization, maintain virtual directory structure in database: class FileNode(models.Model): name = models.CharField(); parent = models.ForeignKey('self', null=True); user = models.ForeignKey(User); actual_file = models.FileField() allowing user-friendly paths (My Documents/Reports/2024.pdf) mapped to secure storage (secure/abc123/def456.dat). Implement file access logging: class FileAccessLog(models.Model): file = models.ForeignKey(UploadedFile); user = models.ForeignKey(User); accessed_at = models.DateTimeField(auto_now_add=True); action = models.CharField() recording all access for audit trails. Create RESTful APIs using identifiers: GET /api/files/{uuid}/ returning file metadata and download URLs, POST /api/files/ uploading and returning new UUID. Use Django's get_object_or_404() for automatic 404 on invalid IDs: file = get_object_or_404(UploadedFile, id=uuid, user=request.user) combining lookup and authorization. Implement bulk operations with ID lists: file_ids = request.POST.getlist('file_ids'); files = UploadedFile.objects.filter(id__in=file_ids, user=request.user) safely handling multiple files.

6

Normalize Paths and Check for Traversal Patterns Before Using in File Operations

Apply comprehensive path normalization and traversal pattern detection to all user input before file operations, handling various encoding schemes, separator variations, and obfuscation techniques that might bypass simple validation. Normalize path separators to consistent format: normalized = user_path.replace('\\', '/') converting Windows backslashes to forward slashes before validation (or vice versa based on OS). Decode URL encoding to catch encoded traversal: from urllib.parse import unquote; decoded = unquote(user_path) then unquote(decoded) again checking for double-encoding where %252e%252e%252f decodes to %2e%2e%2f then ../. Normalize Unicode representations: import unicodedata; normalized = unicodedata.normalize('NFKC', user_path) converting Unicode lookalikes and combining characters to standard forms catching attacks using U+FF0E (fullwidth period) or U+2215 (division slash) instead of ASCII . and /. Check for common traversal patterns after normalization: TRAVERSAL_PATTERNS = [r'\.\./|\.\.\\', r'%2e%2e[/%5c]', r'\.\.', r'%252e', r'%c0%ae', r'%e0%80%ae']; if any(re.search(pattern, normalized, re.I) for pattern in TRAVERSAL_PATTERNS): raise ValidationError('Traversal sequence detected'). Detect absolute path injection: if normalized.startswith('/') or (len(normalized) > 1 and normalized[1] == ':'): raise ValidationError('Absolute paths not allowed') catching Unix absolute paths and Windows drive letters. Check for null byte injection: if '\x00' in user_path or '%00' in user_path: raise ValidationError('Null bytes not allowed') preventing truncation attacks on C-based file operations. Validate path after normalization with os.path.normpath(): normalized_path = os.path.normpath(user_path); if normalized_path.startswith('..') or '/..' in normalized_path: raise ValidationError catching remaining traversal after normalization. 
For comprehensive defense, combine normalization steps: def normalize_and_validate(user_input): decoded = unquote(unquote(user_input)); unicode_normalized = unicodedata.normalize('NFKC', decoded); separator_normalized = unicode_normalized.replace('\\', '/'); path_normalized = os.path.normpath(separator_normalized); if any([path_normalized.startswith('..'), '/..' in path_normalized, path_normalized.startswith('/'), '\x00' in path_normalized]): raise ValidationError('Invalid path'); return path_normalized applying defense-in-depth. Check for NTFS alternate data streams on Windows: if ':' in filename: raise ValidationError('Alternate data streams not allowed') preventing access to hidden NTFS streams. Detect path equivalence attacks using os.path.samefile() after construction: if os.path.exists(constructed_path) and os.path.exists(sensitive_path): if os.path.samefile(constructed_path, sensitive_path): raise ValidationError checking if constructed path points to protected resource through symlinks. Implement recursive normalization applying operations until stable: prev = user_path; while True: normalized = apply_normalizations(prev); if normalized == prev: break; prev = normalized catching nested encoding. Log normalization discrepancies: if normalized != user_path: logger.warning('Path normalization changed input', extra={'original': user_path, 'normalized': normalized}) identifying potential attack attempts.

Detect This Vulnerability in Your Code

Sourcery automatically identifies Django path traversal via os.path.join() and many other security issues in your codebase.