Python urllib Insecure FTP urlretrieve Vulnerability

High Risk · Insecure Transport
Tags: Python, urllib, FTP, urlretrieve, Plaintext Credentials, File Download, Insecure Transport

What it is

Application uses urllib.request.urlretrieve() with FTP URLs to download files, transmitting data and credentials in plaintext over insecure FTP connections.

Vulnerable pattern:

import urllib.request
from flask import Flask, request

app = Flask(__name__)

@app.route('/download_backup')
def download_backup():
    # Vulnerable: FTP credentials in URL
    ftp_url = 'ftp://backup_user:secret123@ftp.company.com/backups/data.zip'
    local_file = '/tmp/backup.zip'
    # Extremely dangerous: Credentials and data transmitted in plaintext
    urllib.request.urlretrieve(ftp_url, local_file)
    return 'Backup downloaded'

@app.route('/fetch_file')
def fetch_file():
    # Vulnerable: User-controlled FTP download
    server = request.args.get('server')
    path = request.args.get('path')
    filename = request.args.get('filename')
    # Dangerous: No validation of FTP protocol usage
    ftp_url = f'ftp://anonymous@{server}/{path}'
    local_path = f'/downloads/{filename}'
    urllib.request.urlretrieve(ftp_url, local_path)
    return f'Downloaded {filename}'
Secure alternative:

import urllib.request
import urllib.parse
import urllib.error
import ssl
import os
from flask import Flask, request

app = Flask(__name__)

def validate_secure_download_protocol(url):
    """Ensure only secure protocols are used."""
    parsed = urllib.parse.urlparse(url)
    secure_protocols = ['https', 'sftp', 'ftps']
    if parsed.scheme not in secure_protocols:
        raise ValueError(f'Insecure protocol {parsed.scheme}. Use: {secure_protocols}')
    return parsed

@app.route('/download_backup')
def download_backup():
    """Secure backup download using HTTPS API."""
    try:
        # Secure: Use HTTPS API instead of FTP
        backup_api_url = 'https://secure-backup.company.com/api/backups/latest'
        validate_secure_download_protocol(backup_api_url)

        # Create secure request with authentication
        request_obj = urllib.request.Request(
            backup_api_url,
            headers={
                'Authorization': 'Bearer YOUR_SECURE_TOKEN',
                'User-Agent': 'SecureBackupClient/1.0'
            }
        )

        # Secure SSL context
        context = ssl.create_default_context()

        # Download with timeout and size limits
        with urllib.request.urlopen(request_obj, timeout=300, context=context) as response:
            if response.headers.get('Content-Type') != 'application/zip':
                raise ValueError('Invalid backup file type')

            # Secure download directory
            download_dir = '/var/secure_backups'
            os.makedirs(download_dir, exist_ok=True, mode=0o700)
            local_file = os.path.join(download_dir, 'backup.zip')

            # Download with size limit (100MB)
            max_size = 100 * 1024 * 1024
            downloaded = 0
            with open(local_file, 'wb') as f:
                while True:
                    chunk = response.read(8192)
                    if not chunk:
                        break
                    downloaded += len(chunk)
                    if downloaded > max_size:
                        os.remove(local_file)
                        raise ValueError('Backup file too large')
                    f.write(chunk)

        return {
            'status': 'success',
            'file': local_file,
            'size': downloaded
        }
    except (ValueError, urllib.error.URLError) as e:
        return {'error': f'Backup download failed: {str(e)}'}, 500

@app.route('/fetch_file')
def fetch_file():
    """Secure file download with strict validation."""
    return {
        'error': 'FTP downloads disabled for security. Use secure HTTPS API endpoints.'
    }, 400

# Secure alternative using SFTP
class SecureFileDownloader:
    """Secure file downloader using encrypted protocols."""

    def __init__(self):
        self.allowed_hosts = [
            'secure-files.company.com',
            'backup.trusted.com'
        ]

    def download_via_https(self, url, filename, max_size=50 * 1024 * 1024):
        """Download file securely via HTTPS."""
        # Validate URL
        parsed = urllib.parse.urlparse(url)
        if parsed.scheme != 'https':
            raise ValueError('Only HTTPS downloads allowed')
        if parsed.netloc not in self.allowed_hosts:
            raise ValueError(f'Host {parsed.netloc} not in allowlist')

        # Validate filename
        if '..' in filename or '/' in filename:
            raise ValueError('Invalid filename')

        # Secure download
        context = ssl.create_default_context()
        try:
            with urllib.request.urlopen(url, timeout=60, context=context) as response:
                # Check content type
                content_type = response.headers.get('Content-Type', '')
                safe_types = [
                    'application/octet-stream',
                    'application/zip',
                    'text/plain',
                    'application/pdf'
                ]
                if not any(ct in content_type for ct in safe_types):
                    raise ValueError(f'Unsafe content type: {content_type}')

                # Secure download with size check
                content_length = response.headers.get('Content-Length')
                if content_length and int(content_length) > max_size:
                    raise ValueError('File too large')

                # Create secure directory
                download_dir = '/var/secure_downloads'
                os.makedirs(download_dir, exist_ok=True, mode=0o700)
                local_path = os.path.join(download_dir, filename)

                downloaded = 0
                with open(local_path, 'wb') as f:
                    while True:
                        chunk = response.read(8192)
                        if not chunk:
                            break
                        downloaded += len(chunk)
                        if downloaded > max_size:
                            os.remove(local_path)
                            raise ValueError('Download size exceeded')
                        f.write(chunk)

                return local_path, downloaded
        except Exception as e:
            raise RuntimeError(f'Download failed: {str(e)}')

    def download_via_sftp(self, host, remote_path, local_filename, username, key_file):
        """Download file securely via SFTP."""
        if host not in self.allowed_hosts:
            raise ValueError(f'Host {host} not allowed')

        try:
            import paramiko

            # Create SSH client with strict security
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.RejectPolicy())

            # Connect with key authentication
            ssh.connect(
                host,
                username=username,
                key_filename=key_file,
                timeout=30,
                compress=True
            )

            sftp = ssh.open_sftp()

            # Check file size
            file_stat = sftp.stat(remote_path)
            if file_stat.st_size > 50 * 1024 * 1024:  # 50MB limit
                raise ValueError('File too large for SFTP download')

            # Secure local path
            download_dir = '/var/secure_downloads'
            os.makedirs(download_dir, exist_ok=True, mode=0o700)
            local_path = os.path.join(download_dir, local_filename)

            # Download file
            sftp.get(remote_path, local_path)
            sftp.close()
            ssh.close()

            return local_path, file_stat.st_size
        except ImportError:
            raise RuntimeError('paramiko library required for SFTP')
        except Exception as e:
            raise RuntimeError(f'SFTP download failed: {str(e)}')

@app.route('/secure_download')
def secure_download():
    """Secure file download endpoint."""
    download_url = request.args.get('url', '')
    filename = request.args.get('filename', '')

    if not download_url or not filename:
        return {'error': 'URL and filename required'}, 400

    try:
        downloader = SecureFileDownloader()
        local_path, size = downloader.download_via_https(download_url, filename)
        return {
            'status': 'downloaded',
            'file': os.path.basename(local_path),
            'size': size
        }
    except (ValueError, RuntimeError) as e:
        return {'error': str(e)}, 500

Why it happens

FTP predates transport encryption: usernames, passwords, and file contents all cross the network in cleartext, and urllib.request.urlretrieve() will follow an ftp:// URL without complaint. The pattern typically survives in legacy download code, hard-coded or configuration-supplied FTP URLs, and scheduled transfer jobs, so every request exposes credentials and data to anyone positioned to intercept the traffic.

Root causes

Using urllib.request.urlretrieve() with FTP URLs

Code downloads files via FTP: urllib.request.urlretrieve('ftp://ftp.example.com/file.zip', 'local.zip'). FTP transmits authentication and data unencrypted. Credentials and file contents exposed to network interception. Legacy file download code using insecure FTP protocol.

FTP Credentials in URLs or Code

Embedded FTP credentials: urlretrieve('ftp://user:password@ftp.example.com/file'). Username and password sit in the URL, so they get logged, stored in code repositories, and transmitted in cleartext over the network. URL-embedded credentials compound FTP's insecure transport with direct credential exposure.
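A minimal sketch of the safer shape, assuming a hypothetical ftp.example.com host that supports FTPS and environment variables named FTP_USER and FTP_PASS: credentials stay out of the code and the URL, and both control and data channels are wrapped in TLS.

import os
from ftplib import FTP_TLS

# Anti-pattern (kept for contrast): credentials embedded in the URL end up in
# logs, shell history, and version control, and cross the network in cleartext.
# urllib.request.urlretrieve('ftp://user:password@ftp.example.com/file', 'local')

# Sketch: read credentials from the environment and use explicit FTPS instead.
ftps = FTP_TLS('ftp.example.com')
ftps.login(os.environ['FTP_USER'], os.environ['FTP_PASS'])
ftps.prot_p()  # encrypt the data channel as well as the control channel
with open('file.zip', 'wb') as f:
    ftps.retrbinary('RETR /files/file.zip', f.write)
ftps.quit()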

Using urlretrieve() Without URL Scheme Validation

Downloading without protocol checks: url = config['file_url']; urlretrieve(url, filename). Configuration may contain ftp:// URLs. No validation ensuring secure protocols. Legacy configurations or external sources specify FTP. Missing scheme validation allows insecure file downloads.
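A sketch of how this gap typically looks in practice; the config file name and key are hypothetical. Nothing between the configuration value and urlretrieve() checks the scheme, so a stale ftp:// entry silently downgrades the transfer to plaintext.

import json
import urllib.request

with open('downloader.json') as f:
    config = json.load(f)

# If config['file_url'] still holds a legacy value such as
# 'ftp://files.example.com/export.csv', this line fetches it unencrypted
# without any warning; see Fix 3 below for a scheme allowlist guard.
urllib.request.urlretrieve(config['file_url'], 'export.csv')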

Automated File Downloads from FTP in Scheduled Tasks

Cron jobs or scheduled downloads using FTP: urlretrieve(ftp_url, backup_file). Legacy integrations with FTP servers. Regular data transfers over unencrypted FTP. Backup files, data feeds, or system updates transmitted insecurely. Scheduled automation compounds risk through regular exposure.
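A sketch of the kind of nightly job this describes; the host, credentials, schedule, and paths are made up for illustration. The point is that the same cleartext credentials cross the network on every run, so a single captured transfer compromises the account and every future backup.

# Typically wired up via cron, e.g.:  0 2 * * * /usr/bin/python3 /opt/jobs/pull_backup.py
import datetime
import urllib.request

today = datetime.date.today().isoformat()
ftp_url = f'ftp://backup:backup-pass@ftp.internal.example.com/nightly/{today}.tar.gz'

# Every scheduled run re-sends the same username and password in plaintext.
urllib.request.urlretrieve(ftp_url, f'/var/backups/{today}.tar.gz')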

Using FTP for Software or Update Downloads

Downloading software via FTP: urlretrieve('ftp://updates.example.com/app.tar.gz', 'update.tar.gz'). Application updates over unencrypted FTP. No integrity verification. Man-in-the-middle can inject malicious code. FTP for software distribution enables supply chain attacks.
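A sketch of the risky update flow, with hypothetical URLs and install paths, to make the supply-chain angle concrete: with no encryption and no integrity check, whatever arrives gets unpacked straight into the application directory.

import tarfile
import urllib.request

# Plaintext FTP plus no checksum or signature verification: anyone on the
# network path can substitute a malicious app.tar.gz before it reaches us.
urllib.request.urlretrieve('ftp://updates.example.com/app.tar.gz', '/tmp/update.tar.gz')

with tarfile.open('/tmp/update.tar.gz') as tar:
    # The unverified archive is unpacked into the install directory.
    tar.extractall('/opt/app')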

Fixes

1. Replace FTP with HTTPS for File Downloads

Use HTTPS instead: import requests; r = requests.get('https://example.com/file.zip'); open('local.zip', 'wb').write(r.content). Or with urllib: urlretrieve('https://example.com/file.zip', 'local.zip'). HTTPS provides encryption and server authentication. Modern servers should offer HTTPS endpoints for file downloads.
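A minimal streaming sketch with the requests library; the URL, timeout, and size cap are illustrative choices, not requirements. Streaming the body in chunks avoids loading large files into memory and lets the client enforce a size limit.

import requests

url = 'https://files.example.com/backups/data.zip'
max_size = 100 * 1024 * 1024  # 100 MB cap, adjust as needed

with requests.get(url, stream=True, timeout=60) as r:
    r.raise_for_status()  # fail fast on HTTP errors
    downloaded = 0
    with open('local.zip', 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            downloaded += len(chunk)
            if downloaded > max_size:
                raise ValueError('Download exceeded size limit')
            f.write(chunk)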

2. Use SFTP or FTPS for Encrypted FTP Transfers

For FTP servers, use secure variants: import paramiko; sftp = paramiko.SFTPClient.from_transport(transport); sftp.get('remote_file', 'local_file'). Or FTP_TLS: from ftplib import FTP_TLS; ftps = FTP_TLS('host'). Both provide encryption. SFTP uses SSH, FTPS uses TLS.
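A minimal SFTP sketch with paramiko; the hostname, username, and key path are placeholders. Host-key checking is left strict (RejectPolicy) so the client refuses to talk to an unknown server instead of silently trusting it.

import paramiko

ssh = paramiko.SSHClient()
ssh.load_system_host_keys()  # trust only hosts already in known_hosts
ssh.set_missing_host_key_policy(paramiko.RejectPolicy())
ssh.connect('files.example.com', username='deploy',
            key_filename='/home/deploy/.ssh/id_ed25519', timeout=30)
try:
    sftp = ssh.open_sftp()
    sftp.get('/exports/data.zip', '/var/downloads/data.zip')
    sftp.close()
finally:
    ssh.close()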

3. Validate URL Schemes, Reject Insecure Protocols

Check the scheme before calling urlretrieve: from urllib.parse import urlparse; if urlparse(url).scheme != 'https': raise ValueError('Secure protocol required'); urlretrieve(url, filename). Allowlist secure protocols and reject ftp and http; note that urllib itself cannot fetch ftps:// or sftp:// URLs, so those need a dedicated client such as ftplib.FTP_TLS or paramiko. Validation prevents insecure downloads.
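A small guard sketch; safe_urlretrieve is a hypothetical helper name, not a standard-library function. It centralizes the allowlist so no call site can reach urlretrieve() with an unchecked URL.

import urllib.request
from urllib.parse import urlparse

ALLOWED_SCHEMES = {'https'}  # urllib can also fetch ftp://, which is exactly what we refuse

def safe_urlretrieve(url, filename):
    """Download only over an allowlisted, encrypted scheme."""
    scheme = urlparse(url).scheme.lower()
    if scheme not in ALLOWED_SCHEMES:
        # ftp:// and http:// are rejected; ftps/sftp need ftplib.FTP_TLS or paramiko
        raise ValueError(f'Secure protocol required, got {scheme!r}')
    return urllib.request.urlretrieve(url, filename)

safe_urlretrieve('https://files.example.com/report.pdf', '/tmp/report.pdf')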

4. Implement Cryptographic Verification of Downloaded Files

Verify file integrity: import hashlib; sha256 = hashlib.sha256(open('file', 'rb').read()).hexdigest(); if sha256 != expected_hash: raise ValueError('Hash mismatch'). Download and verify checksums. Use GPG signatures for software. Cryptographic verification detects tampering even with encrypted transport.
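A checksum sketch; the expected hash value is a placeholder that would normally come from a trusted release manifest fetched over HTTPS or verified with GPG. Hashing in chunks keeps memory use flat for large files.

import hashlib

def sha256_of(path, chunk_size=8192):
    """Hash a file in chunks so large downloads do not need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

expected_hash = '0000000000000000000000000000000000000000000000000000000000000000'  # placeholder
if sha256_of('/tmp/update.tar.gz') != expected_hash:
    raise ValueError('Hash mismatch: downloaded file may have been tampered with')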

5. Use Object Storage APIs Instead of FTP

Migrate to cloud storage: import boto3; s3 = boto3.client('s3'); s3.download_file('bucket', 'key', 'local_file'). S3, GCS, Azure Blob provide HTTPS APIs. Built-in encryption, access control, versioning. Modern alternative to FTP with better security features.
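A minimal boto3 sketch; the bucket and key names are placeholders, and credentials are assumed to come from the environment or an instance role rather than from the code.

import boto3

# Transfers run over HTTPS with signed requests; no passwords appear in URLs.
s3 = boto3.client('s3')
s3.download_file('example-backups', 'nightly/data.zip', '/var/downloads/data.zip')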

6. Scan Codebase for urlretrieve with FTP and Replace All Usage

Find insecure usage: grep -r 'urlretrieve.*ftp://' --include="*.py". Use bandit for detection. Replace all FTP downloads with HTTPS or SFTP. Update configuration files. Remove FTP server dependencies. Complete migration prevents future insecure downloads.
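A rough repo-scan sketch in Python for teams that want something beyond a one-off grep; the pattern and paths are assumptions, and it complements rather than replaces a proper run of bandit.

import pathlib

for path in pathlib.Path('.').rglob('*.py'):
    for lineno, line in enumerate(path.read_text(errors='ignore').splitlines(), 1):
        # Flag plain ftp:// literals but skip sftp:// ones.
        if 'ftp://' in line and 'sftp://' not in line:
            print(f'{path}:{lineno}: possible insecure FTP usage -> {line.strip()}')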

Detect This Vulnerability in Your Code

Sourcery automatically identifies insecure FTP downloads made with urllib.request.urlretrieve(), along with many other security issues in your codebase.