Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Document Authenticity Analysis Script | |
| Purpose: | |
| Performs forensic analysis on document images to detect tampering and verify authenticity. | |
| Uses Error Level Analysis (ELA) and metadata examination techniques. | |
| Why this script exists: | |
| - Need to verify document authenticity in verification workflows | |
| - Detect potential image tampering or editing | |
| - Provide forensic evidence for document verification | |
| - Support fraud prevention in document processing systems | |
| Key Features: | |
| - Error Level Analysis (ELA) for tampering detection | |
| - EXIF metadata analysis for editing software detection | |
| - Brightness ratio analysis for tampering assessment | |
| - Support for single document analysis | |
| Dependencies: | |
| - Pillow (PIL): Image processing (https://pillow.readthedocs.io/) | |
| - exifread: EXIF metadata extraction (https://pypi.org/project/ExifRead/) | |
| - numpy: Numerical operations (https://numpy.org/doc/) | |
| - requests: HTTP library (https://docs.python-requests.org/) | |
| Usage: | |
| python analyze_document.py "https://example.com/document.jpg" | |
| Output: | |
| JSON with tampering and metadata analysis results | |
| """ | |
| import sys, json, requests, exifread,numpy as np | |
| from PIL import Image, ImageChops, ImageEnhance | |
| from io import BytesIO | |
| import uuid, os | |
# Lower-case name fragments of photo editing software to look for in metadata.
# Finding one of these in an image's metadata hints that the image was edited.
software_list = [
    'photoshop',
    'lightroom',
    'gimp',
    'paint',
    'paint.net',
    'paintshop pro',
    'paintshop pro x',
    # PaintShop Pro X2 through X10
    *(f'paintshop pro x{version}' for version in range(2, 11)),
]
def download_image(url, output_path='temp_image.jpg'):
    """
    Download an image from a URL and save it locally as an RGB JPEG.

    Args:
        url (str): Image URL, or a JSON string wrapping one. Supported JSON
            shapes are ``{"url": "..."}``, ``["..."]`` and ``[{"url": "..."}]``.
        output_path (str): Local path the JPEG is written to.

    Returns:
        str: ``output_path`` on success, or None if no usable URL could be
        determined or the download / decoding / saving failed.

    Why this approach:
        - Handles JSON-wrapped URLs (common in web applications) and rejects
          JSON payloads that do not contain a string URL instead of crashing
        - Flattens RGBA onto a white background because JPEG has no alpha channel
        - Carries the source image's raw EXIF block into the saved JPEG (best
          effort) so that later metadata analysis of the saved file has
          something to inspect
        - Uses high JPEG quality (95) to preserve analysis accuracy
        - Every failure is logged to stderr and reported as None
    """
    print(f"DOCUMENT Starting download for URL: {url}", file=sys.stderr)

    # Handle URL that might be a JSON string
    if isinstance(url, str) and url.startswith(('{', '[')):
        try:
            url_data = json.loads(url)
        except json.JSONDecodeError as e:
            print(f"DOCUMENT Error parsing URL JSON: {url}, Error: {str(e)}", file=sys.stderr)
            return None
        print(f"DOCUMENT Parsed JSON URL data: {url_data}", file=sys.stderr)

        if isinstance(url_data, dict):
            url = url_data.get('url')
            print(f"DOCUMENT Extracted URL from dict: {url}", file=sys.stderr)
        elif isinstance(url_data, list) and url_data:
            first_entry = url_data[0]
            # Only dicts have .get(); anything else is used as-is and validated below.
            url = first_entry.get('url') if isinstance(first_entry, dict) else first_entry
            print(f"DOCUMENT Extracted URL from list: {url}", file=sys.stderr)
        else:
            url = None

        # The JSON payload must yield a string; numbers, nulls, nested
        # containers, etc. cannot be downloaded.
        if not isinstance(url, str):
            print("DOCUMENT No usable URL found in JSON data", file=sys.stderr)
            return None

    if not url:
        print("DOCUMENT Empty URL after processing", file=sys.stderr)
        return None

    try:
        # Browser-like User-Agent sent with the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        image_data = response.content
        print(f"DOCUMENT Downloaded image from {url}", file=sys.stderr)
        print(f"DOCUMENT Image data size: {len(image_data)} bytes", file=sys.stderr)

        # Process the image
        image = Image.open(BytesIO(image_data))
        original_mode = image.mode
        # Grab the raw EXIF block before any conversion replaces the image object.
        exif_bytes = image.info.get('exif')
        print(f"DOCUMENT Original image format: {image.format}", file=sys.stderr)
        print(f"DOCUMENT Original image mode: {original_mode}", file=sys.stderr)

        # Convert to RGB if necessary (JPEG doesn't support alpha channel)
        if original_mode == 'RGBA':
            background = Image.new('RGB', image.size, (255, 255, 255))
            background.paste(image, mask=image.split()[-1])  # alpha channel as mask
            image = background
            print("DOCUMENT Converted RGBA to RGB", file=sys.stderr)
        elif original_mode != 'RGB':
            image = image.convert('RGB')
            # Log the mode the image had *before* conversion.
            print(f"DOCUMENT Converted {original_mode} to RGB", file=sys.stderr)

        # Save as JPEG, keeping the EXIF block when there is one. If the EXIF
        # data cannot be written, fall back to saving without it rather than
        # failing the whole download.
        if isinstance(exif_bytes, bytes) and exif_bytes:
            try:
                image.save(output_path, 'JPEG', quality=95, exif=exif_bytes)
            except Exception as exif_error:
                print(f"DOCUMENT Could not preserve EXIF ({exif_error}); saving without it", file=sys.stderr)
                image.save(output_path, 'JPEG', quality=95)
        else:
            image.save(output_path, 'JPEG', quality=95)
        print(f"DOCUMENT Saved image to {output_path}", file=sys.stderr)
        return output_path
    except Exception as e:
        # Top-level boundary for network, decoding and file errors.
        print(f"DOCUMENT Error processing image: {str(e)}", file=sys.stderr)
        return None
| # Tampering Function | |
def perform_error_level_analysis(image_path, output_path="ELA.png", quality=90):
    """
    Perform Error Level Analysis (ELA) on an image.

    The image is recompressed as JPEG at ``quality``, the per-pixel absolute
    difference between the image and its recompressed copy is computed, the
    difference is brightened by a factor of 10 and written as a PNG.

    Args:
        image_path (str): Path to original image
        output_path (str): Path to save ELA result (PNG)
        quality (int): JPEG quality for recompression

    Returns:
        str: ``output_path`` on success, or None if any step fails (the error
        is logged to stderr).

    Why ELA:
        - Works by comparing the original image with a recompressed version
        - Edited areas tend to show different compression artifacts
        - Standard forensic technique for image authenticity

    Implementation note:
        The recompressed copy is kept in memory, so no temporary file is
        created and nothing needs cleaning up when a step fails.
    """
    print("DOCUMENT TAMPERING Starting Error Level Analysis...", file=sys.stderr)
    try:
        with Image.open(image_path) as source_image:
            if source_image.mode == 'RGBA':
                # JPEG doesn't support an alpha channel: flatten onto white,
                # using the alpha channel as the paste mask.
                original_image = Image.new('RGB', source_image.size, (255, 255, 255))
                original_image.paste(source_image, mask=source_image.split()[-1])
            else:
                # convert() returns a fully loaded image, so the pixel data
                # stays usable after the source file is closed.
                original_image = source_image.convert('RGB')

        # Recompress into an in-memory buffer instead of a temp file on disk.
        recompressed = BytesIO()
        original_image.save(recompressed, "JPEG", quality=quality)
        recompressed.seek(0)

        with Image.open(recompressed) as resaved_image:
            difference = ImageChops.difference(original_image, resaved_image)

        # Differences are usually faint; amplify them so they become visible.
        difference = ImageEnhance.Brightness(difference).enhance(10)

        # Save the difference image
        difference.save(output_path, format='PNG')
        print(f"DOCUMENT TAMPERING ELA analysis completed, saved to {output_path}", file=sys.stderr)
        return output_path
    except Exception as e:
        print(f"DOCUMENT TAMPERING Error during ELA: {str(e)}", file=sys.stderr)
        return None
def detect_tampering(ela_path, threshold=100):
    """
    Analyze ELA results to detect tampering.

    Counts the values in the ELA image array that exceed ``threshold`` and
    classifies the image by the fraction of such values among all array
    elements (every channel value counts as one element).

    Args:
        ela_path (str): Path to ELA result image
        threshold (int): Brightness threshold; array values strictly greater
            than this count as "bright"

    Returns:
        dict: ``{"tampered": <str>, "brightness_ratio": <float>}`` where
        ``brightness_ratio`` is rounded to 4 decimals and ``tampered`` is one
        of the strings:
            - "False"                   ratio < 0.02
            - "Investigation Required"  0.02 <= ratio <= 0.05
            - "True"                    ratio > 0.05

    Why this approach:
        - Analyzes brightness distribution in ELA image
        - High brightness indicates areas of potential tampering
        - The branches cover every ratio, so a result dict is always returned
          (including for a ratio of exactly 0.02)
    """
    print("DOCUMENT TAMPERING Analyzing ELA results...", file=sys.stderr)

    # Load the pixel data and release the file handle right away.
    with Image.open(ela_path) as ela_image:
        ela_array = np.array(ela_image)

    bright_pixels = np.sum(ela_array > threshold)
    total_pixels = ela_array.size
    brightness_ratio = bright_pixels / total_pixels
    print(f"DOCUMENT TAMPERING Brightness ratio: {brightness_ratio:.4f}", file=sys.stderr)

    if brightness_ratio < 0.02:
        verdict = "False"
    elif brightness_ratio <= 0.05:
        verdict = "Investigation Required"
    else:
        verdict = "True"

    return {
        "tampered": verdict,
        "brightness_ratio": round(float(brightness_ratio), 4)
    }
| # Metadata Analysis Function (TO BE TESTED IN MOBILE) | |
def analyze_metadata(image_path):
    """
    Inspect an image's EXIF tags for timestamps, camera details and editing software.

    Args:
        image_path (str): Path to image file

    Returns:
        dict: Always contains "result", "message" and "metadata" (tag name ->
        stringified tag value). "result" is one of:
            - "error":            the file could not be read / parsed
            - "no metadata":      no tags were found
            - "minimal metadata": tags exist, but no timestamp, camera or
                                  software tag among them
            - "edited":           the "Image Software" tag contains an entry
                                  of ``software_list``
            - "success":          none of the above
        All results except "error" and "no metadata" also carry an "analysis"
        string.
    """
    print("DOCUMENT METADATA Starting metadata analysis...", file=sys.stderr)

    try:
        with open(image_path, 'rb') as image_file:
            exif_tags = exifread.process_file(image_file)
        metadata = {name: str(value) for name, value in exif_tags.items()}

        # Debug: report what was found
        print(f"DOCUMENT METADATA Found {len(metadata)} metadata tags", file=sys.stderr)
        if metadata:
            print(f"DOCUMENT METADATA Metadata keys: {list(metadata.keys())}", file=sys.stderr)
    except Exception as read_error:
        print(f"DOCUMENT METADATA Error reading metadata: {str(read_error)}", file=sys.stderr)
        return {
            "result": "error",
            "message": f"Error reading metadata: {str(read_error)}",
            "metadata": {}
        }

    # Guard: nothing to analyze
    if not metadata:
        print(f"DOCUMENT METADATA No metadata found in {image_path}", file=sys.stderr)
        return {
            "result": "no metadata",
            "message": "No metadata found in image.",
            "metadata": metadata
        }

    timestamp_keys = ('EXIF DateTimeOriginal', 'Image DateTime', 'EXIF DateTime')
    camera_keys = ('Image Make', 'Image Model', 'EXIF Make', 'EXIF Model')

    has_timestamp = not metadata.keys().isdisjoint(timestamp_keys)
    has_camera_info = not metadata.keys().isdisjoint(camera_keys)
    # Lower-cased so it can be compared against the lower-case software_list
    software_used = metadata.get('Image Software', '').lower()

    # Guard: tags exist, but none of the ones this analysis looks at
    if not (has_timestamp or has_camera_info or software_used):
        return {
            "result": "minimal metadata",
            "message": "Image contains minimal metadata (no timestamp, camera info, or editing software detected)",
            "metadata": metadata,
            "analysis": "Image appears to be legitimate with no signs of editing"
        }

    # Guard: known editing software named in the software tag
    for known_editor in software_list:
        if known_editor in software_used:
            return {
                "result": "edited",
                "message": f"Image appears to have been edited with software: {software_used}",
                "metadata": metadata,
                "analysis": "Suspicious editing software detected"
            }

    # Summarize whichever indicators were present
    indicators = (
        (has_timestamp, "Timestamp found"),
        (has_camera_info, "Camera information found"),
        (software_used, f"Software detected: {software_used}"),
    )
    analysis_summary = [label for present, label in indicators if present]

    return {
        "result": "success",
        "message": f"Successfully analyzed metadata: {', '.join(analysis_summary)}",
        "metadata": metadata,
        "analysis": "Image appears to be legitimate with no signs of editing"
    }
| # Main execution | |
def main(argv):
    """
    Run the full analysis for the URL given on the command line.

    Prints exactly one JSON document to stdout: the combined results on
    success, or an error object on failure.

    Args:
        argv (list): Command line arguments; ``argv[1]`` is the image URL.

    Returns:
        int: Process exit code, 0 on success and 1 on any failure.
    """
    # Local imports: only this entry point needs them.
    import shutil
    import tempfile

    # Validate command line arguments
    if len(argv) < 2:
        print(json.dumps({"error": "No image URL provided"}))
        return 1

    image_url = argv[1]
    work_dir = None
    try:
        # Keep intermediate files in a per-run directory so that concurrent
        # runs cannot overwrite each other's files.
        work_dir = tempfile.mkdtemp(prefix="document_analysis_")

        # Download and process the image
        image_path = download_image(image_url, os.path.join(work_dir, 'temp_image.jpg'))
        if not image_path:
            raise RuntimeError("Failed to download image")

        # Perform metadata analysis
        metadata_results = analyze_metadata(image_path)

        # Perform ELA and tampering detection
        ela_path = perform_error_level_analysis(image_path, os.path.join(work_dir, 'ELA.png'))
        if not ela_path:
            raise RuntimeError("Failed to perform error level analysis")
        tampering_results = detect_tampering(ela_path)

        # Return combined results
        print(json.dumps({
            "success": True,
            "tampering_results": tampering_results,
            "metadata_results": metadata_results
        }))
        return 0
    except Exception as e:
        # Top-level boundary: report every failure as JSON.
        print(json.dumps({
            "success": False,
            "error": str(e),
            "context": {
                "url": image_url
            }
        }))
        return 1
    finally:
        # Best-effort cleanup on success and failure alike; a cleanup problem
        # must not change the reported result.
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)


# Main execution
if __name__ == "__main__":
    sys.exit(main(sys.argv))