File size: 13,358 Bytes
db10255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1f4fc
 
 
 
db10255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#!/usr/bin/env python3
"""
Document Authenticity Analysis Script

Purpose:
    Performs forensic analysis on document images to detect tampering and verify authenticity.
    Uses Error Level Analysis (ELA) and metadata examination techniques.

Why this script exists:
    - Need to verify document authenticity in verification workflows
    - Detect potential image tampering or editing
    - Provide forensic evidence for document verification
    - Support fraud prevention in document processing systems

Key Features:
    - Error Level Analysis (ELA) for tampering detection
    - EXIF metadata analysis for editing software detection
    - Brightness ratio analysis for tampering assessment
    - Support for single document analysis

Dependencies:
    - Pillow (PIL): Image processing (https://pillow.readthedocs.io/)
    - exifread: EXIF metadata extraction (https://pypi.org/project/ExifRead/)
    - numpy: Numerical operations (https://numpy.org/doc/)
    - requests: HTTP library (https://docs.python-requests.org/)

Usage:
    python analyze_document.py "https://example.com/document.jpg"

Output:
    JSON with tampering and metadata analysis results
"""

import sys, json, requests, exifread,numpy as np
from PIL import Image, ImageChops, ImageEnhance
from io import BytesIO
import uuid, os

# Names of photo editing tools searched for in the image's software metadata.
# A hit indicates the image has passed through an editor.
software_list = [
    'photoshop',
    'lightroom',
    'gimp',
    'paint',
    'paint.net',
    'paintshop pro',
    'paintshop pro x',
] + [f'paintshop pro x{release}' for release in range(2, 11)]

def download_image(url, output_path='temp_image.jpg'):
    """
    Download an image from a URL and save it locally as an RGB JPEG.

    Args:
        url (str): Image URL, or a JSON string wrapping one. Accepted JSON
            shapes are an object with a 'url' key, or a list whose first
            element is either a URL string or an object with a 'url' key.
        output_path (str): Local path the JPEG is written to.

    Returns:
        str: output_path on success, or None if no usable URL could be
        extracted, the download failed, or the image could not be processed.

    Why this approach:
    - Handles JSON-wrapped URLs (common in web applications)
    - Rejects JSON that carries no usable URL instead of sending the raw
      JSON text to the HTTP client
    - Flattens RGBA onto white and converts other modes to RGB, because
      JPEG has no alpha channel
    - Carries the source EXIF block over to the saved file when possible so
      that later metadata analysis of the saved file sees the original tags
    - Never raises: every failure is logged to stderr and reported as None
    """
    print(f"DOCUMENT Starting download for URL: {url}", file=sys.stderr)

    # Handle URL that might be a JSON string
    if isinstance(url, str) and url.startswith(('{', '[')):
        try:
            url_data = json.loads(url)
        except json.JSONDecodeError as e:
            print(f"DOCUMENT Error parsing URL JSON: {url}, Error: {str(e)}", file=sys.stderr)
            return None

        print(f"DOCUMENT Parsed JSON URL data: {url_data}", file=sys.stderr)
        extracted = ''
        if isinstance(url_data, dict):
            extracted = url_data.get('url', '')
            print(f"DOCUMENT Extracted URL from dict: {extracted}", file=sys.stderr)
        elif isinstance(url_data, list) and url_data:
            first = url_data[0]
            # Only strings and dicts can carry a URL; anything else (numbers,
            # nested lists, null) is treated as "no URL" rather than crashing.
            if isinstance(first, str):
                extracted = first
            elif isinstance(first, dict):
                extracted = first.get('url', '')
            print(f"DOCUMENT Extracted URL from list: {extracted}", file=sys.stderr)
        url = extracted

    if not url or not isinstance(url, str):
        print(f"DOCUMENT Empty URL after processing", file=sys.stderr)
        return None

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        image_data = response.content

        print(f"DOCUMENT Downloaded image from {url}", file=sys.stderr)
        print(f"DOCUMENT Image data size: {len(image_data)} bytes", file=sys.stderr)

        # Process the image
        image = Image.open(BytesIO(image_data))
        print(f"DOCUMENT Original image format: {image.format}", file=sys.stderr)
        print(f"DOCUMENT Original image mode: {image.mode}", file=sys.stderr)

        # Capture these before any conversion: the converted image object no
        # longer reports the source mode or carries the source EXIF block.
        source_mode = image.mode
        exif_bytes = image.info.get('exif')

        # Convert to RGB if necessary (JPEG doesn't support alpha channel)
        if source_mode == 'RGBA':
            background = Image.new('RGB', image.size, (255, 255, 255))
            background.paste(image, mask=image.split()[-1])
            image = background
            print(f"DOCUMENT Converted RGBA to RGB", file=sys.stderr)
        elif source_mode != 'RGB':
            image = image.convert('RGB')
            print(f"DOCUMENT Converted {source_mode} to RGB", file=sys.stderr)

        # Save as JPEG, keeping the source EXIF when that is possible.
        # Malformed or oversized EXIF makes the save fail, in which case we
        # fall back to a plain save without metadata.
        saved = False
        if exif_bytes:
            try:
                image.save(output_path, 'JPEG', quality=95, exif=exif_bytes)
                saved = True
            except Exception as e:
                print(f"DOCUMENT Could not preserve EXIF, saving without it: {str(e)}", file=sys.stderr)
        if not saved:
            image.save(output_path, 'JPEG', quality=95)
        print(f"DOCUMENT Saved image to {output_path}", file=sys.stderr)

        return output_path

    except Exception as e:
        print(f"DOCUMENT Error processing image: {str(e)}", file=sys.stderr)
        return None

# Tampering Function 
def perform_error_level_analysis(image_path, output_path="ELA.png", quality=90):
    """
    Perform Error Level Analysis (ELA) on image.

    The image is recompressed as JPEG at the given quality, the per-pixel
    absolute difference between the original and the recompressed copy is
    computed, its brightness is multiplied by 10, and the result is saved
    as a PNG.

    Args:
        image_path (str): Path to original image
        output_path (str): Path to save ELA result
        quality (int): JPEG quality for recompression

    Returns:
        str: output_path on success, or None if any step failed (the error
        is logged to stderr).

    Why ELA:
    - Detects areas of image that have been edited or tampered with
    - Works by comparing original image with recompressed version
    - Edited areas show different compression artifacts
    - Standard forensic technique for image authenticity
    """
    print(f"DOCUMENT TAMPERING Starting Error Level Analysis...", file=sys.stderr)

    try:
        # The context manager closes the source file as soon as the pixel
        # data has been copied into an independent RGB image.
        with Image.open(image_path) as source_image:
            if source_image.mode == 'RGBA':
                # JPEG doesn't support alpha: flatten onto a white
                # background using the alpha channel as the paste mask.
                original_image = Image.new('RGB', source_image.size, (255, 255, 255))
                original_image.paste(source_image, mask=source_image.split()[-1])
            else:
                original_image = source_image.convert('RGB')

        # Recompress in memory instead of via a temp file on disk: nothing
        # to clean up, nothing left behind on failure, and no file-name
        # clashes between concurrent runs.
        recompressed = BytesIO()
        original_image.save(recompressed, "JPEG", quality=quality)
        recompressed.seek(0)

        with Image.open(recompressed) as resaved_image:
            difference = ImageChops.difference(original_image, resaved_image)

        # Raw differences are very dark; brighten them so they can be
        # thresholded by the caller.
        difference = ImageEnhance.Brightness(difference).enhance(10)

        # Save the difference image
        difference.save(output_path, format='PNG')

        print(f"DOCUMENT TAMPERING ELA analysis completed, saved to {output_path}", file=sys.stderr)
        return output_path

    except Exception as e:
        print(f"DOCUMENT TAMPERING Error during ELA: {str(e)}", file=sys.stderr)
        return None

def detect_tampering(ela_path, threshold=100):
    """
    Analyze ELA results to detect tampering.

    Loads the ELA image into an array and computes the fraction of array
    values (every channel of every pixel counts separately) that are
    strictly greater than `threshold`. That fraction is then bucketed:

        ratio <= 0.02          -> "False"
        0.02 < ratio <= 0.05   -> "Investigation Required"
        ratio > 0.05           -> "True"

    Args:
        ela_path (str): Path to ELA result image
        threshold (int): Brightness threshold for tampering detection

    Returns:
        dict: {"tampered": <one of the strings above>,
               "brightness_ratio": <ratio rounded to 4 decimals>}

    Raises:
        ValueError: If the ELA image contains no values to analyze.
        Exception: Whatever opening/decoding the image raises.

    Why this approach:
    - Analyzes brightness distribution in ELA image
    - High brightness indicates areas of potential tampering
    - Buckets are right-closed so every ratio, including exactly 0.02,
      maps to a verdict
    """
    print(f"DOCUMENT TAMPERING Analyzing ELA results...", file=sys.stderr)

    # Copy the pixels into an array, then release the file handle.
    with Image.open(ela_path) as ela_image:
        ela_array = np.array(ela_image)

    total_pixels = ela_array.size
    if total_pixels == 0:
        # Without this guard the division below would produce NaN.
        raise ValueError("ELA image contains no pixel data")

    bright_pixels = int(np.count_nonzero(ela_array > threshold))
    brightness_ratio = bright_pixels / total_pixels

    print(f"DOCUMENT TAMPERING Brightness ratio: {brightness_ratio:.4f}", file=sys.stderr)

    # The verdicts are strings (not booleans) because the middle state has
    # no boolean equivalent; callers receive them verbatim in the JSON.
    if brightness_ratio <= 0.02:
        verdict = "False"
    elif brightness_ratio <= 0.05:
        verdict = "Investigation Required"
    else:
        verdict = "True"

    return {
        "tampered": verdict,
        "brightness_ratio": round(float(brightness_ratio), 4)
    }
            
# Metadata Analysis Function (TO BE TESTED IN MOBILE)
def analyze_metadata(image_path):
    """
    Read an image's EXIF tags and classify what they say about its origin.

    Args:
        image_path (str): Path to image file

    Returns:
        dict: Always has "result", "message" and "metadata" keys; every
        outcome except "error" and "no metadata" also has "analysis".
        "result" is one of:
            "error"            - the file could not be read or parsed
            "no metadata"      - no tags were found at all
            "minimal metadata" - tags exist, but none of the timestamp,
                                 camera or software tags checked here
            "edited"           - the software tag names a known editor
            "success"          - none of the above

    Why this approach:
    - EXIF tags record how an image was created and processed
    - The software tag exposes photo editors that touched the file
    - Timestamp and camera tags are noted as supporting evidence
    """
    print(f"DOCUMENT METADATA Starting metadata analysis...", file=sys.stderr)

    try:
        with open(image_path, 'rb') as image_file:
            exif_tags = exifread.process_file(image_file)
            # Stringify every tag value so the result is JSON-serializable.
            metadata = {name: str(value) for name, value in exif_tags.items()}

        # Debug: report what was found
        print(f"DOCUMENT METADATA Found {len(metadata)} metadata tags", file=sys.stderr)
        if metadata:
            print(f"DOCUMENT METADATA Metadata keys: {list(metadata.keys())}", file=sys.stderr)

    except Exception as err:
        reason = str(err)
        print(f"DOCUMENT METADATA Error reading metadata: {reason}", file=sys.stderr)
        return {
            "result": "error",
            "message": f"Error reading metadata: {reason}",
            "metadata": {}
        }

    if len(metadata) == 0:
        print(f"DOCUMENT METADATA No metadata found in {image_path}", file=sys.stderr)
        return {
            "result": "no metadata",
            "message": "No metadata found in image.",
            "metadata": metadata
        }

    # Presence checks for the tag groups of interest
    timestamp_tags = ('EXIF DateTimeOriginal', 'Image DateTime', 'EXIF DateTime')
    camera_tags = ('Image Make', 'Image Model', 'EXIF Make', 'EXIF Model')
    has_timestamp = not metadata.keys().isdisjoint(timestamp_tags)
    has_camera_info = not metadata.keys().isdisjoint(camera_tags)

    # Lower-cased so it can be matched against the lower-case editor names
    software_used = metadata.get('Image Software', '').lower()

    if not (has_timestamp or has_camera_info or software_used):
        return {
            "result": "minimal metadata",
            "message": "Image contains minimal metadata (no timestamp, camera info, or editing software detected)",
            "metadata": metadata,
            "analysis": "Image appears to be legitimate with no signs of editing"
        }

    # Substring match: the software tag usually carries version text too
    for editor_name in software_list:
        if editor_name in software_used:
            return {
                "result": "edited",
                "message": f"Image appears to have been edited with software: {software_used}",
                "metadata": metadata,
                "analysis": "Suspicious editing software detected"
            }

    # Human-readable list of the findings, in a fixed order
    findings = [
        (has_timestamp, "Timestamp found"),
        (has_camera_info, "Camera information found"),
        (software_used, f"Software detected: {software_used}"),
    ]
    analysis_summary = [label for present, label in findings if present]

    return {
        "result": "success",
        "message": f"Successfully analyzed metadata: {', '.join(analysis_summary)}",
        "metadata": metadata,
        "analysis": "Image appears to be legitimate with no signs of editing"
    }

# Main execution
if __name__ == "__main__":
    # Script entry point: analyze the image at the URL given as the first
    # command line argument and print a single JSON object on stdout.
    # Exit status is 0 on success and 1 on any failure.

    # Only the entry point needs these, so they are imported locally.
    import shutil
    import tempfile

    # Validate command line arguments
    if len(sys.argv) < 2:
        print(json.dumps({"success": False, "error": "No image URL provided"}))
        sys.exit(1)

    image_url = sys.argv[1]

    # All intermediate files live in a per-run directory, so concurrent
    # invocations cannot overwrite each other's files and one rmtree in
    # `finally` cleans up on both the success and the failure path.
    work_dir = None

    try:
        work_dir = tempfile.mkdtemp(prefix="document_analysis_")

        # Download and process the image
        image_path = download_image(image_url, os.path.join(work_dir, "image.jpg"))
        if not image_path:
            raise Exception("Failed to download image")

        # Perform metadata analysis on the downloaded file
        metadata_results = analyze_metadata(image_path)

        # Perform ELA and tampering detection
        ela_path = perform_error_level_analysis(image_path, os.path.join(work_dir, "ELA.png"))
        if not ela_path:
            raise Exception("Failed to perform error level analysis")

        tampering_results = detect_tampering(ela_path)

        # Return combined results
        print(json.dumps({
            "success": True,
            "tampering_results": tampering_results,
            "metadata_results": metadata_results
        }))

    except Exception as e:
        print(json.dumps({
            "success": False,
            "error": str(e),
            "context": {
                "url": image_url
            }
        }))
        sys.exit(1)

    finally:
        # Best-effort cleanup: a failure to delete temp files must not
        # change the reported analysis result.
        if work_dir:
            shutil.rmtree(work_dir, ignore_errors=True)