Spaces:
Sleeping
Sleeping
| from flask import Flask, request, jsonify | |
| import sys | |
| import os | |
| import subprocess | |
| import json | |
# Process-wide environment tweaks, applied at import time. Child processes
# started later without an explicit ``env`` inherit these values.
#   PADDLEOCR_LOG_LEVEL - suppress PaddleOCR verbose logging
#   QT_QPA_PLATFORM / DISPLAY - presumably for running without a real display
#   (NOTE(review): confirm a virtual display :99 exists in the deployment)
os.environ.update({
    'PADDLEOCR_LOG_LEVEL': 'ERROR',
    'QT_QPA_PLATFORM': 'offscreen',
    'DISPLAY': ':99',
})
def _parse_json_output(output_str):
    """Return the JSON payload contained in *output_str*.

    The whole string is tried first. If that fails (for example because log
    lines surround the payload), every ``{`` is tried as the start of a JSON
    object and the last object that decodes successfully is returned.

    Raises:
        ValueError: if no JSON value can be decoded from the string.
    """
    try:
        return json.loads(output_str)
    except json.JSONDecodeError:
        pass

    decoder = json.JSONDecoder()
    found = False
    payload = None
    pos = output_str.find('{')
    while pos != -1:
        try:
            payload, end = decoder.raw_decode(output_str, pos)
        except json.JSONDecodeError:
            # Not a valid object start (log noise, truncated line): move on.
            pos = output_str.find('{', pos + 1)
        else:
            found = True
            # Resume after the decoded object so nested braces that belong
            # to it are not mistaken for separate payloads.
            pos = output_str.find('{', end)
    if not found:
        raise ValueError("No valid JSON found in output")
    return payload


def run_extraction_script(script_name, document_url):
    """Run an OCR extraction script and return its parsed JSON output.

    The script is executed with the current interpreter as
    ``<python> <script_name> <document_url>`` in the current working
    directory, with a 300 second timeout.

    Args:
        script_name: Path of the script to run.
        document_url: Single argument passed to the script.

    Returns:
        The JSON value printed by the script on success. On any failure a
        dict with ``'success': False`` and an ``'error'`` message is
        returned instead (plus ``stderr``/``stdout`` for a non-zero exit, or
        ``raw_output``/``json_error`` when stdout holds no valid JSON).
        This function does not raise.
    """
    try:
        result = subprocess.run(
            [sys.executable, script_name, document_url],
            capture_output=True,
            text=True,
            timeout=300,
            cwd=os.getcwd(),
        )
    except subprocess.TimeoutExpired:
        return {
            'success': False,
            'error': 'Script execution timed out after 5 minutes'
        }
    except Exception as e:
        return {
            'success': False,
            'error': f'Unexpected error: {str(e)}'
        }

    if result.returncode != 0:
        return {
            'success': False,
            'error': f'Script failed with return code {result.returncode}',
            'stderr': result.stderr,
            'stdout': result.stdout
        }

    try:
        return _parse_json_output(result.stdout.strip())
    except Exception as e:
        return {
            'success': False,
            'error': 'Invalid JSON output from script',
            'raw_output': result.stdout[:500],  # Limit output size
            'json_error': str(e)
        }
# Create the Flask application object used by this module.
app = Flask(__name__)
# Configure Flask for production: keep key order as built and skip
# pretty-printing.
# NOTE(review): newer Flask releases replaced these config keys with
# attributes on ``app.json`` - confirm against the pinned Flask version.
app.config.update(
    JSON_SORT_KEYS=False,
    JSONIFY_PRETTYPRINT_REGULAR=False,
)
| # ============================================================================ | |
| # PHILIPPINE ID OCR EXTRACTION ENDPOINTS | |
| # ============================================================================ | |
@app.route('/api/extract-national-id', methods=['POST'])
def api_extract_national_id():
    """Extract Philippine National ID details.

    Registered as ``POST /api/extract-national-id``, the path advertised by
    the index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_national_id.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_national_id.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-drivers-license', methods=['POST'])
def api_extract_drivers_license():
    """Extract Philippine Driver's License details.

    Registered as ``POST /api/extract-drivers-license``, the path advertised
    by the index endpoint. Expects a JSON object body with ``document_url``
    and runs ``extract_drivers_license.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_drivers_license.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-prc', methods=['POST'])
def api_extract_prc():
    """Extract PRC ID details.

    Registered as ``POST /api/extract-prc``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_prc.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_prc.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-umid', methods=['POST'])
def api_extract_umid():
    """Extract UMID details.

    Registered as ``POST /api/extract-umid``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_umid.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_umid.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-sss', methods=['POST'])
def api_extract_sss():
    """Extract SSS ID details.

    Registered as ``POST /api/extract-sss``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_sss.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_sss.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-passport', methods=['POST'])
def api_extract_passport():
    """Extract Philippine Passport details.

    Registered as ``POST /api/extract-passport``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_passport.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_passport.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-postal', methods=['POST'])
def api_extract_postal():
    """Extract Postal ID details.

    Registered as ``POST /api/extract-postal``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_postal.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_postal.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-phic', methods=['POST'])
def api_extract_phic():
    """Extract PhilHealth ID details.

    Registered as ``POST /api/extract-phic``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_phic.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_phic.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
| # ============================================================================ | |
| # CLEARANCE & CERTIFICATE OCR EXTRACTION ENDPOINTS | |
| # ============================================================================ | |
@app.route('/api/extract-nbi', methods=['POST'])
def api_extract_nbi():
    """Extract NBI Clearance details.

    Registered as ``POST /api/extract-nbi``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_nbi_ocr.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_nbi_ocr.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-police-clearance', methods=['POST'])
def api_extract_police_clearance():
    """Extract Police Clearance details.

    Registered as ``POST /api/extract-police-clearance``, the path
    advertised by the index endpoint. Expects a JSON object body with
    ``document_url`` and runs ``extract_police_ocr.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_police_ocr.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-tesda', methods=['POST'])
def api_extract_tesda():
    """Extract TESDA Certificate details.

    Registered as ``POST /api/extract-tesda``, the path advertised by the
    index endpoint. Expects a JSON object body with ``document_url`` and
    runs ``extract_tesda_ocr.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``document_url`` is missing/empty; HTTP 500 on an
        unexpected error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    document_url = data.get('document_url') if isinstance(data, dict) else None
    if not document_url:
        return jsonify({'success': False, 'error': 'Missing document_url'}), 400
    try:
        result = run_extraction_script('extract_tesda_ocr.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
| # ============================================================================ | |
| # DOCUMENT ANALYSIS ENDPOINT | |
| # ============================================================================ | |
@app.route('/api/analyze-document', methods=['POST'])
def api_analyze_document():
    """Analyze and identify document type.

    Registered as ``POST /api/analyze-document``, the path advertised by
    the index endpoint. Expects a JSON object body with ``image_url`` (not
    ``document_url``) and runs ``analyze_document.py`` on it.

    Returns:
        The script result as JSON. HTTP 400 when the body is not a JSON
        object or ``image_url`` is missing/empty; HTTP 500 on an unexpected
        error.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    image_url = data.get('image_url') if isinstance(data, dict) else None
    if not image_url:
        return jsonify({'success': False, 'error': 'Missing image_url'}), 400
    try:
        result = run_extraction_script('analyze_document.py', image_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/analyze-documents', methods=['POST'])
def api_analyze_documents():
    """Analyze multiple documents for tampering detection and metadata.

    Registered as ``POST /api/analyze-documents``, the path advertised by
    the index endpoint. Expects a JSON object body whose ``image_urls`` is
    an array of one to three URL strings; all of them are passed as
    arguments to a single ``analyze_documents.py`` run (300 second timeout).

    Returns:
        The script's JSON output on success. A ``success: False`` payload
        when the script exits non-zero or prints invalid JSON. HTTP 400 for
        a missing, malformed or oversized ``image_urls``; HTTP 500 on an
        unexpected error (including a timeout).
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad client input is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    image_urls = data.get('image_urls', []) if isinstance(data, dict) else None
    if not image_urls:
        return jsonify({'success': False, 'error': 'Missing image_urls array'}), 400
    # The values become command-line arguments, so they must be a list of
    # strings; anything else would fail later with an opaque error.
    if not isinstance(image_urls, list) or not all(
        isinstance(url, str) for url in image_urls
    ):
        return jsonify({
            'success': False,
            'error': 'image_urls must be an array of URL strings'
        }), 400
    if len(image_urls) > 3:
        return jsonify({'success': False, 'error': 'Maximum 3 documents allowed'}), 400

    try:
        # Run analyze_documents.py with multiple URLs
        result = subprocess.run(
            [sys.executable, 'analyze_documents.py', *image_urls],
            capture_output=True,
            text=True,
            timeout=300,
            cwd=os.getcwd(),
        )
        if result.returncode != 0:
            return jsonify({
                'success': False,
                'error': f'Script failed with return code {result.returncode}',
                'stderr': result.stderr
            })
        # Parse JSON output
        try:
            parsed = json.loads(result.stdout.strip())
        except json.JSONDecodeError:
            return jsonify({
                'success': False,
                'error': 'Invalid JSON output from script',
                'raw_output': result.stdout[:500]
            })
        return jsonify(parsed)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
| # ============================================================================ | |
| # UTILITY ENDPOINTS | |
| # ============================================================================ | |
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Registered as ``GET /health``, the path advertised by the index
    endpoint. Returns a static JSON payload with the service name, version
    and a ``healthy`` status; it performs no checks of its own.
    """
    return jsonify({
        'status': 'healthy',
        'service': 'handyhome-ocr-api',
        'version': '1.0.0'
    })
@app.route('/', methods=['GET'])
def index():
    """API documentation endpoint.

    Registered as ``GET /``, the path this document itself advertises.
    Returns a static JSON description of the service: the endpoints grouped
    by category with the fields each one reports, plus the common request
    and response formats. The content is hand-maintained and must be kept
    in step with the handlers.
    """
    return jsonify({
        'service': 'HandyHome OCR Extraction API',
        'version': '1.0.0',
        'description': 'Philippine ID and Document OCR Extraction using PaddleOCR',
        'endpoints': {
            'Philippine IDs': {
                'POST /api/extract-national-id': {
                    'description': 'Extract Philippine National ID details',
                    'fields': ['id_number', 'full_name', 'birth_date']
                },
                'POST /api/extract-drivers-license': {
                    'description': 'Extract Driver\'s License details',
                    'fields': ['license_number', 'full_name', 'birth_date', 'address']
                },
                'POST /api/extract-prc': {
                    'description': 'Extract PRC ID details',
                    'fields': ['prc_number', 'full_name', 'profession', 'valid_until']
                },
                'POST /api/extract-umid': {
                    'description': 'Extract UMID details',
                    'fields': ['crn', 'full_name', 'birth_date']
                },
                'POST /api/extract-sss': {
                    'description': 'Extract SSS ID details',
                    'fields': ['sss_number', 'full_name', 'birth_date']
                },
                'POST /api/extract-passport': {
                    'description': 'Extract Philippine Passport details',
                    'fields': ['passport_number', 'surname', 'given_names', 'birth_date']
                },
                'POST /api/extract-postal': {
                    'description': 'Extract Postal ID details',
                    'fields': ['prn', 'full_name', 'address', 'birth_date']
                },
                'POST /api/extract-phic': {
                    'description': 'Extract PhilHealth ID details',
                    'fields': ['id_number', 'full_name', 'birth_date', 'sex', 'address']
                }
            },
            'Clearances & Certificates': {
                'POST /api/extract-nbi': {
                    'description': 'Extract NBI Clearance details',
                    'fields': ['id_number', 'full_name', 'birth_date']
                },
                'POST /api/extract-police-clearance': {
                    'description': 'Extract Police Clearance details',
                    'fields': ['id_number', 'full_name', 'address', 'birth_date', 'status']
                },
                'POST /api/extract-tesda': {
                    'description': 'Extract TESDA Certificate details',
                    'fields': ['registry_number', 'full_name', 'qualification', 'date_issued']
                }
            },
            'Document Analysis': {
                'POST /api/analyze-document': {
                    'description': 'Analyze and identify document type from image',
                    'fields': ['document_type', 'confidence'],
                    # This endpoint reads ``image_url``, not the
                    # ``document_url`` described under request_format.
                    'body': {'image_url': 'string (required) - URL of the image to analyze'}
                },
                'POST /api/analyze-documents': {
                    'description': 'Analyze multiple documents for tampering detection and metadata (max 3)',
                    'fields': ['tampering_results', 'metadata_results'],
                    'body': {'image_urls': 'array of image URLs (max 3)'}
                }
            },
            'Utility': {
                'GET /health': 'Health check endpoint',
                'GET /': 'This API documentation'
            }
        },
        'request_format': {
            'body': {
                'document_url': 'string (required) - URL of the document image to process'
            },
            'example': {
                'document_url': 'https://example.com/national_id.jpg'
            }
        },
        'response_format': {
            'success': 'boolean - Whether extraction was successful',
            'extracted_fields': 'object - Extracted data fields',
            'error': 'string - Error message if failed'
        }
    })
def list_routes():
    """Return all registered non-static URL rules as JSON, sorted by path.

    Each entry carries the rule's endpoint name, its HTTP methods (without
    the implicit HEAD and OPTIONS) in sorted order, and the URL pattern.
    """
    # NOTE(review): no route registration for this view is visible here -
    # confirm which URL it is meant to be served under.
    visible = [
        {
            'endpoint': rule.endpoint,
            'methods': sorted(rule.methods - {'HEAD', 'OPTIONS'}),
            'path': rule.rule,
        }
        for rule in app.url_map.iter_rules()
        if rule.endpoint != 'static'
    ]
    ordered = sorted(visible, key=lambda entry: entry['path'])
    return jsonify({'total_routes': len(ordered), 'routes': ordered})
# Launch Flask app when executed as a script (not when imported).
if __name__ == '__main__':
    # Listen on all interfaces; the PORT environment variable overrides the
    # 7860 fallback.
    app.run(
        host='0.0.0.0',
        port=int(os.environ.get('PORT', 7860)),
        debug=False,
    )