takomattyy's picture
Upload 21 files
3f04313 verified
raw
history blame
16.6 kB
from flask import Flask, request, jsonify
import sys
import os
import subprocess
import json
# Process-wide environment applied at import time:
#   - PADDLEOCR_LOG_LEVEL: suppress PaddleOCR verbose logging
#   - QT_QPA_PLATFORM / DISPLAY: Qt platform set to "offscreen", X display ":99"
os.environ.update({
    'PADDLEOCR_LOG_LEVEL': 'ERROR',
    'QT_QPA_PLATFORM': 'offscreen',
    'DISPLAY': ':99',
})
def _parse_script_output(output_str):
    """Recover the JSON value a script printed, tolerating surrounding log noise.

    Strategies are tried in order, from strictest to most lenient:

    1. Parse the whole output as JSON.
    2. Parse the last line that starts with ``{`` and is valid JSON on its own.
    3. Scan the text for top-level JSON objects and return the last one; this
       handles pretty-printed (multi-line) or nested objects mixed with logs.

    Args:
        output_str: Stripped stdout of the script.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If no JSON value can be recovered from the output.
    """
    try:
        return json.loads(output_str)
    except json.JSONDecodeError:
        pass

    # Single-line objects: prefer the last one, but keep looking at earlier
    # candidates instead of giving up when the last line is not valid JSON.
    candidates = [
        line.strip()
        for line in output_str.split('\n')
        if line.strip().startswith('{')
    ]
    for candidate in reversed(candidates):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue

    # Walk every '{' from the left. A successful raw_decode consumes the whole
    # object (including nested braces), so only top-level objects are recorded.
    decoder = json.JSONDecoder()
    found = False
    last_object = None
    position = output_str.find('{')
    while position != -1:
        try:
            last_object, next_start = decoder.raw_decode(output_str, position)
            found = True
        except json.JSONDecodeError:
            next_start = position + 1
        position = output_str.find('{', next_start)

    if found:
        return last_object
    raise ValueError("No valid JSON found in output")


def run_extraction_script(script_name, document_url):
    """Run an OCR extraction script in a subprocess and return its JSON result.

    The script is executed with the current interpreter as
    ``<python> <script_name> <document_url>`` in the current working directory,
    with a 300 second timeout.

    Args:
        script_name: Script path passed to the interpreter.
        document_url: Single argument forwarded to the script.

    Returns:
        The JSON value printed by the script on success. On any failure a dict
        with ``'success': False`` and an ``'error'`` message is returned instead
        (plus ``stderr``/``stdout`` for a non-zero exit code, or
        ``raw_output``/``json_error`` when the output is not parseable).
        This function never raises.
    """
    try:
        result = subprocess.run(
            [sys.executable, script_name, document_url],
            capture_output=True,
            text=True,
            timeout=300,  # keep in sync with the "5 minutes" message below
            cwd=os.getcwd(),
        )
    except subprocess.TimeoutExpired:
        return {
            'success': False,
            'error': 'Script execution timed out after 5 minutes'
        }
    except Exception as e:
        # Boundary handler: callers expect a dict, never an exception.
        return {
            'success': False,
            'error': f'Unexpected error: {str(e)}'
        }

    if result.returncode != 0:
        return {
            'success': False,
            'error': f'Script failed with return code {result.returncode}',
            'stderr': result.stderr,
            'stdout': result.stdout
        }

    try:
        return _parse_script_output(result.stdout.strip())
    except Exception as e:
        return {
            'success': False,
            'error': 'Invalid JSON output from script',
            'raw_output': result.stdout[:500],  # Limit output size
            'json_error': str(e)
        }
# Create Flask app
app = Flask(__name__)

# Configure Flask for production: keep response keys in insertion order and
# emit compact JSON.
# Older Flask releases read these settings from app.config.
app.config['JSON_SORT_KEYS'] = False
app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
# Newer Flask releases ignore the config keys above and read the same settings
# from the JSON provider object, so apply them there too when it exists.
_json_provider = getattr(app, 'json', None)
if _json_provider is not None:
    _json_provider.sort_keys = False
    _json_provider.compact = True
# ============================================================================
# PHILIPPINE ID OCR EXTRACTION ENDPOINTS
# ============================================================================
@app.route('/api/extract-national-id', methods=['POST'])
def api_extract_national_id():
    """Extract Philippine National ID details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_national_id.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_national_id.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-drivers-license', methods=['POST'])
def api_extract_drivers_license():
    """Extract Philippine Driver's License details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_drivers_license.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_drivers_license.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-prc', methods=['POST'])
def api_extract_prc():
    """Extract PRC ID details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_prc.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_prc.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-umid', methods=['POST'])
def api_extract_umid():
    """Extract UMID details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_umid.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_umid.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-sss', methods=['POST'])
def api_extract_sss():
    """Extract SSS ID details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_sss.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_sss.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-passport', methods=['POST'])
def api_extract_passport():
    """Extract Philippine Passport details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_passport.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_passport.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-postal', methods=['POST'])
def api_extract_postal():
    """Extract Postal ID details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_postal.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_postal.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-phic', methods=['POST'])
def api_extract_phic():
    """Extract PhilHealth ID details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_phic.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_phic.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
# ============================================================================
# CLEARANCE & CERTIFICATE OCR EXTRACTION ENDPOINTS
# ============================================================================
@app.route('/api/extract-nbi', methods=['POST'])
def api_extract_nbi():
    """Extract NBI Clearance details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_nbi_ocr.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_nbi_ocr.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-police-clearance', methods=['POST'])
def api_extract_police_clearance():
    """Extract Police Clearance details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_police_ocr.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_police_ocr.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/extract-tesda', methods=['POST'])
def api_extract_tesda():
    """Extract TESDA Certificate details.

    Expects a JSON object body with a non-empty string ``document_url`` and
    runs ``extract_tesda_ocr.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing document_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``document_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        document_url = data.get('document_url') if isinstance(data, dict) else None
        if not document_url or not isinstance(document_url, str):
            return jsonify({'error': 'Missing document_url'}), 400
        result = run_extraction_script('extract_tesda_ocr.py', document_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
# ============================================================================
# DOCUMENT ANALYSIS ENDPOINT
# ============================================================================
@app.route('/api/analyze-document', methods=['POST'])
def api_analyze_document():
    """Analyze and identify document type.

    Expects a JSON object body with a non-empty string ``image_url`` and runs
    ``analyze_document.py`` on it via ``run_extraction_script``.

    Returns:
        The script result as JSON; 400 with ``{'error': 'Missing image_url'}``
        when the body is absent, not a JSON object, or lacks a usable
        ``image_url``; 500 with a failure payload on an unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        image_url = data.get('image_url') if isinstance(data, dict) else None
        if not image_url or not isinstance(image_url, str):
            return jsonify({'error': 'Missing image_url'}), 400
        result = run_extraction_script('analyze_document.py', image_url)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/analyze-documents', methods=['POST'])
def api_analyze_documents():
    """Analyze multiple documents for tampering detection and metadata.

    Expects a JSON object body with ``image_urls``: a list of one to three
    non-empty strings. Runs ``analyze_documents.py`` with the URLs as
    command-line arguments (300 second timeout) and returns the JSON it prints.

    Returns:
        The parsed script output as JSON on success. 400 with an ``error``
        message when the body or ``image_urls`` is missing or malformed. A
        ``success: False`` payload when the script exits non-zero or prints
        invalid JSON. 500 with a ``success: False`` payload on timeout or any
        unexpected exception.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so malformed requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        image_urls = data.get('image_urls', []) if isinstance(data, dict) else []
        # Require a real list: a bare string would pass a length check and
        # then fail when concatenated onto the command list.
        if not image_urls or not isinstance(image_urls, list):
            return jsonify({'error': 'Missing image_urls array'}), 400
        if len(image_urls) > 3:
            return jsonify({'error': 'Maximum 3 documents allowed'}), 400
        if not all(isinstance(url, str) and url for url in image_urls):
            return jsonify({'error': 'image_urls must contain only non-empty strings'}), 400

        # Run analyze_documents.py with multiple URLs
        cmd = [sys.executable, 'analyze_documents.py'] + image_urls
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,
                cwd=os.getcwd()
            )
        except subprocess.TimeoutExpired:
            # Report a fixed message rather than the exception text, which
            # embeds the full command line.
            return jsonify({
                'success': False,
                'error': 'Script execution timed out after 5 minutes'
            }), 500

        if result.returncode != 0:
            return jsonify({
                'success': False,
                'error': f'Script failed with return code {result.returncode}',
                'stderr': result.stderr
            })

        # Parse JSON output
        try:
            output_str = result.stdout.strip()
            return jsonify(json.loads(output_str))
        except Exception:
            return jsonify({
                'success': False,
                'error': 'Invalid JSON output from script',
                'raw_output': result.stdout[:500]
            })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
# ============================================================================
# UTILITY ENDPOINTS
# ============================================================================
@app.route('/health', methods=['GET'])
def health_check():
    """Return a static JSON payload with the service status, name and version."""
    status_payload = dict(
        status='healthy',
        service='handyhome-ocr-api',
        version='1.0.0',
    )
    return jsonify(status_payload)
@app.route('/', methods=['GET'])
def index():
    """Return the API documentation as JSON.

    The payload lists every endpoint grouped by category, together with the
    expected request body and the general response format.
    """

    def describe(description, fields):
        # Every endpoint entry has the same two leading keys, in this order.
        return {'description': description, 'fields': fields}

    philippine_ids = {
        'POST /api/extract-national-id': describe(
            'Extract Philippine National ID details',
            ['id_number', 'full_name', 'birth_date'],
        ),
        'POST /api/extract-drivers-license': describe(
            "Extract Driver's License details",
            ['license_number', 'full_name', 'birth_date', 'address'],
        ),
        'POST /api/extract-prc': describe(
            'Extract PRC ID details',
            ['prc_number', 'full_name', 'profession', 'valid_until'],
        ),
        'POST /api/extract-umid': describe(
            'Extract UMID details',
            ['crn', 'full_name', 'birth_date'],
        ),
        'POST /api/extract-sss': describe(
            'Extract SSS ID details',
            ['sss_number', 'full_name', 'birth_date'],
        ),
        'POST /api/extract-passport': describe(
            'Extract Philippine Passport details',
            ['passport_number', 'surname', 'given_names', 'birth_date'],
        ),
        'POST /api/extract-postal': describe(
            'Extract Postal ID details',
            ['prn', 'full_name', 'address', 'birth_date'],
        ),
        'POST /api/extract-phic': describe(
            'Extract PhilHealth ID details',
            ['id_number', 'full_name', 'birth_date', 'sex', 'address'],
        ),
    }

    clearances = {
        'POST /api/extract-nbi': describe(
            'Extract NBI Clearance details',
            ['id_number', 'full_name', 'birth_date'],
        ),
        'POST /api/extract-police-clearance': describe(
            'Extract Police Clearance details',
            ['id_number', 'full_name', 'address', 'birth_date', 'status'],
        ),
        'POST /api/extract-tesda': describe(
            'Extract TESDA Certificate details',
            ['registry_number', 'full_name', 'qualification', 'date_issued'],
        ),
    }

    # The multi-document endpoint additionally documents its own request body.
    multi_document = describe(
        'Analyze multiple documents for tampering detection and metadata (max 3)',
        ['tampering_results', 'metadata_results'],
    )
    multi_document['body'] = {'image_urls': 'array of image URLs (max 3)'}

    document_analysis = {
        'POST /api/analyze-document': describe(
            'Analyze and identify document type from image',
            ['document_type', 'confidence'],
        ),
        'POST /api/analyze-documents': multi_document,
    }

    utility = {
        'GET /health': 'Health check endpoint',
        'GET /': 'This API documentation',
    }

    request_format = {
        'body': {
            'document_url': 'string (required) - URL of the document image to process',
        },
        'example': {
            'document_url': 'https://example.com/national_id.jpg',
        },
    }

    response_format = {
        'success': 'boolean - Whether extraction was successful',
        'extracted_fields': 'object - Extracted data fields',
        'error': 'string - Error message if failed',
    }

    documentation = {
        'service': 'HandyHome OCR Extraction API',
        'version': '1.0.0',
        'description': 'Philippine ID and Document OCR Extraction using PaddleOCR',
        'endpoints': {
            'Philippine IDs': philippine_ids,
            'Clearances & Certificates': clearances,
            'Document Analysis': document_analysis,
            'Utility': utility,
        },
        'request_format': request_format,
        'response_format': response_format,
    }
    return jsonify(documentation)
@app.route('/api/routes', methods=['GET'])
def list_routes():
    """Return every registered route except ``static``, sorted by path.

    Each entry carries the endpoint name, its HTTP methods (sorted, without
    HEAD and OPTIONS) and the URL rule; the response also includes the count.
    """
    hidden_methods = {'HEAD', 'OPTIONS'}
    entries = [
        {
            'endpoint': rule.endpoint,
            'methods': sorted(rule.methods - hidden_methods),
            'path': rule.rule,
        }
        for rule in app.url_map.iter_rules()
        if rule.endpoint != 'static'
    ]
    ordered = sorted(entries, key=lambda entry: entry['path'])
    return jsonify({
        'total_routes': len(ordered),
        'routes': ordered,
    })
# Start the built-in Flask server when this file is executed as a script.
if __name__ == '__main__':
    # PORT from the environment wins; otherwise fall back to 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, port=listen_port, host='0.0.0.0')