Spaces:

takomattyy
/

handyhome-ocr-api

Sleeping

App Files Files Community

handyhome-ocr-api / app.py

takomattyy

Upload 21 files

3f04313 verified 26 days ago

raw

history blame

16.6 kB

	from flask import Flask, request, jsonify
	import sys
	import os
	import subprocess
	import json

	# Suppress PaddleOCR verbose logging and set the Qt platform / X display
	# variables in a single update (presumably for headless operation -- confirm).
	os.environ.update({
	    'PADDLEOCR_LOG_LEVEL': 'ERROR',
	    'QT_QPA_PLATFORM': 'offscreen',
	    'DISPLAY': ':99',
	})

	def _parse_script_json(output_str):
	    """Return the JSON value found in a script's stdout, tolerating log noise.

	    Strategies are tried in order and the first success wins:

	    1. Parse the whole output as JSON.
	    2. Parse each line that starts with ``{``, newest first.
	    3. Scan for embedded JSON objects and return the last top-level one.

	    Raises:
	        ValueError: if no strategy yields valid JSON.
	    """
	    try:
	        return json.loads(output_str)
	    except json.JSONDecodeError:
	        pass

	    # A malformed '{...' line must not abort the search: keep walking back
	    # to earlier candidate lines instead of failing on the last one.
	    for line in reversed(output_str.split('\n')):
	        candidate = line.strip()
	        if not candidate.startswith('{'):
	            continue
	        try:
	            return json.loads(candidate)
	        except json.JSONDecodeError:
	            continue

	    # raw_decode parses one complete value starting at a given index, so
	    # nested objects and trailing text after the closing brace are handled.
	    decoder = json.JSONDecoder()
	    found = False
	    last_obj = None
	    pos = output_str.find('{')
	    while pos != -1:
	        try:
	            last_obj, end = decoder.raw_decode(output_str, pos)
	            found = True
	        except json.JSONDecodeError:
	            end = pos + 1
	        # After a successful decode, resume past the object so its nested
	        # braces are not mistaken for separate top-level objects.
	        pos = output_str.find('{', end)

	    if found:
	        return last_obj
	    raise ValueError("No valid JSON found in output")


	def run_extraction_script(script_name, document_url):
	    """Run an OCR extraction script in a subprocess and return its JSON result.

	    The script is started with the current interpreter from the current
	    working directory, receives ``document_url`` as its only argument, and
	    is given 300 seconds to finish.

	    Args:
	        script_name: Path of the script to execute.
	        document_url: Value passed to the script as its first argument.

	    Returns:
	        The JSON value decoded from the script's stdout on success.
	        Otherwise a dict with ``'success': False`` and an ``'error'``
	        message, plus ``stderr``/``stdout`` for a non-zero exit or
	        ``raw_output``/``json_error`` for unparseable output.
	        This function does not raise; every failure is reported in the dict.
	    """
	    try:
	        result = subprocess.run(
	            [sys.executable, script_name, document_url],
	            capture_output=True,
	            text=True,
	            timeout=300,
	            cwd=os.getcwd(),
	        )
	    except subprocess.TimeoutExpired:
	        return {
	            'success': False,
	            'error': 'Script execution timed out after 5 minutes'
	        }
	    except Exception as e:
	        return {
	            'success': False,
	            'error': f'Unexpected error: {str(e)}'
	        }

	    if result.returncode != 0:
	        return {
	            'success': False,
	            'error': f'Script failed with return code {result.returncode}',
	            'stderr': result.stderr,
	            'stdout': result.stdout
	        }

	    try:
	        return _parse_script_json(result.stdout.strip())
	    except Exception as e:
	        return {
	            'success': False,
	            'error': 'Invalid JSON output from script',
	            'raw_output': result.stdout[:500],  # Limit output size
	            'json_error': str(e)
	        }

	# Create Flask app
	app = Flask(__name__)

	# Configure Flask for production: keep response keys in insertion order and
	# do not pretty-print. Older Flask reads these config keys; Flask 2.3 removed
	# them in favour of attributes on the JSON provider, so set both.
	app.config['JSON_SORT_KEYS'] = False
	app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
	if hasattr(getattr(app, 'json', None), 'sort_keys'):
	    app.json.sort_keys = False

	# ============================================================================
	# PHILIPPINE ID OCR EXTRACTION ENDPOINTS
	# ============================================================================

	@app.route('/api/extract-national-id', methods=['POST'])
	def api_extract_national_id():
	    """Extract Philippine National ID details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_national_id.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_national_id.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-drivers-license', methods=['POST'])
	def api_extract_drivers_license():
	    """Extract Philippine Driver's License details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_drivers_license.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_drivers_license.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-prc', methods=['POST'])
	def api_extract_prc():
	    """Extract PRC ID details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_prc.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_prc.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-umid', methods=['POST'])
	def api_extract_umid():
	    """Extract UMID details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_umid.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_umid.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-sss', methods=['POST'])
	def api_extract_sss():
	    """Extract SSS ID details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_sss.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_sss.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-passport', methods=['POST'])
	def api_extract_passport():
	    """Extract Philippine Passport details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_passport.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_passport.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-postal', methods=['POST'])
	def api_extract_postal():
	    """Extract Postal ID details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_postal.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_postal.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-phic', methods=['POST'])
	def api_extract_phic():
	    """Extract PhilHealth ID details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_phic.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_phic.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	# ============================================================================
	# CLEARANCE & CERTIFICATE OCR EXTRACTION ENDPOINTS
	# ============================================================================

	@app.route('/api/extract-nbi', methods=['POST'])
	def api_extract_nbi():
	    """Extract NBI Clearance details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_nbi_ocr.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_nbi_ocr.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-police-clearance', methods=['POST'])
	def api_extract_police_clearance():
	    """Extract Police Clearance details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_police_ocr.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_police_ocr.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/extract-tesda', methods=['POST'])
	def api_extract_tesda():
	    """Extract TESDA Certificate details.

	    Expects a JSON object body with a non-empty string ``document_url`` and
	    passes it to ``extract_tesda_ocr.py`` via ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``document_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        document_url = data.get('document_url')
	        if not document_url:
	            return jsonify({'success': False, 'error': 'Missing document_url'}), 400
	        if not isinstance(document_url, str):
	            return jsonify({'success': False, 'error': 'document_url must be a string'}), 400

	        result = run_extraction_script('extract_tesda_ocr.py', document_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	# ============================================================================
	# DOCUMENT ANALYSIS ENDPOINT
	# ============================================================================

	@app.route('/api/analyze-document', methods=['POST'])
	def api_analyze_document():
	    """Analyze and identify document type.

	    Expects a JSON object body with a non-empty string ``image_url`` (note:
	    not ``document_url``) and passes it to ``analyze_document.py`` via
	    ``run_extraction_script``.

	    Returns:
	        The script result as JSON. Responds 400 when the body is not a JSON
	        object or ``image_url`` is missing or not a string, and 500 on
	        unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        image_url = data.get('image_url')
	        if not image_url:
	            return jsonify({'success': False, 'error': 'Missing image_url'}), 400
	        if not isinstance(image_url, str):
	            return jsonify({'success': False, 'error': 'image_url must be a string'}), 400

	        result = run_extraction_script('analyze_document.py', image_url)
	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	@app.route('/api/analyze-documents', methods=['POST'])
	def api_analyze_documents():
	    """Analyze multiple documents for tampering detection and metadata.

	    Expects a JSON object body whose ``image_urls`` is a list of one to three
	    non-empty strings. The URLs are passed as arguments to
	    ``analyze_documents.py``, run with the current interpreter and a
	    300-second timeout.

	    Returns:
	        The script's decoded JSON output. A non-zero exit or unparseable
	        output yields a ``success: False`` payload with the default status.
	        Responds 400 for an invalid body or ``image_urls`` value and 500 on
	        timeout or unexpected errors.
	    """
	    try:
	        # silent=True returns None for a missing, malformed or non-JSON body
	        # instead of raising, so client mistakes become 400s rather than 500s.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

	        image_urls = data.get('image_urls', [])
	        if not image_urls:
	            return jsonify({'success': False, 'error': 'Missing image_urls array'}), 400

	        # A bare string would pass a length check and then break the argv
	        # concatenation, so insist on a real list of non-empty strings.
	        if not isinstance(image_urls, list) or not all(
	            isinstance(url, str) and url for url in image_urls
	        ):
	            return jsonify({
	                'success': False,
	                'error': 'image_urls must be an array of non-empty strings'
	            }), 400

	        if len(image_urls) > 3:
	            return jsonify({'success': False, 'error': 'Maximum 3 documents allowed'}), 400

	        # Run analyze_documents.py with multiple URLs
	        result = subprocess.run(
	            [sys.executable, 'analyze_documents.py', *image_urls],
	            capture_output=True,
	            text=True,
	            timeout=300,
	            cwd=os.getcwd()
	        )

	        if result.returncode != 0:
	            return jsonify({
	                'success': False,
	                'error': f'Script failed with return code {result.returncode}',
	                'stderr': result.stderr
	            })

	        # Parse JSON output
	        try:
	            return jsonify(json.loads(result.stdout.strip()))
	        except Exception:
	            return jsonify({
	                'success': False,
	                'error': 'Invalid JSON output from script',
	                'raw_output': result.stdout[:500]
	            })

	    except subprocess.TimeoutExpired:
	        # Report a fixed message; str() of the exception would echo the
	        # full command line back to the client.
	        return jsonify({
	            'success': False,
	            'error': 'Script execution timed out after 5 minutes'
	        }), 500
	    except Exception as e:
	        return jsonify({'success': False, 'error': str(e)}), 500

	# ============================================================================
	# UTILITY ENDPOINTS
	# ============================================================================

	@app.route('/health', methods=['GET'])
	def health_check():
	    """Return a static JSON payload with the service status, name and version."""
	    payload = dict(
	        status='healthy',
	        service='handyhome-ocr-api',
	        version='1.0.0',
	    )
	    return jsonify(payload)

	@app.route('/', methods=['GET'])
	def index():
	    """API documentation endpoint.

	    Returns a static JSON description of the service: the available
	    endpoints grouped by category, the fields each one reports, and the
	    general request/response format.
	    """
	    return jsonify({
	        'service': 'HandyHome OCR Extraction API',
	        'version': '1.0.0',
	        'description': 'Philippine ID and Document OCR Extraction using PaddleOCR',
	        'endpoints': {
	            'Philippine IDs': {
	                'POST /api/extract-national-id': {
	                    'description': 'Extract Philippine National ID details',
	                    'fields': ['id_number', 'full_name', 'birth_date']
	                },
	                'POST /api/extract-drivers-license': {
	                    'description': 'Extract Driver\'s License details',
	                    'fields': ['license_number', 'full_name', 'birth_date', 'address']
	                },
	                'POST /api/extract-prc': {
	                    'description': 'Extract PRC ID details',
	                    'fields': ['prc_number', 'full_name', 'profession', 'valid_until']
	                },
	                'POST /api/extract-umid': {
	                    'description': 'Extract UMID details',
	                    'fields': ['crn', 'full_name', 'birth_date']
	                },
	                'POST /api/extract-sss': {
	                    'description': 'Extract SSS ID details',
	                    'fields': ['sss_number', 'full_name', 'birth_date']
	                },
	                'POST /api/extract-passport': {
	                    'description': 'Extract Philippine Passport details',
	                    'fields': ['passport_number', 'surname', 'given_names', 'birth_date']
	                },
	                'POST /api/extract-postal': {
	                    'description': 'Extract Postal ID details',
	                    'fields': ['prn', 'full_name', 'address', 'birth_date']
	                },
	                'POST /api/extract-phic': {
	                    'description': 'Extract PhilHealth ID details',
	                    'fields': ['id_number', 'full_name', 'birth_date', 'sex', 'address']
	                }
	            },
	            'Clearances & Certificates': {
	                'POST /api/extract-nbi': {
	                    'description': 'Extract NBI Clearance details',
	                    'fields': ['id_number', 'full_name', 'birth_date']
	                },
	                'POST /api/extract-police-clearance': {
	                    'description': 'Extract Police Clearance details',
	                    'fields': ['id_number', 'full_name', 'address', 'birth_date', 'status']
	                },
	                'POST /api/extract-tesda': {
	                    'description': 'Extract TESDA Certificate details',
	                    'fields': ['registry_number', 'full_name', 'qualification', 'date_issued']
	                }
	            },
	            'Document Analysis': {
	                'POST /api/analyze-document': {
	                    'description': 'Analyze and identify document type from image',
	                    'fields': ['document_type', 'confidence'],
	                    # This endpoint reads 'image_url', not the 'document_url'
	                    # described under request_format, so spell it out here.
	                    'body': {'image_url': 'string (required) - URL of the image to analyze'}
	                },
	                'POST /api/analyze-documents': {
	                    'description': 'Analyze multiple documents for tampering detection and metadata (max 3)',
	                    'fields': ['tampering_results', 'metadata_results'],
	                    'body': {'image_urls': 'array of image URLs (max 3)'}
	                }
	            },
	            'Utility': {
	                'GET /health': 'Health check endpoint',
	                'GET /': 'This API documentation'
	            }
	        },
	        'request_format': {
	            'body': {
	                'document_url': 'string (required) - URL of the document image to process'
	            },
	            'example': {
	                'document_url': 'https://example.com/national_id.jpg'
	            }
	        },
	        'response_format': {
	            'success': 'boolean - Whether extraction was successful',
	            'extracted_fields': 'object - Extracted data fields',
	            'error': 'string - Error message if failed'
	        }
	    })

	@app.route('/api/routes', methods=['GET'])
	def list_routes():
	    """Return every registered non-static route, ordered by path.

	    Each entry reports the endpoint name, its HTTP methods (sorted, with
	    HEAD and OPTIONS left out) and the URL rule.
	    """
	    hidden_methods = {'HEAD', 'OPTIONS'}
	    entries = [
	        {
	            'endpoint': rule.endpoint,
	            'methods': sorted(rule.methods - hidden_methods),
	            'path': rule.rule
	        }
	        for rule in app.url_map.iter_rules()
	        if rule.endpoint != 'static'
	    ]
	    ordered = sorted(entries, key=lambda entry: entry['path'])

	    return jsonify({
	        'total_routes': len(ordered),
	        'routes': ordered
	    })

	# Start the built-in server when this file is executed directly; the port
	# comes from the PORT environment variable and falls back to 7860.
	if __name__ == '__main__':
	    app.run(
	        host='0.0.0.0',
	        port=int(os.getenv('PORT', 7860)),
	        debug=False,
	    )