"""Gradio web application for snow leopard identification and catalog exploration.
This module is the Gradio frontend for the snow leopard identification system.
Users can upload images, view matches against the reference catalog, and browse
known individuals directly in the browser.
Features:
- Upload snow leopard images or select from examples
- Run full identification pipeline with GDINO+SAM segmentation
- View top-K matches with Wasserstein distance scores
- Explore complete leopard catalog with thumbnails
- Visualize matched keypoints between query and catalog images
Usage:
# Local testing with uv:
uv sync
uv run python app.py
# Deployed on Hugging Face Spaces
"""
import sys
from pathlib import Path
# Add src to path for imports BEFORE importing snowleopard_reid
SPACE_ROOT = Path(__file__).parent
sys.path.insert(0, str(SPACE_ROOT / "src"))
import logging
import shutil
import tempfile
from dataclasses import dataclass
import cv2
import gradio as gr
import numpy as np
import torch
import yaml
from huggingface_hub import hf_hub_download
from PIL import Image
from snowleopard_reid.cache import (
filter_cached_matches,
generate_visualizations_from_npz,
is_cached,
load_cached_results,
)
from snowleopard_reid.catalog import (
get_available_body_parts,
get_available_locations,
get_catalog_metadata_for_id,
load_catalog_index,
load_leopard_metadata,
)
from snowleopard_reid.data_setup import ensure_data_extracted
from snowleopard_reid.pipeline.stages import (
run_feature_extraction_stage,
run_matching_stage,
run_preprocess_stage,
run_segmentation_stage,
select_best_mask,
)
from snowleopard_reid.pipeline.stages.segmentation import (
load_gdino_model,
load_sam_predictor,
)
from snowleopard_reid.visualization import (
draw_keypoints_overlay,
draw_matched_keypoints,
draw_side_by_side_comparison,
)
# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Configuration (hardcoded for HF Spaces / local dev)
CATALOG_ROOT = SPACE_ROOT / "data" / "catalog"
SAM_CHECKPOINT_DIR = SPACE_ROOT / "data" / "models"
SAM_CHECKPOINT_NAME = "sam_hq_vit_l.pth"
EXAMPLES_DIR = SPACE_ROOT / "data" / "examples"
GDINO_MODEL_ID = "IDEA-Research/grounding-dino-base"
TEXT_PROMPT = "a snow leopard."
TOP_K_DEFAULT = 5
SAM_MODEL_TYPE = "vit_l"
@dataclass
class AppConfig:
"""Configuration for the Snow Leopard ID UI application."""
model_path: Path | None
catalog_root: Path
examples_dir: Path
top_k: int
port: int
share: bool
# GDINO+SAM parameters
sam_checkpoint_path: Path
sam_model_type: str
gdino_model_id: str
text_prompt: str
def ensure_sam_model() -> Path:
"""Download SAM HQ model if not present.
Returns:
Path to the SAM HQ checkpoint file
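Example (path derived from the module-level constants):
    sam_path = ensure_sam_model()
    # -> SPACE_ROOT / "data" / "models" / "sam_hq_vit_l.pth"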
"""
sam_path = SAM_CHECKPOINT_DIR / SAM_CHECKPOINT_NAME
if not sam_path.exists():
logger.info("Downloading SAM HQ model (1.6GB)...")
SAM_CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
hf_hub_download(
repo_id="lkeab/hq-sam",
filename=SAM_CHECKPOINT_NAME,
local_dir=SAM_CHECKPOINT_DIR,
)
logger.info("SAM HQ model downloaded successfully")
return sam_path
def get_available_extractors(catalog_root: Path) -> list[str]:
"""Get list of available feature extractors from catalog.
Args:
catalog_root: Root directory of the leopard catalog
Returns:
List of available extractor names (e.g., ['sift', 'superpoint'])
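Illustrative catalog_index.yaml fragment (layout inferred from the keys
read below; actual contents depend on how the catalog was built):
    feature_extractors:
        sift: {...}
        superpoint: {...}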
"""
try:
catalog_index = load_catalog_index(catalog_root)
extractors = list(catalog_index.get("feature_extractors", {}).keys())
if not extractors:
logger.warning(f"No extractors found in catalog at {catalog_root}")
return ["sift"] # Default fallback
return extractors
except Exception as e:
logger.error(f"Failed to load catalog index: {e}")
return ["sift"] # Default fallback
# Global state for models and catalog (loaded at startup)
LOADED_MODELS = {}
def load_catalog_data(config: AppConfig):
"""Load catalog index and individual leopard metadata.
Args:
config: Application configuration containing catalog_root
Returns:
Tuple of (catalog_index, individuals_data)
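Illustrative catalog_index.yaml fragment (inferred from the keys used
below; the path shown is hypothetical):
    individuals:
        - metadata_path: database/skycrest_valley/karindas/metadata.yaml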
"""
catalog_index_path = config.catalog_root / "catalog_index.yaml"
# Load catalog index
with open(catalog_index_path) as f:
catalog_index = yaml.safe_load(f)
# Load metadata for each individual
individuals_data = []
for individual in catalog_index["individuals"]:
metadata_path = config.catalog_root / individual["metadata_path"]
with open(metadata_path) as f:
leopard_metadata = yaml.safe_load(f)
individuals_data.append(leopard_metadata)
return catalog_index, individuals_data
def initialize_models(config: AppConfig):
"""Load models at startup for faster inference.
Args:
config: Application configuration containing model paths
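Side effects:
    Populates the module-level LOADED_MODELS dict with the keys
    "gdino_processor", "gdino_model", "sam_predictor", "device",
    "catalog_root", and "text_prompt".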
"""
logger.info("Initializing models...")
# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Using device: {device}")
if device == "cuda":
gpu_name = torch.cuda.get_device_name(0)
gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
logger.info(f"GPU: {gpu_name} ({gpu_memory:.1f} GB)")
# Load Grounding DINO model
logger.info(f"Loading Grounding DINO model: {config.gdino_model_id}")
gdino_processor, gdino_model = load_gdino_model(
model_id=config.gdino_model_id,
device=device,
)
LOADED_MODELS["gdino_processor"] = gdino_processor
LOADED_MODELS["gdino_model"] = gdino_model
logger.info("Grounding DINO model loaded successfully")
# Load SAM HQ model
logger.info(
f"Loading SAM HQ model from {config.sam_checkpoint_path} (type: {config.sam_model_type})"
)
sam_predictor = load_sam_predictor(
checkpoint_path=config.sam_checkpoint_path,
model_type=config.sam_model_type,
device=device,
)
LOADED_MODELS["sam_predictor"] = sam_predictor
logger.info("SAM HQ model loaded successfully")
# Store device info and catalog root for callbacks
LOADED_MODELS["device"] = device
LOADED_MODELS["catalog_root"] = config.catalog_root
LOADED_MODELS["text_prompt"] = config.text_prompt
logger.info("Models initialized successfully")
def _load_from_cache(
example_path: str,
extractor: str,
config: "AppConfig",
filter_locations: list[str] | None = None,
filter_body_parts: list[str] | None = None,
top_k: int = 5,
):
"""Load cached pipeline results with optional filtering and return UI component updates.
Supports the v2.0 cache format which stores ALL matches with location/body_part
metadata, enabling client-side filtering without re-running the pipeline.
Args:
example_path: Path to the example image
extractor: Feature extractor name
config: Application configuration
filter_locations: Optional list of locations to filter by
filter_body_parts: Optional list of body parts to filter by
top_k: Number of top matches to return after filtering
Returns:
Tuple of 23 UI components matching run_identification output
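Illustrative v2.0 cache shape (field names taken from how predictions
is consumed below; values are hypothetical):
    predictions = {
        "all_matches": [
            {"rank": 1, "leopard_name": "karindas", "wasserstein": 0.13,
             "catalog_id": "...", "location": "skycrest_valley"},
            ...
        ],
    }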
"""
# Load cached results
cached = load_cached_results(example_path, extractor)
predictions = cached["predictions"]
# Support both v1.0 ("matches") and v2.0 ("all_matches") cache formats
if "all_matches" in predictions:
all_matches = predictions["all_matches"]
else:
# Fallback for v1.0 cache format (no filtering support)
all_matches = predictions.get("matches", [])
# Filter and re-rank matches
matches = filter_cached_matches(
all_matches=all_matches,
filter_locations=filter_locations,
filter_body_parts=filter_body_parts,
top_k=top_k,
)
if not matches:
# No matches after filtering - return empty results
return (
"No matches found with the selected filters",
cached["segmentation_image"],
cached["cropped_image"],
cached["keypoints_image"],
[],
gr.update(value=None),
gr.update(value=None),
gr.update(value=""),
gr.update(value=""),
gr.update(value=""),
gr.update(value=""),
gr.update(value=""),
gr.update(value=""),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(value=[]),
gr.update(value=[]),
gr.update(value=[]),
gr.update(value=[]),
gr.update(value=[]),
)
# Generate visualizations on-demand from NPZ data
logger.info(f"Generating visualizations for {len(matches)} filtered matches...")
match_visualizations, clean_comparison_visualizations = (
generate_visualizations_from_npz(
pairwise_dir=cached["pairwise_dir"],
matches=matches,
cropped_image_path=cached["pairwise_dir"].parent / "cropped.png",
)
)
# Store in global state for match selection
LOADED_MODELS["current_match_visualizations"] = match_visualizations
LOADED_MODELS["current_clean_comparison_visualizations"] = (
clean_comparison_visualizations
)
LOADED_MODELS["current_enriched_matches"] = matches
LOADED_MODELS["current_filter_body_parts"] = filter_body_parts
LOADED_MODELS["current_temp_dir"] = None # No temp dir for cached results
# Top match info for result text
top_match = matches[0]
top_leopard_name = top_match["leopard_name"]
top_wasserstein = top_match["wasserstein"]
# Determine confidence level (higher Wasserstein = better match)
if top_wasserstein >= 0.12:
confidence_indicator = "🔵" # Excellent
elif top_wasserstein >= 0.07:
confidence_indicator = "🟢" # Good
elif top_wasserstein >= 0.04:
confidence_indicator = "🟡" # Fair
else:
confidence_indicator = "🔴" # Uncertain
result_text = f"## {confidence_indicator} {top_leopard_name.title()}"
# Build dataset for top-K matches table
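# Each row: [rank, indicator, name, location, score], e.g.
# [1, "🔵", "Karindas", "Skycrest Valley", "0.1342"] (values illustrative)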
dataset_samples = []
for match in matches:
rank = match["rank"]
leopard_name = match["leopard_name"]
wasserstein = match["wasserstein"]
# Use location from cache (v2.0) or extract from path
location = match.get("location", "unknown")
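# Catalog path convention (see load_matched_individual_gallery_by_body_part):
# database/{location}/{individual}/...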
if location == "unknown":
catalog_id = match["catalog_id"]
catalog_metadata = get_catalog_metadata_for_id(
config.catalog_root, catalog_id
)
if catalog_metadata:
img_path_parts = Path(catalog_metadata["image_path"]).parts
try:
db_idx = img_path_parts.index("database")
if db_idx + 1 < len(img_path_parts):
location = img_path_parts[db_idx + 1]
except ValueError:
pass
# Confidence indicator
if wasserstein >= 0.12:
indicator = "🔵"
elif wasserstein >= 0.07:
indicator = "🟢"
elif wasserstein >= 0.04:
indicator = "🟡"
else:
indicator = "🔴"
dataset_samples.append(
[
rank,
indicator,
leopard_name.title(),
location.replace("_", " ").title(),
f"{wasserstein:.4f}",
]
)
# Load rank 1 details
rank1_details = load_match_details_for_rank(rank=1)
# Return all 23 outputs
return (
result_text, # 1. Top match result text
cached["segmentation_image"], # 2. Segmentation overlay
cached["cropped_image"], # 3. Cropped leopard
cached["keypoints_image"], # 4. Extracted keypoints
dataset_samples, # 5. Matches table data
*rank1_details, # 6-23. visualizations, header, indicators, galleries
)
def run_identification(
image,
extractor: str,
top_k: int,
selected_locations: list[str],
selected_body_parts: list[str],
example_path: str | None,
config: AppConfig,
):
"""Run snow leopard identification pipeline on uploaded image.
Args:
image: PIL Image from Gradio upload
extractor: Feature extractor to use ('sift', 'superpoint', 'disk', 'aliked')
top_k: Number of top matches to return
selected_locations: List of selected locations (includes "all" for no filtering)
selected_body_parts: List of selected body parts (includes "all" for no filtering)
example_path: Path to example image if selected from examples (for cache lookup)
config: Application configuration
Returns:
Tuple of UI components to update
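Filter semantics (conversion happens at the top of the function body):
    ["all"] or empty selection -> None (match the whole catalog)
    ["skycrest_valley"]        -> restrict to that location (illustrative value)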
"""
if image is None:
# Return 23 empty outputs (5 pipeline + 18 rank 1 details)
return (
"Please upload an image first", # 1. result_text
None, # 2. seg_viz
None, # 3. cropped_image
None, # 4. extracted_kpts_viz
[], # 5. dataset_samples
gr.update(value=None), # 6. matched_kpts_viz
gr.update(value=None), # 7. clean_comparison_viz
gr.update(value=""), # 8. header
gr.update(value=""), # 9. head indicator
gr.update(value=""), # 10. left_flank indicator
gr.update(value=""), # 11. right_flank indicator
gr.update(value=""), # 12. tail indicator
gr.update(value=""), # 13. misc indicator
gr.update(visible=False), # 14. head empty message
gr.update(visible=False), # 15. left_flank empty message
gr.update(visible=False), # 16. right_flank empty message
gr.update(visible=False), # 17. tail empty message
gr.update(visible=False), # 18. misc empty message
gr.update(value=[]), # 19. head gallery
gr.update(value=[]), # 20. left_flank gallery
gr.update(value=[]), # 21. right_flank gallery
gr.update(value=[]), # 22. tail gallery
gr.update(value=[]), # 23. misc gallery
)
# Convert filter selections to None if "all" is selected
filter_locations = (
None
if not selected_locations or "all" in selected_locations
else selected_locations
)
filter_body_parts_parsed = (
None
if not selected_body_parts or "all" in selected_body_parts
else selected_body_parts
)
# Debug logging for cache check
logger.info(f"Cache check: example_path={example_path}, extractor={extractor}")
if example_path:
cache_exists = is_cached(example_path, extractor)
logger.info(f"is_cached() returned: {cache_exists}")
else:
cache_exists = False
logger.info("No example_path provided, skipping cache")
# Check cache for example images (v2.0 cache supports filtering)
if example_path and cache_exists:
logger.info(f"Cache hit for {example_path} with {extractor}")
if filter_locations or filter_body_parts_parsed:
logger.info(
f" Applying filters: locations={filter_locations}, body_parts={filter_body_parts_parsed}"
)
try:
return _load_from_cache(
example_path,
extractor,
config,
filter_locations=filter_locations,
filter_body_parts=filter_body_parts_parsed,
top_k=int(top_k),
)
except Exception as e:
logger.warning(f"Cache load failed, running pipeline: {e}")
# Fall through to run full pipeline
# Use the already-parsed filter values for the pipeline
filter_body_parts = filter_body_parts_parsed
# Log applied filters
if filter_locations or filter_body_parts:
filter_desc = []
if filter_locations:
filter_desc.append(f"locations: {', '.join(filter_locations)}")
if filter_body_parts:
filter_desc.append(f"body parts: {', '.join(filter_body_parts)}")
logger.info(f"Applied filters - {' | '.join(filter_desc)}")
else:
logger.info("No filters applied - matching against entire catalog")
try:
# Create temporary directory for this query
temp_dir = Path(tempfile.mkdtemp(prefix="snowleopard_id_"))
temp_image_path = temp_dir / "query.jpg"
# Save uploaded image
logger.info(f"Image type: {type(image)}")
logger.info(f"Image mode: {image.mode if hasattr(image, 'mode') else 'N/A'}")
logger.info(f"Image size: {image.size if hasattr(image, 'size') else 'N/A'}")
# Convert to RGB so RGBA/paletted uploads can still be saved as JPEG
image.convert("RGB").save(temp_image_path, quality=95)
# Verify saved image
saved_size = temp_image_path.stat().st_size
logger.info(f"Saved image size: {saved_size / 1024 / 1024:.2f} MB")
logger.info(f"Processing query image: {temp_image_path}")
device = LOADED_MODELS.get("device", "cpu")
# Step 1: Run GDINO+SAM segmentation using pre-loaded models
logger.info("Running GDINO+SAM segmentation...")
gdino_processor = LOADED_MODELS.get("gdino_processor")
gdino_model = LOADED_MODELS.get("gdino_model")
sam_predictor = LOADED_MODELS.get("sam_predictor")
text_prompt = LOADED_MODELS.get("text_prompt", "a snow leopard.")
seg_stage = run_segmentation_stage(
image_path=temp_image_path,
strategy="gdino_sam",
confidence_threshold=0.2,
device=device,
gdino_processor=gdino_processor,
gdino_model=gdino_model,
sam_predictor=sam_predictor,
text_prompt=text_prompt,
box_threshold=0.30,
text_threshold=0.20,
)
predictions = seg_stage["data"]["predictions"]
logger.info(f"Number of predictions: {len(predictions)}")
if not predictions:
logger.warning("No predictions found from segmentation")
logger.warning(f"Full segmentation stage: {seg_stage}")
# Return 23 empty outputs (5 pipeline + 18 rank 1 details)
return (
"No snow leopards detected in image", # 1. result_text
None, # 2. seg_viz
None, # 3. cropped_image
None, # 4. extracted_kpts_viz
[], # 5. dataset_samples
gr.update(value=None), # 6. matched_kpts_viz
gr.update(value=None), # 7. clean_comparison_viz
gr.update(value=""), # 8. header
gr.update(value=""), # 9. head indicator
gr.update(value=""), # 10. left_flank indicator
gr.update(value=""), # 11. right_flank indicator
gr.update(value=""), # 12. tail indicator
gr.update(value=""), # 13. misc indicator
gr.update(visible=False), # 14. head empty message
gr.update(visible=False), # 15. left_flank empty message
gr.update(visible=False), # 16. right_flank empty message
gr.update(visible=False), # 17. tail empty message
gr.update(visible=False), # 18. misc empty message
gr.update(value=[]), # 19. head gallery
gr.update(value=[]), # 20. left_flank gallery
gr.update(value=[]), # 21. right_flank gallery
gr.update(value=[]), # 22. tail gallery
gr.update(value=[]), # 23. misc gallery
)
# Step 2: Select best mask
logger.info("Selecting best mask...")
selected_idx, selected_pred = select_best_mask(
predictions,
strategy="confidence_area",
)
# Step 3: Preprocess (crop and mask)
logger.info("Preprocessing query image...")
prep_stage = run_preprocess_stage(
image_path=temp_image_path,
mask=selected_pred["mask"],
padding=5,
)
cropped_image_pil = prep_stage["data"]["cropped_image"]
# Save cropped image for visualization later
cropped_path = temp_dir / "cropped.jpg"
cropped_image_pil.save(cropped_path)
# Step 4: Extract features
logger.info(f"Extracting features using {extractor.upper()}...")
feat_stage = run_feature_extraction_stage(
image=cropped_image_pil,
extractor=extractor,
max_keypoints=2048,
device=device,
)
query_features = feat_stage["data"]["features"]
# Step 5: Match against catalog
logger.info("Matching against catalog...")
pairwise_dir = temp_dir / "pairwise"
pairwise_dir.mkdir(exist_ok=True)
match_stage = run_matching_stage(
query_features=query_features,
catalog_path=config.catalog_root,
top_k=top_k,
extractor=extractor,
device=device,
query_image_path=str(cropped_path),
pairwise_output_dir=pairwise_dir,
filter_locations=filter_locations,
filter_body_parts=filter_body_parts,
)
matches = match_stage["data"]["matches"]
if not matches:
# Return 23 empty outputs (5 pipeline + 18 rank 1 details)
return (
"No matches found in catalog", # 1. result_text
None, # 2. seg_viz
cropped_image_pil, # 3. cropped_image
None, # 4. extracted_kpts_viz
[], # 5. dataset_samples
gr.update(value=None), # 6. matched_kpts_viz
gr.update(value=None), # 7. clean_comparison_viz
gr.update(value=""), # 8. header
gr.update(value=""), # 9. head indicator
gr.update(value=""), # 10. left_flank indicator
gr.update(value=""), # 11. right_flank indicator
gr.update(value=""), # 12. tail indicator
gr.update(value=""), # 13. misc indicator
gr.update(visible=False), # 14. head empty message
gr.update(visible=False), # 15. left_flank empty message
gr.update(visible=False), # 16. right_flank empty message
gr.update(visible=False), # 17. tail empty message
gr.update(visible=False), # 18. misc empty message
gr.update(value=[]), # 19. head gallery
gr.update(value=[]), # 20. left_flank gallery
gr.update(value=[]), # 21. right_flank gallery
gr.update(value=[]), # 22. tail gallery
gr.update(value=[]), # 23. misc gallery
)
# Top match
top_match = matches[0]
top_leopard_name = top_match["leopard_name"]
top_wasserstein = top_match["wasserstein"]
# Determine confidence level (higher Wasserstein = better match)
if top_wasserstein >= 0.12:
confidence_indicator = "🔵" # Excellent
elif top_wasserstein >= 0.07:
confidence_indicator = "🟢" # Good
elif top_wasserstein >= 0.04:
confidence_indicator = "🟡" # Fair
else:
confidence_indicator = "🔴" # Uncertain
result_text = f"## {confidence_indicator} {top_leopard_name.title()}"
# Create segmentation visualization
seg_viz = create_segmentation_viz(
image_path=temp_image_path, mask=selected_pred["mask"]
)
# Generate extracted keypoints visualization
extracted_kpts_viz = None
try:
# Extract keypoints from query features for visualization
query_kpts = query_features["keypoints"].cpu().numpy()
extracted_kpts_viz = draw_keypoints_overlay(
image_path=cropped_path,
keypoints=query_kpts,
max_keypoints=500,
color="blue",
ps=10,
)
except Exception as e:
logger.error(f"Error creating extracted keypoints visualization: {e}")
# Build dataset for top-K matches table
dataset_samples = []
match_visualizations = {}
clean_comparison_visualizations = {}
for match in matches:
rank = match["rank"]
leopard_name = match["leopard_name"]
wasserstein = match["wasserstein"]
catalog_img_path = Path(match["filepath"])
# Get location from catalog metadata
catalog_id = match["catalog_id"]
catalog_metadata = get_catalog_metadata_for_id(
config.catalog_root, catalog_id
)
location = "unknown"
if catalog_metadata:
# Extract location from path: database/{location}/{individual}/...
img_path_parts = Path(catalog_metadata["image_path"]).parts
if len(img_path_parts) >= 3:
# Find 'database' in path and get next part
try:
db_idx = img_path_parts.index("database")
if db_idx + 1 < len(img_path_parts):
location = img_path_parts[db_idx + 1]
except ValueError:
pass
# Confidence indicator (higher Wasserstein = better match)
if wasserstein >= 0.12:
indicator = "🔵" # Excellent
elif wasserstein >= 0.07:
indicator = "🟢" # Good
elif wasserstein >= 0.04:
indicator = "🟡" # Fair
else:
indicator = "🔴" # Uncertain
# Create visualizations for this match
npz_path = pairwise_dir / f"rank_{rank:02d}_{match['catalog_id']}.npz"
if npz_path.exists():
try:
pairwise_data = np.load(npz_path)
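# Arrays consumed from the NPZ: query_keypoints, catalog_keypoints,
# match_scores (shapes assumed to be (N, 2), (N, 2), (N,))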
# Create matched keypoints visualization
match_viz = draw_matched_keypoints(
query_image_path=cropped_path,
catalog_image_path=catalog_img_path,
query_keypoints=pairwise_data["query_keypoints"],
catalog_keypoints=pairwise_data["catalog_keypoints"],
match_scores=pairwise_data["match_scores"],
max_matches=100,
)
match_visualizations[rank] = match_viz
# Create clean comparison visualization
clean_viz = draw_side_by_side_comparison(
query_image_path=cropped_path,
catalog_image_path=catalog_img_path,
)
clean_comparison_visualizations[rank] = clean_viz
except Exception as e:
logger.error(f"Error creating visualizations for rank {rank}: {e}")
# Format for table (as list, not dict)
dataset_samples.append(
[
rank,
indicator,
leopard_name.title(),
location.replace("_", " ").title(),
f"{wasserstein:.4f}",
]
)
# Store match visualizations, enriched matches, filters, and temp_dir in global state
LOADED_MODELS["current_match_visualizations"] = match_visualizations
LOADED_MODELS["current_clean_comparison_visualizations"] = (
clean_comparison_visualizations
)
LOADED_MODELS["current_enriched_matches"] = matches
LOADED_MODELS["current_filter_body_parts"] = filter_body_parts
LOADED_MODELS["current_temp_dir"] = temp_dir
# Automatically load rank 1 details (visualizations + galleries)
rank1_details = load_match_details_for_rank(rank=1)
# Return 23 outputs total:
# - 5 pipeline outputs (result_text, seg_viz, cropped_image, extracted_kpts_viz, dataset_samples)
# - 18 rank 1 details (from load_match_details_for_rank)
return (
result_text, # 1. Top match result text
seg_viz, # 2. Segmentation overlay
cropped_image_pil, # 3. Cropped leopard
extracted_kpts_viz, # 4. Extracted keypoints
dataset_samples, # 5. Matches table data
# Unpack all 18 rank 1 details:
*rank1_details, # 6-23. visualizations, header, indicators, galleries
)
except Exception as e:
logger.error(f"Error processing image: {e}", exc_info=True)
# Return 23 empty outputs (5 pipeline + 18 rank 1 details)
return (
f"Error processing image: {str(e)}", # 1. result_text
None, # 2. seg_viz
None, # 3. cropped_image
None, # 4. extracted_kpts_viz
[], # 5. dataset_samples
gr.update(value=None), # 6. matched_kpts_viz
gr.update(value=None), # 7. clean_comparison_viz
gr.update(value=""), # 8. header
gr.update(value=""), # 9. head indicator
gr.update(value=""), # 10. left_flank indicator
gr.update(value=""), # 11. right_flank indicator
gr.update(value=""), # 12. tail indicator
gr.update(value=""), # 13. misc indicator
gr.update(visible=False), # 14. head empty message
gr.update(visible=False), # 15. left_flank empty message
gr.update(visible=False), # 16. right_flank empty message
gr.update(visible=False), # 17. tail empty message
gr.update(visible=False), # 18. misc empty message
gr.update(value=[]), # 19. head gallery
gr.update(value=[]), # 20. left_flank gallery
gr.update(value=[]), # 21. right_flank gallery
gr.update(value=[]), # 22. tail gallery
gr.update(value=[]), # 23. misc gallery
)
def create_segmentation_viz(image_path, mask):
"""Create visualization of segmentation mask overlaid on image."""
# Load original image (cv2.imread returns None on failure, so guard it)
img = cv2.imread(str(image_path))
if img is None:
raise ValueError(f"Could not read image for segmentation overlay: {image_path}")
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Resize mask to match image dimensions if needed
if mask.shape[:2] != img_rgb.shape[:2]:
mask_resized = cv2.resize(
mask.astype(np.uint8),
(img_rgb.shape[1], img_rgb.shape[0]),
interpolation=cv2.INTER_NEAREST,
)
else:
mask_resized = mask
# Create colored overlay
overlay = img_rgb.copy()
overlay[mask_resized > 0] = [255, 0, 0] # Red for masked region
# Blend
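# Weighted blend: blended = (1 - alpha) * img_rgb + alpha * overlay,
# i.e. a 40% red tint over the masked region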
alpha = 0.4
blended = cv2.addWeighted(img_rgb, 1 - alpha, overlay, alpha, 0)
return Image.fromarray(blended)
def load_match_details_for_rank(rank: int) -> tuple:
"""Load all match details (visualizations + galleries) for a specific rank.
This is a reusable helper function that encapsulates the logic for loading
match visualizations, galleries, and metadata for a given rank. Used by both
the automatic rank 1 display after pipeline completion and the interactive
row selection handler.
Args:
rank: The rank to load (1-indexed)
Returns:
Tuple of 18 Gradio component updates:
(matched_kpts_viz, clean_comparison_viz, header,
head_indicator, left_flank_indicator, right_flank_indicator, tail_indicator, misc_indicator,
head_empty_message, left_flank_empty_message, right_flank_empty_message,
tail_empty_message, misc_empty_message,
gallery_head, gallery_left_flank, gallery_right_flank, gallery_tail, gallery_misc)
"""
# Get stored data from global state
match_visualizations = LOADED_MODELS.get("current_match_visualizations", {})
clean_comparison_visualizations = LOADED_MODELS.get(
"current_clean_comparison_visualizations", {}
)
enriched_matches = LOADED_MODELS.get("current_enriched_matches", [])
filter_body_parts = LOADED_MODELS.get("current_filter_body_parts")
catalog_root = LOADED_MODELS.get("catalog_root")
# Find the match for the requested rank
selected_match = None
for match in enriched_matches:
if match["rank"] == rank:
selected_match = match
break
if not selected_match or rank not in match_visualizations:
# Return empty updates for all 18 outputs
return (
gr.update(value=None), # 1. matched_kpts_viz
gr.update(value=None), # 2. clean_comparison_viz
gr.update(value=""), # 3. header
gr.update(value=""), # 4. head indicator
gr.update(value=""), # 5. left_flank indicator
gr.update(value=""), # 6. right_flank indicator
gr.update(value=""), # 7. tail indicator
gr.update(value=""), # 8. misc indicator
gr.update(visible=False), # 9. head empty message
gr.update(visible=False), # 10. left_flank empty message
gr.update(visible=False), # 11. right_flank empty message
gr.update(visible=False), # 12. tail empty message
gr.update(visible=False), # 13. misc empty message
gr.update(value=[]), # 14. head gallery
gr.update(value=[]), # 15. left_flank gallery
gr.update(value=[]), # 16. right_flank gallery
gr.update(value=[]), # 17. tail gallery
gr.update(value=[]), # 18. misc gallery
)
# Get both visualizations
match_viz = match_visualizations[rank]
clean_viz = clean_comparison_visualizations.get(rank)
# Create dynamic header with leopard name
leopard_name = selected_match["leopard_name"]
header_text = f"## Reference Images for {leopard_name.title()}"
# Load galleries organized by body part
galleries = {}
if catalog_root:
try:
# Extract location from match filepath
location = None
filepath = Path(selected_match["filepath"])
parts = filepath.parts
if "database" in parts:
db_idx = parts.index("database")
if db_idx + 1 < len(parts):
location = parts[db_idx + 1]
galleries = load_matched_individual_gallery_by_body_part(
catalog_root=catalog_root,
leopard_name=leopard_name,
location=location,
)
except Exception as e:
logger.error(f"Error loading gallery for {leopard_name}: {e}")
# Initialize empty galleries on error
galleries = {
"head": [],
"left_flank": [],
"right_flank": [],
"tail": [],
"misc": [],
}
# Mark body parts that were included in the active filter
def get_indicator(body_part: str) -> str:
"""Return a "* (filtered)" marker if the body part was filtered on, else ""."""
if filter_body_parts and body_part in filter_body_parts:
return "* (filtered)"
return ""
# Helper to determine if empty message should be visible
def is_empty(body_part: str) -> bool:
"""Return True if no images for this body part."""
return len(galleries.get(body_part, [])) == 0
return (
gr.update(value=match_viz), # 1. matched_kpts_viz
gr.update(value=clean_viz), # 2. clean_comparison_viz
gr.update(value=header_text), # 3. header
gr.update(value=get_indicator("head")), # 4. head indicator
gr.update(value=get_indicator("left_flank")), # 5. left_flank indicator
gr.update(value=get_indicator("right_flank")), # 6. right_flank indicator
gr.update(value=get_indicator("tail")), # 7. tail indicator
gr.update(value=get_indicator("misc")), # 8. misc indicator
gr.update(visible=is_empty("head")), # 9. head empty message
gr.update(visible=is_empty("left_flank")), # 10. left_flank empty message
gr.update(visible=is_empty("right_flank")), # 11. right_flank empty message
gr.update(visible=is_empty("tail")), # 12. tail empty message
gr.update(visible=is_empty("misc")), # 13. misc empty message
gr.update(
value=galleries.get("head", []), visible=not is_empty("head")
), # 14. head gallery
gr.update(
value=galleries.get("left_flank", []), visible=not is_empty("left_flank")
), # 15. left_flank gallery
gr.update(
value=galleries.get("right_flank", []), visible=not is_empty("right_flank")
), # 16. right_flank gallery
gr.update(
value=galleries.get("tail", []), visible=not is_empty("tail")
), # 17. tail gallery
gr.update(
value=galleries.get("misc", []), visible=not is_empty("misc")
), # 18. misc gallery
)
def on_match_selected(evt: gr.SelectData):
"""Handle selection of a match from the dataset table.
Returns both visualizations, header, indicators, empty messages,
and galleries organized by body part.
"""
# evt.index is [row, col] for Dataframe, we want row
if isinstance(evt.index, (list, tuple)):
selected_row = evt.index[0]
else:
selected_row = evt.index
selected_rank = selected_row + 1 # Ranks are 1-indexed
# Delegate to the reusable helper function
return load_match_details_for_rank(selected_rank)
def load_matched_individual_gallery_by_body_part(
catalog_root: Path,
leopard_name: str,
location: str | None = None,
) -> dict[str, list[tuple]]:
"""Load all images for a matched individual organized by body part.
Args:
catalog_root: Path to catalog root directory
leopard_name: Name of the matched individual (e.g., "karindas")
location: Geographic location (e.g., "skycrest_valley")
Returns:
Dict mapping body part to list of (PIL.Image, caption) tuples:
{
"head": [(img1, caption1), (img2, caption2), ...],
"left_flank": [...],
"right_flank": [...],
"tail": [...],
"misc": [...]
}
"""
# Initialize dict with all body parts
galleries = {
"head": [],
"left_flank": [],
"right_flank": [],
"tail": [],
"misc": [],
}
# Find metadata path: database/{location}/{individual}/metadata.yaml
if location:
metadata_path = (
catalog_root / "database" / location / leopard_name / "metadata.yaml"
)
else:
# Try to find the individual in any location
metadata_path = None
database_dir = catalog_root / "database"
if database_dir.exists():
for loc_dir in database_dir.iterdir():
if loc_dir.is_dir():
potential_path = loc_dir / leopard_name / "metadata.yaml"
if potential_path.exists():
metadata_path = potential_path
break
if not metadata_path or not metadata_path.exists():
logger.warning(f"Metadata not found for {leopard_name}")
return galleries
try:
metadata = load_leopard_metadata(metadata_path)
# Load all images organized by body part
for img_entry in metadata["reference_images"]:
body_part = img_entry.get("body_part", "misc")
# Normalize body_part to match our keys
if body_part not in galleries:
body_part = "misc" # Default to misc if unknown
# Load image
img_path = catalog_root / "database" / img_entry["path"]
try:
img = Image.open(img_path)
# Simple caption: just body part name
caption = body_part
galleries[body_part].append((img, caption))
except Exception as e:
logger.error(f"Error loading image {img_path}: {e}")
except Exception as e:
logger.error(f"Error loading metadata for {leopard_name}: {e}")
return galleries
def cleanup_temp_files():
"""Clean up temporary files from previous run."""
temp_dir = LOADED_MODELS.get("current_temp_dir")
if temp_dir and temp_dir.exists():
try:
shutil.rmtree(temp_dir)
logger.info(f"Cleaned up temporary directory: {temp_dir}")
except Exception as e:
logger.warning(f"Error cleaning up temp directory: {e}")
def create_leopard_tab(leopard_metadata, config: AppConfig):
"""Create a tab for displaying a single leopard's images.
Args:
leopard_metadata: Metadata dictionary for the leopard individual
config: Application configuration
"""
# Support both 'leopard_name' and 'individual_name' keys
leopard_name = leopard_metadata.get("leopard_name") or leopard_metadata.get(
"individual_name"
)
location = leopard_metadata.get("location", "unknown")
total_images = leopard_metadata["statistics"]["total_reference_images"]
# Get body parts from statistics
body_parts = leopard_metadata["statistics"].get(
"body_parts_represented", leopard_metadata["statistics"].get("body_parts", [])
)
body_parts_str = ", ".join(body_parts) if body_parts else "N/A"
with gr.Tab(leopard_name):
# Header with statistics
gr.Markdown(
f"### {leopard_name.title()}\n"
f"**Location:** {location.replace('_', ' ').title()} | "
f"**{total_images} images** | "
f"**Body parts:** {body_parts_str}"
)
# Load all images with body_part captions
gallery_data = []
for img_entry in leopard_metadata["reference_images"]:
img_path = config.catalog_root / "database" / img_entry["path"]
body_part = img_entry.get("body_part", "unknown")
try:
img = Image.open(img_path)
# Caption format: just body_part (location is already in tab)
caption = body_part
gallery_data.append((img, caption))
except Exception as e:
logger.error(f"Error loading image {img_path}: {e}")
# Display gallery
gr.Gallery(
value=gallery_data,
label=f"Reference Images for {leopard_name.title()}",
columns=6,
height=700,
object_fit="scale-down",
allow_preview=True,
)
def create_app(config: AppConfig):
"""Create and configure the Gradio application.
Args:
config: Application configuration
"""
# Initialize models at startup
initialize_models(config)
# Load catalog data
catalog_index, individuals_data = load_catalog_data(config)
# Build example images list from examples directory
example_images = (
list(config.examples_dir.glob("*.jpg"))
+ list(config.examples_dir.glob("*.JPG"))
+ list(config.examples_dir.glob("*.png"))
)
# Sort with Ayima images last
example_images.sort(key=lambda x: (1 if "Ayima" in x.name else 0, x.name))
# Create interface
with gr.Blocks(title="Snow Leopard Identification") as app:
# Hidden state to track which example image was selected (for cache lookup)
selected_example_state = gr.State(value=None)
# Main tabs
with gr.Tabs():
# Tab 1: Identify Snow Leopard
with gr.Tab("Identify Snow Leopard"):
gr.Markdown("""
Upload a snow leopard image or select an example to identify which individual it is.
The system will detect the leopard, extract distinctive features, and match against the catalog.
""")
with gr.Row():
# Left column: Input
with gr.Column(scale=1):
image_input = gr.Image(
type="pil",
label="Upload Snow Leopard Image",
sources=["upload", "clipboard"],
)
examples_component = gr.Examples(
examples=[[str(img)] for img in example_images],
inputs=image_input,
label="Example Images",
)
# Track example selection for cache lookup
def on_example_select(evt: gr.SelectData):
"""Update state when an example is selected."""
if evt.index is not None:
return str(example_images[evt.index])
return None
# Example selection sets the state via examples_component.dataset.select
# below; a fresh upload clears it via image_input.upload.
examples_component.dataset.select(
fn=on_example_select,
outputs=[selected_example_state],
)
# Clear example state when user uploads a new image
image_input.upload(
fn=lambda: None,
outputs=[selected_example_state],
)
# Location filter dropdown
available_locations = get_available_locations(
config.catalog_root
)
location_filter = gr.Dropdown(
choices=available_locations,
value=["all"],
multiselect=True,
label="Filter by Location",
info="Select locations to search (default: all locations)",
)
# Body part filter dropdown
available_body_parts = get_available_body_parts(
config.catalog_root
)
body_part_filter = gr.Dropdown(
choices=available_body_parts,
value=["all"],
multiselect=True,
label="Filter by Body Part",
info="Select body parts to match (default: all body parts)",
)
# Advanced Configuration Accordion
with gr.Accordion("Advanced Configuration", open=False):
# Feature extractor dropdown
available_extractors = get_available_extractors(
config.catalog_root
)
extractor_dropdown = gr.Dropdown(
choices=available_extractors,
value="sift"
if "sift" in available_extractors
else (
available_extractors[0]
if available_extractors
else "sift"
),
label="Feature Extractor",
info=f"Available: {', '.join(available_extractors)}",
scale=1,
)
# Top-K parameter
top_k_input = gr.Number(
value=config.top_k,
label="Top-K Matches",
info="Number of top matches to return",
minimum=1,
maximum=20,
step=1,
precision=0,
scale=1,
)
submit_btn = gr.Button(
value="Identify Snow Leopard",
variant="primary",
size="lg",
)
# Right column: Results
with gr.Column(scale=4):
# Top-1 prediction
result_text = gr.Markdown("")
# Tabs for different result views
with gr.Tabs():
with gr.Tab("Model Internals"):
gr.Markdown("""
View the internal processing steps: segmentation mask, cropped leopard, and extracted keypoints.
""")
with gr.Row():
seg_viz = gr.Image(
label="Segmentation Overlay",
type="pil",
)
cropped_image = gr.Image(
label="Extracted Snow Leopard",
type="pil",
)
extracted_kpts_viz = gr.Image(
label="Extracted Keypoints",
type="pil",
)
with gr.Tab("Top Matches"):
gr.Markdown("""
Click a row to view detailed feature matching visualization and all reference images for that leopard.
**Higher Wasserstein distance = better match** (typical range: 0.04-0.27)
**Confidence Levels:** 🔵 Excellent (>=0.12) | 🟢 Good (>=0.07) | 🟡 Fair (>=0.04) | 🔴 Uncertain (<0.04)
""")
matches_dataset = gr.Dataframe(
headers=[
"Rank",
"Confidence",
"Leopard Name",
"Location",
"Wasserstein",
],
label="Top Matches",
wrap=True,
col_count=(5, "fixed"),
)
# Visualization container (always visible, images populated on pipeline completion)
with gr.Column() as viz_tabs:
# Tabbed visualization views
with gr.Tabs():
with gr.Tab("Matched Keypoints"):
gr.Markdown(
"Feature matching with keypoints and confidence-coded connecting lines. "
"**Green** = high confidence, **Yellow** = medium, **Red** = low."
)
matched_kpts_viz = gr.Image(
type="pil",
show_label=False,
)
with gr.Tab("Clean Comparison"):
gr.Markdown(
"Side-by-side comparison without feature annotations. "
"Useful for assessing overall visual similarity and spotting patterns."
)
clean_comparison_viz = gr.Image(
type="pil",
show_label=False,
)
# Dynamic header showing matched leopard name
selected_match_header = gr.Markdown(
"", visible=True
)
# Create tabs for each body part
with gr.Tabs():
with gr.Tab("Head"):
head_indicator = gr.Markdown("")
head_empty_message = gr.Markdown(
value='<div style="text-align: center; padding: 60px 20px; color: #888;">'
'<p style="font-size: 16px;">No reference images available for this body part</p>'
"</div>",
visible=False,
)
gallery_head = gr.Gallery(
columns=6,
height=400,
object_fit="scale-down",
allow_preview=True,
)
with gr.Tab("Left Flank"):
left_flank_indicator = gr.Markdown("")
left_flank_empty_message = gr.Markdown(
value='<div style="text-align: center; padding: 60px 20px; color: #888;">'
'<p style="font-size: 16px;">No reference images available for this body part</p>'
"</div>",
visible=False,
)
gallery_left_flank = gr.Gallery(
columns=6,
height=400,
object_fit="scale-down",
allow_preview=True,
)
with gr.Tab("Right Flank"):
right_flank_indicator = gr.Markdown("")
right_flank_empty_message = gr.Markdown(
value='<div style="text-align: center; padding: 60px 20px; color: #888;">'
'<p style="font-size: 16px;">No reference images available for this body part</p>'
"</div>",
visible=False,
)
gallery_right_flank = gr.Gallery(
columns=6,
height=400,
object_fit="scale-down",
allow_preview=True,
)
with gr.Tab("Tail"):
tail_indicator = gr.Markdown("")
tail_empty_message = gr.Markdown(
value='<div style="text-align: center; padding: 60px 20px; color: #888;">'
'<p style="font-size: 16px;">No reference images available for this body part</p>'
"</div>",
visible=False,
)
gallery_tail = gr.Gallery(
columns=6,
height=400,
object_fit="scale-down",
allow_preview=True,
)
with gr.Tab("Other"):
misc_indicator = gr.Markdown("")
misc_empty_message = gr.Markdown(
value='<div style="text-align: center; padding: 60px 20px; color: #888;">'
'<p style="font-size: 16px;">No reference images available for this body part</p>'
"</div>",
visible=False,
)
gallery_misc = gr.Gallery(
columns=6,
height=400,
object_fit="scale-down",
allow_preview=True,
)
# Connect submit button
submit_btn.click(
fn=lambda img, ext, top_k, locs, parts, ex_path: run_identification(
image=img,
extractor=ext,
top_k=int(top_k),
selected_locations=locs,
selected_body_parts=parts,
example_path=ex_path,
config=config,
),
inputs=[
image_input,
extractor_dropdown,
top_k_input,
location_filter,
body_part_filter,
selected_example_state,
],
outputs=[
# Pipeline outputs (5 total)
result_text,
seg_viz,
cropped_image,
extracted_kpts_viz,
matches_dataset,
# Rank 1 auto-display outputs (18 total)
matched_kpts_viz,
clean_comparison_viz,
selected_match_header,
head_indicator,
left_flank_indicator,
right_flank_indicator,
tail_indicator,
misc_indicator,
head_empty_message,
left_flank_empty_message,
right_flank_empty_message,
tail_empty_message,
misc_empty_message,
gallery_head,
gallery_left_flank,
gallery_right_flank,
gallery_tail,
gallery_misc,
],
)
# Connect dataset selection
matches_dataset.select(
fn=on_match_selected,
outputs=[
matched_kpts_viz,
clean_comparison_viz,
selected_match_header,
head_indicator,
left_flank_indicator,
right_flank_indicator,
tail_indicator,
misc_indicator,
head_empty_message,
left_flank_empty_message,
right_flank_empty_message,
tail_empty_message,
misc_empty_message,
gallery_head,
gallery_left_flank,
gallery_right_flank,
gallery_tail,
gallery_misc,
],
)
# Tab 2: Explore Catalog
with gr.Tab("Explore Catalog"):
gr.Markdown(
"""
## Snow Leopard Catalog Browser
Browse the reference catalog of known snow leopard individuals.
Each individual has multiple reference images covering different body parts and capture locations.
"""
)
# Display catalog statistics
stats = catalog_index.get("statistics", {})
formatted_locations = [
loc.replace("_", " ").title() for loc in stats.get("locations", [])
]
gr.Markdown(
f"""
### Catalog Statistics
- **Total Individuals:** {stats.get("total_individuals", "N/A")}
- **Total Images:** {stats.get("total_reference_images", "N/A")}
- **Locations:** {", ".join(formatted_locations)}
- **Body Parts:** {", ".join(stats.get("body_parts", []))}
"""
)
gr.Markdown("---")
gr.Markdown("### Individual Leopards by Location")
# Group individuals by location
individuals_by_location = {}
for individual_data in individuals_data:
location = individual_data.get("location", "unknown")
if location not in individuals_by_location:
individuals_by_location[location] = []
individuals_by_location[location].append(individual_data)
# Create tabs for each location
with gr.Tabs():
for location in sorted(individuals_by_location.keys()):
with gr.Tab(f"{location.replace('_', ' ').title()}"):
# Create subtabs for each individual in this location
with gr.Tabs():
for leopard_data in individuals_by_location[location]:
create_leopard_tab(
leopard_metadata=leopard_data, config=config
)
# Cleanup on app close
app.unload(cleanup_temp_files)
# Load first example image on startup
def load_first_example():
"""Load the first example image when the app starts.
Returns both the image AND the path so the cache can be used
when the user clicks Identify without selecting a new example.
"""
if example_images:
try:
first_image = Image.open(example_images[0])
first_path = str(example_images[0])
return first_image, first_path
except Exception as e:
logger.error(f"Error loading first example image: {e}")
return None, None
return None, None
app.load(fn=load_first_example, outputs=[image_input, selected_example_state])
return app
if __name__ == "__main__":
# Extract data archives on first run (for HF Spaces deployment)
ensure_data_extracted()
# Ensure SAM model is downloaded
logger.info("Checking for SAM HQ model...")
sam_path = ensure_sam_model()
# Validate required directories exist
if not CATALOG_ROOT.exists():
logger.error(f"Catalog not found: {CATALOG_ROOT}")
logger.error("Please ensure catalog data is present in data/catalog/")
sys.exit(1)
if not EXAMPLES_DIR.exists():
logger.warning(f"Examples directory not found: {EXAMPLES_DIR}")
EXAMPLES_DIR.mkdir(parents=True, exist_ok=True)
# Create config
config = AppConfig(
model_path=None, # Not using YOLO
catalog_root=CATALOG_ROOT,
examples_dir=EXAMPLES_DIR,
top_k=TOP_K_DEFAULT,
port=7860,
share=False,
sam_checkpoint_path=sam_path,
sam_model_type=SAM_MODEL_TYPE,
gdino_model_id=GDINO_MODEL_ID,
text_prompt=TEXT_PROMPT,
)
# Build and launch app
logger.info("Building Gradio interface...")
app = create_app(config)
logger.info("Launching app...")
app.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
)