""" Optimized configuration for HF Spaces with intelligent handling of large models. This file contains recommended settings based on available hardware. """ import os from typing import Dict, Any class HFSpacesConfig: """Optimized configuration for different HF Spaces tiers""" # Timeouts (in seconds) TIMEOUT_SMALL_MODEL = 120 # Models <2B parameters TIMEOUT_MEDIUM_MODEL = 300 # Models 2-5B parameters TIMEOUT_LARGE_MODEL = 600 # Models >5B parameters TIMEOUT_PING = 5 # Health checks # Recommended memory limits (GB) per HF Spaces tier MEMORY_LIMITS = { "free": 16, # Free HF Spaces "pro": 32, # HF Spaces PRO "enterprise": 64 # HF Spaces Enterprise } # Recommended models per tier RECOMMENDED_MODELS = { "free": [ "meta-llama/Llama-3.2-1B", "oopere/pruned40-llama-3.2-1B", "oopere/Fair-Llama-3.2-1B", "google/gemma-3-1b-pt", "Qwen/Qwen3-1.7B", ], "pro": [ "meta-llama/Llama-3.2-3B", "meta-llama/Llama-3-8B", ], "enterprise": [ "meta-llama/Llama-3-70B", ] } # Model loading configuration MODEL_LOAD_CONFIG = { "small": { # <2B params "low_cpu_mem_usage": True, "torch_dtype": "auto", "device_map": "auto", "timeout": TIMEOUT_SMALL_MODEL, }, "medium": { # 2-8B params "low_cpu_mem_usage": True, "torch_dtype": "float16", # Reduces memory "device_map": "auto", "timeout": TIMEOUT_MEDIUM_MODEL, }, "large": { # >8B params "low_cpu_mem_usage": True, "torch_dtype": "float16", "device_map": "auto", "load_in_8bit": True, # int8 quantization "timeout": TIMEOUT_LARGE_MODEL, } } @classmethod def get_model_size_category(cls, model_name: str) -> str: """ Determines the model size category based on the name. Returns: "small", "medium", or "large" """ model_lower = model_name.lower() # Detect by parameters in the name if any(size in model_lower for size in ["1b", "1.7b", "1.5b"]): return "small" elif any(size in model_lower for size in ["3b", "7b", "8b"]): return "medium" elif any(size in model_lower for size in ["13b", "30b", "70b"]): return "large" # Default: small (assume the safest case) return "small" @classmethod def get_timeout_for_model(cls, model_name: str) -> int: """Gets the recommended timeout for a model.""" size = cls.get_model_size_category(model_name) return cls.MODEL_LOAD_CONFIG[size]["timeout"] @classmethod def get_load_config(cls, model_name: str) -> Dict[str, Any]: """Gets the optimized loading configuration for a model.""" size = cls.get_model_size_category(model_name) return cls.MODEL_LOAD_CONFIG[size].copy() @classmethod def is_model_recommended(cls, model_name: str, tier: str = "free") -> bool: """Verifies if a model is recommended for the current tier.""" return model_name in cls.RECOMMENDED_MODELS.get(tier, []) @classmethod def get_memory_warning(cls, model_name: str, tier: str = "free") -> str: """ Generates a warning if the model may exceed memory limits. Returns: String with warning, or empty string if no problem """ if cls.is_model_recommended(model_name, tier): return "" size = cls.get_model_size_category(model_name) if size == "medium" and tier == "free": return ( "⚠️ **Warning**: This model may be too large for free HF Spaces. " "Consider upgrading to HF Spaces PRO or using a smaller model." ) elif size == "large" and tier in ["free", "pro"]: return ( "❌ **Error**: This model is too large for your HF Spaces tier. " "Use a smaller model or upgrade to Enterprise." ) return "" # Usage example: def get_optimized_request_config(model_name: str) -> dict: """ Gets optimized configuration for HTTP requests based on the model. Usage: config = get_optimized_request_config("meta-llama/Llama-3.2-1B") response = requests.post(url, json=payload, **config) """ return { "timeout": HFSpacesConfig.get_timeout_for_model(model_name), } # Default configuration for general use DEFAULT_CONFIG = { "timeout": HFSpacesConfig.TIMEOUT_MEDIUM_MODEL, "max_retries": 2, "retry_delay": 5, # seconds between retries } if __name__ == "__main__": # Usage examples print("🔧 Optimized configuration for HF Spaces\n") test_models = [ "meta-llama/Llama-3.2-1B", "meta-llama/Llama-3.2-3B", "meta-llama/Llama-3-8B", ] for model in test_models: print(f"📦 Model: {model}") print(f" Category: {HFSpacesConfig.get_model_size_category(model)}") print(f" Timeout: {HFSpacesConfig.get_timeout_for_model(model)}s") print(f" Recommended (free): {HFSpacesConfig.is_model_recommended(model, 'free')}") warning = HFSpacesConfig.get_memory_warning(model, "free") if warning: print(f" {warning}") print()