Spaces:

Yurikks
/

yoruba-tts

Sleeping

App Files Files Community

Yurikks committed 13 days ago

Commit

4601f86

verified ·

1 Parent(s): e23420c

Upload 2 files

Browse files

Files changed (2) hide show

main.py +363 -358
tts_service.py +109 -73

main.py CHANGED Viewed

@@ -1,358 +1,363 @@
-"""
-TTS Backend for YorubaApp
-Uses facebook/mms-tts-yor model for Yoruba text-to-speech
-Security Features:
-- Firebase ID Token validation
-- Rate limiting per user (100 requests/day)
-- Request signature validation
-- API key fallback for development
-"""
-import os
-import json
-import hmac
-import hashlib
-from datetime import datetime, timezone
-from typing import Optional
-from fastapi import FastAPI, HTTPException, Header, Depends
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-import base64
-import logging
-import firebase_admin
-from firebase_admin import auth as firebase_auth, credentials
-from tts_service import TTSService
-from cache import TTSCache
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# =============================================================================
-# CONFIGURATION
-# =============================================================================
-# API Key for development (fallback)
-API_KEY = os.environ.get("TTS_API_KEY", "")
-# Firebase configuration (set in HF Spaces secrets)
-FIREBASE_PROJECT_ID = os.environ.get("FIREBASE_PROJECT_ID", "demo-yorubaapp")
-FIREBASE_SERVICE_ACCOUNT = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON", "")
-# Rate limiting
-MAX_REQUESTS_PER_DAY = 100
-# Request signing secret (for additional verification)
-REQUEST_SIGNING_SECRET = os.environ.get("REQUEST_SIGNING_SECRET", "")
-# =============================================================================
-# FIREBASE INITIALIZATION
-# =============================================================================
-firebase_initialized = False
-def initialize_firebase():
-    global firebase_initialized
-    if firebase_initialized:
-        return
-    try:
-        if FIREBASE_SERVICE_ACCOUNT:
-            # Parse JSON from environment variable
-            cred_dict = json.loads(FIREBASE_SERVICE_ACCOUNT)
-            cred = credentials.Certificate(cred_dict)
-            firebase_admin.initialize_app(cred)
-            logger.info("Firebase Admin SDK initialized with service account")
-        else:
-            # Initialize with just project ID (limited functionality)
-            firebase_admin.initialize_app(options={
-                'projectId': FIREBASE_PROJECT_ID
-            })
-            logger.warning("Firebase Admin SDK initialized without service account (limited functionality)")
-        firebase_initialized = True
-    except Exception as e:
-        logger.error(f"Failed to initialize Firebase Admin SDK: {e}")
-        # Continue without Firebase - fall back to API key only
-# Initialize on startup
-initialize_firebase()
-# =============================================================================
-# RATE LIMITING (In-Memory - resets on restart)
-# =============================================================================
-# In production, this should use Redis or Firestore
-rate_limit_cache: dict[str, dict] = {}
-def check_rate_limit(user_id: str) -> tuple[bool, int]:
-    """
-    Check if user has exceeded rate limit.
-    Returns (allowed, remaining_requests)
-    """
-    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
-    cache_key = f"{user_id}_{today}"
-    if cache_key not in rate_limit_cache:
-        rate_limit_cache[cache_key] = {"count": 0, "date": today}
-    entry = rate_limit_cache[cache_key]
-    # Reset if new day
-    if entry["date"] != today:
-        entry = {"count": 0, "date": today}
-        rate_limit_cache[cache_key] = entry
-    remaining = MAX_REQUESTS_PER_DAY - entry["count"]
-    if entry["count"] >= MAX_REQUESTS_PER_DAY:
-        return False, 0
-    # Increment count
-    entry["count"] += 1
-    return True, remaining - 1
-def cleanup_old_rate_limits():
-    """Remove entries from previous days"""
-    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
-    keys_to_remove = [k for k, v in rate_limit_cache.items() if v.get("date") != today]
-    for key in keys_to_remove:
-        del rate_limit_cache[key]
-# =============================================================================
-# SECURITY HELPERS
-# =============================================================================
-async def verify_firebase_token(authorization: Optional[str]) -> Optional[dict]:
-    """
-    Verify Firebase ID token and return user info.
-    Returns None if verification fails.
-    """
-    if not authorization or not authorization.startswith("Bearer "):
-        return None
-    token = authorization[7:]  # Remove "Bearer " prefix
-    try:
-        decoded_token = firebase_auth.verify_id_token(token)
-        return {
-            "uid": decoded_token["uid"],
-            "email": decoded_token.get("email"),
-            "email_verified": decoded_token.get("email_verified", False)
-        }
-    except Exception as e:
-        logger.warning(f"Firebase token verification failed: {e}")
-        return None
-def verify_request_signature(
-    user_id: str,
-    text: str,
-    timestamp: str,
-    signature: str
-) -> bool:
-    """
-    Verify HMAC signature of request.
-    Signature = HMAC-SHA256(userId + timestamp + text)
-    """
-    if not REQUEST_SIGNING_SECRET:
-        return True  # Skip if not configured
-    # Check timestamp (within 5 minutes)
-    try:
-        request_time = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
-        now = datetime.now(timezone.utc)
-        diff = abs((now - request_time).total_seconds())
-        if diff > 300:  # 5 minutes
-            logger.warning(f"Request timestamp too old: {diff} seconds")
-            return False
-    except Exception as e:
-        logger.warning(f"Invalid timestamp format: {e}")
-        return False
-    # Verify signature
-    message = f"{user_id}{timestamp}{text}"
-    expected_signature = hmac.new(
-        REQUEST_SIGNING_SECRET.encode(),
-        message.encode(),
-        hashlib.sha256
-    ).hexdigest()
-    return hmac.compare_digest(signature, expected_signature)
-# =============================================================================
-# FASTAPI APP
-# =============================================================================
-app = FastAPI(
-    title="YorubaApp TTS API",
-    description="Text-to-Speech API for Yoruba language using MMS-TTS-YOR",
-    version="2.0.0"
-)
-# CORS - allow requests from Expo dev server and production
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Configure for production
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Initialize services
-tts = TTSService()
-cache = TTSCache()
-# =============================================================================
-# MODELS
-# =============================================================================
-class TTSRequest(BaseModel):
-    text: str
-    timestamp: Optional[str] = None  # ISO format for signature verification
-    signature: Optional[str] = None  # HMAC signature
-class TTSResponse(BaseModel):
-    audio: str  # base64 encoded WAV
-    cached: bool
-    remaining_requests: Optional[int] = None
-# =============================================================================
-# ENDPOINTS
-# =============================================================================
-@app.get("/")
-async def root():
-    return {"status": "ok", "service": "YorubaApp TTS API", "version": "2.0.0"}
-@app.get("/health")
-async def health():
-    return {
-        "status": "healthy",
-        "model": "facebook/mms-tts-yor",
-        "firebase_initialized": firebase_initialized
-    }
-@app.post("/tts", response_model=TTSResponse)
-async def text_to_speech(
-    request: TTSRequest,
-    authorization: Optional[str] = Header(None),
-    x_api_key: Optional[str] = Header(None, alias="X-API-Key")
-):
-    """
-    Generate speech from text.
-    Authentication (in order of priority):
-    1. Firebase ID Token (Authorization: Bearer <token>)
-    2. API Key (X-API-Key header) - for development only
-    Rate limiting: 100 requests per user per day
-    """
-    user_info = None
-    user_id = None
-    # Try Firebase token first
-    if authorization:
-        user_info = await verify_firebase_token(authorization)
-        if user_info:
-            user_id = user_info["uid"]
-            logger.info(f"Authenticated via Firebase: {user_id[:8]}...")
-    # Fall back to API key
-    if not user_info:
-        if API_KEY and x_api_key == API_KEY:
-            user_id = "api_key_user"
-            logger.info("Authenticated via API key")
-        else:
-            raise HTTPException(status_code=401, detail="Invalid or missing authentication")
-    # Validate request
-    text = request.text.strip()
-    if not text:
-        raise HTTPException(status_code=400, detail="Text is required")
-    if len(text) > 500:
-        raise HTTPException(status_code=400, detail="Text too long (max 500 characters)")
-    # Verify request signature (optional extra security)
-    if request.timestamp and request.signature and user_id:
-        if not verify_request_signature(user_id, text, request.timestamp, request.signature):
-            raise HTTPException(status_code=401, detail="Invalid request signature")
-    # Check rate limit
-    allowed, remaining = check_rate_limit(user_id)
-    if not allowed:
-        raise HTTPException(
-            status_code=429,
-            detail="Daily rate limit exceeded. Please try again tomorrow."
-        )
-    logger.info(f"TTS request from {user_id[:8]}... for text: {text[:50]}...")
-    # Check cache first
-    cached_audio = await cache.get(text)
-    if cached_audio:
-        logger.info("Returning cached audio")
-        return TTSResponse(audio=cached_audio, cached=True, remaining_requests=remaining)
-    try:
-        # Generate audio
-        audio_bytes = await tts.synthesize(text)
-        audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')
-        # Cache result
-        await cache.set(text, audio_b64)
-        logger.info(f"Generated audio: {len(audio_bytes)} bytes")
-        return TTSResponse(audio=audio_b64, cached=False, remaining_requests=remaining)
-    except Exception as e:
-        logger.error(f"TTS synthesis failed: {e}")
-        raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {str(e)}")
-@app.get("/rate-limit/{user_id}")
-async def get_rate_limit_status(
-    user_id: str,
-    authorization: Optional[str] = Header(None)
-):
-    """
-    Get current rate limit status for a user.
-    Only accessible with valid Firebase token for the same user.
-    """
-    user_info = await verify_firebase_token(authorization)
-    if not user_info or user_info["uid"] != user_id:
-        raise HTTPException(status_code=401, detail="Unauthorized")
-    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
-    cache_key = f"{user_id}_{today}"
-    if cache_key in rate_limit_cache:
-        count = rate_limit_cache[cache_key]["count"]
-    else:
-        count = 0
-    return {
-        "user_id": user_id,
-        "date": today,
-        "used": count,
-        "limit": MAX_REQUESTS_PER_DAY,
-        "remaining": max(0, MAX_REQUESTS_PER_DAY - count)
-    }
-# =============================================================================
-# STARTUP
-# =============================================================================
-@app.on_event("startup")
-async def startup_event():
-    """Cleanup old rate limit entries on startup"""
-    cleanup_old_rate_limits()
-    logger.info("TTS API started")
-if __name__ == "__main__":
-    import uvicorn
-    # Port 7860 is the default for Hugging Face Spaces
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+"""
+TTS Backend for YorubaApp
+Uses facebook/mms-tts-yor model for Yoruba text-to-speech
+Security Features:
+- Firebase ID Token validation
+- Rate limiting per user (100 requests/day)
+- Request signature validation
+- API key fallback for development
+"""
+import os
+import json
+import hmac
+import hashlib
+from datetime import datetime, timezone
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Header, Depends
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import base64
+import logging
+import firebase_admin
+from firebase_admin import auth as firebase_auth, credentials
+from tts_service import TTSService
+from cache import TTSCache
+# Configure logging
+# Root logger at INFO; every log call in this module goes through `logger`.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+# All settings are read from the environment once, at import time.
+# API Key for development (fallback)
+# An empty value disables the API-key fallback: the /tts handler only accepts
+# the key when API_KEY is non-empty.
+API_KEY = os.environ.get("TTS_API_KEY", "")
+# Firebase configuration (set in HF Spaces secrets)
+# FIREBASE_SERVICE_ACCOUNT holds the service-account JSON document itself (not a
+# file path); when it is empty, Firebase is initialized with the project ID only.
+FIREBASE_PROJECT_ID = os.environ.get("FIREBASE_PROJECT_ID", "demo-yorubaapp")
+FIREBASE_SERVICE_ACCOUNT = os.environ.get("FIREBASE_SERVICE_ACCOUNT_JSON", "")
+# Rate limiting
+# Maximum number of /tts requests per user per UTC day.
+MAX_REQUESTS_PER_DAY = 100
+# Request signing secret (for additional verification)
+# An empty value makes verify_request_signature() accept every request.
+REQUEST_SIGNING_SECRET = os.environ.get("REQUEST_SIGNING_SECRET", "")
+# =============================================================================
+# FIREBASE INITIALIZATION
+# =============================================================================
+# Set to True once firebase_admin.initialize_app() has succeeded.
+firebase_initialized = False
+def initialize_firebase():
+    """
+    Initialize the Firebase Admin SDK once per process.
+    Uses the service-account JSON from FIREBASE_SERVICE_ACCOUNT when it is set,
+    otherwise falls back to a project-ID-only app. Failures are logged and
+    swallowed so the service can still run with API-key authentication.
+    """
+    global firebase_initialized
+    if firebase_initialized:
+        return
+    try:
+        if not FIREBASE_SERVICE_ACCOUNT:
+            # No credentials available: project-ID-only app (limited functionality)
+            firebase_admin.initialize_app(options={
+                'projectId': FIREBASE_PROJECT_ID
+            })
+            logger.warning("Firebase Admin SDK initialized without service account (limited functionality)")
+        else:
+            # The environment variable carries the JSON document itself
+            service_account = json.loads(FIREBASE_SERVICE_ACCOUNT)
+            firebase_admin.initialize_app(credentials.Certificate(service_account))
+            logger.info("Firebase Admin SDK initialized with service account")
+        firebase_initialized = True
+    except Exception as e:
+        # Deliberate best-effort: keep serving with API-key authentication only
+        logger.error(f"Failed to initialize Firebase Admin SDK: {e}")
+# Runs at import time so the flag is settled before the first request
+initialize_firebase()
+# =============================================================================
+# RATE LIMITING (In-Memory - resets on restart)
+# =============================================================================
+# In production, this should use Redis or Firestore
+# Maps "<user_id>_<YYYY-MM-DD>" to {"count": int, "date": "YYYY-MM-DD"}.
+rate_limit_cache: dict[str, dict] = {}
+def check_rate_limit(user_id: str) -> tuple[bool, int]:
+    """
+    Consume one request from the user's daily quota.
+    Counters are kept per user and per UTC calendar day, so a new day starts
+    with a fresh key and no explicit reset is needed.
+    Args:
+        user_id: Identifier of the authenticated caller.
+    Returns:
+        (allowed, remaining_requests). When the quota is exhausted the result
+        is (False, 0) and the counter is left unchanged.
+    """
+    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
+    cache_key = f"{user_id}_{today}"
+    entry = rate_limit_cache.get(cache_key)
+    if entry is None:
+        # First request of the day for this user: evict counters from earlier
+        # days so the in-memory dict cannot grow without bound while running.
+        stale_keys = [k for k, v in rate_limit_cache.items() if v.get("date") != today]
+        for stale_key in stale_keys:
+            del rate_limit_cache[stale_key]
+        entry = rate_limit_cache[cache_key] = {"count": 0, "date": today}
+    if entry["count"] >= MAX_REQUESTS_PER_DAY:
+        return False, 0
+    # Increment count
+    entry["count"] += 1
+    return True, MAX_REQUESTS_PER_DAY - entry["count"]
+def cleanup_old_rate_limits():
+    """Drop every rate-limit entry whose recorded date is not the current UTC day."""
+    current_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
+    # Iterate over a snapshot because the dict is mutated inside the loop
+    for entry_key, record in list(rate_limit_cache.items()):
+        if record.get("date") != current_day:
+            del rate_limit_cache[entry_key]
+# =============================================================================
+# SECURITY HELPERS
+# =============================================================================
+async def verify_firebase_token(authorization: Optional[str]) -> Optional[dict]:
+    """
+    Validate a "Bearer <token>" Authorization header against Firebase.
+    Returns a dict with "uid", "email" and "email_verified" on success, or
+    None when the header is missing, malformed, or the token is rejected.
+    """
+    bearer_prefix = "Bearer "
+    if not (authorization and authorization.startswith(bearer_prefix)):
+        return None
+    id_token = authorization[len(bearer_prefix):]
+    try:
+        claims = firebase_auth.verify_id_token(id_token)
+        user = {
+            "uid": claims["uid"],
+            "email": claims.get("email"),
+            "email_verified": claims.get("email_verified", False)
+        }
+    except Exception as e:
+        # Any verification problem is reported to the caller as "not authenticated"
+        logger.warning(f"Firebase token verification failed: {e}")
+        return None
+    return user
+def verify_request_signature(
+    user_id: str,
+    text: str,
+    timestamp: str,
+    signature: str
+) -> bool:
+    """
+    Verify the HMAC signature of a request.
+    Signature = hex(HMAC-SHA256(secret, userId + timestamp + text))
+    Args:
+        user_id: Identifier of the authenticated caller.
+        text: Request text, exactly as it was signed.
+        timestamp: ISO 8601 timestamp with a UTC offset (a trailing "Z" is accepted).
+        signature: Hex digest supplied by the client.
+    Returns:
+        True when the signature matches and the timestamp is within 5 minutes
+        of server time, or when no signing secret is configured. False otherwise.
+    """
+    if not REQUEST_SIGNING_SECRET:
+        return True  # Skip if not configured
+    # Check timestamp (within 5 minutes, in either direction)
+    try:
+        request_time = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
+        diff = abs((datetime.now(timezone.utc) - request_time).total_seconds())
+    except (ValueError, TypeError) as e:
+        # ValueError: not ISO 8601. TypeError: timestamp carries no UTC offset,
+        # so it cannot be subtracted from the timezone-aware "now".
+        logger.warning(f"Invalid timestamp format: {e}")
+        return False
+    if diff > 300:  # 5 minutes
+        logger.warning(f"Request timestamp too old: {diff} seconds")
+        return False
+    # Verify signature
+    message = f"{user_id}{timestamp}{text}"
+    expected_signature = hmac.new(
+        REQUEST_SIGNING_SECRET.encode(),
+        message.encode(),
+        hashlib.sha256
+    ).hexdigest()
+    # Compare as bytes: compare_digest() raises TypeError for str arguments that
+    # contain non-ASCII characters, which a client-supplied signature might.
+    return hmac.compare_digest(signature.encode(), expected_signature.encode())
+# =============================================================================
+# FASTAPI APP
+# =============================================================================
+app = FastAPI(
+    title="YorubaApp TTS API",
+    description="Text-to-Speech API for Yoruba language using MMS-TTS-YOR",
+    version="2.0.0"
+)
+# CORS - allow requests from Expo dev server and production
+# Origins come from CORS_ALLOWED_ORIGINS (comma-separated); the default is "*".
+cors_allowed_origins = [
+    origin.strip()
+    for origin in os.environ.get("CORS_ALLOWED_ORIGINS", "*").split(",")
+    if origin.strip()
+] or ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=cors_allowed_origins,
+    # A wildcard origin must not be combined with credentialed requests, so
+    # credentials are only enabled for an explicit origin list. Clients send
+    # their token in the Authorization header, which does not depend on this.
+    allow_credentials=cors_allowed_origins != ["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Initialize services
+# Both are created at import time; TTSService() loads the model in its constructor.
+tts = TTSService()
+cache = TTSCache()
+# =============================================================================
+# MODELS
+# =============================================================================
+class TTSRequest(BaseModel):
+    # Request body of POST /tts.
+    text: str  # Text to synthesize; the handler strips it and rejects empty or >500-char input
+    speed: Optional[float] = 1.0  # Speed: 0.5-1.5 (1.0 = normal, 0.7 = slow)
+    timestamp: Optional[str] = None  # ISO format for signature verification
+    signature: Optional[str] = None  # HMAC signature
+class TTSResponse(BaseModel):
+    # Response body of POST /tts.
+    audio: str  # base64 encoded WAV
+    cached: bool  # True when the audio was served from the cache
+    remaining_requests: Optional[int] = None  # Requests left in the caller's daily quota
+# =============================================================================
+# ENDPOINTS
+# =============================================================================
+@app.get("/")
+async def root():
+    # Unauthenticated service banner: static status, name and version
+    banner = {"status": "ok", "service": "YorubaApp TTS API", "version": "2.0.0"}
+    return banner
+@app.get("/health")
+async def health():
+    # Unauthenticated health report; the Firebase flag is read at call time
+    report = dict(
+        status="healthy",
+        model="facebook/mms-tts-yor",
+        firebase_initialized=firebase_initialized,
+    )
+    return report
+def _normalize_speed(speed: Optional[float]) -> float:
+    """Clamp a requested speed to [0.5, 1.5]; None, 0 and NaN mean normal speed (1.0)."""
+    if not speed or speed != speed:  # NaN is the only value that differs from itself
+        return 1.0
+    # Two decimals are plenty and keep near-identical floats on one cache entry
+    return round(max(0.5, min(1.5, speed)), 2)
+@app.post("/tts", response_model=TTSResponse)
+async def text_to_speech(
+    request: TTSRequest,
+    authorization: Optional[str] = Header(None),
+    x_api_key: Optional[str] = Header(None, alias="X-API-Key")
+):
+    """
+    Generate speech from text.
+    Authentication (in order of priority):
+    1. Firebase ID Token (Authorization: Bearer <token>)
+    2. API Key (X-API-Key header) - for development only
+    Rate limiting: 100 requests per user per day
+    Speed: optional playback speed, clamped to 0.5-1.5 (default 1.0)
+    """
+    user_info = None
+    user_id = None
+    # Try Firebase token first
+    if authorization:
+        user_info = await verify_firebase_token(authorization)
+        if user_info:
+            user_id = user_info["uid"]
+            logger.info(f"Authenticated via Firebase: {user_id[:8]}...")
+    # Fall back to API key
+    if not user_info:
+        # Constant-time comparison so response timing does not reveal how much
+        # of the key matched. Bytes are used because compare_digest() rejects
+        # non-ASCII str arguments.
+        api_key_ok = (
+            bool(API_KEY)
+            and x_api_key is not None
+            and hmac.compare_digest(x_api_key.encode(), API_KEY.encode())
+        )
+        if not api_key_ok:
+            raise HTTPException(status_code=401, detail="Invalid or missing authentication")
+        user_id = "api_key_user"
+        logger.info("Authenticated via API key")
+    # Validate request
+    text = request.text.strip()
+    if not text:
+        raise HTTPException(status_code=400, detail="Text is required")
+    if len(text) > 500:
+        raise HTTPException(status_code=400, detail="Text too long (max 500 characters)")
+    # Verify request signature (optional extra security)
+    # NOTE(review): a client that omits timestamp/signature skips this check even
+    # when a signing secret is configured - confirm that this is intended.
+    if request.timestamp and request.signature and user_id:
+        if not verify_request_signature(user_id, text, request.timestamp, request.signature):
+            raise HTTPException(status_code=401, detail="Invalid request signature")
+    # Check rate limit (cache hits also count against the quota)
+    allowed, remaining = check_rate_limit(user_id)
+    if not allowed:
+        raise HTTPException(
+            status_code=429,
+            detail="Daily rate limit exceeded. Please try again tomorrow."
+        )
+    # Normalize speed (clamp to safe range) before logging and caching
+    speed = _normalize_speed(request.speed)
+    logger.info(f"TTS request from {user_id[:8]}... for text: {text[:50]}... speed: {speed}")
+    # Check cache first (include speed in cache key; normal speed keeps the plain text key)
+    cache_key = f"{text}|speed={speed}" if speed != 1.0 else text
+    cached_audio = await cache.get(cache_key)
+    if cached_audio:
+        logger.info("Returning cached audio")
+        return TTSResponse(audio=cached_audio, cached=True, remaining_requests=remaining)
+    try:
+        # Generate audio with speed
+        audio_bytes = await tts.synthesize(text, speed=speed)
+        audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')
+        # Cache result
+        await cache.set(cache_key, audio_b64)
+        logger.info(f"Generated audio: {len(audio_bytes)} bytes")
+        return TTSResponse(audio=audio_b64, cached=False, remaining_requests=remaining)
+    except Exception:
+        # Keep the traceback in the server log; do not expose internals to clients
+        logger.exception("TTS synthesis failed")
+        raise HTTPException(status_code=500, detail="TTS synthesis failed")
+@app.get("/rate-limit/{user_id}")
+async def get_rate_limit_status(
+    user_id: str,
+    authorization: Optional[str] = Header(None)
+):
+    """
+    Get current rate limit status for a user.
+    Only accessible with valid Firebase token for the same user.
+    """
+    caller = await verify_firebase_token(authorization)
+    # Callers may only inspect their own quota
+    is_owner = bool(caller) and caller["uid"] == user_id
+    if not is_owner:
+        raise HTTPException(status_code=401, detail="Unauthorized")
+    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
+    # No entry for today means the user has not made a request yet
+    entry = rate_limit_cache.get(f"{user_id}_{today}")
+    used = entry["count"] if entry is not None else 0
+    return {
+        "user_id": user_id,
+        "date": today,
+        "used": used,
+        "limit": MAX_REQUESTS_PER_DAY,
+        "remaining": max(0, MAX_REQUESTS_PER_DAY - used)
+    }
+# =============================================================================
+# STARTUP
+# =============================================================================
+@app.on_event("startup")
+async def startup_event():
+    """Cleanup old rate limit entries on startup"""
+    # Removes entries whose date is not the current UTC day, then logs readiness.
+    # NOTE(review): rate_limit_cache is created empty at import time, so this call
+    # presumably has nothing to remove on a fresh process - confirm it is needed.
+    cleanup_old_rate_limits()
+    logger.info("TTS API started")
+if __name__ == "__main__":
+    # Script entry point: serve the app directly with uvicorn.
+    # uvicorn is imported here so it is only required when run as a script.
+    import uvicorn
+    # Port 7860 is the default for Hugging Face Spaces
+    uvicorn.run(app, host="0.0.0.0", port=7860)

tts_service.py CHANGED Viewed

@@ -1,73 +1,109 @@
-"""
-TTS Service using facebook/mms-tts-yor (Yoruba)
-"""
-import io
-import logging
-import asyncio
-from functools import lru_cache
-import torch
-import numpy as np
-import scipy.io.wavfile as wavfile
-from transformers import VitsModel, AutoTokenizer
-logger = logging.getLogger(__name__)
-class TTSService:
-    def __init__(self):
-        logger.info("Loading MMS-TTS-YOR model...")
-        # Load model and tokenizer
-        self.model = VitsModel.from_pretrained("facebook/mms-tts-yor")
-        self.tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-yor")
-        # Set to evaluation mode
-        self.model.eval()
-        # Use GPU if available
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model = self.model.to(self.device)
-        logger.info(f"Model loaded on {self.device}")
-        logger.info(f"Sampling rate: {self.model.config.sampling_rate}")
-    async def synthesize(self, text: str) -> bytes:
-        """
-        Synthesize speech from Yoruba text.
-        Returns WAV audio bytes.
-        """
-        # Run synthesis in thread pool to avoid blocking
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, self._synthesize_sync, text)
-    def _synthesize_sync(self, text: str) -> bytes:
-        """Synchronous synthesis (runs in thread pool)"""
-        # Tokenize input
-        inputs = self.tokenizer(text, return_tensors="pt")
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        # Generate audio
-        with torch.no_grad():
-            output = self.model(**inputs).waveform
-        # Convert to numpy
-        waveform = output.squeeze().cpu().numpy()
-        # Normalize to 16-bit PCM
-        waveform = np.clip(waveform, -1.0, 1.0)
-        waveform_int16 = (waveform * 32767).astype(np.int16)
-        # Write to WAV buffer
-        buffer = io.BytesIO()
-        wavfile.write(buffer, rate=self.model.config.sampling_rate, data=waveform_int16)
-        return buffer.getvalue()
-# Singleton instance
-@lru_cache(maxsize=1)
-def get_tts_service() -> TTSService:
-    return TTSService()

+"""
+TTS Service using facebook/mms-tts-yor (Yoruba)
+Supports variable speed playback (normal and slow)
+"""
+import io
+import logging
+import asyncio
+from functools import lru_cache
+import torch
+import numpy as np
+import scipy.io.wavfile as wavfile
+from transformers import VitsModel, AutoTokenizer
+logger = logging.getLogger(__name__)
+class TTSService:
+    """Yoruba text-to-speech backed by the facebook/mms-tts-yor checkpoint."""
+    def __init__(self):
+        """Load the model and tokenizer and move the model to the GPU when one is available."""
+        logger.info("Loading MMS-TTS-YOR model...")
+        # Load model and tokenizer
+        self.model = VitsModel.from_pretrained("facebook/mms-tts-yor")
+        self.tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-yor")
+        # Set to evaluation mode
+        self.model.eval()
+        # Use GPU if available
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model = self.model.to(self.device)
+        self.sample_rate = self.model.config.sampling_rate
+        logger.info(f"Model loaded on {self.device}")
+        logger.info(f"Sampling rate: {self.sample_rate}")
+    async def synthesize(self, text: str, speed: float = 1.0) -> bytes:
+        """
+        Synthesize speech from Yoruba text.
+        Args:
+            text: Text to synthesize
+            speed: Playback speed (0.5 = half speed, 1.0 = normal, 1.5 = faster)
+        Returns WAV audio bytes.
+        """
+        # Run synthesis in the default thread pool to avoid blocking the event loop.
+        # get_running_loop() is the supported way to obtain the loop inside a coroutine.
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, self._synthesize_sync, text, speed)
+    def _synthesize_sync(self, text: str, speed: float = 1.0) -> bytes:
+        """
+        Synchronous synthesis (runs in thread pool).
+        Returns 16-bit PCM WAV bytes at the model's sampling rate. A speed of
+        1.0, or any non-positive speed, leaves the generated audio untouched.
+        """
+        # Tokenize input
+        inputs = self.tokenizer(text, return_tensors="pt")
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        # Generate audio
+        with torch.no_grad():
+            output = self.model(**inputs).waveform
+        # Convert to numpy
+        waveform = output.squeeze().cpu().numpy()
+        # Apply the speed change by resampling (see _change_speed)
+        if speed != 1.0 and speed > 0:
+            waveform = self._change_speed(waveform, speed)
+        # Normalize to 16-bit PCM
+        waveform = np.clip(waveform, -1.0, 1.0)
+        waveform_int16 = (waveform * 32767).astype(np.int16)
+        # Write to WAV buffer
+        buffer = io.BytesIO()
+        wavfile.write(buffer, rate=self.sample_rate, data=waveform_int16)
+        return buffer.getvalue()
+    def _change_speed(self, waveform: np.ndarray, speed: float) -> np.ndarray:
+        """
+        Change playback speed by resampling with linear interpolation.
+        Speed > 1 = faster (shorter audio)
+        Speed < 1 = slower (longer audio)
+        The output is played back at the unchanged sample rate, so the pitch
+        shifts together with the tempo: slower audio sounds lower, faster
+        audio sounds higher. This is not a pitch-preserving time stretch.
+        NOTE(review): the VITS model presumably exposes a speaking-rate setting
+        that changes tempo without changing pitch - confirm before switching.
+        Args:
+            waveform: 1-D array of audio samples.
+            speed: Positive speed factor.
+        Returns:
+            The input array itself when speed is 1.0 or the waveform is empty,
+            otherwise a new float32 array of about len(waveform) / speed samples.
+        Raises:
+            ValueError: If speed is not positive.
+        """
+        if speed <= 0:
+            raise ValueError("speed must be positive")
+        original_length = len(waveform)
+        if speed == 1.0 or original_length == 0:
+            # Nothing to resample (np.interp cannot handle an empty input)
+            return waveform
+        # Calculate new length; always keep at least one sample
+        new_length = max(1, int(original_length / speed))
+        # Create new time indices spanning the whole original signal
+        old_indices = np.arange(original_length)
+        new_indices = np.linspace(0, original_length - 1, new_length)
+        # Interpolate
+        stretched = np.interp(new_indices, old_indices, waveform)
+        return stretched.astype(np.float32)
+# Singleton instance
+# lru_cache(maxsize=1) on a zero-argument function memoizes the first result,
+# so the service is constructed at most once, on the first call.
+@lru_cache(maxsize=1)
+def get_tts_service() -> TTSService:
+    """Return the shared TTSService, constructing it on the first call."""
+    return TTSService()