"""Incident triage service.

Exposes a small text2text model through three surfaces that share one
FastAPI application:

* ``POST /triage``  - JSON API that classifies an incident description.
* ``GET /metrics``  - Prometheus scrape endpoint.
* ``/gradio``       - Gradio web UI backed by the same model.
"""

import logging

import gradio as gr
import uvicorn  # noqa: F401 - not used in the visible code; kept for external launchers
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.wsgi import WSGIMiddleware  # noqa: F401 - kept from original
from fastapi.responses import JSONResponse, Response
from prometheus_client import CONTENT_TYPE_LATEST, Counter, generate_latest
from pydantic import BaseModel
from transformers import pipeline

logger = logging.getLogger(__name__)

# FastAPI app
app = FastAPI()

# Prometheus metrics
triage_requests = Counter("triage_requests_total", "Total triage requests")
triage_errors = Counter("triage_errors_total", "Total triage errors")

# Upper bound on generated tokens per triage answer.
MAX_NEW_TOKENS = 50

# Load the model once at import time so every request reuses it.
triage_pipeline = pipeline(
    "text2text-generation",
    model="sshleifer/tiny-t5",  # ~25MB model (size as noted by the original author)
    device=-1,  # -1 selects CPU in the transformers pipeline API
)


# Request model
class Incident(BaseModel):
    """Request body for ``POST /triage``."""

    # Free-text description of the incident to classify.
    description: str


def _run_triage(description: str) -> str:
    """Run the model on *description* and return the stripped generated text.

    Shared by the HTTP API and the Gradio UI so both build the same prompt
    and use the same generation settings. Any exception raised by the
    pipeline propagates to the caller.
    """
    prompt = f"Classify this incident and suggest priority:\n\n{description}"
    generated = triage_pipeline(prompt, max_new_tokens=MAX_NEW_TOKENS)
    return generated[0]["generated_text"].strip()


# Triage API
# NOTE: no return annotation on route handlers - FastAPI would interpret it
# as a response model.
@app.post("/triage")
async def triage(incident: Incident):
    """Classify an incident and suggest a priority.

    Returns ``{"triage": <text>}`` on success. On failure, increments the
    error counter and returns ``{"error": <message>}`` with HTTP status 500
    so clients and monitoring can tell failures from successes.
    """
    triage_requests.inc()
    try:
        # The pipeline call is synchronous; run it in the thread pool so it
        # does not block the event loop while the model generates.
        result = await run_in_threadpool(_run_triage, incident.description)
    except Exception as exc:  # top-level boundary: report, don't crash the worker
        triage_errors.inc()
        logger.exception("Triage request failed")
        return JSONResponse(status_code=500, content={"error": str(exc)})
    return {"triage": result}


# Metrics endpoint
@app.get("/metrics")
def metrics():
    """Return the current Prometheus metrics in text exposition format."""
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)


# Root
@app.get("/")
def home():
    """Return a static message confirming the service is up."""
    return {"message": "Incident Triage Bot is running!"}


# Gradio UI
def gradio_triage(description: str) -> str:
    """Gradio callback: triage *description* and return text for display.

    Errors are counted and rendered as an ``"Error: ..."`` string because
    the UI output component is plain text.
    """
    triage_requests.inc()
    try:
        return _run_triage(description)
    except Exception as exc:  # UI boundary: show the failure instead of raising
        triage_errors.inc()
        logger.exception("Gradio triage request failed")
        return f"Error: {str(exc)}"


demo = gr.Interface(
    fn=gradio_triage,
    inputs=gr.Textbox(lines=5, placeholder="Describe the incident..."),
    outputs="text",
    title="Incident Triage Bot",
    description="Classify incidents and suggest priority using a tiny Transformers model",
)

# Mount Gradio under /gradio. mount_gradio_app attaches the UI to `app` and
# returns that same FastAPI instance, so it must not be passed to app.mount()
# again (that would mount the application inside itself).
app = gr.mount_gradio_app(app, demo, path="/gradio")