import logging
import random
import time
from prometheus_client import start_http_server, Counter, Gauge
import gradio as gr
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("LLMMonitor")
# Prometheus metrics
requests_total = Counter("llm_requests_total", "Total LLM requests")
latency_gauge = Gauge("llm_latency_seconds", "Most recent LLM response latency")
errors_total = Counter("llm_errors_total", "Total LLM errors")
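# Counters are monotonic (use PromQL rate() for per-second views), while the
# gauge holds only the latest request's latency. A Histogram would capture the
# full latency distribution; the gauge keeps this demo minimal.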
# Simulated LLM call: random latency plus a 20% chance of failure
def monitor_llm(prompt):
    requests_total.inc()
    start = time.time()
    latency = random.uniform(0.1, 1.5)  # simulated model latency
    time.sleep(latency)
    latency_gauge.set(time.time() - start)  # record measured wall-clock latency
    if random.random() < 0.2:  # 20% simulated failure rate
        errors_total.inc()
        logger.warning("LLM error triggered for prompt: %s", prompt)
        return f"⚠️ Error: Simulated failure for prompt '{prompt}'"
    logger.info("LLM processed: %s", prompt)
    return f"✅ LLM response to '{prompt}' in {latency:.2f}s"
# Gradio UI
demo = gr.Interface(fn=monitor_llm, inputs="text", outputs="text", title="LLM Health Monitor")
start_http_server(8000)  # Prometheus metrics exposed at http://localhost:8000/metrics
demo.launch()
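
# Quick sanity check (a sketch, not part of the app): while the server is
# running, the prometheus_client endpoint can be scraped from a second shell
# with nothing but the standard library:
#
#   import urllib.request
#   body = urllib.request.urlopen("http://localhost:8000/metrics").read().decode()
#   print("\n".join(l for l in body.splitlines() if l.startswith("llm_")))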