import logging
import random
import time

import gradio as gr
from prometheus_client import Counter, Gauge, start_http_server

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("LLMMonitor")

# Prometheus metrics: Counters for monotonically increasing totals,
# a Gauge for the most recent latency reading.
requests_total = Counter("llm_requests_total", "Total LLM requests")
error_counter = Counter("llm_errors_total", "Total LLM errors")
latency_gauge = Gauge("llm_latency_seconds", "Latency of the most recent LLM response")

# Simulated LLM call: sleeps for a random interval to mimic inference time
# and fails ~20% of the time to exercise the error metric.
def monitor_llm(prompt):
    requests_total.inc()
    start = time.time()
    time.sleep(random.uniform(0.1, 1.5))  # stand-in for a real model call
    latency = time.time() - start         # measure actual elapsed time
    latency_gauge.set(latency)
    if random.random() < 0.2:
        error_counter.inc()
        logger.warning("LLM error triggered for prompt: %s", prompt)
        return f"⚠️ Error: Simulated failure for prompt '{prompt}'"
    logger.info("LLM processed: %s", prompt)
    return f"✅ LLM response to '{prompt}' in {latency:.2f}s"

# Gradio UI
demo = gr.Interface(
    fn=monitor_llm,
    inputs="text",
    outputs="text",
    title="LLM Health Monitor",
)

start_http_server(8000)  # Prometheus metrics exposed at http://localhost:8000/metrics
demo.launch()            # blocks; Gradio UI on its default port (7860)
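
# --- Verifying the metrics endpoint ---
# A minimal sketch, not part of the app above: run it in a separate process
# while the server is up. It assumes the default port 8000 passed to
# start_http_server(8000); the "llm_" prefix filter matches the metric
# names defined above.
#
#   import urllib.request
#   text = urllib.request.urlopen("http://localhost:8000/metrics").read().decode()
#   print("\n".join(line for line in text.splitlines() if line.startswith("llm_")))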
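
# --- Design note: Histogram vs. Gauge for latency ---
# A Gauge keeps only the latest reading, so concurrent requests overwrite
# each other. For production monitoring a Histogram is the more common
# choice, since Prometheus can compute quantiles across all observations.
# A sketch using the same prometheus_client library; "llm_latency_hist_seconds"
# is an illustrative metric name, not one used above:
#
#   from prometheus_client import Histogram
#   latency_hist = Histogram("llm_latency_hist_seconds", "LLM latency distribution")
#
#   with latency_hist.time():  # records elapsed time as one observation
#       time.sleep(random.uniform(0.1, 1.5))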