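The listing below wires a simulated LLM call into Prometheus and Gradio: every request increments a counter, response latency is recorded in a gauge, and roughly one in five calls is forced to fail so the error metric has something to count.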
```python
import logging
import random
import time

import gradio as gr
from prometheus_client import Counter, Gauge, start_http_server

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("LLMMonitor")

# Prometheus metrics
requests_total = Counter("llm_requests_total", "Total LLM requests")
latency_gauge = Gauge("llm_latency_seconds", "LLM response latency")
# Errors only ever accumulate, so a Counter is the idiomatic metric type
errors_total = Counter("llm_errors_total", "Total LLM errors")

# Simulated LLM call, instrumented with the metrics above
def monitor_llm(prompt):
    start = time.time()
    requests_total.inc()
    # Simulate model latency with a random delay
    time.sleep(random.uniform(0.1, 1.5))
    latency = time.time() - start
    latency_gauge.set(latency)
    # Simulate a 20% failure rate
    if random.random() < 0.2:
        errors_total.inc()
        logger.warning("LLM error triggered")
        return f"⚠️ Error: Simulated failure for prompt '{prompt}'"
    logger.info(f"LLM processed: {prompt}")
    return f"✅ LLM response to '{prompt}' in {latency:.2f}s"

# Gradio UI
demo = gr.Interface(
    fn=monitor_llm, inputs="text", outputs="text", title="LLM Health Monitor"
)

start_http_server(8000)  # Prometheus metrics exposed at :8000
demo.launch()
```
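Once the app is running, the exporter can be checked without a full Prometheus setup. A minimal smoke test, assuming the listing above is running locally with its metrics server on port 8000:

```python
# Fetch the Prometheus exposition text and print the llm_* series.
# Assumes the monitor app above is already running on localhost:8000.
from urllib.request import urlopen

with urlopen("http://localhost:8000/metrics") as resp:
    body = resp.read().decode("utf-8")

for line in body.splitlines():
    if line.startswith("llm_"):
        print(line)
```

After a few prompts have gone through the Gradio UI, samples for `llm_requests_total`, `llm_latency_seconds`, and `llm_errors_total` should appear, ready to be scraped by a Prometheus server pointed at the same endpoint.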