"""Gradio app that labels log lines as Normal/Anomaly.

Each log line is turned into four numeric features (length, digit count,
non-alphanumeric count, and a BERT class-0 probability) and an Isolation
Forest fitted on those features flags the outliers.
"""

import re

import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.ensemble import IsolationForest
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the tokenizer and model directly (no `pipeline` wrapper).
# NOTE(review): "bert-base-uncased" is a base checkpoint; presumably its
# sequence-classification head is freshly initialised when loaded this way,
# so the score below is only a proxy signal -- confirm, or switch to a
# fine-tuned checkpoint.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


def get_llm_score(text: str) -> float:
    """Return the model's softmax probability of class 0 for *text*.

    Args:
        text: A single log line. Input longer than the tokenizer's limit is
            truncated (``truncation=True``).

    Returns:
        The first-class probability, used as a proxy score.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = F.softmax(outputs.logits, dim=1)
    return float(probs[0][0])


def detect_anomalies(log_text: str) -> pd.DataFrame:
    """Classify every line of *log_text* as ``"Normal"`` or ``"Anomaly"``.

    Args:
        log_text: Newline-separated log lines (``\\n`` or ``\\r\\n``).

    Returns:
        A DataFrame with columns ``log`` (the line) and ``status``
        (``"Normal"`` or ``"Anomaly"``). Empty or whitespace-only input
        yields an empty DataFrame with the same two columns.
    """
    # Nothing to score: avoid fitting a forest on a single empty string.
    if not log_text or not log_text.strip():
        return pd.DataFrame(columns=["log", "status"])

    # Drop the trailing "\r" left by CRLF input so it does not inflate the
    # length / special-character features.
    lines = [line.rstrip("\r") for line in log_text.strip().split("\n")]

    df = pd.DataFrame({"log": lines})
    df["length"] = df["log"].apply(len)
    df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
    df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
    df["llm_score"] = df["log"].apply(get_llm_score)

    features = df[["length", "digits", "specials", "llm_score"]].fillna(0)

    # The forest is fitted on the pasted lines themselves, so outliers are
    # relative to this batch only.
    model_iso = IsolationForest(contamination=0.1, random_state=42)
    preds = model_iso.fit_predict(features)

    df["anomaly"] = preds
    # fit_predict returns 1 for inliers and -1 for outliers.
    df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})
    return df[["log", "status"]]


demo = gr.Interface(
    fn=detect_anomalies,
    inputs=gr.Textbox(lines=20, placeholder="Paste logs here..."),
    outputs=gr.Dataframe(label="Log Status", type="pandas"),
    title="🧠 Log Anomaly Detection (Google BERT)",
    description="Detect anomalies using Isolation Forest + Google-backed BERT. Fast, accurate, and deploy-safe.",
)

demo.launch()