Update app.py
app.py CHANGED
@@ -2,10 +2,21 @@ import gradio as gr
import pandas as pd
import re
from sklearn.ensemble import IsolationForest
-from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import torch.nn.functional as F

-# ✅ Google-backed …
-llm = pipeline(…)
+# ✅ Load Google-backed BERT model manually (no pipeline)
+model_name = "bert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+def get_llm_score(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = F.softmax(outputs.logits, dim=1)
+    return float(probs[0][0])  # Use first class probability as a proxy score

def detect_anomalies(log_text):
    lines = log_text.strip().split("\n")

@@ -13,14 +24,11 @@ def detect_anomalies(log_text):
    df["length"] = df["log"].apply(len)
    df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
    df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
+    df["llm_score"] = df["log"].apply(get_llm_score)

-    # Semantic score using LLM
-    df["llm_score"] = df["log"].apply(lambda x: llm(x)[0]["score"])
-
-    # Isolation Forest anomaly detection
    features = df[["length", "digits", "specials", "llm_score"]].fillna(0)
-
-    preds = …
+    model_iso = IsolationForest(contamination=0.1, random_state=42)
+    preds = model_iso.fit_predict(features)
    df["anomaly"] = preds
    df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})

@@ -30,7 +38,7 @@ demo = gr.Interface(
    fn=detect_anomalies,
    inputs=gr.Textbox(lines=20, placeholder="Paste logs here..."),
    outputs=gr.Dataframe(label="Log Status", type="pandas"),
-    title="🧠 Log Anomaly Detection (Google …",
+    title="🧠 Log Anomaly Detection (Google BERT)",
    description="Detect anomalies using Isolation Forest + Google-backed BERT. Fast, accurate, and deploy-safe."
)
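For quick sanity-checking outside the Gradio UI, here is a condensed, standalone sketch of the scoring path this commit adds (the same bert-base-uncased features plus IsolationForest). The sample log lines and the DataFrame construction are illustrative assumptions — the diff does not show how app.py builds df or what detect_anomalies returns — so treat this as an approximation, not the app's exact code.

import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.ensemble import IsolationForest
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Note: this checkpoint has no fine-tuned classification head, so the softmax
# probability below is only a rough proxy feature, as the diff's own comment says.
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def get_llm_score(text):
    # Same scoring helper as in the updated app.py
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return float(F.softmax(outputs.logits, dim=1)[0][0])

# Invented sample logs: 19 routine lines plus one unusual line
logs = [f"INFO 2024-01-01 12:00:{i:02d} user login ok" for i in range(19)]
logs.append("ERROR 2024-01-01 12:00:19 segfault at 0x7f3a9c0000 !!! ####")

# Assumed DataFrame construction (not shown in the diff)
df = pd.DataFrame({"log": logs})
df["length"] = df["log"].apply(len)
df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
df["llm_score"] = df["log"].apply(get_llm_score)

features = df[["length", "digits", "specials", "llm_score"]].fillna(0)
model_iso = IsolationForest(contamination=0.1, random_state=42)  # roughly 10% of rows flagged
df["anomaly"] = model_iso.fit_predict(features)                  # 1 = inlier, -1 = outlier
df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})
print(df[["log", "status"]])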