mahesh1209 committed on
Commit
cc74d82
·
verified ·
1 Parent(s): f9be16a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -2,10 +2,21 @@ import gradio as gr
2
  import pandas as pd
3
  import re
4
  from sklearn.ensemble import IsolationForest
5
- from transformers import pipeline
 
 
6
 
7
- # ✅ Google-backed open-source LLM (fast + CPU-safe)
8
- llm = pipeline("text-classification", model="bert-base-uncased", top_k=1)
 
 
 
 
 
 
 
 
 
9
 
10
  def detect_anomalies(log_text):
11
  lines = log_text.strip().split("\n")
@@ -13,14 +24,11 @@ def detect_anomalies(log_text):
13
  df["length"] = df["log"].apply(len)
14
  df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
15
  df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
 
16
 
17
- # Semantic score using LLM
18
- df["llm_score"] = df["log"].apply(lambda x: llm(x)[0]["score"])
19
-
20
- # Isolation Forest anomaly detection
21
  features = df[["length", "digits", "specials", "llm_score"]].fillna(0)
22
- model = IsolationForest(contamination=0.1, random_state=42)
23
- preds = model.fit_predict(features)
24
  df["anomaly"] = preds
25
  df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})
26
 
@@ -30,7 +38,7 @@ demo = gr.Interface(
30
  fn=detect_anomalies,
31
  inputs=gr.Textbox(lines=20, placeholder="Paste logs here..."),
32
  outputs=gr.Dataframe(label="Log Status", type="pandas"),
33
- title="🧠 Log Anomaly Detection (Google LLM)",
34
  description="Detect anomalies using Isolation Forest + Google-backed BERT. Fast, accurate, and deploy-safe."
35
  )
36
 
 
2
  import pandas as pd
3
  import re
4
  from sklearn.ensemble import IsolationForest
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ import torch
7
+ import torch.nn.functional as F
8
 
9
+ # ✅ Load Google's BERT checkpoint directly with the Auto* tokenizer/model classes (no pipeline)
10
+ model_name = "bert-base-uncased"
11
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
13
+
14
+ def get_llm_score(text):
15
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
16
+ with torch.no_grad():
17
+ outputs = model(**inputs)
18
+ probs = F.softmax(outputs.logits, dim=1)
19
+ return float(probs[0][0]) # Use first class probability as a proxy score
20
 
21
  def detect_anomalies(log_text):
22
  lines = log_text.strip().split("\n")
 
24
  df["length"] = df["log"].apply(len)
25
  df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
26
  df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
27
+ df["llm_score"] = df["log"].apply(get_llm_score)
28
 
 
 
 
 
29
  features = df[["length", "digits", "specials", "llm_score"]].fillna(0)
30
+ model_iso = IsolationForest(contamination=0.1, random_state=42)
31
+ preds = model_iso.fit_predict(features)
32
  df["anomaly"] = preds
33
  df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})
34
 
 
38
  fn=detect_anomalies,
39
  inputs=gr.Textbox(lines=20, placeholder="Paste logs here..."),
40
  outputs=gr.Dataframe(label="Log Status", type="pandas"),
41
+ title="🧠 Log Anomaly Detection (Google BERT)",
42
  description="Detect anomalies using Isolation Forest + Google-backed BERT. Fast, accurate, and deploy-safe."
43
  )
44