# mahesh1209's picture
# Update app.py
# cc74d82 verified
import gradio as gr
import pandas as pd
import re
from sklearn.ensemble import IsolationForest
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
# Load the Google-released BERT checkpoint manually (no `pipeline` wrapper).
#
# NOTE: "bert-base-uncased" is a pre-training checkpoint without a fine-tuned
# sequence-classification head, so AutoModelForSequenceClassification creates
# that head with freshly initialised random weights. The initialisation is
# seeded here so the scores produced from this model are reproducible across
# process restarts, in line with the fixed random_state used for the
# IsolationForest further down. The scores are still not a trained signal.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# fork_rng restores the CPU generator state on exit, so seeding here does not
# leak into any other CPU-side use of torch's random number generator.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(42)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
def get_llm_score(text):
    """Score one piece of text with the module-level BERT classifier.

    The text is tokenized (with truncation enabled), run through ``model``
    with gradient tracking disabled, and the resulting logits are converted
    to probabilities with a softmax over the class dimension.

    Args:
        text: The string to score.

    Returns:
        The probability assigned to class index 0 for ``text`` as a Python
        float. Callers use it as a proxy feature, not as a label.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; no autograd graph is needed
        logits = model(**encoded).logits
    class_probs = torch.softmax(logits, dim=-1)
    # Single input, so row 0 is the only row; column 0 is the first class.
    return class_probs[0, 0].item()
def detect_anomalies(log_text):
    """Label every non-blank log line as ``"Normal"`` or ``"Anomaly"``.

    Each line is described by four features: its character length, its
    number of digit characters, its number of non-alphanumeric characters,
    and the score returned by ``get_llm_score``. An ``IsolationForest``
    (``contamination=0.1``, ``random_state=42``) is fitted on those features
    and its predictions are mapped to the status labels.

    Args:
        log_text: Raw log text with one entry per line. ``None``, empty, or
            whitespace-only input is accepted.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"log"`` and ``"status"``,
        one row per non-blank input line. The frame is empty (same columns)
        when the input contains no log lines.
    """
    # Blank lines carry no log content and would otherwise be scored as
    # entries of their own; a trailing "\r" from CRLF pastes would skew the
    # length and special-character counts of every line.
    lines = [
        line.rstrip("\r")
        for line in (log_text or "").strip().split("\n")
        if line.strip()
    ]
    if not lines:
        # Nothing to fit on: skip the model entirely instead of scoring a
        # phantom empty line.
        return pd.DataFrame(columns=["log", "status"])

    df = pd.DataFrame({"log": lines})
    features = pd.DataFrame(
        {
            "length": df["log"].str.len(),
            "digits": df["log"].apply(lambda s: sum(c.isdigit() for c in s)),
            "specials": df["log"].apply(
                lambda s: sum(not c.isalnum() for c in s)
            ),
            "llm_score": df["log"].apply(get_llm_score),
        }
    ).fillna(0)

    detector = IsolationForest(contamination=0.1, random_state=42)
    predictions = detector.fit_predict(features)  # 1 = inlier, -1 = outlier

    df["status"] = pd.Series(predictions, index=df.index).map(
        {1: "Normal", -1: "Anomaly"}
    )
    return df[["log", "status"]]
# Gradio UI: a multi-line textbox for pasted logs in, a per-line status
# table out. detect_anomalies receives the textbox content as one string.
demo = gr.Interface(
    fn=detect_anomalies,
    inputs=gr.Textbox(lines=20, placeholder="Paste logs here..."),
    outputs=gr.Dataframe(label="Log Status", type="pandas"),
    # The title starts with the brain emoji (U+1F9E0).
    title="🧠 Log Anomaly Detection (Google BERT)",
    description="Detect anomalies using Isolation Forest + Google-backed BERT. Fast, accurate, and deploy-safe.",
)
demo.launch()