|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import re |
|
|
from sklearn.ensemble import IsolationForest |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import torch |
|
|
import torch.nn.functional as F |
|
|
|
|
|
|
|
|
# Checkpoint shared by the tokenizer and the classifier below. Both are loaded
# once at import time so that get_llm_score() can reuse them on every call.
# NOTE(review): "bert-base-uncased" is a base (not fine-tuned) checkpoint, so
# the sequence-classification head is presumably freshly initialised and the
# resulting scores may differ between process starts -- confirm, and consider
# pointing this at a checkpoint fine-tuned for the task.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
def get_llm_score(text: str) -> float:
    """Return the model's softmax probability of class index 0 for ``text``.

    The text is tokenized with truncation enabled, run through the
    module-level ``model`` with gradient tracking disabled, and the resulting
    logits are normalised with a softmax across the class dimension.

    Args:
        text: A single string to score.

    Returns:
        The probability assigned to class 0 for the first (and only) sequence
        in the batch, converted to a plain Python ``float``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    probs = F.softmax(outputs.logits, dim=1)
    return float(probs[0][0])
|
|
|
|
|
def detect_anomalies(log_text):
    """Label every log line as ``"Normal"`` or ``"Anomaly"``.

    Each non-blank line of ``log_text`` becomes one row. Four features are
    computed per row -- character count, digit count, count of
    non-alphanumeric characters, and the score returned by
    ``get_llm_score`` -- and an ``IsolationForest`` fitted on those same rows
    marks the outliers.

    Args:
        log_text: Raw log text with one entry per line. ``None``, an empty
            string, or whitespace-only text produces an empty result.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"log"`` and ``"status"``;
        ``status`` is ``"Normal"`` or ``"Anomaly"``.
    """
    # splitlines() also handles "\r\n" endings, so no stray "\r" inflates the
    # length/specials features; blank lines are not log entries and are dropped.
    lines = [line for line in (log_text or "").splitlines() if line.strip()]
    if not lines:
        # Nothing to score: skip the model and the forest entirely.
        return pd.DataFrame(columns=["log", "status"])

    df = pd.DataFrame({"log": lines})
    df["length"] = df["log"].apply(len)
    df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
    df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
    df["llm_score"] = df["log"].apply(get_llm_score)

    features = df[["length", "digits", "specials", "llm_score"]].fillna(0)

    # Fixed random_state keeps the forest itself reproducible between calls.
    model_iso = IsolationForest(contamination=0.1, random_state=42)
    preds = model_iso.fit_predict(features)

    # fit_predict returns 1 for inliers and -1 for outliers.
    df["anomaly"] = preds
    df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})

    return df[["log", "status"]]
|
|
|
|
|
# Gradio UI: a multi-line textbox feeds detect_anomalies(), and the DataFrame
# it returns is rendered as a table.
demo = gr.Interface(
    fn=detect_anomalies,
    inputs=gr.Textbox(lines=20, placeholder="Paste logs here..."),
    outputs=gr.Dataframe(label="Log Status", type="pandas"),
    title="🧠 Log Anomaly Detection (Google BERT)",
    description=(
        "Detect anomalies using Isolation Forest + Google-backed BERT. "
        "Fast, accurate, and deploy-safe."
    ),
)

# Start the server only when run as a script, so importing this module
# (e.g. to reuse `demo` or detect_anomalies) does not block on launch().
if __name__ == "__main__":
    demo.launch()
|
|
|