Spaces:

mahesh1209
/

Log-Anomaly-Detection-aiops2

Sleeping

App Files Files Community

Log-Anomaly-Detection-aiops2 / app.py

mahesh1209

Update app.py

cc74d82 verified 2 months ago

raw

history blame contribute delete

1.76 kB

	import gradio as gr
	import pandas as pd
	import re
	from sklearn.ensemble import IsolationForest
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import torch.nn.functional as F

	# Load the BERT checkpoint and its tokenizer directly (no pipeline wrapper).
	# NOTE(review): "bert-base-uncased" is a pretraining checkpoint without a
	# fine-tuned sequence-classification head, so transformers creates the
	# classifier layer with fresh random weights at load time. Seeding the torch
	# RNG first makes that layer -- and therefore every score produced by
	# get_llm_score -- identical on each start-up, in line with the fixed
	# random_state already used for the IsolationForest.
	model_name = "bert-base-uncased"
	torch.manual_seed(42)
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSequenceClassification.from_pretrained(model_name)
	model.eval()  # inference only: make sure dropout stays disabled

	def get_llm_score(text):
	    """Return the model's softmax probability of class index 0 for ``text``.

	    The text is tokenized (with truncation) into PyTorch tensors, run through
	    the module-level ``model`` with gradient tracking disabled, and the
	    resulting logits are normalised with a softmax over the class dimension.

	    NOTE(review): the module-level model is loaded from "bert-base-uncased";
	    unless its classification head has been fine-tuned, treat this value as
	    a proxy feature only, not as a calibrated anomaly likelihood.

	    Args:
	        text: A single log line.

	    Returns:
	        The probability of the first class for the first (only) sequence,
	        as a plain Python ``float``.
	    """
	    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
	    # Pure inference: no autograd graph is needed for the forward pass.
	    with torch.no_grad():
	        logits = model(**encoded).logits
	    class_probs = F.softmax(logits, dim=1)
	    # Row 0 is the only sequence in the batch; column 0 is the first class.
	    return class_probs[0, 0].item()

	def detect_anomalies(log_text):
	    """Label every non-blank log line as "Normal" or "Anomaly".

	    Each line is described by four features: its length, its number of
	    digits, its number of non-alphanumeric characters, and the score
	    returned by ``get_llm_score``. An ``IsolationForest`` is fitted on
	    exactly those rows and its predictions are mapped to labels, so a line
	    is only anomalous relative to the other lines of the same input.

	    Args:
	        log_text: Raw log text, one entry per line. ``None``, an empty
	            string and whitespace-only text are accepted.

	    Returns:
	        A ``pandas.DataFrame`` with the columns ``log`` and ``status``, one
	        row per non-blank input line. The frame is empty (same columns)
	        when the input holds no log lines.
	    """
	    # Drop the "\r" left behind by CRLF pastes so it is not counted in the
	    # length/special-character features, and skip blank lines so they are
	    # not scored as if they were log entries.
	    candidates = (raw.rstrip("\r") for raw in (log_text or "").strip().split("\n"))
	    lines = [line for line in candidates if line.strip()]
	    if not lines:
	        # Nothing to score: return an empty table instead of fitting the
	        # forest on a single phantom empty row.
	        return pd.DataFrame(columns=["log", "status"])

	    df = pd.DataFrame({"log": lines})
	    df["length"] = df["log"].apply(len)
	    df["digits"] = df["log"].apply(lambda x: sum(c.isdigit() for c in x))
	    df["specials"] = df["log"].apply(lambda x: sum(not c.isalnum() for c in x))
	    df["llm_score"] = df["log"].apply(get_llm_score)

	    features = df[["length", "digits", "specials", "llm_score"]].fillna(0)
	    # Fixed random_state keeps the forest reproducible for identical input.
	    model_iso = IsolationForest(contamination=0.1, random_state=42)
	    preds = model_iso.fit_predict(features)
	    # fit_predict yields 1 for inliers and -1 for outliers.
	    df["anomaly"] = preds
	    df["status"] = df["anomaly"].map({1: "Normal", -1: "Anomaly"})

	    return df[["log", "status"]]

	# Gradio front end: a multi-line textbox for pasted logs goes in, a table
	# with one status per log line comes out.
	_log_input = gr.Textbox(lines=20, placeholder="Paste logs here...")
	_status_table = gr.Dataframe(label="Log Status", type="pandas")

	demo = gr.Interface(
	    fn=detect_anomalies,
	    inputs=_log_input,
	    outputs=_status_table,
	    title="🧠 Log Anomaly Detection (Google BERT)",
	    description="Detect anomalies using Isolation Forest + Google-backed BERT. Fast, accurate, and deploy-safe.",
	)

	# Start serving the interface as soon as this script is executed.
	demo.launch()