AI_Detector_2

Sleeping

App Files Files Community

AI_Detector_2 / app.py

mahmoudsaber0

Update app.py

dde6bd9 verified 2 months ago

raw

history blame

4.02 kB

	import os
	import re
	import torch
	from fastapi import FastAPI
	from pydantic import BaseModel
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# =====================================================
	# ✅ Safe Hugging Face Cache Configuration
	# =====================================================
	# One writable directory backs every Hugging Face cache location used below.
	CACHE_DIR = "/tmp/hf_cache"
	os.makedirs(CACHE_DIR, exist_ok=True)

	# NOTE(review): these variables are assigned after `transformers` has already
	# been imported at the top of the file; confirm the libraries still honour
	# them at that point. The loaders below also pass cache_dir explicitly.
	os.environ.update({
	    "HF_HOME": CACHE_DIR,
	    "TRANSFORMERS_CACHE": CACHE_DIR,
	    "HF_DATASETS_CACHE": CACHE_DIR,
	    "HF_HUB_CACHE": CACHE_DIR,
	})

	# =====================================================
	# ✅ Load Model and Tokenizer
	# =====================================================
	# Hub identifier of the sequence-classification checkpoint served by this app.
	MODEL_NAME = "roberta-base-openai-detector"

	# Tokenizer and model are loaded once, when the module is imported, and both
	# are told to store their files under CACHE_DIR.
	tokenizer = AutoTokenizer.from_pretrained(
	    MODEL_NAME,
	    cache_dir=CACHE_DIR,
	)
	model = AutoModelForSequenceClassification.from_pretrained(
	    MODEL_NAME,
	    cache_dir=CACHE_DIR,
	)

	# Application object that the route decorators further down attach to.
	app = FastAPI(title="AI Text Detector")

	# =====================================================
	# ✅ Input Schema
	# =====================================================
	# Request body accepted by POST /analyze.
	class InputText(BaseModel):
	    # Raw text to analyse; the handler strips it and splits it into paragraphs.
	    text: str

	# =====================================================
	# ✅ Helper Functions
	# =====================================================
	def split_into_paragraphs(text: str):
	    """Return the non-empty paragraphs of *text*.

	    Paragraphs are separated by a blank line, i.e. two newlines with only
	    whitespace between them. Each paragraph is stripped of surrounding
	    whitespace, and pieces that end up empty are discarded.
	    """
	    trimmed = (piece.strip() for piece in re.split(r'\n\s*\n', text.strip()))
	    return [piece for piece in trimmed if piece]

	# Lower-cased label names used to work out which class means "AI-generated".
	_AI_LABELS = frozenset({
	    "fake", "ai", "ai-generated", "ai_generated", "machine",
	    "machine-generated", "machine_generated", "generated", "gpt", "chatgpt",
	})
	_HUMAN_LABELS = frozenset({
	    "real", "human", "human-written", "human_written",
	})


	def _ai_class_index(id2label) -> int:
	    """Return the class index (0 or 1) that represents AI-generated text.

	    The index is derived from the label names in *id2label* rather than
	    assumed, because a checkpoint may label index 0 as the "fake" class.
	    When neither label is recognised (e.g. generic ``LABEL_0``/``LABEL_1``),
	    index 1 is returned, which matches the previous hard-coded behaviour.
	    """
	    for index in (0, 1):
	        label = str(id2label.get(index, "")).strip().lower()
	        if label in _AI_LABELS:
	            return index
	        if label in _HUMAN_LABELS:
	            return 1 - index
	    return 1


	def analyze_text_block(text: str):
	    """Analyze a single paragraph and return AI/Human probabilities.

	    Args:
	        text: Paragraph to classify. The tokenizer is called with
	            ``truncation=True``, so overly long input is cut rather than
	            rejected.

	    Returns:
	        A dict with:
	        - ``label_scores``: softmax probability per model label, rounded
	          to 4 decimals and keyed by the names in ``model.config.id2label``.
	        - ``ai_generated_score``: probability of the AI-generated class.
	        - ``human_written_score``: probability of the other class.
	        - ``is_ai``: True when the AI score is strictly greater.
	    """
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
	    with torch.no_grad():  # inference only; no gradients needed
	        logits = model(**inputs).logits
	    probs = torch.softmax(logits, dim=1)[0].tolist()

	    id2label = model.config.id2label
	    # Map scores through the label names instead of assuming index 1 is "AI".
	    ai_index = _ai_class_index(id2label)
	    ai_score = probs[ai_index]
	    human_score = probs[1 - ai_index]

	    return {
	        "label_scores": {
	            id2label[0]: round(probs[0], 4),
	            id2label[1]: round(probs[1], 4),
	        },
	        "ai_generated_score": ai_score,
	        "human_written_score": human_score,
	        "is_ai": ai_score > human_score,
	    }

	# =====================================================
	# ✅ Routes
	# =====================================================
	@app.get("/")
	def root():
	    """Return a static status message that points callers at POST /analyze."""
	    status_line = "AI Text Detector is running. Use POST /analyze with {'text': 'your text'}"
	    return {"message": status_line}

	@app.post("/analyze")
	async def analyze(data: InputText):
	    """Score the submitted text paragraph by paragraph and summarise it.

	    The text is stripped and split into paragraphs; each paragraph is scored
	    by ``analyze_text_block``. The overall AI percentage is the average of
	    the per-paragraph AI scores weighted by whitespace-separated word count.

	    Args:
	        data: Request body carrying the ``text`` to analyse.

	    Returns:
	        For empty (or whitespace-only) text, a payload with
	        ``success=False`` and ``code=400``. Otherwise a payload with
	        ``success=True``, ``code=200`` and a ``data`` dict holding the
	        paragraphs (``h``), word counts, ``fakePercentage``, ``isHuman``
	        (``100 - fakePercentage``) and a feedback sentence.
	    """
	    text = data.text.strip()
	    if not text:
	        return {"success": False, "code": 400, "message": "Empty input text"}

	    paragraphs = split_into_paragraphs(text)

	    # NOTE(review): analyze_text_block is a plain synchronous call made from
	    # an async handler, so the event loop waits while each paragraph is
	    # scored; consider offloading if concurrent requests matter.
	    ai_words, total_words = 0, 0
	    for paragraph in paragraphs:
	        word_count = len(paragraph.split())
	        total_words += word_count
	        ai_words += word_count * analyze_text_block(paragraph)["ai_generated_score"]

	    fake_percentage = round((ai_words / total_words) * 100, 2) if total_words > 0 else 0
	    feedback = (
	        "Most of Your Text is AI/GPT Generated"
	        if fake_percentage > 50
	        else "Most of Your Text Appears Human-Written"
	    )

	    # Exact integer arithmetic: fake_percentage has two decimals, so it is
	    # converted to hundredths of a percent and floor-divided. The float form
	    # int(total * pct / 100) can under-count by one (100 words at 29.0%
	    # evaluates to 28.999... and truncates to 28).
	    ai_word_count = total_words * round(fake_percentage * 100) // 10000

	    return {
	        "success": True,
	        "code": 200,
	        "message": "detection result passed to proxy",
	        "data": {
	            "sentences": [],
	            "isHuman": round(100 - fake_percentage, 2),
	            "additional_feedback": "",
	            "h": paragraphs,
	            "hi": [],
	            "textWords": total_words,
	            "aiWords": ai_word_count,
	            "fakePercentage": fake_percentage,
	            "specialIndexes": [],
	            "specialSentences": [],
	            "originalParagraph": text,
	            "feedback": feedback,
	            "input_text": text,
	            "detected_language": "en",
	        },
	    }