# AI_Detector_2 / app.py
import os
import re
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# =====================================================
# ✅ Safe Hugging Face Cache Configuration
# =====================================================
CACHE_DIR = "/tmp/hf_cache"
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_HUB_CACHE"] = CACHE_DIR
os.makedirs(CACHE_DIR, exist_ok=True)

# =====================================================
# ✅ Load Model and Tokenizer
# =====================================================
# Canonical hub id for the legacy "roberta-base-openai-detector" checkpoint.
MODEL_NAME = "openai-community/roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
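# Startup diagnostic (a small sketch): log the checkpoint's label mapping so
# the index resolution in analyze_text_block below can be checked against the
# downloaded config; for this detector it is expected to be {0: "Fake", 1: "Real"}.
print("id2label:", model.config.id2label)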
app = FastAPI(title="AI Text Detector")

# =====================================================
# ✅ Input Schema
# =====================================================
class InputText(BaseModel):
    text: str

# =====================================================
# ✅ Helper Functions
# =====================================================
def split_into_paragraphs(text: str):
    """Split text into paragraphs on blank lines (one or more empty lines)."""
    paragraphs = re.split(r'\n\s*\n', text.strip())
    return [p.strip() for p in paragraphs if p.strip()]

def analyze_text_block(text: str):
    """Analyze a single paragraph and return AI/Human probabilities."""
    # Inputs beyond the model's max length (512 tokens for RoBERTa) are
    # silently truncated here.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0].tolist()
    # Resolve label indices from the model config instead of hardcoding them;
    # for this checkpoint the expected mapping is {0: "Fake", 1: "Real"}, i.e.
    # index 0 (not 1) is the AI-generated class.
    labels = {name.lower(): idx for idx, name in model.config.id2label.items()}
    ai_idx = labels.get("fake", 1)
    human_idx = labels.get("real", 0)
    return {
        "label_scores": {
            model.config.id2label[ai_idx]: round(probs[ai_idx], 4),
            model.config.id2label[human_idx]: round(probs[human_idx], 4)
        },
        "ai_generated_score": probs[ai_idx],
        "human_written_score": probs[human_idx],
        "is_ai": probs[ai_idx] > probs[human_idx]
    }
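
# Batched variant (a sketch, not used by the routes below): scoring all
# paragraphs in a single forward pass avoids one model call per paragraph
# and reuses the tokenizer and model loaded above.
def analyze_text_blocks(texts: list):
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.softmax(logits, dim=-1).tolist()  # one [p0, p1] row per text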

# =====================================================
# ✅ Routes
# =====================================================
@app.get("/")
def root():
return {"message": "AI Text Detector is running. Use POST /analyze with {'text': 'your text'}"}

@app.post("/analyze")
async def analyze(data: InputText):
    text = data.text.strip()
    if not text:
        return {"success": False, "code": 400, "message": "Empty input text"}
    paragraphs = split_into_paragraphs(text)
    results = []
    ai_words, total_words = 0, 0
    for paragraph in paragraphs:
        res = analyze_text_block(paragraph)
        results.append({
            "paragraph": paragraph,
            "ai_generated_score": res["ai_generated_score"],
            "human_written_score": res["human_written_score"]
        })
        # Weight each paragraph's AI score by its word count so longer
        # paragraphs contribute proportionally to the overall percentage.
        word_count = len(paragraph.split())
        total_words += word_count
        ai_words += word_count * res["ai_generated_score"]
    fake_percentage = round((ai_words / total_words) * 100, 2) if total_words > 0 else 0
    feedback = (
        "Most of Your Text is AI/GPT Generated"
        if fake_percentage > 50
        else "Most of Your Text Appears Human-Written"
    )
    # The response mirrors the proxy's expected shape; unused fields stay empty.
    return {
        "success": True,
        "code": 200,
        "message": "detection result passed to proxy",
        "data": {
            "sentences": [],
            "isHuman": round(100 - fake_percentage, 2),
            "additional_feedback": "",
            "h": [r["paragraph"] for r in results],
            "hi": [],
            "textWords": total_words,
            "aiWords": int(round(ai_words)),  # word-weighted AI estimate
            "fakePercentage": fake_percentage,
            "specialIndexes": [],
            "specialSentences": [],
            "originalParagraph": text,
            "feedback": feedback,
            "input_text": text,
            "detected_language": "en"
        }
    }
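
# Local entrypoint (a convenience sketch; a Hugging Face Space normally starts
# uvicorn via its Docker command, so this only runs when executing the file
# directly):
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)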