Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import torch | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# =====================================================
# Safe Hugging Face Cache Configuration
# =====================================================
# Single directory used for every Hugging Face cache this process touches.
CACHE_DIR = "/tmp/hf_cache"

# Point all Hugging Face cache locations at CACHE_DIR in one step.
# NOTE(review): these variables are assigned after `transformers` has already
# been imported at the top of the file; any library that reads them at import
# time will not see these values -- confirm, and move this block above the
# imports if that matters for the deployment.
os.environ.update(
    dict.fromkeys(
        ("HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE", "HF_HUB_CACHE"),
        CACHE_DIR,
    )
)

# Create the directory up front; exist_ok makes a repeated start-up harmless.
os.makedirs(CACHE_DIR, exist_ok=True)
# =====================================================
# Load Model and Tokenizer
# =====================================================
# Checkpoint identifier passed to both from_pretrained() calls below.
MODEL_NAME = "roberta-base-openai-detector"

# Keyword arguments shared by the tokenizer and model loaders so both use the
# same explicit cache directory.
_PRETRAINED_KWARGS = {"cache_dir": CACHE_DIR}

# Loaded once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **_PRETRAINED_KWARGS)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, **_PRETRAINED_KWARGS
)

# ASGI application object that the route handlers belong to.
app = FastAPI(title="AI Text Detector")
# =====================================================
# Input Schema
# =====================================================
class InputText(BaseModel):
    """Request body accepted by the `analyze` handler."""

    # Raw text to be scored; `analyze` strips it and splits it into paragraphs.
    text: str
# =====================================================
# Helper Functions
# =====================================================
# A paragraph break is a newline, optional whitespace, then another newline,
# i.e. one or more blank (or whitespace-only) lines.
_PARAGRAPH_BREAK = re.compile(r"\n\s*\n")


def split_into_paragraphs(text: str):
    """Split *text* into paragraphs separated by blank lines.

    Leading and trailing whitespace is removed from the whole text and from
    each paragraph. Single line breaks inside a paragraph are kept as-is;
    only blank (or whitespace-only) lines act as separators.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty paragraph strings in their original order. The
        list is empty when *text* is empty or contains only whitespace.
    """
    # Strip each piece once, then drop any piece that ends up empty.
    stripped_chunks = (
        chunk.strip() for chunk in _PARAGRAPH_BREAK.split(text.strip())
    )
    return [chunk for chunk in stripped_chunks if chunk]
# Lower-cased label names treated as the "machine-generated" class when they
# appear in model.config.id2label.
_AI_LABEL_NAMES = frozenset(
    {"fake", "ai", "ai-generated", "ai_generated", "generated", "machine"}
)


def _ai_class_index(id2label, default=1):
    """Return the class index (0 or 1) whose label marks AI-generated text.

    Falls back to *default* when neither label is recognised, which keeps the
    previous hard-coded behaviour (index 1 treated as AI).
    """
    for index in (0, 1):
        label = str(id2label.get(index, "")).strip().lower()
        if label in _AI_LABEL_NAMES:
            return index
    return default


def analyze_text_block(text: str):
    """Score one block of text with the loaded two-class classifier.

    The AI / human scores are assigned by looking at the label names in
    ``model.config.id2label`` instead of assuming a fixed class order.

    NOTE(review): the published config for this checkpoint is believed to
    label class 0 "Fake" and class 1 "Real". If so, the earlier hard-coded
    ``probs[1]`` reported the human probability as the AI score -- confirm
    against the checkpoint's config.json.

    Args:
        text: The text to classify.

    Returns:
        A dict with:
            ``label_scores``: probability per model label, rounded to 4 places.
            ``ai_generated_score``: unrounded probability of the AI class.
            ``human_written_score``: unrounded probability of the other class.
            ``is_ai``: True when the AI probability is the larger of the two.
    """
    # truncation=True cuts inputs longer than the tokenizer's maximum length,
    # so only the beginning of a very long block is scored.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # One input row was passed, so row 0 holds its two class probabilities.
    probs = torch.softmax(logits, dim=1)[0].tolist()

    id2label = model.config.id2label
    ai_index = _ai_class_index(id2label)
    human_index = 1 - ai_index
    ai_score = probs[ai_index]
    human_score = probs[human_index]

    return {
        "label_scores": {
            id2label[0]: round(probs[0], 4),
            id2label[1]: round(probs[1], 4),
        },
        "ai_generated_score": ai_score,
        "human_written_score": human_score,
        "is_ai": ai_score > human_score,
    }
# =====================================================
# Routes
# =====================================================
# Register the handler on the application; without a path decorator the
# function is defined but never reachable over HTTP.
@app.get("/")
def root():
    """Return a short status message that points callers at POST /analyze."""
    return {
        "message": (
            "AI Text Detector is running. "
            "Use POST /analyze with {'text': 'your text'}"
        )
    }
# Register the handler at the path advertised by the root message; without a
# path decorator the function is defined but never reachable over HTTP.
@app.post("/analyze")
async def analyze(data: InputText):
    """Estimate how much of the submitted text is AI-generated.

    The text is split into paragraphs, each paragraph is scored with
    ``analyze_text_block``, and the per-paragraph AI scores are averaged with
    each paragraph weighted by its whitespace-separated word count.

    Args:
        data: Request body carrying the text to analyse.

    Returns:
        A dict envelope. For empty (or whitespace-only) text it carries
        ``success=False`` and ``code=400`` in the body; this function does not
        set the HTTP status itself. Otherwise it carries ``success=True``,
        ``code=200`` and a ``data`` dict holding the weighted AI percentage
        (``fakePercentage``), its complement (``isHuman``), word counts, the
        paragraph list (``h``) and a feedback sentence.
    """
    text = data.text.strip()
    if not text:
        return {"success": False, "code": 400, "message": "Empty input text"}

    paragraphs = split_into_paragraphs(text)

    # NOTE(review): the model calls below are synchronous inside an async
    # handler, so each request occupies the event loop while it is scored --
    # consider a plain `def` handler or a worker thread if latency matters.
    total_words = 0
    ai_words = 0.0
    for paragraph in paragraphs:
        ai_score = analyze_text_block(paragraph)["ai_generated_score"]
        word_count = len(paragraph.split())
        total_words += word_count
        # Weight each paragraph's score by its length in words.
        ai_words += word_count * ai_score

    # Guard against division by zero when no words were counted.
    fake_percentage = (
        round((ai_words / total_words) * 100, 2) if total_words > 0 else 0
    )

    if fake_percentage > 50:
        feedback = "Most of Your Text is AI/GPT Generated"
    else:
        feedback = "Most of Your Text Appears Human-Written"

    return {
        "success": True,
        "code": 200,
        "message": "detection result passed to proxy",
        "data": {
            "sentences": [],
            "isHuman": round(100 - fake_percentage, 2),
            "additional_feedback": "",
            "h": paragraphs,
            "hi": [],
            "textWords": total_words,
            # Derived from the rounded percentage and truncated to an int.
            "aiWords": int(total_words * (fake_percentage / 100)),
            "fakePercentage": fake_percentage,
            "specialIndexes": [],
            "specialSentences": [],
            "originalParagraph": text,
            "feedback": feedback,
            "input_text": text,
            # Always reported as English; no language detection is performed here.
            "detected_language": "en",
        },
    }