Switch from OpenAI/Mistral to HuggingFace QA model
Replaces the previous OpenAI/Mistral-based chat endpoint with an extractive question-answering pipeline using HuggingFace Transformers and the 'deepset/roberta-base-squad2' model. Updates requirements to use 'transformers' and 'torch', removing 'openai'. The API now returns both the answer and a confidence score.
- main.py +31 -56
- requirements.txt +2 -1
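For reference, the replacement model is used through the standard transformers question-answering pipeline, which returns the extracted answer span together with a confidence score. A minimal sketch of that behavior (the toy context/question and printed values are illustrative only, not taken from this repo):

from transformers import pipeline

# Extractive QA: the model selects a span from the provided context.
qa = pipeline("question-answering", model="deepset/roberta-base-squad2")

result = qa(
    question="Where is the Eiffel Tower located?",
    context="The Eiffel Tower is a wrought-iron lattice tower in Paris, France.",
)

# result is a dict like {"answer": "Paris, France", "score": 0.9..., "start": ..., "end": ...};
# the "score" field is what the updated /chat endpoint surfaces as the confidence value.
print(result["answer"], result["score"])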
main.py
CHANGED
@@ -1,32 +1,11 @@
-import os
-
 from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from pydantic import BaseModel
-from openai import OpenAI
-
-
-# ---------- Hugging Face router / Mistral config ----------
-
-HF_TOKEN = os.getenv("HF_TOKEN")
-if HF_TOKEN is None:
-    raise RuntimeError(
-        "HF_TOKEN environment variable is not set. "
-        "Go to your Space → Settings → Variables and add HF_TOKEN=<your hf_... token>."
-    )
-
-# Use HF router with OpenAI-compatible client
-client = OpenAI(
-    base_url="https://router.huggingface.co/v1",
-    api_key=HF_TOKEN,
-)
-
-MODEL_ID = "mistralai/Mistral-Nemo-Instruct-2407"
-
+from transformers import pipeline

-#
+# ---------------- FastAPI + Frontend Setup ----------------

 app = FastAPI()

@@ -44,19 +23,30 @@ class ChatRequest(BaseModel):

 @app.get("/", response_class=HTMLResponse)
 async def read_root(request: Request):
-    """Serve
+    """Serve main HTML page."""
     return templates.TemplateResponse("index.html", {"request": request})


+# ---------------- QA Model Setup ----------------
+
+# This is an extractive QA model: it finds the answer span inside the context.
+# It will download the model the first time the Space builds, then cache it.
+qa_pipeline = pipeline(
+    "question-answering",
+    model="deepset/roberta-base-squad2",
+    tokenizer="deepset/roberta-base-squad2",
+)
+
+
 @app.post("/chat")
 async def chat_endpoint(payload: ChatRequest):
     """
     Accepts:
-    - context:
-    - question: user question about that context
+    - context: paragraph / document text
+    - question: user's question about that context

     Returns:
-    - { "answer": "<
+    - { "answer": "<short answer>", "score": float }
     """
     context = payload.context.strip()
     question = payload.question.strip()
@@ -67,40 +57,25 @@ async def chat_endpoint(payload: ChatRequest):
             status_code=400,
         )

-    # Build chat-style messages for Mistral via HF router
-    messages = [
-        {
-            "role": "system",
-            "content": (
-                "You are a helpful assistant that answers questions ONLY using the "
-                "given context. If the answer is not in the context, say you don't "
-                "know and do NOT make up information."
-            ),
-        },
-        {
-            "role": "user",
-            "content": (
-                f"Context:\n{context}\n\n"
-                f"Question:\n{question}\n\n"
-                "Answer concisely based only on the context."
-            ),
-        },
-    ]
-
     try:
-
-
-
-
-
+        result = qa_pipeline(
+            {
+                "context": context,
+                "question": question,
+            }
         )

-        answer =
-
+        answer = result.get("answer", "").strip()
+        score = float(result.get("score", 0.0))
+
+        # Fallback if model fails to find anything reasonable
+        if not answer:
+            answer = "I couldn't find the answer in the given context."
+
+        return {"answer": answer, "score": score}

     except Exception as e:
-        # Return the error message to the frontend so you can see what's wrong
         return JSONResponse(
-            {"answer": f"Error
+            {"answer": f"Error running QA model: {e}"},
             status_code=500,
         )
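With the new main.py in place, the endpoint can be exercised from any HTTP client. A minimal sketch, assuming the app is served locally with uvicorn main:app on port 8000 (the URL and sample text are placeholders, not part of this change):

import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={
        # Field names match the ChatRequest model used by the endpoint.
        "context": "FastAPI is a Python web framework built on top of Starlette and Pydantic.",
        "question": "What is FastAPI built on?",
    },
)

# Expected shape on success: {"answer": "<span from the context>", "score": <float>}
print(resp.json())

Bad input is rejected with a 400, and a pipeline failure returns a 500 with the error text in the "answer" field, as the handler above shows.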
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 fastapi
 uvicorn[standard]
 jinja2
-openai
+transformers
+torch
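One side note on the new dependencies: torch is the backend transformers uses here, and the model weights are downloaded lazily on the first pipeline call. If cold-start time on the Space matters, a small warm-up step could pre-fetch the weights at build time; the script below is a hypothetical illustration, not part of this commit:

# warm_cache.py (hypothetical, not included in this change)
# Building the pipeline once downloads and caches the model weights,
# so the first real /chat request does not pay the download cost.
from transformers import pipeline

pipeline("question-answering", model="deepset/roberta-base-squad2")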