# handler.py
import base64
import json
import os
import tempfile

import torch
from docling.document_converter import DocumentConverter
from huggingface_hub import EndpointHandler
from supabase import create_client
from transformers import AutoModelForCausalLM, AutoTokenizer


class Handler(EndpointHandler):
    """Inference endpoint handler: extracts structured JSON from raw text
    (or a base64-encoded PDF) using the NuExtract-1.5-tiny model."""

    def __init__(self):
        # Load .env locally; no-op in a deployed endpoint where env vars are injected.
        from dotenv import load_dotenv
        load_dotenv()

        # Supabase client.
        # NOTE(review): the URL is read from DATABASE_URL, not SUPABASE_URL —
        # confirm the env var actually holds the Supabase project URL.
        self.supabase = create_client(
            os.getenv("DATABASE_URL"),
            os.getenv("SUPABASE_SERVICE_ROLE_KEY"),
        )

        # Model: prefer Apple MPS when available, otherwise CPU.
        # fp16 is only used on MPS; CPU stays fp32 for numerical stability.
        device = "mps" if torch.backends.mps.is_available() else "cpu"
        dtype = torch.float16 if device == "mps" else torch.float32
        self.model = AutoModelForCausalLM.from_pretrained(
            "numind/NuExtract-1.5-tiny",
            torch_dtype=dtype,
            trust_remote_code=True,
        ).to(device).eval()
        self.tokenizer = AutoTokenizer.from_pretrained(
            "numind/NuExtract-1.5-tiny",
            trust_remote_code=True,
        )

    def __call__(self, payload):
        """
        Expects JSON:
        {
          "inputs": "<text or base64-pdf>",
          "is_pdf": false|true
        }

        Returns {"result": <parsed JSON object>}.
        Raises ValueError if the model output lacks the <|output|> marker,
        and json.JSONDecodeError if the extracted segment is not valid JSON.
        """
        text = payload["inputs"]
        if payload.get("is_pdf"):
            # Decode the PDF to a temp file so docling can convert it to text.
            raw = base64.b64decode(text)
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                tmp.write(raw)
                path = tmp.name
            try:
                conv = DocumentConverter().convert(path)
                text = conv.document.export_to_text()
            finally:
                # Always remove the temp file, even if conversion fails.
                os.remove(path)

        # Build your prompt + generate (same as predict_NuExtract)
        prompt = (
            "<|input|>\n### Instruction:\nRemplis la template JSON…\n"
            "### Text:\n" + text + "\n<|output|>"
        )
        enc = self.tokenizer(
            prompt, return_tensors="pt", truncation=True
        ).to(self.model.device)
        out_ids = self.model.generate(**enc, max_new_tokens=1024)
        out = self.tokenizer.decode(out_ids[0], skip_special_tokens=True)

        # Guard the split: if <|output|> is a special token it may have been
        # stripped by skip_special_tokens, leaving nothing to split on.
        parts = out.split("<|output|>")
        if len(parts) < 2:
            raise ValueError("model output missing <|output|> marker")
        result = parts[1].strip()
        return {"result": json.loads(result)}