Update app/app.py
app/app.py  CHANGED  +23 -59
@@ -1,101 +1,65 @@
-import os
-import re
-import torch
+import os, re, torch
 from flask import Flask, request, jsonify, send_from_directory
-
-# Import local SLM components
 from slm_qa import TinyTransformer, encode, wrap_bos_eos, itos, PAD, BOS, EOS
 
-# Resolve absolute paths
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 STATIC_DIR = os.path.join(BASE_DIR, "static")
 MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
 
 app = Flask(__name__, static_folder=STATIC_DIR)
-
-# Moderation rule: ban exact word "sex" (case-insensitive)
 BAN_REGEX = re.compile(r"(?i)\bsex\b")
 
-# Model setup
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 VOCAB_SIZE = len(itos)
 MAX_LEN = 128
 
-model = TinyTransformer(
-    vocab_size=VOCAB_SIZE,
-    d_model=128,
-    n_heads=4,
-    n_layers=2,
-    d_ff=256,
-    dropout=0.1,
-    max_len=MAX_LEN,
-).to(DEVICE)
-
-# Load checkpoint if present
+model = TinyTransformer(vocab_size=VOCAB_SIZE, max_len=MAX_LEN).to(DEVICE)
 ckpt_path = os.path.join(MODELS_DIR, "slm_qa_best.pt")
 if os.path.exists(ckpt_path):
     model.load_state_dict(torch.load(ckpt_path, map_location=DEVICE))
 model.eval()
 
+def sample_next(logits, top_k=5):
+    """Top-k sampling instead of greedy argmax."""
+    probs = torch.softmax(logits, dim=-1)
+    topk_probs, topk_idx = torch.topk(probs, k=top_k)
+    idx = torch.multinomial(topk_probs, 1)
+    return topk_idx.gather(-1, idx).item()  # map the sampled position back to a vocab id
+
 def generate_answer(question: str, max_new_tokens: int = 40) -> str:
     q_ids = encode("q: " + question)
     a_prefix = encode("a:")
     tokens = wrap_bos_eos(q_ids + a_prefix)[:-1]
     x = torch.tensor(tokens, dtype=torch.long, device=DEVICE).unsqueeze(0)
 
     with torch.no_grad():
        for _ in range(max_new_tokens):
-            if x.size(1) >= MAX_LEN:
-                break
-            logits = model(x)
-            next_id = logits[:, -1, :].argmax(dim=-1).item()
-            if next_id == EOS:
-                break
-            x = torch.cat(
-                [x, torch.tensor([[next_id]], dtype=torch.long, device=DEVICE)],
-                dim=1,
-            )
+            if x.size(1) >= MAX_LEN: break
+            logits = model(x)
+            next_id = sample_next(logits[:, -1, :])
+            if next_id == EOS: break
+            x = torch.cat([x, torch.tensor([[next_id]], device=DEVICE)], dim=1)
 
     gen_ids = x.squeeze(0).tolist()
     prefix_len = 1 + len(q_ids) + len(a_prefix)
     answer_ids = gen_ids[prefix_len:]
     out = " ".join(itos[i] for i in answer_ids if i not in (PAD, BOS, EOS)).strip()
     return out if out else "..."
 
 @app.route("/")
-def index():
-    return send_from_directory(STATIC_DIR, "index.html")
-
-@app.route("/static/<path:filename>")
-def static_files(filename):
-    return send_from_directory(STATIC_DIR, filename)
-
-@app.route("/health")
-def health():
-    return jsonify({"ok": True})
+def index(): return send_from_directory(STATIC_DIR, "index.html")
 
 @app.route("/api/moderate", methods=["POST"])
 def moderate():
-    banned = bool(BAN_REGEX.search(text))
-    return jsonify({"banned": banned})
+    text = (request.json.get("text") or "").strip()
+    return jsonify({"banned": bool(BAN_REGEX.search(text))})
 
 @app.route("/api/answer", methods=["POST"])
 def answer():
-    if BAN_REGEX.search(question):
-        return jsonify({"ok": False, "answer": "", "error": "banned"}), 403
-
-    try:
-        ans = generate_answer(question)
-        return jsonify({"ok": True, "answer": ans})
-    except Exception:
-        # Avoid leaking stack traces in production
-        return jsonify({"ok": False, "answer": "", "error": "server_error"}), 500
+    question = (request.json.get("question") or "").strip()
+    if not question: return jsonify({"ok": False, "answer": "", "error": "Empty"}), 400
+    if BAN_REGEX.search(question): return jsonify({"ok": False, "answer": "", "error": "banned"}), 403
+    return jsonify({"ok": True, "answer": generate_answer(question)})
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", "7860"))
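The moderation rule in this file is just the word-boundary regex (?i)\bsex\b, so only the exact word is flagged, never substrings. A quick standalone illustration of that behaviour (the sample strings are made up for the demo, not taken from the app):

import re

BAN_REGEX = re.compile(r"(?i)\bsex\b")

# The \b boundaries mean only the standalone word matches, in any letter case.
print(bool(BAN_REGEX.search("SEX education")))     # True
print(bool(BAN_REGEX.search("Sussex and Essex")))  # False
print(bool(BAN_REGEX.search("sextet")))            # False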
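With the Space running (Flask reads the PORT environment variable, defaulting to 7860), the two JSON endpoints can be exercised from Python. This is a minimal sketch, assuming a local instance and the third-party requests package; the base URL and sample payloads are illustrative, while the payload keys and response shapes follow the routes above:

import requests

BASE = "http://localhost:7860"  # assumed local instance; adjust for a deployed Space

# /api/moderate returns {"banned": true/false} for the submitted text.
r = requests.post(f"{BASE}/api/moderate", json={"text": "hello there"})
print(r.json())  # {'banned': False}

# /api/answer returns {"ok": True, "answer": "..."} on success,
# or an error payload with status 400 (empty question) / 403 (banned word).
r = requests.post(f"{BASE}/api/answer", json={"question": "what is the capital of france?"})
print(r.status_code, r.json())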