import gradio as gr
from transformers import pipeline

# Use a lightweight, public Vietnamese model.
# Module-level text-generation pipeline that chat_fn calls; the model and the
# tokenizer are both loaded from the same checkpoint name.
chatbot = pipeline(
    "text-generation",
    model="NlpHUST/gpt2-vietnamese",
    tokenizer="NlpHUST/gpt2-vietnamese"
)

def chat_fn(message, history):
    """Generate the assistant's reply for one chat turn.

    The conversation so far is rendered as alternating ``Người:`` (user) and
    ``Trợ lý:`` (assistant) lines, the new message is appended, and the
    module-level ``chatbot`` pipeline is asked to continue the text after the
    final ``Trợ lý:`` label.

    Args:
        message: The user's latest message.
        history: Iterable of ``(user_message, bot_reply)`` pairs for the
            earlier turns of the conversation.

    Returns:
        The assistant's reply as a string with surrounding whitespace removed.
    """
    past_turns = [f"Người: {user}\nTrợ lý: {bot}\n" for user, bot in history]
    prompt = "".join(past_turns) + f"Người: {message}\nTrợ lý:"

    outputs = chatbot(
        prompt,
        max_new_tokens=80,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        # The tokenizer must be reached through the pipeline object; there is
        # no standalone `tokenizer` variable in this module.
        pad_token_id=getattr(chatbot.tokenizer, "eos_token_id", None),
        # Ask for the continuation only, so the prompt does not have to be
        # stripped from the output by searching for labels.
        return_full_text=False,
    )
    reply = outputs[0]["generated_text"]

    # The model may keep writing the dialogue (inventing further user and
    # assistant turns); keep only the text before the next speaker label.
    for speaker_label in ("Người:", "Trợ lý:"):
        reply = reply.partition(speaker_label)[0]
    return reply.strip()

# Build the web UI: a themed page with a custom background colour that shows
# a heading, a greeting, and a chat panel whose replies come from chat_fn.
with gr.Blocks(theme="soft", css=".gradio-container {background-color: #f0f8ff}") as demo:
    # Page heading and greeting rendered above the chat panel.
    gr.Markdown("# 🤖 Trợ lý ảo PC2English")
    gr.Markdown("Xin chào! Tôi là trợ lý ảo PC2English. Hãy trò chuyện với tôi nhé 😊")

    # NOTE(review): avatar_images is given emoji strings here; Gradio documents
    # this argument as image file paths or URLs — confirm these values are
    # accepted by the installed Gradio version.
    gr.ChatInterface(
        fn=chat_fn,
        chatbot=gr.Chatbot(height=400, avatar_images=("👤", "🤖")),
        title="Trợ lý ảo PC2English",
        description="Demo chatbot bằng mô hình NlpHUST/gpt2-vietnamese."
    )

# NOTE(review): launch() runs at module level, before the rest of this file.
# It presumably blocks while the server is running, in which case the code
# below this line only executes after the app is stopped — confirm.
demo.launch()
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline


# ===== Step 1: Read the PDF =====
def pdf_to_text(path):
    """Return the plain text of every page of the PDF at *path*.

    Args:
        path: Location of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string holding each page's text in page order, with a
        newline appended after every page.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)

# Full text of the source book, extracted once when the module is loaded.
pdf_text = pdf_to_text("Global success 3T1.pdf")  # change this to your own book


# ===== Step 2: Split the text into chunks =====
def chunk_text(text, chunk_size=300):
    """Lazily yield consecutive pieces of *text*, each at most *chunk_size* words.

    Words are whatever ``str.split()`` produces, so any run of whitespace
    separates two words and every chunk is re-joined with single spaces.
    The final chunk may contain fewer than *chunk_size* words.
    """
    tokens = text.split()
    chunk_starts = range(0, len(tokens), chunk_size)
    yield from (
        " ".join(tokens[start:start + chunk_size]) for start in chunk_starts
    )

# Materialise the generator into a list: ask() looks chunks up by position
# using the indices returned from the FAISS search.
chunks = list(chunk_text(pdf_text, 300))


# ===== Step 3: Build the FAISS index =====
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # embedding model
# Embed every chunk; the result is indexed below, so row i is assumed to
# correspond to chunks[i] — ask() relies on that when mapping search hits
# back to text.
embeddings = embedder.encode(chunks, convert_to_numpy=True)

# Second axis of the embedding matrix = size of one embedding vector.
dimension = embeddings.shape[1]
# Flat L2-distance index over the chunk embeddings.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)


# ===== Step 4: Hugging Face pipeline =====
# NOTE: this rebinds the module-level `chatbot` that was already created
# earlier in this file from the same checkpoint name.
chatbot = pipeline("text-generation", model="NlpHUST/gpt2-vietnamese")  # can be swapped for another model


# ===== Question answering =====
def ask(query, k=3):
    """Answer *query* using the most relevant passages of the indexed PDF.

    The query is embedded with the module-level ``embedder``, its nearest
    neighbours are looked up in the FAISS ``index``, and the matching entries
    of ``chunks`` are placed in a prompt for the ``chatbot`` pipeline.

    Args:
        query: The question to answer.
        k: Maximum number of chunks to retrieve as context. Values larger
            than the number of available chunks are clamped; values of zero
            or less retrieve no context.

    Returns:
        The text the model generated after the prompt, stripped of
        surrounding whitespace. The prompt and the retrieved context are not
        echoed back.
    """
    # Never request more neighbours than there are chunks: FAISS fills the
    # missing result slots with -1, which Python indexing would silently
    # resolve to the *last* chunk.
    top_k = min(k, len(chunks))

    context = ""
    if top_k > 0:
        query_vector = embedder.encode([query], convert_to_numpy=True)
        _distances, neighbour_ids = index.search(query_vector, top_k)
        # Combine the top-k passages; the i >= 0 guard drops any remaining
        # "no result" placeholders.
        context = "\n".join(chunks[i] for i in neighbour_ids[0] if i >= 0)

    prompt = f"Dựa trên tài liệu sau:\n{context}\n\nCâu hỏi: {query}\nTrả lời:"

    # return_full_text=False makes the pipeline return only the continuation
    # instead of the prompt (with all retrieved context) followed by it.
    answer = chatbot(
        prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return answer[0]["generated_text"].strip()


# ===== Test =====
# Smoke test: when the file is run as a script, ask one sample question and
# print whatever ask() returns.
if __name__ == "__main__":
    question = "How to introduce yourself in English?"
    print(ask(question))