import gradio as gr
import torch
from transformers import pipeline
import textwrap

# HF Model repo
MODEL_NAME = "cloghost/nllb-200-distilled-600M-hin-kang-v2"
MAX_CHUNK_SIZE = 500  # adjust if needed

# Global model
translator = None


def load_model():
    """Loads the model once and returns the pipeline."""
    global translator
    if translator is None:
        device = 0 if torch.cuda.is_available() else -1
        translator = pipeline(
            "translation",
            model=MODEL_NAME,
            src_lang="hin_Deva",
            tgt_lang="kang_Deva",
            device=device,
            max_length=512,
        )
    return translator


def split_text(text, chunk_size=MAX_CHUNK_SIZE):
    """Splits long text into chunks that fit within model limits."""
    return textwrap.wrap(
        text, width=chunk_size, break_long_words=True, replace_whitespace=False
    )


def preprocess_text(text):
    """Optional preprocessing placeholder (currently returns input as-is)."""
    return text


def translate_text(text, enable_preprocessing=True):
    """Translates Hindi text to Kangri with optional preprocessing and chunking."""
    if not text.strip():
        return "Please enter some text to translate.", ""
    try:
        translator = load_model()
        processed_text = preprocess_text(text) if enable_preprocessing else text
        chunks = split_text(processed_text)
        translated_chunks = []
        for chunk in chunks:
            result = translator(chunk)
            if isinstance(result, list):
                translated_chunks.append(result[0]["translation_text"])
            else:
                translated_chunks.append(result["translation_text"])
        full_translation = " ".join(translated_chunks)
        info = (
            f"Processed {len(chunks)} chunk(s)."
            if len(chunks) > 1
            else "Translation completed."
        )
        return full_translation, info
    except Exception as e:
        return f"Translation error: {str(e)}", ""


def create_ui():
    with gr.Blocks(title="Hindi to Kangri Translator", theme=gr.themes.Monochrome()) as demo:
        gr.HTML("""
            <p>Fine-tuned NLLB-200 model for low-resource Kangri language</p>
            <p>Supports long input, chunked translation, and optional preprocessing.</p>
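        """)
        # NOTE: the remainder of create_ui() is truncated in the source. The
        # component wiring below is a minimal sketch, assuming a simple
        # input/output layout; only the translate_text(text,
        # enable_preprocessing) -> (translation, info) signature is taken
        # from the code above, everything else is an assumption.
        with gr.Row():
            input_text = gr.Textbox(
                label="Hindi text",
                lines=8,
                placeholder="यहाँ हिंदी पाठ लिखें...",
            )
            output_text = gr.Textbox(label="Kangri translation", lines=8)
        enable_preprocessing = gr.Checkbox(label="Enable preprocessing", value=True)
        info_box = gr.Textbox(label="Status", interactive=False)
        translate_btn = gr.Button("Translate")
        # Wire the button to the translation function defined above.
        translate_btn.click(
            fn=translate_text,
            inputs=[input_text, enable_preprocessing],
            outputs=[output_text, info_box],
        )
    return demo


if __name__ == "__main__":
    demo = create_ui()
    demo.launch()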