"""Gradio app that translates Hindi text to Kangri with a fine-tuned NLLB-200 model."""

import logging
import textwrap
from typing import List, Tuple

import gradio as gr
import torch
from transformers import pipeline

logger = logging.getLogger(__name__)

# HF Model repo
MODEL_NAME = "cloghost/nllb-200-distilled-600M-hin-kang-v2"
MAX_CHUNK_SIZE = 500  # adjust if needed; measured in characters, not tokens

# Global model: populated lazily by load_model() so importing this module
# does not trigger a model download.
translator = None


def load_model():
    """Load the translation pipeline once and return it.

    The pipeline is stored in the module-level ``translator`` variable, so
    later calls return the already-built object instead of reloading the
    model.

    Returns:
        The ``transformers`` translation pipeline for ``MODEL_NAME``.
    """
    global translator
    if translator is None:
        # Device index 0 when CUDA is available, otherwise -1.
        device = 0 if torch.cuda.is_available() else -1
        translator = pipeline(
            "translation",
            model=MODEL_NAME,
            src_lang="hin_Deva",
            tgt_lang="kang_Deva",
            device=device,
            max_length=512,
        )
    return translator


def split_text(text: str, chunk_size: int = MAX_CHUNK_SIZE) -> List[str]:
    """Split long text into chunks of at most ``chunk_size`` characters.

    Splitting is delegated to ``textwrap.wrap``: breaks happen at whitespace
    where possible, words longer than ``chunk_size`` are cut
    (``break_long_words=True``), and existing newlines are kept inside the
    chunks (``replace_whitespace=False``).

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk.

    Returns:
        The list of chunks; empty when ``text`` contains only whitespace.
    """
    return textwrap.wrap(
        text,
        width=chunk_size,
        break_long_words=True,
        replace_whitespace=False,
    )


def preprocess_text(text: str) -> str:
    """Optional preprocessing placeholder (currently returns input as-is)."""
    return text


def _extract_translation(result) -> str:
    """Return the translated string from a pipeline result (list or dict)."""
    if isinstance(result, list):
        return result[0]["translation_text"]
    return result["translation_text"]


def translate_text(text: str, enable_preprocessing: bool = True) -> Tuple[str, str]:
    """Translate Hindi text to Kangri with optional preprocessing and chunking.

    Args:
        text: Hindi input text. ``None``, empty and whitespace-only input are
            rejected with a prompt message instead of being translated.
        enable_preprocessing: When true, ``preprocess_text`` is applied
            before chunking.

    Returns:
        A ``(translation, info)`` pair. On failure the first element holds
        an error message and the second is an empty string.
    """
    # Guard against None as well as blank input so the UI gets a prompt
    # rather than an AttributeError.
    if not text or not text.strip():
        return "Please enter some text to translate.", ""

    try:
        model = load_model()
        processed_text = preprocess_text(text) if enable_preprocessing else text
        chunks = split_text(processed_text)
        translated_chunks = [_extract_translation(model(chunk)) for chunk in chunks]
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the logs instead of discarding it.
        logger.exception("Translation failed")
        return f"Translation error: {str(e)}", ""

    full_translation = " ".join(translated_chunks)
    if len(chunks) > 1:
        info = f"Processed {len(chunks)} chunk(s)."
    else:
        info = "Translation completed."
    return full_translation, info


def create_ui():
    """Build and return the Gradio Blocks interface for the translator."""
    with gr.Blocks(
        title="Hindi to Kangri Translator",
        theme=gr.themes.Monochrome(),
    ) as demo:
        gr.HTML(
            """
            <h1>🗣 Hindi → Kangri Translator</h1>
            <p>Fine-tuned NLLB-200 model for low-resource Kangri language</p>
            <p>Supports long input, chunked translation, and optional preprocessing.</p>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="Hindi Text",
                    placeholder="Type or paste Hindi text here...",
                    lines=6,
                )
                enable_preprocessing = gr.Checkbox(
                    label="Enable Preprocessing",
                    value=True,
                )
                translate_btn = gr.Button("Translate", variant="primary")

            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="Kangri Translation",
                    lines=6,
                    interactive=False,
                )
                translation_info = gr.Textbox(
                    label="Info",
                    interactive=False,
                )

        # Each example supplies only the text input; translate_text's default
        # for enable_preprocessing covers the argument that is not provided.
        gr.Examples(
            examples=[
                ["मैं आज बाजार जा रहा हूं।"],
                ["हिमाचल प्रदेश बहुत सुंदर है।"],
                ["दिवाली का त्योहार बहुत धूमधाम से मनाया जाता है।"],
            ],
            inputs=[input_text],
            outputs=[output_text, translation_info],
            fn=translate_text,
            cache_examples=False,
        )

        translate_btn.click(
            fn=translate_text,
            inputs=[input_text, enable_preprocessing],
            outputs=[output_text, translation_info],
        )

    return demo


if __name__ == "__main__":
    app = create_ui()
    app.launch()