import gradio as gr
import torch
from transformers import pipeline
import textwrap

# HF Model repo
MODEL_NAME = "cloghost/nllb-200-distilled-600M-hin-kang-v2"
MAX_CHUNK_SIZE = 500  # adjust if needed

# Global model
translator = None


def load_model():
    """Loads the model once and returns the pipeline."""
    global translator
    if translator is None:
        device = 0 if torch.cuda.is_available() else -1
        translator = pipeline(
            "translation",
            model=MODEL_NAME,
            src_lang="hin_Deva",
            tgt_lang="kang_Deva",
            device=device,
            max_length=512,
        )
    return translator


def split_text(text, chunk_size=MAX_CHUNK_SIZE):
    """Splits long text into chunks that fit within model limits."""
    return textwrap.wrap(
        text, width=chunk_size, break_long_words=True, replace_whitespace=False
    )


def preprocess_text(text):
    """Optional preprocessing placeholder (currently returns input as-is)."""
    return text


def translate_text(text, enable_preprocessing=True):
    """Translates Hindi text to Kangri with optional preprocessing and chunking."""
    if not text.strip():
        return "Please enter some text to translate.", ""
    try:
        translator = load_model()
        processed_text = preprocess_text(text) if enable_preprocessing else text
        chunks = split_text(processed_text)
        translated_chunks = []
        for chunk in chunks:
            result = translator(chunk)
            if isinstance(result, list):
                translated_chunks.append(result[0]["translation_text"])
            else:
                translated_chunks.append(result["translation_text"])
        full_translation = " ".join(translated_chunks)
        info = (
            f"Processed {len(chunks)} chunk(s)."
            if len(chunks) > 1
            else "Translation completed."
        )
        return full_translation, info
    except Exception as e:
        return f"Translation error: {str(e)}", ""


def create_ui():
    with gr.Blocks(title="Hindi to Kangri Translator", theme=gr.themes.Monochrome()) as demo:
        gr.HTML("""
            <p>Fine-tuned NLLB-200 model for low-resource Kangri language</p>
            <p>Supports long input, chunked translation, and optional preprocessing.</p>
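        """)
        # NOTE: the remainder of create_ui() is truncated in the source. The
        # component wiring below is a minimal sketch, assuming a simple
        # input/output layout; only the translate_text(text,
        # enable_preprocessing) -> (translation, info) signature is taken
        # from the code above, everything else is an assumption.
        with gr.Row():
            input_text = gr.Textbox(
                label="Hindi text",
                lines=8,
                placeholder="यहाँ हिंदी पाठ लिखें...",
            )
            output_text = gr.Textbox(label="Kangri translation", lines=8)
        enable_preprocessing = gr.Checkbox(label="Enable preprocessing", value=True)
        info_box = gr.Textbox(label="Status", interactive=False)
        translate_btn = gr.Button("Translate")
        # Wire the button to the translation function defined above.
        translate_btn.click(
            fn=translate_text,
            inputs=[input_text, enable_preprocessing],
            outputs=[output_text, info_box],
        )
    return demo


if __name__ == "__main__":
    demo = create_ui()
    demo.launch()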