|
|
import re
import textwrap

import gradio as gr
import torch
from transformers import pipeline
|
|
|
|
|
|
|
|
# Model identifier handed to `pipeline(model=...)` in load_model().
MODEL_NAME = "cloghost/nllb-200-distilled-600M-hin-kang-v2"

# Default chunk width used by split_text(); it is passed to textwrap as a
# character count (not a token count).
MAX_CHUNK_SIZE = 500

# Module-level cache for the translation pipeline; stays None until
# load_model() builds the pipeline on first use.
translator = None
|
|
|
|
|
def load_model():
    """Return the shared translation pipeline, building it on first call.

    The pipeline is stored in the module-level ``translator`` variable, so
    every call after the first returns the already-built object.

    Returns:
        The object produced by ``transformers.pipeline("translation", ...)``
        for ``MODEL_NAME`` with source ``hin_Deva`` and target ``kang_Deva``.
    """
    global translator

    # Already built: hand back the cached pipeline.
    if translator is not None:
        return translator

    # 0 selects the first CUDA device; -1 is the pipeline's CPU setting.
    device_index = 0 if torch.cuda.is_available() else -1

    translator = pipeline(
        "translation",
        model=MODEL_NAME,
        src_lang="hin_Deva",
        tgt_lang="kang_Deva",
        device=device_index,
        max_length=512,
    )
    return translator
|
|
|
|
|
# Whitespace that directly follows a sentence terminator: the Devanagari
# danda / double danda, or Latin ". ! ?".
_SENTENCE_BOUNDARY = re.compile(r"(?<=[।॥.!?])\s+")


def split_text(text, chunk_size=None):
    """Split ``text`` into chunks of at most ``chunk_size`` characters.

    Text that already fits in one chunk is passed straight to
    ``textwrap.wrap``. Longer text is cut on sentence boundaries so that a
    sentence is not translated in two halves; whole sentences are packed
    greedily into each chunk, joined by a single space. Only a single
    sentence longer than ``chunk_size`` is hard-wrapped on word boundaries.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters. ``None`` (the
            default) means ``MAX_CHUNK_SIZE``, looked up at call time.

    Returns:
        A list of non-empty chunk strings; empty for empty or
        whitespace-only input.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size is None:
        chunk_size = MAX_CHUNK_SIZE
    if chunk_size <= 0:
        raise ValueError(f"invalid chunk_size {chunk_size!r} (must be > 0)")

    def hard_wrap(piece):
        # Plain fixed-width wrapping on word boundaries.
        return textwrap.wrap(
            piece,
            width=chunk_size,
            break_long_words=True,
            replace_whitespace=False,
        )

    # Short input: nothing to gain from sentence splitting.
    if len(text) <= chunk_size:
        return hard_wrap(text)

    chunks = []
    current = ""
    for sentence in _SENTENCE_BOUNDARY.split(text.strip()):
        sentence = sentence.strip()
        if not sentence:
            continue

        if len(sentence) > chunk_size:
            # A single sentence that cannot fit: flush what we have and
            # fall back to word-level wrapping for this sentence only.
            if current:
                chunks.append(current)
                current = ""
            chunks.extend(hard_wrap(sentence))
            continue

        candidate = f"{current} {sentence}" if current else sentence
        if len(candidate) <= chunk_size:
            current = candidate
        else:
            chunks.append(current)
            current = sentence

    if current:
        chunks.append(current)
    return chunks
|
|
|
|
|
def preprocess_text(text):
    """Return ``text`` unchanged.

    This is a hook for a future preprocessing step; at present it performs
    no transformation at all.
    """
    untouched = text
    return untouched
|
|
|
|
|
def translate_text(text, enable_preprocessing=True):
    """Translate Hindi text to Kangri, one chunk at a time.

    Args:
        text: The Hindi text to translate. ``None``, empty and
            whitespace-only values are all treated as "no input".
        enable_preprocessing: When true, ``text`` is passed through
            ``preprocess_text`` before it is split into chunks.

    Returns:
        A ``(translation, info)`` tuple of strings. With no input the first
        element is a prompt to enter text and ``info`` is empty. If anything
        raises while loading the model or translating, the first element is
        ``"Translation error: ..."`` and ``info`` is empty.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise before the error handling below is reached.
    if text is None or not text.strip():
        return "Please enter some text to translate.", ""

    try:
        # Local name deliberately differs from the module-level `translator`.
        model = load_model()
        prepared = preprocess_text(text) if enable_preprocessing else text
        chunks = split_text(prepared)

        translated_chunks = []
        for chunk in chunks:
            result = model(chunk)
            # The result may be a list of dicts or a single dict; use the
            # first entry in the list case.
            first = result[0] if isinstance(result, list) else result
            translated_chunks.append(first["translation_text"])
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Translation error: {e}", ""

    full_translation = " ".join(translated_chunks)
    if len(chunks) > 1:
        info = f"Processed {len(chunks)} chunk(s)."
    else:
        info = "Translation completed."
    return full_translation, info
|
|
|
|
|
def create_ui():
    """Build the Gradio Blocks interface for the translator and return it.

    The layout is an HTML header, a two-column row (Hindi input, the
    preprocessing checkbox and the Translate button on the left; the Kangri
    output and an info box on the right) and a set of example sentences.
    Both the button and the examples are wired to ``translate_text``.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for launching it.
    """
    # Sample Hindi sentences offered below the form (one input per example).
    sample_sentences = [
        ["मैं आज बाजार जा रहा हूं।"],
        ["हिमाचल प्रदेश बहुत सुंदर है।"],
        ["दिवाली का त्योहार बहुत धूमधाम से मनाया जाता है।"],
    ]

    with gr.Blocks(title="Hindi to Kangri Translator", theme=gr.themes.Monochrome()) as demo:
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1 style="font-family: 'Segoe UI', sans-serif;">🗣 Hindi → Kangri Translator</h1>
            <p style="font-size: 16px;">Fine-tuned NLLB-200 model for low-resource Kangri language</p>

            <p><em>Supports long input, chunked translation, and optional preprocessing.</em></p>
        </div>
        """)

        with gr.Row():
            # Left column: user input and controls.
            with gr.Column(scale=1):
                source_box = gr.Textbox(
                    label="Hindi Text",
                    placeholder="Type or paste Hindi text here...",
                    lines=6,
                )
                preprocess_toggle = gr.Checkbox(label="Enable Preprocessing", value=True)
                run_button = gr.Button("Translate", variant="primary")

            # Right column: read-only results.
            with gr.Column(scale=1):
                result_box = gr.Textbox(
                    label="Kangri Translation",
                    lines=6,
                    interactive=False,
                )
                info_box = gr.Textbox(
                    label="Info",
                    interactive=False,
                )

        result_targets = [result_box, info_box]

        # Examples supply only the text input, so translate_text runs with
        # its default value for enable_preprocessing.
        gr.Examples(
            examples=sample_sentences,
            inputs=[source_box],
            outputs=result_targets,
            fn=translate_text,
            cache_examples=False,
        )

        run_button.click(
            fn=translate_text,
            inputs=[source_box, preprocess_toggle],
            outputs=result_targets,
        )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Build the interface and start the Gradio server only when this file is
    # executed as a script (not when it is imported).
    create_ui().launch()
|
|
|