# Karun Sharma
# Update app.py
# d81f071 verified
import gradio as gr
import torch
from transformers import pipeline
import textwrap
# Hugging Face Hub repository id; passed as `model=` when the pipeline is built.
MODEL_NAME = "cloghost/nllb-200-distilled-600M-hin-kang-v2"
# Default chunk width, in characters, used by split_text (it is the `width`
# given to textwrap). NOTE(review): the pipeline's max_length=512 is presumably
# counted in tokens, not characters — confirm 500 characters stays within it.
MAX_CHUNK_SIZE = 500
# Module-level cache for the translation pipeline; stays None until
# load_model() creates the pipeline on first use.
translator = None
def load_model():
    """Return the shared translation pipeline, creating it on first use.

    The pipeline is stored in the module-level ``translator`` variable so
    that later calls reuse the already-loaded model instead of loading it
    again.

    Returns:
        The translation pipeline configured for ``hin_Deva`` -> ``kang_Deva``.
    """
    global translator

    # Fast path: the pipeline was already built by an earlier call.
    if translator is not None:
        return translator

    # Device index 0 selects the first CUDA GPU; -1 is the pipeline's CPU value.
    if torch.cuda.is_available():
        target_device = 0
    else:
        target_device = -1

    translator = pipeline(
        "translation",
        model=MODEL_NAME,
        src_lang="hin_Deva",
        tgt_lang="kang_Deva",
        device=target_device,
        max_length=512,
    )
    return translator
def split_text(text, chunk_size=MAX_CHUNK_SIZE):
    """Break ``text`` into a list of pieces of at most ``chunk_size`` characters.

    Args:
        text: The string to split.
        chunk_size: Maximum width, in characters, of each returned piece.

    Returns:
        A list of strings; empty when ``text`` contains only whitespace.
    """
    # Words longer than the width are split so no chunk exceeds chunk_size;
    # existing whitespace characters (e.g. newlines) are not rewritten to spaces.
    wrapper = textwrap.TextWrapper(
        width=chunk_size,
        break_long_words=True,
        replace_whitespace=False,
    )
    return wrapper.wrap(text)
def preprocess_text(text):
    """Hook for text clean-up before translation.

    No preprocessing is implemented yet, so the input is handed back
    unchanged.

    Args:
        text: The text about to be translated.

    Returns:
        The same ``text`` object that was passed in.
    """
    prepared = text  # placeholder: no transformation applied
    return prepared
def translate_text(text, enable_preprocessing=True):
    """Translate Hindi text to Kangri, splitting long input into chunks.

    Args:
        text: Hindi source text. ``None`` is treated the same as a blank
            string.
        enable_preprocessing: When true, ``preprocess_text`` is applied to
            the input before it is split into chunks.

    Returns:
        A ``(translation, info)`` tuple of strings. For blank input the
        first element is a prompt asking for text; if anything fails during
        translation the first element is an error message. In both of those
        cases ``info`` is the empty string.
    """
    # Check for None before calling .strip(): this guard sits outside the
    # try block, so an AttributeError here would escape as an unhandled error.
    if not text or not text.strip():
        return "Please enter some text to translate.", ""

    try:
        # Local name deliberately differs from the module-level `translator`
        # cache so the global is not shadowed inside this function.
        model = load_model()
        processed_text = preprocess_text(text) if enable_preprocessing else text
        chunks = split_text(processed_text)

        translated_chunks = []
        for chunk in chunks:
            result = model(chunk)
            # Accept either a list of result dicts or a single result dict.
            first = result[0] if isinstance(result, list) else result
            translated_chunks.append(first["translation_text"])

        full_translation = " ".join(translated_chunks)
        if len(chunks) > 1:
            info = f"Processed {len(chunks)} chunk(s)."
        else:
            info = "Translation completed."
        return full_translation, info
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the exception propagate to the caller.
        return f"Translation error: {e}", ""
def create_ui():
    """Assemble the Gradio Blocks interface for the translator.

    The page has an HTML header, a row with an input column (text box,
    preprocessing checkbox, translate button) and an output column
    (translation and info boxes), followed by a set of example sentences.
    The translate button is wired to ``translate_text``.

    Returns:
        The constructed ``gr.Blocks`` object; launching it is left to the
        caller.
    """
    # Header markup shown at the top of the page.
    header_html = """
<div style="text-align: center; padding: 20px;">
<h1 style="font-family: 'Segoe UI', sans-serif;">🗣 Hindi → Kangri Translator</h1>
<p style="font-size: 16px;">Fine-tuned NLLB-200 model for low-resource Kangri language</p>
<p><em>Supports long input, chunked translation, and optional preprocessing.</em></p>
</div>
"""

    # One-element rows: each example fills only the Hindi input box.
    sample_sentences = [
        ["मैं आज बाजार जा रहा हूं।"],
        ["हिमाचल प्रदेश बहुत सुंदर है।"],
        ["दिवाली का त्योहार बहुत धूमधाम से मनाया जाता है।"],
    ]

    with gr.Blocks(
        title="Hindi to Kangri Translator",
        theme=gr.themes.Monochrome(),
    ) as demo:
        gr.HTML(header_html)

        with gr.Row():
            # Input side: source text, preprocessing toggle, action button.
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="Hindi Text",
                    placeholder="Type or paste Hindi text here...",
                    lines=6,
                )
                enable_preprocessing = gr.Checkbox(
                    label="Enable Preprocessing",
                    value=True,
                )
                translate_btn = gr.Button("Translate", variant="primary")

            # Output side: read-only translation plus a status line.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="Kangri Translation",
                    lines=6,
                    interactive=False,
                )
                translation_info = gr.Textbox(
                    label="Info",
                    interactive=False,
                )

        gr.Examples(
            examples=sample_sentences,
            inputs=[input_text],
            outputs=[output_text, translation_info],
            fn=translate_text,
            cache_examples=False,
        )

        translate_btn.click(
            fn=translate_text,
            inputs=[input_text, enable_preprocessing],
            outputs=[output_text, translation_info],
        )

    return demo
# Build and launch the Gradio app only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app = create_ui()
    app.launch()