# Spaces: Sleeping  (appears to be a status banner captured from the hosting page; not part of the program)
import html
import textwrap  # used to split long lines into model-sized chunks
import time

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the tokenizer and seq2seq model once, at import time, so every request
# reuses the same in-memory instance. Inference is pinned to the CPU and the
# model is switched to eval mode.
print("\n⏳ جاري تحميل نموذج Fine-Tashkeel (الدقيق)...")
model_name = "basharalrfooh/Fine-Tashkeel"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = torch.device("cpu")
model.to(device)
model.eval()
print(f"✅ جاهز على {device}!\n")
# Placeholder markup shown in the output pane while the model is running:
# a spinning hourglass plus two status lines (user-facing text is Arabic).
LOADING_HTML = """
<div style="text-align: center; padding: 2rem;">
<div style="display: inline-block; animation: spin 1s linear infinite; font-size: 2.5rem;">⏳</div>
<div style="font-size: 1.3rem; color: #667eea; margin-top: 1rem; font-weight: bold;">جاري العمل على التشكيل...</div>
<div style="color: #999; margin-top: 0.5rem;">هذا النموذج دقيق وبطيء، نقوم بمعالجة النص جزءاً بجزء...</div>
</div>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
"""
# Arabic combining marks treated as diacritics by this module:
# U+064B-U+0652 (tanween, short vowels, shadda, sukun), U+0653-U+0658
# (maddah, hamza above/below, subscript alef, inverted damma, noon ghunna)
# and U+0670 (superscript alef).
_DIACRITICS = (
    '\u064B', '\u064C', '\u064D', '\u064E', '\u064F',
    '\u0650', '\u0651', '\u0652', '\u0653', '\u0654',
    '\u0655', '\u0656', '\u0657', '\u0658', '\u0670',
)

# str.translate table mapping every diacritic code point to None (= delete).
_STRIP_DIACRITICS_TABLE = dict.fromkeys(map(ord, _DIACRITICS))


def remove_diacritics(text):
    """Return *text* with every character listed in ``_DIACRITICS`` removed.

    All other characters, including whitespace and newlines, are kept as-is.
    """
    # One pass over the string instead of one .replace() pass per diacritic.
    return text.translate(_STRIP_DIACRITICS_TABLE)


def count_diacritics(text):
    """Return how many characters of *text* are listed in ``_DIACRITICS``."""
    return sum(text.count(mark) for mark in _DIACRITICS)
# --- Diacritization entry point: long lines are processed chunk by chunk ---
def _diacritize_chunk(chunk):
    """Run one text chunk through the module-level tokenizer/model and decode it."""
    inputs = tokenizer(
        chunk,
        return_tensors="pt",
        max_length=1024,
        truncation=True
    )
    # Keep the input tensors on the same device as the model.
    inputs = inputs.to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=1024,       # leave room for the added marks
            num_beams=1,           # greedy decoding, for speed
            early_stopping=False
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def run_model(text):
    """Diacritize *text* and return data for the UI state components.

    Existing diacritics are stripped first, then the text is processed line
    by line (blank lines are preserved). Each line is wrapped into chunks of
    at most 250 characters, split on whitespace only, and every chunk is
    diacritized separately; the chunks of a line are re-joined with a space.

    Returns:
        A 4-tuple ``(clean_text, result_text, stats, status_message)``.
        On success ``stats`` has the keys ``elapsed``, ``words_count``,
        ``chars_count``, ``diacritics_count`` and ``speed``. On empty input
        or on any exception, the first two items are ``None`` and ``stats``
        is ``{'error': message}``.
    """
    if not text or not text.strip():
        error_msg = "❌ يرجى إدخال نص"
        return None, None, {'error': error_msg}, error_msg

    try:
        start = time.time()

        # 1. Drop any diacritics already present in the input.
        full_clean_text = remove_diacritics(text)

        # 2. Work line by line so the original line structure survives.
        final_result_parts = []
        for line in full_clean_text.split('\n'):
            line = line.strip()
            if not line:
                final_result_parts.append("")  # keep blank lines
                continue

            # 3. Split long lines into chunks of at most 250 characters
            #    without breaking words.
            chunks = textwrap.wrap(
                line,
                width=250,
                break_long_words=False,
                replace_whitespace=False,
            )
            line_result_parts = [
                _diacritize_chunk(chunk) for chunk in chunks if chunk.strip()
            ]
            final_result_parts.append(" ".join(line_result_parts))

        # 4. Reassemble the full text and compute the statistics.
        final_result = '\n'.join(final_result_parts)
        elapsed = time.time() - start
        words_count = len(full_clean_text.split())
        stats = {
            "elapsed": elapsed,
            "words_count": words_count,
            "chars_count": len(final_result),
            "diacritics_count": count_diacritics(final_result),
            "speed": round(words_count / elapsed, 1) if elapsed > 0 else 0,
        }
        return full_clean_text, final_result, stats, "✅ تم التشكيل بنجاح!"
    except Exception as e:
        # UI boundary: log the failure and report it to the user instead of
        # letting the event handler crash.
        print(f"ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        error_msg = f"❌ خطأ: {str(e)}"
        return None, None, {'error': error_msg}, error_msg
# Characters wrapped in a coloured <span> when highlight mode is on.
_HIGHLIGHT_DIACRITICS = frozenset(
    '\u064B\u064C\u064D\u064E\u064F'
    '\u0650\u0651\u0652\u0653\u0654'
    '\u0655\u0656\u0657\u0658\u0670'
)

_HIGHLIGHT_SPAN = (
    '<span style="color: #fff; background: #e74c3c; font-weight: bold; '
    'padding: 2px 5px; border-radius: 3px; margin: 0 2px;">{}</span>'
)

_STAT_CARD_STYLE = (
    "background: white; padding: 1.5rem; border-radius: 12px; text-align: center; "
    "box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;"
)


def _text_to_html(text):
    """Escape *text* for HTML and turn its newlines into ``<br>`` tags."""
    return html.escape(text or "").replace('\n', '<br>')


def _stat_card(icon, value, label):
    """Render one statistics card (icon, value, caption)."""
    return f"""
<div style="{_STAT_CARD_STYLE}">
<div style="font-size: 2.5rem;">{icon}</div>
<div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">{value}</div>
<div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">{label}</div>
</div>"""


def generate_final_html(clean_text, result_text, stats, show_comparison, highlight_mode):
    """Build the HTML shown in the output pane.

    Args:
        clean_text: Input text without diacritics (left comparison pane).
        result_text: Diacritized text; when falsy, only an error box (if
            ``stats`` carries an ``'error'`` key) or ``None`` is returned.
        stats: Dict produced by ``run_model``; missing keys render as 0.
        show_comparison: When true, prepend a before/after comparison grid.
        highlight_mode: When true, wrap every diacritic of the result in a
            coloured ``<span>``.

    Returns:
        An HTML string, or ``None`` when there is nothing to display.

    All user-derived text is HTML-escaped before interpolation, and newlines
    are rendered as ``<br>`` so the line structure stays visible.
    """
    stats = stats or {}

    if not result_text:
        if 'error' in stats:
            return f"""
<div style="text-align: center; padding: 2rem;">
<div style="color: #e74c3c; font-size: 1.2rem;">{html.escape(str(stats['error']))}</div>
</div>
"""
        return None

    safe_clean = _text_to_html(clean_text)
    safe_result = _text_to_html(result_text)

    comparison_html = ""
    if show_comparison:
        comparison_html = f"""
<div style="display: grid; grid-template-columns: 1fr auto 1fr; gap: 1rem; padding: 1.5rem; background: #f8f9fa; border-radius: 15px; border: 2px solid #ffc107;">
<div style="text-align: right;">
<h4 style="color: #667eea; margin-bottom: 1rem;">⬅️ قبل التشكيل</h4>
<div style="background: white; padding: 1.5rem; border-radius: 10px; border: 2px solid #ddd; font-size: 1.1rem; line-height: 2.2; direction: rtl; text-align: right;">
{safe_clean}
</div>
</div>
<div style="display: flex; align-items: center; justify-content: center; font-size: 2rem; color: #667eea;">➡️</div>
<div style="text-align: right;">
<h4 style="color: #28a745; margin-bottom: 1rem;">➡️ بعد التشكيل</h4>
<div style="background: white; padding: 1.5rem; border-radius: 10px; border: 2px solid #28a745; font-size: 1.1rem; line-height: 2.2; direction: rtl; text-align: right;">
{safe_result}
</div>
</div>
</div>
"""

    highlighted_result = safe_result
    if highlight_mode:
        # Single pass; the escaped markup itself contains no diacritics, so
        # only characters of the user text are ever wrapped.
        highlighted_result = "".join(
            _HIGHLIGHT_SPAN.format(ch) if ch in _HIGHLIGHT_DIACRITICS else ch
            for ch in safe_result
        )

    cards = "".join([
        _stat_card("⚡", f"{stats.get('elapsed', 0):.2f}s", "الوقت"),
        _stat_card("📝", stats.get('words_count', 0), "كلمة"),
        _stat_card("📊", stats.get('chars_count', 0), "حرف"),
        _stat_card("✨", stats.get('diacritics_count', 0), "علامة"),
        _stat_card("🎯", "98%+", "الدقة"),
    ])
    stats_html = f"""
<div style="margin-top: 2rem; padding: 1.5rem; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); border-radius: 15px;">
<h3 style="color: #667eea; text-align: center; margin-bottom: 1.5rem;">📊 إحصائيات التشكيل</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); gap: 1rem;">{cards}
</div>
</div>
"""

    output = comparison_html
    output += f"""
<div style="margin-top: 1.5rem; padding: 1.5rem; background: #f8f9fa; border-radius: 12px; font-size: 1.2rem; line-height: 2.2; border-right: 4px solid #28a745; direction: rtl; text-align: right;">
{highlighted_result}
</div>
{stats_html}
"""
    return output
# Gradio UI: one text input, two display toggles, an HTML output pane and a
# status line. The last model result is kept in gr.State so the toggles can
# re-render the output without running the model again.
# NOTE(review): the source lost its indentation; the container nesting below
# is reconstructed from the order of the calls — confirm the intended layout.
with gr.Blocks(
    title="🎯 مُشَكِّل (الدقيق 98%)",
    theme=gr.themes.Soft(),
    css="""
    body { font-family: 'Arial', sans-serif; }
    .gradio-container { direction: rtl; }
    """
) as demo:
    gr.Markdown("""
    # 🚀 مُشَكِّل النصوص (النموذج الدقيق 98%+)
    <p style='direction: rtl; color: #e74c3c; font-weight: bold;'>
    ⚠️ تنبيه: هذا النموذج هو الأدق، ولكنه بطيء مع النصوص الطويلة.
    </p>
    """)

    # Results of the most recent run_model call, shared between handlers.
    clean_text_state = gr.State(None)
    result_text_state = gr.State(None)
    stats_state = gr.State({})

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="النص",
                placeholder="أدخل النص العربي هنا (مشكول أو بدون تشكيل)...",
                lines=10,
                max_lines=20
            )
            with gr.Row():
                show_comparison = gr.Checkbox(label="🔄 مقارنة النصين", value=False)
                highlight_diacritics = gr.Checkbox(label="🎨 تلوين الحركات", value=False)
            submit_btn = gr.Button("✨ إضافة التشكيل", variant="primary", size="lg")

    output_html = gr.HTML()
    status = gr.Textbox(label="الحالة", interactive=False)

    gr.Examples(
        [
            ["السلام عليكم ورحمة الله وبركاته"],
            ["اللغة العربية لغة القران الكريم"],
        ],
        inputs=input_text,
        label="أمثلة سريعة"
    )

    def show_loading():
        """Return the spinner markup and a 'working' status message."""
        return LOADING_HTML, "⏳ جاري التشكيل..."

    # Inputs shared by every call to generate_final_html.
    render_inputs = [
        clean_text_state,
        result_text_state,
        stats_state,
        show_comparison,
        highlight_diacritics
    ]

    # Click: show the spinner, run the model into the state components, then
    # render the final HTML from that state.
    submit_btn.click(
        fn=show_loading,
        inputs=None,
        outputs=[output_html, status]
    ).then(
        fn=run_model,
        inputs=[input_text],
        outputs=[clean_text_state, result_text_state, stats_state, status]
    ).then(
        fn=generate_final_html,
        inputs=render_inputs,
        outputs=[output_html]
    )

    # Toggling a checkbox re-renders from the stored state only.
    show_comparison.change(
        fn=generate_final_html,
        inputs=render_inputs,
        outputs=[output_html]
    )
    highlight_diacritics.change(
        fn=generate_final_html,
        inputs=render_inputs,
        outputs=[output_html]
    )

demo.launch()