fadimari committed on
Commit
36b635a
·
verified ·
1 Parent(s): db08577

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +274 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+ import time
5
+
6
# Announce the model load on the console before it starts.
print("\nโณ ุฌุงุฑูŠ ุชุญู…ูŠู„ ู†ู…ูˆุฐุฌ Fine-Tashkeel (ุงู„ุฏู‚ูŠู‚)...")

# Hugging Face Hub identifier of the checkpoint used for diacritization.
model_name = "basharalrfooh/Fine-Tashkeel"

# Inference is pinned to the CPU.
device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
# `.to()` and `.eval()` each return the module itself, so the three original
# statements (load, move to device, switch to eval mode) chain into one.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device).eval()

print(f"โœ… ุฌุงู‡ุฒ ุนู„ู‰ {device}!\n")
17
+
18
# Placeholder markup returned by `show_loading` while a request is running:
# a glyph animated with the `spin` keyframes declared in the trailing <style>
# element, followed by two caption lines.
# NOTE(review): the caption text looks mis-encoded in this view of the file —
# confirm the literal against the original UTF-8 source.
LOADING_HTML = """
<div style="text-align: center; padding: 2rem;">
<div style="display: inline-block; animation: spin 1s linear infinite; font-size: 2.5rem;">โณ</div>
<div style="font-size: 1.3rem; color: #667eea; margin-top: 1rem; font-weight: bold;">ุฌุงุฑูŠ ุงู„ุนู…ู„ ุนู„ู‰ ุงู„ุชุดูƒูŠู„...</div>
<div style="color: #999; margin-top: 0.5rem;">ู‡ุฐุง ุงู„ู†ู…ูˆุฐุฌ ุฏู‚ูŠู‚ ูˆุจุทูŠุก (20-30 ุซุงู†ูŠุฉ)ุŒ ูŠุฑุฌู‰ ุงู„ุงู†ุชุธุงุฑ</div>
</div>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
"""
31
+
32
def remove_diacritics(text):
    """Return ``text`` with the Arabic diacritic code points deleted.

    The deleted characters are U+064B through U+0658 plus U+0670; every
    other character is passed through unchanged.
    """
    # Mapping a code point to None makes str.translate drop that character.
    marks = [*range(0x064B, 0x0659), 0x0670]
    deletion_table = dict.fromkeys(marks)
    return text.translate(deletion_table)
41
+
42
def count_diacritics(text):
    """Return how many Arabic diacritic characters occur in ``text``.

    A character counts when it is one of U+064B through U+0658 or U+0670.
    """
    marks = frozenset(map(chr, (*range(0x064B, 0x0659), 0x0670)))
    total = 0
    for ch in text:
        if ch in marks:
            total += 1
    return total
49
+
50
def run_model(text):
    """Diacritize ``text`` with the module-level ``tokenizer`` and ``model``.

    Any diacritics already present are stripped first, so the model is always
    fed undiacritized text.

    Args:
        text: Raw user input. ``None``, empty and whitespace-only values are
            rejected without touching the model.

    Returns:
        A 4-tuple ``(clean_text, result, stats, status_message)``.

        On success ``clean_text`` is the stripped input, ``result`` the
        decoded model output, and ``stats`` a dict with the keys
        ``elapsed`` (seconds), ``words_count``, ``chars_count``,
        ``diacritics_count`` and ``speed`` (words per second).

        On rejected input or on any exception the first two items are
        ``None`` and ``stats`` is ``{'error': status_message}``; the
        exception is printed to the console rather than re-raised so the UI
        can show the message.
    """
    if not text or not text.strip():
        error_msg = "โŒ ูŠุฑุฌู‰ ุฅุฏุฎุงู„ ู†ุต"
        return None, None, {'error': error_msg}, error_msg

    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start = time.perf_counter()
        clean_text = remove_diacritics(text)

        # A single sequence needs no padding. Padding to max_length forced the
        # model to process 1024 positions for every request, however short.
        # NOTE(review): input longer than 1024 tokens is silently truncated;
        # consider reporting that to the user.
        inputs = tokenizer(
            clean_text,
            return_tensors="pt",
            max_length=1024,
            truncation=True,
        ).to(device)

        # Greedy decoding (num_beams=1). early_stopping only applies to beam
        # search, so it is omitted here.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=1024,
                num_beams=1,
            )

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        elapsed = time.perf_counter() - start

        words_count = len(clean_text.split())
        diacritics_count = count_diacritics(result)
        # Guard against a zero-length interval on very coarse timers.
        speed = round(words_count / elapsed, 1) if elapsed > 0 else 0

        stats = {
            "elapsed": elapsed,
            "words_count": words_count,
            "chars_count": len(result),
            "diacritics_count": diacritics_count,
            "speed": speed,
        }

        return clean_text, result, stats, "โœ… ุชู… ุงู„ุชุดูƒูŠู„ ุจู†ุฌุงุญ!"

    except Exception as e:
        # UI boundary: log the full traceback to the console and hand the
        # message back to the caller instead of crashing the event handler.
        print(f"ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        error_msg = f"โŒ ุฎุทุฃ: {str(e)}"
        return None, None, {'error': error_msg}, error_msg
100
+
101
def generate_final_html(clean_text, result_text, stats, show_comparison, highlight_mode):
    """Build the HTML shown in the output panel for one diacritization run.

    All caller-supplied text (the input, the model output and any error
    message) is HTML-escaped before being interpolated, so markup typed by
    the user is displayed literally instead of being injected into the page.

    Args:
        clean_text: The undiacritized input text, shown in the "before" pane.
        result_text: The diacritized text. When falsy, no result is rendered.
        stats: Dict of run statistics (``elapsed``, ``words_count``,
            ``chars_count``, ``diacritics_count``, ``speed``) or
            ``{'error': message}``. ``None`` is treated as an empty dict.
        show_comparison: When truthy, prepend a side-by-side before/after
            panel.
        highlight_mode: When truthy, wrap every diacritic of the result in a
            coloured ``<span>``.

    Returns:
        The HTML string; an error box when there is no result but ``stats``
        carries an ``'error'`` entry; ``None`` when there is nothing to show.
    """
    # Function-scope import keeps this block self-contained.
    import html

    stats = stats or {}

    if not result_text:
        if 'error' in stats:
            # The message may embed exception text, so escape it as well.
            return f"""
            <div style="text-align: center; padding: 2rem;">
            <div style="color: #e74c3c; font-size: 1.2rem;">{html.escape(str(stats['error']))}</div>
            </div>
            """
        return None

    safe_clean = html.escape(clean_text or "")
    safe_result = html.escape(result_text)

    comparison_html = ""
    if show_comparison:
        comparison_html = f"""
        <div style="display: grid; grid-template-columns: 1fr auto 1fr; gap: 1rem; padding: 1.5rem; background: #f8f9fa; border-radius: 15px; border: 2px solid #ffc107;">
        <div style="text-align: right;">
        <h4 style="color: #667eea; margin-bottom: 1rem;">โฌ…๏ธ ู‚ุจู„ ุงู„ุชุดูƒูŠู„</h4>
        <div style="background: white; padding: 1.5rem; border-radius: 10px; border: 2px solid #ddd; font-size: 1.1rem; line-height: 2.2; direction: rtl; text-align: right;">
        {safe_clean}
        </div>
        </div>
        <div style="display: flex; align-items: center; justify-content: center; font-size: 2rem; color: #667eea;">โžก๏ธ</div>
        <div style="text-align: right;">
        <h4 style="color: #28a745; margin-bottom: 1rem;">โžก๏ธ ุจุนุฏ ุงู„ุชุดูƒูŠู„</h4>
        <div style="background: white; padding: 1.5rem; border-radius: 10px; border: 2px solid #28a745; font-size: 1.1rem; line-height: 2.2; direction: rtl; text-align: right;">
        {safe_result}
        </div>
        </div>
        </div>
        """

    highlighted_result = safe_result
    if highlight_mode:
        diacritics = ['\u064B', '\u064C', '\u064D', '\u064E', '\u064F',
                      '\u0650', '\u0651', '\u0652', '\u0653', '\u0654',
                      '\u0655', '\u0656', '\u0657', '\u0658', '\u0670']
        span_style = (
            "color: #fff; background: #e74c3c; font-weight: bold; "
            "padding: 2px 5px; border-radius: 3px; margin: 0 2px;"
        )
        # One translate pass over the already-escaped text; html.escape leaves
        # the diacritic code points untouched, so they are still matched.
        highlight_table = {
            ord(mark): f'<span style="{span_style}">{mark}</span>'
            for mark in diacritics
        }
        highlighted_result = safe_result.translate(highlight_table)

    # NOTE(review): the last card shows a fixed "98%+" figure; it is not
    # computed from this run.
    stats_html = f"""
    <div style="margin-top: 2rem; padding: 1.5rem; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); border-radius: 15px;">
    <h3 style="color: #667eea; text-align: center; margin-bottom: 1.5rem;">๐Ÿ“Š ุฅุญุตุงุฆูŠุงุช ุงู„ุชุดูƒูŠู„</h3>
    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); gap: 1rem;">
    <div style="background: white; padding: 1.5rem; border-radius: 12px; text-align: center; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;">
    <div style="font-size: 2.5rem;">โšก</div>
    <div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">{stats.get('elapsed', 0):.2f}s</div>
    <div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">ุงู„ูˆู‚ุช</div>
    </div>
    <div style="background: white; padding: 1.5rem; border-radius: 12px; text-align: center; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;">
    <div style="font-size: 2.5rem;">๐Ÿ“</div>
    <div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">{stats.get('words_count', 0)}</div>
    <div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">ูƒู„ู…ุฉ</div>
    </div>
    <div style="background: white; padding: 1.5rem; border-radius: 12px; text-align: center; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;">
    <div style="font-size: 2.5rem;">๐Ÿ“Š</div>
    <div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">{stats.get('chars_count', 0)}</div>
    <div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">ุญุฑู</div>
    </div>
    <div style="background: white; padding: 1.5rem; border-radius: 12px; text-align: center; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;">
    <div style="font-size: 2.5rem;">โœจ</div>
    <div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">{stats.get('diacritics_count', 0)}</div>
    <div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">ุนู„ุงู…ุฉ</div>
    </div>
    <div style="background: white; padding: 1.5rem; border-radius: 12px; text-align: center; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;">
    <div style="font-size: 2.5rem;">๐Ÿš€</div>
    <div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">{stats.get('speed', 0)}</div>
    <div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">ูƒู„ู…ุฉ/ุซุง</div>
    </div>
    <div style="background: white; padding: 1.5rem; border-radius: 12px; text-align: center; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2); border-left: 4px solid #667eea;">
    <div style="font-size: 2.5rem;">๐ŸŽฏ</div>
    <div style="font-size: 1.8rem; font-weight: bold; color: #667eea;">98%+</div>
    <div style="color: #666; font-size: 0.85rem; margin-top: 0.5rem;">ุงู„ุฏู‚ุฉ</div>
    </div>
    </div>
    </div>
    """

    output = comparison_html
    output += f"""
    <div style="margin-top: 1.5rem; padding: 1.5rem; background: #f8f9fa; border-radius: 12px; font-size: 1.2rem; line-height: 2.2; border-right: 4px solid #28a745; direction: rtl; text-align: right;">
    {highlighted_result}
    </div>
    {stats_html}
    """

    return output
189
+
190
# Gradio UI definition and event wiring for the diacritization app.
# NOTE(review): this view of the file lost its indentation, so the nesting of
# the Row/Column contexts below is a reconstruction — confirm the layout
# against the original source.
with gr.Blocks(
    title="๐ŸŽฏ ู…ูุดูŽูƒูู‘ู„ (ุงู„ุฏู‚ูŠู‚ 98%)",
    theme=gr.themes.Soft(),
    css="""
body { font-family: 'Arial', sans-serif; }
.gradio-container { direction: rtl; }
"""
) as demo:

    # Page heading plus a red, right-to-left notice paragraph.
    gr.Markdown("""
# ๐Ÿš€ ู…ูุดูŽูƒูู‘ู„ ุงู„ู†ุตูˆุต (ุงู„ู†ู…ูˆุฐุฌ ุงู„ุฏู‚ูŠู‚ 98%+)
<p style='direction: rtl; color: #e74c3c; font-weight: bold;'>
โš ๏ธ ุชู†ุจูŠู‡: ู‡ุฐุง ุงู„ู†ู…ูˆุฐุฌ ู‡ูˆ ุงู„ุฃุฏู‚ุŒ ูˆู„ูƒู†ู‡ ุจุทูŠุก ุฌุฏุงู‹ (20-30 ุซุงู†ูŠุฉ) ู„ุฃู†ู‡ ูŠุนู…ู„ ุนู„ู‰ ุณูŠุฑูุฑ ู…ุฌุงู†ูŠ.
</p>
""")

    # State holders filled by run_model and re-read by generate_final_html,
    # so toggling a checkbox re-renders the last result without re-running
    # the model.
    clean_text_state = gr.State(None)
    result_text_state = gr.State(None)
    stats_state = gr.State({})

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="ุงู„ู†ุต",
                placeholder="ุฃุฏุฎู„ ุงู„ู†ุต ุงู„ุนุฑุจูŠ ู‡ู†ุง (ู…ุดูƒูˆู„ ุฃูˆ ุจุฏูˆู† ุชุดูƒูŠู„)...",
                lines=10,
                max_lines=20
            )

            # Rendering options; both default to off.
            with gr.Row():
                show_comparison = gr.Checkbox(label="๐Ÿ”„ ู…ู‚ุงุฑู†ุฉ ุงู„ู†ุตูŠู†", value=False)
                highlight_diacritics = gr.Checkbox(label="๐ŸŽจ ุชู„ูˆูŠู† ุงู„ุญุฑูƒุงุช", value=False)

            submit_btn = gr.Button("โœจ ุฅุถุงูุฉ ุงู„ุชุดูƒูŠู„", variant="primary", size="lg")

    # Rendered result and a read-only status line.
    output_html = gr.HTML()
    status = gr.Textbox(label="ุงู„ุญุงู„ุฉ", interactive=False)

    # Sample sentences that populate the input box.
    gr.Examples(
        [
            ["ุงู„ุณู„ุงู… ุนู„ูŠูƒู… ูˆุฑุญู…ุฉ ุงู„ู„ู‡ ูˆุจุฑูƒุงุชู‡"],
            ["ุงู„ู„ุบุฉ ุงู„ุนุฑุจูŠุฉ ู„ุบุฉ ุงู„ู‚ุฑุงู† ุงู„ูƒุฑูŠู…"],
        ],
        inputs=input_text,
        label="ุฃู…ุซู„ุฉ ุณุฑูŠุนุฉ"
    )

    def show_loading():
        """Return the loading placeholder HTML and a matching status message."""
        return LOADING_HTML, "โณ ุฌุงุฑูŠ ุงู„ุชุดูƒูŠู„..."

    # Inputs shared by every call to generate_final_html, in the order of its
    # parameters.
    render_inputs = [
        clean_text_state,
        result_text_state,
        stats_state,
        show_comparison,
        highlight_diacritics
    ]

    # Three chained steps: show the placeholder, run the model (storing its
    # outputs in the state holders), then render the final HTML.
    submit_btn.click(
        fn=show_loading,
        inputs=None,
        outputs=[output_html, status]
    ).then(
        fn=run_model,
        inputs=[input_text],
        outputs=[clean_text_state, result_text_state, stats_state, status]
    ).then(
        fn=generate_final_html,
        inputs=render_inputs,
        outputs=[output_html]
    )

    # Toggling either option re-renders from the stored state.
    show_comparison.change(
        fn=generate_final_html,
        inputs=render_inputs,
        outputs=[output_html]
    )

    highlight_diacritics.change(
        fn=generate_final_html,
        inputs=render_inputs,
        outputs=[output_html]
    )

# Start the Gradio app with default launch settings.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers