tasal9 committed on
Commit
79ecc0a
·
verified ·
1 Parent(s): 278788c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -177
app.py CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime
6
  from datasets import load_dataset
7
  import pandas as pd
8
 
9
- # Global state to track training/fine-tuning status
10
  class TrainingState:
11
  def __init__(self):
12
  self.status = "idle"
@@ -21,245 +21,165 @@ class TrainingState:
21
 
22
  def load_dataset(self):
23
  try:
24
- self.logs.append(f"⏳ Loading dataset: tasal9/ZamAi-Pashto-Datasets-V2")
25
  dataset = load_dataset("tasal9/ZamAi-Pashto-Datasets-V2")
26
  self.dataset_loaded = True
27
- self.dataset_info = f"✅ Dataset loaded successfully!\n- Name: ZamAi-Pashto-Datasets-V2\n- Size: {len(dataset['train'])} examples"
28
-
29
- # Create sample preview
30
- sample_data = dataset['train'].select(range(5))
31
- self.dataset_sample = pd.DataFrame(sample_data)
32
-
33
- self.logs.append(f"📊 Dataset loaded: {len(dataset['train'])} Pashto examples")
34
  return True
35
  except Exception as e:
36
- self.logs.append(f"❌ Dataset loading failed: {str(e)}")
37
- self.dataset_info = f"Error loading dataset: {str(e)}"
38
  return False
39
 
40
- def start_training(self, data_size):
41
  self.status = "training"
42
  self.progress = 0
43
  self.logs = [f"🏋️ Training started at {datetime.now().strftime('%H:%M:%S')}"]
44
- self.logs.append(f"πŸ“ Training data size: {data_size} characters")
45
  self.start_time = time.time()
46
-
47
- def start_finetuning(self, data_size):
48
  self.status = "fine-tuning"
49
  self.progress = 0
50
  self.logs = [f"🎯 Fine-tuning started at {datetime.now().strftime('%H:%M:%S')}"]
51
- self.logs.append(f"πŸ“ Fine-tuning data size: {data_size} characters")
52
  self.start_time = time.time()
53
-
54
  def update_progress(self, progress):
55
  self.progress = min(100, max(0, progress))
56
- if progress >= 100 and self.status != "idle":
57
  self.complete_process()
58
-
59
- def add_log(self, message):
60
- self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {message}")
61
- if len(self.logs) > 15: # Keep only last 15 logs
62
  self.logs.pop(0)
63
-
64
  def complete_process(self):
65
  elapsed = time.time() - self.start_time
66
- self.add_log(f"🏁 {self.status.capitalize()} completed in {elapsed:.1f} seconds!")
67
  self.status = "idle"
68
  self.progress = 100
69
-
70
  def get_status(self):
71
- status_map = {
72
  "idle": "✅ Ready",
73
- "training": "🏋️ Training in progress",
74
- "fine-tuning": "🎯 Fine-tuning in progress"
75
  }
76
- return status_map.get(self.status, "❓ Unknown status")
77
 
78
- # Create global state
79
  state = TrainingState()
80
 
81
- def test_model(input_text):
82
- """Enhanced test function with response variations"""
83
- if not input_text.strip():
84
- return "Please enter some text to test."
85
-
86
- responses = [
87
- f"Processed: '{input_text}'",
88
- f"Model response to: {input_text}",
89
- f"Analysis: This appears to be Pashto text with {len(input_text)} characters",
90
- f"✅ Received: {input_text}",
91
- f"Generated continuation: {input_text}... [simulated output]",
92
- f"Pashto analysis: Text contains {len(input_text.split())} words",
93
- f"πŸ” Detected language: Pashto (confidence: 95%)"
94
  ]
95
- return random.choice(responses)
96
 
97
  def simulate_process(duration, process_type, data_size):
98
- """Simulate long-running training/fine-tuning process"""
99
  if process_type == "train":
100
  state.start_training(data_size)
101
  else:
102
  state.start_finetuning(data_size)
103
-
104
  steps = 10
105
  for i in range(steps + 1):
106
  time.sleep(duration / steps)
107
- progress = int((i / steps) * 100)
108
- state.update_progress(progress)
109
-
110
- # Add simulated log messages
111
  if i % 3 == 0:
112
- messages = [
113
- f"Processing batch {i*5}/{steps*5}",
114
- f"Loss: {random.uniform(0.1, 1.0):.4f}",
115
- f"Accuracy: {random.uniform(80, 95):.1f}%",
116
- f"Learning rate: {random.uniform(1e-5, 1e-3):.6f}",
117
- f"Pashto token accuracy: {random.uniform(85, 98):.1f}%",
118
- f"GPU utilization: {random.randint(70, 95)}%"
119
- ]
120
- state.add_log(random.choice(messages))
121
-
122
  state.complete_process()
123
 
124
- def train_model(dataset_text):
125
- """Training function with simulated processing"""
126
- if not dataset_text.strip():
127
- return "Please provide training data.", ""
128
-
129
- # Validate dataset requirements
130
  if not state.dataset_loaded:
131
- return "Please load the Pashto dataset first using the 'Load Dataset' button.", ""
132
-
133
- data_size = len(dataset_text)
134
  if state.status != "idle":
135
- return "Another process is already running. Please wait.", ""
136
-
137
- # Start simulation in background thread
138
- threading.Thread(
139
- target=simulate_process,
140
- args=(15, "train", data_size),
141
- daemon=True
142
- ).start()
143
-
144
- return "Training started successfully! Check status in the Status tab.", ""
145
 
146
- def finetune_model(dataset_text):
147
- """Fine-tuning function with simulated processing"""
148
- if not dataset_text.strip():
149
- return "Please provide fine-tuning data.", ""
150
-
151
- # Validate dataset requirements
152
  if not state.dataset_loaded:
153
- return "Please load the Pashto dataset first using the 'Load Dataset' button.", ""
154
-
155
- data_size = len(dataset_text)
156
  if state.status != "idle":
157
- return "Another process is already running. Please wait.", ""
158
-
159
- # Start simulation in background thread
160
- threading.Thread(
161
- target=simulate_process,
162
- args=(10, "fine-tune", data_size),
163
- daemon=True
164
- ).start()
165
-
166
- return "Fine-tuning started successfully! Check status in the Status tab.", ""
167
 
168
  def load_hf_dataset():
169
- """Load dataset from Hugging Face Hub"""
170
- success = state.load_dataset()
171
- if success:
172
- return {
173
- dataset_status: state.dataset_info,
174
- dataset_preview: state.dataset_sample,
175
- dataset_btn: "Dataset Loaded ✅"
176
- }
177
  return {
178
  dataset_status: state.dataset_info,
179
- dataset_preview: pd.DataFrame(),
180
- dataset_btn: "Retry Loading Dataset"
181
  }
182
 
183
  def get_current_status():
184
- """Get current system status"""
185
- status_text = state.get_status()
186
-
187
- # Add progress information
188
- if state.status != "idle":
189
- status_text += f" - {state.progress}% complete"
190
-
191
- # Format logs
192
- logs = "\n".join(state.logs) if state.logs else "No logs available"
193
-
194
  return {
195
- status_box: status_text,
196
- progress_bar: state.progress / 100, # Progress expects a value between 0 and 1
197
- log_output: logs
198
  }
199
 
200
- # Create interface
201
- with gr.Blocks(title="Pashto-Base-Bloom Trainer", theme="soft") as demo:
202
- gr.Markdown("# 🌸 Pashto-Base-Bloom Training Space")
203
- gr.Markdown("Train and fine-tune Pashto language model tasal9/pashto-base-bloom")
204
-
205
- with gr.Tab("Dataset"):
206
- gr.Markdown("### Load Pashto Dataset")
207
- gr.Markdown("Dataset: [tasal9/ZamAi-Pashto-Datasets-V2](https://huggingface.co/datasets/tasal9/ZamAi-Pashto-Datasets-V2)")
208
  with gr.Row():
209
- dataset_btn = gr.Button("Load Dataset", variant="primary")
210
- dataset_status = gr.Textbox(label="Dataset Status", lines=3, interactive=False)
211
- dataset_preview = gr.DataFrame(label="Dataset Preview (First 5 Examples)", interactive=False)
212
  dataset_btn.click(load_hf_dataset, outputs=[dataset_status, dataset_preview, dataset_btn])
213
-
214
- with gr.Tab("Test Model"):
215
- gr.Markdown("### Test Model with Sample Text")
216
  with gr.Row():
217
- with gr.Column():
218
- test_input = gr.Textbox(label="Input Text", lines=3, placeholder="Enter Pashto text here...")
219
- test_btn = gr.Button("Run Test", variant="primary")
220
- test_output = gr.Textbox(label="Model Output", lines=4, interactive=False)
221
  test_btn.click(test_model, inputs=test_input, outputs=test_output)
222
-
223
- with gr.Tab("Train Model"):
224
- gr.Markdown("### Train Model with New Data")
225
- gr.Markdown("Note: Requires loaded Pashto dataset")
226
- with gr.Row():
227
- with gr.Column():
228
- train_input = gr.Textbox(label="Training Data", lines=8, placeholder="Paste additional training data here...")
229
- train_btn = gr.Button("Start Training", variant="primary")
230
- train_output = gr.Textbox(label="Training Status", lines=2, interactive=False)
231
  train_btn.click(train_model, inputs=train_input, outputs=train_output)
232
-
233
- with gr.Tab("Fine-tune Model"):
234
- gr.Markdown("### Fine-tune Model with Specialized Data")
235
- gr.Markdown("Note: Requires loaded Pashto dataset")
236
- with gr.Row():
237
- with gr.Column():
238
- finetune_input = gr.Textbox(label="Fine-tuning Data", lines=8, placeholder="Paste fine-tuning dataset here...")
239
- finetune_btn = gr.Button("Start Fine-tuning", variant="primary")
240
- finetune_output = gr.Textbox(label="Fine-tuning Status", lines=2, interactive=False)
241
  finetune_btn.click(finetune_model, inputs=finetune_input, outputs=finetune_output)
242
-
243
- with gr.Tab("Status"):
244
- gr.Markdown("### System Status")
245
  with gr.Row():
246
- with gr.Column():
247
- status_box = gr.Textbox(label="Current Status", interactive=False)
248
- # CORRECTED: Using gr.Progress() instead of gr.ProgressBar()
249
- progress_bar = gr.Progress()
250
- refresh_btn = gr.Button("Refresh Status", variant="secondary")
251
- auto_refresh = gr.Checkbox(label="Auto-refresh every 5 seconds", value=True)
252
- log_output = gr.Textbox(label="Process Logs", lines=10, interactive=False)
253
-
254
- # Auto-refresh component
255
- auto_refresh_component = gr.Interval(5, interactive=False)
256
-
257
- with gr.Blocks() as demo:
258
- out = gr.Textbox()
259
- def update():
260
- return "Auto refreshed."
261
 
262
- with gr.Blocks() as demo:
263
- out = gr.Textbox()
264
  if __name__ == "__main__":
265
  demo.launch(share=True)
 
6
  from datasets import load_dataset
7
  import pandas as pd
8
 
9
+ # Global state
10
  class TrainingState:
11
  def __init__(self):
12
  self.status = "idle"
 
21
 
22
  def load_dataset(self):
23
  try:
24
+ self.logs.append("⏳ Loading dataset: tasal9/ZamAi-Pashto-Datasets-V2")
25
  dataset = load_dataset("tasal9/ZamAi-Pashto-Datasets-V2")
26
  self.dataset_loaded = True
27
+ self.dataset_info = f"✅ Dataset loaded!\nName: ZamAi-Pashto-Datasets-V2\nSize: {len(dataset['train'])} examples"
28
+ self.dataset_sample = pd.DataFrame(dataset['train'].select(range(5)))
29
+ self.logs.append(f"📊 {len(dataset['train'])} Pashto examples loaded")
 
 
 
 
30
  return True
31
  except Exception as e:
32
+ self.logs.append(f"❌ Error loading dataset: {str(e)}")
33
+ self.dataset_info = f"Error: {str(e)}"
34
  return False
35
 
36
+ def start_training(self, size):
37
  self.status = "training"
38
  self.progress = 0
39
  self.logs = [f"🏋️ Training started at {datetime.now().strftime('%H:%M:%S')}"]
40
+ self.logs.append(f"πŸ“ Data size: {size} characters")
41
  self.start_time = time.time()
42
+
43
+ def start_finetuning(self, size):
44
  self.status = "fine-tuning"
45
  self.progress = 0
46
  self.logs = [f"🎯 Fine-tuning started at {datetime.now().strftime('%H:%M:%S')}"]
47
+ self.logs.append(f"πŸ“ Data size: {size} characters")
48
  self.start_time = time.time()
49
+
50
  def update_progress(self, progress):
51
  self.progress = min(100, max(0, progress))
52
+ if progress >= 100:
53
  self.complete_process()
54
+
55
+ def add_log(self, msg):
56
+ self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
57
+ if len(self.logs) > 15:
58
  self.logs.pop(0)
59
+
60
  def complete_process(self):
61
  elapsed = time.time() - self.start_time
62
+ self.add_log(f"🏁 {self.status.capitalize()} completed in {elapsed:.1f}s")
63
  self.status = "idle"
64
  self.progress = 100
65
+
66
  def get_status(self):
67
+ m = {
68
  "idle": "✅ Ready",
69
+ "training": "🏋️ Training",
70
+ "fine-tuning": "🎯 Fine-tuning"
71
  }
72
+ return m.get(self.status, "❓ Unknown") + (f" - {self.progress}%" if self.status != "idle" else "")
73
 
 
74
  state = TrainingState()
75
 
76
+ def test_model(text):
77
+ if not text.strip():
78
+ return "❗ Enter text to test."
79
+ options = [
80
+ f"Processed: '{text}'",
81
+ f"Model response to: {text}",
82
+ f"Pashto analysis: {len(text)} characters",
83
+ f"✅ Got it: {text}",
84
+ f"Generated: {text}... [simulated]",
85
+ f"πŸ” Words: {len(text.split())}"
 
 
 
86
  ]
87
+ return random.choice(options)
88
 
89
  def simulate_process(duration, process_type, data_size):
 
90
  if process_type == "train":
91
  state.start_training(data_size)
92
  else:
93
  state.start_finetuning(data_size)
 
94
  steps = 10
95
  for i in range(steps + 1):
96
  time.sleep(duration / steps)
97
+ state.update_progress(int((i / steps) * 100))
 
 
 
98
  if i % 3 == 0:
99
+ state.add_log(random.choice([
100
+ f"Batch {i}/{steps}",
101
+ f"Loss: {random.uniform(0.1, 1.0):.3f}",
102
+ f"LR: {random.uniform(1e-5, 1e-3):.6f}",
103
+ f"GPU: {random.randint(60, 95)}% (sim)",
104
+ ]))
 
 
 
 
105
  state.complete_process()
106
 
107
+ def train_model(text):
108
+ if not text.strip():
109
+ return "❌ Add training data.", ""
 
 
 
110
  if not state.dataset_loaded:
111
+ return "❌ Load dataset first.", ""
 
 
112
  if state.status != "idle":
113
+ return "⏳ Wait for current process.", ""
114
+ threading.Thread(target=simulate_process, args=(15, "train", len(text)), daemon=True).start()
115
+ return "✅ Training started", ""
 
 
 
 
 
 
 
116
 
117
+ def finetune_model(text):
118
+ if not text.strip():
119
+ return "❌ Add fine-tuning data.", ""
 
 
 
120
  if not state.dataset_loaded:
121
+ return "❌ Load dataset first.", ""
 
 
122
  if state.status != "idle":
123
+ return "⏳ Wait for current process.", ""
124
+ threading.Thread(target=simulate_process, args=(10, "fine-tune", len(text)), daemon=True).start()
125
+ return "✅ Fine-tuning started", ""
 
 
 
 
 
 
 
126
 
127
  def load_hf_dataset():
128
+ ok = state.load_dataset()
 
 
 
 
 
 
 
129
  return {
130
  dataset_status: state.dataset_info,
131
+ dataset_preview: state.dataset_sample if ok else pd.DataFrame(),
132
+ dataset_btn: "✅ Loaded" if ok else "Retry"
133
  }
134
 
135
  def get_current_status():
 
 
 
 
 
 
 
 
 
 
136
  return {
137
+ status_box: state.get_status(),
138
+ progress_bar: state.progress / 100,
139
+ log_output: "\n".join(state.logs) if state.logs else "No logs yet"
140
  }
141
 
142
+ with gr.Blocks(title="Pashto Base Bloom Trainer", theme="soft") as demo:
143
+ gr.Markdown("# 🌸 Pashto-Base-Bloom Trainer")
144
+ gr.Markdown("Train & fine-tune Pashto model: `tasal9/pashto-base-bloom`")
145
+
146
+ with gr.Tab("📂 Dataset"):
147
+ gr.Markdown("### Load Dataset from Hugging Face")
 
 
148
  with gr.Row():
149
+ dataset_btn = gr.Button("Load Dataset")
150
+ dataset_status = gr.Textbox(label="Status", lines=2, interactive=False)
151
+ dataset_preview = gr.DataFrame(label="Sample Preview", interactive=False)
152
  dataset_btn.click(load_hf_dataset, outputs=[dataset_status, dataset_preview, dataset_btn])
153
+
154
+ with gr.Tab("🧪 Test Model"):
 
155
  with gr.Row():
156
+ test_input = gr.Textbox(label="Input", lines=3)
157
+ test_btn = gr.Button("Test")
158
+ test_output = gr.Textbox(label="Output", lines=3, interactive=False)
 
159
  test_btn.click(test_model, inputs=test_input, outputs=test_output)
160
+
161
+ with gr.Tab("🏋️ Train"):
162
+ train_input = gr.Textbox(label="Training Data", lines=6)
163
+ train_btn = gr.Button("Start Training")
164
+ train_output = gr.Textbox(label="Status", lines=2, interactive=False)
 
 
 
 
165
  train_btn.click(train_model, inputs=train_input, outputs=train_output)
166
+
167
+ with gr.Tab("🎯 Fine-tune"):
168
+ finetune_input = gr.Textbox(label="Fine-tuning Data", lines=6)
169
+ finetune_btn = gr.Button("Start Fine-tuning")
170
+ finetune_output = gr.Textbox(label="Status", lines=2, interactive=False)
 
 
 
 
171
  finetune_btn.click(finetune_model, inputs=finetune_input, outputs=finetune_output)
172
+
173
+ with gr.Tab("📊 Status"):
 
174
  with gr.Row():
175
+ status_box = gr.Textbox(label="Current Status", interactive=False)
176
+ progress_bar = gr.Slider(minimum=0, maximum=1, value=0, step=0.01, interactive=False, label="Progress")
177
+ log_output = gr.Textbox(label="Logs", lines=10, interactive=False)
178
+ refresh_btn = gr.Button("🔄 Refresh")
179
+ auto_refresh = gr.Checkbox(label="Auto-refresh every 5s", value=True)
180
+ refresh_btn.click(get_current_status, outputs=[status_box, progress_bar, log_output])
181
+ auto_refresh_component = gr.Interval(5, visible=True)
182
+ auto_refresh_component.click(get_current_status, outputs=[status_box, progress_bar, log_output], every=5)
 
 
 
 
 
 
 
183
 
 
 
184
  if __name__ == "__main__":
185
  demo.launch(share=True)