extract generate_with_updates to global scope
app.py (CHANGED)
@@ -367,6 +367,224 @@ def initialize_app():
 # Load model automatically when script starts
 initialize_app()
 
+# Global function for resetting UI state
+def reset_ui():
+    """Reset the UI elements for a new generation"""
+    global baseline_think_tag_detected, baseline_progress_frozen
+    global baseline_pre_think_content, baseline_post_think_content
+
+    # Reset progress tracking for monotonic behavior
+    reset_progress_tracking()
+
+    baseline_think_tag_detected = False
+    baseline_progress_frozen = False
+    baseline_pre_think_content = ""
+    baseline_post_think_content = ""
+
+    return {
+        "status": "**Starting generation...**",
+        "progress": 0,
+        "thinking": "",
+        "answer": "",
+        "tokens": "",
+        "generate_btn_text": "Generating...",
+        "generate_btn_interactive": False,
+        "stop_btn_interactive": True
+    }
+
+@spaces.GPU(duration=240)
+def generate_with_updates(prompt, baseline_progress_queue, baseline_tokens_queue, stop_generation):
+    """Wrapper around generation function that handles real-time updates"""
+    # Check if model is loaded
+    if not model_loaded_successfully:
+        yield {
+            "status": f"**Cannot generate: {model_loading_error}**"
+        }
+        return
+
+    # Use default values
+    max_tokens = 2048
+
+    # Reset UI first
+    yield reset_ui()
+
+    # Start generation in a separate thread to allow for UI updates
+    baseline_result = ""
+    baseline_token_count = 0
+    generation_error = None
+    generation_thread = None
+
+    def baseline_progress_updater(prog_value):
+        """Update the baseline progress via the queue"""
+        baseline_progress_queue.put(prog_value)
+
+    def baseline_tokens_updater(text, token_count):
+        """Update the baseline generated text via the queue"""
+        global baseline_think_tag_detected, baseline_progress_frozen, baseline_pre_think_content, baseline_post_think_content
+
+        # Check if </think> tag appears in the text
+        if not baseline_think_tag_detected and "</think>" in text:
+            baseline_think_tag_detected = True
+            baseline_progress_frozen = True
+
+            # Split content at </think>
+            parts = text.split("</think>", 1)
+            baseline_pre_think_content = parts[0] + "</think>"
+            baseline_post_think_content = parts[1] if len(parts) > 1 else ""
+
+            # Signal content split with token count
+            baseline_tokens_queue.put(("THINK_TAG_DETECTED", baseline_pre_think_content, baseline_post_think_content, token_count))
+        elif baseline_think_tag_detected:
+            # Update post-think content
+            if "</think>" in text:
+                parts = text.split("</think>", 1)
+                baseline_post_think_content = parts[1] if len(parts) > 1 else ""
+                baseline_tokens_queue.put(("POST_THINK_UPDATE", baseline_post_think_content))
+            else:
+                baseline_tokens_queue.put(("NORMAL_UPDATE", text))
+        else:
+            # Normal pre-think streaming with token count
+            baseline_tokens_queue.put(("NORMAL_UPDATE", text, token_count))
+
+    def run_generation():
+        nonlocal baseline_result, baseline_token_count, generation_error
+        try:
+            # Baseline-only generation
+            baseline_result, baseline_token_count = generate_baseline_only(
+                prompt=prompt,
+                max_new_tokens=max_tokens,
+                baseline_progress_callback=baseline_progress_updater,
+                baseline_tokens_callback=baseline_tokens_updater,
+                stop_event=stop_generation
+            )
+        except Exception as e:
+            generation_error = str(e)
+
+    # Start the generation thread
+    generation_thread = threading.Thread(target=run_generation)
+    generation_thread.start()
+
+    # Monitor queues for updates while generation is running
+    baseline_current_text = ""
+    baseline_thinking_tokens = 0
+    baseline_last_progress = 0
+
+    while generation_thread.is_alive() or not baseline_tokens_queue.empty() or not baseline_progress_queue.empty():
+        updates = {}
+
+        # Check baseline tokens queue
+        try:
+            while not baseline_tokens_queue.empty():
+                token_update = baseline_tokens_queue.get_nowait()
+
+                if isinstance(token_update, tuple):
+                    update_type = token_update[0]
+
+                    if update_type == "THINK_TAG_DETECTED":
+                        # </think> tag detected - split content
+                        pre_content = token_update[1]
+                        post_content = token_update[2]
+                        thinking_token_count = token_update[3]
+
+                        updates["thinking"] = pre_content
+                        updates["answer"] = post_content
+                        updates["progress"] = 100.0  # Freeze at 100%
+
+                        # Use actual token count (before </think>)
+                        baseline_thinking_tokens = thinking_token_count
+                        updates["tokens"] = f"{baseline_thinking_tokens}"
+
+                    elif update_type == "POST_THINK_UPDATE":
+                        # Update only the final answer
+                        post_content = token_update[1]
+                        updates["answer"] = post_content
+                        # Don't update token count - frozen at thinking tokens
+
+                    elif update_type == "NORMAL_UPDATE":
+                        # Normal text update
+                        baseline_current_text = token_update[1]
+                        if not baseline_think_tag_detected:
+                            updates["thinking"] = baseline_current_text
+                            # Update thinking token count with actual token count if available
+                            if len(token_update) > 2:
+                                baseline_thinking_tokens = token_update[2]
+                            else:
+                                # Fallback to word count for backward compatibility
+                                baseline_thinking_tokens = len(baseline_current_text.split())
+                            updates["tokens"] = f"{baseline_thinking_tokens}"
+                        else:
+                            # This shouldn't happen, but handle it gracefully
+                            updates["answer"] = baseline_current_text
+                else:
+                    # Backward compatibility - treat as normal text
+                    baseline_current_text = token_update
+                    updates["thinking"] = baseline_current_text
+                    if not baseline_think_tag_detected:
+                        baseline_thinking_tokens = len(baseline_current_text.split())
+                        updates["tokens"] = f"{baseline_thinking_tokens}"
+
+        except queue.Empty:
+            pass
+
+        # Check baseline progress queue
+        try:
+            while not baseline_progress_queue.empty():
+                baseline_last_progress = baseline_progress_queue.get_nowait()
+                updates["progress"] = baseline_last_progress
+        except queue.Empty:
+            pass
+
+        # If there are any updates, yield them
+        if updates:
+            yield updates
+
+        # Sleep briefly to prevent excessive CPU usage
+        time.sleep(0.05)
+
+    # Final update
+    final_updates = {
+        "status": "**Generation complete!**" if not generation_error else f"**Error: {generation_error}**",
+        "progress": 100,
+        "generate_btn_text": "Generate",
+        "generate_btn_interactive": True,
+        "stop_btn_interactive": True
+    }
+
+    if not generation_error:
+        # Handle baseline final display
+        if baseline_think_tag_detected:
+            # Split result for final display
+            if "</think>" in baseline_result:
+                parts = baseline_result.split("</think>", 1)
+                final_updates["thinking"] = parts[0] + "</think>"
+                final_updates["answer"] = parts[1] if len(parts) > 1 else ""
+                # Use actual token count from generation
+                if baseline_thinking_tokens > 0:
+                    final_updates["tokens"] = f"{baseline_thinking_tokens}"
+                else:
+                    # Fallback: use actual token count for thinking part
+                    thinking_text = parts[0] + "</think>"
+                    thinking_token_count = len(tokenizer.encode(thinking_text, add_special_tokens=False))
+                    final_updates["tokens"] = f"{thinking_token_count}"
+            else:
+                final_updates["thinking"] = baseline_result
+                # Use actual token count
+                if baseline_thinking_tokens > 0:
+                    final_updates["tokens"] = f"{baseline_thinking_tokens}"
+                else:
+                    total_token_count = len(tokenizer.encode(baseline_result, add_special_tokens=False))
+                    final_updates["tokens"] = f"{total_token_count}"
+        else:
+            final_updates["thinking"] = baseline_result
+            # Use actual token count
+            if baseline_thinking_tokens > 0:
+                final_updates["tokens"] = f"{baseline_thinking_tokens}"
+            else:
+                total_token_count = len(tokenizer.encode(baseline_result, add_special_tokens=False))
+                final_updates["tokens"] = f"{total_token_count}"
+
+    yield final_updates
+
 # Create the Gradio interface
 def create_interface():
     # Create custom theme with light green progress bars
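Because the extracted generate_with_updates now receives its queues and stop event as parameters and yields plain dicts keyed by strings rather than by Gradio components, it can be driven with no UI at all. A minimal sketch of a headless smoke test, assuming app.py exposes the function exactly as in the hunk above (the prompt text is invented):

import queue
import threading

from app import generate_with_updates  # the module-level generator from the diff

progress_q = queue.Queue()
tokens_q = queue.Queue()
stop_evt = threading.Event()

# Consume the string-keyed update dicts the generator yields.
for update in generate_with_updates("Why is the sky blue?", progress_q, tokens_q, stop_evt):
    if "progress" in update:
        print(f"progress: {update['progress']}")
    if "status" in update:
        print(update["status"])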
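The </think> handling that both the old and new baseline_tokens_updater implement reduces to one rule: split once on the first closing tag, keep the tag with the thinking text, and treat the remainder as the answer. A standalone illustration (example strings invented):

# Same split rule as baseline_tokens_updater in the diff.
text = "Let me reason about scattering...</think>The sky looks blue because of Rayleigh scattering."
parts = text.split("</think>", 1)
pre_think = parts[0] + "</think>"                 # thinking content, tag included
post_think = parts[1] if len(parts) > 1 else ""   # final answer (may be empty)

print(pre_think)
print(post_think)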
@@ -577,237 +795,47 @@ def create_interface():
     baseline_tokens_queue = queue.Queue()
     stop_generation = threading.Event()
 
-    def baseline_progress_updater(prog_value):
-        """Update the baseline progress via the queue"""
-        baseline_progress_queue.put(prog_value)
-
-    def baseline_tokens_updater(text, token_count):
-        """Update the baseline generated text via the queue"""
-        global baseline_think_tag_detected, baseline_progress_frozen, baseline_pre_think_content, baseline_post_think_content
-
-        # Check if </think> tag appears in the text
-        if not baseline_think_tag_detected and "</think>" in text:
-            baseline_think_tag_detected = True
-            baseline_progress_frozen = True
-
-            # Split content at </think>
-            parts = text.split("</think>", 1)
-            baseline_pre_think_content = parts[0] + "</think>"
-            baseline_post_think_content = parts[1] if len(parts) > 1 else ""
-
-            # Signal content split with token count
-            baseline_tokens_queue.put(("THINK_TAG_DETECTED", baseline_pre_think_content, baseline_post_think_content, token_count))
-        elif baseline_think_tag_detected:
-            # Update post-think content
-            if "</think>" in text:
-                parts = text.split("</think>", 1)
-                baseline_post_think_content = parts[1] if len(parts) > 1 else ""
-                baseline_tokens_queue.put(("POST_THINK_UPDATE", baseline_post_think_content))
-            else:
-                baseline_tokens_queue.put(("NORMAL_UPDATE", text))
-        else:
-            # Normal pre-think streaming with token count
-            baseline_tokens_queue.put(("NORMAL_UPDATE", text, token_count))
-
     def stop_generation_fn():
         """Stop the generation process"""
         stop_generation.set()
         return "Generation stopped"
 
-    def reset_ui():
-        """Reset the UI elements for a new generation"""
-        global baseline_think_tag_detected, baseline_progress_frozen, baseline_pre_think_content, baseline_post_think_content
-
-        # Reset progress tracking for monotonic behavior
-        reset_progress_tracking()
-
-        baseline_think_tag_detected = False
-        baseline_progress_frozen = False
-        baseline_pre_think_content = ""
-        baseline_post_think_content = ""
-        stop_generation.clear()
-
-        # Clear all queues
-        while not baseline_progress_queue.empty():
-            baseline_progress_queue.get()
-        while not baseline_tokens_queue.empty():
-            baseline_tokens_queue.get()
-
-        return {
-            generation_status: "**Starting generation...**",
-            baseline_progress_bar: 0,
-            baseline_thinking_output: "",
-            baseline_answer_output: "",
-            baseline_tokens_count: "",
-            generate_btn: gr.Button("Generating...", variant="secondary", interactive=False),
-            stop_btn: gr.Button("Stop", variant="stop", interactive=True)
-        }
-
-    @spaces.GPU(duration=240)
-    def generate_with_updates(prompt):
-        """Wrapper around generation function that handles real-time updates"""
-        # Check if model is loaded
-        if not model_loaded_successfully:
-            yield {
-                generation_status: f"**Cannot generate: {model_loading_error}**"
-            }
-            return
-        # [23 removed lines elided in the page snapshot: generation setup and the run_generation body]
-            )
-        except Exception as e:
-            generation_error = str(e)
-
-        # Start the generation thread
-        generation_thread = threading.Thread(target=run_generation)
-        generation_thread.start()
-
-        # Monitor queues for updates while generation is running
-        baseline_current_text = ""
-        baseline_thinking_tokens = 0
-        baseline_last_progress = 0
-
-        while generation_thread.is_alive() or not baseline_tokens_queue.empty() or not baseline_progress_queue.empty():
-            updates = {}
-
-            # Check baseline tokens queue
-            try:
-                while not baseline_tokens_queue.empty():
-                    token_update = baseline_tokens_queue.get_nowait()
-
-                    if isinstance(token_update, tuple):
-                        update_type = token_update[0]
-
-                        if update_type == "THINK_TAG_DETECTED":
-                            # </think> tag detected - split content
-                            pre_content = token_update[1]
-                            post_content = token_update[2]
-                            thinking_token_count = token_update[3]
-
-                            updates[baseline_thinking_output] = pre_content
-                            updates[baseline_answer_output] = post_content
-                            updates[baseline_progress_bar] = 100.0  # Freeze at 100%
-
-                            # Use actual token count (before </think>)
-                            baseline_thinking_tokens = thinking_token_count
-                            updates[baseline_tokens_count] = f"{baseline_thinking_tokens}"
-
-                        elif update_type == "POST_THINK_UPDATE":
-                            # Update only the final answer
-                            post_content = token_update[1]
-                            updates[baseline_answer_output] = post_content
-                            # Don't update token count - frozen at thinking tokens
-
-                        elif update_type == "NORMAL_UPDATE":
-                            # Normal text update
-                            baseline_current_text = token_update[1]
-                            if not baseline_think_tag_detected:
-                                updates[baseline_thinking_output] = baseline_current_text
-                                # Update thinking token count with actual token count if available
-                                if len(token_update) > 2:
-                                    baseline_thinking_tokens = token_update[2]
-                                else:
-                                    # Fallback to word count for backward compatibility
-                                    baseline_thinking_tokens = len(baseline_current_text.split())
-                                updates[baseline_tokens_count] = f"{baseline_thinking_tokens}"
-                            else:
-                                # This shouldn't happen, but handle it gracefully
-                                updates[baseline_answer_output] = baseline_current_text
-                    else:
-                        # Backward compatibility - treat as normal text
-                        baseline_current_text = token_update
-                        updates[baseline_thinking_output] = baseline_current_text
-                        if not baseline_think_tag_detected:
-                            baseline_thinking_tokens = len(baseline_current_text.split())
-                            updates[baseline_tokens_count] = f"{baseline_thinking_tokens}"
-
-            except queue.Empty:
-                pass
-
-            # Check baseline progress queue
-            try:
-                while not baseline_progress_queue.empty():
-                    baseline_last_progress = baseline_progress_queue.get_nowait()
-                    updates[baseline_progress_bar] = baseline_last_progress
-            except queue.Empty:
-                pass
-
-            # If there are any updates, yield them
-            if updates:
-                yield updates
-
-            time.sleep(0.05)
-
-        # Final update
-        final_updates = {
-            generation_status: "**Generation complete!**" if not generation_error else f"**Error: {generation_error}**",
-            baseline_progress_bar: 100,
-            generate_btn: gr.Button("Generate", variant="primary", interactive=True),
-            stop_btn: gr.Button("Stop", variant="stop", interactive=True)
-        }
-
-        if not generation_error:
-            # Handle baseline final display
-            if baseline_think_tag_detected:
-                # Split result for final display
-                if "</think>" in baseline_result:
-                    parts = baseline_result.split("</think>", 1)
-                    final_updates[baseline_thinking_output] = parts[0] + "</think>"
-                    final_updates[baseline_answer_output] = parts[1] if len(parts) > 1 else ""
-                    # Use actual token count from generation
-                    if baseline_thinking_tokens > 0:
-                        final_updates[baseline_tokens_count] = f"{baseline_thinking_tokens}"
-                    else:
-                        # Fallback: use actual token count for thinking part
-                        thinking_text = parts[0] + "</think>"
-                        thinking_token_count = len(tokenizer.encode(thinking_text, add_special_tokens=False))
-                        final_updates[baseline_tokens_count] = f"{thinking_token_count}"
-                else:
-                    final_updates[baseline_thinking_output] = baseline_result
-                    # Use actual token count
-                    if baseline_thinking_tokens > 0:
-                        final_updates[baseline_tokens_count] = f"{baseline_thinking_tokens}"
-                    else:
-                        total_token_count = len(tokenizer.encode(baseline_result, add_special_tokens=False))
-                        final_updates[baseline_tokens_count] = f"{total_token_count}"
-            else:
-                final_updates[baseline_thinking_output] = baseline_result
-                # Use actual token count
-                if baseline_thinking_tokens > 0:
-                    final_updates[baseline_tokens_count] = f"{baseline_thinking_tokens}"
-                else:
-                    total_token_count = len(tokenizer.encode(baseline_result, add_special_tokens=False))
-                    final_updates[baseline_tokens_count] = f"{total_token_count}"
-
-        yield final_updates
+    def generate_wrapper(prompt):
+        """Wrapper to adapt the global generate_with_updates function for Gradio"""
+        # Process updates from the global function and map to UI components
+        for update_dict in generate_with_updates(prompt, baseline_progress_queue, baseline_tokens_queue, stop_generation):
+            gradio_updates = {}
+
+            # Map the string keys to actual Gradio components
+            if "status" in update_dict:
+                gradio_updates[generation_status] = update_dict["status"]
+            if "progress" in update_dict:
+                gradio_updates[baseline_progress_bar] = update_dict["progress"]
+            if "thinking" in update_dict:
+                gradio_updates[baseline_thinking_output] = update_dict["thinking"]
+            if "answer" in update_dict:
+                gradio_updates[baseline_answer_output] = update_dict["answer"]
+            if "tokens" in update_dict:
+                gradio_updates[baseline_tokens_count] = update_dict["tokens"]
+            if "generate_btn_text" in update_dict:
+                gradio_updates[generate_btn] = gr.Button(
+                    update_dict["generate_btn_text"],
+                    variant="secondary" if "Generating" in update_dict["generate_btn_text"] else "primary",
+                    interactive=update_dict.get("generate_btn_interactive", True)
+                )
+            if "stop_btn_interactive" in update_dict:
+                gradio_updates[stop_btn] = gr.Button(
+                    "Stop",
+                    variant="stop",
+                    interactive=update_dict["stop_btn_interactive"]
+                )
+
+            yield gradio_updates
 
     # Connect the buttons to the handlers
     if model_loaded_successfully:
         generate_btn.click(
-            generate_with_updates,
+            generate_wrapper,
             inputs=[prompt],
             outputs=[
                 generation_status,
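The shape of the refactor, stripped of the app specifics: a generator that closed over create_interface's local queues and components becomes a module-level function taking that state as parameters, and a thin closure (generate_wrapper in the diff) stays behind to bind the locals and translate string keys back into components. An illustrative skeleton with invented names:

import queue

# Before: the worker is a closure and cannot be referenced at module scope.
def make_ui_before():
    q = queue.Queue()

    def worker(prompt):        # captures q from the enclosing scope
        q.put(prompt.upper())

    return worker

# After: the worker is module-level; formerly captured state is explicit.
def worker_after(prompt, q):
    q.put(prompt.upper())

def make_ui_after():
    q = queue.Queue()

    def adapter(prompt):       # plays the role of generate_wrapper
        worker_after(prompt, q)

    return adapter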
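Untouched by the commit is the streaming mechanism both versions share: generation runs on a worker thread that feeds queues, while the generator polls them, batches whatever has arrived, yields one update per tick, and sleeps briefly to avoid busy-waiting. A self-contained sketch of that loop, with illustrative names:

import queue
import threading
import time

def stream(work, poll=0.05):
    """Run work(report) on a thread; yield progress dicts as values arrive."""
    q = queue.Queue()
    t = threading.Thread(target=work, args=(q.put,))
    t.start()
    while t.is_alive() or not q.empty():
        latest = None
        try:
            while True:
                latest = q.get_nowait()   # drain the queue, keep newest value
        except queue.Empty:
            pass
        if latest is not None:
            yield {"progress": latest}
        time.sleep(poll)                  # same 0.05 s cadence as the diff

def fake_work(report):
    for i in range(5):
        time.sleep(0.1)
        report((i + 1) * 20)

for update in stream(fake_work):
    print(update)   # {'progress': 20} ... {'progress': 100}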