Spaces:

pinthoz
/

attention-atlas

Running

App Files Community

pinthoz committed 3 days ago

Commit

0cd6681

verified ·

1 Parent(s): ed09cf3

Fix sidebar and GPT2 tokens

Browse files

Files changed (6) hide show

attention_app/isa.py +3 -2
attention_app/server/main.py +50 -20
attention_app/server/renderers.py +24 -13
attention_app/ui/layouts.py +0 -5
attention_app/ui/scripts.py +13 -4
attention_app/ui/styles.py +19 -0

attention_app/isa.py CHANGED Viewed

@@ -3,15 +3,16 @@ import numpy as np
 import nltk
 from typing import List, Dict, Tuple, Optional
 # Ensure nltk data is downloaded
 try:
     nltk.data.find('tokenizers/punkt')
-except LookupError:
     nltk.download('punkt')
 try:
     nltk.data.find('tokenizers/punkt_tab')
-except LookupError:
     nltk.download('punkt_tab')
 def get_sentence_boundaries(text: str, tokens: List[str], tokenizer, inputs) -> Tuple[List[str], List[int]]:

 import nltk
 from typing import List, Dict, Tuple, Optional
+# Ensure nltk data is downloaded
 # Ensure nltk data is downloaded
 try:
     nltk.data.find('tokenizers/punkt')
+except (LookupError, OSError):
     nltk.download('punkt')
 try:
     nltk.data.find('tokenizers/punkt_tab')
+except (LookupError, OSError):
     nltk.download('punkt_tab')
 def get_sentence_boundaries(text: str, tokens: List[str], tokenizer, inputs) -> Tuple[List[str], List[int]]:

attention_app/server/main.py CHANGED Viewed

@@ -149,12 +149,13 @@ def server(input, output, session):
             print(f"ERROR in compute_all: {e}")
             traceback.print_exc()
             cached_result.set(None)
-            await session.send_custom_message('stop_loading', {})
         finally:
             running.set(False)
     @output
     @render.ui
     def preview_text():
@@ -171,10 +172,11 @@ def server(input, output, session):
         att_received_norm = (attention_received - attention_received.min()) / (attention_received.max() - attention_received.min() + 1e-10)
         token_html = []
         for i, (tok, att_recv, recv_norm) in enumerate(zip(tokens, attention_received, att_received_norm)):
             opacity = 0.2 + (recv_norm * 0.6)
             bg_color = f"rgba(59, 130, 246, {opacity})" # Keep blue for attention
-            tooltip = f"Token: {tok}&#10;Attention Received: {att_recv:.3f}"
-            token_html.append(f'<span class="token-viz" style="background:{bg_color};" title="{tooltip}">{tok}</span>')
         html = '<div class="token-viz-container">' + ''.join(token_html) + '</div>'
         legend_html = '''
         <div style="display:flex;gap:12px;margin-top:8px;font-size:9px;color:#6b7280;">
@@ -220,7 +222,7 @@ def server(input, output, session):
         try: tree_root_idx = int(input.tree_root_token())
         except: tree_root_idx = 0
-        clean_tokens = [t.replace("##", "") if t.startswith("##") else t for t in tokens]
         return ui.div(
             {"class": "dashboard-stack gpt2-layout"},
@@ -813,8 +815,7 @@ def server(input, output, session):
         return ui.div(
             {"class": "card"},
             ui.h4("Global Attention Metrics"),
-            get_metrics_display(res),
-            ui.tags.script("$('#loading_spinner').hide(); $('#generate_all').prop('disabled', false).css('opacity', '1'); $('#dashboard-container').removeClass('content-hidden').addClass('content-visible');")
         )
     def dashboard_layout_helper(is_gpt2, num_layers, num_heads, clean_tokens):
@@ -1034,7 +1035,7 @@ def server(input, output, session):
         res = cached_result.get()
         if not res: return []
         tokens = res[0]
-        return [t.replace("##", "") if t.startswith("##") else t for t in tokens]
     @reactive.effect
     def update_selectors():
@@ -1053,7 +1054,7 @@ def server(input, output, session):
     def dashboard_content():
         config = current_layout_config.get()
         if not config:
-            return ui.HTML("<script>$('#loading_spinner').hide(); $('#generate_all').prop('disabled', false).css('opacity', '1');</script>")
         is_gpt2, num_layers, num_heads = config
@@ -1087,8 +1088,11 @@ def server(input, output, session):
         y_flat = y.flatten().tolist()
         scores = np.nan_to_num(matrix.flatten(), nan=0.0).tolist()
         hover_texts = [
-            f"Target ← {sentences[int(r)][:60]}...<br>Source → {sentences[int(c)][:60]}...<br>ISA = {s:.4f}"
             for r, c, s in zip(y_flat, x_flat, scores)
         ]
@@ -1130,7 +1134,7 @@ def server(input, output, session):
             customdata=customdata
         ))
-        labels = [s[:30] + "..." if len(s) > 30 else s for s in sentences]
         fig.update_layout(
             xaxis=dict(
@@ -1171,10 +1175,24 @@ def server(input, output, session):
         # Generate HTML with unique ID
         plot_html = fig.to_html(include_plotlyjs='cdn', full_html=False, div_id="isa_scatter_plot", config={'displayModeBar': False})
-        # Custom JS to handle clicks and send to Shiny
         js = """
         <script>
         (function() {
             console.log("DEBUG: Initializing ISA Plot Script");
             function initPlot() {
                 var plot = document.getElementById('isa_scatter_plot');
@@ -1251,8 +1269,9 @@ def server(input, output, session):
             attentions, tokens, target_idx, source_idx, boundaries
         )
-        toks_target = tokens_combined[:src_start]
-        toks_source = tokens_combined[src_start:]
         # Custom colorscale for heatmap (Light Blue -> Deep Blue/Purple)
         heatmap_colorscale = [
@@ -1429,7 +1448,9 @@ def server(input, output, session):
             [1.0, '#1e3a8a']
         ]
-        fig = px.imshow(att, x=tokens, y=tokens, color_continuous_scale=att_colorscale, aspect="auto")
         fig.update_traces(customdata=custom, hovertemplate=hover)
         fig.update_layout(
             xaxis_title="Key (attending to)",
@@ -1507,6 +1528,8 @@ def server(input, output, session):
         block_width = 0.95 / n_tokens  # Maximum spacing
         for i, tok in enumerate(tokens):
             color = color_palette[i % len(color_palette)]
             x_pos = i / n_tokens + block_width / 2
             show_focus = focus_idx is not None
@@ -1521,15 +1544,15 @@ def server(input, output, session):
                 font_size = 13 if is_selected else 10
             text_color = color if (show_focus and is_selected) else "#111827"
-            fig.add_trace(go.Scatter(x=[x_pos], y=[1.05], mode='text', text=tok, textfont=dict(size=font_size, color=text_color, family='monospace', weight='bold'), showlegend=False, hoverinfo='skip'))
-            fig.add_trace(go.Scatter(x=[x_pos], y=[-0.05], mode='text', text=tok, textfont=dict(size=font_size, color=text_color, family='monospace', weight='bold'), showlegend=False, hoverinfo='skip'))
         threshold = 0.04
         for i in range(n_tokens):
             for j in range(n_tokens):
                 weight = att[i, j]
                 if weight > threshold:
-                    is_line_focused = (focus_idx is None) or (i == focus_idx)
                     x_source = i / n_tokens + block_width / 2
                     x_target = j / n_tokens + block_width / 2
                     x_vals = [x_source, (x_source + x_target) / 2, x_target]
@@ -1542,12 +1565,19 @@ def server(input, output, session):
                         line_color = '#2a2a2a'
                         line_opacity = 0.003
                         line_width = 0.1
-                    fig.add_trace(go.Scatter(x=x_vals, y=y_vals, mode='lines', line=dict(color=line_color, width=line_width), opacity=line_opacity, showlegend=False, hoverinfo='text' if is_line_focused else 'skip', hovertext=f"<b>{tokens[i]} to {tokens[j]}</b><br>Attention: {weight:.4f}"))
         title_text = ""
         if focus_idx is not None:
             focus_color = color_palette[focus_idx % len(color_palette)]
-            title_text += f" · <b style='color:{focus_color}'>Focused: '{tokens[focus_idx]}'</b>"
         fig.update_layout(
             title=title_text,
@@ -1754,7 +1784,7 @@ def server(input, output, session):
                 return None
             def build_node(current_idx, current_depth, current_value):
-                token = tokens[current_idx]
                 node = {
                     "name": f"{current_idx}: {token}",
                     "att": current_value,

             print(f"ERROR in compute_all: {e}")
             traceback.print_exc()
             cached_result.set(None)
         finally:
             running.set(False)
     @output
     @render.ui
     def preview_text():
         att_received_norm = (attention_received - attention_received.min()) / (attention_received.max() - attention_received.min() + 1e-10)
         token_html = []
         for i, (tok, att_recv, recv_norm) in enumerate(zip(tokens, attention_received, att_received_norm)):
+            clean_tok = tok.replace("##", "").replace("Ġ", "")
             opacity = 0.2 + (recv_norm * 0.6)
             bg_color = f"rgba(59, 130, 246, {opacity})" # Keep blue for attention
+            tooltip = f"Token: {clean_tok}&#10;Attention Received: {att_recv:.3f}"
+            token_html.append(f'<span class="token-viz" style="background:{bg_color};" title="{tooltip}">{clean_tok}</span>')
         html = '<div class="token-viz-container">' + ''.join(token_html) + '</div>'
         legend_html = '''
         <div style="display:flex;gap:12px;margin-top:8px;font-size:9px;color:#6b7280;">
         try: tree_root_idx = int(input.tree_root_token())
         except: tree_root_idx = 0
+        clean_tokens = [t.replace("##", "") if t.startswith("##") else t.replace("Ġ", "") for t in tokens]
         return ui.div(
             {"class": "dashboard-stack gpt2-layout"},
         return ui.div(
             {"class": "card"},
             ui.h4("Global Attention Metrics"),
+            get_metrics_display(res)
         )
     def dashboard_layout_helper(is_gpt2, num_layers, num_heads, clean_tokens):
         res = cached_result.get()
         if not res: return []
         tokens = res[0]
+        return [t.replace("##", "") if t.startswith("##") else t.replace("Ġ", "") for t in tokens]
     @reactive.effect
     def update_selectors():
     def dashboard_content():
         config = current_layout_config.get()
         if not config:
+            return ui.HTML("<script>$('#generate_all').html('Generate All').prop('disabled', false).css('opacity', '1');</script>")
         is_gpt2, num_layers, num_heads = config
         y_flat = y.flatten().tolist()
         scores = np.nan_to_num(matrix.flatten(), nan=0.0).tolist()
+        # Clean tokens for display in hover_texts
+        cleaned_sentences = [s.replace("Ġ", "").replace("##", "") for s in sentences]
         hover_texts = [
+            f"Target ← {cleaned_sentences[int(r)][:60]}...<br>Source → {cleaned_sentences[int(c)][:60]}...<br>ISA = {s:.4f}"
             for r, c, s in zip(y_flat, x_flat, scores)
         ]
             customdata=customdata
         ))
+        labels = [s[:30].replace("Ġ", "").replace("##", "") + "..." if len(s) > 30 else s.replace("Ġ", "").replace("##", "") for s in sentences]
         fig.update_layout(
             xaxis=dict(
         # Generate HTML with unique ID
         plot_html = fig.to_html(include_plotlyjs='cdn', full_html=False, div_id="isa_scatter_plot", config={'displayModeBar': False})
+        # Custom JS to handle clicks, send to Shiny, AND stop loading state
+        # This is placed here because the ISA plot is the heaviest component.
+        # When this renders, we know the data is ready.
         js = """
         <script>
         (function() {
+            // Stop loading state (Button Reset)
+            var btn = $('#generate_all');
+            if (btn.data('original-content')) {
+                btn.html(btn.data('original-content'));
+            } else {
+                btn.html('Generate All');
+            }
+            btn.prop('disabled', false).css('opacity', '1');
+            // Show Dashboard
+            $('#dashboard-container').removeClass('content-hidden').addClass('content-visible');
             console.log("DEBUG: Initializing ISA Plot Script");
             function initPlot() {
                 var plot = document.getElementById('isa_scatter_plot');
             attentions, tokens, target_idx, source_idx, boundaries
         )
+        # Clean tokens for display in the heatmap
+        toks_target = [t.replace("Ġ", "").replace("##", "") for t in tokens_combined[:src_start]]
+        toks_source = [t.replace("Ġ", "").replace("##", "") for t in tokens_combined[src_start:]]
         # Custom colorscale for heatmap (Light Blue -> Deep Blue/Purple)
         heatmap_colorscale = [
             [1.0, '#1e3a8a']
         ]
+        # Clean tokens for display in the imshow plot
+        cleaned_tokens = [t.replace("##", "").replace("Ġ", "") for t in tokens]
+        fig = px.imshow(att, x=cleaned_tokens, y=cleaned_tokens, color_continuous_scale=att_colorscale, aspect="auto")
         fig.update_traces(customdata=custom, hovertemplate=hover)
         fig.update_layout(
             xaxis_title="Key (attending to)",
         block_width = 0.95 / n_tokens  # Maximum spacing
         for i, tok in enumerate(tokens):
+            # Clean token for display
+            cleaned_tok = tok.replace("##", "").replace("Ġ", "")
             color = color_palette[i % len(color_palette)]
             x_pos = i / n_tokens + block_width / 2
             show_focus = focus_idx is not None
                 font_size = 13 if is_selected else 10
             text_color = color if (show_focus and is_selected) else "#111827"
+            fig.add_trace(go.Scatter(x=[x_pos], y=[1.05], mode='text', text=cleaned_tok, textfont=dict(size=font_size, color=text_color, family='monospace', weight='bold'), showlegend=False, hoverinfo='skip'))
+            fig.add_trace(go.Scatter(x=[x_pos], y=[-0.05], mode='text', text=cleaned_tok, textfont=dict(size=font_size, color=text_color, family='monospace', weight='bold'), showlegend=False, hoverinfo='skip'))
         threshold = 0.04
         for i in range(n_tokens):
             for j in range(n_tokens):
                 weight = att[i, j]
                 if weight > threshold:
+                    is_line_focused = (focus_idx is not None and i == focus_idx) or (focus_idx is None)
                     x_source = i / n_tokens + block_width / 2
                     x_target = j / n_tokens + block_width / 2
                     x_vals = [x_source, (x_source + x_target) / 2, x_target]
                         line_color = '#2a2a2a'
                         line_opacity = 0.003
                         line_width = 0.1
+                    # Clean tokens for hovertext
+                    cleaned_token_i = tokens[i].replace("##", "").replace("Ġ", "")
+                    cleaned_token_j = tokens[j].replace("##", "").replace("Ġ", "")
+                    fig.add_trace(go.Scatter(x=x_vals, y=y_vals, mode='lines', line=dict(color=line_color, width=line_width), opacity=line_opacity, showlegend=False, hoverinfo='text' if is_line_focused else 'skip', hovertext=f"<b>{cleaned_token_i} to {cleaned_token_j}</b><br>Attention: {weight:.4f}"))
         title_text = ""
         if focus_idx is not None:
             focus_color = color_palette[focus_idx % len(color_palette)]
+            # Clean token for title
+            cleaned_focus_token = tokens[focus_idx].replace("##", "").replace("Ġ", "")
+            title_text += f" · <b style='color:{focus_color}'>Focused: '{cleaned_focus_token}'</b>"
         fig.update_layout(
             title=title_text,
                 return None
             def build_node(current_idx, current_depth, current_value):
+                token = tokens[current_idx].replace("##", "").replace("Ġ", "")
                 node = {
                     "name": f"{current_idx}: {token}",
                     "att": current_value,

attention_app/server/renderers.py CHANGED Viewed

@@ -86,9 +86,10 @@ def get_embedding_table(res):
         vec = embeddings[i]
         strip = array_to_base64_img(vec[:64], cmap="Blues", height=0.18)
         tip = "Embedding (first 32 dims): " + ", ".join(f"{v:.3f}" for v in vec[:32])
         rows.append(
             f"<tr>"
-            f"<td class='token-name'>{tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{strip}' title='{tip}'></td>"
             f"</tr>"
         )
@@ -109,11 +110,12 @@ def get_segment_embedding_view(res):
     rows = ""
     for i, (tok, seg) in enumerate(zip(tokens, ids)):
         row_class = f"seg-row-{seg}" if seg in [0, 1] else ""
         seg_label = "A" if seg == 0 else "B" if seg == 1 else str(seg)
         rows += f"""
         <tr class='{row_class}'>
-            <td class='token-cell'>{tok}</td>
             <td class='segment-cell'>{seg_label}</td>
         </tr>
         """
@@ -142,11 +144,12 @@ def get_posenc_table(res):
     rows = []
     for i, tok in enumerate(tokens):
         pe = pos_enc[i]
         strip = array_to_base64_img(pe[:64], cmap="Blues", height=0.18)
         tip = f"Position {i} encoding: " + ", ".join(f"{v:.3f}" for v in pe[:32])
         rows.append(
             f"<tr>"
-            f"<td class='token-name'>{tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{strip}' title='{tip}'></td>"
             f"</tr>"
         )
@@ -194,11 +197,12 @@ def get_sum_layernorm_view(res, encoder_model):
     norm_np = normalized[0].cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
         sum_strip = array_to_base64_img(summed_np[i][:96], "Blues", 0.15)
         norm_strip = array_to_base64_img(norm_np[i][:96], "Blues", 0.15)
         rows.append(
             "<tr>"
-            f"<td class='token-name'>{tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{sum_strip}' title='Sum of embeddings'></td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{norm_strip}' title='LayerNorm output'></td>"
             "</tr>"
@@ -221,7 +225,7 @@ def get_qkv_table(res, layer_idx):
     cards = []
     for i, tok in enumerate(tokens):
         # Clean token for display
-        display_tok = tok.replace("##", "") if tok.startswith("##") else tok
         q_strip = array_to_base64_img(Q[i][:48], "Greens", 0.12)
         k_strip = array_to_base64_img(K[i][:48], "Oranges", 0.12)
@@ -293,9 +297,9 @@ def get_scaled_attention_view(res, layer_idx, head_idx, focus_idx):
             <div class='scaled-rank'>#{rank}</div>
             <div class='scaled-details'>
                 <div class='scaled-connection'>
-                    <span class='token-name' style='color:#ff5ca9;'>{tokens[focus_idx]}</span>
                     <span style='color:#94a3b8;margin:0 4px;'>→</span>
-                    <span class='token-name' style='color:#3b82f6;'>{tokens[j]}</span>
                 </div>
                 <div class='scaled-values'>
                     <span class='scaled-step'>Q·K = <b>{dot:.2f}</b></span>
@@ -325,15 +329,16 @@ def get_add_norm_view(res, layer_idx):
     hs_out = hidden_states[layer_idx + 1][0].cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
         diff = np.linalg.norm(hs_out[i] - hs_in[i])
         norm = np.linalg.norm(hs_in[i]) + 1e-6
         ratio = diff / norm
         width = max(4, min(100, int(ratio * 80)))
         rows.append(
-            f"<tr><td class='token-name'>{tok}</td>"
             f"<td><div style='background:#e5e7eb;border-radius:999px;height:10px;' title='Change: {ratio:.1%}'>"
             f"<div style='width:{width}%;height:10px;border-radius:999px;"
-            f"background:linear-gradient(90deg,#22c55e,#22d3ee);'></div></div></td></tr>"
         )
     return ui.HTML(
         "<div class='card-scroll'>"
@@ -363,11 +368,12 @@ def get_ffn_view(res, layer_idx):
     proj_np = proj.cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
         inter_strip = array_to_base64_img(inter_np[i][:96], "Blues", 0.15)
         proj_strip = array_to_base64_img(proj_np[i][:96], "Blues", 0.15)
         rows.append(
             "<tr>"
-            f"<td class='token-name'>{tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{inter_strip}' title='Intermediate 3072 dims'></td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{proj_strip}' title='Projection back to 768 dims'></td>"
             "</tr>"
@@ -388,15 +394,16 @@ def get_add_norm_post_ffn_view(res, layer_idx):
     hs_out = hidden_states[layer_idx + 2][0].cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
         diff = np.linalg.norm(hs_out[i] - hs_mid[i])
         norm = np.linalg.norm(hs_mid[i]) + 1e-6
         ratio = diff / norm
         width = max(4, min(100, int(ratio * 80)))
         rows.append(
-            f"<tr><td class='token-name'>{tok}</td>"
             f"<td><div style='background:#e5e7eb;border-radius:999px;height:10px;' title='Change: {ratio:.1%}'>"
             f"<div style='width:{width}%;height:10px;border-radius:999px;"
-            f"background:linear-gradient(90deg,#14b8a6,#0ea5e9);'></div></div></td></tr>"
         )
     return ui.HTML(
         "<div class='card-scroll'>"
@@ -414,6 +421,7 @@ def get_layer_output_view(res, layer_idx):
     rows = []
     for i, tok in enumerate(tokens):
         vec_strip = array_to_base64_img(hs[i][:64], "Blues", 0.15)
         vec_tip = "Hidden state (first 32 dims): " + ", ".join(f"{v:.3f}" for v in hs[i][:32])
         mean_val = float(hs[i].mean())
@@ -422,7 +430,7 @@ def get_layer_output_view(res, layer_idx):
         rows.append(f"""
             <tr>
-                <td class='token-name'>{tok}</td>
                 <td><img class='heatmap' src='data:image/png;base64,{vec_strip}' title='{vec_tip}'></td>
                 <td style='font-size:9px;color:#374151;white-space:nowrap;'>
                     μ={mean_val:.3f}, σ={std_val:.3f}, max={max_val:.3f}
@@ -470,6 +478,9 @@ def get_output_probabilities(res, use_mlm, text):
     top_k = 5
     for i, tok in enumerate(mlm_tokens):
         token_probs = probs[i]
         top_vals, top_idx = torch.topk(token_probs, top_k)

         vec = embeddings[i]
         strip = array_to_base64_img(vec[:64], cmap="Blues", height=0.18)
         tip = "Embedding (first 32 dims): " + ", ".join(f"{v:.3f}" for v in vec[:32])
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         rows.append(
             f"<tr>"
+            f"<td class='token-name'>{clean_tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{strip}' title='{tip}'></td>"
             f"</tr>"
         )
     rows = ""
     for i, (tok, seg) in enumerate(zip(tokens, ids)):
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         row_class = f"seg-row-{seg}" if seg in [0, 1] else ""
         seg_label = "A" if seg == 0 else "B" if seg == 1 else str(seg)
         rows += f"""
         <tr class='{row_class}'>
+            <td class='token-cell'>{clean_tok}</td>
             <td class='segment-cell'>{seg_label}</td>
         </tr>
         """
     rows = []
     for i, tok in enumerate(tokens):
         pe = pos_enc[i]
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         strip = array_to_base64_img(pe[:64], cmap="Blues", height=0.18)
         tip = f"Position {i} encoding: " + ", ".join(f"{v:.3f}" for v in pe[:32])
         rows.append(
             f"<tr>"
+            f"<td class='token-name'>{clean_tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{strip}' title='{tip}'></td>"
             f"</tr>"
         )
     norm_np = normalized[0].cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         sum_strip = array_to_base64_img(summed_np[i][:96], "Blues", 0.15)
         norm_strip = array_to_base64_img(norm_np[i][:96], "Blues", 0.15)
         rows.append(
             "<tr>"
+            f"<td class='token-name'>{clean_tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{sum_strip}' title='Sum of embeddings'></td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{norm_strip}' title='LayerNorm output'></td>"
             "</tr>"
     cards = []
     for i, tok in enumerate(tokens):
         # Clean token for display
+        display_tok = tok.replace("##", "").replace("Ġ", "")
         q_strip = array_to_base64_img(Q[i][:48], "Greens", 0.12)
         k_strip = array_to_base64_img(K[i][:48], "Oranges", 0.12)
             <div class='scaled-rank'>#{rank}</div>
             <div class='scaled-details'>
                 <div class='scaled-connection'>
+                    <span class='token-name' style='color:#ff5ca9;'>{tokens[focus_idx].replace("##", "").replace("Ġ", "")}</span>
                     <span style='color:#94a3b8;margin:0 4px;'>→</span>
+                    <span class='token-name' style='color:#3b82f6;'>{tokens[j].replace("##", "").replace("Ġ", "")}</span>
                 </div>
                 <div class='scaled-values'>
                     <span class='scaled-step'>Q·K = <b>{dot:.2f}</b></span>
     hs_out = hidden_states[layer_idx + 1][0].cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         diff = np.linalg.norm(hs_out[i] - hs_in[i])
         norm = np.linalg.norm(hs_in[i]) + 1e-6
         ratio = diff / norm
         width = max(4, min(100, int(ratio * 80)))
         rows.append(
+            f"<tr><td class='token-name'>{clean_tok}</td>"
             f"<td><div style='background:#e5e7eb;border-radius:999px;height:10px;' title='Change: {ratio:.1%}'>"
             f"<div style='width:{width}%;height:10px;border-radius:999px;"
+            f"background:linear-gradient(90deg,#ff5ca9,#3b82f6);'></div></div></td></tr>"
         )
     return ui.HTML(
         "<div class='card-scroll'>"
     proj_np = proj.cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         inter_strip = array_to_base64_img(inter_np[i][:96], "Blues", 0.15)
         proj_strip = array_to_base64_img(proj_np[i][:96], "Blues", 0.15)
         rows.append(
             "<tr>"
+            f"<td class='token-name'>{clean_tok}</td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{inter_strip}' title='Intermediate 3072 dims'></td>"
             f"<td><img class='heatmap' src='data:image/png;base64,{proj_strip}' title='Projection back to 768 dims'></td>"
             "</tr>"
     hs_out = hidden_states[layer_idx + 2][0].cpu().numpy()
     rows = []
     for i, tok in enumerate(tokens):
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         diff = np.linalg.norm(hs_out[i] - hs_mid[i])
         norm = np.linalg.norm(hs_mid[i]) + 1e-6
         ratio = diff / norm
         width = max(4, min(100, int(ratio * 80)))
         rows.append(
+            f"<tr><td class='token-name'>{clean_tok}</td>"
             f"<td><div style='background:#e5e7eb;border-radius:999px;height:10px;' title='Change: {ratio:.1%}'>"
             f"<div style='width:{width}%;height:10px;border-radius:999px;"
+            f"background:linear-gradient(90deg,#ff5ca9,#3b82f6);'></div></div></td></tr>"
         )
     return ui.HTML(
         "<div class='card-scroll'>"
     rows = []
     for i, tok in enumerate(tokens):
+        clean_tok = tok.replace("##", "").replace("Ġ", "")
         vec_strip = array_to_base64_img(hs[i][:64], "Blues", 0.15)
         vec_tip = "Hidden state (first 32 dims): " + ", ".join(f"{v:.3f}" for v in hs[i][:32])
         mean_val = float(hs[i].mean())
         rows.append(f"""
             <tr>
+                <td class='token-name'>{clean_tok}</td>
                 <td><img class='heatmap' src='data:image/png;base64,{vec_strip}' title='{vec_tip}'></td>
                 <td style='font-size:9px;color:#374151;white-space:nowrap;'>
                     μ={mean_val:.3f}, σ={std_val:.3f}, max={max_val:.3f}
     top_k = 5
     for i, tok in enumerate(mlm_tokens):
+        # Clean token header
+        tok = tok.replace("##", "").replace("Ġ", "")
+        if not tok: tok = "&nbsp;"
         token_probs = probs[i]
         top_vals, top_idx = torch.topk(token_probs, top_k)

attention_app/ui/layouts.py CHANGED Viewed

@@ -56,11 +56,6 @@ attention_analysis_page = ui.page_fluid(
             ui.input_text_area("text_input", None, "All women are naturally nurturing and emotional. Men are logical and suited for leadership positions.", rows=6),
             ui.div(
                 ui.input_action_button("generate_all", "Generate All", class_="btn-primary"),
-                ui.div(
-                    {"id": "loading_spinner", "class": "loading-container", "style": "display:none;"},
-                    ui.div({"class": "spinner"}),
-                    ui.span("Processing...")
-                ),
             ),
         ),

             ui.input_text_area("text_input", None, "All women are naturally nurturing and emotional. Men are logical and suited for leadership positions.", rows=6),
             ui.div(
                 ui.input_action_button("generate_all", "Generate All", class_="btn-primary"),
             ),
         ),

attention_app/ui/scripts.py CHANGED Viewed

@@ -146,14 +146,23 @@ JS_INTERACTIVE = """
         // Custom message handlers
         Shiny.addCustomMessageHandler('start_loading', function(msg) {
-            $('#loading_spinner').css('display', 'flex');
-            $('#generate_all').prop('disabled', true).css('opacity', '0.7');
             $('#dashboard-container').addClass('content-hidden').removeClass('content-visible');
         });
         Shiny.addCustomMessageHandler('stop_loading', function(msg) {
-            $('#loading_spinner').css('display', 'none');
-            $('#generate_all').prop('disabled', false).css('opacity', '1');
         });
         // Bias Loading Handlers

         // Custom message handlers
         Shiny.addCustomMessageHandler('start_loading', function(msg) {
+            var btn = $('#generate_all');
+            if (!btn.data('original-content')) {
+                btn.data('original-content', btn.html());
+            }
+            btn.html('<div class="spinner" style="width:16px;height:16px;border-width:2px;display:inline-block;vertical-align:middle;margin-right:8px;"></div>Processing<span class="loading-dots"></span>');
+            btn.prop('disabled', true).css('opacity', '0.8');
             $('#dashboard-container').addClass('content-hidden').removeClass('content-visible');
         });
         Shiny.addCustomMessageHandler('stop_loading', function(msg) {
+            var btn = $('#generate_all');
+            if (btn.data('original-content')) {
+                btn.html(btn.data('original-content'));
+            } else {
+                btn.html('Generate All');
+            }
+            btn.prop('disabled', false).css('opacity', '1');
         });
         // Bias Loading Handlers

attention_app/ui/styles.py CHANGED Viewed

@@ -534,6 +534,25 @@ CSS = """
         @keyframes spin { to { transform: rotate(360deg); } }
         /* Tables */
         .token-table {
             width: 100%;

         @keyframes spin { to { transform: rotate(360deg); } }
+        /* Spinner inside primary button (needs to be white) */
+        .btn-primary .spinner {
+            border-color: rgba(255, 255, 255, 0.3);
+            border-top-color: white;
+        }
+        /* Loading Dots Animation */
+        .loading-dots:after {
+            content: '.';
+            animation: dots 1.5s steps(5, end) infinite;
+        }
+        @keyframes dots {
+            0%, 20% { content: '.'; }
+            40% { content: '..'; }
+            60% { content: '...'; }
+            80%, 100% { content: ''; }
+        }
         /* Tables */
         .token-table {
             width: 100%;