MindLabUnimib committed on
Commit
c1cccf2
·
verified ·
1 Parent(s): 20419e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -59
app.py CHANGED
@@ -1,70 +1,241 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
3
 
 
 
 
 
 
 
 
4
 
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
-
21
- messages.extend(history)
22
-
23
- messages.append({"role": "user", "content": message})
24
-
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
  messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
 
 
 
 
 
 
 
 
 
38
 
39
- response += token
40
- yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
 
 
 
 
 
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
 
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- if __name__ == "__main__":
70
- demo.launch()
 
1
  import gradio as gr
2
+ import spaces
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
 
6
# Hugging Face Hub id of the checkpoint this app serves.
model_name = "rubenroy/Zurich-14B-GCv2-5m"

# The tokenizer and the model are loaded once at import time and shared by
# every request handled by `generate`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",  # let the library decide device placement
)
13
 
14
# Attribution to Qwen is not included to prevent hallucinations.
_SYSTEM_PROMPT = "You are a helpul assistant named Zurich, a 14 billion parameter Large Language model, you were fine-tuned and trained by Ruben Roy. You have been trained with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations, this was also made by Ruben Roy."


def _history_to_messages(chat_history):
    """Convert a Gradio chat history into chat-template message dicts.

    Two history layouts are accepted so the callback works regardless of how
    the surrounding ``gr.ChatInterface`` is configured:

    * "messages" layout: a list of ``{"role": ..., "content": ...}`` dicts;
    * "tuples" layout: a list of ``(user_text, assistant_text)`` pairs.

    Only non-empty string contents with a ``user``/``assistant`` role are
    kept; anything else (missing replies, non-text payloads) is skipped.
    """
    messages = []
    for turn in chat_history or []:
        if isinstance(turn, dict):
            pairs = [(turn.get("role"), turn.get("content"))]
        elif isinstance(turn, (list, tuple)):
            # A pair is (user_text, assistant_text); zip stops at the shorter side.
            pairs = list(zip(("user", "assistant"), turn))
        else:
            continue
        for role, content in pairs:
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
    return messages


@spaces.GPU
def generate(message, chat_history, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=512, repetition_penalty=1.1):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        chat_history: Previous turns as supplied by ``gr.ChatInterface``
            (either role/content dicts or user/assistant pairs). The turns are
            included in the prompt so the model sees the whole conversation.
        temperature: Sampling temperature; a value of 0 switches to greedy
            decoding.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Penalty applied to already generated tokens.

    Returns:
        The decoded reply text with the prompt and special tokens removed.
    """
    # System prompt, then prior turns, then the new user message.
    messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(chat_history))
    messages.append({"role": "user", "content": message})

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    temperature = float(temperature)
    do_sample = temperature > 0
    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": float(repetition_penalty),
        "do_sample": do_sample,
    }
    if do_sample:
        # Sampling-only knobs are passed only when sampling is enabled, so
        # greedy decoding is not handed a temperature of 0.
        generation_kwargs.update(
            temperature=temperature,
            top_p=float(top_p),
            top_k=int(top_k),
        )

    generated_ids = model.generate(**model_inputs, **generation_kwargs)

    # The output starts with the prompt tokens; keep only the continuation.
    completions = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(completions, skip_special_tokens=True)[0]
40
 
41
+ TITLE_HTML = """
42
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
43
+ <style>
44
+ .model-btn {
45
+ background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%);
46
+ color: white !important;
47
+ padding: 0.75rem 1rem;
48
+ border-radius: 0.5rem;
49
+ text-decoration: none !important;
50
+ font-weight: 500;
51
+ transition: all 0.2s ease;
52
+ font-size: 0.9rem;
53
+ display: flex;
54
+ align-items: center;
55
+ justify-content: center;
56
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
57
+ }
58
+ .model-btn:hover {
59
+ background: linear-gradient(135deg, #1d4ed8 0%, #1e40af 100%);
60
+ box-shadow: 0 4px 6px rgba(0,0,0,0.2);
61
+ }
62
+ .model-section {
63
+ flex: 1;
64
+ max-width: 450px;
65
+ background: rgba(255, 255, 255, 0.05);
66
+ padding: 1.5rem;
67
+ border-radius: 1rem;
68
+ border: 1px solid rgba(255, 255, 255, 0.1);
69
+ backdrop-filter: blur(10px);
70
+ transition: all 0.3s ease;
71
+ }
72
+ .info-link {
73
+ color: #60a5fa;
74
+ text-decoration: none;
75
+ transition: color 0.2s ease;
76
+ }
77
+ .info-link:hover {
78
+ color: #93c5fd;
79
+ text-decoration: underline;
80
+ }
81
+ .info-section {
82
+ margin-top: 0.5rem;
83
+ font-size: 0.9rem;
84
+ color: #94a3b8;
85
+ }
86
+ .settings-section {
87
+ background: rgba(255, 255, 255, 0.05);
88
+ padding: 1.5rem;
89
+ border-radius: 1rem;
90
+ margin: 1.5rem auto;
91
+ border: 1px solid rgba(255, 255, 255, 0.1);
92
+ max-width: 800px;
93
+ }
94
+ .settings-title {
95
+ color: #e2e8f0;
96
+ font-size: 1.25rem;
97
+ font-weight: 600;
98
+ margin-bottom: 1rem;
99
+ display: flex;
100
+ align-items: center;
101
+ gap: 0.7rem;
102
+ }
103
+ .parameter-info {
104
+ color: #94a3b8;
105
+ font-size: 0.8rem;
106
+ margin-top: 0.25rem;
107
+ }
108
+ </style>
109
 
110
+ <div style="background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%); padding: 1.5rem; border-radius: 1.5rem; text-align: center; margin: 1rem auto; max-width: 1200px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
111
+ <div style="margin-bottom: 1.5rem;">
112
+ <div style="display: flex; align-items: center; justify-content: center; gap: 1rem;">
113
+ <h1 style="font-size: 2.5rem; font-weight: 800; margin: 0; background: linear-gradient(135deg, #60a5fa 0%, #93c5fd 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">Zurich</h1>
114
+ <div style="width: 2px; height: 2.5rem; background: linear-gradient(180deg, #3b82f6 0%, #60a5fa 100%);"></div>
115
+ <p style="font-size: 1.25rem; color: #94a3b8; margin: 0;">GammaCorpus v2-5m</p>
116
+ </div>
117
+ <div class="info-section">
118
+ <span>Fine-tuned from <a href="https://huggingface.co/Qwen/Qwen2.5-14B-Instruct" class="info-link">Qwen 2.5 14B Instruct</a> | Model: <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-5m" class="info-link">Zurich-14B-GCv2-5m</a> | Training Dataset: <a href="https://huggingface.co/datasets/rubenroy/GammaCorpus-v2-5m" class="info-link">GammaCorpus v2 5m</a></span>
119
+ </div>
120
+ </div>
121
 
122
+ <div style="display: flex; gap: 1.5rem; justify-content: center; flex-wrap: wrap;">
123
+ <div class="model-section">
124
+ <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
125
+ <i class="fas fa-microchip"></i>
126
+ 1.5B Models
127
+ </h2>
128
+ <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem;">
129
+ <a href="https://huggingface.co/rubenroy/Zurich-1.5B-GCv2-5m" class="model-btn">Zurich 1.5B GCv2 5m</a>
130
+ <a href="https://huggingface.co/rubenroy/Zurich-1.5B-GCv2-1m" class="model-btn">Zurich 1.5B GCv2 1m</a>
131
+ <a href="https://huggingface.co/rubenroy/Zurich-1.5B-GCv2-500k" class="model-btn">Zurich 1.5B GCv2 500k</a>
132
+ <a href="https://huggingface.co/rubenroy/Zurich-1.5B-GCv2-100k" class="model-btn">Zurich 1.5B GCv2 100k</a>
133
+ <a href="https://huggingface.co/rubenroy/Zurich-1.5B-GCv2-50k" class="model-btn">Zurich 1.5B GCv2 50k</a>
134
+ <a href="https://huggingface.co/rubenroy/Zurich-1.5B-GCv2-10k" class="model-btn">Zurich 1.5B GCv2 10k</a>
135
+ </div>
136
+ </div>
137
+ <div class="model-section">
138
+ <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
139
+ <i class="fas fa-brain"></i>
140
+ 7B Models
141
+ </h2>
142
+ <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem;">
143
+ <a href="https://huggingface.co/rubenroy/Zurich-7B-GCv2-5m" class="model-btn">Zurich 7B GCv2 5m</a>
144
+ <a href="https://huggingface.co/rubenroy/Zurich-7B-GCv2-1m" class="model-btn">Zurich 7B GCv2 1m</a>
145
+ <a href="https://huggingface.co/rubenroy/Zurich-7B-GCv2-500k" class="model-btn">Zurich 7B GCv2 500k</a>
146
+ <a href="https://huggingface.co/rubenroy/Zurich-7B-GCv2-100k" class="model-btn">Zurich 7B GCv2 100k</a>
147
+ <a href="https://huggingface.co/rubenroy/Zurich-7B-GCv2-50k" class="model-btn">Zurich 7B GCv2 50k</a>
148
+ <a href="https://huggingface.co/rubenroy/Zurich-7B-GCv2-10k" class="model-btn">Zurich 7B GCv2 10k</a>
149
+ </div>
150
+ </div>
151
+ <div class="model-section">
152
+ <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
153
+ <i class="fas fa-rocket"></i>
154
+ 14B Models
155
+ </h2>
156
+ <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem;">
157
+ <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-5m" class="model-btn">Zurich 14B GCv2 5m</a>
158
+ <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-1m" class="model-btn">Zurich 14B GCv2 1m</a>
159
+ <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-500k" class="model-btn">Zurich 14B GCv2 500k</a>
160
+ <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-100k" class="model-btn">Zurich 14B GCv2 100k</a>
161
+ <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-50k" class="model-btn">Zurich 14B GCv2 50k</a>
162
+ <a href="https://huggingface.co/rubenroy/Zurich-14B-GCv2-10k" class="model-btn">Zurich 14B GCv2 10k</a>
163
+ </div>
164
+ </div>
165
+ </div>
166
+ </div>
167
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
# Starter prompts offered by the chat UI; each example is a one-element row
# holding the message text.
_EXAMPLE_PROMPTS = (
    "Explain quantum computing in simple terms",
    "Write a short story about a time traveler",
    "Explain the process of photosynthesis",
    "Tell me an interesting fact about Palm trees",
)
examples = [[prompt] for prompt in _EXAMPLE_PROMPTS]
175
 
176
# Page layout: header banner, a collapsed panel of generation sliders, then
# the chat widget wired to `generate`.
with gr.Blocks() as demo:
    gr.HTML(TITLE_HTML)

    with gr.Accordion("Generation Settings", open=False):
        with gr.Row():
            # First column: temperature / top-p / top-k.
            with gr.Column():
                temperature = gr.Slider(
                    0.0,
                    2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Higher values make the output more random, lower values make it more deterministic",
                    interactive=True,
                )
                top_p = gr.Slider(
                    0.0,
                    1.0,
                    value=0.9,
                    step=0.05,
                    label="Top P",
                    info="Controls the cumulative probability threshold for nucleus sampling",
                    interactive=True,
                )
                top_k = gr.Slider(
                    1,
                    100,
                    value=50,
                    step=1,
                    label="Top K",
                    info="Limits the number of tokens to consider for each generation step",
                    interactive=True,
                )
            # Second column: output length and repetition control.
            with gr.Column():
                max_new_tokens = gr.Slider(
                    1,
                    2048,
                    value=512,
                    step=1,
                    label="Max New Tokens",
                    info="Maximum number of tokens to generate in the response",
                    interactive=True,
                )
                repetition_penalty = gr.Slider(
                    1.0,
                    2.0,
                    value=1.1,
                    step=0.1,
                    label="Repetition Penalty",
                    info="Higher values stop the model from repeating the same info",
                    interactive=True,
                )

    # The order of the extra inputs matches the parameters of `generate` that
    # follow (message, chat_history).
    generation_controls = [temperature, top_p, top_k, max_new_tokens, repetition_penalty]
    chatbot = gr.ChatInterface(
        fn=generate,
        additional_inputs=generation_controls,
        examples=examples,
    )

demo.launch(share=True)