.gitattributes CHANGED
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- tokenizer.model.v3 filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
README.md CHANGED
@@ -1,99 +1,17 @@
1
  ---
2
- library_name: vllm
3
- language:
4
- - en
5
- - es
6
- - it
7
- - de
8
- - fr
9
  license: apache-2.0
10
- base_model: mistralai/Mixtral-8x22B-v0.1
11
- extra_gated_description: >-
12
- If you want to learn more about how we process your personal data, please read
13
- our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
14
- tags:
15
- - mistral-common
16
  ---
17
 
18
  # Model Card for Mixtral-8x22B-Instruct-v0.1
 
19
 
20
-
21
- ## Encode and Decode with `mistral_common`
22
-
23
- ```py
24
- from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
25
- from mistral_common.protocol.instruct.messages import UserMessage
26
- from mistral_common.protocol.instruct.request import ChatCompletionRequest
27
-
28
- mistral_models_path = "MISTRAL_MODELS_PATH"
29
-
30
- tokenizer = MistralTokenizer.v3()
31
-
32
- completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
33
-
34
- tokens = tokenizer.encode_chat_completion(completion_request).tokens
35
- ```
36
-
37
- ## Inference with `mistral_inference`
38
-
39
- ```py
40
- from mistral_inference.transformer import Transformer
41
- from mistral_inference.generate import generate
42
-
43
- model = Transformer.from_folder(mistral_models_path)
44
- out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
45
-
46
- result = tokenizer.decode(out_tokens[0])
47
-
48
- print(result)
49
- ```
50
-
51
- ## Preparing inputs with Hugging Face `transformers`
52
-
53
- ```py
54
- from transformers import AutoTokenizer
55
-
56
- tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
57
-
58
- chat = [{"role": "user", "content": "Explain Machine Learning to me in a nutshell."}]
59
-
60
- tokens = tokenizer.apply_chat_template(chat, return_dict=True, return_tensors="pt", add_generation_prompt=True)
61
  ```
62
-
63
- ## Inference with hugging face `transformers`
64
-
65
- ```py
66
- from transformers import AutoModelForCausalLM
67
- import torch
68
-
69
- # You can also use 8-bit or 4-bit quantization here
70
- model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
71
- model.to("cuda")
72
-
73
- generated_ids = model.generate(**tokens, max_new_tokens=1000, do_sample=True)
74
-
75
- # decode with HF tokenizer
76
- result = tokenizer.decode(generated_ids[0])
77
- print(result)
78
- ```
79
-
80
- > [!TIP]
81
- > PRs to correct the `transformers` tokenizer so that it gives 1-to-1 the same results as the `mistral_common` reference implementation are very welcome!
82
-
83
- ---
84
- The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the [Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1).
85
-
86
- ## Function calling example
87
- ```python
88
  from transformers import AutoModelForCausalLM
89
  from mistral_common.protocol.instruct.messages import (
90
  AssistantMessage,
91
  UserMessage,
92
  )
93
- from mistral_common.protocol.instruct.tool_calls import (
94
- Tool,
95
- Function,
96
- )
97
  from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
98
  from mistral_common.tokens.instruct.normalize import ChatCompletionRequest
99
 
@@ -137,59 +55,10 @@ model_inputs = encodeds.to(device)
137
  model.to(device)
138
 
139
  generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
140
- sp_tokenizer = tokenizer_v3.instruct_tokenizer.tokenizer
141
- decoded = sp_tokenizer.decode(generated_ids[0])
142
- print(decoded)
143
  ```
144
 
145
- ## Function calling with `transformers`
146
-
147
- To use this example, you'll need `transformers` version 4.42.0 or higher. Please see the
148
- [function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling)
149
- in the `transformers` docs for more information.
150
-
151
- ```python
152
- from transformers import AutoModelForCausalLM, AutoTokenizer
153
- import torch
154
-
155
- model_id = "mistralai/Mixtral-8x22B-Instruct-v0.1"
156
- tokenizer = AutoTokenizer.from_pretrained(model_id)
157
-
158
- def get_current_weather(location: str, format: str):
159
- """
160
- Get the current weather
161
-
162
- Args:
163
- location: The city and state, e.g. San Francisco, CA
164
- format: The temperature unit to use. Infer this from the users location. (choices: ["celsius", "fahrenheit"])
165
- """
166
- pass
167
-
168
- conversation = [{"role": "user", "content": "What's the weather like in Paris?"}]
169
- tools = [get_current_weather]
170
-
171
- # format and tokenize the tool use prompt
172
- inputs = tokenizer.apply_chat_template(
173
- conversation,
174
- tools=tools,
175
- add_generation_prompt=True,
176
- return_dict=True,
177
- return_tensors="pt",
178
- )
179
-
180
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
181
-
182
- inputs.to(model.device)
183
- outputs = model.generate(**inputs, max_new_tokens=1000)
184
- print(tokenizer.decode(outputs[0], skip_special_tokens=True))
185
- ```
186
-
187
- Note that, for reasons of space, this example does not show a complete cycle of calling a tool and adding the tool call and tool
188
- results to the chat history so that the model can use them in its next generation. For a full tool calling example, please
189
- see the [function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling),
190
- and note that Mixtral **does** use tool call IDs, so these must be included in your tool calls and tool results. They should be
191
- exactly 9 alphanumeric characters.
192
-
193
  # Instruct tokenizer
194
  The HuggingFace tokenizer included in this release should match our own. To compare:
195
  `pip install mistral-common`
@@ -231,20 +100,7 @@ This tokenizer includes more special tokens, related to function calling :
231
  - [TOOL_CALLS]
232
  - [AVAILABLE_TOOLS]
233
  - [/AVAILABLE_TOOLS]
234
- - [TOOL_RESULTS]
235
  - [/TOOL_RESULTS]
236
 
237
- If you want to use this model with function calling, please be sure to apply it similarly to what is done in our [SentencePieceTokenizerV3](https://github.com/mistralai/mistral-common/blob/main/src/mistral_common/tokens/tokenizers/sentencepiece.py#L299).
238
-
239
- # The Mistral AI Team
240
- Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux,
241
- Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault,
242
- Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot,
243
- Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger,
244
- Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona,
245
- Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon,
246
- Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat,
247
- Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen,
248
- Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao,
249
- Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang,
250
- Valera Nemychnikova, William El Sayed, William Marshall
 
1
  ---
 
 
 
 
 
 
 
2
  license: apache-2.0
 
 
 
 
 
 
3
  ---
4
 
5
  # Model Card for Mixtral-8x22B-Instruct-v0.1
6
+ The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.
7
 
8
+ ## Run the model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  from transformers import AutoModelForCausalLM
11
  from mistral_common.protocol.instruct.messages import (
12
  AssistantMessage,
13
  UserMessage,
14
  )
 
 
 
 
15
  from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
16
  from mistral_common.tokens.instruct.normalize import ChatCompletionRequest
17
 
 
55
  model.to(device)
56
 
57
  generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
58
+ decoded = tokenizer.batch_decode(generated_ids)
59
+ print(decoded[0])
 
60
  ```
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # Instruct tokenizer
63
  The HuggingFace tokenizer included in this release should match our own. To compare:
64
  `pip install mistral-common`
 
100
  - [TOOL_CALLS]
101
  - [AVAILABLE_TOOLS]
102
  - [/AVAILABLE_TOOLS]
103
+ - [TOOL_RESULTS]
104
  - [/TOOL_RESULTS]
105
 
106
+ If you want to use this model with function calling, please be sure to apply it similarly to what is done in our [SentencePieceTokenizerV3](https://github.com/mistralai/mistral-common/blob/main/src/mistral_common/tokens/tokenizers/sentencepiece.py#L299).
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -9,7 +9,7 @@
9
  "hidden_size": 6144,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 16384,
12
- "max_position_embeddings": 65536,
13
  "model_type": "mixtral",
14
  "num_attention_heads": 48,
15
  "num_experts_per_tok": 2,
 
9
  "hidden_size": 6144,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 16384,
12
+ "max_position_embeddings": 32768,
13
  "model_type": "mixtral",
14
  "num_attention_heads": 48,
15
  "num_experts_per_tok": 2,
special_tokens_map.json CHANGED
@@ -1,23 +1,7 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "unk_token": {
17
- "content": "<unk>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "unk_token": "<unk>",
5
+ "b_inst": "[INST]",
6
+ "e_inst": "[/INST]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
3
- size 587404
 
 
 
 
tokenizer.model.v3 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
3
- size 587404
 
 
 
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff