Update README.md
#1
by
sophiamyang - opened
- .gitattributes +0 -1
- README.md +6 -150
- config.json +1 -1
- special_tokens_map.json +5 -21
- tokenizer.json +0 -0
- tokenizer.model +0 -3
- tokenizer.model.v3 +0 -3
- tokenizer_config.json +0 -0
.gitattributes
CHANGED
|
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
-
tokenizer.model.v3 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
README.md
CHANGED
|
@@ -1,99 +1,17 @@
|
|
| 1 |
---
|
| 2 |
-
library_name: vllm
|
| 3 |
-
language:
|
| 4 |
-
- en
|
| 5 |
-
- es
|
| 6 |
-
- it
|
| 7 |
-
- de
|
| 8 |
-
- fr
|
| 9 |
license: apache-2.0
|
| 10 |
-
base_model: mistralai/Mixtral-8x22B-v0.1
|
| 11 |
-
extra_gated_description: >-
|
| 12 |
-
If you want to learn more about how we process your personal data, please read
|
| 13 |
-
our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
|
| 14 |
-
tags:
|
| 15 |
-
- mistral-common
|
| 16 |
---
|
| 17 |
|
| 18 |
# Model Card for Mixtral-8x22B-Instruct-v0.1
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
## Encode and Decode with `mistral_common`
|
| 22 |
-
|
| 23 |
-
```py
|
| 24 |
-
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
| 25 |
-
from mistral_common.protocol.instruct.messages import UserMessage
|
| 26 |
-
from mistral_common.protocol.instruct.request import ChatCompletionRequest
|
| 27 |
-
|
| 28 |
-
mistral_models_path = "MISTRAL_MODELS_PATH"
|
| 29 |
-
|
| 30 |
-
tokenizer = MistralTokenizer.v3()
|
| 31 |
-
|
| 32 |
-
completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
|
| 33 |
-
|
| 34 |
-
tokens = tokenizer.encode_chat_completion(completion_request).tokens
|
| 35 |
-
```
|
| 36 |
-
|
| 37 |
-
## Inference with `mistral_inference`
|
| 38 |
-
|
| 39 |
-
```py
|
| 40 |
-
from mistral_inference.transformer import Transformer
|
| 41 |
-
from mistral_inference.generate import generate
|
| 42 |
-
|
| 43 |
-
model = Transformer.from_folder(mistral_models_path)
|
| 44 |
-
out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
|
| 45 |
-
|
| 46 |
-
result = tokenizer.decode(out_tokens[0])
|
| 47 |
-
|
| 48 |
-
print(result)
|
| 49 |
-
```
|
| 50 |
-
|
| 51 |
-
## Preparing inputs with Hugging Face `transformers`
|
| 52 |
-
|
| 53 |
-
```py
|
| 54 |
-
from transformers import AutoTokenizer
|
| 55 |
-
|
| 56 |
-
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
|
| 57 |
-
|
| 58 |
-
chat = [{"role": "user", "content": "Explain Machine Learning to me in a nutshell."}]
|
| 59 |
-
|
| 60 |
-
tokens = tokenizer.apply_chat_template(chat, return_dict=True, return_tensors="pt", add_generation_prompt=True)
|
| 61 |
```
|
| 62 |
-
|
| 63 |
-
## Inference with Hugging Face `transformers`
|
| 64 |
-
|
| 65 |
-
```py
|
| 66 |
-
from transformers import AutoModelForCausalLM
|
| 67 |
-
import torch
|
| 68 |
-
|
| 69 |
-
# You can also use 8-bit or 4-bit quantization here
|
| 70 |
-
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
|
| 71 |
-
model.to("cuda")
|
| 72 |
-
|
| 73 |
-
generated_ids = model.generate(**tokens, max_new_tokens=1000, do_sample=True)
|
| 74 |
-
|
| 75 |
-
# decode with HF tokenizer
|
| 76 |
-
result = tokenizer.decode(generated_ids[0])
|
| 77 |
-
print(result)
|
| 78 |
-
```
|
| 79 |
-
|
| 80 |
-
> [!TIP]
|
| 81 |
-
> PRs to correct the `transformers` tokenizer so that it gives 1-to-1 the same results as the `mistral_common` reference implementation are very welcome!
|
| 82 |
-
|
| 83 |
-
---
|
| 84 |
-
The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the [Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1).
|
| 85 |
-
|
| 86 |
-
## Function calling example
|
| 87 |
-
```python
|
| 88 |
from transformers import AutoModelForCausalLM
|
| 89 |
from mistral_common.protocol.instruct.messages import (
|
| 90 |
AssistantMessage,
|
| 91 |
UserMessage,
|
| 92 |
)
|
| 93 |
-
from mistral_common.protocol.instruct.tool_calls import (
|
| 94 |
-
Tool,
|
| 95 |
-
Function,
|
| 96 |
-
)
|
| 97 |
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
| 98 |
from mistral_common.tokens.instruct.normalize import ChatCompletionRequest
|
| 99 |
|
|
@@ -137,59 +55,10 @@ model_inputs = encodeds.to(device)
|
|
| 137 |
model.to(device)
|
| 138 |
|
| 139 |
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
|
| 140 |
-
|
| 141 |
-
decoded
|
| 142 |
-
print(decoded)
|
| 143 |
```
|
| 144 |
|
| 145 |
-
## Function calling with `transformers`
|
| 146 |
-
|
| 147 |
-
To use this example, you'll need `transformers` version 4.42.0 or higher. Please see the
|
| 148 |
-
[function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling)
|
| 149 |
-
in the `transformers` docs for more information.
|
| 150 |
-
|
| 151 |
-
```python
|
| 152 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 153 |
-
import torch
|
| 154 |
-
|
| 155 |
-
model_id = "mistralai/Mixtral-8x22B-Instruct-v0.1"
|
| 156 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 157 |
-
|
| 158 |
-
def get_current_weather(location: str, format: str):
|
| 159 |
-
"""
|
| 160 |
-
Get the current weather
|
| 161 |
-
|
| 162 |
-
Args:
|
| 163 |
-
location: The city and state, e.g. San Francisco, CA
|
| 164 |
-
format: The temperature unit to use. Infer this from the users location. (choices: ["celsius", "fahrenheit"])
|
| 165 |
-
"""
|
| 166 |
-
pass
|
| 167 |
-
|
| 168 |
-
conversation = [{"role": "user", "content": "What's the weather like in Paris?"}]
|
| 169 |
-
tools = [get_current_weather]
|
| 170 |
-
|
| 171 |
-
# format and tokenize the tool use prompt
|
| 172 |
-
inputs = tokenizer.apply_chat_template(
|
| 173 |
-
conversation,
|
| 174 |
-
tools=tools,
|
| 175 |
-
add_generation_prompt=True,
|
| 176 |
-
return_dict=True,
|
| 177 |
-
return_tensors="pt",
|
| 178 |
-
)
|
| 179 |
-
|
| 180 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
|
| 181 |
-
|
| 182 |
-
inputs.to(model.device)
|
| 183 |
-
outputs = model.generate(**inputs, max_new_tokens=1000)
|
| 184 |
-
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 185 |
-
```
|
| 186 |
-
|
| 187 |
-
Note that, for reasons of space, this example does not show a complete cycle of calling a tool and adding the tool call and tool
|
| 188 |
-
results to the chat history so that the model can use them in its next generation. For a full tool calling example, please
|
| 189 |
-
see the [function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling),
|
| 190 |
-
and note that Mixtral **does** use tool call IDs, so these must be included in your tool calls and tool results. They should be
|
| 191 |
-
exactly 9 alphanumeric characters.
|
| 192 |
-
|
| 193 |
# Instruct tokenizer
|
| 194 |
The HuggingFace tokenizer included in this release should match our own. To compare:
|
| 195 |
`pip install mistral-common`
|
|
@@ -231,20 +100,7 @@ This tokenizer includes more special tokens, related to function calling :
|
|
| 231 |
- [TOOL_CALLS]
|
| 232 |
- [AVAILABLE_TOOLS]
|
| 233 |
- [/AVAILABLE_TOOLS]
|
| 234 |
-
- [
|
| 235 |
- [/TOOL_RESULTS]
|
| 236 |
|
| 237 |
-
If you want to use this model with function calling, please be sure to apply it similarly to what is done in our [SentencePieceTokenizerV3](
|
| 238 |
-
|
| 239 |
-
# The Mistral AI Team
|
| 240 |
-
Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux,
|
| 241 |
-
Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault,
|
| 242 |
-
Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot,
|
| 243 |
-
Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger,
|
| 244 |
-
Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona,
|
| 245 |
-
Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon,
|
| 246 |
-
Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat,
|
| 247 |
-
Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen,
|
| 248 |
-
Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao,
|
| 249 |
-
Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang,
|
| 250 |
-
Valera Nemychnikova, William El Sayed, William Marshall
|
|
|
|
| 1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
| 4 |
|
| 5 |
# Model Card for Mixtral-8x22B-Instruct-v0.1
|
| 6 |
+
The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.
|
| 7 |
|
| 8 |
+
## Run the model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from transformers import AutoModelForCausalLM
|
| 11 |
from mistral_common.protocol.instruct.messages import (
|
| 12 |
AssistantMessage,
|
| 13 |
UserMessage,
|
| 14 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
| 16 |
from mistral_common.tokens.instruct.normalize import ChatCompletionRequest
|
| 17 |
|
|
|
|
| 55 |
model.to(device)
|
| 56 |
|
| 57 |
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
|
| 58 |
+
decoded = tokenizer.batch_decode(generated_ids)
|
| 59 |
+
print(decoded[0])
|
|
|
|
| 60 |
```
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Instruct tokenizer
|
| 63 |
The HuggingFace tokenizer included in this release should match our own. To compare:
|
| 64 |
`pip install mistral-common`
|
|
|
|
| 100 |
- [TOOL_CALLS]
|
| 101 |
- [AVAILABLE_TOOLS]
|
| 102 |
- [/AVAILABLE_TOOLS]
|
| 103 |
+
- [TOOL_RESULT]
|
| 104 |
- [/TOOL_RESULTS]
|
| 105 |
|
| 106 |
+
If you want to use this model with function calling, please be sure to apply it similarly to what is done in our [SentencePieceTokenizerV3](github.com/mistralai/mistral-common/...).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
| 9 |
"hidden_size": 6144,
|
| 10 |
"initializer_range": 0.02,
|
| 11 |
"intermediate_size": 16384,
|
| 12 |
-
"max_position_embeddings":
|
| 13 |
"model_type": "mixtral",
|
| 14 |
"num_attention_heads": 48,
|
| 15 |
"num_experts_per_tok": 2,
|
|
|
|
| 9 |
"hidden_size": 6144,
|
| 10 |
"initializer_range": 0.02,
|
| 11 |
"intermediate_size": 16384,
|
| 12 |
+
"max_position_embeddings": 32768,
|
| 13 |
"model_type": "mixtral",
|
| 14 |
"num_attention_heads": 48,
|
| 15 |
"num_experts_per_tok": 2,
|
special_tokens_map.json
CHANGED
|
@@ -1,23 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"bos_token":
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
"single_word": false
|
| 8 |
-
},
|
| 9 |
-
"eos_token": {
|
| 10 |
-
"content": "</s>",
|
| 11 |
-
"lstrip": false,
|
| 12 |
-
"normalized": false,
|
| 13 |
-
"rstrip": false,
|
| 14 |
-
"single_word": false
|
| 15 |
-
},
|
| 16 |
-
"unk_token": {
|
| 17 |
-
"content": "<unk>",
|
| 18 |
-
"lstrip": false,
|
| 19 |
-
"normalized": false,
|
| 20 |
-
"rstrip": false,
|
| 21 |
-
"single_word": false
|
| 22 |
-
}
|
| 23 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"unk_token": "<unk>",
|
| 5 |
+
"b_inst": "[INST]",
|
| 6 |
+
"e_inst": "[/INST]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
}
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer.model
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
|
| 3 |
-
size 587404
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer.model.v3
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
|
| 3 |
-
size 587404
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer_config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|