duzx16
committed on
Commit
·
7e69b85
1
Parent(s):
9324de7
Fix tokenizer config saving
Browse files - tokenization_chatglm.py +9 -3
tokenization_chatglm.py
CHANGED
|
@@ -170,9 +170,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 170 |
vocab_file,
|
| 171 |
do_lower_case=False,
|
| 172 |
remove_space=False,
|
| 173 |
-
bos_token='sop',
|
| 174 |
-
eos_token='
|
| 175 |
-
eop_token='eop',
|
| 176 |
mask_token='[MASK]',
|
| 177 |
gmask_token='[gMASK]',
|
| 178 |
padding_side="left",
|
|
@@ -183,6 +183,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 183 |
do_lower_case=do_lower_case,
|
| 184 |
remove_space=remove_space,
|
| 185 |
padding_side=padding_side,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
**kwargs
|
| 187 |
)
|
| 188 |
|
|
|
|
| 170 |
vocab_file,
|
| 171 |
do_lower_case=False,
|
| 172 |
remove_space=False,
|
| 173 |
+
bos_token='<sop>',
|
| 174 |
+
eos_token='</s>',
|
| 175 |
+
eop_token='<eop>',
|
| 176 |
mask_token='[MASK]',
|
| 177 |
gmask_token='[gMASK]',
|
| 178 |
padding_side="left",
|
|
|
|
| 183 |
do_lower_case=do_lower_case,
|
| 184 |
remove_space=remove_space,
|
| 185 |
padding_side=padding_side,
|
| 186 |
+
bos_token=bos_token,
|
| 187 |
+
eos_token=eos_token,
|
| 188 |
+
eop_token=eop_token,
|
| 189 |
+
mask_token=mask_token,
|
| 190 |
+
gmask_token=gmask_token,
|
| 191 |
+
num_image_tokens=num_image_tokens,
|
| 192 |
**kwargs
|
| 193 |
)
|
| 194 |
|