IntMeGroup committed on
Commit
fd380e5
·
verified ·
1 Parent(s): 9d4466a

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ training_log.txt filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</box>": 92552,
3
+ "</img>": 92545,
4
+ "</quad>": 92548,
5
+ "</ref>": 92550,
6
+ "<IMG_CONTEXT>": 92546,
7
+ "<box>": 92551,
8
+ "<img>": 92544,
9
+ "<quad>": 92547,
10
+ "<ref>": 92549
11
+ }
config.json ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "_name_or_path": "vlms/OpenGVLab/InternVL2-8B",
4
+ "architectures": [
5
+ "InternVLChatModel"
6
+ ],
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
9
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
10
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
11
+ },
12
+ "downsample_ratio": 0.5,
13
+ "dynamic_image_size": true,
14
+ "force_image_size": 448,
15
+ "llm_config": {
16
+ "_name_or_path": "internlm/internlm2_5-7b-chat",
17
+ "add_cross_attention": false,
18
+ "architectures": [
19
+ "InternLM2ForCausalLM"
20
+ ],
21
+ "attn_implementation": "flash_attention_2",
22
+ "auto_map": {
23
+ "AutoConfig": "configuration_internlm2.InternLM2Config",
24
+ "AutoModel": "modeling_internlm2.InternLM2ForCausalLM",
25
+ "AutoModelForCausalLM": "modeling_internlm2.InternLM2ForCausalLM"
26
+ },
27
+ "bad_words_ids": null,
28
+ "begin_suppress_tokens": null,
29
+ "bias": false,
30
+ "bos_token_id": 1,
31
+ "chunk_size_feed_forward": 0,
32
+ "cross_attention_hidden_size": null,
33
+ "decoder_start_token_id": null,
34
+ "diversity_penalty": 0.0,
35
+ "do_sample": false,
36
+ "early_stopping": false,
37
+ "encoder_no_repeat_ngram_size": 0,
38
+ "eos_token_id": 2,
39
+ "exponential_decay_length_penalty": null,
40
+ "finetuning_task": null,
41
+ "forced_bos_token_id": null,
42
+ "forced_eos_token_id": null,
43
+ "hidden_act": "silu",
44
+ "hidden_size": 4096,
45
+ "id2label": {
46
+ "0": "LABEL_0",
47
+ "1": "LABEL_1"
48
+ },
49
+ "initializer_range": 0.02,
50
+ "intermediate_size": 14336,
51
+ "is_decoder": false,
52
+ "is_encoder_decoder": false,
53
+ "label2id": {
54
+ "LABEL_0": 0,
55
+ "LABEL_1": 1
56
+ },
57
+ "length_penalty": 1.0,
58
+ "max_length": 20,
59
+ "max_position_embeddings": 32768,
60
+ "min_length": 0,
61
+ "model_type": "internlm2",
62
+ "no_repeat_ngram_size": 0,
63
+ "num_attention_heads": 32,
64
+ "num_beam_groups": 1,
65
+ "num_beams": 1,
66
+ "num_hidden_layers": 32,
67
+ "num_key_value_heads": 8,
68
+ "num_return_sequences": 1,
69
+ "output_attentions": false,
70
+ "output_hidden_states": false,
71
+ "output_scores": false,
72
+ "pad_token_id": 2,
73
+ "prefix": null,
74
+ "pretraining_tp": 1,
75
+ "problem_type": null,
76
+ "pruned_heads": {},
77
+ "remove_invalid_values": false,
78
+ "repetition_penalty": 1.0,
79
+ "return_dict": true,
80
+ "return_dict_in_generate": false,
81
+ "rms_norm_eps": 1e-05,
82
+ "rope_scaling": {
83
+ "factor": 2.0,
84
+ "type": "dynamic"
85
+ },
86
+ "rope_theta": 1000000,
87
+ "sep_token_id": null,
88
+ "suppress_tokens": null,
89
+ "task_specific_params": null,
90
+ "temperature": 1.0,
91
+ "tf_legacy_loss": false,
92
+ "tie_encoder_decoder": false,
93
+ "tie_word_embeddings": false,
94
+ "tokenizer_class": null,
95
+ "top_k": 50,
96
+ "top_p": 1.0,
97
+ "torch_dtype": "bfloat16",
98
+ "torchscript": false,
99
+ "transformers_version": "4.37.2",
100
+ "typical_p": 1.0,
101
+ "use_bfloat16": true,
102
+ "use_cache": false,
103
+ "vocab_size": 92553
104
+ },
105
+ "max_dynamic_patch": 6,
106
+ "min_dynamic_patch": 1,
107
+ "model_type": "internvl_chat",
108
+ "pad2square": false,
109
+ "ps_version": "v2",
110
+ "select_layer": -1,
111
+ "template": "internlm2-chat",
112
+ "torch_dtype": "bfloat16",
113
+ "transformers_version": null,
114
+ "use_backbone_lora": 16,
115
+ "use_llm_lora": 16,
116
+ "use_thumbnail": true,
117
+ "vision_config": {
118
+ "_name_or_path": "",
119
+ "add_cross_attention": false,
120
+ "architectures": [
121
+ "InternVisionModel"
122
+ ],
123
+ "attention_dropout": 0.0,
124
+ "bad_words_ids": null,
125
+ "begin_suppress_tokens": null,
126
+ "bos_token_id": null,
127
+ "chunk_size_feed_forward": 0,
128
+ "cross_attention_hidden_size": null,
129
+ "decoder_start_token_id": null,
130
+ "diversity_penalty": 0.0,
131
+ "do_sample": false,
132
+ "drop_path_rate": 0.1,
133
+ "dropout": 0.0,
134
+ "early_stopping": false,
135
+ "encoder_no_repeat_ngram_size": 0,
136
+ "eos_token_id": null,
137
+ "exponential_decay_length_penalty": null,
138
+ "finetuning_task": null,
139
+ "forced_bos_token_id": null,
140
+ "forced_eos_token_id": null,
141
+ "hidden_act": "gelu",
142
+ "hidden_size": 1024,
143
+ "id2label": {
144
+ "0": "LABEL_0",
145
+ "1": "LABEL_1"
146
+ },
147
+ "image_size": 448,
148
+ "initializer_factor": 1.0,
149
+ "initializer_range": 0.02,
150
+ "intermediate_size": 4096,
151
+ "is_decoder": false,
152
+ "is_encoder_decoder": false,
153
+ "label2id": {
154
+ "LABEL_0": 0,
155
+ "LABEL_1": 1
156
+ },
157
+ "layer_norm_eps": 1e-06,
158
+ "length_penalty": 1.0,
159
+ "max_length": 20,
160
+ "min_length": 0,
161
+ "model_type": "intern_vit_6b",
162
+ "no_repeat_ngram_size": 0,
163
+ "norm_type": "layer_norm",
164
+ "num_attention_heads": 16,
165
+ "num_beam_groups": 1,
166
+ "num_beams": 1,
167
+ "num_channels": 3,
168
+ "num_hidden_layers": 24,
169
+ "num_return_sequences": 1,
170
+ "output_attentions": false,
171
+ "output_hidden_states": false,
172
+ "output_scores": false,
173
+ "pad_token_id": null,
174
+ "patch_size": 14,
175
+ "prefix": null,
176
+ "problem_type": null,
177
+ "pruned_heads": {},
178
+ "qk_normalization": false,
179
+ "qkv_bias": true,
180
+ "remove_invalid_values": false,
181
+ "repetition_penalty": 1.0,
182
+ "return_dict": true,
183
+ "return_dict_in_generate": false,
184
+ "sep_token_id": null,
185
+ "suppress_tokens": null,
186
+ "task_specific_params": null,
187
+ "temperature": 1.0,
188
+ "tf_legacy_loss": false,
189
+ "tie_encoder_decoder": false,
190
+ "tie_word_embeddings": true,
191
+ "tokenizer_class": null,
192
+ "top_k": 50,
193
+ "top_p": 1.0,
194
+ "torch_dtype": "bfloat16",
195
+ "torchscript": false,
196
+ "transformers_version": "4.37.2",
197
+ "typical_p": 1.0,
198
+ "use_bfloat16": true,
199
+ "use_flash_attn": true
200
+ }
201
+ }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 92542,
5
+ 92543
6
+ ],
7
+ "transformers_version": "4.37.2"
8
+ }
lora_weights.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d11df08755f0fb8a78c847b1d769da9f7ef9be63cbcf023c67043d9e431c1fb0
3
+ size 88267974
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bfca4d3274a376e97753edc8b0e61a15f78ecb105d0aa61c5866567c847b28e
3
+ size 4972043848
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6287a423aefbf86c4621c282ca911b1547ad930c1504f5813569b4dfba7a85e1
3
+ size 4942476624
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d954333da1c07c039626c492a7bcad159dc360845d01766fa61edcfab7946c
3
+ size 4942476664
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af7018b11b0091ac7b8f113ba6394e58a38c97d7d8d2ba04492021c46ccdb964
3
+ size 1510905010
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
results.csv ADDED
The diff for this file is too large to render. See raw diff
 
results.txt ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SRCC_score: 0.8593689345602272
2
+ PLCC_score: 0.8428952873034686
3
+ KRCC_score: 0.6784100085991075
4
+ Accuracy: 0.0001392369813422445
5
+ SRCC_level: -0.022674155098422008
6
+ PLCC_level: -0.02088249160847023
7
+ KRCC_level: -0.018598599351636224
8
+ SRCC_score: 0.8593058701502301
9
+ PLCC_score: 0.8428920355809602
10
+ KRCC_score: 0.6782591732355335
11
+ Accuracy: 0.0004177109440267335
12
+ SRCC_level: -0.03501692210894626
13
+ PLCC_level: -0.03942505538299566
14
+ KRCC_level: -0.028722821291677077
15
+ SRCC_score: 0.8805964022419992
16
+ PLCC_score: 0.8667571046510911
17
+ KRCC_score: 0.7026884753214226
18
+ Accuracy: 0.0
19
+ SRCC_level: nan
20
+ PLCC_level: nan
21
+ KRCC_level: nan
22
+ SRCC_score: 0.8810343456540407
23
+ PLCC_score: 0.8670600271693797
24
+ KRCC_score: 0.7032434122319121
25
+ Accuracy: 0.0
26
+ SRCC_level: nan
27
+ PLCC_level: nan
28
+ KRCC_level: nan
29
+ SRCC_score: 0.8765646911029039
30
+ PLCC_score: 0.8739012374938525
31
+ KRCC_score: 0.6996016569449197
32
+ Accuracy: 0.0
33
+ SRCC_level: -0.018516302890088068
34
+ PLCC_level: -0.014238519819895728
35
+ KRCC_level: -0.015188098406818196
36
+ SRCC_score: 0.8764479096765181
37
+ PLCC_score: 0.8737589055537217
38
+ KRCC_score: 0.6995259197460111
39
+ Accuracy: 0.0
40
+ SRCC_level: -0.024266723686067224
41
+ PLCC_level: -0.018630166590774228
42
+ KRCC_level: -0.019907698862271818
43
+ SRCC_score: 0.8866738030908949
44
+ PLCC_score: 0.8837564339328823
45
+ KRCC_score: 0.7122522423577204
46
+ Accuracy: 0.0015316067947646894
47
+ SRCC_level: -0.0635318904474534
48
+ PLCC_level: -0.05447317913867028
49
+ KRCC_level: -0.051974258519070275
50
+ SRCC_score: 0.88658800882714
51
+ PLCC_score: 0.8835339791286331
52
+ KRCC_score: 0.7121097981472183
53
+ Accuracy: 0.001670843776106934
54
+ SRCC_level: -0.06698834562792388
55
+ PLCC_level: -0.05937006313480223
56
+ KRCC_level: -0.05483120184063846
57
+ SRCC_score: 0.8931734329484106
58
+ PLCC_score: 0.8893485492307392
59
+ KRCC_score: 0.7208965084256661
60
+ Accuracy: 0.000278473962684489
61
+ SRCC_level: -0.149583581145868
62
+ PLCC_level: -0.1328972572337725
63
+ KRCC_level: -0.12216863247456813
64
+ SRCC_score: 0.8928532549255208
65
+ PLCC_score: 0.8891536641786139
66
+ KRCC_score: 0.7205435339192087
67
+ Accuracy: 0.000556947925368978
68
+ SRCC_level: -0.15961270701343125
69
+ PLCC_level: -0.14454215390115768
70
+ KRCC_level: -0.13030078487014474
71
+ SRCC_score: 0.8886541849284574
72
+ PLCC_score: 0.888885901023489
73
+ KRCC_score: 0.7168108398476788
74
+ Accuracy: 0.0001392369813422445
75
+ SRCC_level: -0.05027794638111091
76
+ PLCC_level: -0.04588162427034577
77
+ KRCC_level: -0.041195957673676634
78
+ SRCC_score: 0.887704447804412
79
+ PLCC_score: 0.8880319019217863
80
+ KRCC_score: 0.7156711387615758
81
+ Accuracy: 0.0
82
+ SRCC_level: -0.051902098466116986
83
+ PLCC_level: -0.04661410297818629
84
+ KRCC_level: -0.04254311116257898
85
+ SRCC_score: 0.8894428855889271
86
+ PLCC_score: 0.8878910634951341
87
+ KRCC_score: 0.7169029810078482
88
+ Accuracy: 0.004177109440267335
89
+ SRCC_level: -0.17971535366687483
90
+ PLCC_level: -0.1586142398500061
91
+ KRCC_level: -0.1456137497641119
92
+ SRCC_score: 0.8889102925058991
93
+ PLCC_score: 0.8872438188595395
94
+ KRCC_score: 0.716068567181459
95
+ Accuracy: 0.005708716235032024
96
+ SRCC_level: -0.19256251054851958
97
+ PLCC_level: -0.16957211839120678
98
+ KRCC_level: -0.15611373848798576
99
+ SRCC_score: 0.8873750367998665
100
+ PLCC_score: 0.8857580747816941
101
+ KRCC_score: 0.7141924936638172
102
+ Accuracy: 0.0
103
+ SRCC_level: -0.08108053283959671
104
+ PLCC_level: -0.081021415518285
105
+ KRCC_level: -0.06644853255808986
106
+ SRCC_score: 0.8881904196426522
107
+ PLCC_score: 0.8866888062456022
108
+ KRCC_score: 0.7152093445760982
109
+ Accuracy: 0.0
110
+ SRCC_level: -0.08215058778691106
111
+ PLCC_level: -0.08177999474541327
112
+ KRCC_level: -0.0673218184527014
113
+ SRCC_score: 0.8939969974644103
114
+ PLCC_score: 0.8940251861970278
115
+ KRCC_score: 0.7239335189897169
116
+ Accuracy: 0.0001392369813422445
117
+ SRCC_level: -0.016484213885418264
118
+ PLCC_level: -0.023880206606280267
119
+ KRCC_level: -0.013516595136417685
120
+ SRCC_score: 0.8931441237203306
121
+ PLCC_score: 0.8932419767314996
122
+ KRCC_score: 0.7228243415353742
123
+ Accuracy: 0.0
124
+ SRCC_level: -0.029381023905795323
125
+ PLCC_level: -0.03370858348828192
126
+ KRCC_level: -0.024088512066436207
127
+ SRCC_score: 0.9000416625760053
128
+ PLCC_score: 0.9000707445266456
129
+ KRCC_score: 0.732054390267222
130
+ Accuracy: 0.0
131
+ SRCC_level: -0.04211746864561537
132
+ PLCC_level: -0.03860240492571986
133
+ KRCC_level: -0.03453146720715946
134
+ SRCC_score: 0.8993917716197506
135
+ PLCC_score: 0.8996467569482082
136
+ KRCC_score: 0.7310972516236967
137
+ Accuracy: 0.0
138
+ SRCC_level: -0.030580001872719184
139
+ PLCC_level: -0.036584460914523116
140
+ KRCC_level: -0.025080954410230335
141
+ SRCC_score: 0.8977933378215608
142
+ PLCC_score: 0.897576041245265
143
+ KRCC_score: 0.7294122377193878
144
+ Accuracy: 0.0004177109440267335
145
+ SRCC_level: -0.017554344124745987
146
+ PLCC_level: -0.018258949501154444
147
+ KRCC_level: -0.014392651252882699
148
+ SRCC_score: 0.8983149972002357
149
+ PLCC_score: 0.8981694891490573
150
+ KRCC_score: 0.7302284289751617
151
+ Accuracy: 0.0004177109440267335
152
+ SRCC_level: -0.023276923780104358
153
+ PLCC_level: -0.02682417921326203
154
+ KRCC_level: -0.01908406698705812
155
+ SRCC_score: 0.8990430843626656
156
+ PLCC_score: 0.8989636569904695
157
+ KRCC_score: 0.7306847059917438
158
+ Accuracy: 0.0
159
+ SRCC_level: -0.03568041611879261
160
+ PLCC_level: -0.04025513454221178
161
+ KRCC_level: -0.029263107445304762
162
+ SRCC_score: 0.8994428053993866
163
+ PLCC_score: 0.8992365119665013
164
+ KRCC_score: 0.7311804632787288
165
+ Accuracy: 0.0001392369813422445
166
+ SRCC_level: -0.03774851482217209
167
+ PLCC_level: -0.04562283012455974
168
+ KRCC_level: -0.030955921335970114
169
+ SRCC_score: 0.8988664149206043
170
+ PLCC_score: 0.898468684815087
171
+ KRCC_score: 0.7303077945634544
172
+ Accuracy: 0.000556947925368978
173
+ SRCC_level: -0.059439681295382206
174
+ PLCC_level: -0.05085414149686177
175
+ KRCC_level: -0.04871048661555866
176
+ SRCC_score: 0.8992709697781449
177
+ PLCC_score: 0.8989129079634819
178
+ KRCC_score: 0.730886863992449
179
+ Accuracy: 0.0006961849067112225
180
+ SRCC_level: -0.06711662186202942
181
+ PLCC_level: -0.0584840394538452
182
+ KRCC_level: -0.055001004022223146
183
+ SRCC_score: 0.9000031070060149
184
+ PLCC_score: 0.8997939536664199
185
+ KRCC_score: 0.7320833817088588
186
+ Accuracy: 0.0006961849067112225
187
+ SRCC_level: -0.06587498379512986
188
+ PLCC_level: -0.06525321019700087
189
+ KRCC_level: -0.053999688876573
190
+ SRCC_score: 0.9002022138429692
191
+ PLCC_score: 0.9000263002808182
192
+ KRCC_score: 0.7322938126894455
193
+ Accuracy: 0.000556947925368978
194
+ SRCC_level: -0.05963923228647293
195
+ PLCC_level: -0.055987280407824976
196
+ KRCC_level: -0.04889272881272738
197
+ SRCC_score: 0.9007270226146622
198
+ PLCC_score: 0.900606376726291
199
+ KRCC_score: 0.7328473431837479
200
+ Accuracy: 0.000278473962684489
201
+ SRCC_level: -0.07595638526076029
202
+ PLCC_level: -0.06940776194570394
203
+ KRCC_level: -0.06225768772556203
204
+ SRCC_score: 0.900895068977504
205
+ PLCC_score: 0.9008099890426235
206
+ KRCC_score: 0.7330782259366975
207
+ Accuracy: 0.000835421888053467
208
+ SRCC_level: -0.07674760445903578
209
+ PLCC_level: -0.06880327454017351
210
+ KRCC_level: -0.06290135058471628
211
+ SRCC_score: 0.9003743779810622
212
+ PLCC_score: 0.9005815936729471
213
+ KRCC_score: 0.732581594962951
214
+ Accuracy: 0.000835421888053467
215
+ SRCC_level: -0.10779351713892346
216
+ PLCC_level: -0.09680194314894573
217
+ KRCC_level: -0.08829631632861393
218
+ SRCC_score: 0.9002790972078871
219
+ PLCC_score: 0.9003902637064324
220
+ KRCC_score: 0.7323494239106358
221
+ Accuracy: 0.0009746588693957114
222
+ SRCC_level: -0.10445462830073
223
+ PLCC_level: -0.0937806930647066
224
+ KRCC_level: -0.08555379398991467
runs/Nov04_13-06-34_amax/events.out.tfevents.1730696809.amax.2444846.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b3469523d74558e819c57e0a39329cce6da7d5807eef742bba38d59b0353815
3
+ size 5241072
runs/Nov08_20-22-09_amax/events.out.tfevents.1731068545.amax.1181007.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3aa5786efa1fb59fc74f240b241c9141b9f5c82521b80ca0fa1b339254f22ba
3
+ size 31391
special_tokens_map.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|action_start|>",
6
+ "<|action_end|>",
7
+ "<|interpreter|>",
8
+ "<|plugin|>",
9
+ "<img>",
10
+ "</img>",
11
+ "<IMG_CONTEXT>",
12
+ "<quad>",
13
+ "</quad>",
14
+ "<ref>",
15
+ "</ref>",
16
+ "<box>",
17
+ "</box>"
18
+ ],
19
+ "bos_token": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "eos_token": {
27
+ "content": "</s>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "pad_token": {
34
+ "content": "</s>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ },
40
+ "unk_token": {
41
+ "content": "<unk>",
42
+ "lstrip": false,
43
+ "normalized": false,
44
+ "rstrip": false,
45
+ "single_word": false
46
+ }
47
+ }
tokenization_internlm2.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) The InternLM team and The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # This code is based on transformers/src/transformers/models/llama/tokenization_llama.py
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
"""Tokenization classes for InternLM."""
import os
from shutil import copyfile
from typing import Any, Dict, List, Optional, Tuple

import sentencepiece as spm
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.utils import logging

logger = logging.get_logger(__name__)

# File name that `save_vocabulary` uses for the SentencePiece model.
VOCAB_FILES_NAMES = {'vocab_file': './tokenizer.model'}

PRETRAINED_VOCAB_FILES_MAP = {}


# Modified from transformers.model.llama.tokenization_llama.LlamaTokenizer
class InternLM2Tokenizer(PreTrainedTokenizer):
    """
    Construct an InternLM2 tokenizer backed by a SentencePiece model.

    Args:
        vocab_file (`str`):
            Path to the SentencePiece model file.
        unk_token (`str`, *optional*, defaults to `'<unk>'`):
            Unknown token, forwarded to the base class.
        bos_token (`str`, *optional*, defaults to `'<s>'`):
            Beginning-of-sequence token, forwarded to the base class.
        eos_token (`str`, *optional*, defaults to `'</s>'`):
            End-of-sequence token, forwarded to the base class.
        pad_token (`str`, *optional*, defaults to `'</s>'`):
            Padding token, forwarded to the base class.
        sp_model_kwargs (`dict`, *optional*):
            Keyword arguments passed to `sentencepiece.SentencePieceProcessor`.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether `build_inputs_with_special_tokens` prepends the BOS id.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether `build_inputs_with_special_tokens` appends the EOS id.
        decode_with_prefix_space (`bool`, *optional*, defaults to `False`):
            Stored on the instance; not read anywhere else in this class.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Forwarded to the base class.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    model_input_names = ['input_ids', 'attention_mask']
    _auto_class = 'AutoTokenizer'

    def __init__(
        self,
        vocab_file,
        unk_token='<unk>',
        bos_token='<s>',
        eos_token='</s>',
        pad_token='</s>',
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        decode_with_prefix_space=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.decode_with_prefix_space = decode_with_prefix_space
        # The SentencePiece model is loaded before the base constructor runs;
        # presumably the base class queries the vocabulary during init.
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        self._no_prefix_space_tokens = None
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )

    @property
    def no_prefix_space_tokens(self):
        """Lazily built set of vocabulary *ids* whose piece does not start with '▁'."""
        if self._no_prefix_space_tokens is None:
            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
            self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith('▁')}
        return self._no_prefix_space_tokens

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    @property
    def bos_token_id(self) -> Optional[int]:
        """BOS id as reported by the SentencePiece model."""
        return self.sp_model.bos_id()

    @property
    def eos_token_id(self) -> Optional[int]:
        """EOS id as reported by the SentencePiece model."""
        return self.sp_model.eos_id()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def _maybe_add_prefix_space(self, tokens, decoded):
        """Prepend a space to `decoded` unless `tokens[0]` is in `no_prefix_space_tokens`."""
        # NOTE(review): `no_prefix_space_tokens` holds integer ids, while
        # `convert_tokens_to_string` passes token strings, so this membership
        # test looks like it never matches and the space is always prepended for
        # non-empty input. `convert_tokens_to_string` then drops the first
        # character, so the two cancel out. Left unchanged on purpose to keep
        # decoding output stable -- confirm before "fixing" either side.
        if tokens and tokens[0] not in self.no_prefix_space_tokens:
            return ' ' + decoded
        else:
            return decoded

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        current_sub_tokens = []
        out_string = ''
        prev_is_special = False
        for token in tokens:
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                if not prev_is_special:
                    out_string += ' '
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
                prev_is_special = False
        out_string += self.sp_model.decode(current_sub_tokens)
        out_string = self.clean_up_tokenization(out_string)
        out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string)
        # Drops the character prepended by `_maybe_add_prefix_space` (see note there).
        return out_string[1:]

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                Prefix joined to the vocabulary file name with a `-`.

        Returns:
            `Tuple(str)`: Paths to the files saved. If `save_directory` is not a directory an error is
            logged and `None` is returned instead.
        """
        if not os.path.isdir(save_directory):
            logger.error(f'Vocabulary path ({save_directory}) should be a directory')
            return
        # VOCAB_FILES_NAMES stores './tokenizer.model'; take the bare file name so
        # that a prefix yields '<prefix>-tokenizer.model' rather than
        # '<prefix>-./tokenizer.model', which would point into a missing directory.
        vocab_name = os.path.basename(VOCAB_FILES_NAMES['vocab_file'])
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + '-' if filename_prefix else '') + vocab_name
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            # The original model file is gone: write out the in-memory model instead.
            with open(out_vocab_file, 'wb') as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Concatenate one or two id lists, adding BOS/EOS according to `add_bos_token` / `add_eos_token`.

        The layout is `[BOS] + token_ids_0 + token_ids_1 + [EOS]`; no separator is inserted between the
        two sequences.
        """
        if self.add_bos_token:
            bos_token_ids = [self.bos_token_id]
        else:
            bos_token_ids = []

        output = bos_token_ids + token_ids_0

        if token_ids_1 is not None:
            output = output + token_ids_1

        if self.add_eos_token:
            output = output + [self.eos_token_id]

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        The mask mirrors the layout produced by `build_inputs_with_special_tokens`, so it has the same length as
        the built sequence and marks only the BOS/EOS positions that are actually added.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_mask = [1] if self.add_bos_token else []
        eos_mask = [1] if self.add_eos_token else []
        n_sequence_tokens = len(token_ids_0)
        if token_ids_1 is not None:
            n_sequence_tokens += len(token_ids_1)
        return bos_mask + [0] * n_sequence_tokens + eos_mask

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create the token type ids for a sequence or a sequence pair. This tokenizer does not distinguish
        segments, so a list of zeros is returned. Its length matches `build_inputs_with_special_tokens`.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        length = len(token_ids_0)
        if token_ids_1 is not None:
            length += len(token_ids_1)
        if self.add_bos_token:
            length += 1
        if self.add_eos_token:
            length += 1
        return [0] * length
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f868398fc4e05ee1e8aeba95ddf18ddcc45b8bce55d5093bead5bbf80429b48b
3
+ size 1477754
tokenizer_config.json ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "92538": {
28
+ "content": "<|plugin|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "92539": {
36
+ "content": "<|interpreter|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "92540": {
44
+ "content": "<|action_end|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "92541": {
52
+ "content": "<|action_start|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "92542": {
60
+ "content": "<|im_end|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "92543": {
68
+ "content": "<|im_start|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "92544": {
76
+ "content": "<img>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "92545": {
84
+ "content": "</img>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "92546": {
92
+ "content": "<IMG_CONTEXT>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "92547": {
100
+ "content": "<quad>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "92548": {
108
+ "content": "</quad>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "92549": {
116
+ "content": "<ref>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "92550": {
124
+ "content": "</ref>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "92551": {
132
+ "content": "<box>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "92552": {
140
+ "content": "</box>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ }
147
+ },
148
+ "additional_special_tokens": [
149
+ "<|im_start|>",
150
+ "<|im_end|>",
151
+ "<|action_start|>",
152
+ "<|action_end|>",
153
+ "<|interpreter|>",
154
+ "<|plugin|>",
155
+ "<img>",
156
+ "</img>",
157
+ "<IMG_CONTEXT>",
158
+ "<quad>",
159
+ "</quad>",
160
+ "<ref>",
161
+ "</ref>",
162
+ "<box>",
163
+ "</box>"
164
+ ],
165
+ "auto_map": {
166
+ "AutoTokenizer": [
167
+ "tokenization_internlm2.InternLM2Tokenizer",
168
+ null
169
+ ]
170
+ },
171
+ "bos_token": "<s>",
172
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
173
+ "clean_up_tokenization_spaces": false,
174
+ "eos_token": "</s>",
175
+ "model_max_length": 4096,
176
+ "pad_token": "</s>",
177
+ "tokenizer_class": "InternLM2Tokenizer",
178
+ "unk_token": "<unk>"
179
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:032c9f31c319cc7e5d2ac259f595c0fb209b264767faee2c1f5374242184d4a3
3
+ size 6136
training_log.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ede9796883bc00dcfacaf85e69ecea8c6f5930e12f4e97c036fe60674c6f6f8
3
+ size 91495535