dinov3-vit7b16-pretrain-lvd1689m / model.safetensors.index.json
{
"metadata": {
"total_parameters": 6716035072,
"total_size": 26864140288
},
"weight_map": {
"embeddings.cls_token": "model-00001-of-00006.safetensors",
"embeddings.mask_token": "model-00001-of-00006.safetensors",
"embeddings.patch_embeddings.bias": "model-00001-of-00006.safetensors",
"embeddings.patch_embeddings.weight": "model-00001-of-00006.safetensors",
"embeddings.register_tokens": "model-00001-of-00006.safetensors",
"layer.0.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.0.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.0.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.0.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.0.norm1.bias": "model-00001-of-00006.safetensors",
"layer.0.norm1.weight": "model-00001-of-00006.safetensors",
"layer.0.norm2.bias": "model-00001-of-00006.safetensors",
"layer.0.norm2.weight": "model-00001-of-00006.safetensors",
"layer.1.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.1.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.1.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.1.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.1.norm1.bias": "model-00001-of-00006.safetensors",
"layer.1.norm1.weight": "model-00001-of-00006.safetensors",
"layer.1.norm2.bias": "model-00001-of-00006.safetensors",
"layer.1.norm2.weight": "model-00001-of-00006.safetensors",
"layer.10.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.10.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.10.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.10.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.10.norm1.bias": "model-00002-of-00006.safetensors",
"layer.10.norm1.weight": "model-00002-of-00006.safetensors",
"layer.10.norm2.bias": "model-00002-of-00006.safetensors",
"layer.10.norm2.weight": "model-00002-of-00006.safetensors",
"layer.11.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.11.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.11.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.11.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.11.norm1.bias": "model-00002-of-00006.safetensors",
"layer.11.norm1.weight": "model-00002-of-00006.safetensors",
"layer.11.norm2.bias": "model-00002-of-00006.safetensors",
"layer.11.norm2.weight": "model-00002-of-00006.safetensors",
"layer.12.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.12.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.12.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.12.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.12.norm1.bias": "model-00002-of-00006.safetensors",
"layer.12.norm1.weight": "model-00002-of-00006.safetensors",
"layer.12.norm2.bias": "model-00002-of-00006.safetensors",
"layer.12.norm2.weight": "model-00002-of-00006.safetensors",
"layer.13.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.13.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.13.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.13.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.13.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.13.norm1.bias": "model-00002-of-00006.safetensors",
"layer.13.norm1.weight": "model-00002-of-00006.safetensors",
"layer.13.norm2.bias": "model-00002-of-00006.safetensors",
"layer.13.norm2.weight": "model-00002-of-00006.safetensors",
"layer.14.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.14.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.14.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.14.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.14.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.14.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.14.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.14.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.14.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.14.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.14.norm1.bias": "model-00002-of-00006.safetensors",
"layer.14.norm1.weight": "model-00002-of-00006.safetensors",
"layer.14.norm2.bias": "model-00002-of-00006.safetensors",
"layer.14.norm2.weight": "model-00002-of-00006.safetensors",
"layer.15.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.15.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.15.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.15.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.15.norm1.bias": "model-00003-of-00006.safetensors",
"layer.15.norm1.weight": "model-00003-of-00006.safetensors",
"layer.15.norm2.bias": "model-00003-of-00006.safetensors",
"layer.15.norm2.weight": "model-00003-of-00006.safetensors",
"layer.16.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.16.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.16.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.16.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.16.norm1.bias": "model-00003-of-00006.safetensors",
"layer.16.norm1.weight": "model-00003-of-00006.safetensors",
"layer.16.norm2.bias": "model-00003-of-00006.safetensors",
"layer.16.norm2.weight": "model-00003-of-00006.safetensors",
"layer.17.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.17.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.17.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.17.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.17.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.17.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.17.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.17.norm1.bias": "model-00003-of-00006.safetensors",
"layer.17.norm1.weight": "model-00003-of-00006.safetensors",
"layer.17.norm2.bias": "model-00003-of-00006.safetensors",
"layer.17.norm2.weight": "model-00003-of-00006.safetensors",
"layer.18.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.18.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.18.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.18.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.18.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.18.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.18.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.18.norm1.bias": "model-00003-of-00006.safetensors",
"layer.18.norm1.weight": "model-00003-of-00006.safetensors",
"layer.18.norm2.bias": "model-00003-of-00006.safetensors",
"layer.18.norm2.weight": "model-00003-of-00006.safetensors",
"layer.19.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.19.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.19.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.19.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.19.norm1.bias": "model-00003-of-00006.safetensors",
"layer.19.norm1.weight": "model-00003-of-00006.safetensors",
"layer.19.norm2.bias": "model-00003-of-00006.safetensors",
"layer.19.norm2.weight": "model-00003-of-00006.safetensors",
"layer.2.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.2.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.2.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.2.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.2.norm1.bias": "model-00001-of-00006.safetensors",
"layer.2.norm1.weight": "model-00001-of-00006.safetensors",
"layer.2.norm2.bias": "model-00001-of-00006.safetensors",
"layer.2.norm2.weight": "model-00001-of-00006.safetensors",
"layer.20.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.20.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.20.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.20.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.20.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.20.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.20.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.20.norm1.bias": "model-00003-of-00006.safetensors",
"layer.20.norm1.weight": "model-00003-of-00006.safetensors",
"layer.20.norm2.bias": "model-00003-of-00006.safetensors",
"layer.20.norm2.weight": "model-00003-of-00006.safetensors",
"layer.21.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.attention.o_proj.bias": "model-00003-of-00006.safetensors",
"layer.21.attention.o_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.attention.q_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.layer_scale1.lambda1": "model-00003-of-00006.safetensors",
"layer.21.layer_scale2.lambda1": "model-00003-of-00006.safetensors",
"layer.21.mlp.down_proj.bias": "model-00003-of-00006.safetensors",
"layer.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.mlp.gate_proj.bias": "model-00003-of-00006.safetensors",
"layer.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.mlp.up_proj.bias": "model-00003-of-00006.safetensors",
"layer.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
"layer.21.norm1.bias": "model-00003-of-00006.safetensors",
"layer.21.norm1.weight": "model-00003-of-00006.safetensors",
"layer.21.norm2.bias": "model-00003-of-00006.safetensors",
"layer.21.norm2.weight": "model-00003-of-00006.safetensors",
"layer.22.attention.k_proj.weight": "model-00003-of-00006.safetensors",
"layer.22.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.22.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.22.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.22.attention.v_proj.weight": "model-00003-of-00006.safetensors",
"layer.22.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.22.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.22.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.22.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.22.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.22.norm1.bias": "model-00003-of-00006.safetensors",
"layer.22.norm1.weight": "model-00003-of-00006.safetensors",
"layer.22.norm2.bias": "model-00004-of-00006.safetensors",
"layer.22.norm2.weight": "model-00004-of-00006.safetensors",
"layer.23.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.23.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.23.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.23.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.23.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.23.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.23.norm1.bias": "model-00004-of-00006.safetensors",
"layer.23.norm1.weight": "model-00004-of-00006.safetensors",
"layer.23.norm2.bias": "model-00004-of-00006.safetensors",
"layer.23.norm2.weight": "model-00004-of-00006.safetensors",
"layer.24.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.24.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.24.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.24.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.24.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.24.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.24.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.24.norm1.bias": "model-00004-of-00006.safetensors",
"layer.24.norm1.weight": "model-00004-of-00006.safetensors",
"layer.24.norm2.bias": "model-00004-of-00006.safetensors",
"layer.24.norm2.weight": "model-00004-of-00006.safetensors",
"layer.25.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.25.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.25.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.25.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.25.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.25.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.25.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.25.norm1.bias": "model-00004-of-00006.safetensors",
"layer.25.norm1.weight": "model-00004-of-00006.safetensors",
"layer.25.norm2.bias": "model-00004-of-00006.safetensors",
"layer.25.norm2.weight": "model-00004-of-00006.safetensors",
"layer.26.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.26.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.26.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.26.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.26.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.26.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.26.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.26.norm1.bias": "model-00004-of-00006.safetensors",
"layer.26.norm1.weight": "model-00004-of-00006.safetensors",
"layer.26.norm2.bias": "model-00004-of-00006.safetensors",
"layer.26.norm2.weight": "model-00004-of-00006.safetensors",
"layer.27.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.27.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.27.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.27.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.27.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.27.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.27.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.27.norm1.bias": "model-00004-of-00006.safetensors",
"layer.27.norm1.weight": "model-00004-of-00006.safetensors",
"layer.27.norm2.bias": "model-00004-of-00006.safetensors",
"layer.27.norm2.weight": "model-00004-of-00006.safetensors",
"layer.28.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.28.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.28.layer_scale2.lambda1": "model-00004-of-00006.safetensors",
"layer.28.mlp.down_proj.bias": "model-00004-of-00006.safetensors",
"layer.28.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.28.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.mlp.up_proj.bias": "model-00004-of-00006.safetensors",
"layer.28.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
"layer.28.norm1.bias": "model-00004-of-00006.safetensors",
"layer.28.norm1.weight": "model-00004-of-00006.safetensors",
"layer.28.norm2.bias": "model-00004-of-00006.safetensors",
"layer.28.norm2.weight": "model-00004-of-00006.safetensors",
"layer.29.attention.k_proj.weight": "model-00004-of-00006.safetensors",
"layer.29.attention.o_proj.bias": "model-00004-of-00006.safetensors",
"layer.29.attention.o_proj.weight": "model-00004-of-00006.safetensors",
"layer.29.attention.q_proj.weight": "model-00004-of-00006.safetensors",
"layer.29.attention.v_proj.weight": "model-00004-of-00006.safetensors",
"layer.29.layer_scale1.lambda1": "model-00004-of-00006.safetensors",
"layer.29.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.29.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.29.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.29.mlp.gate_proj.bias": "model-00004-of-00006.safetensors",
"layer.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
"layer.29.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.29.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.29.norm1.bias": "model-00004-of-00006.safetensors",
"layer.29.norm1.weight": "model-00004-of-00006.safetensors",
"layer.29.norm2.bias": "model-00004-of-00006.safetensors",
"layer.29.norm2.weight": "model-00004-of-00006.safetensors",
"layer.3.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.3.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.3.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.3.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.3.norm1.bias": "model-00001-of-00006.safetensors",
"layer.3.norm1.weight": "model-00001-of-00006.safetensors",
"layer.3.norm2.bias": "model-00001-of-00006.safetensors",
"layer.3.norm2.weight": "model-00001-of-00006.safetensors",
"layer.30.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.30.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.30.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.30.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.30.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.30.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.30.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.30.norm1.bias": "model-00005-of-00006.safetensors",
"layer.30.norm1.weight": "model-00005-of-00006.safetensors",
"layer.30.norm2.bias": "model-00005-of-00006.safetensors",
"layer.30.norm2.weight": "model-00005-of-00006.safetensors",
"layer.31.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.31.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.31.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.31.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.31.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.31.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.31.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.31.norm1.bias": "model-00005-of-00006.safetensors",
"layer.31.norm1.weight": "model-00005-of-00006.safetensors",
"layer.31.norm2.bias": "model-00005-of-00006.safetensors",
"layer.31.norm2.weight": "model-00005-of-00006.safetensors",
"layer.32.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.32.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.32.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.32.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.32.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.32.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.32.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.32.norm1.bias": "model-00005-of-00006.safetensors",
"layer.32.norm1.weight": "model-00005-of-00006.safetensors",
"layer.32.norm2.bias": "model-00005-of-00006.safetensors",
"layer.32.norm2.weight": "model-00005-of-00006.safetensors",
"layer.33.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.33.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.33.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.33.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.33.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.33.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.33.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.33.norm1.bias": "model-00005-of-00006.safetensors",
"layer.33.norm1.weight": "model-00005-of-00006.safetensors",
"layer.33.norm2.bias": "model-00005-of-00006.safetensors",
"layer.33.norm2.weight": "model-00005-of-00006.safetensors",
"layer.34.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.34.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.34.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.34.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.34.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.34.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.34.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.34.norm1.bias": "model-00005-of-00006.safetensors",
"layer.34.norm1.weight": "model-00005-of-00006.safetensors",
"layer.34.norm2.bias": "model-00005-of-00006.safetensors",
"layer.34.norm2.weight": "model-00005-of-00006.safetensors",
"layer.35.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.35.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.35.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.35.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.35.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.35.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.35.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.35.norm1.bias": "model-00005-of-00006.safetensors",
"layer.35.norm1.weight": "model-00005-of-00006.safetensors",
"layer.35.norm2.bias": "model-00005-of-00006.safetensors",
"layer.35.norm2.weight": "model-00005-of-00006.safetensors",
"layer.36.attention.k_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.attention.o_proj.bias": "model-00005-of-00006.safetensors",
"layer.36.attention.o_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.attention.q_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.attention.v_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.layer_scale1.lambda1": "model-00005-of-00006.safetensors",
"layer.36.layer_scale2.lambda1": "model-00005-of-00006.safetensors",
"layer.36.mlp.down_proj.bias": "model-00005-of-00006.safetensors",
"layer.36.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.mlp.gate_proj.bias": "model-00005-of-00006.safetensors",
"layer.36.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.mlp.up_proj.bias": "model-00005-of-00006.safetensors",
"layer.36.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
"layer.36.norm1.bias": "model-00005-of-00006.safetensors",
"layer.36.norm1.weight": "model-00005-of-00006.safetensors",
"layer.36.norm2.bias": "model-00005-of-00006.safetensors",
"layer.36.norm2.weight": "model-00005-of-00006.safetensors",
"layer.37.attention.k_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.attention.o_proj.bias": "model-00006-of-00006.safetensors",
"layer.37.attention.o_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.attention.q_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.attention.v_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.layer_scale1.lambda1": "model-00006-of-00006.safetensors",
"layer.37.layer_scale2.lambda1": "model-00006-of-00006.safetensors",
"layer.37.mlp.down_proj.bias": "model-00006-of-00006.safetensors",
"layer.37.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.mlp.gate_proj.bias": "model-00006-of-00006.safetensors",
"layer.37.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.mlp.up_proj.bias": "model-00006-of-00006.safetensors",
"layer.37.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
"layer.37.norm1.bias": "model-00005-of-00006.safetensors",
"layer.37.norm1.weight": "model-00005-of-00006.safetensors",
"layer.37.norm2.bias": "model-00006-of-00006.safetensors",
"layer.37.norm2.weight": "model-00006-of-00006.safetensors",
"layer.38.attention.k_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.attention.o_proj.bias": "model-00006-of-00006.safetensors",
"layer.38.attention.o_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.attention.q_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.attention.v_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.layer_scale1.lambda1": "model-00006-of-00006.safetensors",
"layer.38.layer_scale2.lambda1": "model-00006-of-00006.safetensors",
"layer.38.mlp.down_proj.bias": "model-00006-of-00006.safetensors",
"layer.38.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.mlp.gate_proj.bias": "model-00006-of-00006.safetensors",
"layer.38.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.mlp.up_proj.bias": "model-00006-of-00006.safetensors",
"layer.38.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
"layer.38.norm1.bias": "model-00006-of-00006.safetensors",
"layer.38.norm1.weight": "model-00006-of-00006.safetensors",
"layer.38.norm2.bias": "model-00006-of-00006.safetensors",
"layer.38.norm2.weight": "model-00006-of-00006.safetensors",
"layer.39.attention.k_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.attention.o_proj.bias": "model-00006-of-00006.safetensors",
"layer.39.attention.o_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.attention.q_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.attention.v_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.layer_scale1.lambda1": "model-00006-of-00006.safetensors",
"layer.39.layer_scale2.lambda1": "model-00006-of-00006.safetensors",
"layer.39.mlp.down_proj.bias": "model-00006-of-00006.safetensors",
"layer.39.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.mlp.gate_proj.bias": "model-00006-of-00006.safetensors",
"layer.39.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.mlp.up_proj.bias": "model-00006-of-00006.safetensors",
"layer.39.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
"layer.39.norm1.bias": "model-00006-of-00006.safetensors",
"layer.39.norm1.weight": "model-00006-of-00006.safetensors",
"layer.39.norm2.bias": "model-00006-of-00006.safetensors",
"layer.39.norm2.weight": "model-00006-of-00006.safetensors",
"layer.4.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.4.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.4.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.4.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.4.norm1.bias": "model-00001-of-00006.safetensors",
"layer.4.norm1.weight": "model-00001-of-00006.safetensors",
"layer.4.norm2.bias": "model-00001-of-00006.safetensors",
"layer.4.norm2.weight": "model-00001-of-00006.safetensors",
"layer.5.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.5.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.5.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.5.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.5.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.5.norm1.bias": "model-00001-of-00006.safetensors",
"layer.5.norm1.weight": "model-00001-of-00006.safetensors",
"layer.5.norm2.bias": "model-00001-of-00006.safetensors",
"layer.5.norm2.weight": "model-00001-of-00006.safetensors",
"layer.6.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.6.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.6.layer_scale2.lambda1": "model-00001-of-00006.safetensors",
"layer.6.mlp.down_proj.bias": "model-00001-of-00006.safetensors",
"layer.6.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.mlp.gate_proj.bias": "model-00001-of-00006.safetensors",
"layer.6.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.mlp.up_proj.bias": "model-00001-of-00006.safetensors",
"layer.6.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
"layer.6.norm1.bias": "model-00001-of-00006.safetensors",
"layer.6.norm1.weight": "model-00001-of-00006.safetensors",
"layer.6.norm2.bias": "model-00001-of-00006.safetensors",
"layer.6.norm2.weight": "model-00001-of-00006.safetensors",
"layer.7.attention.k_proj.weight": "model-00001-of-00006.safetensors",
"layer.7.attention.o_proj.bias": "model-00001-of-00006.safetensors",
"layer.7.attention.o_proj.weight": "model-00001-of-00006.safetensors",
"layer.7.attention.q_proj.weight": "model-00001-of-00006.safetensors",
"layer.7.attention.v_proj.weight": "model-00001-of-00006.safetensors",
"layer.7.layer_scale1.lambda1": "model-00001-of-00006.safetensors",
"layer.7.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.7.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.7.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.7.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.7.norm1.bias": "model-00001-of-00006.safetensors",
"layer.7.norm1.weight": "model-00001-of-00006.safetensors",
"layer.7.norm2.bias": "model-00001-of-00006.safetensors",
"layer.7.norm2.weight": "model-00001-of-00006.safetensors",
"layer.8.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.8.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.8.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.8.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.8.norm1.bias": "model-00002-of-00006.safetensors",
"layer.8.norm1.weight": "model-00002-of-00006.safetensors",
"layer.8.norm2.bias": "model-00002-of-00006.safetensors",
"layer.8.norm2.weight": "model-00002-of-00006.safetensors",
"layer.9.attention.k_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.attention.o_proj.bias": "model-00002-of-00006.safetensors",
"layer.9.attention.o_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.attention.q_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.attention.v_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.layer_scale1.lambda1": "model-00002-of-00006.safetensors",
"layer.9.layer_scale2.lambda1": "model-00002-of-00006.safetensors",
"layer.9.mlp.down_proj.bias": "model-00002-of-00006.safetensors",
"layer.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.mlp.gate_proj.bias": "model-00002-of-00006.safetensors",
"layer.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.mlp.up_proj.bias": "model-00002-of-00006.safetensors",
"layer.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
"layer.9.norm1.bias": "model-00002-of-00006.safetensors",
"layer.9.norm1.weight": "model-00002-of-00006.safetensors",
"layer.9.norm2.bias": "model-00002-of-00006.safetensors",
"layer.9.norm2.weight": "model-00002-of-00006.safetensors",
"norm.bias": "model-00006-of-00006.safetensors",
"norm.weight": "model-00006-of-00006.safetensors"
}
}
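
For reference, this index is what a loader consults when restoring the sharded checkpoint: `weight_map` names which of the six `model-0000X-of-00006.safetensors` files holds each tensor, and `metadata.total_size` (26,864,140,288 bytes for 6,716,035,072 parameters) is consistent with fp32 storage. Below is a minimal sketch of reassembling a full state dict with the `safetensors` package; it assumes the shard files sit in the current directory next to this index, and the paths are illustrative.

```python
import json
from collections import defaultdict

from safetensors import safe_open

# Read the index that maps each tensor name to the shard file storing it.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"])  # {'total_parameters': 6716035072, 'total_size': 26864140288}

# Group tensor names by shard so each file is opened only once.
tensors_per_shard = defaultdict(list)
for name, shard in index["weight_map"].items():
    tensors_per_shard[shard].append(name)

# Load every tensor into one state dict on CPU (~27 GB of weights in total).
state_dict = {}
for shard, names in tensors_per_shard.items():
    with safe_open(shard, framework="pt", device="cpu") as f:
        for name in names:
            state_dict[name] = f.get_tensor(name)

print(len(state_dict), "tensors loaded")
```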