Safetensors
English
Chinese
linearmodel
ZOOM-VL-0.4B-72k / config.json
JL-er
Upload 9 files
c9db239 verified
{
"architectures": [
"RWKV7VLForConditionalGeneration"
],
"text_config":{
"a_low_rank_dim": 64,
"attn": null,
"attn_mode": "chunk",
"bos_token_id": 0,
"decay_low_rank_dim": 64,
"eos_token_id": 0,
"fuse_cross_entropy": true,
"fuse_norm": false,
"gate_low_rank_dim": 128,
"head_dim": 64,
"hidden_act": "sqrelu",
"hidden_ratio": 4.0,
"hidden_size": 1024,
"initializer_range": 0.006,
"intermediate_size": 4096,
"max_position_embeddings": 2048,
"model_type": "rwkv7_vl",
"norm_bias": true,
"norm_eps": 1e-05,
"norm_first": true,
"num_heads": 32,
"num_hidden_layers": 24,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.48.0",
"use_cache": true,
"v_low_rank_dim": 32,
"vocab_size": 65536
},
"vision_config": {
"image_size": 384,
"model_type": "siglip_vision_model"
},
"proj_config": {
"encoder_dim": 768,
"project_dim" : 1024
},
"encoder_type": "siglip2"
}