{ "model_type": "xoron", "model_name": "Xoron-Dev-MultiMoE", "hidden_size": 1024, "num_layers": 12, "num_heads": 16, "intermediate_size": 2048, "vocab_size": 152200, "max_position_embeddings": 131072, "rms_norm_eps": 1e-06, "use_ring_attention": true, "ring_attention_chunk_size": 4096, "tie_word_embeddings": true, "use_moe": true, "num_experts": 8, "num_experts_per_tok": 2, "moe_layer_freq": 2, "use_shared_expert": true, "moe_capacity_factor": 1.25, "use_aux_lossless": true, "vision_model_name": "google/siglip-so400m-patch14-384", "freeze_vision": false, "num_vision_tokens": 64, "projector_type": "perceiver", "use_vision_dual_stream": true, "use_vision_titok": true, "num_vision_titok_tokens": 256, "num_vision_dual_stream_layers": 2, "use_video_3d_rope": true, "use_video_temporal_moe": true, "num_video_encoder_layers": 4, "num_video_experts": 4, "use_video_vidtok": true, "vidtok_latent_channels": 4, "vidtok_temporal_compression": 4, "vidtok_spatial_compression": 8, "vidtok_causal": true, "vidtok_use_fsq": false, "use_video_titok": true, "num_video_titok_tokens": 64, "num_video_titok_layers": 2, "num_video_titok_heads": 8, "video_titok_dropout": 0.1, "use_multi_scale": true, "use_continuous_scale": true, "image_min_size": 128, "image_max_size": 384, "image_base_size": 256, "image_size_step": 32, "video_min_size": 128, "video_max_size": 320, "video_base_size": 320, "video_size_step": 32, "video_min_frames": 8, "video_max_frames": 8, "video_base_frames": 16, "video_frame_step": 4, "multi_scale_strategy": "adaptive", "multi_scale_warmup_epochs": 3, "adaptive_scale_oom_penalty": 0.5, "adaptive_scale_success_boost": 0.1, "generation_supported_sizes": [ 192, 256, 320, 384 ], "generation_supported_frames": [ 8, 12, 16, 20, 24 ], "enable_generation": true, "generation_latent_channels": 4, "generation_base_channels": 128, "generation_inference_steps": 50, "generation_cfg_scale": 7.5, "generation_use_flow_matching": true, "generation_num_experts": 4, "generation_use_dual_stream": true, "generation_video_cfg_scale": 7.5, "generation_video_use_flow_matching": true, "generation_video_num_experts": 4, "generation_video_use_3d_rope": true, "generation_video_use_temporal_moe": true, "audio_sample_rate": 16000, "audio_n_mels": 80, "audio_max_length": 625, "audio_max_waveform_samples": 160000, "audio_num_speakers": 256, "use_raw_waveform": true, "audio_kv_lora_rank": 256, "audio_speaker_embed_dim": 256, "use_mas": true, "use_in_context_audio_prompting": true, "tokenizer_name": "Qwen/Qwen2.5-1.5B", "use_lora": true, "lora_r": 32, "lora_alpha": 64, "lora_dropout": 0.05, "lora_target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ], "train_lora_only": false, "use_rslora": true, "use_dora": false, "lora_plus_lr_ratio": 4.0, "use_cross_attention": true, "cross_attention_layers": 4, "cross_attention_heads": 8, "cross_attention_dropout": 0.1, "use_flash_attention": true, "output_dir": "./xoron-model", "has_audio_encoder": true, "has_audio_decoder": true, "has_waveform_decoder": true, "has_vision_encoder": true, "has_video_encoder": true, "has_generator": true, "has_video_generator": true, "has_cross_attention": true, "lora_applied": true, "architecture_version": 2, "auto_map": { "AutoConfig": "configuration_xoron.XoronConfig", "AutoModel": "modeling_xoron.XoronModel", "AutoModelForCausalLM": "modeling_xoron.XoronForCausalLM" } }