{ "components": [ "llm", "vision_encoder", "video_encoder", "audio_encoder", "audio_decoder", "projector", "audio_projector", "cross_attention", "generator", "video_generator", "waveform_decoder", "modality_markers" ], "save_format": "components" }