"""
Shared Subspace Decoder Models

This module contains the implementation of the Shared Subspace Decoder architecture,
including Multi-Head Latent Attention (MLA) and decomposed MLP layers.

Importing this module has a side effect: the configuration and model classes
are registered with the ``transformers`` Auto* factories (see below).
"""

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

from .configuration_shared_subspace_decoder import SharedSpaceDecoderConfig
from .modeling_shared_subspace_decoder import (
    SharedSpaceDecoderPreTrainedModel,
    SharedSpaceDecoderModel,
)

from ..layers.task_heads import SharedSpaceDecoderForCausalLM

# Register the custom classes with the Auto* factories at import time.
# NOTE(review): transformers expects the string key to equal
# SharedSpaceDecoderConfig.model_type -- confirm against the config class,
# which is not visible from this file.
AutoConfig.register("shared_subspace_decoder", SharedSpaceDecoderConfig)

# Map the config class to its base model and its causal-LM head so that
# AutoModel / AutoModelForCausalLM can dispatch on the config type.
AutoModel.register(SharedSpaceDecoderConfig, SharedSpaceDecoderModel)
AutoModelForCausalLM.register(SharedSpaceDecoderConfig, SharedSpaceDecoderForCausalLM)

# Public API re-exported by this module.
__all__ = [
    "SharedSpaceDecoderConfig",
    "SharedSpaceDecoderPreTrainedModel",
    "SharedSpaceDecoderModel",
    "SharedSpaceDecoderForCausalLM",
]