diff --git a/modeling_mixformer_sequential.py b/modeling_mixformer_sequential.py index 7c10e73..5e3db86 100644 --- a/modeling_mixformer_sequential.py +++ b/modeling_mixformer_sequential.py @@ -717,6 +717,7 @@ class MixFormerSequentialForCausalLM(MixFormerSequentialPreTrainedModel): _keys_to_ignore_on_load_missing = [""] _keys_to_ignore_on_load_unexpected = [r"layers\.\d+\.mlp.(fc_in|fc_out)\.(weight|bias)"] + _no_split_modules = ["ParallelBlock"] def __init__(self, config: MixFormerSequentialConfig) -> None: super().__init__(config)