diff --git a/defuser/model_registry.py b/defuser/model_registry.py index 2183aa1..dc9b832 100644 --- a/defuser/model_registry.py +++ b/defuser/model_registry.py @@ -16,6 +16,9 @@ class PATCH(str, Enum): MODEL_CONFIG = { + "cohere2_moe": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + }, "dbrx": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, @@ -62,8 +65,12 @@ class PATCH(str, Enum): ], "checkpoint_mapping": [ WeightRenaming(".block_sparse_moe.", ".mlp."), - WeightRenaming(r".experts.(\d+).w1.weight", r".experts.\1.gate_proj.weight"), - WeightRenaming(r".experts.(\d+).w2.weight", r".experts.\1.down_proj.weight"), + WeightRenaming( + r".experts.(\d+).w1.weight", r".experts.\1.gate_proj.weight" + ), + WeightRenaming( + r".experts.(\d+).w2.weight", r".experts.\1.down_proj.weight" + ), WeightRenaming(r".experts.(\d+).w3.weight", r".experts.\1.up_proj.weight"), WeightConverter( source_patterns=".experts.gate_up_proj", @@ -124,7 +131,7 @@ class PATCH(str, Enum): ( "transformers.models.qwen3_omni_moe.modeling_qwen3_omni_moe.Qwen3OmniMoeTalkerTextSparseMoeBlock", "defuser.modeling.unfused_moe.qwen3_omni_moe.LinearQwen3OmniMoeTalkerTextSparseMoeBlock", - ) + ), ], }, "glm4_moe": { @@ -168,6 +175,9 @@ class PATCH(str, Enum): "glm4v_moe": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, + "glm4v_moe_text": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + }, "glm_image": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, @@ -192,6 +202,9 @@ class PATCH(str, Enum): "hunyuan_v1_moe": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, + "hy_v3": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + }, "jamba": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, @@ -210,6 +223,15 @@ class PATCH(str, Enum): } ], }, + "llama4_text": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + PATCH.EXPERTS_DEFUSE: [ + { + "module_class": "transformers.models.llama4.modeling_llama4.Llama4TextExperts", + "forward_impl": "batched_input", + } + ], + }, "lfm2_moe": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, diff --git a/pyproject.toml b/pyproject.toml index 1bfb14c..6817940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [project] name = "Defuser" -version = "0.0.22" +version = "0.0.23" description = "Model defuser helper for HF Transformers." readme = "README.md" requires-python = ">=3.9"