From 2380ddc99184d1ea5ddc29a0a46d9c395b5ae8d5 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 24 Jun 2026 15:17:56 +0800 Subject: [PATCH 1/4] support llama4_text and glm4v_moe_text Signed-off-by: ZX-ModelCloud --- defuser/model_registry.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/defuser/model_registry.py b/defuser/model_registry.py index 2183aa1..beda1e2 100644 --- a/defuser/model_registry.py +++ b/defuser/model_registry.py @@ -62,8 +62,12 @@ class PATCH(str, Enum): ], "checkpoint_mapping": [ WeightRenaming(".block_sparse_moe.", ".mlp."), - WeightRenaming(r".experts.(\d+).w1.weight", r".experts.\1.gate_proj.weight"), - WeightRenaming(r".experts.(\d+).w2.weight", r".experts.\1.down_proj.weight"), + WeightRenaming( + r".experts.(\d+).w1.weight", r".experts.\1.gate_proj.weight" + ), + WeightRenaming( + r".experts.(\d+).w2.weight", r".experts.\1.down_proj.weight" + ), WeightRenaming(r".experts.(\d+).w3.weight", r".experts.\1.up_proj.weight"), WeightConverter( source_patterns=".experts.gate_up_proj", @@ -124,7 +128,7 @@ class PATCH(str, Enum): ( "transformers.models.qwen3_omni_moe.modeling_qwen3_omni_moe.Qwen3OmniMoeTalkerTextSparseMoeBlock", "defuser.modeling.unfused_moe.qwen3_omni_moe.LinearQwen3OmniMoeTalkerTextSparseMoeBlock", - ) + ), ], }, "glm4_moe": { @@ -168,6 +172,9 @@ class PATCH(str, Enum): "glm4v_moe": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, + "glm4v_moe_text": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + }, "glm_image": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, @@ -210,6 +217,15 @@ class PATCH(str, Enum): } ], }, + "llama4_text": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + PATCH.EXPERTS_DEFUSE: [ + { + "module_class": "transformers.models.llama4.modeling_llama4.Llama4TextExperts", + "forward_impl": "batched_input", + } + ], + }, "lfm2_moe": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, From 72b5abddf12c96f6779790f4c3f811c9f9a4092a Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 24 Jun 2026 15:30:38 +0800 Subject: [PATCH 2/4] update version Signed-off-by: ZX-ModelCloud --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1bfb14c..6817940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [project] name = "Defuser" -version = "0.0.22" +version = "0.0.23" description = "Model defuser helper for HF Transformers." readme = "README.md" requires-python = ">=3.9" From 3e38432d49f61b18fd33882d2d74b76e4121fe33 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 24 Jun 2026 21:19:05 +0800 Subject: [PATCH 3/4] support hy_v3 Signed-off-by: ZX-ModelCloud --- defuser/model_registry.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/defuser/model_registry.py b/defuser/model_registry.py index beda1e2..8dcc26b 100644 --- a/defuser/model_registry.py +++ b/defuser/model_registry.py @@ -199,6 +199,9 @@ class PATCH(str, Enum): "hunyuan_v1_moe": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, + "hy_v3": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + }, "jamba": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, }, From cf4e4c99a7dce123ff910304b27311e6f96a8a3a Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 25 Jun 2026 19:50:26 +0800 Subject: [PATCH 4/4] support cohere2_moe Signed-off-by: ZX-ModelCloud --- defuser/model_registry.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/defuser/model_registry.py b/defuser/model_registry.py index 8dcc26b..dc9b832 100644 --- a/defuser/model_registry.py +++ b/defuser/model_registry.py @@ -16,6 +16,9 @@ class PATCH(str, Enum): MODEL_CONFIG = { + "cohere2_moe": { + "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, + }, "dbrx": { "min_transformers_version": MIN_SUPPORTED_TRANSFORMERS_VERSION, },