[Shardformer] Support the Qwen2 model (#5699)

* feat: support qwen2 model * fix: modify model config and add Qwen2RMSNorm * fix qwen2 model conflicts * test: add qwen2 shard test * to: add qwen2 auto policy * support qwen model * fix the conflicts * add try catch * add transformers version for qwen2 * add the ColoAttention for the qwen2 model * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add the unit test version check * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix the test input bug * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix the version check * fix the version check --------- Co-authored-by: Wenhao Chen <cwher@outlook.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-09-27 04:33:04 +00:00 · 2024-05-09 20:04:25 +08:00
parent d4c5ef441e
commit a3cc68ca93
6 changed files with 1466 additions and 0 deletions
--- a/colossalai/shardformer/policies/auto_policy.py
+++ b/colossalai/shardformer/policies/auto_policy.py
@@ -182,6 +182,16 @@ _POLICY_LIST = {
    "transformers.models.mistral.modeling_mistral.MistralForSequenceClassification": PolicyLocation(
        file_name="mistral", class_name="MistralForSequenceClassificationPolicy"
    ),
+    # Qwen2
+    "transformers.models.qwen2.modeling_qwen2.Qwen2Model": PolicyLocation(
+        file_name="qwen2", class_name="Qwen2ModelPolicy"
+    ),
+    "transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM": PolicyLocation(
+        file_name="qwen2", class_name="Qwen2ForCausalLMPolicy"
+    ),
+    "transformers.models.qwen2.modeling_qwen2.Qwen2ForSequenceClassification": PolicyLocation(
+        file_name="qwen2", class_name="Qwen2ForSequenceClassificationPolicy"
+    ),
 }