__init__.py
|
[feat] support qwen3 in shardformer
|
2025-07-10 13:57:52 +08:00 |
bert.py
|
[upgrade]Upgrade transformers (#6320)
|
2025-05-27 14:29:01 +08:00 |
chatglm2.py
|
[test] fix chatglm test kit (#5793)
|
2024-06-11 16:54:31 +08:00 |
command.py
|
[Feature] Zigzag Ring attention (#5905)
|
2024-08-16 13:56:38 +08:00 |
deepseek_v3.py
|
[release] update version (#6195)
|
2025-02-20 11:36:18 +08:00 |
deepseek.py
|
[misc] remove debug/print code
|
2024-08-01 10:06:59 +08:00 |
llama.py
|
[Feature] Zigzag Ring attention (#5905)
|
2024-08-16 13:56:38 +08:00 |
mistral.py
|
[Feature] Zigzag Ring attention (#5905)
|
2024-08-16 13:56:38 +08:00 |
mixtral.py
|
[Feature] MoE Ulysses Support (#5918)
|
2024-08-01 10:06:59 +08:00 |
opt.py
|
[upgrade]Upgrade transformers (#6320)
|
2025-05-27 14:29:01 +08:00 |
qwen2.py
|
[Feature] Zigzag Ring attention (#5905)
|
2024-08-16 13:56:38 +08:00 |
qwen3.py
|
[feat] support qwen3 in shardformer
|
2025-07-10 13:57:52 +08:00 |