[shardformer] support pipeline for deepseek v3 and optimize lora save (#6188)

* [shardformer] support pipeline for deepseek v3

* [checkpointio] fix lora save

* [devops] update ci env

* [booster] optimize lora

* fix test

* fix test
This commit is contained in:
Hongxin Liu
2025-02-14 14:48:54 +08:00
committed by GitHub
parent ec73f1b5e2
commit 014837e725
21 changed files with 478 additions and 91 deletions

View File

@@ -156,7 +156,9 @@ def _check_for_nccl_hccl_backend(group):
while isinstance(pg, c10d._ProcessGroupWrapper):
pg = pg.wrapped_pg
return (c10d.is_nccl_available() or torch.distributed.is_hccl_available()) and pg.name() == c10d.Backend.NCCL
return (c10d.is_nccl_available() or torch.distributed.is_hccl_available()) and (
pg.name() == c10d.Backend.NCCL or pg.name() == c10d.Backend.HCCL
)
def _check_device(group):