[hotfix] ZeroDDP use new process group (#1333)

* process group supports getting ranks in group

* chunk mgr receives a process group

* update unit test

* fix unit tests
This commit is contained in:
ver217
2022-07-18 14:14:52 +08:00
committed by GitHub
parent 11d1436a67
commit 0c51ff2c13
9 changed files with 49 additions and 43 deletions

View File

@@ -20,13 +20,14 @@ from colossalai.tensor import ProcessGroup
 def init_zero(model, use_chunk, use_zero, placement_policy):
+    pg = ProcessGroup()
     chunk_size = ChunkManager.search_chunk_size(model, 8192, 8) if use_chunk else None
     chunk_manager = ChunkManager(chunk_size,
+                                 pg,
                                  enable_distributed_storage=use_zero,
                                  init_device=GeminiManager.get_default_device(placement_policy))
     gemini_manager = GeminiManager(placement_policy, chunk_manager)
-    pg = ProcessGroup()
-    return ZeroDDP(model, gemini_manager, pg)
+    return ZeroDDP(model, gemini_manager)
 def run_step(model, optim, criterion, data, label):