Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-06 19:40:28 +00:00
[fp8] hotfix backward hook (#6053)
* [fp8] hotfix backward hook
* [fp8] hotfix pipeline loss accumulation
@@ -9,6 +9,7 @@ import os
 # https://forums.developer.nvidia.com/t/how-many-streams-maximum-number-of-streams/6571/16
 os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
 
+import torch
 import torch.distributed as dist
 
 from colossalai.accelerator import get_accelerator
@@ -64,6 +65,11 @@ def launch(
 
     set_seed(seed)
 
+    try:
+        torch._dynamo.config.optimize_ddp = world_size > 1
+    except AttributeError:
+        pass
+
     if verbose:
         logger = get_dist_logger()
         logger.info(f"Distributed environment is initialized, world size: {dist.get_world_size()}", ranks=[0])
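For context, the guarded assignment added in the second hunk is a defensive pattern: torch._dynamo is a private module that older PyTorch builds do not expose, so the attribute access is wrapped in try/except rather than gated on a version check. Below is a minimal, self-contained sketch of that pattern; the helper name enable_dynamo_ddp_optimization is illustrative and not part of the commit.

import torch

def enable_dynamo_ddp_optimization(world_size: int) -> None:
    # Illustrative helper: enable torch._dynamo's DDP graph optimization only
    # when more than one rank is present; quietly no-op on PyTorch builds where
    # torch._dynamo (or its config attribute) does not exist.
    try:
        torch._dynamo.config.optimize_ddp = world_size > 1
    except AttributeError:
        pass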