[misc] update torch version (#6206)

* [misc] update torch version

* fix test

* fix test

* fix test

* fix test
Author: Hongxin Liu
Date: 2025-02-24 14:35:48 +08:00
Committed by: GitHub
Parent: b9e60559b8
Commit: f32861ccc5
5 changed files with 7 additions and 6 deletions


@@ -1,7 +1,7 @@
 from colossalai.cluster.device_mesh_manager import DeviceMeshInfo, DeviceMeshManager
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.testing import spawn
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 def check_device_mesh_manager(rank, world_size, port):
@@ -24,6 +24,7 @@ def check_device_mesh_manager(rank, world_size, port):
     assert device_mesh_with_shape._logical_mesh_id.tolist() == [[0, 1], [2, 3]]
+@rerun_if_address_is_in_use()
 def test_device_mesh_manager():
     spawn(check_device_mesh_manager, 4)
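
For context, rerun_if_address_is_in_use is the decorator in colossalai.testing that reruns a distributed test when the rendezvous port of a previous run is still occupied, a common source of flaky failures in multi-process tests. A minimal sketch of the pattern, assuming the spawn(func, nprocs) helper shown in the diff above (check_something and its body are hypothetical placeholders):

import colossalai
from colossalai.testing import rerun_if_address_is_in_use, spawn


def check_something(rank, world_size, port):
    # Each spawned worker initializes the distributed runtime on the given port.
    colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port)
    # ... assertions on the distributed state would go here ...


@rerun_if_address_is_in_use()
def test_something():
    # Spawn 4 worker processes; if the port is still bound from an earlier
    # run, the decorator retries the whole test.
    spawn(check_something, 4)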


@@ -51,7 +51,7 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
     if test_config["precision"] == "fp32":
         atol, rtol = 1e-5, 1e-3
     else:
-        atol, rtol = 5e-2, 5e-2
+        atol, rtol = 9e-2, 0
     if (stage_manager is None or stage_manager.is_first_stage()) and booster.plugin.zero_stage == 0:
         row_layer_grads = get_grad_tensors_for_check(
             t5, sharded_t5, row_layer_for_check, tp_group, atol=atol, rtol=rtol, dim=0
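
For reference on what the new tolerances mean: allclose-style checks pass when |actual - expected| <= atol + rtol * |expected|, so setting rtol to 0 turns the comparison into a pure absolute-error bound of 9e-2 — looser than the old 5e-2/5e-2 pair for small-magnitude values, but tighter for large ones. A minimal sketch of the criterion (tensor values are illustrative only):

import torch

atol, rtol = 9e-2, 0.0

expected = torch.tensor([1.0, 2.0, 3.0])
actual = expected + 0.05  # deviation within the 9e-2 absolute bound

# With rtol = 0 the criterion |actual - expected| <= atol + rtol * |expected|
# reduces to a plain absolute check: |actual - expected| <= atol.
assert torch.allclose(actual, expected, atol=atol, rtol=rtol)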