[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Author: Hongxin Liu
Date: 2023-09-19 14:20:26 +08:00
Committed by: GitHub
Parent: 3c6b831c26
Commit: 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions
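Almost all of the diff below is mechanical restyling produced by the pre-commit run: string literals switch from single to double quotes, and long signatures and calls are re-wrapped from aligned-continuation (yapf-era) style into black's form, either collapsed onto one line or exploded one argument per line with a trailing comma. A minimal before/after sketch of those two normalizations, using a hypothetical function that is not part of this repository and assuming a signature longer than the configured line length:

# Hypothetical illustration only; not code from this repository.
# Before the pre-commit run (aligned-continuation style, single quotes):
def build_config(name: str = None,
                 value: int = None,
                 enabled: bool = None):
    return {'name': name, 'value': value, 'enabled': enabled}


# After the pre-commit run (black style: double quotes, exploded arguments, trailing comma):
def build_config(
    name: str = None,
    value: int = None,
    enabled: bool = None,
):
    return {"name": name, "value": value, "enabled": enabled}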

View File

@@ -16,7 +16,6 @@ from colossalai.testing.random import seed_all
 class MlpModel(nn.Module):
     def __init__(self):
         super(MlpModel, self).__init__()
         self.linear1 = nn.Linear(4, 8)
@@ -40,19 +39,20 @@ class MlpModel(nn.Module):
         return x

-def pp_linear_fwd(forward,
-                  data: torch.Tensor = None,
-                  input_obj: torch.Tensor = None,
-                  stage_mgr: PipelineStageManager = None,
-                  num_chunks: int = None,
-                  model_chunk_id: int = None):
+def pp_linear_fwd(
+    forward,
+    data: torch.Tensor = None,
+    input_obj: torch.Tensor = None,
+    stage_mgr: PipelineStageManager = None,
+    num_chunks: int = None,
+    model_chunk_id: int = None,
+):
     if stage_mgr.is_first_stage() and model_chunk_id == 0:
-        return {'input_obj': forward(data)}
+        return {"input_obj": forward(data)}
     elif stage_mgr.is_last_stage() and model_chunk_id == num_chunks - 1:
         return forward(input_obj)
     else:
-        return {'input_obj': forward(input_obj)}
+        return {"input_obj": forward(input_obj)}

 @parameterize("num_micro_batches", [4, 8, 12])
@@ -84,10 +84,11 @@ def examine_pp(num_micro_batches):
         if idx % (world_size) == local_rank:
             sub_model._forward = sub_model.forward
             sub_model.forward = MethodType(
-                partial(pp_linear_fwd,
-                        stage_mgr=stage_manager,
-                        num_chunks=NUM_CHUNKS,
-                        model_chunk_id=len(sharded_model)), sub_model._forward)
+                partial(
+                    pp_linear_fwd, stage_mgr=stage_manager, num_chunks=NUM_CHUNKS, model_chunk_id=len(sharded_model)
+                ),
+                sub_model._forward,
+            )
             sharded_model.append(sub_model.cuda())

     # create optimizer
@@ -109,16 +110,13 @@ def examine_pp(num_micro_batches):
     torch_loss = criterion(torch_output, _)
     torch_loss.backward()
-    pp_ret = schedule.forward_backward_step(sharded_model,
-                                            iter(input_list),
-                                            criterion,
-                                            pp_optimizer,
-                                            return_loss=True,
-                                            return_outputs=True)
+    pp_ret = schedule.forward_backward_step(
+        sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True, return_outputs=True
+    )

     # check loss
     if stage_manager.is_last_stage():
-        assert torch.allclose(torch_loss, pp_ret['loss'])
+        assert torch.allclose(torch_loss, pp_ret["loss"])

     # check gradients
     torch_grad = []
@@ -147,7 +145,7 @@ def examine_pp(num_micro_batches):
 def run_dist(rank, world_size, port):
-    colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')
+    colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost")
     examine_pp()
@@ -157,5 +155,5 @@ def test_pp():
     spawn(run_dist, 4)

-if __name__ == '__main__':
+if __name__ == "__main__":
     test_pp()
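Aside from the quote and wrapping changes, the hunk above that rewrites sub_model.forward uses a standard-library pattern worth spelling out: functools.partial pre-binds the extra pipeline arguments, and types.MethodType attaches the resulting callable to the saved original forward, so the wrapper receives that original forward as its first argument on every call. The following self-contained sketch shows the same rebinding on a toy class; the class and helper names are hypothetical, and only partial and MethodType are real APIs.

from functools import partial
from types import MethodType


class Layer:
    def forward(self, x):
        return x * 2


def tagged_forward(original_forward, x, stage_id=None):
    # Call the saved forward and wrap its result, mirroring pp_linear_fwd above.
    return {"stage_id": stage_id, "output": original_forward(x)}


layer = Layer()
layer._forward = layer.forward  # keep the original bound method
# partial(...) fixes stage_id; MethodType binds the partial to the saved forward,
# so tagged_forward receives it as its first argument on every call.
layer.forward = MethodType(partial(tagged_forward, stage_id=0), layer._forward)
print(layer.forward(3))  # {'stage_id': 0, 'output': 6}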

View File

@@ -16,7 +16,6 @@ from colossalai.testing.random import seed_all
 class MlpModel(nn.Module):
     def __init__(self):
         super(MlpModel, self).__init__()
         self.linear1 = nn.Linear(4, 8)
@@ -28,17 +27,15 @@ class MlpModel(nn.Module):
         return x

-def pp_linear_fwd(forward,
-                  data: torch.Tensor = None,
-                  input_obj: torch.Tensor = None,
-                  stage_mgr: PipelineStageManager = None):
+def pp_linear_fwd(
+    forward, data: torch.Tensor = None, input_obj: torch.Tensor = None, stage_mgr: PipelineStageManager = None
+):
     if stage_mgr.is_first_stage():
-        return {'input_obj': forward(data)}
+        return {"input_obj": forward(data)}
     elif stage_mgr.is_last_stage():
         return forward(input_obj)
     else:
-        return {'input_obj': forward(input_obj)}
+        return {"input_obj": forward(input_obj)}

 def examine_pp():
@@ -89,16 +86,13 @@ def examine_pp():
     torch_loss = criterion(torch_output, _)
     torch_loss.backward()
-    pp_ret = schedule.forward_backward_step(sharded_model,
-                                            iter(input_list),
-                                            criterion,
-                                            pp_optimizer,
-                                            return_loss=True,
-                                            return_outputs=True)
+    pp_ret = schedule.forward_backward_step(
+        sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True, return_outputs=True
+    )

     # check loss
     if stage_manager.is_last_stage():
-        assert torch.allclose(torch_loss, pp_ret['loss'])
+        assert torch.allclose(torch_loss, pp_ret["loss"])

     # check gradients
     torch_grad = []
@@ -120,7 +114,7 @@ def examine_pp():
 def run_dist(rank, world_size, port):
-    colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost')
+    colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost")
     examine_pp()
@@ -130,5 +124,5 @@ def test_pp():
     spawn(run_dist, 2)

-if __name__ == '__main__':
+if __name__ == "__main__":
     test_pp()

View File

@@ -8,9 +8,9 @@ def test_get_batch_size():
     assert get_batch_size(tensor) == 2
     assert get_batch_size([tensor]) == 2
     assert get_batch_size((1, tensor)) == 2
-    assert get_batch_size({'tensor': tensor}) == 2
-    assert get_batch_size({'dummy': [1], 'tensor': tensor}) == 2
-    assert get_batch_size({'tensor': [tensor]}) == 2
+    assert get_batch_size({"tensor": tensor}) == 2
+    assert get_batch_size({"dummy": [1], "tensor": tensor}) == 2
+    assert get_batch_size({"tensor": [tensor]}) == 2

 def test_get_micro_batch():
@@ -26,12 +26,12 @@ def test_get_micro_batch():
     micro_batch = get_micro_batch([x, y], 1, 1)
     assert torch.equal(micro_batch[0], x[1:2])
     assert torch.equal(micro_batch[1], y[1:2])
-    micro_batch = get_micro_batch({'x': x, 'y': y}, 0, 1)
-    assert torch.equal(micro_batch['x'], x[0:1])
-    assert torch.equal(micro_batch['y'], y[0:1])
-    micro_batch = get_micro_batch({'x': x, 'y': y}, 1, 1)
-    assert torch.equal(micro_batch['x'], x[1:2])
-    assert torch.equal(micro_batch['y'], y[1:2])
+    micro_batch = get_micro_batch({"x": x, "y": y}, 0, 1)
+    assert torch.equal(micro_batch["x"], x[0:1])
+    assert torch.equal(micro_batch["y"], y[0:1])
+    micro_batch = get_micro_batch({"x": x, "y": y}, 1, 1)
+    assert torch.equal(micro_batch["x"], x[1:2])
+    assert torch.equal(micro_batch["y"], y[1:2])

 def test_merge_batch():
@@ -42,6 +42,6 @@ def test_merge_batch():
     merged = merge_batch([[x[0:1], y[0:1]], [x[1:2], y[1:2]]])
     assert torch.equal(merged[0], x)
     assert torch.equal(merged[1], y)
-    merged = merge_batch([{'x': x[0:1], 'y': y[0:1]}, {'x': x[1:2], 'y': y[1:2]}])
-    assert torch.equal(merged['x'], x)
-    assert torch.equal(merged['y'], y)
+    merged = merge_batch([{"x": x[0:1], "y": y[0:1]}, {"x": x[1:2], "y": y[1:2]}])
+    assert torch.equal(merged["x"], x)
+    assert torch.equal(merged["y"], y)
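For context on what these assertions pin down: get_batch_size reports the leading dimension of the first tensor it finds, get_micro_batch slices every tensor in a (possibly nested) batch along dim 0, and merge_batch concatenates the slices back together. The sketch below is a plausible pure-PyTorch reading reconstructed only from the assertions above; it is not the actual colossalai implementation, and the argument names are assumptions.

import torch


def get_batch_size(batch):
    # Tensors report their first dimension; containers are searched for the first tensor.
    if isinstance(batch, torch.Tensor):
        return batch.size(0)
    if isinstance(batch, dict):
        batch = list(batch.values())
    if isinstance(batch, (list, tuple)):
        for item in batch:
            try:
                return get_batch_size(item)
            except ValueError:
                continue
    raise ValueError("no tensor found in batch")


def get_micro_batch(batch, start, micro_batch_size):
    # Slice every tensor along dim 0, preserving the container structure.
    if isinstance(batch, torch.Tensor):
        return batch[start : start + micro_batch_size]
    if isinstance(batch, dict):
        return {k: get_micro_batch(v, start, micro_batch_size) for k, v in batch.items()}
    return type(batch)(get_micro_batch(v, start, micro_batch_size) for v in batch)


def merge_batch(micro_batches):
    # Concatenate matching entries of each micro-batch back along dim 0.
    first = micro_batches[0]
    if isinstance(first, torch.Tensor):
        return torch.cat(list(micro_batches), dim=0)
    if isinstance(first, dict):
        return {k: merge_batch([mb[k] for mb in micro_batches]) for k in first}
    return type(first)(merge_batch(list(items)) for items in zip(*micro_batches))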