[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692) * [legacy] remove cli of benchmark and update optim (#4690) * [legacy] remove cli of benchmark and update optim * [doc] fix cli doc test * [legacy] fix engine clip grad norm * [legacy] remove outdated colo tensor (#4694) * [legacy] remove outdated colo tensor * [test] fix test import * [legacy] move outdated zero to legacy (#4696) * [legacy] clean up utils (#4700) * [legacy] clean up utils * [example] update examples * [legacy] clean up amp * [legacy] fix amp module * [legacy] clean up gpc (#4742) * [legacy] clean up context * [legacy] clean core, constants and global vars * [legacy] refactor initialize * [example] fix examples ci * [example] fix examples ci * [legacy] fix tests * [example] fix gpt example * [example] fix examples ci * [devops] fix ci installation * [example] fix examples ci
2025-09-03 18:19:58 +00:00 · 2023-09-18 16:31:06 +08:00
parent 32e7f99416
commit b5f9e37c70
342 changed files with 2919 additions and 4182 deletions
--- a/tests/test_legacy/test_utils/test_activation_checkpointing.py
+++ b/tests/test_legacy/test_utils/test_activation_checkpointing.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.context.random import add_seed, reset_seeds, seed, set_mode
+from colossalai.legacy.utils.activation_checkpoint import checkpoint
+from colossalai.testing import clear_cache_before_run, parameterize
+
+
+def forward(x, weight):
+    out = torch.matmul(x, weight)
+    with seed(ParallelMode.DATA):
+        out_ = F.dropout(out, p=0.4, training=True)
+    return out_
+
+
+def forward_inplace_ckpt(x, weight, cpu_offload=False):
+    out = torch.matmul(x, weight)
+    bn = torch.nn.BatchNorm1d(4, affine=False)
+    bn = bn.to(device="cuda")
+    out = bn(out)
+
+    def ckpt0(x):
+        return F.relu(x, inplace=True)
+
+    out = checkpoint(ckpt0, cpu_offload, out, use_reentrant=False)
+    return out
+
+
+def forward_inplace(x, weight):
+    out = torch.matmul(x, weight)
+    bn = torch.nn.BatchNorm1d(4, affine=False)
+    bn = bn.to(device="cuda")
+    out = bn(out)
+    out = F.relu(out, inplace=True)
+    return out
+
+
+@clear_cache_before_run()
+@parameterize("use_reentrant", [True, False])
+@parameterize("cpu_offload", [True, False])
+def test_activation_checkpointing(cpu_offload, use_reentrant):
+
+    # as seed manager is singleton
+    # if we don't reset seeds here,
+    # other tests might affect this test
+    reset_seeds()
+
+    # We put initialization here to avoid change cuda rng state below
+    inputs = torch.rand(2, 2, requires_grad=True, device='cuda')
+    weight = torch.rand(2, 4, requires_grad=True, device='cuda')
+
+    # Get a copy of input tensors
+    inputs_ = torch.empty(2, 2, requires_grad=True, device='cuda')
+    inputs_.data.copy_(inputs.data)
+    weight_ = torch.empty(2, 4, requires_grad=True, device='cuda')
+    weight_.data.copy_(weight.data)
+
+    add_seed(ParallelMode.GLOBAL, 1024)
+    add_seed(ParallelMode.DATA, 1026)
+    set_mode(ParallelMode.GLOBAL)
+    global_cuda_rng_state = torch.cuda.get_rng_state()
+    set_mode(ParallelMode.DATA)
+    data_parallel_cuda_rng_state = torch.cuda.get_rng_state()
+    set_mode(ParallelMode.GLOBAL)
+
+    out = forward(inputs, weight)
+    loss = out.sum()
+    loss.backward()
+
+    # Recover cuda rng states
+    set_mode(ParallelMode.GLOBAL)
+    torch.cuda.set_rng_state(global_cuda_rng_state)
+    set_mode(ParallelMode.DATA)
+    torch.cuda.set_rng_state(data_parallel_cuda_rng_state)
+    set_mode(ParallelMode.GLOBAL)
+
+    out = checkpoint(forward, cpu_offload, inputs_, weight_, use_reentrant=use_reentrant)
+    loss = out.sum()
+    loss.backward()
+
+    assert torch.all(inputs.grad == inputs_.grad), 'Gradient of the input does not match'
+    torch.cuda.empty_cache()
+
+    # Extra test for use_reentrant=False
+    if use_reentrant == False:
+        # Recover cuda rng states
+        set_mode(ParallelMode.GLOBAL)
+        torch.cuda.set_rng_state(global_cuda_rng_state)
+        set_mode(ParallelMode.DATA)
+        torch.cuda.set_rng_state(data_parallel_cuda_rng_state)
+        set_mode(ParallelMode.GLOBAL)
+
+        out = forward_inplace(inputs, weight)
+        loss = out.sum()
+        loss.backward()
+
+        # Recover cuda rng states
+        set_mode(ParallelMode.GLOBAL)
+        torch.cuda.set_rng_state(global_cuda_rng_state)
+        set_mode(ParallelMode.DATA)
+        torch.cuda.set_rng_state(data_parallel_cuda_rng_state)
+        set_mode(ParallelMode.GLOBAL)
+
+        out = forward_inplace_ckpt(inputs_, weight_, cpu_offload=cpu_offload)
+        loss = out.sum()
+        loss.backward()
+
+        assert torch.all(inputs.grad == inputs_.grad), 'Gradient of the input does not match'
+        torch.cuda.empty_cache()
+
+    # as seed manager is singleton
+    # if we don't reset seeds here,
+    # other tests will fail if running together with this test
+    # as other tests can't overwrite the seed set by this test
+    reset_seeds()
+
+
+if __name__ == "__main__":
+    test_activation_checkpointing(False, False)