[zero] update zero context init with the updated test utils (#327)

This commit is contained in:
Jiarui Fang
2022-03-08 14:45:01 +08:00
committed by Frank Lee
parent 6268446b81
commit 11bddb6e55
10 changed files with 96 additions and 49 deletions

View File

@@ -1,6 +1,7 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from colossalai.nn import CheckpointModule
from .utils import DummyDataGenerator
from .registry import non_distributed_component_funcs
@@ -15,10 +16,10 @@ class SubNet(nn.Module):
return F.linear(x, weight, self.bias)
class NestedNet(nn.Module):
class NestedNet(CheckpointModule):
def __init__(self) -> None:
super().__init__()
def __init__(self, checkpoint=False) -> None:
super().__init__(checkpoint)
self.fc1 = nn.Linear(5, 5)
self.sub_fc = SubNet(5)
self.fc2 = nn.Linear(5, 2)
@@ -41,9 +42,15 @@ class DummyDataLoader(DummyDataGenerator):
@non_distributed_component_funcs.register(name='nested_model')
def get_training_components():
model = NestedNet()
def model_builder(checkpoint=False):
    """Construct a fresh ``NestedNet``.

    Args:
        checkpoint: Forwarded unchanged to ``NestedNet.__init__``, which
            passes it on to its ``CheckpointModule`` base class. Defaults to
            ``False``, the same default ``NestedNet`` itself declares, so
            the builder can be called with no arguments.

    Returns:
        A new ``NestedNet`` instance.
    """
    return NestedNet(checkpoint)
trainloader = DummyDataLoader()
testloader = DummyDataLoader()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
def optim_builder(model, lr=0.001):
    """Create an Adam optimizer over all parameters of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        lr: Learning rate handed to Adam. Defaults to 0.001, the value that
            used to be hard-coded, so one-argument calls behave as before.

    Returns:
        A ``torch.optim.Adam`` instance bound to ``model.parameters()``.
    """
    return torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
return model, trainloader, testloader, optim, criterion
return model_builder, trainloader, testloader, optim_builder, criterion

View File

@@ -36,9 +36,15 @@ class DummyDataLoader(DummyDataGenerator):
@non_distributed_component_funcs.register(name='repeated_computed_layers')
def get_training_components():
model = NetWithRepeatedlyComputedLayers(checkpoint=True)
def model_builder(checkpoint=True):
    """Construct a fresh ``NetWithRepeatedlyComputedLayers``.

    Args:
        checkpoint: Passed positionally to the model constructor. Defaults
            to ``True``.

    Returns:
        A new ``NetWithRepeatedlyComputedLayers`` instance.
    """
    net = NetWithRepeatedlyComputedLayers(checkpoint)
    return net
trainloader = DummyDataLoader()
testloader = DummyDataLoader()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
def optim_builder(model, lr=0.001):
    """Create an Adam optimizer over all parameters of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        lr: Learning rate handed to Adam. Defaults to 0.001, the value that
            used to be hard-coded, so one-argument calls behave as before.

    Returns:
        A ``torch.optim.Adam`` instance bound to ``model.parameters()``.
    """
    return torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
return model, trainloader, testloader, optim, criterion
return model_builder, trainloader, testloader, optim_builder, criterion

View File

@@ -22,9 +22,15 @@ def get_cifar10_dataloader(train):
@non_distributed_component_funcs.register(name='resnet18')
def get_resnet_training_components():
model = resnet18(num_classes=10)
def model_builder(checkpoint=False):
    """Construct a fresh ``resnet18`` with a 10-class output.

    Args:
        checkpoint: Accepted but not used by this builder; the model is
            always created as ``resnet18(num_classes=10)``.
            NOTE(review): presumably kept so every registered component
            exposes the same ``model_builder(checkpoint)`` call shape --
            confirm against the callers.

    Returns:
        A new ``resnet18`` model.
    """
    net = resnet18(num_classes=10)
    return net
trainloader = get_cifar10_dataloader(train=True)
testloader = get_cifar10_dataloader(train=False)
optim = torch.optim.Adam(model.parameters(), lr=0.001)
def optim_builder(model, lr=0.001):
    """Create an Adam optimizer over all parameters of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        lr: Learning rate handed to Adam. Defaults to 0.001, the value that
            used to be hard-coded, so one-argument calls behave as before.

    Returns:
        A ``torch.optim.Adam`` instance bound to ``model.parameters()``.
    """
    return torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
return model, trainloader, testloader, optim, criterion
return model_builder, trainloader, testloader, optim_builder, criterion