[workflow] refactored the example check workflow (#2411)

* [workflow] refactored the example check workflow

* polish code

Author: Frank Lee
Date: 2023-01-10 11:26:19 +08:00 (committed by GitHub)
Parent: 8de8de9fa3 · Commit: 8327932d2c
10 changed files with 113 additions and 92 deletions

@@ -6,8 +6,8 @@ from colossalai.amp import AMP_TYPE
 BATCH_SIZE = 256
 LEARNING_RATE = 3e-3
 WEIGHT_DECAY = 0.3
-NUM_EPOCHS = 10
-WARMUP_EPOCHS = 3
+NUM_EPOCHS = 2
+WARMUP_EPOCHS = 1

 # model config
 IMG_SIZE = 224
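
This hunk trims the example's training schedule from 10 epochs to 2 (with a single warmup epoch) so the example check finishes quickly in CI. For context, a minimal sketch of what the full config file might look like after this change; the fp16 block is an assumption inferred from the AMP_TYPE import and is not shown in the diff:

from colossalai.amp import AMP_TYPE

# training hyperparameters, tuned down so the example check runs fast
BATCH_SIZE = 256
LEARNING_RATE = 3e-3
WEIGHT_DECAY = 0.3
NUM_EPOCHS = 2
WARMUP_EPOCHS = 1

# model config
IMG_SIZE = 224

# mixed-precision training (assumed; implied by the AMP_TYPE import above)
fp16 = dict(mode=AMP_TYPE.TORCH)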

@@ -1,2 +1,3 @@
 colossalai >= 0.1.12
 torch >= 1.8.1
+titans

@@ -0,0 +1,5 @@
+#!/bin/bash
+set -euxo pipefail
+
+pip install -r requirements.txt
+torchrun --standalone --nproc_per_node 4 train.py --config config.py -s
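
This new script is the entry point the refactored workflow runs for the example: set -euxo pipefail makes the script echo each command and abort on any error, unset variable, or failed pipeline stage, and torchrun launches four processes (one per GPU) with the -s flag to train on synthetic data. That flag surfaces as args.synthetic in train.py (next hunk); a minimal sketch of how it might be declared, assuming a plain argparse setup (the actual example may route through ColossalAI's default parser):

import argparse

def parse_args():
    # hypothetical parser sketch; only --config and -s are implied by the script above
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, required=True, help='path to the config file')
    parser.add_argument('-s', '--synthetic', action='store_true',
                        help='train on synthetic data instead of downloading CIFAR')
    return parser.parse_args()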

@@ -98,9 +98,9 @@ def main():
     root = os.environ.get('DATA', '../data')
     if args.synthetic:
         # if we use synthetic dataset
-        # we train for 30 steps and eval for 10 steps per epoch
-        train_dataloader = DummyDataloader(length=30, batch_size=gpc.config.BATCH_SIZE)
-        test_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
+        # we train for 10 steps and eval for 5 steps per epoch
+        train_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
+        test_dataloader = DummyDataloader(length=5, batch_size=gpc.config.BATCH_SIZE)
     else:
         train_dataloader, test_dataloader = build_cifar(gpc.config.BATCH_SIZE, root, pad_if_needed=True)
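
Shrinking the synthetic run from 30/10 to 10/5 train/eval steps per epoch further shortens the check. DummyDataloader itself is defined outside this diff; a minimal sketch of such a synthetic loader, assuming it yields random image/label batches (the img_size and num_classes defaults here are illustrative, not taken from the example):

import torch

class DummyDataloader:
    """Yield a fixed number of random (image, label) batches per epoch."""

    def __init__(self, length, batch_size, img_size=224, num_classes=10):
        self.length = length          # batches per epoch
        self.batch_size = batch_size
        self.img_size = img_size
        self.num_classes = num_classes

    def __len__(self):
        return self.length

    def __iter__(self):
        for _ in range(self.length):
            data = torch.rand(self.batch_size, 3, self.img_size, self.img_size)
            label = torch.randint(0, self.num_classes, (self.batch_size,))
            yield data, label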