ColossalAI/configs/resnet/resnet50.py

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os

from colossalai.engine import AMP_TYPE

IMG_SIZE = 224       # CIFAR-10's 32x32 images are upscaled to 224x224
BATCH_SIZE = 256
NUM_EPOCHS = 100
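
# Architecture: bottleneck blocks stacked [3, 4, 6, 3], i.e. ResNet-50;
# num_cls=10 matches the CIFAR-10 data configured below.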
model = dict(
    type='VanillaResNet',
    block_type='ResNetBottleneck',
    layers=[3, 4, 6, 3],
    num_cls=10
)
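
# Training split: CIFAR-10 resized to 224x224 with crop/flip augmentation;
# the dataset root comes from the DATA environment variable, and the
# Normalize stats are the standard CIFAR-10 channel mean/std.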
train_data = dict(
    dataset=dict(
        type='CIFAR10Dataset',
        root=os.environ['DATA'],
        transform_pipeline=[
            dict(type='Resize', size=IMG_SIZE),
            dict(type='RandomCrop', size=IMG_SIZE, padding=4),
            dict(type='RandomHorizontalFlip'),
            dict(type='ToTensor'),
            dict(type='Normalize',
                 mean=[0.4914, 0.4822, 0.4465],
                 std=[0.2023, 0.1994, 0.2010]),
        ]
    ),
    dataloader=dict(
        batch_size=BATCH_SIZE,
        pin_memory=True,
        shuffle=True,
    )
)
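
# Test split: same resize and normalization, but no augmentation and no
# shuffling; train=False selects the CIFAR-10 test set.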
test_data = dict(
    dataset=dict(
        type='CIFAR10Dataset',
        root=os.environ['DATA'],
        train=False,
        transform_pipeline=[
            dict(type='Resize', size=IMG_SIZE),
            dict(type='ToTensor'),
            dict(type='Normalize',
                 mean=[0.4914, 0.4822, 0.4465],
                 std=[0.2023, 0.1994, 0.2010]),
        ]
    ),
    dataloader=dict(
        batch_size=BATCH_SIZE,
        pin_memory=True,
    )
)
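
# Parallel layout: one pipeline stage and a tensor-parallel group of size 1
# with mode=None, i.e. pipeline and tensor parallelism are both disabled and
# training falls back to plain data parallelism.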
parallelization = dict(
    pipeline=1,
    tensor=dict(size=1, mode=None),
)
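
# Optimization: Adam at a flat learning rate of 0.01 with the standard
# cross-entropy criterion.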
optimizer = dict(
    type='Adam',
    lr=0.01
)
loss = dict(
    type='CrossEntropyLoss'
)
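
# Mixed precision: AMP_TYPE.APEX selects NVIDIA Apex AMP; opt_level 'O2'
# runs the model largely in FP16 while keeping FP32 master weights.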
fp16 = dict(
    mode=AMP_TYPE.APEX,
    opt_level='O2',
)
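
# ----------------------------------------------------------------------
# How a config like this is consumed (a minimal sketch, NOT code from this
# commit): a 2021-era ColossalAI training script pointed --config at this
# file and let the framework build every component declared above through
# its registry. The driver below is kept commented out because this file is
# itself executed as a config; `train.py`, the initialize() return values,
# and the Trainer/fit() keywords are assumptions that varied across early
# releases.
#
# Launched e.g. as:
#   python -m torch.distributed.launch --nproc_per_node=<gpus> \
#       train.py --config configs/resnet/resnet50.py
#
# import colossalai
# from colossalai.core import global_context as gpc
# from colossalai.trainer import Trainer
#
# # Assumed behavior: initialize() reads --config, sets up the process
# # groups declared in `parallelization`, and builds the model, optimizer,
# # criterion and dataloaders from the dicts above.
# engine, train_dataloader, test_dataloader = colossalai.initialize()
#
# trainer = Trainer(engine=engine, verbose=True)
# trainer.fit(train_dataloader=train_dataloader,
#             test_dataloader=test_dataloader,
#             max_epochs=gpc.config.NUM_EPOCHS,  # config values via gpc.config
#             display_progress=True)
# ----------------------------------------------------------------------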