Layer integration (#83)

* integrated parallel layers for ease of building models

* integrated 2.5d layers

* cleaned up code and unit tests

* added log-metric-by-step hook; updated ImageNet benchmark; fixed some bugs

* reworked initialization; cleaned up code

Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com>
This commit is contained in:
アマデウス
2021-12-27 15:04:32 +08:00
committed by GitHub
parent 5c3843dc98
commit 0fedef4f3c
118 changed files with 4941 additions and 8116 deletions

View File

@@ -13,9 +13,7 @@ from tqdm import tqdm
def main():
colossalai.launch_from_torch(config='./config.py',
host='localhost',
port=29500)
colossalai.launch_from_torch(config='./config.py')
logger = get_dist_logger()

View File

@@ -1,22 +1,22 @@
import os
from pathlib import Path
from colossalai.logging import get_dist_logger
import colossalai
import torch
import os
from colossalai.core import global_context as gpc
from colossalai.utils import get_dataloader, MultiTimer
from colossalai.logging import get_dist_logger
from colossalai.nn import CosineAnnealingLR
from colossalai.nn.metric import Accuracy
from colossalai.trainer import Trainer, hooks
from colossalai.utils import MultiTimer, get_dataloader
from torchvision import transforms
from colossalai.trainer import hooks, Trainer
from torchvision.datasets import CIFAR10
from torchvision.models import resnet34
from colossalai.nn import CosineAnnealingLR
from tqdm import tqdm
def main():
colossalai.launch_from_torch(config='./config.py',
host='localhost',
port=29500)
colossalai.launch_from_torch(config='./config.py')
logger = get_dist_logger()
@@ -93,7 +93,7 @@ def main():
hook_list = [
hooks.LossHook(),
hooks.LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=True),
hooks.AccuracyHook(),
hooks.AccuracyHook(accuracy_func=Accuracy()),
hooks.LogMetricByEpochHook(logger),
hooks.LogMemoryByEpochHook(logger),
hooks.LogTimingByEpochHook(timer, logger),

View File

@@ -19,5 +19,5 @@ dataset = dict(
)
gradient_accumulation=2
gradient_clipping=1.0
clip_grad_norm=1.0

View File

@@ -20,4 +20,4 @@ dataset = dict(
)
gradient_accumulation=1
gradient_clipping=1.0
clip_grad_norm=1.0

View File

@@ -1,3 +1,4 @@
from colossalai.nn.metric import Accuracy
import torch
import colossalai
from colossalai.core import global_context as gpc
@@ -40,9 +41,7 @@ def build_dataset_test():
)
def main():
colossalai.launch_from_torch(config='./le_config.py',
host='localhost',
port=29500)
colossalai.launch_from_torch(config='./le_config.py')
# get logger
logger = get_dist_logger()
@@ -81,7 +80,7 @@ def main():
# build hooks
hook_list = [
hooks.LossHook(),
hooks.AccuracyHook(),
hooks.AccuracyHook(accuracy_func=Accuracy()),
hooks.LogMetricByEpochHook(logger),
hooks.LRSchedulerHook(lr_scheduler, by_epoch=True),
TotalBatchsizeHook(),

View File

@@ -41,9 +41,7 @@ def build_dataset_test():
)
def main():
colossalai.launch_from_torch(config='./config.py',
host='localhost',
port=29500)
colossalai.launch_from_torch(config='./config.py')
# get logger
logger = get_dist_logger()

View File

@@ -39,11 +39,7 @@ In your training script:
# initialize distributed setting
parser = colossalai.get_default_parser()
args = parser.parse_args()
colossalai.launch_from_torch(config=args.config,
host=args.host,
port=args.port,
backend=args.backend
)
colossalai.launch_from_torch(config=args.config)
```
In your terminal

View File

@@ -11,7 +11,7 @@ fp16 = dict(
)
gradient_accumulation = 16
gradient_clipping = 1.0
clip_grad_norm = 1.0
dali = dict(
# root='./dataset/ILSVRC2012_1k',

View File

@@ -2,6 +2,7 @@ import glob
from math import log
import os
import colossalai
from colossalai.nn.metric import Accuracy
import torch
from colossalai.context import ParallelMode
@@ -54,11 +55,15 @@ def main():
# initialize distributed setting
parser = colossalai.get_default_parser()
args = parser.parse_args()
# launch from slurm batch job
colossalai.launch_from_slurm(config=args.config,
host=args.host,
port=args.port,
backend=args.backend
)
# launch from torch
# colossalai.launch_from_torch(config=args.config)
# get logger
logger = get_dist_logger()
@@ -91,7 +96,7 @@ def main():
# build hooks
hook_list = [
hooks.LossHook(),
hooks.AccuracyHook(),
hooks.AccuracyHook(accuracy_func=Accuracy()),
hooks.LogMetricByEpochHook(logger),
hooks.LRSchedulerHook(lr_scheduler, by_epoch=True),
TotalBatchsizeHook(),