Mirror of https://github.com/hpcaitech/ColossalAI.git
* Add gradient accumulation, fix lr scheduler
* Fix FP16 optimizer and adapt torch amp to tensor parallelism (#18)
* Fixed compatibility bugs between torch amp and tensor parallelism, along with some minor fixes
* fixed trainer
* Revert "fixed trainer"
This reverts commit 2e0b0b7699.
* improved consistency between trainer, engine and schedule (#23)
Co-authored-by: 1SAA <c2h214748@gmail.com>
Co-authored-by: ver217 <lhx0217@gmail.com>
28 lines
795 B
Python
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

from typing import Union, List

from torch import Tensor


def convert_to_fp16(data: Union[Tensor, List[Tensor]]):
    """Cast a tensor, or a list/tuple of tensors, to half precision (FP16)."""
    if isinstance(data, Tensor):
        ret = data.half()
    elif isinstance(data, (list, tuple)):
        ret = [val.half() for val in data]
    else:
        raise TypeError(f"Expected argument 'data' to be a Tensor or a list/tuple of Tensor, but got {type(data)}")
    return ret


def convert_to_fp32(data: Union[Tensor, List[Tensor]]):
    """Cast a tensor, or a list/tuple of tensors, back to single precision (FP32)."""
    if isinstance(data, Tensor):
        ret = data.float()
    elif isinstance(data, (list, tuple)):
        ret = [val.float() for val in data]
    else:
        raise TypeError(f"Expected argument 'data' to be a Tensor or a list/tuple of Tensor, but got {type(data)}")
    return ret
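A minimal usage sketch, not part of the original file: it assumes PyTorch is installed and the two helpers above are in scope; the tensor shapes and variable names are arbitrary illustrations.

import torch

x = torch.randn(2, 3)                     # defaults to FP32
x_half = convert_to_fp16(x)               # single Tensor -> FP16 Tensor
assert x_half.dtype == torch.float16

grads = [torch.randn(4), torch.randn(5)]  # list of tensors
grads_half = convert_to_fp16(grads)       # list -> list of FP16 tensors
assert all(g.dtype == torch.float16 for g in grads_half)

# Back to single precision, e.g. for an FP32 master-weight update.
grads_fp32 = convert_to_fp32(grads_half)
assert all(g.dtype == torch.float32 for g in grads_fp32)

try:
    convert_to_fp16("not a tensor")       # anything else raises TypeError
except TypeError as err:
    print(err)

Note that a tuple input comes back as a list, since both sequence-handling branches build a list comprehension.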