mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-05 02:51:59 +00:00
[zero] cpu adam kernel (#288)
* Added CPU Adam
* finished the cpu adam
* updated the license
* delete useless parameters, removed resnet
* modified the method of cpu adam unittest
* deleted some useless code
* removed useless code

Co-authored-by: ver217 <lhx0217@gmail.com>
Co-authored-by: Frank Lee <somerlee.9@gmail.com>
Co-authored-by: jiaruifang <fangjiarui123@gmail.com>
This commit is contained in:
10
setup.py
10
setup.py
@@ -124,12 +124,12 @@ if build_cuda_ext:
|
||||
# https://github.com/pytorch/pytorch/commit/eb7b39e02f7d75c26d8a795ea8c7fd911334da7e#diff-4632522f237f1e4e728cb824300403ac
|
||||
version_dependent_macros = ['-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']
|
||||
|
||||
def cuda_ext_helper(name, sources, extra_cuda_flags):
|
||||
def cuda_ext_helper(name, sources, extra_cuda_flags, extra_cxx_flags=[]):
|
||||
return CUDAExtension(name=name,
|
||||
sources=[os.path.join('colossalai/kernel/cuda_native/csrc', path) for path in sources],
|
||||
include_dirs=[os.path.join(
|
||||
this_dir, 'colossalai/kernel/cuda_native/csrc/kernels/include')],
|
||||
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
|
||||
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros + extra_cxx_flags,
|
||||
'nvcc': append_nvcc_threads(['-O3',
|
||||
'--use_fast_math'] + version_dependent_macros + extra_cuda_flags)})
|
||||
|
||||
@@ -188,6 +188,12 @@ if build_cuda_ext:
|
||||
'kernels/general_kernels.cu',
|
||||
'kernels/cuda_util.cu'],
|
||||
extra_cuda_flags + cc_flag))
|
||||
|
||||
extra_cxx_flags = ['-std=c++14', '-lcudart', '-lcublas', '-g', '-Wno-reorder', '-fopenmp', '-march=native']
|
||||
ext_modules.append(cuda_ext_helper('cpu_adam',
|
||||
['cpu_adam.cpp'],
|
||||
extra_cuda_flags,
|
||||
extra_cxx_flags))
|
||||
|
||||
setup(
|
||||
name='colossalai',
|
||||
|
Reference in New Issue
Block a user