[Inference/Refactor] Refactor compilation mechanism and unified multi hw (#5613)

* refactor compilation mechanism and unified multi hw

* fix file path bug

* add `__init__.py` to make pybind a module to avoid relative path errors caused by the symlink

* delete duplicated macros

* fix macro bug in gcc
This commit is contained in:
傅剑寒
2024-04-24 14:17:54 +08:00
committed by GitHub
parent 04863a9b14
commit 279300dc5f
64 changed files with 345 additions and 310 deletions

View File

@@ -0,0 +1,60 @@
#pragma once
#if defined(COLOSSAL_WITH_CUDA)
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#endif
namespace colossalAI {
namespace dtype {

// Packed vector wrappers used for wide (multi-element) loads/stores of
// bf16/fp16/fp32 data.  The member lists are compiled only when CUDA
// support is enabled; without COLOSSAL_WITH_CUDA the structs are
// intentionally empty placeholders so non-CUDA builds still see the
// type names.  Guards normalized to `#if defined(...)` to match the
// style used by the surrounding headers.
// NOTE(review): no alignas() is specified, so vectorized access relies
// on the members' natural alignment — confirm callers need nothing
// stricter.

// Four bf16 lanes as two __nv_bfloat162 pairs.
struct bfloat164 {
#if defined(COLOSSAL_WITH_CUDA)
  __nv_bfloat162 x;
  __nv_bfloat162 y;
#endif
};

// Eight bf16 lanes as four __nv_bfloat162 pairs.
struct bfloat168 {
#if defined(COLOSSAL_WITH_CUDA)
  __nv_bfloat162 x;
  __nv_bfloat162 y;
  __nv_bfloat162 z;
  __nv_bfloat162 w;
#endif
};

// Four half lanes as two half2 pairs.
struct half4 {
#if defined(COLOSSAL_WITH_CUDA)
  half2 x;
  half2 y;
#endif
};

// Eight half lanes as four half2 pairs.
struct half8 {
#if defined(COLOSSAL_WITH_CUDA)
  half2 x;
  half2 y;
  half2 z;
  half2 w;
#endif
};

// Four float lanes as two float2 pairs; the trailing underscore avoids
// clashing with CUDA's builtin float4.
struct float4_ {
#if defined(COLOSSAL_WITH_CUDA)
  float2 x;
  float2 y;
#endif
};

// Eight float lanes as four float2 pairs; underscore avoids a name
// clash with any builtin, mirroring float4_.
struct float8_ {
#if defined(COLOSSAL_WITH_CUDA)
  float2 x;
  float2 y;
  float2 z;
  float2 w;
#endif
};

}  // namespace dtype
}  // namespace colossalAI

View File

@@ -222,3 +222,13 @@
AT_ERROR(#NAME, "not implemented for '", toString(GTYPE), toString(PTYPE), \
"'"); \
}
// Portable execution-space qualifiers: expand to CUDA's
// __host__/__device__ attributes when building with CUDA support, and
// to nothing otherwise, so shared host/device helper code compiles on
// both toolchains unchanged.
#if defined(COLOSSAL_WITH_CUDA)
#define HOST __host__
#define DEVICE __device__
#define HOSTDEVICE __host__ __device__
#else
#define HOST
#define DEVICE
#define HOSTDEVICE
#endif

View File

@@ -0,0 +1,73 @@
#pragma once
#if defined(COLOSSAL_WITH_CUDA)
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#endif
#include <stdint.h>
#include "common/data_type.h"
namespace colossalAI {
namespace common {

// Maps a scalar type T and a lane count VecSize to the packed vector
// type used for vectorized memory access.  Only explicitly specialized
// (T, VecSize) pairs are usable; the primary template is intentionally
// empty so unsupported combinations fail at compile time.
template <typename T, int VecSize>
struct VecTypeTrait {};

// Maps a half/bf16/float vector type to the float vector type with the
// same lane count (for float accumulation).  Empty primary template for
// the same fail-fast reason as above.
template <typename T>
struct FloatVecTypeTrait {};

// Stamps out a VecTypeTrait specialization.  The trailing ARGS...
// carries optional template parameters (GNU named-variadic extension).
#define VEC_TYPE_TRAITS_SPECIALIZATION(T, VEC_SIZE, VECT, ARGS...) \
  template <ARGS>                                                  \
  struct VecTypeTrait<T, VEC_SIZE> {                               \
    using Type = VECT;                                             \
  };

// Lane count 1 is the identity mapping for every scalar type.
VEC_TYPE_TRAITS_SPECIALIZATION(T, 1, T, typename T)

#if defined(COLOSSAL_WITH_CUDA)
// 1/2-wide bf16 and half map to the native CUDA scalar/pair types;
// 4/8-wide map to float2/float4 purely as equally-sized carriers.
VEC_TYPE_TRAITS_SPECIALIZATION(at::BFloat16, 1, __nv_bfloat16)
VEC_TYPE_TRAITS_SPECIALIZATION(at::BFloat16, 2, __nv_bfloat162)
VEC_TYPE_TRAITS_SPECIALIZATION(at::BFloat16, 4, float2)
VEC_TYPE_TRAITS_SPECIALIZATION(at::BFloat16, 8, float4)
VEC_TYPE_TRAITS_SPECIALIZATION(at::Half, 1, half)
VEC_TYPE_TRAITS_SPECIALIZATION(at::Half, 2, half2)
VEC_TYPE_TRAITS_SPECIALIZATION(at::Half, 4, float2)
VEC_TYPE_TRAITS_SPECIALIZATION(at::Half, 8, float4)
VEC_TYPE_TRAITS_SPECIALIZATION(float, 2, float2)
VEC_TYPE_TRAITS_SPECIALIZATION(float, 4, float4)
VEC_TYPE_TRAITS_SPECIALIZATION(float, 8, dtype::float8_)
// uint8 vectors are carried in equally-sized half/float containers.
VEC_TYPE_TRAITS_SPECIALIZATION(uint8_t, 2, half)
VEC_TYPE_TRAITS_SPECIALIZATION(uint8_t, 4, half2)
VEC_TYPE_TRAITS_SPECIALIZATION(uint8_t, 8, float2)
VEC_TYPE_TRAITS_SPECIALIZATION(__nv_bfloat16, 2, __nv_bfloat162)
VEC_TYPE_TRAITS_SPECIALIZATION(__nv_bfloat16, 4, dtype::bfloat164)
VEC_TYPE_TRAITS_SPECIALIZATION(__nv_bfloat16, 8, dtype::bfloat168)
VEC_TYPE_TRAITS_SPECIALIZATION(half, 2, half2)
VEC_TYPE_TRAITS_SPECIALIZATION(half, 4, dtype::half4)
VEC_TYPE_TRAITS_SPECIALIZATION(half, 8, dtype::half8)
#endif /* defined(COLOSSAL_WITH_CUDA) */

#undef VEC_TYPE_TRAITS_SPECIALIZATION

// Stamps out a FloatVecTypeTrait specialization; same ARGS... idiom.
#define FLOATVEC_TYPE_TRAITS_SPECIALIZATION(T, FLOATT, ARGS...) \
  template <ARGS>                                               \
  struct FloatVecTypeTrait<T> {                                 \
    using Type = FLOATT;                                        \
  };

#if defined(COLOSSAL_WITH_CUDA)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(float2, float2)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(float4, float4)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(__nv_bfloat162, float2)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(dtype::bfloat164, dtype::float4_)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(dtype::bfloat168, dtype::float8_)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(half2, float2)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(dtype::half4, dtype::float4_)
FLOATVEC_TYPE_TRAITS_SPECIALIZATION(dtype::half8, dtype::float8_)
#endif /* defined(COLOSSAL_WITH_CUDA) */

#undef FLOATVEC_TYPE_TRAITS_SPECIALIZATION

}  // namespace common
}  // namespace colossalAI