mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-03 18:19:58 +00:00
[Inference/Refactor] Refactor compilation mechanism and unified multi hw (#5613)
* refactor compilation mechanism and unified multi hw * fix file path bug * add init.py to make pybind a module to avoid relative path error caused by softlink * delete duplicated micros * fix micros bug in gcc
This commit is contained in:
88
extensions/csrc/funcs/unary_functor.h
Normal file
88
extensions/csrc/funcs/unary_functor.h
Normal file
@@ -0,0 +1,88 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(COLOSSAL_WITH_CUDA)
|
||||
#include <cuda.h>
|
||||
#include <cuda_bf16.h>
|
||||
#include <cuda_fp16.h>
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "common/data_type.h"
|
||||
#include "common/micros.h"
|
||||
|
||||
namespace colossalAI {
|
||||
namespace funcs {
|
||||
|
||||
// Zero-initializes `dst` by storing 32-bit words through a union, which lets
// the compiler emit wide register writes instead of byte-wise stores.
// Precondition: sizeof(T) must be a non-zero multiple of 4 (e.g. CUDA vector
// types such as float2/float4 or packed aggregates). Previously a T with
// sizeof(T) < 4 produced a zero-length array and a non-multiple-of-4 size
// left the trailing bytes unzeroed; the static_assert now rejects both at
// compile time.
template <typename T>
inline __device__ void zero(T& dst) {
  static_assert(sizeof(T) % 4 == 0 && sizeof(T) >= 4,
                "zero<T> requires sizeof(T) to be a non-zero multiple of 4");
  constexpr int WORDS = sizeof(T) / 4;
  // Build the zeroed value in a union so the stores are word-granular.
  union {
    T raw;
    uint32_t words[WORDS];
  } tmp;

#pragma unroll
  for (int ii = 0; ii < WORDS; ii++) {
    tmp.words[ii] = 0u;
  }
  dst = tmp.raw;
}
|
||||
|
||||
// Note(LiuYang): Registry of the unary operations that already have an
// implementation; extend this enum when adding a new supported operation.
enum class UnaryOpType { kLog2Ceil = 0, kAbs = 1, kSum = 2 };

// Note(LiuYang): The primary template is intentionally left undefined — only
// explicit specializations are usable. Common and simple unary operators are
// specialized below via the generator macro; more involved ones belong in a
// new file under the functors dir.
template <typename From, typename To, UnaryOpType op_type>
struct UnaryOpFunctor;
|
||||
|
||||
// Generates an explicit specialization of UnaryOpFunctor.
//   FROM / TO           - argument and result types of operator()
//   UNARY_OP_TYPE       - UnaryOpType tag selecting the operation
//   FUNCTION_MODIFIER   - execution-space qualifier macro (e.g. HOSTDEVICE,
//                         DEVICE, from common/micros.h)
//   STMTS               - braced function body for operator()
//   ARGS...             - optional template parameter list; GNU named
//                         variadic form, kept for gcc compatibility
// Note: the original derived from std::unary_function, which was removed in
// C++17 and broke compilation under -std=c++17; the argument_type /
// result_type typedefs it provided are now declared directly so any caller
// relying on them keeps working.
#define COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(                  \
    FROM, TO, UNARY_OP_TYPE, FUNCTION_MODIFIER, STMTS, ARGS...) \
  template <ARGS>                                               \
  struct UnaryOpFunctor<FROM, TO, UNARY_OP_TYPE> {              \
    using argument_type = FROM;                                 \
    using result_type = TO;                                     \
    FUNCTION_MODIFIER TO operator()(FROM val) STMTS             \
  };
|
||||
|
||||
// kAbs for any T, callable on host and device (HOSTDEVICE).
// NOTE(review): relies on std::abs being callable in device code for the
// instantiated T — holds for int/float/double; confirm for other types.
COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(
    T, T, UnaryOpType::kAbs, HOSTDEVICE, { return std::abs(val); }, typename T)
|
||||
|
||||
// kLog2Ceil: smallest non-negative log2_value with (1 << log2_value) >= val,
// i.e. ceil(log2(val)) for val >= 1. Returns 0 for val <= 1 (including all
// non-positive inputs, since the loop condition is false immediately).
// NOTE(review): for val > 2^30 the left shift overflows int — callers are
// presumably limited to small sizes; confirm.
COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(int, int, UnaryOpType::kLog2Ceil,
                                      HOSTDEVICE, {
                                        int log2_value = 0;
                                        while ((1 << log2_value) < val)
                                          ++log2_value;
                                        return log2_value;
                                      })
|
||||
|
||||
#if defined(COLOSSAL_WITH_CUDA)

// kSum over CUDA vector types (device-only): folds every component of the
// vector into a single float.
COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(float2, float, UnaryOpType::kSum, DEVICE,
                                      { return val.x + val.y; })

COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(float4, float, UnaryOpType::kSum, DEVICE,
                                      { return val.x + val.y + val.z + val.w; })

// dtype::float4_ / dtype::float8_ are project-defined types (see
// common/data_type.h); the member accesses below indicate each field (x, y,
// z, w) is itself a two-component vector.
COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(dtype::float4_, float, UnaryOpType::kSum,
                                      DEVICE, {
                                        return val.x.x + val.x.y + val.y.x +
                                               val.y.y;
                                      })

COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION(dtype::float8_, float, UnaryOpType::kSum,
                                      DEVICE, {
                                        return val.x.x + val.x.y + val.y.x +
                                               val.y.y + val.z.x + val.z.y +
                                               val.w.x + val.w.y;
                                      })

#endif /* defined(COLOSSAL_WITH_CUDA) */
|
||||
|
||||
// Undefine the generator macro so it does not leak into files including this
// header. (The previous spelling "COLOSSAL_UARY_..." was a typo: it undefined
// a name that was never defined, leaving the real macro visible everywhere.)
#undef COLOSSAL_UNARY_FUNCTOR_SPECIALIZATION
|
||||
|
||||
} // namespace funcs
|
||||
} // namespace colossalAI
|
Reference in New Issue
Block a user