diff --git a/extensions/cpu_adam/cpu_adam_x86.py b/extensions/cpu_adam/cpu_adam_x86.py index a38194167..27b06bb65 100644 --- a/extensions/cpu_adam/cpu_adam_x86.py +++ b/extensions/cpu_adam/cpu_adam_x86.py @@ -21,7 +21,7 @@ class CpuAdamX86Extension(_CudaExtension): # necessary 4 functions def sources_files(self): ret = [ - self.csrc_abs_path("cuda/cpu_adam.cpp"), + self.csrc_abs_path("x86/cpu_adam.cpp"), ] return ret diff --git a/extensions/csrc/cuda/compat.h b/extensions/csrc/cuda/compat.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/extensions/csrc/cuda/layer_norm_cuda_kernel.cu b/extensions/csrc/cuda/layer_norm_kernel.cu similarity index 100% rename from extensions/csrc/cuda/layer_norm_cuda_kernel.cu rename to extensions/csrc/cuda/layer_norm_kernel.cu diff --git a/extensions/csrc/cuda/moe_cuda_kernel.cu b/extensions/csrc/cuda/moe_kernel.cu similarity index 100% rename from extensions/csrc/cuda/moe_cuda_kernel.cu rename to extensions/csrc/cuda/moe_kernel.cu diff --git a/extensions/csrc/cuda/multi_tensor_adam.cu b/extensions/csrc/cuda/multi_tensor_adam_kernel.cu similarity index 100% rename from extensions/csrc/cuda/multi_tensor_adam.cu rename to extensions/csrc/cuda/multi_tensor_adam_kernel.cu diff --git a/extensions/csrc/cuda/multi_tensor_lamb.cu b/extensions/csrc/cuda/multi_tensor_lamb_kernel.cu similarity index 100% rename from extensions/csrc/cuda/multi_tensor_lamb.cu rename to extensions/csrc/cuda/multi_tensor_lamb_kernel.cu diff --git a/extensions/csrc/cuda/colossal_inference_C_frontend.cpp b/extensions/csrc/cuda/pybind/inference.cpp similarity index 100% rename from extensions/csrc/cuda/colossal_inference_C_frontend.cpp rename to extensions/csrc/cuda/pybind/inference.cpp diff --git a/extensions/csrc/cuda/layer_norm_cuda.cpp b/extensions/csrc/cuda/pybind/layer_norm.cpp similarity index 100% rename from extensions/csrc/cuda/layer_norm_cuda.cpp rename to extensions/csrc/cuda/pybind/layer_norm.cpp diff --git 
a/extensions/csrc/cuda/moe_cuda.cpp b/extensions/csrc/cuda/pybind/moe.cpp similarity index 100% rename from extensions/csrc/cuda/moe_cuda.cpp rename to extensions/csrc/cuda/pybind/moe.cpp diff --git a/extensions/csrc/cuda/colossal_C_frontend.cpp b/extensions/csrc/cuda/pybind/optimizer.cpp similarity index 100% rename from extensions/csrc/cuda/colossal_C_frontend.cpp rename to extensions/csrc/cuda/pybind/optimizer.cpp diff --git a/extensions/csrc/cuda/scaled_masked_softmax.cpp b/extensions/csrc/cuda/pybind/scaled_masked_softmax.cpp similarity index 100% rename from extensions/csrc/cuda/scaled_masked_softmax.cpp rename to extensions/csrc/cuda/pybind/scaled_masked_softmax.cpp diff --git a/extensions/csrc/cuda/scaled_upper_triang_masked_softmax.cpp b/extensions/csrc/cuda/pybind/scaled_upper_triang_masked_softmax.cpp similarity index 100% rename from extensions/csrc/cuda/scaled_upper_triang_masked_softmax.cpp rename to extensions/csrc/cuda/pybind/scaled_upper_triang_masked_softmax.cpp diff --git a/extensions/csrc/cuda/rms_layernorm_kernel.cu b/extensions/csrc/cuda/rms_layernorm_kernel.cu index 99d36575d..0ab40f9f7 100644 --- a/extensions/csrc/cuda/rms_layernorm_kernel.cu +++ b/extensions/csrc/cuda/rms_layernorm_kernel.cu @@ -9,7 +9,7 @@ #include "block_reduce.h" -#include "type_shim.h" +#include "../common/micros.h" template __global__ void rms_layernorm_kernel( diff --git a/extensions/csrc/cuda/scaled_masked_softmax_cuda.cu b/extensions/csrc/cuda/scaled_masked_softmax_kernel.cu similarity index 100% rename from extensions/csrc/cuda/scaled_masked_softmax_cuda.cu rename to extensions/csrc/cuda/scaled_masked_softmax_kernel.cu diff --git a/extensions/csrc/cuda/scaled_upper_triang_masked_softmax_cuda.cu b/extensions/csrc/cuda/scaled_upper_triang_masked_softmax_kernel.cu similarity index 100% rename from extensions/csrc/cuda/scaled_upper_triang_masked_softmax_cuda.cu rename to extensions/csrc/cuda/scaled_upper_triang_masked_softmax_kernel.cu diff --git 
a/extensions/csrc/cuda/cpu_adam.cpp b/extensions/csrc/x86/cpu_adam.cpp similarity index 100% rename from extensions/csrc/cuda/cpu_adam.cpp rename to extensions/csrc/x86/cpu_adam.cpp diff --git a/extensions/csrc/cuda/cpu_adam.h b/extensions/csrc/x86/cpu_adam.h similarity index 100% rename from extensions/csrc/cuda/cpu_adam.h rename to extensions/csrc/x86/cpu_adam.h diff --git a/extensions/inference/inference_ops_cuda.py b/extensions/inference/inference_ops_cuda.py index 042c598fb..f465fe600 100644 --- a/extensions/inference/inference_ops_cuda.py +++ b/extensions/inference/inference_ops_cuda.py @@ -10,7 +10,7 @@ class InferenceOpsCudaExtension(_CudaExtension): ret = [ self.csrc_abs_path(fname) for fname in [ - "cuda/colossal_inference_C_frontend.cpp", + "cuda/pybind/inference.cpp", "cuda/decode_kv_cache_memcpy_kernel.cu", "cuda/activation_kernel.cu", "cuda/rms_layernorm_kernel.cu", diff --git a/extensions/layernorm/layernorm_cuda.py b/extensions/layernorm/layernorm_cuda.py index db5f2fce1..36cf73590 100644 --- a/extensions/layernorm/layernorm_cuda.py +++ b/extensions/layernorm/layernorm_cuda.py @@ -7,7 +7,7 @@ class LayerNormCudaExtension(_CudaExtension): super().__init__(name="layernorm_cuda") def sources_files(self): - ret = [self.csrc_abs_path(fname) for fname in ["cuda/layer_norm_cuda.cpp", "cuda/layer_norm_cuda_kernel.cu"]] + ret = [self.csrc_abs_path(fname) for fname in ["cuda/pybind/layer_norm.cpp", "cuda/layer_norm_kernel.cu"]] return ret def include_dirs(self): diff --git a/extensions/moe/moe_cuda.py b/extensions/moe/moe_cuda.py index 52883e97f..722daae33 100644 --- a/extensions/moe/moe_cuda.py +++ b/extensions/moe/moe_cuda.py @@ -11,7 +11,7 @@ class MoeCudaExtension(_CudaExtension): return ret def sources_files(self): - ret = [self.csrc_abs_path(fname) for fname in ["cuda/moe_cuda.cpp", "cuda/moe_cuda_kernel.cu"]] + ret = [self.csrc_abs_path(fname) for fname in ["cuda/pybind/moe.cpp", "cuda/moe_kernel.cu"]] return ret def cxx_flags(self): diff --git 
a/extensions/optimizer/fused_optimizer_cuda.py b/extensions/optimizer/fused_optimizer_cuda.py index e065cf34a..41c6260aa 100644 --- a/extensions/optimizer/fused_optimizer_cuda.py +++ b/extensions/optimizer/fused_optimizer_cuda.py @@ -10,12 +10,12 @@ class FusedOptimizerCudaExtension(_CudaExtension): ret = [ self.csrc_abs_path(fname) for fname in [ - "cuda/colossal_C_frontend.cpp", + "cuda/pybind/optimizer.cpp", "cuda/multi_tensor_sgd_kernel.cu", "cuda/multi_tensor_scale_kernel.cu", - "cuda/multi_tensor_adam.cu", + "cuda/multi_tensor_adam_kernel.cu", "cuda/multi_tensor_l2norm_kernel.cu", - "cuda/multi_tensor_lamb.cu", + "cuda/multi_tensor_lamb_kernel.cu", ] ] return ret diff --git a/extensions/softmax/scaled_masked_softmax_cuda.py b/extensions/softmax/scaled_masked_softmax_cuda.py index 5b4208dba..797638c3b 100644 --- a/extensions/softmax/scaled_masked_softmax_cuda.py +++ b/extensions/softmax/scaled_masked_softmax_cuda.py @@ -9,7 +9,7 @@ class ScaledMaskedSoftmaxCudaExtension(_CudaExtension): def sources_files(self): ret = [ self.csrc_abs_path(fname) - for fname in ["cuda/scaled_masked_softmax.cpp", "cuda/scaled_masked_softmax_cuda.cu"] + for fname in ["cuda/pybind/scaled_masked_softmax.cpp", "cuda/scaled_masked_softmax_kernel.cu"] ] return ret diff --git a/extensions/softmax/scaled_upper_triangle_masked_softmax_cuda.py b/extensions/softmax/scaled_upper_triangle_masked_softmax_cuda.py index d4f27a921..d48d542ad 100644 --- a/extensions/softmax/scaled_upper_triangle_masked_softmax_cuda.py +++ b/extensions/softmax/scaled_upper_triangle_masked_softmax_cuda.py @@ -13,8 +13,8 @@ class ScaledUpperTriangleMaskedSoftmaxCudaExtension(_CudaExtension): ret = [ self.csrc_abs_path(fname) for fname in [ - "cuda/scaled_upper_triang_masked_softmax.cpp", - "cuda/scaled_upper_triang_masked_softmax_cuda.cu", + "cuda/pybind/scaled_upper_triang_masked_softmax.cpp", + "cuda/scaled_upper_triang_masked_softmax_kernel.cu", ] ] return ret