Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-08-31 16:40:41 +00:00)
[Fix] Fix Inference Example, Tests, and Requirements (#5688)
* clean requirements
* modify example inference struct
* add test ci scripts
* mark test_infer as submodule
* rm deprecated cls & deps
* fix import of HAS_FLASH_ATTN
* prune inference tests to be run
* prune triton kernel tests
* increment pytest timeout mins
* revert import path in openmoe
examples/inference/benchmark_ops/test_ci.sh (new file, empty)
@@ -182,7 +182,7 @@ def benchmark_inference(args):
 
 
 def inference(rank, world_size, port, args):
-    colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
+    colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
     benchmark_inference(args)
 
 
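The hunk above drops the deprecated `config` argument from `colossalai.launch`; all other keyword arguments are unchanged. A minimal sketch of the updated call, assuming a single-process run (rank 0, world size 1) and an arbitrary free rendezvous port:

import colossalai

# The `config` dict is gone after this patch; the remaining arguments
# are the same ones the benchmark passes above.
colossalai.launch(
    rank=0,            # global rank of this process
    world_size=1,      # total number of processes in the job
    host="localhost",  # address of the rank-0 process
    port=29500,        # any free port works; 29500 is illustrative
    backend="nccl",    # NCCL backend for GPU collectives
)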
@@ -17,7 +17,7 @@ def infer(args):
     # ==============================
     # Launch colossalai, setup distributed environment
     # ==============================
-    colossalai.launch_from_torch(config={})
+    colossalai.launch_from_torch()
     coordinator = DistCoordinator()
 
     # ==============================
@@ -59,7 +59,7 @@ def infer(args):
     coordinator.print_on_master(out[0])
 
 
-# colossalai run --nproc_per_node 1 llama_gen.py -m MODEL_PATH
+# colossalai run --nproc_per_node 1 llama_generation.py -m MODEL_PATH
 if __name__ == "__main__":
     # ==============================
     # Parse Arguments
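The same deprecation applies to `colossalai.launch_from_torch`, which now reads the distributed settings from the environment alone. A minimal sketch of the updated entry point together with `DistCoordinator`, assuming the script is started via `colossalai run --nproc_per_node 1 ...` as in the renamed comment above (the `colossalai.cluster` import path for `DistCoordinator` is taken from current ColossalAI releases, not from this diff):

import colossalai
from colossalai.cluster import DistCoordinator

# Rank, world size, and master address come from the environment that
# `colossalai run` (or torchrun) sets up; no config dict is needed.
colossalai.launch_from_torch()

coordinator = DistCoordinator()
# Emits the message on global rank 0 only, mirroring how the example
# prints its generation output above.
coordinator.print_on_master("distributed environment ready")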
examples/inference/llama/test_ci.sh (new file, 4 lines)
@@ -0,0 +1,4 @@
+#!/bin/bash
+echo "Skip the test (this test is slow)"
+
+# bash ./run_benchmark.sh
@@ -35,7 +35,7 @@ from transformers.utils import (
     replace_return_docstrings,
 )
 
-from colossalai.kernel.extensions.pybind.flash_attention import HAS_FLASH_ATTN
+from colossalai.kernel.extensions.flash_attention import HAS_FLASH_ATTN
 from colossalai.kernel.triton.llama_act_combine_kernel import HAS_TRITON
 from colossalai.moe.layers import SparseMLP
 from colossalai.moe.manager import MOE_MANAGER
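This last hunk restores the pre-pybind import path for the `HAS_FLASH_ATTN` flag in the openmoe example. As a hedged sketch of how such a capability flag is typically consumed, the dispatch below is illustrative only and not part of the patch; `attention` and its fallback branch are invented for this example:

import math
import torch

try:
    # Import path restored by this commit.
    from colossalai.kernel.extensions.flash_attention import HAS_FLASH_ATTN
except ImportError:
    # Other versions may place the flag elsewhere; degrade gracefully.
    HAS_FLASH_ATTN = False

def attention(q, k, v):
    # Use an optimized kernel when the extension is available; PyTorch's
    # fused scaled_dot_product_attention stands in for it here.
    if HAS_FLASH_ATTN:
        return torch.nn.functional.scaled_dot_product_attention(q, k, v)
    # Plain fallback: softmax(QK^T / sqrt(d)) V
    scores = (q @ k.transpose(-2, -1)) / math.sqrt(q.size(-1))
    return torch.softmax(scores, dim=-1) @ v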