[Fix] Fix Inference Example, Tests, and Requirements (#5688)

* clean requirements

* modify example inference struct

* add test ci scripts

* mark test_infer as submodule

* remove deprecated classes & dependencies

* fix import path of HAS_FLASH_ATTN

* prune inference tests to be run

* prune triton kernel tests

* increase pytest timeout (minutes)

* revert import path in openmoe
This commit is contained in:
Yuanheng Zhao
2024-05-08 11:30:15 +08:00
committed by GitHub
parent f9afe0addd
commit 55cc7f3df7
23 changed files with 46 additions and 328 deletions

View File

@@ -182,7 +182,7 @@ def benchmark_inference(args):
def inference(rank, world_size, port, args):
colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
benchmark_inference(args)

View File

@@ -17,7 +17,7 @@ def infer(args):
# ==============================
# Launch colossalai, setup distributed environment
# ==============================
colossalai.launch_from_torch(config={})
colossalai.launch_from_torch()
coordinator = DistCoordinator()
# ==============================
@@ -59,7 +59,7 @@ def infer(args):
coordinator.print_on_master(out[0])
# colossalai run --nproc_per_node 1 llama_gen.py -m MODEL_PATH
# colossalai run --nproc_per_node 1 llama_generation.py -m MODEL_PATH
if __name__ == "__main__":
# ==============================
# Parse Arguments

View File

@@ -0,0 +1,4 @@
#!/bin/bash
echo "Skip the test (this test is slow)"
# bash ./run_benchmark.sh

View File

@@ -35,7 +35,7 @@ from transformers.utils import (
replace_return_docstrings,
)
from colossalai.kernel.extensions.pybind.flash_attention import HAS_FLASH_ATTN
from colossalai.kernel.extensions.flash_attention import HAS_FLASH_ATTN
from colossalai.kernel.triton.llama_act_combine_kernel import HAS_TRITON
from colossalai.moe.layers import SparseMLP
from colossalai.moe.manager import MOE_MANAGER