mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-01 17:17:05 +00:00
[Fix] Fix & Update Inference Tests (compatibility w/ main)
This commit is contained in:
@@ -4,7 +4,7 @@ from transformers.modeling_attn_mask_utils import AttentionMaskConverter
|
||||
from colossalai.inference.modeling.layers.attention import PagedAttention
|
||||
from colossalai.kernel.triton import context_attention_unpadded
|
||||
from colossalai.utils import get_current_device
|
||||
from tests.test_infer.test_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
|
||||
from tests.test_infer.test_kernels.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
|
||||
|
||||
try:
|
||||
import triton # noqa
|
||||
|
@@ -2,14 +2,14 @@ import torch
|
||||
|
||||
from colossalai.kernel.triton import flash_decoding_attention
|
||||
from colossalai.utils import get_current_device
|
||||
from tests.test_infer.test_ops.triton.kernel_utils import (
|
||||
from tests.test_infer.test_kernels.triton.kernel_utils import (
|
||||
convert_kv_unpad_to_padded,
|
||||
create_attention_mask,
|
||||
generate_caches_and_block_tables_v2,
|
||||
generate_caches_and_block_tables_v3,
|
||||
torch_attn_ref,
|
||||
)
|
||||
from tests.test_infer.test_ops.triton.test_decoding_attn import prepare_data
|
||||
from tests.test_infer.test_kernels.triton.test_decoding_attn import prepare_data
|
||||
|
||||
try:
|
||||
import triton # noqa
|
||||
|
@@ -3,7 +3,7 @@ import torch
|
||||
from colossalai.kernel.kernel_loader import InferenceOpsLoader
|
||||
from colossalai.kernel.triton import flash_decoding_attention
|
||||
from colossalai.utils import get_current_device
|
||||
from tests.test_infer.test_ops.triton.kernel_utils import (
|
||||
from tests.test_infer.test_kernels.triton.kernel_utils import (
|
||||
generate_caches_and_block_tables_v2,
|
||||
generate_caches_and_block_tables_v3,
|
||||
generate_caches_and_block_tables_vllm,
|
||||
|
@@ -2,7 +2,7 @@ import torch
|
||||
|
||||
from colossalai.kernel.kernel_loader import InferenceOpsLoader
|
||||
from colossalai.kernel.triton import copy_kv_to_blocked_cache, decoding_fused_rotary_embedding, rotary_embedding
|
||||
from tests.test_infer.test_ops.triton.kernel_utils import (
|
||||
from tests.test_infer.test_kernels.triton.kernel_utils import (
|
||||
mock_alloc_block_table_and_kvcache_v2,
|
||||
mock_alloc_block_table_and_kvcache_v3,
|
||||
mock_alloc_single_token,
|
||||
|
@@ -4,8 +4,8 @@ from colossalai.inference.modeling.layers.attention import copy_to_cache
|
||||
from colossalai.kernel.kernel_loader import InferenceOpsLoader
|
||||
from colossalai.kernel.triton import copy_kv_to_blocked_cache
|
||||
from colossalai.utils import get_current_device
|
||||
from tests.test_infer.test_ops.cuda.test_kv_cache_memcpy import prepare_data as prepare_data_new_kcache_layout
|
||||
from tests.test_infer.test_ops.triton.test_kvcache_copy import prepare_data
|
||||
from tests.test_infer.test_kernels.cuda.test_kv_cache_memcpy import prepare_data as prepare_data_new_kcache_layout
|
||||
from tests.test_infer.test_kernels.triton.test_kvcache_copy import prepare_data
|
||||
|
||||
try:
|
||||
import triton # noqa
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import torch
|
||||
|
||||
from colossalai.kernel.triton import get_xine_cache
|
||||
from tests.test_infer.test_ops.triton.test_xine_copy import get_cos_sin
|
||||
from tests.test_infer.test_kernels.triton.test_xine_copy import get_cos_sin
|
||||
|
||||
try:
|
||||
import triton # noqa
|
||||
|
Reference in New Issue
Block a user