[Hotfix] Fix accuracy and align attention method API with Triton kernel (#5229)

* fix accuracy

* alignment in attention

* fix attention

* fix

* fix bugs

* fix bugs

* fix bugs
This commit is contained in:
Jianghai
2024-01-08 15:56:00 +08:00
committed by FrankLeeeee
parent fa4fbdbffb
commit e545a871b8
6 changed files with 168 additions and 107 deletions

View File

@@ -8,7 +8,7 @@ import colossalai
from colossalai.inference.config import InferenceConfig
from colossalai.inference.kv_cache import CacheBlock, KVCacheManager
from colossalai.logging import disable_existing_loggers
from colossalai.testing import parameterize, spawn
from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
@parameterize(
@@ -155,6 +155,7 @@ def run_dist(rank, world_size, port):
@pytest.mark.dist
@rerun_if_address_is_in_use()
def test_cache_manager():
    """Run the cache-manager checks by launching ``run_dist`` via ``spawn``.

    Marked with ``pytest.mark.dist`` and wrapped in
    ``rerun_if_address_is_in_use()``.
    """
    # NOTE(review): the literal 1 is presumably the number of processes to
    # launch -- confirm against the signature of ``colossalai.testing.spawn``.
    spawn(run_dist, 1)