[doc] updated inference readme (#5343)

This commit is contained in:
Frank Lee
2024-02-02 14:31:10 +08:00
committed by GitHub
parent e76acbb076
commit 027aa1043f
10 changed files with 82 additions and 33 deletions

View File

@@ -1,5 +1,7 @@
from typing import Any
__all__ = ["CacheBlock"]
class CacheBlock:
"""A simplified version of logical cache block used for Paged Attention."""

View File

@@ -10,6 +10,8 @@ from colossalai.utils import get_current_device
from .block_cache import CacheBlock
__all__ = ["KVCacheManager"]
GIGABYTE = 1024**3