mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-09 04:50:17 +00:00
fix bugs in request_handler.py and engine.py
This commit is contained in:
committed by
FrankLeeeee
parent
10e3c9f923
commit
d40eb26029
@@ -58,7 +58,12 @@ class KVCacheManager:
|
||||
# Parallel settings
|
||||
self.tp_size = config.tp_size
|
||||
# Model settings
|
||||
self.dtype = config.dtype
|
||||
if config.dtype == "fp32" or config.dtype == torch.float32:
|
||||
self.dtype = torch.float32
|
||||
elif config.dtype == "fp16" or config.dtype == torch.float16:
|
||||
self.dtype = torch.float16
|
||||
else:
|
||||
self.dtype = torch.bfloat16
|
||||
self.elem_size_in_bytes = torch.tensor([], dtype=self.dtype).element_size()
|
||||
self.num_layers = get_model_config_attr(model_config, "num_hidden_layers")
|
||||
# For now we focus on MHA only. TODO: add handling for MQA and GQA.
|
||||
|
Reference in New Issue
Block a user