Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-03 18:19:58 +00:00)
[inference] Add alibi to flash attn function (#5678)
* add alibi to flash attn function
* rm redundant modifications
```diff
@@ -121,9 +121,7 @@ class InferenceEngine:
                 casuallm = _supported_models[arch](hf_config)
                 if isinstance(casuallm, AutoModelForCausalLM):
                     # NOTE(caidi) It's necessary to add half() here, otherwise baichuan13B will overflow the memory.
-                    model = (
-                        AutoModelForCausalLM.from_pretrained(model_or_path, trust_remote_code=True).half().cuda()
-                    )
+                    model = AutoModelForCausalLM.from_pretrained(model_or_path, trust_remote_code=True).half()
                 else:
                     model = _supported_models[arch](hf_config)
             else:
```
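For context, below is a minimal sketch of ALiBi (Attention with Linear Biases), the positional-bias scheme this commit wires into the flash attention path. It is illustrative only, not the PR's kernel change: the helper names (`get_alibi_slopes`, `alibi_attention`) are hypothetical, and the slope schedule assumes the head count is a power of two, following the formulation from the ALiBi paper.

```python
import math
import torch

def get_alibi_slopes(num_heads: int) -> torch.Tensor:
    # Geometric slope schedule from the ALiBi paper; assumes num_heads
    # is a power of two (the general case interleaves a second sequence).
    start = 2 ** (-8.0 / num_heads)
    return torch.tensor([start ** (i + 1) for i in range(num_heads)])

def alibi_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    # q, k, v: (batch, num_heads, seq_len, head_dim)
    _, num_heads, seq_len, head_dim = q.shape
    scores = q @ k.transpose(-2, -1) / math.sqrt(head_dim)

    # ALiBi bias: each head linearly penalizes attention to distant keys.
    # distance[i, j] = j - i, so farther past keys get a more negative bias.
    pos = torch.arange(seq_len, device=q.device)
    distance = (pos[None, :] - pos[:, None]).to(scores.dtype)   # (seq, seq)
    slopes = get_alibi_slopes(num_heads).to(device=q.device, dtype=scores.dtype)
    scores = scores + slopes.view(-1, 1, 1) * distance          # broadcast over heads

    # Causal mask: queries may not attend to future keys.
    causal = torch.triu(
        torch.ones(seq_len, seq_len, dtype=torch.bool, device=q.device), diagonal=1
    )
    scores = scores.masked_fill(causal, float("-inf"))
    return torch.softmax(scores, dim=-1) @ v
```

In a fused flash attention kernel the bias is applied inside the tiled softmax rather than materialized as a full (seq, seq) tensor; the sketch only shows the math being fused. Separately, the hunk above drops the explicit `.cuda()` call when loading the Hugging Face model, keeping only the `.half()` cast flagged in the NOTE.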