Allow specifying dtype in langchain.llms.VLLM (#9635)

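- Description: add a `dtype` field (default `"auto"`) to `VLLM` and pass it through alongside `model`, `tensor_parallel_size`, and `trust_remote_code`
- Issue: #9593
- Dependencies: none
- Tag maintainer: @hwchase17, @baskaryan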
This commit is contained in:
Gabriel Fu 2023-08-23 11:21:56 +08:00 committed by GitHub
parent 900c1f3e8d
commit b2d9970fc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -59,6 +59,9 @@ class VLLM(BaseLLM):
logprobs: Optional[int] = None logprobs: Optional[int] = None
"""Number of log probabilities to return per output token.""" """Number of log probabilities to return per output token."""
dtype: str = "auto"
"""The data type for the model weights and activations."""
client: Any #: :meta private: client: Any #: :meta private:
@root_validator() @root_validator()
@@ -77,6 +80,7 @@ class VLLM(BaseLLM):
model=values["model"], model=values["model"],
tensor_parallel_size=values["tensor_parallel_size"], tensor_parallel_size=values["tensor_parallel_size"],
trust_remote_code=values["trust_remote_code"], trust_remote_code=values["trust_remote_code"],
dtype=values["dtype"],
) )
return values return values