From b2d9970fc1ae93610ab4e3d07d1796f8174fdf43 Mon Sep 17 00:00:00 2001 From: Gabriel Fu Date: Wed, 23 Aug 2023 11:21:56 +0800 Subject: [PATCH] Allow specifying dtype in `langchain.llms.VLLM` (#9635) - Description: add `dtype` argument for VLLM - Issue: #9593 - Dependencies: none - Tag maintainer: @hwchase17, @baskaryan --- libs/langchain/langchain/llms/vllm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/langchain/langchain/llms/vllm.py b/libs/langchain/langchain/llms/vllm.py index bf82c8c2149..9f456fde514 100644 --- a/libs/langchain/langchain/llms/vllm.py +++ b/libs/langchain/langchain/llms/vllm.py @@ -59,6 +59,9 @@ class VLLM(BaseLLM): logprobs: Optional[int] = None """Number of log probabilities to return per output token.""" + dtype: str = "auto" + """The data type for the model weights and activations.""" + client: Any #: :meta private: @root_validator() @@ -77,6 +80,7 @@ class VLLM(BaseLLM): model=values["model"], tensor_parallel_size=values["tensor_parallel_size"], trust_remote_code=values["trust_remote_code"], + dtype=values["dtype"], ) return values