mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
Allow specifying dtype in langchain.llms.VLLM (#9635)
- Description: add `dtype` argument for VLLM
- Issue: #9593
- Dependencies: none
- Tag maintainer: @hwchase17, @baskaryan
This commit is contained in:
parent
900c1f3e8d
commit
b2d9970fc1
@ -59,6 +59,9 @@ class VLLM(BaseLLM):
|
|||||||
logprobs: Optional[int] = None
|
logprobs: Optional[int] = None
|
||||||
"""Number of log probabilities to return per output token."""
|
"""Number of log probabilities to return per output token."""
|
||||||
|
|
||||||
|
dtype: str = "auto"
|
||||||
|
"""The data type for the model weights and activations."""
|
||||||
|
|
||||||
client: Any #: :meta private:
|
client: Any #: :meta private:
|
||||||
|
|
||||||
@root_validator()
|
@root_validator()
|
||||||
@ -77,6 +80,7 @@ class VLLM(BaseLLM):
|
|||||||
model=values["model"],
|
model=values["model"],
|
||||||
tensor_parallel_size=values["tensor_parallel_size"],
|
tensor_parallel_size=values["tensor_parallel_size"],
|
||||||
trust_remote_code=values["trust_remote_code"],
|
trust_remote_code=values["trust_remote_code"],
|
||||||
|
dtype=values["dtype"],
|
||||||
)
|
)
|
||||||
|
|
||||||
return values
|
return values
|
||||||
|
Loading…
Reference in New Issue
Block a user