From dc171221b32f218685f4493657d3683344553109 Mon Sep 17 00:00:00 2001
From: Changyong Um
Date: Wed, 30 Oct 2024 22:59:34 +0900
Subject: [PATCH] community[patch]: Fix vLLM integration to apply lora_request
 (#27731)

**Description:**
- Add support for the `lora_request` parameter in the `VLLM` class so that LoRA adapter configurations are applied at generation time. This lets users pass a LoRA request directly when invoking VLLM, enabling more flexible and efficient model customization.

**Issue:**
- There is no existing issue for `lora_request` in VLLM; this PR addresses the need to configure LoRA requests within the VLLM integration.
- Reference: [Using LoRA Adapters in vLLM](https://docs.vllm.ai/en/stable/models/lora.html#using-lora-adapters)

**Example code:**

Before this change, the `lora_request` parameter was not applied:

```python
from langchain_community.llms import VLLM
from vllm.lora.request import LoRARequest

ADAPTER_PATH = "/path/of/lora_adapter"

llm = VLLM(
    model="Bllossom/llama-3.2-Korean-Bllossom-3B",
    max_new_tokens=512,
    top_k=2,
    top_p=0.90,
    temperature=0.1,
    vllm_kwargs={
        "gpu_memory_utilization": 0.5,
        "enable_lora": True,
        "max_model_len": 1024,
    },
)

print(
    llm.invoke(
        ["...prompt_content..."],
        lora_request=LoRARequest("lora_adapter", 1, ADAPTER_PATH),
    )
)
```

**Output before this change:**
```bash
response generated without the lora_request applied
```

To fix this, `langchain_community.llms.vllm.VLLM` now pops `lora_request` from the call kwargs and forwards it to the underlying vLLM `generate` call.

**Output after this change:**
```bash
response generated with the lora_request applied
```

**Dependencies:**
- None

**Lint and test:**
- All tests and lint checks have passed.

---------

Co-authored-by: Um Changyong
---
 libs/community/langchain_community/llms/vllm.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/llms/vllm.py b/libs/community/langchain_community/llms/vllm.py
index dc8a7a76d24..66a0f17756b 100644
--- a/libs/community/langchain_community/llms/vllm.py
+++ b/libs/community/langchain_community/llms/vllm.py
@@ -125,6 +125,8 @@ class VLLM(BaseLLM):
         """Run the LLM on the given prompt and input."""
         from vllm import SamplingParams
 
+        lora_request = kwargs.pop("lora_request", None)
+
         # build sampling parameters
         params = {**self._default_params, **kwargs, "stop": stop}
 
@@ -135,7 +137,12 @@ class VLLM(BaseLLM):
         )
 
         # call the model
-        outputs = self.client.generate(prompts, sample_params)
+        if lora_request:
+            outputs = self.client.generate(
+                prompts, sample_params, lora_request=lora_request
+            )
+        else:
+            outputs = self.client.generate(prompts, sample_params)
 
         generations = []
         for output in outputs:
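
Usage note (not part of the patch): the sketch below shows how the forwarded `lora_request` could be used to switch adapters per call once this change is in place. It is a minimal sketch, assuming a vLLM build with LoRA support and enough GPU memory for the base model plus adapters; the adapter names and `*_ADAPTER_PATH` values are illustrative placeholders, not part of this PR.

```python
from langchain_community.llms import VLLM
from vllm.lora.request import LoRARequest

# Placeholder adapter locations; point these at real LoRA adapter directories.
SQL_ADAPTER_PATH = "/path/of/sql_lora_adapter"
CHAT_ADAPTER_PATH = "/path/of/chat_lora_adapter"

llm = VLLM(
    model="Bllossom/llama-3.2-Korean-Bllossom-3B",
    max_new_tokens=512,
    temperature=0.1,
    vllm_kwargs={
        "gpu_memory_utilization": 0.5,
        "enable_lora": True,
        "max_model_len": 1024,
        "max_loras": 2,  # vLLM engine arg: allow two adapters in a batch
    },
)

# Because _generate pops lora_request from the call kwargs, each call can
# target a different adapter (or none) against the same base model.
sql_answer = llm.invoke(
    "Write a SQL query that counts users per country.",
    lora_request=LoRARequest("sql_adapter", 1, SQL_ADAPTER_PATH),
)
chat_answer = llm.invoke(
    "Summarize LoRA fine-tuning in one sentence.",
    lora_request=LoRARequest("chat_adapter", 2, CHAT_ADAPTER_PATH),
)

# Without lora_request, the call falls through to the plain
# client.generate(prompts, sample_params) path.
base_answer = llm.invoke("What is LoRA?")
```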