From dc171221b32f218685f4493657d3683344553109 Mon Sep 17 00:00:00 2001
From: Changyong Um
Date: Wed, 30 Oct 2024 22:59:34 +0900
Subject: [PATCH] community[patch]: Fix vLLM integration to apply lora_request
 (#27731)

**Description:**
- Add support for the `lora_request` parameter in the `VLLM` class so that LoRA adapter configurations are applied at generation time. This lets users pass a LoRA request directly when invoking VLLM, enabling more flexible and efficient model customization.

**Issue:**
- There is no existing issue for `lora_request` in VLLM; this PR addresses the need to configure LoRA requests within the VLLM integration.
- Reference: [Using LoRA Adapters in vLLM](https://docs.vllm.ai/en/stable/models/lora.html#using-lora-adapters)

**Example code:**

Before this change, the `lora_request` parameter was not applied:

```python
from langchain_community.llms import VLLM
from vllm.lora.request import LoRARequest

ADAPTER_PATH = "/path/of/lora_adapter"

llm = VLLM(
    model="Bllossom/llama-3.2-Korean-Bllossom-3B",
    max_new_tokens=512,
    top_k=2,
    top_p=0.90,
    temperature=0.1,
    vllm_kwargs={
        "gpu_memory_utilization": 0.5,
        "enable_lora": True,
        "max_model_len": 1024,
    },
)

print(
    llm.invoke(
        ["...prompt_content..."],
        lora_request=LoRARequest("lora_adapter", 1, ADAPTER_PATH),
    )
)
```

**Output before this change:**
```bash
response generated without the lora_request applied
```

To fix this, `langchain_community.llms.vllm.VLLM` now pops `lora_request` from the call kwargs and forwards it to the underlying vLLM `generate` call.

**Output after this change:**
```bash
response generated with the lora_request applied
```

**Dependencies:**
- None

**Lint and test:**
- All tests and lint checks have passed.

---------

Co-authored-by: Um Changyong
---
 libs/community/langchain_community/llms/vllm.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/llms/vllm.py b/libs/community/langchain_community/llms/vllm.py
index dc8a7a76d24..66a0f17756b 100644
--- a/libs/community/langchain_community/llms/vllm.py
+++ b/libs/community/langchain_community/llms/vllm.py
@@ -125,6 +125,8 @@ class VLLM(BaseLLM):
         """Run the LLM on the given prompt and input."""
         from vllm import SamplingParams
 
+        lora_request = kwargs.pop("lora_request", None)
+
         # build sampling parameters
         params = {**self._default_params, **kwargs, "stop": stop}
 
@@ -135,7 +137,12 @@ class VLLM(BaseLLM):
         )
 
         # call the model
-        outputs = self.client.generate(prompts, sample_params)
+        if lora_request:
+            outputs = self.client.generate(
+                prompts, sample_params, lora_request=lora_request
+            )
+        else:
+            outputs = self.client.generate(prompts, sample_params)
 
         generations = []
         for output in outputs:
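
Usage note (not part of the patch): the sketch below shows how the forwarded `lora_request` could be used to switch adapters per call once this change is in place. It is a minimal sketch, assuming a vLLM build with LoRA support and enough GPU memory for the base model plus adapters; the adapter names and `*_ADAPTER_PATH` values are illustrative placeholders, not part of this PR.

```python
from langchain_community.llms import VLLM
from vllm.lora.request import LoRARequest

# Placeholder adapter locations; point these at real LoRA adapter directories.
SQL_ADAPTER_PATH = "/path/of/sql_lora_adapter"
CHAT_ADAPTER_PATH = "/path/of/chat_lora_adapter"

llm = VLLM(
    model="Bllossom/llama-3.2-Korean-Bllossom-3B",
    max_new_tokens=512,
    temperature=0.1,
    vllm_kwargs={
        "gpu_memory_utilization": 0.5,
        "enable_lora": True,
        "max_model_len": 1024,
        "max_loras": 2,  # vLLM engine arg: allow two adapters in a batch
    },
)

# Because _generate pops lora_request from the call kwargs, each call can
# target a different adapter (or none) against the same base model.
sql_answer = llm.invoke(
    "Write a SQL query that counts users per country.",
    lora_request=LoRARequest("sql_adapter", 1, SQL_ADAPTER_PATH),
)
chat_answer = llm.invoke(
    "Summarize LoRA fine-tuning in one sentence.",
    lora_request=LoRARequest("chat_adapter", 2, CHAT_ADAPTER_PATH),
)

# Without lora_request, the call falls through to the plain
# client.generate(prompts, sample_params) path.
base_answer = llm.invoke("What is LoRA?")
```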