fix: Adding an LLM param to fix broken generator from llamacpp (#1519)

Author: Naveen Kannan, 2024-01-17 12:10:45 -05:00, committed by GitHub
parent e326126d0d
commit 869233f0e4


@@ -42,7 +42,7 @@ class LLMComponent:
         context_window=settings.llm.context_window,
         generate_kwargs={},
         # All to GPU
-        model_kwargs={"n_gpu_layers": -1},
+        model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
         # transform inputs into Llama2 format
         messages_to_prompt=prompt_style.messages_to_prompt,
         completion_to_prompt=prompt_style.completion_to_prompt,
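The added "offload_kqv": True entry tells llama.cpp to keep the KV cache (the attention key/value buffers) on the GPU alongside the layers offloaded by n_gpu_layers=-1; privateGPT forwards model_kwargs to the underlying llama_cpp.Llama constructor. A minimal sketch of the equivalent direct call, assuming llama-cpp-python is installed; the model path is hypothetical:

# Minimal sketch: passing the same kwargs straight to llama-cpp-python.
# The model path below is hypothetical; substitute any local GGUF file.
from llama_cpp import Llama

llm = Llama(
    model_path="models/llama-2-7b-chat.Q4_K_M.gguf",  # hypothetical path
    n_gpu_layers=-1,    # offload every layer to the GPU
    offload_kqv=True,   # also keep the KV cache on the GPU
)

out = llm("Q: Name the planets in the solar system. A:", max_tokens=32)
print(out["choices"][0]["text"])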