llamacpp wrong default value passed for f16_kv (#3320)

Fixes the default `f16_kv` value in the llamacpp wrapper: the field defaulted
to `False` and was always passed through to `llama_cpp.Llama`, overriding the
upstream default of `True`.
See: ba3959eafd/llama_cpp/llama.py (L33)

Fixes #3241
Fixes #3301
Commit 77bb6c99f7 (parent 3a1bdce3f5)
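Why the field default matters here, as a minimal sketch rather than the actual
langchain code (the class and helper names below are hypothetical): the wrapper
forwards `f16_kv` explicitly when constructing `llama_cpp.Llama`, so the
pydantic `Field` default always wins over whatever llama-cpp-python declares.

from pydantic import BaseModel, Field

class LlamaCppParams(BaseModel):
    # Hypothetical stand-in for the real LlamaCpp field; after this commit
    # the default is True, matching llama-cpp-python.
    f16_kv: bool = Field(True, alias="f16_kv")
    """Use half-precision for key/value cache."""

def build_client(params: LlamaCppParams) -> dict:
    # The value is forwarded explicitly, so the upstream default never
    # applies; llama.cpp receives exactly what the Field declares.
    return {"f16_kv": params.f16_kv}  # stands in for llama_cpp.Llama(**kwargs)

print(build_client(LlamaCppParams()))  # {'f16_kv': True} after this commit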
@@ -31,13 +31,13 @@ class LlamaCpp(LLM):
     """Token context window."""

     n_parts: int = Field(-1, alias="n_parts")
-    """Number of parts to split the model into. 
+    """Number of parts to split the model into.
     If -1, the number of parts is automatically determined."""

     seed: int = Field(-1, alias="seed")
     """Seed. If -1, a random seed is used."""

-    f16_kv: bool = Field(False, alias="f16_kv")
+    f16_kv: bool = Field(True, alias="f16_kv")
     """Use half-precision for key/value cache."""

     logits_all: bool = Field(False, alias="logits_all")
@@ -50,7 +50,7 @@ class LlamaCpp(LLM):
     """Force system to keep model in RAM."""

     n_threads: Optional[int] = Field(None, alias="n_threads")
-    """Number of threads to use. 
+    """Number of threads to use.
     If None, the number of threads is automatically determined."""

     n_batch: Optional[int] = Field(8, alias="n_batch")
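After the change, constructing the wrapper without arguments picks up the
half-precision key/value cache by default. A hedged usage sketch against the
langchain API of this era (the import path and model filename are assumptions):

from langchain.llms import LlamaCpp

llm = LlamaCpp(model_path="./models/ggml-model-q4_0.bin")
print(llm.f16_kv)  # True after this commit, matching llama-cpp-python

# Opting back into a full-precision key/value cache is still explicit:
llm_f32 = LlamaCpp(model_path="./models/ggml-model-q4_0.bin", f16_kv=False)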