community[patch]: Added support for Ollama's num_predict option in ChatOllama (#16633)
Just a simple default addition (num_predict) to the options payload of an Ollama generate call, to support a max_new_tokens-style parameter. Should fix issue: https://github.com/langchain-ai/langchain/issues/14715
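For reference, a rough sketch of the request body an Ollama generate call carries once num_predict is set; the endpoint and option names follow Ollama's public API, while the model name and values here are illustrative only.

# Hypothetical request body for POST /api/generate on a local Ollama server.
payload = {
    "model": "llama2",                  # illustrative model name
    "prompt": "Why is the sky blue?",
    "options": {
        "num_predict": 256,             # cap generation at 256 tokens (-1 = infinite, -2 = fill context)
    },
}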
@@ -64,6 +64,10 @@ class _OllamaCommon(BaseLanguageModel):
     It is recommended to set this value to the number of physical
     CPU cores your system has (as opposed to the logical number of cores)."""
 
+    num_predict: Optional[int] = None
+    """Maximum number of tokens to predict when generating text.
+    (Default: 128, -1 = infinite generation, -2 = fill context)"""
+
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
     repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
@@ -126,6 +130,7 @@ class _OllamaCommon(BaseLanguageModel):
                 "num_ctx": self.num_ctx,
                 "num_gpu": self.num_gpu,
                 "num_thread": self.num_thread,
+                "num_predict": self.num_predict,
                 "repeat_last_n": self.repeat_last_n,
                 "repeat_penalty": self.repeat_penalty,
                 "temperature": self.temperature,
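A minimal usage sketch of the new parameter, assuming a local Ollama server with the llama2 model pulled and the langchain_community package layout current at the time of this change:

from langchain_community.chat_models import ChatOllama

# Cap generation at 128 tokens; -1 would allow unbounded generation,
# -2 would fill the remaining context window.
chat = ChatOllama(model="llama2", num_predict=128)

print(chat.invoke("Summarize the Ollama project in one sentence.").content)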