community[patch]: Added support for Ollama's num_predict option in ChatOllama (#16633)
Just a simple default addition (num_predict) to the options payload of an Ollama generate call, to support a max_new_tokens-style parameter. Should fix issue: https://github.com/langchain-ai/langchain/issues/14715
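For reference, a rough sketch of the request body an Ollama generate call carries once num_predict is set; the endpoint and option names follow Ollama's public API, while the model name and values here are illustrative only.

# Hypothetical request body for POST /api/generate on a local Ollama server.
payload = {
    "model": "llama2",                  # illustrative model name
    "prompt": "Why is the sky blue?",
    "options": {
        "num_predict": 256,             # cap generation at 256 tokens (-1 = infinite, -2 = fill context)
    },
}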
@@ -64,6 +64,10 @@ class _OllamaCommon(BaseLanguageModel):
     It is recommended to set this value to the number of physical
     CPU cores your system has (as opposed to the logical number of cores)."""
 
+    num_predict: Optional[int] = None
+    """Maximum number of tokens to predict when generating text.
+    (Default: 128, -1 = infinite generation, -2 = fill context)"""
+
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
     repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
@@ -126,6 +130,7 @@ class _OllamaCommon(BaseLanguageModel):
                 "num_ctx": self.num_ctx,
                 "num_gpu": self.num_gpu,
                 "num_thread": self.num_thread,
+                "num_predict": self.num_predict,
                 "repeat_last_n": self.repeat_last_n,
                 "repeat_penalty": self.repeat_penalty,
                 "temperature": self.temperature,
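A minimal usage sketch of the new parameter, assuming a local Ollama server with the llama2 model pulled and the langchain_community package layout current at the time of this change:

from langchain_community.chat_models import ChatOllama

# Cap generation at 128 tokens; -1 would allow unbounded generation,
# -2 would fill the remaining context window.
chat = ChatOllama(model="llama2", num_predict=128)

print(chat.invoke("Summarize the Ollama project in one sentence.").content)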