community: Add keep_alive parameter to control how long the model w… (#19005)

Add `keep_alive` parameter to control how long the model will stay
loaded into memory with Ollama.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
gonvee 2024-03-19 12:29:01 +08:00 committed by GitHub
parent bb0dd8f82f
commit b82644078e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 1 deletions

View File

@ -1,5 +1,5 @@
import json import json
from typing import Any, AsyncIterator, Dict, Iterator, List, Mapping, Optional from typing import Any, AsyncIterator, Dict, Iterator, List, Mapping, Optional, Union
import aiohttp import aiohttp
import requests import requests
@ -111,6 +111,18 @@ class _OllamaCommon(BaseLanguageModel):
timeout: Optional[int] = None timeout: Optional[int] = None
"""Timeout for the request stream""" """Timeout for the request stream"""
keep_alive: Optional[Union[int, str]] = None
"""How long the model will stay loaded into memory.
The parameter (Default: 5 minutes) can be set to:
1. a duration string in Golang (such as "10m" or "24h");
2. a number in seconds (such as 3600);
3. any negative number which will keep the model loaded \
in memory (e.g. -1 or "-1m");
4. 0 which will unload the model immediately after generating a response;
See the [Ollama documents](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately)"""
headers: Optional[dict] = None headers: Optional[dict] = None
"""Additional headers to pass to endpoint (e.g. Authorization, Referer). """Additional headers to pass to endpoint (e.g. Authorization, Referer).
This is useful when Ollama is hosted on cloud services that require This is useful when Ollama is hosted on cloud services that require
@ -141,6 +153,7 @@ class _OllamaCommon(BaseLanguageModel):
}, },
"system": self.system, "system": self.system,
"template": self.template, "template": self.template,
"keep_alive": self.keep_alive,
} }
@property @property

View File

@ -100,6 +100,7 @@ def test_handle_kwargs_top_level_parameters(monkeypatch: MonkeyPatch) -> None:
"prompt": "Test prompt", "prompt": "Test prompt",
"system": "Test system prompt", "system": "Test system prompt",
"template": None, "template": None,
"keep_alive": None,
} }
assert stream is True assert stream is True
assert timeout == 300 assert timeout == 300
@ -147,6 +148,7 @@ def test_handle_kwargs_with_unknown_param(monkeypatch: MonkeyPatch) -> None:
"prompt": "Test prompt", "prompt": "Test prompt",
"system": None, "system": None,
"template": None, "template": None,
"keep_alive": None,
} }
assert stream is True assert stream is True
assert timeout == 300 assert timeout == 300
@ -178,6 +180,7 @@ def test_handle_kwargs_with_options(monkeypatch: MonkeyPatch) -> None:
"prompt": "Test prompt", "prompt": "Test prompt",
"system": None, "system": None,
"template": None, "template": None,
"keep_alive": None,
} }
assert stream is True assert stream is True
assert timeout == 300 assert timeout == 300