diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb index ee5152e023f..4ba1901613a 100644 --- a/docs/docs/integrations/llm_caching.ipynb +++ b/docs/docs/integrations/llm_caching.ipynb @@ -2368,6 +2368,102 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "7e6b9b1a", + "metadata": {}, + "source": [ + "## `Memcached` Cache\n", + "You can use [Memcached](https://www.memcached.org/) as a cache to cache prompts and responses through [pymemcache](https://github.com/pinterest/pymemcache).\n", + "\n", + "This cache requires the pymemcache dependency to be installed:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b2e5e0b1", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU pymemcache" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4c7ffe37", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.cache import MemcachedCache\n", + "from pymemcache.client.base import Client\n", + "\n", + "set_llm_cache(MemcachedCache(Client(\"localhost\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a4cfc48a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 32.8 ms, sys: 21 ms, total: 53.8 ms\n", + "Wall time: 343 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm.invoke(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cb3b2bf5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.31 ms, sys: 850 µs, total: 3.16 ms\n", + "Wall time: 6.43 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm.invoke(\"Tell me a joke\")" + ] + }, { "cell_type": "markdown", "id": "7019c991-0101-4f9c-b212-5729a5471293", diff --git a/docs/docs/integrations/providers/memcached.mdx b/docs/docs/integrations/providers/memcached.mdx new file mode 100644 index 00000000000..f7719deda40 --- /dev/null +++ b/docs/docs/integrations/providers/memcached.mdx @@ -0,0 +1,34 @@ +# Memcached + +> [Memcached](https://www.memcached.org/) is a free & open source, high-performance, distributed memory object caching system, +> generic in nature, but intended for use in speeding up dynamic web applications by alleviating database load. + +This page covers how to use Memcached with langchain, using [pymemcache](https://github.com/pinterest/pymemcache) as +a client to connect to an already running Memcached instance. 
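+
+If you want to check that the Memcached instance is reachable before wiring it into
+LangChain, a quick pymemcache round-trip works as a sanity check (a minimal sketch,
+assuming Memcached is listening on the default `localhost:11211`):
+
+```python
+from pymemcache.client.base import Client
+
+client = Client("localhost")  # assumes the default Memcached port, 11211
+client.set("healthcheck", "ok")
+assert client.get("healthcheck") == b"ok"  # pymemcache returns raw bytes by default
+```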
+
+## Installation and Setup
+```bash
+pip install pymemcache
+```
+
+## LLM Cache
+
+To integrate a Memcached Cache into your application:
+```python3
+from langchain.globals import set_llm_cache
+from langchain_openai import OpenAI
+
+from langchain_community.cache import MemcachedCache
+from pymemcache.client.base import Client
+
+llm = OpenAI(model="gpt-3.5-turbo-instruct", n=2, best_of=2)
+set_llm_cache(MemcachedCache(Client('localhost')))
+
+# The first time, it is not yet in cache, so it should take longer
+llm.invoke("Which city is the most crowded city in the USA?")
+
+# The second time it is, so it goes faster
+llm.invoke("Which city is the most crowded city in the USA?")
+```
+
+Learn more in the [example notebook](/docs/integrations/llm_caching#memcached-cache)
\ No newline at end of file
diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py
index c074747370c..697c26ed872 100644
--- a/libs/community/langchain_community/cache.py
+++ b/libs/community/langchain_community/cache.py
@@ -91,6 +91,7 @@ logger = logging.getLogger(__file__)
 
 if TYPE_CHECKING:
     import momento
+    import pymemcache
 
     from astrapy.db import AstraDB, AsyncAstraDB
     from cassandra.cluster import Session as CassandraSession
@@ -2599,3 +2600,96 @@ class SingleStoreDBSemanticCache(BaseCache):
         if index_name in self._cache_dict:
             self._cache_dict[index_name].drop()
             del self._cache_dict[index_name]
+
+
+class MemcachedCache(BaseCache):
+    """Cache that uses a Memcached backend through the pymemcache client library."""
+
+    def __init__(self, client_: Any):
+        """
+        Initialize an instance of MemcachedCache.
+
+        Args:
+            client_ (Any): An instance of any of pymemcache's clients
+                (Client, PooledClient, HashClient, RetryingClient)
+
+        Example:
+            .. code-block:: python
+
+                from langchain.globals import set_llm_cache
+                from langchain_openai import OpenAI
+
+                from langchain_community.cache import MemcachedCache
+                from pymemcache.client.base import Client
+
+                llm = OpenAI(model="gpt-3.5-turbo-instruct", n=2, best_of=2)
+                set_llm_cache(MemcachedCache(Client('localhost')))
+
+                # The first time, it is not yet in cache, so it should take longer
+                llm.invoke("Which city is the most crowded city in the USA?")
+
+                # The second time it is, so it goes faster
+                llm.invoke("Which city is the most crowded city in the USA?")
+        """
+
+        try:
+            from pymemcache.client import (
+                Client,
+                HashClient,
+                PooledClient,
+                RetryingClient,
+            )
+        except (ImportError, ModuleNotFoundError):
+            raise ImportError(
+                "Could not import pymemcache python package. "
+                "Please install it with `pip install -U pymemcache`."
+ ) + + if not ( + isinstance(client_, Client) + or isinstance(client_, PooledClient) + or isinstance(client_, HashClient) + or isinstance(client_, RetryingClient) + ): + raise ValueError("Please pass a valid pymemcached client") + + self.client = client_ + + def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: + """Look up based on prompt and llm_string.""" + key = _hash(prompt + llm_string) + try: + result = self.client.get(key) + except pymemcache.MemcacheError: + return None + + return _loads_generations(result) if result is not None else None + + def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: + """Update cache based on prompt and llm_string.""" + key = _hash(prompt + llm_string) + + # Validate input is made of standard LLM generations + for gen in return_val: + if not isinstance(gen, Generation): + raise ValueError( + "Memcached only supports caching of normal LLM generations, " + + f"got {type(gen)}" + ) + + # Deserialize return_val into string and update cache + value = _dumps_generations(return_val) + self.client.set(key, value) + + def clear(self, **kwargs: Any) -> None: + """ + Clear the entire cache. Takes optional kwargs: + + delay: optional int, the number of seconds to wait before flushing, + or zero to flush immediately (the default). NON-BLOCKING, returns + immediately. + noreply: optional bool, True to not wait for the reply (defaults to + client.default_noreply). + """ + delay = kwargs.get("delay", 0) + noreply = kwargs.get("noreply", None) + + self.client.flush_all(delay, noreply) diff --git a/libs/community/tests/integration_tests/cache/test_memcached_cache.py b/libs/community/tests/integration_tests/cache/test_memcached_cache.py new file mode 100644 index 00000000000..2aca3df0566 --- /dev/null +++ b/libs/community/tests/integration_tests/cache/test_memcached_cache.py @@ -0,0 +1,61 @@ +""" +Test Memcached llm cache functionality. 
+localhost (default port 11211) and the pymemcache package installed.
+"""
+
+import pytest
+from langchain.globals import get_llm_cache, set_llm_cache
+from langchain_core.outputs import Generation, LLMResult
+
+from langchain_community.cache import MemcachedCache
+from tests.unit_tests.llms.fake_llm import FakeLLM
+
+DEFAULT_MEMCACHED_URL = "localhost"
+
+
+@pytest.mark.requires("pymemcache")
+def test_memcached_cache() -> None:
+    """Test general Memcached caching"""
+    from pymemcache import Client
+
+    set_llm_cache(MemcachedCache(Client(DEFAULT_MEMCACHED_URL)))
+    llm = FakeLLM()
+
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+    output = llm.generate(["foo"])
+    expected_output = LLMResult(
+        generations=[[Generation(text="fizz")]],
+        llm_output={},
+    )
+    assert output == expected_output
+    # clear the cache
+    get_llm_cache().clear()
+
+
+@pytest.mark.requires("pymemcache")
+def test_memcached_cache_flush() -> None:
+    """Test flushing Memcached cache"""
+    from pymemcache import Client
+
+    set_llm_cache(MemcachedCache(Client(DEFAULT_MEMCACHED_URL)))
+    llm = FakeLLM()
+
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+    output = llm.generate(["foo"])
+    expected_output = LLMResult(
+        generations=[[Generation(text="fizz")]],
+        llm_output={},
+    )
+    assert output == expected_output
+    # clear the cache
+    get_llm_cache().clear(delay=0, noreply=False)
+
+    # After the cache has been cleared, the result shouldn't be the same
+    output = llm.generate(["foo"])
+    assert output != expected_output
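+
+
+# A minimal manual round-trip sketch (not collected by pytest) for debugging the new
+# cache outside the LLM plumbing above. It assumes the same running Memcached instance
+# on localhost; the prompt and llm_string values below are arbitrary placeholders.
+def manual_memcached_roundtrip_sketch() -> None:
+    from pymemcache import Client
+
+    cache = MemcachedCache(Client(DEFAULT_MEMCACHED_URL))
+
+    # update() stores the serialized generations under a hash of prompt + llm_string.
+    cache.update("ping", "fake-llm-config", [Generation(text="pong")])
+    hit = cache.lookup("ping", "fake-llm-config")
+    assert hit is not None and hit[0].text == "pong"
+
+    # A different llm_string hashes to a different key, so the lookup misses.
+    assert cache.lookup("ping", "other-llm-config") is None
+
+    # flush_all with noreply=False blocks until Memcached confirms the flush.
+    cache.clear(noreply=False)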