From 53b0a99f3769d83e8aaf2b0382937bc91eae79b7 Mon Sep 17 00:00:00 2001
From: Dmitriy Prokopchuk <87666671+prokopchukdim@users.noreply.github.com>
Date: Wed, 6 Nov 2024 22:07:59 -0500
Subject: [PATCH] community: Memcached LLM Cache Integration (#27323)

## Description
This PR adds support for Memcached as a usable LLM model cache by adding
the ```MemcachedCache``` implementation relying on the
[pymemcache](https://github.com/pinterest/pymemcache) client.

Unit test-wise, the new integration is generally covered under existing
import testing. All new functionality depends on pymemcache if
instantiated and used, so to comply with the other cache implementations
the PR also adds optional integration tests for ```MemcachedCache```.

Since this is a new integration, documentation is added for Memcached as
an integration and as an LLM Cache.

## Issue
This PR closes #27275 which was originally raised as a discussion in
#27035

## Dependencies
There are no new required dependencies for langchain, but
[pymemcache](https://github.com/pinterest/pymemcache) is required to
instantiate the new ```MemcachedCache```.

## Example Usage
```python3
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI

from langchain_community.cache import MemcachedCache
from pymemcache.client.base import Client

llm = OpenAI(model="gpt-3.5-turbo-instruct", n=2, best_of=2)
set_llm_cache(MemcachedCache(Client('localhost')))

# The first time, it is not yet in cache, so it should take longer
llm.invoke("Which city is the most crowded city in the USA?")

# The second time it is, so it goes faster
llm.invoke("Which city is the most crowded city in the USA?")
```

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
---
 docs/docs/integrations/llm_caching.ipynb      | 96 +++++++++++++++++++
 .../docs/integrations/providers/memcached.mdx | 34 +++++++
 libs/community/langchain_community/cache.py   | 94 ++++++++++++++++++
 .../cache/test_memcached_cache.py             | 61 ++++++++++++
 4 files changed, 285 insertions(+)
 create mode 100644 docs/docs/integrations/providers/memcached.mdx
 create mode 100644 libs/community/tests/integration_tests/cache/test_memcached_cache.py

diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb
index ee5152e023f..4ba1901613a 100644
--- a/docs/docs/integrations/llm_caching.ipynb
+++ b/docs/docs/integrations/llm_caching.ipynb
@@ -2368,6 +2368,102 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "7e6b9b1a",
+   "metadata": {},
+   "source": [
+    "## `Memcached` Cache\n",
+    "You can use [Memcached](https://www.memcached.org/) as a cache to cache prompts and responses through [pymemcache](https://github.com/pinterest/pymemcache).\n",
+    "\n",
+    "This cache requires the pymemcache dependency to be installed:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b2e5e0b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -qU pymemcache"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4c7ffe37",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.cache import MemcachedCache\n",
+    "from pymemcache.client.base import Client\n",
+    "\n",
+    "set_llm_cache(MemcachedCache(Client(\"localhost\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a4cfc48a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 32.8 ms, sys: 21 ms, total: 53.8 ms\n",
+      "Wall time: 343 ms\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# The first time, it is not yet in cache, so it should take longer\n",
+    "llm.invoke(\"Tell me a joke\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "cb3b2bf5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 2.31 ms, sys: 850 µs, total: 3.16 ms\n",
+      "Wall time: 6.43 ms\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# The second time it is, so it goes faster\n",
+    "llm.invoke(\"Tell me a joke\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "7019c991-0101-4f9c-b212-5729a5471293",
diff --git a/docs/docs/integrations/providers/memcached.mdx b/docs/docs/integrations/providers/memcached.mdx
new file mode 100644
index 00000000000..f7719deda40
--- /dev/null
+++ b/docs/docs/integrations/providers/memcached.mdx
@@ -0,0 +1,34 @@
+# Memcached
+
+> [Memcached](https://www.memcached.org/) is a free & open source, high-performance, distributed memory object caching system,
+> generic in nature, but intended for use in speeding up dynamic web applications by alleviating database load.
+
+This page covers how to use Memcached with LangChain, using [pymemcache](https://github.com/pinterest/pymemcache) as
+a client to connect to an already running Memcached instance.
+
+## Installation and Setup
+```bash
+pip install pymemcache
+```
+
+## LLM Cache
+
+To integrate a Memcached Cache into your application:
+```python3
+from langchain.globals import set_llm_cache
+from langchain_openai import OpenAI
+
+from langchain_community.cache import MemcachedCache
+from pymemcache.client.base import Client
+
+llm = OpenAI(model="gpt-3.5-turbo-instruct", n=2, best_of=2)
+set_llm_cache(MemcachedCache(Client('localhost')))
+
+# The first time, it is not yet in cache, so it should take longer
+llm.invoke("Which city is the most crowded city in the USA?")
+
+# The second time it is, so it goes faster
+llm.invoke("Which city is the most crowded city in the USA?")
+```
+
+Learn more in the [example notebook](/docs/integrations/llm_caching#memcached-cache)
\ No newline at end of file
diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py
index c074747370c..697c26ed872 100644
--- a/libs/community/langchain_community/cache.py
+++ b/libs/community/langchain_community/cache.py
@@ -91,6 +91,7 @@ logger = logging.getLogger(__file__)
 
 if TYPE_CHECKING:
     import momento
+    import pymemcache
     from astrapy.db import AstraDB, AsyncAstraDB
     from cassandra.cluster import Session as CassandraSession
 
@@ -2599,3 +2600,96 @@ class SingleStoreDBSemanticCache(BaseCache):
         if index_name in self._cache_dict:
             self._cache_dict[index_name].drop()
             del self._cache_dict[index_name]
+
+
+class MemcachedCache(BaseCache):
+    """Cache that uses Memcached backend through pymemcache client lib"""
+
+    def __init__(self, client_: Any):
+        """
+        Initialize an instance of MemcachedCache.
+
+        Args:
+            client_ (str): An instance of any of pymemcache's Clients
+                (Client, PooledClient, HashClient)
+        Example:
+        .. code-block:: python
+            ifrom langchain.globals import set_llm_cache
+            from langchain_openai import OpenAI
+
+            from langchain_community.cache import MemcachedCache
+            from pymemcache.client.base import Client
+
+            llm = OpenAI(model="gpt-3.5-turbo-instruct", n=2, best_of=2)
+            set_llm_cache(MemcachedCache(Client('localhost')))
+
+            # The first time, it is not yet in cache, so it should take longer
+            llm.invoke("Which city is the most crowded city in the USA?")
+
+            # The second time it is, so it goes faster
+            llm.invoke("Which city is the most crowded city in the USA?")
+        """
+
+        try:
+            from pymemcache.client import (
+                Client,
+                HashClient,
+                PooledClient,
+                RetryingClient,
+            )
+        except (ImportError, ModuleNotFoundError):
+            raise ImportError(
+                "Could not import pymemcache python package. "
+                "Please install it with `pip install -U pymemcache`."
+            )
+
+        if not (
+            isinstance(client_, Client)
+            or isinstance(client_, PooledClient)
+            or isinstance(client_, HashClient)
+            or isinstance(client_, RetryingClient)
+        ):
+            raise ValueError("Please pass a valid pymemcached client")
+
+        self.client = client_
+
+    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
+        """Look up based on prompt and llm_string."""
+        key = _hash(prompt + llm_string)
+        try:
+            result = self.client.get(key)
+        except pymemcache.MemcacheError:
+            return None
+
+        return _loads_generations(result) if result is not None else None
+
+    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
+        """Update cache based on prompt and llm_string."""
+        key = _hash(prompt + llm_string)
+
+        # Validate input is made of standard LLM generations
+        for gen in return_val:
+            if not isinstance(gen, Generation):
+                raise ValueError(
+                    "Memcached only supports caching of normal LLM generations, "
+                    + f"got {type(gen)}"
+                )
+
+        # Deserialize return_val into string and update cache
+        value = _dumps_generations(return_val)
+        self.client.set(key, value)
+
+    def clear(self, **kwargs: Any) -> None:
+        """
+        Clear the entire cache. Takes optional kwargs:
+
+        delay: optional int, the number of seconds to wait before flushing,
+                or zero to flush immediately (the default). NON-BLOCKING, returns
+                immediately.
+        noreply: optional bool, True to not wait for the reply (defaults to
+                client.default_noreply).
+        """
+        delay = kwargs.get("delay", 0)
+        noreply = kwargs.get("noreply", None)
+
+        self.client.flush_all(delay, noreply)
diff --git a/libs/community/tests/integration_tests/cache/test_memcached_cache.py b/libs/community/tests/integration_tests/cache/test_memcached_cache.py
new file mode 100644
index 00000000000..2aca3df0566
--- /dev/null
+++ b/libs/community/tests/integration_tests/cache/test_memcached_cache.py
@@ -0,0 +1,61 @@
+"""
+Test Memcached llm cache functionality. Requires running instance of Memcached on
+localhost default port (11211) and pymemcache
+"""
+
+import pytest
+from langchain.globals import get_llm_cache, set_llm_cache
+from langchain_core.outputs import Generation, LLMResult
+
+from langchain_community.cache import MemcachedCache
+from tests.unit_tests.llms.fake_llm import FakeLLM
+
+DEFAULT_MEMCACHED_URL = "localhost"  # host only; default port 11211 is assumed
+
+
+@pytest.mark.requires("pymemcache")
+def test_memcached_cache() -> None:
+    """Test general Memcached caching"""
+    from pymemcache import Client
+
+    set_llm_cache(MemcachedCache(Client(DEFAULT_MEMCACHED_URL)))
+    llm = FakeLLM()
+
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+    output = llm.generate(["foo"])
+    expected_output = LLMResult(
+        generations=[[Generation(text="fizz")]],
+        llm_output={},
+    )
+    assert output == expected_output
+    # clear the cache
+    get_llm_cache().clear()
+
+
+@pytest.mark.requires("pymemcache")
+def test_memcached_cache_flush() -> None:
+    """Test flushing Memcached cache"""
+    from pymemcache import Client
+
+    set_llm_cache(MemcachedCache(Client(DEFAULT_MEMCACHED_URL)))
+    llm = FakeLLM()
+
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+    output = llm.generate(["foo"])
+    expected_output = LLMResult(
+        generations=[[Generation(text="fizz")]],
+        llm_output={},
+    )
+    assert output == expected_output
+    # clear the cache
+    get_llm_cache().clear(delay=0, noreply=False)
+
+    # After cache has been cleared, the result shouldn't be the same
+    output = llm.generate(["foo"])
+    assert output != expected_output