langchain[minor], community[minor], core[minor]: Async Cache support and AsyncRedisCache (#15817)

* This PR adds async methods to the LLM cache. 
* Adds an implementation using Redis called AsyncRedisCache.
* Adds a docker compose file under /docker to help spin up a local Redis in Docker.
* Updates redis tests to use a context manager so flushing always happens by default
This commit is contained in:
Dmitry Kankalovich
2024-02-08 04:06:09 +01:00
committed by GitHub
parent 19546081c6
commit f92738a6f6
8 changed files with 472 additions and 133 deletions

View File

@@ -1,6 +1,7 @@
from langchain_community.cache import (
AstraDBCache,
AstraDBSemanticCache,
AsyncRedisCache,
CassandraCache,
CassandraSemanticCache,
FullLLMCache,
@@ -22,6 +23,7 @@ __all__ = [
"SQLAlchemyCache",
"SQLiteCache",
"UpstashRedisCache",
"AsyncRedisCache",
"RedisCache",
"RedisSemanticCache",
"GPTCache",

View File

@@ -1,6 +1,7 @@
"""Test Redis cache functionality."""
import uuid
from typing import List, cast
from contextlib import asynccontextmanager, contextmanager
from typing import AsyncGenerator, Generator, List, Optional, cast
import pytest
from langchain_core.embeddings import Embeddings
@@ -8,7 +9,7 @@ from langchain_core.load.dump import dumps
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.outputs import ChatGeneration, Generation, LLMResult
from langchain.cache import RedisCache, RedisSemanticCache
from langchain.cache import AsyncRedisCache, RedisCache, RedisSemanticCache
from langchain.globals import get_llm_cache, set_llm_cache
from tests.integration_tests.cache.fake_embeddings import (
ConsistentFakeEmbeddings,
@@ -17,65 +18,176 @@ from tests.integration_tests.cache.fake_embeddings import (
from tests.unit_tests.llms.fake_chat_model import FakeChatModel
from tests.unit_tests.llms.fake_llm import FakeLLM
# Using a non-standard port (6020 instead of the default 6379) to avoid
# conflicts with any locally running redis instance.
# You can spin up a local redis using docker compose:
#   cd [repository-root]/docker
#   docker-compose up redis
REDIS_TEST_URL = "redis://localhost:6020"
def random_string() -> str:
    """Return a fresh UUID4 rendered as a string, usable as a unique key."""
    return f"{uuid.uuid4()}"
@contextmanager
def get_sync_redis(*, ttl: Optional[int] = 1) -> Generator[RedisCache, None, None]:
    """Yield a synchronous RedisCache, clearing it on exit.

    The cache is cleared in the ``finally`` clause so a failing test cannot
    leak cached entries into the next one.
    """
    import redis

    client = redis.Redis.from_url(REDIS_TEST_URL)
    llm_cache = RedisCache(redis_=client, ttl=ttl)
    try:
        yield llm_cache
    finally:
        llm_cache.clear()
@asynccontextmanager
async def get_async_redis(
    *, ttl: Optional[int] = 1
) -> AsyncGenerator[AsyncRedisCache, None]:
    """Yield an AsyncRedisCache, clearing it asynchronously on exit."""
    from redis.asyncio import Redis

    client = Redis.from_url(REDIS_TEST_URL)
    llm_cache = AsyncRedisCache(redis_=client, ttl=ttl)
    try:
        yield llm_cache
    finally:
        # Always flush cached entries, even if the test body raised.
        await llm_cache.aclear()
def test_redis_cache_ttl() -> None:
    """A freshly cached entry must carry a positive TTL.

    ``pttl`` returns -1 (no expiry) or -2 (missing key) otherwise, so
    ``> 0`` proves the ttl passed to the cache reached Redis.
    """
    # NOTE(review): the original span also contained the removed pre-change
    # implementation (direct RedisCache construction without cleanup); only
    # the context-manager version is kept here.
    from redis import Redis

    with get_sync_redis() as llm_cache:
        set_llm_cache(llm_cache)
        llm_cache.update("foo", "bar", [Generation(text="fizz")])
        key = llm_cache._key("foo", "bar")
        assert isinstance(llm_cache.redis, Redis)
        assert llm_cache.redis.pttl(key) > 0
async def test_async_redis_cache_ttl() -> None:
    """Async variant of the TTL check, driven through ``aupdate``."""
    # NOTE(review): the original span interleaved this test with the removed
    # pre-change ``test_redis_cache``; only the post-change test is kept.
    from redis.asyncio import Redis as AsyncRedis

    async with get_async_redis() as redis_cache:
        set_llm_cache(redis_cache)
        # Cast to the async cache type actually installed above; the original
        # cast to RedisCache was misleading (cast() is a no-op at runtime).
        llm_cache = cast(AsyncRedisCache, get_llm_cache())
        await llm_cache.aupdate("foo", "bar", [Generation(text="fizz")])
        key = llm_cache._key("foo", "bar")
        assert isinstance(llm_cache.redis, AsyncRedis)
        assert await llm_cache.redis.pttl(key) > 0
def test_sync_redis_cache() -> None:
    """A generation stored via ``update`` is served back by ``generate``."""
    with get_sync_redis() as llm_cache:
        set_llm_cache(llm_cache)
        fake_llm = FakeLLM()
        params = fake_llm.dict()
        params["stop"] = None
        llm_string = str(sorted(params.items()))
        llm_cache.update("prompt", llm_string, [Generation(text="fizz0")])
        result = fake_llm.generate(["prompt"])
        assert result == LLMResult(
            generations=[[Generation(text="fizz0")]],
            llm_output={},
        )
async def test_sync_in_async_redis_cache() -> None:
    """The sync RedisCache must also work when driven via the async methods.

    ``aupdate``/``agenerate`` on a sync cache fall back to the sync
    implementation, so the cached generation must still be returned.
    """
    with get_sync_redis() as llm_cache:
        set_llm_cache(llm_cache)
        llm = FakeLLM()
        params = llm.dict()
        params["stop"] = None
        llm_string = str(sorted([(k, v) for k, v in params.items()]))
        # Removed a commented-out leftover call from an earlier draft here.
        await llm_cache.aupdate("prompt", llm_string, [Generation(text="fizz1")])
        output = await llm.agenerate(["prompt"])
        expected_output = LLMResult(
            generations=[[Generation(text="fizz1")]],
            llm_output={},
        )
        assert output == expected_output
async def test_async_redis_cache() -> None:
    """A generation stored via ``aupdate`` is served back by ``agenerate``."""
    async with get_async_redis() as redis_cache:
        set_llm_cache(redis_cache)
        llm = FakeLLM()
        params = llm.dict()
        params["stop"] = None
        llm_string = str(sorted([(k, v) for k, v in params.items()]))
        # Cast to the async cache type actually installed above; the original
        # cast to RedisCache was misleading (cast() is a no-op at runtime).
        llm_cache = cast(AsyncRedisCache, get_llm_cache())
        await llm_cache.aupdate("prompt", llm_string, [Generation(text="fizz2")])
        output = await llm.agenerate(["prompt"])
        expected_output = LLMResult(
            generations=[[Generation(text="fizz2")]],
            llm_output={},
        )
        assert output == expected_output
async def test_async_in_sync_redis_cache() -> None:
    """AsyncRedisCache has no sync path: ``update`` must raise."""
    async with get_async_redis() as redis_cache:
        set_llm_cache(redis_cache)
        llm = FakeLLM()
        params = llm.dict()
        params["stop"] = None
        llm_string = str(sorted([(k, v) for k, v in params.items()]))
        # Cast to the async cache type actually installed above; the original
        # cast to RedisCache was misleading (cast() is a no-op at runtime).
        llm_cache = cast(AsyncRedisCache, get_llm_cache())
        with pytest.raises(NotImplementedError):
            llm_cache.update("foo", llm_string, [Generation(text="fizz")])
def test_redis_cache_chat() -> None:
    """Chat generations round-trip through the sync cache.

    NOTE(review): the original span contained the removed pre-change
    implementation (direct RedisCache construction plus a trailing
    ``flushall``) concatenated after this one; only the context-manager
    version is kept.
    """
    with get_sync_redis() as redis_cache:
        set_llm_cache(redis_cache)
        llm = FakeChatModel()
        params = llm.dict()
        params["stop"] = None
        llm_string = str(sorted([(k, v) for k, v in params.items()]))
        prompt: List[BaseMessage] = [HumanMessage(content="foo")]
        llm_cache = cast(RedisCache, get_llm_cache())
        llm_cache.update(
            dumps(prompt),
            llm_string,
            [ChatGeneration(message=AIMessage(content="fizz"))],
        )
        output = llm.generate([prompt])
        expected_output = LLMResult(
            generations=[[ChatGeneration(message=AIMessage(content="fizz"))]],
            llm_output={},
        )
        assert output == expected_output
async def test_async_redis_cache_chat() -> None:
    """Chat generations round-trip through the async cache."""
    async with get_async_redis() as redis_cache:
        set_llm_cache(redis_cache)
        llm = FakeChatModel()
        params = llm.dict()
        params["stop"] = None
        llm_string = str(sorted([(k, v) for k, v in params.items()]))
        prompt: List[BaseMessage] = [HumanMessage(content="foo")]
        # Cast to the async cache type actually installed above; the original
        # cast to RedisCache was misleading (cast() is a no-op at runtime).
        llm_cache = cast(AsyncRedisCache, get_llm_cache())
        await llm_cache.aupdate(
            dumps(prompt),
            llm_string,
            [ChatGeneration(message=AIMessage(content="fizz"))],
        )
        output = await llm.agenerate([prompt])
        expected_output = LLMResult(
            generations=[[ChatGeneration(message=AIMessage(content="fizz"))]],
            llm_output={},
        )
        assert output == expected_output
# NOTE(review): this span is a unified-diff excerpt with indentation and the
# +/- markers stripped. "@@" lines are hunk headers; where two adjacent lines
# do the same job (e.g. get_llm_cache().update(...) followed by
# llm_cache.update(...)), the first is the removed pre-change line and the
# second its replacement. Not valid Python as-is; do not restyle.
def test_redis_semantic_cache() -> None:
"""Test redis semantic cache functionality."""
set_llm_cache(
RedisSemanticCache(
embedding=FakeEmbeddings(), redis_url=REDIS_TEST_URL, score_threshold=0.1
@@ -85,7 +197,8 @@ def test_redis_semantic_cache() -> None:
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
llm_cache = cast(RedisSemanticCache, get_llm_cache())
llm_cache.update("foo", llm_string, [Generation(text="fizz")])
output = llm.generate(
["bar"]
) # foo and bar will have the same embedding produced by FakeEmbeddings
@@ -95,13 +208,13 @@ def test_redis_semantic_cache() -> None:
)
assert output == expected_output
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
llm_cache.clear(llm_string=llm_string)
output = llm.generate(
["bar"]
) # foo and bar will have the same embedding produced by FakeEmbeddings
# expect different output now without cached result
assert output != expected_output
get_llm_cache().clear(llm_string=llm_string)
llm_cache.clear(llm_string=llm_string)
# NOTE(review): unified-diff excerpt; "@@" lines are hunk headers eliding
# context, and adjacent duplicate-purpose lines are old-line-then-replacement
# pairs. Not valid Python as-is.
def test_redis_semantic_cache_multi() -> None:
@@ -114,7 +227,8 @@ def test_redis_semantic_cache_multi() -> None:
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
llm_cache = cast(RedisSemanticCache, get_llm_cache())
llm_cache.update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
output = llm.generate(
@@ -126,7 +240,7 @@ def test_redis_semantic_cache_multi() -> None:
)
assert output == expected_output
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
llm_cache.clear(llm_string=llm_string)
# NOTE(review): unified-diff excerpt; "@@" lines are hunk headers eliding
# context, and adjacent duplicate-purpose lines are old-line-then-replacement
# pairs. Not valid Python as-is.
def test_redis_semantic_cache_chat() -> None:
@@ -140,7 +254,8 @@ def test_redis_semantic_cache_chat() -> None:
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
prompt: List[BaseMessage] = [HumanMessage(content="foo")]
get_llm_cache().update(
llm_cache = cast(RedisSemanticCache, get_llm_cache())
llm_cache.update(
dumps(prompt), llm_string, [ChatGeneration(message=AIMessage(content="fizz"))]
)
output = llm.generate([prompt])
@@ -149,7 +264,7 @@ def test_redis_semantic_cache_chat() -> None:
llm_output={},
)
assert output == expected_output
get_llm_cache().clear(llm_string=llm_string)
llm_cache.clear(llm_string=llm_string)
# NOTE(review): unified-diff excerpt of test_redis_semantic_cache_hit; its
# def line is hidden inside the "@@" hunk header below, so the span is
# incomplete. Adjacent duplicate-purpose lines are old-line-then-replacement
# pairs. The bare print() calls look like leftover debugging — consider
# removing them upstream (confirm with the test author).
@pytest.mark.parametrize("embedding", [ConsistentFakeEmbeddings()])
@@ -192,10 +307,11 @@ def test_redis_semantic_cache_hit(
]
for prompt_i_generations in generations
]
llm_cache = cast(RedisSemanticCache, get_llm_cache())
for prompt_i, llm_generations_i in zip(prompts, llm_generations):
print(prompt_i)
print(llm_generations_i)
get_llm_cache().update(prompt_i, llm_string, llm_generations_i)
llm_cache.update(prompt_i, llm_string, llm_generations_i)
llm.generate(prompts)
assert llm.generate(prompts) == LLMResult(
generations=llm_generations, llm_output={}
View File

@@ -1,4 +1,5 @@
"""Test caching for LLMs and ChatModels."""
import sqlite3
from typing import Dict, Generator, List, Union
import pytest
@@ -21,7 +22,11 @@ from langchain.globals import get_llm_cache, set_llm_cache
def get_sqlite_cache() -> SQLAlchemyCache:
    """Build a SQLAlchemyCache backed by a shared in-memory SQLite database.

    The explicit ``creator`` makes every pooled connection use the same
    ``cache=shared`` database, which a bare ``sqlite://`` engine does not
    guarantee.

    NOTE(review): ``sqlite3.connect`` treats this string as a literal
    filename unless ``uri=True`` is passed — confirm whether an on-disk
    file named "file::memory:?cache=shared" is acceptable here.
    """
    # The original span also contained the removed pre-change one-liner
    # return, which made this body unreachable; it has been dropped.
    return SQLAlchemyCache(
        engine=create_engine(
            "sqlite://", creator=lambda: sqlite3.connect("file::memory:?cache=shared")
        )
    )
# NOTE(review): unified-diff excerpt; the fixture's def line is hidden in the
# "@@" hunk header, and adjacent duplicate-purpose lines are removed
# pre-change lines followed by their walrus-operator replacements. Not valid
# Python as-is; do not restyle.
CACHE_OPTIONS = [
@@ -35,33 +40,41 @@ def set_cache_and_teardown(request: FixtureRequest) -> Generator[None, None, Non
# Will be run before each test
cache_instance = request.param
set_llm_cache(cache_instance())
if get_llm_cache():
get_llm_cache().clear()
if llm_cache := get_llm_cache():
llm_cache.clear()
else:
raise ValueError("Cache not set. This should never happen.")
yield
# Will be run after each test
if get_llm_cache():
get_llm_cache().clear()
if llm_cache:
llm_cache.clear()
set_llm_cache(None)
else:
raise ValueError("Cache not set. This should never happen.")
# NOTE(review): unified-diff excerpt; the sync def line is the removed
# pre-change line and the async def its replacement, and later adjacent
# duplicate-purpose lines pair up the same way. The trailing raise's string
# literal is truncated by the following hunk header. Not valid Python as-is.
def test_llm_caching() -> None:
async def test_llm_caching() -> None:
prompt = "How are you?"
response = "Test response"
cached_response = "Cached test response"
llm = FakeListLLM(responses=[response])
if get_llm_cache():
get_llm_cache().update(
if llm_cache := get_llm_cache():
# sync test
llm_cache.update(
prompt=prompt,
llm_string=create_llm_string(llm),
return_val=[Generation(text=cached_response)],
)
assert llm(prompt) == cached_response
# async test
await llm_cache.aupdate(
prompt=prompt,
llm_string=create_llm_string(llm),
return_val=[Generation(text=cached_response)],
)
assert await llm.ainvoke(prompt) == cached_response
else:
raise ValueError(
"The cache not set. This should never happen, as the pytest fixture "
@@ -90,14 +103,15 @@ def test_old_sqlite_llm_caching() -> None:
assert llm(prompt) == cached_response
# NOTE(review): unified-diff excerpt; sync def is the removed pre-change
# line, the async def its replacement; "@@" lines are hunk headers eliding
# context, and the final string literal is truncated. Not valid Python as-is.
def test_chat_model_caching() -> None:
async def test_chat_model_caching() -> None:
prompt: List[BaseMessage] = [HumanMessage(content="How are you?")]
response = "Test response"
cached_response = "Cached test response"
cached_message = AIMessage(content=cached_response)
llm = FakeListChatModel(responses=[response])
if get_llm_cache():
get_llm_cache().update(
if llm_cache := get_llm_cache():
# sync test
llm_cache.update(
prompt=dumps(prompt),
llm_string=llm._get_llm_string(),
return_val=[ChatGeneration(message=cached_message)],
@@ -105,6 +119,16 @@ def test_chat_model_caching() -> None:
result = llm(prompt)
assert isinstance(result, AIMessage)
assert result.content == cached_response
# async test
await llm_cache.aupdate(
prompt=dumps(prompt),
llm_string=llm._get_llm_string(),
return_val=[ChatGeneration(message=cached_message)],
)
result = await llm.ainvoke(prompt)
assert isinstance(result, AIMessage)
assert result.content == cached_response
else:
raise ValueError(
"The cache not set. This should never happen, as the pytest fixture "
@@ -112,25 +136,38 @@ def test_chat_model_caching() -> None:
)
# NOTE(review): unified-diff excerpt; sync def is the removed pre-change
# line, the async def its replacement, and adjacent duplicate-purpose lines
# pair up the same way (the assert block was also reordered). The trailing
# string literal is truncated. Not valid Python as-is.
def test_chat_model_caching_params() -> None:
async def test_chat_model_caching_params() -> None:
prompt: List[BaseMessage] = [HumanMessage(content="How are you?")]
response = "Test response"
cached_response = "Cached test response"
cached_message = AIMessage(content=cached_response)
llm = FakeListChatModel(responses=[response])
if get_llm_cache():
get_llm_cache().update(
if llm_cache := get_llm_cache():
# sync test
llm_cache.update(
prompt=dumps(prompt),
llm_string=llm._get_llm_string(functions=[]),
return_val=[ChatGeneration(message=cached_message)],
)
result = llm(prompt, functions=[])
result_no_params = llm(prompt)
assert isinstance(result, AIMessage)
assert result.content == cached_response
result_no_params = llm(prompt)
assert isinstance(result_no_params, AIMessage)
assert result_no_params.content == response
# async test
await llm_cache.aupdate(
prompt=dumps(prompt),
llm_string=llm._get_llm_string(functions=[]),
return_val=[ChatGeneration(message=cached_message)],
)
result = await llm.ainvoke(prompt, functions=[])
result_no_params = await llm.ainvoke(prompt)
assert isinstance(result, AIMessage)
assert result.content == cached_response
assert isinstance(result_no_params, AIMessage)
assert result_no_params.content == response
else:
raise ValueError(
"The cache not set. This should never happen, as the pytest fixture "
# NOTE(review): unified-diff excerpt; sync def is the removed pre-change
# line, the async def its replacement, and adjacent duplicate-purpose lines
# ("response" renamed to "expected_response", get_llm_cache().clear() to
# llm_cache.clear()) pair up the same way. The trailing string literal is
# truncated. Not valid Python as-is.
def test_llm_cache_clear() -> None:
async def test_llm_cache_clear() -> None:
prompt = "How are you?"
response = "Test response"
expected_response = "Test response"
cached_response = "Cached test response"
llm = FakeListLLM(responses=[response])
if get_llm_cache():
get_llm_cache().update(
llm = FakeListLLM(responses=[expected_response])
if llm_cache := get_llm_cache():
# sync test
llm_cache.update(
prompt=prompt,
llm_string=create_llm_string(llm),
return_val=[Generation(text=cached_response)],
)
get_llm_cache().clear()
assert llm(prompt) == response
llm_cache.clear()
response = llm(prompt)
assert response == expected_response
# async test
await llm_cache.aupdate(
prompt=prompt,
llm_string=create_llm_string(llm),
return_val=[Generation(text=cached_response)],
)
await llm_cache.aclear()
response = await llm.ainvoke(prompt)
assert response == expected_response
else:
raise ValueError(
"The cache not set. This should never happen, as the pytest fixture "