mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-06 13:33:37 +00:00
standard-tests: migrate to pytest-recording (#31425)
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
@@ -1,25 +1,85 @@
|
||||
import base64
|
||||
import gzip
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import pytest
|
||||
from vcr import VCR # type: ignore[import-untyped]
|
||||
from vcr.serializers import yamlserializer # type: ignore[import-untyped]
|
||||
import yaml
|
||||
from vcr import VCR
|
||||
from vcr.persisters.filesystem import CassetteNotFoundError
|
||||
from vcr.request import Request
|
||||
|
||||
|
||||
class YamlGzipSerializer:
|
||||
@staticmethod
|
||||
def serialize(cassette_dict: dict) -> str:
|
||||
raw = yamlserializer.serialize(cassette_dict).encode("utf-8")
|
||||
compressed = gzip.compress(raw)
|
||||
return base64.b64encode(compressed).decode("ascii")
|
||||
class CustomSerializer:
|
||||
"""Custom serializer for VCR cassettes using YAML and gzip.
|
||||
|
||||
We're using a custom serializer to avoid the default yaml serializer
|
||||
used by VCR, which is not designed to be safe for untrusted input.
|
||||
|
||||
This step is an extra precaution necessary because the cassette files
|
||||
are in compressed YAML format, which makes it more difficult to inspect
|
||||
their contents during development or debugging.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def deserialize(data: str) -> dict:
|
||||
compressed = base64.b64decode(data.encode("ascii"))
|
||||
text = gzip.decompress(compressed).decode("utf-8")
|
||||
return yamlserializer.deserialize(text)
|
||||
def serialize(cassette_dict: dict) -> bytes:
|
||||
"""Convert cassette to YAML and compress it."""
|
||||
cassette_dict["requests"] = [
|
||||
request._to_dict() for request in cassette_dict["requests"]
|
||||
]
|
||||
yml = yaml.safe_dump(cassette_dict)
|
||||
return gzip.compress(yml.encode("utf-8"))
|
||||
|
||||
@staticmethod
|
||||
def deserialize(data: bytes) -> dict:
|
||||
"""Decompress data and convert it from YAML."""
|
||||
text = gzip.decompress(data).decode("utf-8")
|
||||
cassette = yaml.safe_load(text)
|
||||
cassette["requests"] = [
|
||||
Request._from_dict(request) for request in cassette["requests"]
|
||||
]
|
||||
return cassette
|
||||
|
||||
|
||||
class CustomPersister:
    """A custom persister for VCR that uses the CustomSerializer.

    Cassettes are read from and written to disk as raw bytes; converting
    between bytes and the cassette dictionary is delegated to the serializer
    passed to each method.
    """

    @classmethod
    def load_cassette(
        cls, cassette_path: Union[str, PathLike[str]], serializer: CustomSerializer
    ) -> tuple[dict, dict]:
        """Load a cassette from a file.

        Args:
            cassette_path: Location of the cassette file.
            serializer: Object whose ``deserialize`` method turns the raw file
                bytes into a cassette dictionary.

        Returns:
            The ``"requests"`` and ``"responses"`` entries of the deserialized
            cassette, in that order.

        Raises:
            CassetteNotFoundError: If ``cassette_path`` is not an existing file.
        """
        # If cassette path is already a Path this is a no-op.
        cassette_path = Path(cassette_path)
        if not cassette_path.is_file():
            raise CassetteNotFoundError(
                f"Cassette file {cassette_path} does not exist."
            )
        # Read as bytes: the serializer receives the file content undecoded.
        deser = serializer.deserialize(cassette_path.read_bytes())
        return deser["requests"], deser["responses"]

    @staticmethod
    def save_cassette(
        cassette_path: Union[str, PathLike[str]],
        cassette_dict: dict,
        serializer: CustomSerializer,
    ) -> None:
        """Save a cassette to a file.

        Args:
            cassette_path: Destination of the cassette file. Missing parent
                folders are created.
            cassette_dict: Cassette content handed to the serializer.
            serializer: Object whose ``serialize`` method turns
                ``cassette_dict`` into the bytes written to disk.
        """
        # Serialize first so a serialization failure leaves the disk untouched.
        data = serializer.serialize(cassette_dict)
        # If cassette path is already a Path this is a no-op.
        cassette_path = Path(cassette_path)
        # exist_ok=True avoids the check-then-create race of
        # ``if not exists(): mkdir()`` when several writers target the folder.
        cassette_path.parent.mkdir(parents=True, exist_ok=True)
        with cassette_path.open("wb") as f:
            f.write(data)
|
||||
|
||||
|
||||
# A list of headers that should be filtered out of the cassettes.
|
||||
# These are typically associated with sensitive information and should
|
||||
# not be stored in cassettes.
|
||||
_BASE_FILTER_HEADERS = [
|
||||
("authorization", "PLACEHOLDER"),
|
||||
("x-api-key", "PLACEHOLDER"),
|
||||
@@ -29,14 +89,15 @@ _BASE_FILTER_HEADERS = [
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def _base_vcr_config() -> dict:
|
||||
"""
|
||||
Configuration that every cassette will receive.
|
||||
"""Configuration that every cassette will receive.
|
||||
|
||||
(Anything permitted by vcr.VCR(**kwargs) can be put here.)
|
||||
"""
|
||||
return {
|
||||
"record_mode": "once",
|
||||
"filter_headers": _BASE_FILTER_HEADERS.copy(),
|
||||
"match_on": ["method", "scheme", "host", "port", "path", "query"],
|
||||
"match_on": ["method", "uri", "body"],
|
||||
"allow_playback_repeats": True,
|
||||
"decode_compressed_response": True,
|
||||
"cassette_library_dir": "tests/cassettes",
|
||||
"path_transformer": VCR.ensure_suffix(".yaml"),
|
||||
|
@@ -6,7 +6,6 @@ from unittest.mock import MagicMock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import vcr # type: ignore[import-untyped]
|
||||
from langchain_core._api import warn_deprecated
|
||||
from langchain_core.callbacks import BaseCallbackHandler
|
||||
from langchain_core.language_models import BaseChatModel, GenericFakeChatModel
|
||||
@@ -31,6 +30,7 @@ from pydantic.v1 import BaseModel as BaseModelV1
|
||||
from pydantic.v1 import Field as FieldV1
|
||||
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
|
||||
from typing_extensions import Annotated, TypedDict
|
||||
from vcr.cassette import Cassette
|
||||
|
||||
from langchain_tests.unit_tests.chat_models import (
|
||||
ChatModelTests,
|
||||
@@ -592,7 +592,7 @@ class ChatModelIntegrationTests(ChatModelTests):
|
||||
:caption: tests/conftest.py
|
||||
|
||||
import pytest
|
||||
from langchain_tests.conftest import YamlGzipSerializer
|
||||
from langchain_tests.conftest import CustomPersister, CustomSerializer
|
||||
from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
|
||||
from vcr import VCR
|
||||
|
||||
@@ -621,24 +621,26 @@ class ChatModelIntegrationTests(ChatModelTests):
|
||||
return config
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def vcr(vcr_config: dict) -> VCR:
|
||||
\"\"\"Override the default vcr fixture to include custom serializers\"\"\"
|
||||
my_vcr = VCR(**vcr_config)
|
||||
my_vcr.register_serializer("yaml.gz", YamlGzipSerializer)
|
||||
return my_vcr
|
||||
def pytest_recording_configure(config: dict, vcr: VCR) -> None:
|
||||
vcr.register_persister(CustomPersister())
|
||||
vcr.register_serializer("yaml.gz", CustomSerializer())
|
||||
|
||||
|
||||
You can inspect the contents of the compressed cassettes (e.g., to
|
||||
ensure no sensitive information is recorded) using the serializer:
|
||||
ensure no sensitive information is recorded) using
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz
|
||||
|
||||
or by using the serializer:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_tests.conftest import YamlGzipSerializer
|
||||
from langchain_tests.conftest import CustomPersister, CustomSerializer
|
||||
|
||||
with open("/path/to/tests/cassettes/TestClass_test.yaml.gz", "r") as f:
|
||||
data = f.read()
|
||||
|
||||
YamlGzipSerializer.deserialize(data)
|
||||
cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"
|
||||
requests, responses = CustomPersister().load_cassette(cassette_path, CustomSerializer())
|
||||
|
||||
3. Run tests to generate VCR cassettes.
|
||||
|
||||
@@ -2826,8 +2828,9 @@ class ChatModelIntegrationTests(ChatModelTests):
|
||||
assert isinstance(response, AIMessage)
|
||||
|
||||
@pytest.mark.benchmark
|
||||
@pytest.mark.vcr
|
||||
def test_stream_time(
|
||||
self, model: BaseChatModel, benchmark: BenchmarkFixture, vcr: vcr.VCR
|
||||
self, model: BaseChatModel, benchmark: BenchmarkFixture, vcr: Cassette
|
||||
) -> None:
|
||||
"""Test that streaming does not introduce undue overhead.
|
||||
|
||||
@@ -2857,12 +2860,13 @@ class ChatModelIntegrationTests(ChatModelTests):
|
||||
pytest.skip("VCR not set up.")
|
||||
|
||||
def _run() -> None:
|
||||
cassette_name = f"{self.__class__.__name__}_test_stream_time"
|
||||
with vcr.use_cassette(cassette_name, record_mode="once"):
|
||||
for _ in model.stream("Write a story about a cat."):
|
||||
pass
|
||||
for _ in model.stream("Write a story about a cat."):
|
||||
pass
|
||||
|
||||
benchmark(_run)
|
||||
if not vcr.responses:
|
||||
_run()
|
||||
else:
|
||||
benchmark(_run)
|
||||
|
||||
def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage:
|
||||
""":private:"""
|
||||
|
@@ -693,7 +693,7 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
:caption: tests/conftest.py
|
||||
|
||||
import pytest
|
||||
from langchain_tests.conftest import YamlGzipSerializer
|
||||
from langchain_tests.conftest import CustomPersister, CustomSerializer
|
||||
from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
|
||||
from vcr import VCR
|
||||
|
||||
@@ -722,24 +722,26 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
return config
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def vcr(vcr_config: dict) -> VCR:
|
||||
\"\"\"Override the default vcr fixture to include custom serializers\"\"\"
|
||||
my_vcr = VCR(**vcr_config)
|
||||
my_vcr.register_serializer("yaml.gz", YamlGzipSerializer)
|
||||
return my_vcr
|
||||
def pytest_recording_configure(config: dict, vcr: VCR) -> None:
|
||||
vcr.register_persister(CustomPersister())
|
||||
vcr.register_serializer("yaml.gz", CustomSerializer())
|
||||
|
||||
|
||||
You can inspect the contents of the compressed cassettes (e.g., to
|
||||
ensure no sensitive information is recorded) using the serializer:
|
||||
ensure no sensitive information is recorded) using
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz
|
||||
|
||||
or by using the serializer:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_tests.conftest import YamlGzipSerializer
|
||||
from langchain_tests.conftest import CustomPersister, CustomSerializer
|
||||
|
||||
with open("/path/to/tests/cassettes/TestClass_test.yaml.gz", "r") as f:
|
||||
data = f.read()
|
||||
|
||||
YamlGzipSerializer.deserialize(data)
|
||||
cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"
|
||||
requests, responses = CustomPersister().load_cassette(cassette_path, CustomSerializer())
|
||||
|
||||
3. Run tests to generate VCR cassettes.
|
||||
|
||||
|
@@ -15,7 +15,7 @@ dependencies = [
|
||||
"pytest-socket<1,>=0.6.0",
|
||||
"pytest-benchmark",
|
||||
"pytest-codspeed",
|
||||
"pytest-vcr",
|
||||
"pytest-recording",
|
||||
"vcrpy>=7.0",
|
||||
"numpy>=1.26.2; python_version<'3.13'",
|
||||
"numpy>=2.1.0; python_version>='3.13'",
|
||||
@@ -42,6 +42,15 @@ langchain-core = { path = "../core", editable = true }
|
||||
[tool.mypy]
|
||||
disallow_untyped_defs = "True"
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = "yaml"
|
||||
ignore_missing_imports = true
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = "vcr.*"
|
||||
ignore_missing_imports = true
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py39"
|
||||
|
||||
|
32
libs/standard-tests/uv.lock
generated
32
libs/standard-tests/uv.lock
generated
@@ -304,7 +304,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.60"
|
||||
version = "0.3.63"
|
||||
source = { editable = "../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@@ -373,8 +373,8 @@ dependencies = [
|
||||
{ name = "pytest-asyncio" },
|
||||
{ name = "pytest-benchmark" },
|
||||
{ name = "pytest-codspeed" },
|
||||
{ name = "pytest-recording" },
|
||||
{ name = "pytest-socket" },
|
||||
{ name = "pytest-vcr" },
|
||||
{ name = "syrupy" },
|
||||
{ name = "vcrpy" },
|
||||
]
|
||||
@@ -404,8 +404,8 @@ requires-dist = [
|
||||
{ name = "pytest-asyncio", specifier = ">=0.20,<1" },
|
||||
{ name = "pytest-benchmark" },
|
||||
{ name = "pytest-codspeed" },
|
||||
{ name = "pytest-recording" },
|
||||
{ name = "pytest-socket", specifier = ">=0.6.0,<1" },
|
||||
{ name = "pytest-vcr" },
|
||||
{ name = "syrupy", specifier = ">=4,<5" },
|
||||
{ name = "vcrpy", specifier = ">=7.0" },
|
||||
]
|
||||
@@ -1153,6 +1153,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/9b/952c70bd1fae9baa58077272e7f191f377c86d812263c21b361195e125e6/pytest_codspeed-3.2.0-py3-none-any.whl", hash = "sha256:54b5c2e986d6a28e7b0af11d610ea57bd5531cec8326abe486f1b55b09d91c39", size = 15007 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-recording"
|
||||
version = "0.13.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
{ name = "vcrpy" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/32/9c/f4027c5f1693847b06d11caf4b4f6bb09f22c1581ada4663877ec166b8c6/pytest_recording-0.13.4.tar.gz", hash = "sha256:568d64b2a85992eec4ae0a419c855d5fd96782c5fb016784d86f18053792768c", size = 26576 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/42/c2/ce34735972cc42d912173e79f200fe66530225190c06655c5632a9d88f1e/pytest_recording-0.13.4-py3-none-any.whl", hash = "sha256:ad49a434b51b1c4f78e85b1e6b74fdcc2a0a581ca16e52c798c6ace971f7f439", size = 13723 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-socket"
|
||||
version = "0.7.0"
|
||||
@@ -1165,19 +1178,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/19/58/5d14cb5cb59409e491ebe816c47bf81423cd03098ea92281336320ae5681/pytest_socket-0.7.0-py3-none-any.whl", hash = "sha256:7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45", size = 6754 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-vcr"
|
||||
version = "1.0.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
{ name = "vcrpy" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1a/60/104c619483c1a42775d3f8b27293f1ecfc0728014874d065e68cb9702d49/pytest-vcr-1.0.2.tar.gz", hash = "sha256:23ee51b75abbcc43d926272773aae4f39f93aceb75ed56852d0bf618f92e1896", size = 3810 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/d3/ff520d11e6ee400602711d1ece8168dcfc5b6d8146fb7db4244a6ad6a9c3/pytest_vcr-1.0.2-py2.py3-none-any.whl", hash = "sha256:2f316e0539399bea0296e8b8401145c62b6f85e9066af7e57b6151481b0d6d9c", size = 4137 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.2"
|
||||
|
Reference in New Issue
Block a user