From f9f11527f6b0dc17b804d7fdd1adb604b58bb571 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 8 Jun 2026 11:17:12 -0400
Subject: [PATCH] fix(standard-tests): serialize `BytesIO` bodies in VCR cassettes (#37963)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The custom VCR serializer pipes the cassette dict through
`yaml.safe_dump`, which raises on stream objects — so any request with
an `io.BytesIO` body (multipart/file-upload endpoints) couldn't be
recorded. A new `_coerce_bytesio` helper walks the cassette and replaces
each `BytesIO` with its raw bytes before dumping.
---
 .../langchain_tests/conftest.py | 30 +++++++++++++++++++
 libs/standard-tests/uv.lock     |  2 +-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/libs/standard-tests/langchain_tests/conftest.py b/libs/standard-tests/langchain_tests/conftest.py
index 96ff7bf741e..352e9f6227f 100644
--- a/libs/standard-tests/langchain_tests/conftest.py
+++ b/libs/standard-tests/langchain_tests/conftest.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import gzip
+import io
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
 
@@ -17,6 +18,32 @@ if TYPE_CHECKING:
     from os import PathLike
 
 
+def _coerce_bytesio(value: Any) -> Any:
+    """Recursively replace `io.BytesIO` values with their raw bytes.
+
+    `yaml.safe_dump` cannot serialize stream objects, so any `io.BytesIO`
+    (e.g. a multipart/file-upload request body) is converted to its
+    underlying bytes via `getvalue()`, which is non-destructive and
+    independent of the stream position.
+
+    Args:
+        value: An arbitrary cassette value, possibly nested in dicts,
+            lists, or tuples.
+
+    Returns:
+        The value with every `io.BytesIO` replaced by its bytes content.
+    """
+    if isinstance(value, io.BytesIO):
+        return value.getvalue()
+    if isinstance(value, dict):
+        return {key: _coerce_bytesio(item) for key, item in value.items()}
+    if isinstance(value, list):
+        return [_coerce_bytesio(item) for item in value]
+    if isinstance(value, tuple):
+        return tuple(_coerce_bytesio(item) for item in value)
+    return value
+
+
 class CustomSerializer:
     """Custom serializer for VCR cassettes using YAML and gzip.
 
@@ -40,6 +67,9 @@ class CustomSerializer:
             }
             for request in cassette_dict["requests"]
         ]
+        # Sweep the whole dict (not just request bodies) so a `BytesIO`
+        # hiding in an untransformed response body is also coerced.
+        cassette_dict = _coerce_bytesio(cassette_dict)
         yml = yaml.safe_dump(cassette_dict)
         return gzip.compress(yml.encode("utf-8"))
 
diff --git a/libs/standard-tests/uv.lock b/libs/standard-tests/uv.lock
index f9ab63e84c0..e907b8be506 100644
--- a/libs/standard-tests/uv.lock
+++ b/libs/standard-tests/uv.lock
@@ -324,7 +324,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "1.4.0"
+version = "1.4.1"
 source = { editable = "../core" }
 dependencies = [
     { name = "jsonpatch" },