chore(openai): fix broken vcr cassette playback and add ci guard (#36502)

Fix broken VCR cassette playback in `langchain-openai` integration tests
and add a CI job to prevent regressions. Two independent bugs made all
VCR-backed tests fail: (1) `before_record_request` redacts request URIs to
`**REDACTED**`, but `match_on` still included `uri`, so playback never
matched a recorded request; and (2) a typo-fix commit (`c9f51aef85`)
changed test input strings without re-recording the cassettes, so
`json_body` matching also failed.
This commit is contained in:
Mason Daugherty
2026-04-03 12:55:52 -04:00
committed by GitHub
parent 8c15649127
commit deb85b6c4c
6 changed files with 111 additions and 7 deletions

View File

@@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [
"libs/model-profiles", "libs/model-profiles",
] ]
# Packages whose integration tests replay recorded VCR cassettes.
# Each entry gets a playback-only CI check so stale cassettes are caught.
VCR_PACKAGES = {"libs/partners/openai"}
# When set to True, we are ignoring core dependents # When set to True, we are ignoring core dependents
# in order to be able to get CI to pass for each individual # in order to be able to get CI to pass for each individual
# package that depends on core # package that depends on core
# e.g. if you touch core, we don't then add textsplitters/etc to CI # e.g. if you touch core, we don't then add textsplitters/etc to CI
IGNORE_CORE_DEPENDENTS = False IGNORE_CORE_DEPENDENTS = False
# ignored partners are removed from dependents # Ignored partners are removed from dependents but still run if directly edited
# but still run if directly edited
IGNORED_PARTNERS = [ IGNORED_PARTNERS = [
# remove huggingface from dependents because of CI instability # remove huggingface from dependents because of CI instability
# specifically in huggingface jobs # specifically in huggingface jobs
@@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs(
dirs = list(dirs_to_run["extended-test"]) dirs = list(dirs_to_run["extended-test"])
elif job == "codspeed": elif job == "codspeed":
dirs = list(dirs_to_run["codspeed"]) dirs = list(dirs_to_run["codspeed"])
elif job == "vcr-tests":
# Only run VCR tests for packages that have cassettes and are affected
all_affected = set(
add_dependents(
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
)
)
dirs = [d for d in VCR_PACKAGES if d in all_affected]
else: else:
raise ValueError(f"Unknown job: {job}") raise ValueError(f"Unknown job: {job}")
@@ -335,6 +348,7 @@ if __name__ == "__main__":
"dependencies", "dependencies",
"test-pydantic", "test-pydantic",
"codspeed", "codspeed",
"vcr-tests",
] ]
} }

66
.github/workflows/_test_vcr.yml vendored Normal file
View File

@@ -0,0 +1,66 @@
# Runs VCR cassette-backed integration tests in playback-only mode.
#
# No API keys needed — catches stale cassettes caused by test input
# changes without re-recording.
#
# Called as part of check_diffs.yml workflow.
name: "📼 VCR Cassette Tests"
# Reusable workflow: it has no trigger of its own and only runs when another
# workflow invokes it with `uses:`, supplying the two inputs below.
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
python-version:
required: true
type: string
description: "Python version to use"
# The workflow token is limited to read-only access to repository contents.
permissions:
contents: read
env:
# Makes uv use the committed lockfile as-is instead of updating it
# (the environment-variable form of `--frozen`).
UV_FROZEN: "true"
jobs:
build:
# Every `run:` step below executes inside the package directory passed in.
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
# Cancel the job if it runs for more than 20 minutes.
timeout-minutes: 20
name: "Python ${{ inputs.python-version }}"
steps:
- uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ inputs.python-version }}
# The suffix includes the package path — presumably so each package gets
# its own cache entry; confirm against the uv_setup action.
cache-suffix: test-vcr-${{ inputs.working-directory }}
working-directory: ${{ inputs.working-directory }}
- name: "📦 Install Test Dependencies"
shell: bash
run: uv sync --group test
- name: "📼 Run VCR Cassette Tests (playback-only)"
shell: bash
env:
# Placeholder value, not a real credential. Presumably only needed so the
# OpenAI client can be constructed; playback is not expected to reach the
# network — TODO confirm.
OPENAI_API_KEY: sk-fake
# Delegates to the `test_vcr` target of the Makefile in the working directory.
run: make test_vcr
# Fails the job if the test run left modified, staged, or untracked files in
# the checkout (git only prints the message grepped for below when there are none).
- name: "🧹 Verify Clean Working Directory"
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'

View File

@@ -66,6 +66,7 @@ jobs:
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }} compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
dependencies: ${{ steps.set-matrix.outputs.dependencies }} dependencies: ${{ steps.set-matrix.outputs.dependencies }}
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }} test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
# Run linting only on packages that have changed files # Run linting only on packages that have changed files
lint: lint:
needs: [build] needs: [build]
@@ -123,6 +124,21 @@ jobs:
python-version: ${{ matrix.job-configs.python-version }} python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit secrets: inherit
# Run VCR cassette-backed integration tests in playback-only mode.
# The tests replay recorded cassettes and need no API keys, so the caller's
# secrets are deliberately NOT forwarded to the reusable workflow
# (no `secrets: inherit`), keeping this job at least privilege.
vcr-tests:
  name: "VCR Cassette Tests"
  needs: [build]
  # Skip the job entirely when the build job selected no VCR packages.
  if: ${{ needs.build.outputs.vcr-tests != '[]' }}
  strategy:
    matrix:
      job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
    # Let every package/Python combination finish even if one of them fails.
    fail-fast: false
  uses: ./.github/workflows/_test_vcr.yml
  with:
    working-directory: ${{ matrix.job-configs.working-directory }}
    python-version: ${{ matrix.job-configs.python-version }}
# Run extended test suites that require additional dependencies # Run extended test suites that require additional dependencies
extended-tests: extended-tests:
name: "Extended Tests" name: "Extended Tests"
@@ -178,6 +194,7 @@ jobs:
lint, lint,
test, test,
compile-integration-tests, compile-integration-tests,
vcr-tests,
extended-tests, extended-tests,
test-pydantic, test-pydantic,
] ]

View File

@@ -27,6 +27,11 @@ test tests:
integration_test integration_tests: integration_test integration_tests:
uv run --group test --group test_integration pytest -n auto $(TEST_FILE) uv run --group test --group test_integration pytest -n auto $(TEST_FILE)
# Run VCR cassette-backed integration tests in playback-only mode (no API keys needed).
# Catches stale cassettes caused by test input changes without re-recording.
#
# pytest flags used by the recipe:
#   --record-mode=none  replay existing cassettes only; nothing new is recorded
#   -m vcr              select only the tests carrying the `vcr` marker
#   --ignore=...        leave test_azure_standard.py out of collection
#                       NOTE(review): the reason is not stated here — document why.
test_vcr:
uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/
test_watch: test_watch:
uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE) uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)

View File

@@ -32,7 +32,9 @@ def vcr_config() -> dict:
"""Extend the default configuration coming from langchain_tests.""" """Extend the default configuration coming from langchain_tests."""
config = base_vcr_config() config = base_vcr_config()
config["match_on"] = [ config["match_on"] = [
m if m != "body" else "json_body" for m in config.get("match_on", []) m if m != "body" else "json_body"
for m in config.get("match_on", [])
if m != "uri"
] ]
config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
config["before_record_request"] = remove_request_headers config["before_record_request"] = remove_request_headers

View File

@@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -
llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4")) ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
assert len(ai_msg.tool_calls) == 1 assert len(ai_msg.tool_calls) == 1
assert ai_msg.tool_calls[0]["name"] == "multiply" assert ai_msg.tool_calls[0]["name"] == "multiply"
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
full: Any = None full: Any = None
for chunk in bound_llm.stream("what's 5 * 4"): for chunk in bound_llm.stream("whats 5 * 4"):
assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk full = chunk if full is None else full + chunk
assert len(full.tool_calls) == 1 assert len(full.tool_calls) == 1
@@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None:
assert parsed == response.additional_kwargs["parsed"] assert parsed == response.additional_kwargs["parsed"]
# Test function calling # Test function calling
ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4")) ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
assert len(ai_msg.tool_calls) == 1 assert len(ai_msg.tool_calls) == 1
assert ai_msg.tool_calls[0]["name"] == "multiply" assert ai_msg.tool_calls[0]["name"] == "multiply"
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
@@ -555,7 +555,7 @@ def test_stream_reasoning_summary(
) )
message_1 = { message_1 = {
"role": "user", "role": "user",
"content": "What was the third tallest building in the year 2000?", "content": "What was the third tallest buliding in the year 2000?",
} }
response_1: BaseMessageChunk | None = None response_1: BaseMessageChunk | None = None
for chunk in llm.stream([message_1]): for chunk in llm.stream([message_1]):