From deb85b6c4ccba4a41f1b71a500880ace13f0b5a5 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 3 Apr 2026 12:55:52 -0400 Subject: [PATCH] chore(openai): fix broken vcr cassette playback and add ci guard (#36502) Fix broken VCR cassette playback in `langchain-openai` integration tests and add a CI job to prevent regressions. Two independent bugs made all VCR-backed tests fail: `before_record_request` redacts URIs to `**REDACTED**` but `match_on` still included `uri` (so playback never matched), and a typo-fix commit (`c9f51aef85`) changed test input strings without re-recording cassettes (so `json_body` matching also failed). --- .github/scripts/check_diff.py | 18 ++++- .github/workflows/_test_vcr.yml | 66 +++++++++++++++++++ .github/workflows/check_diffs.yml | 17 +++++ libs/partners/openai/Makefile | 5 ++ libs/partners/openai/tests/conftest.py | 4 +- .../chat_models/test_responses_api.py | 8 +-- 6 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/_test_vcr.yml diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 78a2b8fe866..65cd39e3b5b 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [ "libs/model-profiles", ] +# Packages with VCR cassette-backed integration tests. +# These get a playback-only CI check to catch stale cassettes. +VCR_PACKAGES = { + "libs/partners/openai", +} + # When set to True, we are ignoring core dependents # in order to be able to get CI to pass for each individual # package that depends on core # e.g. 
if you touch core, we don't then add textsplitters/etc to CI IGNORE_CORE_DEPENDENTS = False -# ignored partners are removed from dependents -# but still run if directly edited +# Ignored partners are removed from dependents but still run if directly edited IGNORED_PARTNERS = [ # remove huggingface from dependents because of CI instability # specifically in huggingface jobs @@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs( dirs = list(dirs_to_run["extended-test"]) elif job == "codspeed": dirs = list(dirs_to_run["codspeed"]) + elif job == "vcr-tests": + # Only run VCR tests for packages that have cassettes and are affected + all_affected = set( + add_dependents( + dirs_to_run["test"] | dirs_to_run["extended-test"], dependents + ) + ) + dirs = [d for d in VCR_PACKAGES if d in all_affected] else: raise ValueError(f"Unknown job: {job}") @@ -335,6 +348,7 @@ if __name__ == "__main__": "dependencies", "test-pydantic", "codspeed", + "vcr-tests", ] } diff --git a/.github/workflows/_test_vcr.yml b/.github/workflows/_test_vcr.yml new file mode 100644 index 00000000000..45520b6b7d7 --- /dev/null +++ b/.github/workflows/_test_vcr.yml @@ -0,0 +1,66 @@ +# Runs VCR cassette-backed integration tests in playback-only mode. +# +# No API keys needed — catches stale cassettes caused by test input +# changes without re-recording. +# +# Called as part of check_diffs.yml workflow. 
+ +name: "📼 VCR Cassette Tests" + +on: + workflow_call: + inputs: + working-directory: + required: true + type: string + description: "From which folder this pipeline executes" + python-version: + required: true + type: string + description: "Python version to use" + +permissions: + contents: read + +env: + UV_FROZEN: "true" + +jobs: + build: + defaults: + run: + working-directory: ${{ inputs.working-directory }} + runs-on: ubuntu-latest + timeout-minutes: 20 + name: "Python ${{ inputs.python-version }}" + steps: + - uses: actions/checkout@v6 + + - name: "🐍 Set up Python ${{ inputs.python-version }} + UV" + uses: "./.github/actions/uv_setup" + with: + python-version: ${{ inputs.python-version }} + cache-suffix: test-vcr-${{ inputs.working-directory }} + working-directory: ${{ inputs.working-directory }} + + - name: "📦 Install Test Dependencies" + shell: bash + run: uv sync --group test + + - name: "📼 Run VCR Cassette Tests (playback-only)" + shell: bash + env: + OPENAI_API_KEY: sk-fake + run: make test_vcr + + - name: "🧹 Verify Clean Working Directory" + shell: bash + run: | + set -eu + + STATUS="$(git status)" + echo "$STATUS" + + # grep will exit non-zero if the target message isn't found, + # and `set -e` above will cause the step to fail. 
+ echo "$STATUS" | grep 'nothing to commit, working tree clean' diff --git a/.github/workflows/check_diffs.yml b/.github/workflows/check_diffs.yml index a53aa14f2ec..69a062c8fa3 100644 --- a/.github/workflows/check_diffs.yml +++ b/.github/workflows/check_diffs.yml @@ -66,6 +66,7 @@ jobs: compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }} dependencies: ${{ steps.set-matrix.outputs.dependencies }} test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }} + vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }} # Run linting only on packages that have changed files lint: needs: [build] @@ -123,6 +124,21 @@ jobs: python-version: ${{ matrix.job-configs.python-version }} secrets: inherit + # Run VCR cassette-backed integration tests in playback-only mode (no API keys) + vcr-tests: + name: "VCR Cassette Tests" + needs: [build] + if: ${{ needs.build.outputs.vcr-tests != '[]' }} + strategy: + matrix: + job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }} + fail-fast: false + uses: ./.github/workflows/_test_vcr.yml + with: + working-directory: ${{ matrix.job-configs.working-directory }} + python-version: ${{ matrix.job-configs.python-version }} + secrets: inherit + # Run extended test suites that require additional dependencies extended-tests: name: "Extended Tests" @@ -178,6 +194,7 @@ jobs: lint, test, compile-integration-tests, + vcr-tests, extended-tests, test-pydantic, ] diff --git a/libs/partners/openai/Makefile b/libs/partners/openai/Makefile index 8f8df6b5dd8..2bf160b45ed 100644 --- a/libs/partners/openai/Makefile +++ b/libs/partners/openai/Makefile @@ -27,6 +27,11 @@ test tests: integration_test integration_tests: uv run --group test --group test_integration pytest -n auto $(TEST_FILE) +# Run VCR cassette-backed integration tests in playback-only mode (no API keys needed). +# Catches stale cassettes caused by test input changes without re-recording. 
+test_vcr: + uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/ + test_watch: uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE) diff --git a/libs/partners/openai/tests/conftest.py b/libs/partners/openai/tests/conftest.py index 748df9beea0..e8159df1384 100644 --- a/libs/partners/openai/tests/conftest.py +++ b/libs/partners/openai/tests/conftest.py @@ -32,7 +32,9 @@ def vcr_config() -> dict: """Extend the default configuration coming from langchain_tests.""" config = base_vcr_config() config["match_on"] = [ - m if m != "body" else "json_body" for m in config.get("match_on", []) + m if m != "body" else "json_body" + for m in config.get("match_on", []) + if m != "uri" ] config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) config["before_record_request"] = remove_request_headers diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 153b73965be..219a3a748e8 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) - llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) - ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4")) + ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4")) assert len(ai_msg.tool_calls) == 1 assert ai_msg.tool_calls[0]["name"] == "multiply" assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} full: Any = None - for chunk in bound_llm.stream("what's 5 * 4"): + for chunk in bound_llm.stream("whats 5 * 4"): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk 
assert len(full.tool_calls) == 1 @@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None: assert parsed == response.additional_kwargs["parsed"] # Test function calling - ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4")) + ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4")) assert len(ai_msg.tool_calls) == 1 assert ai_msg.tool_calls[0]["name"] == "multiply" assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} @@ -555,7 +555,7 @@ def test_stream_reasoning_summary( ) message_1 = { "role": "user", - "content": "What was the third tallest building in the year 2000?", + "content": "What was the third tallest buliding in the year 2000?", } response_1: BaseMessageChunk | None = None for chunk in llm.stream([message_1]):