From deb85b6c4ccba4a41f1b71a500880ace13f0b5a5 Mon Sep 17 00:00:00 2001
From: Mason Daugherty <mason@langchain.dev>
Date: Fri, 3 Apr 2026 12:55:52 -0400
Subject: [PATCH] chore(openai): fix broken vcr cassette playback and add ci
 guard (#36502)

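Fix broken VCR cassette playback in `langchain-openai` integration tests
and add a CI job to prevent regressions. Two independent bugs made all
VCR-backed tests fail:

1. `before_record_request` redacts URIs to `**REDACTED**`, but `match_on`
   still included `uri`, so playback never matched. Fixed by dropping
   `uri` from `match_on` in `tests/conftest.py`.
2. A typo-fix commit (`c9f51aef85`) changed test input strings without
   re-recording cassettes, so `json_body` matching also failed. Fixed by
   changing the inputs in `test_responses_api.py` back to the strings the
   cassettes were recorded with; the misspellings (`whats`, `buliding`)
   are intentional and must stay until the cassettes are re-recorded.

The new `vcr-tests` CI job runs `make test_vcr`, which replays cassettes
with `--record-mode=none` (no API keys needed) to catch stale cassettes.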
---
 .github/scripts/check_diff.py                 | 18 ++++-
 .github/workflows/_test_vcr.yml               | 66 +++++++++++++++++++
 .github/workflows/check_diffs.yml             | 17 +++++
 libs/partners/openai/Makefile                 |  5 ++
 libs/partners/openai/tests/conftest.py        |  4 +-
 .../chat_models/test_responses_api.py         |  8 +--
 6 files changed, 111 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/_test_vcr.yml

diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py
index 78a2b8fe866..65cd39e3b5b 100644
--- a/.github/scripts/check_diff.py
+++ b/.github/scripts/check_diff.py
@@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [
     "libs/model-profiles",
 ]
 
+# Packages with VCR cassette-backed integration tests.
+# These get a playback-only CI check to catch stale cassettes.
+VCR_PACKAGES = {
+    "libs/partners/openai",
+}
+
 # When set to True, we are ignoring core dependents
 # in order to be able to get CI to pass for each individual
 # package that depends on core
 # e.g. if you touch core, we don't then add textsplitters/etc to CI
 IGNORE_CORE_DEPENDENTS = False
 
-# ignored partners are removed from dependents
-# but still run if directly edited
+# Ignored partners are removed from dependents but still run if directly edited
 IGNORED_PARTNERS = [
     # remove huggingface from dependents because of CI instability
     # specifically in huggingface jobs
@@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs(
         dirs = list(dirs_to_run["extended-test"])
     elif job == "codspeed":
         dirs = list(dirs_to_run["codspeed"])
+    elif job == "vcr-tests":
+        # Only run VCR tests for packages that have cassettes and are affected
+        all_affected = set(
+            add_dependents(
+                dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
+            )
+        )
+        dirs = [d for d in VCR_PACKAGES if d in all_affected]
     else:
         raise ValueError(f"Unknown job: {job}")
 
@@ -335,6 +348,7 @@ if __name__ == "__main__":
             "dependencies",
             "test-pydantic",
             "codspeed",
+            "vcr-tests",
         ]
     }
 
diff --git a/.github/workflows/_test_vcr.yml b/.github/workflows/_test_vcr.yml
new file mode 100644
index 00000000000..45520b6b7d7
--- /dev/null
+++ b/.github/workflows/_test_vcr.yml
@@ -0,0 +1,66 @@
+# Runs VCR cassette-backed integration tests in playback-only mode.
+#
+# No API keys needed — catches stale cassettes caused by test input
+# changes without re-recording.
+#
+# Called as part of check_diffs.yml workflow.
+
+name: "📼 VCR Cassette Tests"
+
+on:
+  workflow_call:
+    inputs:
+      working-directory:
+        required: true
+        type: string
+        description: "From which folder this pipeline executes"
+      python-version:
+        required: true
+        type: string
+        description: "Python version to use"
+
+permissions:
+  contents: read
+
+env:
+  UV_FROZEN: "true"
+
+jobs:
+  build:
+    defaults:
+      run:
+        working-directory: ${{ inputs.working-directory }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    name: "Python ${{ inputs.python-version }}"
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
+        uses: "./.github/actions/uv_setup"
+        with:
+          python-version: ${{ inputs.python-version }}
+          cache-suffix: test-vcr-${{ inputs.working-directory }}
+          working-directory: ${{ inputs.working-directory }}
+
+      - name: "📦 Install Test Dependencies"
+        shell: bash
+        run: uv sync --group test
+
+      - name: "📼 Run VCR Cassette Tests (playback-only)"
+        shell: bash
+        env:
+          OPENAI_API_KEY: sk-fake
+        run: make test_vcr
+
+      - name: "🧹 Verify Clean Working Directory"
+        shell: bash
+        run: |
+          set -eu
+
+          STATUS="$(git status)"
+          echo "$STATUS"
+
+          # grep will exit non-zero if the target message isn't found,
+          # and `set -e` above will cause the step to fail.
+          echo "$STATUS" | grep 'nothing to commit, working tree clean'
diff --git a/.github/workflows/check_diffs.yml b/.github/workflows/check_diffs.yml
index a53aa14f2ec..69a062c8fa3 100644
--- a/.github/workflows/check_diffs.yml
+++ b/.github/workflows/check_diffs.yml
@@ -66,6 +66,7 @@ jobs:
       compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
       dependencies: ${{ steps.set-matrix.outputs.dependencies }}
       test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
+      vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
   # Run linting only on packages that have changed files
   lint:
     needs: [build]
@@ -123,6 +124,21 @@ jobs:
       python-version: ${{ matrix.job-configs.python-version }}
     secrets: inherit
 
+  # Run VCR cassette-backed integration tests in playback-only mode (no API keys)
+  vcr-tests:
+    name: "VCR Cassette Tests"
+    needs: [build]
+    if: ${{ needs.build.outputs.vcr-tests != '[]' }}
+    strategy:
+      matrix:
+        job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
+      fail-fast: false
+    uses: ./.github/workflows/_test_vcr.yml
+    with:
+      working-directory: ${{ matrix.job-configs.working-directory }}
+      python-version: ${{ matrix.job-configs.python-version }}
+    secrets: inherit
+
   # Run extended test suites that require additional dependencies
   extended-tests:
     name: "Extended Tests"
@@ -178,6 +194,7 @@ jobs:
         lint,
         test,
         compile-integration-tests,
+        vcr-tests,
         extended-tests,
         test-pydantic,
       ]
diff --git a/libs/partners/openai/Makefile b/libs/partners/openai/Makefile
index 8f8df6b5dd8..2bf160b45ed 100644
--- a/libs/partners/openai/Makefile
+++ b/libs/partners/openai/Makefile
@@ -27,6 +27,11 @@ test tests:
 integration_test integration_tests:
 	uv run --group test --group test_integration pytest -n auto $(TEST_FILE)
 
+# Run VCR cassette-backed integration tests in playback-only mode (no API keys needed).
+# Catches stale cassettes caused by test input changes without re-recording.
+test_vcr:
+	uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/
+
 test_watch:
 	uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)
 
diff --git a/libs/partners/openai/tests/conftest.py b/libs/partners/openai/tests/conftest.py
index 748df9beea0..e8159df1384 100644
--- a/libs/partners/openai/tests/conftest.py
+++ b/libs/partners/openai/tests/conftest.py
@@ -32,7 +32,9 @@ def vcr_config() -> dict:
     """Extend the default configuration coming from langchain_tests."""
     config = base_vcr_config()
     config["match_on"] = [
-        m if m != "body" else "json_body" for m in config.get("match_on", [])
+        m if m != "body" else "json_body"
+        for m in config.get("match_on", [])
+        if m != "uri"
     ]
     config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
     config["before_record_request"] = remove_request_headers
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
index 153b73965be..219a3a748e8 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
@@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -
 
     llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
     bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
-    ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
+    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
     assert len(ai_msg.tool_calls) == 1
     assert ai_msg.tool_calls[0]["name"] == "multiply"
     assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
 
     full: Any = None
-    for chunk in bound_llm.stream("what's 5 * 4"):
+    for chunk in bound_llm.stream("whats 5 * 4"):
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert len(full.tool_calls) == 1
@@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None:
     assert parsed == response.additional_kwargs["parsed"]
 
     # Test function calling
-    ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
+    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
     assert len(ai_msg.tool_calls) == 1
     assert ai_msg.tool_calls[0]["name"] == "multiply"
     assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
@@ -555,7 +555,7 @@ def test_stream_reasoning_summary(
     )
     message_1 = {
         "role": "user",
-        "content": "What was the third tallest building in the year 2000?",
+        "content": "What was the third tallest buliding in the year 2000?",
     }
     response_1: BaseMessageChunk | None = None
     for chunk in llm.stream([message_1]):