chore(openai): fix broken vcr cassette playback and add ci guard (#36502)

Fix broken VCR cassette playback in `langchain-openai` integration tests and add a CI job to prevent regressions. Two independent bugs made all VCR-backed tests fail: (1) `before_record_request` redacts request URIs to `**REDACTED**`, but `match_on` still included `uri`, so recorded requests never matched during playback; and (2) a typo-fix commit (`c9f51aef85`) changed test input strings without re-recording the cassettes, so `json_body` matching also failed. This change drops `uri` from `match_on` and restores the original test input strings so that they match the recorded cassettes.
2026-06-09 10:17:00 +00:00 · 2026-04-03 12:55:52 -04:00
parent 8c15649127
commit deb85b6c4c
6 changed files with 111 additions and 7 deletions
--- a/.github/scripts/check_diff.py
+++ b/.github/scripts/check_diff.py
@@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [
    "libs/model-profiles",
 ]

+# Packages with VCR cassette-backed integration tests.
+# These get a playback-only CI check to catch stale cassettes.
+VCR_PACKAGES = {
+    "libs/partners/openai",
+}
+
 # When set to True, we are ignoring core dependents
 # in order to be able to get CI to pass for each individual
 # package that depends on core
 # e.g. if you touch core, we don't then add textsplitters/etc to CI
 IGNORE_CORE_DEPENDENTS = False

-# ignored partners are removed from dependents
-# but still run if directly edited
+# Ignored partners are removed from dependents but still run if directly edited
 IGNORED_PARTNERS = [
    # remove huggingface from dependents because of CI instability
    # specifically in huggingface jobs
@@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs(
        dirs = list(dirs_to_run["extended-test"])
    elif job == "codspeed":
        dirs = list(dirs_to_run["codspeed"])
+    elif job == "vcr-tests":
+        # Only run VCR tests for packages that have cassettes and are affected
+        all_affected = set(
+            add_dependents(
+                dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
+            )
+        )
+        dirs = [d for d in VCR_PACKAGES if d in all_affected]
    else:
        raise ValueError(f"Unknown job: {job}")

@@ -335,6 +348,7 @@ if __name__ == "__main__":
            "dependencies",
            "test-pydantic",
            "codspeed",
+            "vcr-tests",
        ]
    }

--- a/.github/workflows/_test_vcr.yml
+++ b/.github/workflows/_test_vcr.yml
@@ -0,0 +1,66 @@
+# Runs VCR cassette-backed integration tests in playback-only mode.
+#
+# No API keys needed — catches stale cassettes caused by test input
+# changes without re-recording.
+#
+# Called as part of check_diffs.yml workflow.
+
+name: "📼 VCR Cassette Tests"
+
+on:
+  workflow_call:
+    inputs:
+      working-directory:
+        required: true
+        type: string
+        description: "From which folder this pipeline executes"
+      python-version:
+        required: true
+        type: string
+        description: "Python version to use"
+
+permissions:
+  contents: read
+
+env:
+  UV_FROZEN: "true"
+
+jobs:
+  build:
+    defaults:
+      run:
+        working-directory: ${{ inputs.working-directory }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    name: "Python ${{ inputs.python-version }}"
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
+        uses: "./.github/actions/uv_setup"
+        with:
+          python-version: ${{ inputs.python-version }}
+          cache-suffix: test-vcr-${{ inputs.working-directory }}
+          working-directory: ${{ inputs.working-directory }}
+
+      - name: "📦 Install Test Dependencies"
+        shell: bash
+        run: uv sync --group test
+
+      - name: "📼 Run VCR Cassette Tests (playback-only)"
+        shell: bash
+        env:
+          OPENAI_API_KEY: sk-fake
+        run: make test_vcr
+
+      - name: "🧹 Verify Clean Working Directory"
+        shell: bash
+        run: |
+          set -eu
+
+          STATUS="$(git status)"
+          echo "$STATUS"
+
+          # grep will exit non-zero if the target message isn't found,
+          # and `set -e` above will cause the step to fail.
+          echo "$STATUS" | grep 'nothing to commit, working tree clean'
--- a/.github/workflows/check_diffs.yml
+++ b/.github/workflows/check_diffs.yml
@@ -66,6 +66,7 @@ jobs:
      compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
      dependencies: ${{ steps.set-matrix.outputs.dependencies }}
      test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
+      vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
  # Run linting only on packages that have changed files
  lint:
    needs: [build]
@@ -123,6 +124,21 @@ jobs:
      python-version: ${{ matrix.job-configs.python-version }}
    secrets: inherit

+  # Run VCR cassette-backed integration tests in playback-only mode (no API keys)
+  vcr-tests:
+    name: "VCR Cassette Tests"
+    needs: [build]
+    if: ${{ needs.build.outputs.vcr-tests != '[]' }}
+    strategy:
+      matrix:
+        job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
+      fail-fast: false
+    uses: ./.github/workflows/_test_vcr.yml
+    with:
+      working-directory: ${{ matrix.job-configs.working-directory }}
+      python-version: ${{ matrix.job-configs.python-version }}
+    secrets: inherit
+
  # Run extended test suites that require additional dependencies
  extended-tests:
    name: "Extended Tests"
@@ -178,6 +194,7 @@ jobs:
        lint,
        test,
        compile-integration-tests,
+        vcr-tests,
        extended-tests,
        test-pydantic,
      ]
--- a/libs/partners/openai/Makefile
+++ b/libs/partners/openai/Makefile
@@ -27,6 +27,11 @@ test tests:
 integration_test integration_tests:
 	uv run --group test --group test_integration pytest -n auto $(TEST_FILE)

+# Run VCR cassette-backed integration tests in playback-only mode (no API keys needed).
+# Catches stale cassettes caused by test input changes without re-recording.
+test_vcr:
+	uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/
+
 test_watch:
 	uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)

--- a/libs/partners/openai/tests/conftest.py
+++ b/libs/partners/openai/tests/conftest.py
@@ -32,7 +32,9 @@ def vcr_config() -> dict:
    """Extend the default configuration coming from langchain_tests."""
    config = base_vcr_config()
    config["match_on"] = [
-        m if m != "body" else "json_body" for m in config.get("match_on", [])
+        m if m != "body" else "json_body"
+        for m in config.get("match_on", [])
+        if m != "uri"
    ]
    config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
    config["before_record_request"] = remove_request_headers
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
@@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -

    llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
    bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
-    ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
+    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
    assert len(ai_msg.tool_calls) == 1
    assert ai_msg.tool_calls[0]["name"] == "multiply"
    assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}

    full: Any = None
-    for chunk in bound_llm.stream("what's 5 * 4"):
+    for chunk in bound_llm.stream("whats 5 * 4"):
        assert isinstance(chunk, AIMessageChunk)
        full = chunk if full is None else full + chunk
    assert len(full.tool_calls) == 1
@@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None:
    assert parsed == response.additional_kwargs["parsed"]

    # Test function calling
-    ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
+    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
    assert len(ai_msg.tool_calls) == 1
    assert ai_msg.tool_calls[0]["name"] == "multiply"
    assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
@@ -555,7 +555,7 @@ def test_stream_reasoning_summary(
    )
    message_1 = {
        "role": "user",
-        "content": "What was the third tallest building in the year 2000?",
+        "content": "What was the third tallest buliding in the year 2000?",
    }
    response_1: BaseMessageChunk | None = None
    for chunk in llm.stream([message_1]):