chore(openai): fix broken vcr cassette playback and add ci guard (#36502)

Fix broken VCR cassette playback in `langchain-openai` integration tests and add a CI job to prevent regressions. Two independent bugs made all VCR-backed tests fail: `before_record_request` redacts URIs to `**REDACTED**` but `match_on` still included `uri` (so playback never matched), and a typo-fix commit (`c9f51aef85`) changed test input strings without re-recording cassettes (so `json_body` matching also failed).
2026-06-09 10:17:00 +00:00 · 2026-04-03 12:55:52 -04:00
parent 8c15649127
commit deb85b6c4c
6 changed files with 111 additions and 7 deletions
--- a/.github/scripts/check_diff.py
+++ b/.github/scripts/check_diff.py
@@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [
    "libs/model-profiles",
 ]
 # Packages with VCR cassette-backed integration tests.
 # These get a playback-only CI check to catch stale cassettes.
 VCR_PACKAGES = {
    "libs/partners/openai",
 }
 # When set to True, we are ignoring core dependents
 # in order to be able to get CI to pass for each individual
 # package that depends on core
 # e.g. if you touch core, we don't then add textsplitters/etc to CI
 IGNORE_CORE_DEPENDENTS = False
-# ignored partners are removed from dependents
+# Ignored partners are removed from dependents
 # but still run if directly edited
 IGNORED_PARTNERS = [
    # remove huggingface from dependents because of CI instability
    # specifically in huggingface jobs
@@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs(
        dirs = list(dirs_to_run["extended-test"])
    elif job == "codspeed":
        dirs = list(dirs_to_run["codspeed"])
    elif job == "vcr-tests":
        # Only run VCR tests for packages that have cassettes and are affected
        all_affected = set(
            add_dependents(
                dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
            )
        )
        dirs = [d for d in VCR_PACKAGES if d in all_affected]
    else:
        raise ValueError(f"Unknown job: {job}")
@@ -335,6 +348,7 @@ if __name__ == "__main__":
            "dependencies",
            "test-pydantic",
            "codspeed",
            "vcr-tests",
        ]
    }
--- a/.github/workflows/_test_vcr.yml
+++ b/.github/workflows/_test_vcr.yml
@@ -0,0 +1,66 @@
 # Runs VCR cassette-backed integration tests in playback-only mode.
 #
 # No API keys needed — catches stale cassettes caused by test input
 # changes without re-recording.
 #
 # Called as part of check_diffs.yml workflow.
 name: "📼 VCR Cassette Tests"
 on:
  workflow_call:
    inputs:
      working-directory:
        required: true
        type: string
        description: "From which folder this pipeline executes"
      python-version:
        required: true
        type: string
        description: "Python version to use"
 permissions:
  contents: read
 env:
  UV_FROZEN: "true"
 jobs:
  build:
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
    timeout-minutes: 20
    name: "Python ${{ inputs.python-version }}"
    steps:
      - uses: actions/checkout@v6
      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
        uses: "./.github/actions/uv_setup"
        with:
          python-version: ${{ inputs.python-version }}
          cache-suffix: test-vcr-${{ inputs.working-directory }}
          working-directory: ${{ inputs.working-directory }}
      - name: "📦 Install Test Dependencies"
        shell: bash
        run: uv sync --group test
      - name: "📼 Run VCR Cassette Tests (playback-only)"
        shell: bash
        env:
          OPENAI_API_KEY: sk-fake
        run: make test_vcr
      - name: "🧹 Verify Clean Working Directory"
        shell: bash
        run: |
          set -eu
          STATUS="$(git status)"
          echo "$STATUS"
          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'
--- a/.github/workflows/check_diffs.yml
+++ b/.github/workflows/check_diffs.yml
@@ -66,6 +66,7 @@ jobs:
      compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
      dependencies: ${{ steps.set-matrix.outputs.dependencies }}
      test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
      vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
  # Run linting only on packages that have changed files
  lint:
    needs: [build]
@@ -123,6 +124,21 @@ jobs:
      python-version: ${{ matrix.job-configs.python-version }}
    secrets: inherit
  # Run VCR cassette-backed integration tests in playback-only mode (no API keys)
  vcr-tests:
    name: "VCR Cassette Tests"
    needs: [build]
    if: ${{ needs.build.outputs.vcr-tests != '[]' }}
    strategy:
      matrix:
        job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
      fail-fast: false
    uses: ./.github/workflows/_test_vcr.yml
    with:
      working-directory: ${{ matrix.job-configs.working-directory }}
      python-version: ${{ matrix.job-configs.python-version }}
    secrets: inherit
  # Run extended test suites that require additional dependencies
  extended-tests:
    name: "Extended Tests"
@@ -178,6 +194,7 @@ jobs:
        lint,
        test,
        compile-integration-tests,
        vcr-tests,
        extended-tests,
        test-pydantic,
      ]
--- a/libs/partners/openai/Makefile
+++ b/libs/partners/openai/Makefile
@@ -27,6 +27,11 @@ test tests:
 integration_test integration_tests:
 	uv run --group test --group test_integration pytest -n auto $(TEST_FILE)
 # Run VCR cassette-backed integration tests in playback-only mode (no API keys needed).
 # Catches stale cassettes caused by test input changes without re-recording.
 test_vcr:
 	uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/
 test_watch:
 	uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)
--- a/libs/partners/openai/tests/conftest.py
+++ b/libs/partners/openai/tests/conftest.py
@@ -32,7 +32,9 @@ def vcr_config() -> dict:
    """Extend the default configuration coming from langchain_tests."""
    config = base_vcr_config()
    config["match_on"] = [
-        m if m != "body" else "json_body" for m in config.get("match_on", [])
+        m if m != "body" else "json_body"
        for m in config.get("match_on", [])
        if m != "uri"
    ]
    config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
    config["before_record_request"] = remove_request_headers
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
@@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -
    llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
    bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
-    ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
+    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
    assert len(ai_msg.tool_calls) == 1
    assert ai_msg.tool_calls[0]["name"] == "multiply"
    assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
    full: Any = None
-    for chunk in bound_llm.stream("what's 5 * 4"):
+    for chunk in bound_llm.stream("whats 5 * 4"):
        assert isinstance(chunk, AIMessageChunk)
        full = chunk if full is None else full + chunk
    assert len(full.tool_calls) == 1
@@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None:
    assert parsed == response.additional_kwargs["parsed"]
    # Test function calling
-    ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
+    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
    assert len(ai_msg.tool_calls) == 1
    assert ai_msg.tool_calls[0]["name"] == "multiply"
    assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
@@ -555,7 +555,7 @@ def test_stream_reasoning_summary(
    )
    message_1 = {
        "role": "user",
-        "content": "What was the third tallest building in the year 2000?",
+        "content": "What was the third tallest buliding in the year 2000?",
    }
    response_1: BaseMessageChunk | None = None
    for chunk in llm.stream([message_1]):