Mirror of https://github.com/hwchase17/langchain.git (synced 2026-06-09 10:17:00 +00:00)
chore(openai): fix broken vcr cassette playback and add ci guard (#36502)
Fix broken VCR cassette playback in `langchain-openai` integration tests and add a CI job to prevent regressions. Two independent bugs made all VCR-backed tests fail: `before_record_request` redacts URIs to `**REDACTED**` but `match_on` still included `uri` (so playback never matched), and a typo-fix commit (`c9f51aef85`) changed test input strings without re-recording cassettes (so `json_body` matching also failed).
This commit is contained in:
Changed file: .github/scripts/check_diff.py (vendored) — 18 lines changed
@@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [
|
||||
"libs/model-profiles",
|
||||
]
|
||||
|
||||
# Packages with VCR cassette-backed integration tests.
|
||||
# These get a playback-only CI check to catch stale cassettes.
|
||||
VCR_PACKAGES = {
|
||||
"libs/partners/openai",
|
||||
}
|
||||
|
||||
# When set to True, we are ignoring core dependents
|
||||
# in order to be able to get CI to pass for each individual
|
||||
# package that depends on core
|
||||
# e.g. if you touch core, we don't then add textsplitters/etc to CI
|
||||
IGNORE_CORE_DEPENDENTS = False
|
||||
|
||||
# ignored partners are removed from dependents
|
||||
# but still run if directly edited
|
||||
# Ignored partners are removed from dependents but still run if directly edited
|
||||
IGNORED_PARTNERS = [
|
||||
# remove huggingface from dependents because of CI instability
|
||||
# specifically in huggingface jobs
|
||||
@@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs(
|
||||
dirs = list(dirs_to_run["extended-test"])
|
||||
elif job == "codspeed":
|
||||
dirs = list(dirs_to_run["codspeed"])
|
||||
elif job == "vcr-tests":
|
||||
# Only run VCR tests for packages that have cassettes and are affected
|
||||
all_affected = set(
|
||||
add_dependents(
|
||||
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
|
||||
)
|
||||
)
|
||||
dirs = [d for d in VCR_PACKAGES if d in all_affected]
|
||||
else:
|
||||
raise ValueError(f"Unknown job: {job}")
|
||||
|
||||
@@ -335,6 +348,7 @@ if __name__ == "__main__":
|
||||
"dependencies",
|
||||
"test-pydantic",
|
||||
"codspeed",
|
||||
"vcr-tests",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
New file: .github/workflows/_test_vcr.yml (vendored, normal file) — 66 lines added
@@ -0,0 +1,66 @@
|
||||
# Runs VCR cassette-backed integration tests in playback-only mode.
|
||||
#
|
||||
# No API keys needed — catches stale cassettes caused by test input
|
||||
# changes without re-recording.
|
||||
#
|
||||
# Called as part of check_diffs.yml workflow.
|
||||
|
||||
name: "📼 VCR Cassette Tests"
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
python-version:
|
||||
required: true
|
||||
type: string
|
||||
description: "Python version to use"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
UV_FROZEN: "true"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache-suffix: test-vcr-${{ inputs.working-directory }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: "📦 Install Test Dependencies"
|
||||
shell: bash
|
||||
run: uv sync --group test
|
||||
|
||||
- name: "📼 Run VCR Cassette Tests (playback-only)"
|
||||
shell: bash
|
||||
env:
|
||||
OPENAI_API_KEY: sk-fake
|
||||
run: make test_vcr
|
||||
|
||||
- name: "🧹 Verify Clean Working Directory"
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
|
||||
STATUS="$(git status)"
|
||||
echo "$STATUS"
|
||||
|
||||
# grep will exit non-zero if the target message isn't found,
|
||||
# and `set -e` above will cause the step to fail.
|
||||
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||
Changed file: .github/workflows/check_diffs.yml (vendored) — 17 lines changed
@@ -66,6 +66,7 @@ jobs:
|
||||
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
|
||||
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
|
||||
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
|
||||
vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
|
||||
# Run linting only on packages that have changed files
|
||||
lint:
|
||||
needs: [build]
|
||||
@@ -123,6 +124,21 @@ jobs:
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run VCR cassette-backed integration tests in playback-only mode (no API keys)
|
||||
vcr-tests:
|
||||
name: "VCR Cassette Tests"
|
||||
needs: [build]
|
||||
if: ${{ needs.build.outputs.vcr-tests != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_test_vcr.yml
|
||||
with:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
python-version: ${{ matrix.job-configs.python-version }}
|
||||
secrets: inherit
|
||||
|
||||
# Run extended test suites that require additional dependencies
|
||||
extended-tests:
|
||||
name: "Extended Tests"
|
||||
@@ -178,6 +194,7 @@ jobs:
|
||||
lint,
|
||||
test,
|
||||
compile-integration-tests,
|
||||
vcr-tests,
|
||||
extended-tests,
|
||||
test-pydantic,
|
||||
]
|
||||
|
||||
@@ -27,6 +27,11 @@ test tests:
|
||||
integration_test integration_tests:
|
||||
uv run --group test --group test_integration pytest -n auto $(TEST_FILE)
|
||||
|
||||
# Run VCR cassette-backed integration tests in playback-only mode (no API keys needed).
|
||||
# Catches stale cassettes caused by test input changes without re-recording.
|
||||
test_vcr:
|
||||
uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/
|
||||
|
||||
test_watch:
|
||||
uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)
|
||||
|
||||
|
||||
@@ -32,7 +32,9 @@ def vcr_config() -> dict:
|
||||
"""Extend the default configuration coming from langchain_tests."""
|
||||
config = base_vcr_config()
|
||||
config["match_on"] = [
|
||||
m if m != "body" else "json_body" for m in config.get("match_on", [])
|
||||
m if m != "body" else "json_body"
|
||||
for m in config.get("match_on", [])
|
||||
if m != "uri"
|
||||
]
|
||||
config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
|
||||
config["before_record_request"] = remove_request_headers
|
||||
|
||||
@@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -
|
||||
|
||||
llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
|
||||
bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
|
||||
ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
|
||||
ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
|
||||
assert len(ai_msg.tool_calls) == 1
|
||||
assert ai_msg.tool_calls[0]["name"] == "multiply"
|
||||
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
|
||||
|
||||
full: Any = None
|
||||
for chunk in bound_llm.stream("what's 5 * 4"):
|
||||
for chunk in bound_llm.stream("whats 5 * 4"):
|
||||
assert isinstance(chunk, AIMessageChunk)
|
||||
full = chunk if full is None else full + chunk
|
||||
assert len(full.tool_calls) == 1
|
||||
@@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None:
|
||||
assert parsed == response.additional_kwargs["parsed"]
|
||||
|
||||
# Test function calling
|
||||
ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
|
||||
ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
|
||||
assert len(ai_msg.tool_calls) == 1
|
||||
assert ai_msg.tool_calls[0]["name"] == "multiply"
|
||||
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
|
||||
@@ -555,7 +555,7 @@ def test_stream_reasoning_summary(
|
||||
)
|
||||
message_1 = {
|
||||
"role": "user",
|
||||
"content": "What was the third tallest building in the year 2000?",
|
||||
"content": "What was the third tallest buliding in the year 2000?",
|
||||
}
|
||||
response_1: BaseMessageChunk | None = None
|
||||
for chunk in llm.stream([message_1]):
|
||||
|
||||
Reference in New Issue
Block a user