mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 18:50:33 +00:00
chore(openai): fix broken vcr cassette playback and add ci guard (#36502)
Fix broken VCR cassette playback in `langchain-openai` integration tests and add a CI job to prevent regressions. Two independent bugs made all VCR-backed tests fail: (1) `before_record_request` redacts URIs to `**REDACTED**` but `match_on` still included `uri`, so playback never matched — fixed by dropping `uri` from `match_on`; and (2) a typo-fix commit (`c9f51aef85`) changed test input strings without re-recording cassettes, so `json_body` matching also failed — fixed by restoring the originally recorded input strings (misspellings included, e.g. `whats`, `buliding`) so that they match the existing cassettes.
This commit is contained in:
18
.github/scripts/check_diff.py
vendored
18
.github/scripts/check_diff.py
vendored
@@ -33,14 +33,19 @@ LANGCHAIN_DIRS = [
|
|||||||
"libs/model-profiles",
|
"libs/model-profiles",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Packages with VCR cassette-backed integration tests.
|
||||||
|
# These get a playback-only CI check to catch stale cassettes.
|
||||||
|
VCR_PACKAGES = {
|
||||||
|
"libs/partners/openai",
|
||||||
|
}
|
||||||
|
|
||||||
# When set to True, we are ignoring core dependents
|
# When set to True, we are ignoring core dependents
|
||||||
# in order to be able to get CI to pass for each individual
|
# in order to be able to get CI to pass for each individual
|
||||||
# package that depends on core
|
# package that depends on core
|
||||||
# e.g. if you touch core, we don't then add textsplitters/etc to CI
|
# e.g. if you touch core, we don't then add textsplitters/etc to CI
|
||||||
IGNORE_CORE_DEPENDENTS = False
|
IGNORE_CORE_DEPENDENTS = False
|
||||||
|
|
||||||
# ignored partners are removed from dependents
|
# Ignored partners are removed from dependents but still run if directly edited
|
||||||
# but still run if directly edited
|
|
||||||
IGNORED_PARTNERS = [
|
IGNORED_PARTNERS = [
|
||||||
# remove huggingface from dependents because of CI instability
|
# remove huggingface from dependents because of CI instability
|
||||||
# specifically in huggingface jobs
|
# specifically in huggingface jobs
|
||||||
@@ -221,6 +226,14 @@ def _get_configs_for_multi_dirs(
|
|||||||
dirs = list(dirs_to_run["extended-test"])
|
dirs = list(dirs_to_run["extended-test"])
|
||||||
elif job == "codspeed":
|
elif job == "codspeed":
|
||||||
dirs = list(dirs_to_run["codspeed"])
|
dirs = list(dirs_to_run["codspeed"])
|
||||||
|
elif job == "vcr-tests":
|
||||||
|
# Only run VCR tests for packages that have cassettes and are affected
|
||||||
|
all_affected = set(
|
||||||
|
add_dependents(
|
||||||
|
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
|
||||||
|
)
|
||||||
|
)
|
||||||
|
dirs = [d for d in VCR_PACKAGES if d in all_affected]
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown job: {job}")
|
raise ValueError(f"Unknown job: {job}")
|
||||||
|
|
||||||
@@ -335,6 +348,7 @@ if __name__ == "__main__":
|
|||||||
"dependencies",
|
"dependencies",
|
||||||
"test-pydantic",
|
"test-pydantic",
|
||||||
"codspeed",
|
"codspeed",
|
||||||
|
"vcr-tests",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
66
.github/workflows/_test_vcr.yml
vendored
Normal file
66
.github/workflows/_test_vcr.yml
vendored
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
# Runs VCR cassette-backed integration tests in playback-only mode.
|
||||||
|
#
|
||||||
|
# No API keys needed — catches stale cassettes caused by test input
|
||||||
|
# changes without re-recording.
|
||||||
|
#
|
||||||
|
# Called as part of check_diffs.yml workflow.
|
||||||
|
|
||||||
|
name: "📼 VCR Cassette Tests"
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_call:
|
||||||
|
inputs:
|
||||||
|
working-directory:
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
description: "From which folder this pipeline executes"
|
||||||
|
python-version:
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
description: "Python version to use"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
env:
|
||||||
|
UV_FROZEN: "true"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: ${{ inputs.working-directory }}
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 20
|
||||||
|
name: "Python ${{ inputs.python-version }}"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||||
|
uses: "./.github/actions/uv_setup"
|
||||||
|
with:
|
||||||
|
python-version: ${{ inputs.python-version }}
|
||||||
|
cache-suffix: test-vcr-${{ inputs.working-directory }}
|
||||||
|
working-directory: ${{ inputs.working-directory }}
|
||||||
|
|
||||||
|
- name: "📦 Install Test Dependencies"
|
||||||
|
shell: bash
|
||||||
|
run: uv sync --group test
|
||||||
|
|
||||||
|
- name: "📼 Run VCR Cassette Tests (playback-only)"
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
OPENAI_API_KEY: sk-fake
|
||||||
|
run: make test_vcr
|
||||||
|
|
||||||
|
- name: "🧹 Verify Clean Working Directory"
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
STATUS="$(git status)"
|
||||||
|
echo "$STATUS"
|
||||||
|
|
||||||
|
# grep will exit non-zero if the target message isn't found,
|
||||||
|
# and `set -e` above will cause the step to fail.
|
||||||
|
echo "$STATUS" | grep 'nothing to commit, working tree clean'
|
||||||
17
.github/workflows/check_diffs.yml
vendored
17
.github/workflows/check_diffs.yml
vendored
@@ -66,6 +66,7 @@ jobs:
|
|||||||
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
|
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
|
||||||
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
|
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
|
||||||
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
|
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
|
||||||
|
vcr-tests: ${{ steps.set-matrix.outputs.vcr-tests }}
|
||||||
# Run linting only on packages that have changed files
|
# Run linting only on packages that have changed files
|
||||||
lint:
|
lint:
|
||||||
needs: [build]
|
needs: [build]
|
||||||
@@ -123,6 +124,21 @@ jobs:
|
|||||||
python-version: ${{ matrix.job-configs.python-version }}
|
python-version: ${{ matrix.job-configs.python-version }}
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|
||||||
|
# Run VCR cassette-backed integration tests in playback-only mode (no API keys)
|
||||||
|
vcr-tests:
|
||||||
|
name: "VCR Cassette Tests"
|
||||||
|
needs: [build]
|
||||||
|
if: ${{ needs.build.outputs.vcr-tests != '[]' }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
job-configs: ${{ fromJson(needs.build.outputs.vcr-tests) }}
|
||||||
|
fail-fast: false
|
||||||
|
uses: ./.github/workflows/_test_vcr.yml
|
||||||
|
with:
|
||||||
|
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||||
|
python-version: ${{ matrix.job-configs.python-version }}
|
||||||
|
secrets: inherit
|
||||||
|
|
||||||
# Run extended test suites that require additional dependencies
|
# Run extended test suites that require additional dependencies
|
||||||
extended-tests:
|
extended-tests:
|
||||||
name: "Extended Tests"
|
name: "Extended Tests"
|
||||||
@@ -178,6 +194,7 @@ jobs:
|
|||||||
lint,
|
lint,
|
||||||
test,
|
test,
|
||||||
compile-integration-tests,
|
compile-integration-tests,
|
||||||
|
vcr-tests,
|
||||||
extended-tests,
|
extended-tests,
|
||||||
test-pydantic,
|
test-pydantic,
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -27,6 +27,11 @@ test tests:
|
|||||||
integration_test integration_tests:
|
integration_test integration_tests:
|
||||||
uv run --group test --group test_integration pytest -n auto $(TEST_FILE)
|
uv run --group test --group test_integration pytest -n auto $(TEST_FILE)
|
||||||
|
|
||||||
|
# Run VCR cassette-backed integration tests in playback-only mode (no API keys needed).
|
||||||
|
# Catches stale cassettes caused by test input changes without re-recording.
|
||||||
|
test_vcr:
|
||||||
|
uv run --group test pytest --record-mode=none -m vcr --ignore=tests/integration_tests/chat_models/test_azure_standard.py tests/integration_tests/
|
||||||
|
|
||||||
test_watch:
|
test_watch:
|
||||||
uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)
|
uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)
|
||||||
|
|
||||||
|
|||||||
@@ -32,7 +32,9 @@ def vcr_config() -> dict:
|
|||||||
"""Extend the default configuration coming from langchain_tests."""
|
"""Extend the default configuration coming from langchain_tests."""
|
||||||
config = base_vcr_config()
|
config = base_vcr_config()
|
||||||
config["match_on"] = [
|
config["match_on"] = [
|
||||||
m if m != "body" else "json_body" for m in config.get("match_on", [])
|
m if m != "body" else "json_body"
|
||||||
|
for m in config.get("match_on", [])
|
||||||
|
if m != "uri"
|
||||||
]
|
]
|
||||||
config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
|
config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
|
||||||
config["before_record_request"] = remove_request_headers
|
config["before_record_request"] = remove_request_headers
|
||||||
|
|||||||
@@ -182,13 +182,13 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -
|
|||||||
|
|
||||||
llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
|
llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version)
|
||||||
bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
|
bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}])
|
||||||
ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
|
ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
|
||||||
assert len(ai_msg.tool_calls) == 1
|
assert len(ai_msg.tool_calls) == 1
|
||||||
assert ai_msg.tool_calls[0]["name"] == "multiply"
|
assert ai_msg.tool_calls[0]["name"] == "multiply"
|
||||||
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
|
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
|
||||||
|
|
||||||
full: Any = None
|
full: Any = None
|
||||||
for chunk in bound_llm.stream("what's 5 * 4"):
|
for chunk in bound_llm.stream("whats 5 * 4"):
|
||||||
assert isinstance(chunk, AIMessageChunk)
|
assert isinstance(chunk, AIMessageChunk)
|
||||||
full = chunk if full is None else full + chunk
|
full = chunk if full is None else full + chunk
|
||||||
assert len(full.tool_calls) == 1
|
assert len(full.tool_calls) == 1
|
||||||
@@ -416,7 +416,7 @@ def test_function_calling_and_structured_output(schema: Any) -> None:
|
|||||||
assert parsed == response.additional_kwargs["parsed"]
|
assert parsed == response.additional_kwargs["parsed"]
|
||||||
|
|
||||||
# Test function calling
|
# Test function calling
|
||||||
ai_msg = cast(AIMessage, bound_llm.invoke("what's 5 * 4"))
|
ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
|
||||||
assert len(ai_msg.tool_calls) == 1
|
assert len(ai_msg.tool_calls) == 1
|
||||||
assert ai_msg.tool_calls[0]["name"] == "multiply"
|
assert ai_msg.tool_calls[0]["name"] == "multiply"
|
||||||
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
|
assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"}
|
||||||
@@ -555,7 +555,7 @@ def test_stream_reasoning_summary(
|
|||||||
)
|
)
|
||||||
message_1 = {
|
message_1 = {
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "What was the third tallest building in the year 2000?",
|
"content": "What was the third tallest buliding in the year 2000?",
|
||||||
}
|
}
|
||||||
response_1: BaseMessageChunk | None = None
|
response_1: BaseMessageChunk | None = None
|
||||||
for chunk in llm.stream([message_1]):
|
for chunk in llm.stream([message_1]):
|
||||||
|
|||||||
Reference in New Issue
Block a user