diff --git a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py index 677b8cf4c9e..54a2178c0a0 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py +++ b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py @@ -4,10 +4,25 @@ These tests verify that models trigger tool calls from injection payloads when middleware is NOT applied. This proves the middleware provides real protection. A test PASSES if the model IS vulnerable (triggers the target tool). + +NOTE: These tests are skipped by default (locally and in CI) because they: +1. Make real API calls to LLM providers (which costs money) +2. Are slow (multiple LLM round trips per test) +3. Are for manual validation, not regression testing + +To run manually: + RUN_BENCHMARK_TESTS=1 pytest test_baseline_vulnerability.py -v -s """ +import os + import pytest +pytestmark = pytest.mark.skipif( + os.environ.get("RUN_BENCHMARK_TESTS") != "1", + reason="E2E tests are skipped by default. Set RUN_BENCHMARK_TESTS=1 to run.", +) + from .conftest import INJECTION_TEST_CASES, check_vulnerability