diff --git a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py
index 677b8cf4c9e..54a2178c0a0 100644
--- a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py
+++ b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_baseline_vulnerability.py
@@ -4,10 +4,25 @@ These tests verify that models trigger tool calls from injection payloads when
 middleware is NOT applied. This proves the middleware provides real protection.
 
 A test PASSES if the model IS vulnerable (triggers the target tool).
+
+NOTE: These tests are skipped by default (both locally and in CI) because they:
+1. Make real API calls to LLM providers (costs money)
+2. Are slow (multiple LLM roundtrips per test)
+3. Are for manual validation, not regression testing
+
+To run manually:
+    RUN_BENCHMARK_TESTS=1 pytest test_baseline_vulnerability.py -v -s
 """
 
+import os
+
 import pytest
 
+pytestmark = pytest.mark.skipif(
+    os.environ.get("RUN_BENCHMARK_TESTS") != "1",
+    reason="E2E tests are skipped by default. Set RUN_BENCHMARK_TESTS=1 to run.",
+)
+
 from .conftest import INJECTION_TEST_CASES, check_vulnerability