fix

add comment
update
2026-02-09 02:33:34 +00:00 · 2024-09-17 19:20:32 -04:00 · 2024-09-17 19:19:22 -04:00 · 2024-09-17 19:18:11 -04:00 · 2024-09-17 19:01:36 -04:00 · 2024-09-17 19:01:08 -04:00
15 changed files with 195 additions and 53 deletions
--- a/.github/scripts/check_diff.py
+++ b/.github/scripts/check_diff.py
@@ -68,7 +68,9 @@ def dependents_graph() -> dict:

        # load extended deps from extended_testing_deps.txt
        package_path = Path(path).parent
-        extended_requirement_path = package_path / "extended_testing_deps.txt"
+        extended_requirement_path = (
+            package_path / "extended_dependencies" / "extended_testing_deps.txt"
+        )
        if extended_requirement_path.exists():
            with open(extended_requirement_path, "r") as f:
                extended_deps = f.read().splitlines()
--- a/.github/workflows/_extended_test.yml
+++ b/.github/workflows/_extended_test.yml
@@ -0,0 +1,73 @@
+name: Extended tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      working-directory:
+        required: true
+        type: string
+        default: "libs/community"
+      python-version:
+        required: true
+        type: string
+        description: "Python version to use"
+        default: "3.11"
+      extended-deps-file:
+        required: true
+        type: choice
+        description: "File to install extended dependencies from"
+        options:
+          - extended_testing_deps.txt
+          - pdf_loader_deps.txt
+          - other_deps.txt
+
+env:
+  POETRY_VERSION: "1.7.1"
+
+jobs:
+  build:
+    defaults:
+      run:
+        working-directory: ${{ inputs.working-directory }}
+    runs-on: ubuntu-latest
+    name: Python ${{ inputs.python-version }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
+        uses: "./.github/actions/poetry_setup"
+        with:
+          python-version: ${{ inputs.python-version }}
+          poetry-version: ${{ env.POETRY_VERSION }}
+          working-directory: ${{ inputs.working-directory }}
+          cache-key: core
+
+      - name: Install extended dependencies
+        shell: bash
+        run: |
+          poetry install --with test,test_integration
+          poetry run pip install uv
+          poetry run uv pip install -r extended_dependencies/${{ inputs.extended-deps-file }}
+
+      - name: Install deps outside pyproject  # "/" is appended below so the bare "libs/community" default matches
+        if: ${{ startsWith(format('{0}/', inputs.working-directory), 'libs/community/') }}
+        shell: bash
+        run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"
+
+      - name: Run extended tests
+        shell: bash
+        run: |
+          make test
+          make integration_tests
+
+      - name: Ensure the tests did not create any additional files
+        shell: bash
+        run: |
+          set -eu
+
+          STATUS="$(git status)"
+          echo "$STATUS"
+
+          # grep will exit non-zero if the target message isn't found,
+          # and `set -e` above will cause the step to fail.
+          echo "$STATUS" | grep 'nothing to commit, working tree clean'
--- a/.github/workflows/_integration_test.yml
+++ b/.github/workflows/_integration_test.yml
@@ -1,3 +1,6 @@
+# Ignore changes to this file. It is hijacked temporarily, only to allow
+# testing workflow dispatch of the new workflow from a branch.
+
 name: Integration tests

 on:
@@ -6,10 +9,20 @@ on:
      working-directory:
        required: true
        type: string
+        default: "libs/community"
      python-version:
        required: true
        type: string
        description: "Python version to use"
+        default: "3.11"
+      extended-deps-file:
+        required: true
+        type: choice
+        description: "File to install extended dependencies from"
+        options:
+          - extended_testing_deps.txt
+          - pdf_loader_deps.txt
+          - other_deps.txt

 env:
  POETRY_VERSION: "1.7.1"
@@ -32,60 +45,22 @@ jobs:
          working-directory: ${{ inputs.working-directory }}
          cache-key: core

-      - name: Install dependencies
+      - name: Install extended dependencies
        shell: bash
-        run: poetry install --with test,test_integration
+        run: |
+          poetry install --with test,test_integration
+          poetry run pip install uv
+          poetry run uv pip install -r extended_dependencies/${{ inputs.extended-deps-file }}

      - name: Install deps outside pyproject
        if: ${{ startsWith(inputs.working-directory, 'libs/community/') }}
        shell: bash
        run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"

-      - name: 'Authenticate to Google Cloud'
-        id: 'auth'
-        uses: google-github-actions/auth@v2
-        with:
-          credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'
-
-      - name: Run integration tests
+      - name: Run extended tests
        shell: bash
-        env:
-          AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
-          AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
-          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
-          AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
-          AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
-          AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
-          GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
-          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
-          EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
-          NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
-          WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
-          WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
-          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
-          PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
-          ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
-          ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
-          ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
-          ES_URL: ${{ secrets.ES_URL }}
-          ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
-          ES_API_KEY: ${{ secrets.ES_API_KEY }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
-          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
-          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
-          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-          UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
        run: |
+          make test
          make integration_tests

      - name: Ensure the tests did not create any additional files
--- a/.github/workflows/check_diffs.yml
+++ b/.github/workflows/check_diffs.yml
@@ -139,7 +139,7 @@ jobs:
          echo "Running extended tests, installing dependencies with poetry..."
          poetry install --with test
          poetry run pip install uv
-          poetry run uv pip install -r extended_testing_deps.txt
+          poetry run uv pip install -r extended_dependencies/extended_testing_deps.txt

      - name: Run extended tests
        run: make extended_tests
--- a/libs/community/Makefile
+++ b/libs/community/Makefile
@@ -19,7 +19,7 @@ test tests:
 	poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)

 integration_tests:
-	poetry run pytest $(TEST_FILE)
+	poetry run pytest -m runs $(TEST_FILE)

 test_watch:
 	poetry run ptw --disable-socket --allow-unix-socket --snapshot-update --now . -- -vv tests/unit_tests
--- a/libs/community/extended_dependencies/extended_testing_deps.txt
+++ b/libs/community/extended_dependencies/extended_testing_deps.txt
@@ -0,0 +1,2 @@
+-r pdf_loader_deps.txt
+-r other_deps.txt
--- a/libs/community/extended_dependencies/other_deps.txt
+++ b/libs/community/extended_dependencies/other_deps.txt
@@ -54,7 +54,6 @@ openapi-pydantic>=0.3.2,<0.4
 oracle-ads>=2.9.1,<3
 oracledb>=2.2.0,<3
 pandas>=2.0.1,<3
-pdfminer-six>=20221105,<20240706
 pgvector>=0.1.6,<0.2
 praw>=7.7.1,<8
 premai>=0.3.25,<0.4
@@ -62,9 +61,6 @@ psychicapi>=0.8.0,<0.9
 pydantic>=2.7.4,<3
 py-trello>=0.19.0,<0.20
 pyjwt>=2.8.0,<3
-pymupdf>=1.22.3,<2
-pypdf>=3.4.0,<5
-pypdfium2>=4.10.0,<5
 pyspark>=3.4.0,<4
 rank-bm25>=0.2.2,<0.3
 rapidfuzz>=3.1.1,<4
--- a/libs/community/extended_dependencies/pdf_loader_deps.txt
+++ b/libs/community/extended_dependencies/pdf_loader_deps.txt
@@ -0,0 +1,4 @@
+pdfminer-six>=20221105,<20240706
+pymupdf>=1.22.3,<2
+pypdf>=3.4.0,<5
+pypdfium2>=4.10.0,<5
--- a/libs/community/pyproject.toml
+++ b/libs/community/pyproject.toml
@@ -63,6 +63,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
 markers = [
    "requires: mark tests as requiring a specific library",
    "scheduled: mark tests to run in scheduled testing",
+    "runs: mark tests to run in CI",
    "compile: mark placeholder test used to compile integration tests without running them",
 ]
 asyncio_mode = "auto"
--- a/libs/community/tests/integration_tests/conftest.py
+++ b/libs/community/tests/integration_tests/conftest.py
@@ -1,6 +1,11 @@
-# Getting the absolute path of the current file's directory
+from importlib import util
 import os
+from typing import Dict, Sequence

+import pytest
+from pytest import Config, Function, Parser
+
+# Getting the absolute path of the current file's directory
 ABS_PATH = os.path.dirname(os.path.abspath(__file__))

 # Getting the absolute path of the project's root directory
@@ -17,3 +22,83 @@ def _load_env() -> None:


 _load_env()
+
+def pytest_addoption(parser: Parser) -> None:
+    """Register the `--only-extended` and `--only-core` flags with pytest."""
+    parser.addoption(
+        "--only-extended",
+        action="store_true",
+        help="Only run extended tests. Does not allow skipping any extended tests.",
+    )
+    parser.addoption(
+        "--only-core",
+        action="store_true",
+        help="Only run core tests. Never runs any extended tests.",
+    )
+
+
+def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None:
+    """Add implementations for handling custom markers.
+
+    At the moment, this adds support for a custom `requires` marker, which denotes
+    tests that require one or more packages to be installed. When a package is
+    missing the test is skipped, or `pytest.fail` is called if `--only-extended` is
+    set. `--only-core` skips all `requires` tests; `--only-extended` skips the rest.
+
+    The `requires` marker syntax is:
+
+    .. code-block:: python
+
+        @pytest.mark.requires("package1", "package2")
+        def test_something():
+            ...
+    """
+    # Mapping from the name of a package to whether it is installed or not.
+    # Used to avoid repeated calls to `util.find_spec`
+    required_pkgs_info: Dict[str, bool] = {}
+
+    only_extended = config.getoption("--only-extended") or False
+    only_core = config.getoption("--only-core") or False
+
+    if only_extended and only_core:
+        raise ValueError("Cannot specify both `--only-extended` and `--only-core`.")
+
+    for item in items:
+        requires_marker = item.get_closest_marker("requires")
+        if requires_marker is not None:
+            if only_core:
+                item.add_marker(pytest.mark.skip(reason="Skipping not a core test."))
+                continue
+
+            # Iterate through the list of required packages
+            required_pkgs = requires_marker.args
+            for pkg in required_pkgs:
+                # If we haven't yet checked whether the pkg is installed
+                # let's check it and store the result.
+                if pkg not in required_pkgs_info:
+                    try:
+                        installed = util.find_spec(pkg) is not None
+                    except Exception:
+                        installed = False
+                    required_pkgs_info[pkg] = installed
+
+                if not required_pkgs_info[pkg]:
+                    if only_extended:
+                        pytest.fail(
+                            f"Package `{pkg}` is not installed but is required for "
+                            f"extended tests. Please install the given package and "
+                            f"try again.",
+                        )
+
+                    else:
+                        # If the package is not installed, we immediately break
+                        # and mark the test as skipped.
+                        item.add_marker(
+                            pytest.mark.skip(reason=f"Requires pkg: `{pkg}`")
+                        )
+                        break
+        else:
+            if only_extended:
+                item.add_marker(
+                    pytest.mark.skip(reason="Skipping not an extended test.")
+                )
--- a/libs/community/tests/integration_tests/document_loaders/test_pdf.py
+++ b/libs/community/tests/integration_tests/document_loaders/test_pdf.py
@@ -86,6 +86,8 @@ def test_pdfminer_pdf_as_html_loader() -> None:
    assert len(docs) == 1


+@pytest.mark.runs
+@pytest.mark.requires("pypdf")
 def test_pypdf_loader() -> None:
    """Test PyPDFLoader."""
    file_path = Path(__file__).parent.parent / "examples/hello.pdf"
@@ -101,6 +103,8 @@ def test_pypdf_loader() -> None:
    assert len(docs) == 16


+@pytest.mark.runs
+@pytest.mark.requires("pypdf")
 def test_pypdf_loader_with_layout() -> None:
    """Test PyPDFLoader with layout mode."""
    file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"
--- a/libs/core/extended_dependencies/extended_testing_deps.txt
+++ b/libs/core/extended_dependencies/extended_testing_deps.txt
--- a/libs/experimental/extended_dependencies/extended_testing_deps.txt
+++ b/libs/experimental/extended_dependencies/extended_testing_deps.txt
--- a/libs/langchain/extended_dependencies/extended_testing_deps.txt
+++ b/libs/langchain/extended_dependencies/extended_testing_deps.txt
--- a/libs/text-splitters/extended_dependencies/extended_testing_deps.txt
+++ b/libs/text-splitters/extended_dependencies/extended_testing_deps.txt
Author	SHA1	Message	Date
Chester Curme	fdbd9f6eba	fix	2024-09-17 19:20:32 -04:00
Chester Curme	9786f01106	add comment	2024-09-17 19:19:22 -04:00
Chester Curme	0dfe63af2a	update	2024-09-17 19:18:11 -04:00
Chester Curme	5dfc2a89f7	temporarily hijack integration test job to test workflow dispatch off branch	2024-09-17 19:01:36 -04:00
Chester Curme	22bee8147f	add pytest marker	2024-09-17 19:01:08 -04:00
Chester Curme	4706541307	Revert "move some pdf integration tests to extended tests" This reverts commit `48ca84dcfa`.	2024-09-17 18:53:29 -04:00
Chester Curme	d7667da26d	add extended test workflow	2024-09-17 17:55:08 -04:00
Chester Curme	58e7175262	Revert "temporarily hijack integration test job to test workflow dispatch off branch" This reverts commit `fe832bef26`.	2024-09-17 17:54:04 -04:00
Chester Curme	9ef23df3f8	Revert "temporarily raise assertion error to check that tests run" This reverts commit `8f78650181`.	2024-09-17 17:53:15 -04:00
Chester Curme	8f78650181	temporarily raise assertion error to check that tests run	2024-09-17 17:50:41 -04:00
Chester Curme	48ca84dcfa	move some pdf integration tests to extended tests	2024-09-17 17:50:23 -04:00
Chester Curme	fe832bef26	temporarily hijack integration test job to test workflow dispatch off branch	2024-09-17 17:30:57 -04:00
Chester Curme	74a6079992	move all extended_testing_deps.txt	2024-09-17 16:38:49 -04:00
Chester Curme	32e90ce912	reorganize community deps	2024-09-17 16:34:28 -04:00