Compare commits

...

14 Commits

Author SHA1 Message Date
Chester Curme
fdbd9f6eba fix 2024-09-17 19:20:32 -04:00
Chester Curme
9786f01106 add comment 2024-09-17 19:19:22 -04:00
Chester Curme
0dfe63af2a update 2024-09-17 19:18:11 -04:00
Chester Curme
5dfc2a89f7 temporarily hijack integration test job to test workflow dispatch off branch 2024-09-17 19:01:36 -04:00
Chester Curme
22bee8147f add pytest marker 2024-09-17 19:01:08 -04:00
Chester Curme
4706541307 Revert "move some pdf integration tests to extended tests"
This reverts commit 48ca84dcfa.
2024-09-17 18:53:29 -04:00
Chester Curme
d7667da26d add extended test workflow 2024-09-17 17:55:08 -04:00
Chester Curme
58e7175262 Revert "temporarily hijack integration test job to test workflow dispatch off branch"
This reverts commit fe832bef26.
2024-09-17 17:54:04 -04:00
Chester Curme
9ef23df3f8 Revert "temporarily raise assertion error to check that tests run"
This reverts commit 8f78650181.
2024-09-17 17:53:15 -04:00
Chester Curme
8f78650181 temporarily raise assertion error to check that tests run 2024-09-17 17:50:41 -04:00
Chester Curme
48ca84dcfa move some pdf integration tests to extended tests 2024-09-17 17:50:23 -04:00
Chester Curme
fe832bef26 temporarily hijack integration test job to test workflow dispatch off branch 2024-09-17 17:30:57 -04:00
Chester Curme
74a6079992 move all extended_testing_deps.txt 2024-09-17 16:38:49 -04:00
Chester Curme
32e90ce912 reorganize community deps 2024-09-17 16:34:28 -04:00
15 changed files with 195 additions and 53 deletions

View File

@@ -68,7 +68,9 @@ def dependents_graph() -> dict:
# load extended deps from extended_testing_deps.txt
package_path = Path(path).parent
extended_requirement_path = package_path / "extended_testing_deps.txt"
extended_requirement_path = (
package_path / "extended_dependencies" / "extended_testing_deps.txt"
)
if extended_requirement_path.exists():
with open(extended_requirement_path, "r") as f:
extended_deps = f.read().splitlines()

73
.github/workflows/_extended_test.yml vendored Normal file
View File

@@ -0,0 +1,73 @@
# Manually dispatched workflow that installs one of the extended dependency
# files for a package and then runs its unit and integration test targets.
name: Extended tests

on:
  workflow_dispatch:
    inputs:
      working-directory:
        required: true
        type: string
        default: "libs/community"
      python-version:
        required: true
        type: string
        description: "Python version to use"
        default: "3.11"
      extended-deps-file:
        required: true
        type: choice
        description: "File to install extended dependencies from"
        # File names are resolved relative to
        # <working-directory>/extended_dependencies/ by the install step below.
        options:
          - extended_testing_deps.txt
          - pdf_loader_deps.txt
          - other_deps.txt

env:
  POETRY_VERSION: "1.7.1"

jobs:
  build:
    defaults:
      run:
        # Every `run` step executes inside the selected package directory.
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
    name: Python ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ inputs.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: core

      - name: Install extended dependencies
        shell: bash
        run: |
          poetry install --with test,test_integration
          poetry run pip install uv
          poetry run uv pip install -r extended_dependencies/${{ inputs.extended-deps-file }}

      - name: Install deps outside pyproject
        # Match the package directory itself as well as anything beneath it.
        # The previous guard only used startsWith(..., 'libs/community/'),
        # which is false for the default input "libs/community" (no trailing
        # slash), so this step was skipped for the default target.
        if: ${{ inputs.working-directory == 'libs/community' || startsWith(inputs.working-directory, 'libs/community/') }}
        shell: bash
        run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"

      - name: Run extended tests
        shell: bash
        run: |
          make test
          make integration_tests

      - name: Ensure the tests did not create any additional files
        shell: bash
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'

View File

@@ -1,3 +1,6 @@
# Ignore changes to this file. Hijacking just to allow
# testing of workflow dispatch on new workflow off of branch.
name: Integration tests
on:
@@ -6,10 +9,20 @@ on:
working-directory:
required: true
type: string
default: "libs/community"
python-version:
required: true
type: string
description: "Python version to use"
default: "3.11"
extended-deps-file:
required: true
type: choice
description: "File to install extended dependencies from"
options:
- extended_testing_deps.txt
- pdf_loader_deps.txt
- other_deps.txt
env:
POETRY_VERSION: "1.7.1"
@@ -32,60 +45,22 @@ jobs:
working-directory: ${{ inputs.working-directory }}
cache-key: core
- name: Install dependencies
- name: Install extended dependencies
shell: bash
run: poetry install --with test,test_integration
run: |
poetry install --with test,test_integration
poetry run pip install uv
poetry run uv pip install -r extended_dependencies/${{ inputs.extended-deps-file }}
- name: Install deps outside pyproject
if: ${{ startsWith(inputs.working-directory, 'libs/community/') }}
shell: bash
run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"
- name: 'Authenticate to Google Cloud'
id: 'auth'
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'
- name: Run integration tests
- name: Run extended tests
shell: bash
env:
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
ES_URL: ${{ secrets.ES_URL }}
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
run: |
make test
make integration_tests
- name: Ensure the tests did not create any additional files

View File

@@ -139,7 +139,7 @@ jobs:
echo "Running extended tests, installing dependencies with poetry..."
poetry install --with test
poetry run pip install uv
poetry run uv pip install -r extended_testing_deps.txt
poetry run uv pip install -r extended_dependencies/extended_testing_deps.txt
- name: Run extended tests
run: make extended_tests

View File

@@ -19,7 +19,7 @@ test tests:
poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
integration_tests:
poetry run pytest $(TEST_FILE)
poetry run pytest -m runs $(TEST_FILE)
test_watch:
poetry run ptw --disable-socket --allow-unix-socket --snapshot-update --now . -- -vv tests/unit_tests

View File

@@ -0,0 +1,2 @@
-r pdf_loader_deps.txt
-r other_deps.txt

View File

@@ -54,7 +54,6 @@ openapi-pydantic>=0.3.2,<0.4
oracle-ads>=2.9.1,<3
oracledb>=2.2.0,<3
pandas>=2.0.1,<3
pdfminer-six>=20221105,<20240706
pgvector>=0.1.6,<0.2
praw>=7.7.1,<8
premai>=0.3.25,<0.4
@@ -62,9 +61,6 @@ psychicapi>=0.8.0,<0.9
pydantic>=2.7.4,<3
py-trello>=0.19.0,<0.20
pyjwt>=2.8.0,<3
pymupdf>=1.22.3,<2
pypdf>=3.4.0,<5
pypdfium2>=4.10.0,<5
pyspark>=3.4.0,<4
rank-bm25>=0.2.2,<0.3
rapidfuzz>=3.1.1,<4

View File

@@ -0,0 +1,4 @@
pdfminer-six>=20221105,<20240706
pymupdf>=1.22.3,<2
pypdf>=3.4.0,<5
pypdfium2>=4.10.0,<5

View File

@@ -63,6 +63,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
markers = [
"requires: mark tests as requiring a specific library",
"scheduled: mark tests to run in scheduled testing",
"runs: mark tests to run in CI",
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"

View File

@@ -1,6 +1,11 @@
# Getting the absolute path of the current file's directory
from importlib import util
import os
from typing import Dict, Sequence
import pytest
from pytest import Config, Function, Parser
# Getting the absolute path of the current file's directory
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
# Getting the absolute path of the project's root directory
@@ -17,3 +22,83 @@ def _load_env() -> None:
_load_env()
def pytest_addoption(parser: Parser) -> None:
    """Register the ``--only-extended`` and ``--only-core`` boolean flags."""
    # (flag, help text) pairs; both are plain on/off switches.
    flag_specs = (
        (
            "--only-extended",
            "Only run extended tests. Does not allow skipping any extended tests.",
        ),
        (
            "--only-core",
            "Only run core tests. Never runs any extended tests.",
        ),
    )
    for flag, description in flag_specs:
        parser.addoption(flag, action="store_true", help=description)
def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None:
    """Apply the custom ``requires`` marker to the collected tests.

    A test decorated with ``@pytest.mark.requires("pkg_a", "pkg_b")`` declares
    the packages it needs. How such a test is treated depends on the flags:

    * default: the test is skipped as soon as one required package cannot be
      found;
    * ``--only-core``: every test carrying ``requires`` is skipped;
    * ``--only-extended``: tests without ``requires`` are skipped, and a
      missing required package triggers ``pytest.fail`` instead of a skip.

    Raises:
        ValueError: if both ``--only-extended`` and ``--only-core`` are given.
    """
    extended_only = config.getoption("--only-extended") or False
    core_only = config.getoption("--only-core") or False

    if extended_only and core_only:
        raise ValueError("Cannot specify both `--only-extended` and `--only-core`.")

    # Cache of package name -> "was a spec found?", so each package is
    # looked up with `util.find_spec` at most once per collection.
    availability: Dict[str, bool] = {}

    for test_item in items:
        marker = test_item.get_closest_marker("requires")

        # Tests without the marker are only affected by --only-extended.
        if marker is None:
            if extended_only:
                test_item.add_marker(
                    pytest.mark.skip(reason="Skipping not an extended test.")
                )
            continue

        if core_only:
            test_item.add_marker(pytest.mark.skip(reason="Skipping not a core test."))
            continue

        for pkg in marker.args:
            if pkg not in availability:
                try:
                    found = util.find_spec(pkg) is not None
                except Exception:
                    # Any lookup error is treated as "not installed".
                    found = False
                availability[pkg] = found

            if availability[pkg]:
                continue

            if extended_only:
                pytest.fail(
                    f"Package `{pkg}` is not installed but is required for "
                    f"extended tests. Please install the given package and "
                    f"try again.",
                )
            else:
                # First missing package decides the outcome: skip and stop
                # checking the remaining requirements of this test.
                test_item.add_marker(pytest.mark.skip(reason=f"Requires pkg: `{pkg}`"))
                break

View File

@@ -86,6 +86,8 @@ def test_pdfminer_pdf_as_html_loader() -> None:
assert len(docs) == 1
@pytest.mark.runs
@pytest.mark.requires("pypdf")
def test_pypdf_loader() -> None:
"""Test PyPDFLoader."""
file_path = Path(__file__).parent.parent / "examples/hello.pdf"
@@ -101,6 +103,8 @@ def test_pypdf_loader() -> None:
assert len(docs) == 16
@pytest.mark.runs
@pytest.mark.requires("pypdf")
def test_pypdf_loader_with_layout() -> None:
"""Test PyPDFLoader with layout mode."""
file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"