multiple: get rid of pyproject extras (#22581)

They cause `poetry lock` to take a ton of time, and `uv pip install` can resolve the constraints from these toml files in trivial time (addressing the problem with #19153). This allows us to properly upgrade lockfile dependencies moving forward, which revealed some issues that were either fixed or type-ignored (see file comments).
2025-09-18 16:16:33 +00:00 · 2024-06-06 15:45:22 -07:00
parent 4367e89c9a
commit a24a9c6427
34 changed files with 3522 additions and 12444 deletions
--- a/libs/community/extended_testing_deps.txt
+++ b/libs/community/extended_testing_deps.txt
@@ -0,0 +1,86 @@
+aiosqlite>=0.19.0,<0.20
+aleph-alpha-client>=2.15.0,<3
+anthropic>=0.3.11,<0.4
+arxiv>=1.4,<2
+assemblyai>=0.17.0,<0.18
+atlassian-python-api>=3.36.0,<4
+azure-ai-documentintelligence>=1.0.0b1,<2
+azure-identity>=1.15.0,<2
+azure-search-documents==11.4.0
+beautifulsoup4>=4,<5
+bibtexparser>=1.4.0,<2
+cassio>=0.1.6,<0.2
+chardet>=5.1.0,<6
+cloudpathlib>=0.18,<0.19
+cloudpickle>=2.0.0
+cohere>=4,<6
+databricks-vectorsearch>=0.21,<0.22
+datasets>=2.15.0,<3
+dgml-utils>=0.3.0,<0.4
+elasticsearch>=8.12.0,<9
+esprima>=4.0.1,<5
+faiss-cpu>=1,<2
+feedparser>=6.0.10,<7
+fireworks-ai>=0.9.0,<0.10
+friendli-client>=1.2.4,<2
+geopandas>=0.13.1,<0.14
+gitpython>=3.1.32,<4
+google-cloud-documentai>=2.20.1,<3
+gql>=3.4.1,<4
+gradientai>=1.4.0,<2
+hdbcli>=2.19.21,<3
+hologres-vector==0.0.6
+html2text>=2020.1.16
+httpx>=0.24.1,<0.25
+httpx-sse>=0.4.0,<0.5
+javelin-sdk>=0.1.8,<0.2
+jinja2>=3,<4
+jq>=1.4.1,<2
+jsonschema>1
+lxml>=4.9.3,<6.0
+markdownify>=0.11.6,<0.12
+motor>=3.3.1,<4
+msal>=1.25.0,<2
+mwparserfromhell>=0.6.4,<0.7
+mwxml>=0.3.3,<0.4
+newspaper3k>=0.2.8,<0.3
+numexpr>=2.8.6,<3
+nvidia-riva-client>=2.14.0,<3
+oci>=2.119.1,<3
+openai<2
+openapi-pydantic>=0.3.2,<0.4
+oracle-ads>=2.9.1,<3
+oracledb>=2.2.0,<3
+pandas>=2.0.1,<3
+pdfminer-six>=20221105
+pgvector>=0.1.6,<0.2
+praw>=7.7.1,<8
+premai>=0.3.25,<0.4
+psychicapi>=0.8.0,<0.9
+py-trello>=0.19.0,<0.20
+pyjwt>=2.8.0,<3
+pymupdf>=1.22.3,<2
+pypdf>=3.4.0,<4
+pypdfium2>=4.10.0,<5
+pyspark>=3.4.0,<4
+rank-bm25>=0.2.2,<0.3
+rapidfuzz>=3.1.1,<4
+rapidocr-onnxruntime>=1.3.2,<2
+rdflib==7.0.0
+requests-toolbelt>=1.0.0,<2
+rspace_client>=2.5.0,<3
+scikit-learn>=1.2.2,<2
+simsimd>=4.3.1,<5
+sqlite-vss>=0.1.2,<0.2
+streamlit>=1.18.0,<2
+sympy>=1.12,<2
+telethon>=1.28.5,<2
+tidb-vector>=0.0.3,<1.0.0
+timescale-vector==0.0.1
+tqdm>=4.48.0
+tree-sitter>=0.20.2,<0.21
+tree-sitter-languages>=1.8.0,<2
+upstash-redis>=0.15.0,<0.16
+vdms==0.0.20
+xata>=1.0.0a7,<2
+xmltodict>=0.13.0,<0.14
--- a/libs/community/langchain_community/callbacks/tracers/wandb.py
+++ b/libs/community/langchain_community/callbacks/tracers/wandb.py
@@ -75,7 +75,7 @@ class RunProcessor:
        :return: The converted Span.
        """
        attributes = {**run.extra} if run.extra else {}
-        attributes["execution_order"] = run.execution_order
+        attributes["execution_order"] = run.execution_order  # type: ignore

        return self.trace_tree.Span(
            span_id=str(run.id) if run.id is not None else None,
--- a/libs/community/langchain_community/tools/nuclia/tool.py
+++ b/libs/community/langchain_community/tools/nuclia/tool.py
@@ -220,7 +220,7 @@ class NucliaUnderstandingAPI(BaseTool):
                data = MessageToJson(
                    pb,
                    preserving_proto_field_name=True,
-                    including_default_value_fields=True,
+                    including_default_value_fields=True,  # type: ignore
                )
                self._results[matching_id]["data"] = data

--- a/libs/community/langchain_community/vectorstores/docarray/base.py
+++ b/libs/community/langchain_community/vectorstores/docarray/base.py
@@ -28,7 +28,7 @@ def _check_docarray_import() -> None:
    except ImportError:
        raise ImportError(
            "Could not import docarray python package. "
-            'Please install it with `pip install "langchain[docarray]"`.'
+            "Please install it with `pip install docarray`."
        )


--- a/libs/community/langchain_community/vectorstores/docarray/hnsw.py
+++ b/libs/community/langchain_community/vectorstores/docarray/hnsw.py
@@ -14,7 +14,7 @@ class DocArrayHnswSearch(DocArrayIndex):
    """`HnswLib` storage using `DocArray` package.

    To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
-    You can install it with `pip install "docarray[hnswlib]"`.
+    You can install it with `pip install docarray`.
    """

    @classmethod
--- a/libs/community/langchain_community/vectorstores/docarray/in_memory.py
+++ b/libs/community/langchain_community/vectorstores/docarray/in_memory.py
@@ -15,7 +15,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
    """In-memory `DocArray` storage for exact search.

    To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
-    You can install it with `pip install "langchain[docarray]"`.
+    You can install it with `pip install docarray`.
    """

    @classmethod
--- a/libs/community/poetry.lock
+++ b/libs/community/poetry.lock
--- a/libs/community/pyproject.toml
+++ b/libs/community/pyproject.toml
@@ -19,93 +19,6 @@ aiohttp = "^3.8.3"
 tenacity = "^8.1.0"
 dataclasses-json = ">= 0.5.7, < 0.7"
 langsmith = "^0.1.0"
-tqdm = {version = ">=4.48.0", optional = true}
-openapi-pydantic = {version = "^0.3.2", optional = true}
-faiss-cpu = {version = "^1", optional = true}
-beautifulsoup4 = {version = "^4", optional = true}
-jinja2 = {version = "^3", optional = true}
-cohere = {version = "^4", optional = true}
-openai = {version = "<2", optional = true}
-arxiv = {version = "^1.4", optional = true}
-pypdf = {version = "^3.4.0", optional = true}
-aleph-alpha-client = {version="^2.15.0", optional = true}
-gradientai = {version="^1.4.0", optional = true}
-pgvector = {version = "^0.1.6", optional = true}
-atlassian-python-api = {version = "^3.36.0", optional=true}
-html2text = {version="^2020.1.16", optional=true}
-numexpr = {version="^2.8.6", optional=true}
-jq = {version = "^1.4.1", optional = true}
-pdfminer-six = {version = "^20221105", optional = true}
-lxml = {version = ">=4.9.3,<6.0", optional = true}
-pymupdf = {version = "^1.22.3", optional = true}
-rapidocr-onnxruntime = {version = "^1.3.2", optional = true, python = ">=3.8.1,<3.12"}
-pypdfium2 = {version = "^4.10.0", optional = true}
-gql = {version = "^3.4.1", optional = true}
-pandas = {version = "^2.0.1", optional = true}
-telethon = {version = "^1.28.5", optional = true}
-chardet = {version="^5.1.0", optional=true}
-requests-toolbelt = {version = "^1.0.0", optional = true}
-scikit-learn = {version = "^1.2.2", optional = true}
-py-trello = {version = "^0.19.0", optional = true}
-bibtexparser = {version = "^1.4.0", optional = true}
-pyspark = {version = "^3.4.0", optional = true}
-mwparserfromhell = {version = "^0.6.4", optional = true}
-mwxml = {version = "^0.3.3", optional = true}
-esprima = {version = "^4.0.1", optional = true}
-streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
-psychicapi = {version = "^0.8.0", optional = true}
-cassio = {version = "^0.1.6", optional = true}
-sympy = {version = "^1.12", optional = true}
-rapidfuzz = {version = "^3.1.1", optional = true}
-jsonschema = {version = ">1", optional = true}
-rank-bm25 = {version = "^0.2.2", optional = true}
-geopandas = {version = "^0.13.1", optional = true}
-gitpython = {version = "^3.1.32", optional = true}
-feedparser = {version = "^6.0.10", optional = true}
-newspaper3k = {version = "^0.2.8", optional = true}
-xata = {version = "^1.0.0a7", optional = true}
-xmltodict = {version = "^0.13.0", optional = true}
-markdownify = {version = "^0.11.6", optional = true}
-assemblyai = {version = "^0.17.0", optional = true}
-sqlite-vss = {version = "^0.1.2", optional = true}
-motor = {version = "^3.3.1", optional = true}
-timescale-vector = {version = "^0.0.1", optional = true}
-typer = {version= "^0.9.0", optional = true}
-anthropic = {version = "^0.3.11", optional = true}
-aiosqlite = {version = "^0.19.0", optional = true}
-rspace_client = {version = "^2.5.0", optional = true}
-upstash-redis = {version = "^0.15.0", optional = true}
-google-cloud-documentai = {version = "^2.20.1", optional = true}
-fireworks-ai = {version = "^0.9.0", optional = true}
-javelin-sdk = {version = "^0.1.8", optional = true}
-hologres-vector = {version = "^0.0.6", optional = true}
-praw = {version = "^7.7.1", optional = true}
-msal = {version = "^1.25.0", optional = true}
-databricks-vectorsearch = {version = "^0.21", optional = true}
-cloudpickle = {version = ">=2.0.0", optional = true}
-dgml-utils = {version = "^0.3.0", optional = true}
-datasets = {version = "^2.15.0", optional = true}
-tree-sitter = {version = "^0.20.2", optional = true}
-tree-sitter-languages = {version = "^1.8.0", optional = true}
-azure-ai-documentintelligence = {version = "^1.0.0b1", optional = true}
-oracle-ads = {version = "^2.9.1", optional = true}
-httpx = {version = "^0.24.1", optional = true}
-elasticsearch = {version = "^8.12.0", optional = true}
-hdbcli = {version = "^2.19.21", optional = true}
-oci = {version = "^2.119.1", optional = true}
-rdflib = {version = "7.0.0", optional = true}
-nvidia-riva-client = {version = "^2.14.0", optional = true}
-azure-search-documents = {version = "11.4.0", optional = true}
-azure-identity = {version = "^1.15.0", optional = true}
-tidb-vector = {version = ">=0.0.3,<1.0.0", optional = true}
-friendli-client = {version = "^1.2.4", optional = true}
-premai = {version = "^0.3.25", optional = true}
-vdms = {version = "^0.0.20", optional = true}
-httpx-sse = {version = "^0.4.0", optional = true}
-pyjwt = {version = "^2.8.0", optional = true}
-oracledb = {version = "^2.2.0", optional = true}
-cloudpathlib = { version = "^0.18", optional = true }
-simsimd = {version = "^4.3.1", optional = true}


 [tool.poetry.group.test]
@@ -125,12 +38,12 @@ responses = "^0.22.0"
 pytest-asyncio = "^0.20.3"
 lark = "^1.1.5"
 pandas = "^2.0.0"
-pytest-mock  = "^3.10.0"
+pytest-mock = "^3.10.0"
 pytest-socket = "^0.6.0"
 syrupy = "^4.0.2"
 requests-mock = "^1.11.0"
-langchain-core = {path = "../core", develop = true}
-langchain = {path = "../langchain", develop = true}
+langchain-core = { path = "../core", develop = true }
+langchain = { path = "../langchain", develop = true }

 [tool.poetry.group.codespell]
 optional = true
@@ -143,19 +56,7 @@ optional = true

 [tool.poetry.group.test_integration.dependencies]
 # Do not add dependencies in the test_integration group
-# Instead:
-# 1. Add an optional dependency to the main group
-#       poetry add --optional [package name]
-# 2. Add the package name to the extended_testing extra (find it below)
-# 3. Relock the poetry file
-#       poetry lock --no-update
-# 4. Favor unit tests not integration tests.
-#    Use the @pytest.mark.requires(pkg_name) decorator in unit_tests.
-#    Your tests should not rely on network access, as it prevents other
-#    developers from being able to easily run them.
-#    Instead write unit tests that use the `responses` library or mock.patch with
-#    fixtures. Keep the fixtures minimal.
-# See Contributing Guide for more instructions on working with optional dependencies.
+# Instead, read the following link:
 # https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies
 pytest-vcr = "^1.0.2"
 wrapt = "^1.15.0"
@@ -165,7 +66,7 @@ cassio = "^0.1.6"
 tiktoken = ">=0.3.2,<0.6.0"
 anthropic = "^0.3.11"
 langchain-core = { path = "../core", develop = true }
-langchain = {path = "../langchain", develop = true}
+langchain = { path = "../langchain", develop = true }
 fireworks-ai = "^0.9.0"
 vdms = "^0.0.20"
 exllamav2 = "^0.0.18"
@@ -185,9 +86,9 @@ types-pytz = "^2023.3.0.0"
 types-chardet = "^5.0.4.6"
 types-redis = "^4.3.21.6"
 mypy-protobuf = "^3.0.0"
-langchain-core = {path = "../core", develop = true}
-langchain-text-splitters = {path = "../text-splitters", develop = true}
-langchain = {path = "../langchain", develop = true}
+langchain-core = { path = "../core", develop = true }
+langchain-text-splitters = { path = "../text-splitters", develop = true }
+langchain = { path = "../langchain", develop = true }

 [tool.poetry.group.dev]
 optional = true
@@ -195,104 +96,7 @@ optional = true
 [tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"
 setuptools = "^67.6.1"
-langchain-core = {path = "../core", develop = true}
-
-[tool.poetry.extras]
-cli = ["typer"]
-
-# An extra used to be able to add extended testing.
-# Please use new-line on formatting to make it easier to add new packages without
-# merge-conflicts
-extended_testing = [
- "aleph-alpha-client",
- "aiosqlite",
- "assemblyai",
- "beautifulsoup4",
- "bibtexparser",
- "cassio",
- "chardet",
- "datasets",
- "google-cloud-documentai",
- "esprima",
- "jq",
- "pdfminer-six",
- "pgvector",
- "pypdf",
- "pymupdf",
- "pypdfium2",
- "tqdm",
- "lxml",
- "atlassian-python-api",
- "mwparserfromhell",
- "mwxml",
- "msal",
- "pandas",
- "telethon",
- "psychicapi",
- "gql",
- "gradientai",
- "requests-toolbelt",
- "html2text",
- "numexpr",
- "py-trello",
- "scikit-learn",
- "streamlit",
- "pyspark",
- "openai",
- "sympy",
- "rapidfuzz",
- "jsonschema",
- "rank-bm25",
- "geopandas",
- "jinja2",
- "gitpython",
- "newspaper3k",
- "nvidia-riva-client",
- "feedparser",
- "xata",
- "xmltodict",
- "faiss-cpu",
- "openapi-pydantic",
- "markdownify",
- "arxiv",
- "sqlite-vss",
- "rapidocr-onnxruntime",
- "motor",
- "timescale-vector",
- "anthropic",
- "upstash-redis",
- "rspace_client",
- "fireworks-ai",
- "javelin-sdk",
- "hologres-vector",
- "praw",
- "databricks-vectorsearch",
- "cloudpickle",
- "cloudpathlib",
- "dgml-utils",
- "cohere",
- "tree-sitter",
- "tree-sitter-languages",
- "azure-ai-documentintelligence",
- "oracle-ads",
- "httpx",
- "elasticsearch",
- "hdbcli",
- "oci",
- "rdflib",
- "azure-search-documents",
- "azure-identity",
- "tidb-vector",
- "cloudpickle",
- "friendli-client",
- "premai",
- "vdms",
- "httpx-sse",
- "pyjwt",
- "oracledb",
- "simsimd",
- "aiosqlite"
-]
+langchain-core = { path = "../core", develop = true }

 [tool.ruff]
 exclude = [
@@ -302,9 +106,9 @@ exclude = [

 [tool.ruff.lint]
 select = [
-  "E",  # pycodestyle
-  "F",  # pyflakes
-  "I",  # isort
+  "E",    # pycodestyle
+  "F",    # pyflakes
+  "I",    # isort
  "T201", # print
 ]

@@ -314,9 +118,7 @@ disallow_untyped_defs = "True"
 exclude = ["notebooks", "examples", "example_data"]

 [tool.coverage.run]
-omit = [
-    "tests/*",
-]
+omit = ["tests/*"]

 [build-system]
 requires = ["poetry-core>=1.0.0"]
@@ -338,7 +140,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
 markers = [
  "requires: mark tests as requiring a specific library",
  "scheduled: mark tests to run in scheduled testing",
-  "compile: mark placeholder test used to compile integration tests without running them"
+  "compile: mark placeholder test used to compile integration tests without running them",
 ]
 asyncio_mode = "auto"

--- a/libs/community/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_vectara.py
@@ -1,5 +1,6 @@
 import tempfile
 import urllib.request
+from typing import Iterable

 import pytest
 from langchain_core.documents import Document
@@ -31,7 +32,7 @@ def get_abbr(s: str) -> str:


@pytest.fixture(scope="function")
-def vectara1():  # type: ignore[no-untyped-def]
+def vectara1() -> Iterable[Vectara]:
    # Set up code
    # create a new Vectara instance
    vectara1: Vectara = Vectara()
@@ -59,7 +60,7 @@ def vectara1():  # type: ignore[no-untyped-def]
    vectara1.delete(doc_ids)


-def test_vectara_add_documents(vectara1: Vectara) -> None:  # type: ignore[no-untyped-def]
+def test_vectara_add_documents(vectara1: Vectara) -> None:
    """Test add_documents."""

    # test without filter
@@ -222,7 +223,7 @@ def test_vectara_rag_with_reranking(vectara2: Vectara) -> None:


@pytest.fixture(scope="function")
-def vectara3():  # type: ignore[no-untyped-def]
+def vectara3() -> Iterable[Vectara]:
    # Set up code
    vectara3: Vectara = Vectara()

--- a/libs/community/tests/unit_tests/test_dependencies.py
+++ b/libs/community/tests/unit_tests/test_dependencies.py
@@ -1,4 +1,5 @@
 """A unit test meant to catch accidental introduction of non-optional dependencies."""
+
 from pathlib import Path
 from typing import Any, Dict, Mapping

@@ -54,7 +55,9 @@ def test_required_dependencies(poetry_conf: Mapping[str, Any]) -> None:
    unrequired_dependencies = [
        package_name for package_name, required in is_required.items() if not required
    ]
-    in_extras = [dep for group in poetry_conf["extras"].values() for dep in group]
+    in_extras = [
+        dep for group in poetry_conf.get("extras", {}).values() for dep in group
+    ]
    assert set(unrequired_dependencies) == set(in_extras)


--- a/libs/community/tests/unit_tests/test_sql_database_schema.py
+++ b/libs/community/tests/unit_tests/test_sql_database_schema.py
@@ -75,16 +75,17 @@ def test_sql_database_run() -> None:
    with pytest.warns(Warning) as records:
        db = SQLDatabase(engine, schema="schema_a")

-    # Metadata creation with duckdb raises a warning at the moment about reflection.
+    # Metadata creation with duckdb raises 3 warnings at the moment about reflection.
    # As a stop-gap to increase strictness of pytest to fail on warnings, we'll
-    # explicitly catch the warning and assert that it's the one we expect.
+    # explicitly catch the warnings and assert that the one we expect is among them.
    # We may need to revisit at a later stage and determine why a warning is being
    # raised here.
-    assert len(records) == 1
-    assert isinstance(records[0].message, Warning)
-    assert (
-        records[0].message.args[0]
+    for record in records:
+        assert isinstance(record.message, Warning)
+    assert any(
+        record.message.args[0]  # type: ignore
        == "duckdb-engine doesn't yet support reflection on indices"
+        for record in records
    )

    command = 'select user_name from "user" where user_id = 13'