community, langchain, infra: store extended test deps outside of poetry (#18995)

Poetry can't reliably resolve the large number of optional "extended test" dependencies we have. If we instead rely on pip to install the extended test deps in CI, this isn't an issue.
2025-09-22 11:00:37 +00:00 · 2024-03-14 22:55:30 -07:00
parent 191ddbc77e
commit 9e569d85a4
21 changed files with 691 additions and 11072 deletions
--- a/libs/langchain/extended_requirements.txt
+++ b/libs/langchain/extended_requirements.txt
@@ -0,0 +1,69 @@
+aleph-alpha-client
+aiosqlite
+assemblyai
+beautifulsoup4
+bibtexparser
+cassio
+chardet
+datasets
+google-cloud-documentai
+esprima
+jq
+pdfminer-six
+pgvector
+pypdf
+pymupdf
+pypdfium2
+tqdm
+lxml
+atlassian-python-api
+mwparserfromhell
+mwxml
+msal
+pandas
+telethon
+psychicapi
+gql
+requests-toolbelt
+html2text
+numexpr
+py-trello
+scikit-learn
+streamlit
+pyspark
+openai
+sympy
+rapidfuzz
+jsonschema
+openai
+rank-bm25
+geopandas
+jinja2
+gitpython
+newspaper3k
+feedparser
+xata
+xmltodict
+faiss-cpu
+openapi-pydantic==0.3.2
+markdownify
+arxiv
+dashvector
+sqlite-vss
+rapidocr-onnxruntime
+motor
+timescale-vector==0.0.1
+anthropic
+upstash-redis
+rspace_client
+fireworks-ai
+javelin-sdk>=0.1.8,<0.2
+hologres-vector
+praw
+databricks-vectorsearch
+couchbase
+dgml-utils
+cohere
+langchain-openai
+rdflib
+pydantic>=1,<2
--- a/libs/langchain/poetry.lock
+++ b/libs/langchain/poetry.lock
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -24,94 +24,29 @@ numpy = "^1"
 aiohttp = "^3.8.3"
 tenacity = "^8.1.0"
 jsonpatch = "^1.33"
-azure-core = {version = "^1.26.4", optional=true}
-tqdm = {version = ">=4.48.0", optional = true}
-openapi-pydantic = {version = "^0.3.2", optional = true}
-faiss-cpu = {version = "^1", optional = true}
-manifest-ml = {version = "^0.0.1", optional = true}
-transformers = {version = "^4", optional = true}
-beautifulsoup4 = {version = "^4", optional = true}
-torch = {version = ">=1,<3", optional = true}
-jinja2 = {version = "^3", optional = true}
-tiktoken = {version = ">=0.3.2,<0.6.0", optional = true, python=">=3.9"}
-qdrant-client = {version = "^1.3.1", optional = true, python = ">=3.8.1,<3.12"}
 dataclasses-json = ">= 0.5.7, < 0.7"
-cohere = {version = "^4", optional = true}
-openai = {version = "<2", optional = true}
-nlpcloud = {version = "^1", optional = true}
-huggingface_hub = {version = "^0", optional = true}
-sentence-transformers = {version = "^2", optional = true}
-arxiv = {version = "^1.4", optional = true}
-pypdf = {version = "^3.4.0", optional = true}
-aleph-alpha-client = {version="^2.15.0", optional = true}
-pgvector = {version = "^0.1.6", optional = true}
-async-timeout = {version = "^4.0.0", python = "<3.11"}
-azure-identity = {version = "^1.12.0", optional=true}
-atlassian-python-api = {version = "^3.36.0", optional=true}
-html2text = {version="^2020.1.16", optional=true}
-numexpr = {version="^2.8.6", optional=true}
-azure-cosmos = {version="^4.4.0b1", optional=true}
-jq = {version = "^1.4.1", optional = true}
-pdfminer-six = {version = "^20221105", optional = true}
-docarray = {version="^0.32.0", extras=["hnswlib"], optional=true}
-lxml = {version = "^4.9.2", optional = true}
-pymupdf = {version = "^1.22.3", optional = true}
-rapidocr-onnxruntime = {version = "^1.3.2", optional = true, python = ">=3.8.1,<3.12"}
-pypdfium2 = {version = "^4.10.0", optional = true}
-gql = {version = "^3.4.1", optional = true}
-pandas = {version = "^2.0.1", optional = true}
-telethon = {version = "^1.28.5", optional = true}
-chardet = {version="^5.1.0", optional=true}
-requests-toolbelt = {version = "^1.0.0", optional = true}
-openlm = {version = "^0.0.5", optional = true}
-scikit-learn = {version = "^1.2.2", optional = true}
-azure-ai-formrecognizer = {version = "^3.2.1", optional = true}
-azure-cognitiveservices-speech = {version = "^1.28.0", optional = true}
-py-trello = {version = "^0.19.0", optional = true}
-bibtexparser = {version = "^1.4.0", optional = true}
-pyspark = {version = "^3.4.0", optional = true}
-clarifai = {version = ">=9.1.0", optional = true}
-mwparserfromhell = {version = "^0.6.4", optional = true}
-mwxml = {version = "^0.3.3", optional = true}
-azure-search-documents = {version = "11.4.0b8", optional = true}
-esprima = {version = "^4.0.1", optional = true}
-streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
-psychicapi = {version = "^0.8.0", optional = true}
-cassio = {version = "^0.1.0", optional = true}
-sympy = {version = "^1.12", optional = true}
-rapidfuzz = {version = "^3.1.1", optional = true}
-jsonschema = {version = ">1", optional = true}
-rank-bm25 = {version = "^0.2.2", optional = true}
-geopandas = {version = "^0.13.1", optional = true}
-gitpython = {version = "^3.1.32", optional = true}
-feedparser = {version = "^6.0.10", optional = true}
-newspaper3k = {version = "^0.2.8", optional = true}
-xata = {version = "^1.0.0a7", optional = true}
-xmltodict = {version = "^0.13.0", optional = true}
-markdownify = {version = "^0.11.6", optional = true}
-assemblyai = {version = "^0.17.0", optional = true}
-dashvector = {version = "^1.0.1", optional = true}
-sqlite-vss = {version = "^0.1.2", optional = true}
-motor = {version = "^3.3.1", optional = true}
-timescale-vector = {version = "^0.0.1", optional = true}
-typer = {version= "^0.9.0", optional = true}
-anthropic = {version = "^0.3.11", optional = true}
-aiosqlite = {version = "^0.19.0", optional = true}
-rspace_client = {version = "^2.5.0", optional = true}
-upstash-redis = {version = "^0.15.0", optional = true}
-azure-ai-textanalytics = {version = "^5.3.0", optional = true}
-google-cloud-documentai = {version = "^2.20.1", optional = true}
-fireworks-ai = {version = "^0.9.0", optional = true}
-javelin-sdk = {version = "^0.1.8", optional = true}
-hologres-vector = {version = "^0.0.6", optional = true}
-praw = {version = "^7.7.1", optional = true}
-msal = {version = "^1.25.0", optional = true}
-databricks-vectorsearch = {version = "^0.21", optional = true}
-couchbase = {version = "^4.1.9", optional = true}
-dgml-utils = {version = "^0.3.0", optional = true}
-datasets = {version = "^2.15.0", optional = true}
-langchain-openai = {version = ">=0.0.2,<0.1", optional = true}
-rdflib = {version = "7.0.0", optional = true}
+async-timeout = { version = "^4.0.0", python = "<3.11" }
+azure-ai-formrecognizer = { version = "^3.2.1", optional = true }
+azure-ai-textanalytics = { version = "^5.3.0", optional = true }
+azure-cognitiveservices-speech = { version = "^1.28.0", optional = true }
+azure-core = { version = "^1.26.4", optional = true }
+azure-cosmos = { version = "^4.4.0b1", optional = true }
+azure-identity = { version = "^1.12.0", optional = true }
+azure-search-documents = { version = "11.4.0b8", optional = true }
+clarifai = { version = ">=9.1.0", optional = true }
+cohere = { version = "^4", optional = true }
+docarray = { version = "^0.32.0", extras = ["hnswlib"], optional = true }
+huggingface_hub = { version = "^0", optional = true }
+manifest-ml = { version = "^0.0.1", optional = true }
+nlpcloud = { version = "^1", optional = true }
+openai = { version = "<2", optional = true }
+openlm = { version = "^0.0.5", optional = true }
+qdrant-client = { version = "^1.3.1", optional = true, python = ">=3.8.1,<3.12" }
+sentence-transformers = { version = "^2", optional = true }
+tiktoken = { version = ">=0.3.2,<0.6.0", optional = true, python = ">=3.9" }
+torch = { version = ">=1,<3", optional = true }
+transformers = { version = "^4", optional = true }
+typer = { version = "^0.9.0", optional = true }

 [tool.poetry.group.test]
 optional = true
@@ -130,12 +65,12 @@ responses = "^0.22.0"
 pytest-asyncio = "^0.23.2"
 lark = "^1.1.5"
 pandas = "^2.0.0"
-pytest-mock  = "^3.10.0"
+pytest-mock = "^3.10.0"
 pytest-socket = "^0.6.0"
 syrupy = "^4.0.2"
 requests-mock = "^1.11.0"
-langchain-core = {path = "../core", develop = true}
-langchain-text-splitters = {path = "../text-splitters", develop = true}
+langchain-core = { path = "../core", develop = true }
+langchain-text-splitters = { path = "../text-splitters", develop = true }

 [tool.poetry.group.codespell]
 optional = true
@@ -148,19 +83,7 @@ optional = true

 [tool.poetry.group.test_integration.dependencies]
 # Do not add dependencies in the test_integration group
-# Instead:
-# 1. Add an optional dependency to the main group
-#       poetry add --optional [package name]
-# 2. Add the package name to the extended_testing extra (find it below)
-# 3. Relock the poetry file
-#       poetry lock --no-update
-# 4. Favor unit tests not integration tests.
-#    Use the @pytest.mark.requires(pkg_name) decorator in unit_tests.
-#    Your tests should not rely on network access, as it prevents other
-#    developers from being able to easily run them.
-#    Instead write unit tests that use the `responses` library or mock.patch with
-#    fixtures. Keep the fixtures minimal.
-# See the Contributing Guide for more instructions on working with optional dependencies.
+# Instead, read the following link:
 # https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies
 pytest-vcr = "^1.0.2"
 wrapt = "^1.15.0"
@@ -169,9 +92,9 @@ python-dotenv = "^1.0.0"
 cassio = "^0.1.0"
 tiktoken = ">=0.3.2,<0.6.0"
 anthropic = "^0.3.11"
-langchain-core = {path = "../core", develop = true}
-langchain-community = {path = "../community", develop = true}
-langchain-text-splitters = {path = "../text-splitters", develop = true}
+langchain-core = { path = "../core", develop = true }
+langchain-community = { path = "../community", develop = true }
+langchain-text-splitters = { path = "../text-splitters", develop = true }
 langchainhub = "^0.1.15"

 [tool.poetry.group.lint]
@@ -192,9 +115,10 @@ types-redis = "^4.3.21.6"
 types-pytz = "^2023.3.0.0"
 types-chardet = "^5.0.4.6"
 mypy-protobuf = "^3.0.0"
-langchain-core = {path = "../core", develop = true}
-langchain-community = {path = "../community", develop = true}
-langchain-text-splitters = {path = "../text-splitters", develop = true}
+pydantic = "^1"
+langchain-core = { path = "../core", develop = true }
+langchain-community = { path = "../community", develop = true }
+langchain-text-splitters = { path = "../text-splitters", develop = true }

 [tool.poetry.group.dev]
 optional = true
@@ -203,117 +127,49 @@ optional = true
 jupyter = "^1.0.0"
 playwright = "^1.28.0"
 setuptools = "^67.6.1"
-langchain-core = {path = "../core", develop = true}
-langchain-community = {path = "../community", develop = true}
-langchain-text-splitters = {path = "../text-splitters", develop = true}
+langchain-core = { path = "../core", develop = true }
+langchain-community = { path = "../community", develop = true }
+langchain-text-splitters = { path = "../text-splitters", develop = true }

 [tool.poetry.extras]
-llms = ["clarifai", "cohere", "openai", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
+llms = [
+  "clarifai",
+  "cohere",
+  "openai",
+  "openlm",
+  "nlpcloud",
+  "huggingface_hub",
+  "manifest-ml",
+  "torch",
+  "transformers",
+]
 qdrant = ["qdrant-client"]
 openai = ["openai", "tiktoken"]
-text_helpers = ["chardet"]
 clarifai = ["clarifai"]
 cohere = ["cohere"]
 docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
-javascript = ["esprima"]
 azure = [
-    "azure-identity",
-    "azure-cosmos",
-    "openai",
-    "azure-core",
-    "azure-ai-formrecognizer",
-    "azure-cognitiveservices-speech",
-    "azure-search-documents",
-    "azure-ai-textanalytics",
+  "azure-identity",
+  "azure-cosmos",
+  "openai",
+  "azure-core",
+  "azure-ai-formrecognizer",
+  "azure-cognitiveservices-speech",
+  "azure-search-documents",
+  "azure-ai-textanalytics",
 ]
 all = []
 cli = ["typer"]

-# An extra used to be able to add extended testing.
-# Please use new-line on formatting to make it easier to add new packages without
-# merge-conflicts
-extended_testing = [
- "aleph-alpha-client",
- "aiosqlite",
- "assemblyai",
- "beautifulsoup4",
- "bibtexparser",
- "cassio",
- "chardet",
- "datasets",
- "google-cloud-documentai",
- "esprima",
- "jq",
- "pdfminer-six",
- "pgvector",
- "pypdf",
- "pymupdf",
- "pypdfium2",
- "tqdm",
- "lxml",
- "atlassian-python-api",
- "mwparserfromhell",
- "mwxml",
- "msal",
- "pandas",
- "telethon",
- "psychicapi",
- "gql",
- "requests-toolbelt",
- "html2text",
- "numexpr",
- "py-trello",
- "scikit-learn",
- "streamlit",
- "pyspark",
- "openai",
- "sympy",
- "rapidfuzz",
- "jsonschema",
- "openai",
- "rank-bm25",
- "geopandas",
- "jinja2",
- "gitpython",
- "newspaper3k",
- "feedparser",
- "xata",
- "xmltodict",
- "faiss-cpu",
- "openapi-pydantic",
- "markdownify",
- "arxiv",
- "dashvector",
- "sqlite-vss",
- "rapidocr-onnxruntime",
- "motor",
- "timescale-vector",
- "anthropic",
- "upstash-redis",
- "rspace_client",
- "fireworks-ai",
- "javelin-sdk",
- "hologres-vector",
- "praw",
- "databricks-vectorsearch",
- "couchbase",
- "dgml-utils",
- "cohere",
- "langchain-openai",
- "rdflib",
-]
-
 [tool.ruff]
-exclude = [
-  "tests/integration_tests/examples/non-utf8-encoding.py",
-]
+exclude = ["tests/integration_tests/examples/non-utf8-encoding.py"]

 [tool.ruff.lint]
 select = [
-  "E",  # pycodestyle
-  "F",  # pyflakes
-  "I",  # isort
+  "E",    # pycodestyle
+  "F",    # pyflakes
+  "I",    # isort
  "T201", # print
 ]

@@ -323,9 +179,7 @@ disallow_untyped_defs = "True"
 exclude = ["notebooks", "examples", "example_data"]

 [tool.coverage.run]
-omit = [
-    "tests/*",
-]
+omit = ["tests/*"]

 [build-system]
 requires = ["poetry-core>=1.0.0"]
@@ -347,7 +201,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
 markers = [
  "requires: mark tests as requiring a specific library",
  "scheduled: mark tests to run in scheduled testing",
-  "compile: mark placeholder test used to compile integration tests without running them"
+  "compile: mark placeholder test used to compile integration tests without running them",
 ]
 asyncio_mode = "auto"

--- a/libs/langchain/tests/unit_tests/load/test_load.py
+++ b/libs/langchain/tests/unit_tests/load/test_load.py
@@ -76,7 +76,7 @@ def test_loads_llmchain_with_non_serializable_arg() -> None:
        model="davinci",
        temperature=0.5,
        openai_api_key="hello",
-        http_client=NotSerializable,
+        model_kwargs={"a": NotSerializable},
    )
    prompt = PromptTemplate.from_template("hello {name}!")
    chain = LLMChain(llm=llm, prompt=prompt)
@@ -147,7 +147,7 @@ def test_load_llmchain_with_non_serializable_arg() -> None:
        model="davinci",
        temperature=0.5,
        openai_api_key="hello",
-        http_client=NotSerializable,
+        model_kwargs={"a": NotSerializable},
    )
    prompt = PromptTemplate.from_template("hello {name}!")
    chain = LLMChain(llm=llm, prompt=prompt)