multiple: get rid of pyproject extras (#22581)

They cause `poetry lock` to take a very long time, while `uv pip install` can
resolve the constraints from these toml files in trivial time
(addressing the problem with #19153).

This allows us to properly upgrade lockfile dependencies going forward.
Doing so revealed some issues that were either fixed or type-ignored (see
file comments).
This commit is contained in:
Erick Friis
2024-06-06 15:45:22 -07:00
committed by GitHub
parent 4367e89c9a
commit a24a9c6427
34 changed files with 3522 additions and 12444 deletions

View File

@@ -0,0 +1,86 @@
aiosqlite>=0.19.0,<0.20
aleph-alpha-client>=2.15.0,<3
anthropic>=0.3.11,<0.4
arxiv>=1.4,<2
assemblyai>=0.17.0,<0.18
atlassian-python-api>=3.36.0,<4
azure-ai-documentintelligence>=1.0.0b1,<2
azure-identity>=1.15.0,<2
azure-search-documents==11.4.0
beautifulsoup4>=4,<5
bibtexparser>=1.4.0,<2
cassio>=0.1.6,<0.2
chardet>=5.1.0,<6
cloudpathlib>=0.18,<0.19
cloudpickle>=2.0.0
cohere>=4,<6
databricks-vectorsearch>=0.21,<0.22
datasets>=2.15.0,<3
dgml-utils>=0.3.0,<0.4
elasticsearch>=8.12.0,<9
esprima>=4.0.1,<5
faiss-cpu>=1,<2
feedparser>=6.0.10,<7
fireworks-ai>=0.9.0,<0.10
friendli-client>=1.2.4,<2
geopandas>=0.13.1,<0.14
gitpython>=3.1.32,<4
google-cloud-documentai>=2.20.1,<3
gql>=3.4.1,<4
gradientai>=1.4.0,<2
hdbcli>=2.19.21,<3
hologres-vector==0.0.6
html2text>=2020.1.16
httpx>=0.24.1,<0.25
httpx-sse>=0.4.0,<0.5
javelin-sdk>=0.1.8,<0.2
jinja2>=3,<4
jq>=1.4.1,<2
jsonschema>1
lxml>=4.9.3,<6.0
markdownify>=0.11.6,<0.12
motor>=3.3.1,<4
msal>=1.25.0,<2
mwparserfromhell>=0.6.4,<0.7
mwxml>=0.3.3,<0.4
newspaper3k>=0.2.8,<0.3
numexpr>=2.8.6,<3
nvidia-riva-client>=2.14.0,<3
oci>=2.119.1,<3
openai<2
openapi-pydantic>=0.3.2,<0.4
oracle-ads>=2.9.1,<3
oracledb>=2.2.0,<3
pandas>=2.0.1,<3
pdfminer-six>=20221105
pgvector>=0.1.6,<0.2
praw>=7.7.1,<8
premai>=0.3.25,<0.4
psychicapi>=0.8.0,<0.9
py-trello>=0.19.0,<0.20
pyjwt>=2.8.0,<3
pymupdf>=1.22.3,<2
pypdf>=3.4.0,<4
pypdfium2>=4.10.0,<5
pyspark>=3.4.0,<4
rank-bm25>=0.2.2,<0.3
rapidfuzz>=3.1.1,<4
rapidocr-onnxruntime>=1.3.2,<2
rdflib==7.0.0
requests-toolbelt>=1.0.0,<2
rspace_client>=2.5.0,<3
scikit-learn>=1.2.2,<2
simsimd>=4.3.1,<5
sqlite-vss>=0.1.2,<0.2
streamlit>=1.18.0,<2
sympy>=1.12,<2
telethon>=1.28.5,<2
tidb-vector>=0.0.3,<1.0.0
timescale-vector==0.0.1
tqdm>=4.48.0
tree-sitter>=0.20.2,<0.21
tree-sitter-languages>=1.8.0,<2
upstash-redis>=0.15.0,<0.16
vdms==0.0.20
xata>=1.0.0a7,<2
xmltodict>=0.13.0,<0.14

View File

@@ -75,7 +75,7 @@ class RunProcessor:
:return: The converted Span.
"""
attributes = {**run.extra} if run.extra else {}
attributes["execution_order"] = run.execution_order
attributes["execution_order"] = run.execution_order # type: ignore
return self.trace_tree.Span(
span_id=str(run.id) if run.id is not None else None,

View File

@@ -220,7 +220,7 @@ class NucliaUnderstandingAPI(BaseTool):
data = MessageToJson(
pb,
preserving_proto_field_name=True,
including_default_value_fields=True,
including_default_value_fields=True, # type: ignore
)
self._results[matching_id]["data"] = data

View File

@@ -28,7 +28,7 @@ def _check_docarray_import() -> None:
except ImportError:
raise ImportError(
"Could not import docarray python package. "
'Please install it with `pip install "langchain[docarray]"`.'
"Please install it with `pip install docarray`."
)

View File

@@ -14,7 +14,7 @@ class DocArrayHnswSearch(DocArrayIndex):
"""`HnswLib` storage using `DocArray` package.
To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
You can install it with `pip install "docarray[hnswlib]"`.
You can install it with `pip install docarray`.
"""
@classmethod

View File

@@ -15,7 +15,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
"""In-memory `DocArray` storage for exact search.
To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
You can install it with `pip install "langchain[docarray]"`.
You can install it with `pip install docarray`.
"""
@classmethod

File diff suppressed because it is too large Load Diff

View File

@@ -19,93 +19,6 @@ aiohttp = "^3.8.3"
tenacity = "^8.1.0"
dataclasses-json = ">= 0.5.7, < 0.7"
langsmith = "^0.1.0"
tqdm = {version = ">=4.48.0", optional = true}
openapi-pydantic = {version = "^0.3.2", optional = true}
faiss-cpu = {version = "^1", optional = true}
beautifulsoup4 = {version = "^4", optional = true}
jinja2 = {version = "^3", optional = true}
cohere = {version = "^4", optional = true}
openai = {version = "<2", optional = true}
arxiv = {version = "^1.4", optional = true}
pypdf = {version = "^3.4.0", optional = true}
aleph-alpha-client = {version="^2.15.0", optional = true}
gradientai = {version="^1.4.0", optional = true}
pgvector = {version = "^0.1.6", optional = true}
atlassian-python-api = {version = "^3.36.0", optional=true}
html2text = {version="^2020.1.16", optional=true}
numexpr = {version="^2.8.6", optional=true}
jq = {version = "^1.4.1", optional = true}
pdfminer-six = {version = "^20221105", optional = true}
lxml = {version = ">=4.9.3,<6.0", optional = true}
pymupdf = {version = "^1.22.3", optional = true}
rapidocr-onnxruntime = {version = "^1.3.2", optional = true, python = ">=3.8.1,<3.12"}
pypdfium2 = {version = "^4.10.0", optional = true}
gql = {version = "^3.4.1", optional = true}
pandas = {version = "^2.0.1", optional = true}
telethon = {version = "^1.28.5", optional = true}
chardet = {version="^5.1.0", optional=true}
requests-toolbelt = {version = "^1.0.0", optional = true}
scikit-learn = {version = "^1.2.2", optional = true}
py-trello = {version = "^0.19.0", optional = true}
bibtexparser = {version = "^1.4.0", optional = true}
pyspark = {version = "^3.4.0", optional = true}
mwparserfromhell = {version = "^0.6.4", optional = true}
mwxml = {version = "^0.3.3", optional = true}
esprima = {version = "^4.0.1", optional = true}
streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
psychicapi = {version = "^0.8.0", optional = true}
cassio = {version = "^0.1.6", optional = true}
sympy = {version = "^1.12", optional = true}
rapidfuzz = {version = "^3.1.1", optional = true}
jsonschema = {version = ">1", optional = true}
rank-bm25 = {version = "^0.2.2", optional = true}
geopandas = {version = "^0.13.1", optional = true}
gitpython = {version = "^3.1.32", optional = true}
feedparser = {version = "^6.0.10", optional = true}
newspaper3k = {version = "^0.2.8", optional = true}
xata = {version = "^1.0.0a7", optional = true}
xmltodict = {version = "^0.13.0", optional = true}
markdownify = {version = "^0.11.6", optional = true}
assemblyai = {version = "^0.17.0", optional = true}
sqlite-vss = {version = "^0.1.2", optional = true}
motor = {version = "^3.3.1", optional = true}
timescale-vector = {version = "^0.0.1", optional = true}
typer = {version= "^0.9.0", optional = true}
anthropic = {version = "^0.3.11", optional = true}
aiosqlite = {version = "^0.19.0", optional = true}
rspace_client = {version = "^2.5.0", optional = true}
upstash-redis = {version = "^0.15.0", optional = true}
google-cloud-documentai = {version = "^2.20.1", optional = true}
fireworks-ai = {version = "^0.9.0", optional = true}
javelin-sdk = {version = "^0.1.8", optional = true}
hologres-vector = {version = "^0.0.6", optional = true}
praw = {version = "^7.7.1", optional = true}
msal = {version = "^1.25.0", optional = true}
databricks-vectorsearch = {version = "^0.21", optional = true}
cloudpickle = {version = ">=2.0.0", optional = true}
dgml-utils = {version = "^0.3.0", optional = true}
datasets = {version = "^2.15.0", optional = true}
tree-sitter = {version = "^0.20.2", optional = true}
tree-sitter-languages = {version = "^1.8.0", optional = true}
azure-ai-documentintelligence = {version = "^1.0.0b1", optional = true}
oracle-ads = {version = "^2.9.1", optional = true}
httpx = {version = "^0.24.1", optional = true}
elasticsearch = {version = "^8.12.0", optional = true}
hdbcli = {version = "^2.19.21", optional = true}
oci = {version = "^2.119.1", optional = true}
rdflib = {version = "7.0.0", optional = true}
nvidia-riva-client = {version = "^2.14.0", optional = true}
azure-search-documents = {version = "11.4.0", optional = true}
azure-identity = {version = "^1.15.0", optional = true}
tidb-vector = {version = ">=0.0.3,<1.0.0", optional = true}
friendli-client = {version = "^1.2.4", optional = true}
premai = {version = "^0.3.25", optional = true}
vdms = {version = "^0.0.20", optional = true}
httpx-sse = {version = "^0.4.0", optional = true}
pyjwt = {version = "^2.8.0", optional = true}
oracledb = {version = "^2.2.0", optional = true}
cloudpathlib = { version = "^0.18", optional = true }
simsimd = {version = "^4.3.1", optional = true}
[tool.poetry.group.test]
@@ -125,12 +38,12 @@ responses = "^0.22.0"
pytest-asyncio = "^0.20.3"
lark = "^1.1.5"
pandas = "^2.0.0"
pytest-mock = "^3.10.0"
pytest-mock = "^3.10.0"
pytest-socket = "^0.6.0"
syrupy = "^4.0.2"
requests-mock = "^1.11.0"
langchain-core = {path = "../core", develop = true}
langchain = {path = "../langchain", develop = true}
langchain-core = { path = "../core", develop = true }
langchain = { path = "../langchain", develop = true }
[tool.poetry.group.codespell]
optional = true
@@ -143,19 +56,7 @@ optional = true
[tool.poetry.group.test_integration.dependencies]
# Do not add dependencies in the test_integration group
# Instead:
# 1. Add an optional dependency to the main group
# poetry add --optional [package name]
# 2. Add the package name to the extended_testing extra (find it below)
# 3. Relock the poetry file
# poetry lock --no-update
# 4. Favor unit tests not integration tests.
# Use the @pytest.mark.requires(pkg_name) decorator in unit_tests.
# Your tests should not rely on network access, as it prevents other
# developers from being able to easily run them.
# Instead write unit tests that use the `responses` library or mock.patch with
# fixtures. Keep the fixtures minimal.
# See Contributing Guide for more instructions on working with optional dependencies.
# Instead read the following link:
# https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies
pytest-vcr = "^1.0.2"
wrapt = "^1.15.0"
@@ -165,7 +66,7 @@ cassio = "^0.1.6"
tiktoken = ">=0.3.2,<0.6.0"
anthropic = "^0.3.11"
langchain-core = { path = "../core", develop = true }
langchain = {path = "../langchain", develop = true}
langchain = { path = "../langchain", develop = true }
fireworks-ai = "^0.9.0"
vdms = "^0.0.20"
exllamav2 = "^0.0.18"
@@ -185,9 +86,9 @@ types-pytz = "^2023.3.0.0"
types-chardet = "^5.0.4.6"
types-redis = "^4.3.21.6"
mypy-protobuf = "^3.0.0"
langchain-core = {path = "../core", develop = true}
langchain-text-splitters = {path = "../text-splitters", develop = true}
langchain = {path = "../langchain", develop = true}
langchain-core = { path = "../core", develop = true }
langchain-text-splitters = { path = "../text-splitters", develop = true }
langchain = { path = "../langchain", develop = true }
[tool.poetry.group.dev]
optional = true
@@ -195,104 +96,7 @@ optional = true
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
setuptools = "^67.6.1"
langchain-core = {path = "../core", develop = true}
[tool.poetry.extras]
cli = ["typer"]
# An extra used to be able to add extended testing.
# Please use new-line on formatting to make it easier to add new packages without
# merge-conflicts
extended_testing = [
"aleph-alpha-client",
"aiosqlite",
"assemblyai",
"beautifulsoup4",
"bibtexparser",
"cassio",
"chardet",
"datasets",
"google-cloud-documentai",
"esprima",
"jq",
"pdfminer-six",
"pgvector",
"pypdf",
"pymupdf",
"pypdfium2",
"tqdm",
"lxml",
"atlassian-python-api",
"mwparserfromhell",
"mwxml",
"msal",
"pandas",
"telethon",
"psychicapi",
"gql",
"gradientai",
"requests-toolbelt",
"html2text",
"numexpr",
"py-trello",
"scikit-learn",
"streamlit",
"pyspark",
"openai",
"sympy",
"rapidfuzz",
"jsonschema",
"rank-bm25",
"geopandas",
"jinja2",
"gitpython",
"newspaper3k",
"nvidia-riva-client",
"feedparser",
"xata",
"xmltodict",
"faiss-cpu",
"openapi-pydantic",
"markdownify",
"arxiv",
"sqlite-vss",
"rapidocr-onnxruntime",
"motor",
"timescale-vector",
"anthropic",
"upstash-redis",
"rspace_client",
"fireworks-ai",
"javelin-sdk",
"hologres-vector",
"praw",
"databricks-vectorsearch",
"cloudpickle",
"cloudpathlib",
"dgml-utils",
"cohere",
"tree-sitter",
"tree-sitter-languages",
"azure-ai-documentintelligence",
"oracle-ads",
"httpx",
"elasticsearch",
"hdbcli",
"oci",
"rdflib",
"azure-search-documents",
"azure-identity",
"tidb-vector",
"cloudpickle",
"friendli-client",
"premai",
"vdms",
"httpx-sse",
"pyjwt",
"oracledb",
"simsimd",
"aiosqlite"
]
langchain-core = { path = "../core", develop = true }
[tool.ruff]
exclude = [
@@ -302,9 +106,9 @@ exclude = [
[tool.ruff.lint]
select = [
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"T201", # print
]
@@ -314,9 +118,7 @@ disallow_untyped_defs = "True"
exclude = ["notebooks", "examples", "example_data"]
[tool.coverage.run]
omit = [
"tests/*",
]
omit = ["tests/*"]
[build-system]
requires = ["poetry-core>=1.0.0"]
@@ -338,7 +140,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
markers = [
"requires: mark tests as requiring a specific library",
"scheduled: mark tests to run in scheduled testing",
"compile: mark placeholder test used to compile integration tests without running them"
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"

View File

@@ -1,5 +1,6 @@
import tempfile
import urllib.request
from typing import Iterable
import pytest
from langchain_core.documents import Document
@@ -31,7 +32,7 @@ def get_abbr(s: str) -> str:
@pytest.fixture(scope="function")
def vectara1(): # type: ignore[no-untyped-def]
def vectara1() -> Iterable[Vectara]:
# Set up code
# create a new Vectara instance
vectara1: Vectara = Vectara()
@@ -59,7 +60,7 @@ def vectara1(): # type: ignore[no-untyped-def]
vectara1.delete(doc_ids)
def test_vectara_add_documents(vectara1: Vectara) -> None: # type: ignore[no-untyped-def]
def test_vectara_add_documents(vectara1: Vectara) -> None:
"""Test add_documents."""
# test without filter
@@ -222,7 +223,7 @@ def test_vectara_rag_with_reranking(vectara2: Vectara) -> None:
@pytest.fixture(scope="function")
def vectara3(): # type: ignore[no-untyped-def]
def vectara3() -> Iterable[Vectara]:
# Set up code
vectara3: Vectara = Vectara()

View File

@@ -1,4 +1,5 @@
"""A unit test meant to catch accidental introduction of non-optional dependencies."""
from pathlib import Path
from typing import Any, Dict, Mapping
@@ -54,7 +55,9 @@ def test_required_dependencies(poetry_conf: Mapping[str, Any]) -> None:
unrequired_dependencies = [
package_name for package_name, required in is_required.items() if not required
]
in_extras = [dep for group in poetry_conf["extras"].values() for dep in group]
in_extras = [
dep for group in poetry_conf.get("extras", {}).values() for dep in group
]
assert set(unrequired_dependencies) == set(in_extras)

View File

@@ -75,16 +75,17 @@ def test_sql_database_run() -> None:
with pytest.warns(Warning) as records:
db = SQLDatabase(engine, schema="schema_a")
# Metadata creation with duckdb raises a warning at the moment about reflection.
# Metadata creation with duckdb raises 3 warnings at the moment about reflection.
# As a stop-gap to increase strictness of pytest to fail on warnings, we'll
# explicitly catch the warning and assert that it's the one we expect.
# explicitly catch the warnings and assert that the one we expect is among them.
# We may need to revisit at a later stage and determine why a warning is being
# raised here.
assert len(records) == 1
assert isinstance(records[0].message, Warning)
assert (
records[0].message.args[0]
for record in records:
assert isinstance(record.message, Warning)
assert any(
record.message.args[0] # type: ignore
== "duckdb-engine doesn't yet support reflection on indices"
for record in records
)
command = 'select user_name from "user" where user_id = 13'