From 6eacd88ae79b976d8c9744abdec488c693ab883a Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Mon, 22 May 2023 09:48:09 -0700 Subject: [PATCH] fix: revert docarray explicit transitive dependencies and use extras instead (#5015) tldr: The docarray [integration PR](https://github.com/hwchase17/langchain/pull/4483) introduced a pinned dependency to protobuf. This is a docarray dependency, not a langchain dependency. Since this is handled by the docarray dependencies, it is unnecessary here. Further, as a pinned dependency, this quickly leads to incompatibilities with application code that consumes the library. Much less with a heavily used library like protobuf. Detail: as we see in the [docarray integration](https://github.com/hwchase17/langchain/pull/4483/files#diff-50c86b7ed8ac2cf95bd48334961bf0530cdc77b5a56f852c5c61b89d735fd711R81-R83), the transitive dependencies of docarray were also listed as langchain dependencies. This is unnecessary as the docarray project has an appropriate [extras](https://github.com/docarray/docarray/blob/a01a05542d17264b8a164bec783633658deeedb8/pyproject.toml#L70). The docarray project also does not require this _pinned_ version of protobuf, rather [a minimum version](https://github.com/docarray/docarray/blob/a01a05542d17264b8a164bec783633658deeedb8/pyproject.toml#L41). So this pinned version was likely in error. To fix this, this PR reverts the explicit hnswlib and protobuf dependencies and adds the hnswlib extras install for docarray (which installs hnswlib and protobuf, as originally intended). Because version `0.32.0` of the docarray hnswlib extras added protobuf, we bump the docarray dependency from `^0.31.0` to `^0.32.0`. # revert docarray explicit transitive dependencies and use extras instead ## Who can review? @dev2049 -- reviewed the original PR @eyurtsev -- bumped the pinned protobuf dependency a few days ago --------- Co-authored-by: Dev 2049 --- poetry.lock | 26 +++++++++++++------------- pyproject.toml | 9 ++------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4a59e44bd71..b63bb6988ee 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "absl-py" @@ -1625,19 +1625,21 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] [[package]] name = "docarray" -version = "0.31.1" +version = "0.32.0" description = "The data structure for multimodal data" category = "main" optional = true python-versions = ">=3.7,<4.0" files = [ - {file = "docarray-0.31.1-py3-none-any.whl", hash = "sha256:286842c84a9946648f36b2a4dc33bcb47589780b4614e5cd32ce67c5a46cb4c0"}, - {file = "docarray-0.31.1.tar.gz", hash = "sha256:096b1eabf0be3c0b1517bbbe82485c19a0de61dde24b8f3448f26c5ead672c4a"}, + {file = "docarray-0.32.0-py3-none-any.whl", hash = "sha256:5216858966ea42133614be421ef7ae670d020bfdfcd2ab3e0118a4a8ecc77034"}, + {file = "docarray-0.32.0.tar.gz", hash = "sha256:7a3156cb0d13dec7d6b85f193b339b823748446fc9fff1e0ca4c2ef50b4183d2"}, ] [package.dependencies] +hnswlib = {version = ">=0.6.2", optional = true, markers = "extra == \"hnswlib\""} numpy = ">=1.17.3" orjson = ">=3.8.2" +protobuf = {version = ">=3.19.0", optional = true, markers = "extra == \"proto\" or extra == \"hnswlib\" or extra == \"full\""} pydantic = ">=1.10.2" rich = ">=13.1.0" types-requests = ">=2.28.11.6" @@ -1648,7 +1650,7 @@ audio = ["pydub (>=0.25.1,<0.26.0)"] aws = ["smart-open[s3] (>=6.3.0)"] elasticsearch = ["elastic-transport (>=8.4.0,<9.0.0)", "elasticsearch (>=7.10.1)"] full = ["av (>=10.0.0)", "lz4 (>=1.0.0)", "pandas (>=1.1.0)", "pillow (>=9.3.0)", "protobuf (>=3.19.0)", "pydub (>=0.25.1,<0.26.0)", "trimesh[easy] (>=3.17.1)", "types-pillow (>=9.3.0.1)"] -hnswlib = ["hnswlib (>=0.6.2)"] +hnswlib = ["hnswlib (>=0.6.2)", "protobuf (>=3.19.0)"] image = ["pillow (>=9.3.0)", "types-pillow (>=9.3.0.1)"] jac = ["jina-hubble-sdk (>=0.34.0)"] mesh = ["trimesh[easy] (>=3.17.1)"] @@ -6653,7 +6655,6 @@ files = [ {file = "pylance-0.4.12-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2b86fb8dccc03094c0db37bef0d91bda60e8eb0d1eddf245c6971450c8d8a53f"}, {file = "pylance-0.4.12-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bc82914b13204187d673b5f3d45f93219c38a0e9d0542ba251074f639669789"}, {file = "pylance-0.4.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a4bcce77f99ecd4cbebbadb01e58d5d8138d40eb56bdcdbc3b20b0475e7a472"}, - {file = "pylance-0.4.12-cp38-abi3-win_amd64.whl", hash = "sha256:9616931c5300030adb9626d22515710a127d1e46a46737a7a0f980b52f13627c"}, ] [package.dependencies] @@ -10378,14 +10379,13 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "protobuf", "hnswlib", "steamship", "pdfminer-six", "lxml", "requests-toolbelt", "neo4j"] -azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"] +all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "lark", "lxml", "manifest-ml", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] +azure = ["azure-core", "azure-cosmos", "azure-identity", "openai"] cohere = ["cohere"] +docarray = ["docarray"] embeddings = ["sentence-transformers"] -extended-testing = ["beautifulsoup4", "chardet", "jq", "pdfminer-six", "pypdf", "pymupdf", "pypdfium2", "tqdm", "lxml", "atlassian-python-api", "beautifulsoup4", "pandas", "telethon", "psychicapi", "zep-python", "gql", "requests-toolbelt", "html2text"] -hnswlib = ["docarray", "protobuf", "hnswlib"] -in-memory-store = ["docarray"] -llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] +extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "chardet", "gql", "html2text", "jq", "lxml", "pandas", "pdfminer-six", "psychicapi", "pymupdf", "pypdf", "pypdfium2", "requests-toolbelt", "telethon", "tqdm", "zep-python"] +llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"] openai = ["openai", "tiktoken"] qdrant = ["qdrant-client"] text-helpers = ["chardet"] @@ -10393,4 +10393,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "e41e0253ccc1137f3f4a8f3627fb5165f611dfd57e65960c95421e868f4defae" +content-hash = "7c14d63435a60c32edbb4d7adcc647430cf64a80aa26c1515fba28d5433efc6b" diff --git a/pyproject.toml b/pyproject.toml index 4d221ad833c..cf1a931cee7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,9 +79,7 @@ O365 = {version = "^2.0.26", optional = true} jq = {version = "^1.4.1", optional = true} steamship = {version = "^2.16.9", optional = true} pdfminer-six = {version = "^20221105", optional = true} -docarray = {version="^0.31.0", optional=true} -protobuf = {version="3.19.6", optional=true} -hnswlib = {version="^0.7.0", optional=true} +docarray = {version="^0.32.0", extras=["hnswlib"], optional=true} lxml = {version = "^4.9.2", optional = true} pymupdf = {version = "^1.22.3", optional = true} pypdfium2 = {version = "^4.10.0", optional = true} @@ -181,8 +179,7 @@ qdrant = ["qdrant-client"] openai = ["openai", "tiktoken"] text_helpers = ["chardet"] cohere = ["cohere"] -in_memory_store = ["docarray"] -hnswlib = ["docarray", "protobuf", "hnswlib"] +docarray = ["docarray"] embeddings = ["sentence-transformers"] azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"] all = [ @@ -238,8 +235,6 @@ all = [ "O365", "jq", "docarray", - "protobuf", - "hnswlib", "steamship", "pdfminer-six", "lxml",