diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index cfd7fd45809..392a96a4c19 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aenum" @@ -552,6 +552,17 @@ azure-core = ">=1.30.0,<2.0.0" isodate = ">=0.6.1,<1.0.0" typing-extensions = ">=4.6.0" +[[package]] +name = "azure-common" +version = "1.1.28" +description = "Microsoft Azure Client Library for Python (Common)" +optional = true +python-versions = "*" +files = [ + {file = "azure-common-1.1.28.zip", hash = "sha256:4ac0cd3214e36b6a1b6a442686722a5d8cc449603aa833f3f0f40bda836704a3"}, + {file = "azure_common-1.1.28-py2.py3-none-any.whl", hash = "sha256:5c12d3dcf4ec20599ca6b0d3e09e86e146353d443e7fcc050c9a19c1f9df20ad"}, +] + [[package]] name = "azure-core" version = "1.30.1" @@ -571,6 +582,39 @@ typing-extensions = ">=4.6.0" [package.extras] aio = ["aiohttp (>=3.0)"] +[[package]] +name = "azure-identity" +version = "1.16.0" +description = "Microsoft Azure Identity Library for Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "azure-identity-1.16.0.tar.gz", hash = "sha256:6ff1d667cdcd81da1ceab42f80a0be63ca846629f518a922f7317a7e3c844e1b"}, + {file = "azure_identity-1.16.0-py3-none-any.whl", hash = "sha256:722fdb60b8fdd55fa44dc378b8072f4b419b56a5e54c0de391f644949f3a826f"}, +] + +[package.dependencies] +azure-core = ">=1.23.0" +cryptography = ">=2.5" +msal = ">=1.24.0" +msal-extensions = ">=0.3.0" + +[[package]] +name = "azure-search-documents" +version = "11.4.0" +description = "Microsoft Azure Cognitive Search Client Library for Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "azure-search-documents-11.4.0.tar.gz", hash = "sha256:599f269f106fb51e646ff426a218c21811575598e6a769b23fa4a0127c0f57e0"}, + {file = 
"azure_search_documents-11.4.0-py3-none-any.whl", hash = "sha256:e435266dc992a3450dc475309c9475f89a4bb0e9dac838140e609d9f1c7608ac"}, +] + +[package.dependencies] +azure-common = ">=1.1,<2.0" +azure-core = ">=1.28.0,<2.0.0" +isodate = ">=0.6.0" + [[package]] name = "babel" version = "2.14.0" @@ -3204,7 +3248,6 @@ files = [ {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"}, {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"}, {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"}, - {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"}, @@ -3715,7 +3758,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.45" +version = "0.1.46" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -4204,6 +4247,25 @@ requests = ">=2.0.0,<3" [package.extras] broker = ["pymsalruntime (>=0.13.2,<0.15)"] +[[package]] +name = "msal-extensions" +version = "1.1.0" +description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. 
Concurrent data access will be coordinated by a file lock mechanism." +optional = true +python-versions = ">=3.7" +files = [ + {file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"}, + {file = "msal_extensions-1.1.0-py3-none-any.whl", hash = "sha256:01be9711b4c0b1a151450068eeb2c4f0997df3bba085ac299de3a66f585e382f"}, +] + +[package.dependencies] +msal = ">=0.4.1,<2.0.0" +packaging = "*" +portalocker = [ + {version = ">=1.0,<3", markers = "platform_system != \"Windows\""}, + {version = ">=1.6,<3", markers = "platform_system == \"Windows\""}, +] + [[package]] name = "multidict" version = "6.0.5" @@ -5349,6 +5411,25 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "portalocker" +version = "2.8.2" +description = "Wraps the portalocker recipe for easy usage" +optional = true +python-versions = ">=3.8" +files = [ + {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, + {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + [[package]] name = "praw" version = "7.7.1" @@ -5529,6 +5610,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + 
{file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -5571,6 +5654,7 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -5579,6 +5663,8 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = 
"psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -6576,6 +6662,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -9229,9 +9316,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] cli = ["typer"] -extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cloudpickle", "cloudpickle", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "friendli-client", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "httpx-sse", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "premai", "psychicapi", "py-trello", "pyjwt", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tidb-vector", "timescale-vector", "tqdm", "tree-sitter", "tree-sitter-languages", "upstash-redis", "vdms", "xata", "xmltodict"] +extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "azure-identity", "azure-search-documents", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cloudpickle", "cloudpickle", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "friendli-client", "geopandas", "gitpython", "google-cloud-documentai", "gql", 
"gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "httpx-sse", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "premai", "psychicapi", "py-trello", "pyjwt", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tidb-vector", "timescale-vector", "tqdm", "tree-sitter", "tree-sitter-languages", "upstash-redis", "vdms", "xata", "xmltodict"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "48ea73a94d06ae90f8f089017ae1bbcf9d37b2cc9957a44fb617785be0fe3236" +content-hash = "b066cbf8a1f02cae88c6c099e916d805fe6eb8685fd15c093d66cf52ea363fa5" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 43a4c185691..357d0244c60 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -94,6 +94,8 @@ hdbcli = {version = "^2.19.21", optional = true} oci = {version = "^2.119.1", optional = true} rdflib = {version = "7.0.0", optional = true} nvidia-riva-client = {version = "^2.14.0", optional = true} +azure-search-documents = {version = "11.4.0", optional = true} +azure-identity = {version = "^1.15.0", optional = true} tidb-vector = {version = ">=0.0.3,<1.0.0", optional = true} friendli-client = {version = "^1.2.4", optional = true} premai = {version = "^0.3.25", optional = true} @@ -268,6 +270,8 @@ extended_testing = [ "hdbcli", "oci", "rdflib", + "azure-search-documents", + "azure-identity", "tidb-vector", "cloudpickle", "friendli-client", diff --git a/libs/community/tests/unit_tests/vectorstores/test_azure_search.py b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py new file mode 100644 index 
00000000000..25aabb8759d
--- /dev/null
+++ b/libs/community/tests/unit_tests/vectorstores/test_azure_search.py
@@ -0,0 +1,185 @@
+"""Unit tests for AzureSearch index lookup and creation on init."""
+
+import json
+from typing import List, Optional
+from unittest.mock import patch
+
+import pytest
+
+from langchain_community.vectorstores.azuresearch import AzureSearch
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+# Vector size shared by the fake embedding model and the expected index schema.
+DEFAULT_VECTOR_DIMENSION = 4
+
+
+class FakeEmbeddingsWithDimension(FakeEmbeddings):
+    """Fake embeddings with a configurable vector length, for testing."""
+
+    def __init__(self, dimension: int = DEFAULT_VECTOR_DIMENSION):
+        super().__init__()
+        self.dimension = dimension
+
+    def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
+        """Return one vector per text: ones, with the text's position last."""
+        return [
+            [float(1.0)] * (self.dimension - 1) + [float(i)]
+            for i in range(len(embedding_texts))
+        ]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Return a fixed query vector: ones, with 0.0 as the last component."""
+        return [float(1.0)] * (self.dimension - 1) + [float(0.0)]
+
+
+DEFAULT_INDEX_NAME = "langchain-index"
+DEFAULT_ENDPOINT = "https://my-search-service.search.windows.net"
+DEFAULT_KEY = "mykey"
+DEFAULT_EMBEDDING_MODEL = FakeEmbeddingsWithDimension()
+
+
+def mock_default_index(*args, **kwargs):  # type: ignore[no-untyped-def]
+    """Build the index definition these tests treat as the default schema.
+
+    Accepts and ignores any arguments so it can be patched in for
+    ``SearchIndexClient.get_index``; ``test_init_new_index`` also calls it
+    with no arguments to build the expected index.
+    """
+    # Imported lazily so this module still imports without the optional azure SDK.
+    from azure.search.documents.indexes.models import (
+        ExhaustiveKnnAlgorithmConfiguration,
+        ExhaustiveKnnParameters,
+        HnswAlgorithmConfiguration,
+        HnswParameters,
+        SearchField,
+        SearchFieldDataType,
+        SearchIndex,
+        VectorSearch,
+        VectorSearchAlgorithmMetric,
+        VectorSearchProfile,
+    )
+
+    return SearchIndex(
+        name=DEFAULT_INDEX_NAME,
+        fields=[
+            SearchField(
+                name="id",
+                type=SearchFieldDataType.String,
+                key=True,
+                hidden=False,
+                searchable=False,
+                filterable=True,
+                sortable=False,
+                facetable=False,
+            ),
+            SearchField(
+                name="content",
+                type=SearchFieldDataType.String,
+                key=False,
+                hidden=False,
+                searchable=True,
+                filterable=False,
+                sortable=False,
+                facetable=False,
+            ),
+            SearchField(
+                name="content_vector",
+                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+                searchable=True,
+                vector_search_dimensions=DEFAULT_VECTOR_DIMENSION,
+                vector_search_profile_name="myHnswProfile",
+            ),
+            SearchField(
+                name="metadata",
+                type="Edm.String",
+                key=False,
+                hidden=False,
+                searchable=True,
+                filterable=False,
+                sortable=False,
+                facetable=False,
+            ),
+        ],
+        vector_search=VectorSearch(
+            profiles=[
+                VectorSearchProfile(
+                    name="myHnswProfile", algorithm_configuration_name="default"
+                ),
+                VectorSearchProfile(
+                    name="myExhaustiveKnnProfile",
+                    algorithm_configuration_name="default_exhaustive_knn",
+                ),
+            ],
+            algorithms=[
+                HnswAlgorithmConfiguration(
+                    name="default",
+                    parameters=HnswParameters(
+                        m=4,
+                        ef_construction=400,
+                        ef_search=500,
+                        metric=VectorSearchAlgorithmMetric.COSINE,
+                    ),
+                ),
+                ExhaustiveKnnAlgorithmConfiguration(
+                    name="default_exhaustive_knn",
+                    parameters=ExhaustiveKnnParameters(
+                        metric=VectorSearchAlgorithmMetric.COSINE
+                    ),
+                ),
+            ],
+        ),
+    )
+
+
+def create_vector_store() -> AzureSearch:
+    """Construct an AzureSearch store from the module-level test defaults."""
+    return AzureSearch(
+        azure_search_endpoint=DEFAULT_ENDPOINT,
+        azure_search_key=DEFAULT_KEY,
+        index_name=DEFAULT_INDEX_NAME,
+        embedding_function=DEFAULT_EMBEDDING_MODEL,
+    )
+
+
+@pytest.mark.requires("azure.search.documents")
+def test_init_existing_index() -> None:
+    """Constructing the store must not create an index when one exists."""
+    from azure.search.documents.indexes import SearchIndexClient
+
+    # Patched onto the class, so an instance call passes ``self`` plus the
+    # index; accept both so pytest.fail (not a TypeError) reports the problem.
+    def mock_create_index(self, index):  # type: ignore[no-untyped-def]
+        pytest.fail("Should not create index in this test")
+
+    with patch.multiple(
+        SearchIndexClient, get_index=mock_default_index, create_index=mock_create_index
+    ):
+        vector_store = create_vector_store()
+        assert vector_store.client is not None
+
+
+@pytest.mark.requires("azure.search.documents")
+def test_init_new_index() -> None:
+    """Constructing the store creates the default index when none is found."""
+    from azure.core.exceptions import ResourceNotFoundError
+    from azure.search.documents.indexes import SearchIndexClient
+    from azure.search.documents.indexes.models import SearchIndex
+
+    def no_index(self, name: str):  # type: ignore[no-untyped-def]
+        raise ResourceNotFoundError
+
+    created_index: Optional[SearchIndex] = None
+
+    def mock_create_index(self, index):  # type: ignore[no-untyped-def]
+        nonlocal created_index
+        created_index = index
+
+    with patch.multiple(
+        SearchIndexClient, get_index=no_index, create_index=mock_create_index
+    ):
+        vector_store = create_vector_store()
+        assert vector_store.client is not None
+        assert created_index is not None
+        assert json.dumps(created_index.as_dict()) == json.dumps(
+            mock_default_index().as_dict()
+        )