From ea0afd07ca1ef7715c0df606efe12ae9f9ad7e62 Mon Sep 17 00:00:00 2001 From: guillaumedelande <115003496+guillaumedelande@users.noreply.github.com> Date: Tue, 5 Dec 2023 05:29:20 +0100 Subject: [PATCH] Update azuresearch.py following recent change from azure-search-documents library (#13472) - **Description:** Reference library azure-search-documents has been adapted in version 11.4.0: 1. Notebook explaining Azure AI Search updated with most recent info 2. HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration 3. PrioritizedFields(prioritized_content_fields) --> SemanticPrioritizedFields(content_fields) 4. SemanticSettings --> SemanticSearch 5. VectorSearch(algorithm_configurations) --> VectorSearch(algorithms) --> Changes now reflected on Langchain: default vector search config from langchain is now compatible with officially released library from Azure. - **Issue:** Issue creating a new index (due to wrong class used for default vector search configuration) if using latest version of azure-search-documents with current langchain version - **Dependencies:** azure-search-documents>=11.4.0, - **Tag maintainer:** , --------- Co-authored-by: Erick Friis --- .../vectorstores/azuresearch.ipynb | 11 +- .../langchain/vectorstores/azuresearch.py | 120 +++++++++++++----- 2 files changed, 93 insertions(+), 38 deletions(-) diff --git a/docs/docs/integrations/vectorstores/azuresearch.ipynb b/docs/docs/integrations/vectorstores/azuresearch.ipynb index 1ffb30e5c72..710e5b3fcb8 100644 --- a/docs/docs/integrations/vectorstores/azuresearch.ipynb +++ b/docs/docs/integrations/vectorstores/azuresearch.ipynb @@ -6,18 +6,17 @@ "collapsed": false }, "source": [ - "# Azure Cognitive Search\n", + "# Azure AI Search\n", "\n", - "[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience 
over private, heterogeneous content in web, mobile, and enterprise applications.\n", - "\n", - "Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8" + "[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search` and `Azure Cognitive Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Install Azure Cognitive Search SDK" + "# Install Azure AI Search SDK" ] }, { @@ -26,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install azure-search-documents==11.4.0b8\n", + "!pip install azure-search-documents\n", "!pip install azure-identity" ] }, diff --git a/libs/langchain/langchain/vectorstores/azuresearch.py b/libs/langchain/langchain/vectorstores/azuresearch.py index 2cd1f9546de..b818dc71fd5 100644 --- a/libs/langchain/langchain/vectorstores/azuresearch.py +++ b/libs/langchain/langchain/vectorstores/azuresearch.py @@ -14,6 +14,7 @@ from typing import ( Optional, Tuple, Type, + Union, ) import numpy as np @@ -36,10 +37,13 @@ if TYPE_CHECKING: from azure.search.documents.indexes.models import ( ScoringProfile, SearchField, - SemanticSettings, VectorSearch, ) + try: + from azure.search.documents.indexes.models import SemanticSearch + except ImportError: + from azure.search.documents.indexes.models import SemanticSettings # <11.4.0 # Allow overriding field names for Azure Search FIELDS_ID = get_from_env( @@ -69,7 +73,7 @@ def _get_search_client( 
semantic_configuration_name: Optional[str] = None, fields: Optional[List[SearchField]] = None, vector_search: Optional[VectorSearch] = None, - semantic_settings: Optional[SemanticSettings] = None, + semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None, scoring_profiles: Optional[List[ScoringProfile]] = None, default_scoring_profile: Optional[str] = None, default_fields: Optional[List[SearchField]] = None, @@ -81,15 +85,30 @@ def _get_search_client( from azure.search.documents import SearchClient from azure.search.documents.indexes import SearchIndexClient from azure.search.documents.indexes.models import ( - HnswVectorSearchAlgorithmConfiguration, - PrioritizedFields, SearchIndex, SemanticConfiguration, SemanticField, - SemanticSettings, VectorSearch, ) + # class names changed for versions >= 11.4.0 + try: + from azure.search.documents.indexes.models import ( + HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old + SemanticPrioritizedFields, # PrioritizedFields outdated + SemanticSearch, # SemanticSettings outdated + ) + + NEW_VERSION = True + except ImportError: + from azure.search.documents.indexes.models import ( + HnswVectorSearchAlgorithmConfiguration, + PrioritizedFields, + SemanticSettings, + ) + + NEW_VERSION = False + default_fields = default_fields or [] if key is None: credential = DefaultAzureCredential() @@ -135,34 +154,71 @@ def _get_search_client( fields = default_fields # Vector search configuration if vector_search is None: - vector_search = VectorSearch( - algorithm_configurations=[ - HnswVectorSearchAlgorithmConfiguration( - name="default", - kind="hnsw", - parameters={ # type: ignore - "m": 4, - "efConstruction": 400, - "efSearch": 500, - "metric": "cosine", - }, - ) - ] - ) + if NEW_VERSION: + # >= 11.4.0: + # VectorSearch(algorithm_configuration) --> VectorSearch(algorithms) + # HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration + vector_search = VectorSearch( + algorithms=[ + 
HnswAlgorithmConfiguration( + name="default", + kind="hnsw", + parameters={ # type: ignore + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + ) + ] + ) + else: # < 11.4.0 + vector_search = VectorSearch( + algorithm_configurations=[ + HnswVectorSearchAlgorithmConfiguration( + name="default", + kind="hnsw", + parameters={ # type: ignore + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + ) + ] + ) + # Create the semantic settings with the configuration if semantic_settings is None and semantic_configuration_name is not None: - semantic_settings = SemanticSettings( - configurations=[ - SemanticConfiguration( - name=semantic_configuration_name, - prioritized_fields=PrioritizedFields( - prioritized_content_fields=[ - SemanticField(field_name=FIELDS_CONTENT) - ], - ), - ) - ] - ) + if NEW_VERSION: + # >= 11.4.0: SemanticSettings --> SemanticSearch + # PrioritizedFields(prioritized_content_fields) + # --> SemanticPrioritizedFields(content_fields) + semantic_settings = SemanticSearch( + configurations=[ + SemanticConfiguration( + name=semantic_configuration_name, + prioritized_fields=SemanticPrioritizedFields( + content_fields=[ + SemanticField(field_name=FIELDS_CONTENT) + ], + ), + ) + ] + ) + else: # < 11.4.0 + semantic_settings = SemanticSettings( + configurations=[ + SemanticConfiguration( + name=semantic_configuration_name, + prioritized_fields=PrioritizedFields( + prioritized_content_fields=[ + SemanticField(field_name=FIELDS_CONTENT) + ], + ), + ) + ] + ) # Create the search index with the semantic settings and vector search index = SearchIndex( name=index_name, @@ -196,7 +252,7 @@ class AzureSearch(VectorStore): semantic_query_language: str = "en-us", fields: Optional[List[SearchField]] = None, vector_search: Optional[VectorSearch] = None, - semantic_settings: Optional[SemanticSettings] = None, + semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None, scoring_profiles: 
Optional[List[ScoringProfile]] = None, default_scoring_profile: Optional[str] = None, **kwargs: Any,