Update azuresearch.py following recent change from azure-search-documents library (#13472)

- **Description:** 

Reference library azure-search-documents has been adapted in version
11.4.0:

1. Notebook explaining Azure AI Search updated with most recent info
2. HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
3. PrioritizedFields(prioritized_content_fields) -->
SemanticPrioritizedFields(content_fields)
4. SemanticSettings --> SemanticSearch
5. VectorSearch(algorithm_configurations) -->
VectorSearch(configurations)

--> Changes now reflected on Langchain: default vector search config
from langchain is now compatible with officially released library from
Azure.

  - **Issue:**
Issue creating a new index (due to wrong class used for default vector
search configuration) if using latest version of azure-search-documents
with current langchain version
  - **Dependencies:** azure-search-documents>=11.4.0,
  - **Tag maintainer:** ,

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
guillaumedelande 2023-12-05 05:29:20 +01:00 committed by GitHub
parent 5cb3393e20
commit ea0afd07ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 93 additions and 38 deletions

View File

@ -6,18 +6,17 @@
"collapsed": false
},
"source": [
"# Azure Cognitive Search\n",
"# Azure AI Search\n",
"\n",
"[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n",
"Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8"
"[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search` and `Azure Cognitive Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Install Azure Cognitive Search SDK"
"# Install Azure AI Search SDK"
]
},
{
@ -26,7 +25,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install azure-search-documents==11.4.0b8\n",
"!pip install azure-search-documents\n",
"!pip install azure-identity"
]
},

View File

@ -14,6 +14,7 @@ from typing import (
Optional,
Tuple,
Type,
Union,
)
import numpy as np
@ -36,10 +37,13 @@ if TYPE_CHECKING:
from azure.search.documents.indexes.models import (
ScoringProfile,
SearchField,
SemanticSettings,
VectorSearch,
)
try:
from azure.search.documents.indexes.models import SemanticSearch
except ImportError:
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0
# Allow overriding field names for Azure Search
FIELDS_ID = get_from_env(
@ -69,7 +73,7 @@ def _get_search_client(
semantic_configuration_name: Optional[str] = None,
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[SemanticSettings] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
default_fields: Optional[List[SearchField]] = None,
@ -81,15 +85,30 @@ def _get_search_client(
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SearchIndex,
SemanticConfiguration,
SemanticField,
SemanticSettings,
VectorSearch,
)
# class names changed for versions >= 11.4.0
try:
from azure.search.documents.indexes.models import (
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
SemanticPrioritizedFields, # PrioritizedFields outdated
SemanticSearch, # SemanticSettings outdated
)
NEW_VERSION = True
except ImportError:
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SemanticSettings,
)
NEW_VERSION = False
default_fields = default_fields or []
if key is None:
credential = DefaultAzureCredential()
@ -135,34 +154,71 @@ def _get_search_client(
fields = default_fields
# Vector search configuration
if vector_search is None:
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
if NEW_VERSION:
# >= 11.4.0:
# VectorSearch(algorithm_configuration) --> VectorSearch(algorithms)
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
else: # < 11.4.0
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
# Create the semantic settings with the configuration
if semantic_settings is None and semantic_configuration_name is not None:
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
if NEW_VERSION:
# <=11.4.0: SemanticSettings --> SemanticSearch
# PrioritizedFields(prioritized_content_fields)
# --> SemanticPrioritizedFields(content_fields)
semantic_settings = SemanticSearch(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=SemanticPrioritizedFields(
content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
else: # < 11.4.0
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
# Create the search index with the semantic settings and vector search
index = SearchIndex(
name=index_name,
@ -196,7 +252,7 @@ class AzureSearch(VectorStore):
semantic_query_language: str = "en-us",
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[SemanticSettings] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
**kwargs: Any,