community[patch]: update AzureSearch class to work with azure-search-documents=11.4.0 (#15659)

- **Description:** Updates
`libs/community/langchain_community/vectorstores/azuresearch.py` to
support the stable 11.4.0 release of `azure-search-documents`, removing
the import fallbacks for earlier (pre-11.4.0) versions
- **Issue:** https://github.com/langchain-ai/langchain/issues/14534,
https://github.com/langchain-ai/langchain/issues/15039,
https://github.com/langchain-ai/langchain/issues/15355
- **Dependencies:** azure-search-documents>=11.4.0

---------

Co-authored-by: Clément Tamines <Skar0@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Lingzhen Chen 2024-02-13 04:23:35 +01:00 committed by GitHub
parent e135dc70c3
commit 30af711c34
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -37,14 +37,10 @@ if TYPE_CHECKING:
CorsOptions, CorsOptions,
ScoringProfile, ScoringProfile,
SearchField, SearchField,
SemanticConfiguration,
VectorSearch, VectorSearch,
) )
try:
from azure.search.documents.indexes.models import SemanticSearch
except ImportError:
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0
# Allow overriding field names for Azure Search # Allow overriding field names for Azure Search
FIELDS_ID = get_from_env( FIELDS_ID = get_from_env(
key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id" key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id"
@ -73,7 +69,7 @@ def _get_search_client(
semantic_configuration_name: Optional[str] = None, semantic_configuration_name: Optional[str] = None,
fields: Optional[List[SearchField]] = None, fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None, vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None, semantic_configurations: Optional[SemanticConfiguration] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None, scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None, default_scoring_profile: Optional[str] = None,
default_fields: Optional[List[SearchField]] = None, default_fields: Optional[List[SearchField]] = None,
@ -86,30 +82,20 @@ def _get_search_client(
from azure.search.documents import SearchClient from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import ( from azure.search.documents.indexes.models import (
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
HnswAlgorithmConfiguration,
HnswParameters,
SearchIndex, SearchIndex,
SemanticConfiguration, SemanticConfiguration,
SemanticField, SemanticField,
VectorSearch, SemanticPrioritizedFields,
SemanticSearch,
VectorSearchAlgorithmKind,
VectorSearchAlgorithmMetric,
VectorSearchProfile,
) )
# class names changed for versions >= 11.4.0
try:
from azure.search.documents.indexes.models import (
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
SemanticPrioritizedFields, # PrioritizedFields outdated
SemanticSearch, # SemanticSettings outdated
)
NEW_VERSION = True
except ImportError:
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SemanticSettings,
)
NEW_VERSION = False
default_fields = default_fields or [] default_fields = default_fields or []
if key is None: if key is None:
credential = DefaultAzureCredential() credential = DefaultAzureCredential()
@ -155,77 +141,55 @@ def _get_search_client(
fields = default_fields fields = default_fields
# Vector search configuration # Vector search configuration
if vector_search is None: if vector_search is None:
if NEW_VERSION: vector_search = VectorSearch(
# >= 11.4.0: algorithms=[
# VectorSearch(algorithm_configuration) --> VectorSearch(algorithms) HnswAlgorithmConfiguration(
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration name="default",
vector_search = VectorSearch( kind=VectorSearchAlgorithmKind.HNSW,
algorithms=[ parameters=HnswParameters(
HnswAlgorithmConfiguration( m=4,
name="default", ef_construction=400,
kind="hnsw", ef_search=500,
parameters={ # type: ignore metric=VectorSearchAlgorithmMetric.COSINE,
"m": 4, ),
"efConstruction": 400, ),
"efSearch": 500, ExhaustiveKnnAlgorithmConfiguration(
"metric": "cosine", name="default_exhaustive_knn",
}, kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
) parameters=ExhaustiveKnnParameters(
] metric=VectorSearchAlgorithmMetric.COSINE
) ),
else: # < 11.4.0 ),
vector_search = VectorSearch( ],
algorithm_configurations=[ profiles=[
HnswVectorSearchAlgorithmConfiguration( VectorSearchProfile(
name="default", name="myHnswProfile",
kind="hnsw", algorithm_configuration_name="default",
parameters={ # type: ignore ),
"m": 4, VectorSearchProfile(
"efConstruction": 400, name="myExhaustiveKnnProfile",
"efSearch": 500, algorithm_configuration_name="default_exhaustive_knn",
"metric": "cosine", ),
}, ],
) )
]
)
# Create the semantic settings with the configuration # Create the semantic settings with the configuration
if semantic_settings is None and semantic_configuration_name is not None: semantic_search = None
if NEW_VERSION: if semantic_configurations is None and semantic_configuration_name is not None:
# <=11.4.0: SemanticSettings --> SemanticSearch semantic_configuration = SemanticConfiguration(
# PrioritizedFields(prioritized_content_fields) name=semantic_configuration_name,
# --> SemanticPrioritizedFields(content_fields) prioritized_fields=SemanticPrioritizedFields(
semantic_settings = SemanticSearch( content_fields=[SemanticField(field_name=FIELDS_CONTENT)],
configurations=[ ),
SemanticConfiguration( )
name=semantic_configuration_name, semantic_search = SemanticSearch(configurations=[semantic_configuration])
prioritized_fields=SemanticPrioritizedFields(
content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
else: # < 11.4.0
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
# Create the search index with the semantic settings and vector search # Create the search index with the semantic settings and vector search
index = SearchIndex( index = SearchIndex(
name=index_name, name=index_name,
fields=fields, fields=fields,
vector_search=vector_search, vector_search=vector_search,
semantic_settings=semantic_settings, semantic_search=semantic_search,
scoring_profiles=scoring_profiles, scoring_profiles=scoring_profiles,
default_scoring_profile=default_scoring_profile, default_scoring_profile=default_scoring_profile,
cors_options=cors_options, cors_options=cors_options,
@ -251,10 +215,9 @@ class AzureSearch(VectorStore):
embedding_function: Union[Callable, Embeddings], embedding_function: Union[Callable, Embeddings],
search_type: str = "hybrid", search_type: str = "hybrid",
semantic_configuration_name: Optional[str] = None, semantic_configuration_name: Optional[str] = None,
semantic_query_language: str = "en-us",
fields: Optional[List[SearchField]] = None, fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None, vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None, semantic_configurations: Optional[SemanticConfiguration] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None, scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None, default_scoring_profile: Optional[str] = None,
cors_options: Optional[CorsOptions] = None, cors_options: Optional[CorsOptions] = None,
@ -309,7 +272,7 @@ class AzureSearch(VectorStore):
semantic_configuration_name=semantic_configuration_name, semantic_configuration_name=semantic_configuration_name,
fields=fields, fields=fields,
vector_search=vector_search, vector_search=vector_search,
semantic_settings=semantic_settings, semantic_configurations=semantic_configurations,
scoring_profiles=scoring_profiles, scoring_profiles=scoring_profiles,
default_scoring_profile=default_scoring_profile, default_scoring_profile=default_scoring_profile,
default_fields=default_fields, default_fields=default_fields,
@ -318,7 +281,6 @@ class AzureSearch(VectorStore):
) )
self.search_type = search_type self.search_type = search_type
self.semantic_configuration_name = semantic_configuration_name self.semantic_configuration_name = semantic_configuration_name
self.semantic_query_language = semantic_query_language
self.fields = fields if fields else default_fields self.fields = fields if fields else default_fields
@property @property
@ -451,39 +413,30 @@ class AzureSearch(VectorStore):
Returns: Returns:
List of Documents most similar to the query and score for each List of Documents most similar to the query and score for each
""" """
from azure.search.documents.models import Vector
from azure.search.documents.models import VectorizedQuery
results = self.client.search( results = self.client.search(
search_text="", search_text="",
vectors=[ vector_queries=[
Vector( VectorizedQuery(
value=np.array(self.embed_query(query), dtype=np.float32).tolist(), vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k=k, k_nearest_neighbors=k,
fields=FIELDS_CONTENT_VECTOR, fields=FIELDS_CONTENT_VECTOR,
) )
], ],
filter=filters, filter=filters,
top=k,
) )
# Convert results to Document objects # Convert results to Document objects
docs = [ docs = [
( (
Document( Document(
page_content=result.pop(FIELDS_CONTENT), page_content=result.pop(FIELDS_CONTENT),
metadata={ metadata=json.loads(result[FIELDS_METADATA])
**( if FIELDS_METADATA in result
{FIELDS_ID: result.pop(FIELDS_ID)} else {
if FIELDS_ID in result k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
else {}
),
**(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v
for k, v in result.items()
if k != FIELDS_CONTENT_VECTOR
}
),
}, },
), ),
float(result["@search.score"]), float(result["@search.score"]),
@ -520,14 +473,14 @@ class AzureSearch(VectorStore):
Returns: Returns:
List of Documents most similar to the query and score for each List of Documents most similar to the query and score for each
""" """
from azure.search.documents.models import Vector from azure.search.documents.models import VectorizedQuery
results = self.client.search( results = self.client.search(
search_text=query, search_text=query,
vectors=[ vector_queries=[
Vector( VectorizedQuery(
value=np.array(self.embed_query(query), dtype=np.float32).tolist(), vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k=k, k_nearest_neighbors=k,
fields=FIELDS_CONTENT_VECTOR, fields=FIELDS_CONTENT_VECTOR,
) )
], ],
@ -539,21 +492,10 @@ class AzureSearch(VectorStore):
( (
Document( Document(
page_content=result.pop(FIELDS_CONTENT), page_content=result.pop(FIELDS_CONTENT),
metadata={ metadata=json.loads(result[FIELDS_METADATA])
**( if FIELDS_METADATA in result
{FIELDS_ID: result.pop(FIELDS_ID)} else {
if FIELDS_ID in result k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
else {}
),
**(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v
for k, v in result.items()
if k != FIELDS_CONTENT_VECTOR
}
),
}, },
), ),
float(result["@search.score"]), float(result["@search.score"]),
@ -610,20 +552,19 @@ class AzureSearch(VectorStore):
Returns: Returns:
List of Documents most similar to the query and score for each List of Documents most similar to the query and score for each
""" """
from azure.search.documents.models import Vector from azure.search.documents.models import VectorizedQuery
results = self.client.search( results = self.client.search(
search_text=query, search_text=query,
vectors=[ vector_queries=[
Vector( VectorizedQuery(
value=np.array(self.embed_query(query), dtype=np.float32).tolist(), vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k=50, k_nearest_neighbors=k,
fields=FIELDS_CONTENT_VECTOR, fields=FIELDS_CONTENT_VECTOR,
) )
], ],
filter=filters, filter=filters,
query_type="semantic", query_type="semantic",
query_language=self.semantic_query_language,
semantic_configuration_name=self.semantic_configuration_name, semantic_configuration_name=self.semantic_configuration_name,
query_caption="extractive", query_caption="extractive",
query_answer="extractive", query_answer="extractive",
@ -643,11 +584,6 @@ class AzureSearch(VectorStore):
Document( Document(
page_content=result.pop(FIELDS_CONTENT), page_content=result.pop(FIELDS_CONTENT),
metadata={ metadata={
**(
{FIELDS_ID: result.pop(FIELDS_ID)}
if FIELDS_ID in result
else {}
),
**( **(
json.loads(result[FIELDS_METADATA]) json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result if FIELDS_METADATA in result
@ -667,9 +603,7 @@ class AzureSearch(VectorStore):
if result.get("@search.captions") if result.get("@search.captions")
else {}, else {},
"answers": semantic_answers_dict.get( "answers": semantic_answers_dict.get(
json.loads(result[FIELDS_METADATA]).get("key") json.loads(result["metadata"]).get("key"),
if FIELDS_METADATA in result
else "",
"", "",
), ),
}, },