mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 20:28:10 +00:00
community[patch]: update AzureSearch class to work with azure-search-documents=11.4.0 (#15659)
- **Description:** Updates `libs/community/langchain_community/vectorstores/azuresearch.py` to support the stable version `azure-search-documents=11.4.0`
- **Issue:** https://github.com/langchain-ai/langchain/issues/14534, https://github.com/langchain-ai/langchain/issues/15039, https://github.com/langchain-ai/langchain/issues/15355
- **Dependencies:** azure-search-documents>=11.4.0

---------

Co-authored-by: Clément Tamines <Skar0@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
e135dc70c3
commit
30af711c34
@ -37,14 +37,10 @@ if TYPE_CHECKING:
|
||||
CorsOptions,
|
||||
ScoringProfile,
|
||||
SearchField,
|
||||
SemanticConfiguration,
|
||||
VectorSearch,
|
||||
)
|
||||
|
||||
try:
|
||||
from azure.search.documents.indexes.models import SemanticSearch
|
||||
except ImportError:
|
||||
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0
|
||||
|
||||
# Allow overriding field names for Azure Search
|
||||
FIELDS_ID = get_from_env(
|
||||
key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id"
|
||||
@ -73,7 +69,7 @@ def _get_search_client(
|
||||
semantic_configuration_name: Optional[str] = None,
|
||||
fields: Optional[List[SearchField]] = None,
|
||||
vector_search: Optional[VectorSearch] = None,
|
||||
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
|
||||
semantic_configurations: Optional[SemanticConfiguration] = None,
|
||||
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
||||
default_scoring_profile: Optional[str] = None,
|
||||
default_fields: Optional[List[SearchField]] = None,
|
||||
@ -86,30 +82,20 @@ def _get_search_client(
|
||||
from azure.search.documents import SearchClient
|
||||
from azure.search.documents.indexes import SearchIndexClient
|
||||
from azure.search.documents.indexes.models import (
|
||||
ExhaustiveKnnAlgorithmConfiguration,
|
||||
ExhaustiveKnnParameters,
|
||||
HnswAlgorithmConfiguration,
|
||||
HnswParameters,
|
||||
SearchIndex,
|
||||
SemanticConfiguration,
|
||||
SemanticField,
|
||||
VectorSearch,
|
||||
SemanticPrioritizedFields,
|
||||
SemanticSearch,
|
||||
VectorSearchAlgorithmKind,
|
||||
VectorSearchAlgorithmMetric,
|
||||
VectorSearchProfile,
|
||||
)
|
||||
|
||||
# class names changed for versions >= 11.4.0
|
||||
try:
|
||||
from azure.search.documents.indexes.models import (
|
||||
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
|
||||
SemanticPrioritizedFields, # PrioritizedFields outdated
|
||||
SemanticSearch, # SemanticSettings outdated
|
||||
)
|
||||
|
||||
NEW_VERSION = True
|
||||
except ImportError:
|
||||
from azure.search.documents.indexes.models import (
|
||||
HnswVectorSearchAlgorithmConfiguration,
|
||||
PrioritizedFields,
|
||||
SemanticSettings,
|
||||
)
|
||||
|
||||
NEW_VERSION = False
|
||||
|
||||
default_fields = default_fields or []
|
||||
if key is None:
|
||||
credential = DefaultAzureCredential()
|
||||
@ -155,77 +141,55 @@ def _get_search_client(
|
||||
fields = default_fields
|
||||
# Vector search configuration
|
||||
if vector_search is None:
|
||||
if NEW_VERSION:
|
||||
# >= 11.4.0:
|
||||
# VectorSearch(algorithm_configuration) --> VectorSearch(algorithms)
|
||||
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
|
||||
vector_search = VectorSearch(
|
||||
algorithms=[
|
||||
HnswAlgorithmConfiguration(
|
||||
name="default",
|
||||
kind="hnsw",
|
||||
parameters={ # type: ignore
|
||||
"m": 4,
|
||||
"efConstruction": 400,
|
||||
"efSearch": 500,
|
||||
"metric": "cosine",
|
||||
},
|
||||
)
|
||||
]
|
||||
)
|
||||
else: # < 11.4.0
|
||||
vector_search = VectorSearch(
|
||||
algorithm_configurations=[
|
||||
HnswVectorSearchAlgorithmConfiguration(
|
||||
name="default",
|
||||
kind="hnsw",
|
||||
parameters={ # type: ignore
|
||||
"m": 4,
|
||||
"efConstruction": 400,
|
||||
"efSearch": 500,
|
||||
"metric": "cosine",
|
||||
},
|
||||
)
|
||||
]
|
||||
kind=VectorSearchAlgorithmKind.HNSW,
|
||||
parameters=HnswParameters(
|
||||
m=4,
|
||||
ef_construction=400,
|
||||
ef_search=500,
|
||||
metric=VectorSearchAlgorithmMetric.COSINE,
|
||||
),
|
||||
),
|
||||
ExhaustiveKnnAlgorithmConfiguration(
|
||||
name="default_exhaustive_knn",
|
||||
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
|
||||
parameters=ExhaustiveKnnParameters(
|
||||
metric=VectorSearchAlgorithmMetric.COSINE
|
||||
),
|
||||
),
|
||||
],
|
||||
profiles=[
|
||||
VectorSearchProfile(
|
||||
name="myHnswProfile",
|
||||
algorithm_configuration_name="default",
|
||||
),
|
||||
VectorSearchProfile(
|
||||
name="myExhaustiveKnnProfile",
|
||||
algorithm_configuration_name="default_exhaustive_knn",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Create the semantic settings with the configuration
|
||||
if semantic_settings is None and semantic_configuration_name is not None:
|
||||
if NEW_VERSION:
|
||||
# >=11.4.0: SemanticSettings --> SemanticSearch
|
||||
# PrioritizedFields(prioritized_content_fields)
|
||||
# --> SemanticPrioritizedFields(content_fields)
|
||||
semantic_settings = SemanticSearch(
|
||||
configurations=[
|
||||
SemanticConfiguration(
|
||||
semantic_search = None
|
||||
if semantic_configurations is None and semantic_configuration_name is not None:
|
||||
semantic_configuration = SemanticConfiguration(
|
||||
name=semantic_configuration_name,
|
||||
prioritized_fields=SemanticPrioritizedFields(
|
||||
content_fields=[
|
||||
SemanticField(field_name=FIELDS_CONTENT)
|
||||
],
|
||||
content_fields=[SemanticField(field_name=FIELDS_CONTENT)],
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
else: # < 11.4.0
|
||||
semantic_settings = SemanticSettings(
|
||||
configurations=[
|
||||
SemanticConfiguration(
|
||||
name=semantic_configuration_name,
|
||||
prioritized_fields=PrioritizedFields(
|
||||
prioritized_content_fields=[
|
||||
SemanticField(field_name=FIELDS_CONTENT)
|
||||
],
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
semantic_search = SemanticSearch(configurations=[semantic_configuration])
|
||||
|
||||
# Create the search index with the semantic settings and vector search
|
||||
index = SearchIndex(
|
||||
name=index_name,
|
||||
fields=fields,
|
||||
vector_search=vector_search,
|
||||
semantic_settings=semantic_settings,
|
||||
semantic_search=semantic_search,
|
||||
scoring_profiles=scoring_profiles,
|
||||
default_scoring_profile=default_scoring_profile,
|
||||
cors_options=cors_options,
|
||||
@ -251,10 +215,9 @@ class AzureSearch(VectorStore):
|
||||
embedding_function: Union[Callable, Embeddings],
|
||||
search_type: str = "hybrid",
|
||||
semantic_configuration_name: Optional[str] = None,
|
||||
semantic_query_language: str = "en-us",
|
||||
fields: Optional[List[SearchField]] = None,
|
||||
vector_search: Optional[VectorSearch] = None,
|
||||
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
|
||||
semantic_configurations: Optional[SemanticConfiguration] = None,
|
||||
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
||||
default_scoring_profile: Optional[str] = None,
|
||||
cors_options: Optional[CorsOptions] = None,
|
||||
@ -309,7 +272,7 @@ class AzureSearch(VectorStore):
|
||||
semantic_configuration_name=semantic_configuration_name,
|
||||
fields=fields,
|
||||
vector_search=vector_search,
|
||||
semantic_settings=semantic_settings,
|
||||
semantic_configurations=semantic_configurations,
|
||||
scoring_profiles=scoring_profiles,
|
||||
default_scoring_profile=default_scoring_profile,
|
||||
default_fields=default_fields,
|
||||
@ -318,7 +281,6 @@ class AzureSearch(VectorStore):
|
||||
)
|
||||
self.search_type = search_type
|
||||
self.semantic_configuration_name = semantic_configuration_name
|
||||
self.semantic_query_language = semantic_query_language
|
||||
self.fields = fields if fields else default_fields
|
||||
|
||||
@property
|
||||
@ -451,39 +413,30 @@ class AzureSearch(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
from azure.search.documents.models import Vector
|
||||
|
||||
from azure.search.documents.models import VectorizedQuery
|
||||
|
||||
results = self.client.search(
|
||||
search_text="",
|
||||
vectors=[
|
||||
Vector(
|
||||
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||
k=k,
|
||||
vector_queries=[
|
||||
VectorizedQuery(
|
||||
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||
k_nearest_neighbors=k,
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
filter=filters,
|
||||
top=k,
|
||||
)
|
||||
# Convert results to Document objects
|
||||
docs = [
|
||||
(
|
||||
Document(
|
||||
page_content=result.pop(FIELDS_CONTENT),
|
||||
metadata={
|
||||
**(
|
||||
{FIELDS_ID: result.pop(FIELDS_ID)}
|
||||
if FIELDS_ID in result
|
||||
else {}
|
||||
),
|
||||
**(
|
||||
json.loads(result[FIELDS_METADATA])
|
||||
metadata=json.loads(result[FIELDS_METADATA])
|
||||
if FIELDS_METADATA in result
|
||||
else {
|
||||
k: v
|
||||
for k, v in result.items()
|
||||
if k != FIELDS_CONTENT_VECTOR
|
||||
}
|
||||
),
|
||||
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||
},
|
||||
),
|
||||
float(result["@search.score"]),
|
||||
@ -520,14 +473,14 @@ class AzureSearch(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
from azure.search.documents.models import Vector
|
||||
from azure.search.documents.models import VectorizedQuery
|
||||
|
||||
results = self.client.search(
|
||||
search_text=query,
|
||||
vectors=[
|
||||
Vector(
|
||||
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||
k=k,
|
||||
vector_queries=[
|
||||
VectorizedQuery(
|
||||
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||
k_nearest_neighbors=k,
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
@ -539,21 +492,10 @@ class AzureSearch(VectorStore):
|
||||
(
|
||||
Document(
|
||||
page_content=result.pop(FIELDS_CONTENT),
|
||||
metadata={
|
||||
**(
|
||||
{FIELDS_ID: result.pop(FIELDS_ID)}
|
||||
if FIELDS_ID in result
|
||||
else {}
|
||||
),
|
||||
**(
|
||||
json.loads(result[FIELDS_METADATA])
|
||||
metadata=json.loads(result[FIELDS_METADATA])
|
||||
if FIELDS_METADATA in result
|
||||
else {
|
||||
k: v
|
||||
for k, v in result.items()
|
||||
if k != FIELDS_CONTENT_VECTOR
|
||||
}
|
||||
),
|
||||
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||
},
|
||||
),
|
||||
float(result["@search.score"]),
|
||||
@ -610,20 +552,19 @@ class AzureSearch(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
from azure.search.documents.models import Vector
|
||||
from azure.search.documents.models import VectorizedQuery
|
||||
|
||||
results = self.client.search(
|
||||
search_text=query,
|
||||
vectors=[
|
||||
Vector(
|
||||
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||
k=50,
|
||||
vector_queries=[
|
||||
VectorizedQuery(
|
||||
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||
k_nearest_neighbors=k,
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
filter=filters,
|
||||
query_type="semantic",
|
||||
query_language=self.semantic_query_language,
|
||||
semantic_configuration_name=self.semantic_configuration_name,
|
||||
query_caption="extractive",
|
||||
query_answer="extractive",
|
||||
@ -643,11 +584,6 @@ class AzureSearch(VectorStore):
|
||||
Document(
|
||||
page_content=result.pop(FIELDS_CONTENT),
|
||||
metadata={
|
||||
**(
|
||||
{FIELDS_ID: result.pop(FIELDS_ID)}
|
||||
if FIELDS_ID in result
|
||||
else {}
|
||||
),
|
||||
**(
|
||||
json.loads(result[FIELDS_METADATA])
|
||||
if FIELDS_METADATA in result
|
||||
@ -667,9 +603,7 @@ class AzureSearch(VectorStore):
|
||||
if result.get("@search.captions")
|
||||
else {},
|
||||
"answers": semantic_answers_dict.get(
|
||||
json.loads(result[FIELDS_METADATA]).get("key")
|
||||
if FIELDS_METADATA in result
|
||||
else "",
|
||||
json.loads(result["metadata"]).get("key"),
|
||||
"",
|
||||
),
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user