mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 19:57:51 +00:00
community[patch]: update AzureSearch class to work with azure-search-documents=11.4.0 (#15659)
- **Description:** Updates `libs/community/langchain_community/vectorstores/azuresearch.py` to support the stable version `azure-search-documents=11.4.0`
- **Issue:** https://github.com/langchain-ai/langchain/issues/14534, https://github.com/langchain-ai/langchain/issues/15039, https://github.com/langchain-ai/langchain/issues/15355
- **Dependencies:** azure-search-documents>=11.4.0

---------

Co-authored-by: Clément Tamines <Skar0@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
e135dc70c3
commit
30af711c34
@ -37,14 +37,10 @@ if TYPE_CHECKING:
|
|||||||
CorsOptions,
|
CorsOptions,
|
||||||
ScoringProfile,
|
ScoringProfile,
|
||||||
SearchField,
|
SearchField,
|
||||||
|
SemanticConfiguration,
|
||||||
VectorSearch,
|
VectorSearch,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
from azure.search.documents.indexes.models import SemanticSearch
|
|
||||||
except ImportError:
|
|
||||||
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0
|
|
||||||
|
|
||||||
# Allow overriding field names for Azure Search
|
# Allow overriding field names for Azure Search
|
||||||
FIELDS_ID = get_from_env(
|
FIELDS_ID = get_from_env(
|
||||||
key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id"
|
key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id"
|
||||||
@ -73,7 +69,7 @@ def _get_search_client(
|
|||||||
semantic_configuration_name: Optional[str] = None,
|
semantic_configuration_name: Optional[str] = None,
|
||||||
fields: Optional[List[SearchField]] = None,
|
fields: Optional[List[SearchField]] = None,
|
||||||
vector_search: Optional[VectorSearch] = None,
|
vector_search: Optional[VectorSearch] = None,
|
||||||
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
|
semantic_configurations: Optional[SemanticConfiguration] = None,
|
||||||
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
||||||
default_scoring_profile: Optional[str] = None,
|
default_scoring_profile: Optional[str] = None,
|
||||||
default_fields: Optional[List[SearchField]] = None,
|
default_fields: Optional[List[SearchField]] = None,
|
||||||
@ -86,30 +82,20 @@ def _get_search_client(
|
|||||||
from azure.search.documents import SearchClient
|
from azure.search.documents import SearchClient
|
||||||
from azure.search.documents.indexes import SearchIndexClient
|
from azure.search.documents.indexes import SearchIndexClient
|
||||||
from azure.search.documents.indexes.models import (
|
from azure.search.documents.indexes.models import (
|
||||||
|
ExhaustiveKnnAlgorithmConfiguration,
|
||||||
|
ExhaustiveKnnParameters,
|
||||||
|
HnswAlgorithmConfiguration,
|
||||||
|
HnswParameters,
|
||||||
SearchIndex,
|
SearchIndex,
|
||||||
SemanticConfiguration,
|
SemanticConfiguration,
|
||||||
SemanticField,
|
SemanticField,
|
||||||
VectorSearch,
|
SemanticPrioritizedFields,
|
||||||
|
SemanticSearch,
|
||||||
|
VectorSearchAlgorithmKind,
|
||||||
|
VectorSearchAlgorithmMetric,
|
||||||
|
VectorSearchProfile,
|
||||||
)
|
)
|
||||||
|
|
||||||
# class names changed for versions >= 11.4.0
|
|
||||||
try:
|
|
||||||
from azure.search.documents.indexes.models import (
|
|
||||||
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
|
|
||||||
SemanticPrioritizedFields, # PrioritizedFields outdated
|
|
||||||
SemanticSearch, # SemanticSettings outdated
|
|
||||||
)
|
|
||||||
|
|
||||||
NEW_VERSION = True
|
|
||||||
except ImportError:
|
|
||||||
from azure.search.documents.indexes.models import (
|
|
||||||
HnswVectorSearchAlgorithmConfiguration,
|
|
||||||
PrioritizedFields,
|
|
||||||
SemanticSettings,
|
|
||||||
)
|
|
||||||
|
|
||||||
NEW_VERSION = False
|
|
||||||
|
|
||||||
default_fields = default_fields or []
|
default_fields = default_fields or []
|
||||||
if key is None:
|
if key is None:
|
||||||
credential = DefaultAzureCredential()
|
credential = DefaultAzureCredential()
|
||||||
@ -155,77 +141,55 @@ def _get_search_client(
|
|||||||
fields = default_fields
|
fields = default_fields
|
||||||
# Vector search configuration
|
# Vector search configuration
|
||||||
if vector_search is None:
|
if vector_search is None:
|
||||||
if NEW_VERSION:
|
vector_search = VectorSearch(
|
||||||
# >= 11.4.0:
|
algorithms=[
|
||||||
# VectorSearch(algorithm_configuration) --> VectorSearch(algorithms)
|
HnswAlgorithmConfiguration(
|
||||||
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
|
name="default",
|
||||||
vector_search = VectorSearch(
|
kind=VectorSearchAlgorithmKind.HNSW,
|
||||||
algorithms=[
|
parameters=HnswParameters(
|
||||||
HnswAlgorithmConfiguration(
|
m=4,
|
||||||
name="default",
|
ef_construction=400,
|
||||||
kind="hnsw",
|
ef_search=500,
|
||||||
parameters={ # type: ignore
|
metric=VectorSearchAlgorithmMetric.COSINE,
|
||||||
"m": 4,
|
),
|
||||||
"efConstruction": 400,
|
),
|
||||||
"efSearch": 500,
|
ExhaustiveKnnAlgorithmConfiguration(
|
||||||
"metric": "cosine",
|
name="default_exhaustive_knn",
|
||||||
},
|
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
|
||||||
)
|
parameters=ExhaustiveKnnParameters(
|
||||||
]
|
metric=VectorSearchAlgorithmMetric.COSINE
|
||||||
)
|
),
|
||||||
else: # < 11.4.0
|
),
|
||||||
vector_search = VectorSearch(
|
],
|
||||||
algorithm_configurations=[
|
profiles=[
|
||||||
HnswVectorSearchAlgorithmConfiguration(
|
VectorSearchProfile(
|
||||||
name="default",
|
name="myHnswProfile",
|
||||||
kind="hnsw",
|
algorithm_configuration_name="default",
|
||||||
parameters={ # type: ignore
|
),
|
||||||
"m": 4,
|
VectorSearchProfile(
|
||||||
"efConstruction": 400,
|
name="myExhaustiveKnnProfile",
|
||||||
"efSearch": 500,
|
algorithm_configuration_name="default_exhaustive_knn",
|
||||||
"metric": "cosine",
|
),
|
||||||
},
|
],
|
||||||
)
|
)
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create the semantic settings with the configuration
|
# Create the semantic settings with the configuration
|
||||||
if semantic_settings is None and semantic_configuration_name is not None:
|
semantic_search = None
|
||||||
if NEW_VERSION:
|
if semantic_configurations is None and semantic_configuration_name is not None:
|
||||||
# >=11.4.0: SemanticSettings --> SemanticSearch
|
semantic_configuration = SemanticConfiguration(
|
||||||
# PrioritizedFields(prioritized_content_fields)
|
name=semantic_configuration_name,
|
||||||
# --> SemanticPrioritizedFields(content_fields)
|
prioritized_fields=SemanticPrioritizedFields(
|
||||||
semantic_settings = SemanticSearch(
|
content_fields=[SemanticField(field_name=FIELDS_CONTENT)],
|
||||||
configurations=[
|
),
|
||||||
SemanticConfiguration(
|
)
|
||||||
name=semantic_configuration_name,
|
semantic_search = SemanticSearch(configurations=[semantic_configuration])
|
||||||
prioritized_fields=SemanticPrioritizedFields(
|
|
||||||
content_fields=[
|
|
||||||
SemanticField(field_name=FIELDS_CONTENT)
|
|
||||||
],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
else: # < 11.4.0
|
|
||||||
semantic_settings = SemanticSettings(
|
|
||||||
configurations=[
|
|
||||||
SemanticConfiguration(
|
|
||||||
name=semantic_configuration_name,
|
|
||||||
prioritized_fields=PrioritizedFields(
|
|
||||||
prioritized_content_fields=[
|
|
||||||
SemanticField(field_name=FIELDS_CONTENT)
|
|
||||||
],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
# Create the search index with the semantic settings and vector search
|
# Create the search index with the semantic settings and vector search
|
||||||
index = SearchIndex(
|
index = SearchIndex(
|
||||||
name=index_name,
|
name=index_name,
|
||||||
fields=fields,
|
fields=fields,
|
||||||
vector_search=vector_search,
|
vector_search=vector_search,
|
||||||
semantic_settings=semantic_settings,
|
semantic_search=semantic_search,
|
||||||
scoring_profiles=scoring_profiles,
|
scoring_profiles=scoring_profiles,
|
||||||
default_scoring_profile=default_scoring_profile,
|
default_scoring_profile=default_scoring_profile,
|
||||||
cors_options=cors_options,
|
cors_options=cors_options,
|
||||||
@ -251,10 +215,9 @@ class AzureSearch(VectorStore):
|
|||||||
embedding_function: Union[Callable, Embeddings],
|
embedding_function: Union[Callable, Embeddings],
|
||||||
search_type: str = "hybrid",
|
search_type: str = "hybrid",
|
||||||
semantic_configuration_name: Optional[str] = None,
|
semantic_configuration_name: Optional[str] = None,
|
||||||
semantic_query_language: str = "en-us",
|
|
||||||
fields: Optional[List[SearchField]] = None,
|
fields: Optional[List[SearchField]] = None,
|
||||||
vector_search: Optional[VectorSearch] = None,
|
vector_search: Optional[VectorSearch] = None,
|
||||||
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
|
semantic_configurations: Optional[SemanticConfiguration] = None,
|
||||||
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
||||||
default_scoring_profile: Optional[str] = None,
|
default_scoring_profile: Optional[str] = None,
|
||||||
cors_options: Optional[CorsOptions] = None,
|
cors_options: Optional[CorsOptions] = None,
|
||||||
@ -309,7 +272,7 @@ class AzureSearch(VectorStore):
|
|||||||
semantic_configuration_name=semantic_configuration_name,
|
semantic_configuration_name=semantic_configuration_name,
|
||||||
fields=fields,
|
fields=fields,
|
||||||
vector_search=vector_search,
|
vector_search=vector_search,
|
||||||
semantic_settings=semantic_settings,
|
semantic_configurations=semantic_configurations,
|
||||||
scoring_profiles=scoring_profiles,
|
scoring_profiles=scoring_profiles,
|
||||||
default_scoring_profile=default_scoring_profile,
|
default_scoring_profile=default_scoring_profile,
|
||||||
default_fields=default_fields,
|
default_fields=default_fields,
|
||||||
@ -318,7 +281,6 @@ class AzureSearch(VectorStore):
|
|||||||
)
|
)
|
||||||
self.search_type = search_type
|
self.search_type = search_type
|
||||||
self.semantic_configuration_name = semantic_configuration_name
|
self.semantic_configuration_name = semantic_configuration_name
|
||||||
self.semantic_query_language = semantic_query_language
|
|
||||||
self.fields = fields if fields else default_fields
|
self.fields = fields if fields else default_fields
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -451,39 +413,30 @@ class AzureSearch(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query and score for each
|
List of Documents most similar to the query and score for each
|
||||||
"""
|
"""
|
||||||
from azure.search.documents.models import Vector
|
|
||||||
|
from azure.search.documents.models import VectorizedQuery
|
||||||
|
|
||||||
results = self.client.search(
|
results = self.client.search(
|
||||||
search_text="",
|
search_text="",
|
||||||
vectors=[
|
vector_queries=[
|
||||||
Vector(
|
VectorizedQuery(
|
||||||
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||||
k=k,
|
k_nearest_neighbors=k,
|
||||||
fields=FIELDS_CONTENT_VECTOR,
|
fields=FIELDS_CONTENT_VECTOR,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
filter=filters,
|
filter=filters,
|
||||||
|
top=k,
|
||||||
)
|
)
|
||||||
# Convert results to Document objects
|
# Convert results to Document objects
|
||||||
docs = [
|
docs = [
|
||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=result.pop(FIELDS_CONTENT),
|
page_content=result.pop(FIELDS_CONTENT),
|
||||||
metadata={
|
metadata=json.loads(result[FIELDS_METADATA])
|
||||||
**(
|
if FIELDS_METADATA in result
|
||||||
{FIELDS_ID: result.pop(FIELDS_ID)}
|
else {
|
||||||
if FIELDS_ID in result
|
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||||
else {}
|
|
||||||
),
|
|
||||||
**(
|
|
||||||
json.loads(result[FIELDS_METADATA])
|
|
||||||
if FIELDS_METADATA in result
|
|
||||||
else {
|
|
||||||
k: v
|
|
||||||
for k, v in result.items()
|
|
||||||
if k != FIELDS_CONTENT_VECTOR
|
|
||||||
}
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
float(result["@search.score"]),
|
float(result["@search.score"]),
|
||||||
@ -520,14 +473,14 @@ class AzureSearch(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query and score for each
|
List of Documents most similar to the query and score for each
|
||||||
"""
|
"""
|
||||||
from azure.search.documents.models import Vector
|
from azure.search.documents.models import VectorizedQuery
|
||||||
|
|
||||||
results = self.client.search(
|
results = self.client.search(
|
||||||
search_text=query,
|
search_text=query,
|
||||||
vectors=[
|
vector_queries=[
|
||||||
Vector(
|
VectorizedQuery(
|
||||||
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||||
k=k,
|
k_nearest_neighbors=k,
|
||||||
fields=FIELDS_CONTENT_VECTOR,
|
fields=FIELDS_CONTENT_VECTOR,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@ -539,21 +492,10 @@ class AzureSearch(VectorStore):
|
|||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=result.pop(FIELDS_CONTENT),
|
page_content=result.pop(FIELDS_CONTENT),
|
||||||
metadata={
|
metadata=json.loads(result[FIELDS_METADATA])
|
||||||
**(
|
if FIELDS_METADATA in result
|
||||||
{FIELDS_ID: result.pop(FIELDS_ID)}
|
else {
|
||||||
if FIELDS_ID in result
|
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||||
else {}
|
|
||||||
),
|
|
||||||
**(
|
|
||||||
json.loads(result[FIELDS_METADATA])
|
|
||||||
if FIELDS_METADATA in result
|
|
||||||
else {
|
|
||||||
k: v
|
|
||||||
for k, v in result.items()
|
|
||||||
if k != FIELDS_CONTENT_VECTOR
|
|
||||||
}
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
float(result["@search.score"]),
|
float(result["@search.score"]),
|
||||||
@ -610,20 +552,19 @@ class AzureSearch(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query and score for each
|
List of Documents most similar to the query and score for each
|
||||||
"""
|
"""
|
||||||
from azure.search.documents.models import Vector
|
from azure.search.documents.models import VectorizedQuery
|
||||||
|
|
||||||
results = self.client.search(
|
results = self.client.search(
|
||||||
search_text=query,
|
search_text=query,
|
||||||
vectors=[
|
vector_queries=[
|
||||||
Vector(
|
VectorizedQuery(
|
||||||
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
||||||
k=50,
|
k_nearest_neighbors=k,
|
||||||
fields=FIELDS_CONTENT_VECTOR,
|
fields=FIELDS_CONTENT_VECTOR,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
filter=filters,
|
filter=filters,
|
||||||
query_type="semantic",
|
query_type="semantic",
|
||||||
query_language=self.semantic_query_language,
|
|
||||||
semantic_configuration_name=self.semantic_configuration_name,
|
semantic_configuration_name=self.semantic_configuration_name,
|
||||||
query_caption="extractive",
|
query_caption="extractive",
|
||||||
query_answer="extractive",
|
query_answer="extractive",
|
||||||
@ -643,11 +584,6 @@ class AzureSearch(VectorStore):
|
|||||||
Document(
|
Document(
|
||||||
page_content=result.pop(FIELDS_CONTENT),
|
page_content=result.pop(FIELDS_CONTENT),
|
||||||
metadata={
|
metadata={
|
||||||
**(
|
|
||||||
{FIELDS_ID: result.pop(FIELDS_ID)}
|
|
||||||
if FIELDS_ID in result
|
|
||||||
else {}
|
|
||||||
),
|
|
||||||
**(
|
**(
|
||||||
json.loads(result[FIELDS_METADATA])
|
json.loads(result[FIELDS_METADATA])
|
||||||
if FIELDS_METADATA in result
|
if FIELDS_METADATA in result
|
||||||
@ -667,9 +603,7 @@ class AzureSearch(VectorStore):
|
|||||||
if result.get("@search.captions")
|
if result.get("@search.captions")
|
||||||
else {},
|
else {},
|
||||||
"answers": semantic_answers_dict.get(
|
"answers": semantic_answers_dict.get(
|
||||||
json.loads(result[FIELDS_METADATA]).get("key")
|
json.loads(result["metadata"]).get("key"),
|
||||||
if FIELDS_METADATA in result
|
|
||||||
else "",
|
|
||||||
"",
|
"",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user