mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 12:59:07 +00:00
Azure Cognitive Search - update sdk b8, mod user agent, search with scores (#9191)
Description: Update the Azure Cognitive Search SDK (azure-search-documents) to version 11.4.0b8 (breaking change). Make the user agent customizable. Implement similarity search with scores. @baskaryan --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -73,6 +73,7 @@ def _get_search_client(
|
||||
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
||||
default_scoring_profile: Optional[str] = None,
|
||||
default_fields: Optional[List[SearchField]] = None,
|
||||
user_agent: Optional[str] = "langchain",
|
||||
) -> SearchClient:
|
||||
from azure.core.credentials import AzureKeyCredential
|
||||
from azure.core.exceptions import ResourceNotFoundError
|
||||
@@ -80,13 +81,13 @@ def _get_search_client(
|
||||
from azure.search.documents import SearchClient
|
||||
from azure.search.documents.indexes import SearchIndexClient
|
||||
from azure.search.documents.indexes.models import (
|
||||
HnswVectorSearchAlgorithmConfiguration,
|
||||
PrioritizedFields,
|
||||
SearchIndex,
|
||||
SemanticConfiguration,
|
||||
SemanticField,
|
||||
SemanticSettings,
|
||||
VectorSearch,
|
||||
VectorSearchAlgorithmConfiguration,
|
||||
)
|
||||
|
||||
default_fields = default_fields or []
|
||||
@@ -95,7 +96,7 @@ def _get_search_client(
|
||||
else:
|
||||
credential = AzureKeyCredential(key)
|
||||
index_client: SearchIndexClient = SearchIndexClient(
|
||||
endpoint=endpoint, credential=credential, user_agent="langchain"
|
||||
endpoint=endpoint, credential=credential, user_agent=user_agent
|
||||
)
|
||||
try:
|
||||
index_client.get_index(name=index_name)
|
||||
@@ -130,10 +131,10 @@ def _get_search_client(
|
||||
if vector_search is None:
|
||||
vector_search = VectorSearch(
|
||||
algorithm_configurations=[
|
||||
VectorSearchAlgorithmConfiguration(
|
||||
HnswVectorSearchAlgorithmConfiguration(
|
||||
name="default",
|
||||
kind="hnsw",
|
||||
hnsw_parameters={ # type: ignore
|
||||
parameters={ # type: ignore
|
||||
"m": 4,
|
||||
"efConstruction": 400,
|
||||
"efSearch": 500,
|
||||
@@ -171,7 +172,7 @@ def _get_search_client(
|
||||
endpoint=endpoint,
|
||||
index_name=index_name,
|
||||
credential=credential,
|
||||
user_agent="langchain",
|
||||
user_agent=user_agent,
|
||||
)
|
||||
|
||||
|
||||
@@ -227,6 +228,9 @@ class AzureSearch(VectorStore):
|
||||
type=SearchFieldDataType.String,
|
||||
),
|
||||
]
|
||||
user_agent = "langchain"
|
||||
if "user_agent" in kwargs and kwargs["user_agent"]:
|
||||
user_agent += " " + kwargs["user_agent"]
|
||||
self.client = _get_search_client(
|
||||
azure_search_endpoint,
|
||||
azure_search_key,
|
||||
@@ -238,6 +242,7 @@ class AzureSearch(VectorStore):
|
||||
scoring_profiles=scoring_profiles,
|
||||
default_scoring_profile=default_scoring_profile,
|
||||
default_fields=default_fields,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
self.search_type = search_type
|
||||
self.semantic_configuration_name = semantic_configuration_name
|
||||
@@ -321,6 +326,17 @@ class AzureSearch(VectorStore):
|
||||
raise ValueError(f"search_type of {search_type} not allowed.")
|
||||
return docs
|
||||
|
||||
def similarity_search_with_relevance_scores(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
score_threshold = kwargs.pop("score_threshold", None)
|
||||
result = self.vector_search_with_score(query, k=k, **kwargs)
|
||||
return (
|
||||
result
|
||||
if score_threshold is None
|
||||
else [r for r in result if r[1] >= score_threshold]
|
||||
)
|
||||
|
||||
def vector_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
|
||||
"""
|
||||
Returns the most similar indexed documents to the query text.
|
||||
@@ -349,12 +365,19 @@ class AzureSearch(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
from azure.search.documents.models import Vector
|
||||
|
||||
results = self.client.search(
|
||||
search_text="",
|
||||
vector=np.array(self.embedding_function(query), dtype=np.float32).tolist(),
|
||||
top_k=k,
|
||||
vector_fields=FIELDS_CONTENT_VECTOR,
|
||||
vectors=[
|
||||
Vector(
|
||||
value=np.array(
|
||||
self.embedding_function(query), dtype=np.float32
|
||||
).tolist(),
|
||||
k=k,
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
||||
filter=filters,
|
||||
)
|
||||
@@ -399,12 +422,19 @@ class AzureSearch(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
from azure.search.documents.models import Vector
|
||||
|
||||
results = self.client.search(
|
||||
search_text=query,
|
||||
vector=np.array(self.embedding_function(query), dtype=np.float32).tolist(),
|
||||
top_k=k,
|
||||
vector_fields=FIELDS_CONTENT_VECTOR,
|
||||
vectors=[
|
||||
Vector(
|
||||
value=np.array(
|
||||
self.embedding_function(query), dtype=np.float32
|
||||
).tolist(),
|
||||
k=k,
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
||||
filter=filters,
|
||||
top=k,
|
||||
@@ -452,11 +482,19 @@ class AzureSearch(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
from azure.search.documents.models import Vector
|
||||
|
||||
results = self.client.search(
|
||||
search_text=query,
|
||||
vector=np.array(self.embedding_function(query), dtype=np.float32).tolist(),
|
||||
top_k=50, # Hardcoded value to maximize L2 retrieval
|
||||
vector_fields=FIELDS_CONTENT_VECTOR,
|
||||
vectors=[
|
||||
Vector(
|
||||
value=np.array(
|
||||
self.embedding_function(query), dtype=np.float32
|
||||
).tolist(),
|
||||
k=50,
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
||||
filter=filters,
|
||||
query_type="semantic",
|
||||
|
8
libs/langchain/poetry.lock
generated
8
libs/langchain/poetry.lock
generated
@@ -719,13 +719,13 @@ msal-extensions = ">=0.3.0,<2.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "azure-search-documents"
|
||||
version = "11.4.0b6"
|
||||
version = "11.4.0b8"
|
||||
description = "Microsoft Azure Cognitive Search Client Library for Python"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "azure-search-documents-11.4.0b6.zip", hash = "sha256:c9ebd7d99d3c7b879f48acad66141e1f50eae4468cfb8389a4b25d4c620e8df1"},
|
||||
{file = "azure_search_documents-11.4.0b6-py3-none-any.whl", hash = "sha256:24ff85bf2680c36b38d8092bcbbe2d90699aac7c4a228b0839c0ce595a41628c"},
|
||||
{file = "azure-search-documents-11.4.0b8.zip", hash = "sha256:b178ff52918590191a9cb7f411a9ab3cb517663666a501a3e84b715d19b0d93b"},
|
||||
{file = "azure_search_documents-11.4.0b8-py3-none-any.whl", hash = "sha256:4137daa2db75bff9484d394c16c0604822a51281cad2f50e11d7c48dd8d4b4cf"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -10447,4 +10447,4 @@ text-helpers = ["chardet"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "88e479307b19d991105360780f67ed3258ef1a0151f70b9e91c86c8153751e83"
|
||||
content-hash = "43a6bd42efc0baf917418087f788aaf3b1bc793cb4aa81de99c52ed6a7d54d26"
|
||||
|
@@ -105,7 +105,7 @@ nebula3-python = {version = "^3.4.0", optional = true}
|
||||
mwparserfromhell = {version = "^0.6.4", optional = true}
|
||||
mwxml = {version = "^0.3.3", optional = true}
|
||||
awadb = {version = "^0.3.9", optional = true}
|
||||
azure-search-documents = {version = "11.4.0b6", optional = true}
|
||||
azure-search-documents = {version = "11.4.0b8", optional = true}
|
||||
esprima = {version = "^4.0.1", optional = true}
|
||||
streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
|
||||
psychicapi = {version = "^0.8.0", optional = true}
|
||||
|
Reference in New Issue
Block a user