mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-13 08:27:03 +00:00
Azure Search: Remove select field restrictions and expand metadata to other fields, also expose kwargs to searches (#9894)
Description: If the metadata field is returned in the results, the previous behavior is unchanged. If the metadata field does not exist in the results, the metadata is expanded to include any fields returned other than the content field. There is precedent for this; see the retriever: https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/retrievers/azure_cognitive_search.py#L96C46-L96C46 Issue: #9765 - Ameliorates the hard-coding for cases where you have already indexed to Cognitive Search without a metadata field and instead placed the metadata in separate fields. @hwchase17
This commit is contained in:
parent
94cf71ecfa
commit
4f19ba3065
@ -378,15 +378,18 @@ class AzureSearch(VectorStore):
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
||||
filter=filters,
|
||||
)
|
||||
# Convert results to Document objects
|
||||
docs = [
|
||||
(
|
||||
Document(
|
||||
page_content=result[FIELDS_CONTENT],
|
||||
metadata=json.loads(result[FIELDS_METADATA]),
|
||||
page_content=result.pop(FIELDS_CONTENT),
|
||||
metadata=json.loads(result[FIELDS_METADATA])
|
||||
if FIELDS_METADATA in result
|
||||
else {
|
||||
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||
},
|
||||
),
|
||||
float(result["@search.score"]),
|
||||
)
|
||||
@ -435,7 +438,6 @@ class AzureSearch(VectorStore):
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
||||
filter=filters,
|
||||
top=k,
|
||||
)
|
||||
@ -443,8 +445,12 @@ class AzureSearch(VectorStore):
|
||||
docs = [
|
||||
(
|
||||
Document(
|
||||
page_content=result[FIELDS_CONTENT],
|
||||
metadata=json.loads(result[FIELDS_METADATA]),
|
||||
page_content=result.pop(FIELDS_CONTENT),
|
||||
metadata=json.loads(result[FIELDS_METADATA])
|
||||
if FIELDS_METADATA in result
|
||||
else {
|
||||
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||
},
|
||||
),
|
||||
float(result["@search.score"]),
|
||||
)
|
||||
@ -495,7 +501,6 @@ class AzureSearch(VectorStore):
|
||||
fields=FIELDS_CONTENT_VECTOR,
|
||||
)
|
||||
],
|
||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
||||
filter=filters,
|
||||
query_type="semantic",
|
||||
query_language=self.semantic_query_language,
|
||||
@ -516,9 +521,17 @@ class AzureSearch(VectorStore):
|
||||
docs = [
|
||||
(
|
||||
Document(
|
||||
page_content=result["content"],
|
||||
page_content=result.pop(FIELDS_CONTENT),
|
||||
metadata={
|
||||
**json.loads(result["metadata"]),
|
||||
**(
|
||||
json.loads(result[FIELDS_METADATA])
|
||||
if FIELDS_METADATA in result
|
||||
else {
|
||||
k: v
|
||||
for k, v in result.items()
|
||||
if k != FIELDS_CONTENT_VECTOR
|
||||
}
|
||||
),
|
||||
**{
|
||||
"captions": {
|
||||
"text": result.get("@search.captions", [{}])[0].text,
|
||||
@ -590,15 +603,15 @@ class AzureSearchVectorStoreRetriever(BaseRetriever):
|
||||
def _get_relevant_documents(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_manager: CallbackManagerForRetrieverRun,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
if self.search_type == "similarity":
|
||||
docs = self.vectorstore.vector_search(query, k=self.k)
|
||||
docs = self.vectorstore.vector_search(query, k=self.k, **kwargs)
|
||||
elif self.search_type == "hybrid":
|
||||
docs = self.vectorstore.hybrid_search(query, k=self.k)
|
||||
docs = self.vectorstore.hybrid_search(query, k=self.k, **kwargs)
|
||||
elif self.search_type == "semantic_hybrid":
|
||||
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k)
|
||||
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k, **kwargs)
|
||||
else:
|
||||
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
||||
return docs
|
||||
|
Loading…
Reference in New Issue
Block a user