From 567dc1e4228ec4baa6d04655df006e4428234d49 Mon Sep 17 00:00:00 2001
From: Renzo-vS <renzovanslooten@gmail.com>
Date: Wed, 20 Nov 2024 21:49:03 +0100
Subject: [PATCH] community: fix duplicate content (#28003)

Thank you for reading my first PR!

**Description:**
Deduplicate content in AzureSearch vectorstore.
Currently, by default, the retrieved content is placed in both the
metadata and the page_content of a Document.
This PR removes the content from metadata and leaves it only in
page_content.

**Issue:**
Previously, the content was popped from the result before the metadata
was populated.
In #25828, the order was changed, which led to a response with
duplicated content.
This was not the intention of that PR and seems undesirable.

Looking forward to seeing my contribution in the next version!

Cheers,
Renzo
---
 .../langchain_community/vectorstores/azuresearch.py         | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py
index 193f6fc680e..2a715b846f8 100644
--- a/libs/community/langchain_community/vectorstores/azuresearch.py
+++ b/libs/community/langchain_community/vectorstores/azuresearch.py
@@ -1798,7 +1798,9 @@ def _result_to_document(result: Dict) -> Document:
             fields_metadata = json.loads(result[FIELDS_METADATA])
     else:
         fields_metadata = {
-            key: value for key, value in result.items() if key != FIELDS_CONTENT_VECTOR
+            key: value
+            for key, value in result.items()
+            if key not in [FIELDS_CONTENT_VECTOR, FIELDS_CONTENT]
         }
     # IDs
     if FIELDS_ID in result:
@@ -1806,7 +1808,7 @@ def _result_to_document(result: Dict) -> Document:
     else:
         fields_id = {}
     return Document(
-        page_content=result.pop(FIELDS_CONTENT),
+        page_content=result[FIELDS_CONTENT],
         metadata={
             **fields_id,
             **fields_metadata,