From df234fb171fba636d4efc4c1e11639e8ca70f480 Mon Sep 17 00:00:00 2001 From: Kai Kugler Date: Thu, 29 Feb 2024 23:54:37 +0100 Subject: [PATCH] community[patch]: Fixing embedchain document mapping (#18255) - **Description:** The current embedchain implementation seems to return document metadata in a different shape than the langchain retriever currently expects, which causes a KeyError to be thrown. I would love for someone else to test this... --------- Co-authored-by: KKUGLER Co-authored-by: Harrison Chase Co-authored-by: Deshraj Yadav --- libs/community/langchain_community/retrievers/embedchain.py | 5 ++++- .../tests/integration_tests/retrievers/test_embedchain.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_community/retrievers/embedchain.py b/libs/community/langchain_community/retrievers/embedchain.py index 0fae1b0f65a..9c64f628e1e 100644 --- a/libs/community/langchain_community/retrievers/embedchain.py +++ b/libs/community/langchain_community/retrievers/embedchain.py @@ -65,7 +65,10 @@ class EmbedchainRetriever(BaseRetriever): docs.append( Document( page_content=r["context"], - metadata={"source": r["source"], "document_id": r["document_id"]}, + metadata={ + "source": r["metadata"]["url"], + "document_id": r["metadata"]["doc_id"], + }, ) ) return docs diff --git a/libs/community/tests/integration_tests/retrievers/test_embedchain.py b/libs/community/tests/integration_tests/retrievers/test_embedchain.py index 1049616ef21..dc58938db17 100644 --- a/libs/community/tests/integration_tests/retrievers/test_embedchain.py +++ b/libs/community/tests/integration_tests/retrievers/test_embedchain.py @@ -19,8 +19,10 @@ os.environ["OPENAI_API_KEY"] = "sk-xxxx" context_value = [ { "context": "this document is about John", - "source": "source#1", - "document_id": 123, + "metadata": { + "source": "source#1", + "doc_id": 123, + }, }, ]