community[minor]: Rememberizer retriever (#20052)

**Description:**
This pull request introduces a new feature for LangChain: the
integration with the Rememberizer API through a custom retriever.
This enables LangChain applications to allow users to load and sync
their data from Dropbox, Google Drive, Slack, and their hard drive into a
vector database that LangChain can query. Queries involve sending text
chunks generated within LangChain and retrieving a collection of
semantically relevant user data for inclusion in LLM prompts.
User knowledge dramatically improves AI applications.
The Rememberizer integration will also allow users to access general-purpose
vectorized data such as Reddit channel discussions and US
patents.

**Issue:**
N/A

**Dependencies:**
N/A

**Twitter handle:**
https://twitter.com/Rememberizer
This commit is contained in:
East Agile
2024-05-01 21:41:44 +07:00
committed by GitHub
parent 1ce1a10f2b
commit 2a6f78a53f
8 changed files with 368 additions and 0 deletions

View File

@@ -0,0 +1,75 @@
import unittest
from typing import Any
from unittest.mock import patch
import responses
from langchain_community.utilities import RememberizerAPIWrapper
class TestRememberizerAPIWrapper(unittest.TestCase):
    """Unit tests for ``RememberizerAPIWrapper``.

    The ``search`` tests register canned HTTP responses with ``responses``;
    the ``load`` test patches ``search`` directly.
    """

    @responses.activate
    def test_search_successful(self) -> None:
        """Expect ``search`` to return the list stored under ``matched_chunks``."""
        search_url = "https://api.rememberizer.ai/api/v1/documents/search?q=test&n=10"
        payload = {
            "matched_chunks": [
                {
                    "chunk_id": "chunk",
                    "matched_content": "content",
                    "document": {"id": "id", "name": "name"},
                }
            ]
        }
        responses.add(responses.GET, search_url, json=payload)

        api = RememberizerAPIWrapper(rememberizer_api_key="dummy_key", n=10)
        found = api.search("test")

        # Spelled out separately from ``payload`` so the comparison is
        # against an independent literal.
        expected = [
            {
                "chunk_id": "chunk",
                "matched_content": "content",
                "document": {"id": "id", "name": "name"},
            }
        ]
        self.assertEqual(found, expected)

    @responses.activate
    def test_search_fail(self) -> None:
        """Expect a 400 response to raise ``ValueError`` carrying the error body."""
        search_url = "https://api.rememberizer.ai/api/v1/documents/search?q=test&n=10"
        error_body = {"detail": "Incorrect authentication credentials."}
        responses.add(responses.GET, search_url, status=400, json=error_body)

        api = RememberizerAPIWrapper(rememberizer_api_key="dummy_key", n=10)
        with self.assertRaises(ValueError) as raised:
            api.search("test")

        # Checked after the ``with`` block exits: anything following the
        # raising call inside the block would never execute.
        expected_message = (
            "API Error: {'detail': 'Incorrect authentication credentials.'}"
        )
        self.assertEqual(str(raised.exception), expected_message)

    @patch("langchain_community.utilities.rememberizer.RememberizerAPIWrapper.search")
    def test_load(self, mock_search: Any) -> None:
        """Expect ``load`` to yield one document per chunk returned by ``search``.

        Each document's ``page_content`` is compared with the chunk's
        ``matched_content`` and its ``metadata`` with the chunk's ``document``.
        """
        mock_search.return_value = [
            {
                "chunk_id": "chunk1",
                "matched_content": "content1",
                "document": {"id": "id1", "name": "name1"},
            },
            {
                "chunk_id": "chunk2",
                "matched_content": "content2",
                "document": {"id": "id2", "name": "name2"},
            },
        ]

        api = RememberizerAPIWrapper(rememberizer_api_key="dummy_key", n=10)
        docs = api.load("test")

        self.assertEqual(len(docs), 2)
        # (page_content, metadata) pairs in the order the documents are expected.
        expected_pairs = [
            ("content1", {"id": "id1", "name": "name1"}),
            ("content2", {"id": "id2", "name": "name2"}),
        ]
        for position, (content, metadata) in enumerate(expected_pairs):
            self.assertEqual(docs[position].page_content, content)
            self.assertEqual(docs[position].metadata, metadata)