mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-01 02:50:47 +00:00
wip: add method for both docstore and embeddings (#119)
This will break at the moment, but I wanted to get thoughts on the implementation. 1. Should add() be on the docstore interface? 2. Should InMemoryDocstore change to take a list of documents on init? (This makes it slightly easier to implement in FAISS -- if we think it is less clean, then we could expose a method to get the number of documents currently in the dict, and perform the logic of creating the necessary dictionary in the FAISS.add_texts method.) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
committed by
GitHub
parent
e9baf9c134
commit
315b0c09c6
@@ -1,4 +1,5 @@
|
||||
"""Test in memory docstore."""
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
@@ -19,3 +20,37 @@ def test_document_not_found() -> None:
|
||||
docstore = InMemoryDocstore(_dict)
|
||||
output = docstore.search("bar")
|
||||
assert output == "ID bar not found."
|
||||
|
||||
|
||||
def test_adding_document() -> None:
    """Adding a new id stores it and leaves the existing document intact."""
    initial_docs = {"foo": Document(page_content="bar")}
    docstore = InMemoryDocstore(initial_docs)
    docstore.add({"bar": Document(page_content="foo")})

    # The freshly added document is retrievable under its own id.
    added = docstore.search("bar")
    assert isinstance(added, Document)
    assert added.page_content == "foo"

    # The document the store was created with is still returned unchanged.
    existing = docstore.search("foo")
    assert isinstance(existing, Document)
    assert existing.page_content == "bar"
|
||||
|
||||
|
||||
def test_adding_document_already_exists() -> None:
    """Adding under an id that is already present must raise ValueError."""
    docstore = InMemoryDocstore({"foo": Document(page_content="bar")})
    duplicate = {"foo": Document(page_content="foo")}

    # Re-using the id "foo" is expected to be rejected.
    with pytest.raises(ValueError):
        docstore.add(duplicate)

    # The rejected add must not have replaced the original document.
    original = docstore.search("foo")
    assert isinstance(original, Document)
    assert original.page_content == "bar"
|
||||
|
Reference in New Issue
Block a user