Harrison/improve data augmented generation docs (#390)

Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com>
Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com>
This commit is contained in:
Harrison Chase
2022-12-20 22:24:08 -05:00
committed by GitHub
parent ad4414b59f
commit c104d507bf
40 changed files with 1237 additions and 265 deletions

View File

@@ -1,6 +1,7 @@
"""Test text splitting functionality."""
import pytest
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
@@ -35,3 +36,29 @@ def test_character_text_splitting_args() -> None:
"""Test invalid arguments."""
with pytest.raises(ValueError):
CharacterTextSplitter(chunk_size=2, chunk_overlap=4)
def test_create_documents() -> None:
    """Check that create_documents turns every split chunk into a Document."""
    splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0)
    produced = splitter.create_documents(["foo bar", "baz"])
    # One Document is expected per chunk, in input order.
    wanted = [Document(page_content=chunk) for chunk in ("foo", "bar", "baz")]
    assert produced == wanted
def test_create_documents_with_metadata() -> None:
    """Check that each chunk carries the metadata of the text it came from."""
    splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0)
    raw_texts = ["foo bar", "baz"]
    metadatas = [{"source": "1"}, {"source": "2"}]
    produced = splitter.create_documents(raw_texts, metadatas)
    # Both chunks of "foo bar" are expected to share source "1"; "baz" keeps "2".
    wanted = [
        Document(page_content=content, metadata={"source": origin})
        for content, origin in (("foo", "1"), ("bar", "1"), ("baz", "2"))
    ]
    assert produced == wanted