mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-06 21:43:44 +00:00
Harrison/improve data augmented generation docs (#390)
Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Test text splitting functionality."""
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
|
||||
|
||||
@@ -35,3 +36,29 @@ def test_character_text_splitting_args() -> None:
|
||||
"""Test invalid arguments."""
|
||||
with pytest.raises(ValueError):
|
||||
CharacterTextSplitter(chunk_size=2, chunk_overlap=4)
|
||||
|
||||
|
||||
def test_create_documents() -> None:
    """Check that create_documents yields one Document per split chunk."""
    splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0)
    # Two input texts; the expected output below has three chunks, in order.
    docs = splitter.create_documents(["foo bar", "baz"])
    expected_docs = [
        Document(page_content=chunk) for chunk in ("foo", "bar", "baz")
    ]
    assert docs == expected_docs
|
||||
|
||||
|
||||
def test_create_documents_with_metadata() -> None:
    """Check that each chunk carries the metadata of the text it came from."""
    splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0)
    texts = ["foo bar", "baz"]
    metadatas = [{"source": "1"}, {"source": "2"}]
    docs = splitter.create_documents(texts, metadatas)
    # (chunk text, source id) pairs: both chunks of the first text share
    # source "1"; the second text's single chunk has source "2".
    chunk_sources = [("foo", "1"), ("bar", "1"), ("baz", "2")]
    expected_docs = [
        Document(page_content=content, metadata={"source": source})
        for content, source in chunk_sources
    ]
    assert docs == expected_docs
|
||||
|
Reference in New Issue
Block a user