mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-04 20:46:45 +00:00
Harrison/format agent instructions (#973)
Co-authored-by: Andrew White <white.d.andrew@gmail.com> Co-authored-by: Harrison Chase <harrisonchase@Harrisons-MBP.attlocal.net> Co-authored-by: Peng Qu <82029664+pengqu123@users.noreply.github.com>
This commit is contained in:
BIN
tests/integration_tests/examples/hello.pdf
Normal file
BIN
tests/integration_tests/examples/hello.pdf
Normal file
Binary file not shown.
19
tests/integration_tests/test_pdf_pagesplitter.py
Normal file
19
tests/integration_tests/test_pdf_pagesplitter.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Test splitting with page numbers included."""
|
||||
import os
|
||||
|
||||
from langchain.document_loaders import PagedPDFSplitter
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.vectorstores import FAISS
|
||||
|
||||
|
||||
def test_pdf_pagesplitter() -> None:
|
||||
"""Test splitting with page numbers included."""
|
||||
script_dir = os.path.dirname(__file__)
|
||||
loader = PagedPDFSplitter(os.path.join(script_dir, "examples/hello.pdf"))
|
||||
docs = loader.load()
|
||||
assert "page" in docs[0].metadata
|
||||
assert "source" in docs[0].metadata
|
||||
|
||||
faiss_index = FAISS.from_documents(docs, OpenAIEmbeddings())
|
||||
docs = faiss_index.similarity_search("Complete this sentence: Hello", k=1)
|
||||
assert "Hello world" in docs[0].page_content
|
Reference in New Issue
Block a user