Harrison/format agent instructions (#973)

Co-authored-by: Andrew White <white.d.andrew@gmail.com>
Co-authored-by: Harrison Chase <harrisonchase@Harrisons-MBP.attlocal.net>
Co-authored-by: Peng Qu <82029664+pengqu123@users.noreply.github.com>
This commit is contained in:
Harrison Chase
2023-02-10 10:07:26 -08:00
committed by GitHub
parent 5469d898a9
commit c64f98e2bb
10 changed files with 441 additions and 187 deletions

Binary file not shown.

View File

@@ -0,0 +1,19 @@
"""Test splitting with page numbers included."""
import os
from langchain.document_loaders import PagedPDFSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
def test_pdf_pagesplitter() -> None:
    """Check that paged PDF loading keeps page/source metadata and is searchable.

    Loads ``examples/hello.pdf`` (resolved relative to this test file) with
    ``PagedPDFSplitter``, verifies the first document carries both ``page``
    and ``source`` metadata keys, then indexes the documents in FAISS using
    ``OpenAIEmbeddings`` and confirms that the single best match for a
    "Hello" query contains the text "Hello world".
    """
    # Resolve the fixture relative to this file rather than the CWD.
    pdf_path = os.path.join(os.path.dirname(__file__), "examples/hello.pdf")
    pages = PagedPDFSplitter(pdf_path).load()

    # The first loaded document must expose both metadata keys.
    for required_key in ("page", "source"):
        assert required_key in pages[0].metadata

    # Build the vector index and fetch only the single nearest document.
    index = FAISS.from_documents(pages, OpenAIEmbeddings())
    matches = index.similarity_search("Complete this sentence: Hello", k=1)
    assert "Hello world" in matches[0].page_content