Harrison/indexing pipeline (#1317)

This commit is contained in:
Harrison Chase
2023-02-27 00:31:36 -08:00
committed by GitHub
parent a0bf856c70
commit 0824d65a5c
4 changed files with 211 additions and 36 deletions

View File

@@ -30,29 +30,24 @@
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.docstore.document import Document\n",
"from langchain.prompts import PromptTemplate"
"from langchain.prompts import PromptTemplate\n",
"from langchain.indexes.vectorstore import VectorstoreIndexCreator"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "291f0117",
"id": "ef9305cc",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"loader = TextLoader('../../state_of_the_union.txt')\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
"index_creator = VectorstoreIndexCreator()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fd9666a9",
"execution_count": 3,
"id": "291f0117",
"metadata": {},
"outputs": [
{
@@ -65,12 +60,14 @@
}
],
"source": [
"docsearch = Chroma.from_documents(texts, embeddings)"
"from langchain.document_loaders import TextLoader\n",
"loader = TextLoader('../../state_of_the_union.txt')\n",
"docsearch = index_creator.from_loaders([loader])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "d1eaf6e6",
"metadata": {},
"outputs": [],