Harrison/from documents (#3919)

Co-authored-by: Gabriel Altay <gabriel.altay@gmail.com>
This commit is contained in:
Harrison Chase 2023-05-01 20:28:14 -07:00 committed by GitHub
parent e7e29f9937
commit 05170b6764
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -9,6 +9,7 @@ from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import BaseLLM from langchain.llms.base import BaseLLM
from langchain.llms.openai import OpenAI from langchain.llms.openai import OpenAI
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.vectorstores.base import VectorStore from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.chroma import Chroma from langchain.vectorstores.chroma import Chroma
@@ -67,7 +68,11 @@ class VectorstoreIndexCreator(BaseModel):
docs = [] docs = []
for loader in loaders: for loader in loaders:
docs.extend(loader.load()) docs.extend(loader.load())
sub_docs = self.text_splitter.split_documents(docs) return self.from_documents(docs)
def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from documents."""
sub_docs = self.text_splitter.split_documents(documents)
vectorstore = self.vectorstore_cls.from_documents( vectorstore = self.vectorstore_cls.from_documents(
sub_docs, self.embedding, **self.vectorstore_kwargs sub_docs, self.embedding, **self.vectorstore_kwargs
) )