From b6893a569ea00a803eae5c6a48543d22a8ca3bfe Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Mon, 20 Mar 2023 22:47:22 -0700
Subject: [PATCH] qasper eval

---
Notes (free-form text between the separator above and the first
"diff --git" line is skipped when the patch is applied):

    * Import Document so an index can be built from in-memory documents.
    * Move the text-splitting and vectorstore-construction steps out of
      VectorstoreIndexCreator.from_loaders into a private _from_docs helper.
    * Add from_text, which wraps a single string (plus optional metadata,
      defaulting to an empty dict) in a Document and delegates to _from_docs.
    * from_loaders still concatenates the output of every loader's load(),
      and now delegates to _from_docs as well.

 langchain/indexes/vectorstore.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/langchain/indexes/vectorstore.py b/langchain/indexes/vectorstore.py
index dc5807e4906..03cb594ac4a 100644
--- a/langchain/indexes/vectorstore.py
+++ b/langchain/indexes/vectorstore.py
@@ -4,6 +4,7 @@ from pydantic import BaseModel, Extra, Field
 
 from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain
 from langchain.chains.vector_db_qa.base import VectorDBQA
+from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
 from langchain.embeddings.base import Embeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
@@ -60,13 +61,22 @@ class VectorstoreIndexCreator(BaseModel):
         extra = Extra.forbid
         arbitrary_types_allowed = True
 
-    def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
-        """Create a vectorstore index from loaders."""
-        docs = []
-        for loader in loaders:
-            docs.extend(loader.load())
+    def _from_docs(self, docs: List[Document]) -> VectorStoreIndexWrapper:
         sub_docs = self.text_splitter.split_documents(docs)
         vectorstore = self.vectorstore_cls.from_documents(
             sub_docs, self.embedding, **self.vectorstore_kwargs
         )
         return VectorStoreIndexWrapper(vectorstore=vectorstore)
+
+    def from_text(
+        self, text: str, metadata: Optional[dict] = None
+    ) -> VectorStoreIndexWrapper:
+        doc = Document(page_content=text, metadata=metadata or {})
+        return self._from_docs([doc])
+
+    def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
+        """Create a vectorstore index from loaders."""
+        docs = []
+        for loader in loaders:
+            docs.extend(loader.load())
+        return self._from_docs(docs)