Compare commits

...

1 Commits

Author SHA1 Message Date
Harrison Chase
31f92cf88f document processor 2023-04-07 12:24:06 -07:00
2 changed files with 11 additions and 1 deletions

View File

@@ -390,3 +390,9 @@ class OutputParserException(Exception):
"""
pass
class BaseDocumentProcessor(ABC):
    """Abstract interface for objects that process a list of documents.

    Concrete subclasses must implement :meth:`process`.
    """

    @abstractmethod
    def process(self, documents: List[Document]) -> List[Document]:
        """Process documents.

        Args:
            documents: The documents to process.

        Returns:
            The list of documents produced by the processor.
        """

View File

@@ -17,11 +17,12 @@ from typing import (
)
from langchain.docstore.document import Document
from langchain.schema import BaseDocumentProcessor
logger = logging.getLogger()
class TextSplitter(ABC):
class TextSplitter(BaseDocumentProcessor, ABC):
"""Interface for splitting text into chunks."""
def __init__(
@@ -64,6 +65,9 @@ class TextSplitter(ABC):
metadatas = [doc.metadata for doc in documents]
return self.create_documents(texts, metadatas)
def process(self, documents: List[Document]) -> List[Document]:
    """Process documents by delegating to ``split_documents``.

    Args:
        documents: The documents to hand to ``split_documents``.

    Returns:
        Whatever ``split_documents`` returns for ``documents``.
    """
    processed = self.split_documents(documents)
    return processed
def _join_docs(self, docs: List[str], separator: str) -> Optional[str]:
text = separator.join(docs)
text = text.strip()