mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 05:25:07 +00:00
text-splitters[minor], langchain[minor], community[patch], templates, docs: langchain-text-splitters 0.0.1 (#18346)
This commit is contained in:
@@ -48,7 +48,13 @@ class TelegramChatFileLoader(BaseLoader):
|
||||
|
||||
def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
|
||||
"""Convert a string or list of strings to a list of Documents with metadata."""
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=800,
|
||||
separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
|
||||
chunk_overlap=20,
|
||||
)
|
||||
|
||||
if isinstance(text, str):
|
||||
# Take a single string as one page
|
||||
@@ -63,11 +69,6 @@ def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
|
||||
doc_chunks = []
|
||||
|
||||
for doc in page_docs:
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=800,
|
||||
separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
|
||||
chunk_overlap=20,
|
||||
)
|
||||
chunks = text_splitter.split_text(doc.page_content)
|
||||
for i, chunk in enumerate(chunks):
|
||||
doc = Document(
|
||||
|
Reference in New Issue
Block a user