mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-23 19:39:58 +00:00
text-splitters[minor], langchain[minor], community[patch], templates, docs: langchain-text-splitters 0.0.1 (#18346)
This commit is contained in:
71
libs/text-splitters/langchain_text_splitters/__init__.py
Normal file
71
libs/text-splitters/langchain_text_splitters/__init__.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""**Text Splitters** are classes for splitting text.
|
||||
|
||||
|
||||
**Class hierarchy:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
BaseDocumentTransformer --> TextSplitter --> <name>TextSplitter # Example: CharacterTextSplitter
|
||||
RecursiveCharacterTextSplitter --> <name>TextSplitter
|
||||
|
||||
Note: **MarkdownHeaderTextSplitter** and **HTMLHeaderTextSplitter** do not derive from TextSplitter.
|
||||
|
||||
|
||||
**Main helpers:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
Document, Tokenizer, Language, LineType, HeaderType
|
||||
|
||||
""" # noqa: E501
|
||||
|
||||
from langchain_text_splitters.base import (
|
||||
Language,
|
||||
TextSplitter,
|
||||
Tokenizer,
|
||||
TokenTextSplitter,
|
||||
split_text_on_tokens,
|
||||
)
|
||||
from langchain_text_splitters.character import (
|
||||
CharacterTextSplitter,
|
||||
RecursiveCharacterTextSplitter,
|
||||
)
|
||||
from langchain_text_splitters.html import ElementType, HTMLHeaderTextSplitter
|
||||
from langchain_text_splitters.json import RecursiveJsonSplitter
|
||||
from langchain_text_splitters.konlpy import KonlpyTextSplitter
|
||||
from langchain_text_splitters.latex import LatexTextSplitter
|
||||
from langchain_text_splitters.markdown import (
|
||||
HeaderType,
|
||||
LineType,
|
||||
MarkdownHeaderTextSplitter,
|
||||
MarkdownTextSplitter,
|
||||
)
|
||||
from langchain_text_splitters.nltk import NLTKTextSplitter
|
||||
from langchain_text_splitters.python import PythonCodeTextSplitter
|
||||
from langchain_text_splitters.sentence_transformers import (
|
||||
SentenceTransformersTokenTextSplitter,
|
||||
)
|
||||
from langchain_text_splitters.spacy import SpacyTextSplitter
|
||||
|
||||
__all__ = [
|
||||
"TokenTextSplitter",
|
||||
"TextSplitter",
|
||||
"Tokenizer",
|
||||
"Language",
|
||||
"RecursiveCharacterTextSplitter",
|
||||
"RecursiveJsonSplitter",
|
||||
"LatexTextSplitter",
|
||||
"PythonCodeTextSplitter",
|
||||
"KonlpyTextSplitter",
|
||||
"SpacyTextSplitter",
|
||||
"NLTKTextSplitter",
|
||||
"split_text_on_tokens",
|
||||
"SentenceTransformersTokenTextSplitter",
|
||||
"ElementType",
|
||||
"HeaderType",
|
||||
"LineType",
|
||||
"HTMLHeaderTextSplitter",
|
||||
"MarkdownHeaderTextSplitter",
|
||||
"MarkdownTextSplitter",
|
||||
"CharacterTextSplitter",
|
||||
]
|
Reference in New Issue
Block a user