text-splitters: add pydocstyle linting (#28127)

As seen in #23188, turned on Google-style docstrings by enabling
`pydocstyle` linting in the `text-splitters` package. The resulting
linting errors were handled case by case: some were ignored, some were
resolved, some were suppressed, and missing docstrings were added.

Fixes one of the checklist items from #25154, similar to #25939 in the
`core` package. Ran `make format`, `make lint` and `make test` from the
root of the `text-splitters` package to ensure no issues were found.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Ankit Dangi
2024-12-08 22:01:03 -08:00
committed by GitHub
parent b53f07bfb9
commit 90f162efb6
9 changed files with 194 additions and 27 deletions

View File

@@ -115,17 +115,45 @@ class RecursiveCharacterTextSplitter(TextSplitter):
return final_chunks
def split_text(self, text: str) -> List[str]:
    """Break ``text`` into chunks using this splitter's separators.

    This is a thin entry point: it hands ``text`` and ``self._separators``
    to ``self._split_text`` and returns whatever that call produces.

    Args:
        text: The text to split.

    Returns:
        The list of chunks produced by ``self._split_text``.
    """
    # Look up the worker first, then the separators, so attribute access
    # happens in the same order as a direct call expression would.
    splitter = self._split_text
    chunks = splitter(text, self._separators)
    return chunks
@classmethod
def from_language(
    cls, language: Language, **kwargs: Any
) -> RecursiveCharacterTextSplitter:
    """Build a splitter preconfigured with the separators for ``language``.

    The separators are obtained from ``cls.get_separators_for_language``
    and passed to the constructor together with ``is_separator_regex=True``.

    Args:
        language: The language whose separators should be used.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            class constructor.

    Returns:
        A new instance of ``cls`` constructed with the language-specific
        separators.
    """
    # Keep ``separators`` and ``is_separator_regex`` as explicit keywords so
    # that a duplicate supplied through ``kwargs`` still raises ``TypeError``.
    return cls(
        separators=cls.get_separators_for_language(language),
        is_separator_regex=True,
        **kwargs,
    )
@staticmethod
def get_separators_for_language(language: Language) -> List[str]:
"""Retrieve a list of separators specific to the given language.
Args:
language (Language): The language for which to get the separators.
Returns:
List[str]: A list of separators appropriate for the specified language.
"""
if language == Language.C or language == Language.CPP:
return [
# Split along class definitions