docs: docstrings langchain_community update (#14889)

Added missing docstrings. Fixed inconsistencies in docstrings.

**Note** CC @efriis 
There were PR errors on
`langchain_experimental/prompt_injection_identifier/hugging_face_identifier.py`
But I didn't touch this file in this PR! Could it be a cache problem?
I fixed this error.
This commit is contained in:
Leonid Ganeline
2023-12-19 05:58:24 -08:00
committed by GitHub
parent 583696732c
commit b2fd41331e
35 changed files with 156 additions and 25 deletions

View File

@@ -5,11 +5,12 @@ from langchain_community.document_loaders.sitemap import SitemapLoader
class DocusaurusLoader(SitemapLoader):
"""
Loader that leverages the SitemapLoader to loop through the generated pages of a
"""Load from Docusaurus Documentation.
It leverages the SitemapLoader to loop through the generated pages of a
Docusaurus Documentation website and extracts the content by looking for specific
HTML tags. By default, the parser searches for the main content of the Docusaurus
page, which is normally the <article>. You also have the option to define your own
page, which is normally the <article>. You can also define your own
custom HTML tags by providing them as a list, for example: ["div", ".main", "a"].
"""
@@ -19,8 +20,8 @@ class DocusaurusLoader(SitemapLoader):
custom_html_tags: Optional[List[str]] = None,
**kwargs: Any,
):
"""
Initialize DocusaurusLoader
"""Initialize DocusaurusLoader
Args:
url: The base URL of the Docusaurus website.
custom_html_tags: Optional custom html tags to extract content from pages.
@@ -39,7 +40,7 @@ class DocusaurusLoader(SitemapLoader):
)
def _parsing_function(self, content: Any) -> str:
"""Parses specific elements from a Docusarus page."""
"""Parses specific elements from a Docusaurus page."""
relevant_elements = content.select(",".join(self.custom_html_tags))
for element in relevant_elements: