mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-15 07:36:08 +00:00
Update readthedocs.py (#943)
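Sometimes the docs may be empty. For example, soup.find_all("main", {"id": "main-content"}) can return an empty list, in which case indexing it with [0] raises an IndexError. To handle this edge case, the clean function now checks whether the result is empty before extracting its text, and falls back to an empty string when it is.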
This commit is contained in:
parent
3e1901e1aa
commit
e85c53ce68
@ -19,7 +19,11 @@ class ReadTheDocsLoader(BaseLoader):
|
||||
|
||||
def _clean_data(data: str) -> str:
|
||||
soup = BeautifulSoup(data)
|
||||
text = soup.find_all("main", {"id": "main-content"})[0].get_text()
|
||||
text = soup.find_all("main", {"id": "main-content"})
|
||||
if len(text) != 0:
|
||||
text = text[0].get_text()
|
||||
else:
|
||||
text = ""
|
||||
return "\n".join([t for t in text.split("\n") if t])
|
||||
|
||||
docs = []
|
||||
|
Loading…
Reference in New Issue
Block a user