Update readthedocs.py (#943)

Sometimes a page has no main content. For example,
soup.find_all("main", {"id": "main-content"}) can return an empty list,
so indexing it with [0] fails. To cater to these edge cases, the clean
function now checks whether the result is empty before extracting text.
This commit is contained in:
Usama Navid 2023-02-09 05:01:07 +05:00 committed by GitHub
parent 3e1901e1aa
commit e85c53ce68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -19,7 +19,11 @@ class ReadTheDocsLoader(BaseLoader):
def _clean_data(data: str) -> str:
    """Extract the main-content text of a page with blank lines removed.

    Args:
        data: Raw HTML of the page.

    Returns:
        The text of the first ``<main id="main-content">`` element with
        empty lines dropped, or ``""`` when the page has no such element.
    """
    soup = BeautifulSoup(data)
    # Some pages have no <main id="main-content">. Look it up once and
    # check the result before indexing, so those pages yield "" instead
    # of raising IndexError.
    matches = soup.find_all("main", {"id": "main-content"})
    if not matches:
        return ""
    text = matches[0].get_text()
    return "\n".join([t for t in text.split("\n") if t])
docs = []