Fix usage of AsyncHtmlLoader with an already running event loop (#10220)

This commit is contained in:
Nuno Campos 2023-09-05 15:25:28 +01:00 committed by GitHub
parent ac2310a405
commit 5d8673a3c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,7 +1,8 @@
import asyncio import asyncio
import logging import logging
import warnings import warnings
from typing import Any, Dict, Iterator, List, Optional, Union from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, Iterator, List, Optional, Union, cast
import aiohttp import aiohttp
import requests import requests
@@ -129,9 +130,18 @@ class AsyncHtmlLoader(BaseLoader):
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load text from the url(s) in web_path.""" """Load text from the url(s) in web_path."""
results = asyncio.run(self.fetch_all(self.web_paths)) try:
# Raises RuntimeError if there is no current event loop.
asyncio.get_running_loop()
# If there is a current event loop, we need to run the async code
# in a separate loop, in a separate thread.
with ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(asyncio.run, self.fetch_all(self.web_paths))
results = future.result()
except RuntimeError:
results = asyncio.run(self.fetch_all(self.web_paths))
docs = [] docs = []
for i, text in enumerate(results): for i, text in enumerate(cast(List[str], results)):
metadata = {"source": self.web_paths[i]} metadata = {"source": self.web_paths[i]}
docs.append(Document(page_content=text, metadata=metadata)) docs.append(Document(page_content=text, metadata=metadata))