community: Implement lazy_load() for SlackDirectoryLoader (#18675)

Integration tests:
`tests/integration_tests/document_loaders/test_slack.py`
This commit is contained in:
Christophe Bornet 2024-03-06 19:04:13 +01:00 committed by GitHub
parent ed36f9f604
commit 302985fea1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,7 +1,7 @@
import json import json
import zipfile import zipfile
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional from typing import Dict, Iterator, List, Optional
from langchain_core.documents import Document from langchain_core.documents import Document
@ -35,9 +35,8 @@ class SlackDirectoryLoader(BaseLoader):
except KeyError: except KeyError:
return {} return {}
def load(self) -> List[Document]: def lazy_load(self) -> Iterator[Document]:
"""Load and return documents from the Slack directory dump.""" """Load and return documents from the Slack directory dump."""
docs = []
with zipfile.ZipFile(self.zip_path, "r") as zip_file: with zipfile.ZipFile(self.zip_path, "r") as zip_file:
for channel_path in zip_file.namelist(): for channel_path in zip_file.namelist():
channel_name = Path(channel_path).parent.name channel_name = Path(channel_path).parent.name
@ -46,11 +45,7 @@ class SlackDirectoryLoader(BaseLoader):
if channel_path.endswith(".json"): if channel_path.endswith(".json"):
messages = self._read_json(zip_file, channel_path) messages = self._read_json(zip_file, channel_path)
for message in messages: for message in messages:
document = self._convert_message_to_document( yield self._convert_message_to_document(message, channel_name)
message, channel_name
)
docs.append(document)
return docs
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]: def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
"""Read JSON data from a zip subfile.""" """Read JSON data from a zip subfile."""