community: Implement lazy_load() for SlackDirectoryLoader (#18675)

Integration tests:
`tests/integration_tests/document_loaders/test_slack.py`
This commit is contained in:
Christophe Bornet 2024-03-06 19:04:13 +01:00 committed by GitHub
parent ed36f9f604
commit 302985fea1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,7 +1,7 @@
import json
import zipfile
from pathlib import Path
from typing import Dict, List, Optional
from typing import Dict, Iterator, List, Optional
from langchain_core.documents import Document
@@ -35,9 +35,8 @@ class SlackDirectoryLoader(BaseLoader):
except KeyError:
return {}
def load(self) -> List[Document]:
def lazy_load(self) -> Iterator[Document]:
"""Load and return documents from the Slack directory dump."""
docs = []
with zipfile.ZipFile(self.zip_path, "r") as zip_file:
for channel_path in zip_file.namelist():
channel_name = Path(channel_path).parent.name
@@ -46,11 +45,7 @@ class SlackDirectoryLoader(BaseLoader):
if channel_path.endswith(".json"):
messages = self._read_json(zip_file, channel_path)
for message in messages:
document = self._convert_message_to_document(
message, channel_name
)
docs.append(document)
return docs
yield self._convert_message_to_document(message, channel_name)
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
"""Read JSON data from a zip subfile."""