mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 00:48:45 +00:00
community: Implement lazy_load() for SlackDirectoryLoader (#18675)
Integration tests: `tests/integration_tests/document_loaders/test_slack.py`
This commit is contained in:
parent
ed36f9f604
commit
302985fea1
@ -1,7 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, Iterator, List, Optional
|
||||||
|
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
@ -35,9 +35,8 @@ class SlackDirectoryLoader(BaseLoader):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def lazy_load(self) -> Iterator[Document]:
|
||||||
"""Load and return documents from the Slack directory dump."""
|
"""Load and return documents from the Slack directory dump."""
|
||||||
docs = []
|
|
||||||
with zipfile.ZipFile(self.zip_path, "r") as zip_file:
|
with zipfile.ZipFile(self.zip_path, "r") as zip_file:
|
||||||
for channel_path in zip_file.namelist():
|
for channel_path in zip_file.namelist():
|
||||||
channel_name = Path(channel_path).parent.name
|
channel_name = Path(channel_path).parent.name
|
||||||
@ -46,11 +45,7 @@ class SlackDirectoryLoader(BaseLoader):
|
|||||||
if channel_path.endswith(".json"):
|
if channel_path.endswith(".json"):
|
||||||
messages = self._read_json(zip_file, channel_path)
|
messages = self._read_json(zip_file, channel_path)
|
||||||
for message in messages:
|
for message in messages:
|
||||||
document = self._convert_message_to_document(
|
yield self._convert_message_to_document(message, channel_name)
|
||||||
message, channel_name
|
|
||||||
)
|
|
||||||
docs.append(document)
|
|
||||||
return docs
|
|
||||||
|
|
||||||
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
|
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
|
||||||
"""Read JSON data from a zip subfile."""
|
"""Read JSON data from a zip subfile."""
|
||||||
|
Loading…
Reference in New Issue
Block a user