mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 14:49:29 +00:00
community[minor]: Add LarkSuite wiki document loader. (#21016)
**Description:** Add LarkSuite wiki document loader. Refer to the [LarkSuite API documentation](https://open.feishu.cn/document/server-docs/docs/wiki-v2/space-node/list) for details. **Issue:** None **Dependencies:** None **Twitter handle:** None
This commit is contained in:
parent
d36332476c
commit
1fad39be1c
@ -46,3 +46,33 @@ class LarkSuiteDocLoader(BaseLoader):
|
||||
"title": metadata_json["data"]["document"]["title"],
|
||||
}
|
||||
yield Document(page_content=text, metadata=metadata)
|
||||
|
||||
|
||||
class LarkSuiteWikiLoader(LarkSuiteDocLoader):
    """Load a `LarkSuite` (`FeiShu`) wiki page.

    The wiki node is first translated into a document token via the
    ``wiki/v2/spaces/get_node`` endpoint; the actual loading is then
    delegated to the parent class' ``lazy_load``.
    """

    def __init__(self, domain: str, access_token: str, wiki_id: str):
        """Store the connection settings and the wiki node to load.

        Args:
            domain: URL prefix placed in front of the ``/open-apis/...``
                endpoint path.
            access_token: The access token (tenant / user) to use.
            wiki_id: Token of the wiki node to load.
        """
        self.domain, self.access_token = domain, access_token
        self.wiki_id = wiki_id
        # Empty until the first ``lazy_load`` call resolves it from ``wiki_id``.
        self.document_id = ""

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load the document behind the LarkSuite (FeiShu) wiki node."""
        if not self.document_id:
            # Look up the wiki node once and remember the token of the
            # object it points to, so later calls skip this request.
            node_url = (
                f"{self.domain}/open-apis/wiki/v2/spaces/get_node"
                f"?token={self.wiki_id}"
            )
            node_info = self._get_larksuite_api_json_data(node_url)
            self.document_id = node_info["data"]["node"]["obj_token"]

        yield from super().lazy_load()
|
||||
|
@ -1,4 +1,7 @@
|
||||
from langchain_community.document_loaders.larksuite import LarkSuiteDocLoader
|
||||
from langchain_community.document_loaders.larksuite import (
|
||||
LarkSuiteDocLoader,
|
||||
LarkSuiteWikiLoader,
|
||||
)
|
||||
|
||||
DOMAIN = ""
|
||||
ACCESS_TOKEN = ""
|
||||
@ -12,3 +15,12 @@ def test_larksuite_doc_loader() -> None:
|
||||
|
||||
assert len(docs) == 1
|
||||
assert docs[0].page_content is not None
|
||||
|
||||
|
||||
def test_larksuite_wiki_loader() -> None:
    """Check that the LarkSuite (FeiShu) wiki loader returns one document."""
    documents = LarkSuiteWikiLoader(
        domain=DOMAIN,
        access_token=ACCESS_TOKEN,
        wiki_id=DOCUMENT_ID,
    ).load()

    assert len(documents) == 1
    assert documents[0].page_content is not None
|
||||
|
Loading…
Reference in New Issue
Block a user