mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-24 20:09:01 +00:00
I've added a Bilibili loader. Bilibili is a very active video site in China, and I think we need this loader.

Example:

```python
from langchain.document_loaders.bilibili import BiliBiliLoader

loader = BiliBiliLoader(
    ["https://www.bilibili.com/video/BV1xt411o7Xu/", "https://www.bilibili.com/video/av330407025/"]
)
docs = loader.load()
```

Co-authored-by: 了空 <568250549@qq.com>
This commit is contained in:
20
tests/integration_tests/document_loaders/test_bilibili.py
Normal file
20
tests/integration_tests/document_loaders/test_bilibili.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from langchain.document_loaders.bilibili import BiliBiliLoader
|
||||
|
||||
|
||||
def test_bilibili_loader() -> None:
    """Test Bilibili Loader.

    Builds a ``BiliBiliLoader`` from two video URLs (one ``BV``-style id and
    one ``av``-style id), loads them, and checks that exactly one document
    is returned per URL with the expected content and owner metadata.

    NOTE(review): this lives under ``integration_tests`` and ``load()``
    presumably fetches the live pages, so the expected values depend on the
    remote videos staying unchanged -- confirm before relying on it in CI.
    """
    loader = BiliBiliLoader(
        [
            "https://www.bilibili.com/video/BV1xt411o7Xu/",
            "https://www.bilibili.com/video/av330407025/",
        ]
    )
    docs = loader.load()

    # One document per input URL, in input order.
    assert len(docs) == 2

    # First video: some text content must have been extracted.
    assert len(docs[0].page_content) > 0

    # Second video: expected to yield no text content, but its metadata
    # must still identify the uploader. (The owner check was previously
    # asserted twice; once is sufficient.)
    assert docs[1].page_content == ""
    assert docs[1].metadata["owner"]["mid"] == 398095160
|
Reference in New Issue
Block a user