Added bilibili loader (#2673) (#2724)

I've added a bilibili loader, bilibili is a very active video site in
China and I think we need this loader.

Example:
```python
from langchain.document_loaders.bilibili import BiliBiliLoader

loader = BiliBiliLoader(
       ["https://www.bilibili.com/video/BV1xt411o7Xu/",
       "https://www.bilibili.com/video/av330407025/"]
)
docs = loader.load()
```

Co-authored-by: 了空 <568250549@qq.com>
This commit is contained in:
vowelparrot
2023-04-11 10:40:32 -07:00
committed by GitHub
parent d42deff402
commit 709f26b69e
3 changed files with 184 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
from langchain.document_loaders.bilibili import BiliBiliLoader
def test_bilibili_loader() -> None:
"""Test Bilibili Loader."""
loader = BiliBiliLoader(
[
"https://www.bilibili.com/video/BV1xt411o7Xu/",
"https://www.bilibili.com/video/av330407025/",
]
)
docs = loader.load()
assert len(docs) == 2
assert len(docs[0].page_content) > 0
assert docs[1].metadata["owner"]["mid"] == 398095160
assert docs[1].page_content == ""
assert docs[1].metadata["owner"]["mid"] == 398095160