mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 04:07:54 +00:00
Remove unnecessary spaces from document object’s page_content of BiliBiliLoader (#4619)
- Remove unnecessary spaces from document object’s page_content of BiliBiliLoader
- Fix BiliBiliLoader documentation and test file
This commit is contained in:
parent
f47ec5b4b6
commit
f7e3d97b19
@ -5,7 +5,7 @@
|
|||||||
"id": "66a7777e",
|
"id": "66a7777e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Bilibili\n",
|
"# BiliBili\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n",
|
">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -35,7 +35,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders.bilibili import BiliBiliLoader"
|
"from langchain.document_loaders import BiliBiliLoader"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -60,11 +60,11 @@ class BiliBiliLoader(BaseLoader):
|
|||||||
raw_sub_titles = json.loads(result.content)["body"]
|
raw_sub_titles = json.loads(result.content)["body"]
|
||||||
raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
|
raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
|
||||||
|
|
||||||
raw_transcript_with_meta_info = f"""
|
raw_transcript_with_meta_info = (
|
||||||
Video Title: {video_info['title']},
|
f"Video Title: {video_info['title']},"
|
||||||
description: {video_info['desc']}\n
|
f"description: {video_info['desc']}\n\n"
|
||||||
Transcript: {raw_transcript}
|
f"Transcript: {raw_transcript}"
|
||||||
"""
|
)
|
||||||
return raw_transcript_with_meta_info, video_info
|
return raw_transcript_with_meta_info, video_info
|
||||||
else:
|
else:
|
||||||
raw_transcript = ""
|
raw_transcript = ""
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from langchain.document_loaders.bilibili import BiliBiliLoader
|
from langchain.document_loaders import BiliBiliLoader
|
||||||
|
|
||||||
|
|
||||||
def test_bilibili_loader() -> None:
|
def test_bilibili_loader() -> None:
|
||||||
|
Loading…
Reference in New Issue
Block a user