mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 03:38:06 +00:00
Remove unnecessary spaces from document object’s page_content of BiliBiliLoader (#4619)
- Remove unnecessary spaces from document object’s page_content of BiliBiliLoader
- Fix BiliBiliLoader document and test file
This commit is contained in:
parent
f47ec5b4b6
commit
f7e3d97b19
@@ -5,7 +5,7 @@
|
||||
"id": "66a7777e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Bilibili\n",
|
||||
"# BiliBili\n",
|
||||
"\n",
|
||||
">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n",
|
||||
"\n",
|
||||
@@ -35,7 +35,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.bilibili import BiliBiliLoader"
|
||||
"from langchain.document_loaders import BiliBiliLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@@ -60,11 +60,11 @@ class BiliBiliLoader(BaseLoader):
|
||||
raw_sub_titles = json.loads(result.content)["body"]
|
||||
raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
|
||||
|
||||
raw_transcript_with_meta_info = f"""
|
||||
Video Title: {video_info['title']},
|
||||
description: {video_info['desc']}\n
|
||||
Transcript: {raw_transcript}
|
||||
"""
|
||||
raw_transcript_with_meta_info = (
|
||||
f"Video Title: {video_info['title']},"
|
||||
f"description: {video_info['desc']}\n\n"
|
||||
f"Transcript: {raw_transcript}"
|
||||
)
|
||||
return raw_transcript_with_meta_info, video_info
|
||||
else:
|
||||
raw_transcript = ""
|
||||
|
@@ -1,4 +1,4 @@
|
||||
from langchain.document_loaders.bilibili import BiliBiliLoader
|
||||
from langchain.document_loaders import BiliBiliLoader
|
||||
|
||||
|
||||
def test_bilibili_loader() -> None:
|
||||
|
Loading…
Reference in New Issue
Block a user