mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-07 12:06:43 +00:00
fix:get bilibili subtitles (#8165)
- Description: fix the `BiliBiliLoader` document loader. - Issue: the API response changed. The previously used API no longer returns the `subtitle_url` property, so we use another API to get the `subtitle_url` property. The `subtitle_url` returned by this API does not include the URL scheme (`https:`), so the scheme needs to be added. - Dependencies: none - Tag maintainer: @rlancemartin
This commit is contained in:
parent
21771a6f1c
commit
d00a247da7
@ -54,12 +54,14 @@ class BiliBiliLoader(BaseLoader):
|
||||
|
||||
video_info = sync(v.get_info())
|
||||
video_info.update({"url": url})
|
||||
sub = sync(v.get_subtitle(video_info["cid"]))
|
||||
|
||||
# Get subtitle url
|
||||
subtitle = video_info.pop("subtitle")
|
||||
sub_list = subtitle["list"]
|
||||
sub_list = sub["subtitles"]
|
||||
if sub_list:
|
||||
sub_url = sub_list[0]["subtitle_url"]
|
||||
if not sub_url.startswith("http"):
|
||||
sub_url = "https:" + sub_url
|
||||
result = requests.get(sub_url)
|
||||
raw_sub_titles = json.loads(result.content)["body"]
|
||||
raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
|
||||
|
Loading…
Reference in New Issue
Block a user