From 33f1ab15284f9036963f9151cbce7e321b0fecef Mon Sep 17 00:00:00 2001
From: Mohammad Mohtashim <45242107+keenborder786@users.noreply.github.com>
Date: Sun, 23 Mar 2025 23:48:03 +0500
Subject: [PATCH] Youtube Loader `load` method Fixed (#30314)

- **Description:** Fixed the `YoutubeLoader` loading method not returning the correct object
- **Issue:** #30309

---------

Co-authored-by: ccurme
---
 .../langchain_community/document_loaders/youtube.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/youtube.py b/libs/community/langchain_community/document_loaders/youtube.py
index 1b99a8d2da2..eeecad1af30 100644
--- a/libs/community/langchain_community/document_loaders/youtube.py
+++ b/libs/community/langchain_community/document_loaders/youtube.py
@@ -241,6 +241,7 @@ class YoutubeLoader(BaseLoader):
         """Load YouTube transcripts into `Document` objects."""
         try:
             from youtube_transcript_api import (
+                FetchedTranscript,
                 NoTranscriptFound,
                 TranscriptsDisabled,
                 YouTubeTranscriptApi,
@@ -269,8 +270,11 @@ class YoutubeLoader(BaseLoader):
 
         if self.translation is not None:
             transcript = transcript.translate(self.translation)
-
-        transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
+        transcript_object = transcript.fetch()
+        if isinstance(transcript_object, FetchedTranscript):
+            transcript_pieces = [{"text": x.text} for x in transcript_object.snippets]
+        else:
+            transcript_pieces: List[Dict[str, Any]] = transcript_object  # type: ignore[no-redef]
 
         if self.transcript_format == TranscriptFormat.TEXT:
             transcript = " ".join(