diff --git a/libs/community/langchain_community/document_loaders/parsers/audio.py b/libs/community/langchain_community/document_loaders/parsers/audio.py index 8a873f4b3a4..4e1ac1bba62 100644 --- a/libs/community/langchain_community/document_loaders/parsers/audio.py +++ b/libs/community/langchain_community/document_loaders/parsers/audio.py @@ -281,12 +281,8 @@ class OpenAIWhisperParser(BaseBlobParser): raise ImportError( "openai package not found, please install it with `pip install openai`" ) - try: - from pydub import AudioSegment - except ImportError: - raise ImportError( - "pydub package not found, please install it with `pip install pydub`" - ) + + audio = _get_audio_from_blob(blob) if is_openai_v1(): # api_key optional, defaults to `os.environ['OPENAI_API_KEY']` @@ -298,9 +294,6 @@ class OpenAIWhisperParser(BaseBlobParser): if self.base_url: openai.api_base = self.base_url - # Audio file from disk - - audio = AudioSegment.from_file(blob.path) # Define the duration of each chunk in minutes # Need to meet 25MB size limit for Whisper API chunk_duration = 20 @@ -451,13 +444,6 @@ class OpenAIWhisperParserLocal(BaseBlobParser): def lazy_parse(self, blob: Blob) -> Iterator[Document]: """Lazily parse the blob.""" - try: - from pydub import AudioSegment - except ImportError: - raise ImportError( - "pydub package not found, please install it with `pip install pydub`" - ) - try: import librosa except ImportError: @@ -466,8 +452,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser): "`pip install librosa`" ) - # Audio file from disk - audio = AudioSegment.from_file(blob.path) + audio = _get_audio_from_blob(blob) file_obj = io.BytesIO(audio.export(format="mp3").read()) @@ -529,12 +514,8 @@ class YandexSTTParser(BaseBlobParser): "yandex-speechkit package not found, please install it with " "`pip install yandex-speechkit`" ) - try: - from pydub import AudioSegment - except ImportError: - raise ImportError( - "pydub package not found, please install it with `pip install pydub`" - ) + + audio = _get_audio_from_blob(blob) if self.api_key: configure_credentials( @@ -545,8 +526,6 @@ class YandexSTTParser(BaseBlobParser): yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token) ) - audio = AudioSegment.from_file(blob.path) - model = model_repository.recognition_model() model.model = self.model @@ -645,13 +624,6 @@ class FasterWhisperParser(BaseBlobParser): def lazy_parse(self, blob: Blob) -> Iterator[Document]: """Lazily parse the blob.""" - try: - from pydub import AudioSegment - except ImportError: - raise ImportError( - "pydub package not found, please install it with `pip install pydub`" - ) - try: from faster_whisper import WhisperModel except ImportError: @@ -660,15 +632,7 @@ class FasterWhisperParser(BaseBlobParser): "`pip install faster-whisper`" ) - # get the audio - if isinstance(blob.data, bytes): - # blob contains the audio - audio = AudioSegment.from_file(io.BytesIO(blob.data)) - elif blob.data is None and blob.path: - # Audio file from disk - audio = AudioSegment.from_file(blob.path) - else: - raise ValueError("Unable to get audio from blob") + audio = _get_audio_from_blob(blob) file_obj = io.BytesIO(audio.export(format="mp3").read()) @@ -688,3 +652,33 @@ class FasterWhisperParser(BaseBlobParser): **blob.metadata, }, ) + + +def _get_audio_from_blob(blob: Blob) -> Any: + """Get audio data from blob. + + Args: + blob: Blob object containing the audio data. + + Returns: + AudioSegment: Audio data from the blob. + + Raises: + ImportError: If the required package `pydub` is not installed. + ValueError: If the audio data is not found in the blob + """ + try: + from pydub import AudioSegment + except ImportError: + raise ImportError( + "pydub package not found, please install it with `pip install pydub`" + ) + + if isinstance(blob.data, bytes): + audio = AudioSegment.from_file(io.BytesIO(blob.data)) + elif blob.data is None and blob.path: + audio = AudioSegment.from_file(blob.path) + else: + raise ValueError("Unable to get audio from blob") + + return audio