mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-17 16:39:52 +00:00
community: support in-memory data (Blob.from_data) in all audio parsers (#30262)
OpenAIWhisperParser, OpenAIWhisperParserLocal, and YandexSTTParser do not handle in-memory audio data (loaded via Blob.from_data) correctly: they require Blob.path to be set, and the AudioSegment is always read from the file system. So far, in-memory data is handled correctly only by FasterWhisperParser. I changed OpenAIWhisperParser, OpenAIWhisperParserLocal, and YandexSTTParser to match FasterWhisperParser. Thanks for reviewing the PR! Co-authored-by: qonnop <qonnop@users.noreply.github.com>
This commit is contained in:
parent
98a9ef19ec
commit
036f00dc92
@ -281,12 +281,8 @@ class OpenAIWhisperParser(BaseBlobParser):
|
||||
raise ImportError(
|
||||
"openai package not found, please install it with `pip install openai`"
|
||||
)
|
||||
try:
|
||||
from pydub import AudioSegment
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pydub package not found, please install it with `pip install pydub`"
|
||||
)
|
||||
|
||||
audio = _get_audio_from_blob(blob)
|
||||
|
||||
if is_openai_v1():
|
||||
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
|
||||
@ -298,9 +294,6 @@ class OpenAIWhisperParser(BaseBlobParser):
|
||||
if self.base_url:
|
||||
openai.api_base = self.base_url
|
||||
|
||||
# Audio file from disk
|
||||
|
||||
audio = AudioSegment.from_file(blob.path)
|
||||
# Define the duration of each chunk in minutes
|
||||
# Need to meet 25MB size limit for Whisper API
|
||||
chunk_duration = 20
|
||||
@ -451,13 +444,6 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||
"""Lazily parse the blob."""
|
||||
|
||||
try:
|
||||
from pydub import AudioSegment
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pydub package not found, please install it with `pip install pydub`"
|
||||
)
|
||||
|
||||
try:
|
||||
import librosa
|
||||
except ImportError:
|
||||
@ -466,8 +452,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
||||
"`pip install librosa`"
|
||||
)
|
||||
|
||||
# Audio file from disk
|
||||
audio = AudioSegment.from_file(blob.path)
|
||||
audio = _get_audio_from_blob(blob)
|
||||
|
||||
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
||||
|
||||
@ -529,12 +514,8 @@ class YandexSTTParser(BaseBlobParser):
|
||||
"yandex-speechkit package not found, please install it with "
|
||||
"`pip install yandex-speechkit`"
|
||||
)
|
||||
try:
|
||||
from pydub import AudioSegment
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pydub package not found, please install it with `pip install pydub`"
|
||||
)
|
||||
|
||||
audio = _get_audio_from_blob(blob)
|
||||
|
||||
if self.api_key:
|
||||
configure_credentials(
|
||||
@ -545,8 +526,6 @@ class YandexSTTParser(BaseBlobParser):
|
||||
yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token)
|
||||
)
|
||||
|
||||
audio = AudioSegment.from_file(blob.path)
|
||||
|
||||
model = model_repository.recognition_model()
|
||||
|
||||
model.model = self.model
|
||||
@ -645,13 +624,6 @@ class FasterWhisperParser(BaseBlobParser):
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||
"""Lazily parse the blob."""
|
||||
|
||||
try:
|
||||
from pydub import AudioSegment
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pydub package not found, please install it with `pip install pydub`"
|
||||
)
|
||||
|
||||
try:
|
||||
from faster_whisper import WhisperModel
|
||||
except ImportError:
|
||||
@ -660,15 +632,7 @@ class FasterWhisperParser(BaseBlobParser):
|
||||
"`pip install faster-whisper`"
|
||||
)
|
||||
|
||||
# get the audio
|
||||
if isinstance(blob.data, bytes):
|
||||
# blob contains the audio
|
||||
audio = AudioSegment.from_file(io.BytesIO(blob.data))
|
||||
elif blob.data is None and blob.path:
|
||||
# Audio file from disk
|
||||
audio = AudioSegment.from_file(blob.path)
|
||||
else:
|
||||
raise ValueError("Unable to get audio from blob")
|
||||
audio = _get_audio_from_blob(blob)
|
||||
|
||||
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
||||
|
||||
@ -688,3 +652,33 @@ class FasterWhisperParser(BaseBlobParser):
|
||||
**blob.metadata,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _get_audio_from_blob(blob: Blob) -> Any:
    """Get audio data from blob.

    In-memory bytes in ``blob.data`` are decoded directly; when ``blob.data``
    is ``None`` the audio is read from ``blob.path`` instead.

    Args:
        blob: Blob object containing the audio data.

    Returns:
        AudioSegment: Audio data from the blob.

    Raises:
        ImportError: If the required package `pydub` is not installed.
        ValueError: If the audio data is not found in the blob, i.e. the data
            is neither ``bytes`` nor ``None`` with a non-empty path.
    """
    # pydub is imported lazily, so the return annotation stays `Any`.
    try:
        from pydub import AudioSegment
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "pydub package not found, please install it with `pip install pydub`"
        ) from err

    if isinstance(blob.data, bytes):
        # Wrap the raw bytes in a file-like object for pydub.
        return AudioSegment.from_file(io.BytesIO(blob.data))

    if blob.data is None and blob.path:
        return AudioSegment.from_file(blob.path)

    raise ValueError("Unable to get audio from blob")
|
||||
|
Loading…
Reference in New Issue
Block a user