mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-17 16:39:52 +00:00
community: support in-memory data (Blob.from_data) in all audio parsers (#30262)
OpenAIWhisperParser, OpenAIWhisperParserLocal, and YandexSTTParser do not handle in-memory audio data (loaded via Blob.from_data) correctly: they require Blob.path to be set and always read the AudioSegment from the file system. So far, only FasterWhisperParser handles in-memory data correctly. I changed OpenAIWhisperParser, OpenAIWhisperParserLocal, and YandexSTTParser to match FasterWhisperParser. Thanks for reviewing the PR! Co-authored-by: qonnop <qonnop@users.noreply.github.com>
This commit is contained in:
parent
98a9ef19ec
commit
036f00dc92
@ -281,12 +281,8 @@ class OpenAIWhisperParser(BaseBlobParser):
|
|||||||
raise ImportError(
|
raise ImportError(
|
||||||
"openai package not found, please install it with `pip install openai`"
|
"openai package not found, please install it with `pip install openai`"
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
from pydub import AudioSegment
|
audio = _get_audio_from_blob(blob)
|
||||||
except ImportError:
|
|
||||||
raise ImportError(
|
|
||||||
"pydub package not found, please install it with `pip install pydub`"
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_openai_v1():
|
if is_openai_v1():
|
||||||
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
|
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
|
||||||
@ -298,9 +294,6 @@ class OpenAIWhisperParser(BaseBlobParser):
|
|||||||
if self.base_url:
|
if self.base_url:
|
||||||
openai.api_base = self.base_url
|
openai.api_base = self.base_url
|
||||||
|
|
||||||
# Audio file from disk
|
|
||||||
|
|
||||||
audio = AudioSegment.from_file(blob.path)
|
|
||||||
# Define the duration of each chunk in minutes
|
# Define the duration of each chunk in minutes
|
||||||
# Need to meet 25MB size limit for Whisper API
|
# Need to meet 25MB size limit for Whisper API
|
||||||
chunk_duration = 20
|
chunk_duration = 20
|
||||||
@ -451,13 +444,6 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
|||||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||||
"""Lazily parse the blob."""
|
"""Lazily parse the blob."""
|
||||||
|
|
||||||
try:
|
|
||||||
from pydub import AudioSegment
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError(
|
|
||||||
"pydub package not found, please install it with `pip install pydub`"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import librosa
|
import librosa
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -466,8 +452,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
|||||||
"`pip install librosa`"
|
"`pip install librosa`"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Audio file from disk
|
audio = _get_audio_from_blob(blob)
|
||||||
audio = AudioSegment.from_file(blob.path)
|
|
||||||
|
|
||||||
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
||||||
|
|
||||||
@ -529,12 +514,8 @@ class YandexSTTParser(BaseBlobParser):
|
|||||||
"yandex-speechkit package not found, please install it with "
|
"yandex-speechkit package not found, please install it with "
|
||||||
"`pip install yandex-speechkit`"
|
"`pip install yandex-speechkit`"
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
from pydub import AudioSegment
|
audio = _get_audio_from_blob(blob)
|
||||||
except ImportError:
|
|
||||||
raise ImportError(
|
|
||||||
"pydub package not found, please install it with `pip install pydub`"
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.api_key:
|
if self.api_key:
|
||||||
configure_credentials(
|
configure_credentials(
|
||||||
@ -545,8 +526,6 @@ class YandexSTTParser(BaseBlobParser):
|
|||||||
yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token)
|
yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token)
|
||||||
)
|
)
|
||||||
|
|
||||||
audio = AudioSegment.from_file(blob.path)
|
|
||||||
|
|
||||||
model = model_repository.recognition_model()
|
model = model_repository.recognition_model()
|
||||||
|
|
||||||
model.model = self.model
|
model.model = self.model
|
||||||
@ -645,13 +624,6 @@ class FasterWhisperParser(BaseBlobParser):
|
|||||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
||||||
"""Lazily parse the blob."""
|
"""Lazily parse the blob."""
|
||||||
|
|
||||||
try:
|
|
||||||
from pydub import AudioSegment
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError(
|
|
||||||
"pydub package not found, please install it with `pip install pydub`"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -660,15 +632,7 @@ class FasterWhisperParser(BaseBlobParser):
|
|||||||
"`pip install faster-whisper`"
|
"`pip install faster-whisper`"
|
||||||
)
|
)
|
||||||
|
|
||||||
# get the audio
|
audio = _get_audio_from_blob(blob)
|
||||||
if isinstance(blob.data, bytes):
|
|
||||||
# blob contains the audio
|
|
||||||
audio = AudioSegment.from_file(io.BytesIO(blob.data))
|
|
||||||
elif blob.data is None and blob.path:
|
|
||||||
# Audio file from disk
|
|
||||||
audio = AudioSegment.from_file(blob.path)
|
|
||||||
else:
|
|
||||||
raise ValueError("Unable to get audio from blob")
|
|
||||||
|
|
||||||
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
||||||
|
|
||||||
@ -688,3 +652,33 @@ class FasterWhisperParser(BaseBlobParser):
|
|||||||
**blob.metadata,
|
**blob.metadata,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_audio_from_blob(blob: Blob) -> Any:
    """Load the audio carried by a blob as a pydub ``AudioSegment``.

    In-memory bytes are used when present; the file at ``blob.path`` is read
    only when the blob carries no data at all.

    Args:
        blob: Blob object holding the audio, either as in-memory bytes or as
            a path to a file.

    Returns:
        AudioSegment: Audio loaded from the blob.

    Raises:
        ImportError: If the required package `pydub` is not installed.
        ValueError: If the blob has neither bytes data nor a usable path.
    """
    try:
        from pydub import AudioSegment
    except ImportError:
        raise ImportError(
            "pydub package not found, please install it with `pip install pydub`"
        )

    payload = blob.data

    if isinstance(payload, bytes):
        # In-memory audio (e.g. a blob created with Blob.from_data).
        return AudioSegment.from_file(io.BytesIO(payload))

    if payload is None and blob.path:
        # No in-memory data: read the audio from the file system.
        return AudioSegment.from_file(blob.path)

    raise ValueError("Unable to get audio from blob")
|
||||||
|
Loading…
Reference in New Issue
Block a user