mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-07 21:50:25 +00:00
Add yandex stt parser (#11435)
Description: Adds the ability to load the transcription of an audio file as a document using [Yandex SpeechKit](https://cloud.yandex.com/en-ru/services/speechkit). Issue: None. Dependencies: yandex-speechkit. Tag maintainer: @rlancemartin, @eyurtsev
This commit is contained in:
parent
15687a28d5
commit
ead9d5b55c
@ -219,3 +219,81 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
||||
page_content=prediction,
|
||||
metadata={"source": blob.source},
|
||||
)
|
||||
|
||||
|
||||
class YandexSTTParser(BaseBlobParser):
    """Transcribe and parse audio files.

    Audio transcription is performed with the Yandex SpeechKit
    speech-to-text service (``yandex-speechkit`` package). The audio file is
    decoded with ``pydub`` before it is handed to the recognition model.
    """

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        iam_token: Optional[str] = None,
        model: str = "general",
        language: str = "auto",
    ) -> None:
        """Initialize the parser.

        Args:
            api_key: API key for a service account
                with the `ai.speechkit-stt.user` role.
            iam_token: IAM token for a service account
                with the `ai.speechkit-stt.user` role.
            model: Recognition model name.
                Defaults to general.
            language: The language in ISO 639-1 format.
                Defaults to automatic language recognition.

        Either `api_key` or `iam_token` must be provided, but not both.

        Raises:
            ValueError: If neither or both of `api_key` and `iam_token`
                are provided.
        """
        # Exactly one of the two credentials must be set: the comparison is
        # true both when neither is given and when both are given.
        if (api_key is None) == (iam_token is None):
            raise ValueError(
                "Either 'api_key' or 'iam_token' must be provided, but not both."
            )
        self.api_key = api_key
        self.iam_token = iam_token
        self.model = model
        self.language = language

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily parse the blob.

        Loads the audio referenced by ``blob.path``, sends it to the
        configured SpeechKit recognition model and yields one document per
        item returned by the recognizer.

        Args:
            blob: The audio blob to transcribe; its ``path`` is read with
                ``pydub`` and its ``source`` is stored in the metadata.

        Yields:
            A `Document` whose ``page_content`` is the normalized text of a
            recognition result and whose metadata holds the blob source.

        Raises:
            ImportError: If `yandex-speechkit` or `pydub` is not installed.
        """
        # Optional dependencies are imported lazily so the module can be
        # imported without them installed.
        try:
            from speechkit import configure_credentials, creds, model_repository
            from speechkit.stt import AudioProcessingType
        except ImportError as err:
            raise ImportError(
                "yandex-speechkit package not found, please install it with "
                "`pip install yandex-speechkit`"
            ) from err
        try:
            from pydub import AudioSegment
        except ImportError as err:
            raise ImportError(
                "pydub package not found, please install it with `pip install pydub`"
            ) from err

        # Select the credential with the same `is None` test that __init__
        # validates with, so an empty-string api_key is never silently
        # replaced by a missing (None) IAM token.
        if self.api_key is not None:
            credentials = creds.YandexCredentials(api_key=self.api_key)
        else:
            credentials = creds.YandexCredentials(iam_token=self.iam_token)
        configure_credentials(yandex_credentials=credentials)

        audio = AudioSegment.from_file(blob.path)

        model = model_repository.recognition_model()
        model.model = self.model
        model.language = self.language
        model.audio_processing_type = AudioProcessingType.Full

        for res in model.transcribe(audio):
            yield Document(
                page_content=res.normalized_text,
                metadata={"source": blob.source},
            )
|
||||
|
Loading…
Reference in New Issue
Block a user