mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-07 21:50:25 +00:00
Add yandex stt parser (#11435)
Description: Adds the ability to load the transcription of an audio file as a document using [Yandex SpeechKit](https://cloud.yandex.com/en-ru/services/speechkit). Issue: None. Dependencies: yandex-speechkit. Tag maintainer: @rlancemartin, @eyurtsev
This commit is contained in:
parent
15687a28d5
commit
ead9d5b55c
@ -219,3 +219,81 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
|||||||
page_content=prediction,
|
page_content=prediction,
|
||||||
metadata={"source": blob.source},
|
metadata={"source": blob.source},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class YandexSTTParser(BaseBlobParser):
    """Transcribe and parse audio files.

    Audio transcription is performed with the Yandex SpeechKit
    speech-to-text service (via the `yandex-speechkit` package)."""

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        iam_token: Optional[str] = None,
        model: str = "general",
        language: str = "auto",
    ):
        """Initialize the parser.

        Args:
            api_key: API key for a service account
                with the `ai.speechkit-stt.user` role.
            iam_token: IAM token for a service account
                with the `ai.speechkit-stt.user` role.
            model: Recognition model name.
                Defaults to general.
            language: The language in ISO 639-1 format.
                Defaults to automatic language recognition.

        Either `api_key` or `iam_token` must be provided, but not both.

        Raises:
            ValueError: If both or neither of `api_key` and `iam_token`
                are provided.
        """
        # Exactly one credential must be supplied: the two "is None" checks
        # are equal when both are missing or both are present.
        if (api_key is None) == (iam_token is None):
            raise ValueError(
                "Either 'api_key' or 'iam_token' must be provided, but not both."
            )
        self.api_key = api_key
        self.iam_token = iam_token
        self.model = model
        self.language = language

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily parse the blob.

        Loads the audio from `blob.path`, transcribes it with the configured
        SpeechKit recognition model and yields one document per item of the
        transcription result.

        Args:
            blob: The audio blob to transcribe. Its `path` is handed to
                `pydub.AudioSegment.from_file`.

        Yields:
            A `Document` whose `page_content` is the `normalized_text` of a
            transcription result item and whose metadata carries
            `blob.source` under the `"source"` key.

        Raises:
            ImportError: If `yandex-speechkit` or `pydub` is not installed.
        """

        # Optional dependencies are imported lazily so that the module can be
        # imported without them; the original error is chained for debugging.
        try:
            from speechkit import configure_credentials, creds, model_repository
            from speechkit.stt import AudioProcessingType
        except ImportError as err:
            raise ImportError(
                "yandex-speechkit package not found, please install it with "
                "`pip install yandex-speechkit`"
            ) from err
        try:
            from pydub import AudioSegment
        except ImportError as err:
            raise ImportError(
                "pydub package not found, please install it with `pip install pydub`"
            ) from err

        # Use whichever credential was supplied at construction time.
        if self.api_key:
            credentials = creds.YandexCredentials(api_key=self.api_key)
        else:
            credentials = creds.YandexCredentials(iam_token=self.iam_token)
        configure_credentials(yandex_credentials=credentials)

        # NOTE(review): this relies on `blob.path` being set; presumably
        # in-memory blobs without a path are not supported here -- confirm.
        audio = AudioSegment.from_file(blob.path)

        model = model_repository.recognition_model()

        model.model = self.model
        model.language = self.language
        model.audio_processing_type = AudioProcessingType.Full

        result = model.transcribe(audio)

        for res in result:
            yield Document(
                page_content=res.normalized_text,
                metadata={"source": blob.source},
            )
|
||||||
|
Loading…
Reference in New Issue
Block a user