Add yandex stt parser (#11435)

Description: Introducing an ability to load a transcription document of audio file using [Yandex SpeechKit](https://cloud.yandex.com/en-ru/services/speechkit) Issue: None Dependencies: yandex-speechkit Tag maintainer: @rlancemartin, @eyurtsev
2025-08-23 03:22:38 +00:00 · 2023-10-12 18:42:03 +03:00 · 2023-10-12 18:42:03 +03:00 · ead9d5b55c
commit ead9d5b55c
parent 15687a28d5
1 changed files with 78 additions and 0 deletions
--- a/libs/langchain/langchain/document_loaders/parsers/audio.py
+++ b/libs/langchain/langchain/document_loaders/parsers/audio.py
@ -219,3 +219,81 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
            page_content=prediction,
            metadata={"source": blob.source},
        )
 class YandexSTTParser(BaseBlobParser):
    """Transcribe and parse audio files.
    Audio transcription is with OpenAI Whisper model."""
    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        iam_token: Optional[str] = None,
        model: str = "general",
        language: str = "auto",
    ):
        """Initialize the parser.
        Args:
            api_key: API key for a service account
            with the `ai.speechkit-stt.user` role.
            iam_token: IAM token for a service account
            with the `ai.speechkit-stt.user` role.
            model: Recognition model name.
              Defaults to general.
            language: The language in ISO 639-1 format.
              Defaults to automatic language recognition.
        Either `api_key` or `iam_token` must be provided, but not both.
        """
        if (api_key is None) == (iam_token is None):
            raise ValueError(
                "Either 'api_key' or 'iam_token' must be provided, but not both."
            )
        self.api_key = api_key
        self.iam_token = iam_token
        self.model = model
        self.language = language
    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily parse the blob."""
        try:
            from speechkit import configure_credentials, creds, model_repository
            from speechkit.stt import AudioProcessingType
        except ImportError:
            raise ImportError(
                "yandex-speechkit package not found, please install it with "
                "`pip install yandex-speechkit`"
            )
        try:
            from pydub import AudioSegment
        except ImportError:
            raise ImportError(
                "pydub package not found, please install it with " "`pip install pydub`"
            )
        if self.api_key:
            configure_credentials(
                yandex_credentials=creds.YandexCredentials(api_key=self.api_key)
            )
        else:
            configure_credentials(
                yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token)
            )
        audio = AudioSegment.from_file(blob.path)
        model = model_repository.recognition_model()
        model.model = self.model
        model.language = self.language
        model.audio_processing_type = AudioProcessingType.Full
        result = model.transcribe(audio)
        for res in result:
            yield Document(
                page_content=res.normalized_text,
                metadata={"source": blob.source},
            )