From cd198ac9edbd70262e2d7bf49eff5da66afbd3f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90=E1=BB=97=20Quang=20Minh?= <59284549+shinishiho@users.noreply.github.com> Date: Mon, 17 Feb 2025 09:26:07 +0700 Subject: [PATCH] community: add custom model for OpenAIWhisperParser (#29831) Add a `model` parameter to OpenAIWhisperParser so callers can choose the transcription model. It defaults to `whisper-1`, the previously hard-coded value. Please help me update the docs and other related components of this repo. --- .../langchain_community/document_loaders/parsers/audio.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/parsers/audio.py b/libs/community/langchain_community/document_loaders/parsers/audio.py index 6d35336300e..bdcf72b96b9 100644 --- a/libs/community/langchain_community/document_loaders/parsers/audio.py +++ b/libs/community/langchain_community/document_loaders/parsers/audio.py @@ -249,6 +249,7 @@ class OpenAIWhisperParser(BaseBlobParser): Literal["json", "text", "srt", "verbose_json", "vtt"], None ] = None, temperature: Union[float, None] = None, + model: str = "whisper-1", ): self.api_key = api_key self.chunk_duration_threshold = chunk_duration_threshold @@ -259,6 +260,7 @@ class OpenAIWhisperParser(BaseBlobParser): self.prompt = prompt self.response_format = response_format self.temperature = temperature + self.model = model @property def _create_params(self) -> Dict[str, Any]: @@ -324,10 +326,10 @@ class OpenAIWhisperParser(BaseBlobParser): try: if is_openai_v1(): transcript = client.audio.transcriptions.create( - model="whisper-1", file=file_obj, **self._create_params + model=self.model, file=file_obj, **self._create_params ) else: - transcript = openai.Audio.transcribe("whisper-1", file_obj) # type: ignore[attr-defined] + transcript = openai.Audio.transcribe(self.model, file_obj) # type: ignore[attr-defined] break except Exception as e: attempts += 1