From cd198ac9edbd70262e2d7bf49eff5da66afbd3f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C4=90=E1=BB=97=20Quang=20Minh?=
 <59284549+shinishiho@users.noreply.github.com>
Date: Mon, 17 Feb 2025 09:26:07 +0700
Subject: [PATCH] community: add custom model for OpenAIWhisperParser (#29831)

Add a `model` parameter to OpenAIWhisperParser. It defaults to `whisper-1`
(the previously hard-coded value), so existing callers are unaffected.
Please help me update the docs and other related components of this
repo.
---
 .../langchain_community/document_loaders/parsers/audio.py   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/parsers/audio.py b/libs/community/langchain_community/document_loaders/parsers/audio.py
index 6d35336300e..bdcf72b96b9 100644
--- a/libs/community/langchain_community/document_loaders/parsers/audio.py
+++ b/libs/community/langchain_community/document_loaders/parsers/audio.py
@@ -249,6 +249,7 @@ class OpenAIWhisperParser(BaseBlobParser):
             Literal["json", "text", "srt", "verbose_json", "vtt"], None
         ] = None,
         temperature: Union[float, None] = None,
+        model: str = "whisper-1",
     ):
         self.api_key = api_key
         self.chunk_duration_threshold = chunk_duration_threshold
@@ -259,6 +260,7 @@ class OpenAIWhisperParser(BaseBlobParser):
         self.prompt = prompt
         self.response_format = response_format
         self.temperature = temperature
+        self.model = model
 
     @property
     def _create_params(self) -> Dict[str, Any]:
@@ -324,10 +326,10 @@ class OpenAIWhisperParser(BaseBlobParser):
                 try:
                     if is_openai_v1():
                         transcript = client.audio.transcriptions.create(
-                            model="whisper-1", file=file_obj, **self._create_params
+                            model=self.model, file=file_obj, **self._create_params
                         )
                     else:
-                        transcript = openai.Audio.transcribe("whisper-1", file_obj)  # type: ignore[attr-defined]
+                        transcript = openai.Audio.transcribe(self.model, file_obj)  # type: ignore[attr-defined]
                     break
                 except Exception as e:
                     attempts += 1