Add local support for audio models (PR #7329) (#7591)

- Description: run the poetry dependencies - Issue: #7329 - Dependencies: any dependencies required for this change, - Tag maintainer: @rlancemartin --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-09-07 22:11:51 +00:00 · 2023-08-02 09:24:53 +01:00
parent d2adec3818
commit 5c516945d0
4 changed files with 341 additions and 14 deletions
--- a/docs/extras/integrations/document_loaders/youtube_audio.ipynb
+++ b/docs/extras/integrations/document_loaders/youtube_audio.ipynb
@@ -1,6 +1,7 @@
 {
 "cells": [
  {
+   "attachments": {},
   "cell_type": "markdown",
   "id": "e48afb8d",
   "metadata": {},
@@ -11,7 +12,8 @@
    "\n",
    "Below we show how to easily go from a YouTube url to text to chat!\n",
    "\n",
-    "We wil use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text.\n",
+    "We will use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text,\n",
+    "and the `OpenAIWhisperParserLocal` for local support and running on private clouds or on-premises.\n",
    "\n",
    "Note: You will need to have an `OPENAI_API_KEY` supplied."
   ]
@@ -24,7 +26,7 @@
   "outputs": [],
   "source": [
    "from langchain.document_loaders.generic import GenericLoader\n",
-    "from langchain.document_loaders.parsers import OpenAIWhisperParser\n",
+    "from langchain.document_loaders.parsers import OpenAIWhisperParser, OpenAIWhisperParserLocal\n",
    "from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader"
   ]
  },
@@ -46,7 +48,8 @@
   "outputs": [],
   "source": [
    "! pip install yt_dlp\n",
-    "! pip install pydub"
+    "! pip install pydub\n",
+    "! pip install librosa"
   ]
  },
  {
@@ -63,6 +66,18 @@
    "Let's take the first lecture of Andrej Karpathy's YouTube course as an example! "
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8682f256",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set a flag to switch between local and remote parsing\n",
+    "# change this to True if you want to use local parsing\n",
+    "local = False"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
@@ -102,7 +117,10 @@
    "save_dir = \"~/Downloads/YouTube\"\n",
    "\n",
    "# Transcribe the videos to text\n",
-    "loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
+    "if local:\n",
+    "    loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParserLocal())\n",
+    "else:\n",
+    "    loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
    "docs = loader.load()"
   ]
  },
@@ -275,7 +293,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
@@ -289,7 +307,12 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.10.11"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777"
+   }
  }
 },
 "nbformat": 4,