From a0970d8d7ec2c6a2e662d4593b70b81c6365a4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thor=20=E9=9B=B7=E7=A5=9E=20Schaeff?= <5748289+thorwebdev@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:54:34 +0800 Subject: [PATCH] [WIP] chore: update ElevenLabs tool. (#29722) Thank you for contributing to LangChain! - [ ] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, etc. is being modified. Use "docs: ..." for purely docs changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: Chester Curme --- .../integrations/tools/eleven_labs_tts.ipynb | 4 +-- .../agent_toolkits/load_tools.py | 2 +- .../tools/eleven_labs/models.py | 5 ++-- .../tools/eleven_labs/text2speech.py | 27 +++++++++++++------ 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/docs/docs/integrations/tools/eleven_labs_tts.ipynb b/docs/docs/integrations/tools/eleven_labs_tts.ipynb index ff70757d258..ef1dc59e511 100644 --- a/docs/docs/integrations/tools/eleven_labs_tts.ipynb +++ b/docs/docs/integrations/tools/eleven_labs_tts.ipynb @@ -5,7 +5,7 @@ "id": "a991a6f8-1897-4f49-a191-ae3bdaeda856", "metadata": {}, "source": [ - "# Eleven Labs Text2Speech\n", + "# ElevenLabs Text2Speech\n", "\n", "This notebook shows how to interact with the `ElevenLabs API` to achieve text-to-speech capabilities." ] @@ -37,7 +37,7 @@ "source": [ "import os\n", "\n", - "os.environ[\"ELEVEN_API_KEY\"] = \"\"" + "os.environ[\"ELEVENLABS_API_KEY\"] = \"\"" ] }, { diff --git a/libs/community/langchain_community/agent_toolkits/load_tools.py b/libs/community/langchain_community/agent_toolkits/load_tools.py index d7f5fc3cfd1..db3f7a425b3 100644 --- a/libs/community/langchain_community/agent_toolkits/load_tools.py +++ b/libs/community/langchain_community/agent_toolkits/load_tools.py @@ -557,7 +557,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st _get_dataforseo_api_search_json, ["api_login", "api_password", "aiosession"], ), - "eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]), + "eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["elevenlabs_api_key"]), "google_cloud_texttospeech": (_get_google_cloud_texttospeech, []), "read_file": (_get_file_management_tool, []), "reddit_search": ( diff --git a/libs/community/langchain_community/tools/eleven_labs/models.py b/libs/community/langchain_community/tools/eleven_labs/models.py index c977b2972f7..72e699a7810 100644 --- a/libs/community/langchain_community/tools/eleven_labs/models.py +++ b/libs/community/langchain_community/tools/eleven_labs/models.py @@ -4,5 +4,6 @@ from enum import Enum class ElevenLabsModel(str, Enum): """Models available for Eleven Labs Text2Speech.""" - MULTI_LINGUAL = "eleven_multilingual_v1" - MONO_LINGUAL = "eleven_monolingual_v1" + MULTI_LINGUAL = "eleven_multilingual_v2" + MULTI_LINGUAL_FLASH = "eleven_flash_v2_5" + MONO_LINGUAL = "eleven_flash_v2" diff --git a/libs/community/langchain_community/tools/eleven_labs/text2speech.py b/libs/community/langchain_community/tools/eleven_labs/text2speech.py index 9f56647caf5..b6e51061a71 100644 --- a/libs/community/langchain_community/tools/eleven_labs/text2speech.py +++ b/libs/community/langchain_community/tools/eleven_labs/text2speech.py @@ -21,24 +21,26 @@ def _import_elevenlabs() -> Any: class ElevenLabsModel(str, Enum): """Models available for Eleven Labs Text2Speech.""" - MULTI_LINGUAL = "eleven_multilingual_v1" - MONO_LINGUAL = "eleven_monolingual_v1" + MULTI_LINGUAL = "eleven_multilingual_v2" + MULTI_LINGUAL_FLASH = "eleven_flash_v2_5" + MONO_LINGUAL = "eleven_flash_v2" class ElevenLabsText2SpeechTool(BaseTool): # type: ignore[override] """Tool that queries the Eleven Labs Text2Speech API. In order to set this up, follow instructions at: - https://docs.elevenlabs.io/welcome/introduction + https://elevenlabs.io/docs """ model: Union[ElevenLabsModel, str] = ElevenLabsModel.MULTI_LINGUAL + voice: str = "JBFqnCBsd6RMkjVDRZzb" name: str = "eleven_labs_text2speech" description: str = ( "A wrapper around Eleven Labs Text2Speech. " "Useful for when you need to convert text to speech. " - "It supports multiple languages, including English, German, Polish, " + "It supports more than 30 languages, including English, German, Polish, " "Spanish, Italian, French, Portuguese, and Hindi. " ) @@ -46,7 +48,7 @@ class ElevenLabsText2SpeechTool(BaseTool): # type: ignore[override] @classmethod def validate_environment(cls, values: Dict) -> Any: """Validate that api key exists in environment.""" - _ = get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY") + _ = get_from_dict_or_env(values, "elevenlabs_api_key", "ELEVENLABS_API_KEY") return values @@ -55,10 +57,16 @@ class ElevenLabsText2SpeechTool(BaseTool): # type: ignore[override] ) -> str: """Use the tool.""" elevenlabs = _import_elevenlabs() + client = elevenlabs.client.ElevenLabs() try: - speech = elevenlabs.generate(text=query, model=self.model) + speech = client.text_to_speech.convert( + text=query, + model_id=self.model, + voice_id=self.voice, + output_format="mp3_44100_128", + ) with tempfile.NamedTemporaryFile( - mode="bx", suffix=".wav", delete=False + mode="bx", suffix=".mp3", delete=False ) as f: f.write(speech) return f.name @@ -77,5 +85,8 @@ class ElevenLabsText2SpeechTool(BaseTool): # type: ignore[override] """Stream the text as speech as it is generated. Play the text in your speakers.""" elevenlabs = _import_elevenlabs() - speech_stream = elevenlabs.generate(text=query, model=self.model, stream=True) + client = elevenlabs.client.ElevenLabs() + speech_stream = client.text_to_speech.convert_as_stream( + text=query, model_id=self.model, voice_id=self.voice + ) elevenlabs.stream(speech_stream)