Add ElevenLabs text to speech tool (#10525)

This commit is contained in:
Bagatur
2023-09-12 23:11:04 -07:00
committed by GitHub
7 changed files with 328 additions and 0 deletions

View File

@@ -32,6 +32,7 @@ from langchain.tools.requests.tool import (
RequestsPostTool,
RequestsPutTool,
)
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
from langchain.tools.scenexplain.tool import SceneXplainTool
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.shell.tool import ShellTool
@@ -285,6 +286,10 @@ def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
return DataForSeoAPISearchResults(api_wrapper=DataForSeoAPIWrapper(**kwargs))
def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
return ElevenLabsText2SpeechTool(**kwargs)
_EXTRA_LLM_TOOLS: Dict[
str,
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
@@ -340,6 +345,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
_get_dataforseo_api_search_json,
["api_login", "api_password", "aiosession"],
),
"eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
}

View File

@@ -44,6 +44,7 @@ from langchain.tools.edenai import (
EdenAiTextToSpeechTool,
EdenaiTool,
)
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
from langchain.tools.file_management import (
CopyFileTool,
DeleteFileTool,
@@ -167,6 +168,7 @@ __all__ = [
"EdenAiSpeechToTextTool",
"EdenAiTextModerationTool",
"EdenaiTool",
"ElevenLabsText2SpeechTool",
"ExtractHyperlinksTool",
"ExtractTextTool",
"FileSearchTool",

View File

@@ -0,0 +1,5 @@
"""Eleven Labs Services Tools."""
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
__all__ = ["ElevenLabsText2SpeechTool"]

View File

@@ -0,0 +1,8 @@
from enum import Enum
class ElevenLabsModel(str, Enum):
"""Models available for Eleven Labs Text2Speech."""
MULTI_LINGUAL = "eleven_multilingual_v1"
MONO_LINGUAL = "eleven_monolingual_v1"

View File

@@ -0,0 +1,80 @@
import tempfile
from enum import Enum
from typing import Any, Dict, Optional, Union
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.pydantic_v1 import root_validator
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env
def _import_elevenlabs() -> Any:
try:
import elevenlabs
except ImportError as e:
raise ImportError(
"Cannot import elevenlabs, please install `pip install elevenlabs`."
) from e
return elevenlabs
class ElevenLabsModel(str, Enum):
"""Models available for Eleven Labs Text2Speech."""
MULTI_LINGUAL = "eleven_multilingual_v1"
MONO_LINGUAL = "eleven_monolingual_v1"
class ElevenLabsText2SpeechTool(BaseTool):
"""Tool that queries the Eleven Labs Text2Speech API.
In order to set this up, follow instructions at:
https://docs.elevenlabs.io/welcome/introduction
"""
model: Union[ElevenLabsModel, str] = ElevenLabsModel.MULTI_LINGUAL
name: str = "eleven_labs_text2speech"
description: str = (
"A wrapper around Eleven Labs Text2Speech. "
"Useful for when you need to convert text to speech. "
"It supports multiple languages, including English, German, Polish, "
"Spanish, Italian, French, Portuguese, and Hindi. "
)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key exists in environment."""
_ = get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY")
return values
def _run(
self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
) -> str:
"""Use the tool."""
elevenlabs = _import_elevenlabs()
try:
speech = elevenlabs.generate(text=query, model=self.model)
with tempfile.NamedTemporaryFile(
mode="bx", suffix=".wav", delete=False
) as f:
f.write(speech)
return f.name
except Exception as e:
raise RuntimeError(f"Error while running ElevenLabsText2SpeechTool: {e}")
def play(self, speech_file: str) -> None:
"""Play the text as speech."""
elevenlabs = _import_elevenlabs()
with open(speech_file, mode="rb") as f:
speech = f.read()
elevenlabs.play(speech)
def stream_speech(self, query: str) -> None:
"""Stream the text as speech as it is generated.
Play the text in your speakers."""
elevenlabs = _import_elevenlabs()
speech_stream = elevenlabs.generate(text=query, model=self.model, stream=True)
elevenlabs.stream(speech_stream)

View File

@@ -36,6 +36,7 @@ _EXPECTED = [
"EdenAiTextModerationTool",
"EdenAiTextToSpeechTool",
"EdenaiTool",
"ElevenLabsText2SpeechTool",
"ExtractHyperlinksTool",
"ExtractTextTool",
"FileSearchTool",