feat: Add Google Cloud Text-to-Speech Tool (#12572)

- Add Tool for [Google Cloud
Text-to-Speech](https://cloud.google.com/text-to-speech)
- Follows a similar structure to the [Eleven Labs
Text2Speech](https://python.langchain.com/docs/integrations/tools/eleven_labs_tts) tool

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Holt Skinner
2023-10-30 16:05:39 -05:00
committed by GitHub
parent 1f2c672d4a
commit e53b9ccd70
6 changed files with 205 additions and 0 deletions

View File

@@ -33,6 +33,7 @@ from langchain.tools.pubmed.tool import PubmedQueryRun
from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.google_scholar.tool import GoogleScholarQueryRun
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
@@ -326,6 +327,10 @@ def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
return ElevenLabsText2SpeechTool(**kwargs)
def _get_google_cloud_texttospeech(**kwargs: Any) -> BaseTool:
    """Build the Google Cloud Text-to-Speech tool.

    Args:
        **kwargs: Passed through unchanged to ``GoogleCloudTextToSpeechTool``.

    Returns:
        The newly constructed tool instance.
    """
    tts_tool: BaseTool = GoogleCloudTextToSpeechTool(**kwargs)
    return tts_tool
_EXTRA_LLM_TOOLS: Dict[
str,
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
@@ -390,6 +395,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
["api_login", "api_password", "aiosession"],
),
"eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
"google_cloud_texttospeech": (_get_google_cloud_texttospeech, []),
}

View File

@@ -240,6 +240,12 @@ def _import_gmail_GmailSendMessage() -> Any:
return GmailSendMessage
def _import_google_cloud_texttospeech() -> Any:
    """Import ``GoogleCloudTextToSpeechTool`` on demand and return the class.

    The import is performed inside the function so that the tool module is
    only loaded when this function is called.
    """
    from langchain.tools.google_cloud.texttospeech import (
        GoogleCloudTextToSpeechTool as tool_class,
    )

    return tool_class
def _import_google_places_tool() -> Any:
from langchain.tools.google_places.tool import GooglePlacesTool
@@ -731,6 +737,8 @@ def __getattr__(name: str) -> Any:
return _import_gmail_GmailSearch()
elif name == "GmailSendMessage":
return _import_gmail_GmailSendMessage()
elif name == "GoogleCloudTextToSpeechTool":
return _import_google_cloud_texttospeech()
elif name == "GooglePlacesTool":
return _import_google_places_tool()
elif name == "GoogleSearchResults":
@@ -916,6 +924,7 @@ __all__ = [
"GmailGetThread",
"GmailSearch",
"GmailSendMessage",
"GoogleCloudTextToSpeechTool",
"GooglePlacesTool",
"GoogleSearchResults",
"GoogleSearchRun",

View File

@@ -0,0 +1,5 @@
"""Google Cloud Tools."""
from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
__all__ = ["GoogleCloudTextToSpeechTool"]

View File

@@ -0,0 +1,90 @@
from __future__ import annotations
import tempfile
from typing import TYPE_CHECKING, Any, Optional
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
from langchain.utilities.vertexai import get_client_info
if TYPE_CHECKING:
from google.cloud import texttospeech
def _import_google_cloud_texttospeech() -> Any:
try:
from google.cloud import texttospeech
except ImportError as e:
raise ImportError(
"Cannot import google.cloud.texttospeech, please install "
"`pip install google-cloud-texttospeech`."
) from e
return texttospeech
def _encoding_file_extension_map(
    encoding: texttospeech.AudioEncoding,
) -> Optional[str]:
    """Look up the file extension used for a given audio encoding.

    Args:
        encoding: The ``texttospeech.AudioEncoding`` member to look up.

    Returns:
        The extension including the leading dot (``".wav"``, ``".mp3"`` or
        ``".ogg"``), or ``None`` when the encoding is not in the table.
    """
    tts = _import_google_cloud_texttospeech()
    audio = tts.AudioEncoding

    # LINEAR16, MULAW and ALAW all map to the ".wav" extension.
    extension_by_encoding = {
        wav_member: ".wav" for wav_member in (audio.LINEAR16, audio.MULAW, audio.ALAW)
    }
    extension_by_encoding[audio.MP3] = ".mp3"
    extension_by_encoding[audio.OGG_OPUS] = ".ogg"

    return extension_by_encoding.get(encoding)
class GoogleCloudTextToSpeechTool(BaseTool):
    """Tool that queries the Google Cloud Text to Speech API.

    In order to set this up, follow instructions at:
    https://cloud.google.com/text-to-speech/docs/before-you-begin
    """

    name: str = "google_cloud_texttospeech"
    description: str = (
        "A wrapper around Google Cloud Text-to-Speech. "
        "Useful for when you need to synthesize audio from text. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )

    # Text-to-Speech API client; assigned in ``__init__``.
    _client: Any

    def __init__(self, **kwargs: Any) -> None:
        """Initializes private fields.

        Args:
            **kwargs: Forwarded unchanged to the ``BaseTool`` constructor.

        Raises:
            ImportError: If ``google.cloud.texttospeech`` cannot be imported.
        """
        # Resolve the optional dependency first so a missing package is
        # reported before any base-class initialization runs.
        texttospeech = _import_google_cloud_texttospeech()

        super().__init__(**kwargs)

        self._client = texttospeech.TextToSpeechClient(
            client_info=get_client_info(module="text-to-speech")
        )

    def _run(
        self,
        input_text: str,
        language_code: str = "en-US",
        ssml_gender: Optional[texttospeech.SsmlVoiceGender] = None,
        audio_encoding: Optional[texttospeech.AudioEncoding] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Synthesizes ``input_text`` with the Text-to-Speech client and writes
        the returned audio bytes to a new temporary file.

        Args:
            input_text: The text to synthesize.
            language_code: Language code passed to the voice selection
                parameters. Defaults to ``"en-US"``.
            ssml_gender: Requested voice gender. ``None`` selects
                ``SsmlVoiceGender.NEUTRAL``; any explicitly passed member is
                forwarded as given.
            audio_encoding: Encoding of the produced audio. A missing (or
                falsy) value selects ``AudioEncoding.MP3``.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            The path of the temporary file holding the synthesized audio.
            The file is created with ``delete=False``, so it is not removed
            automatically when it is closed.
        """
        texttospeech = _import_google_cloud_texttospeech()

        # Check for None explicitly instead of relying on truthiness: the
        # zero-valued SSML_VOICE_GENDER_UNSPECIFIED member is falsy, so an
        # ``or`` fallback would silently replace an explicit caller choice.
        if ssml_gender is None:
            ssml_gender = texttospeech.SsmlVoiceGender.NEUTRAL
        # The truthiness fallback is intentionally kept here so that existing
        # callers passing a falsy encoding continue to get MP3 output.
        audio_encoding = audio_encoding or texttospeech.AudioEncoding.MP3

        response = self._client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=input_text),
            voice=texttospeech.VoiceSelectionParams(
                language_code=language_code, ssml_gender=ssml_gender
            ),
            audio_config=texttospeech.AudioConfig(audio_encoding=audio_encoding),
        )

        # ``suffix`` may be None when the encoding has no known extension;
        # the temporary file is then created without one.
        suffix = _encoding_file_extension_map(audio_encoding)

        with tempfile.NamedTemporaryFile(mode="bx", suffix=suffix, delete=False) as f:
            f.write(response.audio_content)
        return f.name

View File

@@ -46,6 +46,7 @@ _EXPECTED = [
"GmailGetThread",
"GmailSearch",
"GmailSendMessage",
"GoogleCloudTextToSpeechTool",
"GooglePlacesTool",
"GoogleSearchResults",
"GoogleSearchRun",