mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 21:47:12 +00:00
feat: Add Google Cloud Text-to-Speech Tool (#12572)
- Add Tool for [Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech) - Follows similar structure to [Eleven Labs Text2Speech](https://python.langchain.com/docs/integrations/tools/eleven_labs_tts) --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -33,6 +33,7 @@ from langchain.tools.pubmed.tool import PubmedQueryRun
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.bing_search.tool import BingSearchRun
|
||||
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
|
||||
from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
|
||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||
from langchain.tools.google_scholar.tool import GoogleScholarQueryRun
|
||||
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
|
||||
@@ -326,6 +327,10 @@ def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
|
||||
return ElevenLabsText2SpeechTool(**kwargs)
|
||||
|
||||
|
||||
def _get_google_cloud_texttospeech(**kwargs: Any) -> BaseTool:
    """Build the Google Cloud Text-to-Speech tool.

    Args:
        **kwargs: Forwarded unchanged to ``GoogleCloudTextToSpeechTool``.

    Returns:
        The constructed tool instance.
    """
    tool = GoogleCloudTextToSpeechTool(**kwargs)
    return tool
|
||||
|
||||
|
||||
_EXTRA_LLM_TOOLS: Dict[
|
||||
str,
|
||||
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
|
||||
@@ -390,6 +395,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
|
||||
["api_login", "api_password", "aiosession"],
|
||||
),
|
||||
"eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
|
||||
"google_cloud_texttospeech": (_get_google_cloud_texttospeech, []),
|
||||
}
|
||||
|
||||
|
||||
|
@@ -240,6 +240,12 @@ def _import_gmail_GmailSendMessage() -> Any:
|
||||
return GmailSendMessage
|
||||
|
||||
|
||||
def _import_google_cloud_texttospeech() -> Any:
    """Import and return the ``GoogleCloudTextToSpeechTool`` class.

    The import statement lives inside the function body, so the tool module
    is only loaded when this accessor is actually called.
    """
    from langchain.tools.google_cloud.texttospeech import (
        GoogleCloudTextToSpeechTool as tool_cls,
    )

    return tool_cls
|
||||
|
||||
|
||||
def _import_google_places_tool() -> Any:
|
||||
from langchain.tools.google_places.tool import GooglePlacesTool
|
||||
|
||||
@@ -731,6 +737,8 @@ def __getattr__(name: str) -> Any:
|
||||
return _import_gmail_GmailSearch()
|
||||
elif name == "GmailSendMessage":
|
||||
return _import_gmail_GmailSendMessage()
|
||||
elif name == "GoogleCloudTextToSpeechTool":
|
||||
return _import_google_cloud_texttospeech()
|
||||
elif name == "GooglePlacesTool":
|
||||
return _import_google_places_tool()
|
||||
elif name == "GoogleSearchResults":
|
||||
@@ -916,6 +924,7 @@ __all__ = [
|
||||
"GmailGetThread",
|
||||
"GmailSearch",
|
||||
"GmailSendMessage",
|
||||
"GoogleCloudTextToSpeechTool",
|
||||
"GooglePlacesTool",
|
||||
"GoogleSearchResults",
|
||||
"GoogleSearchRun",
|
||||
|
5
libs/langchain/langchain/tools/google_cloud/__init__.py
Normal file
5
libs/langchain/langchain/tools/google_cloud/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Google Cloud Tools."""
|
||||
|
||||
from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
|
||||
|
||||
__all__ = ["GoogleCloudTextToSpeechTool"]
|
90
libs/langchain/langchain/tools/google_cloud/texttospeech.py
Normal file
90
libs/langchain/langchain/tools/google_cloud/texttospeech.py
Normal file
@@ -0,0 +1,90 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForToolRun
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.vertexai import get_client_info
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from google.cloud import texttospeech
|
||||
|
||||
|
||||
def _import_google_cloud_texttospeech() -> Any:
|
||||
try:
|
||||
from google.cloud import texttospeech
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Cannot import google.cloud.texttospeech, please install "
|
||||
"`pip install google-cloud-texttospeech`."
|
||||
) from e
|
||||
return texttospeech
|
||||
|
||||
|
||||
def _encoding_file_extension_map(encoding: texttospeech.AudioEncoding) -> Optional[str]:
    """Look up the file extension used for an audio encoding.

    Args:
        encoding: A ``texttospeech.AudioEncoding`` member.

    Returns:
        The extension including its leading dot (``".wav"``, ``".mp3"`` or
        ``".ogg"``), or ``None`` when ``encoding`` has no entry in the table.

    Raises:
        ImportError: If ``google.cloud.texttospeech`` is not installed.
    """
    tts = _import_google_cloud_texttospeech()
    audio_encoding = tts.AudioEncoding

    # LINEAR16, MULAW and ALAW all map to ".wav".
    extension_by_encoding = {
        audio_encoding.LINEAR16: ".wav",
        audio_encoding.MP3: ".mp3",
        audio_encoding.OGG_OPUS: ".ogg",
        audio_encoding.MULAW: ".wav",
        audio_encoding.ALAW: ".wav",
    }
    return extension_by_encoding.get(encoding)
|
||||
|
||||
|
||||
class GoogleCloudTextToSpeechTool(BaseTool):
    """Tool that queries the Google Cloud Text to Speech API.

    In order to set this up, follow instructions at:
    https://cloud.google.com/text-to-speech/docs/before-you-begin
    """

    name: str = "google_cloud_texttospeech"
    description: str = (
        "A wrapper around Google Cloud Text-to-Speech. "
        "Useful for when you need to synthesize audio from text. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )

    # Text-to-Speech client; assigned in __init__ after the base initializer runs.
    _client: Any

    def __init__(self, **kwargs: Any) -> None:
        """Initialize the tool and create its Text-to-Speech client.

        Args:
            **kwargs: Forwarded unchanged to the base class initializer.

        Raises:
            ImportError: If ``google.cloud.texttospeech`` is not installed.
        """
        # Resolve the optional dependency first so a missing package fails
        # before the base initializer runs.
        tts = _import_google_cloud_texttospeech()

        super().__init__(**kwargs)

        client_cls = tts.TextToSpeechClient
        self._client = client_cls(
            client_info=get_client_info(module="text-to-speech")
        )

    def _run(
        self,
        input_text: str,
        language_code: str = "en-US",
        ssml_gender: Optional[texttospeech.SsmlVoiceGender] = None,
        audio_encoding: Optional[texttospeech.AudioEncoding] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Synthesize ``input_text`` to audio and save it to a temporary file.

        Args:
            input_text: Text to synthesize.
            language_code: Language code passed to the voice selection
                parameters.
            ssml_gender: Voice gender; a falsy value is replaced with
                ``SsmlVoiceGender.NEUTRAL``.
            audio_encoding: Output encoding; a falsy value is replaced with
                ``AudioEncoding.MP3``.
            run_manager: Accepted but not used by this method.

        Returns:
            Path of the temporary file holding the audio. The file is created
            with ``delete=False``, so it is not removed when it is closed.
        """
        tts = _import_google_cloud_texttospeech()

        if not ssml_gender:
            ssml_gender = tts.SsmlVoiceGender.NEUTRAL
        if not audio_encoding:
            audio_encoding = tts.AudioEncoding.MP3

        synthesize = self._client.synthesize_speech
        synthesis_input = tts.SynthesisInput(text=input_text)
        voice_params = tts.VoiceSelectionParams(
            language_code=language_code, ssml_gender=ssml_gender
        )
        audio_config = tts.AudioConfig(audio_encoding=audio_encoding)
        response = synthesize(
            input=synthesis_input,
            voice=voice_params,
            audio_config=audio_config,
        )

        # The extension follows the encoding; it is None for encodings that
        # have no entry in the extension table.
        suffix = _encoding_file_extension_map(audio_encoding)

        with tempfile.NamedTemporaryFile(
            mode="bx", suffix=suffix, delete=False
        ) as audio_file:
            audio_file.write(response.audio_content)
            audio_path = audio_file.name
        return audio_path
|
@@ -46,6 +46,7 @@ _EXPECTED = [
|
||||
"GmailGetThread",
|
||||
"GmailSearch",
|
||||
"GmailSendMessage",
|
||||
"GoogleCloudTextToSpeechTool",
|
||||
"GooglePlacesTool",
|
||||
"GoogleSearchResults",
|
||||
"GoogleSearchRun",
|
||||
|
Reference in New Issue
Block a user