mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-15 14:11:14 +00:00
refactor: The first refactored version for sdk release (#907)
Co-authored-by: chengfangyin2 <chengfangyin3@jd.com>
This commit is contained in:
89
dbgpt/util/speech/eleven_labs.py
Normal file
89
dbgpt/util/speech/eleven_labs.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""ElevenLabs speech module"""
|
||||
import os
|
||||
import logging
|
||||
import requests
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
from dbgpt.util.speech.base import VoiceBase
|
||||
|
||||
PLACEHOLDERS = {"your-voice-id"}
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ElevenLabsSpeech(VoiceBase):
|
||||
"""ElevenLabs speech class"""
|
||||
|
||||
def _setup(self) -> None:
|
||||
"""Set up the voices, API key, etc.
|
||||
|
||||
Returns:
|
||||
None: None
|
||||
"""
|
||||
|
||||
cfg = Config()
|
||||
default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
|
||||
voice_options = {
|
||||
"Rachel": "21m00Tcm4TlvDq8ikWAM",
|
||||
"Domi": "AZnzlk1XvdvUeBnXmlld",
|
||||
"Bella": "EXAVITQu4vr4xnSDxMaL",
|
||||
"Antoni": "ErXwobaYiN019PkySvjV",
|
||||
"Elli": "MF3mGyEYCl7XYWbV9V6O",
|
||||
"Josh": "TxGEqnHWrfWFTfGW9XjX",
|
||||
"Arnold": "VR6AewLTigWG4xSOukaG",
|
||||
"Adam": "pNInz6obpgDQGcFmaJgB",
|
||||
"Sam": "yoZ06aMxZJJ28mfd3POQ",
|
||||
}
|
||||
self._headers = {
|
||||
"Content-Type": "application/json",
|
||||
"xi-api_v1-key": cfg.elevenlabs_api_key,
|
||||
}
|
||||
self._voices = default_voices.copy()
|
||||
if cfg.elevenlabs_voice_1_id in voice_options:
|
||||
cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id]
|
||||
if cfg.elevenlabs_voice_2_id in voice_options:
|
||||
cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id]
|
||||
self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0)
|
||||
self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1)
|
||||
|
||||
def _use_custom_voice(self, voice, voice_index) -> None:
|
||||
"""Use a custom voice if provided and not a placeholder
|
||||
|
||||
Args:
|
||||
voice (str): The voice ID
|
||||
voice_index (int): The voice index
|
||||
|
||||
Returns:
|
||||
None: None
|
||||
"""
|
||||
# Placeholder values that should be treated as empty
|
||||
if voice and voice not in PLACEHOLDERS:
|
||||
self._voices[voice_index] = voice
|
||||
|
||||
def _speech(self, text: str, voice_index: int = 0) -> bool:
|
||||
"""Speak text using elevenlabs.io's API
|
||||
|
||||
Args:
|
||||
text (str): The text to speak
|
||||
voice_index (int, optional): The voice to use. Defaults to 0.
|
||||
|
||||
Returns:
|
||||
bool: True if the request was successful, False otherwise
|
||||
"""
|
||||
from playsound import playsound
|
||||
|
||||
tts_url = (
|
||||
f"https://api.elevenlabs.io/v1/text-to-speech/{self._voices[voice_index]}"
|
||||
)
|
||||
response = requests.post(tts_url, headers=self._headers, json={"text": text})
|
||||
|
||||
if response.status_code == 200:
|
||||
with open("speech.mpeg", "wb") as f:
|
||||
f.write(response.content)
|
||||
playsound("speech.mpeg", True)
|
||||
os.remove("speech.mpeg")
|
||||
return True
|
||||
else:
|
||||
logger.warn("Request failed with status code:", response.status_code)
|
||||
logger.info("Response content:", response.content)
|
||||
return False
|
Reference in New Issue
Block a user