Source code for voicebox.tts.googlecloudtts

from dataclasses import dataclass, field
from io import BytesIO

from google.api_core import gapic_v1
from google.cloud.texttospeech import (
    AudioConfig,
    AudioEncoding,
    SynthesisInput,
    TextToSpeechClient,
    VoiceSelectionParams,
)

from voicebox.audio import Audio
from voicebox.ssml import SSML
from voicebox.tts.tts import TTS
from voicebox.tts.utils import get_audio_from_wav_file
from voicebox.types import StrOrSSML


@dataclass
class GoogleCloudTTS(TTS):
    """
    Text-to-speech backend built on
    `Google Cloud TTS <https://cloud.google.com/text-to-speech>`_.

    A Google Cloud project with billing enabled is required. The
    `quickstart guide <https://cloud.google.com/text-to-speech/docs/create-audio-text-client-libraries#client-libraries-install-python>`_
    walks through the setup.

    Supports `SSML <https://www.w3.org/TR/speech-synthesis/>`_: ✔
    (`docs <https://cloud.google.com/text-to-speech/docs/ssml>`_)
    """

    # Client used to issue the ``synthesize_speech`` request.
    client: TextToSpeechClient
    # Voice selection forwarded unchanged as the ``voice`` argument.
    voice_params: VoiceSelectionParams
    # Audio settings forwarded as ``audio_config``. Note that ``get_speech``
    # overwrites its ``audio_encoding`` with ``LINEAR16`` on every call.
    audio_config: AudioConfig = field(default_factory=AudioConfig)
    # Forwarded as the ``timeout`` argument of ``synthesize_speech``.
    timeout: float = gapic_v1.method.DEFAULT

    def get_speech(self, text: StrOrSSML) -> Audio:
        """
        Synthesize ``text`` through the configured client.

        Args:
            text: The content to speak. An :class:`SSML` instance is sent
                in the request's ``ssml`` field; anything else is sent in
                the ``text`` field.

        Returns:
            The result of passing the response's ``audio_content`` bytes,
            wrapped in an in-memory file, to ``get_audio_from_wav_file``.
        """
        # Force LINEAR16 before anything else; this mutates the instance's
        # ``audio_config`` in place, so the setting persists after the call.
        self.audio_config.audio_encoding = AudioEncoding.LINEAR16

        # Choose the request field according to the kind of input given.
        if isinstance(text, SSML):
            synthesis_input = SynthesisInput(ssml=text)
        else:
            synthesis_input = SynthesisInput(text=text)

        result = self.client.synthesize_speech(
            input=synthesis_input,
            voice=self.voice_params,
            audio_config=self.audio_config,
            timeout=self.timeout,
        )

        # Hand the returned bytes to the reader through a file-like object.
        with BytesIO(result.audio_content) as audio_buffer:
            return get_audio_from_wav_file(audio_buffer)