Source code for voicebox.tts.googlecloudtts

from dataclasses import dataclass, field
from io import BytesIO

from google.api_core import gapic_v1
from google.cloud.texttospeech import (
    AudioConfig,
    AudioEncoding,
    SynthesisInput,
    TextToSpeechClient,
    VoiceSelectionParams,
)

from voicebox.audio import Audio
from voicebox.ssml import SSML
from voicebox.tts.tts import TTS
from voicebox.tts.utils import get_audio_from_wav_file
from voicebox.types import StrOrSSML



[docs]
@dataclass
class GoogleCloudTTS(TTS):
    """
    TTS using `Google Cloud TTS <https://cloud.google.com/text-to-speech>`_.

    You will need to set up a Google Cloud project with billing enabled.
    See this `quickstart guide
    <https://cloud.google.com/text-to-speech/docs/create-audio-text-client-libraries#client-libraries-install-python>`_
    to get started.

    Supports `SSML <https://www.w3.org/TR/speech-synthesis/>`_: ✔
    (`docs <https://cloud.google.com/text-to-speech/docs/ssml>`_)
    """

    client: TextToSpeechClient
    voice_params: VoiceSelectionParams
    audio_config: AudioConfig = field(default_factory=AudioConfig)

    timeout: float = gapic_v1.method.DEFAULT


[docs]
    def get_speech(self, text: StrOrSSML) -> Audio:
        self.audio_config.audio_encoding = AudioEncoding.LINEAR16

        input_ = (
            SynthesisInput(ssml=text)
            if isinstance(text, SSML)
            else SynthesisInput(text=text)
        )

        response = self.client.synthesize_speech(
            input=input_,
            voice=self.voice_params,
            audio_config=self.audio_config,
            timeout=self.timeout,
        )

        with BytesIO(response.audio_content) as wav_file:
            return get_audio_from_wav_file(wav_file)