homelab/voice-server/src/voice_server/tts.py

"""Text-to-speech service using Piper."""
import subprocess
import tempfile
import os
import logging
from pathlib import Path
from typing import Optional, Tuple
from .config import config

logger = logging.getLogger(__name__)


class TTSService:
    """Text-to-speech service that shells out to the Piper executable.

    Everything the service needs is read from the module-level ``config``
    object: the Piper executable, the voice catalogue (``available_voices``),
    the default voice and the output audio format. Construction fails fast
    with ``RuntimeError`` when Piper cannot be run or the default voice's
    files are missing.
    """

    # Upper bound, in seconds, for the ``piper --help`` probe at start-up.
    _PROBE_TIMEOUT = 10
    # Upper bound, in seconds, for a single synthesis run.
    _SYNTHESIS_TIMEOUT = 30

    def __init__(self):
        """Bind the shared configuration and verify the Piper installation.

        Raises:
            RuntimeError: If Piper cannot be executed or the default voice
                model is missing.
        """
        self.config = config
        self._validate_setup()

    def _validate_setup(self) -> None:
        """Validate that piper and voice models are available.

        Raises:
            RuntimeError: If the Piper executable cannot be started, times
                out, exits non-zero, or the default voice files fail
                validation.
        """
        # Probe the executable. OSError covers "not found" as well as
        # "not executable" / "not a directory", so every launch failure is
        # reported as the documented RuntimeError.
        try:
            result = subprocess.run(
                [self.config.piper_executable, "--help"],
                capture_output=True,
                timeout=self._PROBE_TIMEOUT,
            )
        except (subprocess.TimeoutExpired, OSError) as e:
            raise RuntimeError(f"Piper TTS not found or not working: {e}") from e

        if result.returncode != 0:
            # stderr is arbitrary bytes; never let decoding mask the real error.
            stderr = result.stderr.decode(errors="replace")
            raise RuntimeError(f"Piper TTS not working: {stderr}")

        # Called without arguments; presumably this checks the default voice
        # (the error message below assumes so).
        if not self.config.validate_voice_files():
            default_voice = self.config.default_voice
            model_path = self.config.get_voice_model_path()
            raise RuntimeError(
                f"Default voice '{default_voice}' model not found at {model_path}. "
                "Please download the voice model files."
            )

        logger.info(
            "TTS service initialized with voice: %s", self.config.default_voice
        )

    def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        speed: float = 1.0
    ) -> Tuple[bytes, str]:
        """
        Synthesize text to speech.

        The text is piped to Piper on stdin; Piper writes the audio to a
        temporary file which is read back and always removed afterwards.

        Args:
            text: Text to synthesize
            voice: Voice to use (defaults to configured default)
            speed: Speech speed multiplier; must be positive and finite.
                Passed to Piper as ``--length-scale 1/speed``.

        Returns:
            Tuple of (audio_data, audio_format)

        Raises:
            ValueError: If voice is not available or speed is not a
                positive, finite number
            RuntimeError: If synthesis fails, times out, or produces no audio
        """
        voice = voice or self.config.default_voice

        if not self.config.validate_voice_files(voice):
            available_voices = list(self.config.available_voices.keys())
            raise ValueError(
                f"Voice '{voice}' not available. Available voices: {available_voices}"
            )

        # Reject zero, negative, NaN and infinite speeds up front: they would
        # otherwise surface as ZeroDivisionError or a nonsensical length scale.
        if not 0 < speed < float("inf"):
            raise ValueError(
                f"Speed must be a positive, finite number, got {speed!r}"
            )

        model_path = self.config.get_voice_model_path(voice)
        audio_format = self.config.audio_format

        # Reserve a path for Piper to write to. delete=False because the file
        # must outlive this ``with``; it is removed in the ``finally`` below.
        with tempfile.NamedTemporaryFile(
            suffix=f".{audio_format}", delete=False
        ) as temp_file:
            temp_path = temp_file.name

        try:
            # Build piper command
            cmd = [
                self.config.piper_executable,
                "-m", str(model_path),
                "-f", temp_path,
            ]

            # Length scale is the inverse of speed: a larger scale means
            # longer (slower) speech. Omitted at the default speed.
            if speed != 1.0:
                cmd.extend(["--length-scale", str(1.0 / speed)])

            logger.debug("Running piper command: %s", " ".join(cmd))

            # Pin the pipe encoding to UTF-8 instead of inheriting the locale
            # so non-ASCII text is encoded the same way on every host; undecodable
            # stderr bytes are replaced rather than raising.
            process = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="replace",
            )

            _, stderr = process.communicate(
                input=text, timeout=self._SYNTHESIS_TIMEOUT
            )

            if process.returncode != 0:
                raise RuntimeError(f"TTS synthesis failed: {stderr}")

            # Read the generated audio file
            with open(temp_path, "rb") as f:
                audio_data = f.read()

            if not audio_data:
                raise RuntimeError("Generated audio file is empty")

            logger.info(
                "Successfully synthesized %d characters with voice '%s'",
                len(text),
                voice,
            )
            return audio_data, audio_format

        except subprocess.TimeoutExpired as e:
            # communicate() does not stop the child on timeout. Kill it, then
            # communicate() again to reap it and close the pipes; otherwise
            # every timeout leaks a zombie process and its file handles.
            process.kill()
            process.communicate()
            logger.error(
                "TTS synthesis timed out after %s seconds", self._SYNTHESIS_TIMEOUT
            )
            raise RuntimeError("TTS synthesis timed out") from e
        except Exception as e:
            logger.error("TTS synthesis error: %s", e)
            raise
        finally:
            # Best-effort cleanup: the file may already be gone, and a
            # cleanup failure must not hide the real result or error.
            try:
                os.unlink(temp_path)
            except OSError:
                pass

    def _describe_voice(self, voice_name: str, voice_config: dict) -> dict:
        """Build the summary dict shared by list_voices and get_voice_info."""
        return {
            "name": voice_name,
            "language": voice_config["language"],
            "gender": voice_config["gender"],
            "description": voice_config["description"],
            "available": self.config.validate_voice_files(voice_name),
        }

    def list_voices(self) -> dict:
        """List available voices with their information.

        Returns:
            Mapping of voice name to a dict with the keys ``name``,
            ``language``, ``gender``, ``description`` and ``available``
            (the result of validating that voice's files).
        """
        return {
            voice_name: self._describe_voice(voice_name, voice_config)
            for voice_name, voice_config in self.config.available_voices.items()
        }

    def get_voice_info(self, voice_name: str) -> dict:
        """Get information about a specific voice.

        Args:
            voice_name: Key of the voice in the configured catalogue.

        Returns:
            The same summary as in list_voices, plus ``model_path`` and
            ``config_path`` as strings.

        Raises:
            ValueError: If the voice is not in the configured catalogue.
        """
        if voice_name not in self.config.available_voices:
            raise ValueError(f"Voice '{voice_name}' not found")

        voice_config = self.config.available_voices[voice_name]
        info = self._describe_voice(voice_name, voice_config)
        info["model_path"] = str(self.config.get_voice_model_path(voice_name))
        info["config_path"] = str(self.config.get_voice_config_path(voice_name))
        return info