Files
homelab/voice-server/src/voice_server/tts.py
Arpad Krejczinger 572434d42e Add professional voice assistant server implementation
- FastAPI-based TTS server using Piper neural text-to-speech
- Poetry for dependency management and virtual environments
- OpenAI-compatible API endpoints for seamless integration
- Support for multiple voice models (Ryan, Alan, Lessac)
- Robust error handling and voice fallback system
- Professional logging and configuration management
- Docker-ready with proper Python packaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-17 14:56:01 +02:00

158 lines
5.5 KiB
Python

"""Text-to-speech service using Piper."""
import subprocess
import tempfile
import os
import logging
from pathlib import Path
from typing import Optional, Tuple
from .config import config
logger = logging.getLogger(__name__)
class TTSService:
    """Text-to-speech service that shells out to the Piper executable.

    All settings (executable path, voice catalogue, audio format) are read
    from the shared ``config`` object imported by this module.
    """

    # Seconds allowed for the ``--help`` probe run at start-up.
    _PROBE_TIMEOUT = 10
    # Seconds allowed for a single synthesis run before it is aborted.
    _SYNTHESIS_TIMEOUT = 30

    def __init__(self):
        """Bind the shared configuration and verify the setup.

        Raises:
            RuntimeError: Propagated from ``_validate_setup`` when Piper or
                the default voice model is not usable.
        """
        self.config = config
        self._validate_setup()

    def _validate_setup(self):
        """Validate that piper and voice models are available.

        Raises:
            RuntimeError: If the Piper executable cannot be started, times
                out, exits non-zero, or the default voice model files are
                reported missing by the configuration.
        """
        # Check if piper-tts is available
        try:
            result = subprocess.run(
                [self.config.piper_executable, "--help"],
                capture_output=True,
                timeout=self._PROBE_TIMEOUT,
            )
        except (subprocess.TimeoutExpired, OSError) as e:
            # OSError covers a missing binary (FileNotFoundError) as well as
            # one that exists but cannot be executed (PermissionError).
            raise RuntimeError(f"Piper TTS not found or not working: {e}") from e

        if result.returncode != 0:
            # stderr is raw bytes here; never let a decoding problem mask
            # the real failure.
            details = result.stderr.decode(errors="replace")
            raise RuntimeError(f"Piper TTS not working: {details}")

        # Check if default voice model exists
        if not self.config.validate_voice_files():
            default_voice = self.config.default_voice
            model_path = self.config.get_voice_model_path()
            raise RuntimeError(
                f"Default voice '{default_voice}' model not found at {model_path}. "
                f"Please download the voice model files."
            )

        logger.info(
            "TTS service initialized with voice: %s", self.config.default_voice
        )

    def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        speed: float = 1.0
    ) -> Tuple[bytes, str]:
        """
        Synthesize text to speech.

        Args:
            text: Text to synthesize
            voice: Voice to use (defaults to configured default)
            speed: Speech speed multiplier; must be positive and finite

        Returns:
            Tuple of (audio_data, audio_format)

        Raises:
            ValueError: If speed is not a positive finite number, or if the
                voice is not available
            RuntimeError: If synthesis fails, times out, or yields no audio
        """
        # Reject unusable speeds before any file is created: the reciprocal
        # below would divide by zero or hand Piper a meaningless scale.
        # The chained comparison is also False for NaN.
        if not 0 < speed < float("inf"):
            raise ValueError(
                f"Speed must be a positive, finite number, got {speed!r}"
            )

        voice = voice or self.config.default_voice
        if not self.config.validate_voice_files(voice):
            available_voices = list(self.config.available_voices.keys())
            raise ValueError(
                f"Voice '{voice}' not available. Available voices: {available_voices}"
            )
        model_path = self.config.get_voice_model_path(voice)

        # Reserve a path for Piper to write to. delete=False keeps the file
        # after the handle is closed; it is removed in the finally below.
        with tempfile.NamedTemporaryFile(
            suffix=f".{self.config.audio_format}", delete=False
        ) as temp_file:
            temp_path = temp_file.name

        try:
            # Build piper command
            cmd = [
                self.config.piper_executable,
                "-m", str(model_path),
                "-f", temp_path,
            ]
            # Add speed if different from default; the value passed as
            # --length-scale is the reciprocal of the requested speed.
            if speed != 1.0:
                cmd.extend(["--length-scale", str(1.0 / speed)])

            logger.debug("Running piper command: %s", " ".join(cmd))

            # subprocess.run kills and reaps the child itself when the
            # timeout expires, so no process or pipe is left behind.
            # The text is sent on stdin as UTF-8 regardless of the locale.
            result = subprocess.run(
                cmd,
                input=text,
                capture_output=True,
                encoding="utf-8",
                errors="replace",
                timeout=self._SYNTHESIS_TIMEOUT,
            )
            if result.returncode != 0:
                raise RuntimeError(f"TTS synthesis failed: {result.stderr}")

            # Read the generated audio file
            with open(temp_path, "rb") as f:
                audio_data = f.read()
            if not audio_data:
                raise RuntimeError("Generated audio file is empty")

            logger.info(
                "Successfully synthesized %d characters with voice '%s'",
                len(text),
                voice,
            )
            return audio_data, self.config.audio_format
        except subprocess.TimeoutExpired as e:
            logger.error(
                "TTS synthesis timed out after %s seconds",
                self._SYNTHESIS_TIMEOUT,
            )
            raise RuntimeError("TTS synthesis timed out") from e
        except Exception as e:
            logger.error("TTS synthesis error: %s", e)
            raise
        finally:
            # Clean up temp file. Best effort: a failed unlink must not
            # replace the result or the exception already in flight.
            try:
                os.unlink(temp_path)
            except OSError:
                pass

    def _describe_voice(self, voice_name: str, voice_config: dict) -> dict:
        """Build the summary entry shared by list_voices and get_voice_info."""
        return {
            "name": voice_name,
            "language": voice_config["language"],
            "gender": voice_config["gender"],
            "description": voice_config["description"],
            "available": self.config.validate_voice_files(voice_name),
        }

    def list_voices(self) -> dict:
        """List available voices with their information.

        Returns:
            Mapping of voice name to a dict with the keys ``name``,
            ``language``, ``gender``, ``description`` and ``available``.
        """
        return {
            voice_name: self._describe_voice(voice_name, voice_config)
            for voice_name, voice_config in self.config.available_voices.items()
        }

    def get_voice_info(self, voice_name: str) -> dict:
        """Get information about a specific voice.

        Args:
            voice_name: Key of the voice in the configured voice catalogue

        Returns:
            The same fields as a ``list_voices`` entry plus ``model_path``
            and ``config_path`` rendered as strings.

        Raises:
            ValueError: If voice_name is not in the configured catalogue
        """
        if voice_name not in self.config.available_voices:
            raise ValueError(f"Voice '{voice_name}' not found")

        info = self._describe_voice(
            voice_name, self.config.available_voices[voice_name]
        )
        info["model_path"] = str(self.config.get_voice_model_path(voice_name))
        info["config_path"] = str(self.config.get_voice_config_path(voice_name))
        return info