- FastAPI-based TTS server using Piper neural text-to-speech - Poetry for dependency management and virtual environments - OpenAI-compatible API endpoints for seamless integration - Support for multiple voice models (Ryan, Alan, Lessac) - Robust error handling and voice fallback system - Professional logging and configuration management - Docker-ready with proper Python packaging 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
158 lines
5.5 KiB
Python
158 lines
5.5 KiB
Python
"""Text-to-speech service using Piper."""
|
|
import subprocess
|
|
import tempfile
|
|
import os
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Optional, Tuple
|
|
from .config import config
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TTSService:
    """Text-to-speech service that shells out to the Piper executable.

    All settings come from the module-level ``config`` object: the piper
    executable path, the default voice, the table of available voices, the
    output audio format, and the helpers that resolve/validate voice files.
    """

    def __init__(self):
        """Bind the shared configuration and fail fast if the setup is broken.

        Raises:
            RuntimeError: If piper cannot be run or the default voice model
                files are missing (see ``_validate_setup``).
        """
        self.config = config
        self._validate_setup()

    def _validate_setup(self):
        """Validate that piper and the default voice model are available.

        Raises:
            RuntimeError: If the piper executable cannot be started, does not
                answer ``--help`` within 10 seconds, exits non-zero, or if
                the default voice fails ``config.validate_voice_files()``.
        """
        # Check if piper-tts is available by asking it for its help text.
        try:
            result = subprocess.run(
                [self.config.piper_executable, "--help"],
                capture_output=True,
                timeout=10,
            )
        except (subprocess.TimeoutExpired, OSError) as e:
            # OSError covers FileNotFoundError as well as e.g. a binary that
            # exists but is not executable; keep the original cause chained.
            raise RuntimeError(f"Piper TTS not found or not working: {e}") from e

        if result.returncode != 0:
            # stderr is raw bytes here; never let undecodable output mask the
            # real failure with a UnicodeDecodeError.
            details = result.stderr.decode(errors="replace")
            raise RuntimeError(f"Piper TTS not working: {details}")

        # Check if default voice model exists
        if not self.config.validate_voice_files():
            default_voice = self.config.default_voice
            model_path = self.config.get_voice_model_path()
            raise RuntimeError(
                f"Default voice '{default_voice}' model not found at {model_path}. "
                "Please download the voice model files."
            )

        logger.info(f"TTS service initialized with voice: {self.config.default_voice}")

    def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        speed: float = 1.0
    ) -> Tuple[bytes, str]:
        """
        Synthesize text to speech.

        The text is fed to piper on stdin; piper writes the audio to a
        temporary file which is read back and always removed afterwards.

        Args:
            text: Text to synthesize
            voice: Voice to use (defaults to configured default)
            speed: Speech speed multiplier; must be greater than zero. It is
                passed to piper as ``--length-scale 1/speed``.

        Returns:
            Tuple of (audio_data, audio_format)

        Raises:
            ValueError: If speed is not a positive number or the voice is
                not available
            RuntimeError: If synthesis fails, times out (30 seconds), or
                produces an empty audio file
        """
        # `not speed > 0` (rather than `speed <= 0`) also rejects NaN. Without
        # this guard speed=0 would surface as a ZeroDivisionError below and a
        # negative speed would hand piper a negative length scale.
        if not speed > 0:
            raise ValueError(f"Speed must be a positive number, got {speed!r}")

        voice = voice or self.config.default_voice

        if not self.config.validate_voice_files(voice):
            available_voices = list(self.config.available_voices.keys())
            raise ValueError(
                f"Voice '{voice}' not available. Available voices: {available_voices}"
            )

        model_path = self.config.get_voice_model_path(voice)

        # Create temporary file for output. delete=False because piper (a
        # separate process) writes to it by path after we close our handle;
        # the `finally` below is responsible for removing it.
        with tempfile.NamedTemporaryFile(
            suffix=f".{self.config.audio_format}", delete=False
        ) as temp_file:
            temp_path = temp_file.name

        try:
            # Build piper command
            cmd = [
                self.config.piper_executable,
                "-m", str(model_path),
                "-f", temp_path,
            ]

            # Add speed if different from default
            if speed != 1.0:
                cmd.extend(["--length-scale", str(1.0 / speed)])

            logger.debug("Running piper command: %s", " ".join(cmd))

            # Run piper-tts. subprocess.run kills the child and waits for it
            # when the timeout expires, so no zombie process or open pipe is
            # left behind on the timeout path.
            try:
                result = subprocess.run(
                    cmd,
                    input=text,
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
            except subprocess.TimeoutExpired as e:
                raise RuntimeError("TTS synthesis timed out") from e

            if result.returncode != 0:
                raise RuntimeError(f"TTS synthesis failed: {result.stderr}")

            # Read the generated audio file
            with open(temp_path, "rb") as f:
                audio_data = f.read()

            if not audio_data:
                raise RuntimeError("Generated audio file is empty")

            logger.info(f"Successfully synthesized {len(text)} characters with voice '{voice}'")
            return audio_data, self.config.audio_format

        except Exception as e:
            # Log every failure (including timeouts) once, then let the
            # original exception propagate unchanged to the caller.
            logger.error(f"TTS synthesis error: {e}")
            raise
        finally:
            # Clean up temp file. Best effort on purpose: a failed unlink
            # must not hide the synthesis result or the real error.
            try:
                os.unlink(temp_path)
            except OSError:
                pass

    def _describe_voice(self, voice_name: str) -> dict:
        """Build the info dict shared by list_voices and get_voice_info."""
        voice_config = self.config.available_voices[voice_name]
        return {
            "name": voice_name,
            "language": voice_config["language"],
            "gender": voice_config["gender"],
            "description": voice_config["description"],
            "available": self.config.validate_voice_files(voice_name),
        }

    def list_voices(self) -> dict:
        """List available voices with their information.

        Returns:
            Mapping of voice name to a dict with the keys ``name``,
            ``language``, ``gender``, ``description`` and ``available``
            (the result of ``config.validate_voice_files`` for that voice).
        """
        return {
            voice_name: self._describe_voice(voice_name)
            for voice_name in self.config.available_voices
        }

    def get_voice_info(self, voice_name: str) -> dict:
        """Get information about a specific voice.

        Args:
            voice_name: Key into ``config.available_voices``.

        Returns:
            The same fields as an entry of ``list_voices`` plus
            ``model_path`` and ``config_path`` as strings.

        Raises:
            ValueError: If voice_name is not a configured voice.
        """
        if voice_name not in self.config.available_voices:
            raise ValueError(f"Voice '{voice_name}' not found")

        info = self._describe_voice(voice_name)
        info["model_path"] = str(self.config.get_voice_model_path(voice_name))
        info["config_path"] = str(self.config.get_voice_config_path(voice_name))
        return info