Add professional voice assistant server implementation
- FastAPI-based TTS server using Piper neural text-to-speech
- Poetry for dependency management and virtual environments
- OpenAI-compatible API endpoints for seamless integration
- Support for multiple voice models (Ryan, Alan, Lessac)
- Robust error handling and voice fallback system
- Professional logging and configuration management
- Docker-ready with proper Python packaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
158
voice-server/src/voice_server/tts.py
Normal file
158
voice-server/src/voice_server/tts.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Text-to-speech service using Piper."""
|
||||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
from .config import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TTSService:
    """Text-to-speech service that shells out to the Piper executable.

    All settings (executable path, voices, audio format) are read from the
    module-level ``config`` object, which is stored on the instance as
    ``self.config``.
    """

    def __init__(self):
        """Bind the shared configuration and verify the environment.

        Raises:
            RuntimeError: If Piper cannot be run or the default voice model
                files are missing (see ``_validate_setup``).
        """
        self.config = config
        self._validate_setup()

    def _validate_setup(self):
        """Validate that piper and voice models are available.

        Raises:
            RuntimeError: If the Piper executable cannot be launched, does not
                answer ``--help`` within 10 seconds, exits with a non-zero
                status, or if the default voice fails
                ``config.validate_voice_files()``.
        """
        # Check if piper-tts is available. OSError covers FileNotFoundError as
        # well as PermissionError and similar launch failures, so callers
        # always see a RuntimeError rather than a raw OS-level exception.
        try:
            result = subprocess.run(
                [self.config.piper_executable, "--help"],
                capture_output=True,
                timeout=10,
            )
        except (subprocess.TimeoutExpired, OSError) as e:
            raise RuntimeError(f"Piper TTS not found or not working: {e}") from e

        if result.returncode != 0:
            # errors="replace" keeps a diagnostic readable even when stderr
            # contains bytes that are not valid UTF-8.
            details = result.stderr.decode(errors="replace")
            raise RuntimeError(f"Piper TTS not working: {details}")

        # Check if default voice model exists
        if not self.config.validate_voice_files():
            default_voice = self.config.default_voice
            model_path = self.config.get_voice_model_path()
            raise RuntimeError(
                f"Default voice '{default_voice}' model not found at {model_path}. "
                f"Please download the voice model files."
            )

        logger.info(f"TTS service initialized with voice: {self.config.default_voice}")

    def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        speed: float = 1.0
    ) -> Tuple[bytes, str]:
        """
        Synthesize text to speech.

        The text is piped to Piper on stdin; Piper writes the audio to a
        temporary file, which is read back and then removed.

        Args:
            text: Text to synthesize
            voice: Voice to use (defaults to configured default)
            speed: Speech speed multiplier; must be a positive, finite number.
                Its reciprocal is passed to Piper as ``--length-scale``.

        Returns:
            Tuple of (audio_data, audio_format)

        Raises:
            ValueError: If voice is not available or speed is not a positive,
                finite number
            RuntimeError: If synthesis fails, times out (30 seconds), or
                produces an empty file
        """
        # Reject 0 (division by zero below), negatives, NaN and infinity up
        # front; the chained comparison is False for NaN.
        if not 0 < speed < float("inf"):
            raise ValueError(
                f"Speed must be a positive, finite number, got {speed!r}"
            )

        voice = voice or self.config.default_voice

        if not self.config.validate_voice_files(voice):
            available_voices = list(self.config.available_voices.keys())
            raise ValueError(
                f"Voice '{voice}' not available. Available voices: {available_voices}"
            )

        model_path = self.config.get_voice_model_path(voice)

        # Create temporary file for output. delete=False because only the path
        # is handed to Piper; the file is removed in the finally block below.
        with tempfile.NamedTemporaryFile(
            suffix=f".{self.config.audio_format}", delete=False
        ) as temp_file:
            temp_path = temp_file.name

        try:
            # Build piper command
            cmd = [
                self.config.piper_executable,
                "-m", str(model_path),
                "-f", temp_path,
            ]

            # Add speed if different from default
            if speed != 1.0:
                cmd.extend(["--length-scale", str(1.0 / speed)])

            logger.debug(f"Running piper command: {' '.join(cmd)}")

            # Run piper-tts. UTF-8 is set explicitly so non-ASCII text does
            # not depend on the process locale.
            process = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="replace",
            )

            try:
                _, stderr = process.communicate(input=text, timeout=30)
            except subprocess.TimeoutExpired as e:
                # communicate() does not kill the child on timeout. Kill it,
                # then communicate() again to reap it and close the pipes;
                # otherwise a zombie process and open pipe handles remain.
                process.kill()
                process.communicate()
                raise RuntimeError("TTS synthesis timed out") from e

            if process.returncode != 0:
                raise RuntimeError(f"TTS synthesis failed: {stderr}")

            # Read the generated audio file
            with open(temp_path, "rb") as f:
                audio_data = f.read()

            if not audio_data:
                raise RuntimeError("Generated audio file is empty")

            logger.info(f"Successfully synthesized {len(text)} characters with voice '{voice}'")
            return audio_data, self.config.audio_format

        except Exception as e:
            logger.error(f"TTS synthesis error: {e}")
            raise
        finally:
            # Clean up temp file. Best effort: a failure to delete must not
            # mask the synthesis result or the original error.
            try:
                os.unlink(temp_path)
            except OSError:
                pass

    def _describe_voice(self, voice_name: str, voice_config: dict) -> dict:
        """Build the public description shared by the voice lookup methods."""
        return {
            "name": voice_name,
            "language": voice_config["language"],
            "gender": voice_config["gender"],
            "description": voice_config["description"],
            "available": self.config.validate_voice_files(voice_name),
        }

    def list_voices(self) -> dict:
        """List available voices with their information.

        Returns:
            A dict mapping each configured voice name to its name, language,
            gender, description and an ``available`` flag taken from
            ``config.validate_voice_files``.
        """
        return {
            voice_name: self._describe_voice(voice_name, voice_config)
            for voice_name, voice_config in self.config.available_voices.items()
        }

    def get_voice_info(self, voice_name: str) -> dict:
        """Get information about a specific voice.

        Args:
            voice_name: Key of the voice in ``config.available_voices``.

        Returns:
            The same fields as a ``list_voices`` entry plus ``model_path`` and
            ``config_path`` as strings.

        Raises:
            ValueError: If ``voice_name`` is not a configured voice.
        """
        if voice_name not in self.config.available_voices:
            raise ValueError(f"Voice '{voice_name}' not found")

        info = self._describe_voice(
            voice_name, self.config.available_voices[voice_name]
        )
        info["model_path"] = str(self.config.get_voice_model_path(voice_name))
        info["config_path"] = str(self.config.get_voice_config_path(voice_name))
        return info
|
||||
Reference in New Issue
Block a user