Add professional voice assistant server implementation

- FastAPI-based TTS server using Piper neural text-to-speech
- Poetry for dependency management and virtual environments
- OpenAI-compatible API endpoints for seamless integration
- Support for multiple voice models (Ryan, Alan, Lessac)
- Robust error handling and voice fallback system
- Professional logging and configuration management
- Docker-ready with proper Python packaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-17 14:56:01 +02:00
parent 82f9cc4990
commit 572434d42e
13 changed files with 1722 additions and 0 deletions
--- a/voice-server/src/voice_server/tts.py
+++ b/voice-server/src/voice_server/tts.py
@@ -0,0 +1,158 @@
+"""Text-to-speech service using Piper."""
+import subprocess
+import tempfile
+import os
+import logging
+from pathlib import Path
+from typing import Optional, Tuple
+from .config import config
+
+# Module-level logger named after this module; TTSService logs through it.
+logger = logging.getLogger(__name__)
+
+
+class TTSService:
+    """Text-to-speech service that shells out to the Piper executable.
+
+    Everything the service needs is read from the shared ``config`` object:
+    the piper executable, the default voice, the table of known voices and
+    the audio format used for the generated file.
+    """
+
+    def __init__(self):
+        """Bind the shared configuration and fail fast if the setup is broken.
+
+        Raises:
+            RuntimeError: Propagated from ``_validate_setup`` when piper or
+                the default voice model is unusable.
+        """
+        self.config = config
+        self._validate_setup()
+
+    def _validate_setup(self):
+        """Validate that piper and the default voice model are available.
+
+        Raises:
+            RuntimeError: If the piper executable cannot be started, does not
+                answer ``--help`` within 10 seconds, exits with a non-zero
+                status, or if the default voice's files fail validation.
+        """
+        try:
+            result = subprocess.run(
+                [self.config.piper_executable, "--help"],
+                capture_output=True,
+                timeout=10,
+            )
+        except (subprocess.TimeoutExpired, OSError) as e:
+            # OSError (not just FileNotFoundError) so that a binary which
+            # exists but cannot be executed is reported the same way.
+            raise RuntimeError(f"Piper TTS not found or not working: {e}") from e
+
+        if result.returncode != 0:
+            # stderr is raw bytes here; undecodable output must not hide the
+            # real failure behind a UnicodeDecodeError.
+            details = result.stderr.decode(errors="replace")
+            raise RuntimeError(f"Piper TTS not working: {details}")
+
+        # Check that the default voice model exists
+        if not self.config.validate_voice_files():
+            default_voice = self.config.default_voice
+            model_path = self.config.get_voice_model_path()
+            raise RuntimeError(
+                f"Default voice '{default_voice}' model not found at {model_path}. "
+                f"Please download the voice model files."
+            )
+
+        logger.info(
+            "TTS service initialized with voice: %s", self.config.default_voice
+        )
+
+    def synthesize(
+        self,
+        text: str,
+        voice: Optional[str] = None,
+        speed: float = 1.0,
+        timeout: float = 30.0,
+    ) -> Tuple[bytes, str]:
+        """
+        Synthesize text to speech.
+
+        Piper receives the text on stdin and writes the audio to a temporary
+        file, which is read back and then removed.
+
+        Args:
+            text: Text to synthesize; must contain non-whitespace characters
+            voice: Voice to use (defaults to configured default)
+            speed: Speech speed multiplier; must be a positive, finite number
+            timeout: Seconds to wait for piper before giving up
+
+        Returns:
+            Tuple of (audio_data, audio_format)
+
+        Raises:
+            ValueError: If the text is empty, the speed is not a positive
+                finite number, or the voice is not available
+            RuntimeError: If piper cannot be started, times out, exits with
+                an error, or produces an empty file
+        """
+        if not text or not text.strip():
+            raise ValueError("Text to synthesize must not be empty")
+
+        # The reciprocal of speed is handed to piper below, so zero, negative,
+        # NaN and infinite values are rejected up front.
+        if not 0 < speed < float("inf"):
+            raise ValueError(
+                f"Speed must be a positive, finite number, got {speed!r}"
+            )
+
+        voice = voice or self.config.default_voice
+
+        if not self.config.validate_voice_files(voice):
+            available_voices = list(self.config.available_voices.keys())
+            raise ValueError(
+                f"Voice '{voice}' not available. Available voices: {available_voices}"
+            )
+
+        model_path = self.config.get_voice_model_path(voice)
+
+        # Reserve a unique output path; piper writes to it by name, so the
+        # handle is closed right away and the file is removed in ``finally``.
+        with tempfile.NamedTemporaryFile(
+            suffix=f".{self.config.audio_format}", delete=False
+        ) as temp_file:
+            temp_path = temp_file.name
+
+        try:
+            # Build piper command
+            cmd = [
+                self.config.piper_executable,
+                "-m", str(model_path),
+                "-f", temp_path,
+            ]
+
+            # --length-scale gets the reciprocal of the speed multiplier;
+            # the flag is omitted entirely at the default speed.
+            if speed != 1.0:
+                cmd.extend(["--length-scale", str(1.0 / speed)])
+
+            logger.debug("Running piper command: %s", " ".join(map(str, cmd)))
+
+            try:
+                # subprocess.run kills the child and waits for it when the
+                # timeout expires, so no zombie process or open pipe is left
+                # behind. UTF-8 is set explicitly so the text sent to piper
+                # does not depend on the server's locale.
+                completed = subprocess.run(
+                    cmd,
+                    input=text,
+                    capture_output=True,
+                    text=True,
+                    encoding="utf-8",
+                    errors="replace",
+                    timeout=timeout,
+                )
+            except subprocess.TimeoutExpired as e:
+                raise RuntimeError("TTS synthesis timed out") from e
+            except OSError as e:
+                # The executable vanished or is not runnable: report it as
+                # the RuntimeError this method documents.
+                raise RuntimeError(f"TTS synthesis failed: {e}") from e
+
+            if completed.returncode != 0:
+                raise RuntimeError(f"TTS synthesis failed: {completed.stderr}")
+
+            # Read the generated audio file
+            with open(temp_path, "rb") as f:
+                audio_data = f.read()
+
+            if not audio_data:
+                raise RuntimeError("Generated audio file is empty")
+
+            logger.info(
+                "Successfully synthesized %d characters with voice '%s'",
+                len(text),
+                voice,
+            )
+            return audio_data, self.config.audio_format
+
+        except Exception as e:
+            logger.error("TTS synthesis error: %s", e)
+            raise
+        finally:
+            # Best-effort cleanup: a failure to delete the temp file must not
+            # replace the result or the original error.
+            try:
+                os.unlink(temp_path)
+            except OSError as e:
+                logger.debug("Could not remove temp file %s: %s", temp_path, e)
+
+    def _voice_summary(self, voice_name: str, voice_config: dict) -> dict:
+        """Build the fields shared by ``list_voices`` and ``get_voice_info``."""
+        return {
+            "name": voice_name,
+            "language": voice_config["language"],
+            "gender": voice_config["gender"],
+            "description": voice_config["description"],
+            "available": self.config.validate_voice_files(voice_name),
+        }
+
+    def list_voices(self) -> dict:
+        """List configured voices with their information.
+
+        Returns:
+            Mapping of voice name to a dict with ``name``, ``language``,
+            ``gender``, ``description`` and ``available`` (the result of
+            validating that voice's files).
+        """
+        return {
+            voice_name: self._voice_summary(voice_name, voice_config)
+            for voice_name, voice_config in self.config.available_voices.items()
+        }
+
+    def get_voice_info(self, voice_name: str) -> dict:
+        """Get information about a specific voice.
+
+        Returns:
+            The same fields as an entry of ``list_voices`` plus ``model_path``
+            and ``config_path`` as strings.
+
+        Raises:
+            ValueError: If the voice is not in the configured voice table.
+        """
+        if voice_name not in self.config.available_voices:
+            raise ValueError(f"Voice '{voice_name}' not found")
+
+        info = self._voice_summary(
+            voice_name, self.config.available_voices[voice_name]
+        )
+        info["model_path"] = str(self.config.get_voice_model_path(voice_name))
+        info["config_path"] = str(self.config.get_voice_config_path(voice_name))
+        return info