Add professional voice assistant server implementation

- FastAPI-based TTS server using Piper neural text-to-speech
- Poetry for dependency management and virtual environments
- OpenAI-compatible API endpoints for seamless integration
- Support for multiple voice models (Ryan, Alan, Lessac)
- Robust error handling and voice fallback system
- Professional logging and configuration management
- Docker-ready with proper Python packaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-17 14:56:01 +02:00
parent 82f9cc4990
commit 572434d42e
13 changed files with 1722 additions and 0 deletions
--- a/voice-server/src/voice_server/api.py
+++ b/voice-server/src/voice_server/api.py
@@ -0,0 +1,169 @@
+"""FastAPI application for voice server."""
+import logging
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Response
+from pydantic import BaseModel, Field
+from .tts import TTSService
+from .config import config
+
+# Configure logging
+logging.basicConfig(level=getattr(logging, config.log_level.upper()))
+logger = logging.getLogger(__name__)
+
+# Initialize TTS service
+try:
+    tts_service = TTSService()
+except Exception as e:
+    logger.error(f"Failed to initialize TTS service: {e}")
+    tts_service = None
+
+app = FastAPI(
+    title="Homelab Voice Server",
+    description="Local TTS server for Claude Code voice assistant using Piper",
+    version="0.1.0"
+)
+
+
+class TTSRequest(BaseModel):
+    """Request model for TTS synthesis."""
+    input: str = Field(..., description="Text to synthesize")
+    model: str = Field(default="tts-1", description="Model to use (for compatibility)")
+    voice: str = Field(default="alloy", description="Voice to use")
+    response_format: str = Field(default="mp3", description="Audio format (ignored, always returns wav)")
+    speed: float = Field(default=1.0, ge=0.25, le=4.0, description="Speech speed")
+
+
+class ModelInfo(BaseModel):
+    """Model information."""
+    id: str
+    object: str = "model"
+    created: int = 1677649963
+    owned_by: str = "piper"
+
+
+class ModelsResponse(BaseModel):
+    """Response for models endpoint."""
+    object: str = "list"
+    data: list[ModelInfo]
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    if tts_service is None:
+        raise HTTPException(status_code=503, detail="TTS service not available")
+    
+    return {
+        "status": "healthy",
+        "tts_available": True,
+        "default_voice": config.default_voice,
+        "voices_available": len(config.available_voices)
+    }
+
+
+@app.get("/v1/models", response_model=ModelsResponse)
+async def list_models():
+    """List available models (OpenAI compatible)."""
+    return ModelsResponse(
+        object="list",
+        data=[
+            ModelInfo(id="tts-1", owned_by="piper"),
+            ModelInfo(id="tts-1-hd", owned_by="piper")
+        ]
+    )
+
+
+@app.get("/v1/voices")
+async def list_voices():
+    """List available voices."""
+    if tts_service is None:
+        raise HTTPException(status_code=503, detail="TTS service not available")
+    
+    return {"voices": tts_service.list_voices()}
+
+
+@app.get("/v1/voices/{voice_name}")
+async def get_voice_info(voice_name: str):
+    """Get information about a specific voice."""
+    if tts_service is None:
+        raise HTTPException(status_code=503, detail="TTS service not available")
+    
+    try:
+        voice_info = tts_service.get_voice_info(voice_name)
+        return voice_info
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+
+@app.post("/v1/audio/speech")
+async def create_speech(request: TTSRequest):
+    """
+    Create speech from text (OpenAI compatible).
+    
+    Returns raw audio data as wav format.
+    """
+    if tts_service is None:
+        raise HTTPException(status_code=503, detail="TTS service not available")
+    
+    # Map common voice names to our voices
+    voice_mapping = {
+        # OpenAI voices
+        "alloy": config.default_voice,
+        "echo": config.default_voice,
+        "fable": config.default_voice,
+        "onyx": config.default_voice,
+        "nova": "lessac",  # Female voice
+        "shimmer": "lessac",  # Female voice
+        # Common defaults
+        "default": config.default_voice,
+        "male": config.default_voice,
+        "female": "lessac"
+    }
+    
+    # Get voice name, with fallback to default
+    voice_name = voice_mapping.get(request.voice, request.voice)
+    
+    # If the requested voice doesn't exist in our available voices, use default
+    if voice_name not in config.available_voices:
+        logger.warning(f"Requested voice '{voice_name}' not available, using default: {config.default_voice}")
+        voice_name = config.default_voice
+    
+    try:
+        audio_data, audio_format = tts_service.synthesize(
+            text=request.input,
+            voice=voice_name,
+            speed=request.speed
+        )
+        
+        # Return raw audio data
+        return Response(
+            content=audio_data,
+            media_type="audio/wav",
+            headers={
+                "Content-Disposition": "attachment; filename=speech.wav"
+            }
+        )
+        
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except RuntimeError as e:
+        logger.error(f"TTS synthesis failed: {e}")
+        raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {e}")
+
+
+@app.get("/")
+async def root():
+    """Root endpoint with API information."""
+    return {
+        "service": "Homelab Voice Server",
+        "version": "0.1.0",
+        "description": "Local TTS server using Piper",
+        "endpoints": {
+            "health": "/health",
+            "models": "/v1/models",
+            "voices": "/v1/voices",
+            "speech": "/v1/audio/speech"
+        },
+        "default_voice": config.default_voice,
+        "available_voices": list(config.available_voices.keys()) if tts_service else []
+    }