Add professional voice assistant server implementation

- FastAPI-based TTS server using Piper neural text-to-speech
- Poetry for dependency management and virtual environments
- OpenAI-compatible API endpoints for seamless integration
- Support for multiple voice models (Ryan, Alan, Lessac)
- Robust error handling and voice fallback system
- Professional logging and configuration management
- Docker-ready with proper Python packaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-08-17 14:56:01 +02:00
parent 82f9cc4990
commit 572434d42e
13 changed files with 1722 additions and 0 deletions

View File

@@ -0,0 +1,169 @@
"""FastAPI application for voice server."""
import logging
from typing import Optional
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel, Field
from .tts import TTSService
from .config import config
# Configure logging. An unrecognised level name in the configuration falls
# back to INFO instead of raising AttributeError while this module is imported.
_log_level = getattr(logging, config.log_level.upper(), None)
if not isinstance(_log_level, int):
    # Also rejects upper-case logging attributes that are not levels
    # (for example the BASIC_FORMAT string).
    _log_level = logging.INFO
logging.basicConfig(level=_log_level)
logger = logging.getLogger(__name__)

# Initialize TTS service. A failure here must not prevent the app from
# starting: handlers check for ``tts_service is None`` and answer 503.
tts_service: Optional[TTSService]
try:
    tts_service = TTSService()
except Exception as e:
    # logger.exception keeps the traceback so the root cause is diagnosable.
    logger.exception("Failed to initialize TTS service: %s", e)
    tts_service = None

# The ASGI application object that all route decorators below attach to.
app = FastAPI(
    title="Homelab Voice Server",
    description="Local TTS server for Claude Code voice assistant using Piper",
    version="0.1.0",
)
class TTSRequest(BaseModel):
    """Body accepted by the speech-synthesis endpoint.

    Only ``input`` is required; every other field has a default.
    """

    # Text that will be spoken (required).
    input: str = Field(
        ...,
        description="Text to synthesize",
    )
    # Accepted for client compatibility.
    model: str = Field(
        "tts-1",
        description="Model to use (for compatibility)",
    )
    # Voice name requested by the client.
    voice: str = Field(
        "alloy",
        description="Voice to use",
    )
    # Accepted but, per its own description, not honoured.
    response_format: str = Field(
        "mp3",
        description="Audio format (ignored, always returns wav)",
    )
    # Validation bounds the value to the range 0.25 - 4.0 inclusive.
    speed: float = Field(
        1.0,
        ge=0.25,
        le=4.0,
        description="Speech speed",
    )
class ModelInfo(BaseModel):
    """A single entry in the model listing."""

    # Model identifier; the only field without a default.
    id: str
    object: str = Field(default="model")
    # Fixed constant — presumably a Unix timestamp placeholder; confirm.
    created: int = Field(default=1677649963)
    owned_by: str = Field(default="piper")
class ModelsResponse(BaseModel):
    """Envelope returned by the models endpoint: a typed list of models."""

    object: str = Field(default="list")
    # The model entries themselves (required).
    data: list[ModelInfo]
@app.get("/health")
async def health_check():
    """Report whether the server can synthesize speech.

    Returns a small status document when the TTS service was created at
    start-up; otherwise responds with HTTP 503.
    """
    if tts_service is not None:
        voice_count = len(config.available_voices)
        status_document = {
            "status": "healthy",
            "tts_available": True,
            "default_voice": config.default_voice,
            "voices_available": voice_count,
        }
        return status_document

    raise HTTPException(status_code=503, detail="TTS service not available")
@app.get("/v1/models", response_model=ModelsResponse)
async def list_models():
    """Return the fixed set of model ids this server advertises (OpenAI compatible)."""
    advertised_ids = ("tts-1", "tts-1-hd")
    entries = [
        ModelInfo(id=model_id, owned_by="piper")
        for model_id in advertised_ids
    ]
    return ModelsResponse(object="list", data=entries)
@app.get("/v1/voices")
async def list_voices():
    """Return the voices reported by the TTS service, or 503 if it is unavailable."""
    service = tts_service
    if service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    voices = service.list_voices()
    return {"voices": voices}
@app.get("/v1/voices/{voice_name}")
async def get_voice_info(voice_name: str):
    """Return the TTS service's description of a single voice.

    Args:
        voice_name: Voice identifier taken from the URL path.

    Returns:
        Whatever ``tts_service.get_voice_info`` returns for that name.

    Raises:
        HTTPException: 503 when the TTS service is unavailable; 404 when the
            service raises ``ValueError`` for the given name.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")
    try:
        return tts_service.get_voice_info(voice_name)
    except ValueError as e:
        # Chain the cause so the original lookup error stays in the traceback.
        raise HTTPException(status_code=404, detail=str(e)) from e
def _resolve_voice(requested: str) -> str:
    """Map a client-supplied voice name to the voice that will be used.

    Known aliases are translated first; any name that is not present in
    ``config.available_voices`` afterwards is replaced by the default voice.
    """
    aliases = {
        # OpenAI voices
        "alloy": config.default_voice,
        "echo": config.default_voice,
        "fable": config.default_voice,
        "onyx": config.default_voice,
        "nova": "lessac",  # Female voice
        "shimmer": "lessac",  # Female voice
        # Common defaults
        "default": config.default_voice,
        "male": config.default_voice,
        "female": "lessac",
    }
    # Names without an alias are tried as-is.
    resolved = aliases.get(requested, requested)
    if resolved in config.available_voices:
        return resolved

    # Report both the name the client sent and what it resolved to, so the
    # log line can be matched against the incoming request.
    logger.warning(
        "Requested voice '%s' (resolved to '%s') not available, using default: %s",
        requested,
        resolved,
        config.default_voice,
    )
    return config.default_voice


@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """
    Create speech from text (OpenAI compatible).

    Returns the synthesized audio as the raw response body, labelled as
    ``audio/wav`` with an attachment filename of ``speech.wav``.

    Raises:
        HTTPException: 503 when the TTS service is unavailable, 400 when
            synthesis raises ``ValueError``, 500 when it raises
            ``RuntimeError``.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    voice_name = _resolve_voice(request.voice)

    # NOTE(review): synthesize() is called directly inside an async handler;
    # if it blocks, other requests wait until it returns. Moving it to a
    # worker thread would need TTSService to be thread-safe — confirm first.
    try:
        # The second element (reported format) is unused: the response is
        # always labelled as WAV.
        audio_data, _audio_format = tts_service.synthesize(
            text=request.input,
            voice=voice_name,
            speed=request.speed,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        # Keep the traceback in the log; the client only gets the message.
        logger.exception("TTS synthesis failed: %s", e)
        raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {e}") from e

    # Return raw audio data
    return Response(
        content=audio_data,
        media_type="audio/wav",
        headers={
            "Content-Disposition": "attachment; filename=speech.wav"
        },
    )
@app.get("/")
async def root():
    """Describe the service: name, version, endpoint paths and voices.

    ``available_voices`` is an empty list when the TTS service failed to
    initialize; this endpoint itself never answers 503.
    """
    # Same ``is None`` availability test as the other handlers, so an
    # existing service object can never be mistaken for a missing one.
    if tts_service is not None:
        voice_names = list(config.available_voices)
    else:
        voice_names = []

    return {
        "service": "Homelab Voice Server",
        "version": "0.1.0",
        "description": "Local TTS server using Piper",
        "endpoints": {
            "health": "/health",
            "models": "/v1/models",
            "voices": "/v1/voices",
            "speech": "/v1/audio/speech",
        },
        "default_voice": config.default_voice,
        "available_voices": voice_names,
    }