Files
homelab/voice-server/src/voice_server/api.py
Arpad Krejczinger 572434d42e Add professional voice assistant server implementation
- FastAPI-based TTS server using Piper neural text-to-speech
- Poetry for dependency management and virtual environments
- OpenAI-compatible API endpoints for seamless integration
- Support for multiple voice models (Ryan, Alan, Lessac)
- Robust error handling and voice fallback system
- Professional logging and configuration management
- Docker-ready with proper Python packaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-17 14:56:01 +02:00

169 lines
5.1 KiB
Python

"""FastAPI application for voice server."""
import logging
from typing import Optional
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel, Field
from .tts import TTSService
from .config import config
# Configure logging. An unknown level name in the configuration falls back to
# INFO (with a warning) instead of raising AttributeError at import time.
_log_level = getattr(logging, config.log_level.upper(), None)
_log_level_is_valid = isinstance(_log_level, int)
logging.basicConfig(level=_log_level if _log_level_is_valid else logging.INFO)
logger = logging.getLogger(__name__)
if not _log_level_is_valid:
    logger.warning(
        "Unknown log level %r in configuration, using INFO", config.log_level
    )

# Initialize the TTS service. A failure here is deliberately non-fatal: the
# module still loads, ``tts_service`` is left as None, and the endpoints that
# check for None answer with HTTP 503.
tts_service: Optional[TTSService]
try:
    tts_service = TTSService()
except Exception as e:
    # logger.exception keeps the traceback in addition to the message.
    logger.exception("Failed to initialize TTS service: %s", e)
    tts_service = None

# The ASGI application object; the route decorators in this module attach to it.
app = FastAPI(
    title="Homelab Voice Server",
    description="Local TTS server for Claude Code voice assistant using Piper",
    version="0.1.0",
)
class TTSRequest(BaseModel):
    """Payload accepted by the speech synthesis endpoint.

    Only ``input`` is required; every other field has a default.
    """

    # Required: the text that will be spoken.
    input: str = Field(
        ...,
        description="Text to synthesize",
    )
    # Kept for client compatibility, as the field description states.
    model: str = Field(
        default="tts-1",
        description="Model to use (for compatibility)",
    )
    voice: str = Field(
        default="alloy",
        description="Voice to use",
    )
    # Accepted but ignored: per the description the audio is always wav.
    response_format: str = Field(
        default="mp3",
        description="Audio format (ignored, always returns wav)",
    )
    # Validation bounds the value to the inclusive range 0.25 .. 4.0.
    speed: float = Field(
        default=1.0,
        ge=0.25,
        le=4.0,
        description="Speech speed",
    )
class ModelInfo(BaseModel):
    """A single entry in the model listing.

    Only ``id`` must be supplied; the remaining fields carry fixed defaults.
    """

    id: str
    object: str = Field(default="model")
    # Hard-coded constant, not computed at runtime (reads like a Unix
    # timestamp -- TODO confirm).
    created: int = Field(default=1677649963)
    owned_by: str = Field(default="piper")
class ModelsResponse(BaseModel):
    """Envelope for the models endpoint: an ``object`` tag plus the entries."""

    object: str = Field(default="list")
    # Required: the ModelInfo entries being listed.
    data: list[ModelInfo]
@app.get("/health")
async def health_check():
    """Report whether the server can synthesize speech.

    Returns:
        A dict with the status, the default voice and the number of
        configured voices.

    Raises:
        HTTPException: 503 when ``tts_service`` is None.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    default_voice = config.default_voice
    voice_count = len(config.available_voices)
    return {
        "status": "healthy",
        "tts_available": True,
        "default_voice": default_voice,
        "voices_available": voice_count,
    }
@app.get("/v1/models", response_model=ModelsResponse)
async def list_models():
    """Return the fixed list of model ids this server advertises (OpenAI compatible)."""
    model_ids = ("tts-1", "tts-1-hd")
    entries = [ModelInfo(id=model_id, owned_by="piper") for model_id in model_ids]
    return ModelsResponse(object="list", data=entries)
@app.get("/v1/voices")
async def list_voices():
    """Return the voices reported by the TTS service.

    Raises:
        HTTPException: 503 when ``tts_service`` is None.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    voices = tts_service.list_voices()
    return {"voices": voices}
@app.get("/v1/voices/{voice_name}")
async def get_voice_info(voice_name: str):
    """Return the TTS service's information about one voice.

    Args:
        voice_name: Voice identifier taken from the URL path.

    Returns:
        Whatever ``tts_service.get_voice_info`` returns for that name.

    Raises:
        HTTPException: 503 when ``tts_service`` is None; 404 when the service
            raises ValueError for the given name.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    try:
        return tts_service.get_voice_info(voice_name)
    except ValueError as exc:
        # Chain the cause so the original ValueError stays visible in tracebacks.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
def _resolve_voice(requested: str) -> str:
    """Map a client-supplied voice name onto one of the configured voices.

    Known aliases are translated first; any name that is still not a key of
    ``config.available_voices`` is replaced by ``config.default_voice``.
    """
    default_voice = config.default_voice
    aliases = {
        # OpenAI voices
        "alloy": default_voice,
        "echo": default_voice,
        "fable": default_voice,
        "onyx": default_voice,
        "nova": "lessac",  # Female voice
        "shimmer": "lessac",  # Female voice
        # Common defaults
        "default": default_voice,
        "male": default_voice,
        "female": "lessac",
    }
    # Names without an alias pass through unchanged so that a configured
    # voice can be requested directly.
    resolved = aliases.get(requested, requested)
    if resolved in config.available_voices:
        return resolved

    # Log both names: the alias-resolved name alone would hide what the
    # client actually asked for.
    logger.warning(
        "Requested voice '%s' (resolved to '%s') not available, using default: %s",
        requested,
        resolved,
        default_voice,
    )
    return default_voice


@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """
    Create speech from text (OpenAI compatible).

    Returns raw audio data labelled as wav, sent as an attachment named
    ``speech.wav``.

    Raises:
        HTTPException: 503 when ``tts_service`` is None; 400 when synthesis
            raises ValueError; 500 when synthesis raises RuntimeError.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    voice_name = _resolve_voice(request.voice)

    # NOTE(review): synthesize() is called directly inside this coroutine; if
    # it blocks for long it presumably stalls the event loop. Consider a
    # thread pool once the thread-safety of TTSService is confirmed.
    try:
        # The second tuple element is not used: the response is always
        # labelled audio/wav below.
        audio_data, _audio_format = tts_service.synthesize(
            text=request.input,
            voice=voice_name,
            speed=request.speed,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except RuntimeError as exc:
        # logger.exception records the traceback along with the message.
        logger.exception("TTS synthesis failed: %s", exc)
        raise HTTPException(
            status_code=500, detail=f"TTS synthesis failed: {exc}"
        ) from exc

    # Return raw audio data
    return Response(
        content=audio_data,
        media_type="audio/wav",
        headers={
            "Content-Disposition": "attachment; filename=speech.wav",
        },
    )
@app.get("/")
async def root():
    """Describe the service: name, version, endpoint paths and voices.

    ``available_voices`` is an empty list when ``tts_service`` is None.
    """
    return {
        "service": "Homelab Voice Server",
        "version": "0.1.0",
        "description": "Local TTS server using Piper",
        "endpoints": {
            "health": "/health",
            "models": "/v1/models",
            "voices": "/v1/voices",
            "speech": "/v1/audio/speech",
        },
        "default_voice": config.default_voice,
        # Identity check against None, matching the other endpoints; plain
        # truthiness would misreport a service object that evaluates falsy.
        "available_voices": (
            list(config.available_voices.keys())
            if tts_service is not None
            else []
        ),
    }