Add professional voice assistant server implementation
- FastAPI-based TTS server using Piper neural text-to-speech - Poetry for dependency management and virtual environments - OpenAI-compatible API endpoints for seamless integration - Support for multiple voice models (Ryan, Alan, Lessac) - Robust error handling and voice fallback system - Professional logging and configuration management - Docker-ready with proper Python packaging 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
169
voice-server/src/voice_server/api.py
Normal file
169
voice-server/src/voice_server/api.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""FastAPI application for voice server."""
|
||||
import logging
|
||||
from typing import Optional
|
||||
from fastapi import FastAPI, HTTPException, Response
|
||||
from pydantic import BaseModel, Field
|
||||
from .tts import TTSService
|
||||
from .config import config
|
||||
|
||||
# Configure logging
# Resolve the configured level name against the logging module. An unknown
# name (or one that resolves to a non-level attribute) must not abort the
# import of this module, so fall back to INFO and report it once logging is up.
_level = getattr(logging, config.log_level.upper(), None)
_level_is_valid = isinstance(_level, int)
logging.basicConfig(level=_level if _level_is_valid else logging.INFO)
logger = logging.getLogger(__name__)
if not _level_is_valid:
    logger.warning(
        "Unknown log level %r; falling back to INFO", config.log_level
    )
|
||||
|
||||
# Initialize TTS service
# ``tts_service`` stays ``None`` when construction fails; the route handlers
# check for that and answer 503 instead of the whole app failing to import.
tts_service: Optional[TTSService]
try:
    tts_service = TTSService()
except Exception as e:
    # logger.exception keeps the traceback, which the plain error log dropped.
    logger.exception("Failed to initialize TTS service: %s", e)
    tts_service = None
|
||||
|
||||
# Application instance on which the route handlers in this module are registered.
app = FastAPI(
    version="0.1.0",
    title="Homelab Voice Server",
    description="Local TTS server for Claude Code voice assistant using Piper",
)
|
||||
|
||||
|
||||
class TTSRequest(BaseModel):
    """Payload describing a single text-to-speech request."""

    # Required: the text that will be spoken.
    input: str = Field(
        ...,
        description="Text to synthesize",
    )
    # Model identifier; defaults to "tts-1".
    model: str = Field(
        default="tts-1",
        description="Model to use (for compatibility)",
    )
    # Voice name requested by the client; defaults to "alloy".
    voice: str = Field(
        default="alloy",
        description="Voice to use",
    )
    # Requested output format; per the description it is not honoured.
    response_format: str = Field(
        default="mp3",
        description="Audio format (ignored, always returns wav)",
    )
    # Speed multiplier, validated to lie within [0.25, 4.0].
    speed: float = Field(
        default=1.0,
        ge=0.25,
        le=4.0,
        description="Speech speed",
    )
|
||||
|
||||
|
||||
class ModelInfo(BaseModel):
    """One entry of the model listing."""

    # Identifier of the model; the only field without a default.
    id: str
    # Object type tag.
    object: str = "model"
    # Fixed default; presumably a Unix timestamp -- TODO confirm.
    created: int = 1677649963
    # Owner label reported for the model.
    owned_by: str = "piper"
|
||||
|
||||
|
||||
class ModelsResponse(BaseModel):
    """Envelope returned by the models endpoint."""

    # Object type tag of the envelope.
    object: str = "list"
    # The model entries themselves.
    data: list[ModelInfo]
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Report whether the TTS service is usable.

    Returns:
        A dict with the status, the default voice and the number of
        configured voices.

    Raises:
        HTTPException: 503 when the TTS service is not available.
    """
    if tts_service is not None:
        report = {
            "status": "healthy",
            "tts_available": True,
            "default_voice": config.default_voice,
            "voices_available": len(config.available_voices),
        }
        return report

    raise HTTPException(status_code=503, detail="TTS service not available")
|
||||
|
||||
|
||||
@app.get("/v1/models", response_model=ModelsResponse)
async def list_models():
    """Return the fixed list of model identifiers (OpenAI compatible)."""
    model_ids = ("tts-1", "tts-1-hd")
    entries = [ModelInfo(id=model_id, owned_by="piper") for model_id in model_ids]
    return ModelsResponse(object="list", data=entries)
|
||||
|
||||
|
||||
@app.get("/v1/voices")
async def list_voices():
    """Return the voices reported by the TTS service.

    Raises:
        HTTPException: 503 when the TTS service is not available.
    """
    service = tts_service
    if service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    voices = service.list_voices()
    return {"voices": voices}
|
||||
|
||||
|
||||
@app.get("/v1/voices/{voice_name}")
async def get_voice_info(voice_name: str):
    """Get information about a specific voice.

    Args:
        voice_name: Name of the voice, taken from the URL path.

    Returns:
        Whatever the TTS service reports for that voice.

    Raises:
        HTTPException: 503 when the TTS service is not available, 404 when
            the TTS service rejects the voice name with a ``ValueError``.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    try:
        return tts_service.get_voice_info(voice_name)
    except ValueError as e:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(status_code=404, detail=str(e)) from e
|
||||
|
||||
|
||||
def _resolve_voice(requested: str) -> str:
    """Translate a client-supplied voice name into a configured voice name.

    Known aliases are mapped first; any result that is not among the
    configured voices is replaced by the configured default voice.
    """
    aliases = {
        # OpenAI voices
        "alloy": config.default_voice,
        "echo": config.default_voice,
        "fable": config.default_voice,
        "onyx": config.default_voice,
        "nova": "lessac",  # Female voice
        "shimmer": "lessac",  # Female voice
        # Common defaults
        "default": config.default_voice,
        "male": config.default_voice,
        "female": "lessac",
    }

    # Names without an alias are passed through unchanged.
    voice_name = aliases.get(requested, requested)

    if voice_name not in config.available_voices:
        logger.warning(
            "Requested voice '%s' not available, using default: %s",
            voice_name,
            config.default_voice,
        )
        return config.default_voice

    return voice_name


@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """
    Create speech from text (OpenAI compatible).

    Returns raw audio data as wav format.

    Args:
        request: Validated request body; ``input``, ``voice`` and ``speed``
            are used, the remaining fields are not read here.

    Raises:
        HTTPException: 503 when the TTS service is not available, 400 when
            synthesis raises ``ValueError``, 500 when it raises
            ``RuntimeError``.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")

    voice_name = _resolve_voice(request.voice)

    # NOTE(review): synthesize() is called synchronously from this coroutine;
    # if it blocks for long it would stall other requests. Offloading it to a
    # worker thread needs confirmation that the service is thread-safe.
    try:
        # The second tuple element is not used: the response below is always
        # labelled as WAV.
        audio_data, _audio_format = tts_service.synthesize(
            text=request.input,
            voice=voice_name,
            speed=request.speed,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("TTS synthesis failed: %s", e)
        raise HTTPException(
            status_code=500, detail=f"TTS synthesis failed: {e}"
        ) from e

    # Return raw audio data
    return Response(
        content=audio_data,
        media_type="audio/wav",
        headers={
            "Content-Disposition": "attachment; filename=speech.wav",
        },
    )
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Describe the service: name, version, endpoint paths and voices."""
    endpoint_paths = {
        "health": "/health",
        "models": "/v1/models",
        "voices": "/v1/voices",
        "speech": "/v1/audio/speech",
    }
    default_voice = config.default_voice

    # No voices are listed when the TTS service object is falsy.
    if tts_service:
        voice_names = list(config.available_voices.keys())
    else:
        voice_names = []

    return {
        "service": "Homelab Voice Server",
        "version": "0.1.0",
        "description": "Local TTS server using Piper",
        "endpoints": endpoint_paths,
        "default_voice": default_voice,
        "available_voices": voice_names,
    }
|
||||
Reference in New Issue
Block a user