"""FastAPI application for voice server."""

import logging
from typing import Optional

from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel, Field

from .tts import TTSService
from .config import config

# Service identity, shared by the FastAPI metadata and the root endpoint so
# the two can never drift apart.
_SERVICE_NAME = "Homelab Voice Server"
_SERVICE_VERSION = "0.1.0"

# Configure logging.
# An unknown level name in the config must not crash the import of this
# module, so anything that does not resolve to a numeric logging level
# falls back to INFO (and is reported once logging is set up).
_requested_level = getattr(logging, config.log_level.upper(), None)
_level_is_valid = isinstance(_requested_level, int)
logging.basicConfig(level=_requested_level if _level_is_valid else logging.INFO)
logger = logging.getLogger(__name__)
if not _level_is_valid:
    logger.warning(
        "Unknown log level %r in config, falling back to INFO", config.log_level
    )

# Initialize TTS service.
# Initialization failure is deliberately non-fatal: the app still starts and
# every endpoint that needs the service answers 503 instead.
tts_service: Optional[TTSService]
try:
    tts_service = TTSService()
except Exception as e:
    # logger.exception keeps the traceback, which a plain error() would drop.
    logger.exception("Failed to initialize TTS service: %s", e)
    tts_service = None

app = FastAPI(
    title=_SERVICE_NAME,
    description="Local TTS server for Claude Code voice assistant using Piper",
    version=_SERVICE_VERSION,
)


class TTSRequest(BaseModel):
    """Request model for TTS synthesis."""

    # Field names and defaults mirror the OpenAI speech request so existing
    # clients can post to this server unchanged. `model` and
    # `response_format` are accepted but not used by create_speech.
    input: str = Field(..., description="Text to synthesize")
    model: str = Field(default="tts-1", description="Model to use (for compatibility)")
    voice: str = Field(default="alloy", description="Voice to use")
    response_format: str = Field(
        default="mp3",
        description="Audio format (ignored, always returns wav)",
    )
    speed: float = Field(default=1.0, ge=0.25, le=4.0, description="Speech speed")


class ModelInfo(BaseModel):
    """Model information."""

    id: str
    object: str = "model"
    created: int = 1677649963  # fixed placeholder value, not a real creation time
    owned_by: str = "piper"


class ModelsResponse(BaseModel):
    """Response for models endpoint."""

    object: str = "list"
    data: list[ModelInfo]


def _require_tts() -> TTSService:
    """Return the module-level TTS service or raise HTTP 503.

    The global is read at call time (not captured at import), so replacing
    ``tts_service`` on the module is picked up by every endpoint.

    Raises:
        HTTPException: 503 when the service failed to initialize.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service not available")
    return tts_service


def _resolve_voice(requested: str) -> str:
    """Translate a requested voice name into one of the configured voices.

    Known aliases (OpenAI voice names and a few generic ones) are mapped
    first; any name that is still not in ``config.available_voices`` is
    replaced by ``config.default_voice`` with a warning.
    """
    # Map common voice names to our voices. Built per call so that the
    # current value of config.default_voice is always used.
    voice_mapping = {
        # OpenAI voices
        "alloy": config.default_voice,
        "echo": config.default_voice,
        "fable": config.default_voice,
        "onyx": config.default_voice,
        "nova": "lessac",  # Female voice
        "shimmer": "lessac",  # Female voice
        # Common defaults
        "default": config.default_voice,
        "male": config.default_voice,
        "female": "lessac",
    }

    # Unmapped names pass through untouched so native voice names work too.
    voice_name = voice_mapping.get(requested, requested)

    # If the requested voice doesn't exist in our available voices, use default
    if voice_name not in config.available_voices:
        logger.warning(
            "Requested voice '%s' not available, using default: %s",
            voice_name,
            config.default_voice,
        )
        voice_name = config.default_voice
    return voice_name


@app.get("/health")
async def health_check():
    """Health check endpoint."""
    # Responds 503 (via _require_tts) when the TTS service is unavailable.
    _require_tts()
    return {
        "status": "healthy",
        "tts_available": True,
        "default_voice": config.default_voice,
        "voices_available": len(config.available_voices),
    }


@app.get("/v1/models", response_model=ModelsResponse)
async def list_models():
    """List available models (OpenAI compatible)."""
    # Static list: both ids are advertised with owner "piper".
    return ModelsResponse(
        object="list",
        data=[
            ModelInfo(id="tts-1", owned_by="piper"),
            ModelInfo(id="tts-1-hd", owned_by="piper"),
        ],
    )


@app.get("/v1/voices")
async def list_voices():
    """List available voices."""
    # Responds 503 (via _require_tts) when the TTS service is unavailable.
    return {"voices": _require_tts().list_voices()}


@app.get("/v1/voices/{voice_name}")
async def get_voice_info(voice_name: str):
    """Get information about a specific voice."""
    service = _require_tts()
    try:
        return service.get_voice_info(voice_name)
    except ValueError as e:
        # A ValueError from the service is reported to the client as 404.
        raise HTTPException(status_code=404, detail=str(e)) from e


@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """
    Create speech from text (OpenAI compatible).

    Returns raw audio data as wav format.
    """
    # Error mapping: 503 service unavailable, 400 for ValueError raised by
    # synthesis, 500 for RuntimeError raised by synthesis.
    service = _require_tts()
    voice_name = _resolve_voice(request.voice)

    # NOTE(review): synthesize() is called directly inside this coroutine;
    # if it blocks, it blocks the event loop for its duration. Left as-is
    # because offloading to a thread would allow concurrent calls into
    # TTSService, whose thread-safety is not visible from this module.
    try:
        # The second element (the format reported by the service) is not
        # used: the response is always labelled as wav below.
        audio_data, _audio_format = service.synthesize(
            text=request.input,
            voice=voice_name,
            speed=request.speed,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except RuntimeError as e:
        logger.exception("TTS synthesis failed: %s", e)
        raise HTTPException(
            status_code=500, detail=f"TTS synthesis failed: {e}"
        ) from e

    # Return raw audio data
    return Response(
        content=audio_data,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=speech.wav"},
    )


@app.get("/")
async def root():
    """Root endpoint with API information."""
    # Unlike the other endpoints this one never answers 503: when the TTS
    # service is unavailable it simply reports an empty voice list.
    return {
        "service": _SERVICE_NAME,
        "version": _SERVICE_VERSION,
        "description": "Local TTS server using Piper",
        "endpoints": {
            "health": "/health",
            "models": "/v1/models",
            "voices": "/v1/voices",
            "speech": "/v1/audio/speech",
        },
        "default_voice": config.default_voice,
        "available_voices": list(config.available_voices.keys()) if tts_service else [],
    }