mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-06 02:29:54 -06:00
* feat(vibevoice): add backend
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* chore: add workflow and backend index
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* chore(gallery): add vibevoice
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Use self-hosted for intel builds
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Pin python version for l4t
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
79 lines
4.0 KiB
YAML
79 lines
4.0 KiB
YAML
---
# Defines the `vibevoice` model configuration. The value of `config_file` is
# itself a YAML document, stored verbatim as a literal block scalar (`|-`), so
# every `#` line inside that block is part of the string, not a comment of
# this outer file.
# NOTE(review): the nesting below was reconstructed from a copy whose leading
# whitespace was lost. The commented-out `download_files` section is assumed
# to belong to the embedded model config - confirm against the upstream file.
name: localai

config_file: |-
  name: vibevoice
  backend: vibevoice
  description: |
    VibeVoice-Realtime is a real-time text-to-speech model that generates natural-sounding speech.
    This model supports voice cloning through voice preset files (.pt files).

  parameters:
    model: microsoft/VibeVoice-Realtime-0.5B

  # TTS configuration
  tts:
    # Voice selection - can be:
    # 1. Voice preset name (e.g., "Frank", "en-Frank_man", "Grace") - looks for .pt files in voices/streaming_model/
    # 2. Path to a voice preset .pt file (relative to model directory or absolute)
    # Available English voices: Carter, Davis, Emma, Frank, Grace, Mike
    voice: "Frank"
    # Alternative: use audio_path to specify a voice file directly
    # audio_path: "voices/streaming_model/en-Frank_man.pt"

  known_usecases:
    - tts

  # Backend-specific options
  # These are passed as "key:value" strings to the backend
  options:
    # CFG (Classifier-Free Guidance) scale for generation (default: 1.5)
    # Higher values can improve quality but may slow generation
    - "cfg_scale:1.5"
    # Number of inference steps for the diffusion process (default: 5)
    # More steps = better quality but slower. Typical range: 3-10
    - "inference_steps:5"
    # Enable sampling (default: false)
    # When true, uses temperature and top_p for sampling
    - "do_sample:false"
    # Temperature for sampling (only used if do_sample=true, default: 0.9)
    - "temperature:0.9"
    # Top-p (nucleus) sampling (only used if do_sample=true, default: 0.9)
    - "top_p:0.9"
    # Voices directory path
    # This explicitly sets where to look for voice preset files (.pt files)
    # Since we're downloading voices to voices/streaming_model/, we set it here
    #
    # Examples:
    # - Relative path (relative to models directory): "voices/streaming_model"
    # - Absolute path: "/custom/path/to/voices/streaming_model"
    # - Custom relative path: "my_custom_voices/streaming_model"
    #
    # If not specified, the backend will auto-detect from common locations:
    # 1. {ModelFile directory}/voices/streaming_model/
    # 2. {models_dir}/voices/streaming_model/
    # 3. Backend directory
    - "voices_dir:voices/streaming_model"
  # # Download voice preset files
  # # Voice presets are downloaded to: {models_dir}/voices/streaming_model/
  # # The voices_dir option above tells the backend to look in this location
  # download_files:
  #   # English voices
  #   - filename: voices/streaming_model/en-Frank_man.pt
  #     uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Frank_man.pt
  #   - filename: voices/streaming_model/en-Grace_woman.pt
  #     uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Grace_woman.pt
  #   - filename: voices/streaming_model/en-Mike_man.pt
  #     uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Mike_man.pt
  #   - filename: voices/streaming_model/en-Emma_woman.pt
  #     uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Emma_woman.pt
  #   - filename: voices/streaming_model/en-Carter_man.pt
  #     uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Carter_man.pt
  #   - filename: voices/streaming_model/en-Davis_man.pt
  #     uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Davis_man.pt
  #   # Uncomment to add more languages:
  #   # - filename: voices/streaming_model/fr-Spk0_man.pt
  #   #   uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/fr-Spk0_man.pt
  #   # - filename: voices/streaming_model/de-Spk0_man.pt
  #   #   uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/de-Spk0_man.pt