mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-08 03:41:13 -05:00
feat(chatterbox): support multilingual (#6240)
* feat(chatterbox): support multilingual
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Add l4t support
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Fixups
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: switch to fork until https://github.com/resemble-ai/chatterbox/pull/295 is merged
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
b5efc4f89e
commit
c85d559919
@@ -955,6 +955,18 @@ jobs:
|
|||||||
backend: "exllama2"
|
backend: "exllama2"
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
dockerfile: "./backend/Dockerfile.python"
|
||||||
context: "./backend"
|
context: "./backend"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
skip-drivers: 'true'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-chatterbox'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
backend: "chatterbox"
|
||||||
|
dockerfile: "./backend/Dockerfile.python"
|
||||||
|
context: "./backend"
|
||||||
# runs out of space on the runner
|
# runs out of space on the runner
|
||||||
# - build-type: 'hipblas'
|
# - build-type: 'hipblas'
|
||||||
# cuda-major-version: ""
|
# cuda-major-version: ""
|
||||||
|
|||||||
@@ -429,6 +429,9 @@ docker-build-kitten-tts:
|
|||||||
docker-save-kitten-tts: backend-images
|
docker-save-kitten-tts: backend-images
|
||||||
docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar
|
docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar
|
||||||
|
|
||||||
|
docker-save-chatterbox: backend-images
|
||||||
|
docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar
|
||||||
|
|
||||||
docker-build-kokoro:
|
docker-build-kokoro:
|
||||||
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
|
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
|
||||||
|
|
||||||
|
|||||||
@@ -353,6 +353,7 @@
|
|||||||
nvidia: "cuda12-chatterbox"
|
nvidia: "cuda12-chatterbox"
|
||||||
metal: "metal-chatterbox"
|
metal: "metal-chatterbox"
|
||||||
default: "cpu-chatterbox"
|
default: "cpu-chatterbox"
|
||||||
|
nvidia-l4t: "nvidia-l4t-arm64-chatterbox"
|
||||||
- &piper
|
- &piper
|
||||||
name: "piper"
|
name: "piper"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
|
||||||
@@ -1239,6 +1240,7 @@
|
|||||||
nvidia: "cuda12-chatterbox-development"
|
nvidia: "cuda12-chatterbox-development"
|
||||||
metal: "metal-chatterbox-development"
|
metal: "metal-chatterbox-development"
|
||||||
default: "cpu-chatterbox-development"
|
default: "cpu-chatterbox-development"
|
||||||
|
# Development meta-backend: resolve L4T to the "-development" image, like the sibling entries.
nvidia-l4t: "nvidia-l4t-arm64-chatterbox-development"
|
||||||
- !!merge <<: *chatterbox
|
- !!merge <<: *chatterbox
|
||||||
name: "cpu-chatterbox"
|
name: "cpu-chatterbox"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
|
||||||
@@ -1249,6 +1251,16 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-cpu-chatterbox
|
- localai/localai-backends:master-cpu-chatterbox
|
||||||
|
- !!merge <<: *chatterbox
|
||||||
|
name: "nvidia-l4t-arm64-chatterbox"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-gpu-nvidia-l4t-arm64-chatterbox
|
||||||
|
- !!merge <<: *chatterbox
|
||||||
|
name: "nvidia-l4t-arm64-chatterbox-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-l4t-arm64-chatterbox"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-gpu-nvidia-l4t-arm64-chatterbox
|
||||||
- !!merge <<: *chatterbox
|
- !!merge <<: *chatterbox
|
||||||
name: "metal-chatterbox"
|
name: "metal-chatterbox"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
|
||||||
|
|||||||
@@ -14,9 +14,23 @@ import backend_pb2_grpc
|
|||||||
import torch
|
import torch
|
||||||
import torchaudio as ta
|
import torchaudio as ta
|
||||||
from chatterbox.tts import ChatterboxTTS
|
from chatterbox.tts import ChatterboxTTS
|
||||||
|
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
|
def is_float(s):
    """Return True if *s* can be converted to a float, False otherwise.

    Integer literals such as ``"3"`` also count, because ``float("3")``
    succeeds. Non-string inputs that ``float()`` rejects (for example
    ``None``) yield False instead of propagating ``TypeError``.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: malformed literal; TypeError: unsupported input type.
        return False
    return True
|
||||||
|
def is_int(s):
    """Return True if *s* can be converted to an int, False otherwise.

    Decimal literals such as ``"1.5"`` are rejected because ``int("1.5")``
    raises. Non-string inputs that ``int()`` rejects (for example ``None``)
    yield False instead of propagating ``TypeError``.
    """
    try:
        int(s)
    except (TypeError, ValueError):
        # ValueError: malformed literal; TypeError: unsupported input type.
        return False
    return True
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
@@ -47,6 +61,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if not torch.cuda.is_available() and request.CUDA:
|
if not torch.cuda.is_available() and request.CUDA:
|
||||||
return backend_pb2.Result(success=False, message="CUDA is not available")
|
return backend_pb2.Result(success=False, message="CUDA is not available")
|
||||||
|
|
||||||
|
|
||||||
|
options = request.Options
|
||||||
|
|
||||||
|
# empty dict
|
||||||
|
self.options = {}
|
||||||
|
|
||||||
|
# The options are a list of strings in this form optname:optvalue
|
||||||
|
# We are storing all the options in a dict so we can use it later when
|
||||||
|
# generating the audio
|
||||||
|
# Each option is an "optname:optvalue" string; entries without a colon are ignored.
for opt in options:
    if ":" not in opt:
        continue
    # Split on the first colon only, so values that themselves contain ":"
    # (paths, URLs, ...) stay intact instead of failing to unpack.
    key, value = opt.split(":", 1)
    # Coerce numeric and boolean literals; anything else stays a string.
    # NOTE(review): is_float() also accepts integer literals, so in practice
    # the is_int() branch is not reached and integers are stored as floats.
    # The order is kept as-is because downstream code may rely on floats.
    if is_float(value):
        value = float(value)
    elif is_int(value):
        value = int(value)
    elif value.lower() in ["true", "false"]:
        value = value.lower() == "true"
    self.options[key] = value
|
||||||
|
|
||||||
self.AudioPath = None
|
self.AudioPath = None
|
||||||
|
|
||||||
if os.path.isabs(request.AudioPath):
|
if os.path.isabs(request.AudioPath):
|
||||||
@@ -56,10 +92,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
modelFileBase = os.path.dirname(request.ModelFile)
|
modelFileBase = os.path.dirname(request.ModelFile)
|
||||||
# modify LoraAdapter to be relative to modelFileBase
|
# modify LoraAdapter to be relative to modelFileBase
|
||||||
self.AudioPath = os.path.join(modelFileBase, request.AudioPath)
|
self.AudioPath = os.path.join(modelFileBase, request.AudioPath)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print("Preparing models, please wait", file=sys.stderr)
|
print("Preparing models, please wait", file=sys.stderr)
|
||||||
self.model = ChatterboxTTS.from_pretrained(device=device)
|
if "multilingual" in self.options:
|
||||||
|
# remove key from options
|
||||||
|
del self.options["multilingual"]
|
||||||
|
self.model = ChatterboxMultilingualTTS.from_pretrained(device=device)
|
||||||
|
else:
|
||||||
|
self.model = ChatterboxTTS.from_pretrained(device=device)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
# Implement your logic here for the LoadModel service
|
# Implement your logic here for the LoadModel service
|
||||||
@@ -68,12 +108,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
def TTS(self, request, context):
|
def TTS(self, request, context):
|
||||||
try:
|
try:
|
||||||
# Generate audio using ChatterboxTTS
|
kwargs = {}
|
||||||
|
|
||||||
|
if "language" in self.options:
|
||||||
|
kwargs["language_id"] = self.options["language"]
|
||||||
if self.AudioPath is not None:
|
if self.AudioPath is not None:
|
||||||
wav = self.model.generate(request.text, audio_prompt_path=self.AudioPath)
|
kwargs["audio_prompt_path"] = self.AudioPath
|
||||||
else:
|
|
||||||
wav = self.model.generate(request.text)
|
# add options to kwargs
|
||||||
|
kwargs.update(self.options)
|
||||||
|
|
||||||
|
# Generate audio using ChatterboxTTS
|
||||||
|
wav = self.model.generate(request.text, **kwargs)
|
||||||
# Save the generated audio
|
# Save the generated audio
|
||||||
ta.save(request.dst, wav, self.model.sr)
|
ta.save(request.dst, wav, self.model.sr)
|
||||||
|
|
||||||
|
|||||||
@@ -15,5 +15,6 @@ fi
|
|||||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
fi
|
fi
|
||||||
|
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
accelerate
|
accelerate
|
||||||
torch==2.6.0
|
torch
|
||||||
torchaudio==2.6.0
|
torchaudio
|
||||||
transformers==4.46.3
|
transformers
|
||||||
chatterbox-tts==0.1.2
|
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
|
||||||
|
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||||
|
#chatterbox-tts==0.1.4
|
||||||
@@ -2,5 +2,6 @@
|
|||||||
torch==2.6.0+cu118
|
torch==2.6.0+cu118
|
||||||
torchaudio==2.6.0+cu118
|
torchaudio==2.6.0+cu118
|
||||||
transformers==4.46.3
|
transformers==4.46.3
|
||||||
chatterbox-tts==0.1.2
|
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
|
||||||
|
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
torch==2.6.0
|
torch
|
||||||
torchaudio==2.6.0
|
torchaudio
|
||||||
transformers==4.46.3
|
transformers
|
||||||
chatterbox-tts==0.1.2
|
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
|
||||||
|
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||||
accelerate
|
accelerate
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch==2.6.0+rocm6.1
|
torch==2.6.0+rocm6.1
|
||||||
torchaudio==2.6.0+rocm6.1
|
torchaudio==2.6.0+rocm6.1
|
||||||
transformers==4.46.3
|
transformers
|
||||||
chatterbox-tts==0.1.2
|
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
|
||||||
|
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||||
accelerate
|
accelerate
|
||||||
|
|||||||
@@ -2,8 +2,9 @@
|
|||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch==2.3.1+cxx11.abi
|
torch==2.3.1+cxx11.abi
|
||||||
torchaudio==2.3.1+cxx11.abi
|
torchaudio==2.3.1+cxx11.abi
|
||||||
transformers==4.46.3
|
transformers
|
||||||
chatterbox-tts==0.1.2
|
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
|
||||||
|
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||||
accelerate
|
accelerate
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126/
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
transformers
|
||||||
|
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
|
||||||
|
accelerate
|
||||||
Reference in New Issue
Block a user