Add ElevenLabs TTS provider integration (#2004)

* Add ElevenLabs TTS provider integration

Adds ElevenLabs text-to-speech support in the backend and frontend: adds `ElevenLabsTTSService`, updates `PuterAIModule` to register the service, documents configuration, and integrates cost tracking in `MeteringService` (recorded at zero cost for now; TODO). Also updates `AI.js` to support ElevenLabs ("11labs") as a provider and adds related tests for `txt2speech` functionality.

* Update 11labs cost map values
This commit is contained in:
Nariman Jelveh
2025-11-21 17:31:21 -08:00
committed by GitHub
parent 11e057557d
commit bb752a5bb5
8 changed files with 300 additions and 5 deletions

View File

@@ -1,10 +1,10 @@
'use strict';
"use strict";
// Code generated by protoc-gen-ts_proto. DO NOT EDIT.
// versions:
// protoc-gen-ts_proto v2.8.0
// protoc v3.21.12
// source: fsentry.proto
Object.defineProperty(exports, '__esModule', { value: true });
Object.defineProperty(exports, "__esModule", { value: true });
exports.FSEntry = exports.protobufPackage = void 0;
/* eslint-disable */
const wire_1 = require("@bufbuild/protobuf/wire");

View File

@@ -0,0 +1,196 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');
const DEFAULT_MODEL = 'eleven_multilingual_v2';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Common public "Rachel" sample voice
const DEFAULT_OUTPUT_FORMAT = 'mp3_44100_128';
const SAMPLE_AUDIO_URL = 'https://puter-sample-data.puter.site/tts_example.mp3';
const ELEVENLABS_TTS_MODELS = [
{ id: DEFAULT_MODEL, name: 'Eleven Multilingual v2' },
{ id: 'eleven_flash_v2_5', name: 'Eleven Flash v2.5' },
{ id: 'eleven_turbo_v2_5', name: 'Eleven Turbo v2.5' },
{ id: 'eleven_v3', name: 'Eleven v3 Alpha' },
];
/**
 * ElevenLabs text-to-speech provider.
 *
 * Implements the `puter-tts` interface (`list_voices`, `list_engines`,
 * `synthesize`) so the AI module can synthesize speech using ElevenLabs
 * voices. Usage is metered per input character under the key
 * `elevenlabs:<model_id>:character`.
 */
class ElevenLabsTTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    // Driver-facing entry points. Each handler simply delegates to the
    // camelCase method of the same purpose on the service.
    // NOTE(review): `this` inside these handlers is assumed to be bound to the
    // service instance by the driver framework — confirm against BaseService.
    static IMPLEMENTS = {
        ['driver-capabilities']: {
            // Only `puter-tts.synthesize` has a test-mode shortcut.
            supports_test_mode (iface, method_name) {
                return iface === 'puter-tts' && method_name === 'synthesize';
            },
        },
        ['puter-tts']: {
            async list_voices () {
                return this.listVoices();
            },
            async list_engines () {
                return this.listEngines();
            },
            async synthesize (params) {
                return this.synthesize(params);
            },
        },
    };

    /**
     * Reads the ElevenLabs configuration and stores the API key, base URL and
     * default voice on the instance.
     *
     * The configuration is looked up, in order, at
     * `global_config.services.elevenlabs`, `config.services.elevenlabs` and
     * `config.elevenlabs`. Several spellings of the key fields are accepted.
     *
     * @throws {Error} when no API key is configured.
     */
    async _init () {
        const providerConfig = this.global_config?.services?.elevenlabs
            ?? this.config?.services?.elevenlabs
            ?? this.config?.elevenlabs;

        this.apiKey = providerConfig?.apiKey ?? providerConfig?.api_key ?? providerConfig?.key;
        this.baseUrl = providerConfig?.baseUrl ?? 'https://api.elevenlabs.io';
        this.defaultVoiceId = providerConfig?.defaultVoiceId ?? providerConfig?.voiceId ?? DEFAULT_VOICE_ID;

        if ( !this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }

    /**
     * Performs an authenticated HTTP request against the ElevenLabs API.
     *
     * @param {string} path - Path (and optional query string) appended to `baseUrl`.
     * @param {object} [options]
     * @param {string} [options.method='GET'] - HTTP method.
     * @param {object} [options.body] - JSON-serialisable request body.
     * @param {object} [options.headers] - Extra headers; these override the defaults.
     * @returns {Promise<Response>} The response, only when its status is OK.
     * @throws {APIError} `internal_server_error` for any non-OK response; the
     *   upstream status and parsed error body (if any) are logged.
     */
    async request (path, { method = 'GET', body, headers = {} } = {}) {
        const response = await fetch(`${this.baseUrl}${path}`, {
            method,
            headers: {
                'xi-api-key': this.apiKey,
                ...(body ? { 'Content-Type': 'application/json' } : {}),
                ...headers,
            },
            body: body ? JSON.stringify(body) : undefined,
        });
        if ( response.ok ) {
            return response;
        }

        let detail = null;
        try {
            detail = await response.json();
        } catch ( e ) {
            // Best effort only: the error body may be empty or not JSON, in
            // which case the failure is logged without detail.
        }
        this.log.error('ElevenLabs request failed', { path, status: response.status, detail });
        throw APIError.create('internal_server_error', null, { provider: 'elevenlabs', status: response.status });
    }

    /**
     * Lists the voices available to the configured account.
     *
     * Accepts either `{ voices: [...] }` or a bare array from the API and
     * drops entries that lack an id or a name.
     *
     * @returns {Promise<object[]>} Normalised voice descriptors.
     */
    async listVoices () {
        const res = await this.request('/v1/voices');
        const data = await res.json();
        const voices = Array.isArray(data?.voices) ? data.voices : Array.isArray(data) ? data : [];
        return voices
            .map(voice => ({
                id: voice.voice_id || voice.voiceId || voice.id,
                name: voice.name,
                description: voice.description,
                category: voice.category,
                provider: 'elevenlabs',
                labels: voice.labels,
                // Every voice is advertised with the full static model list.
                supported_models: ELEVENLABS_TTS_MODELS.map(model => model.id),
            }))
            .filter(v => v.id && v.name);
    }

    /**
     * Lists the statically known ElevenLabs TTS models.
     *
     * @returns {Promise<object[]>} One descriptor per entry of `ELEVENLABS_TTS_MODELS`.
     */
    async listEngines () {
        return ELEVENLABS_TTS_MODELS.map(model => ({
            id: model.id,
            name: model.name,
            provider: 'elevenlabs',
            pricing_per_million_chars: 0,
        }));
    }

    /**
     * Synthesizes speech for `params.text`.
     *
     * @param {object} params
     * @param {string} params.text - Text to speak; must be a non-blank string.
     * @param {string} [params.voice] - Voice id; defaults to the configured default voice.
     * @param {string} [params.model] - Model id; defaults to `DEFAULT_MODEL`.
     * @param {string} [params.output_format] - ElevenLabs output format; wins over `response_format`.
     * @param {string} [params.response_format] - Alias used when `output_format` is absent.
     * @param {object} [params.voice_settings] - Passed through as `voice_settings`.
     * @param {object} [params.voiceSettings] - Alias used when `voice_settings` is nullish.
     * @param {boolean} [params.test_mode] - When truthy, returns a sample URL without calling the API or metering.
     * @returns {Promise<TypedValue>} A web URL (test mode) or an audio stream.
     * @throws {APIError} `field_required` for missing/blank text,
     *   `insufficient_funds` when metering denies the request, or
     *   `internal_server_error` when the provider call fails.
     */
    async synthesize (params) {
        const {
            text,
            voice,
            model,
            response_format,
            output_format,
            voice_settings,
            voiceSettings,
            test_mode,
        } = params ?? {}; // a missing params object is reported as a missing `text` below

        if ( test_mode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'audio',
            }, SAMPLE_AUDIO_URL);
        }
        if ( typeof text !== 'string' || !text.trim() ) {
            throw APIError.create('field_required', null, { key: 'text' });
        }

        const voiceId = voice || this.defaultVoiceId;
        const modelId = model || DEFAULT_MODEL;
        const desiredFormat = output_format || response_format || DEFAULT_OUTPUT_FORMAT;

        // Check credits before spending provider quota.
        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:character`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, text.length);
        if ( !usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const payload = {
            text,
            model_id: modelId,
        };
        const finalVoiceSettings = voice_settings ?? voiceSettings;
        if ( finalVoiceSettings ) {
            payload.voice_settings = finalVoiceSettings;
        }

        // The voice id comes from the caller, so it is percent-encoded to keep
        // characters such as `/`, `?` or `#` from altering the request path.
        // `output_format` is a query parameter of this endpoint (not a body
        // field), so it is sent on the URL; an unsupported value is therefore
        // rejected by the provider instead of being silently ignored.
        const query = new URLSearchParams({ output_format: desiredFormat });
        const response = await this.request(`/v1/text-to-speech/${encodeURIComponent(voiceId)}?${query}`, {
            method: 'POST',
            body: payload,
        });

        const arrayBuffer = await response.arrayBuffer();
        const buffer = Buffer.from(arrayBuffer);
        const stream = Readable.from(buffer);

        // Usage is recorded only after the audio was fully received.
        this.meteringService.incrementUsage(actor, usageKey, text.length);

        return new TypedValue({
            $: 'stream',
            content_type: response.headers.get('content-type') || 'audio/mpeg',
        }, stream);
    }
}
module.exports = {
ElevenLabsTTSService,
};

View File

@@ -55,6 +55,11 @@ class PuterAIModule extends AdvancedBase {
services.registerService('aws-polly', AWSPollyService);
}
if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
const { ElevenLabsTTSService } = require('./ElevenLabsTTSService');
services.registerService('elevenlabs-tts', ElevenLabsTTSService);
}
if ( config?.services?.openai || config?.openai ) {
const { OpenAICompletionServiceWrapper } = require('./OpenAiCompletionService/index.mjs');
services.registerService('openai-completion', OpenAICompletionServiceWrapper);

View File

@@ -9,6 +9,10 @@ AI services are configured under the `services` block in the configuration file.
"openai": {
"apiKey": "sk-abcdefg..."
},
"elevenlabs": {
"apiKey": "eleven-api-key",
"defaultVoiceId": "optional-voice-id"
},
"deepseek": {
"apiKey": "sk-xyz123..."
},

View File

@@ -0,0 +1,13 @@
// ElevenLabs Text-to-Speech Cost Map
//
// Maps metering usage keys of the form `elevenlabs:<model_id>:character` to a
// per-character cost. The multilingual v2, turbo v2.5 and v3 models share one
// rate; flash v2.5 is listed at half that rate.
//
// NOTE(review): the unit is not stated here — presumably the same unit used by
// the other metering cost maps (likely micro-cents per character); confirm
// against MeteringService. An earlier revision of this comment described these
// entries as zero-cost placeholders, which no longer matches the non-zero
// values below.
export const ELEVENLABS_COST_MAP = {
'elevenlabs:eleven_multilingual_v2:character': 11,
'elevenlabs:eleven_turbo_v2_5:character': 11,
'elevenlabs:eleven_flash_v2_5:character': 5.5,
'elevenlabs:eleven_v3:character': 11,
};

View File

@@ -13,12 +13,14 @@ import { OPENROUTER_COST_MAP } from './openrouterCostMap';
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap';
import { TOGETHER_COST_MAP } from './togetherCostMap';
import { XAI_COST_MAP } from './xaiCostMap';
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap';
export const COST_MAPS = {
...AWS_POLLY_COST_MAP,
...AWS_TEXTRACT_COST_MAP,
...CLAUDE_COST_MAP,
...DEEPSEEK_COST_MAP,
...ELEVENLABS_COST_MAP,
...GEMINI_COST_MAP,
...GROQ_COST_MAP,
...KV_COST_MAP,

View File

@@ -6,6 +6,7 @@ const normalizeTTSProvider = (value) => {
}
const lower = value.toLowerCase();
if ( lower === 'openai' ) return 'openai';
if ( ['elevenlabs', 'eleven', '11labs', '11-labs', 'eleven-labs', 'elevenlabs-tts'].includes(lower) ) return 'elevenlabs';
if ( lower === 'aws' || lower === 'polly' || lower === 'aws-polly' ) return 'aws-polly';
return value;
};
@@ -281,6 +282,10 @@ class AI {
provider = 'openai';
}
if ( options.engine && normalizeTTSProvider(options.engine) === 'elevenlabs' && !options.provider ) {
provider = 'elevenlabs';
}
if ( provider === 'openai' ) {
if ( !options.model && typeof options.engine === 'string' ) {
options.model = options.engine;
@@ -295,6 +300,23 @@ class AI {
options.response_format = 'mp3';
}
delete options.engine;
} else if ( provider === 'elevenlabs' ) {
if ( ! options.voice ) {
options.voice = '21m00Tcm4TlvDq8ikWAM';
}
if ( ! options.model && typeof options.engine === 'string' ) {
options.model = options.engine;
}
if ( ! options.model ) {
options.model = 'eleven_multilingual_v2';
}
if ( ! options.output_format && !options.response_format ) {
options.output_format = 'mp3_44100_128';
}
if ( options.response_format && !options.output_format ) {
options.output_format = options.response_format;
}
delete options.engine;
} else {
provider = 'aws-polly';
@@ -326,7 +348,9 @@ class AI {
}
}
const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', {
responseType: 'blob',
@@ -449,7 +473,13 @@ class AI {
params.provider = 'openai';
}
const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
if ( provider === 'elevenlabs' ) {
params.provider = 'elevenlabs';
}
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', {
responseType: 'text',
@@ -478,7 +508,13 @@ class AI {
delete params.engine;
}
const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
if ( provider === 'elevenlabs' ) {
params.provider = 'elevenlabs';
}
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
return utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_voices', {
responseType: 'text',

View File

@@ -157,6 +157,32 @@ const testTxt2SpeechWithOpenAIProviderCore = async function() {
assert(valueOfValue === srcValue, "valueOf() should match src for OpenAI provider");
};
/**
 * Core check for the ElevenLabs txt2speech path.
 *
 * Calls `puter.ai.txt2speech` with the ElevenLabs provider in test mode (third
 * argument `true`) so no external request is needed, then verifies that the
 * result is an `Audio` whose `toString()`, `valueOf()` and `src` are all the
 * same non-empty string. Rejects with the first failing assertion.
 */
const testTxt2SpeechWithElevenLabsProviderCore = async function() {
    const audio = await puter.ai.txt2speech(
        "Hello, this is an ElevenLabs provider test.",
        { provider: "elevenlabs", voice: "21m00Tcm4TlvDq8ikWAM" },
        true,
    );

    assert(audio instanceof Audio, "txt2speech should return an Audio object for ElevenLabs provider");
    assert(audio !== null, "txt2speech should not return null for ElevenLabs provider");

    // Capture the three string views once, in the same order as they are checked.
    const asText = audio.toString();
    const asValue = audio.valueOf();
    const asSrc = audio.src;

    // Phase 1: every view must be a string.
    const typeChecks = [
        [asText, "toString() should return a string for ElevenLabs provider"],
        [asValue, "valueOf() should return a string for ElevenLabs provider"],
        [asSrc, "src should be a string for ElevenLabs provider"],
    ];
    for ( const [value, message] of typeChecks ) {
        assert(typeof value === 'string', message);
    }

    // Phase 2: every view must be non-empty.
    const nonEmptyChecks = [
        [asText, "toString() should not be empty for ElevenLabs provider"],
        [asValue, "valueOf() should not be empty for ElevenLabs provider"],
        [asSrc, "src should not be empty for ElevenLabs provider"],
    ];
    for ( const [value, message] of nonEmptyChecks ) {
        assert(value.length > 0, message);
    }

    // Phase 3: both conversions must agree with `src`.
    const agreementChecks = [
        [asText, "toString() should match src for ElevenLabs provider"],
        [asValue, "valueOf() should match src for ElevenLabs provider"],
    ];
    for ( const [value, message] of agreementChecks ) {
        assert(value === asSrc, message);
    }
};
// Export test functions
window.txt2speechTests = [
{
@@ -209,5 +235,18 @@ window.txt2speechTests = [
fail("testTxt2SpeechWithOpenAIProvider failed:", error);
}
}
},
{
name: "testTxt2SpeechWithElevenLabsProvider",
description: "Test text-to-speech using the ElevenLabs provider in test mode",
test: async function() {
try {
await testTxt2SpeechWithElevenLabsProviderCore();
pass("testTxt2SpeechWithElevenLabsProvider passed");
} catch (error) {
fail("testTxt2SpeechWithElevenLabsProvider failed:", error);
}
}
}
];