Reapply "feat: support Gemini imagegen service"

This reverts commit 05071b4338.
This commit is contained in:
KernelDeimos
2025-09-12 19:16:58 -04:00
parent c40c138aa2
commit 5119efaf79
6 changed files with 264 additions and 3 deletions

43
package-lock.json generated
View File

@@ -15,6 +15,7 @@
],
"dependencies": {
"@aws-sdk/client-secrets-manager": "^3.879.0",
"@google/genai": "^1.19.0",
"@heyputer/putility": "^1.0.2",
"@paralleldrive/cuid2": "^2.2.2",
"dedent": "^1.5.3",
@@ -2088,6 +2089,48 @@
"uuid": "dist/bin/uuid"
}
},
"node_modules/@google/genai": {
"version": "1.19.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.19.0.tgz",
"integrity": "sha512-mIMV3M/KfzzFA//0fziK472wKBJ1TdJLhozIUJKTPLyTDN1NotU+hyoHW/N0cfrcEWUK20YA0GxCeHC4z0SbMA==",
"license": "Apache-2.0",
"dependencies": {
"google-auth-library": "^9.14.2",
"ws": "^8.18.0"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"@modelcontextprotocol/sdk": "^1.11.4"
},
"peerDependenciesMeta": {
"@modelcontextprotocol/sdk": {
"optional": true
}
}
},
"node_modules/@google/genai/node_modules/ws": {
"version": "8.18.3",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/@google/generative-ai": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",

View File

@@ -49,6 +49,7 @@
},
"dependencies": {
"@aws-sdk/client-secrets-manager": "^3.879.0",
"@google/genai": "^1.19.0",
"@heyputer/putility": "^1.0.2",
"@paralleldrive/cuid2": "^2.2.2",
"dedent": "^1.5.3",
@@ -66,4 +67,4 @@
"sharp-bmp": "^0.1.5",
"sharp-ico": "^0.1.5"
}
}
}

View File

@@ -0,0 +1,205 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// METADATA // {"ai-commented":{"service":"claude"}}
const APIError = require("../../api/APIError");
const BaseService = require("../../services/BaseService");
const { TypedValue } = require("../../services/drivers/meta/Runtime");
const { Context } = require("../../util/context");
const { GoogleGenAI } = require('@google/genai');
/**
* Service class for generating images using Gemini's API
* Extends BaseService to provide image generation capabilities through
* the puter-image-generation interface.
*/
/**
 * Service that generates images through Google's Gemini API and exposes
 * them via the `puter-image-generation` driver interface.
 *
 * Pricing is looked up in `this.models_`, keyed first by model name and
 * then by a `"<width>x<height>"` size string; values are in US dollars.
 */
class GeminiImageGenerationService extends BaseService {
    static MODULES = {
    }

    /** Model used when the caller does not specify one. */
    static DEFAULT_MODEL = 'gemini-2.5-flash-image-preview';

    /** The only ratio currently offered: a 1024x1024 square. */
    static RATIO_SQUARE = { w: 1024, h: 1024 };

    /**
     * Sets up the per-model price table (USD per generated image,
     * keyed by `"<width>x<height>"`).
     */
    _construct() {
        this.models_ = {
            'gemini-2.5-flash-image-preview': {
                "1024x1024": 0.039,
            }
        };
    }

    /**
     * Creates the Gemini client using the API key found at
     * `global_config.services.gemini.apiKey`.
     * @private
     * @async
     * @returns {Promise<void>}
     */
    async _init() {
        this.genAI = new GoogleGenAI({ apiKey: this.global_config.services.gemini.apiKey });
    }

    static IMPLEMENTS = {
        ['driver-capabilities']: {
            /**
             * Reports that only `puter-image-generation.generate` supports test mode.
             * @param {string} iface - Interface name being queried
             * @param {string} method_name - Method name being queried
             * @returns {boolean}
             */
            supports_test_mode(iface, method_name) {
                return iface === 'puter-image-generation' &&
                    method_name === 'generate';
            }
        },
        ['puter-image-generation']: {
            /**
             * Driver entry point: generates an image and wraps the resulting
             * URL in a TypedValue of type `string:url:web`.
             *
             * @param {Object} params - Driver call parameters
             * @param {string} params.prompt - Text description of the image to generate
             * @param {*} [params.quality] - Forwarded to the service method, which currently ignores it
             * @param {boolean} [params.test_mode] - When truthy, returns a fixed sample image URL
             *     without contacting Gemini or recording any cost
             * @param {string} [params.model] - Model name; the service method falls back to its default
             * @param {{w: number, h: number}} [params.ratio] - Image dimensions; defaults to RATIO_SQUARE
             * @returns {Promise<TypedValue>} The image URL wrapped in a TypedValue
             */
            async generate(params) {
                const { prompt, quality, test_mode, model, ratio } = params;

                if (test_mode) {
                    return new TypedValue({
                        $: 'string:url:web',
                        content_type: 'image',
                    }, 'https://puter-sample-data.puter.site/image_example.png');
                }

                const url = await this.generate(prompt, {
                    quality,
                    ratio: ratio || this.constructor.RATIO_SQUARE,
                    model,
                });

                return new TypedValue({
                    $: 'string:url:web',
                    content_type: 'image',
                }, url);
            }
        }
    };

    /**
     * Generates an image with Gemini and returns it as a base64 data URL.
     *
     * The cost for the requested model/size is checked against the
     * actor's available funding and recorded before the API request is made.
     *
     * @param {string} prompt - Text description of the image to generate
     * @param {Object} [options] - Generation options
     * @param {{w: number, h: number}} options.ratio - Requested image dimensions
     * @param {string} [options.model='gemini-2.5-flash-image-preview'] - Model to use
     * @returns {Promise<string>} A `data:` URL containing the base64-encoded image
     * @throws {Error} If `prompt` is not a string, `ratio` is not valid for the
     *     model, or the response contains no image data
     * @throws {APIError} `field_invalid` for an unknown model or unpriced size;
     *     `insufficient_funds` when the cost cannot be covered
     */
    async generate(prompt, {
        ratio,
        model,
    } = {}) {
        if (typeof prompt !== 'string') {
            throw new Error('`prompt` must be a string');
        }

        // Resolve the default model BEFORE validating the ratio: the set of
        // valid ratios depends on the model, so validating against an
        // undefined model could never succeed.
        model = model ?? this.constructor.DEFAULT_MODEL;

        // Own-property check so keys inherited from Object.prototype
        // (e.g. "constructor") are not mistaken for model names.
        if (!Object.hasOwn(this.models_, model)) {
            throw APIError.create('field_invalid', null, {
                key: 'model',
                expected: 'one of: ' +
                    Object.keys(this.models_).join(', '),
                got: model,
            });
        }

        if (!ratio || !this._validate_ratio(ratio, model)) {
            throw new Error('`ratio` must be a valid ratio for model ' + model);
        }

        const price_key = `${ratio.w}x${ratio.h}`;
        if (!this.models_[model][price_key]) {
            const availableSizes = Object.keys(this.models_[model]);
            throw APIError.create('field_invalid', null, {
                key: 'size/quality combination',
                expected: 'one of: ' + availableSizes.join(', '),
                got: price_key,
            });
        }

        const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
        if (user_private_uid === 'UNKNOWN') {
            // NOTE(review): the report key says "chat-completion-service" although
            // this is the image service; kept as-is in case alarms key off it.
            this.errors.report('chat-completion-service:unknown-user', {
                message: 'failed to get a user ID for a Gemini request',
                alarm: true,
                trace: true,
            });
        }

        const exact_cost = this.models_[model][price_key]
            * 100 // $ USD to cents USD
            * Math.pow(10, 6); // cents to microcents

        const svc_cost = this.services.get('cost');
        const usageAllowed = await svc_cost.get_funding_allowed({
            minimum: exact_cost,
        });
        if (!usageAllowed) {
            throw APIError.create('insufficient_funds');
        }

        // We can charge immediately
        // NOTE(review): this means a failed Gemini request is still charged.
        await svc_cost.record_cost({ cost: exact_cost });

        const width = Number.parseInt(ratio.w, 10);
        const height = Number.parseInt(ratio.h, 10);
        const response = await this.genAI.models.generateContent({
            // Use the validated model rather than a hard-coded name so that
            // adding an entry to `models_` is enough to support a new model.
            model,
            contents: `Generate a picture of dimensions ${width}x${height} with the prompt: ${prompt}`,
        });

        // A response may carry no candidates (or no parts); treat that the
        // same as "no image found" instead of crashing on a property access.
        const parts = response?.candidates?.[0]?.content?.parts ?? [];

        let url = undefined;
        for (const part of parts) {
            // Text parts carry no image payload; only inline data is useful here.
            if (part.text || !part.inlineData?.data) continue;
            const mimeType = part.inlineData.mimeType ?? 'image/png';
            url = `data:${mimeType};base64,${part.inlineData.data}`;
        }

        if (!url) {
            throw new Error('Failed to extract image URL from Gemini response');
        }

        const spending_meta = {
            model,
            size: `${ratio.w}x${ratio.h}`,
        };
        const svc_spending = Context.get('services').get('spending');
        svc_spending.record_spending('gemini', 'image-generation', spending_meta);

        return url;
    }

    /**
     * Get valid ratios for a specific model
     * @param {string} model - The model name
     * @returns {Array<Object>} Array of valid ratio objects; empty for an unknown model
     * @private
     */
    _getValidRatios(model) {
        if (model === 'gemini-2.5-flash-image-preview') {
            return [this.constructor.RATIO_SQUARE];
        }
        return [];
    }

    /**
     * Checks whether `ratio` matches one of the model's valid ratios.
     * Comparison is by `w`/`h` value, not object identity, so ratio objects
     * supplied by callers are accepted.
     * @param {{w: number, h: number}} ratio - Requested dimensions
     * @param {string} model - The model name
     * @returns {boolean} True when the ratio is valid for the model
     * @private
     */
    _validate_ratio(ratio, model) {
        return this._getValidRatios(model).some(
            valid => valid.w === ratio?.w && valid.h === ratio?.h
        );
    }
}
// Export the service class under its own name so other modules can
// destructure it from require('./GeminiImageGenerationService').
module.exports = {
GeminiImageGenerationService,
};

View File

@@ -95,7 +95,10 @@ class PuterAIModule extends AdvancedBase {
}
if ( !! config?.services?.['gemini'] ) {
const { GeminiService } = require('./GeminiService');
const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');
services.registerService('gemini', GeminiService);
services.registerService('gemini-image-generation', GeminiImageGenerationService);
}
if ( !! config?.services?.['openrouter'] ) {
const { OpenRouterService } = require('./OpenRouterService');

View File

@@ -110,6 +110,9 @@ class TrackSpendingService extends BaseService {
*/
static ImageGenerationStrategy = class ImageGenerationStrategy {
static models = {
'gemini-2.5-flash-image-preview': {
"1024x1024": 0.039,
},
'gpt-image-1': {
"low:1024x1024": 0.011,
"low:1024x1536": 0.016,

View File

@@ -630,9 +630,15 @@ class AI{
if (typeof args[0] === 'object') {
options = args[0]
}
let AIService = "openai-image-generation"
if (options.model === "nano-banana")
options.model = "gemini-2.5-flash-image-preview";
if (options.model === "gemini-2.5-flash-image-preview")
AIService = "gemini-image-generation";
// Invoke the 'generate' method of the puter-image-generation driver interface
return await utils.make_driver_method(['prompt'], 'puter-image-generation', undefined, 'generate', {
return await utils.make_driver_method(['prompt'], 'puter-image-generation', AIService, 'generate', {
responseType: 'blob',
test_mode: testMode ?? false,
transform: async blob => {