feat: improve image generation and coercion for data URLs

- Updated `GeminiImageGenerationService` to differentiate between data URLs and web URLs for image handling.
- Added coercion support in CoercionService for converting data URLs to streams, including extraction of content type.
- Documented the `txt2img` method in AI.js to clarify its usage for both text-to-image and image-to-image generation.
This commit is contained in:
Nariman Jelveh
2025-09-15 15:45:54 -07:00
parent 6480d37ec0
commit 4e8aad675d
3 changed files with 59 additions and 1 deletions

View File

@@ -88,8 +88,10 @@ class GeminiImageGenerationService extends BaseService {
input_image_mime_type
});
// Determine if this is a data URL or web URL
const isDataUrl = url.startsWith('data:');
const image = new TypedValue({
$: 'string:url:web',
$: isDataUrl ? 'string:url:data' : 'string:url:web',
content_type: 'image'
}, url);

View File

@@ -90,6 +90,37 @@ class CoercionService extends BaseService {
}, response.data);
}
});
// Coercion: image data URL ('string:url:data') -> readable image stream.
//
// Parses the URL according to the RFC 2397 layout
//     data:[<mediatype>][;base64],<data>
// and yields a stream carrying the decoded payload bytes. The resulting
// TypedValue's content_type is the media type declared in the URL, or
// 'image/png' when the URL declares none.
this.coercions_.push({
    produces: {
        $: 'stream',
        content_type: 'image'
    },
    consumes: {
        $: 'string:url:data',
        content_type: 'image'
    },
    /**
     * @param {TypedValue} typed_value - value whose `.value` is a data URL string
     * @returns {Promise<TypedValue>} a 'stream' TypedValue wrapping the decoded bytes
     * @throws {TypeError} if the data URL has no "," separating header and payload
     * @throws {URIError} if a non-base64 payload contains malformed percent-escapes
     */
    coerce: async typed_value => {
        const { PassThrough } = require('stream');

        const data_url = typed_value.value;

        // Split on the FIRST comma only: everything before it is the header,
        // everything after it is payload (which may itself contain commas
        // when it is not base64-encoded).
        const separator_index = data_url.indexOf(',');
        if ( separator_index === -1 ) {
            throw new TypeError('invalid data URL: missing "," separator');
        }
        const header = data_url.slice(0, separator_index);
        const payload = data_url.slice(separator_index + 1);

        // Only treat the payload as base64 when the header says so;
        // otherwise it is percent-encoded text (e.g. inline SVG).
        const is_base64 = /;base64$/i.test(header);
        const buffer = is_base64
            ? Buffer.from(payload, 'base64')
            : Buffer.from(decodeURIComponent(payload), 'utf8');

        // Media type is the part of the header between "data:" and the first
        // ";" — the match is confined to the header so it can never swallow
        // payload characters.
        const contentType = header.match(/^data:([^;]+)/i)?.[1] || 'image/png';

        const stream = new PassThrough();
        stream.end(buffer);

        return new TypedValue({
            $: 'stream',
            content_type: contentType,
        }, stream);
    }
});
}
/**

View File

@@ -604,6 +604,31 @@ class AI{
}).call(this, requestParams);
}
/**
* Generate images from text prompts or perform image-to-image generation
*
* @param {string|object} prompt - Text prompt or options object
* @param {object|boolean} [options] - Generation options or test mode flag
* @param {string} [options.prompt] - Text description of the image to generate
* @param {string} [options.model] - Model to use (e.g., "gemini-2.5-flash-image-preview")
* @param {object} [options.ratio] - Image dimensions (e.g., {w: 1024, h: 1024})
* @param {string} [options.input_image] - Base64-encoded input image for image-to-image generation
* @param {string} [options.input_image_mime_type] - MIME type of input image (e.g., "image/png")
* @returns {Promise<Image>} Generated image object with src property
*
* @example
* // Text-to-image
* const img = await puter.ai.txt2img("A beautiful sunset");
*
* @example
* // Image-to-image
* const img = await puter.ai.txt2img({
* prompt: "Transform this into a watercolor painting",
* input_image: base64ImageData,
* input_image_mime_type: "image/png",
* model: "gemini-2.5-flash-image-preview"
* });
*/
txt2img = async (...args) => {
let options = {};
let testMode = false;