feat: improve image generation and coercion for data URLs

- Updated `GeminiImageGenerationService` to differentiate between data URLs and web URLs for image handling.
- Added coercion support in `CoercionService` for converting data URLs to streams, including extraction of the content type.
- Documented the `txt2img` method in `AI.js` to clarify usage for text-to-image and image-to-image generation.
2026-01-07 05:30:31 -06:00 · 2025-09-15 15:45:54 -07:00
parent 6480d37ec0
commit 4e8aad675d
3 changed files with 59 additions and 1 deletion
--- a/src/backend/src/modules/puterai/GeminiImageGenerationService.js
+++ b/src/backend/src/modules/puterai/GeminiImageGenerationService.js
@@ -88,8 +88,10 @@ class GeminiImageGenerationService extends BaseService {
                    input_image_mime_type
                });

+                // Determine if this is a data URL or web URL
+                const isDataUrl = url.startsWith('data:');
                const image = new TypedValue({
-                    $: 'string:url:web',
+                    $: isDataUrl ? 'string:url:data' : 'string:url:web',
                    content_type: 'image'
                }, url);

--- a/src/backend/src/services/drivers/CoercionService.js
+++ b/src/backend/src/services/drivers/CoercionService.js
@@ -90,6 +90,37 @@ class CoercionService extends BaseService {
                }, response.data);
            }
        });
+
+        // Add coercion for data URLs to streams
+        this.coercions_.push({
+            produces: {
+                $: 'stream',
+                content_type: 'image'
+            },
+            consumes: {
+                $: 'string:url:data',
+                content_type: 'image'
+            },
+            coerce: async typed_value => {
+                this.log.noticeme('data URL coercion is running!');
+                
+                const data_url = typed_value.value;
+                const data = data_url.split(',')[1];
+                const buffer = Buffer.from(data, 'base64');
+                
+                const { PassThrough } = require('stream');
+                const stream = new PassThrough();
+                stream.end(buffer);
+
+                // Extract content type from data URL
+                const contentType = data_url.match(/data:([^;]+)/)?.[1] || 'image/png';
+
+                return new TypedValue({
+                    $: 'stream',
+                    content_type: contentType,
+                }, stream);
+            }
+        });
    }

    /**
--- a/src/puter-js/src/modules/AI.js
+++ b/src/puter-js/src/modules/AI.js
@@ -604,6 +604,31 @@ class AI{
        }).call(this, requestParams);
    }

+    /**
+     * Generate images from text prompts or perform image-to-image generation
+     * 
+     * @param {string|object} prompt - Text prompt or options object
+     * @param {object|boolean} [options] - Generation options or test mode flag
+     * @param {string} [options.prompt] - Text description of the image to generate
+     * @param {string} [options.model] - Model to use (e.g., "gemini-2.5-flash-image-preview")
+     * @param {object} [options.ratio] - Image dimensions (e.g., {w: 1024, h: 1024})
+     * @param {string} [options.input_image] - Base64 encoded input image for image-to-image generation
+     * @param {string} [options.input_image_mime_type] - MIME type of input image (e.g., "image/png")
+     * @returns {Promise<Image>} Generated image object with src property
+     * 
+     * @example
+     * // Text-to-image
+     * const img = await puter.ai.txt2img("A beautiful sunset");
+     * 
+     * @example
+     * // Image-to-image
+     * const img = await puter.ai.txt2img({
+     *   prompt: "Transform this into a watercolor painting",
+     *   input_image: base64ImageData,
+     *   input_image_mime_type: "image/png",
+     *   model: "gemini-2.5-flash-image-preview"
+     * });
+     */
    txt2img = async (...args) => {
        let options = {};
        let testMode = false;