mirror of
https://github.com/HeyPuter/puter.git
synced 2025-12-30 09:40:00 -06:00
feat: enforce new metering + get rid of old methods (#1762)
* feat: enforce new metering + get rid of old methods * fix: make openrouter metering more robust
This commit is contained in:
@@ -175,3 +175,9 @@ This repository, including all its contents, sub-projects, modules, and componen
|
||||
- [Ukrainian / Українська](https://github.com/HeyPuter/puter/blob/main/doc/i18n/README.ua.md)
|
||||
- [Urdu / اردو](https://github.com/HeyPuter/puter/blob/main/doc/i18n/README.ur.md)
|
||||
- [Vietnamese / Tiếng Việt](https://github.com/HeyPuter/puter/blob/main/doc/i18n/README.vi.md)
|
||||
|
||||
|
||||
## Links to Other READMEs
|
||||
### Backend
|
||||
- [PuterAI Module](./src/backend/doc/modules/puterai/README.md)
|
||||
- [Metering and Billing Service](./src/backend/src/services/MeteringService/README.md)
|
||||
|
||||
@@ -28,7 +28,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'parameters',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -66,7 +66,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'service used',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -75,12 +75,6 @@ export default [
|
||||
This event is emitted for ai prompt cost calculated operations.
|
||||
`,
|
||||
},
|
||||
{
|
||||
id: 'ai.prompt.report-usage',
|
||||
description: `
|
||||
This event is emitted for ai prompt report usage operations.
|
||||
`,
|
||||
},
|
||||
{
|
||||
id: 'ai.prompt.validate',
|
||||
description: `
|
||||
@@ -113,7 +107,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'parameters',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -127,7 +121,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'data url',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -141,7 +135,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'data url',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -156,7 +150,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'apps',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -170,7 +164,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'required',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -185,17 +179,17 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'the email being validated',
|
||||
notes: [
|
||||
'The email may have already been cleaned.'
|
||||
]
|
||||
'The email may have already been cleaned.',
|
||||
],
|
||||
},
|
||||
allow: {
|
||||
type: 'boolean',
|
||||
mutability: 'mutable',
|
||||
summary: 'whether the email is allowed',
|
||||
notes: [
|
||||
'If set to false, the email will be considered invalid.'
|
||||
]
|
||||
}
|
||||
'If set to false, the email will be considered invalid.',
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -207,13 +201,13 @@ export default [
|
||||
node: {
|
||||
type: 'FSNodeContext',
|
||||
mutability: 'no-effect',
|
||||
summary: 'the directory that was created'
|
||||
summary: 'the directory that was created',
|
||||
},
|
||||
context: {
|
||||
type: 'Context',
|
||||
mutability: 'no-effect',
|
||||
summary: 'current context'
|
||||
}
|
||||
summary: 'current context',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -236,7 +230,7 @@ export default [
|
||||
measurements: data.measurements
|
||||
});
|
||||
});
|
||||
`
|
||||
`,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -256,7 +250,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'cost uuid',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -276,7 +270,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'cost uuid',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -296,7 +290,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'cost uuid',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -310,7 +304,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'usages',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -335,7 +329,7 @@ export default [
|
||||
notes: [
|
||||
'The email may have already been cleaned.',
|
||||
],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -355,7 +349,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'current context',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -387,7 +381,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'path to the affected resource',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -419,7 +413,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'path to the affected resource',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -439,7 +433,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'current context',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -466,7 +460,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'end',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -480,7 +474,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'uuid',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -494,7 +488,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -508,7 +502,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -522,7 +516,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -536,7 +530,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -550,7 +544,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -570,7 +564,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'notification',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -584,7 +578,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -598,7 +592,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -612,7 +606,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'response',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -644,7 +638,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'data',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -670,7 +664,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'body',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -690,7 +684,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'res',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -716,7 +710,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'end',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -730,7 +724,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'message',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -750,7 +744,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'usages',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -764,7 +758,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'new email',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -778,7 +772,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'email',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -792,7 +786,7 @@ export default [
|
||||
mutability: 'no-effect',
|
||||
summary: 'user associated with the operation',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -806,7 +800,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'user associated with the operation',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -820,7 +814,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'user associated with the operation',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -834,7 +828,7 @@ export default [
|
||||
mutability: 'mutable',
|
||||
summary: 'policy information for the operation',
|
||||
notes: [],
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
];
|
||||
|
||||
@@ -76,11 +76,6 @@ service used
|
||||
This event is emitted for ai prompt cost calculated operations.
|
||||
|
||||
|
||||
### `ai.prompt.report-usage`
|
||||
|
||||
This event is emitted for ai prompt report usage operations.
|
||||
|
||||
|
||||
### `ai.prompt.validate`
|
||||
|
||||
This event is emitted when a validate is being validated.
|
||||
|
||||
@@ -8,10 +8,6 @@ extension.on('ai.prompt.complete', event => {
|
||||
console.log('GOT AI.PROMPT.COMPLETE EVENT', event);
|
||||
});
|
||||
|
||||
extension.on('ai.prompt.report-usage', event => {
|
||||
console.log('GOT AI.PROMPT.REPORT-USAGE EVENT', event);
|
||||
});
|
||||
|
||||
extension.on('ai.prompt.validate', event => {
|
||||
console.log('GOT AI.PROMPT.VALIDATE EVENT', event);
|
||||
});
|
||||
@@ -155,4 +151,3 @@ extension.on('web.socket.user-connected', event => {
|
||||
extension.on('wisp.get-policy', event => {
|
||||
console.log('GOT WISP.GET-POLICY EVENT', event);
|
||||
});
|
||||
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
# Metered Services and Cost Management
|
||||
|
||||
Puter supports metered services through the CostService infrastructure. This allows services to check available funds, record costs, and track usage in a standardized way.
|
||||
|
||||
## CostService Overview
|
||||
|
||||
The CostService (`src/backend/src/services/drivers/CostService.js`) provides core functionality for managing costs and funds:
|
||||
|
||||
```javascript
|
||||
// Check if user has sufficient funds
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: cost_in_microcents,
|
||||
});
|
||||
|
||||
// Record a cost
|
||||
await svc_cost.record_cost({
|
||||
cost: cost_in_microcents,
|
||||
});
|
||||
|
||||
// Record funding updates
|
||||
await svc_cost.record_funding_update({
|
||||
old_amount: previous_amount,
|
||||
new_amount: updated_amount,
|
||||
});
|
||||
```
|
||||
|
||||
### Cost Units
|
||||
|
||||
Costs are tracked in microcents (1/1,000,000th of a USD cent) to allow for precise metering of very small costs. For example:
|
||||
|
||||
- 1 USD = 100 cents = 100,000,000 microcents
|
||||
- 0.1 cents = 100,000 microcents
|
||||
- 0.001 cents = 1,000 microcents
|
||||
|
||||
## Implementation Examples
|
||||
|
||||
### AI Services
|
||||
|
||||
AI services are a prime example of metered services in Puter. Each AI service defines its own cost structure based on usage:
|
||||
|
||||
#### Text Generation (e.g. MistralAI)
|
||||
|
||||
```javascript
|
||||
{
|
||||
currency: 'usd-cents',
|
||||
tokens: 1_000_000, // per million tokens
|
||||
input: 200, // cost for input tokens
|
||||
output: 600 // cost for output tokens
|
||||
}
|
||||
```
|
||||
|
||||
#### Text-to-Speech (AWS Polly)
|
||||
|
||||
```javascript
|
||||
const microcents_per_character = 400;
|
||||
const exact_cost = microcents_per_character * text.length;
|
||||
```
|
||||
|
||||
#### Document Processing (AWS Textract)
|
||||
|
||||
```javascript
|
||||
const min_cost =
|
||||
(150 * // cents per 1000 pages
|
||||
Math.pow(10, 6)) / // microcents per cent
|
||||
1000; // pages // 150,000 microcents per page
|
||||
```
|
||||
|
||||
### Usage Pattern
|
||||
|
||||
Services typically follow this pattern for metered operations:
|
||||
|
||||
1. Calculate the exact cost or minimum cost for the operation
|
||||
2. Check if the user has sufficient funds using `get_funding_allowed()`
|
||||
3. If funds are available:
|
||||
- For fixed-cost operations: Record the cost immediately
|
||||
- For variable-cost operations: Record the cost after completion
|
||||
4. If funds are insufficient, throw an `insufficient_funds` error
|
||||
|
||||
## Integration Guide
|
||||
|
||||
To add metering to a new service:
|
||||
|
||||
1. Get the CostService instance:
|
||||
|
||||
```javascript
|
||||
const svc_cost = this.services.get("cost");
|
||||
```
|
||||
|
||||
2. Define your cost structure:
|
||||
|
||||
- Use microcents as the base unit
|
||||
- Consider both fixed and variable costs
|
||||
- Document the cost calculation logic
|
||||
|
||||
3. Implement the usage check:
|
||||
|
||||
```javascript
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: calculated_cost,
|
||||
});
|
||||
if (!usageAllowed) {
|
||||
throw APIError.create("insufficient_funds");
|
||||
}
|
||||
```
|
||||
|
||||
4. Record costs appropriately:
|
||||
|
||||
```javascript
|
||||
await svc_cost.record_cost({
|
||||
cost: final_cost,
|
||||
});
|
||||
```
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- For AI-specific metering, see the [PuterAI documentation](../modules/puterai/README.md)
|
||||
- For implementation details of CostService, see the [service documentation](../services/CostService.md)
|
||||
@@ -9,38 +9,7 @@ The PuterAI module provides AI capabilities to Puter through various services in
|
||||
|
||||
## Metered Services
|
||||
|
||||
All AI services in this module are metered using Puter's CostService infrastructure. For details on how metering works and how to implement it in new services, see the [Metered Services documentation](../../features/metered-services.md).
|
||||
All AI services in this module are metered using Puter's MeteringService. This allows us to charge per `unit` usage, where a `unit` is defined by the specific service:
|
||||
for example, most LLMs will charge per token, AWS Polly charges per character, and AWS Textract charges per page. the metering service tracks usage units, and relies on its centralized cost maps to determine if a user has enough credits to perform an operation, and to record usage after the operation is complete.
|
||||
|
||||
Each AI service defines its own cost structure based on its specific usage patterns:
|
||||
|
||||
### Text Generation Models
|
||||
|
||||
- Costs are typically defined per million tokens
|
||||
- Separate rates for input and output tokens
|
||||
- Different models have different pricing tiers
|
||||
|
||||
### Text-to-Speech (AWS Polly)
|
||||
|
||||
- Cost per character
|
||||
- Fixed rate regardless of voice or language
|
||||
|
||||
### Document Analysis (AWS Textract)
|
||||
|
||||
- Cost per page
|
||||
- Fixed rate for basic layout analysis
|
||||
|
||||
### Image Generation (DALL-E)
|
||||
|
||||
- Cost varies by image size and quality
|
||||
- Different rates for different models
|
||||
|
||||
## Service Implementation
|
||||
|
||||
Each service in this module:
|
||||
|
||||
1. Defines its cost structure
|
||||
2. Validates available funds before operations
|
||||
3. Records costs after successful operations
|
||||
4. Handles insufficient funds errors appropriately
|
||||
|
||||
For implementation details of specific services, see their respective documentation files in this directory.
|
||||
see [MeteringService](../../../src/services/MeteringService/MeteringService.ts) for more details on how metering works.
|
||||
@@ -161,7 +161,6 @@ const install = async ({ context, services, app, useapi, modapi }) => {
|
||||
const { OwnerLimitedES } = require('./om/entitystorage/OwnerLimitedES');
|
||||
const { ESBuilder } = require('./om/entitystorage/ESBuilder');
|
||||
const { Eq, Or } = require('./om/query/query');
|
||||
const { TrackSpendingService } = require('./services/TrackSpendingService');
|
||||
const { MakeProdDebuggingLessAwfulService } = require('./services/MakeProdDebuggingLessAwfulService');
|
||||
const { ConfigurableCountingService } = require('./services/ConfigurableCountingService');
|
||||
const { FSLockService } = require('./services/fs/FSLockService');
|
||||
@@ -259,7 +258,6 @@ const install = async ({ context, services, app, useapi, modapi }) => {
|
||||
services.registerService('context-init', ContextInitService);
|
||||
services.registerService('identification', IdentificationService);
|
||||
services.registerService('auth-audit', AuthAuditService);
|
||||
services.registerService('spending', TrackSpendingService);
|
||||
services.registerService('counting', ConfigurableCountingService);
|
||||
services.registerService('thumbnails', StrategizedService, {
|
||||
strategy_key: 'engine',
|
||||
@@ -321,9 +319,6 @@ const install = async ({ context, services, app, useapi, modapi }) => {
|
||||
const { DevTODService } = require('./services/DevTODService');
|
||||
services.registerService('__dev-tod', DevTODService);
|
||||
|
||||
const { CostService } = require("./services/drivers/CostService");
|
||||
services.registerService('cost', CostService);
|
||||
|
||||
const { DriverService } = require("./services/drivers/DriverService");
|
||||
services.registerService('driver', DriverService);
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
const { PassThrough } = require("stream");
|
||||
const APIError = require("../../api/APIError");
|
||||
const config = require("../../config");
|
||||
const { PermissionUtil } = require("../../services/auth/permissionUtils.mjs");
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { DB_WRITE } = require("../../services/database/consts");
|
||||
const { TypedValue } = require("../../services/drivers/meta/Runtime");
|
||||
@@ -48,6 +47,10 @@ class AIChatService extends BaseService {
|
||||
cuid2: require('@paralleldrive/cuid2').createId,
|
||||
};
|
||||
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
get meteringAndBillingService(){
|
||||
return this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
/**
|
||||
* Initializes the service by setting up core properties.
|
||||
* Creates empty arrays for providers and model lists,
|
||||
@@ -57,7 +60,6 @@ class AIChatService extends BaseService {
|
||||
*/
|
||||
_construct() {
|
||||
this.providers = [];
|
||||
|
||||
this.simple_model_list = [];
|
||||
this.detail_model_list = [];
|
||||
this.detail_model_map = {};
|
||||
@@ -91,76 +93,6 @@ class AIChatService extends BaseService {
|
||||
|
||||
this.db = this.services.get('database').get(DB_WRITE, 'ai-usage');
|
||||
|
||||
const svc_event = this.services.get('event');
|
||||
svc_event.on('ai.prompt.report-usage', async (_, details) => {
|
||||
// Only skip usage reporting for fake-chat if it's not using the costly model
|
||||
if ( details.service_used === 'fake-chat' && details.model_used !== 'costly' ) return;
|
||||
if ( details.service_used === 'usage-limited-chat' ) return;
|
||||
|
||||
const values = {
|
||||
user_id: details.actor?.type?.user?.id,
|
||||
app_id: details.actor?.type?.app?.id ?? null,
|
||||
service_name: details.service_used,
|
||||
model_name: details.model_used,
|
||||
};
|
||||
|
||||
let model_details;
|
||||
|
||||
// New format
|
||||
if ( Array.isArray(details.usage) ) {
|
||||
values.cost = details.usage.reduce((acc, u) => {
|
||||
return acc + u.cost;
|
||||
}, 0);
|
||||
} else {
|
||||
values.value_uint_1 = details.usage?.input_tokens;
|
||||
values.value_uint_2 = details.usage?.output_tokens;
|
||||
|
||||
model_details = this.get_model_details(values.model_name, {
|
||||
service_used: values.service_name,
|
||||
});
|
||||
if ( model_details ) {
|
||||
values.cost = 0 + // for formatting
|
||||
|
||||
model_details.cost.input * details.usage.input_tokens
|
||||
// cents/MTok tokens
|
||||
+
|
||||
|
||||
model_details.cost.output * details.usage.output_tokens
|
||||
// cents/MTok tokens
|
||||
;
|
||||
} else {
|
||||
this.log.error('could not find model details', { details });
|
||||
}
|
||||
}
|
||||
|
||||
this.log.noticeme('USAGE INFO', { usage: details.usage });
|
||||
this.log.noticeme('COST INFO', values);
|
||||
|
||||
await svc_event.emit('ai.prompt.cost-calculated', {
|
||||
actor: Context.get('actor'),
|
||||
model_details,
|
||||
usage: details.usage,
|
||||
completionId: details.completionId,
|
||||
service: values.service_name,
|
||||
model: values.model_name,
|
||||
cost: values.cost,
|
||||
});
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
svc_cost.record_cost({ cost: values.cost });
|
||||
|
||||
// USD cost from microcents
|
||||
const cost_usc = values.cost / 1000000;
|
||||
const cost_usd = cost_usc / 100;
|
||||
|
||||
// Add to TrackSpendingService
|
||||
const svc_spending = this.services.get('spending');
|
||||
svc_spending.record_cost(`${details.service_used}:chat-completion`, {
|
||||
timestamp: Date.now(),
|
||||
cost: cost_usd,
|
||||
});
|
||||
});
|
||||
|
||||
const svc_apiErrpr = this.services.get('api-error');
|
||||
svc_apiErrpr.register({
|
||||
max_tokens_exceeded: {
|
||||
@@ -352,18 +284,12 @@ class AIChatService extends BaseService {
|
||||
* structure:
|
||||
*
|
||||
* {
|
||||
* usage_promise: Promise,
|
||||
* stream: true,
|
||||
* response: stream {
|
||||
* content_type: 'application/x-ndjson',
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* The `usage_promise` is a promise that resolves to the usage
|
||||
* information for the completion. This is used to report usage
|
||||
* as soon as possible regardless of when it is reported in the
|
||||
* stream.
|
||||
*
|
||||
* @param {Object} options - The completion options
|
||||
* @param {Array} options.messages - Array of chat messages to process
|
||||
* @param {boolean} options.stream - Whether to stream the response
|
||||
@@ -427,9 +353,7 @@ class AIChatService extends BaseService {
|
||||
});
|
||||
|
||||
// Updated: Check usage and get a boolean result instead of throwing error
|
||||
const svc_cost = this.services.get('cost');
|
||||
const available = await svc_cost.get_available_amount();
|
||||
|
||||
const actor = Context.get('actor');
|
||||
const model_details = this.get_model_details(model_used, {
|
||||
service_used,
|
||||
});
|
||||
@@ -449,11 +373,8 @@ class AIChatService extends BaseService {
|
||||
const model_output_cost = model_details.cost.output;
|
||||
const model_max_tokens = model_details.max_tokens;
|
||||
const text = Messages.extract_text(parameters.messages);
|
||||
const approximate_input_cost = text.length / 4 * model_input_cost;
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
available,
|
||||
minimum: approximate_input_cost,
|
||||
});
|
||||
const approximate_input_cost = text.length / 4 * model_input_cost; // TODO DS: guesstimate tokens better,
|
||||
const usageAllowed = await this.meteringAndBillingService.hasEnoughCredits(actor, approximate_input_cost);
|
||||
|
||||
// Handle usage limits reached case
|
||||
if ( !usageAllowed ) {
|
||||
@@ -464,8 +385,10 @@ class AIChatService extends BaseService {
|
||||
intended_service = service_used;
|
||||
}
|
||||
|
||||
// available is no longer defined, so use meteringService to get available credits
|
||||
const availableCredits = await this.meteringAndBillingService.getRemainingUsage(actor);
|
||||
const max_allowed_output_amount =
|
||||
available - approximate_input_cost;
|
||||
availableCredits - approximate_input_cost;
|
||||
|
||||
const max_allowed_output_tokens =
|
||||
max_allowed_output_amount / model_output_cost;
|
||||
@@ -558,8 +481,8 @@ class AIChatService extends BaseService {
|
||||
});
|
||||
|
||||
// Check usage for fallback model too (with updated method)
|
||||
const svc_cost = this.services.get('cost');
|
||||
const fallbackUsageAllowed = await svc_cost.get_funding_allowed();
|
||||
const actor = Context.get('actor');
|
||||
const fallbackUsageAllowed = await this.meteringAndBillingService.hasEnoughCredits(actor, 1);
|
||||
|
||||
// If usage not allowed for fallback, use usage-limited-chat instead
|
||||
if ( !fallbackUsageAllowed ) {
|
||||
@@ -624,18 +547,6 @@ class AIChatService extends BaseService {
|
||||
const username = Context.get('actor').type?.user?.username;
|
||||
|
||||
if ( ret.result.stream ) {
|
||||
(async () => {
|
||||
const usage_promise = ret.result.usage_promise;
|
||||
const usage = await usage_promise;
|
||||
await svc_event.emit('ai.prompt.report-usage', {
|
||||
actor: Context.get('actor'),
|
||||
completionId,
|
||||
service_used,
|
||||
model_used,
|
||||
usage,
|
||||
});
|
||||
})();
|
||||
|
||||
if ( ret.result.init_chat_stream ) {
|
||||
const stream = new PassThrough();
|
||||
const retval = new TypedValue({
|
||||
@@ -671,15 +582,6 @@ class AIChatService extends BaseService {
|
||||
}
|
||||
|
||||
return ret.result.response;
|
||||
} else {
|
||||
await svc_event.emit('ai.prompt.report-usage', {
|
||||
actor: Context.get('actor'),
|
||||
completionId,
|
||||
username,
|
||||
service_used,
|
||||
model_used,
|
||||
usage: ret.result.usage,
|
||||
});
|
||||
}
|
||||
|
||||
await svc_event.emit('ai.prompt.complete', {
|
||||
@@ -706,58 +608,6 @@ class AIChatService extends BaseService {
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Checks if the user has permission to use AI services and verifies usage limits
|
||||
*
|
||||
* @param {Object} params - The check parameters
|
||||
* @param {Object} params.actor - The user/actor making the request
|
||||
* @param {string} params.service - The AI service being used
|
||||
* @param {string} params.model - The model being accessed
|
||||
* @throws {APIError} If usage is not allowed or limits are exceeded
|
||||
* @private
|
||||
*/
|
||||
async check_usage_({ actor, service, model }) {
|
||||
const svc_permission = this.services.get('permission');
|
||||
const svc_event = this.services.get('event');
|
||||
const reading = await svc_permission.scan(actor, `paid-services:ai-chat`);
|
||||
const options = PermissionUtil.reading_to_options(reading);
|
||||
|
||||
// Query current ai usage in terms of cost (only from the past month)
|
||||
const oneMonthAgo = new Date();
|
||||
oneMonthAgo.setMonth(oneMonthAgo.getMonth() - 1);
|
||||
const oneMonthAgoStr = oneMonthAgo.toISOString().slice(0, 19).replace('T', ' ');
|
||||
|
||||
const [row] = await this.db.read('SELECT SUM(`cost`) AS sum FROM `ai_usage` ' +
|
||||
'WHERE `user_id` = ? AND `created_at` >= ?',
|
||||
[actor.type.user.id, oneMonthAgoStr]);
|
||||
|
||||
const cost_used = row?.sum || 0;
|
||||
|
||||
const event = {
|
||||
allowed: true,
|
||||
actor,
|
||||
service,
|
||||
model,
|
||||
cost_used,
|
||||
permission_options: options,
|
||||
};
|
||||
await svc_event.emit('ai.prompt.check-usage', event);
|
||||
|
||||
// If the user has exceeded their usage limit, apply usage-limited-chat which lets them know
|
||||
if ( event.error || ! event.allowed ) {
|
||||
// Instead of throwing an error, modify the intended_service
|
||||
const client_driver_call = Context.get('client_driver_call');
|
||||
client_driver_call.intended_service = 'usage-limited-chat';
|
||||
client_driver_call.response_metadata.usage_limited = true;
|
||||
|
||||
// Return false to indicate that the user has gone over their limit and service has been changed
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return true if the user has tokens to spend
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Moderates chat messages for inappropriate content using OpenAI's moderation service
|
||||
*
|
||||
|
||||
@@ -44,8 +44,11 @@ const VALID_ENGINES = ['standard', 'neural', 'long-form', 'generative'];
|
||||
* @extends BaseService
|
||||
*/
|
||||
class AWSPollyService extends BaseService {
|
||||
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
get meteringAndBillingService() {
|
||||
return this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static MODULES = {
|
||||
kv: globalThis.kv,
|
||||
@@ -61,10 +64,6 @@ class AWSPollyService extends BaseService {
|
||||
this.clients_ = {};
|
||||
}
|
||||
|
||||
async _init() {
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
['driver-capabilities']: {
|
||||
supports_test_mode(iface, method_name) {
|
||||
@@ -131,19 +130,15 @@ class AWSPollyService extends BaseService {
|
||||
throw APIError.create('invalid_engine', null, { engine, valid_engines: VALID_ENGINES });
|
||||
}
|
||||
|
||||
const microcents_per_character = ENGINE_PRICING[engine];
|
||||
const exact_cost = microcents_per_character * text.length;
|
||||
const actor = Context.get('actor');
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: exact_cost,
|
||||
});
|
||||
const usageType = `aws-polly:${engine}:character`;
|
||||
|
||||
const usageAllowed = await this.meteringAndBillingService.hasEnoughCreditsFor(actor, usageType, text.length);
|
||||
|
||||
if ( ! usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
// We can charge immediately
|
||||
await svc_cost.record_cost({ cost: exact_cost });
|
||||
|
||||
const polly_speech = await this.synthesize_speech(text, {
|
||||
format: 'mp3',
|
||||
@@ -153,13 +148,8 @@ class AWSPollyService extends BaseService {
|
||||
engine,
|
||||
});
|
||||
|
||||
// Metering integration for TTS usage
|
||||
const actor = Context.get('actor');
|
||||
// AWS Polly TTS metering: track character count, voice, engine, cost, audio duration if available
|
||||
const trackedUsage = {
|
||||
character: text.length,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `aws-polly:${engine}`);
|
||||
this.meteringAndBillingService.incrementUsage(actor, usageType, text.length);
|
||||
|
||||
const speech = new TypedValue({
|
||||
$: 'stream',
|
||||
|
||||
@@ -32,7 +32,9 @@ const { Context } = require("../../util/context");
|
||||
*/
|
||||
class AWSTextractService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
get meteringAndBillingService(){
|
||||
return this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
/**
|
||||
* AWS Textract service for OCR functionality
|
||||
* Provides document analysis capabilities using AWS Textract API
|
||||
@@ -103,13 +105,6 @@ class AWSTextractService extends BaseService {
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Service initialization: set up metering service
|
||||
*/
|
||||
async _init() {
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates AWS credentials object for authentication
|
||||
* @private
|
||||
@@ -150,24 +145,15 @@ class AWSTextractService extends BaseService {
|
||||
client, document, using_s3,
|
||||
} = await this._get_client_and_document(file_facade);
|
||||
|
||||
const min_cost = 150 // cents per 1000 pages
|
||||
* Math.pow(10, 6) // microcents per cent
|
||||
/ 1000 // pages
|
||||
; // works out to 150,000 microcents per page
|
||||
const actor = Context.get('actor');
|
||||
const usageType = "aws-textract:detect-document-text:page";
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: min_cost,
|
||||
});
|
||||
const usageAllowed = await this.meteringAndBillingService.hasEnoughCreditsFor(actor, usageType, 1); // allow them to pass if they have enough for 1 page atleast
|
||||
|
||||
if ( ! usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
|
||||
// Note: we are using the synchronous command, so cost
|
||||
// should always be the same (only 1 page allowed)
|
||||
await svc_cost.record_cost({ cost: min_cost });
|
||||
|
||||
const command = new AnalyzeDocumentCommand({
|
||||
Document: document,
|
||||
FeatureTypes: [
|
||||
@@ -198,7 +184,6 @@ class AWSTextractService extends BaseService {
|
||||
}
|
||||
|
||||
// Metering integration for Textract OCR usage
|
||||
const actor = Context.get('actor');
|
||||
// AWS Textract metering: track page count, block count, cost, document size if available
|
||||
let pageCount = 0;
|
||||
if ( textractResp.Blocks ) {
|
||||
@@ -206,10 +191,7 @@ class AWSTextractService extends BaseService {
|
||||
if ( block.BlockType === 'PAGE' ) pageCount += 1;
|
||||
}
|
||||
}
|
||||
const trackedUsage = {
|
||||
page: pageCount || 1,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, "aws-textract:detect-document-text");
|
||||
this.meteringAndBillingService.incrementUsage(actor, usageType, pageCount || 1);
|
||||
|
||||
return textractResp;
|
||||
}
|
||||
|
||||
@@ -25,7 +25,6 @@ const Messages = require("./lib/Messages");
|
||||
const FSNodeParam = require("../../api/filesystem/FSNodeParam");
|
||||
const { LLRead } = require("../../filesystem/ll_operations/ll_read");
|
||||
const { Context } = require("../../util/context");
|
||||
const { TeePromise } = require('@heyputer/putility').libs.promise;
|
||||
|
||||
/**
|
||||
* ClaudeService class extends BaseService to provide integration with Anthropic's Claude AI models.
|
||||
@@ -245,8 +244,6 @@ class ClaudeService extends BaseService {
|
||||
const anthropic = (c => beta_mode ? c.beta : c)(this.anthropic);
|
||||
|
||||
if ( stream ) {
|
||||
let usage_promise = new TeePromise();
|
||||
|
||||
const init_chat_stream = async ({ chatStream }) => {
|
||||
const completion = await anthropic.messages.stream(sdk_params);
|
||||
const usageSum = {};
|
||||
@@ -306,17 +303,11 @@ class ClaudeService extends BaseService {
|
||||
chatStream.end();
|
||||
|
||||
this.billForUsage(actor, model || this.get_default_model(), usageSum);
|
||||
// TODO DS: Legacy cost metering, remove when new is ready
|
||||
usage_promise.resolve({
|
||||
input_tokens: usageSum.input_tokens,
|
||||
output_tokens: usageSum.input_tokens,
|
||||
});
|
||||
};
|
||||
|
||||
return {
|
||||
init_chat_stream,
|
||||
stream: true,
|
||||
usage_promise: usage_promise,
|
||||
finally_fn: cleanup_files,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -129,7 +129,6 @@ class FakeChatService extends BaseService {
|
||||
}) + '\n');
|
||||
chatStream.end();
|
||||
},
|
||||
usage_promise: new Promise(rslv => rslv(resp.usage)),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,9 @@ const { GoogleGenAI } = require('@google/genai');
|
||||
*/
|
||||
class GeminiImageGenerationService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
get meteringAndBillingService(){
|
||||
return this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
static MODULES = {
|
||||
};
|
||||
|
||||
@@ -51,7 +53,6 @@ class GeminiImageGenerationService extends BaseService {
|
||||
*/
|
||||
async _init() {
|
||||
this.genAI = new GoogleGenAI({ apiKey: this.global_config.services.gemini.apiKey });
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
@@ -154,7 +155,8 @@ class GeminiImageGenerationService extends BaseService {
|
||||
});
|
||||
}
|
||||
|
||||
const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
|
||||
const actor = Context.get('actor');
|
||||
const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
|
||||
if ( user_private_uid === 'UNKNOWN' ) {
|
||||
this.errors.report('chat-completion-service:unknown-user', {
|
||||
message: 'failed to get a user ID for a Gemini request',
|
||||
@@ -163,22 +165,14 @@ class GeminiImageGenerationService extends BaseService {
|
||||
});
|
||||
}
|
||||
|
||||
const exact_cost = this.models_[model][price_key]
|
||||
* 100 // $ USD to cents USD
|
||||
* Math.pow(10, 6); // cents to microcents
|
||||
const usageType = `gemini:${model}:${price_key}`;
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: exact_cost,
|
||||
});
|
||||
const usageAllowed = await this.meteringAndBillingService.hasEnoughCreditsFor(actor, usageType, 1);
|
||||
|
||||
if ( !usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
|
||||
// We can charge immediately
|
||||
await svc_cost.record_cost({ cost: exact_cost });
|
||||
|
||||
// Construct the prompt based on whether we have an input image
|
||||
let contents;
|
||||
if ( input_image && input_image_mime_type ) {
|
||||
@@ -201,6 +195,9 @@ class GeminiImageGenerationService extends BaseService {
|
||||
model: "gemini-2.5-flash-image-preview",
|
||||
contents: contents,
|
||||
});
|
||||
// Metering usage tracking
|
||||
// Gemini usage: always 1 image, resolution, cost, model
|
||||
this.meteringAndBillingService.incrementUsage(actor, usageType, 1);
|
||||
let url = undefined;
|
||||
for ( const part of response.candidates[0].content.parts ) {
|
||||
if ( part.text ) {
|
||||
@@ -215,23 +212,6 @@ class GeminiImageGenerationService extends BaseService {
|
||||
throw new Error('Failed to extract image URL from Gemini response');
|
||||
}
|
||||
|
||||
// Metering usage tracking
|
||||
const actor = Context.get('actor');
|
||||
// Gemini usage: always 1 image, resolution, cost, model
|
||||
const trackedUsage = {
|
||||
[price_key]: 1,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `gemini:${model}`);
|
||||
|
||||
const spending_meta = {
|
||||
model,
|
||||
size: `${ratio.w}x${ratio.h}`,
|
||||
};
|
||||
|
||||
// Legacy spending record for analytics
|
||||
const svc_spending = Context.get('services').get('spending');
|
||||
svc_spending.record_spending('gemini', 'image-generation', spending_meta);
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { GoogleGenerativeAI } = require('@google/generative-ai');
|
||||
const GeminiSquareHole = require("./lib/GeminiSquareHole");
|
||||
const putility = require("@heyputer/putility");
|
||||
const FunctionCalling = require("./lib/FunctionCalling");
|
||||
const { Context } = require("../../util/context");
|
||||
|
||||
@@ -74,23 +73,21 @@ class GeminiService extends BaseService {
|
||||
const genResult = await chat.sendMessageStream(last_message_parts);
|
||||
const stream = genResult.stream;
|
||||
|
||||
const usage_promise = new putility.libs.promise.TeePromise();
|
||||
return {
|
||||
stream: true,
|
||||
init_chat_stream:
|
||||
GeminiSquareHole.create_chat_stream_handler({
|
||||
stream,
|
||||
usage_promise,
|
||||
usageCallback: (usageMetadata) => {
|
||||
// TODO DS: dedup this logic
|
||||
const trackedUsage = {
|
||||
prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
|
||||
completion_tokens: usageMetadata.candidatesTokenCount,
|
||||
cached_tokens: usageMetadata.cachedContentTokenCount || 0,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
|
||||
},
|
||||
}),
|
||||
usage_promise: usage_promise.then(usageMetadata => {
|
||||
const trackedUsage = {
|
||||
prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
|
||||
completion_tokens: usageMetadata.candidatesTokenCount,
|
||||
cached_tokens: usageMetadata.cachedContentTokenCount || 0,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
|
||||
return usage_calculator({ usageMetadata });
|
||||
}),
|
||||
};
|
||||
} else {
|
||||
const genResult = await chat.sendMessage(last_message_parts);
|
||||
|
||||
@@ -32,7 +32,9 @@ const { Context } = require("../../util/context");
|
||||
*/
|
||||
class OpenAIImageGenerationService extends BaseService {
|
||||
/** @type {import('../../services/MeteringService/MeteringService').MeteringAndBillingService} */
|
||||
meteringAndBillingService;
|
||||
get meteringAndBillingService(){
|
||||
return this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static MODULES = {
|
||||
openai: require('openai'),
|
||||
@@ -81,8 +83,6 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
this.openai = new this.modules.openai.OpenAI({
|
||||
apiKey: sk_key,
|
||||
});
|
||||
|
||||
this.meteringAndBillingService = this.services.get('meteringService').meteringAndBillingService;
|
||||
}
|
||||
|
||||
static IMPLEMENTS = {
|
||||
@@ -182,7 +182,8 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
});
|
||||
}
|
||||
|
||||
const user_private_uid = Context.get('actor')?.private_uid ?? 'UNKNOWN';
|
||||
const actor = Context.get('actor');
|
||||
const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
|
||||
if ( user_private_uid === 'UNKNOWN' ) {
|
||||
this.errors.report('chat-completion-service:unknown-user', {
|
||||
message: 'failed to get a user ID for an OpenAI request',
|
||||
@@ -191,22 +192,13 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
});
|
||||
}
|
||||
|
||||
const exact_cost = this.models_[model][price_key]
|
||||
* 100 // $ USD to cents USD
|
||||
* Math.pow(10, 6); // cents to microcents
|
||||
|
||||
const svc_cost = this.services.get('cost');
|
||||
const usageAllowed = await svc_cost.get_funding_allowed({
|
||||
minimum: exact_cost,
|
||||
});
|
||||
const usageType = `openai:${model}:${price_key}`;
|
||||
const usageAllowed = await this.meteringAndBillingService.hasEnoughCreditsFor(actor, usageType, 1);
|
||||
|
||||
if ( ! usageAllowed ) {
|
||||
throw APIError.create('insufficient_funds');
|
||||
}
|
||||
|
||||
// We can charge immediately
|
||||
await svc_cost.record_cost({ cost: exact_cost });
|
||||
|
||||
// Build API parameters based on model
|
||||
const apiParams = this._buildApiParams(model, {
|
||||
user: user_private_uid,
|
||||
@@ -217,29 +209,8 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
|
||||
const result = await this.openai.images.generate(apiParams);
|
||||
|
||||
const actor = Context.get('actor');
|
||||
// For image generation, usage is typically image count and resolution
|
||||
const trackedUsage = {
|
||||
[price_key]: 1,
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, `openai:${model}`);
|
||||
|
||||
// Tiny base64 result for testing
|
||||
// const result = {
|
||||
// data: [
|
||||
// {
|
||||
// url: 'data:image/png;base64,' +
|
||||
// 'iVBORw0KGgoAAAANSUhEUgAAAAgAAAAICAIAAABLbSncAAAA' +
|
||||
// '2ElEQVR4nADIADf/AkRgiOi4oaIHfdeNCE2vFMURlKdHdb/H' +
|
||||
// '4wRTROeyGdCpn089i13t42v73DQSsCwSDAsEBLH783BZu1si' +
|
||||
// 'LkiwqfGwHAC/8bL0NggaA47QKDuRDp0NRgtALj8W+mSm9BIH' +
|
||||
// 'PMGYegR+bu/c85wWQGLYrjLhis9E8AE1F/AFbCMA53+9d73t' +
|
||||
// '/QKPbbdLHZY8wB4OewzT8CrCBG3RE7kyWAXuJvaHHHzFhbIN' +
|
||||
// '1hryGU5vvwD6liTD3hytRktVRRAaRi71k2PYCro6AlYBAAD/' +
|
||||
// '/wWtWjI5xEefAAAAAElFTkSuQmCC'
|
||||
// }
|
||||
// ]
|
||||
// };
|
||||
this.meteringAndBillingService.incrementUsage(actor, usageType, 1);
|
||||
|
||||
const spending_meta = {
|
||||
model,
|
||||
@@ -250,8 +221,6 @@ class OpenAIImageGenerationService extends BaseService {
|
||||
spending_meta.size = quality + ":" + spending_meta.size;
|
||||
}
|
||||
|
||||
const svc_spending = Context.get('services').get('spending');
|
||||
svc_spending.record_spending('openai', 'image-generation', spending_meta);
|
||||
const url = result.data?.[0]?.url || (result.data?.[0]?.b64_json ? "data:image/png;base64," + result.data[0].b64_json : null);
|
||||
|
||||
if ( !url ) {
|
||||
|
||||
@@ -147,7 +147,7 @@ class OpenRouterService extends BaseService {
|
||||
const trackedUsage = {
|
||||
prompt: usage.prompt_tokens ?? 0,
|
||||
completion: usage.completion_tokens ?? 0,
|
||||
input_cache_read: usage.prompt_tokens_details.cached_tokens ?? 0,
|
||||
input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
||||
};
|
||||
const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
|
||||
return [k, rawPriceModelDetails.cost[k] * trackedUsage[k]];
|
||||
|
||||
@@ -22,7 +22,6 @@ const { PassThrough } = require("stream");
|
||||
const BaseService = require("../../services/BaseService");
|
||||
const { TypedValue } = require("../../services/drivers/meta/Runtime");
|
||||
const { nou } = require("../../util/langutil");
|
||||
const { TeePromise } = require('@heyputer/putility').libs.promise;
|
||||
const { Together } = require('together-ai');
|
||||
const OpenAIUtil = require("./lib/OpenAIUtil");
|
||||
const { Context } = require("../../util/context");
|
||||
@@ -115,9 +114,9 @@ class TogetherAIService extends BaseService {
|
||||
const actor = Context.get('actor');
|
||||
const modelId = model ?? this.get_default_model();
|
||||
|
||||
if ( stream ) {
|
||||
let usage_promise = new TeePromise();
|
||||
const modelDetails = (await this.models_()).find(m => m.id === modelId);
|
||||
|
||||
if ( stream ) {
|
||||
const stream = new PassThrough();
|
||||
const retval = new TypedValue({
|
||||
$: 'stream',
|
||||
@@ -128,14 +127,13 @@ class TogetherAIService extends BaseService {
|
||||
for await ( const chunk of completion ) {
|
||||
// DRY: same as openai
|
||||
if ( chunk.usage ) {
|
||||
// TODO DS: get rid of legacy usage
|
||||
usage_promise.resolve({
|
||||
input_tokens: chunk.usage.prompt_tokens,
|
||||
output_tokens: chunk.usage.completion_tokens,
|
||||
});
|
||||
// Metering: record usage for streamed chunks
|
||||
const trackedUsage = OpenAIUtil.extractMeteredUsage(chunk.usage);
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, modelId);
|
||||
const costOverrides = {
|
||||
prompt_tokens: trackedUsage.prompt_tokens * (modelDetails?.cost?.input ?? 0),
|
||||
completion_tokens: trackedUsage.completion_tokens * (modelDetails?.cost?.output ?? 0),
|
||||
};
|
||||
this.meteringAndBillingService.utilRecordUsageObject(trackedUsage, actor, modelId, costOverrides);
|
||||
}
|
||||
|
||||
if ( chunk.choices.length < 1 ) continue;
|
||||
@@ -155,7 +153,6 @@ class TogetherAIService extends BaseService {
|
||||
return {
|
||||
stream: true,
|
||||
response: retval,
|
||||
usage_promise: usage_promise,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -175,7 +172,7 @@ class TogetherAIService extends BaseService {
|
||||
/**
|
||||
* Fetches and caches available AI models from Together API
|
||||
* @private
|
||||
* @returns {Promise<Array>} Array of model objects containing id, name, context length,
|
||||
* @returns Array of model objects containing id, name, context length,
|
||||
* description and pricing information
|
||||
* @remarks Models are cached for 5 minutes in KV store
|
||||
*/
|
||||
|
||||
@@ -112,17 +112,12 @@ class UsageLimitedChatService extends BaseService {
|
||||
chatStream.end();
|
||||
}, 10);
|
||||
|
||||
// Return a TypedValue with usage_promise for proper integration
|
||||
return {
|
||||
stream: true,
|
||||
init_chat_stream: async ({ chatStream: cs }) => {
|
||||
// Copy contents from our stream to the provided one
|
||||
chatStream.pipe(cs.stream);
|
||||
},
|
||||
usage_promise: Promise.resolve({
|
||||
input_tokens: 0,
|
||||
output_tokens: 1,
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ module.exports = class GeminiSquareHole {
|
||||
* Transforms messages from standard format to Gemini API format.
|
||||
* Converts 'content' to 'parts', 'assistant' role to 'model', and transforms
|
||||
* tool_use/tool_result/text parts into Gemini's expected structure.
|
||||
*
|
||||
*
|
||||
* @param {Array} messages - Array of message objects to transform
|
||||
* @returns {Promise<Array>} Transformed messages compatible with Gemini API
|
||||
*/
|
||||
@@ -27,7 +27,7 @@ module.exports = class GeminiSquareHole {
|
||||
msg.role = 'model';
|
||||
}
|
||||
|
||||
for ( let i=0 ; i < msg.parts.length ; i++ ) {
|
||||
for ( let i = 0 ; i < msg.parts.length ; i++ ) {
|
||||
const part = msg.parts[i];
|
||||
if ( part.type === 'tool_use' ) {
|
||||
msg.parts[i] = {
|
||||
@@ -57,11 +57,11 @@ module.exports = class GeminiSquareHole {
|
||||
}
|
||||
|
||||
return messages;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a function that calculates token usage and associated costs from Gemini API response metadata.
|
||||
*
|
||||
*
|
||||
* @param {Object} params - Configuration object
|
||||
* @param {Object} params.model_details - Model details including id and cost structure
|
||||
* @returns {Function} Function that takes usageMetadata and returns an array of token usage objects with costs
|
||||
@@ -69,7 +69,7 @@ module.exports = class GeminiSquareHole {
|
||||
static create_usage_calculator = ({ model_details }) => {
|
||||
return ({ usageMetadata }) => {
|
||||
const tokens = [];
|
||||
|
||||
|
||||
tokens.push({
|
||||
type: 'prompt',
|
||||
model: model_details.id,
|
||||
@@ -92,23 +92,22 @@ module.exports = class GeminiSquareHole {
|
||||
* Creates a handler function for processing Gemini API streaming chat responses.
|
||||
* The handler processes chunks from the stream, managing text and tool call content blocks,
|
||||
* and resolves usage metadata when streaming completes.
|
||||
*
|
||||
*
|
||||
* @param {Object} params - Configuration object
|
||||
* @param {Object} params.stream - Gemini GenerateContentStreamResult stream
|
||||
* @param {Object} params.usage_promise - Promise to resolve with final usage metadata
|
||||
* @param {Function} params.usageCallback - Callback function to handle usage metadata
|
||||
* @returns {Function} Async function that processes the chat stream and manages content blocks
|
||||
*/
|
||||
static create_chat_stream_handler = ({
|
||||
stream, // GenerateContentStreamResult:stream
|
||||
usage_promise,
|
||||
usageCallback,
|
||||
}) => async ({ chatStream }) => {
|
||||
const message = chatStream.message();
|
||||
|
||||
|
||||
let textblock = message.contentBlock({ type: 'text' });
|
||||
let toolblock = null;
|
||||
let mode = 'text';
|
||||
|
||||
|
||||
let last_usage = null;
|
||||
for await ( const chunk of stream ) {
|
||||
// This is spread across several lines so that the stack trace
|
||||
@@ -129,9 +128,7 @@ module.exports = class GeminiSquareHole {
|
||||
id: part.functionCall.name,
|
||||
name: part.functionCall.name,
|
||||
});
|
||||
toolblock.addPartialJSON(JSON.stringify(
|
||||
part.functionCall.args,
|
||||
));
|
||||
toolblock.addPartialJSON(JSON.stringify(part.functionCall.args));
|
||||
|
||||
continue;
|
||||
}
|
||||
@@ -150,11 +147,11 @@ module.exports = class GeminiSquareHole {
|
||||
last_usage = chunk.usageMetadata;
|
||||
}
|
||||
|
||||
usage_promise.resolve(last_usage);
|
||||
usageCallback(last_usage);
|
||||
|
||||
if ( mode === 'text' ) textblock.end();
|
||||
if ( mode === 'tool' ) toolblock.end();
|
||||
message.end();
|
||||
chatStream.end();
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
const putility = require("@heyputer/putility");
|
||||
|
||||
/**
|
||||
* Process input messages from Puter's normalized format to OpenAI's format
|
||||
* May make changes in-place.
|
||||
@@ -91,7 +89,7 @@ const extractMeteredUsage = (usage) => {
|
||||
const create_chat_stream_handler = ({
|
||||
deviations,
|
||||
completion,
|
||||
usage_promise,
|
||||
usage_calculator,
|
||||
}) => async ({ chatStream }) => {
|
||||
deviations = Object.assign({
|
||||
// affected by: Groq
|
||||
@@ -154,7 +152,8 @@ const create_chat_stream_handler = ({
|
||||
}
|
||||
}
|
||||
|
||||
usage_promise.resolve(last_usage);
|
||||
// TODO DS: this is a bit too abstracted... this is basically just doing the metering now
|
||||
usage_calculator({ usage: last_usage });
|
||||
|
||||
if ( mode === 'text' ) textblock.end();
|
||||
if ( mode === 'tool' ) toolblock.end();
|
||||
@@ -182,13 +181,10 @@ const handle_completion_output = async ({
|
||||
}, deviations);
|
||||
|
||||
if ( stream ) {
|
||||
let usage_promise = new putility.libs.promise.TeePromise();
|
||||
|
||||
const init_chat_stream =
|
||||
create_chat_stream_handler({
|
||||
deviations,
|
||||
completion,
|
||||
usage_promise,
|
||||
usage_calculator,
|
||||
});
|
||||
|
||||
@@ -196,12 +192,6 @@ const handle_completion_output = async ({
|
||||
stream: true,
|
||||
init_chat_stream,
|
||||
finally_fn,
|
||||
usage_promise: usage_promise.then(usage => {
|
||||
return usage_calculator ? usage_calculator({ usage }) : {
|
||||
input_tokens: usage.prompt_tokens,
|
||||
output_tokens: usage.completion_tokens,
|
||||
};
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ interface UsageByType {
|
||||
}
|
||||
|
||||
interface MeteringAndBillingServiceDeps {
|
||||
kvClientWrapper: DBKVStore,
|
||||
kvStore: DBKVStore,
|
||||
superUserService: SUService,
|
||||
alarmService: AlarmService
|
||||
eventService: EventService
|
||||
@@ -30,13 +30,13 @@ interface MeteringAndBillingServiceDeps {
|
||||
*/
|
||||
export class MeteringAndBillingService {
|
||||
|
||||
#kvClientWrapper: DBKVStore;
|
||||
#kvStore: DBKVStore;
|
||||
#superUserService: SUService;
|
||||
#alarmService: AlarmService;
|
||||
#eventService: EventService;
|
||||
constructor({ kvClientWrapper, superUserService, alarmService, eventService }: MeteringAndBillingServiceDeps) {
|
||||
constructor({ kvStore, superUserService, alarmService, eventService }: MeteringAndBillingServiceDeps) {
|
||||
this.#superUserService = superUserService;
|
||||
this.#kvClientWrapper = kvClientWrapper;
|
||||
this.#kvStore = kvStore;
|
||||
this.#alarmService = alarmService;
|
||||
this.#eventService = eventService;
|
||||
}
|
||||
@@ -91,19 +91,19 @@ export class MeteringAndBillingService {
|
||||
};
|
||||
|
||||
const lastUpdatedKey = `${METRICS_PREFIX}:actor:${actorId}:lastUpdated`;
|
||||
const lastUpdatedPromise = this.#kvClientWrapper.set({
|
||||
const lastUpdatedPromise = this.#kvStore.set({
|
||||
key: lastUpdatedKey,
|
||||
value: Date.now(),
|
||||
});
|
||||
|
||||
const actorUsageKey = `${METRICS_PREFIX}:actor:${actorId}:${currentMonth}`;
|
||||
const actorUsagesPromise = this.#kvClientWrapper.incr({
|
||||
const actorUsagesPromise = this.#kvStore.incr({
|
||||
key: actorUsageKey,
|
||||
pathAndAmountMap,
|
||||
});
|
||||
|
||||
const puterConsumptionKey = `${METRICS_PREFIX}:puter:${currentMonth}`; // global consumption across all users and apps
|
||||
this.#kvClientWrapper.incr({
|
||||
this.#kvStore.incr({
|
||||
key: puterConsumptionKey,
|
||||
pathAndAmountMap,
|
||||
}).catch((e: Error) => {
|
||||
@@ -111,7 +111,7 @@ export class MeteringAndBillingService {
|
||||
});
|
||||
|
||||
const actorAppUsageKey = `${METRICS_PREFIX}:actor:${actorId}:app:${appId}:${currentMonth}`;
|
||||
this.#kvClientWrapper.incr({
|
||||
this.#kvStore.incr({
|
||||
key: actorAppUsageKey,
|
||||
pathAndAmountMap,
|
||||
}).catch((e: Error) => {
|
||||
@@ -119,7 +119,7 @@ export class MeteringAndBillingService {
|
||||
});
|
||||
|
||||
const appUsageKey = `${METRICS_PREFIX}:app:${appId}:${currentMonth}`;
|
||||
this.#kvClientWrapper.incr({
|
||||
this.#kvStore.incr({
|
||||
key: appUsageKey,
|
||||
pathAndAmountMap,
|
||||
}).catch((e: Error) => {
|
||||
@@ -127,7 +127,7 @@ export class MeteringAndBillingService {
|
||||
});
|
||||
|
||||
const actorAppTotalsKey = `${METRICS_PREFIX}:actor:${actorId}:apps:${currentMonth}`;
|
||||
this.#kvClientWrapper.incr({
|
||||
this.#kvStore.incr({
|
||||
key: actorAppTotalsKey,
|
||||
pathAndAmountMap: {
|
||||
[`${appId}.total`]: totalCost,
|
||||
@@ -164,7 +164,7 @@ export class MeteringAndBillingService {
|
||||
];
|
||||
|
||||
return await this.#superUserService.sudo(async () => {
|
||||
const [usage, appTotals] = await this.#kvClientWrapper.get({ key: keys }) as [UsageByType | null, Record<string, UsageByType> | null];
|
||||
const [usage, appTotals] = await this.#kvStore.get({ key: keys }) as [UsageByType | null, Record<string, UsageByType> | null];
|
||||
// only show details of app based on actor, aggregate all as others, except if app is global one or null, then show all
|
||||
const appId = actor.type?.app?.uid;
|
||||
if ( appTotals && appId ) {
|
||||
@@ -207,7 +207,7 @@ export class MeteringAndBillingService {
|
||||
const key = `${METRICS_PREFIX}:actor:${actor.type.user.uuid}:app:${appId}:${currentMonth}`;
|
||||
|
||||
return await this.#superUserService.sudo(async () => {
|
||||
const usage = await this.#kvClientWrapper.get({ key }) as UsageByType | null;
|
||||
const usage = await this.#kvStore.get({ key }) as UsageByType | null;
|
||||
// only show usage if actor app is the same or if global app ( null appId )
|
||||
const actorAppId = actor.type?.app?.uid;
|
||||
if ( actorAppId && actorAppId !== appId && appId !== GLOBAL_APP_KEY ) {
|
||||
@@ -281,7 +281,7 @@ export class MeteringAndBillingService {
|
||||
}
|
||||
const key = `${POLICY_PREFIX}:actor:${actor.type.user?.uuid}:addons`;
|
||||
return this.#superUserService.sudo(async () => {
|
||||
const policyAddOns = await this.#kvClientWrapper.get({ key });
|
||||
const policyAddOns = await this.#kvStore.get({ key });
|
||||
return (policyAddOns ?? {}) as PolicyAddOns;
|
||||
});
|
||||
}
|
||||
@@ -293,7 +293,7 @@ export class MeteringAndBillingService {
|
||||
}
|
||||
const key = `${POLICY_PREFIX}:actor:${actor.type.user?.uuid}:addons`;
|
||||
return this.#superUserService.sudo(async () => {
|
||||
await this.#kvClientWrapper.incr({
|
||||
await this.#kvStore.incr({
|
||||
key,
|
||||
pathAndAmountMap: {
|
||||
purchasedCredits: tokenAmount,
|
||||
|
||||
@@ -7,7 +7,7 @@ export class MeteringAndBillingServiceWrapper extends BaseService {
|
||||
meteringAndBillingService = undefined;
|
||||
_init() {
|
||||
this.meteringAndBillingService = new MeteringAndBillingService({
|
||||
kvClientWrapper: this.services.get('puter-kvstore').as('puter-kvstore'),
|
||||
kvStore: this.services.get('puter-kvstore').as('puter-kvstore'),
|
||||
superUserService: this.services.get('su'),
|
||||
alarmService: this.services.get('alarm'),
|
||||
eventService: this.services.get('event'),
|
||||
|
||||
23
src/backend/src/services/MeteringService/README.md
Normal file
23
src/backend/src/services/MeteringService/README.md
Normal file
@@ -0,0 +1,23 @@
|
||||
# Metering and Billing Service
|
||||
|
||||
This service provides all metering functionality in puter. It relies on our own KV infrastructure to track usage (note the implementation of kvStore affects performance, and atomicity, currently sqlite implementation is not atomic).
|
||||
|
||||
It will also slowly add billing functionlity in it and through extension events.
|
||||
|
||||
## Cost maps
|
||||
The metering service relies on cost maps to determine how much to charge for a given operation. Cost maps are simple JSON objects that map a usage type to a cost per unit in microcents (1 millionth of a cent).
|
||||
For example, a cost map for AWS Polly might look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"aws-polly:standard:character": 4,
|
||||
"aws-polly:neural:character": 16
|
||||
}
|
||||
```
|
||||
|
||||
We need to manually update these for now until we can automate it somehow.
|
||||
|
||||
## Usage and allowance tracking
|
||||
This service provides functionality to directly check if a user has enough credits to perform an operation, and to record usage after the operation is complete.
|
||||
See [MeteringService.ts](./MeteringService.ts) for more details on how metering works.
|
||||
This should be the primary, and ideally only, way to check for usage and record it.
|
||||
@@ -1,15 +1,10 @@
|
||||
// OpenAI Image Generation Cost Map (microcents per image)
|
||||
// Pricing for DALL-E 2 and DALL-E 3 models based on image dimensions.
|
||||
// All costs are in microcents (1/1,000,000th of a cent). Example: 1,000,000 microcents = $0.01 USD.
|
||||
//
|
||||
// Source: [`TrackSpendingService.js`](../../../../TrackSpendingService.js) ImageGenerationStrategy static models
|
||||
//
|
||||
// All costs are in microcents (1/1,000,000th of a cent). Example: 1,000,000 microcents = $0.01 USD.//
|
||||
// Naming pattern: "openai:{model}:{size}" or "openai:{model}:hd:{size}" for HD images
|
||||
|
||||
import { toMicroCents } from "../utils";
|
||||
|
||||
|
||||
|
||||
export const OPENAI_IMAGE_COST_MAP = {
|
||||
// DALL-E 3
|
||||
"openai:dall-e-3:1024x1024": toMicroCents(0.04), // $0.04
|
||||
|
||||
@@ -1,339 +0,0 @@
|
||||
// METADATA // {"ai-commented":{"service":"claude"}}
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
const { TimeWindow } = require("../util/opmath");
|
||||
const smol = require('@heyputer/putility').libs.smol;
|
||||
const { format_as_usd } = require('@heyputer/putility').libs.string;
|
||||
const { MINUTE, SECOND } = require("@heyputer/putility").libs.time;
|
||||
const BaseService = require("./BaseService");
|
||||
|
||||
|
||||
/**
|
||||
* @class TrackSpendingService
|
||||
* @extends BaseService
|
||||
* @description Service for tracking and monitoring API spending across different vendors and strategies.
|
||||
* Implements cost tracking for various AI models (like GPT-4, DALL-E), manages spending windows,
|
||||
* and provides alerting functionality when spending thresholds are exceeded. Supports different
|
||||
* pricing strategies for chat completions and image generation services.
|
||||
*/
|
||||
class TrackSpendingService extends BaseService {
|
||||
static ChatCompletionStrategy = class ChatCompletionStrategy {
|
||||
static models = {
|
||||
'gpt-4-1106-preview': {
|
||||
cost_per_input_token: [0.01, 1000],
|
||||
cost_per_output_token: [0.03, 1000],
|
||||
},
|
||||
'gpt-4-vision-preview': {
|
||||
cost_per_input_token: [0.01, 1000],
|
||||
cost_per_output_token: [0.03, 1000],
|
||||
},
|
||||
'gpt-3.5-turbo': {
|
||||
cost_per_input_token: [0.001, 1000],
|
||||
cost_per_output_token: [0.002, 1000],
|
||||
},
|
||||
};
|
||||
constructor ({ service }) {
|
||||
this.service = service;
|
||||
}
|
||||
|
||||
multiply_by_ratio_ (value, [numerator, denominator]) {
|
||||
return value * numerator / denominator;
|
||||
}
|
||||
|
||||
get_cost (vendor, data) {
|
||||
const model = data.model ?? 'gpt-4-1106-preview';
|
||||
const model_pricing = this.constructor.models[model];
|
||||
|
||||
if ( ! model_pricing ) {
|
||||
throw new Error(`unknown model ${model}`);
|
||||
}
|
||||
|
||||
const cost_per_input_token = model_pricing.cost_per_input_token;
|
||||
const cost_per_output_token = model_pricing.cost_per_output_token;
|
||||
|
||||
const input_tokens = data.count_tokens_input ?? 0;
|
||||
const output_tokens = data.count_tokens_output ?? 0;
|
||||
|
||||
const cost = smol.add(
|
||||
this.multiply_by_ratio_(input_tokens, cost_per_input_token),
|
||||
this.multiply_by_ratio_(output_tokens, cost_per_output_token),
|
||||
);
|
||||
|
||||
console.log('COST IS', cost);
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Validates pricing configurations for all models to prevent division by zero errors
|
||||
* @async
|
||||
* @throws {Error} If any model's pricing configuration would cause division by zero
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async validate () {
|
||||
// Ensure no models will cause division by zero
|
||||
for ( const model in this.constructor.models ) {
|
||||
const model_pricing = this.constructor.models[model];
|
||||
if ( model_pricing.cost_per_input_token[1] === 0 ) {
|
||||
throw new Error(`model ${model} pricing conf (input tokens) will cause division by zero`);
|
||||
}
|
||||
if ( model_pricing.cost_per_output_token[1] === 0 ) {
|
||||
throw new Error(`model ${model} pricing conf (output tokens) will cause division by zero`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @class ImageGenerationStrategy
|
||||
* @description A strategy class for handling image generation cost calculations.
|
||||
* Supports different models (DALL-E 2 and 3) with varying pricing based on image
|
||||
* dimensions. Maintains a static pricing model configuration and provides methods
|
||||
* to calculate costs for image generation requests. Part of the TrackSpendingService
|
||||
* system for monitoring and tracking API usage costs.
|
||||
*/
|
||||
static ImageGenerationStrategy = class ImageGenerationStrategy {
|
||||
static models = {
|
||||
'gemini-2.5-flash-image-preview': {
|
||||
"1024x1024": 0.039,
|
||||
},
|
||||
'gpt-image-1': {
|
||||
"low:1024x1024": 0.011,
|
||||
"low:1024x1536": 0.016,
|
||||
"low:1536x1024": 0.016,
|
||||
"medium:1024x1024": 0.042,
|
||||
"medium:1024x1536": 0.063,
|
||||
"medium:1536x1024": 0.063,
|
||||
"high:1024x1024": 0.167,
|
||||
"high:1024x1536": 0.25,
|
||||
"high:1536x1024": 0.25
|
||||
},
|
||||
'dall-e-3': {
|
||||
'1024x1024': 0.04,
|
||||
'1024x1792': 0.08,
|
||||
'1792x1024': 0.08,
|
||||
'hd:1024x1024': 0.08,
|
||||
'hd:1024x1792': 0.12,
|
||||
'hd:1792x1024': 0.12,
|
||||
},
|
||||
'dall-e-2': {
|
||||
'1024x1024': 0.02,
|
||||
'512x512': 0.018,
|
||||
'256x256': 0.016,
|
||||
},
|
||||
};
|
||||
constructor ({ service }) {
|
||||
this.service = service;
|
||||
}
|
||||
|
||||
multiply_by_ratio_ (value, [numerator, denominator]) {
|
||||
return value * numerator / denominator;
|
||||
}
|
||||
|
||||
get_cost (vendor, data) {
|
||||
const model = data.model ?? 'dall-e-2';
|
||||
const model_pricing = this.constructor.models[model];
|
||||
|
||||
if ( ! model_pricing ) {
|
||||
throw new Error(`unknown model ${model}`);
|
||||
}
|
||||
|
||||
if ( ! model_pricing.hasOwnProperty(data.size) ) {
|
||||
throw new Error(`unknown size ${data.size} for model ${model}`);
|
||||
}
|
||||
|
||||
const cost = model_pricing[data.size];
|
||||
|
||||
console.log('COST IS', cost);
|
||||
|
||||
return cost;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the TrackSpendingService with spending tracking strategies and alarm monitoring
|
||||
*
|
||||
* Sets up cost tracking strategies for different services (chat completion, image generation),
|
||||
* initializes spending windows for monitoring, and configures periodic alarm checks for high spending.
|
||||
*
|
||||
* Creates an interval that checks spending levels and triggers alarms when spending exceeds
|
||||
* configured thresholds.
|
||||
*
|
||||
* @private
|
||||
* @async
|
||||
* @throws {Error} If no logging service is configured
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async _init () {
|
||||
const strategies = {
|
||||
'chat-completion': new this.constructor.ChatCompletionStrategy({
|
||||
service: this,
|
||||
}),
|
||||
'image-generation': new this.constructor.ImageGenerationStrategy({
|
||||
service: this,
|
||||
}),
|
||||
};
|
||||
|
||||
// How quickly we get the first alarm
|
||||
const alarm_check_interval = 10 * SECOND;
|
||||
|
||||
// How frequently we'll get repeat alarms
|
||||
const alarm_cooldown_time = 30 * MINUTE;
|
||||
|
||||
const alarm_at_cost = this.config.alarm_at_cost ?? 10;
|
||||
const alarm_increment = this.config.alarm_increment ?? 10;
|
||||
|
||||
for ( const k in strategies ) {
|
||||
await strategies[k].validate?.();
|
||||
}
|
||||
|
||||
if ( ! this.log ) {
|
||||
throw new Error('no log?');
|
||||
}
|
||||
|
||||
this.strategies = strategies;
|
||||
|
||||
// Tracks overall server spending
|
||||
this.spend_windows = {};
|
||||
|
||||
// Tracks what dollar amounts alerts were reported for
|
||||
this.alerts_window = new TimeWindow({
|
||||
// window_duration: 30 * MINUTE,
|
||||
window_duration: alarm_cooldown_time,
|
||||
reducer: a => Math.max(0, ...a),
|
||||
});
|
||||
|
||||
const svc_alarm = this.services.get('alarm');
|
||||
|
||||
|
||||
/**
|
||||
* Generates alarms when spending exceeds configured thresholds
|
||||
*
|
||||
* Periodically checks the current spending levels across all spending windows
|
||||
* and triggers alarms when spending exceeds configured thresholds. Alarms are
|
||||
* triggered based on the total spending across all windows and the configured
|
||||
* alarm thresholds.
|
||||
*/
|
||||
setInterval(() => {
|
||||
const spending = this.get_window_spending_();
|
||||
|
||||
const increment = Math.floor(spending / alarm_increment);
|
||||
const last_increment = this.alerts_window.get();
|
||||
|
||||
if ( increment <= last_increment ) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.log.info('adding that increment');
|
||||
this.alerts_window.add(increment);
|
||||
|
||||
if ( spending >= alarm_at_cost ) {
|
||||
// see: src/polyfill/to-string-higher-radix.js
|
||||
const ts_for_id = Date.now().toString(62);
|
||||
|
||||
this.log.info('triggering alarm');
|
||||
this.log.info('alarm at: ' + alarm_at_cost);
|
||||
this.log.info('spend: ' + this.get_window_spending_());
|
||||
svc_alarm.create(
|
||||
`high-spending-${ts_for_id}`,
|
||||
`server spending is ${spending} within 30 minutes`,
|
||||
{
|
||||
spending,
|
||||
increment_level: increment,
|
||||
},
|
||||
);
|
||||
}
|
||||
}, alarm_check_interval);
|
||||
}
|
||||
|
||||
add_or_get_window_ (id) {
|
||||
if ( this.spend_windows[id] ) {
|
||||
return this.spend_windows[id];
|
||||
}
|
||||
|
||||
return this.spend_windows[id] = new TimeWindow({
|
||||
// window_duration: 30 * MINUTE,
|
||||
window_duration: 30 * MINUTE,
|
||||
reducer: a => a.reduce((a, b) => a + b, 0),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the total spending across all tracked windows
|
||||
*
|
||||
* @private
|
||||
* @returns {number} The sum of all spending windows' current values
|
||||
*/
|
||||
get_window_spending_ () {
|
||||
const windows = Object.values(this.spend_windows);
|
||||
return windows.reduce((sum, win) => {
|
||||
return sum + win.get();
|
||||
}, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Records spending for a given vendor using the specified strategy
|
||||
*
|
||||
* @deprecated Use `record_cost` instead
|
||||
*
|
||||
* @param {string} vendor - The vendor name/identifier
|
||||
* @param {string} strategy_key - Key identifying the pricing strategy to use
|
||||
* @param {Object} data - Data needed to calculate cost based on the strategy
|
||||
* @throws {Error} If strategy_key is invalid/unknown
|
||||
*/
|
||||
record_spending (vendor, strategy_key, data) {
|
||||
const strategy = this.strategies[strategy_key];
|
||||
if ( ! strategy ) {
|
||||
throw new Error(`unknown strategy ${strategy_key}`);
|
||||
}
|
||||
|
||||
const cost = strategy.get_cost(vendor, data);
|
||||
|
||||
this.log.info(`Spent ${format_as_usd(cost)}`, {
|
||||
vendor, strategy_key, data,
|
||||
cost,
|
||||
})
|
||||
|
||||
const id = `${vendor}:${strategy_key}`;
|
||||
const window = this.add_or_get_window_(id);
|
||||
window.add(cost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Records known cost into a specified window id.
|
||||
*
|
||||
* This is simliar to `record_spending` but puts the responsibility
|
||||
* of determining cost outside of this services.
|
||||
*/
|
||||
record_cost (window_id, { timestamp, cost }) {
|
||||
const window = this.add_or_get_window_(window_id);
|
||||
this.log.info(`Spent ${format_as_usd(cost)}`, {
|
||||
window_id,
|
||||
timestamp,
|
||||
cost,
|
||||
})
|
||||
window.add(cost);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
TrackSpendingService,
|
||||
};
|
||||
@@ -72,8 +72,8 @@ class PermissionService extends BaseService {
|
||||
this.kvAvgTimes = { count: 0, avg: 0, max: 0 };
|
||||
this.dbAvgTimes = { count: 0, avg: 0, max: 0 };
|
||||
}
|
||||
|
||||
async ['__on_boot.consolidation'] () {
|
||||
|
||||
async ['__on_boot.consolidation']() {
|
||||
const svc_event = this.services.get('event');
|
||||
// Event to allow extensions to add permissions
|
||||
{
|
||||
@@ -260,120 +260,104 @@ class PermissionService extends BaseService {
|
||||
}
|
||||
|
||||
async #flat_validateUserPerms({ actor, permissions }){
|
||||
const start = Date.now();
|
||||
try {
|
||||
/** @type {Promise<Record<string, unknown>[]>} */
|
||||
const validPerms = (await this.services.get('su').sudo(() => (
|
||||
this.kvService.get({
|
||||
key: [...new Set(permissions.map(perm => PermissionUtil.join(PERM_KEY_PREFIX, actor.type.user.id, perm)))],
|
||||
})
|
||||
))).filter(Boolean);
|
||||
|
||||
let permDeleted = false;
|
||||
// We no longer fetch up the tree, if user was given this perm, then they have it
|
||||
for ( const validPerm of validPerms ){
|
||||
const { permission, issuer_user_id, deleted, ...extra } = validPerm;
|
||||
if ( deleted ){
|
||||
permDeleted = true;
|
||||
continue;
|
||||
}
|
||||
const issuer_actor = new Actor({
|
||||
type: new UserActorType({
|
||||
user: await get_user({ id: issuer_user_id }),
|
||||
}),
|
||||
});
|
||||
// return first perm that allows them in here
|
||||
return [{
|
||||
$: 'option',
|
||||
via: 'user',
|
||||
has_terminal: true,
|
||||
permission: permission,
|
||||
data: extra,
|
||||
holder_username: actor.type.user.username,
|
||||
issuer_username: issuer_actor.type.user.username,
|
||||
issuer_user_id: issuer_actor.type.user.uuid,
|
||||
reading: [],
|
||||
}];
|
||||
const validPerms = (await this.services.get('su').sudo(() => (
|
||||
this.kvService.get({
|
||||
key: [...new Set(permissions.map(perm => PermissionUtil.join(PERM_KEY_PREFIX, actor.type.user.id, perm)))],
|
||||
})
|
||||
))).filter(Boolean);
|
||||
|
||||
let permDeleted = false;
|
||||
// We no longer fetch up the tree, if user was given this perm, then they have it
|
||||
for ( const validPerm of validPerms ){
|
||||
const { permission, issuer_user_id, deleted, ...extra } = validPerm;
|
||||
if ( deleted ){
|
||||
permDeleted = true;
|
||||
continue;
|
||||
}
|
||||
return permDeleted ? [{
|
||||
deleted: true,
|
||||
}] : [];
|
||||
} finally {
|
||||
const end = Date.now();
|
||||
this.kvAvgTimes.avg = (this.kvAvgTimes.avg * this.kvAvgTimes.count + (end - start)) / (this.kvAvgTimes.count + 1);
|
||||
this.kvAvgTimes.count++;
|
||||
this.kvAvgTimes.max = Math.max(this.kvAvgTimes.max, end - start);
|
||||
const issuer_actor = new Actor({
|
||||
type: new UserActorType({
|
||||
user: await get_user({ id: issuer_user_id }),
|
||||
}),
|
||||
});
|
||||
// return first perm that allows them in here
|
||||
return [{
|
||||
$: 'option',
|
||||
via: 'user',
|
||||
has_terminal: true,
|
||||
permission: permission,
|
||||
data: extra,
|
||||
holder_username: actor.type.user.username,
|
||||
issuer_username: issuer_actor.type.user.username,
|
||||
issuer_user_id: issuer_actor.type.user.uuid,
|
||||
reading: [],
|
||||
}];
|
||||
|
||||
}
|
||||
return permDeleted ? [{
|
||||
deleted: true,
|
||||
}] : [];
|
||||
|
||||
}
|
||||
async #linked_validateUserPerms({ actor, permissions, state }){
|
||||
let sqlPermQuery = permissions.map(_perm => {
|
||||
return '`permission` = ?';
|
||||
}).join(' OR ');
|
||||
|
||||
const start = Date.now();
|
||||
try {
|
||||
let sqlPermQuery = permissions.map(_perm => {
|
||||
return '`permission` = ?';
|
||||
}).join(' OR ');
|
||||
|
||||
if ( permissions.length > 1 ) {
|
||||
sqlPermQuery = `(${sqlPermQuery})`;
|
||||
}
|
||||
|
||||
const rows = await this.db.read('SELECT * FROM `user_to_user_permissions` ' +
|
||||
`WHERE \`holder_user_id\` = ? AND ${
|
||||
sqlPermQuery}`,
|
||||
[
|
||||
actor.type.user.id,
|
||||
...permissions,
|
||||
]);
|
||||
|
||||
const readings = [];
|
||||
// Return the first matching permission where the
|
||||
// issuer also has the permission granted
|
||||
for ( const row of rows ) {
|
||||
row.extra = this.db.case({
|
||||
mysql: () => row.extra,
|
||||
otherwise: () => JSON.parse(row.extra ?? '{}'),
|
||||
})();
|
||||
|
||||
const issuer_actor = new Actor({
|
||||
type: new UserActorType({
|
||||
user: await get_user({ id: row.issuer_user_id }),
|
||||
}),
|
||||
});
|
||||
|
||||
let should_continue = false;
|
||||
for ( const seen_actor of state.anti_cycle_actors ) {
|
||||
if ( seen_actor.type.user.id === issuer_actor.type.user.id ) {
|
||||
should_continue = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( should_continue ) continue;
|
||||
|
||||
const issuer_reading = await this.scan(issuer_actor, row.permission, undefined, state);
|
||||
|
||||
const has_terminal = reading_has_terminal({ reading: issuer_reading });
|
||||
|
||||
readings.push({
|
||||
$: 'path',
|
||||
via: 'user',
|
||||
has_terminal,
|
||||
permission: row.permission,
|
||||
data: row.extra,
|
||||
holder_username: actor.type.user.username,
|
||||
issuer_username: issuer_actor.type.user.username,
|
||||
issuer_user_id: issuer_actor.type.user.uuid,
|
||||
reading: issuer_reading,
|
||||
});
|
||||
}
|
||||
return readings;
|
||||
} finally {
|
||||
const end = Date.now();
|
||||
this.dbAvgTimes.avg = (this.dbAvgTimes.avg * this.dbAvgTimes.count + (end - start)) / (this.dbAvgTimes.count + 1);
|
||||
this.dbAvgTimes.count++;
|
||||
this.dbAvgTimes.max = Math.max(this.dbAvgTimes.max, end - start);
|
||||
if ( permissions.length > 1 ) {
|
||||
sqlPermQuery = `(${sqlPermQuery})`;
|
||||
}
|
||||
|
||||
const rows = await this.db.read('SELECT * FROM `user_to_user_permissions` ' +
|
||||
`WHERE \`holder_user_id\` = ? AND ${
|
||||
sqlPermQuery}`,
|
||||
[
|
||||
actor.type.user.id,
|
||||
...permissions,
|
||||
]);
|
||||
|
||||
const readings = [];
|
||||
// Return the first matching permission where the
|
||||
// issuer also has the permission granted
|
||||
for ( const row of rows ) {
|
||||
row.extra = this.db.case({
|
||||
mysql: () => row.extra,
|
||||
otherwise: () => JSON.parse(row.extra ?? '{}'),
|
||||
})();
|
||||
|
||||
const issuer_actor = new Actor({
|
||||
type: new UserActorType({
|
||||
user: await get_user({ id: row.issuer_user_id }),
|
||||
}),
|
||||
});
|
||||
|
||||
let should_continue = false;
|
||||
for ( const seen_actor of state.anti_cycle_actors ) {
|
||||
if ( seen_actor.type.user.id === issuer_actor.type.user.id ) {
|
||||
should_continue = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( should_continue ) continue;
|
||||
|
||||
const issuer_reading = await this.scan(issuer_actor, row.permission, undefined, state);
|
||||
|
||||
const has_terminal = reading_has_terminal({ reading: issuer_reading });
|
||||
|
||||
readings.push({
|
||||
$: 'path',
|
||||
via: 'user',
|
||||
has_terminal,
|
||||
permission: row.permission,
|
||||
data: row.extra,
|
||||
holder_username: actor.type.user.username,
|
||||
issuer_username: issuer_actor.type.user.username,
|
||||
issuer_user_id: issuer_actor.type.user.uuid,
|
||||
reading: issuer_reading,
|
||||
});
|
||||
}
|
||||
return readings;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
const APIError = require("../../api/APIError");
|
||||
const { Context } = require("../../util/context");
|
||||
const BaseService = require("../BaseService");
|
||||
|
||||
class CreditContext {
|
||||
constructor (svc, o) {
|
||||
for ( const k in o ) this[k] = o[k];
|
||||
this.svc = svc;
|
||||
}
|
||||
}
|
||||
|
||||
class CostService extends BaseService {
|
||||
static MODULES = {
|
||||
uuidv4: require('uuid').v4,
|
||||
}
|
||||
|
||||
_init () {
|
||||
const svc_cost = this;
|
||||
const svc_event = this.services.get('event');
|
||||
svc_event.on('driver.create-call-context', async (_, event) => {
|
||||
event.context = event.context.sub({
|
||||
// Future Use
|
||||
});
|
||||
});
|
||||
}
|
||||
async get_credit_context (params) {
|
||||
return new CreditContext(this, params);
|
||||
}
|
||||
|
||||
async get_funding_allowed (options = {}) {
|
||||
return (options.available ?? await this.get_available_amount())
|
||||
>= (options.minimum ?? 100);
|
||||
}
|
||||
|
||||
async get_available_amount () {
|
||||
const cost_uuid = this.modules.uuidv4();
|
||||
const svc_event = this.services.get('event');
|
||||
const event = {
|
||||
actor: Context.get('actor'),
|
||||
available: 0,
|
||||
cost_uuid,
|
||||
};
|
||||
await svc_event.emit('credit.check-available', event);
|
||||
|
||||
return event.available;
|
||||
}
|
||||
async record_cost ({ cost }) {
|
||||
const svc_event = this.services.get('event');
|
||||
const event = {
|
||||
actor: Context.get('actor'),
|
||||
cost,
|
||||
};
|
||||
this.log.info('cost record', {
|
||||
actor: event.actor,
|
||||
cost,
|
||||
client_driver_call: Context.get('client_driver_call'),
|
||||
});
|
||||
await svc_event.emit('credit.record-cost', event);
|
||||
}
|
||||
async record_funding_update ({ old_amount, new_amount }) {
|
||||
const svc_event = this.services.get('event');
|
||||
const event = {
|
||||
actor: Context.get('actor'),
|
||||
old_amount,
|
||||
new_amount,
|
||||
};
|
||||
await svc_event.emit('credit.funding-update', event);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
CostService,
|
||||
};
|
||||
Reference in New Issue
Block a user