diff --git a/api/dev/configs/api.json b/api/dev/configs/api.json index acaf5daa9..e09b0f3f5 100644 --- a/api/dev/configs/api.json +++ b/api/dev/configs/api.json @@ -1,5 +1,5 @@ { - "version": "4.25.3", + "version": "4.27.2", "extraOrigins": [], "sandbox": true, "ssoSubIds": [], diff --git a/api/src/core/utils/misc/__test__/timeout-budget.test.ts b/api/src/core/utils/misc/__test__/timeout-budget.test.ts new file mode 100644 index 000000000..d857633cd --- /dev/null +++ b/api/src/core/utils/misc/__test__/timeout-budget.test.ts @@ -0,0 +1,231 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { TimeoutBudget } from '@app/core/utils/misc/timeout-budget.js'; + +describe('TimeoutBudget', () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe('constructor', () => { + it('initializes with the given budget', () => { + const budget = new TimeoutBudget(10000); + expect(budget.remaining()).toBe(10000); + expect(budget.elapsed()).toBe(0); + }); + }); + + describe('remaining', () => { + it('returns full budget immediately after construction', () => { + const budget = new TimeoutBudget(5000); + expect(budget.remaining()).toBe(5000); + }); + + it('decreases as time passes', () => { + const budget = new TimeoutBudget(5000); + + vi.advanceTimersByTime(1000); + expect(budget.remaining()).toBe(4000); + + vi.advanceTimersByTime(2000); + expect(budget.remaining()).toBe(2000); + }); + + it('never returns negative values', () => { + const budget = new TimeoutBudget(1000); + + vi.advanceTimersByTime(5000); // Well past the budget + expect(budget.remaining()).toBe(0); + }); + + it('returns zero when budget is exactly exhausted', () => { + const budget = new TimeoutBudget(1000); + + vi.advanceTimersByTime(1000); + expect(budget.remaining()).toBe(0); + }); + }); + + describe('elapsed', () => { + it('returns zero immediately after construction', () => { + const budget = new TimeoutBudget(5000); + expect(budget.elapsed()).toBe(0); + }); + + it('increases as time passes', () => { + const budget = new TimeoutBudget(5000); + + vi.advanceTimersByTime(1000); + expect(budget.elapsed()).toBe(1000); + + vi.advanceTimersByTime(500); + expect(budget.elapsed()).toBe(1500); + }); + + it('continues increasing past the budget limit', () => { + const budget = new TimeoutBudget(1000); + + vi.advanceTimersByTime(2000); + expect(budget.elapsed()).toBe(2000); + }); + }); + + describe('getTimeout', () => { + it('returns maxMs when plenty of budget remains', () => { + const budget = new TimeoutBudget(10000); + expect(budget.getTimeout(2000)).toBe(2000); + }); + + it('returns maxMs when budget minus reserve is sufficient', () => { + const budget = new TimeoutBudget(10000); + expect(budget.getTimeout(2000, 5000)).toBe(2000); + }); + + it('caps timeout to available budget minus reserve', () => { + const budget = new TimeoutBudget(10000); + vi.advanceTimersByTime(5000); // 5000ms remaining + + // Want 2000ms but reserve 4000ms, only 1000ms available + expect(budget.getTimeout(2000, 4000)).toBe(1000); + }); + + it('caps timeout to remaining budget when no reserve', () => { + const budget = new TimeoutBudget(1000); + vi.advanceTimersByTime(800); // 200ms remaining + + expect(budget.getTimeout(500)).toBe(200); + }); + + it('returns minimum of 100ms even when budget is exhausted', () => { + const budget = new TimeoutBudget(1000); + vi.advanceTimersByTime(2000); // Budget exhausted + + expect(budget.getTimeout(500)).toBe(100); + }); + + it('returns minimum of 100ms when reserve exceeds remaining', () => { + const budget = new TimeoutBudget(5000); + vi.advanceTimersByTime(4000); // 1000ms remaining + + // Reserve 2000ms but only 1000ms remaining + expect(budget.getTimeout(500, 2000)).toBe(100); + }); + + it('uses default reserve of 0 when not specified', () => { + const budget = new TimeoutBudget(1000); + vi.advanceTimersByTime(500); // 500ms remaining + + expect(budget.getTimeout(1000)).toBe(500); // Capped to remaining + }); + }); + + describe('hasTimeFor', () => { + it('returns true when enough time remains', () => { + const budget = new TimeoutBudget(5000); + expect(budget.hasTimeFor(3000)).toBe(true); + }); + + it('returns true when exactly enough time remains', () => { + const budget = new TimeoutBudget(5000); + expect(budget.hasTimeFor(5000)).toBe(true); + }); + + it('returns false when not enough time remains', () => { + const budget = new TimeoutBudget(5000); + expect(budget.hasTimeFor(6000)).toBe(false); + }); + + it('accounts for elapsed time', () => { + const budget = new TimeoutBudget(5000); + vi.advanceTimersByTime(3000); // 2000ms remaining + + expect(budget.hasTimeFor(2000)).toBe(true); + expect(budget.hasTimeFor(3000)).toBe(false); + }); + + it('returns false when budget is exhausted', () => { + const budget = new TimeoutBudget(1000); + vi.advanceTimersByTime(2000); + + expect(budget.hasTimeFor(1)).toBe(false); + }); + + it('returns true for zero required time', () => { + const budget = new TimeoutBudget(1000); + vi.advanceTimersByTime(2000); // Budget exhausted + + expect(budget.hasTimeFor(0)).toBe(true); + }); + }); + + describe('integration scenarios', () => { + it('simulates a typical startup sequence', () => { + const budget = new TimeoutBudget(13000); // 13 second budget + const BOOTSTRAP_RESERVE = 8000; + const MAX_OP_TIMEOUT = 2000; + + // First operation - should get full 2000ms + const op1Timeout = budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + expect(op1Timeout).toBe(2000); + + // Simulate operation taking 500ms + vi.advanceTimersByTime(500); + + // Second operation - still have plenty of budget + const op2Timeout = budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + expect(op2Timeout).toBe(2000); + + // Simulate operation taking 1000ms + vi.advanceTimersByTime(1000); + + // Third operation + const op3Timeout = budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + expect(op3Timeout).toBe(2000); + + // Simulate slow operation taking 2000ms + vi.advanceTimersByTime(2000); + + // Now 3500ms elapsed, 9500ms remaining + // After reserve, only 1500ms available - less than max + const op4Timeout = budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + expect(op4Timeout).toBe(1500); + + // Simulate operation completing + vi.advanceTimersByTime(1000); + + // Bootstrap phase - use all remaining time + const bootstrapTimeout = budget.remaining(); + expect(bootstrapTimeout).toBe(8500); + expect(budget.hasTimeFor(8000)).toBe(true); + }); + + it('handles worst-case scenario where all operations timeout', () => { + const budget = new TimeoutBudget(13000); + const BOOTSTRAP_RESERVE = 8000; + const MAX_OP_TIMEOUT = 2000; + + // Each operation times out at its limit + // Available for operations: 13000 - 8000 = 5000ms + + // Op 1: gets 2000ms, times out + budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + vi.advanceTimersByTime(2000); + + // Op 2: gets 2000ms, times out + budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + vi.advanceTimersByTime(2000); + + // Op 3: only 1000ms available (5000 - 4000), times out + const op3Timeout = budget.getTimeout(MAX_OP_TIMEOUT, BOOTSTRAP_RESERVE); + expect(op3Timeout).toBe(1000); + vi.advanceTimersByTime(1000); + + // Bootstrap: should still have 8000ms + expect(budget.remaining()).toBe(8000); + }); + }); +}); diff --git a/api/src/core/utils/misc/__test__/with-timeout.test.ts b/api/src/core/utils/misc/__test__/with-timeout.test.ts new file mode 100644 index 000000000..7fb9ab2f7 --- /dev/null +++ b/api/src/core/utils/misc/__test__/with-timeout.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, it } from 'vitest'; + +import { withTimeout } from '@app/core/utils/misc/with-timeout.js'; + +describe('withTimeout', () => { + it('resolves when promise completes before timeout', async () => { + const promise = Promise.resolve('success'); + const result = await withTimeout(promise, 1000, 'testOp'); + expect(result).toBe('success'); + }); + + it('resolves with correct value for delayed promise within timeout', async () => { + const promise = new Promise((resolve) => setTimeout(() => resolve(42), 50)); + const result = await withTimeout(promise, 1000, 'testOp'); + expect(result).toBe(42); + }); + + it('rejects when promise takes longer than timeout', async () => { + const promise = new Promise((resolve) => setTimeout(() => resolve('late'), 500)); + await expect(withTimeout(promise, 50, 'slowOp')).rejects.toThrow('slowOp timed out after 50ms'); + }); + + it('includes operation name in timeout error message', async () => { + const promise = new Promise(() => {}); // Never resolves + await expect(withTimeout(promise, 10, 'myCustomOperation')).rejects.toThrow( + 'myCustomOperation timed out after 10ms' + ); + }); + + it('propagates rejection from the original promise', async () => { + const promise = Promise.reject(new Error('original error')); + await expect(withTimeout(promise, 1000, 'testOp')).rejects.toThrow('original error'); + }); + + it('resolves immediately for already-resolved promises', async () => { + const promise = Promise.resolve('immediate'); + const start = Date.now(); + const result = await withTimeout(promise, 1000, 'testOp'); + const elapsed = Date.now() - start; + + expect(result).toBe('immediate'); + expect(elapsed).toBeLessThan(50); // Should be nearly instant + }); + + it('works with zero timeout (immediately times out for pending promises)', async () => { + const promise = new Promise(() => {}); // Never resolves + await expect(withTimeout(promise, 0, 'zeroTimeout')).rejects.toThrow( + 'zeroTimeout timed out after 0ms' + ); + }); + + it('preserves the type of the resolved value', async () => { + interface TestType { + id: number; + name: string; + } + const testObj: TestType = { id: 1, name: 'test' }; + const promise = Promise.resolve(testObj); + + const result = await withTimeout(promise, 1000, 'testOp'); + + expect(result.id).toBe(1); + expect(result.name).toBe('test'); + }); +}); diff --git a/api/src/core/utils/misc/timeout-budget.ts b/api/src/core/utils/misc/timeout-budget.ts new file mode 100644 index 000000000..b126cb692 --- /dev/null +++ b/api/src/core/utils/misc/timeout-budget.ts @@ -0,0 +1,70 @@ +/** + * Tracks remaining time budget to ensure we don't exceed external timeouts (e.g., PM2's listen_timeout). + * + * This class helps coordinate multiple async operations by: + * - Tracking elapsed time from construction + * - Calculating dynamic timeouts based on remaining budget + * - Reserving time for critical operations (like server bootstrap) + * + * @example + * ```typescript + * const budget = new TimeoutBudget(15000); // 15 second total budget + * + * // Each operation gets a timeout capped by remaining budget + * await withTimeout(loadConfig(), budget.getTimeout(2000, 8000), 'loadConfig'); + * await withTimeout(loadState(), budget.getTimeout(2000, 8000), 'loadState'); + * + * // Bootstrap gets all remaining time + * await withTimeout(bootstrap(), budget.remaining(), 'bootstrap'); + * + * console.log(`Completed in ${budget.elapsed()}ms`); + * ``` + */ +export class TimeoutBudget { + private startTime: number; + private budgetMs: number; + + /** + * Creates a new startup budget tracker. + * @param budgetMs Total time budget in milliseconds + */ + constructor(budgetMs: number) { + this.startTime = Date.now(); + this.budgetMs = budgetMs; + } + + /** + * Returns remaining time in milliseconds. + * Never returns negative values. + */ + remaining(): number { + return Math.max(0, this.budgetMs - (Date.now() - this.startTime)); + } + + /** + * Returns elapsed time in milliseconds since construction. + */ + elapsed(): number { + return Date.now() - this.startTime; + } + + /** + * Returns timeout for an operation, capped by remaining budget. + * + * @param maxMs Maximum timeout for this operation + * @param reserveMs Time to reserve for future operations (e.g., server bootstrap) + * @returns Timeout in milliseconds (minimum 100ms to avoid instant failures) + */ + getTimeout(maxMs: number, reserveMs: number = 0): number { + const available = this.remaining() - reserveMs; + return Math.max(100, Math.min(maxMs, available)); + } + + /** + * Checks if there's enough time remaining for an operation. + * @param requiredMs Time required in milliseconds + */ + hasTimeFor(requiredMs: number): boolean { + return this.remaining() >= requiredMs; + } +} diff --git a/api/src/core/utils/misc/with-timeout.ts b/api/src/core/utils/misc/with-timeout.ts new file mode 100644 index 000000000..43d36ac6e --- /dev/null +++ b/api/src/core/utils/misc/with-timeout.ts @@ -0,0 +1,25 @@ +/** + * Wraps a promise with a timeout to prevent hangs. + * If the operation takes longer than timeoutMs, it rejects with a timeout error. + * + * @param promise The promise to wrap with a timeout + * @param timeoutMs Maximum time in milliseconds before timing out + * @param operationName Name of the operation for the error message + * @returns The result of the promise if it completes in time + * @throws Error if the operation times out + */ +export const withTimeout = ( + promise: Promise, + timeoutMs: number, + operationName: string +): Promise => { + return Promise.race([ + promise, + new Promise((_, reject) => + setTimeout( + () => reject(new Error(`${operationName} timed out after ${timeoutMs}ms`)), + timeoutMs + ) + ), + ]); +}; diff --git a/api/src/index.ts b/api/src/index.ts index e1d2c41d4..7b65a0851 100644 --- a/api/src/index.ts +++ b/api/src/index.ts @@ -4,7 +4,7 @@ import '@app/dotenv.js'; import { type NestFastifyApplication } from '@nestjs/platform-fastify'; import { unlinkSync } from 'fs'; -import { mkdir, readFile } from 'fs/promises'; +import { mkdir } from 'fs/promises'; import http from 'http'; import https from 'https'; @@ -15,6 +15,8 @@ import { WebSocket } from 'ws'; import { logger } from '@app/core/log.js'; import { fileExistsSync } from '@app/core/utils/files/file-exists.js'; +import { TimeoutBudget } from '@app/core/utils/misc/timeout-budget.js'; +import { withTimeout } from '@app/core/utils/misc/with-timeout.js'; import { getServerIdentifier } from '@app/core/utils/server-identifier.js'; import { environment, PATHS_CONFIG_MODULES, PORT } from '@app/environment.js'; import * as envVars from '@app/environment.js'; @@ -28,13 +30,23 @@ import { StateManager } from '@app/store/watch/state-watch.js'; let server: NestFastifyApplication | null = null; +// PM2 listen_timeout is 15 seconds (ecosystem.config.json) +// We use 13 seconds as our total budget to ensure our timeout triggers before PM2 kills us +const TOTAL_STARTUP_BUDGET_MS = 13_000; +// Reserve time for the NestJS bootstrap (the most critical and time-consuming operation) +const BOOTSTRAP_RESERVED_MS = 8_000; +// Maximum time for any single pre-bootstrap operation +const MAX_OPERATION_TIMEOUT_MS = 2_000; + const unlinkUnixPort = () => { if (isNaN(parseInt(PORT, 10))) { if (fileExistsSync(PORT)) unlinkSync(PORT); } }; -export const viteNodeApp = async () => { +export const viteNodeApp = async (): Promise> => { + const budget = new TimeoutBudget(TOTAL_STARTUP_BUDGET_MS); + try { await import('json-bigint-patch'); environment.IS_MAIN_PROCESS = true; @@ -42,15 +54,15 @@ export const viteNodeApp = async () => { /**------------------------------------------------------------------------ * Attaching getServerIdentifier to globalThis - * getServerIdentifier is tightly coupled to the deprecated redux store, + * getServerIdentifier is tightly coupled to the deprecated redux store, * which we don't want to share with other packages or plugins. - * + * * At the same time, we need to use it in @unraid/shared as a building block, * where it's used & available outside of NestJS's DI context. - * - * Attaching to globalThis is a temporary solution to avoid refactoring + * + * Attaching to globalThis is a temporary solution to avoid refactoring * config sync & management outside of NestJS's DI context. - * + * * Plugin authors should import getServerIdentifier from @unraid/shared instead, * to avoid breaking changes to their code. *------------------------------------------------------------------------**/ @@ -58,7 +70,18 @@ export const viteNodeApp = async () => { logger.info('ENV %o', envVars); logger.info('PATHS %o', store.getState().paths); - await mkdir(PATHS_CONFIG_MODULES, { recursive: true }); + // Note: we use logger.info for checkpoints instead of a lower log level + // to ensure emission during an unraid server's boot, + // where the log level will be set to INFO by default. + + // Create config directory + try { + await mkdir(PATHS_CONFIG_MODULES, { recursive: true }); + logger.info('Config directory ready'); + } catch (error) { + logger.error(error, 'Failed to create config directory'); + throw error; + } const cacheable = new CacheableLookup(); @@ -68,29 +91,73 @@ export const viteNodeApp = async () => { cacheable.install(https.globalAgent); // Load emhttp state into store - await store.dispatch(loadStateFiles()); + try { + const timeout = budget.getTimeout(MAX_OPERATION_TIMEOUT_MS, BOOTSTRAP_RESERVED_MS); + await withTimeout(store.dispatch(loadStateFiles()), timeout, 'loadStateFiles'); + logger.info('Emhttp state loaded'); + } catch (error) { + logger.error(error, 'Failed to load emhttp state files'); + logger.warn('Continuing with default state'); + } // Load initial registration key into store - await store.dispatch(loadRegistrationKey()); + try { + const timeout = budget.getTimeout(MAX_OPERATION_TIMEOUT_MS, BOOTSTRAP_RESERVED_MS); + await withTimeout(store.dispatch(loadRegistrationKey()), timeout, 'loadRegistrationKey'); + logger.info('Registration key loaded'); + } catch (error) { + logger.error(error, 'Failed to load registration key'); + logger.warn('Continuing without registration key'); + } // Load my dynamix config file into store - loadDynamixConfig(); + try { + loadDynamixConfig(); + logger.info('Dynamix config loaded'); + } catch (error) { + logger.error(error, 'Failed to load dynamix config'); + logger.warn('Continuing with default dynamix config'); + } // Start listening to file updates - StateManager.getInstance(); + try { + StateManager.getInstance(); + logger.info('State manager initialized'); + } catch (error) { + logger.error(error, 'Failed to initialize state manager'); + logger.warn('Continuing without state watching'); + } // Start listening to key file changes - setupRegistrationKeyWatch(); + try { + setupRegistrationKeyWatch(); + logger.info('Registration key watch active'); + } catch (error) { + logger.error(error, 'Failed to setup registration key watch'); + logger.warn('Continuing without key file watching'); + } // If port is unix socket, delete old socket before starting http server unlinkUnixPort(); startMiddlewareListeners(); - // Start webserver - const { bootstrapNestServer } = await import('@app/unraid-api/main.js'); - - server = await bootstrapNestServer(); + // Start webserver - use all remaining budget + try { + const bootstrapTimeout = budget.remaining(); + if (bootstrapTimeout < 1000) { + logger.warn( + `Insufficient startup budget remaining (${bootstrapTimeout}ms) for NestJS bootstrap` + ); + } + logger.info('Bootstrapping NestJS server (budget: %dms)...', bootstrapTimeout); + const { bootstrapNestServer } = await import('@app/unraid-api/main.js'); + server = await withTimeout(bootstrapNestServer(), bootstrapTimeout, 'bootstrapNestServer'); + logger.info('Startup complete in %dms', budget.elapsed()); + } catch (error) { + logger.error(error, 'Failed to start NestJS server'); + throw error; // This is critical - must rethrow to trigger graceful exit + } asyncExitHook( async (signal) => { @@ -103,8 +170,10 @@ export const viteNodeApp = async () => { gracefulExit(); }, - { wait: 9999 } + { wait: 10_000 } ); + + return server; } catch (error: unknown) { if (error instanceof Error) { logger.error(error, 'API-ERROR'); @@ -115,8 +184,9 @@ export const viteNodeApp = async () => { await server?.close?.(); } shutdownApiEvent(); - // Kill application + // Kill application - gracefulExit calls process.exit but TS doesn't know it never returns gracefulExit(1); + throw new Error('Unreachable'); } };