mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-01 15:50:46 -05:00
feat(watchdog): add Memory resource reclaimer (#7583)
* feat(watchdog): add GPU reclaimer Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Handle vram calculation for unified memory devices Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Support RAM eviction, set watchdog interval from runtime settings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
dbd25885c3
commit
50f9c9a058
@@ -185,33 +185,6 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan
|
||||
return handler
|
||||
}
|
||||
|
||||
type runtimeSettings struct {
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
|
||||
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
Threads *int `json:"threads,omitempty"`
|
||||
ContextSize *int `json:"context_size,omitempty"`
|
||||
F16 *bool `json:"f16,omitempty"`
|
||||
Debug *bool `json:"debug,omitempty"`
|
||||
CORS *bool `json:"cors,omitempty"`
|
||||
CSRF *bool `json:"csrf,omitempty"`
|
||||
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
|
||||
P2PToken *string `json:"p2p_token,omitempty"`
|
||||
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
|
||||
Federated *bool `json:"federated,omitempty"`
|
||||
Galleries *[]config.Gallery `json:"galleries,omitempty"`
|
||||
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
|
||||
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
|
||||
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
|
||||
ApiKeys *[]string `json:"api_keys,omitempty"`
|
||||
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
||||
}
|
||||
|
||||
func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing runtime_settings.json")
|
||||
@@ -227,6 +200,8 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
|
||||
envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends
|
||||
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
|
||||
envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled
|
||||
envMemoryReclaimerThreshold := appConfig.MemoryReclaimerThreshold == startupAppConfig.MemoryReclaimerThreshold
|
||||
envThreads := appConfig.Threads == startupAppConfig.Threads
|
||||
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
|
||||
envF16 := appConfig.F16 == startupAppConfig.F16
|
||||
@@ -242,7 +217,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
var settings runtimeSettings
|
||||
var settings config.RuntimeSettings
|
||||
err := json.Unmarshal(fileContent, &settings)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -294,6 +269,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
if settings.ParallelBackendRequests != nil && !envParallelRequests {
|
||||
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
if settings.MemoryReclaimerEnabled != nil && !envMemoryReclaimerEnabled {
|
||||
appConfig.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
|
||||
if appConfig.MemoryReclaimerEnabled {
|
||||
appConfig.WatchDog = true // Memory reclaimer requires watchdog
|
||||
}
|
||||
}
|
||||
if settings.MemoryReclaimerThreshold != nil && !envMemoryReclaimerThreshold {
|
||||
appConfig.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
|
||||
}
|
||||
if settings.Threads != nil && !envThreads {
|
||||
appConfig.Threads = *settings.Threads
|
||||
}
|
||||
|
||||
+41
-21
@@ -218,17 +218,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
return
|
||||
}
|
||||
|
||||
var settings struct {
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
|
||||
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited)
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
||||
}
|
||||
var settings config.RuntimeSettings
|
||||
|
||||
if err := json.Unmarshal(fileContent, &settings); err != nil {
|
||||
log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
|
||||
@@ -281,6 +271,16 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.WatchdogInterval != nil {
|
||||
if options.WatchDogInterval == 0 {
|
||||
dur, err := time.ParseDuration(*settings.WatchdogInterval)
|
||||
if err == nil {
|
||||
options.WatchDogInterval = dur
|
||||
} else {
|
||||
log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json")
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
|
||||
if settings.MaxActiveBackends != nil {
|
||||
// Only apply if current value is default (0), suggesting it wasn't set from env var
|
||||
@@ -303,6 +303,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
options.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
}
|
||||
if settings.MemoryReclaimerEnabled != nil {
|
||||
// Only apply if current value is default (false), suggesting it wasn't set from env var
|
||||
if !options.MemoryReclaimerEnabled {
|
||||
options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
|
||||
if options.MemoryReclaimerEnabled {
|
||||
options.WatchDog = true // Memory reclaimer requires watchdog
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.MemoryReclaimerThreshold != nil {
|
||||
// Only apply if current value is default (0), suggesting it wasn't set from env var
|
||||
if options.MemoryReclaimerThreshold == 0 {
|
||||
options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
|
||||
}
|
||||
}
|
||||
if settings.AgentJobRetentionDays != nil {
|
||||
// Only apply if current value is default (0), suggesting it wasn't set from env var
|
||||
if options.AgentJobRetentionDays == 0 {
|
||||
@@ -323,19 +338,24 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
|
||||
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
|
||||
lruLimit := options.GetEffectiveMaxActiveBackends()
|
||||
|
||||
// Create watchdog if enabled OR if LRU limit is set
|
||||
if options.WatchDog || lruLimit > 0 {
|
||||
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
|
||||
if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled {
|
||||
wd := model.NewWatchDog(
|
||||
application.ModelLoader(),
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle,
|
||||
lruLimit)
|
||||
model.WithProcessManager(application.ModelLoader()),
|
||||
model.WithBusyTimeout(options.WatchDogBusyTimeout),
|
||||
model.WithIdleTimeout(options.WatchDogIdleTimeout),
|
||||
model.WithWatchdogInterval(options.WatchDogInterval),
|
||||
model.WithBusyCheck(options.WatchDogBusy),
|
||||
model.WithIdleCheck(options.WatchDogIdle),
|
||||
model.WithLRULimit(lruLimit),
|
||||
model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold),
|
||||
)
|
||||
application.ModelLoader().SetWatchDog(wd)
|
||||
|
||||
// Start watchdog goroutine only if busy/idle checks are enabled
|
||||
if options.WatchDogBusy || options.WatchDogIdle {
|
||||
// Start watchdog goroutine if any periodic checks are enabled
|
||||
// LRU eviction doesn't need the Run() loop - it's triggered on model load
|
||||
// But memory reclaimer needs the Run() loop for periodic checking
|
||||
if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled {
|
||||
go wd.Run()
|
||||
}
|
||||
|
||||
|
||||
@@ -23,24 +23,28 @@ func (a *Application) startWatchdog() error {
|
||||
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
|
||||
lruLimit := appConfig.GetEffectiveMaxActiveBackends()
|
||||
|
||||
// Create watchdog if enabled OR if LRU limit is set
|
||||
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
|
||||
// LRU eviction requires watchdog infrastructure even without busy/idle checks
|
||||
if appConfig.WatchDog || lruLimit > 0 {
|
||||
if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled {
|
||||
wd := model.NewWatchDog(
|
||||
a.modelLoader,
|
||||
appConfig.WatchDogBusyTimeout,
|
||||
appConfig.WatchDogIdleTimeout,
|
||||
appConfig.WatchDogBusy,
|
||||
appConfig.WatchDogIdle,
|
||||
lruLimit)
|
||||
model.WithProcessManager(a.modelLoader),
|
||||
model.WithBusyTimeout(appConfig.WatchDogBusyTimeout),
|
||||
model.WithIdleTimeout(appConfig.WatchDogIdleTimeout),
|
||||
model.WithWatchdogInterval(appConfig.WatchDogInterval),
|
||||
model.WithBusyCheck(appConfig.WatchDogBusy),
|
||||
model.WithIdleCheck(appConfig.WatchDogIdle),
|
||||
model.WithLRULimit(lruLimit),
|
||||
model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold),
|
||||
)
|
||||
a.modelLoader.SetWatchDog(wd)
|
||||
|
||||
// Create new stop channel
|
||||
a.watchdogStop = make(chan bool, 1)
|
||||
|
||||
// Start watchdog goroutine only if busy/idle checks are enabled
|
||||
// Start watchdog goroutine if any periodic checks are enabled
|
||||
// LRU eviction doesn't need the Run() loop - it's triggered on model load
|
||||
if appConfig.WatchDogBusy || appConfig.WatchDogIdle {
|
||||
// But memory reclaimer needs the Run() loop for periodic checking
|
||||
if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled {
|
||||
go wd.Run()
|
||||
}
|
||||
|
||||
@@ -56,7 +60,14 @@ func (a *Application) startWatchdog() error {
|
||||
}
|
||||
}()
|
||||
|
||||
log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings")
|
||||
log.Info().
|
||||
Int("lruLimit", lruLimit).
|
||||
Bool("busyCheck", appConfig.WatchDogBusy).
|
||||
Bool("idleCheck", appConfig.WatchDogIdle).
|
||||
Bool("memoryReclaimer", appConfig.MemoryReclaimerEnabled).
|
||||
Float64("memoryThreshold", appConfig.MemoryReclaimerThreshold).
|
||||
Dur("interval", appConfig.WatchDogInterval).
|
||||
Msg("Watchdog started with new settings")
|
||||
} else {
|
||||
log.Info().Msg("Watchdog disabled")
|
||||
}
|
||||
|
||||
@@ -72,6 +72,8 @@ type RunCMD struct {
|
||||
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
|
||||
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
|
||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||
EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"`
|
||||
MemoryReclaimerThreshold float64 `env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"`
|
||||
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
||||
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
|
||||
MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
|
||||
@@ -200,6 +202,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
opts = append(opts, config.SetWatchDogBusyTimeout(dur))
|
||||
}
|
||||
}
|
||||
|
||||
// Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM)
|
||||
if r.EnableMemoryReclaimer {
|
||||
opts = append(opts, config.WithMemoryReclaimer(true, r.MemoryReclaimerThreshold))
|
||||
}
|
||||
|
||||
if r.ParallelRequests {
|
||||
opts = append(opts, config.EnableParallelBackendRequests)
|
||||
}
|
||||
|
||||
@@ -60,9 +60,14 @@ type ApplicationConfig struct {
|
||||
WatchDogBusy bool
|
||||
WatchDog bool
|
||||
|
||||
// Memory Reclaimer settings (works with GPU if available, otherwise RAM)
|
||||
MemoryReclaimerEnabled bool // Enable memory threshold monitoring
|
||||
MemoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
|
||||
|
||||
ModelsURL []string
|
||||
|
||||
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||
WatchDogInterval time.Duration // Interval between watchdog checks
|
||||
|
||||
MachineTag string
|
||||
|
||||
@@ -187,6 +192,39 @@ func SetWatchDogIdleTimeout(t time.Duration) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
// EnableMemoryReclaimer enables memory threshold monitoring.
|
||||
// When enabled, the watchdog will evict backends if memory usage exceeds the threshold.
|
||||
// Works with GPU VRAM if available, otherwise uses system RAM.
|
||||
var EnableMemoryReclaimer = func(o *ApplicationConfig) {
|
||||
o.MemoryReclaimerEnabled = true
|
||||
o.WatchDog = true // Memory reclaimer requires watchdog infrastructure
|
||||
}
|
||||
|
||||
// SetMemoryReclaimerThreshold sets the memory usage threshold (0.0-1.0).
|
||||
// When memory usage exceeds this threshold, backends will be evicted using LRU strategy.
|
||||
func SetMemoryReclaimerThreshold(threshold float64) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
if threshold > 0 && threshold <= 1.0 {
|
||||
o.MemoryReclaimerThreshold = threshold
|
||||
o.MemoryReclaimerEnabled = true
|
||||
o.WatchDog = true // Memory reclaimer requires watchdog infrastructure
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithMemoryReclaimer configures the memory reclaimer with the given settings
|
||||
func WithMemoryReclaimer(enabled bool, threshold float64) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.MemoryReclaimerEnabled = enabled
|
||||
if threshold > 0 && threshold <= 1.0 {
|
||||
o.MemoryReclaimerThreshold = threshold
|
||||
}
|
||||
if enabled {
|
||||
o.WatchDog = true // Memory reclaimer requires watchdog infrastructure
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// EnableSingleBackend is deprecated: use SetMaxActiveBackends(1) instead.
|
||||
// This is kept for backward compatibility.
|
||||
var EnableSingleBackend = func(o *ApplicationConfig) {
|
||||
@@ -454,6 +492,208 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption {
|
||||
}
|
||||
}
|
||||
|
||||
// ToRuntimeSettings converts ApplicationConfig to RuntimeSettings for API responses and JSON serialization.
|
||||
// This provides a single source of truth - ApplicationConfig holds the live values,
|
||||
// and this method creates a RuntimeSettings snapshot for external consumption.
|
||||
func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
||||
// Create local copies for pointer fields
|
||||
watchdogEnabled := o.WatchDog
|
||||
watchdogIdle := o.WatchDogIdle
|
||||
watchdogBusy := o.WatchDogBusy
|
||||
singleBackend := o.SingleBackend
|
||||
maxActiveBackends := o.MaxActiveBackends
|
||||
parallelBackendRequests := o.ParallelBackendRequests
|
||||
memoryReclaimerEnabled := o.MemoryReclaimerEnabled
|
||||
memoryReclaimerThreshold := o.MemoryReclaimerThreshold
|
||||
threads := o.Threads
|
||||
contextSize := o.ContextSize
|
||||
f16 := o.F16
|
||||
debug := o.Debug
|
||||
cors := o.CORS
|
||||
csrf := o.CSRF
|
||||
corsAllowOrigins := o.CORSAllowOrigins
|
||||
p2pToken := o.P2PToken
|
||||
p2pNetworkID := o.P2PNetworkID
|
||||
federated := o.Federated
|
||||
galleries := o.Galleries
|
||||
backendGalleries := o.BackendGalleries
|
||||
autoloadGalleries := o.AutoloadGalleries
|
||||
autoloadBackendGalleries := o.AutoloadBackendGalleries
|
||||
apiKeys := o.ApiKeys
|
||||
agentJobRetentionDays := o.AgentJobRetentionDays
|
||||
|
||||
// Format timeouts as strings
|
||||
var idleTimeout, busyTimeout, watchdogInterval string
|
||||
if o.WatchDogIdleTimeout > 0 {
|
||||
idleTimeout = o.WatchDogIdleTimeout.String()
|
||||
} else {
|
||||
idleTimeout = "15m" // default
|
||||
}
|
||||
if o.WatchDogBusyTimeout > 0 {
|
||||
busyTimeout = o.WatchDogBusyTimeout.String()
|
||||
} else {
|
||||
busyTimeout = "5m" // default
|
||||
}
|
||||
if o.WatchDogInterval > 0 {
|
||||
watchdogInterval = o.WatchDogInterval.String()
|
||||
} else {
|
||||
watchdogInterval = "2s" // default
|
||||
}
|
||||
|
||||
return RuntimeSettings{
|
||||
WatchdogEnabled: &watchdogEnabled,
|
||||
WatchdogIdleEnabled: &watchdogIdle,
|
||||
WatchdogBusyEnabled: &watchdogBusy,
|
||||
WatchdogIdleTimeout: &idleTimeout,
|
||||
WatchdogBusyTimeout: &busyTimeout,
|
||||
WatchdogInterval: &watchdogInterval,
|
||||
SingleBackend: &singleBackend,
|
||||
MaxActiveBackends: &maxActiveBackends,
|
||||
ParallelBackendRequests: ¶llelBackendRequests,
|
||||
MemoryReclaimerEnabled: &memoryReclaimerEnabled,
|
||||
MemoryReclaimerThreshold: &memoryReclaimerThreshold,
|
||||
Threads: &threads,
|
||||
ContextSize: &contextSize,
|
||||
F16: &f16,
|
||||
Debug: &debug,
|
||||
CORS: &cors,
|
||||
CSRF: &csrf,
|
||||
CORSAllowOrigins: &corsAllowOrigins,
|
||||
P2PToken: &p2pToken,
|
||||
P2PNetworkID: &p2pNetworkID,
|
||||
Federated: &federated,
|
||||
Galleries: &galleries,
|
||||
BackendGalleries: &backendGalleries,
|
||||
AutoloadGalleries: &autoloadGalleries,
|
||||
AutoloadBackendGalleries: &autoloadBackendGalleries,
|
||||
ApiKeys: &apiKeys,
|
||||
AgentJobRetentionDays: &agentJobRetentionDays,
|
||||
}
|
||||
}
|
||||
|
||||
// ApplyRuntimeSettings applies RuntimeSettings to ApplicationConfig.
|
||||
// Only non-nil fields in RuntimeSettings are applied.
|
||||
// Returns true if watchdog-related settings changed (requiring restart).
|
||||
func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (requireRestart bool) {
|
||||
if settings == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if settings.WatchdogEnabled != nil {
|
||||
o.WatchDog = *settings.WatchdogEnabled
|
||||
requireRestart = true
|
||||
}
|
||||
if settings.WatchdogIdleEnabled != nil {
|
||||
o.WatchDogIdle = *settings.WatchdogIdleEnabled
|
||||
if o.WatchDogIdle {
|
||||
o.WatchDog = true
|
||||
}
|
||||
requireRestart = true
|
||||
}
|
||||
if settings.WatchdogBusyEnabled != nil {
|
||||
o.WatchDogBusy = *settings.WatchdogBusyEnabled
|
||||
if o.WatchDogBusy {
|
||||
o.WatchDog = true
|
||||
}
|
||||
requireRestart = true
|
||||
}
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
if dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err == nil {
|
||||
o.WatchDogIdleTimeout = dur
|
||||
requireRestart = true
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil {
|
||||
if dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err == nil {
|
||||
o.WatchDogBusyTimeout = dur
|
||||
requireRestart = true
|
||||
}
|
||||
}
|
||||
if settings.WatchdogInterval != nil {
|
||||
if dur, err := time.ParseDuration(*settings.WatchdogInterval); err == nil {
|
||||
o.WatchDogInterval = dur
|
||||
requireRestart = true
|
||||
}
|
||||
}
|
||||
if settings.MaxActiveBackends != nil {
|
||||
o.MaxActiveBackends = *settings.MaxActiveBackends
|
||||
o.SingleBackend = (*settings.MaxActiveBackends == 1)
|
||||
requireRestart = true
|
||||
} else if settings.SingleBackend != nil {
|
||||
o.SingleBackend = *settings.SingleBackend
|
||||
if *settings.SingleBackend {
|
||||
o.MaxActiveBackends = 1
|
||||
} else {
|
||||
o.MaxActiveBackends = 0
|
||||
}
|
||||
requireRestart = true
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil {
|
||||
o.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
if settings.MemoryReclaimerEnabled != nil {
|
||||
o.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
|
||||
if *settings.MemoryReclaimerEnabled {
|
||||
o.WatchDog = true
|
||||
}
|
||||
requireRestart = true
|
||||
}
|
||||
if settings.MemoryReclaimerThreshold != nil {
|
||||
if *settings.MemoryReclaimerThreshold > 0 && *settings.MemoryReclaimerThreshold <= 1.0 {
|
||||
o.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
|
||||
requireRestart = true
|
||||
}
|
||||
}
|
||||
if settings.Threads != nil {
|
||||
o.Threads = *settings.Threads
|
||||
}
|
||||
if settings.ContextSize != nil {
|
||||
o.ContextSize = *settings.ContextSize
|
||||
}
|
||||
if settings.F16 != nil {
|
||||
o.F16 = *settings.F16
|
||||
}
|
||||
if settings.Debug != nil {
|
||||
o.Debug = *settings.Debug
|
||||
}
|
||||
if settings.CORS != nil {
|
||||
o.CORS = *settings.CORS
|
||||
}
|
||||
if settings.CSRF != nil {
|
||||
o.CSRF = *settings.CSRF
|
||||
}
|
||||
if settings.CORSAllowOrigins != nil {
|
||||
o.CORSAllowOrigins = *settings.CORSAllowOrigins
|
||||
}
|
||||
if settings.P2PToken != nil {
|
||||
o.P2PToken = *settings.P2PToken
|
||||
}
|
||||
if settings.P2PNetworkID != nil {
|
||||
o.P2PNetworkID = *settings.P2PNetworkID
|
||||
}
|
||||
if settings.Federated != nil {
|
||||
o.Federated = *settings.Federated
|
||||
}
|
||||
if settings.Galleries != nil {
|
||||
o.Galleries = *settings.Galleries
|
||||
}
|
||||
if settings.BackendGalleries != nil {
|
||||
o.BackendGalleries = *settings.BackendGalleries
|
||||
}
|
||||
if settings.AutoloadGalleries != nil {
|
||||
o.AutoloadGalleries = *settings.AutoloadGalleries
|
||||
}
|
||||
if settings.AutoloadBackendGalleries != nil {
|
||||
o.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.AgentJobRetentionDays != nil {
|
||||
o.AgentJobRetentionDays = *settings.AgentJobRetentionDays
|
||||
}
|
||||
// Note: ApiKeys requires special handling (merging with startup keys) - handled in caller
|
||||
|
||||
return requireRestart
|
||||
}
|
||||
|
||||
// func WithMetrics(meter *metrics.Metrics) AppOption {
|
||||
// return func(o *StartupOptions) {
|
||||
// o.Metrics = meter
|
||||
|
||||
@@ -0,0 +1,577 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("ApplicationConfig RuntimeSettings Conversion", func() {
|
||||
Describe("ToRuntimeSettings", func() {
|
||||
It("should convert all fields correctly", func() {
|
||||
appConfig := &ApplicationConfig{
|
||||
WatchDog: true,
|
||||
WatchDogIdle: true,
|
||||
WatchDogBusy: true,
|
||||
WatchDogIdleTimeout: 20 * time.Minute,
|
||||
WatchDogBusyTimeout: 10 * time.Minute,
|
||||
SingleBackend: false,
|
||||
MaxActiveBackends: 5,
|
||||
ParallelBackendRequests: true,
|
||||
MemoryReclaimerEnabled: true,
|
||||
MemoryReclaimerThreshold: 0.85,
|
||||
Threads: 8,
|
||||
ContextSize: 4096,
|
||||
F16: true,
|
||||
Debug: true,
|
||||
CORS: true,
|
||||
CSRF: true,
|
||||
CORSAllowOrigins: "https://example.com",
|
||||
P2PToken: "test-token",
|
||||
P2PNetworkID: "test-network",
|
||||
Federated: true,
|
||||
Galleries: []Gallery{{Name: "test-gallery", URL: "https://example.com"}},
|
||||
BackendGalleries: []Gallery{{Name: "backend-gallery", URL: "https://example.com/backend"}},
|
||||
AutoloadGalleries: true,
|
||||
AutoloadBackendGalleries: true,
|
||||
ApiKeys: []string{"key1", "key2"},
|
||||
AgentJobRetentionDays: 30,
|
||||
}
|
||||
|
||||
rs := appConfig.ToRuntimeSettings()
|
||||
|
||||
Expect(rs.WatchdogEnabled).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogEnabled).To(BeTrue())
|
||||
|
||||
Expect(rs.WatchdogIdleEnabled).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogIdleEnabled).To(BeTrue())
|
||||
|
||||
Expect(rs.WatchdogBusyEnabled).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogBusyEnabled).To(BeTrue())
|
||||
|
||||
Expect(rs.WatchdogIdleTimeout).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogIdleTimeout).To(Equal("20m0s"))
|
||||
|
||||
Expect(rs.WatchdogBusyTimeout).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogBusyTimeout).To(Equal("10m0s"))
|
||||
|
||||
Expect(rs.SingleBackend).ToNot(BeNil())
|
||||
Expect(*rs.SingleBackend).To(BeFalse())
|
||||
|
||||
Expect(rs.MaxActiveBackends).ToNot(BeNil())
|
||||
Expect(*rs.MaxActiveBackends).To(Equal(5))
|
||||
|
||||
Expect(rs.ParallelBackendRequests).ToNot(BeNil())
|
||||
Expect(*rs.ParallelBackendRequests).To(BeTrue())
|
||||
|
||||
Expect(rs.MemoryReclaimerEnabled).ToNot(BeNil())
|
||||
Expect(*rs.MemoryReclaimerEnabled).To(BeTrue())
|
||||
|
||||
Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil())
|
||||
Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.85))
|
||||
|
||||
Expect(rs.Threads).ToNot(BeNil())
|
||||
Expect(*rs.Threads).To(Equal(8))
|
||||
|
||||
Expect(rs.ContextSize).ToNot(BeNil())
|
||||
Expect(*rs.ContextSize).To(Equal(4096))
|
||||
|
||||
Expect(rs.F16).ToNot(BeNil())
|
||||
Expect(*rs.F16).To(BeTrue())
|
||||
|
||||
Expect(rs.Debug).ToNot(BeNil())
|
||||
Expect(*rs.Debug).To(BeTrue())
|
||||
|
||||
Expect(rs.CORS).ToNot(BeNil())
|
||||
Expect(*rs.CORS).To(BeTrue())
|
||||
|
||||
Expect(rs.CSRF).ToNot(BeNil())
|
||||
Expect(*rs.CSRF).To(BeTrue())
|
||||
|
||||
Expect(rs.CORSAllowOrigins).ToNot(BeNil())
|
||||
Expect(*rs.CORSAllowOrigins).To(Equal("https://example.com"))
|
||||
|
||||
Expect(rs.P2PToken).ToNot(BeNil())
|
||||
Expect(*rs.P2PToken).To(Equal("test-token"))
|
||||
|
||||
Expect(rs.P2PNetworkID).ToNot(BeNil())
|
||||
Expect(*rs.P2PNetworkID).To(Equal("test-network"))
|
||||
|
||||
Expect(rs.Federated).ToNot(BeNil())
|
||||
Expect(*rs.Federated).To(BeTrue())
|
||||
|
||||
Expect(rs.Galleries).ToNot(BeNil())
|
||||
Expect(*rs.Galleries).To(HaveLen(1))
|
||||
Expect((*rs.Galleries)[0].Name).To(Equal("test-gallery"))
|
||||
|
||||
Expect(rs.BackendGalleries).ToNot(BeNil())
|
||||
Expect(*rs.BackendGalleries).To(HaveLen(1))
|
||||
Expect((*rs.BackendGalleries)[0].Name).To(Equal("backend-gallery"))
|
||||
|
||||
Expect(rs.AutoloadGalleries).ToNot(BeNil())
|
||||
Expect(*rs.AutoloadGalleries).To(BeTrue())
|
||||
|
||||
Expect(rs.AutoloadBackendGalleries).ToNot(BeNil())
|
||||
Expect(*rs.AutoloadBackendGalleries).To(BeTrue())
|
||||
|
||||
Expect(rs.ApiKeys).ToNot(BeNil())
|
||||
Expect(*rs.ApiKeys).To(HaveLen(2))
|
||||
Expect(*rs.ApiKeys).To(ContainElements("key1", "key2"))
|
||||
|
||||
Expect(rs.AgentJobRetentionDays).ToNot(BeNil())
|
||||
Expect(*rs.AgentJobRetentionDays).To(Equal(30))
|
||||
})
|
||||
|
||||
It("should use default timeouts when not set", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
rs := appConfig.ToRuntimeSettings()
|
||||
|
||||
Expect(rs.WatchdogIdleTimeout).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogIdleTimeout).To(Equal("15m"))
|
||||
|
||||
Expect(rs.WatchdogBusyTimeout).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogBusyTimeout).To(Equal("5m"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("ApplyRuntimeSettings", func() {
|
||||
It("should return false when settings is nil", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
changed := appConfig.ApplyRuntimeSettings(nil)
|
||||
Expect(changed).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should only apply non-nil fields", func() {
|
||||
appConfig := &ApplicationConfig{
|
||||
WatchDog: false,
|
||||
Threads: 4,
|
||||
ContextSize: 2048,
|
||||
}
|
||||
|
||||
watchdogEnabled := true
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogEnabled: &watchdogEnabled,
|
||||
// Leave other fields nil
|
||||
}
|
||||
|
||||
changed := appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(changed).To(BeTrue())
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
// Unchanged fields should remain
|
||||
Expect(appConfig.Threads).To(Equal(4))
|
||||
Expect(appConfig.ContextSize).To(Equal(2048))
|
||||
})
|
||||
|
||||
It("should apply watchdog settings and return changed=true", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
watchdogEnabled := true
|
||||
watchdogIdle := true
|
||||
watchdogBusy := true
|
||||
idleTimeout := "30m"
|
||||
busyTimeout := "15m"
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogEnabled: &watchdogEnabled,
|
||||
WatchdogIdleEnabled: &watchdogIdle,
|
||||
WatchdogBusyEnabled: &watchdogBusy,
|
||||
WatchdogIdleTimeout: &idleTimeout,
|
||||
WatchdogBusyTimeout: &busyTimeout,
|
||||
}
|
||||
|
||||
changed := appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(changed).To(BeTrue())
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
Expect(appConfig.WatchDogIdle).To(BeTrue())
|
||||
Expect(appConfig.WatchDogBusy).To(BeTrue())
|
||||
Expect(appConfig.WatchDogIdleTimeout).To(Equal(30 * time.Minute))
|
||||
Expect(appConfig.WatchDogBusyTimeout).To(Equal(15 * time.Minute))
|
||||
})
|
||||
|
||||
It("should enable watchdog when idle is enabled", func() {
|
||||
appConfig := &ApplicationConfig{WatchDog: false}
|
||||
|
||||
watchdogIdle := true
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogIdleEnabled: &watchdogIdle,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
Expect(appConfig.WatchDogIdle).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should enable watchdog when busy is enabled", func() {
|
||||
appConfig := &ApplicationConfig{WatchDog: false}
|
||||
|
||||
watchdogBusy := true
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogBusyEnabled: &watchdogBusy,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
Expect(appConfig.WatchDogBusy).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should handle MaxActiveBackends and update SingleBackend accordingly", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
maxBackends := 1
|
||||
rs := &RuntimeSettings{
|
||||
MaxActiveBackends: &maxBackends,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.MaxActiveBackends).To(Equal(1))
|
||||
Expect(appConfig.SingleBackend).To(BeTrue())
|
||||
|
||||
// Test with multiple backends
|
||||
maxBackends = 5
|
||||
rs = &RuntimeSettings{
|
||||
MaxActiveBackends: &maxBackends,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.MaxActiveBackends).To(Equal(5))
|
||||
Expect(appConfig.SingleBackend).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should handle SingleBackend and update MaxActiveBackends accordingly", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
singleBackend := true
|
||||
rs := &RuntimeSettings{
|
||||
SingleBackend: &singleBackend,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.SingleBackend).To(BeTrue())
|
||||
Expect(appConfig.MaxActiveBackends).To(Equal(1))
|
||||
|
||||
// Test disabling single backend
|
||||
singleBackend = false
|
||||
rs = &RuntimeSettings{
|
||||
SingleBackend: &singleBackend,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.SingleBackend).To(BeFalse())
|
||||
Expect(appConfig.MaxActiveBackends).To(Equal(0))
|
||||
})
|
||||
|
||||
It("should enable watchdog when memory reclaimer is enabled", func() {
|
||||
appConfig := &ApplicationConfig{WatchDog: false}
|
||||
|
||||
memoryEnabled := true
|
||||
threshold := 0.90
|
||||
rs := &RuntimeSettings{
|
||||
MemoryReclaimerEnabled: &memoryEnabled,
|
||||
MemoryReclaimerThreshold: &threshold,
|
||||
}
|
||||
|
||||
changed := appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(changed).To(BeTrue())
|
||||
Expect(appConfig.WatchDog).To(BeTrue())
|
||||
Expect(appConfig.MemoryReclaimerEnabled).To(BeTrue())
|
||||
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.90))
|
||||
})
|
||||
|
||||
It("should reject invalid memory threshold values", func() {
|
||||
appConfig := &ApplicationConfig{MemoryReclaimerThreshold: 0.50}
|
||||
|
||||
// Test threshold > 1.0
|
||||
invalidThreshold := 1.5
|
||||
rs := &RuntimeSettings{
|
||||
MemoryReclaimerThreshold: &invalidThreshold,
|
||||
}
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged
|
||||
|
||||
// Test threshold <= 0
|
||||
invalidThreshold = 0.0
|
||||
rs = &RuntimeSettings{
|
||||
MemoryReclaimerThreshold: &invalidThreshold,
|
||||
}
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged
|
||||
|
||||
// Test negative threshold
|
||||
invalidThreshold = -0.5
|
||||
rs = &RuntimeSettings{
|
||||
MemoryReclaimerThreshold: &invalidThreshold,
|
||||
}
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged
|
||||
})
|
||||
|
||||
It("should accept valid memory threshold at boundary", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
// Test threshold = 1.0 (maximum valid)
|
||||
threshold := 1.0
|
||||
rs := &RuntimeSettings{
|
||||
MemoryReclaimerThreshold: &threshold,
|
||||
}
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(1.0))
|
||||
|
||||
// Test threshold just above 0
|
||||
threshold = 0.01
|
||||
rs = &RuntimeSettings{
|
||||
MemoryReclaimerThreshold: &threshold,
|
||||
}
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.01))
|
||||
})
|
||||
|
||||
It("should apply performance settings without triggering watchdog change", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
threads := 16
|
||||
contextSize := 8192
|
||||
f16 := true
|
||||
debug := true
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
Threads: &threads,
|
||||
ContextSize: &contextSize,
|
||||
F16: &f16,
|
||||
Debug: &debug,
|
||||
}
|
||||
|
||||
changed := appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
// These settings don't require watchdog restart
|
||||
Expect(changed).To(BeFalse())
|
||||
Expect(appConfig.Threads).To(Equal(16))
|
||||
Expect(appConfig.ContextSize).To(Equal(8192))
|
||||
Expect(appConfig.F16).To(BeTrue())
|
||||
Expect(appConfig.Debug).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should apply CORS and security settings", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
cors := true
|
||||
csrf := true
|
||||
origins := "https://example.com,https://other.com"
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
CORS: &cors,
|
||||
CSRF: &csrf,
|
||||
CORSAllowOrigins: &origins,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.CORS).To(BeTrue())
|
||||
Expect(appConfig.CSRF).To(BeTrue())
|
||||
Expect(appConfig.CORSAllowOrigins).To(Equal("https://example.com,https://other.com"))
|
||||
})
|
||||
|
||||
It("should apply P2P settings", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
token := "p2p-test-token"
|
||||
networkID := "p2p-test-network"
|
||||
federated := true
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
P2PToken: &token,
|
||||
P2PNetworkID: &networkID,
|
||||
Federated: &federated,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.P2PToken).To(Equal("p2p-test-token"))
|
||||
Expect(appConfig.P2PNetworkID).To(Equal("p2p-test-network"))
|
||||
Expect(appConfig.Federated).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should apply gallery settings", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
galleries := []Gallery{
|
||||
{Name: "gallery1", URL: "https://gallery1.com"},
|
||||
{Name: "gallery2", URL: "https://gallery2.com"},
|
||||
}
|
||||
backendGalleries := []Gallery{
|
||||
{Name: "backend-gallery", URL: "https://backend.com"},
|
||||
}
|
||||
autoload := true
|
||||
autoloadBackend := true
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
Galleries: &galleries,
|
||||
BackendGalleries: &backendGalleries,
|
||||
AutoloadGalleries: &autoload,
|
||||
AutoloadBackendGalleries: &autoloadBackend,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.Galleries).To(HaveLen(2))
|
||||
Expect(appConfig.Galleries[0].Name).To(Equal("gallery1"))
|
||||
Expect(appConfig.BackendGalleries).To(HaveLen(1))
|
||||
Expect(appConfig.AutoloadGalleries).To(BeTrue())
|
||||
Expect(appConfig.AutoloadBackendGalleries).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should apply agent settings", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
retentionDays := 14
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
AgentJobRetentionDays: &retentionDays,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
Expect(appConfig.AgentJobRetentionDays).To(Equal(14))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Round-trip conversion", func() {
|
||||
It("should maintain values through ToRuntimeSettings -> ApplyRuntimeSettings", func() {
|
||||
original := &ApplicationConfig{
|
||||
WatchDog: true,
|
||||
WatchDogIdle: true,
|
||||
WatchDogBusy: false,
|
||||
WatchDogIdleTimeout: 25 * time.Minute,
|
||||
WatchDogBusyTimeout: 12 * time.Minute,
|
||||
SingleBackend: false,
|
||||
MaxActiveBackends: 3,
|
||||
ParallelBackendRequests: true,
|
||||
MemoryReclaimerEnabled: true,
|
||||
MemoryReclaimerThreshold: 0.92,
|
||||
Threads: 12,
|
||||
ContextSize: 6144,
|
||||
F16: true,
|
||||
Debug: false,
|
||||
CORS: true,
|
||||
CSRF: false,
|
||||
CORSAllowOrigins: "https://test.com",
|
||||
P2PToken: "round-trip-token",
|
||||
P2PNetworkID: "round-trip-network",
|
||||
Federated: true,
|
||||
AutoloadGalleries: true,
|
||||
AutoloadBackendGalleries: false,
|
||||
AgentJobRetentionDays: 60,
|
||||
}
|
||||
|
||||
// Convert to RuntimeSettings
|
||||
rs := original.ToRuntimeSettings()
|
||||
|
||||
// Apply to a new ApplicationConfig
|
||||
target := &ApplicationConfig{}
|
||||
target.ApplyRuntimeSettings(&rs)
|
||||
|
||||
// Verify all values match
|
||||
Expect(target.WatchDog).To(Equal(original.WatchDog))
|
||||
Expect(target.WatchDogIdle).To(Equal(original.WatchDogIdle))
|
||||
Expect(target.WatchDogBusy).To(Equal(original.WatchDogBusy))
|
||||
Expect(target.WatchDogIdleTimeout).To(Equal(original.WatchDogIdleTimeout))
|
||||
Expect(target.WatchDogBusyTimeout).To(Equal(original.WatchDogBusyTimeout))
|
||||
Expect(target.MaxActiveBackends).To(Equal(original.MaxActiveBackends))
|
||||
Expect(target.ParallelBackendRequests).To(Equal(original.ParallelBackendRequests))
|
||||
Expect(target.MemoryReclaimerEnabled).To(Equal(original.MemoryReclaimerEnabled))
|
||||
Expect(target.MemoryReclaimerThreshold).To(Equal(original.MemoryReclaimerThreshold))
|
||||
Expect(target.Threads).To(Equal(original.Threads))
|
||||
Expect(target.ContextSize).To(Equal(original.ContextSize))
|
||||
Expect(target.F16).To(Equal(original.F16))
|
||||
Expect(target.Debug).To(Equal(original.Debug))
|
||||
Expect(target.CORS).To(Equal(original.CORS))
|
||||
Expect(target.CSRF).To(Equal(original.CSRF))
|
||||
Expect(target.CORSAllowOrigins).To(Equal(original.CORSAllowOrigins))
|
||||
Expect(target.P2PToken).To(Equal(original.P2PToken))
|
||||
Expect(target.P2PNetworkID).To(Equal(original.P2PNetworkID))
|
||||
Expect(target.Federated).To(Equal(original.Federated))
|
||||
Expect(target.AutoloadGalleries).To(Equal(original.AutoloadGalleries))
|
||||
Expect(target.AutoloadBackendGalleries).To(Equal(original.AutoloadBackendGalleries))
|
||||
Expect(target.AgentJobRetentionDays).To(Equal(original.AgentJobRetentionDays))
|
||||
})
|
||||
|
||||
It("should handle empty galleries correctly in round-trip", func() {
|
||||
original := &ApplicationConfig{
|
||||
Galleries: []Gallery{},
|
||||
BackendGalleries: []Gallery{},
|
||||
ApiKeys: []string{},
|
||||
}
|
||||
|
||||
rs := original.ToRuntimeSettings()
|
||||
target := &ApplicationConfig{}
|
||||
target.ApplyRuntimeSettings(&rs)
|
||||
|
||||
Expect(target.Galleries).To(BeEmpty())
|
||||
Expect(target.BackendGalleries).To(BeEmpty())
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Edge cases", func() {
|
||||
It("should handle invalid timeout string in ApplyRuntimeSettings", func() {
|
||||
appConfig := &ApplicationConfig{
|
||||
WatchDogIdleTimeout: 10 * time.Minute,
|
||||
}
|
||||
|
||||
invalidTimeout := "not-a-duration"
|
||||
rs := &RuntimeSettings{
|
||||
WatchdogIdleTimeout: &invalidTimeout,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
// Should remain unchanged due to parse error
|
||||
Expect(appConfig.WatchDogIdleTimeout).To(Equal(10 * time.Minute))
|
||||
})
|
||||
|
||||
It("should handle zero values in ApplicationConfig", func() {
|
||||
appConfig := &ApplicationConfig{
|
||||
// All zero values
|
||||
}
|
||||
|
||||
rs := appConfig.ToRuntimeSettings()
|
||||
|
||||
// Should still have non-nil pointers with zero/default values
|
||||
Expect(rs.WatchdogEnabled).ToNot(BeNil())
|
||||
Expect(*rs.WatchdogEnabled).To(BeFalse())
|
||||
|
||||
Expect(rs.Threads).ToNot(BeNil())
|
||||
Expect(*rs.Threads).To(Equal(0))
|
||||
|
||||
Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil())
|
||||
Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.0))
|
||||
})
|
||||
|
||||
It("should prefer MaxActiveBackends over SingleBackend when both are set", func() {
|
||||
appConfig := &ApplicationConfig{}
|
||||
|
||||
maxBackends := 3
|
||||
singleBackend := true
|
||||
|
||||
rs := &RuntimeSettings{
|
||||
MaxActiveBackends: &maxBackends,
|
||||
SingleBackend: &singleBackend,
|
||||
}
|
||||
|
||||
appConfig.ApplyRuntimeSettings(rs)
|
||||
|
||||
// MaxActiveBackends should take precedence
|
||||
Expect(appConfig.MaxActiveBackends).To(Equal(3))
|
||||
Expect(appConfig.SingleBackend).To(BeFalse()) // 3 != 1, so single backend is false
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,56 @@
|
||||
package config
|
||||
|
||||
// RuntimeSettings represents runtime configuration that can be changed dynamically.
|
||||
// This struct is used for:
|
||||
// - API responses (GET /api/settings)
|
||||
// - API requests (POST /api/settings)
|
||||
// - Persisting to runtime_settings.json
|
||||
// - Loading from runtime_settings.json on startup
|
||||
//
|
||||
// All fields are pointers to distinguish between "not set" and "set to zero/false value".
|
||||
type RuntimeSettings struct {
|
||||
// Watchdog settings
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
WatchdogInterval *string `json:"watchdog_interval,omitempty"` // Interval between watchdog checks (e.g., 2s, 30s)
|
||||
|
||||
// Backend management
|
||||
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
|
||||
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
|
||||
// Memory Reclaimer settings (works with GPU if available, otherwise RAM)
|
||||
MemoryReclaimerEnabled *bool `json:"memory_reclaimer_enabled,omitempty"` // Enable memory threshold monitoring
|
||||
MemoryReclaimerThreshold *float64 `json:"memory_reclaimer_threshold,omitempty"` // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
|
||||
|
||||
// Performance settings
|
||||
Threads *int `json:"threads,omitempty"`
|
||||
ContextSize *int `json:"context_size,omitempty"`
|
||||
F16 *bool `json:"f16,omitempty"`
|
||||
Debug *bool `json:"debug,omitempty"`
|
||||
|
||||
// Security/CORS settings
|
||||
CORS *bool `json:"cors,omitempty"`
|
||||
CSRF *bool `json:"csrf,omitempty"`
|
||||
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
|
||||
|
||||
// P2P settings
|
||||
P2PToken *string `json:"p2p_token,omitempty"`
|
||||
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
|
||||
Federated *bool `json:"federated,omitempty"`
|
||||
|
||||
// Gallery settings
|
||||
Galleries *[]Gallery `json:"galleries,omitempty"`
|
||||
BackendGalleries *[]Gallery `json:"backend_galleries,omitempty"`
|
||||
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
|
||||
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
|
||||
|
||||
// API keys - No omitempty as we need to save empty arrays to clear keys
|
||||
ApiKeys *[]string `json:"api_keys"`
|
||||
|
||||
// Agent settings
|
||||
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
||||
}
|
||||
@@ -12,115 +12,15 @@ import (
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type SettingsResponse struct {
|
||||
Success bool `json:"success"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
type RuntimeSettings struct {
|
||||
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
|
||||
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
|
||||
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
Threads *int `json:"threads,omitempty"`
|
||||
ContextSize *int `json:"context_size,omitempty"`
|
||||
F16 *bool `json:"f16,omitempty"`
|
||||
Debug *bool `json:"debug,omitempty"`
|
||||
CORS *bool `json:"cors,omitempty"`
|
||||
CSRF *bool `json:"csrf,omitempty"`
|
||||
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
|
||||
P2PToken *string `json:"p2p_token,omitempty"`
|
||||
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
|
||||
Federated *bool `json:"federated,omitempty"`
|
||||
Galleries *[]config.Gallery `json:"galleries,omitempty"`
|
||||
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
|
||||
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
|
||||
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
|
||||
ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys
|
||||
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
||||
}
|
||||
|
||||
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
|
||||
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
appConfig := app.ApplicationConfig()
|
||||
startupConfig := app.StartupConfig()
|
||||
|
||||
if startupConfig == nil {
|
||||
// Fallback if startup config not available
|
||||
startupConfig = appConfig
|
||||
}
|
||||
|
||||
settings := RuntimeSettings{}
|
||||
|
||||
// Set all current values (using pointers for RuntimeSettings)
|
||||
watchdogIdle := appConfig.WatchDogIdle
|
||||
watchdogBusy := appConfig.WatchDogBusy
|
||||
watchdogEnabled := appConfig.WatchDog
|
||||
singleBackend := appConfig.SingleBackend
|
||||
maxActiveBackends := appConfig.MaxActiveBackends
|
||||
parallelBackendRequests := appConfig.ParallelBackendRequests
|
||||
threads := appConfig.Threads
|
||||
contextSize := appConfig.ContextSize
|
||||
f16 := appConfig.F16
|
||||
debug := appConfig.Debug
|
||||
cors := appConfig.CORS
|
||||
csrf := appConfig.CSRF
|
||||
corsAllowOrigins := appConfig.CORSAllowOrigins
|
||||
p2pToken := appConfig.P2PToken
|
||||
p2pNetworkID := appConfig.P2PNetworkID
|
||||
federated := appConfig.Federated
|
||||
galleries := appConfig.Galleries
|
||||
backendGalleries := appConfig.BackendGalleries
|
||||
autoloadGalleries := appConfig.AutoloadGalleries
|
||||
autoloadBackendGalleries := appConfig.AutoloadBackendGalleries
|
||||
apiKeys := appConfig.ApiKeys
|
||||
agentJobRetentionDays := appConfig.AgentJobRetentionDays
|
||||
|
||||
settings.WatchdogIdleEnabled = &watchdogIdle
|
||||
settings.WatchdogBusyEnabled = &watchdogBusy
|
||||
settings.WatchdogEnabled = &watchdogEnabled
|
||||
settings.SingleBackend = &singleBackend
|
||||
settings.MaxActiveBackends = &maxActiveBackends
|
||||
settings.ParallelBackendRequests = ¶llelBackendRequests
|
||||
settings.Threads = &threads
|
||||
settings.ContextSize = &contextSize
|
||||
settings.F16 = &f16
|
||||
settings.Debug = &debug
|
||||
settings.CORS = &cors
|
||||
settings.CSRF = &csrf
|
||||
settings.CORSAllowOrigins = &corsAllowOrigins
|
||||
settings.P2PToken = &p2pToken
|
||||
settings.P2PNetworkID = &p2pNetworkID
|
||||
settings.Federated = &federated
|
||||
settings.Galleries = &galleries
|
||||
settings.BackendGalleries = &backendGalleries
|
||||
settings.AutoloadGalleries = &autoloadGalleries
|
||||
settings.AutoloadBackendGalleries = &autoloadBackendGalleries
|
||||
settings.ApiKeys = &apiKeys
|
||||
settings.AgentJobRetentionDays = &agentJobRetentionDays
|
||||
|
||||
var idleTimeout, busyTimeout string
|
||||
if appConfig.WatchDogIdleTimeout > 0 {
|
||||
idleTimeout = appConfig.WatchDogIdleTimeout.String()
|
||||
} else {
|
||||
idleTimeout = "15m" // default
|
||||
}
|
||||
if appConfig.WatchDogBusyTimeout > 0 {
|
||||
busyTimeout = appConfig.WatchDogBusyTimeout.String()
|
||||
} else {
|
||||
busyTimeout = "5m" // default
|
||||
}
|
||||
settings.WatchdogIdleTimeout = &idleTimeout
|
||||
settings.WatchdogBusyTimeout = &busyTimeout
|
||||
settings := appConfig.ToRuntimeSettings()
|
||||
return c.JSON(http.StatusOK, settings)
|
||||
}
|
||||
}
|
||||
@@ -132,21 +32,20 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
startupConfig := app.StartupConfig()
|
||||
|
||||
if startupConfig == nil {
|
||||
// Fallback if startup config not available
|
||||
startupConfig = appConfig
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(c.Request().Body)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to read request body: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
var settings RuntimeSettings
|
||||
var settings config.RuntimeSettings
|
||||
if err := json.Unmarshal(body, &settings); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to parse JSON: " + err.Error(),
|
||||
})
|
||||
@@ -154,27 +53,33 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
|
||||
// Validate timeouts if provided
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
_, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil {
|
||||
_, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
if settings.WatchdogInterval != nil {
|
||||
if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Invalid watchdog_interval format: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Save to file
|
||||
if appConfig.DynamicConfigsDir == "" {
|
||||
return c.JSON(http.StatusBadRequest, SettingsResponse{
|
||||
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "DynamicConfigsDir is not set",
|
||||
})
|
||||
@@ -183,133 +88,38 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to marshal settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to write settings file: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Apply settings immediately, checking env var overrides per field
|
||||
watchdogChanged := false
|
||||
if settings.WatchdogEnabled != nil {
|
||||
appConfig.WatchDog = *settings.WatchdogEnabled
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogIdleEnabled != nil {
|
||||
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
|
||||
if appConfig.WatchDogIdle {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogBusyEnabled != nil {
|
||||
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
|
||||
if appConfig.WatchDogBusy {
|
||||
appConfig.WatchDog = true
|
||||
}
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogIdleTimeout != nil {
|
||||
dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout)
|
||||
appConfig.WatchDogIdleTimeout = dur
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.WatchdogBusyTimeout != nil {
|
||||
dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout)
|
||||
appConfig.WatchDogBusyTimeout = dur
|
||||
watchdogChanged = true
|
||||
}
|
||||
if settings.MaxActiveBackends != nil {
|
||||
appConfig.MaxActiveBackends = *settings.MaxActiveBackends
|
||||
// For backward compatibility, update SingleBackend too
|
||||
appConfig.SingleBackend = (*settings.MaxActiveBackends == 1)
|
||||
watchdogChanged = true // LRU limit is managed by watchdog
|
||||
} else if settings.SingleBackend != nil {
|
||||
// Legacy support: SingleBackend maps to MaxActiveBackends = 1
|
||||
appConfig.SingleBackend = *settings.SingleBackend
|
||||
if *settings.SingleBackend {
|
||||
appConfig.MaxActiveBackends = 1
|
||||
} else {
|
||||
appConfig.MaxActiveBackends = 0
|
||||
}
|
||||
watchdogChanged = true // LRU limit is managed by watchdog
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil {
|
||||
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
}
|
||||
if settings.Threads != nil {
|
||||
appConfig.Threads = *settings.Threads
|
||||
}
|
||||
if settings.ContextSize != nil {
|
||||
appConfig.ContextSize = *settings.ContextSize
|
||||
}
|
||||
if settings.F16 != nil {
|
||||
appConfig.F16 = *settings.F16
|
||||
}
|
||||
if settings.Debug != nil {
|
||||
appConfig.Debug = *settings.Debug
|
||||
}
|
||||
if settings.CORS != nil {
|
||||
appConfig.CORS = *settings.CORS
|
||||
}
|
||||
if settings.CSRF != nil {
|
||||
appConfig.CSRF = *settings.CSRF
|
||||
}
|
||||
if settings.CORSAllowOrigins != nil {
|
||||
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
|
||||
}
|
||||
if settings.P2PToken != nil {
|
||||
appConfig.P2PToken = *settings.P2PToken
|
||||
}
|
||||
if settings.P2PNetworkID != nil {
|
||||
appConfig.P2PNetworkID = *settings.P2PNetworkID
|
||||
}
|
||||
if settings.Federated != nil {
|
||||
appConfig.Federated = *settings.Federated
|
||||
}
|
||||
if settings.Galleries != nil {
|
||||
appConfig.Galleries = *settings.Galleries
|
||||
}
|
||||
if settings.BackendGalleries != nil {
|
||||
appConfig.BackendGalleries = *settings.BackendGalleries
|
||||
}
|
||||
if settings.AutoloadGalleries != nil {
|
||||
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
|
||||
}
|
||||
if settings.AutoloadBackendGalleries != nil {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
agentJobChanged := false
|
||||
if settings.AgentJobRetentionDays != nil {
|
||||
appConfig.AgentJobRetentionDays = *settings.AgentJobRetentionDays
|
||||
agentJobChanged = true
|
||||
}
|
||||
// Apply settings using centralized method
|
||||
watchdogChanged := appConfig.ApplyRuntimeSettings(&settings)
|
||||
|
||||
// Handle API keys specially (merge with startup keys)
|
||||
if settings.ApiKeys != nil {
|
||||
// API keys from env vars (startup) should be kept, runtime settings keys are added
|
||||
// Combine startup keys (env vars) with runtime settings keys
|
||||
envKeys := startupConfig.ApiKeys
|
||||
runtimeKeys := *settings.ApiKeys
|
||||
// Merge: env keys first (they take precedence), then runtime keys
|
||||
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
|
||||
|
||||
// Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication
|
||||
// The runtime_settings.json is the unified config file. If api_keys.json exists,
|
||||
// it will be loaded first, but runtime_settings.json takes precedence and deduplicates.
|
||||
}
|
||||
|
||||
// Check if agent job retention changed
|
||||
agentJobChanged := settings.AgentJobRetentionDays != nil
|
||||
|
||||
// Restart watchdog if settings changed
|
||||
if watchdogChanged {
|
||||
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil {
|
||||
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled {
|
||||
if err := app.StopWatchdog(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to stop watchdog")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
|
||||
})
|
||||
@@ -317,7 +127,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
} else {
|
||||
if err := app.RestartWatchdog(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to restart watchdog")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
|
||||
})
|
||||
@@ -329,7 +139,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
if agentJobChanged {
|
||||
if err := app.RestartAgentJobService(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to restart agent job service")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to restart agent job service: " + err.Error(),
|
||||
})
|
||||
@@ -340,33 +150,30 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
|
||||
if p2pChanged {
|
||||
if settings.P2PToken != nil && *settings.P2PToken == "" {
|
||||
// stop P2P
|
||||
if err := app.StopP2P(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to stop P2P")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to stop P2P: " + err.Error(),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if settings.P2PToken != nil && *settings.P2PToken == "0" {
|
||||
// generate a token if users sets 0 (disabled)
|
||||
token := p2p.GenerateToken(60, 60)
|
||||
settings.P2PToken = &token
|
||||
appConfig.P2PToken = token
|
||||
}
|
||||
// Stop existing P2P
|
||||
if err := app.RestartP2P(); err != nil {
|
||||
log.Error().Err(err).Msg("Failed to stop P2P")
|
||||
return c.JSON(http.StatusInternalServerError, SettingsResponse{
|
||||
log.Error().Err(err).Msg("Failed to restart P2P")
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Settings saved but failed to stop P2P: " + err.Error(),
|
||||
Error: "Settings saved but failed to restart P2P: " + err.Error(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return c.JSON(http.StatusOK, SettingsResponse{
|
||||
return c.JSON(http.StatusOK, schema.SettingsResponse{
|
||||
Success: true,
|
||||
Message: "Settings updated successfully",
|
||||
})
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -917,6 +918,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
})
|
||||
})
|
||||
|
||||
// Resources API endpoint - unified memory info (GPU if available, otherwise RAM)
|
||||
app.GET("/api/resources", func(c echo.Context) error {
|
||||
resourceInfo := xsysinfo.GetResourceInfo()
|
||||
|
||||
// Format watchdog interval
|
||||
watchdogInterval := "2s" // default
|
||||
if appConfig.WatchDogInterval > 0 {
|
||||
watchdogInterval = appConfig.WatchDogInterval.String()
|
||||
}
|
||||
|
||||
response := map[string]interface{}{
|
||||
"type": resourceInfo.Type, // "gpu" or "ram"
|
||||
"available": resourceInfo.Available,
|
||||
"gpus": resourceInfo.GPUs,
|
||||
"ram": resourceInfo.RAM,
|
||||
"aggregate": resourceInfo.Aggregate,
|
||||
"reclaimer_enabled": appConfig.MemoryReclaimerEnabled,
|
||||
"reclaimer_threshold": appConfig.MemoryReclaimerThreshold,
|
||||
"watchdog_interval": watchdogInterval,
|
||||
}
|
||||
|
||||
return c.JSON(200, response)
|
||||
})
|
||||
|
||||
if !appConfig.DisableRuntimeSettings {
|
||||
// Settings API
|
||||
app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance))
|
||||
|
||||
@@ -462,6 +462,27 @@
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<!-- Memory Status Indicator (GPU or RAM) -->
|
||||
<div class="mb-4" x-data="resourceMonitor()" x-init="startPolling()">
|
||||
<template x-if="resourceData && resourceData.available">
|
||||
<div class="flex items-center justify-center gap-3 text-xs text-[var(--color-text-secondary)]">
|
||||
<div class="flex items-center gap-2 px-3 py-1.5 rounded-full bg-[var(--color-bg-secondary)] border border-[var(--color-primary-border)]/20">
|
||||
<i :class="resourceData.type === 'gpu' ? 'fas fa-microchip' : 'fas fa-memory'"
|
||||
:class="resourceData.aggregate.usage_percent > 90 ? 'text-red-400' : resourceData.aggregate.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"></i>
|
||||
<span class="text-[var(--color-text-secondary)]" x-text="resourceData.type === 'gpu' ? 'GPU' : 'RAM'"></span>
|
||||
<span class="font-mono"
|
||||
:class="resourceData.aggregate.usage_percent > 90 ? 'text-red-400' : resourceData.aggregate.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
|
||||
x-text="`${resourceData.aggregate.usage_percent.toFixed(0)}%`"></span>
|
||||
<div class="w-16 bg-[var(--color-bg-primary)] rounded-full h-1.5 overflow-hidden">
|
||||
<div class="h-full rounded-full transition-all duration-300"
|
||||
:class="resourceData.aggregate.usage_percent > 90 ? 'bg-red-500' : resourceData.aggregate.usage_percent > 70 ? 'bg-yellow-500' : 'bg-[var(--color-success)]'"
|
||||
:style="`width: ${resourceData.aggregate.usage_percent}%`"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<!-- Model Status Summary - Subtle -->
|
||||
{{ $loadedModels := .LoadedModels }}
|
||||
<div class="mb-8 flex items-center justify-center gap-2 text-xs text-[var(--color-text-secondary)]"
|
||||
@@ -687,6 +708,38 @@ async function stopAllModels(component) {
|
||||
// Make functions available globally for Alpine.js
|
||||
window.stopModel = stopModel;
|
||||
window.stopAllModels = stopAllModels;
|
||||
|
||||
// Resource Monitor component (GPU if available, otherwise RAM)
|
||||
function resourceMonitor() {
|
||||
return {
|
||||
resourceData: null,
|
||||
pollInterval: null,
|
||||
|
||||
async fetchResourceData() {
|
||||
try {
|
||||
const response = await fetch('/api/resources');
|
||||
if (response.ok) {
|
||||
this.resourceData = await response.json();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error fetching resource data:', error);
|
||||
}
|
||||
},
|
||||
|
||||
startPolling() {
|
||||
// Initial fetch
|
||||
this.fetchResourceData();
|
||||
// Poll every 5 seconds
|
||||
this.pollInterval = setInterval(() => this.fetchResourceData(), 5000);
|
||||
},
|
||||
|
||||
stopPolling() {
|
||||
if (this.pollInterval) {
|
||||
clearInterval(this.pollInterval);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
</body>
|
||||
|
||||
@@ -73,6 +73,106 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Memory Info Section (GPU or RAM) -->
|
||||
<div class="mt-8" x-data="resourceMonitor()" x-init="startPolling()">
|
||||
<template x-if="resourceData && resourceData.available">
|
||||
<div class="bg-[var(--color-bg-secondary)] border border-[var(--color-primary-border)]/20 rounded-lg p-4 mb-6">
|
||||
<div class="flex items-center justify-between mb-3">
|
||||
<h2 class="h3 flex items-center">
|
||||
<i :class="resourceData.type === 'gpu' ? 'fas fa-microchip' : 'fas fa-memory'" class="mr-2 text-[var(--color-primary)] text-sm"></i>
|
||||
<span x-text="resourceData.type === 'gpu' ? 'GPU Status' : 'Memory Status'"></span>
|
||||
</h2>
|
||||
<div class="flex items-center gap-2 text-xs text-[var(--color-text-secondary)]">
|
||||
<template x-if="resourceData.type === 'gpu'">
|
||||
<span x-text="`${resourceData.aggregate.gpu_count} GPU${resourceData.aggregate.gpu_count > 1 ? 's' : ''}`"></span>
|
||||
</template>
|
||||
<template x-if="resourceData.type === 'ram'">
|
||||
<span>System RAM</span>
|
||||
</template>
|
||||
<template x-if="resourceData.reclaimer_enabled">
|
||||
<span class="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-[var(--color-primary)]/10 text-[var(--color-primary)]">
|
||||
<i class="fas fa-shield-alt text-[8px] mr-1"></i>Reclaimer Active
|
||||
</span>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Per-GPU Stats (when GPU available) -->
|
||||
<template x-if="resourceData.type === 'gpu' && resourceData.gpus">
|
||||
<div class="space-y-3">
|
||||
<template x-for="gpu in resourceData.gpus" :key="gpu.index">
|
||||
<div class="bg-[var(--color-bg-primary)] rounded p-3">
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<div class="flex items-center gap-2">
|
||||
<span class="text-xs font-medium text-[var(--color-text-primary)] truncate max-w-[200px]" x-text="gpu.name"></span>
|
||||
<span class="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium"
|
||||
:class="gpu.vendor === 'nvidia' ? 'bg-green-500/10 text-green-300' :
|
||||
gpu.vendor === 'amd' ? 'bg-red-500/10 text-red-300' :
|
||||
gpu.vendor === 'intel' ? 'bg-blue-500/10 text-blue-300' :
|
||||
'bg-[var(--color-accent-light)] text-[var(--color-accent)]'"
|
||||
x-text="gpu.vendor.toUpperCase()">
|
||||
</span>
|
||||
</div>
|
||||
<span class="text-xs font-mono"
|
||||
:class="gpu.usage_percent > 90 ? 'text-red-400' : gpu.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
|
||||
x-text="`${gpu.usage_percent.toFixed(1)}%`"></span>
|
||||
</div>
|
||||
<!-- Progress Bar -->
|
||||
<div class="w-full bg-[var(--color-bg-secondary)] rounded-full h-2 overflow-hidden">
|
||||
<div class="h-full rounded-full transition-all duration-300"
|
||||
:class="gpu.usage_percent > 90 ? 'bg-red-500' : gpu.usage_percent > 70 ? 'bg-yellow-500' : 'bg-[var(--color-success)]'"
|
||||
:style="`width: ${gpu.usage_percent}%`"></div>
|
||||
</div>
|
||||
<div class="flex justify-between mt-1 text-[10px] text-[var(--color-text-secondary)]">
|
||||
<span x-text="`Used: ${formatBytes(gpu.used_vram)}`"></span>
|
||||
<span x-text="`Total: ${formatBytes(gpu.total_vram)}`"></span>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<!-- RAM Stats (when no GPU) -->
|
||||
<template x-if="resourceData.type === 'ram' && resourceData.ram">
|
||||
<div class="bg-[var(--color-bg-primary)] rounded p-3">
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<div class="flex items-center gap-2">
|
||||
<span class="text-xs font-medium text-[var(--color-text-primary)]">System RAM</span>
|
||||
<span class="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-[var(--color-accent-light)] text-[var(--color-accent)]">
|
||||
RAM
|
||||
</span>
|
||||
</div>
|
||||
<span class="text-xs font-mono"
|
||||
:class="resourceData.ram.usage_percent > 90 ? 'text-red-400' : resourceData.ram.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
|
||||
x-text="`${resourceData.ram.usage_percent.toFixed(1)}%`"></span>
|
||||
</div>
|
||||
<!-- Progress Bar -->
|
||||
<div class="w-full bg-[var(--color-bg-secondary)] rounded-full h-2 overflow-hidden">
|
||||
<div class="h-full rounded-full transition-all duration-300"
|
||||
:class="resourceData.ram.usage_percent > 90 ? 'bg-red-500' : resourceData.ram.usage_percent > 70 ? 'bg-yellow-500' : 'bg-[var(--color-success)]'"
|
||||
:style="`width: ${resourceData.ram.usage_percent}%`"></div>
|
||||
</div>
|
||||
<div class="flex justify-between mt-1 text-[10px] text-[var(--color-text-secondary)]">
|
||||
<span x-text="`Used: ${formatBytes(resourceData.ram.used)}`"></span>
|
||||
<span x-text="`Total: ${formatBytes(resourceData.ram.total)}`"></span>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<!-- Aggregate Stats (if multiple GPUs) -->
|
||||
<template x-if="resourceData.type === 'gpu' && resourceData.aggregate.gpu_count > 1">
|
||||
<div class="mt-3 pt-3 border-t border-[var(--color-primary-border)]/20">
|
||||
<div class="flex items-center justify-between text-xs">
|
||||
<span class="text-[var(--color-text-secondary)]">Total VRAM:</span>
|
||||
<span class="font-mono text-[var(--color-text-primary)]"
|
||||
x-text="`${formatBytes(resourceData.aggregate.used_memory)} / ${formatBytes(resourceData.aggregate.total_memory)} (${resourceData.aggregate.usage_percent.toFixed(1)}%)`"></span>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<!-- Models Section -->
|
||||
<div class="models mt-8">
|
||||
{{template "views/partials/inprogress" .}}
|
||||
@@ -426,6 +526,47 @@
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Resource Monitor component (GPU if available, otherwise RAM)
|
||||
function resourceMonitor() {
|
||||
return {
|
||||
resourceData: null,
|
||||
pollInterval: null,
|
||||
|
||||
async fetchResourceData() {
|
||||
try {
|
||||
const response = await fetch('/api/resources');
|
||||
if (response.ok) {
|
||||
this.resourceData = await response.json();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error fetching resource data:', error);
|
||||
}
|
||||
},
|
||||
|
||||
startPolling() {
|
||||
// Initial fetch
|
||||
this.fetchResourceData();
|
||||
// Poll every 5 seconds
|
||||
this.pollInterval = setInterval(() => this.fetchResourceData(), 5000);
|
||||
},
|
||||
|
||||
stopPolling() {
|
||||
if (this.pollInterval) {
|
||||
clearInterval(this.pollInterval);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to format bytes
|
||||
function formatBytes(bytes) {
|
||||
if (bytes === 0) return '0 B';
|
||||
const k = 1024;
|
||||
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||
return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
|
||||
}
|
||||
|
||||
// Alpine.js component for index dashboard
|
||||
function indexDashboard() {
|
||||
return {
|
||||
|
||||
@@ -124,6 +124,90 @@
|
||||
class="w-full px-3 py-2 bg-[var(--color-bg-primary)] border border-[var(--color-primary-border)]/20 rounded text-sm text-[var(--color-text-primary)] focus:outline-none focus:ring-2 focus:ring-[var(--color-primary-border)]"
|
||||
:class="!settings.watchdog_busy_enabled ? 'opacity-50 cursor-not-allowed' : ''">
|
||||
</div>
|
||||
|
||||
<!-- Watchdog Check Interval -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[var(--color-text-primary)] mb-2">Check Interval</label>
|
||||
<p class="text-xs text-[var(--color-text-secondary)] mb-2">How often the watchdog checks backends and memory usage (e.g., 2s, 30s)</p>
|
||||
<input type="text" x-model="settings.watchdog_interval"
|
||||
:disabled="!settings.watchdog_enabled"
|
||||
placeholder="2s"
|
||||
class="w-full px-3 py-2 bg-[var(--color-bg-primary)] border border-[var(--color-primary-border)]/20 rounded text-sm text-[var(--color-text-primary)] focus:outline-none focus:ring-2 focus:ring-[var(--color-primary-border)]"
|
||||
:class="!settings.watchdog_enabled ? 'opacity-50 cursor-not-allowed' : ''">
|
||||
</div>
|
||||
|
||||
<!-- Memory Reclaimer Subsection -->
|
||||
<div class="mt-6 pt-4 border-t border-[var(--color-primary-border)]/20">
|
||||
<h3 class="text-md font-medium text-[var(--color-text-primary)] mb-3 flex items-center">
|
||||
<i class="fas fa-memory mr-2 text-[var(--color-primary)] text-xs"></i>
|
||||
Memory Reclaimer
|
||||
</h3>
|
||||
<p class="text-xs text-[var(--color-text-secondary)] mb-4">
|
||||
Automatically evict backends when memory usage exceeds a threshold. Uses GPU VRAM if available, otherwise system RAM. Uses LRU strategy.
|
||||
</p>
|
||||
|
||||
<!-- Memory Status Preview -->
|
||||
<div x-data="resourceStatus()" x-init="fetchResource()" class="p-3 bg-[var(--color-bg-primary)] rounded mb-4">
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<span class="text-xs text-[var(--color-text-secondary)]" x-text="resourceData && resourceData.type === 'gpu' ? 'Current GPU Status' : 'Current Memory Status'">Current Memory Status</span>
|
||||
<button @click="fetchResource()" class="text-[10px] text-[var(--color-primary)] hover:underline">
|
||||
<i class="fas fa-sync-alt mr-1"></i>Refresh
|
||||
</button>
|
||||
</div>
|
||||
<template x-if="resourceData && resourceData.available && resourceData.type === 'gpu'">
|
||||
<div class="space-y-2">
|
||||
<template x-for="gpu in resourceData.gpus" :key="gpu.index">
|
||||
<div class="flex items-center justify-between text-xs">
|
||||
<span class="text-[var(--color-text-primary)] truncate max-w-[200px]" x-text="gpu.name"></span>
|
||||
<span class="font-mono"
|
||||
:class="gpu.usage_percent > 90 ? 'text-red-400' : gpu.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
|
||||
x-text="`${gpu.usage_percent.toFixed(1)}%`"></span>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="resourceData && resourceData.available && resourceData.type === 'ram'">
|
||||
<div class="flex items-center justify-between text-xs">
|
||||
<span class="text-[var(--color-text-primary)]">System RAM</span>
|
||||
<span class="font-mono"
|
||||
:class="resourceData.ram.usage_percent > 90 ? 'text-red-400' : resourceData.ram.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
|
||||
x-text="`${resourceData.ram.usage_percent.toFixed(1)}%`"></span>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="!resourceData || !resourceData.available">
|
||||
<p class="text-xs text-[var(--color-text-secondary)]">Memory monitoring unavailable</p>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<!-- Enable Memory Reclaimer -->
|
||||
<div class="flex items-center justify-between mb-4">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-[var(--color-text-primary)]">Enable Memory Reclaimer</label>
|
||||
<p class="text-xs text-[var(--color-text-secondary)] mt-1">Evict backends when memory usage exceeds threshold</p>
|
||||
</div>
|
||||
<label class="relative inline-flex items-center cursor-pointer">
|
||||
<input type="checkbox" x-model="settings.memory_reclaimer_enabled"
|
||||
:disabled="!settings.watchdog_enabled"
|
||||
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
|
||||
<div class="w-11 h-6 bg-[var(--color-bg-primary)] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[var(--color-primary-light)] rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[var(--color-primary)]"></div>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Memory Reclaimer Threshold -->
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-[var(--color-text-primary)] mb-2">Memory Threshold (%)</label>
|
||||
<p class="text-xs text-[var(--color-text-secondary)] mb-2">When memory usage exceeds this, backends will be evicted (50-100%)</p>
|
||||
<div class="flex items-center gap-3">
|
||||
<input type="range" x-model="settings.memory_reclaimer_threshold_percent"
|
||||
min="50" max="100" step="1"
|
||||
:disabled="!settings.memory_reclaimer_enabled || !settings.watchdog_enabled"
|
||||
class="flex-1 h-2 bg-[var(--color-bg-primary)] rounded-lg appearance-none cursor-pointer"
|
||||
:class="(!settings.memory_reclaimer_enabled || !settings.watchdog_enabled) ? 'opacity-50' : ''">
|
||||
<span class="text-sm font-mono text-[var(--color-text-primary)] w-12 text-right"
|
||||
x-text="`${settings.memory_reclaimer_threshold_percent}%`"></span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -460,8 +544,12 @@ function settingsDashboard() {
|
||||
watchdog_busy_enabled: false,
|
||||
watchdog_idle_timeout: '15m',
|
||||
watchdog_busy_timeout: '5m',
|
||||
watchdog_interval: '2s',
|
||||
max_active_backends: 0,
|
||||
parallel_backend_requests: false,
|
||||
memory_reclaimer_enabled: false,
|
||||
memory_reclaimer_threshold: 0.95,
|
||||
memory_reclaimer_threshold_percent: 95,
|
||||
threads: 0,
|
||||
context_size: 0,
|
||||
f16: false,
|
||||
@@ -498,8 +586,12 @@ function settingsDashboard() {
|
||||
watchdog_busy_enabled: data.watchdog_busy_enabled,
|
||||
watchdog_idle_timeout: data.watchdog_idle_timeout || '15m',
|
||||
watchdog_busy_timeout: data.watchdog_busy_timeout || '5m',
|
||||
watchdog_interval: data.watchdog_interval || '2s',
|
||||
max_active_backends: data.max_active_backends || 0,
|
||||
parallel_backend_requests: data.parallel_backend_requests,
|
||||
memory_reclaimer_enabled: data.memory_reclaimer_enabled || false,
|
||||
memory_reclaimer_threshold: data.memory_reclaimer_threshold || 0.95,
|
||||
memory_reclaimer_threshold_percent: Math.round((data.memory_reclaimer_threshold || 0.95) * 100),
|
||||
threads: data.threads || 0,
|
||||
context_size: data.context_size || 0,
|
||||
f16: data.f16 || false,
|
||||
@@ -531,6 +623,7 @@ function settingsDashboard() {
|
||||
if (!this.settings.watchdog_enabled) {
|
||||
this.settings.watchdog_idle_enabled = false;
|
||||
this.settings.watchdog_busy_enabled = false;
|
||||
this.settings.memory_reclaimer_enabled = false;
|
||||
}
|
||||
},
|
||||
|
||||
@@ -564,12 +657,22 @@ function settingsDashboard() {
|
||||
if (this.settings.watchdog_busy_timeout) {
|
||||
payload.watchdog_busy_timeout = this.settings.watchdog_busy_timeout;
|
||||
}
|
||||
if (this.settings.watchdog_interval) {
|
||||
payload.watchdog_interval = this.settings.watchdog_interval;
|
||||
}
|
||||
if (this.settings.max_active_backends !== undefined) {
|
||||
payload.max_active_backends = parseInt(this.settings.max_active_backends) || 0;
|
||||
}
|
||||
if (this.settings.parallel_backend_requests !== undefined) {
|
||||
payload.parallel_backend_requests = this.settings.parallel_backend_requests;
|
||||
}
|
||||
if (this.settings.memory_reclaimer_enabled !== undefined) {
|
||||
payload.memory_reclaimer_enabled = this.settings.memory_reclaimer_enabled;
|
||||
}
|
||||
if (this.settings.memory_reclaimer_threshold_percent !== undefined) {
|
||||
// Convert percent to decimal (0.0-1.0)
|
||||
payload.memory_reclaimer_threshold = parseInt(this.settings.memory_reclaimer_threshold_percent) / 100;
|
||||
}
|
||||
if (this.settings.threads !== undefined) {
|
||||
payload.threads = parseInt(this.settings.threads) || 0;
|
||||
}
|
||||
@@ -678,6 +781,24 @@ function settingsDashboard() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resource Status component for settings page (GPU if available, otherwise RAM)
|
||||
function resourceStatus() {
|
||||
return {
|
||||
resourceData: null,
|
||||
|
||||
async fetchResource() {
|
||||
try {
|
||||
const response = await fetch('/api/resources');
|
||||
if (response.ok) {
|
||||
this.resourceData = await response.json();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error fetching resource data:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
</body>
|
||||
|
||||
@@ -163,3 +163,10 @@ type ImportModelRequest struct {
|
||||
URI string `json:"uri"`
|
||||
Preferences json.RawMessage `json:"preferences,omitempty"`
|
||||
}
|
||||
|
||||
// SettingsResponse is the response type for settings API operations
|
||||
type SettingsResponse struct {
|
||||
Success bool `json:"success"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
+167
-17
@@ -5,6 +5,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||
process "github.com/mudler/go-processmanager"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -17,6 +18,9 @@ import (
|
||||
// force a reload of the model.
|
||||
// The watchdog also supports LRU (Least Recently Used) eviction when a maximum
|
||||
// number of active backends is configured.
|
||||
// The watchdog also supports memory threshold monitoring - when memory usage
|
||||
// (GPU VRAM if available, otherwise system RAM) exceeds the threshold,
|
||||
// it will evict backends using the LRU strategy.
|
||||
// The watchdog runs as a separate go routine,
|
||||
// and the GRPC client talks to it via a channel to send status updates
|
||||
type WatchDog struct {
|
||||
@@ -32,26 +36,48 @@ type WatchDog struct {
|
||||
|
||||
busyCheck, idleCheck bool
|
||||
lruLimit int // Maximum number of active backends (0 = unlimited)
|
||||
|
||||
// Memory reclaimer settings (works with GPU if available, otherwise RAM)
|
||||
memoryReclaimerEnabled bool // Enable memory threshold monitoring
|
||||
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
|
||||
watchdogInterval time.Duration
|
||||
}
|
||||
|
||||
type ProcessManager interface {
|
||||
ShutdownModel(modelName string) error
|
||||
}
|
||||
|
||||
func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool, lruLimit int) *WatchDog {
|
||||
// NewWatchDog creates a new WatchDog with the provided options.
|
||||
// Example usage:
|
||||
//
|
||||
// wd := NewWatchDog(
|
||||
// WithProcessManager(pm),
|
||||
// WithBusyTimeout(5*time.Minute),
|
||||
// WithIdleTimeout(15*time.Minute),
|
||||
// WithBusyCheck(true),
|
||||
// WithIdleCheck(true),
|
||||
// WithLRULimit(3),
|
||||
// WithMemoryReclaimer(true, 0.95),
|
||||
// )
|
||||
func NewWatchDog(opts ...WatchDogOption) *WatchDog {
|
||||
o := NewWatchDogOptions(opts...)
|
||||
|
||||
return &WatchDog{
|
||||
timeout: timeoutBusy,
|
||||
idletimeout: timeoutIdle,
|
||||
pm: pm,
|
||||
busyTime: make(map[string]time.Time),
|
||||
idleTime: make(map[string]time.Time),
|
||||
lastUsed: make(map[string]time.Time),
|
||||
addressMap: make(map[string]*process.Process),
|
||||
busyCheck: busy,
|
||||
idleCheck: idle,
|
||||
lruLimit: lruLimit,
|
||||
addressModelMap: make(map[string]string),
|
||||
stop: make(chan bool, 1),
|
||||
timeout: o.busyTimeout,
|
||||
idletimeout: o.idleTimeout,
|
||||
pm: o.processManager,
|
||||
busyTime: make(map[string]time.Time),
|
||||
idleTime: make(map[string]time.Time),
|
||||
lastUsed: make(map[string]time.Time),
|
||||
addressMap: make(map[string]*process.Process),
|
||||
busyCheck: o.busyCheck,
|
||||
idleCheck: o.idleCheck,
|
||||
lruLimit: o.lruLimit,
|
||||
addressModelMap: make(map[string]string),
|
||||
stop: make(chan bool, 1),
|
||||
memoryReclaimerEnabled: o.memoryReclaimerEnabled,
|
||||
memoryReclaimerThreshold: o.memoryReclaimerThreshold,
|
||||
watchdogInterval: o.watchdogInterval,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +95,21 @@ func (wd *WatchDog) GetLRULimit() int {
|
||||
return wd.lruLimit
|
||||
}
|
||||
|
||||
// SetMemoryReclaimer updates the memory reclaimer settings dynamically
|
||||
func (wd *WatchDog) SetMemoryReclaimer(enabled bool, threshold float64) {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
wd.memoryReclaimerEnabled = enabled
|
||||
wd.memoryReclaimerThreshold = threshold
|
||||
}
|
||||
|
||||
// GetMemoryReclaimerSettings returns the current memory reclaimer settings
|
||||
func (wd *WatchDog) GetMemoryReclaimerSettings() (enabled bool, threshold float64) {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
return wd.memoryReclaimerEnabled, wd.memoryReclaimerThreshold
|
||||
}
|
||||
|
||||
func (wd *WatchDog) Shutdown() {
|
||||
wd.Lock()
|
||||
defer wd.Unlock()
|
||||
@@ -202,17 +243,27 @@ func (wd *WatchDog) Run() {
|
||||
case <-wd.stop:
|
||||
log.Info().Msg("[WatchDog] Stopping watchdog")
|
||||
return
|
||||
case <-time.After(30 * time.Second):
|
||||
if !wd.busyCheck && !wd.idleCheck {
|
||||
case <-time.After(wd.watchdogInterval):
|
||||
// Check if any monitoring is enabled
|
||||
wd.Lock()
|
||||
busyCheck := wd.busyCheck
|
||||
idleCheck := wd.idleCheck
|
||||
memoryCheck := wd.memoryReclaimerEnabled
|
||||
wd.Unlock()
|
||||
|
||||
if !busyCheck && !idleCheck && !memoryCheck {
|
||||
log.Info().Msg("[WatchDog] No checks enabled, stopping watchdog")
|
||||
return
|
||||
}
|
||||
if wd.busyCheck {
|
||||
if busyCheck {
|
||||
wd.checkBusy()
|
||||
}
|
||||
if wd.idleCheck {
|
||||
if idleCheck {
|
||||
wd.checkIdle()
|
||||
}
|
||||
if memoryCheck {
|
||||
wd.checkMemory()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -278,6 +329,105 @@ func (wd *WatchDog) checkBusy() {
|
||||
}
|
||||
}
|
||||
|
||||
// checkMemory monitors memory usage (GPU VRAM if available, otherwise RAM) and evicts backends when usage exceeds threshold
|
||||
func (wd *WatchDog) checkMemory() {
|
||||
wd.Lock()
|
||||
threshold := wd.memoryReclaimerThreshold
|
||||
enabled := wd.memoryReclaimerEnabled
|
||||
modelCount := len(wd.addressModelMap)
|
||||
wd.Unlock()
|
||||
|
||||
if !enabled || threshold <= 0 || modelCount == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Get current memory usage (GPU if available, otherwise RAM)
|
||||
aggregate := xsysinfo.GetResourceAggregateInfo()
|
||||
if aggregate.TotalMemory == 0 {
|
||||
log.Debug().Msg("[WatchDog] No memory information available for memory reclaimer")
|
||||
return
|
||||
}
|
||||
|
||||
// Convert threshold from 0.0-1.0 to percentage
|
||||
thresholdPercent := threshold * 100
|
||||
|
||||
memoryType := "GPU"
|
||||
if aggregate.GPUCount == 0 {
|
||||
memoryType = "RAM"
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Str("type", memoryType).
|
||||
Float64("usage_percent", aggregate.UsagePercent).
|
||||
Float64("threshold_percent", thresholdPercent).
|
||||
Int("loaded_models", modelCount).
|
||||
Msg("[WatchDog] Memory check")
|
||||
|
||||
// Check if usage exceeds threshold
|
||||
if aggregate.UsagePercent > thresholdPercent {
|
||||
log.Warn().
|
||||
Str("type", memoryType).
|
||||
Float64("usage_percent", aggregate.UsagePercent).
|
||||
Float64("threshold_percent", thresholdPercent).
|
||||
Msg("[WatchDog] Memory usage exceeds threshold, evicting LRU backend")
|
||||
|
||||
// Evict the least recently used model
|
||||
wd.evictLRUModel()
|
||||
}
|
||||
}
|
||||
|
||||
// evictLRUModel evicts the least recently used model
|
||||
func (wd *WatchDog) evictLRUModel() {
|
||||
wd.Lock()
|
||||
|
||||
if len(wd.addressModelMap) == 0 {
|
||||
wd.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// Build a list of models sorted by last used time (oldest first)
|
||||
var models []modelUsageInfo
|
||||
for address, model := range wd.addressModelMap {
|
||||
lastUsed := wd.lastUsed[address]
|
||||
if lastUsed.IsZero() {
|
||||
lastUsed = time.Time{}
|
||||
}
|
||||
models = append(models, modelUsageInfo{
|
||||
address: address,
|
||||
model: model,
|
||||
lastUsed: lastUsed,
|
||||
})
|
||||
}
|
||||
|
||||
if len(models) == 0 {
|
||||
wd.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// Sort by lastUsed time (oldest first)
|
||||
sort.Slice(models, func(i, j int) bool {
|
||||
return models[i].lastUsed.Before(models[j].lastUsed)
|
||||
})
|
||||
|
||||
// Get the LRU model
|
||||
lruModel := models[0]
|
||||
log.Info().
|
||||
Str("model", lruModel.model).
|
||||
Time("lastUsed", lruModel.lastUsed).
|
||||
Msg("[WatchDog] Memory reclaimer evicting LRU model")
|
||||
|
||||
// Untrack the model
|
||||
wd.untrack(lruModel.address)
|
||||
wd.Unlock()
|
||||
|
||||
// Shutdown the model
|
||||
if err := wd.pm.ShutdownModel(lruModel.model); err != nil {
|
||||
log.Error().Err(err).Str("model", lruModel.model).Msg("[WatchDog] error shutting down model during memory reclamation")
|
||||
} else {
|
||||
log.Info().Str("model", lruModel.model).Msg("[WatchDog] Memory reclaimer eviction complete")
|
||||
}
|
||||
}
|
||||
|
||||
func (wd *WatchDog) untrack(address string) {
|
||||
delete(wd.busyTime, address)
|
||||
delete(wd.idleTime, address)
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// WatchDogOptions contains all configuration for the WatchDog
|
||||
type WatchDogOptions struct {
|
||||
processManager ProcessManager
|
||||
|
||||
// Timeout settings
|
||||
busyTimeout time.Duration
|
||||
idleTimeout time.Duration
|
||||
watchdogInterval time.Duration
|
||||
|
||||
// Check toggles
|
||||
busyCheck bool
|
||||
idleCheck bool
|
||||
|
||||
// LRU settings
|
||||
lruLimit int // Maximum number of active backends (0 = unlimited)
|
||||
|
||||
// Memory reclaimer settings (works with GPU if available, otherwise RAM)
|
||||
memoryReclaimerEnabled bool // Enable memory threshold monitoring
|
||||
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
|
||||
}
|
||||
|
||||
// WatchDogOption is a function that configures WatchDogOptions
|
||||
type WatchDogOption func(*WatchDogOptions)
|
||||
|
||||
// WithProcessManager sets the process manager for the watchdog
|
||||
func WithProcessManager(pm ProcessManager) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.processManager = pm
|
||||
}
|
||||
}
|
||||
|
||||
// WithBusyTimeout sets the busy timeout duration
|
||||
func WithBusyTimeout(timeout time.Duration) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.busyTimeout = timeout
|
||||
}
|
||||
}
|
||||
|
||||
// WithIdleTimeout sets the idle timeout duration
|
||||
func WithIdleTimeout(timeout time.Duration) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.idleTimeout = timeout
|
||||
}
|
||||
}
|
||||
|
||||
// WithWatchdogCheck sets the watchdog check duration
|
||||
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.watchdogInterval = interval
|
||||
}
|
||||
}
|
||||
|
||||
// WithBusyCheck enables or disables busy checking
|
||||
func WithBusyCheck(enabled bool) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.busyCheck = enabled
|
||||
}
|
||||
}
|
||||
|
||||
// WithIdleCheck enables or disables idle checking
|
||||
func WithIdleCheck(enabled bool) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.idleCheck = enabled
|
||||
}
|
||||
}
|
||||
|
||||
// WithLRULimit sets the maximum number of active backends (0 = unlimited)
|
||||
func WithLRULimit(limit int) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.lruLimit = limit
|
||||
}
|
||||
}
|
||||
|
||||
// WithMemoryReclaimer enables memory threshold monitoring with the specified threshold
|
||||
// Works with GPU VRAM if available, otherwise uses system RAM
|
||||
func WithMemoryReclaimer(enabled bool, threshold float64) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.memoryReclaimerEnabled = enabled
|
||||
o.memoryReclaimerThreshold = threshold
|
||||
}
|
||||
}
|
||||
|
||||
// WithMemoryReclaimerEnabled enables or disables memory threshold monitoring
|
||||
func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.memoryReclaimerEnabled = enabled
|
||||
}
|
||||
}
|
||||
|
||||
// WithMemoryReclaimerThreshold sets the memory threshold (0.0-1.0)
|
||||
func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption {
|
||||
return func(o *WatchDogOptions) {
|
||||
o.memoryReclaimerThreshold = threshold
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultWatchDogOptions returns default options for the watchdog
|
||||
func DefaultWatchDogOptions() *WatchDogOptions {
|
||||
return &WatchDogOptions{
|
||||
busyTimeout: 5 * time.Minute,
|
||||
idleTimeout: 15 * time.Minute,
|
||||
watchdogInterval: 2 * time.Second,
|
||||
busyCheck: false,
|
||||
idleCheck: false,
|
||||
lruLimit: 0,
|
||||
memoryReclaimerEnabled: false,
|
||||
memoryReclaimerThreshold: 0.95,
|
||||
}
|
||||
}
|
||||
|
||||
// NewWatchDogOptions creates WatchDogOptions with the provided options applied
|
||||
func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions {
|
||||
o := DefaultWatchDogOptions()
|
||||
for _, opt := range opts {
|
||||
opt(o)
|
||||
}
|
||||
return o
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
package model_test
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Ginkgo spec for the WatchDog functional-options API: verifies defaults,
// option ordering/overriding, each individual option, and combinations.
// Relies on the package-local newMockProcessManager test helper.
var _ = Describe("WatchDogOptions", func() {
	Context("DefaultWatchDogOptions", func() {
		It("should return sensible defaults", func() {
			opts := model.DefaultWatchDogOptions()

			Expect(opts).ToNot(BeNil())
		})
	})

	Context("NewWatchDogOptions", func() {
		It("should apply options in order", func() {
			pm := newMockProcessManager()
			opts := model.NewWatchDogOptions(
				model.WithProcessManager(pm),
				model.WithBusyTimeout(10*time.Minute),
				model.WithIdleTimeout(20*time.Minute),
				model.WithBusyCheck(true),
				model.WithIdleCheck(true),
				model.WithLRULimit(5),
				model.WithMemoryReclaimer(true, 0.85),
			)

			Expect(opts).ToNot(BeNil())
		})

		It("should allow overriding options", func() {
			opts := model.NewWatchDogOptions(
				model.WithLRULimit(3),
				model.WithLRULimit(7), // override
			)

			// Create watchdog to verify
			// (options have unexported fields, so the later-option-wins
			// behavior is observed through the watchdog's getter)
			wd := model.NewWatchDog(
				model.WithProcessManager(newMockProcessManager()),
				model.WithLRULimit(3),
				model.WithLRULimit(7), // override
			)
			Expect(wd.GetLRULimit()).To(Equal(7))

			Expect(opts).ToNot(BeNil())
		})
	})

	Context("Individual Options", func() {
		var pm *mockProcessManager

		BeforeEach(func() {
			pm = newMockProcessManager()
		})

		It("WithProcessManager should set process manager", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithBusyTimeout should set busy timeout", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithBusyTimeout(7*time.Minute),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithIdleTimeout should set idle timeout", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithIdleTimeout(25*time.Minute),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithBusyCheck should enable busy checking", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithBusyCheck(true),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithIdleCheck should enable idle checking", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithIdleCheck(true),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithLRULimit should set LRU limit", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithLRULimit(10),
			)
			Expect(wd.GetLRULimit()).To(Equal(10))
		})

		It("WithMemoryReclaimer should set both enabled and threshold", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithMemoryReclaimer(true, 0.88),
			)
			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeTrue())
			Expect(threshold).To(Equal(0.88))
		})

		It("WithMemoryReclaimerEnabled should set enabled flag only", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithMemoryReclaimerEnabled(true),
			)
			enabled, _ := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeTrue())
		})

		It("WithMemoryReclaimerThreshold should set threshold only", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithMemoryReclaimerThreshold(0.75),
			)
			_, threshold := wd.GetMemoryReclaimerSettings()
			Expect(threshold).To(Equal(0.75))
		})
	})

	Context("Option Combinations", func() {
		It("should work with all options combined", func() {
			pm := newMockProcessManager()
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithBusyTimeout(3*time.Minute),
				model.WithIdleTimeout(10*time.Minute),
				model.WithBusyCheck(true),
				model.WithIdleCheck(true),
				model.WithLRULimit(2),
				model.WithMemoryReclaimerEnabled(true),
				model.WithMemoryReclaimerThreshold(0.92),
			)

			Expect(wd).ToNot(BeNil())
			Expect(wd.GetLRULimit()).To(Equal(2))

			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeTrue())
			Expect(threshold).To(Equal(0.92))
		})

		It("should work with no options (all defaults)", func() {
			wd := model.NewWatchDog()

			Expect(wd).ToNot(BeNil())
			Expect(wd.GetLRULimit()).To(Equal(0))

			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeFalse())
			Expect(threshold).To(Equal(0.95)) // default
		})

		It("should allow partial configuration", func() {
			pm := newMockProcessManager()
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithLRULimit(3),
			)

			Expect(wd).ToNot(BeNil())
			Expect(wd.GetLRULimit()).To(Equal(3))

			// Memory reclaimer should use defaults
			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeFalse())
			Expect(threshold).To(Equal(0.95))
		})
	})
})
|
||||
|
||||
+105
-6
@@ -53,25 +53,82 @@ var _ = Describe("WatchDog", func() {
|
||||
|
||||
Context("LRU Limit", func() {
|
||||
It("should create watchdog with LRU limit", func() {
|
||||
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2)
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithBusyTimeout(5*time.Minute),
|
||||
model.WithIdleTimeout(15*time.Minute),
|
||||
model.WithLRULimit(2),
|
||||
)
|
||||
Expect(wd.GetLRULimit()).To(Equal(2))
|
||||
})
|
||||
|
||||
It("should allow updating LRU limit dynamically", func() {
|
||||
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2)
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithLRULimit(2),
|
||||
)
|
||||
wd.SetLRULimit(5)
|
||||
Expect(wd.GetLRULimit()).To(Equal(5))
|
||||
})
|
||||
|
||||
It("should return 0 for disabled LRU", func() {
|
||||
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 0)
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithLRULimit(0),
|
||||
)
|
||||
Expect(wd.GetLRULimit()).To(Equal(0))
|
||||
})
|
||||
})
|
||||
|
||||
Context("Memory Reclaimer Options", func() {
|
||||
It("should create watchdog with memory reclaimer settings", func() {
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithMemoryReclaimer(true, 0.85),
|
||||
)
|
||||
enabled, threshold := wd.GetMemoryReclaimerSettings()
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(threshold).To(Equal(0.85))
|
||||
})
|
||||
|
||||
It("should allow setting memory reclaimer via separate options", func() {
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithMemoryReclaimerEnabled(true),
|
||||
model.WithMemoryReclaimerThreshold(0.90),
|
||||
)
|
||||
enabled, threshold := wd.GetMemoryReclaimerSettings()
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(threshold).To(Equal(0.90))
|
||||
})
|
||||
|
||||
It("should use default threshold when not specified", func() {
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
)
|
||||
_, threshold := wd.GetMemoryReclaimerSettings()
|
||||
Expect(threshold).To(Equal(0.95)) // default
|
||||
})
|
||||
|
||||
It("should allow updating memory reclaimer settings dynamically", func() {
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
)
|
||||
wd.SetMemoryReclaimer(true, 0.80)
|
||||
enabled, threshold := wd.GetMemoryReclaimerSettings()
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(threshold).To(Equal(0.80))
|
||||
})
|
||||
})
|
||||
|
||||
Context("Model Tracking", func() {
|
||||
BeforeEach(func() {
|
||||
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 3)
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithBusyTimeout(5*time.Minute),
|
||||
model.WithIdleTimeout(15*time.Minute),
|
||||
model.WithLRULimit(3),
|
||||
)
|
||||
})
|
||||
|
||||
It("should track loaded models count", func() {
|
||||
@@ -108,7 +165,12 @@ var _ = Describe("WatchDog", func() {
|
||||
|
||||
Context("EnforceLRULimit", func() {
|
||||
BeforeEach(func() {
|
||||
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2)
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithBusyTimeout(5*time.Minute),
|
||||
model.WithIdleTimeout(15*time.Minute),
|
||||
model.WithLRULimit(2),
|
||||
)
|
||||
})
|
||||
|
||||
It("should not evict when under limit", func() {
|
||||
@@ -218,7 +280,12 @@ var _ = Describe("WatchDog", func() {
|
||||
|
||||
Context("Single Backend Mode (LRU=1)", func() {
|
||||
BeforeEach(func() {
|
||||
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 1)
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithBusyTimeout(5*time.Minute),
|
||||
model.WithIdleTimeout(15*time.Minute),
|
||||
model.WithLRULimit(1),
|
||||
)
|
||||
})
|
||||
|
||||
It("should evict existing model when loading new one", func() {
|
||||
@@ -241,4 +308,36 @@ var _ = Describe("WatchDog", func() {
|
||||
Expect(len(pm.getShutdownCalls())).To(Equal(5))
|
||||
})
|
||||
})
|
||||
|
||||
Context("Functional Options", func() {
|
||||
It("should use default options when none provided", func() {
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
)
|
||||
Expect(wd.GetLRULimit()).To(Equal(0))
|
||||
|
||||
enabled, threshold := wd.GetMemoryReclaimerSettings()
|
||||
Expect(enabled).To(BeFalse())
|
||||
Expect(threshold).To(Equal(0.95))
|
||||
})
|
||||
|
||||
It("should allow combining multiple options", func() {
|
||||
wd = model.NewWatchDog(
|
||||
model.WithProcessManager(pm),
|
||||
model.WithBusyTimeout(10*time.Minute),
|
||||
model.WithIdleTimeout(30*time.Minute),
|
||||
model.WithBusyCheck(true),
|
||||
model.WithIdleCheck(true),
|
||||
model.WithLRULimit(5),
|
||||
model.WithMemoryReclaimerEnabled(true),
|
||||
model.WithMemoryReclaimerThreshold(0.80),
|
||||
)
|
||||
|
||||
Expect(wd.GetLRULimit()).To(Equal(5))
|
||||
|
||||
enabled, threshold := wd.GetMemoryReclaimerSettings()
|
||||
Expect(enabled).To(BeTrue())
|
||||
Expect(threshold).To(Equal(0.80))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,13 +1,83 @@
|
||||
package xsysinfo
|
||||
|
||||
import (
	"bytes"
	"encoding/json"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"sync"

	"github.com/jaypipes/ghw"
	"github.com/jaypipes/ghw/pkg/gpu"
	"github.com/rs/zerolog/log"
)
|
||||
|
||||
// GPU vendor constants.
// These identifiers are stored in GPUMemoryInfo.Vendor to record which
// tool family reported a device (nvidia-smi, rocm-smi, xpu-smi, vulkaninfo).
const (
	VendorNVIDIA  = "nvidia"
	VendorAMD     = "amd"
	VendorIntel   = "intel"
	VendorVulkan  = "vulkan"
	VendorUnknown = "unknown"
)
|
||||
|
||||
// UnifiedMemoryDevices is a list of GPU device name patterns that use unified memory
// (shared with system RAM). When these devices are detected and report N/A for VRAM,
// we fall back to system RAM information.
// Matching is a case-insensitive substring test (see isUnifiedMemoryDevice).
var UnifiedMemoryDevices = []string{
	"NVIDIA GB10",
	"GB10",
	// Add more unified memory devices here as needed
}
|
||||
|
||||
// GPUMemoryInfo contains real-time GPU memory usage information for a
// single device, as reported by a vendor-specific tool.
type GPUMemoryInfo struct {
	Index        int     `json:"index"`         // Position in the detected-device list
	Name         string  `json:"name"`          // Device name as reported by the tool
	Vendor       string  `json:"vendor"`        // One of the Vendor* constants
	TotalVRAM    uint64  `json:"total_vram"`    // Total VRAM in bytes
	UsedVRAM     uint64  `json:"used_vram"`     // Used VRAM in bytes
	FreeVRAM     uint64  `json:"free_vram"`     // Free VRAM in bytes
	UsagePercent float64 `json:"usage_percent"` // Usage as percentage (0-100)
}
|
||||
|
||||
// GPUAggregateInfo contains aggregate GPU information across all GPUs:
// VRAM totals summed over every detected device, plus the device count.
type GPUAggregateInfo struct {
	TotalVRAM    uint64  `json:"total_vram"`    // Sum of per-device TotalVRAM, bytes
	UsedVRAM     uint64  `json:"used_vram"`     // Sum of per-device UsedVRAM, bytes
	FreeVRAM     uint64  `json:"free_vram"`     // Sum of per-device FreeVRAM, bytes
	UsagePercent float64 `json:"usage_percent"` // UsedVRAM/TotalVRAM * 100 (0 when no VRAM reported)
	GPUCount     int     `json:"gpu_count"`     // Number of detected devices
}
|
||||
|
||||
// SystemRAMInfo contains system RAM usage information, in bytes.
type SystemRAMInfo struct {
	Total        uint64  `json:"total"`         // Total usable RAM
	Used         uint64  `json:"used"`          // Total minus Available
	Free         uint64  `json:"free"`          // MemFree (truly unused)
	Available    uint64  `json:"available"`     // Reclaimable without swapping (MemAvailable when present)
	UsagePercent float64 `json:"usage_percent"` // Used/Total * 100
}
|
||||
|
||||
// AggregateMemoryInfo contains aggregate memory information (unified for GPU/RAM).
// Depending on ResourceInfo.Type the fields describe either total VRAM
// across all GPUs or system RAM.
type AggregateMemoryInfo struct {
	TotalMemory  uint64  `json:"total_memory"`  // Total memory in bytes
	UsedMemory   uint64  `json:"used_memory"`   // Used memory in bytes
	FreeMemory   uint64  `json:"free_memory"`   // Free memory in bytes
	UsagePercent float64 `json:"usage_percent"` // Used/Total * 100
	GPUCount     int     `json:"gpu_count"`     // 0 when the source is system RAM
}
|
||||
|
||||
// ResourceInfo represents unified memory resource information: either
// per-GPU VRAM stats or a system-RAM fallback, plus the aggregate view.
type ResourceInfo struct {
	Type      string              `json:"type"`           // "gpu" or "ram"
	Available bool                `json:"available"`      // false only when the RAM fallback itself failed
	GPUs      []GPUMemoryInfo     `json:"gpus,omitempty"` // Per-device stats when Type == "gpu"
	RAM       *SystemRAMInfo      `json:"ram,omitempty"`  // RAM stats when Type == "ram"
	Aggregate AggregateMemoryInfo `json:"aggregate"`      // Unified totals for either source
}
|
||||
|
||||
var (
|
||||
gpuCache []*gpu.GraphicsCard
|
||||
gpuCacheOnce sync.Once
|
||||
@@ -60,3 +130,632 @@ func HasGPU(vendor string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isUnifiedMemoryDevice checks if the given GPU name matches any known unified memory device
|
||||
func isUnifiedMemoryDevice(gpuName string) bool {
|
||||
gpuNameUpper := strings.ToUpper(gpuName)
|
||||
for _, pattern := range UnifiedMemoryDevices {
|
||||
if strings.Contains(gpuNameUpper, strings.ToUpper(pattern)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// getSystemRAM returns system RAM information using ghw.
//
// NOTE(review): used and free are placeholders, not measurements — ghw only
// reports total usable bytes here. The function deliberately reports
// free == total and used == 0 so that a unified-memory GPU is credited with
// the whole of system RAM; callers needing real usage should use
// GetSystemRAMInfo instead. Confirm this is still the intended contract.
func getSystemRAM() (total, used, free uint64, err error) {
	memory, err := ghw.Memory()
	if err != nil {
		return 0, 0, 0, err
	}

	total = uint64(memory.TotalUsableBytes)
	// ghw doesn't provide used/free directly, but we can estimate
	// For unified memory GPUs, we report total system RAM as available VRAM
	// since the GPU can potentially use all of it
	free = total
	used = 0

	return total, used, free, nil
}
|
||||
|
||||
// GetGPUMemoryUsage returns real-time GPU memory usage for all detected GPUs.
|
||||
// It tries multiple vendor-specific tools in order: NVIDIA, AMD, Intel, Vulkan.
|
||||
// Returns an empty slice if no GPU monitoring tools are available.
|
||||
func GetGPUMemoryUsage() []GPUMemoryInfo {
|
||||
var gpus []GPUMemoryInfo
|
||||
|
||||
// Try NVIDIA first
|
||||
nvidiaGPUs := getNVIDIAGPUMemory()
|
||||
if len(nvidiaGPUs) > 0 {
|
||||
gpus = append(gpus, nvidiaGPUs...)
|
||||
}
|
||||
|
||||
// XXX: Note - I could not test this with AMD and Intel GPUs, so I'm not sure if it works and it was added with the help of AI.
|
||||
|
||||
// Try AMD ROCm
|
||||
amdGPUs := getAMDGPUMemory()
|
||||
if len(amdGPUs) > 0 {
|
||||
// Adjust indices to continue from NVIDIA GPUs
|
||||
startIdx := len(gpus)
|
||||
for i := range amdGPUs {
|
||||
amdGPUs[i].Index = startIdx + i
|
||||
}
|
||||
gpus = append(gpus, amdGPUs...)
|
||||
}
|
||||
|
||||
// Try Intel
|
||||
intelGPUs := getIntelGPUMemory()
|
||||
if len(intelGPUs) > 0 {
|
||||
startIdx := len(gpus)
|
||||
for i := range intelGPUs {
|
||||
intelGPUs[i].Index = startIdx + i
|
||||
}
|
||||
gpus = append(gpus, intelGPUs...)
|
||||
}
|
||||
|
||||
// Try Vulkan as fallback for device detection (limited real-time data)
|
||||
if len(gpus) == 0 {
|
||||
vulkanGPUs := getVulkanGPUMemory()
|
||||
gpus = append(gpus, vulkanGPUs...)
|
||||
}
|
||||
|
||||
return gpus
|
||||
}
|
||||
|
||||
// GetGPUAggregateInfo returns aggregate GPU information across all GPUs
|
||||
func GetGPUAggregateInfo() GPUAggregateInfo {
|
||||
gpus := GetGPUMemoryUsage()
|
||||
|
||||
var aggregate GPUAggregateInfo
|
||||
aggregate.GPUCount = len(gpus)
|
||||
|
||||
for _, gpu := range gpus {
|
||||
aggregate.TotalVRAM += gpu.TotalVRAM
|
||||
aggregate.UsedVRAM += gpu.UsedVRAM
|
||||
aggregate.FreeVRAM += gpu.FreeVRAM
|
||||
}
|
||||
|
||||
if aggregate.TotalVRAM > 0 {
|
||||
aggregate.UsagePercent = float64(aggregate.UsedVRAM) / float64(aggregate.TotalVRAM) * 100
|
||||
}
|
||||
|
||||
return aggregate
|
||||
}
|
||||
|
||||
// getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi.
// It parses the CSV output of a memory query, one line per device, and
// returns nil when nvidia-smi is absent or fails. Devices that report
// "[N/A]" for memory are treated specially: known unified-memory devices
// (see UnifiedMemoryDevices) fall back to system RAM figures, others are
// reported with zeroed memory fields.
func getNVIDIAGPUMemory() []GPUMemoryInfo {
	// Check if nvidia-smi is available
	if _, err := exec.LookPath("nvidia-smi"); err != nil {
		return nil
	}

	// nounits makes the memory columns plain numbers (MiB per nvidia-smi docs).
	cmd := exec.Command("nvidia-smi",
		"--query-gpu=index,name,memory.total,memory.used,memory.free",
		"--format=csv,noheader,nounits")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("nvidia-smi failed")
		return nil
	}

	var gpus []GPUMemoryInfo
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")

	for _, line := range lines {
		if line == "" {
			continue
		}

		// CSV columns are separated by ", " in this output format.
		parts := strings.Split(line, ", ")
		if len(parts) < 5 {
			continue
		}

		idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
		name := strings.TrimSpace(parts[1])
		totalStr := strings.TrimSpace(parts[2])
		usedStr := strings.TrimSpace(parts[3])
		freeStr := strings.TrimSpace(parts[4])

		var totalBytes, usedBytes, freeBytes uint64
		var usagePercent float64

		// Check if memory values are N/A (unified memory devices like GB10)
		isNA := totalStr == "[N/A]" || usedStr == "[N/A]" || freeStr == "[N/A]"

		if isNA && isUnifiedMemoryDevice(name) {
			// Unified memory device - fall back to system RAM
			sysTotal, sysUsed, sysFree, err := getSystemRAM()
			if err != nil {
				log.Debug().Err(err).Str("device", name).Msg("failed to get system RAM for unified memory device")
				// Still add the GPU but with zero memory info
				gpus = append(gpus, GPUMemoryInfo{
					Index:        idx,
					Name:         name,
					Vendor:       VendorNVIDIA,
					TotalVRAM:    0,
					UsedVRAM:     0,
					FreeVRAM:     0,
					UsagePercent: 0,
				})
				continue
			}

			totalBytes = sysTotal
			usedBytes = sysUsed
			freeBytes = sysFree
			// NOTE(review): getSystemRAM currently reports used == 0, so
			// usagePercent is effectively 0 for unified-memory devices.
			if totalBytes > 0 {
				usagePercent = float64(usedBytes) / float64(totalBytes) * 100
			}

			log.Debug().
				Str("device", name).
				Uint64("system_ram_bytes", totalBytes).
				Msg("using system RAM for unified memory GPU")
		} else if isNA {
			// Unknown device with N/A values - skip memory info
			log.Debug().Str("device", name).Msg("nvidia-smi returned N/A for unknown device")
			gpus = append(gpus, GPUMemoryInfo{
				Index:        idx,
				Name:         name,
				Vendor:       VendorNVIDIA,
				TotalVRAM:    0,
				UsedVRAM:     0,
				FreeVRAM:     0,
				UsagePercent: 0,
			})
			continue
		} else {
			// Normal GPU with dedicated VRAM
			totalMB, _ := strconv.ParseFloat(totalStr, 64)
			usedMB, _ := strconv.ParseFloat(usedStr, 64)
			freeMB, _ := strconv.ParseFloat(freeStr, 64)

			// Convert MB to bytes
			totalBytes = uint64(totalMB * 1024 * 1024)
			usedBytes = uint64(usedMB * 1024 * 1024)
			freeBytes = uint64(freeMB * 1024 * 1024)

			if totalBytes > 0 {
				usagePercent = float64(usedBytes) / float64(totalBytes) * 100
			}
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         name,
			Vendor:       VendorNVIDIA,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}
|
||||
|
||||
// getAMDGPUMemory queries AMD GPUs using rocm-smi.
// Parses the CSV output of `rocm-smi --showmeminfo vram --csv`, one line
// per device; returns nil when rocm-smi is absent or fails.
//
// NOTE(review): the parser assumes the CSV columns are
// device, used, total — confirm against the locally installed rocm-smi
// version, as column order has varied across releases.
func getAMDGPUMemory() []GPUMemoryInfo {
	// Check if rocm-smi is available
	if _, err := exec.LookPath("rocm-smi"); err != nil {
		return nil
	}

	// Try CSV format first
	cmd := exec.Command("rocm-smi", "--showmeminfo", "vram", "--csv")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("rocm-smi failed")
		return nil
	}

	var gpus []GPUMemoryInfo
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")

	// Skip header line
	for i, line := range lines {
		if i == 0 || line == "" {
			continue
		}

		parts := strings.Split(line, ",")
		if len(parts) < 3 {
			continue
		}

		// Parse GPU index from first column (usually "GPU[0]" format)
		idxStr := strings.TrimSpace(parts[0])
		idx := 0
		if strings.HasPrefix(idxStr, "GPU[") {
			idxStr = strings.TrimPrefix(idxStr, "GPU[")
			idxStr = strings.TrimSuffix(idxStr, "]")
			idx, _ = strconv.Atoi(idxStr)
		}

		// Parse memory values (in bytes or MB depending on rocm-smi version)
		usedBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64)
		totalBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64)

		// If values seem like MB, convert to bytes
		// NOTE(review): heuristic — a total below 1,000,000 is assumed to be
		// MB rather than bytes; a GPU with < ~1 MB of VRAM would be misread.
		if totalBytes < 1000000 {
			usedBytes *= 1024 * 1024
			totalBytes *= 1024 * 1024
		}

		// Clamp rather than underflow if used somehow exceeds total.
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}

		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         "AMD GPU",
			Vendor:       VendorAMD,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}
|
||||
|
||||
// getIntelGPUMemory queries Intel GPUs using xpu-smi or intel_gpu_top
|
||||
func getIntelGPUMemory() []GPUMemoryInfo {
|
||||
// Try xpu-smi first (Intel's official GPU management tool)
|
||||
gpus := getIntelXPUSMI()
|
||||
if len(gpus) > 0 {
|
||||
return gpus
|
||||
}
|
||||
|
||||
// Fallback to intel_gpu_top
|
||||
return getIntelGPUTop()
|
||||
}
|
||||
|
||||
// getIntelXPUSMI queries Intel GPUs using xpu-smi.
// It discovers devices via `xpu-smi discovery --json`, then queries each
// device's usage via `xpu-smi stats`; a failed per-device stats call
// degrades to usedBytes == 0 rather than dropping the device.
// Returns nil when xpu-smi is absent or discovery fails.
func getIntelXPUSMI() []GPUMemoryInfo {
	if _, err := exec.LookPath("xpu-smi"); err != nil {
		return nil
	}

	// Get device list
	cmd := exec.Command("xpu-smi", "discovery", "--json")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("xpu-smi discovery failed")
		return nil
	}

	// Parse JSON output
	var result struct {
		DeviceList []struct {
			DeviceID                int    `json:"device_id"`
			DeviceName              string `json:"device_name"`
			VendorName              string `json:"vendor_name"`
			MemoryPhysicalSizeBytes uint64 `json:"memory_physical_size_byte"`
		} `json:"device_list"`
	}

	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		log.Debug().Err(err).Msg("failed to parse xpu-smi discovery output")
		return nil
	}

	var gpus []GPUMemoryInfo

	for _, device := range result.DeviceList {
		// Get memory usage for this device
		statsCmd := exec.Command("xpu-smi", "stats", "-d", strconv.Itoa(device.DeviceID), "--json")

		var statsStdout bytes.Buffer
		statsCmd.Stdout = &statsStdout

		// NOTE(review): memory_used is assumed to be in bytes — confirm the
		// unit against the installed xpu-smi version.
		usedBytes := uint64(0)
		if err := statsCmd.Run(); err == nil {
			var stats struct {
				DeviceID   int    `json:"device_id"`
				MemoryUsed uint64 `json:"memory_used"`
			}
			if err := json.Unmarshal(statsStdout.Bytes(), &stats); err == nil {
				usedBytes = stats.MemoryUsed
			}
		}

		totalBytes := device.MemoryPhysicalSizeBytes
		// Clamp rather than underflow if used somehow exceeds total.
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}

		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}

		gpus = append(gpus, GPUMemoryInfo{
			Index:        device.DeviceID,
			Name:         device.DeviceName,
			Vendor:       VendorIntel,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}

	return gpus
}
|
||||
|
||||
// getIntelGPUTop queries Intel GPUs using intel_gpu_top
|
||||
func getIntelGPUTop() []GPUMemoryInfo {
|
||||
if _, err := exec.LookPath("intel_gpu_top"); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// intel_gpu_top with -J outputs JSON, -s 1 for single sample
|
||||
cmd := exec.Command("intel_gpu_top", "-J", "-s", "1")
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
log.Debug().Err(err).Str("stderr", stderr.String()).Msg("intel_gpu_top failed")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse JSON output - intel_gpu_top outputs NDJSON
|
||||
lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
|
||||
if len(lines) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Take the last complete JSON object
|
||||
var lastJSON string
|
||||
for i := len(lines) - 1; i >= 0; i-- {
|
||||
if strings.HasPrefix(strings.TrimSpace(lines[i]), "{") {
|
||||
lastJSON = lines[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if lastJSON == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Engines map[string]interface{} `json:"engines"`
|
||||
// Memory info if available
|
||||
}
|
||||
|
||||
if err := json.Unmarshal([]byte(lastJSON), &result); err != nil {
|
||||
log.Debug().Err(err).Msg("failed to parse intel_gpu_top output")
|
||||
return nil
|
||||
}
|
||||
|
||||
// intel_gpu_top doesn't always provide memory info
|
||||
// Return empty if we can't get useful data
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetSystemRAMInfo returns real-time system RAM usage.
// Total comes from ghw; used/available/free come from /proc/meminfo when
// readable (see getDetailedMemoryInfo), otherwise a zero-used fallback.
// Returns an error only if ghw itself fails.
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
	memory, err := ghw.Memory()
	if err != nil {
		return nil, err
	}

	total := uint64(memory.TotalUsableBytes)

	// Try to get more accurate memory info from /proc/meminfo on Linux
	used, available, free := getDetailedMemoryInfo(total)

	usagePercent := 0.0
	if total > 0 {
		usagePercent = float64(used) / float64(total) * 100
	}

	return &SystemRAMInfo{
		Total:        total,
		Used:         used,
		Free:         free,
		Available:    available,
		UsagePercent: usagePercent,
	}, nil
}
|
||||
|
||||
// getDetailedMemoryInfo tries to get detailed memory info from /proc/meminfo on Linux.
// Returns used, available, and free memory in bytes.
//
// On platforms without /proc/meminfo (or on read error) it falls back to
// reporting zero used memory, with all of total counted as available and free.
func getDetailedMemoryInfo(total uint64) (used, available, free uint64) {
	// Read /proc/meminfo directly instead of shelling out to `cat`:
	// no subprocess per call, identical content.
	data, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		// Fallback: assume all memory is available
		return 0, total, total
	}

	memInfo := make(map[string]uint64)
	for _, line := range strings.Split(string(data), "\n") {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		key := strings.TrimSuffix(fields[0], ":")
		value, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			continue
		}
		// Values in /proc/meminfo are in kB
		memInfo[key] = value * 1024
	}

	// Get MemAvailable if present (preferred), otherwise calculate from free + buffers + cached
	if avail, ok := memInfo["MemAvailable"]; ok {
		available = avail
	} else {
		available = memInfo["MemFree"] + memInfo["Buffers"] + memInfo["Cached"]
	}

	free = memInfo["MemFree"]

	// Calculate used memory (clamped at zero to avoid uint underflow)
	if total > available {
		used = total - available
	}

	return used, available, free
}
|
||||
|
||||
// GetResourceInfo returns GPU info if available, otherwise system RAM info
|
||||
func GetResourceInfo() ResourceInfo {
|
||||
gpus := GetGPUMemoryUsage()
|
||||
|
||||
if len(gpus) > 0 {
|
||||
// GPU available - return GPU info
|
||||
aggregate := GetGPUAggregateInfo()
|
||||
return ResourceInfo{
|
||||
Type: "gpu",
|
||||
Available: true,
|
||||
GPUs: gpus,
|
||||
RAM: nil,
|
||||
Aggregate: AggregateMemoryInfo{
|
||||
TotalMemory: aggregate.TotalVRAM,
|
||||
UsedMemory: aggregate.UsedVRAM,
|
||||
FreeMemory: aggregate.FreeVRAM,
|
||||
UsagePercent: aggregate.UsagePercent,
|
||||
GPUCount: aggregate.GPUCount,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// No GPU - fall back to system RAM
|
||||
ramInfo, err := GetSystemRAMInfo()
|
||||
if err != nil {
|
||||
log.Debug().Err(err).Msg("failed to get system RAM info")
|
||||
return ResourceInfo{
|
||||
Type: "ram",
|
||||
Available: false,
|
||||
Aggregate: AggregateMemoryInfo{},
|
||||
}
|
||||
}
|
||||
|
||||
return ResourceInfo{
|
||||
Type: "ram",
|
||||
Available: true,
|
||||
GPUs: nil,
|
||||
RAM: ramInfo,
|
||||
Aggregate: AggregateMemoryInfo{
|
||||
TotalMemory: ramInfo.Total,
|
||||
UsedMemory: ramInfo.Used,
|
||||
FreeMemory: ramInfo.Free,
|
||||
UsagePercent: ramInfo.UsagePercent,
|
||||
GPUCount: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetResourceAggregateInfo returns aggregate memory info (GPU if available, otherwise RAM)
|
||||
// This is used by the memory reclaimer to check memory usage
|
||||
func GetResourceAggregateInfo() AggregateMemoryInfo {
|
||||
resourceInfo := GetResourceInfo()
|
||||
return resourceInfo.Aggregate
|
||||
}
|
||||
|
||||
// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback
|
||||
// Note: Vulkan provides memory heap info but not real-time usage
|
||||
func getVulkanGPUMemory() []GPUMemoryInfo {
|
||||
if _, err := exec.LookPath("vulkaninfo"); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cmd := exec.Command("vulkaninfo", "--json")
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
log.Debug().Err(err).Str("stderr", stderr.String()).Msg("vulkaninfo failed")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse Vulkan JSON output
|
||||
var result struct {
|
||||
VkPhysicalDevices []struct {
|
||||
DeviceName string `json:"deviceName"`
|
||||
DeviceType string `json:"deviceType"`
|
||||
VkPhysicalDeviceMemoryProperties struct {
|
||||
MemoryHeaps []struct {
|
||||
Flags int `json:"flags"`
|
||||
Size uint64 `json:"size"`
|
||||
} `json:"memoryHeaps"`
|
||||
} `json:"VkPhysicalDeviceMemoryProperties"`
|
||||
} `json:"VkPhysicalDevices"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
|
||||
log.Debug().Err(err).Msg("failed to parse vulkaninfo output")
|
||||
return nil
|
||||
}
|
||||
|
||||
var gpus []GPUMemoryInfo
|
||||
|
||||
for i, device := range result.VkPhysicalDevices {
|
||||
// Skip non-discrete/integrated GPUs if possible
|
||||
if device.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Sum up device-local memory heaps
|
||||
var totalVRAM uint64
|
||||
for _, heap := range device.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
|
||||
// Flag 1 = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
|
||||
if heap.Flags&1 != 0 {
|
||||
totalVRAM += heap.Size
|
||||
}
|
||||
}
|
||||
|
||||
if totalVRAM == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
gpus = append(gpus, GPUMemoryInfo{
|
||||
Index: i,
|
||||
Name: device.DeviceName,
|
||||
Vendor: VendorVulkan,
|
||||
TotalVRAM: totalVRAM,
|
||||
UsedVRAM: 0, // Vulkan doesn't provide real-time usage
|
||||
FreeVRAM: totalVRAM,
|
||||
UsagePercent: 0,
|
||||
})
|
||||
}
|
||||
|
||||
return gpus
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user