feat(watchdog): add Memory resource reclaimer (#7583)

* feat(watchdog): add GPU reclaimer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Handle vram calculation for unified memory devices

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Support RAM eviction, set watchdog interval from runtime settings

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-12-16 09:15:18 +01:00
committed by GitHub
parent dbd25885c3
commit 50f9c9a058
18 changed files with 2621 additions and 312 deletions
+12 -28
View File
@@ -185,33 +185,6 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan
return handler
}
// runtimeSettings mirrors the JSON schema of runtime_settings.json.
// Every field is a pointer so that json.Unmarshal can distinguish
// "absent from the file" (nil) from an explicit zero value; callers
// apply only the non-nil fields on top of the startup configuration.
type runtimeSettings struct {
	// Watchdog toggles and timeouts (timeouts are Go duration strings, e.g. "15m").
	WatchdogEnabled     *bool   `json:"watchdog_enabled,omitempty"`
	WatchdogIdleEnabled *bool   `json:"watchdog_idle_enabled,omitempty"`
	WatchdogBusyEnabled *bool   `json:"watchdog_busy_enabled,omitempty"`
	WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
	WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
	SingleBackend       *bool   `json:"single_backend,omitempty"`      // Deprecated: use MaxActiveBackends = 1 instead
	MaxActiveBackends   *int    `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
	ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
	// Model-execution defaults.
	Threads     *int  `json:"threads,omitempty"`
	ContextSize *int  `json:"context_size,omitempty"`
	F16         *bool `json:"f16,omitempty"`
	Debug       *bool `json:"debug,omitempty"`
	// HTTP hardening options.
	CORS             *bool   `json:"cors,omitempty"`
	CSRF             *bool   `json:"csrf,omitempty"`
	CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
	// P2P / federation settings.
	P2PToken     *string `json:"p2p_token,omitempty"`
	P2PNetworkID *string `json:"p2p_network_id,omitempty"`
	Federated    *bool   `json:"federated,omitempty"`
	// Gallery sources and autoload behavior.
	Galleries                *[]config.Gallery `json:"galleries,omitempty"`
	BackendGalleries         *[]config.Gallery `json:"backend_galleries,omitempty"`
	AutoloadGalleries        *bool             `json:"autoload_galleries,omitempty"`
	AutoloadBackendGalleries *bool             `json:"autoload_backend_galleries,omitempty"`
	ApiKeys                  *[]string         `json:"api_keys,omitempty"`
	AgentJobRetentionDays    *int              `json:"agent_job_retention_days,omitempty"`
}
func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing runtime_settings.json")
@@ -227,6 +200,8 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
envMemoryReclaimerEnabled := appConfig.MemoryReclaimerEnabled == startupAppConfig.MemoryReclaimerEnabled
envMemoryReclaimerThreshold := appConfig.MemoryReclaimerThreshold == startupAppConfig.MemoryReclaimerThreshold
envThreads := appConfig.Threads == startupAppConfig.Threads
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
envF16 := appConfig.F16 == startupAppConfig.F16
@@ -242,7 +217,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
if len(fileContent) > 0 {
var settings runtimeSettings
var settings config.RuntimeSettings
err := json.Unmarshal(fileContent, &settings)
if err != nil {
return err
@@ -294,6 +269,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
if settings.ParallelBackendRequests != nil && !envParallelRequests {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.MemoryReclaimerEnabled != nil && !envMemoryReclaimerEnabled {
appConfig.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
if appConfig.MemoryReclaimerEnabled {
appConfig.WatchDog = true // Memory reclaimer requires watchdog
}
}
if settings.MemoryReclaimerThreshold != nil && !envMemoryReclaimerThreshold {
appConfig.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
}
if settings.Threads != nil && !envThreads {
appConfig.Threads = *settings.Threads
}
+41 -21
View File
@@ -218,17 +218,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
return
}
var settings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited)
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
}
var settings config.RuntimeSettings
if err := json.Unmarshal(fileContent, &settings); err != nil {
log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
@@ -281,6 +271,16 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
}
}
}
if settings.WatchdogInterval != nil {
if options.WatchDogInterval == 0 {
dur, err := time.ParseDuration(*settings.WatchdogInterval)
if err == nil {
options.WatchDogInterval = dur
} else {
log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json")
}
}
}
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
if settings.MaxActiveBackends != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
@@ -303,6 +303,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
options.ParallelBackendRequests = *settings.ParallelBackendRequests
}
}
if settings.MemoryReclaimerEnabled != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.MemoryReclaimerEnabled {
options.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
if options.MemoryReclaimerEnabled {
options.WatchDog = true // Memory reclaimer requires watchdog
}
}
}
if settings.MemoryReclaimerThreshold != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.MemoryReclaimerThreshold == 0 {
options.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
}
}
if settings.AgentJobRetentionDays != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.AgentJobRetentionDays == 0 {
@@ -323,19 +338,24 @@ func initializeWatchdog(application *Application, options *config.ApplicationCon
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
lruLimit := options.GetEffectiveMaxActiveBackends()
// Create watchdog if enabled OR if LRU limit is set
if options.WatchDog || lruLimit > 0 {
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
if options.WatchDog || lruLimit > 0 || options.MemoryReclaimerEnabled {
wd := model.NewWatchDog(
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle,
lruLimit)
model.WithProcessManager(application.ModelLoader()),
model.WithBusyTimeout(options.WatchDogBusyTimeout),
model.WithIdleTimeout(options.WatchDogIdleTimeout),
model.WithWatchdogInterval(options.WatchDogInterval),
model.WithBusyCheck(options.WatchDogBusy),
model.WithIdleCheck(options.WatchDogIdle),
model.WithLRULimit(lruLimit),
model.WithMemoryReclaimer(options.MemoryReclaimerEnabled, options.MemoryReclaimerThreshold),
)
application.ModelLoader().SetWatchDog(wd)
// Start watchdog goroutine only if busy/idle checks are enabled
if options.WatchDogBusy || options.WatchDogIdle {
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
// But memory reclaimer needs the Run() loop for periodic checking
if options.WatchDogBusy || options.WatchDogIdle || options.MemoryReclaimerEnabled {
go wd.Run()
}
+22 -11
View File
@@ -23,24 +23,28 @@ func (a *Application) startWatchdog() error {
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
lruLimit := appConfig.GetEffectiveMaxActiveBackends()
// Create watchdog if enabled OR if LRU limit is set
// Create watchdog if enabled OR if LRU limit is set OR if memory reclaimer is enabled
// LRU eviction requires watchdog infrastructure even without busy/idle checks
if appConfig.WatchDog || lruLimit > 0 {
if appConfig.WatchDog || lruLimit > 0 || appConfig.MemoryReclaimerEnabled {
wd := model.NewWatchDog(
a.modelLoader,
appConfig.WatchDogBusyTimeout,
appConfig.WatchDogIdleTimeout,
appConfig.WatchDogBusy,
appConfig.WatchDogIdle,
lruLimit)
model.WithProcessManager(a.modelLoader),
model.WithBusyTimeout(appConfig.WatchDogBusyTimeout),
model.WithIdleTimeout(appConfig.WatchDogIdleTimeout),
model.WithWatchdogInterval(appConfig.WatchDogInterval),
model.WithBusyCheck(appConfig.WatchDogBusy),
model.WithIdleCheck(appConfig.WatchDogIdle),
model.WithLRULimit(lruLimit),
model.WithMemoryReclaimer(appConfig.MemoryReclaimerEnabled, appConfig.MemoryReclaimerThreshold),
)
a.modelLoader.SetWatchDog(wd)
// Create new stop channel
a.watchdogStop = make(chan bool, 1)
// Start watchdog goroutine only if busy/idle checks are enabled
// Start watchdog goroutine if any periodic checks are enabled
// LRU eviction doesn't need the Run() loop - it's triggered on model load
if appConfig.WatchDogBusy || appConfig.WatchDogIdle {
// But memory reclaimer needs the Run() loop for periodic checking
if appConfig.WatchDogBusy || appConfig.WatchDogIdle || appConfig.MemoryReclaimerEnabled {
go wd.Run()
}
@@ -56,7 +60,14 @@ func (a *Application) startWatchdog() error {
}
}()
log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings")
log.Info().
Int("lruLimit", lruLimit).
Bool("busyCheck", appConfig.WatchDogBusy).
Bool("idleCheck", appConfig.WatchDogIdle).
Bool("memoryReclaimer", appConfig.MemoryReclaimerEnabled).
Float64("memoryThreshold", appConfig.MemoryReclaimerThreshold).
Dur("interval", appConfig.WatchDogInterval).
Msg("Watchdog started with new settings")
} else {
log.Info().Msg("Watchdog disabled")
}
+8
View File
@@ -72,6 +72,8 @@ type RunCMD struct {
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"`
MemoryReclaimerThreshold float64 `env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
@@ -200,6 +202,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.SetWatchDogBusyTimeout(dur))
}
}
// Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM)
if r.EnableMemoryReclaimer {
opts = append(opts, config.WithMemoryReclaimer(true, r.MemoryReclaimerThreshold))
}
if r.ParallelRequests {
opts = append(opts, config.EnableParallelBackendRequests)
}
+240
View File
@@ -60,9 +60,14 @@ type ApplicationConfig struct {
WatchDogBusy bool
WatchDog bool
// Memory Reclaimer settings (works with GPU if available, otherwise RAM)
MemoryReclaimerEnabled bool // Enable memory threshold monitoring
MemoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
WatchDogInterval time.Duration // Interval between watchdog checks
MachineTag string
@@ -187,6 +192,39 @@ func SetWatchDogIdleTimeout(t time.Duration) AppOption {
}
}
// EnableMemoryReclaimer is an AppOption that turns on memory-threshold
// monitoring. Because the reclaimer runs inside the watchdog loop, the
// watchdog is force-enabled as well. Eviction is presumably driven by
// GPU VRAM when available, falling back to system RAM otherwise.
var EnableMemoryReclaimer = func(o *ApplicationConfig) {
	// The reclaimer cannot operate without the watchdog infrastructure.
	o.WatchDog = true
	o.MemoryReclaimerEnabled = true
}
// SetMemoryReclaimerThreshold returns an AppOption that configures the
// memory-usage ratio (in the interval (0, 1]) above which backends are
// evicted via the LRU strategy. Out-of-range values are silently ignored
// and leave the configuration untouched; a valid threshold also enables
// the reclaimer itself and the watchdog that hosts it.
func SetMemoryReclaimerThreshold(threshold float64) AppOption {
	return func(o *ApplicationConfig) {
		// Guard clause: only values strictly above 0 and at most 1.0 apply.
		if !(threshold > 0 && threshold <= 1.0) {
			return
		}
		o.MemoryReclaimerThreshold = threshold
		o.MemoryReclaimerEnabled = true
		// The reclaimer piggybacks on the watchdog loop.
		o.WatchDog = true
	}
}
// WithMemoryReclaimer returns an AppOption applying both reclaimer knobs
// in one call: the enabled flag is copied verbatim, while the threshold
// is stored only when it falls inside (0, 1]. Enabling the reclaimer also
// switches the watchdog on, since the periodic memory checks run there.
func WithMemoryReclaimer(enabled bool, threshold float64) AppOption {
	return func(o *ApplicationConfig) {
		o.MemoryReclaimerEnabled = enabled
		if enabled {
			// Memory checks need the watchdog Run() loop.
			o.WatchDog = true
		}
		// An invalid threshold leaves any previously configured value alone.
		if threshold > 0 && threshold <= 1.0 {
			o.MemoryReclaimerThreshold = threshold
		}
	}
}
// EnableSingleBackend is deprecated: use SetMaxActiveBackends(1) instead.
// This is kept for backward compatibility.
var EnableSingleBackend = func(o *ApplicationConfig) {
@@ -454,6 +492,208 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption {
}
}
// ToRuntimeSettings converts ApplicationConfig to RuntimeSettings for API
// responses and JSON serialization. ApplicationConfig stays the single
// source of truth; the returned value is a detached snapshot, so every
// pointer refers to a local copy rather than aliasing the live config.
func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
	// durationOrDefault renders a positive duration via String() and
	// substitutes the documented default when the value was never set.
	durationOrDefault := func(d time.Duration, fallback string) string {
		if d > 0 {
			return d.String()
		}
		return fallback
	}
	idle := durationOrDefault(o.WatchDogIdleTimeout, "15m")
	busy := durationOrDefault(o.WatchDogBusyTimeout, "5m")
	interval := durationOrDefault(o.WatchDogInterval, "2s")

	// Copy each field so the pointers below do not point into o.
	wd := o.WatchDog
	wdIdle := o.WatchDogIdle
	wdBusy := o.WatchDogBusy
	single := o.SingleBackend
	maxBackends := o.MaxActiveBackends
	parallel := o.ParallelBackendRequests
	reclaimer := o.MemoryReclaimerEnabled
	reclaimerThreshold := o.MemoryReclaimerThreshold
	threadCount := o.Threads
	ctxSize := o.ContextSize
	f16 := o.F16
	debugEnabled := o.Debug
	corsEnabled := o.CORS
	csrfEnabled := o.CSRF
	corsOrigins := o.CORSAllowOrigins
	p2pToken := o.P2PToken
	p2pNetwork := o.P2PNetworkID
	federated := o.Federated
	galleries := o.Galleries
	backendGalleries := o.BackendGalleries
	autoGalleries := o.AutoloadGalleries
	autoBackendGalleries := o.AutoloadBackendGalleries
	keys := o.ApiKeys
	retentionDays := o.AgentJobRetentionDays

	return RuntimeSettings{
		WatchdogEnabled:          &wd,
		WatchdogIdleEnabled:      &wdIdle,
		WatchdogBusyEnabled:      &wdBusy,
		WatchdogIdleTimeout:      &idle,
		WatchdogBusyTimeout:      &busy,
		WatchdogInterval:         &interval,
		SingleBackend:            &single,
		MaxActiveBackends:        &maxBackends,
		ParallelBackendRequests:  &parallel,
		MemoryReclaimerEnabled:   &reclaimer,
		MemoryReclaimerThreshold: &reclaimerThreshold,
		Threads:                  &threadCount,
		ContextSize:              &ctxSize,
		F16:                      &f16,
		Debug:                    &debugEnabled,
		CORS:                     &corsEnabled,
		CSRF:                     &csrfEnabled,
		CORSAllowOrigins:         &corsOrigins,
		P2PToken:                 &p2pToken,
		P2PNetworkID:             &p2pNetwork,
		Federated:                &federated,
		Galleries:                &galleries,
		BackendGalleries:         &backendGalleries,
		AutoloadGalleries:        &autoGalleries,
		AutoloadBackendGalleries: &autoBackendGalleries,
		ApiKeys:                  &keys,
		AgentJobRetentionDays:    &retentionDays,
	}
}
// ApplyRuntimeSettings applies RuntimeSettings to ApplicationConfig.
// Only non-nil fields in RuntimeSettings are applied; nil fields leave
// the current configuration value untouched.
// Returns true if watchdog-related settings changed (requiring restart).
// Note that non-watchdog fields (threads, CORS, galleries, ...) are
// applied without setting the restart flag.
func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (requireRestart bool) {
	if settings == nil {
		return false
	}
	if settings.WatchdogEnabled != nil {
		o.WatchDog = *settings.WatchdogEnabled
		requireRestart = true
	}
	if settings.WatchdogIdleEnabled != nil {
		o.WatchDogIdle = *settings.WatchdogIdleEnabled
		if o.WatchDogIdle {
			// Idle checks only run when the watchdog itself is enabled.
			o.WatchDog = true
		}
		requireRestart = true
	}
	if settings.WatchdogBusyEnabled != nil {
		o.WatchDogBusy = *settings.WatchdogBusyEnabled
		if o.WatchDogBusy {
			// Busy checks likewise imply the watchdog is on.
			o.WatchDog = true
		}
		requireRestart = true
	}
	// Timeouts and the interval are duration strings (e.g. "15m");
	// unparseable values are silently ignored and keep the old setting.
	if settings.WatchdogIdleTimeout != nil {
		if dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err == nil {
			o.WatchDogIdleTimeout = dur
			requireRestart = true
		}
	}
	if settings.WatchdogBusyTimeout != nil {
		if dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err == nil {
			o.WatchDogBusyTimeout = dur
			requireRestart = true
		}
	}
	if settings.WatchdogInterval != nil {
		if dur, err := time.ParseDuration(*settings.WatchdogInterval); err == nil {
			o.WatchDogInterval = dur
			requireRestart = true
		}
	}
	// MaxActiveBackends wins over the deprecated SingleBackend; the two
	// fields are kept mutually consistent in both branches.
	if settings.MaxActiveBackends != nil {
		o.MaxActiveBackends = *settings.MaxActiveBackends
		o.SingleBackend = (*settings.MaxActiveBackends == 1)
		requireRestart = true
	} else if settings.SingleBackend != nil {
		o.SingleBackend = *settings.SingleBackend
		if *settings.SingleBackend {
			o.MaxActiveBackends = 1
		} else {
			o.MaxActiveBackends = 0
		}
		requireRestart = true
	}
	if settings.ParallelBackendRequests != nil {
		o.ParallelBackendRequests = *settings.ParallelBackendRequests
	}
	if settings.MemoryReclaimerEnabled != nil {
		o.MemoryReclaimerEnabled = *settings.MemoryReclaimerEnabled
		if *settings.MemoryReclaimerEnabled {
			// The memory reclaimer runs inside the watchdog loop.
			o.WatchDog = true
		}
		requireRestart = true
	}
	if settings.MemoryReclaimerThreshold != nil {
		// Only values in (0, 1] are accepted; out-of-range thresholds are dropped.
		if *settings.MemoryReclaimerThreshold > 0 && *settings.MemoryReclaimerThreshold <= 1.0 {
			o.MemoryReclaimerThreshold = *settings.MemoryReclaimerThreshold
			requireRestart = true
		}
	}
	// The remaining fields are straight copies that do not require a
	// watchdog restart.
	if settings.Threads != nil {
		o.Threads = *settings.Threads
	}
	if settings.ContextSize != nil {
		o.ContextSize = *settings.ContextSize
	}
	if settings.F16 != nil {
		o.F16 = *settings.F16
	}
	if settings.Debug != nil {
		o.Debug = *settings.Debug
	}
	if settings.CORS != nil {
		o.CORS = *settings.CORS
	}
	if settings.CSRF != nil {
		o.CSRF = *settings.CSRF
	}
	if settings.CORSAllowOrigins != nil {
		o.CORSAllowOrigins = *settings.CORSAllowOrigins
	}
	if settings.P2PToken != nil {
		o.P2PToken = *settings.P2PToken
	}
	if settings.P2PNetworkID != nil {
		o.P2PNetworkID = *settings.P2PNetworkID
	}
	if settings.Federated != nil {
		o.Federated = *settings.Federated
	}
	if settings.Galleries != nil {
		o.Galleries = *settings.Galleries
	}
	if settings.BackendGalleries != nil {
		o.BackendGalleries = *settings.BackendGalleries
	}
	if settings.AutoloadGalleries != nil {
		o.AutoloadGalleries = *settings.AutoloadGalleries
	}
	if settings.AutoloadBackendGalleries != nil {
		o.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
	}
	if settings.AgentJobRetentionDays != nil {
		o.AgentJobRetentionDays = *settings.AgentJobRetentionDays
	}
	// Note: ApiKeys requires special handling (merging with startup keys) - handled in caller
	return requireRestart
}
// func WithMetrics(meter *metrics.Metrics) AppOption {
// return func(o *StartupOptions) {
// o.Metrics = meter
+577
View File
@@ -0,0 +1,577 @@
package config
import (
"time"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("ApplicationConfig RuntimeSettings Conversion", func() {
Describe("ToRuntimeSettings", func() {
It("should convert all fields correctly", func() {
appConfig := &ApplicationConfig{
WatchDog: true,
WatchDogIdle: true,
WatchDogBusy: true,
WatchDogIdleTimeout: 20 * time.Minute,
WatchDogBusyTimeout: 10 * time.Minute,
SingleBackend: false,
MaxActiveBackends: 5,
ParallelBackendRequests: true,
MemoryReclaimerEnabled: true,
MemoryReclaimerThreshold: 0.85,
Threads: 8,
ContextSize: 4096,
F16: true,
Debug: true,
CORS: true,
CSRF: true,
CORSAllowOrigins: "https://example.com",
P2PToken: "test-token",
P2PNetworkID: "test-network",
Federated: true,
Galleries: []Gallery{{Name: "test-gallery", URL: "https://example.com"}},
BackendGalleries: []Gallery{{Name: "backend-gallery", URL: "https://example.com/backend"}},
AutoloadGalleries: true,
AutoloadBackendGalleries: true,
ApiKeys: []string{"key1", "key2"},
AgentJobRetentionDays: 30,
}
rs := appConfig.ToRuntimeSettings()
Expect(rs.WatchdogEnabled).ToNot(BeNil())
Expect(*rs.WatchdogEnabled).To(BeTrue())
Expect(rs.WatchdogIdleEnabled).ToNot(BeNil())
Expect(*rs.WatchdogIdleEnabled).To(BeTrue())
Expect(rs.WatchdogBusyEnabled).ToNot(BeNil())
Expect(*rs.WatchdogBusyEnabled).To(BeTrue())
Expect(rs.WatchdogIdleTimeout).ToNot(BeNil())
Expect(*rs.WatchdogIdleTimeout).To(Equal("20m0s"))
Expect(rs.WatchdogBusyTimeout).ToNot(BeNil())
Expect(*rs.WatchdogBusyTimeout).To(Equal("10m0s"))
Expect(rs.SingleBackend).ToNot(BeNil())
Expect(*rs.SingleBackend).To(BeFalse())
Expect(rs.MaxActiveBackends).ToNot(BeNil())
Expect(*rs.MaxActiveBackends).To(Equal(5))
Expect(rs.ParallelBackendRequests).ToNot(BeNil())
Expect(*rs.ParallelBackendRequests).To(BeTrue())
Expect(rs.MemoryReclaimerEnabled).ToNot(BeNil())
Expect(*rs.MemoryReclaimerEnabled).To(BeTrue())
Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil())
Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.85))
Expect(rs.Threads).ToNot(BeNil())
Expect(*rs.Threads).To(Equal(8))
Expect(rs.ContextSize).ToNot(BeNil())
Expect(*rs.ContextSize).To(Equal(4096))
Expect(rs.F16).ToNot(BeNil())
Expect(*rs.F16).To(BeTrue())
Expect(rs.Debug).ToNot(BeNil())
Expect(*rs.Debug).To(BeTrue())
Expect(rs.CORS).ToNot(BeNil())
Expect(*rs.CORS).To(BeTrue())
Expect(rs.CSRF).ToNot(BeNil())
Expect(*rs.CSRF).To(BeTrue())
Expect(rs.CORSAllowOrigins).ToNot(BeNil())
Expect(*rs.CORSAllowOrigins).To(Equal("https://example.com"))
Expect(rs.P2PToken).ToNot(BeNil())
Expect(*rs.P2PToken).To(Equal("test-token"))
Expect(rs.P2PNetworkID).ToNot(BeNil())
Expect(*rs.P2PNetworkID).To(Equal("test-network"))
Expect(rs.Federated).ToNot(BeNil())
Expect(*rs.Federated).To(BeTrue())
Expect(rs.Galleries).ToNot(BeNil())
Expect(*rs.Galleries).To(HaveLen(1))
Expect((*rs.Galleries)[0].Name).To(Equal("test-gallery"))
Expect(rs.BackendGalleries).ToNot(BeNil())
Expect(*rs.BackendGalleries).To(HaveLen(1))
Expect((*rs.BackendGalleries)[0].Name).To(Equal("backend-gallery"))
Expect(rs.AutoloadGalleries).ToNot(BeNil())
Expect(*rs.AutoloadGalleries).To(BeTrue())
Expect(rs.AutoloadBackendGalleries).ToNot(BeNil())
Expect(*rs.AutoloadBackendGalleries).To(BeTrue())
Expect(rs.ApiKeys).ToNot(BeNil())
Expect(*rs.ApiKeys).To(HaveLen(2))
Expect(*rs.ApiKeys).To(ContainElements("key1", "key2"))
Expect(rs.AgentJobRetentionDays).ToNot(BeNil())
Expect(*rs.AgentJobRetentionDays).To(Equal(30))
})
It("should use default timeouts when not set", func() {
appConfig := &ApplicationConfig{}
rs := appConfig.ToRuntimeSettings()
Expect(rs.WatchdogIdleTimeout).ToNot(BeNil())
Expect(*rs.WatchdogIdleTimeout).To(Equal("15m"))
Expect(rs.WatchdogBusyTimeout).ToNot(BeNil())
Expect(*rs.WatchdogBusyTimeout).To(Equal("5m"))
})
})
Describe("ApplyRuntimeSettings", func() {
It("should return false when settings is nil", func() {
appConfig := &ApplicationConfig{}
changed := appConfig.ApplyRuntimeSettings(nil)
Expect(changed).To(BeFalse())
})
It("should only apply non-nil fields", func() {
appConfig := &ApplicationConfig{
WatchDog: false,
Threads: 4,
ContextSize: 2048,
}
watchdogEnabled := true
rs := &RuntimeSettings{
WatchdogEnabled: &watchdogEnabled,
// Leave other fields nil
}
changed := appConfig.ApplyRuntimeSettings(rs)
Expect(changed).To(BeTrue())
Expect(appConfig.WatchDog).To(BeTrue())
// Unchanged fields should remain
Expect(appConfig.Threads).To(Equal(4))
Expect(appConfig.ContextSize).To(Equal(2048))
})
It("should apply watchdog settings and return changed=true", func() {
appConfig := &ApplicationConfig{}
watchdogEnabled := true
watchdogIdle := true
watchdogBusy := true
idleTimeout := "30m"
busyTimeout := "15m"
rs := &RuntimeSettings{
WatchdogEnabled: &watchdogEnabled,
WatchdogIdleEnabled: &watchdogIdle,
WatchdogBusyEnabled: &watchdogBusy,
WatchdogIdleTimeout: &idleTimeout,
WatchdogBusyTimeout: &busyTimeout,
}
changed := appConfig.ApplyRuntimeSettings(rs)
Expect(changed).To(BeTrue())
Expect(appConfig.WatchDog).To(BeTrue())
Expect(appConfig.WatchDogIdle).To(BeTrue())
Expect(appConfig.WatchDogBusy).To(BeTrue())
Expect(appConfig.WatchDogIdleTimeout).To(Equal(30 * time.Minute))
Expect(appConfig.WatchDogBusyTimeout).To(Equal(15 * time.Minute))
})
It("should enable watchdog when idle is enabled", func() {
appConfig := &ApplicationConfig{WatchDog: false}
watchdogIdle := true
rs := &RuntimeSettings{
WatchdogIdleEnabled: &watchdogIdle,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.WatchDog).To(BeTrue())
Expect(appConfig.WatchDogIdle).To(BeTrue())
})
It("should enable watchdog when busy is enabled", func() {
appConfig := &ApplicationConfig{WatchDog: false}
watchdogBusy := true
rs := &RuntimeSettings{
WatchdogBusyEnabled: &watchdogBusy,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.WatchDog).To(BeTrue())
Expect(appConfig.WatchDogBusy).To(BeTrue())
})
It("should handle MaxActiveBackends and update SingleBackend accordingly", func() {
appConfig := &ApplicationConfig{}
maxBackends := 1
rs := &RuntimeSettings{
MaxActiveBackends: &maxBackends,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MaxActiveBackends).To(Equal(1))
Expect(appConfig.SingleBackend).To(BeTrue())
// Test with multiple backends
maxBackends = 5
rs = &RuntimeSettings{
MaxActiveBackends: &maxBackends,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MaxActiveBackends).To(Equal(5))
Expect(appConfig.SingleBackend).To(BeFalse())
})
It("should handle SingleBackend and update MaxActiveBackends accordingly", func() {
appConfig := &ApplicationConfig{}
singleBackend := true
rs := &RuntimeSettings{
SingleBackend: &singleBackend,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.SingleBackend).To(BeTrue())
Expect(appConfig.MaxActiveBackends).To(Equal(1))
// Test disabling single backend
singleBackend = false
rs = &RuntimeSettings{
SingleBackend: &singleBackend,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.SingleBackend).To(BeFalse())
Expect(appConfig.MaxActiveBackends).To(Equal(0))
})
It("should enable watchdog when memory reclaimer is enabled", func() {
appConfig := &ApplicationConfig{WatchDog: false}
memoryEnabled := true
threshold := 0.90
rs := &RuntimeSettings{
MemoryReclaimerEnabled: &memoryEnabled,
MemoryReclaimerThreshold: &threshold,
}
changed := appConfig.ApplyRuntimeSettings(rs)
Expect(changed).To(BeTrue())
Expect(appConfig.WatchDog).To(BeTrue())
Expect(appConfig.MemoryReclaimerEnabled).To(BeTrue())
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.90))
})
It("should reject invalid memory threshold values", func() {
appConfig := &ApplicationConfig{MemoryReclaimerThreshold: 0.50}
// Test threshold > 1.0
invalidThreshold := 1.5
rs := &RuntimeSettings{
MemoryReclaimerThreshold: &invalidThreshold,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged
// Test threshold <= 0
invalidThreshold = 0.0
rs = &RuntimeSettings{
MemoryReclaimerThreshold: &invalidThreshold,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged
// Test negative threshold
invalidThreshold = -0.5
rs = &RuntimeSettings{
MemoryReclaimerThreshold: &invalidThreshold,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.50)) // Should remain unchanged
})
It("should accept valid memory threshold at boundary", func() {
appConfig := &ApplicationConfig{}
// Test threshold = 1.0 (maximum valid)
threshold := 1.0
rs := &RuntimeSettings{
MemoryReclaimerThreshold: &threshold,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(1.0))
// Test threshold just above 0
threshold = 0.01
rs = &RuntimeSettings{
MemoryReclaimerThreshold: &threshold,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.MemoryReclaimerThreshold).To(Equal(0.01))
})
It("should apply performance settings without triggering watchdog change", func() {
appConfig := &ApplicationConfig{}
threads := 16
contextSize := 8192
f16 := true
debug := true
rs := &RuntimeSettings{
Threads: &threads,
ContextSize: &contextSize,
F16: &f16,
Debug: &debug,
}
changed := appConfig.ApplyRuntimeSettings(rs)
// These settings don't require watchdog restart
Expect(changed).To(BeFalse())
Expect(appConfig.Threads).To(Equal(16))
Expect(appConfig.ContextSize).To(Equal(8192))
Expect(appConfig.F16).To(BeTrue())
Expect(appConfig.Debug).To(BeTrue())
})
It("should apply CORS and security settings", func() {
appConfig := &ApplicationConfig{}
cors := true
csrf := true
origins := "https://example.com,https://other.com"
rs := &RuntimeSettings{
CORS: &cors,
CSRF: &csrf,
CORSAllowOrigins: &origins,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.CORS).To(BeTrue())
Expect(appConfig.CSRF).To(BeTrue())
Expect(appConfig.CORSAllowOrigins).To(Equal("https://example.com,https://other.com"))
})
It("should apply P2P settings", func() {
appConfig := &ApplicationConfig{}
token := "p2p-test-token"
networkID := "p2p-test-network"
federated := true
rs := &RuntimeSettings{
P2PToken: &token,
P2PNetworkID: &networkID,
Federated: &federated,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.P2PToken).To(Equal("p2p-test-token"))
Expect(appConfig.P2PNetworkID).To(Equal("p2p-test-network"))
Expect(appConfig.Federated).To(BeTrue())
})
It("should apply gallery settings", func() {
appConfig := &ApplicationConfig{}
galleries := []Gallery{
{Name: "gallery1", URL: "https://gallery1.com"},
{Name: "gallery2", URL: "https://gallery2.com"},
}
backendGalleries := []Gallery{
{Name: "backend-gallery", URL: "https://backend.com"},
}
autoload := true
autoloadBackend := true
rs := &RuntimeSettings{
Galleries: &galleries,
BackendGalleries: &backendGalleries,
AutoloadGalleries: &autoload,
AutoloadBackendGalleries: &autoloadBackend,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.Galleries).To(HaveLen(2))
Expect(appConfig.Galleries[0].Name).To(Equal("gallery1"))
Expect(appConfig.BackendGalleries).To(HaveLen(1))
Expect(appConfig.AutoloadGalleries).To(BeTrue())
Expect(appConfig.AutoloadBackendGalleries).To(BeTrue())
})
It("should apply agent settings", func() {
appConfig := &ApplicationConfig{}
retentionDays := 14
rs := &RuntimeSettings{
AgentJobRetentionDays: &retentionDays,
}
appConfig.ApplyRuntimeSettings(rs)
Expect(appConfig.AgentJobRetentionDays).To(Equal(14))
})
})
// Round-trip: ApplicationConfig -> ToRuntimeSettings -> ApplyRuntimeSettings
// on a fresh config must be lossless for every field asserted below.
Describe("Round-trip conversion", func() {
It("should maintain values through ToRuntimeSettings -> ApplyRuntimeSettings", func() {
// A config with deliberately non-default values for every covered field,
// so that a dropped or mis-mapped field shows up as a mismatch.
original := &ApplicationConfig{
WatchDog: true,
WatchDogIdle: true,
WatchDogBusy: false,
WatchDogIdleTimeout: 25 * time.Minute,
WatchDogBusyTimeout: 12 * time.Minute,
SingleBackend: false,
MaxActiveBackends: 3,
ParallelBackendRequests: true,
MemoryReclaimerEnabled: true,
MemoryReclaimerThreshold: 0.92,
Threads: 12,
ContextSize: 6144,
F16: true,
Debug: false,
CORS: true,
CSRF: false,
CORSAllowOrigins: "https://test.com",
P2PToken: "round-trip-token",
P2PNetworkID: "round-trip-network",
Federated: true,
AutoloadGalleries: true,
AutoloadBackendGalleries: false,
AgentJobRetentionDays: 60,
}
// Convert to RuntimeSettings
rs := original.ToRuntimeSettings()
// Apply to a new ApplicationConfig
target := &ApplicationConfig{}
target.ApplyRuntimeSettings(&rs)
// Verify all values match
Expect(target.WatchDog).To(Equal(original.WatchDog))
Expect(target.WatchDogIdle).To(Equal(original.WatchDogIdle))
Expect(target.WatchDogBusy).To(Equal(original.WatchDogBusy))
Expect(target.WatchDogIdleTimeout).To(Equal(original.WatchDogIdleTimeout))
Expect(target.WatchDogBusyTimeout).To(Equal(original.WatchDogBusyTimeout))
Expect(target.MaxActiveBackends).To(Equal(original.MaxActiveBackends))
Expect(target.ParallelBackendRequests).To(Equal(original.ParallelBackendRequests))
Expect(target.MemoryReclaimerEnabled).To(Equal(original.MemoryReclaimerEnabled))
Expect(target.MemoryReclaimerThreshold).To(Equal(original.MemoryReclaimerThreshold))
Expect(target.Threads).To(Equal(original.Threads))
Expect(target.ContextSize).To(Equal(original.ContextSize))
Expect(target.F16).To(Equal(original.F16))
Expect(target.Debug).To(Equal(original.Debug))
Expect(target.CORS).To(Equal(original.CORS))
Expect(target.CSRF).To(Equal(original.CSRF))
Expect(target.CORSAllowOrigins).To(Equal(original.CORSAllowOrigins))
Expect(target.P2PToken).To(Equal(original.P2PToken))
Expect(target.P2PNetworkID).To(Equal(original.P2PNetworkID))
Expect(target.Federated).To(Equal(original.Federated))
Expect(target.AutoloadGalleries).To(Equal(original.AutoloadGalleries))
Expect(target.AutoloadBackendGalleries).To(Equal(original.AutoloadBackendGalleries))
Expect(target.AgentJobRetentionDays).To(Equal(original.AgentJobRetentionDays))
})
// Empty (non-nil) slices must survive the round trip as empty,
// not become nil or gain elements.
It("should handle empty galleries correctly in round-trip", func() {
original := &ApplicationConfig{
Galleries: []Gallery{},
BackendGalleries: []Gallery{},
ApiKeys: []string{},
}
rs := original.ToRuntimeSettings()
target := &ApplicationConfig{}
target.ApplyRuntimeSettings(&rs)
Expect(target.Galleries).To(BeEmpty())
Expect(target.BackendGalleries).To(BeEmpty())
})
})
// Edge cases: malformed inputs, zero-value configs, and precedence between
// the deprecated SingleBackend flag and MaxActiveBackends.
Describe("Edge cases", func() {
// An unparseable duration string must be ignored, leaving the prior
// timeout intact rather than zeroing it.
It("should handle invalid timeout string in ApplyRuntimeSettings", func() {
appConfig := &ApplicationConfig{
WatchDogIdleTimeout: 10 * time.Minute,
}
invalidTimeout := "not-a-duration"
rs := &RuntimeSettings{
WatchdogIdleTimeout: &invalidTimeout,
}
appConfig.ApplyRuntimeSettings(rs)
// Should remain unchanged due to parse error
Expect(appConfig.WatchDogIdleTimeout).To(Equal(10 * time.Minute))
})
// ToRuntimeSettings on a zero-value config must still produce non-nil
// pointers (carrying the zero values), never nil fields.
It("should handle zero values in ApplicationConfig", func() {
appConfig := &ApplicationConfig{
// All zero values
}
rs := appConfig.ToRuntimeSettings()
// Should still have non-nil pointers with zero/default values
Expect(rs.WatchdogEnabled).ToNot(BeNil())
Expect(*rs.WatchdogEnabled).To(BeFalse())
Expect(rs.Threads).ToNot(BeNil())
Expect(*rs.Threads).To(Equal(0))
Expect(rs.MemoryReclaimerThreshold).ToNot(BeNil())
Expect(*rs.MemoryReclaimerThreshold).To(Equal(0.0))
})
It("should prefer MaxActiveBackends over SingleBackend when both are set", func() {
appConfig := &ApplicationConfig{}
maxBackends := 3
singleBackend := true
rs := &RuntimeSettings{
MaxActiveBackends: &maxBackends,
SingleBackend: &singleBackend,
}
appConfig.ApplyRuntimeSettings(rs)
// MaxActiveBackends should take precedence
Expect(appConfig.MaxActiveBackends).To(Equal(3))
Expect(appConfig.SingleBackend).To(BeFalse()) // 3 != 1, so single backend is false
})
})
})
+56
View File
@@ -0,0 +1,56 @@
package config
// RuntimeSettings represents runtime configuration that can be changed dynamically.
// This struct is used for:
// - API responses (GET /api/settings)
// - API requests (POST /api/settings)
// - Persisting to runtime_settings.json
// - Loading from runtime_settings.json on startup
//
// All fields are pointers to distinguish between "not set" and "set to zero/false value".
type RuntimeSettings struct {
// Watchdog settings
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
// Timeouts and interval are duration strings parsed with time.ParseDuration
// (e.g. "15m", "2s"); invalid values are rejected by the settings endpoint.
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
WatchdogInterval *string `json:"watchdog_interval,omitempty"` // Interval between watchdog checks (e.g., 2s, 30s)
// Backend management
//
// Deprecated: use MaxActiveBackends = 1 instead.
SingleBackend *bool `json:"single_backend,omitempty"`
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
// Memory Reclaimer settings (works with GPU if available, otherwise RAM)
MemoryReclaimerEnabled *bool `json:"memory_reclaimer_enabled,omitempty"` // Enable memory threshold monitoring
MemoryReclaimerThreshold *float64 `json:"memory_reclaimer_threshold,omitempty"` // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
// Performance settings (applied 1:1 onto the matching ApplicationConfig fields)
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
// Security/CORS settings
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
// P2P settings
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
// Gallery settings
Galleries *[]Gallery `json:"galleries,omitempty"`
BackendGalleries *[]Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
// API keys - No omitempty as we need to save empty arrays to clear keys
ApiKeys *[]string `json:"api_keys"`
// Agent settings
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
}
+36 -229
View File
@@ -12,115 +12,15 @@ import (
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/rs/zerolog/log"
)
type SettingsResponse struct {
Success bool `json:"success"`
Error string `json:"error,omitempty"`
Message string `json:"message,omitempty"`
}
type RuntimeSettings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
Galleries *[]config.Gallery `json:"galleries,omitempty"`
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
}
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
startupConfig := app.StartupConfig()
if startupConfig == nil {
// Fallback if startup config not available
startupConfig = appConfig
}
settings := RuntimeSettings{}
// Set all current values (using pointers for RuntimeSettings)
watchdogIdle := appConfig.WatchDogIdle
watchdogBusy := appConfig.WatchDogBusy
watchdogEnabled := appConfig.WatchDog
singleBackend := appConfig.SingleBackend
maxActiveBackends := appConfig.MaxActiveBackends
parallelBackendRequests := appConfig.ParallelBackendRequests
threads := appConfig.Threads
contextSize := appConfig.ContextSize
f16 := appConfig.F16
debug := appConfig.Debug
cors := appConfig.CORS
csrf := appConfig.CSRF
corsAllowOrigins := appConfig.CORSAllowOrigins
p2pToken := appConfig.P2PToken
p2pNetworkID := appConfig.P2PNetworkID
federated := appConfig.Federated
galleries := appConfig.Galleries
backendGalleries := appConfig.BackendGalleries
autoloadGalleries := appConfig.AutoloadGalleries
autoloadBackendGalleries := appConfig.AutoloadBackendGalleries
apiKeys := appConfig.ApiKeys
agentJobRetentionDays := appConfig.AgentJobRetentionDays
settings.WatchdogIdleEnabled = &watchdogIdle
settings.WatchdogBusyEnabled = &watchdogBusy
settings.WatchdogEnabled = &watchdogEnabled
settings.SingleBackend = &singleBackend
settings.MaxActiveBackends = &maxActiveBackends
settings.ParallelBackendRequests = &parallelBackendRequests
settings.Threads = &threads
settings.ContextSize = &contextSize
settings.F16 = &f16
settings.Debug = &debug
settings.CORS = &cors
settings.CSRF = &csrf
settings.CORSAllowOrigins = &corsAllowOrigins
settings.P2PToken = &p2pToken
settings.P2PNetworkID = &p2pNetworkID
settings.Federated = &federated
settings.Galleries = &galleries
settings.BackendGalleries = &backendGalleries
settings.AutoloadGalleries = &autoloadGalleries
settings.AutoloadBackendGalleries = &autoloadBackendGalleries
settings.ApiKeys = &apiKeys
settings.AgentJobRetentionDays = &agentJobRetentionDays
var idleTimeout, busyTimeout string
if appConfig.WatchDogIdleTimeout > 0 {
idleTimeout = appConfig.WatchDogIdleTimeout.String()
} else {
idleTimeout = "15m" // default
}
if appConfig.WatchDogBusyTimeout > 0 {
busyTimeout = appConfig.WatchDogBusyTimeout.String()
} else {
busyTimeout = "5m" // default
}
settings.WatchdogIdleTimeout = &idleTimeout
settings.WatchdogBusyTimeout = &busyTimeout
settings := appConfig.ToRuntimeSettings()
return c.JSON(http.StatusOK, settings)
}
}
@@ -132,21 +32,20 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
startupConfig := app.StartupConfig()
if startupConfig == nil {
// Fallback if startup config not available
startupConfig = appConfig
}
body, err := io.ReadAll(c.Request().Body)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Failed to read request body: " + err.Error(),
})
}
var settings RuntimeSettings
var settings config.RuntimeSettings
if err := json.Unmarshal(body, &settings); err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Failed to parse JSON: " + err.Error(),
})
@@ -154,27 +53,33 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
// Validate timeouts if provided
if settings.WatchdogIdleTimeout != nil {
_, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
if _, err := time.ParseDuration(*settings.WatchdogIdleTimeout); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
})
}
}
if settings.WatchdogBusyTimeout != nil {
_, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
if _, err := time.ParseDuration(*settings.WatchdogBusyTimeout); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
})
}
}
if settings.WatchdogInterval != nil {
if _, err := time.ParseDuration(*settings.WatchdogInterval); err != nil {
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "Invalid watchdog_interval format: " + err.Error(),
})
}
}
// Save to file
if appConfig.DynamicConfigsDir == "" {
return c.JSON(http.StatusBadRequest, SettingsResponse{
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
Success: false,
Error: "DynamicConfigsDir is not set",
})
@@ -183,133 +88,38 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
settingsJSON, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return c.JSON(http.StatusInternalServerError, SettingsResponse{
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to marshal settings: " + err.Error(),
})
}
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
return c.JSON(http.StatusInternalServerError, SettingsResponse{
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Failed to write settings file: " + err.Error(),
})
}
// Apply settings immediately, checking env var overrides per field
watchdogChanged := false
if settings.WatchdogEnabled != nil {
appConfig.WatchDog = *settings.WatchdogEnabled
watchdogChanged = true
}
if settings.WatchdogIdleEnabled != nil {
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
if appConfig.WatchDogIdle {
appConfig.WatchDog = true
}
watchdogChanged = true
}
if settings.WatchdogBusyEnabled != nil {
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
if appConfig.WatchDogBusy {
appConfig.WatchDog = true
}
watchdogChanged = true
}
if settings.WatchdogIdleTimeout != nil {
dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout)
appConfig.WatchDogIdleTimeout = dur
watchdogChanged = true
}
if settings.WatchdogBusyTimeout != nil {
dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout)
appConfig.WatchDogBusyTimeout = dur
watchdogChanged = true
}
if settings.MaxActiveBackends != nil {
appConfig.MaxActiveBackends = *settings.MaxActiveBackends
// For backward compatibility, update SingleBackend too
appConfig.SingleBackend = (*settings.MaxActiveBackends == 1)
watchdogChanged = true // LRU limit is managed by watchdog
} else if settings.SingleBackend != nil {
// Legacy support: SingleBackend maps to MaxActiveBackends = 1
appConfig.SingleBackend = *settings.SingleBackend
if *settings.SingleBackend {
appConfig.MaxActiveBackends = 1
} else {
appConfig.MaxActiveBackends = 0
}
watchdogChanged = true // LRU limit is managed by watchdog
}
if settings.ParallelBackendRequests != nil {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.Threads != nil {
appConfig.Threads = *settings.Threads
}
if settings.ContextSize != nil {
appConfig.ContextSize = *settings.ContextSize
}
if settings.F16 != nil {
appConfig.F16 = *settings.F16
}
if settings.Debug != nil {
appConfig.Debug = *settings.Debug
}
if settings.CORS != nil {
appConfig.CORS = *settings.CORS
}
if settings.CSRF != nil {
appConfig.CSRF = *settings.CSRF
}
if settings.CORSAllowOrigins != nil {
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
}
if settings.P2PToken != nil {
appConfig.P2PToken = *settings.P2PToken
}
if settings.P2PNetworkID != nil {
appConfig.P2PNetworkID = *settings.P2PNetworkID
}
if settings.Federated != nil {
appConfig.Federated = *settings.Federated
}
if settings.Galleries != nil {
appConfig.Galleries = *settings.Galleries
}
if settings.BackendGalleries != nil {
appConfig.BackendGalleries = *settings.BackendGalleries
}
if settings.AutoloadGalleries != nil {
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
}
if settings.AutoloadBackendGalleries != nil {
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
}
agentJobChanged := false
if settings.AgentJobRetentionDays != nil {
appConfig.AgentJobRetentionDays = *settings.AgentJobRetentionDays
agentJobChanged = true
}
// Apply settings using centralized method
watchdogChanged := appConfig.ApplyRuntimeSettings(&settings)
// Handle API keys specially (merge with startup keys)
if settings.ApiKeys != nil {
// API keys from env vars (startup) should be kept, runtime settings keys are added
// Combine startup keys (env vars) with runtime settings keys
envKeys := startupConfig.ApiKeys
runtimeKeys := *settings.ApiKeys
// Merge: env keys first (they take precedence), then runtime keys
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
// Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication
// The runtime_settings.json is the unified config file. If api_keys.json exists,
// it will be loaded first, but runtime_settings.json takes precedence and deduplicates.
}
// Check if agent job retention changed
agentJobChanged := settings.AgentJobRetentionDays != nil
// Restart watchdog if settings changed
if watchdogChanged {
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil {
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled {
if err := app.StopWatchdog(); err != nil {
log.Error().Err(err).Msg("Failed to stop watchdog")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
})
@@ -317,7 +127,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
} else {
if err := app.RestartWatchdog(); err != nil {
log.Error().Err(err).Msg("Failed to restart watchdog")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
})
@@ -329,7 +139,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
if agentJobChanged {
if err := app.RestartAgentJobService(); err != nil {
log.Error().Err(err).Msg("Failed to restart agent job service")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart agent job service: " + err.Error(),
})
@@ -340,33 +150,30 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
if p2pChanged {
if settings.P2PToken != nil && *settings.P2PToken == "" {
// stop P2P
if err := app.StopP2P(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
})
}
} else {
if settings.P2PToken != nil && *settings.P2PToken == "0" {
// generate a token if users sets 0 (disabled)
token := p2p.GenerateToken(60, 60)
settings.P2PToken = &token
appConfig.P2PToken = token
}
// Stop existing P2P
if err := app.RestartP2P(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
log.Error().Err(err).Msg("Failed to restart P2P")
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
Error: "Settings saved but failed to restart P2P: " + err.Error(),
})
}
}
}
return c.JSON(http.StatusOK, SettingsResponse{
return c.JSON(http.StatusOK, schema.SettingsResponse{
Success: true,
Message: "Settings updated successfully",
})
+25
View File
@@ -19,6 +19,7 @@ import (
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
)
@@ -917,6 +918,30 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
})
})
// Resources API endpoint - unified memory info (GPU if available, otherwise RAM).
// Consumed by the dashboard's resourceMonitor poller; the payload mirrors
// xsysinfo.GetResourceInfo plus the reclaimer config and watchdog interval.
app.GET("/api/resources", func(c echo.Context) error {
resourceInfo := xsysinfo.GetResourceInfo()
// Format watchdog interval; fall back to "2s" when the configured
// interval is zero/unset.
watchdogInterval := "2s" // default
if appConfig.WatchDogInterval > 0 {
watchdogInterval = appConfig.WatchDogInterval.String()
}
response := map[string]interface{}{
"type": resourceInfo.Type, // "gpu" or "ram"
"available": resourceInfo.Available,
"gpus": resourceInfo.GPUs, // per-GPU stats; nil/empty when type is "ram"
"ram": resourceInfo.RAM,
"aggregate": resourceInfo.Aggregate,
"reclaimer_enabled": appConfig.MemoryReclaimerEnabled,
"reclaimer_threshold": appConfig.MemoryReclaimerThreshold,
"watchdog_interval": watchdogInterval,
}
return c.JSON(200, response)
})
if !appConfig.DisableRuntimeSettings {
// Settings API
app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance))
+53
View File
@@ -462,6 +462,27 @@
</a>
</div>
<!-- Memory Status Indicator (GPU or RAM) -->
<div class="mb-4" x-data="resourceMonitor()" x-init="startPolling()">
<template x-if="resourceData && resourceData.available">
<div class="flex items-center justify-center gap-3 text-xs text-[var(--color-text-secondary)]">
<div class="flex items-center gap-2 px-3 py-1.5 rounded-full bg-[var(--color-bg-secondary)] border border-[var(--color-primary-border)]/20">
<i :class="resourceData.type === 'gpu' ? 'fas fa-microchip' : 'fas fa-memory'"
:class="resourceData.aggregate.usage_percent > 90 ? 'text-red-400' : resourceData.aggregate.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"></i>
<span class="text-[var(--color-text-secondary)]" x-text="resourceData.type === 'gpu' ? 'GPU' : 'RAM'"></span>
<span class="font-mono"
:class="resourceData.aggregate.usage_percent > 90 ? 'text-red-400' : resourceData.aggregate.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
x-text="`${resourceData.aggregate.usage_percent.toFixed(0)}%`"></span>
<div class="w-16 bg-[var(--color-bg-primary)] rounded-full h-1.5 overflow-hidden">
<div class="h-full rounded-full transition-all duration-300"
:class="resourceData.aggregate.usage_percent > 90 ? 'bg-red-500' : resourceData.aggregate.usage_percent > 70 ? 'bg-yellow-500' : 'bg-[var(--color-success)]'"
:style="`width: ${resourceData.aggregate.usage_percent}%`"></div>
</div>
</div>
</div>
</template>
</div>
<!-- Model Status Summary - Subtle -->
{{ $loadedModels := .LoadedModels }}
<div class="mb-8 flex items-center justify-center gap-2 text-xs text-[var(--color-text-secondary)]"
@@ -687,6 +708,38 @@ async function stopAllModels(component) {
// Make functions available globally for Alpine.js
window.stopModel = stopModel;
window.stopAllModels = stopAllModels;
// Resource Monitor component (GPU if available, otherwise RAM).
// Alpine.js factory: polls /api/resources every 5 seconds and exposes the
// latest payload on `resourceData` for the template bindings.
function resourceMonitor() {
    return {
        resourceData: null,   // last JSON payload from /api/resources, or null before first fetch
        pollInterval: null,   // setInterval handle; null when not polling
        async fetchResourceData() {
            try {
                const response = await fetch('/api/resources');
                if (response.ok) {
                    this.resourceData = await response.json();
                }
            } catch (error) {
                // Transient network errors: keep the last known data visible.
                console.error('Error fetching resource data:', error);
            }
        },
        startPolling() {
            // Guard against a double start leaking an orphaned interval.
            this.stopPolling();
            // Initial fetch so the UI populates immediately
            this.fetchResourceData();
            // Poll every 5 seconds
            this.pollInterval = setInterval(() => this.fetchResourceData(), 5000);
        },
        stopPolling() {
            if (this.pollInterval) {
                clearInterval(this.pollInterval);
                // Null the handle so polling state can be re-checked reliably.
                this.pollInterval = null;
            }
        }
    }
}
</script>
</body>
+141
View File
@@ -73,6 +73,106 @@
</div>
</div>
<!-- Memory Info Section (GPU or RAM) -->
<div class="mt-8" x-data="resourceMonitor()" x-init="startPolling()">
<template x-if="resourceData && resourceData.available">
<div class="bg-[var(--color-bg-secondary)] border border-[var(--color-primary-border)]/20 rounded-lg p-4 mb-6">
<div class="flex items-center justify-between mb-3">
<h2 class="h3 flex items-center">
<i :class="resourceData.type === 'gpu' ? 'fas fa-microchip' : 'fas fa-memory'" class="mr-2 text-[var(--color-primary)] text-sm"></i>
<span x-text="resourceData.type === 'gpu' ? 'GPU Status' : 'Memory Status'"></span>
</h2>
<div class="flex items-center gap-2 text-xs text-[var(--color-text-secondary)]">
<template x-if="resourceData.type === 'gpu'">
<span x-text="`${resourceData.aggregate.gpu_count} GPU${resourceData.aggregate.gpu_count > 1 ? 's' : ''}`"></span>
</template>
<template x-if="resourceData.type === 'ram'">
<span>System RAM</span>
</template>
<template x-if="resourceData.reclaimer_enabled">
<span class="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-[var(--color-primary)]/10 text-[var(--color-primary)]">
<i class="fas fa-shield-alt text-[8px] mr-1"></i>Reclaimer Active
</span>
</template>
</div>
</div>
<!-- Per-GPU Stats (when GPU available) -->
<template x-if="resourceData.type === 'gpu' && resourceData.gpus">
<div class="space-y-3">
<template x-for="gpu in resourceData.gpus" :key="gpu.index">
<div class="bg-[var(--color-bg-primary)] rounded p-3">
<div class="flex items-center justify-between mb-2">
<div class="flex items-center gap-2">
<span class="text-xs font-medium text-[var(--color-text-primary)] truncate max-w-[200px]" x-text="gpu.name"></span>
<span class="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium"
:class="gpu.vendor === 'nvidia' ? 'bg-green-500/10 text-green-300' :
gpu.vendor === 'amd' ? 'bg-red-500/10 text-red-300' :
gpu.vendor === 'intel' ? 'bg-blue-500/10 text-blue-300' :
'bg-[var(--color-accent-light)] text-[var(--color-accent)]'"
x-text="gpu.vendor.toUpperCase()">
</span>
</div>
<span class="text-xs font-mono"
:class="gpu.usage_percent > 90 ? 'text-red-400' : gpu.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
x-text="`${gpu.usage_percent.toFixed(1)}%`"></span>
</div>
<!-- Progress Bar -->
<div class="w-full bg-[var(--color-bg-secondary)] rounded-full h-2 overflow-hidden">
<div class="h-full rounded-full transition-all duration-300"
:class="gpu.usage_percent > 90 ? 'bg-red-500' : gpu.usage_percent > 70 ? 'bg-yellow-500' : 'bg-[var(--color-success)]'"
:style="`width: ${gpu.usage_percent}%`"></div>
</div>
<div class="flex justify-between mt-1 text-[10px] text-[var(--color-text-secondary)]">
<span x-text="`Used: ${formatBytes(gpu.used_vram)}`"></span>
<span x-text="`Total: ${formatBytes(gpu.total_vram)}`"></span>
</div>
</div>
</template>
</div>
</template>
<!-- RAM Stats (when no GPU) -->
<template x-if="resourceData.type === 'ram' && resourceData.ram">
<div class="bg-[var(--color-bg-primary)] rounded p-3">
<div class="flex items-center justify-between mb-2">
<div class="flex items-center gap-2">
<span class="text-xs font-medium text-[var(--color-text-primary)]">System RAM</span>
<span class="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-[var(--color-accent-light)] text-[var(--color-accent)]">
RAM
</span>
</div>
<span class="text-xs font-mono"
:class="resourceData.ram.usage_percent > 90 ? 'text-red-400' : resourceData.ram.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
x-text="`${resourceData.ram.usage_percent.toFixed(1)}%`"></span>
</div>
<!-- Progress Bar -->
<div class="w-full bg-[var(--color-bg-secondary)] rounded-full h-2 overflow-hidden">
<div class="h-full rounded-full transition-all duration-300"
:class="resourceData.ram.usage_percent > 90 ? 'bg-red-500' : resourceData.ram.usage_percent > 70 ? 'bg-yellow-500' : 'bg-[var(--color-success)]'"
:style="`width: ${resourceData.ram.usage_percent}%`"></div>
</div>
<div class="flex justify-between mt-1 text-[10px] text-[var(--color-text-secondary)]">
<span x-text="`Used: ${formatBytes(resourceData.ram.used)}`"></span>
<span x-text="`Total: ${formatBytes(resourceData.ram.total)}`"></span>
</div>
</div>
</template>
<!-- Aggregate Stats (if multiple GPUs) -->
<template x-if="resourceData.type === 'gpu' && resourceData.aggregate.gpu_count > 1">
<div class="mt-3 pt-3 border-t border-[var(--color-primary-border)]/20">
<div class="flex items-center justify-between text-xs">
<span class="text-[var(--color-text-secondary)]">Total VRAM:</span>
<span class="font-mono text-[var(--color-text-primary)]"
x-text="`${formatBytes(resourceData.aggregate.used_memory)} / ${formatBytes(resourceData.aggregate.total_memory)} (${resourceData.aggregate.usage_percent.toFixed(1)}%)`"></span>
</div>
</div>
</template>
</div>
</template>
</div>
<!-- Models Section -->
<div class="models mt-8">
{{template "views/partials/inprogress" .}}
@@ -426,6 +526,47 @@
</div>
<script>
// Resource Monitor component (GPU if available, otherwise RAM).
// Alpine.js factory: polls /api/resources every 5 seconds and exposes the
// latest payload on `resourceData` for the template bindings.
function resourceMonitor() {
    return {
        resourceData: null,   // last JSON payload from /api/resources, or null before first fetch
        pollInterval: null,   // setInterval handle; null when not polling
        async fetchResourceData() {
            try {
                const response = await fetch('/api/resources');
                if (response.ok) {
                    this.resourceData = await response.json();
                }
            } catch (error) {
                // Transient network errors: keep the last known data visible.
                console.error('Error fetching resource data:', error);
            }
        },
        startPolling() {
            // Guard against a double start leaking an orphaned interval.
            this.stopPolling();
            // Initial fetch so the UI populates immediately
            this.fetchResourceData();
            // Poll every 5 seconds
            this.pollInterval = setInterval(() => this.fetchResourceData(), 5000);
        },
        stopPolling() {
            if (this.pollInterval) {
                clearInterval(this.pollInterval);
                // Null the handle so polling state can be re-checked reliably.
                this.pollInterval = null;
            }
        }
    }
}
// Helper function to format bytes
function formatBytes(bytes) {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
}
// Alpine.js component for index dashboard
function indexDashboard() {
return {
+121
View File
@@ -124,6 +124,90 @@
class="w-full px-3 py-2 bg-[var(--color-bg-primary)] border border-[var(--color-primary-border)]/20 rounded text-sm text-[var(--color-text-primary)] focus:outline-none focus:ring-2 focus:ring-[var(--color-primary-border)]"
:class="!settings.watchdog_busy_enabled ? 'opacity-50 cursor-not-allowed' : ''">
</div>
<!-- Watchdog Check Interval -->
<div>
<label class="block text-sm font-medium text-[var(--color-text-primary)] mb-2">Check Interval</label>
<p class="text-xs text-[var(--color-text-secondary)] mb-2">How often the watchdog checks backends and memory usage (e.g., 2s, 30s)</p>
<input type="text" x-model="settings.watchdog_interval"
:disabled="!settings.watchdog_enabled"
placeholder="2s"
class="w-full px-3 py-2 bg-[var(--color-bg-primary)] border border-[var(--color-primary-border)]/20 rounded text-sm text-[var(--color-text-primary)] focus:outline-none focus:ring-2 focus:ring-[var(--color-primary-border)]"
:class="!settings.watchdog_enabled ? 'opacity-50 cursor-not-allowed' : ''">
</div>
<!-- Memory Reclaimer Subsection -->
<div class="mt-6 pt-4 border-t border-[var(--color-primary-border)]/20">
<h3 class="text-md font-medium text-[var(--color-text-primary)] mb-3 flex items-center">
<i class="fas fa-memory mr-2 text-[var(--color-primary)] text-xs"></i>
Memory Reclaimer
</h3>
<p class="text-xs text-[var(--color-text-secondary)] mb-4">
Automatically evicts the least recently used backends when memory usage exceeds a threshold, based on GPU VRAM if available and on system RAM otherwise.
</p>
<!-- Memory Status Preview -->
<div x-data="resourceStatus()" x-init="fetchResource()" class="p-3 bg-[var(--color-bg-primary)] rounded mb-4">
<div class="flex items-center justify-between mb-2">
<span class="text-xs text-[var(--color-text-secondary)]" x-text="resourceData && resourceData.type === 'gpu' ? 'Current GPU Status' : 'Current Memory Status'">Current Memory Status</span>
<button @click="fetchResource()" class="text-[10px] text-[var(--color-primary)] hover:underline">
<i class="fas fa-sync-alt mr-1"></i>Refresh
</button>
</div>
<template x-if="resourceData && resourceData.available && resourceData.type === 'gpu'">
<div class="space-y-2">
<template x-for="gpu in resourceData.gpus" :key="gpu.index">
<div class="flex items-center justify-between text-xs">
<span class="text-[var(--color-text-primary)] truncate max-w-[200px]" x-text="gpu.name"></span>
<span class="font-mono"
:class="gpu.usage_percent > 90 ? 'text-red-400' : gpu.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
x-text="`${gpu.usage_percent.toFixed(1)}%`"></span>
</div>
</template>
</div>
</template>
<template x-if="resourceData && resourceData.available && resourceData.type === 'ram'">
<div class="flex items-center justify-between text-xs">
<span class="text-[var(--color-text-primary)]">System RAM</span>
<span class="font-mono"
:class="resourceData.ram.usage_percent > 90 ? 'text-red-400' : resourceData.ram.usage_percent > 70 ? 'text-yellow-400' : 'text-green-400'"
x-text="`${resourceData.ram.usage_percent.toFixed(1)}%`"></span>
</div>
</template>
<template x-if="!resourceData || !resourceData.available">
<p class="text-xs text-[var(--color-text-secondary)]">Memory monitoring unavailable</p>
</template>
</div>
<!-- Enable Memory Reclaimer -->
<div class="flex items-center justify-between mb-4">
<div>
<label class="text-sm font-medium text-[var(--color-text-primary)]">Enable Memory Reclaimer</label>
<p class="text-xs text-[var(--color-text-secondary)] mt-1">Evict backends when memory usage exceeds threshold</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.memory_reclaimer_enabled"
:disabled="!settings.watchdog_enabled"
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
<div class="w-11 h-6 bg-[var(--color-bg-primary)] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[var(--color-primary-light)] rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[var(--color-primary)]"></div>
</label>
</div>
<!-- Memory Reclaimer Threshold -->
<div>
<label class="block text-sm font-medium text-[var(--color-text-primary)] mb-2">Memory Threshold (%)</label>
<p class="text-xs text-[var(--color-text-secondary)] mb-2">When memory usage exceeds this, backends will be evicted (50-100%)</p>
<div class="flex items-center gap-3">
<input type="range" x-model="settings.memory_reclaimer_threshold_percent"
min="50" max="100" step="1"
:disabled="!settings.memory_reclaimer_enabled || !settings.watchdog_enabled"
class="flex-1 h-2 bg-[var(--color-bg-primary)] rounded-lg appearance-none cursor-pointer"
:class="(!settings.memory_reclaimer_enabled || !settings.watchdog_enabled) ? 'opacity-50' : ''">
<span class="text-sm font-mono text-[var(--color-text-primary)] w-12 text-right"
x-text="`${settings.memory_reclaimer_threshold_percent}%`"></span>
</div>
</div>
</div>
</div>
</div>
@@ -460,8 +544,12 @@ function settingsDashboard() {
watchdog_busy_enabled: false,
watchdog_idle_timeout: '15m',
watchdog_busy_timeout: '5m',
watchdog_interval: '2s',
max_active_backends: 0,
parallel_backend_requests: false,
memory_reclaimer_enabled: false,
memory_reclaimer_threshold: 0.95,
memory_reclaimer_threshold_percent: 95,
threads: 0,
context_size: 0,
f16: false,
@@ -498,8 +586,12 @@ function settingsDashboard() {
watchdog_busy_enabled: data.watchdog_busy_enabled,
watchdog_idle_timeout: data.watchdog_idle_timeout || '15m',
watchdog_busy_timeout: data.watchdog_busy_timeout || '5m',
watchdog_interval: data.watchdog_interval || '2s',
max_active_backends: data.max_active_backends || 0,
parallel_backend_requests: data.parallel_backend_requests,
memory_reclaimer_enabled: data.memory_reclaimer_enabled || false,
memory_reclaimer_threshold: data.memory_reclaimer_threshold || 0.95,
memory_reclaimer_threshold_percent: Math.round((data.memory_reclaimer_threshold || 0.95) * 100),
threads: data.threads || 0,
context_size: data.context_size || 0,
f16: data.f16 || false,
@@ -531,6 +623,7 @@ function settingsDashboard() {
if (!this.settings.watchdog_enabled) {
this.settings.watchdog_idle_enabled = false;
this.settings.watchdog_busy_enabled = false;
this.settings.memory_reclaimer_enabled = false;
}
},
@@ -564,12 +657,22 @@ function settingsDashboard() {
if (this.settings.watchdog_busy_timeout) {
payload.watchdog_busy_timeout = this.settings.watchdog_busy_timeout;
}
if (this.settings.watchdog_interval) {
payload.watchdog_interval = this.settings.watchdog_interval;
}
if (this.settings.max_active_backends !== undefined) {
payload.max_active_backends = parseInt(this.settings.max_active_backends) || 0;
}
if (this.settings.parallel_backend_requests !== undefined) {
payload.parallel_backend_requests = this.settings.parallel_backend_requests;
}
if (this.settings.memory_reclaimer_enabled !== undefined) {
payload.memory_reclaimer_enabled = this.settings.memory_reclaimer_enabled;
}
if (this.settings.memory_reclaimer_threshold_percent !== undefined) {
// Convert percent to decimal (0.0-1.0)
payload.memory_reclaimer_threshold = parseInt(this.settings.memory_reclaimer_threshold_percent) / 100;
}
if (this.settings.threads !== undefined) {
payload.threads = parseInt(this.settings.threads) || 0;
}
@@ -678,6 +781,24 @@ function settingsDashboard() {
}
}
}
// Resource Status component for settings page (GPU if available, otherwise RAM)
function resourceStatus() {
    return {
        resourceData: null,
        // Refresh the resource snapshot from the backend API; errors are
        // logged and leave the previous snapshot untouched.
        async fetchResource() {
            try {
                const res = await fetch('/api/resources');
                if (!res.ok) {
                    return;
                }
                this.resourceData = await res.json();
            } catch (error) {
                console.error('Error fetching resource data:', error);
            }
        }
    }
}
</script>
</body>
+7
View File
@@ -163,3 +163,10 @@ type ImportModelRequest struct {
URI string `json:"uri"`
Preferences json.RawMessage `json:"preferences,omitempty"`
}
// SettingsResponse is the response type for settings API operations.
type SettingsResponse struct {
	Success bool   `json:"success"`           // true when the operation completed
	Error   string `json:"error,omitempty"`   // error description, set only on failure
	Message string `json:"message,omitempty"` // optional human-readable status message
}
+167 -17
View File
@@ -5,6 +5,7 @@ import (
"sync"
"time"
"github.com/mudler/LocalAI/pkg/xsysinfo"
process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log"
)
@@ -17,6 +18,9 @@ import (
// force a reload of the model.
// The watchdog also supports LRU (Least Recently Used) eviction when a maximum
// number of active backends is configured.
// The watchdog also supports memory threshold monitoring - when memory usage
// (GPU VRAM if available, otherwise system RAM) exceeds the threshold,
// it will evict backends using the LRU strategy.
// The watchdog runs as a separate go routine,
// and the GRPC client talks to it via a channel to send status updates
type WatchDog struct {
@@ -32,26 +36,48 @@ type WatchDog struct {
busyCheck, idleCheck bool
lruLimit int // Maximum number of active backends (0 = unlimited)
// Memory reclaimer settings (works with GPU if available, otherwise RAM)
memoryReclaimerEnabled bool // Enable memory threshold monitoring
memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
watchdogInterval time.Duration
}
type ProcessManager interface {
ShutdownModel(modelName string) error
}
func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy, idle bool, lruLimit int) *WatchDog {
// NewWatchDog creates a new WatchDog with the provided options.
// Example usage:
//
// wd := NewWatchDog(
// WithProcessManager(pm),
// WithBusyTimeout(5*time.Minute),
// WithIdleTimeout(15*time.Minute),
// WithBusyCheck(true),
// WithIdleCheck(true),
// WithLRULimit(3),
// WithMemoryReclaimer(true, 0.95),
// )
func NewWatchDog(opts ...WatchDogOption) *WatchDog {
o := NewWatchDogOptions(opts...)
return &WatchDog{
timeout: timeoutBusy,
idletimeout: timeoutIdle,
pm: pm,
busyTime: make(map[string]time.Time),
idleTime: make(map[string]time.Time),
lastUsed: make(map[string]time.Time),
addressMap: make(map[string]*process.Process),
busyCheck: busy,
idleCheck: idle,
lruLimit: lruLimit,
addressModelMap: make(map[string]string),
stop: make(chan bool, 1),
timeout: o.busyTimeout,
idletimeout: o.idleTimeout,
pm: o.processManager,
busyTime: make(map[string]time.Time),
idleTime: make(map[string]time.Time),
lastUsed: make(map[string]time.Time),
addressMap: make(map[string]*process.Process),
busyCheck: o.busyCheck,
idleCheck: o.idleCheck,
lruLimit: o.lruLimit,
addressModelMap: make(map[string]string),
stop: make(chan bool, 1),
memoryReclaimerEnabled: o.memoryReclaimerEnabled,
memoryReclaimerThreshold: o.memoryReclaimerThreshold,
watchdogInterval: o.watchdogInterval,
}
}
@@ -69,6 +95,21 @@ func (wd *WatchDog) GetLRULimit() int {
return wd.lruLimit
}
// SetMemoryReclaimer updates the memory reclaimer settings dynamically.
// The new values are read under the lock by the watchdog loop on its next tick.
func (wd *WatchDog) SetMemoryReclaimer(enabled bool, threshold float64) {
	wd.Lock()
	wd.memoryReclaimerEnabled = enabled
	wd.memoryReclaimerThreshold = threshold
	wd.Unlock()
}
// GetMemoryReclaimerSettings returns the current memory reclaimer settings:
// whether it is enabled and the usage threshold (0.0-1.0 range).
func (wd *WatchDog) GetMemoryReclaimerSettings() (enabled bool, threshold float64) {
	wd.Lock()
	enabled = wd.memoryReclaimerEnabled
	threshold = wd.memoryReclaimerThreshold
	wd.Unlock()
	return enabled, threshold
}
func (wd *WatchDog) Shutdown() {
wd.Lock()
defer wd.Unlock()
@@ -202,17 +243,27 @@ func (wd *WatchDog) Run() {
case <-wd.stop:
log.Info().Msg("[WatchDog] Stopping watchdog")
return
case <-time.After(30 * time.Second):
if !wd.busyCheck && !wd.idleCheck {
case <-time.After(wd.watchdogInterval):
// Check if any monitoring is enabled
wd.Lock()
busyCheck := wd.busyCheck
idleCheck := wd.idleCheck
memoryCheck := wd.memoryReclaimerEnabled
wd.Unlock()
if !busyCheck && !idleCheck && !memoryCheck {
log.Info().Msg("[WatchDog] No checks enabled, stopping watchdog")
return
}
if wd.busyCheck {
if busyCheck {
wd.checkBusy()
}
if wd.idleCheck {
if idleCheck {
wd.checkIdle()
}
if memoryCheck {
wd.checkMemory()
}
}
}
}
@@ -278,6 +329,105 @@ func (wd *WatchDog) checkBusy() {
}
}
// checkMemory monitors memory usage (GPU VRAM if available, otherwise RAM)
// and evicts backends via LRU when usage exceeds the configured threshold.
func (wd *WatchDog) checkMemory() {
	// Snapshot settings under the lock; the probing below runs unlocked.
	wd.Lock()
	enabled := wd.memoryReclaimerEnabled
	threshold := wd.memoryReclaimerThreshold
	loadedModels := len(wd.addressModelMap)
	wd.Unlock()

	// Nothing to do if disabled, misconfigured, or no backends are loaded.
	if !enabled || threshold <= 0 || loadedModels == 0 {
		return
	}

	// Get current memory usage (GPU if available, otherwise RAM).
	aggregate := xsysinfo.GetResourceAggregateInfo()
	if aggregate.TotalMemory == 0 {
		log.Debug().Msg("[WatchDog] No memory information available for memory reclaimer")
		return
	}

	// Threshold is stored as 0.0-1.0; usage is reported as a percentage.
	thresholdPercent := threshold * 100
	memoryType := "RAM"
	if aggregate.GPUCount > 0 {
		memoryType = "GPU"
	}

	log.Debug().
		Str("type", memoryType).
		Float64("usage_percent", aggregate.UsagePercent).
		Float64("threshold_percent", thresholdPercent).
		Int("loaded_models", loadedModels).
		Msg("[WatchDog] Memory check")

	if aggregate.UsagePercent <= thresholdPercent {
		return
	}

	log.Warn().
		Str("type", memoryType).
		Float64("usage_percent", aggregate.UsagePercent).
		Float64("threshold_percent", thresholdPercent).
		Msg("[WatchDog] Memory usage exceeds threshold, evicting LRU backend")
	// Evict the least recently used model.
	wd.evictLRUModel()
}
// evictLRUModel evicts the least recently used model.
//
// The lock is held while selecting and untracking the victim, but released
// before the (potentially slow) backend shutdown call. Models never marked
// as used carry the zero time.Time and therefore sort first, making them
// the preferred eviction candidates.
func (wd *WatchDog) evictLRUModel() {
	wd.Lock()
	if len(wd.addressModelMap) == 0 {
		wd.Unlock()
		return
	}

	// Collect all tracked models with their last-used timestamps.
	// (The map is non-empty here and the lock is held, so the slice is too.)
	models := make([]modelUsageInfo, 0, len(wd.addressModelMap))
	for address, model := range wd.addressModelMap {
		models = append(models, modelUsageInfo{
			address:  address,
			model:    model,
			lastUsed: wd.lastUsed[address],
		})
	}

	// Sort by lastUsed time (oldest first) and pick the LRU entry.
	sort.Slice(models, func(i, j int) bool {
		return models[i].lastUsed.Before(models[j].lastUsed)
	})
	lruModel := models[0]

	log.Info().
		Str("model", lruModel.model).
		Time("lastUsed", lruModel.lastUsed).
		Msg("[WatchDog] Memory reclaimer evicting LRU model")

	// Untrack while still holding the lock, then release before shutdown.
	wd.untrack(lruModel.address)
	wd.Unlock()

	// Shutdown the model outside the lock.
	if err := wd.pm.ShutdownModel(lruModel.model); err != nil {
		log.Error().Err(err).Str("model", lruModel.model).Msg("[WatchDog] error shutting down model during memory reclamation")
	} else {
		log.Info().Str("model", lruModel.model).Msg("[WatchDog] Memory reclaimer eviction complete")
	}
}
func (wd *WatchDog) untrack(address string) {
delete(wd.busyTime, address)
delete(wd.idleTime, address)
+124
View File
@@ -0,0 +1,124 @@
package model
import (
"time"
)
// WatchDogOptions contains all configuration for the WatchDog.
type WatchDogOptions struct {
	// processManager shuts down backends selected for eviction.
	processManager ProcessManager

	// Timeout settings
	busyTimeout      time.Duration // busy timeout applied by the busy check
	idleTimeout      time.Duration // idle timeout applied by the idle check
	watchdogInterval time.Duration // how often the watchdog loop runs its checks

	// Check toggles
	busyCheck bool // enable the busy-backend check
	idleCheck bool // enable the idle-backend check

	// LRU settings
	lruLimit int // Maximum number of active backends (0 = unlimited)

	// Memory reclaimer settings (works with GPU if available, otherwise RAM)
	memoryReclaimerEnabled   bool    // Enable memory threshold monitoring
	memoryReclaimerThreshold float64 // Threshold 0.0-1.0 (e.g., 0.95 = 95%)
}
// WatchDogOption is a function that configures WatchDogOptions.
type WatchDogOption func(*WatchDogOptions)

// WithProcessManager sets the process manager used to shut down backends.
func WithProcessManager(pm ProcessManager) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.processManager = pm
	}
}

// WithBusyTimeout sets the busy timeout duration.
func WithBusyTimeout(timeout time.Duration) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.busyTimeout = timeout
	}
}

// WithIdleTimeout sets the idle timeout duration.
func WithIdleTimeout(timeout time.Duration) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.idleTimeout = timeout
	}
}
// WithWatchdogInterval sets how often the watchdog loop runs its checks.
// (Doc comment fixed: it previously named a non-existent "WithWatchdogCheck".)
func WithWatchdogInterval(interval time.Duration) WatchDogOption {
	return func(o *WatchDogOptions) {
		o.watchdogInterval = interval
	}
}
// WithBusyCheck enables or disables busy checking.
func WithBusyCheck(enabled bool) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.busyCheck = enabled
	}
}

// WithIdleCheck enables or disables idle checking.
func WithIdleCheck(enabled bool) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.idleCheck = enabled
	}
}

// WithLRULimit sets the maximum number of active backends (0 = unlimited).
func WithLRULimit(limit int) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.lruLimit = limit
	}
}

// WithMemoryReclaimer sets both the memory-reclaimer enabled flag and its
// threshold in one call. Works with GPU VRAM if available, otherwise uses
// system RAM.
func WithMemoryReclaimer(enabled bool, threshold float64) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.memoryReclaimerEnabled = enabled
		opts.memoryReclaimerThreshold = threshold
	}
}

// WithMemoryReclaimerEnabled enables or disables memory threshold monitoring.
func WithMemoryReclaimerEnabled(enabled bool) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.memoryReclaimerEnabled = enabled
	}
}

// WithMemoryReclaimerThreshold sets the memory threshold (0.0-1.0).
func WithMemoryReclaimerThreshold(threshold float64) WatchDogOption {
	return func(opts *WatchDogOptions) {
		opts.memoryReclaimerThreshold = threshold
	}
}
// DefaultWatchDogOptions returns the default watchdog configuration:
// both checks disabled, 5m busy / 15m idle timeouts, a 2s check interval,
// no LRU limit, and the memory reclaimer off with a 0.95 (95%) threshold.
func DefaultWatchDogOptions() *WatchDogOptions {
	// Fields left unset (busyCheck, idleCheck, lruLimit,
	// memoryReclaimerEnabled) deliberately take their zero values.
	defaults := &WatchDogOptions{
		busyTimeout:              5 * time.Minute,
		idleTimeout:              15 * time.Minute,
		watchdogInterval:         2 * time.Second,
		memoryReclaimerThreshold: 0.95,
	}
	return defaults
}
// NewWatchDogOptions creates WatchDogOptions by applying the provided
// options, in order, on top of the defaults.
func NewWatchDogOptions(opts ...WatchDogOption) *WatchDogOptions {
	options := DefaultWatchDogOptions()
	for _, apply := range opts {
		apply(options)
	}
	return options
}
+187
View File
@@ -0,0 +1,187 @@
package model_test
import (
"time"
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Test suite for the WatchDog functional-options API. Uses the
// newMockProcessManager / mockProcessManager helpers defined elsewhere in
// this test package.
var _ = Describe("WatchDogOptions", func() {
	Context("DefaultWatchDogOptions", func() {
		It("should return sensible defaults", func() {
			opts := model.DefaultWatchDogOptions()
			Expect(opts).ToNot(BeNil())
		})
	})

	Context("NewWatchDogOptions", func() {
		It("should apply options in order", func() {
			pm := newMockProcessManager()
			opts := model.NewWatchDogOptions(
				model.WithProcessManager(pm),
				model.WithBusyTimeout(10*time.Minute),
				model.WithIdleTimeout(20*time.Minute),
				model.WithBusyCheck(true),
				model.WithIdleCheck(true),
				model.WithLRULimit(5),
				model.WithMemoryReclaimer(true, 0.85),
			)
			Expect(opts).ToNot(BeNil())
		})

		It("should allow overriding options", func() {
			// Later options win: the options struct has unexported fields, so
			// the override is asserted through the WatchDog getter below.
			opts := model.NewWatchDogOptions(
				model.WithLRULimit(3),
				model.WithLRULimit(7), // override
			)
			// Create watchdog to verify
			wd := model.NewWatchDog(
				model.WithProcessManager(newMockProcessManager()),
				model.WithLRULimit(3),
				model.WithLRULimit(7), // override
			)
			Expect(wd.GetLRULimit()).To(Equal(7))
			Expect(opts).ToNot(BeNil())
		})
	})

	Context("Individual Options", func() {
		var pm *mockProcessManager
		BeforeEach(func() {
			pm = newMockProcessManager()
		})

		It("WithProcessManager should set process manager", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithBusyTimeout should set busy timeout", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithBusyTimeout(7*time.Minute),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithIdleTimeout should set idle timeout", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithIdleTimeout(25*time.Minute),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithBusyCheck should enable busy checking", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithBusyCheck(true),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithIdleCheck should enable idle checking", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithIdleCheck(true),
			)
			Expect(wd).ToNot(BeNil())
		})

		It("WithLRULimit should set LRU limit", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithLRULimit(10),
			)
			Expect(wd.GetLRULimit()).To(Equal(10))
		})

		It("WithMemoryReclaimer should set both enabled and threshold", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithMemoryReclaimer(true, 0.88),
			)
			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeTrue())
			Expect(threshold).To(Equal(0.88))
		})

		It("WithMemoryReclaimerEnabled should set enabled flag only", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithMemoryReclaimerEnabled(true),
			)
			enabled, _ := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeTrue())
		})

		It("WithMemoryReclaimerThreshold should set threshold only", func() {
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithMemoryReclaimerThreshold(0.75),
			)
			_, threshold := wd.GetMemoryReclaimerSettings()
			Expect(threshold).To(Equal(0.75))
		})
	})

	Context("Option Combinations", func() {
		It("should work with all options combined", func() {
			pm := newMockProcessManager()
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithBusyTimeout(3*time.Minute),
				model.WithIdleTimeout(10*time.Minute),
				model.WithBusyCheck(true),
				model.WithIdleCheck(true),
				model.WithLRULimit(2),
				model.WithMemoryReclaimerEnabled(true),
				model.WithMemoryReclaimerThreshold(0.92),
			)
			Expect(wd).ToNot(BeNil())
			Expect(wd.GetLRULimit()).To(Equal(2))
			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeTrue())
			Expect(threshold).To(Equal(0.92))
		})

		It("should work with no options (all defaults)", func() {
			wd := model.NewWatchDog()
			Expect(wd).ToNot(BeNil())
			Expect(wd.GetLRULimit()).To(Equal(0))
			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeFalse())
			Expect(threshold).To(Equal(0.95)) // default
		})

		It("should allow partial configuration", func() {
			pm := newMockProcessManager()
			wd := model.NewWatchDog(
				model.WithProcessManager(pm),
				model.WithLRULimit(3),
			)
			Expect(wd).ToNot(BeNil())
			Expect(wd.GetLRULimit()).To(Equal(3))
			// Memory reclaimer should use defaults
			enabled, threshold := wd.GetMemoryReclaimerSettings()
			Expect(enabled).To(BeFalse())
			Expect(threshold).To(Equal(0.95))
		})
	})
})
+105 -6
View File
@@ -53,25 +53,82 @@ var _ = Describe("WatchDog", func() {
Context("LRU Limit", func() {
It("should create watchdog with LRU limit", func() {
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2)
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithBusyTimeout(5*time.Minute),
model.WithIdleTimeout(15*time.Minute),
model.WithLRULimit(2),
)
Expect(wd.GetLRULimit()).To(Equal(2))
})
It("should allow updating LRU limit dynamically", func() {
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2)
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(2),
)
wd.SetLRULimit(5)
Expect(wd.GetLRULimit()).To(Equal(5))
})
It("should return 0 for disabled LRU", func() {
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 0)
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithLRULimit(0),
)
Expect(wd.GetLRULimit()).To(Equal(0))
})
})
Context("Memory Reclaimer Options", func() {
It("should create watchdog with memory reclaimer settings", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithMemoryReclaimer(true, 0.85),
)
enabled, threshold := wd.GetMemoryReclaimerSettings()
Expect(enabled).To(BeTrue())
Expect(threshold).To(Equal(0.85))
})
It("should allow setting memory reclaimer via separate options", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithMemoryReclaimerEnabled(true),
model.WithMemoryReclaimerThreshold(0.90),
)
enabled, threshold := wd.GetMemoryReclaimerSettings()
Expect(enabled).To(BeTrue())
Expect(threshold).To(Equal(0.90))
})
It("should use default threshold when not specified", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
)
_, threshold := wd.GetMemoryReclaimerSettings()
Expect(threshold).To(Equal(0.95)) // default
})
It("should allow updating memory reclaimer settings dynamically", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
)
wd.SetMemoryReclaimer(true, 0.80)
enabled, threshold := wd.GetMemoryReclaimerSettings()
Expect(enabled).To(BeTrue())
Expect(threshold).To(Equal(0.80))
})
})
Context("Model Tracking", func() {
BeforeEach(func() {
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 3)
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithBusyTimeout(5*time.Minute),
model.WithIdleTimeout(15*time.Minute),
model.WithLRULimit(3),
)
})
It("should track loaded models count", func() {
@@ -108,7 +165,12 @@ var _ = Describe("WatchDog", func() {
Context("EnforceLRULimit", func() {
BeforeEach(func() {
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 2)
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithBusyTimeout(5*time.Minute),
model.WithIdleTimeout(15*time.Minute),
model.WithLRULimit(2),
)
})
It("should not evict when under limit", func() {
@@ -218,7 +280,12 @@ var _ = Describe("WatchDog", func() {
Context("Single Backend Mode (LRU=1)", func() {
BeforeEach(func() {
wd = model.NewWatchDog(pm, 5*time.Minute, 15*time.Minute, false, false, 1)
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithBusyTimeout(5*time.Minute),
model.WithIdleTimeout(15*time.Minute),
model.WithLRULimit(1),
)
})
It("should evict existing model when loading new one", func() {
@@ -241,4 +308,36 @@ var _ = Describe("WatchDog", func() {
Expect(len(pm.getShutdownCalls())).To(Equal(5))
})
})
Context("Functional Options", func() {
It("should use default options when none provided", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
)
Expect(wd.GetLRULimit()).To(Equal(0))
enabled, threshold := wd.GetMemoryReclaimerSettings()
Expect(enabled).To(BeFalse())
Expect(threshold).To(Equal(0.95))
})
It("should allow combining multiple options", func() {
wd = model.NewWatchDog(
model.WithProcessManager(pm),
model.WithBusyTimeout(10*time.Minute),
model.WithIdleTimeout(30*time.Minute),
model.WithBusyCheck(true),
model.WithIdleCheck(true),
model.WithLRULimit(5),
model.WithMemoryReclaimerEnabled(true),
model.WithMemoryReclaimerThreshold(0.80),
)
Expect(wd.GetLRULimit()).To(Equal(5))
enabled, threshold := wd.GetMemoryReclaimerSettings()
Expect(enabled).To(BeTrue())
Expect(threshold).To(Equal(0.80))
})
})
})
+699
View File
@@ -1,13 +1,83 @@
package xsysinfo
import (
"bytes"
"encoding/json"
"os/exec"
"strconv"
"strings"
"sync"
"github.com/jaypipes/ghw"
"github.com/jaypipes/ghw/pkg/gpu"
"github.com/rs/zerolog/log"
)
// GPU vendor constants
const (
	VendorNVIDIA  = "nvidia"
	VendorAMD     = "amd"
	VendorIntel   = "intel"
	VendorVulkan  = "vulkan"
	VendorUnknown = "unknown"
)

// UnifiedMemoryDevices is a list of GPU device name patterns that use unified memory
// (shared with system RAM). When these devices are detected and report N/A for VRAM,
// we fall back to system RAM information.
// Matching is a case-insensitive substring check (see isUnifiedMemoryDevice).
var UnifiedMemoryDevices = []string{
	"NVIDIA GB10",
	"GB10",
	// Add more unified memory devices here as needed
}

// GPUMemoryInfo contains real-time GPU memory usage information for one GPU.
type GPUMemoryInfo struct {
	Index        int     `json:"index"`
	Name         string  `json:"name"`
	Vendor       string  `json:"vendor"`
	TotalVRAM    uint64  `json:"total_vram"`    // Total VRAM in bytes
	UsedVRAM     uint64  `json:"used_vram"`     // Used VRAM in bytes
	FreeVRAM     uint64  `json:"free_vram"`     // Free VRAM in bytes
	UsagePercent float64 `json:"usage_percent"` // Usage as percentage (0-100)
}

// GPUAggregateInfo contains aggregate GPU information summed across all GPUs.
type GPUAggregateInfo struct {
	TotalVRAM    uint64  `json:"total_vram"`
	UsedVRAM     uint64  `json:"used_vram"`
	FreeVRAM     uint64  `json:"free_vram"`
	UsagePercent float64 `json:"usage_percent"`
	GPUCount     int     `json:"gpu_count"`
}

// SystemRAMInfo contains system RAM usage information (all values in bytes).
type SystemRAMInfo struct {
	Total        uint64  `json:"total"`
	Used         uint64  `json:"used"`
	Free         uint64  `json:"free"`
	Available    uint64  `json:"available"`
	UsagePercent float64 `json:"usage_percent"`
}

// AggregateMemoryInfo contains aggregate memory information (unified for GPU/RAM)
type AggregateMemoryInfo struct {
	TotalMemory  uint64  `json:"total_memory"`
	UsedMemory   uint64  `json:"used_memory"`
	FreeMemory   uint64  `json:"free_memory"`
	UsagePercent float64 `json:"usage_percent"`
	GPUCount     int     `json:"gpu_count"` // 0 when the values come from system RAM
}

// ResourceInfo represents unified memory resource information
type ResourceInfo struct {
	Type      string              `json:"type"` // "gpu" or "ram"
	Available bool                `json:"available"`
	GPUs      []GPUMemoryInfo     `json:"gpus,omitempty"`
	RAM       *SystemRAMInfo      `json:"ram,omitempty"`
	Aggregate AggregateMemoryInfo `json:"aggregate"`
}
var (
gpuCache []*gpu.GraphicsCard
gpuCacheOnce sync.Once
@@ -60,3 +130,632 @@ func HasGPU(vendor string) bool {
}
return false
}
// isUnifiedMemoryDevice reports whether the given GPU name matches any known
// unified memory device pattern (case-insensitive substring match).
func isUnifiedMemoryDevice(gpuName string) bool {
	needle := strings.ToUpper(gpuName)
	for _, pattern := range UnifiedMemoryDevices {
		if strings.Contains(needle, strings.ToUpper(pattern)) {
			return true
		}
	}
	return false
}
// getSystemRAM returns system RAM information using ghw.
//
// NOTE: ghw only exposes the total usable memory, not live used/free values,
// so this function deliberately reports used=0 and free=total. Callers that
// compute a usage percentage from these values will therefore see 0%.
func getSystemRAM() (total, used, free uint64, err error) {
	memory, err := ghw.Memory()
	if err != nil {
		return 0, 0, 0, err
	}
	total = uint64(memory.TotalUsableBytes)
	// ghw doesn't provide used/free directly, but we can estimate
	// For unified memory GPUs, we report total system RAM as available VRAM
	// since the GPU can potentially use all of it
	free = total
	used = 0
	return total, used, free, nil
}
// GetGPUMemoryUsage returns real-time GPU memory usage for all detected GPUs.
// It tries multiple vendor-specific tools in order: NVIDIA, AMD, Intel, Vulkan.
// Returns an empty slice if no GPU monitoring tools are available.
func GetGPUMemoryUsage() []GPUMemoryInfo {
	gpus := getNVIDIAGPUMemory()

	// XXX: Note - I could not test this with AMD and Intel GPUs, so I'm not sure if it works and it was added with the help of AI.
	// Append AMD (ROCm) and Intel results, re-indexing each batch so indices
	// stay globally unique across vendors.
	for _, vendorGPUs := range [][]GPUMemoryInfo{getAMDGPUMemory(), getIntelGPUMemory()} {
		base := len(gpus)
		for i := range vendorGPUs {
			vendorGPUs[i].Index = base + i
		}
		gpus = append(gpus, vendorGPUs...)
	}

	// Vulkan is only consulted as a fallback for device detection
	// (limited real-time data) when nothing else reported a GPU.
	if len(gpus) == 0 {
		gpus = append(gpus, getVulkanGPUMemory()...)
	}
	return gpus
}
// GetGPUAggregateInfo returns aggregate GPU information summed across all
// detected GPUs. UsagePercent is computed over the combined VRAM totals and
// is 0 when no VRAM is reported.
func GetGPUAggregateInfo() GPUAggregateInfo {
	gpus := GetGPUMemoryUsage()

	var aggregate GPUAggregateInfo
	aggregate.GPUCount = len(gpus)
	// Loop variable named "g" (not "gpu") to avoid shadowing the imported
	// ghw "gpu" package.
	for _, g := range gpus {
		aggregate.TotalVRAM += g.TotalVRAM
		aggregate.UsedVRAM += g.UsedVRAM
		aggregate.FreeVRAM += g.FreeVRAM
	}
	if aggregate.TotalVRAM > 0 {
		aggregate.UsagePercent = float64(aggregate.UsedVRAM) / float64(aggregate.TotalVRAM) * 100
	}
	return aggregate
}
// getNVIDIAGPUMemory queries NVIDIA GPUs using nvidia-smi
//
// Each output line is parsed as "index, name, total, used, free" (MiB values,
// nounits). Devices that report "[N/A]" for memory either fall back to system
// RAM (unified-memory parts such as GB10) or are recorded with zeroed memory.
func getNVIDIAGPUMemory() []GPUMemoryInfo {
	// Check if nvidia-smi is available
	if _, err := exec.LookPath("nvidia-smi"); err != nil {
		return nil
	}
	cmd := exec.Command("nvidia-smi",
		"--query-gpu=index,name,memory.total,memory.used,memory.free",
		"--format=csv,noheader,nounits")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("nvidia-smi failed")
		return nil
	}
	var gpus []GPUMemoryInfo
	for _, line := range strings.Split(strings.TrimSpace(stdout.String()), "\n") {
		if line == "" {
			continue
		}
		parts := strings.Split(line, ", ")
		if len(parts) < 5 {
			continue
		}
		idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
		name := strings.TrimSpace(parts[1])
		totalStr := strings.TrimSpace(parts[2])
		usedStr := strings.TrimSpace(parts[3])
		freeStr := strings.TrimSpace(parts[4])

		var totalBytes, usedBytes, freeBytes uint64
		// Check if memory values are N/A (unified memory devices like GB10)
		isNA := totalStr == "[N/A]" || usedStr == "[N/A]" || freeStr == "[N/A]"
		switch {
		case isNA && isUnifiedMemoryDevice(name):
			// Unified memory device - fall back to system RAM
			sysTotal, sysUsed, sysFree, err := getSystemRAM()
			if err != nil {
				log.Debug().Err(err).Str("device", name).Msg("failed to get system RAM for unified memory device")
				// Still add the GPU but with zero memory info
				gpus = append(gpus, zeroNVIDIAGPU(idx, name))
				continue
			}
			totalBytes, usedBytes, freeBytes = sysTotal, sysUsed, sysFree
			log.Debug().
				Str("device", name).
				Uint64("system_ram_bytes", totalBytes).
				Msg("using system RAM for unified memory GPU")
		case isNA:
			// Unknown device with N/A values - record it without memory info
			log.Debug().Str("device", name).Msg("nvidia-smi returned N/A for unknown device")
			gpus = append(gpus, zeroNVIDIAGPU(idx, name))
			continue
		default:
			// Normal GPU with dedicated VRAM; nvidia-smi reports MiB,
			// convert to bytes.
			totalMB, _ := strconv.ParseFloat(totalStr, 64)
			usedMB, _ := strconv.ParseFloat(usedStr, 64)
			freeMB, _ := strconv.ParseFloat(freeStr, 64)
			totalBytes = uint64(totalMB * 1024 * 1024)
			usedBytes = uint64(usedMB * 1024 * 1024)
			freeBytes = uint64(freeMB * 1024 * 1024)
		}

		var usagePercent float64
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}
		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         name,
			Vendor:       VendorNVIDIA,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}
	return gpus
}

// zeroNVIDIAGPU builds a GPUMemoryInfo entry for an NVIDIA device whose
// memory statistics could not be determined (all memory fields zero).
func zeroNVIDIAGPU(idx int, name string) GPUMemoryInfo {
	return GPUMemoryInfo{
		Index:  idx,
		Name:   name,
		Vendor: VendorNVIDIA,
	}
}
// getAMDGPUMemory queries AMD GPUs using rocm-smi
//
// Expected CSV layout (rocm-smi --showmeminfo vram --csv):
//
//	device,VRAM Total Memory (B),VRAM Total Used Memory (B)
//	card0,17163091968,10371072
//
// i.e. total comes BEFORE used; the previous code read them in the opposite
// order, which inverted the reported usage.
// NOTE(review): untested on real AMD hardware (see XXX above) — confirm the
// column order against the installed rocm-smi version.
func getAMDGPUMemory() []GPUMemoryInfo {
	// Check if rocm-smi is available
	if _, err := exec.LookPath("rocm-smi"); err != nil {
		return nil
	}
	cmd := exec.Command("rocm-smi", "--showmeminfo", "vram", "--csv")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("rocm-smi failed")
		return nil
	}
	var gpus []GPUMemoryInfo
	lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
	for i, line := range lines {
		// First line is the CSV header.
		if i == 0 || line == "" {
			continue
		}
		parts := strings.Split(line, ",")
		if len(parts) < 3 {
			continue
		}
		// Parse GPU index from first column when it uses the "GPU[N]" format;
		// other device names (e.g. "card0") fall back to index 0. The caller
		// re-bases indices anyway.
		idxStr := strings.TrimSpace(parts[0])
		idx := 0
		if strings.HasPrefix(idxStr, "GPU[") {
			idx, _ = strconv.Atoi(strings.TrimSuffix(strings.TrimPrefix(idxStr, "GPU["), "]"))
		}
		// Column order: total first, then used (see doc comment above).
		totalBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64)
		usedBytes, _ := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64)
		// Some rocm-smi versions report MB rather than bytes; normalize.
		// NOTE(review): heuristic — any total below ~1MB is assumed to be MB.
		if totalBytes < 1000000 {
			usedBytes *= 1024 * 1024
			totalBytes *= 1024 * 1024
		}
		freeBytes := uint64(0)
		if totalBytes > usedBytes {
			freeBytes = totalBytes - usedBytes
		}
		usagePercent := 0.0
		if totalBytes > 0 {
			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
		}
		gpus = append(gpus, GPUMemoryInfo{
			Index:        idx,
			Name:         "AMD GPU",
			Vendor:       VendorAMD,
			TotalVRAM:    totalBytes,
			UsedVRAM:     usedBytes,
			FreeVRAM:     freeBytes,
			UsagePercent: usagePercent,
		})
	}
	return gpus
}
// getIntelGPUMemory queries Intel GPUs using xpu-smi or intel_gpu_top
func getIntelGPUMemory() []GPUMemoryInfo {
	// Prefer xpu-smi (Intel's official GPU management tool); fall back to
	// intel_gpu_top only when it yields nothing.
	if gpus := getIntelXPUSMI(); len(gpus) > 0 {
		return gpus
	}
	return getIntelGPUTop()
}
// getIntelXPUSMI queries Intel GPUs using xpu-smi
func getIntelXPUSMI() []GPUMemoryInfo {
	if _, err := exec.LookPath("xpu-smi"); err != nil {
		return nil
	}
	// Enumerate devices first.
	var stdout, stderr bytes.Buffer
	discovery := exec.Command("xpu-smi", "discovery", "--json")
	discovery.Stdout = &stdout
	discovery.Stderr = &stderr
	if err := discovery.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("xpu-smi discovery failed")
		return nil
	}
	// Only the fields we need from the discovery JSON.
	var discovered struct {
		DeviceList []struct {
			DeviceID                int    `json:"device_id"`
			DeviceName              string `json:"device_name"`
			VendorName              string `json:"vendor_name"`
			MemoryPhysicalSizeBytes uint64 `json:"memory_physical_size_byte"`
		} `json:"device_list"`
	}
	if err := json.Unmarshal(stdout.Bytes(), &discovered); err != nil {
		log.Debug().Err(err).Msg("failed to parse xpu-smi discovery output")
		return nil
	}
	var gpus []GPUMemoryInfo
	for _, dev := range discovered.DeviceList {
		// Ask for live stats per device; any failure just leaves used at zero.
		var used uint64
		var statsOut bytes.Buffer
		statsCmd := exec.Command("xpu-smi", "stats", "-d", strconv.Itoa(dev.DeviceID), "--json")
		statsCmd.Stdout = &statsOut
		if err := statsCmd.Run(); err == nil {
			var stats struct {
				DeviceID   int    `json:"device_id"`
				MemoryUsed uint64 `json:"memory_used"`
			}
			if json.Unmarshal(statsOut.Bytes(), &stats) == nil {
				used = stats.MemoryUsed
			}
		}
		total := dev.MemoryPhysicalSizeBytes
		var free uint64
		if total > used {
			free = total - used
		}
		var pct float64
		if total > 0 {
			pct = float64(used) / float64(total) * 100
		}
		gpus = append(gpus, GPUMemoryInfo{
			Index:        dev.DeviceID,
			Name:         dev.DeviceName,
			Vendor:       VendorIntel,
			TotalVRAM:    total,
			UsedVRAM:     used,
			FreeVRAM:     free,
			UsagePercent: pct,
		})
	}
	return gpus
}
// getIntelGPUTop queries Intel GPUs using intel_gpu_top
//
// It always returns nil: intel_gpu_top's JSON output reports engine
// utilization, not memory usage, so there is no VRAM data to collect here.
// The original implementation spawned the tool with a one-second sampling
// window ("-J -s 1"), parsed its NDJSON, and then unconditionally discarded
// the result — that dead (and slow, ~1s blocking) subprocess call is removed
// while keeping the identical return value on every path.
func getIntelGPUTop() []GPUMemoryInfo {
	return nil
}
// GetSystemRAMInfo returns real-time system RAM usage
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
	memory, err := ghw.Memory()
	if err != nil {
		return nil, err
	}
	total := uint64(memory.TotalUsableBytes)
	// Prefer /proc/meminfo figures on Linux; the helper degrades gracefully
	// elsewhere.
	used, available, free := getDetailedMemoryInfo(total)
	info := &SystemRAMInfo{
		Total:     total,
		Used:      used,
		Free:      free,
		Available: available,
	}
	if total != 0 {
		info.UsagePercent = float64(used) / float64(total) * 100
	}
	return info, nil
}
// getDetailedMemoryInfo tries to get detailed memory info from /proc/meminfo on Linux
// Returns used, available, and free memory in bytes
func getDetailedMemoryInfo(total uint64) (used, available, free uint64) {
	// NOTE(review): shelling out to cat is a workaround; os.ReadFile on
	// /proc/meminfo would do the same without a subprocess.
	var stdout bytes.Buffer
	cmd := exec.Command("cat", "/proc/meminfo")
	cmd.Stdout = &stdout
	if err := cmd.Run(); err != nil {
		// No /proc/meminfo (non-Linux): treat all memory as available.
		return 0, total, total
	}

	// Each relevant line looks like "MemFree: 12345 kB"; collect the numeric
	// fields, converted from kB to bytes.
	stats := make(map[string]uint64)
	for _, line := range strings.Split(stdout.String(), "\n") {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		if kb, err := strconv.ParseUint(fields[1], 10, 64); err == nil {
			stats[strings.TrimSuffix(fields[0], ":")] = kb * 1024
		}
	}

	// MemAvailable (the kernel's own estimate) is preferred; otherwise
	// approximate it as free + buffers + page cache.
	var ok bool
	if available, ok = stats["MemAvailable"]; !ok {
		available = stats["MemFree"] + stats["Buffers"] + stats["Cached"]
	}
	free = stats["MemFree"]
	// Used is whatever is not available; clamp at zero to avoid underflow.
	if total > available {
		used = total - available
	}
	return used, available, free
}
// GetResourceInfo returns GPU info if available, otherwise system RAM info
func GetResourceInfo() ResourceInfo {
	if gpus := GetGPUMemoryUsage(); len(gpus) > 0 {
		// Aggregate in place instead of calling GetGPUAggregateInfo, which
		// would re-run every subprocess-based vendor probe a second time.
		var agg AggregateMemoryInfo
		agg.GPUCount = len(gpus)
		for _, gpu := range gpus {
			agg.TotalMemory += gpu.TotalVRAM
			agg.UsedMemory += gpu.UsedVRAM
			agg.FreeMemory += gpu.FreeVRAM
		}
		if agg.TotalMemory > 0 {
			agg.UsagePercent = float64(agg.UsedMemory) / float64(agg.TotalMemory) * 100
		}
		return ResourceInfo{
			Type:      "gpu",
			Available: true,
			GPUs:      gpus,
			RAM:       nil,
			Aggregate: agg,
		}
	}
	// No GPU - fall back to system RAM
	ramInfo, err := GetSystemRAMInfo()
	if err != nil {
		log.Debug().Err(err).Msg("failed to get system RAM info")
		return ResourceInfo{
			Type:      "ram",
			Available: false,
			Aggregate: AggregateMemoryInfo{},
		}
	}
	return ResourceInfo{
		Type:      "ram",
		Available: true,
		GPUs:      nil,
		RAM:       ramInfo,
		Aggregate: AggregateMemoryInfo{
			TotalMemory:  ramInfo.Total,
			UsedMemory:   ramInfo.Used,
			FreeMemory:   ramInfo.Free,
			UsagePercent: ramInfo.UsagePercent,
			GPUCount:     0,
		},
	}
}
// GetResourceAggregateInfo returns aggregate memory info (GPU if available, otherwise RAM)
// This is used by the memory reclaimer to check memory usage
func GetResourceAggregateInfo() AggregateMemoryInfo {
	return GetResourceInfo().Aggregate
}
// getVulkanGPUMemory queries GPUs using vulkaninfo as a fallback
// Note: Vulkan provides memory heap info but not real-time usage
func getVulkanGPUMemory() []GPUMemoryInfo {
	if _, err := exec.LookPath("vulkaninfo"); err != nil {
		return nil
	}
	var stdout, stderr bytes.Buffer
	cmd := exec.Command("vulkaninfo", "--json")
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		log.Debug().Err(err).Str("stderr", stderr.String()).Msg("vulkaninfo failed")
		return nil
	}
	// Only the fields we need from vulkaninfo's JSON dump.
	var parsed struct {
		VkPhysicalDevices []struct {
			DeviceName                       string `json:"deviceName"`
			DeviceType                       string `json:"deviceType"`
			VkPhysicalDeviceMemoryProperties struct {
				MemoryHeaps []struct {
					Flags int    `json:"flags"`
					Size  uint64 `json:"size"`
				} `json:"memoryHeaps"`
			} `json:"VkPhysicalDeviceMemoryProperties"`
		} `json:"VkPhysicalDevices"`
	}
	if err := json.Unmarshal(stdout.Bytes(), &parsed); err != nil {
		log.Debug().Err(err).Msg("failed to parse vulkaninfo output")
		return nil
	}
	var gpus []GPUMemoryInfo
	for i, dev := range parsed.VkPhysicalDevices {
		// CPU "devices" are not GPUs; skip them.
		if dev.DeviceType == "VK_PHYSICAL_DEVICE_TYPE_CPU" {
			continue
		}
		// Total VRAM = sum of heaps flagged VK_MEMORY_HEAP_DEVICE_LOCAL_BIT (bit 0).
		var vram uint64
		for _, heap := range dev.VkPhysicalDeviceMemoryProperties.MemoryHeaps {
			if heap.Flags&1 != 0 {
				vram += heap.Size
			}
		}
		if vram == 0 {
			continue
		}
		gpus = append(gpus, GPUMemoryInfo{
			Index:        i,
			Name:         dev.DeviceName,
			Vendor:       VendorVulkan,
			TotalVRAM:    vram,
			UsedVRAM:     0, // Vulkan doesn't provide real-time usage
			FreeVRAM:     vram,
			UsagePercent: 0,
		})
	}
	return gpus
}