fix: improve RAM estimation (#7603)

* fix: default the watchdog interval to 10 seconds if the runtime setting is malformed

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: use gosigar for RAM estimation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-12-16 10:18:36 +01:00
committed by GitHub
parent b841a495da
commit 878c9d46d5
4 changed files with 21 additions and 67 deletions

View File

@@ -278,6 +278,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
options.WatchDogInterval = dur
} else {
log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json")
options.WatchDogInterval = 10 * time.Second
}
}
}

3
go.mod
View File

@@ -62,6 +62,7 @@ require (
)
require (
github.com/cloudfoundry/gosigar v1.3.112 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/labstack/gommon v0.4.2 // indirect
github.com/swaggo/files/v2 v2.0.2 // indirect
@@ -203,7 +204,7 @@ require (
github.com/google/btree v1.1.3 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/gopacket v1.1.19 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/gorilla/websocket v1.5.3
github.com/hashicorp/golang-lru v1.0.2 // indirect

4
go.sum
View File

@@ -83,6 +83,8 @@ github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNE
github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudfoundry/gosigar v1.3.112 h1:cGGZ2sj1GKyiwSxzouIR7ATNbgAkC4zqwWDxYQ2ObPc=
github.com/cloudfoundry/gosigar v1.3.112/go.mod h1:Ldc+tVw3dfqPwasZ9om1LT2aRwpjC1eFfbWKfv2WbDI=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
@@ -275,6 +277,8 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 h1:xhMrHhTJ6zxu3gA4enFM9MLn9AY7613teCdFnlUVbSQ=
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=

View File

@@ -3,11 +3,13 @@ package xsysinfo
import (
"bytes"
"encoding/json"
"fmt"
"os/exec"
"strconv"
"strings"
"sync"
sigar "github.com/cloudfoundry/gosigar"
"github.com/jaypipes/ghw"
"github.com/jaypipes/ghw/pkg/gpu"
"github.com/rs/zerolog/log"
@@ -144,17 +146,15 @@ func isUnifiedMemoryDevice(gpuName string) bool {
// getSystemRAM returns system RAM information using ghw
func getSystemRAM() (total, used, free uint64, err error) {
memory, err := ghw.Memory()
if err != nil {
return 0, 0, 0, err
}
mem := sigar.Mem{}
//swap := sigar.Swap{}
total = uint64(memory.TotalUsableBytes)
// ghw doesn't provide used/free directly, but we can estimate
// For unified memory GPUs, we report total system RAM as available VRAM
// since the GPU can potentially use all of it
free = total
used = 0
mem.Get() //nolint:errcheck
//swap.Get() //nolint:errcheck
total = mem.Total
free = mem.ActualFree
used = mem.ActualUsed
return total, used, free, nil
}
@@ -560,79 +560,27 @@ func getIntelGPUTop() []GPUMemoryInfo {
// GetSystemRAMInfo returns real-time system RAM usage.
//
// Total, Used and Free are taken from getSystemRAM. Available is derived as
// total minus used, and UsagePercent is used/total*100 (left at 0 when total
// is 0 to avoid dividing by zero). If getSystemRAM fails, the result is nil
// and the underlying error is returned wrapped with context.
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
	total, used, free, err := getSystemRAM()
	if err != nil {
		return nil, fmt.Errorf("reading system RAM: %w", err)
	}

	usagePercent := 0.0
	if total > 0 {
		usagePercent = float64(used) / float64(total) * 100
	}

	return &SystemRAMInfo{
		Total:        total,
		Used:         used,
		Free:         free,
		Available:    total - used,
		UsagePercent: usagePercent,
	}, nil
}
// getDetailedMemoryInfo derives memory usage figures from /proc/meminfo.
//
// It returns used, available and free memory in bytes. available is the
// MemAvailable entry when present, otherwise MemFree + Buffers + Cached;
// free is MemFree; used is total minus available, clamped at zero. When
// /proc/meminfo cannot be read (the `cat` invocation fails), it reports
// nothing used and the whole of total as both available and free.
func getDetailedMemoryInfo(total uint64) (used, available, free uint64) {
	var out bytes.Buffer
	meminfo := exec.Command("cat", "/proc/meminfo")
	meminfo.Stdout = &out
	if runErr := meminfo.Run(); runErr != nil {
		// No meminfo to consult: treat all memory as available.
		return 0, total, total
	}

	// Each useful row looks like "Key:   <number> kB"; store values in bytes.
	fields := make(map[string]uint64)
	for _, row := range strings.Split(out.String(), "\n") {
		cols := strings.Fields(row)
		if len(cols) < 2 {
			continue
		}
		kb, parseErr := strconv.ParseUint(cols[1], 10, 64)
		if parseErr != nil {
			continue
		}
		fields[strings.TrimSuffix(cols[0], ":")] = kb * 1024
	}

	free = fields["MemFree"]

	// Start from the manual estimate and let MemAvailable override it.
	available = fields["MemFree"] + fields["Buffers"] + fields["Cached"]
	if kernelEstimate, found := fields["MemAvailable"]; found {
		available = kernelEstimate
	}

	// used stays at its zero value unless total exceeds what is available.
	if total > available {
		used = total - available
	}

	return used, available, free
}
// GetResourceInfo returns GPU info if available, otherwise system RAM info
func GetResourceInfo() ResourceInfo {
gpus := GetGPUMemoryUsage()