From 878c9d46d5b6b93e69eeb6561c4c75c1fc1f71bc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 16 Dec 2025 10:18:36 +0100 Subject: [PATCH] fix: improve ram estimation (#7603) * fix: default to 10seconds of watchdog if runtime setting is malformed Signed-off-by: Ettore Di Giacinto * fix: use gosigar for RAM estimation Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/application/startup.go | 1 + go.mod | 3 +- go.sum | 4 ++ pkg/xsysinfo/gpu.go | 80 +++++++------------------------------ 4 files changed, 21 insertions(+), 67 deletions(-) diff --git a/core/application/startup.go b/core/application/startup.go index d5e06c4e2..76ef1d525 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -278,6 +278,7 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { options.WatchDogInterval = dur } else { log.Warn().Err(err).Str("interval", *settings.WatchdogInterval).Msg("invalid watchdog interval in runtime_settings.json") + options.WatchDogInterval = 10 * time.Second } } } diff --git a/go.mod b/go.mod index f0cfb1eef..63fa809d8 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( ) require ( + github.com/cloudfoundry/gosigar v1.3.112 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/labstack/gommon v0.4.2 // indirect github.com/swaggo/files/v2 v2.0.2 // indirect @@ -203,7 +204,7 @@ require ( github.com/google/btree v1.1.3 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/gopacket v1.1.19 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/gorilla/websocket v1.5.3 github.com/hashicorp/golang-lru v1.0.2 // indirect diff --git a/go.sum b/go.sum index c8604c52a..5fe1d6061 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,8 @@ github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNE github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM= github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cloudfoundry/gosigar v1.3.112 h1:cGGZ2sj1GKyiwSxzouIR7ATNbgAkC4zqwWDxYQ2ObPc= +github.com/cloudfoundry/gosigar v1.3.112/go.mod h1:Ldc+tVw3dfqPwasZ9om1LT2aRwpjC1eFfbWKfv2WbDI= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= @@ -275,6 +277,8 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 h1:xhMrHhTJ6zxu3gA4enFM9MLn9AY7613teCdFnlUVbSQ= +github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go index 560377044..8ed91ba67 100644 --- a/pkg/xsysinfo/gpu.go +++ b/pkg/xsysinfo/gpu.go @@ -3,11 +3,13 @@ package xsysinfo import ( "bytes" "encoding/json" + "fmt" "os/exec" "strconv" "strings" "sync" + sigar "github.com/cloudfoundry/gosigar" "github.com/jaypipes/ghw" "github.com/jaypipes/ghw/pkg/gpu" "github.com/rs/zerolog/log" @@ -144,17 +146,15 @@ func isUnifiedMemoryDevice(gpuName string) bool { // getSystemRAM returns system RAM information using ghw func getSystemRAM() (total, used, free uint64, err error) { - memory, err := ghw.Memory() - if err != nil { - return 0, 0, 0, err - } + mem := sigar.Mem{} + //swap := sigar.Swap{} - total = uint64(memory.TotalUsableBytes) - // ghw doesn't provide used/free directly, but we can estimate - // For unified memory GPUs, we report total system RAM as available VRAM - // since the GPU can potentially use all of it - free = total - used = 0 + mem.Get() //nolint:errcheck + //swap.Get() //nolint:errcheck + + total = mem.Total + free = mem.ActualFree + used = mem.ActualUsed return total, used, free, nil } @@ -560,79 +560,27 @@ func getIntelGPUTop() []GPUMemoryInfo { // GetSystemRAMInfo returns real-time system RAM usage func GetSystemRAMInfo() (*SystemRAMInfo, error) { - memory, err := ghw.Memory() + total, used, free, err := getSystemRAM() if err != nil { return nil, err } - total := uint64(memory.TotalUsableBytes) - - // Try to get more accurate memory info from /proc/meminfo on Linux - used, available, free := getDetailedMemoryInfo(total) - usagePercent := 0.0 if total > 0 { usagePercent = float64(used) / float64(total) * 100 } + fmt.Println("total", total, "used", used, "free", free) + return &SystemRAMInfo{ Total: total, Used: used, Free: free, - Available: available, + Available: total - used, UsagePercent: usagePercent, }, nil } -// getDetailedMemoryInfo tries to get detailed memory info from /proc/meminfo on Linux -// Returns used, available, and free memory in bytes -func getDetailedMemoryInfo(total uint64) (used, available, free uint64) { - // Try to read /proc/meminfo for more accurate data - cmd := exec.Command("cat", "/proc/meminfo") - var stdout bytes.Buffer - cmd.Stdout = &stdout - - if err := cmd.Run(); err != nil { - // Fallback: assume all memory is available - return 0, total, total - } - - lines := strings.Split(stdout.String(), "\n") - memInfo := make(map[string]uint64) - - for _, line := range lines { - parts := strings.Fields(line) - if len(parts) < 2 { - continue - } - key := strings.TrimSuffix(parts[0], ":") - value, err := strconv.ParseUint(parts[1], 10, 64) - if err != nil { - continue - } - // Values in /proc/meminfo are in kB - memInfo[key] = value * 1024 - } - - // Get MemAvailable if present (preferred), otherwise calculate from free + buffers + cached - if avail, ok := memInfo["MemAvailable"]; ok { - available = avail - } else { - available = memInfo["MemFree"] + memInfo["Buffers"] + memInfo["Cached"] - } - - free = memInfo["MemFree"] - - // Calculate used memory - if total > available { - used = total - available - } else { - used = 0 - } - - return used, available, free -} - // GetResourceInfo returns GPU info if available, otherwise system RAM info func GetResourceInfo() ResourceInfo { gpus := GetGPUMemoryUsage()