feat(loader): enhance single active backend to support LRU eviction (#7535)

* feat(loader): refactor single active backend support to LRU

This changeset introduces LRU management of loaded backends. Users can
now set a maximum number of models to be loaded concurrently, and, when
LocalAI is configured in single active backend mode, we set the LRU size
to 1 for backward compatibility.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-12-12 12:28:38 +01:00
committed by GitHub
parent c141a40e00
commit fc5b9ebfcc
39 changed files with 836 additions and 131 deletions
+1 -1
View File
@@ -37,7 +37,7 @@ func findLLamaCPPBackend(galleries string, systemState *system.SystemState) (str
backend, ok := backends.Get(llamaCPPGalleryName)
if !ok {
ml := model.NewModelLoader(systemState, true)
ml := model.NewModelLoader(systemState)
var gals []config.Gallery
if err := json.Unmarshal([]byte(galleries), &gals); err != nil {
log.Error().Err(err).Msg("failed loading galleries")