mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-05 18:19:53 -06:00
* feat(loader): refactor single active backend support to LRU This changeset introduces LRU management of loaded backends. Users can now set a maximum number of models to be loaded concurrently, and when LocalAI is set to single active backend mode, the LRU limit is set to 1 for backward compatibility. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: add tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Update docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
32 lines
678 B
Go
32 lines
678 B
Go
package backend
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
model "github.com/mudler/LocalAI/pkg/model"
|
|
)
|
|
|
|
func TokenMetrics(
|
|
modelFile string,
|
|
loader *model.ModelLoader,
|
|
appConfig *config.ApplicationConfig,
|
|
modelConfig config.ModelConfig) (*proto.MetricsResponse, error) {
|
|
|
|
opts := ModelOptions(modelConfig, appConfig, model.WithModel(modelFile))
|
|
model, err := loader.Load(opts...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if model == nil {
|
|
return nil, fmt.Errorf("could not loadmodel model")
|
|
}
|
|
|
|
res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
|
|
|
|
return res, err
|
|
}
|