Mirror of https://github.com/mudler/LocalAI.git
feat(llama.cpp): support embeddings endpoints (#2871)
* feat(llama.cpp): add embeddings. Also enable embeddings by default for llama.cpp models. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(Makefile): prepare llama.cpp sources only once. Otherwise we keep cloning llama.cpp for each of the variants. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* do not set embeddings to false. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* docs: add embeddings to the YAML config reference. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
commit 35561edb6e
parent 6564e7ea01
committed by GitHub
@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		Type:          c.ModelType,
 		RopeFreqScale: c.RopeFreqScale,
 		NUMA:          c.NUMA,
-		Embeddings:    c.Embeddings,
+		Embeddings:    *c.Embeddings,
 		LowVRAM:       *c.LowVRAM,
 		NGPULayers:    int32(*c.NGPULayers),
 		MMap:          *c.MMap,
@@ -32,7 +32,7 @@ type BackendConfig struct {
 	Threads        *int              `yaml:"threads"`
 	Debug          *bool             `yaml:"debug"`
 	Roles          map[string]string `yaml:"roles"`
-	Embeddings     bool              `yaml:"embeddings"`
+	Embeddings     *bool             `yaml:"embeddings"`
 	Backend        string            `yaml:"backend"`
 	TemplateConfig TemplateConfig    `yaml:"template"`
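For context: moving Embeddings from bool to *bool lets the config loader tell an omitted key apart from an explicit "embeddings: false" in a model's YAML. Below is a minimal sketch of that distinction, assuming a standalone stand-in struct and gopkg.in/yaml.v3 rather than LocalAI's actual types.

    package main

    import (
        "fmt"

        "gopkg.in/yaml.v3"
    )

    // modelConfig stands in for the relevant slice of BackendConfig.
    type modelConfig struct {
        Name       string `yaml:"name"`
        Embeddings *bool  `yaml:"embeddings"` // nil means the key was not set
    }

    func main() {
        // Key omitted entirely: the pointer stays nil, so SetDefaults-style
        // code can still apply a default later.
        var omitted modelConfig
        if err := yaml.Unmarshal([]byte("name: my-model\n"), &omitted); err != nil {
            panic(err)
        }
        fmt.Println(omitted.Embeddings == nil) // true

        // Key explicitly set to false: the pointer is non-nil, so the user's
        // choice is distinguishable from "unset" and is not overridden.
        var explicit modelConfig
        if err := yaml.Unmarshal([]byte("name: my-model\nembeddings: false\n"), &explicit); err != nil {
            panic(err)
        }
        fmt.Println(explicit.Embeddings != nil, *explicit.Embeddings) // true false
    }

With the old plain bool, both of those YAML documents would decode to false and the loader could not tell which one the user meant.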
@@ -338,6 +338,10 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.LowVRAM = &falseV
 	}
 
+	if cfg.Embeddings == nil {
+		cfg.Embeddings = &falseV
+	}
+
 	// Value passed by the top level are treated as default (no implicit defaults)
 	// defaults are set by the user
 	if ctx == 0 {
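Since gRPCModelOpts now dereferences the field (Embeddings: *c.Embeddings), the nil check added above has to run before the options are built, otherwise an unset field would panic. A rough sketch of that ordering, with illustrative names rather than LocalAI's real call chain:

    package main

    import "fmt"

    // backendConfig and modelOptions are stand-ins for the real structs.
    type backendConfig struct {
        Embeddings *bool
    }

    type modelOptions struct {
        Embeddings bool
    }

    // setDefaults mirrors the SetDefaults hunk: fill in false only when the
    // field was never set, leaving explicit values untouched.
    func (c *backendConfig) setDefaults() {
        falseV := false
        if c.Embeddings == nil {
            c.Embeddings = &falseV
        }
    }

    // toModelOptions mirrors the gRPCModelOpts change: the dereference is only
    // safe because setDefaults guarantees a non-nil pointer.
    func toModelOptions(c backendConfig) modelOptions {
        return modelOptions{Embeddings: *c.Embeddings}
    }

    func main() {
        var cfg backendConfig // "embeddings" never set in the YAML
        cfg.setDefaults()
        fmt.Println(toModelOptions(cfg).Embeddings) // false, the applied default
    }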