feat: add grammar and function call support

mudler
2023-07-02 11:13:51 +02:00
parent a6839fd238
commit f09ddd2983
7 changed files with 571 additions and 9 deletions


@@ -46,12 +46,16 @@ type Config struct {
 	PromptCacheAll bool `yaml:"prompt_cache_all"`
 	PromptCacheRO  bool `yaml:"prompt_cache_ro"`
-	PromptStrings, InputStrings []string
-	InputToken                  [][]int
+	Grammar string `yaml:"grammar"`
+
+	PromptStrings, InputStrings []string
+	InputToken                  [][]int
+	functionCallString, functionCallNameString string
 }
 
 type TemplateConfig struct {
 	Completion string `yaml:"completion"`
+	Functions  string `yaml:"function"`
 	Chat       string `yaml:"chat"`
 	Edit       string `yaml:"edit"`
 }
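
Note (illustration, not part of this commit): the new "grammar" field and the "function" template key are read from a model's YAML config. A minimal sketch of how they would deserialize; the nesting of TemplateConfig under a "template:" key and all values are assumptions for the example.

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

type TemplateConfig struct {
	Completion string `yaml:"completion"`
	Functions  string `yaml:"function"` // note: the YAML key is singular "function"
	Chat       string `yaml:"chat"`
	Edit       string `yaml:"edit"`
}

type Config struct {
	Grammar        string         `yaml:"grammar"`
	TemplateConfig TemplateConfig `yaml:"template"` // assumed key for the example
}

func main() {
	data := []byte(`
grammar: 'root ::= "yes" | "no"'
template:
  chat: my-chat-template
  function: my-functions-template
`)
	var c Config
	if err := yaml.Unmarshal(data, &c); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", c)
}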
@@ -181,6 +185,10 @@ func updateConfig(config *Config, input *OpenAIRequest) {
 		config.TopP = input.TopP
 	}
 
+	if input.Grammar != "" {
+		config.Grammar = input.Grammar
+	}
+
 	if input.Temperature != 0 {
 		config.Temperature = input.Temperature
 	}
@@ -262,6 +270,24 @@ func updateConfig(config *Config, input *OpenAIRequest) {
 		}
 	}
 
+	// function_call can be either a string or an object
+	switch fnc := input.FunctionCall.(type) {
+	case string:
+		if fnc != "" {
+			config.functionCallString = fnc
+		}
+	case map[string]interface{}:
+		var name string
+		n, exists := fnc["name"]
+		if exists {
+			nn, e := n.(string)
+			if e {
+				name = nn
+			}
+		}
+		config.functionCallNameString = name
+	}
+
 	switch p := input.Prompt.(type) {
 	case string:
 		config.PromptStrings = append(config.PromptStrings, p)
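
Note (sketch, not from this commit): per the OpenAI API, "function_call" arrives in one of two wire forms, which the type switch above has to handle; the function name get_weather is invented.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	for _, body := range []string{
		`{"function_call": "none"}`,                  // string form: "none" / "auto"
		`{"function_call": {"name": "get_weather"}}`, // object form: force one function
	} {
		var req struct {
			FunctionCall interface{} `json:"function_call"`
		}
		if err := json.Unmarshal([]byte(body), &req); err != nil {
			panic(err)
		}
		switch fnc := req.FunctionCall.(type) {
		case string:
			fmt.Println("string form:", fnc)
		case map[string]interface{}:
			if name, ok := fnc["name"].(string); ok {
				fmt.Println("object form, name:", name)
			}
		}
	}
}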


@@ -17,6 +17,7 @@ import (
 	"strings"
 
 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	"github.com/go-skynet/LocalAI/pkg/grammar"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
 	llama "github.com/go-skynet/go-llama.cpp"
@@ -73,8 +74,12 @@ type Choice struct {
 }
 
 type Message struct {
-	Role    string `json:"role,omitempty" yaml:"role"`
-	Content string `json:"content,omitempty" yaml:"content"`
+	// The message role
+	Role string `json:"role,omitempty" yaml:"role"`
+	// The message content
+	Content string `json:"content,omitempty" yaml:"content"`
+	// A result of a function call
+	FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
 }
type OpenAIModel struct {
@@ -104,6 +109,10 @@ type OpenAIRequest struct {
 	// Messages is read only by chat/completion API calls
 	Messages []Message `json:"messages" yaml:"messages"`
 
+	// A list of available functions to call
+	Functions []grammar.Function `json:"functions" yaml:"functions"`
+	FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
+
 	Stream bool `json:"stream"`
 	Echo   bool `json:"echo"`
 	// Common options between all the API calls
@@ -134,6 +143,9 @@ type OpenAIRequest struct {
 	Mode int `json:"mode"`
 	Step int `json:"step"`
 
+	// A grammar to constrain the LLM output
+	Grammar string `json:"grammar" yaml:"grammar"`
+
 	TypicalP float64 `json:"typical_p" yaml:"typical_p"`
 }
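
Note: an illustrative request body (all values invented) exercising the new OpenAIRequest fields: "functions", "function_call", and the LocalAI-specific "grammar" extension.

const exampleChatRequest = `{
  "model": "my-model",
  "messages": [{"role": "user", "content": "What is the weather in Rome?"}],
  "functions": [{
    "name": "get_weather",
    "description": "Get the current weather for a city",
    "parameters": {
      "type": "object",
      "properties": {"city": {"type": "string"}},
      "required": ["city"]
    }
  }],
  "function_call": "auto"
}`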
@@ -345,6 +357,23 @@ func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 }
 
 func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
+	// TODO: replace this with config settings
+	// Allow the user to set custom actions via config file
+	// to be "embedded" in each model
+	const noActionName = "answer"
+	const noActionDescription = "use this action to answer without performing any action"
+
+	noActionGrammar := grammar.Function{
+		Name:        noActionName,
+		Description: noActionDescription,
+		Parameters: map[string]interface{}{
+			"properties": map[string]interface{}{
+				"message": map[string]interface{}{
+					"type":        "string",
+					"description": "The message to reply to the user with",
+				},
+			},
+		},
+	}
+
 	process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
 		initialMessage := OpenAIResponse{
@@ -368,6 +397,8 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 		close(responses)
 	}
 	return func(c *fiber.Ctx) error {
+		processFunctions := false
+		funcs := grammar.Functions{}
+
 		model, input, err := readInput(c, o.loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
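
Note (values invented): once functions are processed, the generated grammar restricts the model to two output shapes: a call to a user-supplied function, or the "answer" no-action fallback defined above. The grammar uses the key "function" rather than OpenAI's "name".

const toolCallOutput = `{"function": "get_weather", "arguments": {"city": "Rome"}}`

const noActionOutput = `{"function": "answer", "arguments": {"message": "Hi! How can I help?"}}`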
@@ -377,8 +408,33 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
-		log.Debug().Msgf("Configuration read: %+v", config)
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		// process functions if any are defined, unless function_call is "none"
+		if len(input.Functions) > 0 &&
+			((config.functionCallString != "none" || config.functionCallString == "") || len(config.functionCallNameString) > 0) {
+			log.Debug().Msgf("Response needs to process functions")
+
+			processFunctions = true
+			funcs = input.Functions
+
+			// Force picking one of the functions by the request
+			if config.functionCallNameString != "" {
+				funcs = funcs.Select(config.functionCallNameString)
+			}
+
+			// Append the no-action function
+			funcs = append(funcs, noActionGrammar)
+
+			// Update input grammar
+			jsStruct := funcs.ToJSONStructure()
+			config.Grammar = jsStruct.Grammar("")
+		}
+
+		// functions are not supported in stream mode (yet?)
+		toStream := input.Stream && !processFunctions
+
 		log.Debug().Msgf("Parameters: %+v", config)
 
 		var predInput string
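
Note: a minimal standalone sketch of the grammar-generation path above, using only the pkg/grammar calls that appear in this diff (Functions, Select, ToJSONStructure, Grammar); the example function is invented.

package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/grammar"
)

func main() {
	funcs := grammar.Functions{
		{
			Name:        "get_weather",
			Description: "Get the current weather for a city",
			Parameters: map[string]interface{}{
				"properties": map[string]interface{}{
					"city": map[string]interface{}{"type": "string"},
				},
			},
		},
	}

	// Optionally force a single function, as done when function_call={"name": ...}.
	funcs = funcs.Select("get_weather")

	// Produce a llama.cpp grammar that constrains generation to JSON
	// matching one of the declared functions.
	fmt.Println(funcs.ToJSONStructure().Grammar(""))
}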
@@ -397,7 +453,7 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 		predInput = strings.Join(mess, "\n")
 
-		if input.Stream {
+		if toStream {
 			log.Debug().Msgf("Stream request received")
 			c.Context().SetContentType("text/event-stream")
 			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
@@ -409,20 +465,35 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 
 		templateFile := config.Model
 
-		if config.TemplateConfig.Chat != "" {
+		if config.TemplateConfig.Chat != "" && !processFunctions {
 			templateFile = config.TemplateConfig.Chat
 		}
 
+		if config.TemplateConfig.Functions != "" && processFunctions {
+			templateFile = config.TemplateConfig.Functions
+		}
+
 		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
-			Input string
-		}{Input: predInput})
+		templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
+			Input     string
+			Functions []grammar.Function
+		}{
+			Input:     predInput,
+			Functions: funcs,
+		})
 		if err == nil {
 			predInput = templatedInput
 			log.Debug().Msgf("Template found, input modified to: %s", predInput)
 		} else {
 			log.Debug().Msgf("Template failed loading: %s", err.Error())
 		}
 
-		if input.Stream {
+		log.Debug().Msgf("Prompt: %s", predInput)
+		if processFunctions {
+			log.Debug().Msgf("Grammar: %+v", config.Grammar)
+		}
+
+		if toStream {
 			responses := make(chan OpenAIResponse)
 
 			go process(predInput, input, config, o.loader, responses)
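
Note: the template data now carries Functions alongside Input, so a functions prompt template can render the available tools. A hypothetical template file follows (assuming the loader executes Go text/template, and using the Name/Description fields of grammar.Function as seen in this diff); all wording is invented.

You can call the following functions:
{{range .Functions}}- {{.Name}}: {{.Description}}
{{end}}
Request: {{.Input}}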
@@ -459,6 +530,71 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 		}
 
 		result, err := ComputeChoices(predInput, input, config, o, o.loader, func(s string, c *[]Choice) {
+			if processFunctions {
+				// As we have to change the result before processing, we can't stream the answer (yet?)
+				ss := map[string]interface{}{}
+				json.Unmarshal([]byte(s), &ss)
+				log.Debug().Msgf("Function return: %s %+v", s, ss)
+
+				// The grammar defines the function name as "function", while OpenAI returns "name"
+				func_name := ss["function"]
+				// Similarly, "arguments" is a map[string]interface{} here, but OpenAI wants a stringified object
+				args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
+				d, _ := json.Marshal(args)
+				ss["arguments"] = string(d)
+				ss["name"] = func_name
+
+				// if there is no action to perform, reply with a message
+				if func_name == noActionName {
+					log.Debug().Msgf("nothing to do, computing a reply")
+
+					// If there is a message that the LLM already sends as part of the JSON reply, use it
+					arguments := map[string]interface{}{}
+					json.Unmarshal([]byte(d), &arguments)
+					m, exists := arguments["message"]
+					if exists {
+						switch message := m.(type) {
+						case string:
+							if message != "" {
+								log.Debug().Msgf("Reply received from LLM: %s", message)
+								message = Finetune(*config, predInput, message)
+								log.Debug().Msgf("Reply received from LLM (finetuned): %s", message)
+
+								*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: message}})
+								return
+							}
+						}
+					}
+
+					log.Debug().Msgf("No action received from the LLM and no message; computing a reply")
+					// Otherwise ask the LLM to interpret the JSON output and the context, and return a message
+					// Note: this costs (in terms of CPU) another computation
+					config.Grammar = ""
+					predFunc, err := ModelInference(predInput, o.loader, *config, o, nil)
+					if err != nil {
+						log.Error().Msgf("inference error: %s", err.Error())
+						return
+					}
+
+					prediction, err := predFunc()
+					if err != nil {
+						log.Error().Msgf("inference error: %s", err.Error())
+						return
+					}
+
+					prediction = Finetune(*config, predInput, prediction)
+					*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: prediction}})
+				} else {
+					// otherwise reply with the function call
+					*c = append(*c, Choice{
+						FinishReason: "function_call",
+						Message:      &Message{Role: "function", FunctionCall: ss},
+					})
+				}
+
+				return
+			}
+
 			*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
 		}, nil)
 		if err != nil {
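
Note: a standalone sketch (values invented) of the remapping performed in the callback above: the grammar yields {"function": ..., "arguments": {...}}, while the OpenAI wire format wants "name" plus stringified "arguments".

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	raw := `{"function": "get_weather", "arguments": {"city": "Rome"}}`

	ss := map[string]interface{}{}
	if err := json.Unmarshal([]byte(raw), &ss); err != nil {
		panic(err)
	}

	d, _ := json.Marshal(ss["arguments"]) // stringify the arguments object
	ss["arguments"] = string(d)
	ss["name"] = ss["function"]

	out, _ := json.Marshal(ss)
	fmt.Println(string(out))
	// {"arguments":"{\"city\":\"Rome\"}","function":"get_weather","name":"get_weather"}
}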


@@ -189,6 +189,8 @@ func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption
 		predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
 	}
 
+	predictOptions = append(predictOptions, llama.WithGrammar(c.Grammar))
+
 	if c.PromptCachePath != "" {
 		// Create parent directory
 		p := filepath.Join(modelPath, c.PromptCachePath)
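
Note: c.Grammar holds a llama.cpp GBNF grammar string, forwarded to the backend via WithGrammar. A minimal hand-written example follows (assumed GBNF syntax, not produced by this commit's code); since the option is appended unconditionally, an empty grammar string is presumably treated as a no-op by the binding.

// Assumption: GBNF grammar syntax as documented by llama.cpp.
const yesNoGrammar = `root ::= "yes" | "no"`

// predictOptions = append(predictOptions, llama.WithGrammar(yesNoGrammar))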