diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 6c4ec9e68..cf18f659a 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }}</tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
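The restructure above replaces the paired one-line conditionals (which could emit an opening `<tool_call>` and `<tool_response>` for the same message) with a single if/else-if, and moves the Hermes-2-Pro markers onto their own lines. For reference, a minimal standalone sketch of how the fixed template renders under Go's text/template trimming rules — the role selection is shortened for brevity, and `toJson` is a stand-in assumption modeled on sprig's helper, not LocalAI's actual loader API:

```go
package main

import (
	"encoding/json"
	"os"
	"text/template"
)

// chatMessage mirrors the fixed chat_message template above,
// with the role selection simplified to keep the sketch small.
const chatMessage = `<|im_start|>{{.RoleName}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>`

func main() {
	// Assumption: toJson behaves like sprig's toJson and marshals
	// its argument to a compact JSON string.
	funcs := template.FuncMap{"toJson": func(v interface{}) string {
		b, _ := json.Marshal(v)
		return string(b)
	}}
	tmpl := template.Must(template.New("chat").Funcs(funcs).Parse(chatMessage))

	// An assistant message carrying a function call: the JSON ends up
	// wrapped in <tool_call>...</tool_call>, each marker on its own line.
	if err := tmpl.Execute(os.Stdout, map[string]interface{}{
		"RoleName":     "assistant",
		"FunctionCall": map[string]string{"function": "test"},
	}); err != nil {
		panic(err)
	}
	// Output:
	// <|im_start|>assistant
	// <tool_call>
	// {"function":"test"}
	// </tool_call>
	// <|im_end|>
}
```

Because `{{-` trims the preceding newline while plain `}}` keeps the following one, each branch contributes exactly one line, which is what the Hermes-2-Pro format linked in the comment above expects.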
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 8d5c84f77..0407bb229 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }}</tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index a7cb5b4da..f5f93c14f 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -7,14 +7,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }}</tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 7bfa94180..dd18ce6f8 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -6,14 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}<tool_call>{{end}}
- {{- if eq .RoleName "tool" }}<tool_response>{{end }}
- {{- if .Content}}
- {{.Content}}
+ {{- if .FunctionCall }}
+ <tool_call>
+ {{- else if eq .RoleName "tool" }}
+ <tool_response>
+ {{- end }}
+ {{- if .Content}}
+ {{.Content }}
+ {{- end }}
+ {{- if .FunctionCall}}
+ {{toJson .FunctionCall}}
+ {{- end }}
+ {{- if .FunctionCall }}
+ </tool_call>
+ {{- else if eq .RoleName "tool" }}
+ </tool_response>
{{- end }}
- {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
- {{- if .FunctionCall }}</tool_call>{{end }}
- {{- if eq .RoleName "tool" }}</tool_response>{{end }}
<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml
new file mode 100644
index 000000000..d483d2b2a
--- /dev/null
+++ b/embedded/models/llama3-instruct.yaml
@@ -0,0 +1,48 @@
+name: llama3-8b-instruct
+mmap: true
+parameters:
+ model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+template:
+ chat_message: |
+ <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+ {{ if .FunctionCall -}}
+ Function call:
+ {{ else if eq .RoleName "tool" -}}
+ Function response:
+ {{ end -}}
+ {{ if .Content -}}
+ {{.Content -}}
+ {{ else if .FunctionCall -}}
+ {{ toJson .FunctionCall -}}
+ {{ end -}}
+ <|eot_id|>
+ function: |
+ <|start_header_id|>system<|end_header_id|>
+
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+ </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+ Function call:
+ chat: |
+ <|begin_of_text|>{{.Input }}
+ <|start_header_id|>assistant<|end_header_id|>
+ completion: |
+ {{.Input}}
+context_size: 8192
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- "<|eot_id|>"
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llama3-8b-instruct",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
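Before the test changes below, a quick cross-check of the new llama3 chat_message template: a hedged standalone sketch (standard library only; the header's role selection is simplified, and `toJson` is again a stand-in assumption for the registered helper). The rendered string is the same one the new "function_call" spec asserts:

```go
package main

import (
	"encoding/json"
	"os"
	"text/template"
)

// llama3 mirrors the new chat_message template above; the header line
// is reduced to {{.RoleName}} for brevity.
const llama3 = `<|start_header_id|>{{.RoleName}}<|end_header_id|>

{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>`

func main() {
	// Assumption: toJson marshals to a compact JSON string, as in the
	// ChatML sketch earlier in this patch.
	funcs := template.FuncMap{"toJson": func(v interface{}) string {
		b, _ := json.Marshal(v)
		return string(b)
	}}
	tmpl := template.Must(template.New("llama3").Funcs(funcs).Parse(llama3))

	// The function-call case from pkg/model/loader_test.go below.
	if err := tmpl.Execute(os.Stdout, map[string]interface{}{
		"RoleName":     "assistant",
		"FunctionCall": map[string]string{"function": "test"},
	}); err != nil {
		panic(err)
	}
	// Output:
	// <|start_header_id|>assistant<|end_header_id|>
	//
	// Function call:
	// {"function":"test"}<|eot_id|>
}
```

Here the `-}}` right-trim keeps `Function call:` and the JSON on adjacent lines while the plain `{{ else`/`{{ end` delimiters preserve the single newline between them.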
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
index e4207b35c..d3956b63e 100644
--- a/pkg/model/loader_test.go
+++ b/pkg/model/loader_test.go
@@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
{{- end }}
<|im_end|>`
 
-var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+{{ if .FunctionCall -}}
+Function call:
+{{ else if eq .RoleName "tool" -}}
+Function response:
+{{ end -}}
+{{ if .Content -}}
+{{.Content -}}
+{{ else if .FunctionCall -}}
+{{ toJson .FunctionCall -}}
+{{ end -}}
+<|eot_id|>`
+
+var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+ "user": {
+ "template": llama3,
+ "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "user",
+ RoleName: "user",
+ Content: "A long time ago in a galaxy far, far away...",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "assistant": {
+ "template": llama3,
+ "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "assistant",
+ RoleName: "assistant",
+ Content: "A long time ago in a galaxy far, far away...",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "function_call": {
+ "template": llama3,
+ "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "assistant",
+ RoleName: "assistant",
+ Content: "",
+ FunctionCall: map[string]string{"function": "test"},
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+ "function_response": {
+ "template": llama3,
+ "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
+ "data": model.ChatMessageTemplateData{
+ SystemPrompt: "",
+ Role: "tool",
+ RoleName: "tool",
+ Content: "Response from tool",
+ FunctionCall: nil,
+ FunctionName: "",
+ LastMessage: false,
+ Function: false,
+ MessageIndex: 0,
+ },
+ },
+}
+
+var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"template": chatML,
"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>",
@@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac
}
var _ = Describe("Templates", func() {
- Context("chat message", func() {
+ Context("chat message ChatML", func() {
var modelLoader *ModelLoader
BeforeEach(func() {
modelLoader = NewModelLoader("")
})
- for key := range testMatch {
- foo := testMatch[key]
+ for key := range chatMLTestMatch {
+ foo := chatMLTestMatch[key]
+ It("renders correctly `"+key+"`", func() {
+ templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
+ Expect(err).ToNot(HaveOccurred())
+ Expect(templated).To(Equal(foo["expected"]), templated)
+ })
+ }
+ })
+ Context("chat message llama3", func() {
+ var modelLoader *ModelLoader
+ BeforeEach(func() {
+ modelLoader = NewModelLoader("")
+ })
+ for key := range llama3TestMatch {
+ foo := llama3TestMatch[key]
It("renders correctly `"+key+"`", func() {
templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
Expect(err).ToNot(HaveOccurred())
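The two contexts above stay table-driven: each map entry supplies the template, the input `ChatMessageTemplateData`, and the exact rendered string, so the llama3 cases reuse the same loop body as the ChatML ones. Assuming the package keeps its existing Ginkgo bootstrap, the new specs run with the standard tooling, e.g. `go test ./pkg/model/`.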