From fa4de05c1435033ade806f2b428dc4559a48ef66 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 28 Jul 2023 19:40:21 +0200 Subject: [PATCH 1/5] fix: symlink libphonemize in the container Signed-off-by: Ettore Di Giacinto --- .github/workflows/test.yml | 2 +- Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5a0f502e2..2b9b4402f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,8 +43,8 @@ jobs: mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \ curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \ tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \ - sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \ sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \ + sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \ sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ - name: Test run: | diff --git a/Dockerfile b/Dockerfile index 5e39303a7..f67a1f3eb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,8 +63,8 @@ RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSIO mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \ curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \ tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \ - cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \ cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \ + ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \ cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ # \ # ; fi From f085baa77d7faa79096edbee474c565d42046eae Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 29 Jul 2023 00:04:25 +0200 Subject: [PATCH 2/5] fix: set default rope if not specified Signed-off-by: Ettore Di Giacinto --- api/api_test.go | 19 +++++++++---------- pkg/grpc/llm/llama/llama.go | 13 +++++++++++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/api/api_test.go b/api/api_test.go index 147774dfb..2da2a7d77 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -30,10 +30,10 @@ import ( ) type modelApplyRequest struct { - ID string `json:"id"` - URL string `json:"url"` - Name string `json:"name"` - Overrides map[string]string `json:"overrides"` + ID string `json:"id"` + URL string `json:"url"` + Name string `json:"name"` + Overrides map[string]interface{} `json:"overrides"` } func getModelStatus(url string) (response map[string]interface{}) { @@ -243,7 +243,7 @@ var _ = Describe("API test", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", Name: "bert", - Overrides: map[string]string{ + Overrides: map[string]interface{}{ "backend": "llama", }, }) @@ -269,7 +269,7 @@ var _ = Describe("API test", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", Name: "bert", - Overrides: map[string]string{}, + Overrides: map[string]interface{}{}, }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -297,7 +297,7 @@ var _ = Describe("API test", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/openllama_3b.yaml", Name: "openllama_3b", - Overrides: map[string]string{"backend": "llama"}, + Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true}, }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -366,9 +366,8 @@ var _ = Describe("API test", func() { } response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/gpt4all-j.yaml", - Name: "gpt4all-j", - Overrides: map[string]string{}, + URL: "github:go-skynet/model-gallery/gpt4all-j.yaml", + Name: "gpt4all-j", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go index 7d867813c..2f85e1751 100644 --- a/pkg/grpc/llm/llama/llama.go +++ b/pkg/grpc/llm/llama/llama.go @@ -58,6 +58,15 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error { } func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption { + ropeFreqBase := float32(1000) + ropeFreqScale := float32(1) + + if opts.RopeFreqBase != 0 { + ropeFreqBase = opts.RopeFreqBase + } + if opts.RopeFreqScale != 0 { + ropeFreqScale = opts.RopeFreqScale + } predictOptions := []llama.PredictOption{ llama.SetTemperature(opts.Temperature), llama.SetTopP(opts.TopP), @@ -65,8 +74,8 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption { llama.SetTokens(int(opts.Tokens)), llama.SetThreads(int(opts.Threads)), llama.WithGrammar(opts.Grammar), - llama.SetRopeFreqBase(opts.RopeFreqBase), - llama.SetRopeFreqScale(opts.RopeFreqScale), + llama.SetRopeFreqBase(ropeFreqBase), + llama.SetRopeFreqScale(ropeFreqScale), llama.SetNegativePromptScale(opts.NegativePromptScale), llama.SetNegativePrompt(opts.NegativePrompt), } From 8b90ac2b1ae6f905d1ec0c2f028dca4a15347a88 Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 29 Jul 2023 02:37:24 -0400 Subject: [PATCH 3/5] 1000 -> 10,000 for ropeFreqBase? the error message talks about a default of 10k, so setting this to 10k instead of 1k experimentally. --- pkg/grpc/llm/llama/llama.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go index 2f85e1751..1cc2ec7b2 100644 --- a/pkg/grpc/llm/llama/llama.go +++ b/pkg/grpc/llm/llama/llama.go @@ -58,7 +58,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error { } func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption { - ropeFreqBase := float32(1000) + ropeFreqBase := float32(10000) ropeFreqScale := float32(1) if opts.RopeFreqBase != 0 { From e70b91aaef31ba3d8f4f27994831dc237c27936d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 29 Jul 2023 10:29:47 +0200 Subject: [PATCH 4/5] tests: set a small context_size Signed-off-by: Ettore Di Giacinto --- api/api_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/api_test.go b/api/api_test.go index 2da2a7d77..2947842e4 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -297,7 +297,7 @@ var _ = Describe("API test", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/openllama_3b.yaml", Name: "openllama_3b", - Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true}, + Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128}, }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) From 00ccb8d4f110514da040453852c78dd7ae3e9a76 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 29 Jul 2023 10:40:56 +0200 Subject: [PATCH 5/5] fix: set default rope freq base to 10000 during model load Signed-off-by: Ettore Di Giacinto --- pkg/grpc/llm/llama/llama.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go index 1cc2ec7b2..18e481efe 100644 --- a/pkg/grpc/llm/llama/llama.go +++ b/pkg/grpc/llm/llama/llama.go @@ -17,9 +17,20 @@ type LLM struct { } func (llm *LLM) Load(opts *pb.ModelOptions) error { + + ropeFreqBase := float32(10000) + ropeFreqScale := float32(1) + + if opts.RopeFreqBase != 0 { + ropeFreqBase = opts.RopeFreqBase + } + if opts.RopeFreqScale != 0 { + ropeFreqScale = opts.RopeFreqScale + } + llamaOpts := []llama.ModelOption{ - llama.WithRopeFreqBase(opts.RopeFreqBase), - llama.WithRopeFreqScale(opts.RopeFreqScale), + llama.WithRopeFreqBase(ropeFreqBase), + llama.WithRopeFreqScale(ropeFreqScale), } if opts.ContextSize != 0 {