mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-25 20:49:42 -05:00
6d5bde860b
* WIP * wip * wip * Make it compile * Update json.hpp * this shouldn't be private for now * Add logs * Reset auto detected template Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Re-enable grammars * This seems to be broken - https://github.com/ggml-org/llama.cpp/commit/360a9c98e13d35f322b4c5b1309aab0cc90ed82b#diff-a18a8e64e12a01167d8e98fc[…]cccf0d4eed09d76d879L2998-L3207 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Placeholder * Simplify image loading * use completion type * disable streaming Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * correctly return timings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Remove some debug logging * Adapt tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Keep header * embedding: do not use oai type Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Sync from server.cpp * Use utils and json directly from llama.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Sync with upstream Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix: copy json.hpp from the correct location Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix: add httplib * sync llama.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Embeddiongs: set OAICOMPAT_TYPE_EMBEDDING Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: sync with server.cpp by including it Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * make it darwin-compatible Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
52 lines
2.0 KiB
Bash
52 lines
2.0 KiB
Bash
#!/bin/bash

## Patches
## Apply patches from the `patches` directory

#######################################
# Feed every regular file found in ./patches to patch(1), targeting the
# llama.cpp/ tree with one leading path component stripped (-p1).
# Files are visited in the shell's glob (sorted) order.
# Arguments: none
# Outputs:   one "Applying patch <name>" line per patch on stdout, plus
#            whatever patch(1) prints
# Returns:   status of the last command executed in the loop
#######################################
apply_patches() {
  local patch_file
  for patch_file in patches/*; do
    # When nothing matches, the glob stays literal; skip that and anything
    # that is not a regular file (for example a sub-directory).
    [[ -f "$patch_file" ]] || continue
    echo "Applying patch ${patch_file##*/}"
    patch -d llama.cpp/ -p1 < "$patch_file"
  done
}

# This runs before `set -e` is enabled, so a patch that fails to apply does
# not abort the script.
# NOTE(review): presumably intentional (re-runs on an already patched tree) - confirm.
apply_patches

# From here on, any failing command aborts the script.
set -e

# Directory inside the llama.cpp tree that receives the gRPC server sources.
grpc_dir=llama.cpp/tools/grpc-server/

# Files that live next to this script.
for local_src in CMakeLists.txt grpc-server.cpp; do
  cp -r "$local_src" "$grpc_dir"
done

# Files taken from the llama.cpp tree itself (overwritten, copied verbosely).
for upstream_src in common/json.hpp tools/server/utils.hpp tools/server/httplib.h; do
  cp -rfv "llama.cpp/$upstream_src" "$grpc_dir"
done

# Add the grpc-server directory to llama.cpp/tools/CMakeLists.txt unless the
# file already mentions it. Errors are tolerated while this check runs.
set +e
if ! grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
  echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
else
  echo "grpc-server already added"
fi
set -e

# Now to keep maximum compatibility with the original server.cpp, we need to
# remove the index.html.gz.hpp and loading.html.hpp includes
# and remove the main function
# TODO: upstream this to the original server.cpp by extracting the upstream main function to a separate file

#######################################
# Copy C++ source to stdout with the definition of main() removed.
# Skipping starts on the line matching "int main(" and stops on the line
# where the brace depth returns to zero after the body was opened, so the
# opening brace may sit on the signature line or on a later line.
# A bare prototype ("int main(...);") is dropped without skipping further.
# Braces are counted textually: braces inside string literals or comments
# within main are counted as well.
# Arguments: input files, passed through to awk (stdin when none are given)
# Outputs:   the filtered source on stdout
# Returns:   exit status of awk
#######################################
strip_main_function() {
  awk '
    # Start skipping at the signature; ignore matches while already skipping.
    !in_main && /int[ \t]+main[ \t]*\(/ {
      in_main = 1
      depth = 0          # number of currently open braces
      body_seen = 0      # becomes 1 once the opening brace of the body is found
    }
    in_main {
      opened = gsub(/[{]/, "{")    # count opening braces on this line
      closed = gsub(/[}]/, "}")    # count closing braces on this line
      if (opened > 0) body_seen = 1
      depth += opened - closed
      if (body_seen && depth <= 0) {
        in_main = 0      # the brace closing main is on this line
      } else if (!body_seen && /;[ \t]*$/) {
        in_main = 0      # prototype only, there is no body to skip
      }
      next               # never print a line that belongs to main
    }
    { print }            # everything outside main passes through
  ' "$@"
}

strip_main_function "llama.cpp/tools/server/server.cpp" > llama.cpp/tools/grpc-server/server.cpp

# remove index.html.gz.hpp and loading.html.hpp includes
# The in-place flag is spelled differently per platform: on macOS it is
# followed by a separate, empty argument; elsewhere `-i` is passed alone.
inplace_flag=(-i)
if [[ "$OSTYPE" == "darwin"* ]]; then
  # macOS
  inplace_flag+=('')
fi
sed "${inplace_flag[@]}" '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp