diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 337268c1d..0f589313a 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=e9f9483464e6f01d843d7f0293bd9c7bc6b2221c +LLAMA_VERSION?=bde188d60f58012ada0725c6dd5ba7c69fe4dd87 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index edfb14862..cb48e172b 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -1211,6 +1211,8 @@ public: } tasks.reserve(inputs.size()); + std::vector<common_chat_syntax> states; + states.reserve(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { server_task task = server_task(type); @@ -1229,9 +1231,13 @@ public: task.params.oaicompat_cmpl_id = completion_id; // oaicompat_model is already populated by params_from_json_cmpl + // Extract oaicompat_chat_syntax for state tracking before moving task + states.push_back(task.params.oaicompat_chat_syntax); + tasks.push_back(std::move(task)); } + rd->set_states(std::move(states)); rd->post_tasks(std::move(tasks)); } catch (const std::exception & e) { return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, e.what()); @@ -1946,6 +1952,8 @@ public: } tasks.reserve(inputs.size()); + std::vector<common_chat_syntax> states; + states.reserve(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { server_task task = server_task(type); @@ -1964,9 +1972,13 @@ public: task.params.oaicompat_cmpl_id = completion_id; // oaicompat_model is already populated by params_from_json_cmpl + // Extract oaicompat_chat_syntax for state tracking before moving task + states.push_back(task.params.oaicompat_chat_syntax); + tasks.push_back(std::move(task)); } + rd->set_states(std::move(states)); rd->post_tasks(std::move(tasks)); } catch (const std::exception & e) { return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, e.what());