mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-06 10:39:55 -06:00
feat(grpc): return consumed token count and update response accordingly (#2035)
Fixes: #1920
This commit is contained in:
committed by
GitHub
parent
de3a1a0a8e
commit
e843d7df0e
@@ -114,6 +114,8 @@ message PredictOptions {
|
||||
// The response message containing the result
|
||||
message Reply {
|
||||
bytes message = 1;
|
||||
int32 tokens = 2;
|
||||
int32 prompt_tokens = 3;
|
||||
}
|
||||
|
||||
message ModelOptions {
|
||||
|
||||
@@ -2332,6 +2332,10 @@ public:
|
||||
std::string completion_text = result.result_json.value("content", "");
|
||||
|
||||
reply.set_message(completion_text);
|
||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||
reply.set_tokens(tokens_predicted);
|
||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||
reply.set_prompt_tokens(tokens_evaluated);
|
||||
|
||||
// Send the reply
|
||||
writer->Write(reply);
|
||||
@@ -2357,6 +2361,10 @@ public:
|
||||
task_result result = llama.queue_results.recv(task_id);
|
||||
if (!result.error && result.stop) {
|
||||
completion_text = result.result_json.value("content", "");
|
||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||
reply->set_prompt_tokens(tokens_evaluated);
|
||||
reply->set_tokens(tokens_predicted);
|
||||
reply->set_message(completion_text);
|
||||
}
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user