mirror of
https://github.com/mudler/LocalAI.git
synced 2025-12-30 22:20:20 -06:00
feat(openai): support input_audio chat api field (#5870)
Improve the chat completion endpoint's compatibility with the OpenAI API by supporting message content parts of type `input_audio`, e.g.:
```
{
...
"messages": [
{
"role": "user",
"content": [{
"type": "input_audio",
"input_audio": {
"data": "<base64-encoded audio data>",
"format": "wav"
}
}]
}
]
}
```
Closes #5869
Signed-off-by: Max Goltzsche <max.goltzsche@gmail.com>
This commit is contained in:
@@ -308,7 +308,7 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
|
||||
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||
vidIndex++
|
||||
nrOfVideosInMessage++
|
||||
case "audio_url", "audio", "input_audio":
|
||||
case "audio_url", "audio":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||
if err != nil {
|
||||
@@ -318,6 +318,11 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
|
||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||
audioIndex++
|
||||
nrOfAudiosInMessage++
|
||||
case "input_audio":
|
||||
// TODO: make sure that we only return base64 stuff
|
||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, pp.InputAudio.Data)
|
||||
audioIndex++
|
||||
nrOfAudiosInMessage++
|
||||
case "image_url", "image":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||
|
||||
@@ -58,17 +58,25 @@ type Choice struct {
|
||||
}
|
||||
|
||||
type Content struct {
|
||||
Type string `json:"type" yaml:"type"`
|
||||
Text string `json:"text" yaml:"text"`
|
||||
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||
AudioURL ContentURL `json:"audio_url" yaml:"audio_url"`
|
||||
VideoURL ContentURL `json:"video_url" yaml:"video_url"`
|
||||
Type string `json:"type" yaml:"type"`
|
||||
Text string `json:"text" yaml:"text"`
|
||||
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||
AudioURL ContentURL `json:"audio_url" yaml:"audio_url"`
|
||||
VideoURL ContentURL `json:"video_url" yaml:"video_url"`
|
||||
InputAudio InputAudio `json:"input_audio" yaml:"input_audio"`
|
||||
}
|
||||
|
||||
type ContentURL struct {
|
||||
URL string `json:"url" yaml:"url"`
|
||||
}
|
||||
|
||||
type InputAudio struct {
|
||||
// Format identifies the audio format, e.g. 'wav'.
|
||||
Format string `json:"format" yaml:"format"`
|
||||
// Data holds the base64-encoded audio data.
|
||||
Data string `json:"data" yaml:"data"`
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
// The message role
|
||||
Role string `json:"role,omitempty" yaml:"role"`
|
||||
|
||||
Reference in New Issue
Block a user