feat: Realtime API support reboot (#5392)

* feat(realtime): Initial Realtime API implementation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: go mod tidy

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat: Implement transcription only mode for realtime API

Reduce the scope of the realtime API for the initial release and make
transcription-only mode functional.

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* chore(build): Build backends on a separate layer to speed up core only changes

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Richard Palethorpe
2025-05-25 21:25:05 +01:00
committed by GitHub
parent 4a91950848
commit bf6426aef2
18 changed files with 2953 additions and 70 deletions

View File

@@ -21,8 +21,8 @@ func (vad *VAD) Load(opts *pb.ModelOptions) error {
SampleRate: 16000,
//WindowSize: 1024,
Threshold: 0.5,
MinSilenceDurationMs: 0,
SpeechPadMs: 0,
MinSilenceDurationMs: 100,
SpeechPadMs: 30,
})
if err != nil {
return fmt.Errorf("create silero detector: %w", err)
@@ -35,6 +35,10 @@ func (vad *VAD) Load(opts *pb.ModelOptions) error {
func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
audio := req.Audio
if err := vad.detector.Reset(); err != nil {
return pb.VADResponse{}, fmt.Errorf("reset: %w", err)
}
segments, err := vad.detector.Detect(audio)
if err != nil {
return pb.VADResponse{}, fmt.Errorf("detect: %w", err)