feat(whisper-cpp): Convert to Purego and add VAD (#6087)

* fix(ci): Avoid matching wrong backend with the same prefix

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

* chore(whisper): Use Purego and enable VAD

  This replaces the Whisper CGO bindings with our own Purego-based module to make compilation easier. In addition, this allows VAD models to be loaded by Whisper. There is not much benefit now, except that the same backend can be used for VAD and transcription. Depending on upstream, we may also be able to use GPU for VAD in the future, but presently it is disabled.

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-01-06 02:29:54 -06:00 · 2025-08-28 16:25:18 +01:00
parent ead00a28b9
commit e6ebfd3ba1
13 changed files with 424 additions and 198 deletions
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -31,6 +31,7 @@ import (
 const (
 	localSampleRate  = 16000
 	remoteSampleRate = 24000
+	vadModel         = "silero-vad-ggml"
 )

 // A model can be "emulated" that is: transcribe audio to text -> feed text to the LLM -> generate audio as result
@@ -233,7 +234,7 @@ func registerRealtime(application *application.Application) func(c *websocket.Co
 		// TODO: The API has no way to configure the VAD model or other models that make up a pipeline to fake any-to-any
 		//       So possibly we could have a way to configure a composite model that can be used in situations where any-to-any is expected
 		pipeline := config.Pipeline{
-			VAD:           "silero-vad",
+			VAD:           vadModel,
 			Transcription: session.InputAudioTranscription.Model,
 		}

@@ -568,7 +569,7 @@ func updateTransSession(session *Session, update *types.ClientSession, cl *confi

 	if trUpd != nil && trUpd.Model != "" && trUpd.Model != trCur.Model {
 		pipeline := config.Pipeline{
-			VAD:           "silero-vad",
+			VAD:           vadModel,
 			Transcription: trUpd.Model,
 		}