[FEATURE] Added support for LocalAI Speech To Text configuration (#2376)

* Added support for LocalAI to the Speech To Text configuration. Added a few debug statements around speech to text conversion. Finally, refactored the speechToTextProviders a bit to try and remove some magic strings that have undocumented rules around naming. * LocalAI STT - PR Feedback - Updated LocalAI Image, changed casing, and updated the default model to whisper-1.
2026-06-28 23:01:09 +03:00 · 2024-05-13 07:21:27 -04:00
parent 823cefb5c5
commit d3f03e380e
4 changed files with 129 additions and 32 deletions
@@ -78,7 +78,8 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
                }

                // Run Speech to Text conversion
-                if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4') {
+                if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4' || upload.mime === 'audio/ogg') {
+                    logger.debug(`Attempting a speech to text conversion...`)
                    let speechToTextConfig: ICommonObject = {}
                    if (chatflow.speechToText) {
                        const speechToTextProviders = JSON.parse(chatflow.speechToText)
@@ -99,6 +100,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
                            databaseEntities: databaseEntities
                        }
                        const speechToTextResult = await convertSpeechToText(upload, speechToTextConfig, options)
+                        logger.debug(`Speech to text result: ${speechToTextResult}`)
                        if (speechToTextResult) {
                            incomingInput.question = speechToTextResult
                        }