[FEATURE] Added support for LocalAI Speech To Text configuration (#2376)

* Added support for LocalAI to the Speech To Text configuration. Added a few debug statements around the speech-to-text conversion. Finally, refactored speechToTextProviders a bit to remove some magic strings that have undocumented naming rules.

* LocalAI STT - PR Feedback - Updated LocalAI Image, changed casing, and updated the default model to whisper-1.
This commit is contained in:
clates
2024-05-13 07:21:27 -04:00
committed by GitHub
parent 823cefb5c5
commit d3f03e380e
4 changed files with 129 additions and 32 deletions
+3 -1
View File
@@ -78,7 +78,8 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
}
// Run Speech to Text conversion
-if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4') {
+if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4' || upload.mime === 'audio/ogg') {
+logger.debug(`Attempting a speech to text conversion...`)
let speechToTextConfig: ICommonObject = {}
if (chatflow.speechToText) {
const speechToTextProviders = JSON.parse(chatflow.speechToText)
@@ -99,6 +100,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
databaseEntities: databaseEntities
}
const speechToTextResult = await convertSpeechToText(upload, speechToTextConfig, options)
+logger.debug(`Speech to text result: ${speechToTextResult}`)
if (speechToTextResult) {
incomingInput.question = speechToTextResult
}