mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-25 19:01:06 +03:00
[FEATURE] Added support for LocalAI Speech To Text configuration (#2376)
* Added support for LocalAI to the Speech To Text configuration. Added a few debug statements around speech to text conversion. Finally, refactored the speechToTextProviders a bit to try to remove some magic strings that have undocumented rules around naming.
* LocalAI STT - PR Feedback - Updated LocalAI Image, changed casing, and updated the default model to whisper-1.
This commit is contained in:
@@ -4,40 +4,69 @@ import { type ClientOptions, OpenAIClient } from '@langchain/openai'
|
||||
import { AssemblyAI } from 'assemblyai'
|
||||
import { getFileFromStorage } from './storageUtils'
|
||||
|
||||
/**
 * Identifiers of the supported speech-to-text providers.
 *
 * `convertSpeechToText` switches on `speechToTextConfig.name` using these
 * values, so each string must match the configured provider name exactly
 * (the comparison is case-sensitive).
 *
 * Declared `as const` so the members are readonly and keep their literal
 * types instead of widening to `string`.
 */
const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
    LOCALAI_STT: 'localAISTT'
} as const
|
||||
|
||||
export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
|
||||
if (speechToTextConfig) {
|
||||
const credentialId = speechToTextConfig.credentialId as string
|
||||
const credentialData = await getCredentialData(credentialId ?? '', options)
|
||||
const audio_file = await getFileFromStorage(upload.name, options.chatflowid, options.chatId)
|
||||
|
||||
if (speechToTextConfig.name === 'openAIWhisper') {
|
||||
const openAIClientOptions: ClientOptions = {
|
||||
apiKey: credentialData.openAIApiKey
|
||||
switch (speechToTextConfig.name) {
|
||||
case SpeechToTextType.OPENAI_WHISPER: {
|
||||
const openAIClientOptions: ClientOptions = {
|
||||
apiKey: credentialData.openAIApiKey
|
||||
}
|
||||
const openAIClient = new OpenAIClient(openAIClientOptions)
|
||||
const openAITranscription = await openAIClient.audio.transcriptions.create({
|
||||
file: new File([new Blob([audio_file])], upload.name),
|
||||
model: 'whisper-1',
|
||||
language: speechToTextConfig?.language,
|
||||
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
|
||||
prompt: speechToTextConfig?.prompt
|
||||
})
|
||||
if (openAITranscription?.text) {
|
||||
return openAITranscription.text
|
||||
}
|
||||
break
|
||||
}
|
||||
const openAIClient = new OpenAIClient(openAIClientOptions)
|
||||
const transcription = await openAIClient.audio.transcriptions.create({
|
||||
file: new File([new Blob([audio_file])], upload.name),
|
||||
model: 'whisper-1',
|
||||
language: speechToTextConfig?.language,
|
||||
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
|
||||
prompt: speechToTextConfig?.prompt
|
||||
})
|
||||
if (transcription?.text) {
|
||||
return transcription.text
|
||||
}
|
||||
} else if (speechToTextConfig.name === 'assemblyAiTranscribe') {
|
||||
const client = new AssemblyAI({
|
||||
apiKey: credentialData.assemblyAIApiKey
|
||||
})
|
||||
case SpeechToTextType.ASSEMBLYAI_TRANSCRIBE: {
|
||||
const assemblyAIClient = new AssemblyAI({
|
||||
apiKey: credentialData.assemblyAIApiKey
|
||||
})
|
||||
|
||||
const params = {
|
||||
audio: audio_file,
|
||||
speaker_labels: false
|
||||
}
|
||||
const params = {
|
||||
audio: audio_file,
|
||||
speaker_labels: false
|
||||
}
|
||||
|
||||
const transcription = await client.transcripts.transcribe(params)
|
||||
if (transcription?.text) {
|
||||
return transcription.text
|
||||
const assemblyAITranscription = await assemblyAIClient.transcripts.transcribe(params)
|
||||
if (assemblyAITranscription?.text) {
|
||||
return assemblyAITranscription.text
|
||||
}
|
||||
break
|
||||
}
|
||||
case SpeechToTextType.LOCALAI_STT: {
|
||||
const LocalAIClientOptions: ClientOptions = {
|
||||
apiKey: credentialData.localAIApiKey,
|
||||
baseURL: speechToTextConfig?.baseUrl
|
||||
}
|
||||
const localAIClient = new OpenAIClient(LocalAIClientOptions)
|
||||
const localAITranscription = await localAIClient.audio.transcriptions.create({
|
||||
file: new File([new Blob([audio_file])], upload.name),
|
||||
model: speechToTextConfig?.model || 'whisper-1',
|
||||
language: speechToTextConfig?.language,
|
||||
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
|
||||
prompt: speechToTextConfig?.prompt
|
||||
})
|
||||
if (localAITranscription?.text) {
|
||||
return localAITranscription.text
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user