Feature/Add Groq Whisper support (#3706)

* feat: Add Groq Whisper support to SpeechToText component

- Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components.
- Updated SpeechToTextType to include GROQ_WHISPER.
- Implemented Groq client integration for audio transcription with customizable model, language, and temperature options.
- Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings.

* Set the speech-to-text "none" status to false when another provider is selected

---------

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Anthony Bryan Gavilan Vinces
2024-12-17 18:11:07 -05:00
committed by GitHub
parent d5498858ec
commit 4c29b2390c
3 changed files with 66 additions and 2 deletions
+20 -1
View File
@@ -3,11 +3,13 @@ import { getCredentialData } from './utils'
import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
import { AssemblyAI } from 'assemblyai'
import { getFileFromStorage } from './storageUtils'
import Groq from 'groq-sdk'
/**
 * Identifiers of the supported speech-to-text providers.
 *
 * The values are the provider names compared against in the `switch` inside
 * `convertSpeechToText` (e.g. `case SpeechToTextType.GROQ_WHISPER`).
 */
const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
    LOCALAI_STT: 'localAISTT',
    GROQ_WHISPER: 'groqWhisper'
}
export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
@@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
}
break
}
case SpeechToTextType.GROQ_WHISPER: {
const groqClient = new Groq({
apiKey: credentialData.groqApiKey
})
const file = await toFile(audio_file, upload.name)
const groqTranscription = await groqClient.audio.transcriptions.create({
file,
model: speechToTextConfig?.model || 'whisper-large-v3',
language: speechToTextConfig?.language,
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
response_format: 'verbose_json'
})
if (groqTranscription?.text) {
return groqTranscription.text
}
break
}
}
} else {
throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')