Feature/Add Groq Whisper support (#3706)

* feat: Add Groq Whisper support to SpeechToText component
  - Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components.
  - Updated SpeechToTextType to include GROQ_WHISPER.
  - Implemented Groq client integration for audio transcription with customizable model, language, and temperature options.
  - Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings.
* Set the speech-to-text "none" status to false when another provider is selected

---------

Co-authored-by: Henry <hzj94@hotmail.com>
2026-06-28 07:00:49 +03:00 · 2024-12-17 18:11:07 -05:00
parent d5498858ec
commit 4c29b2390c
3 changed files with 66 additions and 2 deletions
@@ -3,11 +3,13 @@ import { getCredentialData } from './utils'
 import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
 import { AssemblyAI } from 'assemblyai'
 import { getFileFromStorage } from './storageUtils'
+import Groq from 'groq-sdk'

 const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
-    LOCALAI_STT: 'localAISTT'
+    LOCALAI_STT: 'localAISTT',
+    GROQ_WHISPER: 'groqWhisper'
 }

 export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
@@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
                }
                break
            }
+            case SpeechToTextType.GROQ_WHISPER: {
+                const groqClient = new Groq({
+                    apiKey: credentialData.groqApiKey
+                })
+                const file = await toFile(audio_file, upload.name)
+                const groqTranscription = await groqClient.audio.transcriptions.create({
+                    file,
+                    model: speechToTextConfig?.model || 'whisper-large-v3',
+                    language: speechToTextConfig?.language,
+                    temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
+                    response_format: 'verbose_json'
+                })
+                if (groqTranscription?.text) {
+                    return groqTranscription.text
+                }
+                break
+            }
        }
    } else {
        throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')