mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-25 03:00:54 +03:00
Feature/Add Azure Cognitive speech-to-text functionality (#3718)
* feat: Add Azure Cognitive Services integration for speech-to-text functionality
  - Introduced a new credential class for Azure Cognitive Services.
  - Updated speech-to-text processing to support Azure Cognitive Services as a provider.
  - Enhanced UI components to include Azure Cognitive Services options and inputs for configuration.
  - Added necessary imports and error handling for Azure API requests.
* Update SpeechToText.jsx linting
* refactor: Update audio file handling in SpeechToText component
  - Removed the dependency on 'form-data' and replaced it with a Blob for audio file uploads.
  - Simplified the audio file appending process to the form data.
  - Cleaned up the headers in the Axios request by removing unnecessary form data headers.

  This change enhances the efficiency of audio file processing in the speech-to-text functionality.

---------

Co-authored-by: Henry Heng <henryheng@flowiseai.com>
Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
committed by
GitHub
parent
fff6319f5d
commit
2360f5fdeb
@@ -3,12 +3,14 @@ import { getCredentialData } from './utils'
|
||||
import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
|
||||
import { AssemblyAI } from 'assemblyai'
|
||||
import { getFileFromStorage } from './storageUtils'
|
||||
import axios from 'axios'
|
||||
import Groq from 'groq-sdk'
|
||||
|
||||
/**
 * Identifiers of the supported speech-to-text providers.
 *
 * The members are used as `case` labels when dispatching a transcription
 * request to a provider-specific implementation. The object is declared
 * `as const` so each member keeps its literal string type and cannot be
 * reassigned; the runtime values are unchanged.
 */
const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
    LOCALAI_STT: 'localAISTT',
    AZURE_COGNITIVE: 'azureCognitive',
    GROQ_WHISPER: 'groqWhisper'
} as const
|
||||
|
||||
@@ -72,6 +74,40 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
|
||||
}
|
||||
break
|
||||
}
|
||||
case SpeechToTextType.AZURE_COGNITIVE: {
|
||||
try {
|
||||
const baseUrl = `https://${credentialData.serviceRegion}.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe`
|
||||
const apiVersion = credentialData.apiVersion || '2024-05-15-preview'
|
||||
|
||||
const formData = new FormData()
|
||||
const audioBlob = new Blob([audio_file], { type: upload.type })
|
||||
formData.append('audio', audioBlob, upload.name)
|
||||
|
||||
const channelsStr = speechToTextConfig.channels || '0,1'
|
||||
const channels = channelsStr.split(',').map(Number)
|
||||
|
||||
const definition = {
|
||||
locales: [speechToTextConfig.language || 'en-US'],
|
||||
profanityFilterMode: speechToTextConfig.profanityFilterMode || 'Masked',
|
||||
channels
|
||||
}
|
||||
formData.append('definition', JSON.stringify(definition))
|
||||
|
||||
const response = await axios.post(`${baseUrl}?api-version=${apiVersion}`, formData, {
|
||||
headers: {
|
||||
'Ocp-Apim-Subscription-Key': credentialData.azureSubscriptionKey,
|
||||
Accept: 'application/json'
|
||||
}
|
||||
})
|
||||
|
||||
if (response.data && response.data.combinedPhrases.length > 0) {
|
||||
return response.data.combinedPhrases[0]?.text || ''
|
||||
}
|
||||
return ''
|
||||
} catch (error) {
|
||||
throw error.response?.data || error
|
||||
}
|
||||
}
|
||||
case SpeechToTextType.GROQ_WHISPER: {
|
||||
const groqClient = new Groq({
|
||||
apiKey: credentialData.groqApiKey
|
||||
|
||||
Reference in New Issue
Block a user