Feature/Add Azure Cognitive speech-to-text functionality (#3718)

* feat: Add Azure Cognitive Services integration for speech-to-text functionality
  - Introduced a new credential class for Azure Cognitive Services.
  - Updated speech-to-text processing to support Azure Cognitive Services as a provider.
  - Enhanced UI components to include Azure Cognitive Services options and inputs for configuration.
  - Added necessary imports and error handling for Azure API requests.
* Update SpeechToText.jsx linting
* refactor: Update audio file handling in SpeechToText component
  - Removed the dependency on 'form-data' and replaced it with a Blob for audio file uploads.
  - Simplified the audio file appending process to the form data.
  - Cleaned up the headers in the Axios request by removing unnecessary form data headers.

  This change enhances the efficiency of audio file processing in the speech-to-text functionality.

---------

Co-authored-by: Henry Heng <henryheng@flowiseai.com>
Co-authored-by: Henry <hzj94@hotmail.com>
2026-06-28 19:00:59 +03:00 · 2024-12-17 20:35:16 -05:00
parent fff6319f5d
commit 2360f5fdeb
3 changed files with 129 additions and 0 deletions
@@ -17,6 +17,7 @@ import { Dropdown } from '@/ui-component/dropdown/Dropdown'
 import openAISVG from '@/assets/images/openai.svg'
 import assemblyAIPng from '@/assets/images/assemblyai.png'
 import localAiPng from '@/assets/images/localai.png'
+import azureSvg from '@/assets/images/azure_openai.svg'
 import groqPng from '@/assets/images/groq.png'

 // store
@@ -31,6 +32,7 @@ const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
    LOCALAI_STT: 'localAISTT',
+    AZURE_COGNITIVE: 'azureCognitive',
    GROQ_WHISPER: 'groqWhisper'
 }

@@ -142,6 +144,58 @@ const speechToTextProviders = {
            }
        ]
    },
+    [SpeechToTextType.AZURE_COGNITIVE]: {
+        label: 'Azure Cognitive Services',
+        name: SpeechToTextType.AZURE_COGNITIVE,
+        icon: azureSvg,
+        url: 'https://azure.microsoft.com/en-us/products/cognitive-services/speech-services',
+        inputs: [
+            {
+                label: 'Connect Credential',
+                name: 'credential',
+                type: 'credential',
+                credentialNames: ['azureCognitiveServices']
+            },
+            {
+                label: 'Language',
+                name: 'language',
+                type: 'string',
+                description: 'The recognition language (e.g., "en-US", "es-ES")',
+                placeholder: 'en-US',
+                optional: true
+            },
+            {
+                label: 'Profanity Filter Mode',
+                name: 'profanityFilterMode',
+                type: 'options',
+                description: 'How to handle profanity in the transcription',
+                options: [
+                    {
+                        label: 'None',
+                        name: 'None'
+                    },
+                    {
+                        label: 'Masked',
+                        name: 'Masked'
+                    },
+                    {
+                        label: 'Removed',
+                        name: 'Removed'
+                    }
+                ],
+                default: 'Masked',
+                optional: true
+            },
+            {
+                label: 'Audio Channels',
+                name: 'channels',
+                type: 'string',
+                description: 'Comma-separated list of audio channels to process (e.g., "0,1")',
+                placeholder: '0,1',
+                default: '0,1'
+            }
+        ]
+    },
    [SpeechToTextType.GROQ_WHISPER]: {
        label: 'Groq Whisper',
        name: SpeechToTextType.GROQ_WHISPER,