Text to speech (#5062)

* Add tts UI * Add tts backend * Add description to eleven labs credentials * Fix issue with fetching eleven labs voices * Fix issue with text to speech tab not showing correct saved voice * Add option to autoplay tts audio after prediction completes * Fix crash issue when first changing tts provider * Set up streaming response for text to speech audio * Update controllers - fix issue with sse client getting removed before tts events are sent * Use existing sse streamer to stream tts audio before sse client is removed * Add tts sse to redis publisher * Fix issues with TTS - openai voices, streaming audio, rate limiting, speed of speech * Refactor * Refactor TTS - fix issues with tts loading and stop audio buttons * Abort TTS SSE when clicking the stop button * Update SSE handling for TTS * Fix issue with test voice feature * Fix issue with tts voices not loading * Update generate tts endpoint and its usage in internal chat * Whitelist tts generate endpoint * Refactor Text-to-Speech Provider Selection and Enhance UI Components - Updated the text-to-speech controller to select the active provider based on status instead of the first available provider - Added audio waveform controls and test audio functionality in the TextToSpeech component, allowing users to play and pause test audio - Integrated Autocomplete for voice selection in the TextToSpeech component - Implemented TTS action management in ChatMessage to prevent auto-scrolling during TTS actions * - Implemented stopAllTTS function calls to halt existing TTS audio before playing new audio or starting a new TTS stream * Updated the condition for enabling TTS providers to exclude the 'none' provider, ensuring only valid providers are considered for text-to-speech functionality. 
* Remove unnecessary code * Add ability to abort audio streaming in TTS and release lock on chat input * Remove logger * Fix tts audio not playing when clicking speaker button * update * TTS abort controller * Fix abort not working for TTS autoplay * Send metadata event when aborting autoplay TTS * Fix UI issue * Remove elevenlabs sdk from root package.json * Remove redundant condition for tts autoplay in chatflow --------- Co-authored-by: Henry <hzj94@hotmail.com>
2026-06-28 13:00:56 +03:00 · 2025-10-02 16:49:06 +05:30
parent 8d0a198e2f
commit 9b8fee3d8f
34 changed files with 41358 additions and 39056 deletions
@@ -64,6 +64,7 @@ export interface IChatFlow {
    apikeyid?: string
    analytic?: string
    speechToText?: string
+    textToSpeech?: string
    chatbotConfig?: string
    followUpPrompts?: string
    apiConfig?: string
@@ -0,0 +1,226 @@
+import { Request, Response, NextFunction } from 'express'
+import chatflowsService from '../../services/chatflows'
+import textToSpeechService from '../../services/text-to-speech'
+import { InternalFlowiseError } from '../../errors/internalFlowiseError'
+import { StatusCodes } from 'http-status-codes'
+import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
+import { convertTextToSpeechStream } from 'flowise-components'
+import { databaseEntities } from '../../utils'
+
+/**
+ * Streams text-to-speech audio for the request's `text` as Server-Sent Events.
+ *
+ * TTS settings are taken from the chatflow's stored `textToSpeech` config when `chatflowId`
+ * is supplied (the provider entry whose `status` is `true`); otherwise they are read from the
+ * request body (`provider`, `credentialId`, `voice`, `model`).
+ *
+ * Events written to the response: `tts_start` (audio format), `tts_data` (base64 audio chunk),
+ * `tts_end`, and `tts_error` on failure. Failures are reported in-band as an SSE event; this
+ * handler never sets an HTTP error status itself.
+ *
+ * An AbortController is registered in the app's abort controller pool under
+ * `tts_<chatId>_<chatMessageId>` for the duration of the stream.
+ */
+const generateTextToSpeech = async (req: Request, res: Response) => {
+    try {
+        const {
+            chatId,
+            chatflowId,
+            chatMessageId,
+            text,
+            provider: bodyProvider,
+            credentialId: bodyCredentialId,
+            voice: bodyVoice,
+            model: bodyModel
+        } = req.body
+
+        if (!text) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                `Error: textToSpeechController.generateTextToSpeech - text not provided!`
+            )
+        }
+
+        let provider: string, credentialId: string, voice: string, model: string
+
+        if (chatflowId) {
+            // Get TTS config from chatflow
+            const chatflow = await chatflowsService.getChatflowById(chatflowId)
+            // A chatflow without stored TTS settings (or one storing JSON `null`) is treated as
+            // "no providers" so the caller gets the explicit error below rather than a JSON.parse failure
+            const ttsConfig = (chatflow.textToSpeech ? JSON.parse(chatflow.textToSpeech) : null) ?? {}
+
+            // Find the provider with status: true (entries that are null/undefined are skipped)
+            const activeProviderKey = Object.keys(ttsConfig).find((key) => ttsConfig[key]?.status === true)
+            if (!activeProviderKey) {
+                throw new InternalFlowiseError(
+                    StatusCodes.BAD_REQUEST,
+                    `Error: textToSpeechController.generateTextToSpeech - no active TTS provider configured in chatflow!`
+                )
+            }
+
+            const providerConfig = ttsConfig[activeProviderKey]
+            provider = activeProviderKey
+            credentialId = providerConfig.credentialId
+            voice = providerConfig.voice
+            model = providerConfig.model
+        } else {
+            // Use TTS config from request body
+            provider = bodyProvider
+            credentialId = bodyCredentialId
+            voice = bodyVoice
+            model = bodyModel
+        }
+
+        if (!provider) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                `Error: textToSpeechController.generateTextToSpeech - provider not provided!`
+            )
+        }
+
+        if (!credentialId) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                `Error: textToSpeechController.generateTextToSpeech - credentialId not provided!`
+            )
+        }
+
+        res.setHeader('Content-Type', 'text/event-stream')
+        res.setHeader('Cache-Control', 'no-cache')
+        res.setHeader('Connection', 'keep-alive')
+        res.setHeader('Access-Control-Allow-Origin', '*')
+        res.setHeader('Access-Control-Allow-Headers', 'Cache-Control')
+
+        const appServer = getRunningExpressApp()
+        const options = {
+            orgId: '',
+            chatflowid: chatflowId || '',
+            chatId: chatId || '',
+            appDataSource: appServer.AppDataSource,
+            databaseEntities: databaseEntities
+        }
+
+        const textToSpeechConfig = {
+            name: provider,
+            credentialId: credentialId,
+            voice: voice,
+            model: model
+        }
+
+        // Create and store AbortController so the abort endpoint can cancel this stream
+        const abortController = new AbortController()
+        const ttsAbortId = `tts_${chatId}_${chatMessageId}`
+        appServer.abortControllerPool.add(ttsAbortId, abortController)
+
+        try {
+            await convertTextToSpeechStream(
+                text,
+                textToSpeechConfig,
+                options,
+                abortController,
+                (format: string) => {
+                    const startResponse = {
+                        event: 'tts_start',
+                        data: { chatMessageId, format }
+                    }
+                    res.write('event: tts_start\n')
+                    res.write(`data: ${JSON.stringify(startResponse)}\n\n`)
+                },
+                (chunk: Buffer) => {
+                    const audioBase64 = chunk.toString('base64')
+                    const clientResponse = {
+                        event: 'tts_data',
+                        data: { chatMessageId, audioChunk: audioBase64 }
+                    }
+                    res.write('event: tts_data\n')
+                    res.write(`data: ${JSON.stringify(clientResponse)}\n\n`)
+                },
+                async () => {
+                    const endResponse = {
+                        event: 'tts_end',
+                        data: { chatMessageId }
+                    }
+                    res.write('event: tts_end\n')
+                    res.write(`data: ${JSON.stringify(endResponse)}\n\n`)
+                    res.end()
+                    // Clean up from pool on successful completion
+                    appServer.abortControllerPool.remove(ttsAbortId)
+                }
+            )
+        } catch (error) {
+            // Clean up from pool on error
+            appServer.abortControllerPool.remove(ttsAbortId)
+            throw error
+        }
+    } catch (error) {
+        // The response may already have been ended (e.g. a failure raised after `tts_end` was
+        // sent); writing to it again would raise a write-after-end error, so there is nothing
+        // more to report to the client.
+        if (res.writableEnded) return
+
+        if (!res.headersSent) {
+            res.setHeader('Content-Type', 'text/event-stream')
+            res.setHeader('Cache-Control', 'no-cache')
+            res.setHeader('Connection', 'keep-alive')
+        }
+
+        const errorResponse = {
+            event: 'tts_error',
+            data: { error: error instanceof Error ? error.message : 'TTS generation failed' }
+        }
+        res.write('event: tts_error\n')
+        res.write(`data: ${JSON.stringify(errorResponse)}\n\n`)
+        res.end()
+    }
+}
+
+/**
+ * Aborts an in-flight text-to-speech stream for a chat message.
+ *
+ * Requires `chatId`, `chatMessageId` and `chatflowId` in the request body. Aborts the TTS
+ * controller registered as `tts_<chatId>_<chatMessageId>`, and - when a controller for the
+ * chatflow run (`<chatflowId>_<chatId>`) is present in the pool - aborts that too and streams
+ * a metadata event. Finally a `tts_abort` event is streamed for the chat.
+ *
+ * Responds with JSON. Validation failures are returned with their own status code
+ * (400); any other failure is returned as 500.
+ */
+const abortTextToSpeech = async (req: Request, res: Response) => {
+    try {
+        const { chatId, chatMessageId, chatflowId } = req.body
+
+        if (!chatId) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                `Error: textToSpeechController.abortTextToSpeech - chatId not provided!`
+            )
+        }
+
+        if (!chatMessageId) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                `Error: textToSpeechController.abortTextToSpeech - chatMessageId not provided!`
+            )
+        }
+
+        if (!chatflowId) {
+            throw new InternalFlowiseError(
+                StatusCodes.BAD_REQUEST,
+                `Error: textToSpeechController.abortTextToSpeech - chatflowId not provided!`
+            )
+        }
+
+        const appServer = getRunningExpressApp()
+
+        // Abort the TTS generation using existing pool
+        const ttsAbortId = `tts_${chatId}_${chatMessageId}`
+        appServer.abortControllerPool.abort(ttsAbortId)
+
+        // Also abort the main chat flow AbortController for auto-TTS
+        const chatFlowAbortId = `${chatflowId}_${chatId}`
+        if (appServer.abortControllerPool.get(chatFlowAbortId)) {
+            appServer.abortControllerPool.abort(chatFlowAbortId)
+            appServer.sseStreamer.streamMetadataEvent(chatId, { chatId, chatMessageId })
+        }
+
+        // Send abort event to client
+        appServer.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
+
+        res.json({ message: 'TTS stream aborted successfully', chatId, chatMessageId })
+    } catch (error) {
+        // Keep the status carried by deliberate HTTP errors (the 400s above) instead of
+        // reporting every failure as a server error
+        const statusCode = error instanceof InternalFlowiseError ? error.statusCode : StatusCodes.INTERNAL_SERVER_ERROR
+        res.status(statusCode).json({
+            error: error instanceof Error ? error.message : 'Failed to abort TTS stream'
+        })
+    }
+}
+
+/**
+ * Returns the voices offered by a TTS provider as JSON.
+ *
+ * Reads `provider` (required) and `credentialId` from the query string and delegates to the
+ * text-to-speech service. Any error is forwarded to the Express error handler via `next`.
+ */
+const getVoices = async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        const providerName = req.query.provider
+        const credentialId = req.query.credentialId
+
+        if (!providerName) {
+            throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, `Error: textToSpeechController.getVoices - provider not provided!`)
+        }
+
+        const availableVoices = await textToSpeechService.getVoices(providerName as string, credentialId as string)
+        return res.json(availableVoices)
+    } catch (error) {
+        next(error)
+    }
+}
+
+// Request handlers exposed by the text-to-speech controller
+export default {
+    generateTextToSpeech,
+    abortTextToSpeech,
+    getVoices
+}
@@ -41,6 +41,9 @@ export class ChatFlow implements IChatFlow {
    @Column({ nullable: true, type: 'text' })
    speechToText?: string

+    @Column({ nullable: true, type: 'text' })
+    textToSpeech?: string
+
    @Column({ nullable: true, type: 'text' })
    followUpPrompts?: string

@@ -0,0 +1,12 @@
+import { MigrationInterface, QueryRunner } from 'typeorm'
+
+/**
+ * Adds the `textToSpeech` TEXT column to the `chat_flow` table.
+ */
+export class AddTextToSpeechToChatFlow1754986457485 implements MigrationInterface {
+    /**
+     * Creates the column when it is not already present.
+     * The ALTER statement is awaited so that a failure rejects the migration instead of
+     * surfacing later as an unhandled rejection after `up` has already resolved.
+     */
+    public async up(queryRunner: QueryRunner): Promise<void> {
+        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
+        if (!columnExists) {
+            await queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
+        }
+    }
+
+    /** Drops the column again. */
+    public async down(queryRunner: QueryRunner): Promise<void> {
+        await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
+    }
+}
@@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
 import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
 import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
 import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
+import { AddTextToSpeechToChatFlow1754986457485 } from './1754986457485-AddTextToSpeechToChatFlow'
 import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
 import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'

@@ -101,6 +102,7 @@ export const mariadbMigrations = [
    AddErrorToEvaluationRun1744964560174,
    ExecutionLinkWorkspaceId1746862866554,
    ModifyExecutionDataColumnType1747902489801,
+    AddTextToSpeechToChatFlow1754986457485,
    ModifyChatflowType1755066758601,
    AddChatFlowNameIndex1755748356008
 ]
@@ -0,0 +1,12 @@
+import { MigrationInterface, QueryRunner } from 'typeorm'
+
+/**
+ * Adds the `textToSpeech` TEXT column to the `chat_flow` table.
+ */
+export class AddTextToSpeechToChatFlow1754986468397 implements MigrationInterface {
+    /**
+     * Creates the column when it is not already present.
+     * The ALTER statement is awaited so that a failure rejects the migration instead of
+     * surfacing later as an unhandled rejection after `up` has already resolved.
+     */
+    public async up(queryRunner: QueryRunner): Promise<void> {
+        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
+        if (!columnExists) {
+            await queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
+        }
+    }
+
+    /** Drops the column again. */
+    public async down(queryRunner: QueryRunner): Promise<void> {
+        await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
+    }
+}
@@ -37,6 +37,7 @@ import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpe
 import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
 import { FixErrorsColumnInEvaluationRun1746437114935 } from './1746437114935-FixErrorsColumnInEvaluationRun'
 import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
+import { AddTextToSpeechToChatFlow1754986468397 } from './1754986468397-AddTextToSpeechToChatFlow'
 import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
 import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'

@@ -103,6 +104,7 @@ export const mysqlMigrations = [
    FixErrorsColumnInEvaluationRun1746437114935,
    ExecutionLinkWorkspaceId1746862866554,
    ModifyExecutionDataColumnType1747902489801,
+    AddTextToSpeechToChatFlow1754986468397,
    ModifyChatflowType1755066758601,
    AddChatFlowNameIndex1755748356008
 ]
@@ -0,0 +1,11 @@
+import { MigrationInterface, QueryRunner } from 'typeorm'
+
+/**
+ * Adds the `textToSpeech` TEXT column to the `chat_flow` table.
+ */
+export class AddTextToSpeechToChatFlow1754986480347 implements MigrationInterface {
+    /** Creates the column; the `IF NOT EXISTS` clause makes a repeated run a no-op. */
+    public async up(queryRunner: QueryRunner): Promise<void> {
+        const addColumnSql = `ALTER TABLE "chat_flow" ADD COLUMN IF NOT EXISTS "textToSpeech" TEXT;`
+        await queryRunner.query(addColumnSql)
+    }
+
+    /** Drops the column again. */
+    public async down(queryRunner: QueryRunner): Promise<void> {
+        const dropColumnSql = `ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`
+        await queryRunner.query(dropColumnSql)
+    }
+}
@@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
 import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
 import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
 import { ModifyExecutionSessionIdFieldType1748450230238 } from './1748450230238-ModifyExecutionSessionIdFieldType'
+import { AddTextToSpeechToChatFlow1754986480347 } from './1754986480347-AddTextToSpeechToChatFlow'
 import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
 import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'

@@ -101,6 +102,7 @@ export const postgresMigrations = [
    AddErrorToEvaluationRun1744964560174,
    ExecutionLinkWorkspaceId1746862866554,
    ModifyExecutionSessionIdFieldType1748450230238,
+    AddTextToSpeechToChatFlow1754986480347,
    ModifyChatflowType1755066758601,
    AddChatFlowNameIndex1755748356008
 ]
@@ -0,0 +1,11 @@
+import { MigrationInterface, QueryRunner } from 'typeorm'
+
+/**
+ * Adds the `textToSpeech` TEXT column to the `chat_flow` table.
+ */
+export class AddTextToSpeechToChatFlow1754986486669 implements MigrationInterface {
+    /**
+     * Creates the column when it is not already present, so that running the migration
+     * against a database that already has the column does not fail with a duplicate-column error.
+     */
+    public async up(queryRunner: QueryRunner): Promise<void> {
+        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
+        if (!columnExists) {
+            await queryRunner.query(`ALTER TABLE "chat_flow" ADD COLUMN "textToSpeech" TEXT;`)
+        }
+    }
+
+    /** Drops the column again. */
+    public async down(queryRunner: QueryRunner): Promise<void> {
+        await queryRunner.query(`ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`)
+    }
+}
@@ -34,6 +34,7 @@ import { AddSeqNoToDatasetRow1733752119696 } from './1733752119696-AddSeqNoToDat
 import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEntity'
 import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
 import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
+import { AddTextToSpeechToChatFlow1754986486669 } from './1754986486669-AddTextToSpeechToChatFlow'
 import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
 import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'

@@ -97,6 +98,7 @@ export const sqliteMigrations = [
    FixOpenSourceAssistantTable1743758056188,
    AddErrorToEvaluationRun1744964560174,
    ExecutionLinkWorkspaceId1746862866554,
+    AddTextToSpeechToChatFlow1754986486669,
    ModifyChatflowType1755066758601,
    AddChatFlowNameIndex1755748356008
 ]
@@ -380,6 +380,70 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
        }
    }

+    /**
+     * Publishes a `tts_start` event for a chat message, keyed by `chatId`.
+     * The payload carries the audio `format` under `data`.
+     * Synchronous failures are logged to the console and not rethrown.
+     */
+    streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
+        const payload = {
+            chatId,
+            chatMessageId,
+            eventType: 'tts_start',
+            data: { format }
+        }
+        try {
+            // NOTE(review): if publish() returns a promise, a rejection would bypass this try/catch - confirm
+            this.redisPublisher.publish(chatId, JSON.stringify(payload))
+        } catch (error) {
+            console.error('Error streaming TTS start event:', error)
+        }
+    }
+
+    /**
+     * Publishes a `tts_data` event for a chat message, keyed by `chatId`.
+     * `audioChunk` is placed directly in the payload's `data` field.
+     * Synchronous failures are logged to the console and not rethrown.
+     */
+    streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
+        const payload = {
+            chatId,
+            chatMessageId,
+            eventType: 'tts_data',
+            data: audioChunk
+        }
+        try {
+            // NOTE(review): if publish() returns a promise, a rejection would bypass this try/catch - confirm
+            this.redisPublisher.publish(chatId, JSON.stringify(payload))
+        } catch (error) {
+            console.error('Error streaming TTS data event:', error)
+        }
+    }
+
+    /**
+     * Publishes a `tts_end` event for a chat message, keyed by `chatId`, with an empty `data` object.
+     * Synchronous failures are logged to the console and not rethrown.
+     */
+    streamTTSEndEvent(chatId: string, chatMessageId: string): void {
+        const payload = {
+            chatId,
+            chatMessageId,
+            eventType: 'tts_end',
+            data: {}
+        }
+        try {
+            // NOTE(review): if publish() returns a promise, a rejection would bypass this try/catch - confirm
+            this.redisPublisher.publish(chatId, JSON.stringify(payload))
+        } catch (error) {
+            console.error('Error streaming TTS end event:', error)
+        }
+    }
+
+    /**
+     * Publishes a `tts_abort` event for a chat message, keyed by `chatId`, with an empty `data` object.
+     * Synchronous failures are logged to the console and not rethrown.
+     */
+    streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
+        const payload = {
+            chatId,
+            chatMessageId,
+            eventType: 'tts_abort',
+            data: {}
+        }
+        try {
+            // NOTE(review): if publish() returns a promise, a rejection would bypass this try/catch - confirm
+            this.redisPublisher.publish(chatId, JSON.stringify(payload))
+        } catch (error) {
+            console.error('Error streaming TTS abort event:', error)
+        }
+    }
+
    async disconnect() {
        if (this.redisPublisher) {
            await this.redisPublisher.quit()
@@ -102,7 +102,7 @@ export class RedisEventSubscriber {
    private handleEvent(message: string) {
        // Parse the message from Redis
        const event = JSON.parse(message)
-        const { eventType, chatId, data } = event
+        const { eventType, chatId, chatMessageId, data } = event

        // Stream the event to the client
        switch (eventType) {
@@ -121,6 +121,9 @@ export class RedisEventSubscriber {
            case 'usedTools':
                this.sseStreamer.streamUsedToolsEvent(chatId, data)
                break
+            case 'calledTools':
+                this.sseStreamer.streamCalledToolsEvent(chatId, data)
+                break
            case 'fileAnnotations':
                this.sseStreamer.streamFileAnnotationsEvent(chatId, data)
                break
@@ -154,6 +157,21 @@ export class RedisEventSubscriber {
            case 'metadata':
                this.sseStreamer.streamMetadataEvent(chatId, data)
                break
+            case 'usageMetadata':
+                this.sseStreamer.streamUsageMetadataEvent(chatId, data)
+                break
+            case 'tts_start':
+                this.sseStreamer.streamTTSStartEvent(chatId, chatMessageId, data.format)
+                break
+            case 'tts_data':
+                this.sseStreamer.streamTTSDataEvent(chatId, chatMessageId, data)
+                break
+            case 'tts_end':
+                this.sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
+                break
+            case 'tts_abort':
+                this.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
+                break
        }
    }

@@ -55,6 +55,7 @@ import nvidiaNimRouter from './nvidia-nim'
 import executionsRouter from './executions'
 import validationRouter from './validation'
 import agentflowv2GeneratorRouter from './agentflowv2-generator'
+import textToSpeechRouter from './text-to-speech'

 import authRouter from '../enterprise/routes/auth'
 import auditRouter from '../enterprise/routes/audit'
@@ -124,6 +125,7 @@ router.use('/nvidia-nim', nvidiaNimRouter)
 router.use('/executions', executionsRouter)
 router.use('/validation', validationRouter)
 router.use('/agentflowv2-generator', agentflowv2GeneratorRouter)
+router.use('/text-to-speech', textToSpeechRouter)

 router.use('/auth', authRouter)
 router.use('/audit', IdentityManager.checkFeatureByPlan('feat:login-activity'), auditRouter)
@@ -0,0 +1,12 @@
+import express from 'express'
+import textToSpeechController from '../../controllers/text-to-speech'
+
+// Routes for the text-to-speech controller
+const router = express.Router()
+
+// POST /generate - stream synthesized audio for a piece of text
+router.post('/generate', textToSpeechController.generateTextToSpeech)
+
+// POST /abort - cancel an in-flight TTS stream
+router.post('/abort', textToSpeechController.abortTextToSpeech)
+
+// GET /voices - list the voices available for a provider
+router.get('/voices', textToSpeechController.getVoices)
+
+export default router
@@ -363,7 +363,18 @@ const getSinglePublicChatbotConfig = async (chatflowId: string): Promise<any> =>
        if (dbResponse.chatbotConfig || uploadsConfig) {
            try {
                const parsedConfig = dbResponse.chatbotConfig ? JSON.parse(dbResponse.chatbotConfig) : {}
-                return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData }
+                const ttsConfig =
+                    typeof dbResponse.textToSpeech === 'string' ? JSON.parse(dbResponse.textToSpeech) : dbResponse.textToSpeech
+
+                let isTTSEnabled = false
+                if (ttsConfig) {
+                    Object.keys(ttsConfig).forEach((provider) => {
+                        if (provider !== 'none' && ttsConfig?.[provider]?.status) {
+                            isTTSEnabled = true
+                        }
+                    })
+                }
+                return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData, isTTSEnabled }
            } catch (e) {
                throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error parsing Chatbot Config for Chatflow ${chatflowId}`)
            }
@@ -0,0 +1,52 @@
+import { StatusCodes } from 'http-status-codes'
+import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
+import { InternalFlowiseError } from '../../errors/internalFlowiseError'
+import { getErrorMessage } from '../../errors/utils'
+import { getVoices } from 'flowise-components'
+import { databaseEntities } from '../../utils'
+
+/** Identifiers of the text-to-speech providers named in this module. */
+export enum TextToSpeechProvider {
+    OPENAI = 'openai',
+    ELEVEN_LABS = 'elevenlabs'
+}
+
+/**
+ * Shape of a text-to-speech request: the text to speak, the provider and credential to use,
+ * and an optional voice and model.
+ * NOTE(review): not referenced by the code in this module - confirm where it is consumed.
+ */
+export interface TTSRequest {
+    text: string
+    provider: TextToSpeechProvider
+    credentialId: string
+    voice?: string
+    model?: string
+}
+
+/**
+ * Shape of a buffered text-to-speech result: the audio bytes and their content type.
+ * NOTE(review): not referenced by the code in this module - confirm where it is consumed.
+ */
+export interface TTSResponse {
+    audioBuffer: Buffer
+    contentType: string
+}
+
+/**
+ * Fetches the voices available for a TTS provider using the given credential.
+ *
+ * @param provider - provider name passed through to `getVoices` from flowise-components
+ * @param credentialId - id of the credential to use; required
+ * @returns the voices returned by the provider lookup
+ * @throws InternalFlowiseError with status 400 when `credentialId` is missing, or status 500
+ *         (message prefixed with `Error: textToSpeechService.getVoices - `) for any other failure
+ */
+const getVoicesForProvider = async (provider: string, credentialId?: string): Promise<any[]> => {
+    try {
+        if (!credentialId) {
+            throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Credential ID required for this provider')
+        }
+
+        const appServer = getRunningExpressApp()
+        const options = {
+            orgId: '',
+            chatflowid: '',
+            chatId: '',
+            appDataSource: appServer.AppDataSource,
+            databaseEntities: databaseEntities
+        }
+
+        return await getVoices(provider, credentialId, options)
+    } catch (error) {
+        // Let deliberate HTTP errors (such as the 400 above) through unchanged; previously they
+        // were re-wrapped here and reached the client as a 500
+        if (error instanceof InternalFlowiseError) throw error
+        throw new InternalFlowiseError(
+            StatusCodes.INTERNAL_SERVER_ERROR,
+            `Error: textToSpeechService.getVoices - ${getErrorMessage(error)}`
+        )
+    }
+}
+
+// Service API: `getVoices` is the public name of getVoicesForProvider
+export default {
+    getVoices: getVoicesForProvider
+}
@@ -257,4 +257,50 @@ export class SSEStreamer implements IServerSideEventStreamer {
            client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
        }
    }
+
+    /**
+     * Writes a `tts_start` event (message id and audio format) to the client registered
+     * for `chatId`. Does nothing when no client is registered for that chat.
+     */
+    streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
+        const sseClient = this.clients[chatId]
+        if (!sseClient) return
+
+        const payload = { event: 'tts_start', data: { chatMessageId, format } }
+        sseClient.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
+    }
+
+    /**
+     * Writes a `tts_data` event (message id and audio chunk) to the client registered
+     * for `chatId`. Does nothing when no client is registered for that chat.
+     */
+    streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
+        const sseClient = this.clients[chatId]
+        if (!sseClient) return
+
+        const payload = { event: 'tts_data', data: { chatMessageId, audioChunk } }
+        sseClient.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
+    }
+
+    /**
+     * Writes a `tts_end` event for the given message to the client registered for `chatId`.
+     * Does nothing when no client is registered for that chat.
+     */
+    streamTTSEndEvent(chatId: string, chatMessageId: string): void {
+        const sseClient = this.clients[chatId]
+        if (!sseClient) return
+
+        const payload = { event: 'tts_end', data: { chatMessageId } }
+        sseClient.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
+    }
+
+    /**
+     * Writes a `tts_abort` event for the given message to the client registered for `chatId`,
+     * then ends that client's response and removes the client from the registry.
+     * Does nothing when no client is registered for that chat.
+     */
+    streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
+        const sseClient = this.clients[chatId]
+        if (!sseClient) return
+
+        const payload = { event: 'tts_abort', data: { chatMessageId } }
+        sseClient.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
+
+        // The abort event is the last thing this client receives
+        sseClient.response.end()
+        delete this.clients[chatId]
+    }
 }
@@ -58,6 +58,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
 import { Telemetry } from './telemetry'
 import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
 import { UsageCacheManager } from '../UsageCacheManager'
+import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'

 interface IWaitingNode {
    nodeId: string
@@ -2208,5 +2209,27 @@ export const executeAgentFlow = async ({

    if (sessionId) result.sessionId = sessionId

+    if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
+        const options = {
+            orgId,
+            chatflowid,
+            chatId,
+            appDataSource,
+            databaseEntities
+        }
+
+        if (sseStreamer) {
+            await generateTTSForResponseStream(
+                result.text,
+                chatflow.textToSpeech,
+                options,
+                chatId,
+                chatMessage?.id,
+                sseStreamer,
+                abortController
+            )
+        }
+    }
+
    return result
 }
@@ -6,6 +6,7 @@ import { omit } from 'lodash'
 import {
    IFileUpload,
    convertSpeechToText,
+    convertTextToSpeechStream,
    ICommonObject,
    addSingleFileToStorage,
    generateFollowUpPrompts,
@@ -16,7 +17,8 @@ import {
    getFileFromUpload,
    removeSpecificFileFromUpload,
    EvaluationRunner,
-    handleEscapeCharacters
+    handleEscapeCharacters,
+    IServerSideEventStreamer
 } from 'flowise-components'
 import { StatusCodes } from 'http-status-codes'
 import {
@@ -70,9 +72,74 @@ import { executeAgentFlow } from './buildAgentflow'
 import { Workspace } from '../enterprise/database/entities/workspace.entity'
 import { Organization } from '../enterprise/database/entities/organization.entity'

-/*
- * Initialize the ending node to be executed
- */
+/**
+ * Reports whether a chatflow's text-to-speech config asks for audio to be played automatically.
+ *
+ * @param textToSpeechConfig - the stored TTS config (JSON string), or nothing
+ * @returns `true` when at least one provider entry has both `status` and `autoPlay` strictly
+ *          equal to `true`; `false` for a missing config, no such entry, or a config that
+ *          cannot be parsed (the parse error is logged)
+ */
+const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
+    if (!textToSpeechConfig) return false
+    try {
+        const parsedConfig = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
+        // `?? {}` keeps a parsed `null` config a silent "no providers" case
+        return Object.values<any>(parsedConfig ?? {}).some((settings) => settings?.status === true && settings?.autoPlay === true)
+    } catch (error) {
+        logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
+        return false
+    }
+}
+
+/**
+ * Converts a response text to speech and streams it to the chat's SSE client.
+ *
+ * Uses the first provider entry in the config whose `status` is strictly `true`; returns
+ * without emitting anything when the config is missing or has no such entry. Emits
+ * `tts_start`, `tts_data` (base64 chunks) and `tts_end` through `sseStreamer`.
+ * Errors are logged, never thrown, and followed by a `tts_end` event.
+ *
+ * @param responseText - text to synthesize
+ * @param textToSpeechConfig - the stored TTS config (JSON string)
+ * @param options - options object forwarded to `convertTextToSpeechStream`
+ * @param chatId - chat whose SSE client receives the events
+ * @param chatMessageId - message id attached to every event
+ * @param sseStreamer - streamer used to emit the events
+ * @param abortController - optional controller for cancelling; a fresh one is used when omitted
+ */
+const generateTTSForResponseStream = async (
+    responseText: string,
+    textToSpeechConfig: string | undefined,
+    options: ICommonObject,
+    chatId: string,
+    chatMessageId: string,
+    sseStreamer: IServerSideEventStreamer,
+    abortController?: AbortController
+): Promise<void> => {
+    try {
+        if (!textToSpeechConfig) return
+        const parsedConfig = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
+
+        // First provider entry that is switched on; `?? {}` keeps a parsed `null` config a silent no-op
+        const activeEntry = Object.entries<any>(parsedConfig ?? {}).find(([, settings]) => settings?.status === true)
+        if (!activeEntry) return
+
+        const [providerName, settings] = activeEntry
+        const activeProviderConfig = {
+            name: providerName,
+            credentialId: settings.credentialId,
+            voice: settings.voice,
+            model: settings.model
+        }
+
+        const emitStart = (format: string) => {
+            sseStreamer.streamTTSStartEvent(chatId, chatMessageId, format)
+        }
+        const emitChunk = (chunk: Buffer) => {
+            sseStreamer.streamTTSDataEvent(chatId, chatMessageId, chunk.toString('base64'))
+        }
+        const emitEnd = () => {
+            sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
+        }
+
+        await convertTextToSpeechStream(
+            responseText,
+            activeProviderConfig,
+            options,
+            abortController || new AbortController(),
+            emitStart,
+            emitChunk,
+            emitEnd
+        )
+    } catch (error) {
+        logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
+        // Always close the TTS sequence for the client, even after a failure
+        sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
+    }
+}
+
 const initEndingNode = async ({
    endingNodeIds,
    componentNodes,
@@ -833,6 +900,17 @@ export const executeFlow = async ({
        if (memoryType) result.memoryType = memoryType
        if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput

+        if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
+            const options = {
+                orgId,
+                chatflowid,
+                chatId,
+                appDataSource,
+                databaseEntities
+            }
+            await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, chatMessage?.id, sseStreamer, signal)
+        }
+
        return result
    }
 }
@@ -1064,3 +1142,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
        )
    }
 }
+
+export { shouldAutoPlayTTS, generateTTSForResponseStream }
@@ -41,6 +41,8 @@ export const WHITELIST_URLS = [
    '/api/v1/user/test',
    '/api/v1/oauth2-credential/callback',
    '/api/v1/oauth2-credential/refresh',
+    '/api/v1/text-to-speech/generate',
+    '/api/v1/text-to-speech/abort',
    AzureSSO.LOGIN_URI,
    AzureSSO.LOGOUT_URI,
    AzureSSO.CALLBACK_URI,