Text to speech (#5062)

* Add tts UI

* Add tts backend

* Add description to ElevenLabs credentials

* Fix issue with fetching ElevenLabs voices

* Fix issue with text to speech tab not showing correct saved voice

* Add option to autoplay tts audio after prediction completes

* Fix crash issue when first changing tts provider

* Set up streaming response for text to speech audio

* Update controllers - fix issue with sse client getting removed before tts events are sent

* Use existing sse streamer to stream tts audio before sse client is removed

* Add tts sse to redis publisher

* Fix issues with TTS - openai voices, streaming audio, rate limiting, speed of speech

* Refactor

* Refactor TTS - fix issues with tts loading and stop audio buttons

* Abort TTS SSE when clicking the stop button

* Update SSE handling for TTS

* Fix issue with test voice feature

* Fix issue with tts voices not loading

* Update generate tts endpoint and its usage in internal chat

* Whitelist tts generate endpoint

* Refactor Text-to-Speech Provider Selection and Enhance UI Components

- Updated the text-to-speech controller to select the active provider based on status instead of the first available provider
- Added audio waveform controls and test audio functionality in the TextToSpeech component, allowing users to play and pause test audio
- Integrated Autocomplete for voice selection in the TextToSpeech component
- Implemented TTS action management in ChatMessage to prevent auto-scrolling during TTS actions

* Implemented stopAllTTS function calls to halt existing TTS audio before playing new audio or starting a new TTS stream

* Updated the condition for enabling TTS providers to exclude the 'none' provider, ensuring only valid providers are considered for text-to-speech functionality.

* Remove unnecessary code

* Add ability to abort audio streaming in TTS and release lock on chat input

* Remove logger

* Fix tts audio not playing when clicking speaker button

* update

* TTS abort controller

* Fix abort not working for TTS autoplay

* Send metadata event when aborting autoplay TTS

* Fix UI issue

* Remove ElevenLabs SDK from root package.json

* Remove redundant condition for tts autoplay in chatflow

---------

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Ilango
2025-10-02 16:49:06 +05:30
committed by GitHub
parent 8d0a198e2f
commit 9b8fee3d8f
34 changed files with 41358 additions and 39056 deletions
+46
View File
@@ -257,4 +257,50 @@ export class SSEStreamer implements IServerSideEventStreamer {
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
/**
 * Writes a `tts_start` event to the response of the client registered under `chatId`.
 * The event data carries the chat message id and the audio format.
 * Nothing is written when no client is registered for `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message the audio belongs to
 * @param format - audio format string forwarded to the client as-is
 */
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = { event: 'tts_start', data: { chatMessageId, format } }
    target.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
}
/**
 * Writes a `tts_data` event to the response of the client registered under `chatId`.
 * The event data carries the chat message id and one audio chunk.
 * Nothing is written when no client is registered for `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message the audio belongs to
 * @param audioChunk - audio chunk string, embedded in the JSON payload unchanged
 */
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = { event: 'tts_data', data: { chatMessageId, audioChunk } }
    target.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
}
/**
 * Writes a `tts_end` event to the response of the client registered under `chatId`.
 * The event data carries only the chat message id.
 * Nothing is written when no client is registered for `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message whose audio has finished
 */
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = { event: 'tts_end', data: { chatMessageId } }
    target.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
}
/**
 * Writes a `tts_abort` event to the response of the client registered under `chatId`,
 * then ends that response and removes the client entry from `this.clients`.
 * Nothing happens when no client is registered for `chatId`.
 *
 * @param chatId - key used to look up (and afterwards delete) the client in `this.clients`
 * @param chatMessageId - id of the chat message whose audio was aborted
 */
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = { event: 'tts_abort', data: { chatMessageId } }
    target.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')

    // Unlike the start/data/end events, an abort also closes the response
    // and drops the client entry.
    target.response.end()
    delete this.clients[chatId]
}
}
@@ -58,6 +58,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
import { Telemetry } from './telemetry'
import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
import { UsageCacheManager } from '../UsageCacheManager'
import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'
interface IWaitingNode {
nodeId: string
@@ -2208,5 +2209,27 @@ export const executeAgentFlow = async ({
if (sessionId) result.sessionId = sessionId
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
const options = {
orgId,
chatflowid,
chatId,
appDataSource,
databaseEntities
}
if (sseStreamer) {
await generateTTSForResponseStream(
result.text,
chatflow.textToSpeech,
options,
chatId,
chatMessage?.id,
sseStreamer,
abortController
)
}
}
return result
}
+84 -4
View File
@@ -6,6 +6,7 @@ import { omit } from 'lodash'
import {
IFileUpload,
convertSpeechToText,
convertTextToSpeechStream,
ICommonObject,
addSingleFileToStorage,
generateFollowUpPrompts,
@@ -16,7 +17,8 @@ import {
getFileFromUpload,
removeSpecificFileFromUpload,
EvaluationRunner,
handleEscapeCharacters
handleEscapeCharacters,
IServerSideEventStreamer
} from 'flowise-components'
import { StatusCodes } from 'http-status-codes'
import {
@@ -70,9 +72,74 @@ import { executeAgentFlow } from './buildAgentflow'
import { Workspace } from '../enterprise/database/entities/workspace.entity'
import { Organization } from '../enterprise/database/entities/organization.entity'
/*
* Initialize the ending node to be executed
*/
/**
 * Reports whether the text-to-speech configuration contains at least one provider
 * whose `status` and `autoPlay` flags are both strictly `true`.
 *
 * @param textToSpeechConfig - configuration value; a string is parsed as JSON,
 *   any other truthy value is used as-is
 * @returns `true` when such a provider exists; `false` when the config is empty,
 *   has no matching provider, or cannot be parsed (the parse error is logged)
 */
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false

    try {
        const parsedConfig = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        let autoPlayEnabled = false
        // for...in is kept so that a parsed `null` or primitive simply yields no iterations.
        for (const name in parsedConfig) {
            const entry = parsedConfig[name]
            if (entry?.status === true && entry.autoPlay === true) {
                autoPlayEnabled = true
                break
            }
        }
        return autoPlayEnabled
    } catch (error) {
        logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
        return false
    }
}
/**
 * Converts `responseText` to speech with the first provider in the configuration
 * whose `status` is strictly `true`, and relays the audio through `sseStreamer`
 * as TTS start, data and end events.
 *
 * Returns without emitting anything when the configuration is empty or no provider
 * has `status === true`. Any error thrown along the way is logged and followed by
 * a TTS end event; it is not rethrown.
 *
 * @param responseText - text to synthesize
 * @param textToSpeechConfig - configuration value; a string is parsed as JSON
 * @param options - passed through unchanged to `convertTextToSpeechStream`
 * @param chatId - chat id forwarded to every streamer event
 * @param chatMessageId - chat message id forwarded to every streamer event
 * @param sseStreamer - streamer that receives the TTS events
 * @param abortController - optional controller handed to the conversion; a new
 *   `AbortController` is created when omitted
 */
const generateTTSForResponseStream = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject,
    chatId: string,
    chatMessageId: string,
    sseStreamer: IServerSideEventStreamer,
    abortController?: AbortController
): Promise<void> => {
    const emitStart = (format: string) => {
        sseStreamer.streamTTSStartEvent(chatId, chatMessageId, format)
    }
    const emitChunk = (chunk: Buffer) => {
        // Audio bytes are base64-encoded before being sent to the client.
        sseStreamer.streamTTSDataEvent(chatId, chatMessageId, chunk.toString('base64'))
    }
    const emitEnd = () => {
        sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
    }

    try {
        if (!textToSpeechConfig) return

        const parsedConfig = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        // Pick the first provider flagged as active; later entries are ignored.
        let selectedProvider = null
        for (const name in parsedConfig) {
            const entry = parsedConfig[name]
            if (!entry || entry.status !== true) continue
            selectedProvider = {
                name,
                credentialId: entry.credentialId,
                voice: entry.voice,
                model: entry.model
            }
            break
        }
        if (!selectedProvider) return

        const controller = abortController || new AbortController()
        await convertTextToSpeechStream(responseText, selectedProvider, options, controller, emitStart, emitChunk, emitEnd)
    } catch (error) {
        logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
        // Emit an end event after the failure has been logged.
        emitEnd()
    }
}
const initEndingNode = async ({
endingNodeIds,
componentNodes,
@@ -833,6 +900,17 @@ export const executeFlow = async ({
if (memoryType) result.memoryType = memoryType
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
const options = {
orgId,
chatflowid,
chatId,
appDataSource,
databaseEntities
}
await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, chatMessage?.id, sseStreamer, signal)
}
return result
}
}
@@ -1064,3 +1142,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
)
}
}
export { shouldAutoPlayTTS, generateTTSForResponseStream }
+2
View File
@@ -41,6 +41,8 @@ export const WHITELIST_URLS = [
'/api/v1/user/test',
'/api/v1/oauth2-credential/callback',
'/api/v1/oauth2-credential/refresh',
'/api/v1/text-to-speech/generate',
'/api/v1/text-to-speech/abort',
AzureSSO.LOGIN_URI,
AzureSSO.LOGOUT_URI,
AzureSSO.CALLBACK_URI,