mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 13:00:56 +03:00
Text to speech (#5062)
* Add tts UI * Add tts backend * Add description to eleven labs credentials * Fix issue with fetching eleven labs voices * Fix issue with text to speech tab not showing correct saved voice * Add option to autoplay tts audio after prediction completes * Fix crash issue when first changing tts provider * Set up streaming response for text to speech audio * Update controllers - fix issue with sse client getting removed before tts events are sent * Use existing sse streamer to stream tts audio before sse client is removed * Add tts sse to redis publisher * Fix issues with TTS - openai voices, streaming audio, rate limiting, speed of speech * Refactor * Refactor TTS - fix issues with tts loading and stop audio buttons * Abort TTS SSE when clicking the stop button * Update SSE handling for TTS * Fix issue with test voice feature * Fix issue with tts voices not loading * Update generate tts endpoint and its usage in internal chat * Whitelist tts generate endpoint * Refactor Text-to-Speech Provider Selection and Enhance UI Components - Updated the text-to-speech controller to select the active provider based on status instead of the first available provider - Added audio waveform controls and test audio functionality in the TextToSpeech component, allowing users to play and pause test audio - Integrated Autocomplete for voice selection in the TextToSpeech component - Implemented TTS action management in ChatMessage to prevent auto-scrolling during TTS actions * - Implemented stopAllTTS function calls to halt existing TTS audio before playing new audio or starting a new TTS stream * Updated the condition for enabling TTS providers to exclude the 'none' provider, ensuring only valid providers are considered for text-to-speech functionality. 
* Remove unnecessary code * Add ability to abort audio streaming in TTS and release lock on chat input * Remove logger * Fix tts audio not playing when clicking speaker button * update * TTS abort controller * Fix abort not working for TTS autoplay * Send metadata event when aborting autoplay TTS * Fix UI issue * Remove elevenlabs sdk from root package.json * Remove redundant condition for tts autoplay in chatflow --------- Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
@@ -257,4 +257,50 @@ export class SSEStreamer implements IServerSideEventStreamer {
|
||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Announces the start of a text-to-speech audio stream to the SSE client of a chat.
 *
 * Does nothing when no client is registered under `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message the audio belongs to
 * @param format - audio format label forwarded to the client as-is
 */
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = JSON.stringify({
        event: 'tts_start',
        data: { chatMessageId, format }
    })
    target.response.write('message:\ndata:' + payload + '\n\n')
}
|
||||
|
||||
/**
 * Sends one chunk of text-to-speech audio to the SSE client of a chat.
 *
 * Does nothing when no client is registered under `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message the audio belongs to
 * @param audioChunk - audio payload, already encoded as a string by the caller
 */
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = JSON.stringify({
        event: 'tts_data',
        data: { chatMessageId, audioChunk }
    })
    target.response.write('message:\ndata:' + payload + '\n\n')
}
|
||||
|
||||
/**
 * Tells the SSE client of a chat that the text-to-speech audio stream has finished.
 *
 * The connection itself is left open. Does nothing when no client is registered
 * under `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message the audio belongs to
 */
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = JSON.stringify({
        event: 'tts_end',
        data: { chatMessageId }
    })
    target.response.write('message:\ndata:' + payload + '\n\n')
}
|
||||
|
||||
/**
 * Notifies the SSE client of a chat that text-to-speech was aborted, then closes the stream.
 *
 * Unlike the other TTS events this one is terminal: after the `tts_abort` message is
 * written, the response is ended and the client entry for `chatId` is removed.
 * Does nothing when no client is registered under `chatId`.
 *
 * @param chatId - key used to look up the client in `this.clients`
 * @param chatMessageId - id of the chat message whose audio was aborted
 */
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const payload = JSON.stringify({
        event: 'tts_abort',
        data: { chatMessageId }
    })
    target.response.write('message:\ndata:' + payload + '\n\n')

    // Order matters: flush the abort message, close the response, then forget the client
    target.response.end()
    delete this.clients[chatId]
}
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
|
||||
import { Telemetry } from './telemetry'
|
||||
import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
|
||||
import { UsageCacheManager } from '../UsageCacheManager'
|
||||
import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'
|
||||
|
||||
interface IWaitingNode {
|
||||
nodeId: string
|
||||
@@ -2208,5 +2209,27 @@ export const executeAgentFlow = async ({
|
||||
|
||||
if (sessionId) result.sessionId = sessionId
|
||||
|
||||
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||
const options = {
|
||||
orgId,
|
||||
chatflowid,
|
||||
chatId,
|
||||
appDataSource,
|
||||
databaseEntities
|
||||
}
|
||||
|
||||
if (sseStreamer) {
|
||||
await generateTTSForResponseStream(
|
||||
result.text,
|
||||
chatflow.textToSpeech,
|
||||
options,
|
||||
chatId,
|
||||
chatMessage?.id,
|
||||
sseStreamer,
|
||||
abortController
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import { omit } from 'lodash'
|
||||
import {
|
||||
IFileUpload,
|
||||
convertSpeechToText,
|
||||
convertTextToSpeechStream,
|
||||
ICommonObject,
|
||||
addSingleFileToStorage,
|
||||
generateFollowUpPrompts,
|
||||
@@ -16,7 +17,8 @@ import {
|
||||
getFileFromUpload,
|
||||
removeSpecificFileFromUpload,
|
||||
EvaluationRunner,
|
||||
handleEscapeCharacters
|
||||
handleEscapeCharacters,
|
||||
IServerSideEventStreamer
|
||||
} from 'flowise-components'
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import {
|
||||
@@ -70,9 +72,74 @@ import { executeAgentFlow } from './buildAgentflow'
|
||||
import { Workspace } from '../enterprise/database/entities/workspace.entity'
|
||||
import { Organization } from '../enterprise/database/entities/organization.entity'
|
||||
|
||||
/*
|
||||
* Initialize the ending node to be executed
|
||||
*/
|
||||
/**
 * Reports whether a chatflow's text-to-speech settings ask for automatic playback.
 *
 * @param textToSpeechConfig - TTS settings keyed by provider, given as a JSON string
 * (an already-parsed object is also accepted at runtime)
 * @returns `true` when at least one provider entry has both `status === true` and
 * `autoPlay === true`; `false` when the config is empty, has no such entry, or
 * cannot be parsed (the failure is logged)
 */
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false

    try {
        const parsed = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // `?? {}` keeps a parsed `null` a silent "no providers" case instead of an exception
        const providerEntries = Object.values(parsed ?? {})
        return providerEntries.some((entry) => Boolean(entry) && entry.status === true && entry.autoPlay === true)
    } catch (error) {
        logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
        return false
    }
}
|
||||
|
||||
/**
 * Converts a finished response to speech and streams the audio to the chat's SSE client.
 *
 * The first provider entry in `textToSpeechConfig` whose `status` is `true` is used.
 * The text is handed to `convertTextToSpeechStream`, and its three callbacks are
 * forwarded to `sseStreamer.streamTTSStartEvent`, `streamTTSDataEvent` (chunk encoded
 * as base64) and `streamTTSEndEvent`.
 * NOTE(review): the callbacks are presumably invoked on stream start, per audio chunk
 * and on completion - confirm against `convertTextToSpeechStream` in flowise-components.
 *
 * Failures are logged and reported to the client as an end event; they are not
 * rethrown, so a TTS problem does not fail the surrounding prediction.
 *
 * @param responseText - text to synthesize
 * @param textToSpeechConfig - TTS settings keyed by provider, as a JSON string
 * (an already-parsed object is also accepted at runtime)
 * @param options - context object passed through to `convertTextToSpeechStream`
 * @param chatId - chat whose SSE client receives the audio events
 * @param chatMessageId - id of the chat message the audio belongs to
 * @param sseStreamer - streamer used to emit the TTS events; when it is missing at
 * runtime the function returns without synthesizing anything
 * @param abortController - optional controller used to cancel synthesis; a fresh one
 * is created when omitted
 */
const generateTTSForResponseStream = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject,
    chatId: string,
    chatMessageId: string,
    sseStreamer: IServerSideEventStreamer,
    abortController?: AbortController
): Promise<void> => {
    // Nothing configured, or nowhere to stream the audio to. Not every call site checks
    // the streamer before calling, and without this guard both the callbacks and the
    // catch handler below would dereference it and throw out of this function.
    if (!textToSpeechConfig || !sseStreamer) return

    try {
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        // Use the first provider that is switched on
        let activeProviderConfig = null
        for (const providerKey in config) {
            const provider = config[providerKey]
            if (provider && provider.status === true) {
                activeProviderConfig = {
                    name: providerKey,
                    credentialId: provider.credentialId,
                    voice: provider.voice,
                    model: provider.model
                }
                break
            }
        }

        if (!activeProviderConfig) return

        await convertTextToSpeechStream(
            responseText,
            activeProviderConfig,
            options,
            abortController ?? new AbortController(),
            (format: string) => {
                sseStreamer.streamTTSStartEvent(chatId, chatMessageId, format)
            },
            (chunk: Buffer) => {
                // Binary audio is base64-encoded so it can travel inside the JSON event payload
                const audioBase64 = chunk.toString('base64')
                sseStreamer.streamTTSDataEvent(chatId, chatMessageId, audioBase64)
            },
            () => {
                sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
            }
        )
    } catch (error) {
        logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
        // Always close the audio stream on the client side, even after a failure
        sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
    }
}
|
||||
|
||||
const initEndingNode = async ({
|
||||
endingNodeIds,
|
||||
componentNodes,
|
||||
@@ -833,6 +900,17 @@ export const executeFlow = async ({
|
||||
if (memoryType) result.memoryType = memoryType
|
||||
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
|
||||
|
||||
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||
const options = {
|
||||
orgId,
|
||||
chatflowid,
|
||||
chatId,
|
||||
appDataSource,
|
||||
databaseEntities
|
||||
}
|
||||
await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, chatMessage?.id, sseStreamer, signal)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
@@ -1064,3 +1142,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
export { shouldAutoPlayTTS, generateTTSForResponseStream }
|
||||
|
||||
@@ -41,6 +41,8 @@ export const WHITELIST_URLS = [
|
||||
'/api/v1/user/test',
|
||||
'/api/v1/oauth2-credential/callback',
|
||||
'/api/v1/oauth2-credential/refresh',
|
||||
'/api/v1/text-to-speech/generate',
|
||||
'/api/v1/text-to-speech/abort',
|
||||
AzureSSO.LOGIN_URI,
|
||||
AzureSSO.LOGOUT_URI,
|
||||
AzureSSO.CALLBACK_URI,
|
||||
|
||||
Reference in New Issue
Block a user