mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 13:00:56 +03:00
Text to speech (#5062)
* Add tts UI * Add tts backend * Add description to eleven labs credentials * Fix issue with fetching eleven labs voices * Fix issue with text to speech tab not showing correct saved voice * Add option to autoplay tts audio after prediction completes * Fix crash issue when first changing tts provider * Set up streaming response for text to speech audio * Update controllers - fix issue with sse client getting removed before tts events are sent * Use existing sse streamer to stream tts audio before sse client is removed * Add tts sse to redis publisher * Fix issues with TTS - openai voices, streaming audio, rate limiting, speed of speech * Refactor * Refactor TTS - fix issues with tts loading and stop audio buttons * Abort TTS SSE when clicking the stop button * Update SSE handling for TTS * Fix issue with test voice feature * Fix issue with tts voices not loading * Update generate tts endpoint and its usage in internal chat * Whitelist tts generate endpoint * Refactor Text-to-Speech Provider Selection and Enhance UI Components - Updated the text-to-speech controller to select the active provider based on status instead of the first available provider - Added audio waveform controls and test audio functionality in the TextToSpeech component, allowing users to play and pause test audio - Integrated Autocomplete for voice selection in the TextToSpeech component - Implemented TTS action management in ChatMessage to prevent auto-scrolling during TTS actions * - Implemented stopAllTTS function calls to halt existing TTS audio before playing new audio or starting a new TTS stream * Updated the condition for enabling TTS providers to exclude the 'none' provider, ensuring only valid providers are considered for text-to-speech functionality. 
* Remove unnecessary code * Add ability to abort audio streaming in TTS and release lock on chat input * Remove logger * Fix tts audio not playing when clicking speaker button * update * TTS abort controller * Fix abort not working for TTS autoplay * Send metadata event when aborting autoplay TTS * Fix UI issue * Remove elevenlabs sdk from root package.json * Remove redundant condition for tts autoplay in chatflow --------- Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
@@ -64,6 +64,7 @@ export interface IChatFlow {
|
||||
apikeyid?: string
|
||||
analytic?: string
|
||||
speechToText?: string
|
||||
textToSpeech?: string
|
||||
chatbotConfig?: string
|
||||
followUpPrompts?: string
|
||||
apiConfig?: string
|
||||
|
||||
@@ -0,0 +1,226 @@
|
||||
import { Request, Response, NextFunction } from 'express'
|
||||
import chatflowsService from '../../services/chatflows'
|
||||
import textToSpeechService from '../../services/text-to-speech'
|
||||
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
|
||||
import { convertTextToSpeechStream } from 'flowise-components'
|
||||
import { databaseEntities } from '../../utils'
|
||||
|
||||
/**
 * Streams text-to-speech audio for `req.body.text` back to the caller as Server-Sent Events.
 *
 * Configuration source:
 * - when `chatflowId` is present, the chatflow's stored `textToSpeech` JSON is parsed and the first
 *   provider entry whose `status` is `true` is used;
 * - otherwise `provider`, `credentialId`, `voice` and `model` are taken from the request body.
 *
 * Events written to the response: `tts_start` (audio format), `tts_data` (base64 audio chunk),
 * `tts_end`, and `tts_error` when anything fails. Errors are always reported in-band as a
 * `tts_error` event; this handler does not call `next()`.
 *
 * An AbortController is registered in the abort controller pool under
 * `tts_<chatId>_<chatMessageId>` and removed again when the stream ends or fails.
 */
const generateTextToSpeech = async (req: Request, res: Response) => {
    // Writes one SSE frame; the JSON payload repeats the event name alongside its data.
    const writeEvent = (event: string, data: Record<string, unknown>) => {
        res.write(`event: ${event}\n`)
        res.write(`data: ${JSON.stringify({ event, data })}\n\n`)
    }

    try {
        const {
            chatId,
            chatflowId,
            chatMessageId,
            text,
            provider: bodyProvider,
            credentialId: bodyCredentialId,
            voice: bodyVoice,
            model: bodyModel
        } = req.body

        if (!text) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.generateTextToSpeech - text not provided!`
            )
        }

        let provider: string, credentialId: string, voice: string, model: string

        if (chatflowId) {
            // Get TTS config from chatflow
            const chatflow = await chatflowsService.getChatflowById(chatflowId)

            // A chatflow without any saved TTS config has no active provider; report that
            // explicitly instead of letting JSON.parse(undefined) raise a SyntaxError.
            const ttsConfig = chatflow.textToSpeech ? JSON.parse(chatflow.textToSpeech) ?? {} : {}

            // Find the provider with status: true (entries may be null, hence the optional chaining)
            const activeProviderKey = Object.keys(ttsConfig).find((key) => ttsConfig[key]?.status === true)
            if (!activeProviderKey) {
                throw new InternalFlowiseError(
                    StatusCodes.BAD_REQUEST,
                    `Error: textToSpeechController.generateTextToSpeech - no active TTS provider configured in chatflow!`
                )
            }

            const providerConfig = ttsConfig[activeProviderKey]
            provider = activeProviderKey
            credentialId = providerConfig.credentialId
            voice = providerConfig.voice
            model = providerConfig.model
        } else {
            // Use TTS config from request body
            provider = bodyProvider
            credentialId = bodyCredentialId
            voice = bodyVoice
            model = bodyModel
        }

        if (!provider) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.generateTextToSpeech - provider not provided!`
            )
        }

        if (!credentialId) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.generateTextToSpeech - credentialId not provided!`
            )
        }

        res.setHeader('Content-Type', 'text/event-stream')
        res.setHeader('Cache-Control', 'no-cache')
        res.setHeader('Connection', 'keep-alive')
        res.setHeader('Access-Control-Allow-Origin', '*')
        res.setHeader('Access-Control-Allow-Headers', 'Cache-Control')

        const appServer = getRunningExpressApp()
        const options = {
            orgId: '',
            chatflowid: chatflowId || '',
            chatId: chatId || '',
            appDataSource: appServer.AppDataSource,
            databaseEntities: databaseEntities
        }

        const textToSpeechConfig = {
            name: provider,
            credentialId: credentialId,
            voice: voice,
            model: model
        }

        // Create and store AbortController so the abort endpoint can cancel this stream
        const abortController = new AbortController()
        const ttsAbortId = `tts_${chatId}_${chatMessageId}`
        appServer.abortControllerPool.add(ttsAbortId, abortController)

        try {
            await convertTextToSpeechStream(
                text,
                textToSpeechConfig,
                options,
                abortController,
                (format: string) => {
                    writeEvent('tts_start', { chatMessageId, format })
                },
                (chunk: Buffer) => {
                    writeEvent('tts_data', { chatMessageId, audioChunk: chunk.toString('base64') })
                },
                async () => {
                    writeEvent('tts_end', { chatMessageId })
                    res.end()
                    // Clean up from pool on successful completion
                    appServer.abortControllerPool.remove(ttsAbortId)
                }
            )
        } catch (error) {
            // Clean up from pool on error
            appServer.abortControllerPool.remove(ttsAbortId)
            throw error
        }
    } catch (error) {
        // Once the response has been ended nothing more can be written to it;
        // attempting to do so would raise a write-after-end error.
        if (res.writableEnded) return

        if (!res.headersSent) {
            res.setHeader('Content-Type', 'text/event-stream')
            res.setHeader('Cache-Control', 'no-cache')
            res.setHeader('Connection', 'keep-alive')
        }

        writeEvent('tts_error', { error: error instanceof Error ? error.message : 'TTS generation failed' })
        res.end()
    }
}
|
||||
|
||||
/**
 * Cancels an in-flight text-to-speech stream.
 *
 * Requires `chatId`, `chatMessageId` and `chatflowId` in the request body. It aborts the controller
 * registered under `tts_<chatId>_<chatMessageId>`, additionally aborts the chatflow-level controller
 * `<chatflowId>_<chatId>` when one exists (auto-play TTS), and notifies the SSE client with a
 * TTS abort event.
 *
 * Responds with JSON. Validation failures use the status code carried by the thrown
 * InternalFlowiseError (400); any other failure is reported as 500.
 */
const abortTextToSpeech = async (req: Request, res: Response) => {
    try {
        const { chatId, chatMessageId, chatflowId } = req.body

        if (!chatId) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.abortTextToSpeech - chatId not provided!`
            )
        }

        if (!chatMessageId) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.abortTextToSpeech - chatMessageId not provided!`
            )
        }

        if (!chatflowId) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.abortTextToSpeech - chatflowId not provided!`
            )
        }

        const appServer = getRunningExpressApp()

        // Abort the TTS generation using existing pool
        const ttsAbortId = `tts_${chatId}_${chatMessageId}`
        appServer.abortControllerPool.abort(ttsAbortId)

        // Also abort the main chat flow AbortController for auto-TTS
        const chatFlowAbortId = `${chatflowId}_${chatId}`
        if (appServer.abortControllerPool.get(chatFlowAbortId)) {
            appServer.abortControllerPool.abort(chatFlowAbortId)
            appServer.sseStreamer.streamMetadataEvent(chatId, { chatId, chatMessageId })
        }

        // Send abort event to client
        appServer.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)

        res.json({ message: 'TTS stream aborted successfully', chatId, chatMessageId })
    } catch (error) {
        // Preserve the status of deliberate errors (e.g. 400 for missing parameters)
        // instead of reporting every failure as a server error.
        const status = error instanceof InternalFlowiseError ? error.statusCode : StatusCodes.INTERNAL_SERVER_ERROR
        res.status(status).json({
            error: error instanceof Error ? error.message : 'Failed to abort TTS stream'
        })
    }
}
|
||||
|
||||
/**
 * Returns the voices available for a text-to-speech provider.
 *
 * Query parameters:
 * - `provider` (required): provider key, must be a single string value.
 * - `credentialId`: credential used to query the provider; forwarded only when it is a string.
 *
 * Responds with the JSON result of `textToSpeechService.getVoices`. Any error is forwarded to `next`.
 */
const getVoices = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const { provider, credentialId } = req.query

        if (!provider) {
            throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, `Error: textToSpeechController.getVoices - provider not provided!`)
        }

        // Query values can also be arrays or nested objects (e.g. ?provider=a&provider=b);
        // reject those rather than casting them to a string.
        if (typeof provider !== 'string') {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.getVoices - provider must be a single string value!`
            )
        }

        const voices = await textToSpeechService.getVoices(provider, typeof credentialId === 'string' ? credentialId : undefined)

        return res.json(voices)
    } catch (error) {
        next(error)
    }
}
|
||||
|
||||
export default {
|
||||
generateTextToSpeech,
|
||||
abortTextToSpeech,
|
||||
getVoices
|
||||
}
|
||||
@@ -41,6 +41,9 @@ export class ChatFlow implements IChatFlow {
|
||||
@Column({ nullable: true, type: 'text' })
|
||||
speechToText?: string
|
||||
|
||||
@Column({ nullable: true, type: 'text' })
|
||||
textToSpeech?: string
|
||||
|
||||
@Column({ nullable: true, type: 'text' })
|
||||
followUpPrompts?: string
|
||||
|
||||
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
import { MigrationInterface, QueryRunner } from 'typeorm'
|
||||
|
||||
/**
 * MariaDB migration that adds the `textToSpeech` TEXT column to the `chat_flow` table.
 */
export class AddTextToSpeechToChatFlow1754986457485 implements MigrationInterface {
    /**
     * Adds the column unless it already exists.
     * The ALTER statement is awaited so the migration only completes (and any failure surfaces)
     * once the schema change has actually been executed.
     */
    public async up(queryRunner: QueryRunner): Promise<void> {
        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
        if (!columnExists) await queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
    }

    /** Drops the `textToSpeech` column again. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
    }
}
|
||||
@@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
|
||||
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
|
||||
import { AddTextToSpeechToChatFlow1754986457485 } from './1754986457485-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
@@ -101,6 +102,7 @@ export const mariadbMigrations = [
|
||||
AddErrorToEvaluationRun1744964560174,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
ModifyExecutionDataColumnType1747902489801,
|
||||
AddTextToSpeechToChatFlow1754986457485,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
import { MigrationInterface, QueryRunner } from 'typeorm'
|
||||
|
||||
/**
 * MySQL migration that adds the `textToSpeech` TEXT column to the `chat_flow` table.
 */
export class AddTextToSpeechToChatFlow1754986468397 implements MigrationInterface {
    /**
     * Adds the column unless it already exists.
     * The ALTER statement is awaited so the migration only completes (and any failure surfaces)
     * once the schema change has actually been executed.
     */
    public async up(queryRunner: QueryRunner): Promise<void> {
        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
        if (!columnExists) await queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
    }

    /** Drops the `textToSpeech` column again. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
    }
}
|
||||
@@ -37,6 +37,7 @@ import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpe
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { FixErrorsColumnInEvaluationRun1746437114935 } from './1746437114935-FixErrorsColumnInEvaluationRun'
|
||||
import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
|
||||
import { AddTextToSpeechToChatFlow1754986468397 } from './1754986468397-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
@@ -103,6 +104,7 @@ export const mysqlMigrations = [
|
||||
FixErrorsColumnInEvaluationRun1746437114935,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
ModifyExecutionDataColumnType1747902489801,
|
||||
AddTextToSpeechToChatFlow1754986468397,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
import { MigrationInterface, QueryRunner } from 'typeorm'
|
||||
|
||||
/**
 * Postgres migration for the `textToSpeech` TEXT column on the `chat_flow` table.
 */
export class AddTextToSpeechToChatFlow1754986480347 implements MigrationInterface {
    /** Creates the column; `IF NOT EXISTS` makes re-running the statement harmless. */
    public async up(queryRunner: QueryRunner): Promise<void> {
        const addColumnSql = `ALTER TABLE "chat_flow" ADD COLUMN IF NOT EXISTS "textToSpeech" TEXT;`
        await queryRunner.query(addColumnSql)
    }

    /** Reverts the migration by dropping the column. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        const dropColumnSql = `ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`
        await queryRunner.query(dropColumnSql)
    }
}
|
||||
@@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
|
||||
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { ModifyExecutionSessionIdFieldType1748450230238 } from './1748450230238-ModifyExecutionSessionIdFieldType'
|
||||
import { AddTextToSpeechToChatFlow1754986480347 } from './1754986480347-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
@@ -101,6 +102,7 @@ export const postgresMigrations = [
|
||||
AddErrorToEvaluationRun1744964560174,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
ModifyExecutionSessionIdFieldType1748450230238,
|
||||
AddTextToSpeechToChatFlow1754986480347,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
import { MigrationInterface, QueryRunner } from 'typeorm'
|
||||
|
||||
/**
 * SQLite migration for the `textToSpeech` TEXT column on the `chat_flow` table.
 */
export class AddTextToSpeechToChatFlow1754986486669 implements MigrationInterface {
    /** Creates the column. */
    public async up(queryRunner: QueryRunner): Promise<void> {
        const addColumnSql = `ALTER TABLE "chat_flow" ADD COLUMN "textToSpeech" TEXT;`
        await queryRunner.query(addColumnSql)
    }

    /** Reverts the migration by dropping the column. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        const dropColumnSql = `ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`
        await queryRunner.query(dropColumnSql)
    }
}
|
||||
@@ -34,6 +34,7 @@ import { AddSeqNoToDatasetRow1733752119696 } from './1733752119696-AddSeqNoToDat
|
||||
import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEntity'
|
||||
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { AddTextToSpeechToChatFlow1754986486669 } from './1754986486669-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
@@ -97,6 +98,7 @@ export const sqliteMigrations = [
|
||||
FixOpenSourceAssistantTable1743758056188,
|
||||
AddErrorToEvaluationRun1744964560174,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
AddTextToSpeechToChatFlow1754986486669,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
||||
@@ -380,6 +380,70 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Publishes a `tts_start` event carrying the audio `format` on the Redis channel named after `chatId`.
 * Failures are logged and never thrown to the caller.
 */
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
    const logFailure = (error: unknown) => console.error('Error streaming TTS start event:', error)
    try {
        const message = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_start',
            data: { format }
        })
        // publish() is asynchronous, so a rejection would bypass the surrounding try/catch and
        // surface as an unhandled rejection; attach the handler to the returned promise as well.
        void Promise.resolve(this.redisPublisher.publish(chatId, message)).catch(logFailure)
    } catch (error) {
        logFailure(error)
    }
}
|
||||
|
||||
/**
 * Publishes a `tts_data` event whose `data` is the given `audioChunk` string on the Redis channel
 * named after `chatId`. Failures are logged and never thrown to the caller.
 */
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
    const logFailure = (error: unknown) => console.error('Error streaming TTS data event:', error)
    try {
        const message = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_data',
            data: audioChunk
        })
        // publish() is asynchronous, so a rejection would bypass the surrounding try/catch and
        // surface as an unhandled rejection; attach the handler to the returned promise as well.
        void Promise.resolve(this.redisPublisher.publish(chatId, message)).catch(logFailure)
    } catch (error) {
        logFailure(error)
    }
}
|
||||
|
||||
/**
 * Publishes a `tts_end` event (empty `data`) on the Redis channel named after `chatId`.
 * Failures are logged and never thrown to the caller.
 */
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
    const logFailure = (error: unknown) => console.error('Error streaming TTS end event:', error)
    try {
        const message = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_end',
            data: {}
        })
        // publish() is asynchronous, so a rejection would bypass the surrounding try/catch and
        // surface as an unhandled rejection; attach the handler to the returned promise as well.
        void Promise.resolve(this.redisPublisher.publish(chatId, message)).catch(logFailure)
    } catch (error) {
        logFailure(error)
    }
}
|
||||
|
||||
/**
 * Publishes a `tts_abort` event (empty `data`) on the Redis channel named after `chatId`.
 * Failures are logged and never thrown to the caller.
 */
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
    const logFailure = (error: unknown) => console.error('Error streaming TTS abort event:', error)
    try {
        const message = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_abort',
            data: {}
        })
        // publish() is asynchronous, so a rejection would bypass the surrounding try/catch and
        // surface as an unhandled rejection; attach the handler to the returned promise as well.
        void Promise.resolve(this.redisPublisher.publish(chatId, message)).catch(logFailure)
    } catch (error) {
        logFailure(error)
    }
}
|
||||
|
||||
async disconnect() {
|
||||
if (this.redisPublisher) {
|
||||
await this.redisPublisher.quit()
|
||||
|
||||
@@ -102,7 +102,7 @@ export class RedisEventSubscriber {
|
||||
private handleEvent(message: string) {
|
||||
// Parse the message from Redis
|
||||
const event = JSON.parse(message)
|
||||
const { eventType, chatId, data } = event
|
||||
const { eventType, chatId, chatMessageId, data } = event
|
||||
|
||||
// Stream the event to the client
|
||||
switch (eventType) {
|
||||
@@ -121,6 +121,9 @@ export class RedisEventSubscriber {
|
||||
case 'usedTools':
|
||||
this.sseStreamer.streamUsedToolsEvent(chatId, data)
|
||||
break
|
||||
case 'calledTools':
|
||||
this.sseStreamer.streamCalledToolsEvent(chatId, data)
|
||||
break
|
||||
case 'fileAnnotations':
|
||||
this.sseStreamer.streamFileAnnotationsEvent(chatId, data)
|
||||
break
|
||||
@@ -154,6 +157,21 @@ export class RedisEventSubscriber {
|
||||
case 'metadata':
|
||||
this.sseStreamer.streamMetadataEvent(chatId, data)
|
||||
break
|
||||
case 'usageMetadata':
|
||||
this.sseStreamer.streamUsageMetadataEvent(chatId, data)
|
||||
break
|
||||
case 'tts_start':
|
||||
this.sseStreamer.streamTTSStartEvent(chatId, chatMessageId, data.format)
|
||||
break
|
||||
case 'tts_data':
|
||||
this.sseStreamer.streamTTSDataEvent(chatId, chatMessageId, data)
|
||||
break
|
||||
case 'tts_end':
|
||||
this.sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
|
||||
break
|
||||
case 'tts_abort':
|
||||
this.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ import nvidiaNimRouter from './nvidia-nim'
|
||||
import executionsRouter from './executions'
|
||||
import validationRouter from './validation'
|
||||
import agentflowv2GeneratorRouter from './agentflowv2-generator'
|
||||
import textToSpeechRouter from './text-to-speech'
|
||||
|
||||
import authRouter from '../enterprise/routes/auth'
|
||||
import auditRouter from '../enterprise/routes/audit'
|
||||
@@ -124,6 +125,7 @@ router.use('/nvidia-nim', nvidiaNimRouter)
|
||||
router.use('/executions', executionsRouter)
|
||||
router.use('/validation', validationRouter)
|
||||
router.use('/agentflowv2-generator', agentflowv2GeneratorRouter)
|
||||
router.use('/text-to-speech', textToSpeechRouter)
|
||||
|
||||
router.use('/auth', authRouter)
|
||||
router.use('/audit', IdentityManager.checkFeatureByPlan('feat:login-activity'), auditRouter)
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
import express from 'express'
|
||||
import textToSpeechController from '../../controllers/text-to-speech'
|
||||
|
||||
// Routes for the text-to-speech feature.
const router = express.Router()
const { generateTextToSpeech, abortTextToSpeech, getVoices } = textToSpeechController

// POST /generate - stream synthesized audio for a piece of text
router.post('/generate', generateTextToSpeech)

// POST /abort - cancel a running text-to-speech stream
router.post('/abort', abortTextToSpeech)

// GET /voices - list the voices a provider offers
router.get('/voices', getVoices)

export default router
|
||||
@@ -363,7 +363,18 @@ const getSinglePublicChatbotConfig = async (chatflowId: string): Promise<any> =>
|
||||
if (dbResponse.chatbotConfig || uploadsConfig) {
|
||||
try {
|
||||
const parsedConfig = dbResponse.chatbotConfig ? JSON.parse(dbResponse.chatbotConfig) : {}
|
||||
return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData }
|
||||
const ttsConfig =
|
||||
typeof dbResponse.textToSpeech === 'string' ? JSON.parse(dbResponse.textToSpeech) : dbResponse.textToSpeech
|
||||
|
||||
let isTTSEnabled = false
|
||||
if (ttsConfig) {
|
||||
Object.keys(ttsConfig).forEach((provider) => {
|
||||
if (provider !== 'none' && ttsConfig?.[provider]?.status) {
|
||||
isTTSEnabled = true
|
||||
}
|
||||
})
|
||||
}
|
||||
return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData, isTTSEnabled }
|
||||
} catch (e) {
|
||||
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error parsing Chatbot Config for Chatflow ${chatflowId}`)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
|
||||
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
||||
import { getErrorMessage } from '../../errors/utils'
|
||||
import { getVoices } from 'flowise-components'
|
||||
import { databaseEntities } from '../../utils'
|
||||
|
||||
/** Provider keys declared for text-to-speech. */
export enum TextToSpeechProvider {
    OPENAI = 'openai',
    ELEVEN_LABS = 'elevenlabs'
}

/** Input describing a single text-to-speech request. */
export interface TTSRequest {
    /** Text to synthesize. */
    text: string
    /** Provider that should perform the synthesis. */
    provider: TextToSpeechProvider
    /** Identifier of the credential used with the provider. */
    credentialId: string
    /** Optional voice selection. */
    voice?: string
    /** Optional model selection. */
    model?: string
}

/** Result of a text-to-speech request. */
export interface TTSResponse {
    /** Audio bytes. */
    audioBuffer: Buffer
    /** Content type describing `audioBuffer`. */
    contentType: string
}
|
||||
|
||||
/**
 * Fetches the voices a text-to-speech provider offers.
 *
 * @param provider - provider key forwarded to `getVoices`
 * @param credentialId - credential used to query the provider; required
 * @returns the list returned by `getVoices`
 * @throws InternalFlowiseError with BAD_REQUEST when `credentialId` is missing, or with
 *         INTERNAL_SERVER_ERROR wrapping any failure of the lookup itself
 */
const getVoicesForProvider = async (provider: string, credentialId?: string): Promise<any[]> => {
    // Validate before entering the try block: thrown inside it, this client error would be
    // caught below and re-wrapped as an INTERNAL_SERVER_ERROR.
    if (!credentialId) {
        throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Credential ID required for this provider')
    }

    try {
        const appServer = getRunningExpressApp()
        const options = {
            orgId: '',
            chatflowid: '',
            chatId: '',
            appDataSource: appServer.AppDataSource,
            databaseEntities: databaseEntities
        }

        return await getVoices(provider, credentialId, options)
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: textToSpeechService.getVoices - ${getErrorMessage(error)}`
        )
    }
}
|
||||
|
||||
export default {
|
||||
getVoices: getVoicesForProvider
|
||||
}
|
||||
@@ -257,4 +257,50 @@ export class SSEStreamer implements IServerSideEventStreamer {
|
||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Writes a `tts_start` event (message id and audio format) to the client registered for `chatId`.
 * Does nothing when no client is registered under that id.
 */
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
    const subscriber = this.clients[chatId]
    if (!subscriber) return

    const payload = { event: 'tts_start', data: { chatMessageId, format } }
    subscriber.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
}
|
||||
|
||||
/**
 * Writes a `tts_data` event (message id and audio chunk) to the client registered for `chatId`.
 * Does nothing when no client is registered under that id.
 */
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
    const subscriber = this.clients[chatId]
    if (!subscriber) return

    const payload = { event: 'tts_data', data: { chatMessageId, audioChunk } }
    subscriber.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
}
|
||||
|
||||
/**
 * Writes a `tts_end` event for the given message to the client registered for `chatId`.
 * Does nothing when no client is registered under that id.
 */
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
    const subscriber = this.clients[chatId]
    if (!subscriber) return

    const payload = { event: 'tts_end', data: { chatMessageId } }
    subscriber.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')
}
|
||||
|
||||
/**
 * Writes a `tts_abort` event for the given message to the client registered for `chatId`,
 * then ends that client's response and removes the client from the registry.
 * Does nothing when no client is registered under that id.
 */
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
    const subscriber = this.clients[chatId]
    if (!subscriber) return

    const payload = { event: 'tts_abort', data: { chatMessageId } }
    subscriber.response.write('message:\ndata:' + JSON.stringify(payload) + '\n\n')

    // The abort event is the last thing sent on this connection
    subscriber.response.end()
    delete this.clients[chatId]
}
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
|
||||
import { Telemetry } from './telemetry'
|
||||
import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
|
||||
import { UsageCacheManager } from '../UsageCacheManager'
|
||||
import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'
|
||||
|
||||
interface IWaitingNode {
|
||||
nodeId: string
|
||||
@@ -2208,5 +2209,27 @@ export const executeAgentFlow = async ({
|
||||
|
||||
if (sessionId) result.sessionId = sessionId
|
||||
|
||||
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||
const options = {
|
||||
orgId,
|
||||
chatflowid,
|
||||
chatId,
|
||||
appDataSource,
|
||||
databaseEntities
|
||||
}
|
||||
|
||||
if (sseStreamer) {
|
||||
await generateTTSForResponseStream(
|
||||
result.text,
|
||||
chatflow.textToSpeech,
|
||||
options,
|
||||
chatId,
|
||||
chatMessage?.id,
|
||||
sseStreamer,
|
||||
abortController
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import { omit } from 'lodash'
|
||||
import {
|
||||
IFileUpload,
|
||||
convertSpeechToText,
|
||||
convertTextToSpeechStream,
|
||||
ICommonObject,
|
||||
addSingleFileToStorage,
|
||||
generateFollowUpPrompts,
|
||||
@@ -16,7 +17,8 @@ import {
|
||||
getFileFromUpload,
|
||||
removeSpecificFileFromUpload,
|
||||
EvaluationRunner,
|
||||
handleEscapeCharacters
|
||||
handleEscapeCharacters,
|
||||
IServerSideEventStreamer
|
||||
} from 'flowise-components'
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import {
|
||||
@@ -70,9 +72,74 @@ import { executeAgentFlow } from './buildAgentflow'
|
||||
import { Workspace } from '../enterprise/database/entities/workspace.entity'
|
||||
import { Organization } from '../enterprise/database/entities/organization.entity'
|
||||
|
||||
/*
|
||||
* Initialize the ending node to be executed
|
||||
*/
|
||||
/**
 * Tells whether text-to-speech audio should be generated automatically for a response.
 *
 * @param textToSpeechConfig - the chatflow's TTS configuration, either as a JSON string or already parsed
 * @returns `true` when at least one provider entry has both `status === true` and `autoPlay === true`;
 *          `false` for a missing, malformed or non-object configuration (parse errors are logged)
 */
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false
    try {
        const config: unknown = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // JSON such as `null`, `"text"` or `42` parses fine but holds no provider entries
        if (typeof config !== 'object' || config === null) return false

        return Object.values(config).some((provider) => provider?.status === true && provider?.autoPlay === true)
    } catch (error) {
        logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
        return false
    }
}
|
||||
|
||||
/**
 * Generates speech for a finished response and forwards it to the client through `sseStreamer`
 * as TTS start / data (base64 chunk) / end events.
 *
 * Uses the first provider entry in the configuration whose `status` is `true`. This is a
 * best-effort step: it returns silently when there is no configuration, no active provider or
 * no streamer, and any failure is logged (followed by a TTS end event) instead of being thrown.
 *
 * @param responseText - text to convert to speech
 * @param textToSpeechConfig - the chatflow's TTS configuration, as a JSON string or already parsed
 * @param options - context forwarded to `convertTextToSpeechStream`
 * @param chatId - id of the chat whose client receives the events
 * @param chatMessageId - id of the message the audio belongs to
 * @param sseStreamer - streamer used to emit the TTS events
 * @param abortController - optional controller to cancel generation; a fresh one is used when omitted
 */
const generateTTSForResponseStream = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject,
    chatId: string,
    chatMessageId: string,
    sseStreamer: IServerSideEventStreamer,
    abortController?: AbortController
): Promise<void> => {
    // Without a streamer there is nowhere to send audio; bail out before any work is done
    // (callers may not have one, in which case the error handler below would itself throw).
    if (!textToSpeechConfig || !sseStreamer) return

    try {
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        const activeEntry = Object.entries<any>(config ?? {}).find(([, provider]) => provider?.status === true)
        if (!activeEntry) return

        const [providerKey, provider] = activeEntry
        const activeProviderConfig = {
            name: providerKey,
            credentialId: provider.credentialId,
            voice: provider.voice,
            model: provider.model
        }

        await convertTextToSpeechStream(
            responseText,
            activeProviderConfig,
            options,
            abortController || new AbortController(),
            (format: string) => {
                sseStreamer.streamTTSStartEvent(chatId, chatMessageId, format)
            },
            (chunk: Buffer) => {
                const audioBase64 = chunk.toString('base64')
                sseStreamer.streamTTSDataEvent(chatId, chatMessageId, audioBase64)
            },
            () => {
                sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
            }
        )
    } catch (error) {
        logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
        // Tell the client the audio stream is over; a failure here must not reject the
        // promise and fail a prediction whose answer has already been produced.
        try {
            sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
        } catch (endError) {
            logger.error(`[server]: Failed to send TTS end event: ${getErrorMessage(endError)}`)
        }
    }
}
|
||||
|
||||
const initEndingNode = async ({
|
||||
endingNodeIds,
|
||||
componentNodes,
|
||||
@@ -833,6 +900,17 @@ export const executeFlow = async ({
|
||||
if (memoryType) result.memoryType = memoryType
|
||||
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
|
||||
|
||||
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||
const options = {
|
||||
orgId,
|
||||
chatflowid,
|
||||
chatId,
|
||||
appDataSource,
|
||||
databaseEntities
|
||||
}
|
||||
await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, chatMessage?.id, sseStreamer, signal)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
@@ -1064,3 +1142,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
export { shouldAutoPlayTTS, generateTTSForResponseStream }
|
||||
|
||||
@@ -41,6 +41,8 @@ export const WHITELIST_URLS = [
|
||||
'/api/v1/user/test',
|
||||
'/api/v1/oauth2-credential/callback',
|
||||
'/api/v1/oauth2-credential/refresh',
|
||||
'/api/v1/text-to-speech/generate',
|
||||
'/api/v1/text-to-speech/abort',
|
||||
AzureSSO.LOGIN_URI,
|
||||
AzureSSO.LOGOUT_URI,
|
||||
AzureSSO.CALLBACK_URI,
|
||||
|
||||
Reference in New Issue
Block a user