SpeechToText: Adding SpeechToText at the Chatflow level.

This commit is contained in:
vinodkiran
2024-01-31 07:48:38 -05:00
parent 4604594c55
commit e81927ee13
7 changed files with 67 additions and 39 deletions
@@ -38,7 +38,7 @@ export class FlowiseChatOpenAI extends ChatOpenAI {
const nodeData = FlowiseChatOpenAI.chainNodeData
const optionsData = FlowiseChatOpenAI.chainNodeOptions
const messageContent = addImagesToMessages(nodeData, optionsData)
if (messageContent) {
if (messageContent?.length) {
if (messages[0].length > 0 && messages[0][messages[0].length - 1] instanceof HumanMessage) {
const lastMessage = messages[0].pop()
if (lastMessage instanceof HumanMessage) {
+1
View File
@@ -40,6 +40,7 @@
"@upstash/redis": "^1.22.1",
"@zilliz/milvus2-sdk-node": "^2.2.24",
"apify-client": "^2.7.1",
"assemblyai": "^4.2.2",
"axios": "1.6.2",
"cheerio": "^1.0.0-rc.12",
"chromadb": "^1.5.11",
@@ -1,6 +1,5 @@
import { ICommonObject, INodeData } from './Interface'
import { BaseChatModel } from 'langchain/chat_models/base'
import { type ClientOptions, OpenAIClient } from '@langchain/openai'
import { ChatOpenAI } from 'langchain/chat_models/openai'
import path from 'path'
import { getUserHome } from './utils'
+1
View File
@@ -6,3 +6,4 @@ dotenv.config({ path: envPath, override: true })
export * from './Interface'
export * from './utils'
export * from './speechToText'
+49
View File
@@ -0,0 +1,49 @@
import { ICommonObject } from './Interface'
import { getCredentialData, getUserHome } from './utils'
import { type ClientOptions, OpenAIClient } from '@langchain/openai'
import fs from 'fs'
import path from 'path'
import { AssemblyAI } from 'assemblyai'
/**
 * Transcribes an uploaded audio recording with the speech-to-text provider
 * configured on the chatflow.
 *
 * @param upload - Upload descriptor; `upload.data` and `upload.name` are joined under
 * `<user home>/.flowise/gptvision` to locate the stored audio file.
 * @param speechToTextConfig - Selected provider configuration. `name` picks the provider
 * (`openAIWhisper` or `assemblyAiTranscribe`) and `credentialId` is used to look up its API key.
 * @param options - Passed through to `getCredentialData`.
 * @returns The transcribed text, or `undefined` when the provider name is not recognised
 * or the provider returned no text.
 * @throws Error when no speech-to-text configuration is supplied, or when the upload
 * resolves to a location outside the upload storage directory.
 */
export const convertSpeechToText = async (upload: any, speechToTextConfig: any, options: ICommonObject): Promise<string | undefined> => {
    if (!speechToTextConfig) {
        throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')
    }

    const credentialId = speechToTextConfig.credentialId as string
    const credentialData = await getCredentialData(credentialId ?? '', options)

    const provider = speechToTextConfig.name
    if (provider !== 'openAIWhisper' && provider !== 'assemblyAiTranscribe') {
        // Unknown provider: return before opening the file so no descriptor is left dangling.
        return undefined
    }

    // The upload fields originate from the client, so make sure the resolved path
    // cannot escape the storage directory through `..` segments.
    const storageRoot = path.join(getUserHome(), '.flowise', 'gptvision')
    const filePath = path.join(storageRoot, upload.data, upload.name)
    const relativePath = path.relative(storageRoot, filePath)
    if (relativePath === '..' || relativePath.startsWith('..' + path.sep) || path.isAbsolute(relativePath)) {
        throw new Error('Invalid audio file path.')
    }

    // The recording is stored on the server; stream it from disk to the provider.
    const audioFile = fs.createReadStream(filePath)
    try {
        if (provider === 'openAIWhisper') {
            const openAIClientOptions: ClientOptions = {
                apiKey: credentialData.openAIApiKey
            }
            const openAIClient = new OpenAIClient(openAIClientOptions)
            const transcription = await openAIClient.audio.transcriptions.create({
                file: audioFile,
                model: 'whisper-1'
            })
            if (transcription?.text) {
                return transcription.text
            }
        } else {
            const client = new AssemblyAI({
                apiKey: credentialData.assemblyAIApiKey
            })
            const params = {
                audio: audioFile,
                speaker_labels: false
            }
            const transcription = await client.transcripts.transcribe(params)
            if (transcription?.text) {
                return transcription.text
            }
        }
        return undefined
    } finally {
        // Release the file descriptor even when the provider call fails part-way;
        // destroying an already finished stream is a no-op.
        audioFile.destroy()
    }
}