Merge pull request #1419 from vinodkiran/FEATURE/Vision

FEATURE: Add Multi Modal Capabilities to Flowise
This commit is contained in:
Henry Heng
2024-02-27 11:58:47 +08:00
committed by GitHub
91 changed files with 4345 additions and 353 deletions
@@ -0,0 +1,23 @@
import { INodeParams, INodeCredential } from '../src/Interface'
class AssemblyAIApi implements INodeCredential {
label: string
name: string
version: number
inputs: INodeParams[]
constructor() {
this.label = 'AssemblyAI API'
this.name = 'assemblyAIApi'
this.version = 1.0
this.inputs = [
{
label: 'AssemblyAI Api Key',
name: 'assemblyAIApiKey',
type: 'password'
}
]
}
}
module.exports = { credClass: AssemblyAIApi }
@@ -4,13 +4,15 @@ import { BaseChatModel } from '@langchain/core/language_models/chat_models'
import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'
import { ChainValues } from '@langchain/core/utils/types'
import { AgentStep } from '@langchain/core/agents'
import { renderTemplate } from '@langchain/core/prompts'
import { renderTemplate, MessagesPlaceholder } from '@langchain/core/prompts'
import { RunnableSequence } from '@langchain/core/runnables'
import { ChatConversationalAgent } from 'langchain/agents'
import { getBaseClasses } from '../../../src/utils'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
import { FlowiseMemory, ICommonObject, IMessage, INode, INodeData, INodeParams } from '../../../src/Interface'
import { AgentExecutor } from '../../../src/agents'
import { ChatOpenAI } from '../../chatmodels/ChatOpenAI/FlowiseChatOpenAI'
import { addImagesToMessages } from '../../../src/multiModalUtils'
const DEFAULT_PREFIX = `Assistant is a large language model trained by OpenAI.
@@ -81,12 +83,18 @@ class ConversationalAgent_Agents implements INode {
}
async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
return prepareAgent(nodeData, { sessionId: this.sessionId, chatId: options.chatId, input }, options.chatHistory)
return prepareAgent(nodeData, options, { sessionId: this.sessionId, chatId: options.chatId, input }, options.chatHistory)
}
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
const memory = nodeData.inputs?.memory as FlowiseMemory
const executor = await prepareAgent(nodeData, { sessionId: this.sessionId, chatId: options.chatId, input }, options.chatHistory)
const executor = await prepareAgent(
nodeData,
options,
{ sessionId: this.sessionId, chatId: options.chatId, input },
options.chatHistory
)
const loggerHandler = new ConsoleCallbackHandler(options.logger)
const callbacks = await additionalCallbacks(nodeData, options)
@@ -120,6 +128,7 @@ class ConversationalAgent_Agents implements INode {
const prepareAgent = async (
nodeData: INodeData,
options: ICommonObject,
flowObj: { sessionId?: string; chatId?: string; input?: string },
chatHistory: IMessage[] = []
) => {
@@ -131,11 +140,6 @@ const prepareAgent = async (
const memoryKey = memory.memoryKey ? memory.memoryKey : 'chat_history'
const inputKey = memory.inputKey ? memory.inputKey : 'input'
/** Bind a stop token to the model */
const modelWithStop = model.bind({
stop: ['\nObservation']
})
const outputParser = ChatConversationalAgent.getDefaultOutputParser({
llm: model,
toolNames: tools.map((tool) => tool.name)
@@ -146,6 +150,41 @@ const prepareAgent = async (
outputParser
})
if (model instanceof ChatOpenAI) {
let humanImageMessages: HumanMessage[] = []
const messageContent = addImagesToMessages(nodeData, options, model.multiModalOption)
if (messageContent?.length) {
// Change model to gpt-4-vision
model.modelName = 'gpt-4-vision-preview'
// Change default max token to higher when using gpt-4-vision
model.maxTokens = 1024
for (const msg of messageContent) {
humanImageMessages.push(new HumanMessage({ content: [msg] }))
}
// Pop the `agent_scratchpad` MessagePlaceHolder
let messagePlaceholder = prompt.promptMessages.pop() as MessagesPlaceholder
// Add the HumanMessage for images
prompt.promptMessages.push(...humanImageMessages)
// Add the `agent_scratchpad` MessagePlaceHolder back
prompt.promptMessages.push(messagePlaceholder)
} else {
// revert to previous values if image upload is empty
model.modelName = model.configuredModel
model.maxTokens = model.configuredMaxToken
}
}
/** Bind a stop token to the model */
const modelWithStop = model.bind({
stop: ['\nObservation']
})
const runnableAgent = RunnableSequence.from([
{
[inputKey]: (i: { input: string; steps: AgentStep[] }) => i.input,
@@ -166,7 +205,7 @@ const prepareAgent = async (
sessionId: flowObj?.sessionId,
chatId: flowObj?.chatId,
input: flowObj?.input,
verbose: process.env.DEBUG === 'true' ? true : false
verbose: process.env.DEBUG === 'true'
})
return executor
@@ -8,6 +8,10 @@ import { additionalCallbacks } from '../../../src/handler'
import { FlowiseMemory, ICommonObject, IMessage, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { createReactAgent } from '../../../src/agents'
import { ChatOpenAI } from '../../chatmodels/ChatOpenAI/FlowiseChatOpenAI'
import { HumanMessage } from '@langchain/core/messages'
import { addImagesToMessages } from '../../../src/multiModalUtils'
import { ChatPromptTemplate, HumanMessagePromptTemplate } from 'langchain/prompts'
class MRKLAgentChat_Agents implements INode {
label: string
@@ -61,18 +65,39 @@ class MRKLAgentChat_Agents implements INode {
let tools = nodeData.inputs?.tools as Tool[]
tools = flatten(tools)
const promptWithChat = await pull<PromptTemplate>('hwchase17/react-chat')
const prompt = await pull<PromptTemplate>('hwchase17/react-chat')
let chatPromptTemplate = undefined
if (model instanceof ChatOpenAI) {
const messageContent = addImagesToMessages(nodeData, options, model.multiModalOption)
if (messageContent?.length) {
// Change model to gpt-4-vision
model.modelName = 'gpt-4-vision-preview'
// Change default max token to higher when using gpt-4-vision
model.maxTokens = 1024
const oldTemplate = prompt.template as string
chatPromptTemplate = ChatPromptTemplate.fromMessages([HumanMessagePromptTemplate.fromTemplate(oldTemplate)])
chatPromptTemplate.promptMessages.push(new HumanMessage({ content: messageContent }))
} else {
// revert to previous values if image upload is empty
model.modelName = model.configuredModel
model.maxTokens = model.configuredMaxToken
}
}
const agent = await createReactAgent({
llm: model,
tools,
prompt: promptWithChat
prompt: chatPromptTemplate ?? prompt
})
const executor = new AgentExecutor({
agent,
tools,
verbose: process.env.DEBUG === 'true' ? true : false
verbose: process.env.DEBUG === 'true'
})
const callbacks = await additionalCallbacks(nodeData, options)
@@ -1,14 +1,16 @@
import { ConversationChain } from 'langchain/chains'
import { ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate } from '@langchain/core/prompts'
import { BaseChatModel } from '@langchain/core/language_models/chat_models'
import { RunnableSequence } from '@langchain/core/runnables'
import { StringOutputParser } from '@langchain/core/output_parsers'
import { HumanMessage } from '@langchain/core/messages'
import { ConsoleCallbackHandler as LCConsoleCallbackHandler } from '@langchain/core/tracers/console'
import { ConversationChain } from 'langchain/chains'
import { FlowiseMemory, ICommonObject, IMessage, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
import { checkInputs, Moderation, streamResponse } from '../../moderation/Moderation'
import { formatResponse } from '../../outputparsers/OutputParserHelpers'
import { addImagesToMessages } from '../../../src/multiModalUtils'
import { ChatOpenAI } from '../../chatmodels/ChatOpenAI/FlowiseChatOpenAI'
import { FlowiseMemory, ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
let systemMessage = `The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.`
const inputKey = 'input'
@@ -86,12 +88,14 @@ class ConversationChain_Chains implements INode {
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const chain = prepareChain(nodeData, this.sessionId, options.chatHistory)
const chain = prepareChain(nodeData, options, this.sessionId)
return chain
}
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string | object> {
const memory = nodeData.inputs?.memory
const chain = prepareChain(nodeData, options, this.sessionId)
const moderations = nodeData.inputs?.inputModeration as Moderation[]
if (moderations && moderations.length > 0) {
@@ -105,8 +109,6 @@ class ConversationChain_Chains implements INode {
}
}
const chain = prepareChain(nodeData, this.sessionId, options.chatHistory)
const loggerHandler = new ConsoleCallbackHandler(options.logger)
const additionalCallback = await additionalCallbacks(nodeData, options)
@@ -143,7 +145,7 @@ class ConversationChain_Chains implements INode {
}
}
const prepareChatPrompt = (nodeData: INodeData) => {
const prepareChatPrompt = (nodeData: INodeData, humanImageMessages: HumanMessage[]) => {
const memory = nodeData.inputs?.memory as FlowiseMemory
const prompt = nodeData.inputs?.systemMessagePrompt as string
const chatPromptTemplate = nodeData.inputs?.chatPromptTemplate as ChatPromptTemplate
@@ -151,12 +153,10 @@ const prepareChatPrompt = (nodeData: INodeData) => {
if (chatPromptTemplate && chatPromptTemplate.promptMessages.length) {
const sysPrompt = chatPromptTemplate.promptMessages[0]
const humanPrompt = chatPromptTemplate.promptMessages[chatPromptTemplate.promptMessages.length - 1]
const chatPrompt = ChatPromptTemplate.fromMessages([
sysPrompt,
new MessagesPlaceholder(memory.memoryKey ?? 'chat_history'),
humanPrompt
])
const messages = [sysPrompt, new MessagesPlaceholder(memory.memoryKey ?? 'chat_history'), humanPrompt]
if (humanImageMessages.length) messages.push(...humanImageMessages)
const chatPrompt = ChatPromptTemplate.fromMessages(messages)
if ((chatPromptTemplate as any).promptValues) {
// @ts-ignore
chatPrompt.promptValues = (chatPromptTemplate as any).promptValues
@@ -165,21 +165,46 @@ const prepareChatPrompt = (nodeData: INodeData) => {
return chatPrompt
}
const chatPrompt = ChatPromptTemplate.fromMessages([
const messages = [
SystemMessagePromptTemplate.fromTemplate(prompt ? prompt : systemMessage),
new MessagesPlaceholder(memory.memoryKey ?? 'chat_history'),
HumanMessagePromptTemplate.fromTemplate(`{${inputKey}}`)
])
]
if (humanImageMessages.length) messages.push(...(humanImageMessages as any[]))
const chatPrompt = ChatPromptTemplate.fromMessages(messages)
return chatPrompt
}
const prepareChain = (nodeData: INodeData, sessionId?: string, chatHistory: IMessage[] = []) => {
const model = nodeData.inputs?.model as BaseChatModel
const prepareChain = (nodeData: INodeData, options: ICommonObject, sessionId?: string) => {
const chatHistory = options.chatHistory
let model = nodeData.inputs?.model as ChatOpenAI
const memory = nodeData.inputs?.memory as FlowiseMemory
const memoryKey = memory.memoryKey ?? 'chat_history'
const chatPrompt = prepareChatPrompt(nodeData)
let humanImageMessages: HumanMessage[] = []
if (model instanceof ChatOpenAI) {
const messageContent = addImagesToMessages(nodeData, options, model.multiModalOption)
if (messageContent?.length) {
// Change model to gpt-4-vision
model.modelName = 'gpt-4-vision-preview'
// Change default max token to higher when using gpt-4-vision
model.maxTokens = 1024
for (const msg of messageContent) {
humanImageMessages.push(new HumanMessage({ content: [msg] }))
}
} else {
// revert to previous values if image upload is empty
model.modelName = model.configuredModel
model.maxTokens = model.configuredMaxToken
}
}
const chatPrompt = prepareChatPrompt(nodeData, humanImageMessages)
let promptVariables = {}
const promptValuesRaw = (chatPrompt as any).promptValues
if (promptValuesRaw) {
@@ -203,7 +228,7 @@ const prepareChain = (nodeData: INodeData, sessionId?: string, chatHistory: IMes
},
...promptVariables
},
chatPrompt,
prepareChatPrompt(nodeData, humanImageMessages),
model,
new StringOutputParser()
])
@@ -1,5 +1,6 @@
import { BaseLanguageModel, BaseLanguageModelCallOptions } from '@langchain/core/language_models/base'
import { BaseLLMOutputParser, BaseOutputParser } from '@langchain/core/output_parsers'
import { ChatPromptTemplate, FewShotPromptTemplate, PromptTemplate, HumanMessagePromptTemplate } from '@langchain/core/prompts'
import { OutputFixingParser } from 'langchain/output_parsers'
import { LLMChain } from 'langchain/chains'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
@@ -7,6 +8,9 @@ import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from
import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
import { checkInputs, Moderation, streamResponse } from '../../moderation/Moderation'
import { formatResponse, injectOutputParser } from '../../outputparsers/OutputParserHelpers'
import { ChatOpenAI } from '../../chatmodels/ChatOpenAI/FlowiseChatOpenAI'
import { addImagesToMessages } from '../../../src/multiModalUtils'
import { HumanMessage } from 'langchain/schema'
class LLMChain_Chains implements INode {
label: string
@@ -160,12 +164,7 @@ const runPrediction = async (
const socketIO = isStreaming ? options.socketIO : undefined
const socketIOClientId = isStreaming ? options.socketIOClientId : ''
const moderations = nodeData.inputs?.inputModeration as Moderation[]
/**
* Apply string transformation to reverse converted special chars:
* FROM: { "value": "hello i am benFLOWISE_NEWLINEFLOWISE_NEWLINEFLOWISE_TABhow are you?" }
* TO: { "value": "hello i am ben\n\n\thow are you?" }
*/
const promptValues = handleEscapeCharacters(promptValuesRaw, true)
let model = nodeData.inputs?.model as ChatOpenAI
if (moderations && moderations.length > 0) {
try {
@@ -178,6 +177,46 @@ const runPrediction = async (
}
}
/**
* Apply string transformation to reverse converted special chars:
* FROM: { "value": "hello i am benFLOWISE_NEWLINEFLOWISE_NEWLINEFLOWISE_TABhow are you?" }
* TO: { "value": "hello i am ben\n\n\thow are you?" }
*/
const promptValues = handleEscapeCharacters(promptValuesRaw, true)
const messageContent = addImagesToMessages(nodeData, options, model.multiModalOption)
if (chain.llm instanceof ChatOpenAI) {
const chatOpenAI = chain.llm as ChatOpenAI
if (messageContent?.length) {
// Change model to gpt-4-vision && max token to higher when using gpt-4-vision
chatOpenAI.modelName = 'gpt-4-vision-preview'
chatOpenAI.maxTokens = 1024
// Add image to the message
if (chain.prompt instanceof PromptTemplate) {
const existingPromptTemplate = chain.prompt.template as string
let newChatPromptTemplate = ChatPromptTemplate.fromMessages([
HumanMessagePromptTemplate.fromTemplate(existingPromptTemplate)
])
newChatPromptTemplate.promptMessages.push(new HumanMessage({ content: messageContent }))
chain.prompt = newChatPromptTemplate
} else if (chain.prompt instanceof ChatPromptTemplate) {
chain.prompt.promptMessages.push(new HumanMessage({ content: messageContent }))
} else if (chain.prompt instanceof FewShotPromptTemplate) {
let existingFewShotPromptTemplate = chain.prompt.examplePrompt.template as string
let newFewShotPromptTemplate = ChatPromptTemplate.fromMessages([
HumanMessagePromptTemplate.fromTemplate(existingFewShotPromptTemplate)
])
newFewShotPromptTemplate.promptMessages.push(new HumanMessage({ content: messageContent }))
// @ts-ignore
chain.prompt.examplePrompt = newFewShotPromptTemplate
}
} else {
// revert to previous values if image upload is empty
chatOpenAI.modelName = model.configuredModel
chatOpenAI.maxTokens = model.configuredMaxToken
}
}
if (promptValues && inputVariables.length > 0) {
let seen: string[] = []
@@ -1,8 +1,10 @@
import { ChatOpenAI, OpenAIChatInput } from '@langchain/openai'
import type { ClientOptions } from 'openai'
import { ChatOpenAI as LangchainChatOpenAI, OpenAIChatInput, AzureOpenAIInput, LegacyOpenAIInput } from '@langchain/openai'
import { BaseCache } from '@langchain/core/caches'
import { BaseLLMParams } from '@langchain/core/language_models/llms'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { BaseChatModelParams } from '@langchain/core/language_models/chat_models'
import { ICommonObject, IMultiModalOption, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { ChatOpenAI } from './FlowiseChatOpenAI'
class ChatOpenAI_ChatModels implements INode {
label: string
@@ -19,12 +21,12 @@ class ChatOpenAI_ChatModels implements INode {
constructor() {
this.label = 'ChatOpenAI'
this.name = 'chatOpenAI'
this.version = 4.0
this.version = 5.0
this.type = 'ChatOpenAI'
this.icon = 'openai.svg'
this.category = 'Chat Models'
this.description = 'Wrapper around OpenAI large language models that use the Chat endpoint'
this.baseClasses = [this.type, ...getBaseClasses(ChatOpenAI)]
this.baseClasses = [this.type, ...getBaseClasses(LangchainChatOpenAI)]
this.credential = {
label: 'Connect Credential',
name: 'credential',
@@ -168,6 +170,38 @@ class ChatOpenAI_ChatModels implements INode {
type: 'json',
optional: true,
additionalParams: true
},
{
label: 'Allow Image Uploads',
name: 'allowImageUploads',
type: 'boolean',
description:
'Automatically uses gpt-4-vision-preview when image is being uploaded from chat. Only works with LLMChain, Conversation Chain, ReAct Agent, and Conversational Agent',
default: false,
optional: true
},
{
label: 'Image Resolution',
description: 'This parameter controls the resolution in which the model views the image.',
name: 'imageResolution',
type: 'options',
options: [
{
label: 'Low',
name: 'low'
},
{
label: 'High',
name: 'high'
},
{
label: 'Auto',
name: 'auto'
}
],
default: 'low',
optional: false,
additionalParams: true
}
]
}
@@ -184,12 +218,17 @@ class ChatOpenAI_ChatModels implements INode {
const basePath = nodeData.inputs?.basepath as string
const baseOptions = nodeData.inputs?.baseOptions
const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
const imageResolution = nodeData.inputs?.imageResolution as string
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
const cache = nodeData.inputs?.cache as BaseCache
const obj: Partial<OpenAIChatInput> & BaseLLMParams & { openAIApiKey?: string } = {
const obj: Partial<OpenAIChatInput> &
Partial<AzureOpenAIInput> &
BaseChatModelParams & { configuration?: ClientOptions & LegacyOpenAIInput; multiModalOption?: IMultiModalOption } = {
temperature: parseFloat(temperature),
modelName,
openAIApiKey,
@@ -212,10 +251,24 @@ class ChatOpenAI_ChatModels implements INode {
throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception)
}
}
const model = new ChatOpenAI(obj, {
basePath,
baseOptions: parsedBaseOptions
})
if (basePath || parsedBaseOptions) {
obj.configuration = {
baseURL: basePath,
baseOptions: parsedBaseOptions
}
}
const multiModalOption: IMultiModalOption = {
image: {
allowImageUploads: allowImageUploads ?? false,
imageResolution
}
}
obj.multiModalOption = multiModalOption
const model = new ChatOpenAI(nodeData.id, obj)
return model
}
}
@@ -0,0 +1,38 @@
import type { ClientOptions } from 'openai'
import {
ChatOpenAI as LangchainChatOpenAI,
OpenAIChatInput,
LegacyOpenAIInput,
AzureOpenAIInput,
ChatOpenAICallOptions
} from '@langchain/openai'
import { BaseChatModelParams } from '@langchain/core/language_models/chat_models'
import { IMultiModalOption } from '../../../src'
import { BaseMessageLike, LLMResult } from 'langchain/schema'
import { Callbacks } from '@langchain/core/callbacks/manager'
export class ChatOpenAI extends LangchainChatOpenAI {
configuredModel: string
configuredMaxToken?: number
multiModalOption?: IMultiModalOption
id: string
constructor(
id: string,
fields?: Partial<OpenAIChatInput> &
Partial<AzureOpenAIInput> &
BaseChatModelParams & { configuration?: ClientOptions & LegacyOpenAIInput; multiModalOption?: IMultiModalOption },
/** @deprecated */
configuration?: ClientOptions & LegacyOpenAIInput
) {
super(fields, configuration)
this.id = id
this.multiModalOption = fields?.multiModalOption
this.configuredModel = fields?.modelName ?? 'gpt-3.5-turbo'
this.configuredMaxToken = fields?.maxTokens
}
async generate(messages: BaseMessageLike[][], options?: string[] | ChatOpenAICallOptions, callbacks?: Callbacks): Promise<LLMResult> {
return super.generate(messages, options, callbacks)
}
}
@@ -0,0 +1,33 @@
import { INode, INodeParams } from '../../../src/Interface'
class AssemblyAI_SpeechToText implements INode {
label: string
name: string
version: number
description: string
type: string
icon: string
category: string
baseClasses: string[]
inputs?: INodeParams[]
credential: INodeParams
constructor() {
this.label = 'AssemblyAI'
this.name = 'assemblyAI'
this.version = 1.0
this.type = 'AssemblyAI'
this.icon = 'assemblyai.png'
this.category = 'SpeechToText'
this.baseClasses = [this.type]
this.inputs = []
this.credential = {
label: 'Connect Credential',
name: 'credential',
type: 'credential',
credentialNames: ['assemblyAIApi']
}
}
}
module.exports = { nodeClass: AssemblyAI_SpeechToText }
Binary file not shown.

After

Width:  |  Height:  |  Size: 8.5 KiB

+1
View File
@@ -46,6 +46,7 @@
"@upstash/redis": "1.22.1",
"@zilliz/milvus2-sdk-node": "^2.2.24",
"apify-client": "^2.7.1",
"assemblyai": "^4.2.2",
"axios": "1.6.2",
"cheerio": "^1.0.0-rc.12",
"chromadb": "^1.5.11",
+29
View File
@@ -21,6 +21,8 @@ export type CommonType = string | number | boolean | undefined | null
export type MessageType = 'apiMessage' | 'userMessage'
export type ImageDetail = 'auto' | 'low' | 'high'
/**
* Others
*/
@@ -146,6 +148,33 @@ export interface IUsedTool {
toolOutput: string | object
}
export interface IFileUpload {
data?: string
type: string
name: string
mime: string
}
export interface IMultiModalOption {
image?: Record<string, any>
audio?: Record<string, any>
}
export type MessageContentText = {
type: 'text'
text: string
}
export type MessageContentImageUrl = {
type: 'image_url'
image_url:
| string
| {
url: string
detail?: ImageDetail
}
}
/**
* Classes
*/
+1
View File
@@ -6,3 +6,4 @@ dotenv.config({ path: envPath, override: true })
export * from './Interface'
export * from './utils'
export * from './speechToText'
@@ -0,0 +1,48 @@
import { ICommonObject, IFileUpload, IMultiModalOption, INodeData, MessageContentImageUrl } from './Interface'
import { ChatOpenAI as LangchainChatOpenAI } from 'langchain/chat_models/openai'
import path from 'path'
import { getStoragePath } from './utils'
import fs from 'fs'
export const addImagesToMessages = (
nodeData: INodeData,
options: ICommonObject,
multiModalOption?: IMultiModalOption
): MessageContentImageUrl[] => {
const imageContent: MessageContentImageUrl[] = []
let model = nodeData.inputs?.model
if (model instanceof LangchainChatOpenAI && multiModalOption) {
// Image Uploaded
if (multiModalOption.image && multiModalOption.image.allowImageUploads && options?.uploads && options?.uploads.length > 0) {
const imageUploads = getImageUploads(options.uploads)
for (const upload of imageUploads) {
let bf = upload.data
if (upload.type == 'stored-file') {
const filePath = path.join(getStoragePath(), options.chatflowid, options.chatId, upload.name)
// as the image is stored in the server, read the file and convert it to base64
const contents = fs.readFileSync(filePath)
bf = 'data:' + upload.mime + ';base64,' + contents.toString('base64')
imageContent.push({
type: 'image_url',
image_url: {
url: bf,
detail: multiModalOption.image.imageResolution ?? 'low'
}
})
}
}
}
}
return imageContent
}
export const getAudioUploads = (uploads: IFileUpload[]) => {
return uploads.filter((upload: IFileUpload) => upload.mime.startsWith('audio/'))
}
export const getImageUploads = (uploads: IFileUpload[]) => {
return uploads.filter((upload: IFileUpload) => upload.mime.startsWith('image/'))
}
+51
View File
@@ -0,0 +1,51 @@
import { ICommonObject, IFileUpload } from './Interface'
import { getCredentialData, getStoragePath } from './utils'
import { type ClientOptions, OpenAIClient } from '@langchain/openai'
import fs from 'fs'
import path from 'path'
import { AssemblyAI } from 'assemblyai'
export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
if (speechToTextConfig) {
const credentialId = speechToTextConfig.credentialId as string
const credentialData = await getCredentialData(credentialId ?? '', options)
const filePath = path.join(getStoragePath(), options.chatflowid, options.chatId, upload.name)
const audio_file = fs.createReadStream(filePath)
if (speechToTextConfig.name === 'openAIWhisper') {
const openAIClientOptions: ClientOptions = {
apiKey: credentialData.openAIApiKey
}
const openAIClient = new OpenAIClient(openAIClientOptions)
const transcription = await openAIClient.audio.transcriptions.create({
file: audio_file,
model: 'whisper-1',
language: speechToTextConfig?.language,
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
prompt: speechToTextConfig?.prompt
})
if (transcription?.text) {
return transcription.text
}
} else if (speechToTextConfig.name === 'assemblyAiTranscribe') {
const client = new AssemblyAI({
apiKey: credentialData.assemblyAIApiKey
})
const params = {
audio: audio_file,
speaker_labels: false
}
const transcription = await client.transcripts.transcribe(params)
if (transcription?.text) {
return transcription.text
}
}
} else {
throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')
}
return undefined
}
+7
View File
@@ -770,3 +770,10 @@ export const prepareSandboxVars = (variables: IVariable[]) => {
}
return vars
}
/**
* Prepare storage path
*/
export const getStoragePath = (): string => {
return process.env.BLOB_STORAGE_PATH ? path.join(process.env.BLOB_STORAGE_PATH) : path.join(getUserHome(), '.flowise', 'storage')
}