mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 07:00:49 +03:00
MultiModal : Adding functionality to base OpenAI Chat Model
This commit is contained in:
@@ -8,8 +8,7 @@ import { flatten } from 'lodash'
|
||||
import { Document } from 'langchain/document'
|
||||
import { RunnableSequence } from 'langchain/schema/runnable'
|
||||
import { StringOutputParser } from 'langchain/schema/output_parser'
|
||||
import { addImagesToMessages, processSpeechToText } from '../../../src/MultiModalUtils'
|
||||
import { HumanMessage } from 'langchain/schema'
|
||||
import { injectChainNodeData } from '../../../src/MultiModalUtils'
|
||||
|
||||
let systemMessage = `The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.`
|
||||
const inputKey = 'input'
|
||||
@@ -75,7 +74,7 @@ class ConversationChain_Chains implements INode {
|
||||
|
||||
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
|
||||
const memory = nodeData.inputs?.memory
|
||||
input = await processSpeechToText(nodeData, input, options)
|
||||
injectChainNodeData(nodeData, options)
|
||||
|
||||
const chain = prepareChain(nodeData, options, this.sessionId)
|
||||
|
||||
@@ -132,24 +131,12 @@ const prepareChatPrompt = (nodeData: INodeData, options: ICommonObject) => {
|
||||
|
||||
if (finalText) systemMessage = `${systemMessage}\nThe AI has the following context:\n${finalText}`
|
||||
|
||||
// TODO: add audio uploads
|
||||
// if (options.uploads.length > 0) {
|
||||
// const audioUploads = getAudioUploads(options.uploads)
|
||||
// for (const upload of audioUploads) {
|
||||
// await this.processAudioWithWhisper(upload, chatMessages)
|
||||
// }
|
||||
// }
|
||||
const imageContent = addImagesToMessages(nodeData, options)
|
||||
|
||||
//TODO, this should not be any[], what interface should it be?
|
||||
let promptMessages: any[] = [
|
||||
SystemMessagePromptTemplate.fromTemplate(prompt ? `${prompt}\n${systemMessage}` : systemMessage),
|
||||
new MessagesPlaceholder(memory.memoryKey ?? 'chat_history'),
|
||||
HumanMessagePromptTemplate.fromTemplate(`{${inputKey}}`)
|
||||
]
|
||||
if (imageContent.length > 0) {
|
||||
promptMessages.push(new HumanMessage({ content: imageContent }))
|
||||
}
|
||||
const chatPrompt = ChatPromptTemplate.fromMessages(promptMessages)
|
||||
|
||||
return chatPrompt
|
||||
|
||||
@@ -8,6 +8,7 @@ import { formatResponse, injectOutputParser } from '../../outputparsers/OutputPa
|
||||
import { BaseLLMOutputParser } from 'langchain/schema/output_parser'
|
||||
import { OutputFixingParser } from 'langchain/output_parsers'
|
||||
import { checkInputs, Moderation, streamResponse } from '../../moderation/Moderation'
|
||||
import { injectChainNodeData } from '../../../src/MultiModalUtils'
|
||||
|
||||
class LLMChain_Chains implements INode {
|
||||
label: string
|
||||
@@ -129,6 +130,7 @@ class LLMChain_Chains implements INode {
|
||||
if (!this.outputParser && outputParser) {
|
||||
this.outputParser = outputParser
|
||||
}
|
||||
injectChainNodeData(nodeData, options)
|
||||
promptValues = injectOutputParser(this.outputParser, chain, promptValues)
|
||||
const res = await runPrediction(inputVariables, chain, input, promptValues, options, nodeData)
|
||||
// eslint-disable-next-line no-console
|
||||
|
||||
@@ -3,6 +3,7 @@ import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../
|
||||
import { ChatOpenAI, OpenAIChatInput } from 'langchain/chat_models/openai'
|
||||
import { BaseCache } from 'langchain/schema'
|
||||
import { BaseLLMParams } from 'langchain/llms/base'
|
||||
import { FlowiseChatOpenAI } from './FlowiseChatOpenAI'
|
||||
|
||||
class ChatOpenAI_ChatModels implements INode {
|
||||
label: string
|
||||
@@ -157,13 +158,7 @@ class ChatOpenAI_ChatModels implements INode {
|
||||
label: 'Allow Image Uploads',
|
||||
name: 'allowImageUploads',
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Allow Audio Uploads',
|
||||
name: 'allowAudioUploads',
|
||||
type: 'boolean',
|
||||
description: 'Enabling this option, would default the model to gpt-4-vision-preview',
|
||||
default: false,
|
||||
optional: true
|
||||
},
|
||||
@@ -236,7 +231,6 @@ class ChatOpenAI_ChatModels implements INode {
|
||||
const baseOptions = nodeData.inputs?.baseOptions
|
||||
|
||||
const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
|
||||
const allowAudioUploads = nodeData.inputs?.allowAudioUploads as boolean
|
||||
const allowSpeechToText = nodeData.inputs?.allowSpeechToText as boolean
|
||||
const speechToTextMode = nodeData.inputs?.speechToTextMode as string
|
||||
const imageResolution = nodeData.inputs?.imageResolution as string
|
||||
@@ -269,24 +263,18 @@ class ChatOpenAI_ChatModels implements INode {
|
||||
throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception)
|
||||
}
|
||||
}
|
||||
const model = new ChatOpenAI(obj, {
|
||||
basePath,
|
||||
const model = new FlowiseChatOpenAI(obj, {
|
||||
baseURL: basePath,
|
||||
baseOptions: parsedBaseOptions
|
||||
})
|
||||
|
||||
const multiModal = {
|
||||
allowImageUploads: allowImageUploads ?? false,
|
||||
allowAudioUploads: allowAudioUploads ?? false,
|
||||
allowSpeechToText: allowSpeechToText ?? false,
|
||||
imageResolution,
|
||||
speechToTextMode
|
||||
}
|
||||
Object.defineProperty(model, 'multiModal', {
|
||||
enumerable: true,
|
||||
configurable: true,
|
||||
writable: true,
|
||||
value: multiModal
|
||||
})
|
||||
model.multiModal = multiModal
|
||||
return model
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
import { ChatOpenAI, OpenAIChatInput } from 'langchain/chat_models/openai'
|
||||
import { BaseChatModelParams } from 'langchain/chat_models/base'
|
||||
import type { ClientOptions } from 'openai'
|
||||
import type { LegacyOpenAIInput } from '@langchain/openai/dist/types'
|
||||
import { BaseLanguageModelInput } from 'langchain/base_language'
|
||||
import { ChatOpenAICallOptions } from '@langchain/openai/dist/chat_models'
|
||||
import { BaseMessageChunk, BaseMessageLike, HumanMessage, LLMResult } from 'langchain/schema'
|
||||
import { Callbacks } from '@langchain/core/callbacks/manager'
|
||||
import { ICommonObject, INodeData } from '../../../src'
|
||||
import { addImagesToMessages, checkSpeechToText } from '../../../src/MultiModalUtils'
|
||||
import { ChatPromptTemplate, PromptTemplate } from 'langchain/prompts'
|
||||
|
||||
/**
 * ChatOpenAI subclass that rewrites the outgoing messages just before generation
 * so that uploaded audio (transcribed to text) and uploaded images are included
 * in the request.
 *
 * The chain that owns the model publishes its node data and run options through
 * the static `chainNodeData` / `chainNodeOptions` fields (see `injectChainNodeData`
 * in MultiModalUtils); `generate` reads them back when building the request.
 */
export class FlowiseChatOpenAI extends ChatOpenAI {
    /** Multi-modal settings assigned by the ChatOpenAI node after construction. */
    multiModal: Record<string, unknown>

    //TODO: Should be class variables and not static
    public static chainNodeData: INodeData
    public static chainNodeOptions: ICommonObject

    /**
     * @param fields - Model parameters forwarded to ChatOpenAI.
     * @param configuration - Legacy client configuration (base URL, base options).
     *   Forwarded to the base class so a custom endpoint is honoured.
     */
    constructor(
        fields?: Partial<OpenAIChatInput> & BaseChatModelParams & { openAIApiKey?: string },
        /** @deprecated */
        configuration?: ClientOptions & LegacyOpenAIInput
    ) {
        // Previously only `fields` was forwarded, silently dropping the client configuration.
        super(fields, configuration)
    }

    /** Pass-through to the base implementation; kept as an explicit extension point. */
    async invoke(input: BaseLanguageModelInput, options?: ChatOpenAICallOptions): Promise<BaseMessageChunk> {
        return super.invoke(input, options)
    }

    /**
     * Injects multi-modal content into the first message batch, then delegates
     * to the base implementation.
     */
    async generate(messages: BaseMessageLike[][], options?: string[] | ChatOpenAICallOptions, callbacks?: Callbacks): Promise<LLMResult> {
        await this.injectMultiModalMessages(messages)
        return super.generate(messages, options, callbacks)
    }

    /**
     * Mutates `messages[0]` in place:
     *  - when a speech-to-text transcription is available, it becomes the content of
     *    the trailing human message (formatted through the chain's PromptTemplate
     *    when one is configured);
     *  - when image content is available, it is appended as an extra human message
     *    and the model is switched to `gpt-4-vision-preview`.
     *
     * Does nothing when no chain has published its node data or there is no batch.
     */
    private async injectMultiModalMessages(messages: BaseMessageLike[][]) {
        const nodeData: INodeData | undefined = FlowiseChatOpenAI.chainNodeData
        const optionsData = FlowiseChatOpenAI.chainNodeOptions

        // The statics are only populated by chains that call injectChainNodeData;
        // without them there is nothing to inject (and dereferencing would throw).
        if (!nodeData || messages.length === 0) return

        const batch = messages[0]

        const audioTrans = await checkSpeechToText(nodeData, optionsData)
        if (audioTrans) {
            const lastMessage = batch[batch.length - 1]
            // Only rewrite a real human message; an empty batch, a plain string or a
            // tuple entry cannot safely have its `content` replaced.
            if (lastMessage instanceof HumanMessage) {
                const prompt = nodeData.inputs?.prompt
                if (!prompt || prompt instanceof ChatPromptTemplate) {
                    lastMessage.content = audioTrans
                } else if (prompt instanceof PromptTemplate) {
                    // The transcription fills the template's first input variable.
                    const inputVar = prompt.inputVariables[0]
                    const formattedValues: Record<string, unknown> = {}
                    if (inputVar !== undefined) formattedValues[inputVar] = audioTrans
                    lastMessage.content = await prompt.format(formattedValues)
                }
            }
        }

        const messageContent = addImagesToMessages(nodeData, optionsData)
        // An empty result means "no images": leave the messages and the model untouched.
        if (messageContent?.length && batch[batch.length - 1] instanceof HumanMessage) {
            this.modelName = 'gpt-4-vision-preview'
            // Append rather than overwrite so the user's text / transcription is preserved.
            batch.push(new HumanMessage({ content: messageContent }))
        }
    }
}
|
||||
@@ -6,15 +6,26 @@ import path from 'path'
|
||||
import { getUserHome } from './utils'
|
||||
import fs from 'fs'
|
||||
import { MessageContent } from '@langchain/core/dist/messages'
|
||||
import { FlowiseChatOpenAI } from '../nodes/chatmodels/ChatOpenAI/FlowiseChatOpenAI'
|
||||
|
||||
export const processSpeechToText = async (nodeData: INodeData, input: string, options: ICommonObject) => {
|
||||
/**
 * Publishes the chain's node data and run options to FlowiseChatOpenAI so the
 * model can read them while generating. Does nothing when the node's model is
 * not a FlowiseChatOpenAI instance.
 *
 * @param nodeData - Node data of the chain being run; its `inputs.model` is inspected.
 * @param options - Run options of the chain, stored alongside the node data.
 */
export const injectChainNodeData = (nodeData: INodeData, options: ICommonObject) => {
    const chatModel = nodeData.inputs?.model as BaseChatModel
    if (!(chatModel instanceof FlowiseChatOpenAI)) return

    // TODO: this should not be static, need to figure out how to pass the nodeData and options to the invoke method
    FlowiseChatOpenAI.chainNodeData = nodeData
    FlowiseChatOpenAI.chainNodeOptions = options
}
|
||||
|
||||
export const checkSpeechToText = async (nodeData: INodeData, options: ICommonObject) => {
|
||||
const MODEL_NAME = 'whisper-1'
|
||||
|
||||
let input = undefined
|
||||
let model = nodeData.inputs?.model as BaseChatModel
|
||||
if (model instanceof ChatOpenAI && (model as any).multiModal) {
|
||||
const multiModalConfig = (model as any).multiModal
|
||||
if (options?.uploads) {
|
||||
if (options.uploads.length === 1 && input.length === 0 && options.uploads[0].mime === 'audio/webm') {
|
||||
if (options.uploads.length === 1 && options.uploads[0].mime === 'audio/webm') {
|
||||
const upload = options.uploads[0]
|
||||
//special case, text input is empty, but we have an upload (recorded audio)
|
||||
if (multiModalConfig.allowSpeechToText) {
|
||||
|
||||
@@ -474,7 +474,6 @@ export class App {
|
||||
const allowances: IUploadFileSizeAndTypes[] = []
|
||||
let allowSpeechToText = false
|
||||
let allowImageUploads = false
|
||||
let allowAudioUploads = false
|
||||
flowObj.nodes.forEach((node: IReactFlowNode) => {
|
||||
if (uploadAllowedCategoryNodes.indexOf(node.data.category) > -1) {
|
||||
logger.debug(`[server]: Found Eligible Node ${node.data.type}, Allowing Uploads.`)
|
||||
@@ -484,18 +483,11 @@ export class App {
|
||||
node.data.inputParams.map((param: INodeParams) => {
|
||||
if (param.name === 'allowImageUploads' && node.data.inputs?.['allowImageUploads'] && !allowImageUploads) {
|
||||
allowances.push({
|
||||
fileTypes: 'image/gif;image/jpeg;image/png;image/webp'.split(';'),
|
||||
fileTypes: 'image/gif;image/jpeg;image/png;image/webp;'.split(';'),
|
||||
maxUploadSize: 5
|
||||
})
|
||||
allowImageUploads = true
|
||||
}
|
||||
if (param.name === 'allowAudioUploads' && node.data.inputs?.['allowAudioUploads'] && !allowAudioUploads) {
|
||||
allowances.push({
|
||||
fileTypes: 'audio/mpeg;audio/x-wav;audio/mp4'.split(';'),
|
||||
maxUploadSize: 5
|
||||
})
|
||||
allowAudioUploads = true
|
||||
}
|
||||
if (param.name === 'allowSpeechToText' && node.data.inputs?.['allowSpeechToText']) {
|
||||
allowSpeechToText = true
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ import {
|
||||
Typography
|
||||
} from '@mui/material'
|
||||
import { useTheme } from '@mui/material/styles'
|
||||
import { IconCircleDot, IconDownload, IconSend, IconMicrophone, IconPhotoPlus, IconSquare, IconTrash, IconX } from '@tabler/icons'
|
||||
import { IconCircleDot, IconDownload, IconSend, IconMicrophone, IconPhotoPlus, IconTrash, IconX } from '@tabler/icons'
|
||||
import robotPNG from 'assets/images/robot.png'
|
||||
import userPNG from 'assets/images/account.png'
|
||||
import audioUploadSVG from 'assets/images/wave-sound.jpg'
|
||||
@@ -897,9 +897,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
||||
<IconX color={loading || !chatflowid ? '#9e9e9e' : customization.isDarkMode ? 'white' : '#1e88e5'} />
|
||||
</IconButton>
|
||||
<IconButton onClick={onRecordingStopped} size='small'>
|
||||
<IconSquare
|
||||
color={loading || !chatflowid ? '#9e9e9e' : customization.isDarkMode ? 'white' : '#1e88e5'}
|
||||
/>
|
||||
<IconSend color={loading || !chatflowid ? '#9e9e9e' : customization.isDarkMode ? 'white' : '#1e88e5'} />
|
||||
</IconButton>
|
||||
</div>
|
||||
</Box>
|
||||
|
||||
Reference in New Issue
Block a user