mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 17:01:00 +03:00
MultiModal : Adding functionality to base OpenAI Chat Model
This commit is contained in:
@@ -3,6 +3,7 @@ import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../
|
||||
import { ChatOpenAI, OpenAIChatInput } from 'langchain/chat_models/openai'
|
||||
import { BaseCache } from 'langchain/schema'
|
||||
import { BaseLLMParams } from 'langchain/llms/base'
|
||||
import { FlowiseChatOpenAI } from './FlowiseChatOpenAI'
|
||||
|
||||
class ChatOpenAI_ChatModels implements INode {
|
||||
label: string
|
||||
@@ -157,13 +158,7 @@ class ChatOpenAI_ChatModels implements INode {
|
||||
label: 'Allow Image Uploads',
|
||||
name: 'allowImageUploads',
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Allow Audio Uploads',
|
||||
name: 'allowAudioUploads',
|
||||
type: 'boolean',
|
||||
description: 'Enabling this option, would default the model to gpt-4-vision-preview',
|
||||
default: false,
|
||||
optional: true
|
||||
},
|
||||
@@ -236,7 +231,6 @@ class ChatOpenAI_ChatModels implements INode {
|
||||
const baseOptions = nodeData.inputs?.baseOptions
|
||||
|
||||
const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
|
||||
const allowAudioUploads = nodeData.inputs?.allowAudioUploads as boolean
|
||||
const allowSpeechToText = nodeData.inputs?.allowSpeechToText as boolean
|
||||
const speechToTextMode = nodeData.inputs?.speechToTextMode as string
|
||||
const imageResolution = nodeData.inputs?.imageResolution as string
|
||||
@@ -269,24 +263,18 @@ class ChatOpenAI_ChatModels implements INode {
|
||||
throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception)
|
||||
}
|
||||
}
|
||||
const model = new ChatOpenAI(obj, {
|
||||
basePath,
|
||||
const model = new FlowiseChatOpenAI(obj, {
|
||||
baseURL: basePath,
|
||||
baseOptions: parsedBaseOptions
|
||||
})
|
||||
|
||||
const multiModal = {
|
||||
allowImageUploads: allowImageUploads ?? false,
|
||||
allowAudioUploads: allowAudioUploads ?? false,
|
||||
allowSpeechToText: allowSpeechToText ?? false,
|
||||
imageResolution,
|
||||
speechToTextMode
|
||||
}
|
||||
Object.defineProperty(model, 'multiModal', {
|
||||
enumerable: true,
|
||||
configurable: true,
|
||||
writable: true,
|
||||
value: multiModal
|
||||
})
|
||||
model.multiModal = multiModal
|
||||
return model
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
import { ChatOpenAI, OpenAIChatInput } from 'langchain/chat_models/openai'
|
||||
import { BaseChatModelParams } from 'langchain/chat_models/base'
|
||||
import type { ClientOptions } from 'openai'
|
||||
import type { LegacyOpenAIInput } from '@langchain/openai/dist/types'
|
||||
import { BaseLanguageModelInput } from 'langchain/base_language'
|
||||
import { ChatOpenAICallOptions } from '@langchain/openai/dist/chat_models'
|
||||
import { BaseMessageChunk, BaseMessageLike, HumanMessage, LLMResult } from 'langchain/schema'
|
||||
import { Callbacks } from '@langchain/core/callbacks/manager'
|
||||
import { ICommonObject, INodeData } from '../../../src'
|
||||
import { addImagesToMessages, checkSpeechToText } from '../../../src/MultiModalUtils'
|
||||
import { ChatPromptTemplate, PromptTemplate } from 'langchain/prompts'
|
||||
|
||||
/**
 * ChatOpenAI subclass that rewrites the last message of the first prompt batch
 * before generation: a speech-to-text transcription replaces (or is formatted
 * into) the message text, and uploaded images replace the message content.
 *
 * The chain's node data and options are read from the static fields
 * `chainNodeData` / `chainNodeOptions`, which must be set by the caller before
 * `generate` runs.
 */
export class FlowiseChatOpenAI extends ChatOpenAI {
    /** Multi-modal settings object attached to the model by the code that constructs it. */
    multiModal: {}

    //TODO: Should be class variables and not static
    public static chainNodeData: INodeData
    public static chainNodeOptions: ICommonObject

    /**
     * @param fields - Model options forwarded to ChatOpenAI.
     * @param configuration - Client options (e.g. `baseURL`) forwarded to ChatOpenAI.
     */
    constructor(
        fields?: Partial<OpenAIChatInput> & BaseChatModelParams & { openAIApiKey?: string },
        /** @deprecated */
        configuration?: ClientOptions & LegacyOpenAIInput
    ) {
        // Forward `configuration` as well: passing only `fields` silently discards
        // the client options (base URL, base options) supplied by the caller.
        super(fields, configuration)
    }

    /** Pass-through to the base implementation; kept as an extension point. */
    async invoke(input: BaseLanguageModelInput, options?: ChatOpenAICallOptions): Promise<BaseMessageChunk> {
        return super.invoke(input, options)
    }

    /**
     * Injects multi-modal content into `messages` (mutating it in place) and then
     * delegates to the base `generate`.
     */
    async generate(messages: BaseMessageLike[][], options?: string[] | ChatOpenAICallOptions, callbacks?: Callbacks): Promise<LLMResult> {
        await this.injectMultiModalMessages(messages)
        return super.generate(messages, options, callbacks)
    }

    /**
     * Rewrites the last message of `messages[0]` in place.
     *
     * - If `checkSpeechToText` yields a transcription, it becomes the message
     *   content directly (no prompt, or a ChatPromptTemplate), or is formatted
     *   into the node's PromptTemplate using its first input variable.
     * - If `addImagesToMessages` yields content and the last message is a
     *   HumanMessage, that content replaces the message content and the model
     *   name on this instance is switched to 'gpt-4-vision-preview'.
     *
     * Does nothing when the static node data is unset or the first batch is
     * missing or empty.
     */
    private async injectMultiModalMessages(messages: BaseMessageLike[][]): Promise<void> {
        const nodeData = FlowiseChatOpenAI.chainNodeData
        const optionsData = FlowiseChatOpenAI.chainNodeOptions
        // The statics are only populated once a chain has registered its node data.
        if (!nodeData) return

        const firstBatch: BaseMessageLike[] | undefined = messages[0]
        const hasLastMessage = firstBatch !== undefined && firstBatch.length > 0

        const audioTrans = await checkSpeechToText(nodeData, optionsData)
        if (audioTrans && firstBatch !== undefined && hasLastMessage) {
            const candidate = firstBatch[firstBatch.length - 1]
            // Only message objects carry a mutable `content`; plain strings and
            // [role, text] tuples are left untouched.
            if (typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate)) {
                const lastMessage = candidate as HumanMessage
                const prompt = nodeData.inputs?.prompt
                if (!prompt) {
                    lastMessage.content = audioTrans
                } else if (prompt instanceof ChatPromptTemplate) {
                    lastMessage.content = audioTrans
                } else if (prompt instanceof PromptTemplate) {
                    // The transcription fills the template's first input variable.
                    const inputVar = prompt.inputVariables[0]
                    const formattedValues: Record<string, unknown> = {}
                    if (inputVar !== undefined) {
                        formattedValues[inputVar] = audioTrans
                    }
                    lastMessage.content = await prompt.format(formattedValues)
                }
            }
        }

        const messageContent = addImagesToMessages(nodeData, optionsData)
        if (messageContent && firstBatch !== undefined && hasLastMessage) {
            const candidate = firstBatch[firstBatch.length - 1]
            if (candidate instanceof HumanMessage) {
                candidate.content = messageContent
                // NOTE: this changes the model name on the instance, so it also
                // applies to later calls made with the same model object.
                this.modelName = 'gpt-4-vision-preview'
            }
        }
    }
}
|
||||
Reference in New Issue
Block a user