MultiModal: Adding functionality to base OpenAI Chat Model

This commit is contained in:
vinodkiran
2024-01-24 18:25:22 +05:30
parent 318686e622
commit 3ce22d0dde
7 changed files with 97 additions and 48 deletions
@@ -3,6 +3,7 @@ import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../
import { ChatOpenAI, OpenAIChatInput } from 'langchain/chat_models/openai'
import { BaseCache } from 'langchain/schema'
import { BaseLLMParams } from 'langchain/llms/base'
import { FlowiseChatOpenAI } from './FlowiseChatOpenAI'
class ChatOpenAI_ChatModels implements INode {
label: string
@@ -157,13 +158,7 @@ class ChatOpenAI_ChatModels implements INode {
label: 'Allow Image Uploads',
name: 'allowImageUploads',
type: 'boolean',
default: false,
optional: true
},
{
label: 'Allow Audio Uploads',
name: 'allowAudioUploads',
type: 'boolean',
description: 'Enabling this option, would default the model to gpt-4-vision-preview',
default: false,
optional: true
},
@@ -236,7 +231,6 @@ class ChatOpenAI_ChatModels implements INode {
const baseOptions = nodeData.inputs?.baseOptions
const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
const allowAudioUploads = nodeData.inputs?.allowAudioUploads as boolean
const allowSpeechToText = nodeData.inputs?.allowSpeechToText as boolean
const speechToTextMode = nodeData.inputs?.speechToTextMode as string
const imageResolution = nodeData.inputs?.imageResolution as string
@@ -269,24 +263,18 @@ class ChatOpenAI_ChatModels implements INode {
throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception)
}
}
const model = new ChatOpenAI(obj, {
basePath,
const model = new FlowiseChatOpenAI(obj, {
baseURL: basePath,
baseOptions: parsedBaseOptions
})
const multiModal = {
allowImageUploads: allowImageUploads ?? false,
allowAudioUploads: allowAudioUploads ?? false,
allowSpeechToText: allowSpeechToText ?? false,
imageResolution,
speechToTextMode
}
Object.defineProperty(model, 'multiModal', {
enumerable: true,
configurable: true,
writable: true,
value: multiModal
})
model.multiModal = multiModal
return model
}
}
@@ -0,0 +1,71 @@
import { ChatOpenAI, OpenAIChatInput } from 'langchain/chat_models/openai'
import { BaseChatModelParams } from 'langchain/chat_models/base'
import type { ClientOptions } from 'openai'
import type { LegacyOpenAIInput } from '@langchain/openai/dist/types'
import { BaseLanguageModelInput } from 'langchain/base_language'
import { ChatOpenAICallOptions } from '@langchain/openai/dist/chat_models'
import { BaseMessageChunk, BaseMessageLike, HumanMessage, LLMResult } from 'langchain/schema'
import { Callbacks } from '@langchain/core/callbacks/manager'
import { ICommonObject, INodeData } from '../../../src'
import { addImagesToMessages, checkSpeechToText } from '../../../src/MultiModalUtils'
import { ChatPromptTemplate, PromptTemplate } from 'langchain/prompts'
/**
 * ChatOpenAI subclass that rewrites the outgoing prompt with multimodal content
 * (a speech-to-text transcription and/or uploaded images) before delegating to
 * the base model.
 *
 * The chain context is read from the static `chainNodeData` / `chainNodeOptions`
 * fields. This class never assigns them itself, and because they are static they
 * are shared by every instance (see the TODO below).
 */
export class FlowiseChatOpenAI extends ChatOpenAI {
    /** Multimodal settings object; assigned from outside after construction. */
    multiModal: {}

    //TODO: Should be class variables and not static
    public static chainNodeData: INodeData
    public static chainNodeOptions: ICommonObject

    /**
     * @param fields - Model options forwarded to `ChatOpenAI`.
     * @param configuration - Client configuration (e.g. base URL, base options) forwarded to `ChatOpenAI`.
     */
    constructor(
        fields?: Partial<OpenAIChatInput> & BaseChatModelParams & { openAIApiKey?: string },
        /** @deprecated */
        configuration?: ClientOptions & LegacyOpenAIInput
    ) {
        // Forward the client configuration as well; passing only `fields` would
        // silently discard the caller's custom base URL / base options.
        super(fields, configuration)
    }

    /**
     * Pass-through to the base implementation; kept as an explicit extension point.
     */
    async invoke(input: BaseLanguageModelInput, options?: ChatOpenAICallOptions): Promise<BaseMessageChunk> {
        return super.invoke(input, options)
    }

    /**
     * Injects multimodal content into `messages` (mutating the caller's array in place)
     * and then delegates to the base `generate`.
     */
    async generate(messages: BaseMessageLike[][], options?: string[] | ChatOpenAICallOptions, callbacks?: Callbacks): Promise<LLMResult> {
        await this.injectMultiModalMessages(messages)
        return super.generate(messages, options, callbacks)
    }

    /**
     * Rewrites the last message of the first prompt in `messages`:
     *  - when `checkSpeechToText` yields a transcription, it becomes the message content
     *    (run through the chain's `PromptTemplate` when one is configured);
     *  - when `addImagesToMessages` yields content and the last message is a `HumanMessage`,
     *    that content replaces the message content and the model is switched to
     *    `gpt-4-vision-preview`.
     *
     * Only `messages[0]` is touched; any further prompts in the batch are left as they are.
     */
    private async injectMultiModalMessages(messages: BaseMessageLike[][]): Promise<void> {
        const nodeData = FlowiseChatOpenAI.chainNodeData
        const optionsData = FlowiseChatOpenAI.chainNodeOptions

        // The static chain context is assigned outside this class. Without it there is
        // nothing to inject, so behave like a plain ChatOpenAI instead of failing on
        // `nodeData.inputs` below.
        // NOTE(review): assumes the MultiModalUtils helpers cannot do useful work without nodeData — confirm.
        if (!nodeData) return

        // May be undefined when an empty batch is passed in.
        const firstPrompt: BaseMessageLike[] | undefined = messages[0]

        const audioTrans = await checkSpeechToText(nodeData, optionsData)
        if (audioTrans && firstPrompt) {
            const candidate = firstPrompt[firstPrompt.length - 1]
            // Only message objects carry a `content` field; skip a missing last message,
            // plain strings and [role, text] tuples rather than throwing on them.
            if (typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate) && 'content' in candidate) {
                const lastMessage = candidate as HumanMessage
                const prompt = nodeData.inputs?.prompt
                if (!prompt || prompt instanceof ChatPromptTemplate) {
                    lastMessage.content = audioTrans
                } else if (prompt instanceof PromptTemplate) {
                    // Feed the transcription into the template's first input variable.
                    const template: PromptTemplate = prompt
                    const inputVar = template.inputVariables[0]
                    const formattedValues: Record<string, unknown> = {}
                    if (inputVar !== undefined) {
                        formattedValues[inputVar] = audioTrans
                    }
                    lastMessage.content = await template.format(formattedValues)
                }
            }
        }

        const messageContent = addImagesToMessages(nodeData, optionsData)
        if (messageContent && firstPrompt) {
            const lastMessage = firstPrompt[firstPrompt.length - 1]
            if (lastMessage instanceof HumanMessage) {
                lastMessage.content = messageContent
                // Image content needs a vision-capable model.
                this.modelName = 'gpt-4-vision-preview'
            }
        }
    }
}