MultiModal: Add functionality to base OpenAI Chat Model

This commit is contained in:
vinodkiran
2024-01-24 18:25:22 +05:30
parent 318686e622
commit 3ce22d0dde
7 changed files with 97 additions and 48 deletions
@@ -8,8 +8,7 @@ import { flatten } from 'lodash'
import { Document } from 'langchain/document'
import { RunnableSequence } from 'langchain/schema/runnable'
import { StringOutputParser } from 'langchain/schema/output_parser'
import { addImagesToMessages, processSpeechToText } from '../../../src/MultiModalUtils'
import { HumanMessage } from 'langchain/schema'
import { injectChainNodeData } from '../../../src/MultiModalUtils'
// System prompt text that prepareChatPrompt feeds into SystemMessagePromptTemplate
// (optionally prefixed by a user-supplied prompt).
// NOTE(review): this is a module-level `let`, and the `if (finalText) systemMessage = ...`
// statement in prepareChatPrompt appears to reassign this shared binding. If there is no
// local shadowing, appended context would accumulate across calls — confirm, and consider
// building the final text in a local variable instead.
let systemMessage = `The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.`
// Name of the prompt variable that carries the user's input; it is interpolated as
// `{input}` into the human message template.
const inputKey = 'input'
@@ -75,7 +74,7 @@ class ConversationChain_Chains implements INode {
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
const memory = nodeData.inputs?.memory
input = await processSpeechToText(nodeData, input, options)
injectChainNodeData(nodeData, options)
const chain = prepareChain(nodeData, options, this.sessionId)
@@ -132,24 +131,12 @@ const prepareChatPrompt = (nodeData: INodeData, options: ICommonObject) => {
if (finalText) systemMessage = `${systemMessage}\nThe AI has the following context:\n${finalText}`
// TODO: add audio uploads
// if (options.uploads.length > 0) {
// const audioUploads = getAudioUploads(options.uploads)
// for (const upload of audioUploads) {
// await this.processAudioWithWhisper(upload, chatMessages)
// }
// }
const imageContent = addImagesToMessages(nodeData, options)
// TODO: promptMessages should not be typed as any[]; determine the proper element type accepted by ChatPromptTemplate.fromMessages.
let promptMessages: any[] = [
SystemMessagePromptTemplate.fromTemplate(prompt ? `${prompt}\n${systemMessage}` : systemMessage),
new MessagesPlaceholder(memory.memoryKey ?? 'chat_history'),
HumanMessagePromptTemplate.fromTemplate(`{${inputKey}}`)
]
if (imageContent.length > 0) {
promptMessages.push(new HumanMessage({ content: imageContent }))
}
const chatPrompt = ChatPromptTemplate.fromMessages(promptMessages)
return chatPrompt
@@ -8,6 +8,7 @@ import { formatResponse, injectOutputParser } from '../../outputparsers/OutputPa
import { BaseLLMOutputParser } from 'langchain/schema/output_parser'
import { OutputFixingParser } from 'langchain/output_parsers'
import { checkInputs, Moderation, streamResponse } from '../../moderation/Moderation'
import { injectChainNodeData } from '../../../src/MultiModalUtils'
class LLMChain_Chains implements INode {
label: string
@@ -129,6 +130,7 @@ class LLMChain_Chains implements INode {
if (!this.outputParser && outputParser) {
this.outputParser = outputParser
}
injectChainNodeData(nodeData, options)
promptValues = injectOutputParser(this.outputParser, chain, promptValues)
const res = await runPrediction(inputVariables, chain, input, promptValues, options, nodeData)
// eslint-disable-next-line no-console