GPT Vision: update the Vision Chain node and add a chatId folder on the server side when saving messages.

2026-06-28 15:00:57 +03:00 · 2023-12-07 22:32:07 +05:30
parent b492153f8a
commit 68fbe0ea12
4 changed files with 112 additions and 42 deletions
@@ -1,10 +1,8 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
-import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
-import { VLLMChain } from './VLLMChain'
-import { BaseLanguageModel } from 'langchain/base_language'
+import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
+import { OpenAIVisionChainInput, VLLMChain } from './VLLMChain'
 import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
 import { formatResponse } from '../../outputparsers/OutputParserHelpers'
-import { ChatOpenAI } from 'langchain/chat_models/openai'

 class OpenAIVisionChain_Chains implements INode {
    label: string
@@ -18,6 +16,7 @@ class OpenAIVisionChain_Chains implements INode {
    description: string
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]
+    credential: INodeParams

    constructor() {
        this.label = 'Open AI Vision Chain'
@@ -26,14 +25,28 @@ class OpenAIVisionChain_Chains implements INode {
        this.type = 'OpenAIVisionChain'
        this.icon = 'chain.svg'
        this.category = 'Chains'
-        this.badge = 'EXPERIMENTAL'
+        this.badge = 'BETA'
        this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .'
        this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['openAIApi']
+        }
        this.inputs = [
            {
-                label: 'Language Model (Works only with Open AI [gpt-4-vision-preview])',
-                name: 'model',
-                type: 'BaseLanguageModel'
+                label: 'Model Name',
+                name: 'modelName',
+                type: 'options',
+                options: [
+                    {
+                        label: 'gpt-4-vision-preview',
+                        name: 'gpt-4-vision-preview'
+                    }
+                ],
+                default: 'gpt-4-vision-preview',
+                optional: true
            },
            {
                label: 'Prompt',
@@ -57,7 +70,33 @@ class OpenAIVisionChain_Chains implements INode {
                    }
                ],
                default: 'low',
-                optional: false
+                optional: false,
+                additionalParams: true
+            },
+            {
+                label: 'Temperature',
+                name: 'temperature',
+                type: 'number',
+                step: 0.1,
+                default: 0.9,
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Top Probability',
+                name: 'topP',
+                type: 'number',
+                step: 0.1,
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Max Tokens',
+                name: 'maxTokens',
+                type: 'number',
+                step: 1,
+                optional: true,
+                additionalParams: true
            },
            {
                label: 'Chain Name',
@@ -96,22 +135,26 @@ class OpenAIVisionChain_Chains implements INode {
    }

    async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
-        const model = nodeData.inputs?.model as BaseLanguageModel
        const prompt = nodeData.inputs?.prompt
        const output = nodeData.outputs?.output as string
        const imageResolution = nodeData.inputs?.imageResolution
        const promptValues = prompt.promptValues as ICommonObject
-        if (!(model as any).openAIApiKey || (model as any).modelName !== 'gpt-4-vision-preview') {
-            throw new Error('Chain works with OpenAI Vision model only')
-        }
-        const openAIModel = model as ChatOpenAI
-        const fields = {
-            openAIApiKey: openAIModel.openAIApiKey,
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
+        const temperature = nodeData.inputs?.temperature as string
+        const modelName = nodeData.inputs?.modelName as string
+        const maxTokens = nodeData.inputs?.maxTokens as string
+        const topP = nodeData.inputs?.topP as string
+        const fields: OpenAIVisionChainInput = {
+            openAIApiKey: openAIApiKey,
            imageResolution: imageResolution,
            verbose: process.env.DEBUG === 'true',
            imageUrls: options.uploads,
-            openAIModel: openAIModel
+            modelName: modelName
        }
+        if (temperature) fields.temperature = parseFloat(temperature)
+        if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
+        if (topP) fields.topP = parseFloat(topP)
        if (output === this.name) {
            const chain = new VLLMChain({
                ...fields,
@@ -2,7 +2,6 @@ import { OpenAI as OpenAIClient, ClientOptions } from 'openai'
 import { BaseChain, ChainInputs } from 'langchain/chains'
 import { ChainValues } from 'langchain/schema'
 import { BasePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
-import { ChatOpenAI } from 'langchain/chat_models/openai'
 import path from 'path'
 import { getUserHome } from '../../../src/utils'
 import fs from 'fs'
@@ -18,7 +17,10 @@ export interface OpenAIVisionChainInput extends ChainInputs {
    configuration?: ClientOptions
    imageUrls?: []
    imageResolution?: string
-    openAIModel: ChatOpenAI
+    temperature?: number
+    modelName?: string
+    maxTokens?: number
+    topP?: number
 }

 /**
@@ -30,12 +32,6 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
    static lc_name() {
        return 'VLLMChain'
    }
-
-    get lc_secrets(): { [key: string]: string } | undefined {
-        return {
-            openAIApiKey: 'OPENAI_API_KEY'
-        }
-    }
    prompt: BasePromptTemplate | undefined

    inputKey = 'input'
@@ -44,10 +40,13 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
    imageResolution: string = 'low'
    openAIApiKey?: string
    openAIOrganization?: string
-    openAIModel: ChatOpenAI
    clientConfig: ClientOptions
    client: OpenAIClient
    throwError: boolean
+    temperature?: number
+    modelName?: string
+    maxTokens?: number
+    topP?: number

    constructor(fields: OpenAIVisionChainInput) {
        super(fields)
@@ -55,13 +54,16 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
        this.imageResolution = fields?.imageResolution ?? 'low'
        this.openAIApiKey = fields?.openAIApiKey
        this.prompt = fields?.prompt
+        this.temperature = fields?.temperature
+        this.modelName = fields?.modelName
+        this.maxTokens = fields?.maxTokens
+        this.topP = fields?.topP
        this.imageUrls = fields?.imageUrls ?? []
        if (!this.openAIApiKey) {
            throw new Error('OpenAI API key not found')
        }

        this.openAIOrganization = fields?.openAIOrganization
-        this.openAIModel = fields.openAIModel

        this.clientConfig = {
            ...fields?.configuration,
@@ -76,12 +78,12 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
        const userInput = values[this.inputKey]

        const vRequest: any = {
-            model: 'gpt-4-vision-preview',
-            temperature: this.openAIModel.temperature,
-            top_p: this.openAIModel.topP,
+            model: this.modelName,
+            temperature: this.temperature,
+            top_p: this.topP,
            messages: []
        }
-        if (this.openAIModel.maxTokens) vRequest.max_tokens = this.openAIModel.maxTokens
+        if (this.maxTokens) vRequest.max_tokens = this.maxTokens
        else vRequest.max_tokens = 1024

        const userRole: any = { role: 'user' }
@@ -94,7 +96,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
            this.imageUrls.forEach((imageUrl: any) => {
                let bf = imageUrl?.data
                if (imageUrl.type == 'stored-file') {
-                    const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data)
+                    const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data, imageUrl.name)

                    // as the image is stored in the server, read the file and convert it to base64
                    const contents = fs.readFileSync(filePath)