mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 15:00:57 +03:00
GPT Vision: Converting vision into Multi Modal. Base Changes.
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src'
|
||||
|
||||
/**
 * Node descriptor for speech-to-text via the OpenAI Whisper API.
 *
 * The constructor only populates static metadata (label, name, type, icon,
 * category) and the list of input parameters; `init` currently yields an
 * empty object and performs no work.
 */
class OpenAIAudioWhisper implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        // Each selectable purpose uses the same text for its label and its name.
        const purposeChoices = ['transcription', 'translation'].map((purpose) => ({
            label: purpose,
            name: purpose
        }))

        // Selector between the two purposes; no default is declared.
        const purposeInput = {
            label: 'Purpose',
            name: 'purpose',
            type: 'options',
            options: purposeChoices
        }

        // Semicolon-separated MIME types; flagged `hidden`.
        const uploadTypesInput = {
            label: 'Accepted Upload Types',
            name: 'allowedUploadTypes',
            type: 'string',
            default: 'audio/mpeg;audio/x-wav;audio/mp4',
            hidden: true
        }

        // NOTE(review): the default is the string '5' although the declared type is
        // 'number' — kept as-is; confirm that consumers coerce it before use.
        const uploadSizeInput = {
            label: 'Maximum Upload Size (MB)',
            name: 'maxUploadSize',
            type: 'number',
            default: '5',
            hidden: true
        }

        // Assignment order is kept stable so the instance's own-key order is unchanged.
        this.label = 'Open AI Whisper'
        this.name = 'openAIAudioWhisper'
        this.version = 1.0
        this.type = 'OpenAIWhisper'
        this.description = 'Speech to text using OpenAI Whisper API'
        this.icon = 'audio.svg'
        this.category = 'MultiModal'
        // The node's only base class is its own type.
        this.baseClasses = [this.type]
        this.inputs = [purposeInput, uploadTypesInput, uploadSizeInput]
    }

    /**
     * Initialises the node.
     *
     * @param nodeData - node data supplied by the caller; not read by this implementation.
     * @returns a promise resolving to an empty object.
     */
    async init(nodeData: INodeData): Promise<any> {
        const emptyResult = {}
        return emptyResult
    }
}
|
||||
|
||||
// Expose the node class under the `nodeClass` key (CommonJS export).
// NOTE(review): presumably the node loader looks this key up by name — confirm against the loader.
module.exports = { nodeClass: OpenAIAudioWhisper }
|
||||
+25
-16
@@ -19,14 +19,14 @@ class OpenAIVisionChain_Chains implements INode {
|
||||
credential: INodeParams
|
||||
|
||||
constructor() {
|
||||
this.label = 'Open AI Vision Chain'
|
||||
this.name = 'openAIVisionChain'
|
||||
this.label = 'Open AI MultiModal Chain'
|
||||
this.name = 'openAIMultiModalChain'
|
||||
this.version = 1.0
|
||||
this.type = 'OpenAIVisionChain'
|
||||
this.type = 'OpenAIMultiModalChain'
|
||||
this.icon = 'chain.svg'
|
||||
this.category = 'Chains'
|
||||
this.badge = 'BETA'
|
||||
this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .'
|
||||
this.description = 'Chain to query against Image and Audio Input.'
|
||||
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
|
||||
this.credential = {
|
||||
label: 'Connect Credential',
|
||||
@@ -36,16 +36,9 @@ class OpenAIVisionChain_Chains implements INode {
|
||||
}
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Model Name',
|
||||
name: 'modelName',
|
||||
type: 'options',
|
||||
options: [
|
||||
{
|
||||
label: 'gpt-4-vision-preview',
|
||||
name: 'gpt-4-vision-preview'
|
||||
}
|
||||
],
|
||||
default: 'gpt-4-vision-preview',
|
||||
label: 'Audio Input',
|
||||
name: 'audioInput',
|
||||
type: 'OpenAIWhisper',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
@@ -54,6 +47,22 @@ class OpenAIVisionChain_Chains implements INode {
|
||||
type: 'BasePromptTemplate',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Model Name',
|
||||
name: 'modelName',
|
||||
type: 'options',
|
||||
options: [
|
||||
{
|
||||
label: 'gpt-4-vision-preview',
|
||||
name: 'gpt-4-vision-preview'
|
||||
},
|
||||
{
|
||||
label: 'whisper-1',
|
||||
name: 'whisper-1'
|
||||
}
|
||||
],
|
||||
default: 'gpt-4-vision-preview'
|
||||
},
|
||||
{
|
||||
label: 'Image Resolution',
|
||||
description: 'This parameter controls the resolution in which the model views the image.',
|
||||
@@ -122,8 +131,8 @@ class OpenAIVisionChain_Chains implements INode {
|
||||
]
|
||||
this.outputs = [
|
||||
{
|
||||
label: 'Open AI Vision Chain',
|
||||
name: 'openAIVisionChain',
|
||||
label: 'Open AI MultiModal Chain',
|
||||
name: 'OpenAIMultiModalChain',
|
||||
baseClasses: [this.type, ...getBaseClasses(VLLMChain)]
|
||||
},
|
||||
{
|
||||
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="mdi-text-to-speech" width="24" height="24" viewBox="0 0 24 24"><path d="M8,7A2,2 0 0,1 10,9V14A2,2 0 0,1 8,16A2,2 0 0,1 6,14V9A2,2 0 0,1 8,7M14,14C14,16.97 11.84,19.44 9,19.92V22H7V19.92C4.16,19.44 2,16.97 2,14H4A4,4 0 0,0 8,18A4,4 0 0,0 12,14H14M21.41,9.41L17.17,13.66L18.18,10H14A2,2 0 0,1 12,8V4A2,2 0 0,1 14,2H20A2,2 0 0,1 22,4V8C22,8.55 21.78,9.05 21.41,9.41Z" /></svg>
|
||||
|
After Width: | Height: | Size: 611 B |
|
Before Width: | Height: | Size: 489 B After Width: | Height: | Size: 489 B |
Binary file not shown.
|
After Width: | Height: | Size: 4.9 KiB |
Reference in New Issue
Block a user