mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 23:01:09 +03:00
GPT Vision: Converting vision into Multi Modal. Base Changes.
This commit is contained in:
@@ -0,0 +1,61 @@
|
|||||||
|
import { INode, INodeData, INodeParams } from '../../../src'
|
||||||
|
|
||||||
|
/**
 * Node definition for OpenAI Whisper speech-to-text.
 *
 * The class only carries descriptive metadata (label, type, icon, category)
 * and the list of input parameters; `init` returns an empty object.
 */
class OpenAIAudioWhisper implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        // The node type doubles as its only base class.
        const nodeType = 'OpenAIWhisper'

        // User-facing selector: transcription vs. translation.
        const purposeInput: INodeParams = {
            label: 'Purpose',
            name: 'purpose',
            type: 'options',
            options: ['transcription', 'translation'].map((purpose) => ({
                label: purpose,
                name: purpose
            }))
        }

        // Hidden input whose default is a ';'-separated list of MIME types.
        const uploadTypesInput: INodeParams = {
            label: 'Accepted Upload Types',
            name: 'allowedUploadTypes',
            type: 'string',
            default: 'audio/mpeg;audio/x-wav;audio/mp4',
            hidden: true
        }

        // Hidden input whose default is the size limit in MB, stored as a string.
        const uploadSizeInput: INodeParams = {
            label: 'Maximum Upload Size (MB)',
            name: 'maxUploadSize',
            type: 'number',
            default: '5',
            hidden: true
        }

        this.label = 'Open AI Whisper'
        this.name = 'openAIAudioWhisper'
        this.version = 1.0
        this.type = nodeType
        this.description = 'Speech to text using OpenAI Whisper API'
        this.icon = 'audio.svg'
        this.category = 'MultiModal'
        this.baseClasses = [nodeType]
        this.inputs = [purposeInput, uploadTypesInput, uploadSizeInput]
    }

    /**
     * Node initialisation hook.
     *
     * @param nodeData - node instance data; not read by this implementation
     * @returns a promise resolving to an empty object
     */
    async init(nodeData: INodeData): Promise<any> {
        const instance = {}
        return instance
    }
}
|
||||||
|
|
||||||
|
// Expose the node class under the `nodeClass` key of the CommonJS exports.
module.exports = { nodeClass: OpenAIAudioWhisper }
|
||||||
+25
-16
@@ -19,14 +19,14 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
credential: INodeParams
|
credential: INodeParams
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.label = 'Open AI Vision Chain'
|
this.label = 'Open AI MultiModal Chain'
|
||||||
this.name = 'openAIVisionChain'
|
this.name = 'openAIMultiModalChain'
|
||||||
this.version = 1.0
|
this.version = 1.0
|
||||||
this.type = 'OpenAIVisionChain'
|
this.type = 'OpenAIMultiModalChain'
|
||||||
this.icon = 'chain.svg'
|
this.icon = 'chain.svg'
|
||||||
this.category = 'Chains'
|
this.category = 'Chains'
|
||||||
this.badge = 'BETA'
|
this.badge = 'BETA'
|
||||||
this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .'
|
this.description = 'Chain to query against Image and Audio Input.'
|
||||||
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
|
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
|
||||||
this.credential = {
|
this.credential = {
|
||||||
label: 'Connect Credential',
|
label: 'Connect Credential',
|
||||||
@@ -36,16 +36,9 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
}
|
}
|
||||||
this.inputs = [
|
this.inputs = [
|
||||||
{
|
{
|
||||||
label: 'Model Name',
|
label: 'Audio Input',
|
||||||
name: 'modelName',
|
name: 'audioInput',
|
||||||
type: 'options',
|
type: 'OpenAIWhisper',
|
||||||
options: [
|
|
||||||
{
|
|
||||||
label: 'gpt-4-vision-preview',
|
|
||||||
name: 'gpt-4-vision-preview'
|
|
||||||
}
|
|
||||||
],
|
|
||||||
default: 'gpt-4-vision-preview',
|
|
||||||
optional: true
|
optional: true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -54,6 +47,22 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
type: 'BasePromptTemplate',
|
type: 'BasePromptTemplate',
|
||||||
optional: true
|
optional: true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
label: 'Model Name',
|
||||||
|
name: 'modelName',
|
||||||
|
type: 'options',
|
||||||
|
options: [
|
||||||
|
{
|
||||||
|
label: 'gpt-4-vision-preview',
|
||||||
|
name: 'gpt-4-vision-preview'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'whisper-1',
|
||||||
|
name: 'whisper-1'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
default: 'gpt-4-vision-preview'
|
||||||
|
},
|
||||||
{
|
{
|
||||||
label: 'Image Resolution',
|
label: 'Image Resolution',
|
||||||
description: 'This parameter controls the resolution in which the model views the image.',
|
description: 'This parameter controls the resolution in which the model views the image.',
|
||||||
@@ -122,8 +131,8 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
]
|
]
|
||||||
this.outputs = [
|
this.outputs = [
|
||||||
{
|
{
|
||||||
label: 'Open AI Vision Chain',
|
label: 'Open AI MultiModal Chain',
|
||||||
name: 'openAIVisionChain',
|
name: 'OpenAIMultiModalChain',
|
||||||
baseClasses: [this.type, ...getBaseClasses(VLLMChain)]
|
baseClasses: [this.type, ...getBaseClasses(VLLMChain)]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="mdi-text-to-speech" width="24" height="24" viewBox="0 0 24 24"><path d="M8,7A2,2 0 0,1 10,9V14A2,2 0 0,1 8,16A2,2 0 0,1 6,14V9A2,2 0 0,1 8,7M14,14C14,16.97 11.84,19.44 9,19.92V22H7V19.92C4.16,19.44 2,16.97 2,14H4A4,4 0 0,0 8,18A4,4 0 0,0 12,14H14M21.41,9.41L17.17,13.66L18.18,10H14A2,2 0 0,1 12,8V4A2,2 0 0,1 14,2H20A2,2 0 0,1 22,4V8C22,8.55 21.78,9.05 21.41,9.41Z" /></svg>
|
||||||
|
After Width: | Height: | Size: 611 B |
|
Before Width: | Height: | Size: 489 B After Width: | Height: | Size: 489 B |
Binary file not shown.
|
After Width: | Height: | Size: 4.9 KiB |
@@ -1212,30 +1212,32 @@ export class App {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
private uploadAllowedNodes = ['OpenAIVisionChain']
|
private uploadAllowedNodes = ['OpenAIMultiModalChain', 'OpenAIWhisper']
|
||||||
/**
 * Inspects a chatflow's saved graph and reports whether chat uploads are
 * enabled, and under which constraints.
 *
 * Every node whose `data.type` is listed in `this.uploadAllowedNodes`
 * contributes one allowance, built from the `default` values of its
 * `allowedUploadTypes` (a ';'-separated list) and `maxUploadSize` input
 * params. A node only counts when both values are usable: at least one
 * type and a positive, finite size.
 *
 * `allowUploads` is true only when at least one allowance was collected, so
 * the flag can never be true alongside an empty `allowed` list (which would
 * advertise uploads that no file could ever satisfy).
 *
 * @param result - chatflow whose `flowData` is a JSON string holding a `nodes` array
 * @returns `{ allowUploads, allowed }`, where `allowed` is a list of
 *          `{ allowedTypes: string[], maxUploadSize: number }`
 * @throws SyntaxError propagated from `JSON.parse` when `flowData` is not valid JSON
 */
private shouldAllowUploads(result: ChatFlow): any {
    const flowObj = JSON.parse(result.flowData)
    // Tolerate a flow saved without a nodes array instead of throwing.
    const nodes: IReactFlowNode[] = Array.isArray(flowObj?.nodes) ? flowObj.nodes : []
    const allowances: any[] = []

    nodes.forEach((node: IReactFlowNode) => {
        if (!this.uploadAllowedNodes.includes(node.data.type)) return
        logger.debug(`[server]: Found Eligible Node ${node.data.type}, Allowing Uploads.`)

        let allowedTypes: string[] = []
        let maxUploadSize = NaN
        const inputParams: any[] = Array.isArray(node.data.inputParams) ? node.data.inputParams : []

        inputParams.forEach((param: any) => {
            if (param.name === 'allowedUploadTypes' && typeof param.default === 'string') {
                allowedTypes = param.default
                    .split(';')
                    .map((type: string) => type.trim())
                    .filter((type: string) => type.length > 0)
            }
            if (param.name === 'maxUploadSize') {
                // Explicit radix; String() lets a numeric default parse as well.
                maxUploadSize = parseInt(String(param.default ?? '0'), 10)
            }
        })

        // Skip nodes lacking a usable type list or a positive size limit.
        if (allowedTypes.length > 0 && Number.isFinite(maxUploadSize) && maxUploadSize > 0) {
            allowances.push({ allowedTypes, maxUploadSize })
        }
    })

    return {
        allowUploads: allowances.length > 0,
        allowed: allowances
    }
}
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
|
After Width: | Height: | Size: 323 KiB |
@@ -8,6 +8,7 @@ import rehypeRaw from 'rehype-raw'
|
|||||||
import remarkGfm from 'remark-gfm'
|
import remarkGfm from 'remark-gfm'
|
||||||
import remarkMath from 'remark-math'
|
import remarkMath from 'remark-math'
|
||||||
import axios from 'axios'
|
import axios from 'axios'
|
||||||
|
import audioUploadSVG from 'assets/images/wave-sound.jpg'
|
||||||
|
|
||||||
import {
|
import {
|
||||||
Box,
|
Box,
|
||||||
@@ -85,23 +86,21 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
|||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
}
|
}
|
||||||
/**
 * Decides whether a file may be attached to the chat.
 *
 * The file is accepted when uploads are enabled and at least one entry of
 * `getAllowChatFlowUploads.data.allowed` lists the file's MIME type and has
 * a `maxUploadSize` (in MB) that the file does not exceed. When the file is
 * rejected, the user is told via `alert`.
 *
 * @param file - object exposing `type` (MIME string) and `size` (bytes)
 * @returns true when the file satisfies one of the upload rules, false otherwise
 */
const isFileAllowedForUpload = (file) => {
    // `data` is undefined until the request resolves; treat that as "no rules".
    const constraints = getAllowChatFlowUploads.data
    const rules = constraints?.allowUploads && Array.isArray(constraints.allowed) ? constraints.allowed : []

    const sizeInMB = file.size / 1024 / 1024
    // `some` stops at the first matching rule and always yields a boolean.
    const acceptFile = rules.some((rule) => Boolean(rule.allowedTypes?.includes(file.type)) && sizeInMB <= rule.maxUploadSize)

    if (!acceptFile) {
        alert(`Cannot upload file. Kindly check the allowed file types and maximum allowed size.`)
    }
    return acceptFile
}
|
||||||
const handleDrop = async (e) => {
|
const handleDrop = async (e) => {
|
||||||
if (!isChatFlowAvailableForUploads) {
|
if (!isChatFlowAvailableForUploads) {
|
||||||
@@ -124,9 +123,15 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
const { result } = evt.target
|
const { result } = evt.target
|
||||||
|
let previewUrl
|
||||||
|
if (file.type.startsWith('audio/')) {
|
||||||
|
previewUrl = audioUploadSVG
|
||||||
|
} else if (file.type.startsWith('image/')) {
|
||||||
|
previewUrl = URL.createObjectURL(file)
|
||||||
|
}
|
||||||
resolve({
|
resolve({
|
||||||
data: result,
|
data: result,
|
||||||
preview: URL.createObjectURL(file),
|
preview: previewUrl,
|
||||||
type: 'file',
|
type: 'file',
|
||||||
name: name,
|
name: name,
|
||||||
mime: file.type
|
mime: file.type
|
||||||
@@ -240,7 +245,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const previewStyle = {
|
const previewStyle = {
|
||||||
width: '64px',
|
width: '128px',
|
||||||
height: '64px',
|
height: '64px',
|
||||||
objectFit: 'cover' // This makes the image cover the area, cropping it if necessary
|
objectFit: 'cover' // This makes the image cover the area, cropping it if necessary
|
||||||
}
|
}
|
||||||
@@ -514,11 +519,17 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
|||||||
onDrop={handleDrop}
|
onDrop={handleDrop}
|
||||||
className={`file-drop-field`}
|
className={`file-drop-field`}
|
||||||
>
|
>
|
||||||
{isDragOver && (
|
{isDragOver && getAllowChatFlowUploads.data?.allowUploads && (
|
||||||
<Box className='drop-overlay'>
|
<Box className='drop-overlay'>
|
||||||
<Typography variant='h2'>Drop here to upload</Typography>
|
<Typography variant='h2'>Drop here to upload</Typography>
|
||||||
<Typography variant='subtitle1'>{getAllowChatFlowUploads.data?.allowedTypes?.join(', ')}</Typography>
|
{getAllowChatFlowUploads.data.allowed.map((allowed) => {
|
||||||
<Typography variant='subtitle1'>Max Allowed Size: {getAllowChatFlowUploads.data?.maxUploadSize} MB</Typography>
|
return (
|
||||||
|
<>
|
||||||
|
<Typography variant='subtitle1'>{allowed.allowedTypes?.join(', ')}</Typography>
|
||||||
|
<Typography variant='subtitle1'>Max Allowed Size: {allowed.maxUploadSize} MB</Typography>
|
||||||
|
</>
|
||||||
|
)
|
||||||
|
})}
|
||||||
</Box>
|
</Box>
|
||||||
)}
|
)}
|
||||||
<div className={`${isDialog ? 'cloud-dialog' : 'cloud'}`}>
|
<div className={`${isDialog ? 'cloud-dialog' : 'cloud'}`}>
|
||||||
@@ -727,7 +738,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
|||||||
<Grid container spacing={2} sx={{ p: 1, mt: '5px', ml: '1px' }}>
|
<Grid container spacing={2} sx={{ p: 1, mt: '5px', ml: '1px' }}>
|
||||||
{previews.map((item, index) => (
|
{previews.map((item, index) => (
|
||||||
<Grid item xs={12} sm={6} md={3} key={index}>
|
<Grid item xs={12} sm={6} md={3} key={index}>
|
||||||
<Card variant='outlined' sx={{ maxWidth: 64 }}>
|
<Card variant='outlined' sx={{ maxWidth: 128 }}>
|
||||||
<CardMedia
|
<CardMedia
|
||||||
component='img'
|
component='img'
|
||||||
image={item.preview}
|
image={item.preview}
|
||||||
@@ -735,7 +746,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
|
|||||||
alt={`preview ${index}`}
|
alt={`preview ${index}`}
|
||||||
style={previewStyle}
|
style={previewStyle}
|
||||||
/>
|
/>
|
||||||
<CardActions className='center' sx={{ padding: 0, margin: 0 }}>
|
<CardActions className='center' sx={{ p: 0, m: 0 }}>
|
||||||
<Button
|
<Button
|
||||||
startIcon={<DeleteIcon />}
|
startIcon={<DeleteIcon />}
|
||||||
onClick={() => handleDeletePreview(item)}
|
onClick={() => handleDeletePreview(item)}
|
||||||
|
|||||||
Reference in New Issue
Block a user