GPT Vision: Converting vision into Multi Modal. Base Changes.

This commit is contained in:
vinodkiran
2023-12-08 17:21:53 +05:30
parent 68fbe0ea12
commit 32575828cd
9 changed files with 129 additions and 45 deletions
@@ -0,0 +1,61 @@
import { INode, INodeData, INodeParams } from '../../../src'
/**
 * Node definition for speech-to-text using the OpenAI Whisper API.
 *
 * The constructor populates the node's static metadata and its input
 * parameters: a `purpose` selector (transcription or translation) and two
 * hidden parameters whose defaults describe the upload constraints
 * (a semicolon-separated list of accepted types and a maximum size in MB).
 */
class OpenAIAudioWhisper implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        // Every purpose option uses the same text for its label and its name.
        const purposeChoices = ['transcription', 'translation'].map((purpose) => ({
            label: purpose,
            name: purpose
        }))

        const purposeSelector: INodeParams = {
            label: 'Purpose',
            name: 'purpose',
            type: 'options',
            options: purposeChoices
        }

        // Hidden parameter: its default is a semicolon-separated list of accepted upload types.
        const uploadTypesParam: INodeParams = {
            label: 'Accepted Upload Types',
            name: 'allowedUploadTypes',
            type: 'string',
            default: 'audio/mpeg;audio/x-wav;audio/mp4',
            hidden: true
        }

        // Hidden parameter: the default is deliberately kept as the string '5'.
        const uploadSizeParam: INodeParams = {
            label: 'Maximum Upload Size (MB)',
            name: 'maxUploadSize',
            type: 'number',
            default: '5',
            hidden: true
        }

        this.label = 'Open AI Whisper'
        this.name = 'openAIAudioWhisper'
        this.version = 1.0
        this.type = 'OpenAIWhisper'
        this.description = 'Speech to text using OpenAI Whisper API'
        this.icon = 'audio.svg'
        this.category = 'MultiModal'
        // The node's only base class is its own type.
        this.baseClasses = [this.type]
        this.inputs = [purposeSelector, uploadTypesParam, uploadSizeParam]
    }

    /**
     * Initialises the node.
     *
     * @param nodeData - Node data supplied by the caller; not read by this implementation.
     * @returns A promise resolving to a new empty object.
     */
    async init(nodeData: INodeData): Promise<any> {
        const emptyResult = {}
        return emptyResult
    }
}
// CommonJS export: exposes the node class under the `nodeClass` key.
// NOTE(review): presumably the key the project's node loader looks up — confirm against the loader.
module.exports = { nodeClass: OpenAIAudioWhisper }
@@ -19,14 +19,14 @@ class OpenAIVisionChain_Chains implements INode {
credential: INodeParams credential: INodeParams
constructor() { constructor() {
this.label = 'Open AI Vision Chain' this.label = 'Open AI MultiModal Chain'
this.name = 'openAIVisionChain' this.name = 'openAIMultiModalChain'
this.version = 1.0 this.version = 1.0
this.type = 'OpenAIVisionChain' this.type = 'OpenAIMultiModalChain'
this.icon = 'chain.svg' this.icon = 'chain.svg'
this.category = 'Chains' this.category = 'Chains'
this.badge = 'BETA' this.badge = 'BETA'
this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .' this.description = 'Chain to query against Image and Audio Input.'
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)] this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
this.credential = { this.credential = {
label: 'Connect Credential', label: 'Connect Credential',
@@ -36,16 +36,9 @@ class OpenAIVisionChain_Chains implements INode {
} }
this.inputs = [ this.inputs = [
{ {
label: 'Model Name', label: 'Audio Input',
name: 'modelName', name: 'audioInput',
type: 'options', type: 'OpenAIWhisper',
options: [
{
label: 'gpt-4-vision-preview',
name: 'gpt-4-vision-preview'
}
],
default: 'gpt-4-vision-preview',
optional: true optional: true
}, },
{ {
@@ -54,6 +47,22 @@ class OpenAIVisionChain_Chains implements INode {
type: 'BasePromptTemplate', type: 'BasePromptTemplate',
optional: true optional: true
}, },
{
label: 'Model Name',
name: 'modelName',
type: 'options',
options: [
{
label: 'gpt-4-vision-preview',
name: 'gpt-4-vision-preview'
},
{
label: 'whisper-1',
name: 'whisper-1'
}
],
default: 'gpt-4-vision-preview'
},
{ {
label: 'Image Resolution', label: 'Image Resolution',
description: 'This parameter controls the resolution in which the model views the image.', description: 'This parameter controls the resolution in which the model views the image.',
@@ -122,8 +131,8 @@ class OpenAIVisionChain_Chains implements INode {
] ]
this.outputs = [ this.outputs = [
{ {
label: 'Open AI Vision Chain', label: 'Open AI MultiModal Chain',
name: 'openAIVisionChain', name: 'OpenAIMultiModalChain',
baseClasses: [this.type, ...getBaseClasses(VLLMChain)] baseClasses: [this.type, ...getBaseClasses(VLLMChain)]
}, },
{ {
@@ -0,0 +1 @@
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="mdi-text-to-speech" width="24" height="24" viewBox="0 0 24 24"><path d="M8,7A2,2 0 0,1 10,9V14A2,2 0 0,1 8,16A2,2 0 0,1 6,14V9A2,2 0 0,1 8,7M14,14C14,16.97 11.84,19.44 9,19.92V22H7V19.92C4.16,19.44 2,16.97 2,14H4A4,4 0 0,0 8,18A4,4 0 0,0 12,14H14M21.41,9.41L17.17,13.66L18.18,10H14A2,2 0 0,1 12,8V4A2,2 0 0,1 14,2H20A2,2 0 0,1 22,4V8C22,8.55 21.78,9.05 21.41,9.41Z" /></svg>

After

Width:  |  Height:  |  Size: 611 B

Before

Width:  |  Height:  |  Size: 489 B

After

Width:  |  Height:  |  Size: 489 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

+9 -7
View File
@@ -1212,30 +1212,32 @@ export class App {
}) })
} }
private uploadAllowedNodes = ['OpenAIVisionChain'] private uploadAllowedNodes = ['OpenAIMultiModalChain', 'OpenAIWhisper']
private shouldAllowUploads(result: ChatFlow): any { private shouldAllowUploads(result: ChatFlow): any {
const flowObj = JSON.parse(result.flowData) const flowObj = JSON.parse(result.flowData)
let allowUploads = false let allowUploads = false
let allowedTypes: string[] = [] const allowances: any = []
let maxUploadSize: number = -1
flowObj.nodes.forEach((node: IReactFlowNode) => { flowObj.nodes.forEach((node: IReactFlowNode) => {
if (this.uploadAllowedNodes.indexOf(node.data.type) > -1) { if (this.uploadAllowedNodes.indexOf(node.data.type) > -1) {
logger.debug(`[server]: Found Eligible Node ${node.data.type}, Allowing Uploads.`) logger.debug(`[server]: Found Eligible Node ${node.data.type}, Allowing Uploads.`)
allowUploads = true allowUploads = true
const allowance: any = {}
node.data.inputParams.map((param: any) => { node.data.inputParams.map((param: any) => {
if (param.name === 'allowedUploadTypes') { if (param.name === 'allowedUploadTypes') {
allowedTypes = param.default.split(';') allowance.allowedTypes = param.default.split(';')
} }
if (param.name === 'maxUploadSize') { if (param.name === 'maxUploadSize') {
maxUploadSize = parseInt(param.default ? param.default : '0') allowance.maxUploadSize = parseInt(param.default ? param.default : '0')
} }
}) })
if (allowance.allowedTypes && allowance.maxUploadSize) {
allowances.push(allowance)
}
} }
}) })
return { return {
allowUploads, allowUploads,
allowedTypes, allowed: allowances
maxUploadSize
} }
} }
Binary file not shown.

After

Width:  |  Height:  |  Size: 323 KiB

@@ -8,6 +8,7 @@ import rehypeRaw from 'rehype-raw'
import remarkGfm from 'remark-gfm' import remarkGfm from 'remark-gfm'
import remarkMath from 'remark-math' import remarkMath from 'remark-math'
import axios from 'axios' import axios from 'axios'
import audioUploadSVG from 'assets/images/wave-sound.jpg'
import { import {
Box, Box,
@@ -85,23 +86,21 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
e.preventDefault() e.preventDefault()
} }
const isFileAllowedForUpload = (file) => { const isFileAllowedForUpload = (file) => {
// check if file type is allowed const constraints = getAllowChatFlowUploads.data
if (getAllowChatFlowUploads.data?.allowedTypes?.length > 0) { let acceptFile = false
const allowedFileTypes = getAllowChatFlowUploads.data?.allowedTypes if (constraints.allowUploads) {
if (!allowedFileTypes.includes(file.type)) { const fileType = file.type
alert(`File ${file.name} is not allowed.\nAllowed file types are ${allowedFileTypes.join(', ')}.`)
return false
}
}
// check if file size is allowed
if (getAllowChatFlowUploads.data?.maxUploadSize > 0) {
const sizeInMB = file.size / 1024 / 1024 const sizeInMB = file.size / 1024 / 1024
if (sizeInMB > getAllowChatFlowUploads.data?.maxUploadSize) { constraints.allowed.map((allowed) => {
alert(`File ${file.name} is too large.\nMaximum allowed size is ${getAllowChatFlowUploads.data?.maxUploadSize} MB.`) if (allowed.allowedTypes.includes(fileType) && sizeInMB <= allowed.maxUploadSize) {
return false acceptFile = true
} }
})
} }
return true if (!acceptFile) {
alert(`Cannot upload file. Kindly check the allowed file types and maximum allowed size.`)
}
return acceptFile
} }
const handleDrop = async (e) => { const handleDrop = async (e) => {
if (!isChatFlowAvailableForUploads) { if (!isChatFlowAvailableForUploads) {
@@ -124,9 +123,15 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
return return
} }
const { result } = evt.target const { result } = evt.target
let previewUrl
if (file.type.startsWith('audio/')) {
previewUrl = audioUploadSVG
} else if (file.type.startsWith('image/')) {
previewUrl = URL.createObjectURL(file)
}
resolve({ resolve({
data: result, data: result,
preview: URL.createObjectURL(file), preview: previewUrl,
type: 'file', type: 'file',
name: name, name: name,
mime: file.type mime: file.type
@@ -240,7 +245,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
} }
const previewStyle = { const previewStyle = {
width: '64px', width: '128px',
height: '64px', height: '64px',
objectFit: 'cover' // This makes the image cover the area, cropping it if necessary objectFit: 'cover' // This makes the image cover the area, cropping it if necessary
} }
@@ -514,11 +519,17 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
onDrop={handleDrop} onDrop={handleDrop}
className={`file-drop-field`} className={`file-drop-field`}
> >
{isDragOver && ( {isDragOver && getAllowChatFlowUploads.data?.allowUploads && (
<Box className='drop-overlay'> <Box className='drop-overlay'>
<Typography variant='h2'>Drop here to upload</Typography> <Typography variant='h2'>Drop here to upload</Typography>
<Typography variant='subtitle1'>{getAllowChatFlowUploads.data?.allowedTypes?.join(', ')}</Typography> {getAllowChatFlowUploads.data.allowed.map((allowed) => {
<Typography variant='subtitle1'>Max Allowed Size: {getAllowChatFlowUploads.data?.maxUploadSize} MB</Typography> return (
<>
<Typography variant='subtitle1'>{allowed.allowedTypes?.join(', ')}</Typography>
<Typography variant='subtitle1'>Max Allowed Size: {allowed.maxUploadSize} MB</Typography>
</>
)
})}
</Box> </Box>
)} )}
<div className={`${isDialog ? 'cloud-dialog' : 'cloud'}`}> <div className={`${isDialog ? 'cloud-dialog' : 'cloud'}`}>
@@ -727,7 +738,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
<Grid container spacing={2} sx={{ p: 1, mt: '5px', ml: '1px' }}> <Grid container spacing={2} sx={{ p: 1, mt: '5px', ml: '1px' }}>
{previews.map((item, index) => ( {previews.map((item, index) => (
<Grid item xs={12} sm={6} md={3} key={index}> <Grid item xs={12} sm={6} md={3} key={index}>
<Card variant='outlined' sx={{ maxWidth: 64 }}> <Card variant='outlined' sx={{ maxWidth: 128 }}>
<CardMedia <CardMedia
component='img' component='img'
image={item.preview} image={item.preview}
@@ -735,7 +746,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
alt={`preview ${index}`} alt={`preview ${index}`}
style={previewStyle} style={previewStyle}
/> />
<CardActions className='center' sx={{ padding: 0, margin: 0 }}> <CardActions className='center' sx={{ p: 0, m: 0 }}>
<Button <Button
startIcon={<DeleteIcon />} startIcon={<DeleteIcon />}
onClick={() => handleDeletePreview(item)} onClick={() => handleDeletePreview(item)}