diff --git a/packages/components/nodes/multimodal/OpenAI/AudioWhisper.ts b/packages/components/nodes/multimodal/OpenAI/AudioWhisper.ts new file mode 100644 index 00000000..b308a7c5 --- /dev/null +++ b/packages/components/nodes/multimodal/OpenAI/AudioWhisper.ts @@ -0,0 +1,61 @@ +import { INode, INodeData, INodeParams } from '../../../src' + +class OpenAIAudioWhisper implements INode { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + + constructor() { + this.label = 'Open AI Whisper' + this.name = 'openAIAudioWhisper' + this.version = 1.0 + this.type = 'OpenAIWhisper' + this.description = 'Speech to text using OpenAI Whisper API' + this.icon = 'audio.svg' + this.category = 'MultiModal' + this.baseClasses = [this.type] + this.inputs = [ + { + label: 'Purpose', + name: 'purpose', + type: 'options', + options: [ + { + label: 'transcription', + name: 'transcription' + }, + { + label: 'translation', + name: 'translation' + } + ] + }, + { + label: 'Accepted Upload Types', + name: 'allowedUploadTypes', + type: 'string', + default: 'audio/mpeg;audio/x-wav;audio/mp4', + hidden: true + }, + { + label: 'Maximum Upload Size (MB)', + name: 'maxUploadSize', + type: 'number', + default: '5', + hidden: true + } + ] + } + + async init(nodeData: INodeData): Promise { + return {} + } +} + +module.exports = { nodeClass: OpenAIAudioWhisper } diff --git a/packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts b/packages/components/nodes/multimodal/OpenAI/OpenAIVisionChain.ts similarity index 94% rename from packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts rename to packages/components/nodes/multimodal/OpenAI/OpenAIVisionChain.ts index 6d19235c..4151b4b0 100644 --- a/packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts +++ b/packages/components/nodes/multimodal/OpenAI/OpenAIVisionChain.ts @@ -19,14 +19,14 @@ class OpenAIVisionChain_Chains implements INode { credential: INodeParams constructor() { - this.label = 'Open AI Vision Chain' - this.name = 'openAIVisionChain' + this.label = 'Open AI MultiModal Chain' + this.name = 'openAIMultiModalChain' this.version = 1.0 - this.type = 'OpenAIVisionChain' + this.type = 'OpenAIMultiModalChain' this.icon = 'chain.svg' this.category = 'Chains' this.badge = 'BETA' - this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .' + this.description = 'Chain to query against Image and Audio Input.' this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)] this.credential = { label: 'Connect Credential', @@ -36,16 +36,9 @@ class OpenAIVisionChain_Chains implements INode { } this.inputs = [ { - label: 'Model Name', - name: 'modelName', - type: 'options', - options: [ - { - label: 'gpt-4-vision-preview', - name: 'gpt-4-vision-preview' - } - ], - default: 'gpt-4-vision-preview', + label: 'Audio Input', + name: 'audioInput', + type: 'OpenAIWhisper', optional: true }, { @@ -54,6 +47,22 @@ class OpenAIVisionChain_Chains implements INode { type: 'BasePromptTemplate', optional: true }, + { + label: 'Model Name', + name: 'modelName', + type: 'options', + options: [ + { + label: 'gpt-4-vision-preview', + name: 'gpt-4-vision-preview' + }, + { + label: 'whisper-1', + name: 'whisper-1' + } + ], + default: 'gpt-4-vision-preview' + }, { label: 'Image Resolution', description: 'This parameter controls the resolution in which the model views the image.', @@ -122,8 +131,8 @@ class OpenAIVisionChain_Chains implements INode { ] this.outputs = [ { - label: 'Open AI Vision Chain', - name: 'openAIVisionChain', + label: 'Open AI MultiModal Chain', + name: 'OpenAIMultiModalChain', baseClasses: [this.type, ...getBaseClasses(VLLMChain)] }, { diff --git a/packages/components/nodes/chains/VisionChain/VLLMChain.ts b/packages/components/nodes/multimodal/OpenAI/VLLMChain.ts similarity index 100% rename from packages/components/nodes/chains/VisionChain/VLLMChain.ts rename to packages/components/nodes/multimodal/OpenAI/VLLMChain.ts diff --git a/packages/components/nodes/multimodal/OpenAI/audio.svg b/packages/components/nodes/multimodal/OpenAI/audio.svg new file mode 100644 index 00000000..3bcbbdcd --- /dev/null +++ b/packages/components/nodes/multimodal/OpenAI/audio.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/components/nodes/chains/VisionChain/chain.svg b/packages/components/nodes/multimodal/OpenAI/chain.svg similarity index 100% rename from packages/components/nodes/chains/VisionChain/chain.svg rename to packages/components/nodes/multimodal/OpenAI/chain.svg diff --git a/packages/components/nodes/multimodal/OpenAI/list.png b/packages/components/nodes/multimodal/OpenAI/list.png new file mode 100644 index 00000000..acb4e5d6 Binary files /dev/null and b/packages/components/nodes/multimodal/OpenAI/list.png differ diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 84e76c6e..eb03f47e 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -1212,30 +1212,32 @@ export class App { }) } - private uploadAllowedNodes = ['OpenAIVisionChain'] + private uploadAllowedNodes = ['OpenAIMultiModalChain', 'OpenAIWhisper'] private shouldAllowUploads(result: ChatFlow): any { const flowObj = JSON.parse(result.flowData) let allowUploads = false - let allowedTypes: string[] = [] - let maxUploadSize: number = -1 + const allowances: any = [] flowObj.nodes.forEach((node: IReactFlowNode) => { if (this.uploadAllowedNodes.indexOf(node.data.type) > -1) { logger.debug(`[server]: Found Eligible Node ${node.data.type}, Allowing Uploads.`) allowUploads = true + const allowance: any = {} node.data.inputParams.map((param: any) => { if (param.name === 'allowedUploadTypes') { - allowedTypes = param.default.split(';') + allowance.allowedTypes = param.default.split(';') } if (param.name === 'maxUploadSize') { - maxUploadSize = parseInt(param.default ? param.default : '0') + allowance.maxUploadSize = parseInt(param.default ? param.default : '0') } }) + if (allowance.allowedTypes && allowance.maxUploadSize) { + allowances.push(allowance) + } } }) return { allowUploads, - allowedTypes, - maxUploadSize + allowed: allowances } } diff --git a/packages/ui/src/assets/images/wave-sound.jpg b/packages/ui/src/assets/images/wave-sound.jpg new file mode 100644 index 00000000..9f56d67d Binary files /dev/null and b/packages/ui/src/assets/images/wave-sound.jpg differ diff --git a/packages/ui/src/views/chatmessage/ChatMessage.js b/packages/ui/src/views/chatmessage/ChatMessage.js index d2ff51d8..79a9b6e0 100644 --- a/packages/ui/src/views/chatmessage/ChatMessage.js +++ b/packages/ui/src/views/chatmessage/ChatMessage.js @@ -8,6 +8,7 @@ import rehypeRaw from 'rehype-raw' import remarkGfm from 'remark-gfm' import remarkMath from 'remark-math' import axios from 'axios' +import audioUploadSVG from 'assets/images/wave-sound.jpg' import { Box, @@ -85,23 +86,21 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { e.preventDefault() } const isFileAllowedForUpload = (file) => { - // check if file type is allowed - if (getAllowChatFlowUploads.data?.allowedTypes?.length > 0) { - const allowedFileTypes = getAllowChatFlowUploads.data?.allowedTypes - if (!allowedFileTypes.includes(file.type)) { - alert(`File ${file.name} is not allowed.\nAllowed file types are ${allowedFileTypes.join(', ')}.`) - return false - } - } - // check if file size is allowed - if (getAllowChatFlowUploads.data?.maxUploadSize > 0) { + const constraints = getAllowChatFlowUploads.data + let acceptFile = false + if (constraints.allowUploads) { + const fileType = file.type const sizeInMB = file.size / 1024 / 1024 - if (sizeInMB > getAllowChatFlowUploads.data?.maxUploadSize) { - alert(`File ${file.name} is too large.\nMaximum allowed size is ${getAllowChatFlowUploads.data?.maxUploadSize} MB.`) - return false - } + constraints.allowed.map((allowed) => { + if (allowed.allowedTypes.includes(fileType) && sizeInMB <= allowed.maxUploadSize) { + acceptFile = true + } + }) } - return true + if (!acceptFile) { + alert(`Cannot upload file. Kindly check the allowed file types and maximum allowed size.`) + } + return acceptFile } const handleDrop = async (e) => { if (!isChatFlowAvailableForUploads) { @@ -124,9 +123,15 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { return } const { result } = evt.target + let previewUrl + if (file.type.startsWith('audio/')) { + previewUrl = audioUploadSVG + } else if (file.type.startsWith('image/')) { + previewUrl = URL.createObjectURL(file) + } resolve({ data: result, - preview: URL.createObjectURL(file), + preview: previewUrl, type: 'file', name: name, mime: file.type @@ -240,7 +245,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { } const previewStyle = { - width: '64px', + width: '128px', height: '64px', objectFit: 'cover' // This makes the image cover the area, cropping it if necessary } @@ -514,11 +519,17 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { onDrop={handleDrop} className={`file-drop-field`} > - {isDragOver && ( + {isDragOver && getAllowChatFlowUploads.data?.allowUploads && ( Drop here to upload - {getAllowChatFlowUploads.data?.allowedTypes?.join(', ')} - Max Allowed Size: {getAllowChatFlowUploads.data?.maxUploadSize} MB + {getAllowChatFlowUploads.data.allowed.map((allowed) => { + return ( + <> + {allowed.allowedTypes?.join(', ')} + Max Allowed Size: {allowed.maxUploadSize} MB + + ) + })} )}
@@ -727,7 +738,7 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { {previews.map((item, index) => ( - + { alt={`preview ${index}`} style={previewStyle} /> - +