diff --git a/packages/components/nodes/multimodal/OpenAI/OpenAIVisionChain.ts b/packages/components/nodes/chains/OpenAIMultiModalChain/OpenAIMultiModalChain.ts
similarity index 77%
rename from packages/components/nodes/multimodal/OpenAI/OpenAIVisionChain.ts
rename to packages/components/nodes/chains/OpenAIMultiModalChain/OpenAIMultiModalChain.ts
index 1ff4f4c9..f62d58bc 100644
--- a/packages/components/nodes/multimodal/OpenAI/OpenAIVisionChain.ts
+++ b/packages/components/nodes/chains/OpenAIMultiModalChain/OpenAIMultiModalChain.ts
@@ -1,10 +1,17 @@
-import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
+import {
+    ICommonObject,
+    INode,
+    INodeData,
+    INodeOutputsValue,
+    INodeParams
+} from "../../../src/Interface";
 import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
-import { OpenAIVisionChainInput, VLLMChain } from './VLLMChain'
+import { OpenAIMultiModalChainInput, VLLMChain } from "./VLLMChain";
 import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
 import { formatResponse } from '../../outputparsers/OutputParserHelpers'
+import { checkInputs, Moderation, streamResponse } from "../../moderation/Moderation";
 
-class OpenAIVisionChain_Chains implements INode {
+class OpenAIMultiModalChain_Chains implements INode {
     label: string
     name: string
     version: number
@@ -24,7 +31,7 @@ class OpenAIVisionChain_Chains implements INode {
         this.version = 1.0
         this.type = 'OpenAIMultiModalChain'
         this.icon = 'chain.svg'
-        this.category = 'MultiModal'
+        this.category = 'Chains'
         this.badge = 'BETA'
         this.description = 'Chain to query against Image and Audio Input.'
         this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
@@ -35,18 +42,20 @@ class OpenAIVisionChain_Chains implements INode {
             credentialNames: ['openAIApi']
         }
         this.inputs = [
-            {
-                label: 'Audio Input',
-                name: 'audioInput',
-                type: 'OpenAIWhisper',
-                optional: true
-            },
             {
                 label: 'Prompt',
                 name: 'prompt',
                 type: 'BasePromptTemplate',
                 optional: true
             },
+            {
+                label: 'Input Moderation',
+                description: 'Detect text that could generate harmful output and prevent it from being sent to the language model',
+                name: 'inputModeration',
+                type: 'Moderation',
+                optional: true,
+                list: true
+            },
             {
                 label: 'Model Name',
                 name: 'modelName',
@@ -55,14 +64,38 @@ class OpenAIVisionChain_Chains implements INode {
                     {
                         label: 'gpt-4-vision-preview',
                         name: 'gpt-4-vision-preview'
-                    },
-                    {
-                        label: 'whisper-1',
-                        name: 'whisper-1'
                     }
                 ],
                 default: 'gpt-4-vision-preview'
             },
+            {
+                label: 'Speech to Text',
+                name: 'speechToText',
+                type: 'boolean',
+                optional: true,
+            },
+            // TODO: only show when speechToText is true
+            {
+                label: 'Speech to Text Method',
+                description: 'How to turn audio into text',
+                name: 'speechToTextMode',
+                type: 'options',
+                options: [
+                    {
+                        label: 'Transcriptions',
+                        name: 'transcriptions',
+                        description: 'Transcribe audio into whatever language the audio is in. Default method when Speech to Text is turned on.'
+                    },
+                    {
+                        label: 'Translations',
+                        name: 'translations',
+                        description: 'Translate and transcribe the audio into english.'
+                    }
+                ],
+                optional: false,
+                default: 'transcriptions',
+                additionalParams: true
+            },
             {
                 label: 'Image Resolution',
                 description: 'This parameter controls the resolution in which the model views the image.',
@@ -76,6 +109,10 @@ class OpenAIVisionChain_Chains implements INode {
                     {
                         label: 'High',
                         name: 'high'
+                    },
+                    {
+                        label: 'Auto',
+                        name: 'auto'
                     }
                 ],
                 default: 'low',
@@ -107,18 +144,11 @@ class OpenAIVisionChain_Chains implements INode {
                 optional: true,
                 additionalParams: true
             },
-            {
-                label: 'Chain Name',
-                name: 'chainName',
-                type: 'string',
-                placeholder: 'Name Your Chain',
-                optional: true
-            },
             {
                 label: 'Accepted Upload Types',
                 name: 'allowedUploadTypes',
                 type: 'string',
-                default: 'image/gif;image/jpeg;image/png;image/webp',
+                default: 'image/gif;image/jpeg;image/png;image/webp;audio/mpeg;audio/x-wav;audio/mp4',
                 hidden: true
             },
             {
@@ -154,19 +184,23 @@ class OpenAIVisionChain_Chains implements INode {
         const modelName = nodeData.inputs?.modelName as string
         const maxTokens = nodeData.inputs?.maxTokens as string
         const topP = nodeData.inputs?.topP as string
-        const whisperConfig = nodeData.inputs?.audioInput
+        const speechToText = nodeData.inputs?.speechToText as boolean
 
-        const fields: OpenAIVisionChainInput = {
+
+        const fields: OpenAIMultiModalChainInput = {
             openAIApiKey: openAIApiKey,
             imageResolution: imageResolution,
             verbose: process.env.DEBUG === 'true',
-            imageUrls: options.uploads,
+            uploads: options.uploads,
             modelName: modelName
         }
         if (temperature) fields.temperature = parseFloat(temperature)
         if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
         if (topP) fields.topP = parseFloat(topP)
-        if (whisperConfig) fields.whisperConfig = whisperConfig
+        if (speechToText) {
+            const speechToTextMode = nodeData.inputs?.speechToTextMode ?? 'transcriptions'
+            if (speechToTextMode) fields.speechToTextMode = speechToTextMode
+        }
 
         if (output === this.name) {
             const chain = new VLLMChain({
@@ -221,6 +255,17 @@ const runPrediction = async (
     const isStreaming = options.socketIO && options.socketIOClientId
     const socketIO = isStreaming ? options.socketIO : undefined
     const socketIOClientId = isStreaming ? options.socketIOClientId : ''
+    const moderations = nodeData.inputs?.inputModeration as Moderation[]
+    if (moderations && moderations.length > 0) {
+        try {
+            // Use the output of the moderation chain as input for the LLM chain
+            input = await checkInputs(moderations, input)
+        } catch (e) {
+            await new Promise((resolve) => setTimeout(resolve, 500))
+            streamResponse(isStreaming, e.message, socketIO, socketIOClientId)
+            return formatResponse(e.message)
+        }
+    }
 
     /**
      * Apply string transformation to reverse converted special chars:
@@ -229,7 +274,7 @@ const runPrediction = async (
      */
     const promptValues = handleEscapeCharacters(promptValuesRaw, true)
     if (options?.uploads) {
-        chain.imageUrls = options.uploads
+        chain.uploads = options.uploads
     }
     if (promptValues && inputVariables.length > 0) {
         let seen: string[] = []
@@ -285,4 +330,4 @@ const runPrediction = async (
     }
 }
 
-module.exports = { nodeClass: OpenAIVisionChain_Chains }
+module.exports = { nodeClass: OpenAIMultiModalChain_Chains }
diff --git a/packages/components/nodes/multimodal/OpenAI/VLLMChain.ts b/packages/components/nodes/chains/OpenAIMultiModalChain/VLLMChain.ts
similarity index 71%
rename from packages/components/nodes/multimodal/OpenAI/VLLMChain.ts
rename to packages/components/nodes/chains/OpenAIMultiModalChain/VLLMChain.ts
index dd44ebb5..2cf2ce95 100644
--- a/packages/components/nodes/multimodal/OpenAI/VLLMChain.ts
+++ b/packages/components/nodes/chains/OpenAIMultiModalChain/VLLMChain.ts
@@ -1,27 +1,30 @@
-import { OpenAI as OpenAIClient, ClientOptions } from 'openai'
+import { OpenAI as OpenAIClient, ClientOptions, OpenAI } from 'openai'
 import { BaseChain, ChainInputs } from 'langchain/chains'
 import { ChainValues } from 'langchain/schema'
-import { BasePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
+import { BasePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
 import path from 'path'
 import { getUserHome } from '../../../src/utils'
 import fs from 'fs'
+import { ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/src/resources/chat/completions'
+import ChatCompletionCreateParamsNonStreaming = OpenAI.ChatCompletionCreateParamsNonStreaming
+import { IFileUpload } from '../../../src'
 
 /**
- * Interface for the input parameters of the OpenAIVisionChain class.
+ * Interface for the input parameters of the VLLMChain class.
  */
-export interface OpenAIVisionChainInput extends ChainInputs {
+export interface OpenAIMultiModalChainInput extends ChainInputs {
     openAIApiKey?: string
     openAIOrganization?: string
     throwError?: boolean
     prompt?: BasePromptTemplate
     configuration?: ClientOptions
-    imageUrls?: []
-    imageResolution?: string
+    uploads?: IFileUpload[]
+    imageResolution?: 'auto' | 'low' | 'high'
     temperature?: number
     modelName?: string
     maxTokens?: number
     topP?: number
-    whisperConfig?: any
+    speechToTextMode?: string
 }
 
 /**
@@ -29,7 +32,7 @@ export interface OpenAIVisionChainInput extends ChainInputs {
  * Vision API. It extends the BaseChain class and implements the
- * OpenAIVisionChainInput interface.
+ * OpenAIMultiModalChainInput interface.
  */
-export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
+export class VLLMChain extends BaseChain implements OpenAIMultiModalChainInput {
     static lc_name() {
         return 'VLLMChain'
     }
@@ -37,8 +40,8 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
 
     inputKey = 'input'
     outputKey = 'text'
-    imageUrls?: []
-    imageResolution: string = 'low'
+    uploads?: IFileUpload[]
+    imageResolution: 'auto' | 'low' | 'high'
     openAIApiKey?: string
     openAIOrganization?: string
     clientConfig: ClientOptions
@@ -49,9 +52,9 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
     maxTokens?: number
     topP?: number
 
-    whisperConfig?: any
+    speechToTextMode?: any
 
-    constructor(fields: OpenAIVisionChainInput) {
+    constructor(fields: OpenAIMultiModalChainInput) {
         super(fields)
         this.throwError = fields?.throwError ?? false
         this.imageResolution = fields?.imageResolution ?? 'low'
@@ -61,8 +64,8 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         this.modelName = fields?.modelName
         this.maxTokens = fields?.maxTokens
         this.topP = fields?.topP
-        this.imageUrls = fields?.imageUrls ?? []
-        this.whisperConfig = fields?.whisperConfig ?? {}
+        this.uploads = fields?.uploads ?? []
+        this.speechToTextMode = fields?.speechToTextMode // left undefined when Speech to Text is off so _call skips audio uploads
         if (!this.openAIApiKey) {
             throw new Error('OpenAI API key not found')
         }
@@ -81,8 +84,8 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
     async _call(values: ChainValues): Promise<ChainValues> {
         const userInput = values[this.inputKey]
 
-        const vRequest: any = {
-            model: this.modelName,
+        const vRequest: ChatCompletionCreateParamsNonStreaming = {
+            model: 'gpt-4-vision-preview',
             temperature: this.temperature,
             top_p: this.topP,
             messages: []
@@ -90,42 +93,42 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         if (this.maxTokens) vRequest.max_tokens = this.maxTokens
         else vRequest.max_tokens = 1024
 
-        const userRole: any = { role: 'user' }
-        userRole.content = []
-        userRole.content.push({
+        const chatMessages: ChatCompletionContentPart[] = []
+        const userRole: ChatCompletionMessageParam = { role: 'user', content: [] }
+        chatMessages.push({
             type: 'text',
             text: userInput
         })
-        if (this.whisperConfig && this.imageUrls && this.imageUrls.length > 0) {
-            const audioUploads = this.getAudioUploads(this.imageUrls)
+        if (this.speechToTextMode && this.uploads && this.uploads.length > 0) {
+            const audioUploads = this.getAudioUploads(this.uploads)
             for (const url of audioUploads) {
                 const filePath = path.join(getUserHome(), '.flowise', 'gptvision', url.data, url.name)
 
-                // as the image is stored in the server, read the file and convert it to base64
+                // stored audio is streamed to whisper-1; speechToTextMode holds the option name ('transcriptions' | 'translations')
                 const audio_file = fs.createReadStream(filePath)
-                if (this.whisperConfig.purpose === 'transcription') {
+                if (this.speechToTextMode === 'transcriptions') {
                     const transcription = await this.client.audio.transcriptions.create({
                         file: audio_file,
                         model: 'whisper-1'
                     })
-                    userRole.content.push({
+                    chatMessages.push({
                         type: 'text',
                         text: transcription.text
                     })
-                } else if (this.whisperConfig.purpose === 'translation') {
+                } else if (this.speechToTextMode === 'translations') {
                     const translation = await this.client.audio.translations.create({
                         file: audio_file,
                         model: 'whisper-1'
                     })
-                    userRole.content.push({
+                    chatMessages.push({
                         type: 'text',
                         text: translation.text
                     })
                 }
             }
         }
-        if (this.imageUrls && this.imageUrls.length > 0) {
-            const imageUploads = this.getImageUploads(this.imageUrls)
+        if (this.uploads && this.uploads.length > 0) {
+            const imageUploads = this.getImageUploads(this.uploads)
             for (const url of imageUploads) {
                 let bf = url.data
                 if (url.type == 'stored-file') {
@@ -135,7 +138,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
                     const contents = fs.readFileSync(filePath)
                     bf = 'data:' + url.mime + ';base64,' + contents.toString('base64')
                 }
-                userRole.content.push({
+                chatMessages.push({
                     type: 'image_url',
                     image_url: {
                         url: bf,
@@ -144,6 +147,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
                 })
             }
         }
+        userRole.content = chatMessages
         vRequest.messages.push(userRole)
         if (this.prompt && this.prompt instanceof ChatPromptTemplate) {
             let chatPrompt = this.prompt as ChatPromptTemplate
@@ -151,12 +155,12 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
                 if (message instanceof SystemMessagePromptTemplate) {
                     vRequest.messages.push({
                         role: 'system',
-                        content: [
-                            {
-                                type: 'text',
-                                text: (message.prompt as any).template
-                            }
-                        ]
+                        content: (message.prompt as any).template
+                    })
+                } else if (message instanceof HumanMessagePromptTemplate) {
+                    vRequest.messages.push({
+                        role: 'user',
+                        content: (message.prompt as any).template
                     })
                 }
             })
@@ -164,7 +168,6 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
 
         let response
         try {
-            // @ts-ignore
             response = await this.client.chat.completions.create(vRequest)
         } catch (error) {
             if (error instanceof Error) {
diff --git a/packages/components/nodes/multimodal/OpenAI/chain.svg b/packages/components/nodes/chains/OpenAIMultiModalChain/chain.svg
similarity index 100%
rename from packages/components/nodes/multimodal/OpenAI/chain.svg
rename to packages/components/nodes/chains/OpenAIMultiModalChain/chain.svg
diff --git a/packages/components/nodes/multimodal/OpenAI/AudioWhisper.ts b/packages/components/nodes/multimodal/OpenAI/AudioWhisper.ts
deleted file mode 100644
index aa2c71e1..00000000
--- a/packages/components/nodes/multimodal/OpenAI/AudioWhisper.ts
+++ /dev/null
@@ -1,66 +0,0 @@
-import { INode, INodeData, INodeParams } from '../../../src'
-
-class OpenAIAudioWhisper implements INode {
-    label: string
-    name: string
-    version: number
-    description: string
-    type: string
-    icon: string
-    badge: string
-    category: string
-    baseClasses: string[]
-    inputs: INodeParams[]
-
-    constructor() {
-        this.label = 'Open AI Whisper'
-        this.name = 'openAIAudioWhisper'
-        this.version = 1.0
-        this.type = 'OpenAIWhisper'
-        this.description = 'Speech to text using OpenAI Whisper API'
-        this.icon = 'audio.svg'
-        this.badge = 'BETA'
-        this.category = 'MultiModal'
-        this.baseClasses = [this.type]
-        this.inputs = [
-            {
-                label: 'Purpose',
-                name: 'purpose',
-                type: 'options',
-                options: [
-                    {
-                        label: 'Transcription',
-                        name: 'transcription'
-                    },
-                    {
-                        label: 'Translation',
-                        name: 'translation'
-                    }
-                ],
-                default: 'transcription'
-            },
-            {
-                label: 'Accepted Upload Types',
-                name: 'allowedUploadTypes',
-                type: 'string',
-                default: 'audio/mpeg;audio/x-wav;audio/mp4',
-                hidden: true
-            },
-            {
-                label: 'Maximum Upload Size (MB)',
-                name: 'maxUploadSize',
-                type: 'number',
-                default: '5',
-                hidden: true
-            }
-        ]
-    }
-
-    async init(nodeData: INodeData): Promise<any> {
-        const purpose = nodeData.inputs?.purpose as string
-
-        return { purpose }
-    }
-}
-
-module.exports = { nodeClass: OpenAIAudioWhisper }
diff --git a/packages/components/nodes/multimodal/OpenAI/audio.svg b/packages/components/nodes/multimodal/OpenAI/audio.svg
deleted file mode 100644
index 3bcbbdcd..00000000
--- a/packages/components/nodes/multimodal/OpenAI/audio.svg
+++ /dev/null
@@ -1 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="mdi-text-to-speech" width="24" height="24" viewBox="0 0 24 24"><path d="M8,7A2,2 0 0,1 10,9V14A2,2 0 0,1 8,16A2,2 0 0,1 6,14V9A2,2 0 0,1 8,7M14,14C14,16.97 11.84,19.44 9,19.92V22H7V19.92C4.16,19.44 2,16.97 2,14H4A4,4 0 0,0 8,18A4,4 0 0,0 12,14H14M21.41,9.41L17.17,13.66L18.18,10H14A2,2 0 0,1 12,8V4A2,2 0 0,1 14,2H20A2,2 0 0,1 22,4V8C22,8.55 21.78,9.05 21.41,9.41Z" /></svg>
\ No newline at end of file
diff --git a/packages/components/nodes/multimodal/OpenAI/list.png b/packages/components/nodes/multimodal/OpenAI/list.png
deleted file mode 100644
index acb4e5d6..00000000
Binary files a/packages/components/nodes/multimodal/OpenAI/list.png and /dev/null differ
diff --git a/packages/components/src/Interface.ts b/packages/components/src/Interface.ts
index 676618e5..e7f6fe86 100644
--- a/packages/components/src/Interface.ts
+++ b/packages/components/src/Interface.ts
@@ -234,3 +234,10 @@ export abstract class FlowiseSummaryMemory extends ConversationSummaryMemory imp
     abstract addChatMessages(msgArray: { text: string; type: MessageType }[], overrideSessionId?: string): Promise<void>
     abstract clearChatMessages(overrideSessionId?: string): Promise<void>
 }
+
+export interface IFileUpload { // one entry of options.uploads handed to a multimodal chain
+    data: string // image URL / data passed through as-is; for files stored on the server it is used as a directory segment of the file path
+    type: string // upload kind; 'stored-file' makes the chain read the content from disk
+    name: string // file name, joined onto the storage path when reading a stored file
+    mime: string // MIME type, used as the prefix of the base64 data: URL built for stored images
+}
\ No newline at end of file
diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index 4451b838..61aff470 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -1695,9 +1695,7 @@ export class App {
                     if (!endingNodeData) return res.status(500).send(`Ending node ${endingNode.id} data not found`)
 
                     if (endingNodeData && endingNodeData.category !== 'Chains' && endingNodeData.category !== 'Agents') {
-                        if (endingNodeData.type !== 'OpenAIMultiModalChain') {
-                            return res.status(500).send(`Ending node must be either a Chain or Agent`)
-                        }
+                        return res.status(500).send(`Ending node must be either a Chain or Agent`)
                     }
 
                     if (