mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 21:00:58 +03:00
GPT Vision: Renaming to OpenAIMultiModalChain and merging the functionality of Whisper.
This commit is contained in:
+73
-28
@@ -1,10 +1,17 @@
|
|||||||
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
import {
|
||||||
|
ICommonObject,
|
||||||
|
INode,
|
||||||
|
INodeData,
|
||||||
|
INodeOutputsValue,
|
||||||
|
INodeParams
|
||||||
|
} from "../../../src/Interface";
|
||||||
import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
|
import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
|
||||||
import { OpenAIVisionChainInput, VLLMChain } from './VLLMChain'
|
import { OpenAIMultiModalChainInput, VLLMChain } from "./VLLMChain";
|
||||||
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
|
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
|
||||||
import { formatResponse } from '../../outputparsers/OutputParserHelpers'
|
import { formatResponse } from '../../outputparsers/OutputParserHelpers'
|
||||||
|
import { checkInputs, Moderation, streamResponse } from "../../moderation/Moderation";
|
||||||
|
|
||||||
class OpenAIVisionChain_Chains implements INode {
|
class OpenAIMultiModalChain_Chains implements INode {
|
||||||
label: string
|
label: string
|
||||||
name: string
|
name: string
|
||||||
version: number
|
version: number
|
||||||
@@ -24,7 +31,7 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
this.version = 1.0
|
this.version = 1.0
|
||||||
this.type = 'OpenAIMultiModalChain'
|
this.type = 'OpenAIMultiModalChain'
|
||||||
this.icon = 'chain.svg'
|
this.icon = 'chain.svg'
|
||||||
this.category = 'MultiModal'
|
this.category = 'Chains'
|
||||||
this.badge = 'BETA'
|
this.badge = 'BETA'
|
||||||
this.description = 'Chain to query against Image and Audio Input.'
|
this.description = 'Chain to query against Image and Audio Input.'
|
||||||
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
|
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
|
||||||
@@ -35,18 +42,20 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
credentialNames: ['openAIApi']
|
credentialNames: ['openAIApi']
|
||||||
}
|
}
|
||||||
this.inputs = [
|
this.inputs = [
|
||||||
{
|
|
||||||
label: 'Audio Input',
|
|
||||||
name: 'audioInput',
|
|
||||||
type: 'OpenAIWhisper',
|
|
||||||
optional: true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
label: 'Prompt',
|
label: 'Prompt',
|
||||||
name: 'prompt',
|
name: 'prompt',
|
||||||
type: 'BasePromptTemplate',
|
type: 'BasePromptTemplate',
|
||||||
optional: true
|
optional: true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
label: 'Input Moderation',
|
||||||
|
description: 'Detect text that could generate harmful output and prevent it from being sent to the language model',
|
||||||
|
name: 'inputModeration',
|
||||||
|
type: 'Moderation',
|
||||||
|
optional: true,
|
||||||
|
list: true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
label: 'Model Name',
|
label: 'Model Name',
|
||||||
name: 'modelName',
|
name: 'modelName',
|
||||||
@@ -55,14 +64,38 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
{
|
{
|
||||||
label: 'gpt-4-vision-preview',
|
label: 'gpt-4-vision-preview',
|
||||||
name: 'gpt-4-vision-preview'
|
name: 'gpt-4-vision-preview'
|
||||||
},
|
|
||||||
{
|
|
||||||
label: 'whisper-1',
|
|
||||||
name: 'whisper-1'
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
default: 'gpt-4-vision-preview'
|
default: 'gpt-4-vision-preview'
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
label: 'Speech to Text',
|
||||||
|
name: 'speechToText',
|
||||||
|
type: 'boolean',
|
||||||
|
optional: true,
|
||||||
|
},
|
||||||
|
// TODO: only show when speechToText is true
|
||||||
|
{
|
||||||
|
label: 'Speech to Text Method',
|
||||||
|
description: 'How to turn audio into text',
|
||||||
|
name: 'speechToTextMode',
|
||||||
|
type: 'options',
|
||||||
|
options: [
|
||||||
|
{
|
||||||
|
label: 'Transcriptions',
|
||||||
|
name: 'transcriptions',
|
||||||
|
description: 'Transcribe audio into whatever language the audio is in. Default method when Speech to Text is turned on.'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Translations',
|
||||||
|
name: 'translations',
|
||||||
|
description: 'Translate and transcribe the audio into english.'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
optional: false,
|
||||||
|
default: 'transcriptions',
|
||||||
|
additionalParams: true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
label: 'Image Resolution',
|
label: 'Image Resolution',
|
||||||
description: 'This parameter controls the resolution in which the model views the image.',
|
description: 'This parameter controls the resolution in which the model views the image.',
|
||||||
@@ -76,6 +109,10 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
{
|
{
|
||||||
label: 'High',
|
label: 'High',
|
||||||
name: 'high'
|
name: 'high'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Auto',
|
||||||
|
name: 'auto'
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
default: 'low',
|
default: 'low',
|
||||||
@@ -107,18 +144,11 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true
|
||||||
},
|
},
|
||||||
{
|
|
||||||
label: 'Chain Name',
|
|
||||||
name: 'chainName',
|
|
||||||
type: 'string',
|
|
||||||
placeholder: 'Name Your Chain',
|
|
||||||
optional: true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
label: 'Accepted Upload Types',
|
label: 'Accepted Upload Types',
|
||||||
name: 'allowedUploadTypes',
|
name: 'allowedUploadTypes',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
default: 'image/gif;image/jpeg;image/png;image/webp',
|
default: 'image/gif;image/jpeg;image/png;image/webp;audio/mpeg;audio/x-wav;audio/mp4',
|
||||||
hidden: true
|
hidden: true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -154,19 +184,23 @@ class OpenAIVisionChain_Chains implements INode {
|
|||||||
const modelName = nodeData.inputs?.modelName as string
|
const modelName = nodeData.inputs?.modelName as string
|
||||||
const maxTokens = nodeData.inputs?.maxTokens as string
|
const maxTokens = nodeData.inputs?.maxTokens as string
|
||||||
const topP = nodeData.inputs?.topP as string
|
const topP = nodeData.inputs?.topP as string
|
||||||
const whisperConfig = nodeData.inputs?.audioInput
|
const speechToText = nodeData.inputs?.speechToText as boolean
|
||||||
|
|
||||||
const fields: OpenAIVisionChainInput = {
|
|
||||||
|
const fields: OpenAIMultiModalChainInput = {
|
||||||
openAIApiKey: openAIApiKey,
|
openAIApiKey: openAIApiKey,
|
||||||
imageResolution: imageResolution,
|
imageResolution: imageResolution,
|
||||||
verbose: process.env.DEBUG === 'true',
|
verbose: process.env.DEBUG === 'true',
|
||||||
imageUrls: options.uploads,
|
uploads: options.uploads,
|
||||||
modelName: modelName
|
modelName: modelName
|
||||||
}
|
}
|
||||||
if (temperature) fields.temperature = parseFloat(temperature)
|
if (temperature) fields.temperature = parseFloat(temperature)
|
||||||
if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
|
if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
|
||||||
if (topP) fields.topP = parseFloat(topP)
|
if (topP) fields.topP = parseFloat(topP)
|
||||||
if (whisperConfig) fields.whisperConfig = whisperConfig
|
if (speechToText) {
|
||||||
|
const speechToTextMode = nodeData.inputs?.speechToTextMode ?? 'transcriptions'
|
||||||
|
if (speechToTextMode) fields.speechToTextMode = speechToTextMode
|
||||||
|
}
|
||||||
|
|
||||||
if (output === this.name) {
|
if (output === this.name) {
|
||||||
const chain = new VLLMChain({
|
const chain = new VLLMChain({
|
||||||
@@ -221,6 +255,17 @@ const runPrediction = async (
|
|||||||
const isStreaming = options.socketIO && options.socketIOClientId
|
const isStreaming = options.socketIO && options.socketIOClientId
|
||||||
const socketIO = isStreaming ? options.socketIO : undefined
|
const socketIO = isStreaming ? options.socketIO : undefined
|
||||||
const socketIOClientId = isStreaming ? options.socketIOClientId : ''
|
const socketIOClientId = isStreaming ? options.socketIOClientId : ''
|
||||||
|
const moderations = nodeData.inputs?.inputModeration as Moderation[]
|
||||||
|
if (moderations && moderations.length > 0) {
|
||||||
|
try {
|
||||||
|
// Use the output of the moderation chain as input for the LLM chain
|
||||||
|
input = await checkInputs(moderations, input)
|
||||||
|
} catch (e) {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 500))
|
||||||
|
streamResponse(isStreaming, e.message, socketIO, socketIOClientId)
|
||||||
|
return formatResponse(e.message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Apply string transformation to reverse converted special chars:
|
* Apply string transformation to reverse converted special chars:
|
||||||
@@ -229,7 +274,7 @@ const runPrediction = async (
|
|||||||
*/
|
*/
|
||||||
const promptValues = handleEscapeCharacters(promptValuesRaw, true)
|
const promptValues = handleEscapeCharacters(promptValuesRaw, true)
|
||||||
if (options?.uploads) {
|
if (options?.uploads) {
|
||||||
chain.imageUrls = options.uploads
|
chain.uploads = options.uploads
|
||||||
}
|
}
|
||||||
if (promptValues && inputVariables.length > 0) {
|
if (promptValues && inputVariables.length > 0) {
|
||||||
let seen: string[] = []
|
let seen: string[] = []
|
||||||
@@ -285,4 +330,4 @@ const runPrediction = async (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { nodeClass: OpenAIVisionChain_Chains }
|
module.exports = { nodeClass: OpenAIMultiModalChain_Chains }
|
||||||
+37
-34
@@ -1,27 +1,30 @@
|
|||||||
import { OpenAI as OpenAIClient, ClientOptions } from 'openai'
|
import { OpenAI as OpenAIClient, ClientOptions, OpenAI } from 'openai'
|
||||||
import { BaseChain, ChainInputs } from 'langchain/chains'
|
import { BaseChain, ChainInputs } from 'langchain/chains'
|
||||||
import { ChainValues } from 'langchain/schema'
|
import { ChainValues } from 'langchain/schema'
|
||||||
import { BasePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
|
import { BasePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
|
||||||
import path from 'path'
|
import path from 'path'
|
||||||
import { getUserHome } from '../../../src/utils'
|
import { getUserHome } from '../../../src/utils'
|
||||||
import fs from 'fs'
|
import fs from 'fs'
|
||||||
|
import { ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/src/resources/chat/completions'
|
||||||
|
import ChatCompletionCreateParamsNonStreaming = OpenAI.ChatCompletionCreateParamsNonStreaming
|
||||||
|
import { IFileUpload } from '../../../src'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interface for the input parameters of the OpenAIVisionChain class.
|
* Interface for the input parameters of the OpenAIVisionChain class.
|
||||||
*/
|
*/
|
||||||
export interface OpenAIVisionChainInput extends ChainInputs {
|
export interface OpenAIMultiModalChainInput extends ChainInputs {
|
||||||
openAIApiKey?: string
|
openAIApiKey?: string
|
||||||
openAIOrganization?: string
|
openAIOrganization?: string
|
||||||
throwError?: boolean
|
throwError?: boolean
|
||||||
prompt?: BasePromptTemplate
|
prompt?: BasePromptTemplate
|
||||||
configuration?: ClientOptions
|
configuration?: ClientOptions
|
||||||
imageUrls?: []
|
uploads?: IFileUpload[]
|
||||||
imageResolution?: string
|
imageResolution?: 'auto' | 'low' | 'high'
|
||||||
temperature?: number
|
temperature?: number
|
||||||
modelName?: string
|
modelName?: string
|
||||||
maxTokens?: number
|
maxTokens?: number
|
||||||
topP?: number
|
topP?: number
|
||||||
whisperConfig?: any
|
speechToTextMode?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -29,7 +32,7 @@ export interface OpenAIVisionChainInput extends ChainInputs {
|
|||||||
* Vision API. It extends the BaseChain class and implements the
|
* Vision API. It extends the BaseChain class and implements the
|
||||||
* OpenAIVisionChainInput interface.
|
* OpenAIVisionChainInput interface.
|
||||||
*/
|
*/
|
||||||
export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
export class VLLMChain extends BaseChain implements OpenAIMultiModalChainInput {
|
||||||
static lc_name() {
|
static lc_name() {
|
||||||
return 'VLLMChain'
|
return 'VLLMChain'
|
||||||
}
|
}
|
||||||
@@ -37,8 +40,8 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
|
|
||||||
inputKey = 'input'
|
inputKey = 'input'
|
||||||
outputKey = 'text'
|
outputKey = 'text'
|
||||||
imageUrls?: []
|
uploads?: IFileUpload[]
|
||||||
imageResolution: string = 'low'
|
imageResolution: 'auto' | 'low' | 'high'
|
||||||
openAIApiKey?: string
|
openAIApiKey?: string
|
||||||
openAIOrganization?: string
|
openAIOrganization?: string
|
||||||
clientConfig: ClientOptions
|
clientConfig: ClientOptions
|
||||||
@@ -49,9 +52,9 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
maxTokens?: number
|
maxTokens?: number
|
||||||
topP?: number
|
topP?: number
|
||||||
|
|
||||||
whisperConfig?: any
|
speechToTextMode?: any
|
||||||
|
|
||||||
constructor(fields: OpenAIVisionChainInput) {
|
constructor(fields: OpenAIMultiModalChainInput) {
|
||||||
super(fields)
|
super(fields)
|
||||||
this.throwError = fields?.throwError ?? false
|
this.throwError = fields?.throwError ?? false
|
||||||
this.imageResolution = fields?.imageResolution ?? 'low'
|
this.imageResolution = fields?.imageResolution ?? 'low'
|
||||||
@@ -61,8 +64,8 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
this.modelName = fields?.modelName
|
this.modelName = fields?.modelName
|
||||||
this.maxTokens = fields?.maxTokens
|
this.maxTokens = fields?.maxTokens
|
||||||
this.topP = fields?.topP
|
this.topP = fields?.topP
|
||||||
this.imageUrls = fields?.imageUrls ?? []
|
this.uploads = fields?.uploads ?? []
|
||||||
this.whisperConfig = fields?.whisperConfig ?? {}
|
this.speechToTextMode = fields?.speechToTextMode ?? {}
|
||||||
if (!this.openAIApiKey) {
|
if (!this.openAIApiKey) {
|
||||||
throw new Error('OpenAI API key not found')
|
throw new Error('OpenAI API key not found')
|
||||||
}
|
}
|
||||||
@@ -81,8 +84,8 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
async _call(values: ChainValues): Promise<ChainValues> {
|
async _call(values: ChainValues): Promise<ChainValues> {
|
||||||
const userInput = values[this.inputKey]
|
const userInput = values[this.inputKey]
|
||||||
|
|
||||||
const vRequest: any = {
|
const vRequest: ChatCompletionCreateParamsNonStreaming = {
|
||||||
model: this.modelName,
|
model: 'gpt-4-vision-preview',
|
||||||
temperature: this.temperature,
|
temperature: this.temperature,
|
||||||
top_p: this.topP,
|
top_p: this.topP,
|
||||||
messages: []
|
messages: []
|
||||||
@@ -90,42 +93,42 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
if (this.maxTokens) vRequest.max_tokens = this.maxTokens
|
if (this.maxTokens) vRequest.max_tokens = this.maxTokens
|
||||||
else vRequest.max_tokens = 1024
|
else vRequest.max_tokens = 1024
|
||||||
|
|
||||||
const userRole: any = { role: 'user' }
|
const chatMessages: ChatCompletionContentPart[] = []
|
||||||
userRole.content = []
|
const userRole: ChatCompletionMessageParam = { role: 'user', content: [] }
|
||||||
userRole.content.push({
|
chatMessages.push({
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: userInput
|
text: userInput
|
||||||
})
|
})
|
||||||
if (this.whisperConfig && this.imageUrls && this.imageUrls.length > 0) {
|
if (this.speechToTextMode && this.uploads && this.uploads.length > 0) {
|
||||||
const audioUploads = this.getAudioUploads(this.imageUrls)
|
const audioUploads = this.getAudioUploads(this.uploads)
|
||||||
for (const url of audioUploads) {
|
for (const url of audioUploads) {
|
||||||
const filePath = path.join(getUserHome(), '.flowise', 'gptvision', url.data, url.name)
|
const filePath = path.join(getUserHome(), '.flowise', 'gptvision', url.data, url.name)
|
||||||
|
|
||||||
// as the image is stored in the server, read the file and convert it to base64
|
// as the image is stored in the server, read the file and convert it to base64
|
||||||
const audio_file = fs.createReadStream(filePath)
|
const audio_file = fs.createReadStream(filePath)
|
||||||
if (this.whisperConfig.purpose === 'transcription') {
|
if (this.speechToTextMode.purpose === 'transcriptions') {
|
||||||
const transcription = await this.client.audio.transcriptions.create({
|
const transcription = await this.client.audio.transcriptions.create({
|
||||||
file: audio_file,
|
file: audio_file,
|
||||||
model: 'whisper-1'
|
model: 'whisper-1'
|
||||||
})
|
})
|
||||||
userRole.content.push({
|
chatMessages.push({
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: transcription.text
|
text: transcription.text
|
||||||
})
|
})
|
||||||
} else if (this.whisperConfig.purpose === 'translation') {
|
} else if (this.speechToTextMode.purpose === 'translations') {
|
||||||
const translation = await this.client.audio.translations.create({
|
const translation = await this.client.audio.translations.create({
|
||||||
file: audio_file,
|
file: audio_file,
|
||||||
model: 'whisper-1'
|
model: 'whisper-1'
|
||||||
})
|
})
|
||||||
userRole.content.push({
|
chatMessages.push({
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: translation.text
|
text: translation.text
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (this.imageUrls && this.imageUrls.length > 0) {
|
if (this.uploads && this.uploads.length > 0) {
|
||||||
const imageUploads = this.getImageUploads(this.imageUrls)
|
const imageUploads = this.getImageUploads(this.uploads)
|
||||||
for (const url of imageUploads) {
|
for (const url of imageUploads) {
|
||||||
let bf = url.data
|
let bf = url.data
|
||||||
if (url.type == 'stored-file') {
|
if (url.type == 'stored-file') {
|
||||||
@@ -135,7 +138,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
const contents = fs.readFileSync(filePath)
|
const contents = fs.readFileSync(filePath)
|
||||||
bf = 'data:' + url.mime + ';base64,' + contents.toString('base64')
|
bf = 'data:' + url.mime + ';base64,' + contents.toString('base64')
|
||||||
}
|
}
|
||||||
userRole.content.push({
|
chatMessages.push({
|
||||||
type: 'image_url',
|
type: 'image_url',
|
||||||
image_url: {
|
image_url: {
|
||||||
url: bf,
|
url: bf,
|
||||||
@@ -144,6 +147,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
userRole.content = chatMessages
|
||||||
vRequest.messages.push(userRole)
|
vRequest.messages.push(userRole)
|
||||||
if (this.prompt && this.prompt instanceof ChatPromptTemplate) {
|
if (this.prompt && this.prompt instanceof ChatPromptTemplate) {
|
||||||
let chatPrompt = this.prompt as ChatPromptTemplate
|
let chatPrompt = this.prompt as ChatPromptTemplate
|
||||||
@@ -151,12 +155,12 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
if (message instanceof SystemMessagePromptTemplate) {
|
if (message instanceof SystemMessagePromptTemplate) {
|
||||||
vRequest.messages.push({
|
vRequest.messages.push({
|
||||||
role: 'system',
|
role: 'system',
|
||||||
content: [
|
content: (message.prompt as any).template
|
||||||
{
|
})
|
||||||
type: 'text',
|
} else if (message instanceof HumanMessagePromptTemplate) {
|
||||||
text: (message.prompt as any).template
|
vRequest.messages.push({
|
||||||
}
|
role: 'user',
|
||||||
]
|
content: (message.prompt as any).template
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -164,7 +168,6 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
|
|||||||
|
|
||||||
let response
|
let response
|
||||||
try {
|
try {
|
||||||
// @ts-ignore
|
|
||||||
response = await this.client.chat.completions.create(vRequest)
|
response = await this.client.chat.completions.create(vRequest)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error instanceof Error) {
|
if (error instanceof Error) {
|
||||||
|
Before Width: | Height: | Size: 489 B After Width: | Height: | Size: 489 B |
@@ -1,66 +0,0 @@
|
|||||||
import { INode, INodeData, INodeParams } from '../../../src'
|
|
||||||
|
|
||||||
class OpenAIAudioWhisper implements INode {
|
|
||||||
label: string
|
|
||||||
name: string
|
|
||||||
version: number
|
|
||||||
description: string
|
|
||||||
type: string
|
|
||||||
icon: string
|
|
||||||
badge: string
|
|
||||||
category: string
|
|
||||||
baseClasses: string[]
|
|
||||||
inputs: INodeParams[]
|
|
||||||
|
|
||||||
constructor() {
|
|
||||||
this.label = 'Open AI Whisper'
|
|
||||||
this.name = 'openAIAudioWhisper'
|
|
||||||
this.version = 1.0
|
|
||||||
this.type = 'OpenAIWhisper'
|
|
||||||
this.description = 'Speech to text using OpenAI Whisper API'
|
|
||||||
this.icon = 'audio.svg'
|
|
||||||
this.badge = 'BETA'
|
|
||||||
this.category = 'MultiModal'
|
|
||||||
this.baseClasses = [this.type]
|
|
||||||
this.inputs = [
|
|
||||||
{
|
|
||||||
label: 'Purpose',
|
|
||||||
name: 'purpose',
|
|
||||||
type: 'options',
|
|
||||||
options: [
|
|
||||||
{
|
|
||||||
label: 'Transcription',
|
|
||||||
name: 'transcription'
|
|
||||||
},
|
|
||||||
{
|
|
||||||
label: 'Translation',
|
|
||||||
name: 'translation'
|
|
||||||
}
|
|
||||||
],
|
|
||||||
default: 'transcription'
|
|
||||||
},
|
|
||||||
{
|
|
||||||
label: 'Accepted Upload Types',
|
|
||||||
name: 'allowedUploadTypes',
|
|
||||||
type: 'string',
|
|
||||||
default: 'audio/mpeg;audio/x-wav;audio/mp4',
|
|
||||||
hidden: true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
label: 'Maximum Upload Size (MB)',
|
|
||||||
name: 'maxUploadSize',
|
|
||||||
type: 'number',
|
|
||||||
default: '5',
|
|
||||||
hidden: true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
async init(nodeData: INodeData): Promise<any> {
|
|
||||||
const purpose = nodeData.inputs?.purpose as string
|
|
||||||
|
|
||||||
return { purpose }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = { nodeClass: OpenAIAudioWhisper }
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="mdi-text-to-speech" width="24" height="24" viewBox="0 0 24 24"><path d="M8,7A2,2 0 0,1 10,9V14A2,2 0 0,1 8,16A2,2 0 0,1 6,14V9A2,2 0 0,1 8,7M14,14C14,16.97 11.84,19.44 9,19.92V22H7V19.92C4.16,19.44 2,16.97 2,14H4A4,4 0 0,0 8,18A4,4 0 0,0 12,14H14M21.41,9.41L17.17,13.66L18.18,10H14A2,2 0 0,1 12,8V4A2,2 0 0,1 14,2H20A2,2 0 0,1 22,4V8C22,8.55 21.78,9.05 21.41,9.41Z" /></svg>
|
|
||||||
|
Before Width: | Height: | Size: 611 B |
Binary file not shown.
|
Before Width: | Height: | Size: 4.9 KiB |
@@ -234,3 +234,10 @@ export abstract class FlowiseSummaryMemory extends ConversationSummaryMemory imp
|
|||||||
abstract addChatMessages(msgArray: { text: string; type: MessageType }[], overrideSessionId?: string): Promise<void>
|
abstract addChatMessages(msgArray: { text: string; type: MessageType }[], overrideSessionId?: string): Promise<void>
|
||||||
abstract clearChatMessages(overrideSessionId?: string): Promise<void>
|
abstract clearChatMessages(overrideSessionId?: string): Promise<void>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface IFileUpload {
|
||||||
|
data: string
|
||||||
|
type: string
|
||||||
|
name: string
|
||||||
|
mime: string
|
||||||
|
}
|
||||||
@@ -1695,9 +1695,7 @@ export class App {
|
|||||||
if (!endingNodeData) return res.status(500).send(`Ending node ${endingNode.id} data not found`)
|
if (!endingNodeData) return res.status(500).send(`Ending node ${endingNode.id} data not found`)
|
||||||
|
|
||||||
if (endingNodeData && endingNodeData.category !== 'Chains' && endingNodeData.category !== 'Agents') {
|
if (endingNodeData && endingNodeData.category !== 'Chains' && endingNodeData.category !== 'Agents') {
|
||||||
if (endingNodeData.type !== 'OpenAIMultiModalChain') {
|
return res.status(500).send(`Ending node must be either a Chain or Agent`)
|
||||||
return res.status(500).send(`Ending node must be either a Chain or Agent`)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
|||||||
Reference in New Issue
Block a user