Feature/Multer to s3 (#2408)

* add ability to store files from multer to s3

* add check to bypass doc loader
This commit is contained in:
Henry Heng
2024-05-15 19:41:37 +01:00
committed by GitHub
parent c022972cf8
commit b5e502f3b6
6 changed files with 109 additions and 31 deletions
@@ -2,7 +2,7 @@ import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { DocumentStore } from '../../database/entities/DocumentStore'
// @ts-ignore
import {
addFileToStorage,
addSingleFileToStorage,
getFileFromStorage,
ICommonObject,
IDocument,
@@ -343,7 +343,7 @@ const _saveFileToStorage = async (fileBase64: string, entity: DocumentStore) =>
if (mimePrefix) {
mime = mimePrefix.split(';')[0].split(':')[1]
}
await addFileToStorage(mime, bf, filename, DOCUMENT_STORE_BASE_FOLDER, entity.id)
await addSingleFileToStorage(mime, bf, filename, DOCUMENT_STORE_BASE_FOLDER, entity.id)
return {
id: uuidv4(),
name: filename,
+11 -10
View File
@@ -1,5 +1,5 @@
import { Request } from 'express'
import { IFileUpload, convertSpeechToText, ICommonObject, addFileToStorage } from 'flowise-components'
import { IFileUpload, convertSpeechToText, ICommonObject, addSingleFileToStorage, addArrayFilesToStorage } from 'flowise-components'
import { StatusCodes } from 'http-status-codes'
import { IncomingInput, IMessage, INodeData, IReactFlowObject, IReactFlowNode, IDepthQueue, chatType, IChatMessage } from '../Interface'
import { InternalFlowiseError } from '../errors/internalFlowiseError'
@@ -71,7 +71,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
const splitDataURI = upload.data.split(',')
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const mime = splitDataURI[0].split(':')[1].split(';')[0]
await addFileToStorage(mime, bf, filename, chatflowid, chatId)
await addSingleFileToStorage(mime, bf, filename, chatflowid, chatId)
upload.type = 'stored-file'
// Omit upload.data since we don't store the content in database
fileUploads[i] = omit(upload, ['data'])
@@ -111,20 +111,21 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
let isStreamValid = false
const files = (req.files as any[]) || []
const files = (req.files as Express.Multer.File[]) || []
if (files.length) {
const overrideConfig: ICommonObject = { ...req.body }
const fileNames: string[] = []
for (const file of files) {
const fileData = fs.readFileSync(file.path, { encoding: 'base64' })
const dataBase64String = `data:${file.mimetype};base64,${fileData},filename:${file.filename}`
const fileBuffer = fs.readFileSync(file.path)
const storagePath = await addArrayFilesToStorage(file.mimetype, fileBuffer, file.originalname, fileNames, chatflowid)
const fileInputField = mapMimeTypeToInputField(file.mimetype)
if (overrideConfig[fileInputField]) {
overrideConfig[fileInputField] = JSON.stringify([...JSON.parse(overrideConfig[fileInputField]), dataBase64String])
} else {
overrideConfig[fileInputField] = JSON.stringify([dataBase64String])
}
overrideConfig[fileInputField] = storagePath
fs.unlinkSync(file.path)
}
incomingInput = {
question: req.body.question ?? 'hello',
+45 -2
View File
@@ -374,6 +374,44 @@ export const saveUpsertFlowData = (nodeData: INodeData, upsertHistory: Record<st
return existingUpsertFlowData
}
/**
 * Decide whether a document loader node can be skipped while building the flow.
 *
 * Returns true only when the edge leaving the loader's active output handle
 * targets a node whose category is 'Vector Stores'. The reason is that we don't
 * want to run the doc loader again every time the flow is built, because that
 * work was already done during upserting.
 *
 * Only the first edge whose `sourceHandle` matches the resolved output id is inspected.
 *
 * TODO: Remove this logic when we remove doc loader nodes from the canvas
 * @param {IReactFlowNode} reactFlowNode node being checked
 * @param {IReactFlowNode[]} reactFlowNodes all nodes of the flow, used to look up the edge target
 * @param {IReactFlowEdge[]} reactFlowEdges all edges of the flow
 * @returns {boolean} true when the loader's output feeds a 'Vector Stores' node
 */
const checkIfDocLoaderShouldBeIgnored = (
    reactFlowNode: IReactFlowNode,
    reactFlowNodes: IReactFlowNode[],
    reactFlowEdges: IReactFlowEdge[]
): boolean => {
    const { outputAnchors, outputs } = reactFlowNode.data

    // Resolve the id of the output handle that edges would originate from.
    let sourceHandleId = ''
    if (outputAnchors.length) {
        const primaryAnchor = outputAnchors[0]
        const hasSelectedOutputs = Object.keys(outputs || {}).length > 0

        if (!hasSelectedOutputs) {
            // No output selection recorded: the first anchor itself is the handle.
            sourceHandleId = (primaryAnchor as ICommonObject).id
        } else {
            // An output was selected: use the anchor option whose name matches it.
            const selectedName = outputs?.output
            const selectedOption = primaryAnchor.options?.find((option) => option.name === selectedName)
            if (selectedOption) sourceHandleId = (selectedOption as ICommonObject).id
        }
    }

    // Follow the first edge leaving that handle; without one there is nothing to bypass for.
    const outgoingEdge = reactFlowEdges.find((edge) => edge.sourceHandle === sourceHandleId)
    const targetNodeId = outgoingEdge?.target
    if (!targetNodeId) return false

    const targetNode = reactFlowNodes.find((candidate) => candidate.id === targetNodeId)
    return (targetNode?.data.category || '') === 'Vector Stores'
}
/**
* Build langchain from start to end
* @param {string[]} startingNodeIds
@@ -446,7 +484,6 @@ export const buildFlow = async (
const reactFlowNodeData: INodeData = resolveVariables(flowNodeData, flowNodes, question, chatHistory)
// TODO: Avoid processing Text Splitter + Doc Loader once Upsert & Load Existing Vector Nodes are deprecated
if (isUpsert && stopNodeId && nodeId === stopNodeId) {
logger.debug(`[server]: Upserting ${reactFlowNode.data.label} (${reactFlowNode.data.id})`)
const indexResult = await newNodeInstance.vectorStoreMethods!['upsert']!.call(newNodeInstance, reactFlowNodeData, {
@@ -464,6 +501,12 @@ export const buildFlow = async (
if (indexResult) upsertHistory['result'] = indexResult
logger.debug(`[server]: Finished upserting ${reactFlowNode.data.label} (${reactFlowNode.data.id})`)
break
} else if (
!isUpsert &&
reactFlowNode.data.category === 'Document Loaders' &&
checkIfDocLoaderShouldBeIgnored(reactFlowNode, reactFlowNodes, reactFlowEdges)
) {
initializedNodes.add(nodeId)
} else {
logger.debug(`[server]: Initializing ${reactFlowNode.data.label} (${reactFlowNode.data.id})`)
let outputResult = await newNodeInstance.init(reactFlowNodeData, question, {
@@ -935,7 +978,7 @@ export const mapMimeTypeToInputField = (mimeType: string) => {
case 'text/yaml':
return 'yamlFile'
default:
return ''
return 'txtFile'
}
}
+10 -9
View File
@@ -1,7 +1,7 @@
import { Request } from 'express'
import * as fs from 'fs'
import { cloneDeep, omit } from 'lodash'
import { ICommonObject, IMessage } from 'flowise-components'
import { ICommonObject, IMessage, addArrayFilesToStorage } from 'flowise-components'
import telemetryService from '../services/telemetry'
import logger from '../utils/logger'
import {
@@ -48,20 +48,21 @@ export const upsertVector = async (req: Request, isInternal: boolean = false) =>
}
}
const files = (req.files as any[]) || []
const files = (req.files as Express.Multer.File[]) || []
if (files.length) {
const overrideConfig: ICommonObject = { ...req.body }
const fileNames: string[] = []
for (const file of files) {
const fileData = fs.readFileSync(file.path, { encoding: 'base64' })
const dataBase64String = `data:${file.mimetype};base64,${fileData},filename:${file.filename}`
const fileBuffer = fs.readFileSync(file.path)
const storagePath = await addArrayFilesToStorage(file.mimetype, fileBuffer, file.originalname, fileNames, chatflowid)
const fileInputField = mapMimeTypeToInputField(file.mimetype)
if (overrideConfig[fileInputField]) {
overrideConfig[fileInputField] = JSON.stringify([...JSON.parse(overrideConfig[fileInputField]), dataBase64String])
} else {
overrideConfig[fileInputField] = JSON.stringify([dataBase64String])
}
overrideConfig[fileInputField] = storagePath
fs.unlinkSync(file.path)
}
incomingInput = {
question: req.body.question ?? 'hello',