Feature/externalize files from chatflow - do not save as base64 (#1976)

* initial commit. Externalizing the file base64 string from flowData

* csv - docloader - Externalizing the file base64 string from flowData

* csv - docloader - Externalizing the file base64 string from flowData

* DocX - docloader - Externalizing the file base64 string from flowData

* Json - docloader - Externalizing the file base64 string from flowData

* Jsonlines - docloader - Externalizing the file base64 string from flowData

* PDF - docloader - Externalizing the file base64 string from flowData

* Vectara - vector store - Externalizing the file base64 string from flowData

* OpenAPIToolkit - tools - Externalizing the file base64 string from flowData

* OpenAPIChain - chain - Externalizing the file base64 string from flowData

* lint fixes

* datasource enabled - initial commit

* CSVAgent - agents - Externalizing the file base64 string from flowData

* Externalizing the file base64 string from flowData

* Externalizing the file base64 string from flowData

* add pnpm-lock.yaml

* update filerepository to add try catch

* Rename FileRepository.ts to fileRepository.ts

---------

Co-authored-by: Henry <hzj94@hotmail.com>
Co-authored-by: Henry Heng <henryheng@flowiseai.com>
This commit is contained in:
Vinod Kiran
2024-04-04 21:41:06 +05:30
committed by GitHub
parent eed7de6df5
commit 658fa3984e
16 changed files with 593 additions and 194 deletions
@@ -17,6 +17,7 @@ import { utilGetUploadsConfig } from '../../utils/getUploadsConfig'
import { ChatMessage } from '../../database/entities/ChatMessage'
import { ChatMessageFeedback } from '../../database/entities/ChatMessageFeedback'
import { UpsertHistory } from '../../database/entities/UpsertHistory'
import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fileRepository'
// Check if chatflow valid for streaming
const checkIfChatflowIsValidForStreaming = async (chatflowId: string): Promise<any> => {
@@ -184,8 +185,24 @@ const getChatflowById = async (chatflowId: string): Promise<any> => {
const saveChatflow = async (newChatFlow: ChatFlow): Promise<any> => {
try {
const appServer = getRunningExpressApp()
const newDbChatflow = await appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow)
let dbResponse: ChatFlow
if (containsBase64File(newChatFlow)) {
// we need a 2-step process, as we need to save the chatflow first and then update the file paths
// this is because we need the chatflow id to create the file paths
// step 1 - save with empty flowData
const incomingFlowData = newChatFlow.flowData
newChatFlow.flowData = JSON.stringify({})
const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
const step1Results = await appServer.AppDataSource.getRepository(ChatFlow).save(chatflow)
// step 2 - convert base64 to file paths and update the chatflow
step1Results.flowData = updateFlowDataWithFilePaths(step1Results.id, incomingFlowData)
dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(step1Results)
} else {
const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(chatflow)
}
await appServer.telemetry.sendTelemetry('chatflow_created', {
version: await getAppVersion(),
chatflowId: dbResponse.id,
@@ -200,6 +217,9 @@ const saveChatflow = async (newChatFlow: ChatFlow): Promise<any> => {
const updateChatflow = async (chatflow: ChatFlow, updateChatFlow: ChatFlow): Promise<any> => {
try {
const appServer = getRunningExpressApp()
if (containsBase64File(updateChatFlow)) {
updateChatFlow.flowData = updateFlowDataWithFilePaths(chatflow.id, updateChatFlow.flowData)
}
const newDbChatflow = await appServer.AppDataSource.getRepository(ChatFlow).merge(chatflow, updateChatFlow)
const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow)
// chatFlowPool is initialized only when a flow is opened
+113
View File
@@ -0,0 +1,113 @@
import { ChatFlow } from '../database/entities/ChatFlow'
import path from 'path'
import { getStoragePath } from 'flowise-components'
import fs from 'fs'
import { IReactFlowObject } from '../Interface'
/**
 * Reports whether any node in the 'Document Loaders' category has a string input
 * that is (or, for a JSON array input, contains) an inline base64 data URI.
 *
 * A value counts as a base64 data URI when it matches `^data.*;base64`
 * (case-insensitive). Nodes in other categories are ignored.
 *
 * @param chatflow - chatflow whose serialized `flowData` is scanned
 * @returns `true` as soon as one matching input is found; `false` otherwise,
 *          including when `flowData` is absent/empty or holds no `nodes` array
 * @throws SyntaxError when `flowData` is a non-empty string that is not valid JSON
 */
export const containsBase64File = (chatflow: ChatFlow) => {
    // Partial updates may not carry flowData at all; there is nothing to scan then.
    if (!chatflow?.flowData) {
        return false
    }

    const parsedFlowData: IReactFlowObject = JSON.parse(chatflow.flowData)
    const nodes = parsedFlowData?.nodes
    if (!Array.isArray(nodes)) {
        return false
    }

    const re = new RegExp('^data.*;base64', 'i')

    for (const node of nodes) {
        if (node?.data?.category !== 'Document Loaders') {
            continue
        }
        const inputs = node.data.inputs
        if (!inputs) {
            continue
        }
        for (const key of Object.getOwnPropertyNames(inputs)) {
            const input = inputs[key]
            if (!input || typeof input !== 'string') {
                continue
            }
            // Single file stored directly as a data URI
            if (re.test(input)) {
                return true
            }
            // Multiple files are stored as a JSON-encoded array of data URIs
            if (input.startsWith('[')) {
                try {
                    const files = JSON.parse(input)
                    if (Array.isArray(files) && files.some((file: unknown) => typeof file === 'string' && re.test(file))) {
                        return true
                    }
                } catch (e) {
                    // Not actually JSON (free text that happens to start with '['); keep scanning
                }
            }
        }
    }
    return false
}
/**
 * Decodes one uploaded data URI and writes it into the chatflow's folder under
 * the storage path returned by `getStoragePath()`.
 *
 * The input is split on ','. The last segment is read as `<label>:<file name>`
 * and the segment before it as the base64 payload
 * (presumably `data:<mime>;base64,<payload>,filename:<name>` - confirm against the uploader).
 *
 * @param file - data URI string for a single file
 * @param chatflowid - id of the owning chatflow; used as the sub-folder name
 * @param fileNames - accumulator of stored names for the current input; the new
 *                    name is appended to it (the array is mutated)
 * @returns `'FILE-STORAGE::'` followed by the JSON-encoded `fileNames` array
 * @throws Error when no usable file name can be extracted, or when the
 *         directory/file cannot be written
 */
function addFileToStorage(file: string, chatflowid: string, fileNames: string[]) {
    const splitDataURI = file.split(',')

    // Take everything after the first ':' so names that contain ':' are not truncated
    const nameSegment = splitDataURI.pop() ?? ''
    const separatorIndex = nameSegment.indexOf(':')
    const rawName = separatorIndex === -1 ? '' : nameSegment.slice(separatorIndex + 1)

    // The name comes from the client: drop any directory components so a crafted
    // value such as '../../x' cannot be written outside the chatflow folder
    const filename = path.basename(rawName)
    if (!filename || filename === '.' || filename === '..') {
        throw new Error('Unable to determine a valid file name from the uploaded data URI')
    }

    const dir = path.join(getStoragePath(), chatflowid)
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true })
    }

    const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
    const filePath = path.join(dir, filename)
    fs.writeFileSync(filePath, bf)

    fileNames.push(filename)
    return 'FILE-STORAGE::' + JSON.stringify(fileNames)
}
/**
 * Replaces inline base64 data URIs in 'Document Loaders' node inputs with
 * storage references and returns the re-serialized flow data.
 *
 * For every string input of a 'Document Loaders' node:
 *  - a value matching `^data.*;base64` is handed to `addFileToStorage` and
 *    replaced by its return value;
 *  - a value starting with '[' is parsed as a JSON array and each matching
 *    entry is stored; the input ends up holding the reference returned for the
 *    last stored entry (which lists every name stored for that input).
 * Inputs that start with '[' but are not valid JSON are left untouched.
 *
 * @param chatflowid - id of the chatflow that owns the files
 * @param flowData - serialized flow (JSON string)
 * @returns the updated flow as a JSON string; if anything fails (invalid JSON,
 *          storage error) the original `flowData` is returned unchanged so the
 *          caller never persists an empty flow
 */
export const updateFlowDataWithFilePaths = (chatflowid: string, flowData: string) => {
    try {
        const parsedFlowData: IReactFlowObject = JSON.parse(flowData)
        const re = new RegExp('^data.*;base64', 'i')
        const nodes = Array.isArray(parsedFlowData?.nodes) ? parsedFlowData.nodes : []

        for (const node of nodes) {
            if (node?.data?.category !== 'Document Loaders') {
                continue
            }
            const inputs = node.data.inputs
            if (!inputs) {
                continue
            }
            for (const key of Object.getOwnPropertyNames(inputs)) {
                const input = inputs[key]
                if (!input || typeof input !== 'string') {
                    continue
                }
                // Names stored for this input; shared across all files of an array input
                const fileNames: string[] = []

                if (input.startsWith('[')) {
                    let files: unknown
                    try {
                        files = JSON.parse(input)
                    } catch (e) {
                        // Free text that merely starts with '[': not a file list, leave as is
                        continue
                    }
                    if (!Array.isArray(files)) {
                        continue
                    }
                    // Storage errors are deliberately NOT swallowed here: they reach the
                    // outer catch so a half-converted file list is never returned
                    for (const file of files) {
                        if (typeof file === 'string' && re.test(file)) {
                            inputs[key] = addFileToStorage(file, chatflowid, fileNames)
                        }
                    }
                } else if (re.test(input)) {
                    inputs[key] = addFileToStorage(input, chatflowid, fileNames)
                }
            }
        }
        return JSON.stringify(parsedFlowData)
    } catch (e) {
        // Fall back to the untouched input rather than '' - callers save the
        // returned string as the chatflow's flowData
        return flowData
    }
}
+9
View File
@@ -243,6 +243,15 @@ export const getEndingNodes = (nodeDependencies: INodeDependencies, graph: INode
*/
export const getFileName = (fileBase64: string): string => {
let fileNames = []
if (fileBase64.startsWith('FILE-STORAGE::')) {
const names = fileBase64.substring(14)
if (names.includes('[') && names.includes(']')) {
const files = JSON.parse(names)
return files.join(', ')
} else {
return fileBase64.substring(14)
}
}
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
const files = JSON.parse(fileBase64)
for (const file of files) {