Feature/externalize files from chatflow - do not save as base64 (#1976)

* initial commit. Externalizing the file base64 string from flowData

* csv - docloader - Externalizing the file base64 string from flowData

* csv - docloader - Externalizing the file base64 string from flowData

* DocX - docloader - Externalizing the file base64 string from flowData

* Json - docloader - Externalizing the file base64 string from flowData

* Jsonlines - docloader - Externalizing the file base64 string from flowData

* PDF - docloader - Externalizing the file base64 string from flowData

* Vectara - vector store - Externalizing the file base64 string from flowData

* OpenAPIToolkit - tools - Externalizing the file base64 string from flowData

* OpenAPIChain - chain - Externalizing the file base64 string from flowData

* lint fixes

* datasource enabled - initial commit

* CSVAgent - agents - Externalizing the file base64 string from flowData

* Externalizing the file base64 string from flowData

* Externalizing the file base64 string from flowData

* add pnpm-lock.yaml

* update fileRepository to add try/catch

* Rename FileRepository.ts to fileRepository.ts

---------

Co-authored-by: Henry <hzj94@hotmail.com>
Co-authored-by: Henry Heng <henryheng@flowiseai.com>
This commit is contained in:
Vinod Kiran
2024-04-04 21:41:06 +05:30
committed by GitHub
parent eed7de6df5
commit 658fa3984e
16 changed files with 593 additions and 194 deletions
+113
View File
@@ -0,0 +1,113 @@
import { ChatFlow } from '../database/entities/ChatFlow'
import path from 'path'
import { getStoragePath } from 'flowise-components'
import fs from 'fs'
import { IReactFlowObject } from '../Interface'
/**
 * Reports whether any 'Document Loaders' node of a chatflow carries an inline
 * base64 data URI in one of its inputs.
 *
 * An input counts as a match when it is a non-empty string that either matches
 * `^data.*;base64` (case-insensitive) itself, or starts with `[` and parses as
 * a JSON array holding at least one matching element.
 *
 * @param chatflow - chatflow whose `flowData` JSON string is inspected
 * @returns `true` as soon as the first matching input is found, otherwise `false`
 * @throws SyntaxError when `chatflow.flowData` is not valid JSON
 */
export const containsBase64File = (chatflow: ChatFlow): boolean => {
    const parsedFlowData: IReactFlowObject = JSON.parse(chatflow.flowData)
    const re = new RegExp('^data.*;base64', 'i')
    for (const node of parsedFlowData.nodes) {
        if (node.data.category !== 'Document Loaders') {
            continue
        }
        const inputs = node.data.inputs
        if (!inputs) {
            continue
        }
        for (const key of Object.getOwnPropertyNames(inputs)) {
            const input = inputs[key]
            if (!input || typeof input !== 'string') {
                continue
            }
            // Single file stored directly as a data URI.
            if (re.test(input)) {
                return true
            }
            if (!input.startsWith('[')) {
                continue
            }
            // Several files are stored as a JSON-encoded array of data URIs.
            try {
                const files = JSON.parse(input)
                for (const file of files) {
                    if (re.test(file)) {
                        // Return immediately: one hit answers the question, no need
                        // to keep walking the remaining inputs and nodes.
                        return true
                    }
                }
            } catch (e) {
                // Not valid JSON after all: treat it as an ordinary string input.
                continue
            }
        }
    }
    return false
}
/**
 * Decodes one file from its data URI and writes it into the chatflow's folder
 * under the storage path.
 *
 * The string is split on commas: the last segment is expected to look like
 * `filename:<name>` and the segment before it is decoded as the base64 payload.
 *
 * @param file - data URI string ending in a `,filename:<name>` segment
 * @param chatflowid - used as the sub-folder name under the storage path
 * @param fileNames - accumulator of stored names; the new name is appended to it
 * @returns `'FILE-STORAGE::'` followed by the JSON-encoded `fileNames` list
 * @throws Error when no usable file name can be extracted; fs errors propagate unchanged
 */
function addFileToStorage(file: string, chatflowid: string, fileNames: string[]) {
    const splitDataURI = file.split(',')
    const rawName = splitDataURI.pop()?.split(':')[1] ?? ''
    // The name is taken verbatim from flow data, so keep only its last path
    // segment (either separator style); otherwise '../' sequences would let
    // the write land outside the chatflow folder.
    const filename = rawName.split(/[\\/]/).pop() ?? ''
    if (!filename || filename === '.' || filename === '..') {
        // Fail before touching the filesystem instead of letting writeFileSync
        // hit the directory itself with an opaque error.
        throw new Error(`Invalid file name in uploaded file: '${rawName}'`)
    }
    const bf = Buffer.from(splitDataURI.pop() || '', 'base64')

    const dir = path.join(getStoragePath(), chatflowid)
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true })
    }
    fs.writeFileSync(path.join(dir, filename), bf)
    fileNames.push(filename)
    return 'FILE-STORAGE::' + JSON.stringify(fileNames)
}
/**
 * Rewrites a chatflow's flow data so that base64 data URIs held by
 * 'Document Loaders' nodes are written out through `addFileToStorage` and
 * replaced by the reference string that helper returns.
 *
 * @param chatflowid - forwarded to `addFileToStorage` for every stored file
 * @param flowData - JSON string of the flow (an object with a `nodes` array)
 * @returns the re-serialized flow data, or `''` when parsing or processing throws
 */
export const updateFlowDataWithFilePaths = (chatflowid: string, flowData: string) => {
    const base64Pattern = new RegExp('^data.*;base64', 'i')
    try {
        const flow: IReactFlowObject = JSON.parse(flowData)
        const allNodes = flow.nodes
        for (let n = 0; n < allNodes.length; n++) {
            const nodeData = allNodes[n].data
            if (nodeData.category !== 'Document Loaders' || !nodeData.inputs) {
                continue
            }
            const nodeInputs = nodeData.inputs
            for (const inputName of Object.getOwnPropertyNames(nodeInputs)) {
                const value = nodeInputs[inputName]
                if (!value || typeof value !== 'string') {
                    continue
                }
                // Fresh accumulator per input: the stored reference lists only
                // the files that belong to this one input.
                const storedNames: string[] = []
                if (!value.startsWith('[')) {
                    // Plain string: at most a single data URI.
                    if (base64Pattern.test(value)) {
                        nodeInputs[inputName] = addFileToStorage(value, chatflowid, storedNames)
                    }
                    continue
                }
                try {
                    // JSON-encoded list: each matching entry is stored in turn, and
                    // the input ends up holding the reference from the last call,
                    // which carries every name accumulated so far.
                    const entries = JSON.parse(value)
                    for (const entry of entries) {
                        if (base64Pattern.test(entry)) {
                            nodeInputs[inputName] = addFileToStorage(entry, chatflowid, storedNames)
                        }
                    }
                } catch (e) {
                    // Best effort: any failure while handling this list is ignored
                    // and processing moves on to the next input.
                }
            }
        }
        return JSON.stringify(flow)
    } catch (e) {
        return ''
    }
}