Feature/Indexing (#1802)

* indexing

* fix for multiple files upsert

* fix default Postgres port

* fix SQLite node description

* add MySQLRecordManager node

* fix MySQL unique index

* add upsert history

* update jsx ui

* lint-fix

* update dialog details

* update llamaindex pinecone

---------

Co-authored-by: chungyau97 <chungyau97@gmail.com>
This commit is contained in:
Henry Heng
2024-04-02 23:47:19 +01:00
committed by GitHub
parent 957694a912
commit e422ce287b
67 changed files with 3006 additions and 246 deletions
@@ -36,7 +36,6 @@ import { utilAddChatMessage } from './addChatMesage'
* @param {Request} req
* @param {Server} socketIO
* @param {boolean} isInternal
* @param {boolean} isUpsert
*/
export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInternal: boolean = false): Promise<any> => {
try {
+86 -2
View File
@@ -237,6 +237,84 @@ export const getEndingNodes = (nodeDependencies: INodeDependencies, graph: INode
return endingNodeIds
}
/**
 * Get file name(s) from an uploaded file string.
 *
 * Accepts either a single string or a JSON array of strings (a value that starts with `[`
 * and ends with `]`). For an array, the individual names are joined with `', '`.
 *
 * NOTE(review): the value is presumably of the form `data:<mime>;base64,<payload>,filename:<name>`
 * - confirm against the code that produces the upload string.
 * @param {string} fileBase64
 * @returns {string} the file name, or an empty string when no name can be found
 */
export const getFileName = (fileBase64: string): string => {
    const filenameMarker = ',filename:'

    // Pull the name out of one upload string
    const extractName = (dataURI: string): string => {
        // Preferred path: everything after the marker is the name, so names that
        // themselves contain ',' or ':' are returned untouched
        const markerIndex = dataURI.indexOf(filenameMarker)
        if (markerIndex !== -1) return dataURI.slice(markerIndex + filenameMarker.length)

        // Fallback: last comma separated segment, keeping everything after its first colon
        // (previously only the text between the first and second colon was kept)
        const lastSegment = dataURI.slice(dataURI.lastIndexOf(',') + 1)
        const colonIndex = lastSegment.indexOf(':')
        return colonIndex === -1 ? '' : lastSegment.slice(colonIndex + 1)
    }

    if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
        // Multiple files are passed as a stringified array
        const files: string[] = JSON.parse(fileBase64)
        return files.map((file) => extractName(file)).join(', ')
    }

    return extractName(fileBase64)
}
/**
 * Record the parameters of a node into the upsert history.
 *
 * Builds one entry describing the node (label, name, category, id and the list of its
 * parameter values), appends it to the `flowData` list found on `upsertHistory` (or to a
 * new list when there is none yet) and returns that list.
 * @param {INodeData} nodeData
 * @param {Record<string, any>} upsertHistory
 * @returns {ICommonObject[]} the flowData list including the newly appended entry
 */
export const saveUpsertFlowData = (nodeData: INodeData, upsertHistory: Record<string, any>): ICommonObject[] => {
    const flowDataHistory = upsertHistory['flowData'] ?? []
    const paramValues: ICommonObject[] = []

    for (const key in nodeData.inputs) {
        // Only inputs that map to a declared input param are recorded
        const param = nodeData.inputParams.find((candidate) => candidate.name === key)
        if (!param) continue

        // Skip inputs without a value
        const rawValue = nodeData.inputs[key]
        if (!rawValue) continue

        // Skip values that are wrapped in {{ }}
        const isWrappedInBraces = typeof rawValue === 'string' && rawValue.startsWith('{{') && rawValue.endsWith('}}')
        if (isWrappedInBraces) continue

        // For Document Loaders file params, keep the file name instead of the base64 string
        const isLoaderFile = nodeData.category === 'Document Loaders' && param.type === 'file'

        paramValues.push({
            label: param.label,
            name: param.name,
            type: param.type,
            value: isLoaderFile ? getFileName(rawValue) : rawValue
        })
    }

    flowDataHistory.push({
        label: nodeData.label,
        name: nodeData.name,
        category: nodeData.category,
        id: nodeData.id,
        paramValues
    })

    return flowDataHistory
}
/**
* Build langchain from start to end
* @param {string[]} startingNodeIds
@@ -272,6 +350,8 @@ export const buildFlow = async (
) => {
const flowNodes = cloneDeep(reactFlowNodes)
let upsertHistory: Record<string, any> = {}
// Create a Queue and add our initial node in it
const nodeQueue = [] as INodeQueue[]
const exploredNode = {} as IExploredNode
@@ -302,12 +382,15 @@ export const buildFlow = async (
let flowNodeData = cloneDeep(reactFlowNode.data)
if (overrideConfig) flowNodeData = replaceInputsWithConfig(flowNodeData, overrideConfig)
if (isUpsert) upsertHistory['flowData'] = saveUpsertFlowData(flowNodeData, upsertHistory)
const reactFlowNodeData: INodeData = resolveVariables(flowNodeData, flowNodes, question, chatHistory)
// TODO: Avoid processing Text Splitter + Doc Loader once Upsert & Load Existing Vector Nodes are deprecated
if (isUpsert && stopNodeId && nodeId === stopNodeId) {
logger.debug(`[server]: Upserting ${reactFlowNode.data.label} (${reactFlowNode.data.id})`)
await newNodeInstance.vectorStoreMethods!['upsert']!.call(newNodeInstance, reactFlowNodeData, {
const indexResult = await newNodeInstance.vectorStoreMethods!['upsert']!.call(newNodeInstance, reactFlowNodeData, {
chatId,
sessionId,
chatflowid,
@@ -319,6 +402,7 @@ export const buildFlow = async (
dynamicVariables,
uploads
})
if (indexResult) upsertHistory['result'] = indexResult
logger.debug(`[server]: Finished upserting ${reactFlowNode.data.label} (${reactFlowNode.data.id})`)
break
} else {
@@ -422,7 +506,7 @@ export const buildFlow = async (
flowNodes.push(flowNodes.splice(index, 1)[0])
}
}
return flowNodes
return isUpsert ? (upsertHistory as any) : flowNodes
}
/**
+25 -2
View File
@@ -1,5 +1,6 @@
import { Request, Response } from 'express'
import * as fs from 'fs'
import { cloneDeep, omit } from 'lodash'
import { ICommonObject } from 'flowise-components'
import telemetryService from '../services/telemetry'
import logger from '../utils/logger'
@@ -18,7 +19,14 @@ import { utilValidateKey } from './validateKey'
import { IncomingInput, INodeDirectedGraph, IReactFlowObject, chatType } from '../Interface'
import { ChatFlow } from '../database/entities/ChatFlow'
import { getRunningExpressApp } from '../utils/getRunningExpressApp'
import { UpsertHistory } from '../database/entities/UpsertHistory'
/**
* Upsert documents
* @param {Request} req
* @param {Response} res
* @param {boolean} isInternal
*/
export const upsertVector = async (req: Request, res: Response, isInternal: boolean = false) => {
try {
const appServer = getRunningExpressApp()
@@ -78,6 +86,8 @@ export const upsertVector = async (req: Request, res: Response, isInternal: bool
(node) =>
node.data.category === 'Vector Stores' && !node.data.label.includes('Upsert') && !node.data.label.includes('Load Existing')
)
// Check if multiple vector store nodes exist, and if stopNodeId is specified
if (vsNodes.length > 1 && !stopNodeId) {
return res.status(500).send('There are multiple vector nodes, please provide stopNodeId in body request')
} else if (vsNodes.length === 1 && !stopNodeId) {
@@ -99,7 +109,7 @@ export const upsertVector = async (req: Request, res: Response, isInternal: bool
const { startingNodeIds, depthQueue } = getStartingNodes(filteredGraph, stopNodeId)
await buildFlow(
const upsertedResult = await buildFlow(
startingNodeIds,
nodes,
edges,
@@ -121,6 +131,19 @@ export const upsertVector = async (req: Request, res: Response, isInternal: bool
const startingNodes = nodes.filter((nd) => startingNodeIds.includes(nd.data.id))
await appServer.chatflowPool.add(chatflowid, undefined, startingNodes, incomingInput?.overrideConfig)
// Save to DB
if (upsertedResult['flowData'] && upsertedResult['result']) {
const result = cloneDeep(upsertedResult)
result['flowData'] = JSON.stringify(result['flowData'])
result['result'] = JSON.stringify(omit(result['result'], ['totalKeys', 'addedDocs']))
result.chatflowid = chatflowid
const newUpsertHistory = new UpsertHistory()
Object.assign(newUpsertHistory, result)
const upsertHistory = appServer.AppDataSource.getRepository(UpsertHistory).create(newUpsertHistory)
await appServer.AppDataSource.getRepository(UpsertHistory).save(upsertHistory)
}
await telemetryService.createEvent({
name: `vector_upserted`,
data: {
@@ -131,7 +154,7 @@ export const upsertVector = async (req: Request, res: Response, isInternal: bool
stopNodeId
}
})
return res.status(201).send('Successfully Upserted')
return res.status(201).json(upsertedResult['result'] ?? { result: 'Successfully Upserted' })
} catch (e: any) {
logger.error('[server]: Error:', e)
return res.status(500).send(e.message)