Document Store - Phase 2 (#2912)

* Document Store - Phase 2

* Adding additional columns for vector store config, document store phase 2

* Adding additional columns for vector store config, document store phase 2

* Document Store - Phase 2 - Upsert and Query

* ux cleanup

* retrieval settings and more ux changes

* adding MMR params to execution

* Making the upsert process async.

* add upsert history changes

* making the searchParams dynamic

* removing unnecessary params

* add ability to delete data from vector store

* update margin for vector store query

* adding option to save config in the retrieval playground

* adding chunk number for query return chunks

* Adding a Document Store node in the VectorStore category

* update doc store status, ui touchup

---------

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Vinod Kiran
2024-08-07 23:29:52 +05:30
committed by GitHub
parent c7306c93d7
commit c0bae635b0
36 changed files with 3589 additions and 91 deletions
+19 -1
View File
@@ -5,7 +5,9 @@ export enum DocumentStoreStatus {
SYNC = 'SYNC',
SYNCING = 'SYNCING',
STALE = 'STALE',
NEW = 'NEW'
NEW = 'NEW',
UPSERTING = 'UPSERTING',
UPSERTED = 'UPSERTED'
}
export interface IDocumentStore {
@@ -17,6 +19,9 @@ export interface IDocumentStore {
updatedDate: Date
createdDate: Date
status: DocumentStoreStatus
vectorStoreConfig: string | null // JSON string
embeddingConfig: string | null // JSON string
recordManagerConfig: string | null // JSON string
}
export interface IDocumentStoreFileChunk {
@@ -89,6 +94,9 @@ export class DocumentStoreDTO {
totalChars: number
chunkSize: number
loaders: IDocumentStoreLoader[]
vectorStoreConfig: any
embeddingConfig: any
recordManagerConfig: any
constructor() {}
@@ -109,6 +117,16 @@ export class DocumentStoreDTO {
documentStoreDTO.whereUsed = []
}
if (entity.vectorStoreConfig) {
documentStoreDTO.vectorStoreConfig = JSON.parse(entity.vectorStoreConfig)
}
if (entity.embeddingConfig) {
documentStoreDTO.embeddingConfig = JSON.parse(entity.embeddingConfig)
}
if (entity.recordManagerConfig) {
documentStoreDTO.recordManagerConfig = JSON.parse(entity.recordManagerConfig)
}
if (entity.loaders) {
documentStoreDTO.loaders = JSON.parse(entity.loaders)
documentStoreDTO.loaders.map((loader) => {
@@ -248,6 +248,100 @@ const getDocumentLoaders = async (req: Request, res: Response, next: NextFunctio
}
}
/**
 * Express handler that upserts a document store into its vector store.
 *
 * Hands the request body to documentStoreService.insertIntoVectorStore and
 * responds with the service result mapped through DocumentStoreDTO.fromEntity.
 * Any failure (including a missing body) is forwarded to next().
 */
const insertIntoVectorStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const payload = req.body
        if (payload === undefined) {
            throw new Error('Error: documentStoreController.insertIntoVectorStore - body not provided!')
        }
        const upsertResult = await documentStoreService.insertIntoVectorStore(payload)
        return res.json(DocumentStoreDTO.fromEntity(upsertResult))
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that runs a retrieval query against a document store's vector store.
 *
 * Passes the request body to documentStoreService.queryVectorStore and returns
 * the service response as JSON. Errors are forwarded to next().
 */
const queryVectorStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const payload = req.body
        if (payload === undefined) {
            throw new Error('Error: documentStoreController.queryVectorStore - body not provided!')
        }
        const queryResult = await documentStoreService.queryVectorStore(payload)
        return res.json(queryResult)
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that deletes a document store's data from its vector store.
 *
 * Requires a non-empty `storeId` route parameter; otherwise an
 * InternalFlowiseError with PRECONDITION_FAILED is raised. The service
 * response is returned as JSON and errors are forwarded to next().
 */
const deleteVectorStoreFromStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const { storeId } = req.params
        const storeIdMissing = storeId === undefined || storeId === ''
        if (storeIdMissing) {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                'Error: documentStoreController.deleteVectorStoreFromStore - storeId not provided!'
            )
        }
        const deleteResult = await documentStoreService.deleteVectorStoreFromStore(storeId)
        return res.json(deleteResult)
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that persists the embedding / vector store / record manager
 * configuration of a document store.
 *
 * Passes the request body to documentStoreService.saveVectorStoreConfig and
 * returns the service response as JSON. Errors are forwarded to next().
 */
const saveVectorStoreConfig = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const payload = req.body
        if (payload === undefined) {
            throw new Error('Error: documentStoreController.saveVectorStoreConfig - body not provided!')
        }
        const savedStore = await documentStoreService.saveVectorStoreConfig(payload)
        return res.json(savedStore)
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that updates only the vector store configuration of a
 * document store.
 *
 * Passes the request body to documentStoreService.updateVectorStoreConfigOnly
 * and returns the service response as JSON. Errors are forwarded to next().
 */
const updateVectorStoreConfigOnly = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const payload = req.body
        if (payload === undefined) {
            throw new Error('Error: documentStoreController.updateVectorStoreConfigOnly - body not provided!')
        }
        const updatedStore = await documentStoreService.updateVectorStoreConfigOnly(payload)
        return res.json(updatedStore)
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that returns the embedding providers reported by
 * documentStoreService.getEmbeddingProviders as JSON.
 * Errors are forwarded to next().
 */
const getEmbeddingProviders = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const providers = await documentStoreService.getEmbeddingProviders()
        return res.json(providers)
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that returns the vector store providers reported by
 * documentStoreService.getVectorStoreProviders as JSON.
 * Errors are forwarded to next().
 */
const getVectorStoreProviders = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const providers = await documentStoreService.getVectorStoreProviders()
        return res.json(providers)
    } catch (err) {
        next(err)
    }
}
/**
 * Express handler that returns the record manager providers reported by
 * documentStoreService.getRecordManagerProviders as JSON.
 * Errors are forwarded to next().
 */
const getRecordManagerProviders = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const providers = await documentStoreService.getRecordManagerProviders()
        return res.json(providers)
    } catch (err) {
        next(err)
    }
}
export default {
deleteDocumentStore,
createDocumentStore,
@@ -260,5 +354,13 @@ export default {
previewFileChunks,
getDocumentLoaders,
deleteDocumentStoreFileChunk,
editDocumentStoreFileChunk
editDocumentStoreFileChunk,
insertIntoVectorStore,
getEmbeddingProviders,
getVectorStoreProviders,
getRecordManagerProviders,
saveVectorStoreConfig,
queryVectorStore,
deleteVectorStoreFromStore,
updateVectorStoreConfigOnly
}
@@ -28,4 +28,13 @@ export class DocumentStore implements IDocumentStore {
@Column({ nullable: false, type: 'text' })
status: DocumentStoreStatus
@Column({ nullable: true, type: 'text' })
vectorStoreConfig: string | null
@Column({ nullable: true, type: 'text' })
embeddingConfig: string | null
@Column({ nullable: true, type: 'text' })
recordManagerConfig: string | null
}
@@ -0,0 +1,18 @@
import { MigrationInterface, QueryRunner } from 'typeorm'
/**
 * MySQL migration: adds the `vectorStoreConfig`, `embeddingConfig` and
 * `recordManagerConfig` TEXT columns to the `document_store` table.
 *
 * Each column is checked individually, so the migration can be re-run on a
 * table where only some of the columns already exist (or, for `down`, where
 * some were already removed).
 */
export class AddVectorStoreConfigToDocStore1715861032479 implements MigrationInterface {
    /** Columns introduced by this migration, in the order they are added. */
    private static readonly COLUMNS = ['vectorStoreConfig', 'embeddingConfig', 'recordManagerConfig']

    /** Adds every column of this migration that is not yet present. */
    public async up(queryRunner: QueryRunner): Promise<void> {
        for (const column of AddVectorStoreConfigToDocStore1715861032479.COLUMNS) {
            const columnExists = await queryRunner.hasColumn('document_store', column)
            if (!columnExists) {
                await queryRunner.query(`ALTER TABLE \`document_store\` ADD COLUMN \`${column}\` TEXT;`)
            }
        }
    }

    /** Drops every column of this migration that is still present. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        for (const column of AddVectorStoreConfigToDocStore1715861032479.COLUMNS) {
            const columnExists = await queryRunner.hasColumn('document_store', column)
            if (columnExists) {
                await queryRunner.query(`ALTER TABLE \`document_store\` DROP COLUMN \`${column}\`;`)
            }
        }
    }
}
@@ -17,6 +17,7 @@ import { AddFeedback1707213626553 } from './1707213626553-AddFeedback'
import { AddUpsertHistoryEntity1709814301358 } from './1709814301358-AddUpsertHistoryEntity'
import { AddLead1710832127079 } from './1710832127079-AddLead'
import { AddLeadToChatMessage1711538023578 } from './1711538023578-AddLeadToChatMessage'
import { AddVectorStoreConfigToDocStore1715861032479 } from './1715861032479-AddVectorStoreConfigToDocStore'
import { AddDocumentStore1711637331047 } from './1711637331047-AddDocumentStore'
import { AddAgentReasoningToChatMessage1714679514451 } from './1714679514451-AddAgentReasoningToChatMessage'
import { AddTypeToChatFlow1716300000000 } from './1716300000000-AddTypeToChatFlow'
@@ -47,6 +48,7 @@ export const mysqlMigrations = [
AddLeadToChatMessage1711538023578,
AddAgentReasoningToChatMessage1714679514451,
AddTypeToChatFlow1716300000000,
AddVectorStoreConfigToDocStore1715861032479,
AddApiKey1720230151480,
AddActionToChatMessage1721078251523,
LongTextColumn1722301395521
@@ -0,0 +1,15 @@
import { MigrationInterface, QueryRunner } from 'typeorm'
/**
 * Postgres migration: adds the `vectorStoreConfig`, `embeddingConfig` and
 * `recordManagerConfig` TEXT columns to the `document_store` table.
 */
export class AddVectorStoreConfigToDocStore1715861032479 implements MigrationInterface {
    /** Adds the three config columns; `IF NOT EXISTS` makes each statement safe to repeat. */
    public async up(queryRunner: QueryRunner): Promise<void> {
        for (const column of ['vectorStoreConfig', 'embeddingConfig', 'recordManagerConfig']) {
            await queryRunner.query(`ALTER TABLE "document_store" ADD COLUMN IF NOT EXISTS "${column}" TEXT;`)
        }
    }

    /** Drops the three config columns in the same order they were added. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        for (const column of ['vectorStoreConfig', 'embeddingConfig', 'recordManagerConfig']) {
            await queryRunner.query(`ALTER TABLE "document_store" DROP COLUMN "${column}";`)
        }
    }
}
@@ -18,6 +18,7 @@ import { AddUpsertHistoryEntity1709814301358 } from './1709814301358-AddUpsertHi
import { FieldTypes1710497452584 } from './1710497452584-FieldTypes'
import { AddLead1710832137905 } from './1710832137905-AddLead'
import { AddLeadToChatMessage1711538016098 } from './1711538016098-AddLeadToChatMessage'
import { AddVectorStoreConfigToDocStore1715861032479 } from './1715861032479-AddVectorStoreConfigToDocStore'
import { AddDocumentStore1711637331047 } from './1711637331047-AddDocumentStore'
import { AddAgentReasoningToChatMessage1714679514451 } from './1714679514451-AddAgentReasoningToChatMessage'
import { AddTypeToChatFlow1716300000000 } from './1716300000000-AddTypeToChatFlow'
@@ -48,6 +49,7 @@ export const postgresMigrations = [
AddLeadToChatMessage1711538016098,
AddAgentReasoningToChatMessage1714679514451,
AddTypeToChatFlow1716300000000,
AddVectorStoreConfigToDocStore1715861032479,
AddApiKey1720230151480,
AddActionToChatMessage1721078251523
]
@@ -0,0 +1,15 @@
import { MigrationInterface, QueryRunner } from 'typeorm'
/**
 * SQLite migration: adds the `vectorStoreConfig`, `embeddingConfig` and
 * `recordManagerConfig` TEXT columns to the `document_store` table.
 */
export class AddVectorStoreConfigToDocStore1715861032479 implements MigrationInterface {
    /** Adds the three config columns, one ALTER TABLE statement per column. */
    public async up(queryRunner: QueryRunner): Promise<void> {
        for (const column of ['vectorStoreConfig', 'embeddingConfig', 'recordManagerConfig']) {
            await queryRunner.query(`ALTER TABLE "document_store" ADD COLUMN "${column}" TEXT;`)
        }
    }

    /** Drops the three config columns in the same order they were added. */
    public async down(queryRunner: QueryRunner): Promise<void> {
        for (const column of ['vectorStoreConfig', 'embeddingConfig', 'recordManagerConfig']) {
            await queryRunner.query(`ALTER TABLE "document_store" DROP COLUMN "${column}";`)
        }
    }
}
@@ -17,6 +17,7 @@ import { AddFeedback1707213619308 } from './1707213619308-AddFeedback'
import { AddUpsertHistoryEntity1709814301358 } from './1709814301358-AddUpsertHistoryEntity'
import { AddLead1710832117612 } from './1710832117612-AddLead'
import { AddLeadToChatMessage1711537986113 } from './1711537986113-AddLeadToChatMessage'
import { AddVectorStoreConfigToDocStore1715861032479 } from './1715861032479-AddVectorStoreConfigToDocStore'
import { AddDocumentStore1711637331047 } from './1711637331047-AddDocumentStore'
import { AddAgentReasoningToChatMessage1714679514451 } from './1714679514451-AddAgentReasoningToChatMessage'
import { AddTypeToChatFlow1716300000000 } from './1716300000000-AddTypeToChatFlow'
@@ -46,6 +47,7 @@ export const sqliteMigrations = [
AddLeadToChatMessage1711537986113,
AddAgentReasoningToChatMessage1714679514451,
AddTypeToChatFlow1716300000000,
AddVectorStoreConfigToDocStore1715861032479,
AddApiKey1720230151480,
AddActionToChatMessage1721078251523
]
@@ -16,7 +16,7 @@ router.delete('/store/:id', documentStoreController.deleteDocumentStore)
/** Component Nodes = Document Store - Loaders */
// Get all loaders
router.get('/loaders', documentStoreController.getDocumentLoaders)
router.get('/components/loaders', documentStoreController.getDocumentLoaders)
// delete loader from document store
router.delete('/loader/:id/:loaderId', documentStoreController.deleteLoaderFromDocumentStore)
@@ -33,4 +33,22 @@ router.put('/chunks/:storeId/:loaderId/:chunkId', documentStoreController.editDo
// Get all file chunks from the store
router.get('/chunks/:storeId/:fileId/:pageNo', documentStoreController.getDocumentStoreFileChunks)
// Upsert the document store's chunks into the selected vector store
router.post('/vectorstore/insert', documentStoreController.insertIntoVectorStore)
// Save the embedding / vector store / record manager configuration of a document store
router.post('/vectorstore/save', documentStoreController.saveVectorStoreConfig)
// Delete the document store's data from its configured vector store
router.delete('/vectorstore/:storeId', documentStoreController.deleteVectorStoreFromStore)
// Run a retrieval query against the document store's vector store
router.post('/vectorstore/query', documentStoreController.queryVectorStore)
// Get all embedding providers
router.get('/components/embeddings', documentStoreController.getEmbeddingProviders)
// Get all vector store providers
router.get('/components/vectorstore', documentStoreController.getVectorStoreProviders)
// Get all record manager providers
router.get('/components/recordmanager', documentStoreController.getRecordManagerProviders)
// Update only the vector store configuration of a document store (used by the retrieval playground)
router.post('/vectorstore/update', documentStoreController.updateVectorStoreConfigOnly)
export default router
@@ -1,6 +1,5 @@
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { DocumentStore } from '../../database/entities/DocumentStore'
// @ts-ignore
import {
addSingleFileToStorage,
getFileFromStorage,
@@ -10,22 +9,29 @@ import {
removeSpecificFileFromStorage
} from 'flowise-components'
import {
chatType,
DocumentStoreStatus,
IDocumentStoreFileChunkPagedResponse,
IDocumentStoreLoader,
IDocumentStoreLoaderFile,
IDocumentStoreLoaderForPreview,
IDocumentStoreWhereUsed
IDocumentStoreWhereUsed,
INodeData
} from '../../Interface'
import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk'
import { v4 as uuidv4 } from 'uuid'
import { databaseEntities } from '../../utils'
import { databaseEntities, getAppVersion, saveUpsertFlowData } from '../../utils'
import logger from '../../utils/logger'
import nodesService from '../nodes'
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
import { StatusCodes } from 'http-status-codes'
import { getErrorMessage } from '../../errors/utils'
import { ChatFlow } from '../../database/entities/ChatFlow'
import { Document } from '@langchain/core/documents'
import { App } from '../../index'
import { UpsertHistory } from '../../database/entities/UpsertHistory'
import { cloneDeep, omit } from 'lodash'
import telemetryService from '../telemetry'
const DOCUMENT_STORE_BASE_FOLDER = 'docustore'
@@ -234,8 +240,16 @@ const deleteDocumentStore = async (storeId: string) => {
const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
id: storeId
})
if (!entity) throw new Error(`Document store ${storeId} not found`)
if (!entity) {
throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${storeId} not found`)
}
await removeFilesFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id)
// delete upsert history
await appServer.AppDataSource.getRepository(UpsertHistory).delete({
chatflowid: storeId
})
// now delete the store
const tbd = await appServer.AppDataSource.getRepository(DocumentStore).delete({
id: storeId
@@ -285,6 +299,83 @@ const deleteDocumentStoreFileChunk = async (storeId: string, docId: string, chun
}
}
/**
 * Deletes the data a document store has upserted into its vector store.
 *
 * Loads the store's saved embedding, vector store and record manager
 * configurations, instantiates each component and, when the vector store node
 * exposes a `delete` method, calls it with an empty id list (the ids are
 * resolved through the record manager).
 *
 * @param storeId - id of the DocumentStore entity
 * @throws InternalFlowiseError NOT_FOUND when the store or any of its three
 *         configurations is missing; INTERNAL_SERVER_ERROR for unexpected failures
 */
const deleteVectorStoreFromStore = async (storeId: string) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${storeId} not found`)
        }

        if (!entity.embeddingConfig) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Embedding for Document store ${storeId} not found`)
        }

        if (!entity.vectorStoreConfig) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Vector Store for Document store ${storeId} not found`)
        }

        if (!entity.recordManagerConfig) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Record Manager for Document Store ${storeId} is needed to delete data from Vector Store`
            )
        }

        const options: ICommonObject = {
            chatflowid: storeId,
            appDataSource: appServer.AppDataSource,
            databaseEntities,
            logger
        }

        // Get Record Manager Instance
        const recordManagerConfig = JSON.parse(entity.recordManagerConfig)
        const recordManagerObj = await _createRecordManagerObject(
            appServer,
            { recordManagerName: recordManagerConfig.name, recordManagerConfig: recordManagerConfig.config },
            options
        )

        // Get Embeddings Instance
        const embeddingConfig = JSON.parse(entity.embeddingConfig)
        const embeddingObj = await _createEmbeddingsObject(
            appServer,
            { embeddingName: embeddingConfig.name, embeddingConfig: embeddingConfig.config },
            options
        )

        // Get Vector Store Node Data
        const vectorStoreConfig = JSON.parse(entity.vectorStoreConfig)
        const vectorStoreData = { vectorStoreName: vectorStoreConfig.name, vectorStoreConfig: vectorStoreConfig.config }
        const vStoreNodeData = _createVectorStoreNodeData(appServer, vectorStoreData, embeddingObj, recordManagerObj)

        // Get Vector Store Instance
        const vectorStoreObj = await _createVectorStoreObject(appServer, vectorStoreData, vStoreNodeData)

        const idsToDelete: string[] = [] // empty ids because we get it dynamically from the record manager

        // Call the delete method of the vector store, when the node provides one
        if (vectorStoreObj.vectorStoreMethods.delete) {
            await vectorStoreObj.vectorStoreMethods.delete(vStoreNodeData, idsToDelete, options)
        }
    } catch (error) {
        // Errors raised above already carry the intended status (e.g. NOT_FOUND);
        // re-wrapping them would turn every one of them into a 500.
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.deleteVectorStoreFromStore - ${getErrorMessage(error)}`
        )
    }
}
const editDocumentStoreFileChunk = async (storeId: string, docId: string, chunkId: string, content: string, metadata: ICommonObject) => {
try {
const appServer = getRunningExpressApp()
@@ -700,6 +791,417 @@ const updateDocumentStoreUsage = async (chatId: string, storeId: string | undefi
}
}
/**
 * Replaces only the vector store configuration of a document store.
 *
 * When `data.vectorStoreName` is set, the store's `vectorStoreConfig` is
 * overwritten with `{ config: data.vectorStoreConfig, name: data.vectorStoreName }`
 * (as a JSON string) and the saved entity is returned. Otherwise nothing is
 * changed and an empty object is returned.
 *
 * @param data - expects `storeId`, and optionally `vectorStoreName` / `vectorStoreConfig`
 * @throws InternalFlowiseError NOT_FOUND when the store does not exist;
 *         INTERNAL_SERVER_ERROR for unexpected failures
 */
const updateVectorStoreConfigOnly = async (data: ICommonObject) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: data.storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${data.storeId} not found`)
        }

        if (data.vectorStoreName) {
            entity.vectorStoreConfig = JSON.stringify({
                config: data.vectorStoreConfig,
                name: data.vectorStoreName
            })

            const updatedEntity = await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
            return updatedEntity
        }
        return {}
    } catch (error) {
        // Keep the status of errors raised above (NOT_FOUND) instead of flattening them to a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.updateVectorStoreConfigOnly - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Persists the embedding, vector store and record manager configuration of a
 * document store.
 *
 * For each of the three components:
 * - when a name is supplied, `{ config, name }` is stored as a JSON string;
 * - when neither name nor config is supplied, the stored value is cleared (null);
 * - when only a config is supplied, the stored value is left untouched.
 *
 * If at least one component name is supplied and the store is not already
 * UPSERTED, its status is set to SYNC.
 *
 * @param data - expects `storeId` plus optional `<component>Name` / `<component>Config` pairs
 * @returns the saved DocumentStore entity
 * @throws InternalFlowiseError NOT_FOUND when the store does not exist;
 *         INTERNAL_SERVER_ERROR for unexpected failures
 */
const saveVectorStoreConfig = async (data: ICommonObject) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: data.storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${data.storeId} not found`)
        }

        if (data.embeddingName) {
            entity.embeddingConfig = JSON.stringify({
                config: data.embeddingConfig,
                name: data.embeddingName
            })
        } else if (!data.embeddingConfig) {
            entity.embeddingConfig = null
        }

        if (data.vectorStoreName) {
            entity.vectorStoreConfig = JSON.stringify({
                config: data.vectorStoreConfig,
                name: data.vectorStoreName
            })
        } else if (!data.vectorStoreConfig) {
            entity.vectorStoreConfig = null
        }

        if (data.recordManagerName) {
            entity.recordManagerConfig = JSON.stringify({
                config: data.recordManagerConfig,
                name: data.recordManagerName
            })
        } else if (!data.recordManagerConfig) {
            entity.recordManagerConfig = null
        }

        const hasAnyComponent = data.vectorStoreName || data.recordManagerName || data.embeddingName
        if (entity.status !== DocumentStoreStatus.UPSERTED && hasAnyComponent) {
            // if the store is not already in sync, mark it as sync
            // this also means that the store is not yet sync'ed to vector store
            entity.status = DocumentStoreStatus.SYNC
        }
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        return entity
    } catch (error) {
        // Keep the status of errors raised above (NOT_FOUND) instead of flattening them to a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.saveVectorStoreConfig - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Saves the supplied component configuration, marks the document store as
 * UPSERTING and then runs the upsert itself.
 *
 * @param data - request payload; forwarded unchanged to saveVectorStoreConfig
 *               and _insertIntoVectorStoreWorkerThread
 * @returns whatever _insertIntoVectorStoreWorkerThread returns
 * @throws InternalFlowiseError INTERNAL_SERVER_ERROR wrapping any failure
 */
const insertIntoVectorStore = async (data: ICommonObject) => {
    try {
        const server = getRunningExpressApp()
        const storeEntity = await saveVectorStoreConfig(data)

        // flag the store as busy before the (awaited) upsert starts
        storeEntity.status = DocumentStoreStatus.UPSERTING
        await server.AppDataSource.getRepository(DocumentStore).save(storeEntity)

        // TODO: to be moved into a worker thread...
        return await _insertIntoVectorStoreWorkerThread(data)
    } catch (err) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.insertIntoVectorStore - ${getErrorMessage(err)}`
        )
    }
}
/**
 * Performs the actual upsert of a document store's chunks into its vector store.
 *
 * Steps: save the component configuration, build the (optional) record
 * manager, the embeddings and the vector store node, load every chunk of the
 * store as a langchain Document, call the vector store node's `upsert`,
 * record an UpsertHistory row when `upsert` returned a result, emit a
 * `vector_upserted` telemetry event and finally mark the store as UPSERTED.
 *
 * @param data - expects `storeId` plus the `<component>Name` / `<component>Config` pairs
 * @returns the upsert result, or `{ result: 'Successfully Upserted' }` when the
 *          vector store returned null/undefined
 * @throws InternalFlowiseError INTERNAL_SERVER_ERROR wrapping any failure
 */
const _insertIntoVectorStoreWorkerThread = async (data: ICommonObject) => {
    try {
        const appServer = getRunningExpressApp()
        // NOTE(review): saveVectorStoreConfig sets the status to SYNC for stores that are not UPSERTED,
        // which appears to overwrite the UPSERTING status persisted by insertIntoVectorStore - confirm intended.
        const entity = await saveVectorStoreConfig(data)
        const upsertHistory: Record<string, any> = {}
        const chatflowid = data.storeId // fake chatflowid because this is not tied to any chatflow

        const options: ICommonObject = {
            chatflowid,
            appDataSource: appServer.AppDataSource,
            databaseEntities,
            logger
        }

        let recordManagerObj = undefined

        // Get Record Manager Instance (optional: only when both name and config are supplied)
        if (data.recordManagerName && data.recordManagerConfig) {
            recordManagerObj = await _createRecordManagerObject(appServer, data, options, upsertHistory)
        }

        // Get Embeddings Instance
        const embeddingObj = await _createEmbeddingsObject(appServer, data, options, upsertHistory)

        // Get Vector Store Node Data
        const vStoreNodeData = _createVectorStoreNodeData(appServer, data, embeddingObj, recordManagerObj)

        // Prepare docs for upserting
        const chunks = await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).find({
            where: {
                storeId: data.storeId
            }
        })
        const docs: Document[] = chunks.map((chunk: DocumentStoreFileChunk) => {
            return new Document({
                pageContent: chunk.pageContent,
                metadata: JSON.parse(chunk.metadata)
            })
        })
        vStoreNodeData.inputs.document = docs

        // Get Vector Store Instance
        const vectorStoreObj = await _createVectorStoreObject(appServer, data, vStoreNodeData, upsertHistory)
        const indexResult = await vectorStoreObj.vectorStoreMethods.upsert(vStoreNodeData, options)

        // Save to DB
        if (indexResult) {
            const result = cloneDeep(upsertHistory)
            result['flowData'] = JSON.stringify(result['flowData'])
            result['result'] = JSON.stringify(omit(indexResult, ['totalKeys', 'addedDocs']))
            result.chatflowid = chatflowid
            const newUpsertHistory = new UpsertHistory()
            Object.assign(newUpsertHistory, result)
            const upsertHistoryItem = appServer.AppDataSource.getRepository(UpsertHistory).create(newUpsertHistory)
            await appServer.AppDataSource.getRepository(UpsertHistory).save(upsertHistoryItem)
        }

        await telemetryService.createEvent({
            name: `vector_upserted`,
            data: {
                version: await getAppVersion(),
                chatlowId: chatflowid,
                type: chatType.INTERNAL,
                // indexResult may be null/undefined (see the fallback in the return below);
                // optional chaining keeps a successful upsert from failing in telemetry
                flowGraph: omit(indexResult?.['result'], ['totalKeys', 'addedDocs'])
            }
        })
        entity.status = DocumentStoreStatus.UPSERTED
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)

        return indexResult ?? { result: 'Successfully Upserted' }
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices._insertIntoVectorStoreWorkerThread - ${getErrorMessage(error)}`
        )
    }
}
// Get all component nodes of the 'Embeddings' category, leaving out nodes tagged 'LlamaIndex'
const getEmbeddingProviders = async () => {
    try {
        const embeddingNodes = await nodesService.getAllNodesForCategory('Embeddings')
        return embeddingNodes.filter((node) => {
            const isLlamaIndex = node.tags?.includes('LlamaIndex')
            return !isLlamaIndex
        })
    } catch (err) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getEmbeddingProviders - ${getErrorMessage(err)}`
        )
    }
}
// Get all component nodes of the 'Vector Stores' category, leaving out nodes
// tagged 'LlamaIndex' and the 'documentStoreVS' node itself
const getVectorStoreProviders = async () => {
    try {
        const vectorStoreNodes = await nodesService.getAllNodesForCategory('Vector Stores')
        return vectorStoreNodes.filter((node) => {
            const excluded = node.tags?.includes('LlamaIndex') || node.name === 'documentStoreVS'
            return !excluded
        })
    } catch (err) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getVectorStoreProviders - ${getErrorMessage(err)}`
        )
    }
}
// Get all component nodes of the 'Record Manager' category, leaving out nodes tagged 'LlamaIndex'
const getRecordManagerProviders = async () => {
    try {
        const recordManagerNodes = await nodesService.getAllNodesForCategory('Record Manager')
        return recordManagerNodes.filter((node) => {
            const isLlamaIndex = node.tags?.includes('LlamaIndex')
            return !isLlamaIndex
        })
    } catch (err) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getRecordManagerProviders - ${getErrorMessage(err)}`
        )
    }
}
/**
 * Runs a retrieval query against the vector store configured for a document store.
 *
 * The embedding and vector store settings are read from the saved entity and
 * written onto `data` (the caller's object is mutated); `data.inputs`, when
 * present, is merged over the saved vector store config. No record manager is
 * passed to the vector store node.
 *
 * @param data - expects `storeId` and `query`, optionally `inputs`
 * @returns `{ timeTaken, docs }` where `timeTaken` is the milliseconds measured
 *          around `retriever.invoke` and each doc carries `pageContent`,
 *          `metadata`, `id` and `chunkNo`
 * @throws InternalFlowiseError INTERNAL_SERVER_ERROR for every failure, including
 *         a missing store or missing configuration
 */
const queryVectorStore = async (data: ICommonObject) => {
try {
const appServer = getRunningExpressApp()
const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
id: data.storeId
})
if (!entity) {
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Document store ${data.storeId} not found`)
}
// a random chatflowid is used here; the query is not tied to the store id
const options: ICommonObject = {
chatflowid: uuidv4(),
appDataSource: appServer.AppDataSource,
databaseEntities,
logger
}
if (!entity.embeddingConfig) {
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Embedding for ${data.storeId} is not configured`)
}
if (!entity.vectorStoreConfig) {
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Vector Store for ${data.storeId} is not configured`)
}
// copy the saved embedding settings onto data so the shared helper can read them
const embeddingConfig = JSON.parse(entity.embeddingConfig)
data.embeddingName = embeddingConfig.name
data.embeddingConfig = embeddingConfig.config
let embeddingObj = await _createEmbeddingsObject(appServer, data, options)
// same for the vector store settings; request-level inputs override the saved config
const vsConfig = JSON.parse(entity.vectorStoreConfig)
data.vectorStoreName = vsConfig.name
data.vectorStoreConfig = vsConfig.config
if (data.inputs) {
data.vectorStoreConfig = { ...vsConfig.config, ...data.inputs }
}
const vStoreNodeData = _createVectorStoreNodeData(appServer, data, embeddingObj, undefined)
// Get Vector Store Instance
const vectorStoreObj = await _createVectorStoreObject(appServer, data, vStoreNodeData)
const retriever = await vectorStoreObj.init(vStoreNodeData, '', options)
if (!retriever) {
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Failed to create retriever`)
}
const startMillis = Date.now()
const results = await retriever.invoke(data.query, undefined)
if (!results) {
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Failed to retrieve results`)
}
const endMillis = Date.now()
const timeTaken = endMillis - startMillis
// provisional random id per result; replaced in the loop below
const docs: any = results.map((result: IDocument) => {
return {
pageContent: result.pageContent,
metadata: result.metadata,
id: uuidv4()
}
})
// query our document store chunk with the storeId and pageContent
// (one lookup per returned doc, matched on the exact pageContent)
for (const doc of docs) {
const documentStoreChunk = await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).findOneBy({
storeId: data.storeId,
pageContent: doc.pageContent
})
if (documentStoreChunk) {
doc.id = documentStoreChunk.id
doc.chunkNo = documentStoreChunk.chunkNo
} else {
// this should not happen, only possible if the vector store has more content
// than our document store
doc.id = uuidv4()
doc.chunkNo = -1
}
}
return {
timeTaken: timeTaken,
docs: docs
}
} catch (error) {
// every failure, including the specific errors thrown above, is re-wrapped here
throw new InternalFlowiseError(
StatusCodes.INTERNAL_SERVER_ERROR,
`Error: documentStoreServices.queryVectorStore - ${getErrorMessage(error)}`
)
}
}
/**
 * Builds the node data for the embedding component named by `data.embeddingName`
 * and initialises the component.
 *
 * @param appServer - running app; its `nodesPool.componentNodes` is used to resolve the component
 * @param data - expects `embeddingName` and, optionally, `embeddingConfig`
 * @param options - passed through to the component's `init`
 * @param upsertHistory - when given, the node data is recorded under `flowData`
 * @returns the object returned by the embedding node's `init`
 * @throws InternalFlowiseError NOT_FOUND when the component is unknown,
 *         INTERNAL_SERVER_ERROR when `init` returns nothing
 */
const _createEmbeddingsObject = async (
    appServer: App,
    data: ICommonObject,
    options: ICommonObject,
    upsertHistory?: Record<string, any>
): Promise<any> => {
    // prepare embedding node data
    const embeddingComponent = appServer.nodesPool.componentNodes[data.embeddingName]
    if (!embeddingComponent) {
        // fail with a clear message instead of a TypeError on the property reads below
        throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Embedding component ${data.embeddingName} not found`)
    }
    const embeddingNodeData: any = {
        inputs: { ...data.embeddingConfig },
        outputs: { output: 'document' },
        id: `${embeddingComponent.name}_0`,
        label: embeddingComponent.label,
        name: embeddingComponent.name,
        category: embeddingComponent.category,
        inputParams: embeddingComponent.inputs || []
    }
    // embeddingConfig may be absent when only a name was supplied
    if (data.embeddingConfig?.credential) {
        embeddingNodeData.credential = data.embeddingConfig.credential
    }

    // save to upsert history
    if (upsertHistory) upsertHistory['flowData'] = saveUpsertFlowData(embeddingNodeData, upsertHistory)

    // init embedding object
    const embeddingNodeInstanceFilePath = embeddingComponent.filePath as string
    const embeddingNodeModule = await import(embeddingNodeInstanceFilePath)
    const embeddingNodeInstance = new embeddingNodeModule.nodeClass()
    const embeddingObj = await embeddingNodeInstance.init(embeddingNodeData, '', options)
    if (!embeddingObj) {
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Failed to create EmbeddingObj`)
    }
    return embeddingObj
}
/**
 * Builds the node data for the record manager component named by
 * `data.recordManagerName` and initialises the component.
 *
 * @param appServer - running app; its `nodesPool.componentNodes` is used to resolve the component
 * @param data - expects `recordManagerName` and, optionally, `recordManagerConfig`
 * @param options - passed through to the component's `init`
 * @param upsertHistory - when given, the node data is recorded under `flowData`
 * @returns the object returned by the record manager node's `init`
 * @throws InternalFlowiseError NOT_FOUND when the component is unknown,
 *         INTERNAL_SERVER_ERROR when `init` returns nothing
 */
const _createRecordManagerObject = async (
    appServer: App,
    data: ICommonObject,
    options: ICommonObject,
    upsertHistory?: Record<string, any>
) => {
    // prepare record manager node data
    const recordManagerComponent = appServer.nodesPool.componentNodes[data.recordManagerName]
    if (!recordManagerComponent) {
        // fail with a clear message instead of a TypeError on the property reads below
        throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Record Manager component ${data.recordManagerName} not found`)
    }
    const rmNodeData: any = {
        inputs: { ...data.recordManagerConfig },
        id: `${recordManagerComponent.name}_0`,
        inputParams: recordManagerComponent.inputs,
        label: recordManagerComponent.label,
        name: recordManagerComponent.name,
        category: recordManagerComponent.category
    }
    // recordManagerConfig may be absent when only a name was supplied
    if (data.recordManagerConfig?.credential) {
        rmNodeData.credential = data.recordManagerConfig.credential
    }

    // save to upsert history
    if (upsertHistory) upsertHistory['flowData'] = saveUpsertFlowData(rmNodeData, upsertHistory)

    // init record manager object
    const rmNodeInstanceFilePath = recordManagerComponent.filePath as string
    const rmNodeModule = await import(rmNodeInstanceFilePath)
    const rmNodeInstance = new rmNodeModule.nodeClass()
    const recordManagerObj = await rmNodeInstance.init(rmNodeData, '', options)
    if (!recordManagerObj) {
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Failed to create RecordManager obj`)
    }
    return recordManagerObj
}
/**
 * Builds the node data object for the vector store component named by
 * `data.vectorStoreName`.
 *
 * The saved config is copied into `inputs`; the embeddings and (optional)
 * record manager instances are attached as `inputs.embeddings` and
 * `inputs.recordManager`. `inputParams` lists the component's inputs minus
 * the anchor inputs ('document', 'embeddings', 'recordManager').
 *
 * @param appServer - running app; its `nodesPool.componentNodes` is used to resolve the component
 * @param data - expects `vectorStoreName` and, optionally, `vectorStoreConfig`
 * @param embeddingObj - initialised embeddings instance (skipped when falsy)
 * @param recordManagerObj - initialised record manager instance (skipped when falsy)
 * @returns the vector store node data
 * @throws Error when the component is unknown
 */
const _createVectorStoreNodeData = (appServer: App, data: ICommonObject, embeddingObj: any, recordManagerObj?: any) => {
    const vectorStoreComponent = appServer.nodesPool.componentNodes[data.vectorStoreName]
    if (!vectorStoreComponent) {
        // fail with a clear message instead of a TypeError on the property reads below
        throw new Error(`Vector Store component ${data.vectorStoreName} not found`)
    }
    const vStoreNodeData: any = {
        id: `${vectorStoreComponent.name}_0`,
        inputs: { ...data.vectorStoreConfig },
        outputs: { output: 'retriever' },
        label: vectorStoreComponent.label,
        name: vectorStoreComponent.name,
        category: vectorStoreComponent.category
    }
    // vectorStoreConfig may be absent when only a name was supplied
    if (data.vectorStoreConfig?.credential) {
        vStoreNodeData.credential = data.vectorStoreConfig.credential
    }

    if (embeddingObj) {
        vStoreNodeData.inputs.embeddings = embeddingObj
    }

    if (recordManagerObj) {
        vStoreNodeData.inputs.recordManager = recordManagerObj
    }

    // Get all input params except the ones that are anchor points to avoid JSON stringify circular error
    const filterInputParams = ['document', 'embeddings', 'recordManager']
    const inputParams = vectorStoreComponent.inputs?.filter((input) => !filterInputParams.includes(input.name))
    vStoreNodeData.inputParams = inputParams
    return vStoreNodeData
}
/**
 * Loads the module of the vector store component named by
 * `data.vectorStoreName` and returns a new instance of its `nodeClass`.
 * When `upsertHistory` is given, the node data is recorded under `flowData`
 * after the instance has been created.
 */
const _createVectorStoreObject = async (
    appServer: App,
    data: ICommonObject,
    vStoreNodeData: INodeData,
    upsertHistory?: Record<string, any>
) => {
    const modulePath = appServer.nodesPool.componentNodes[data.vectorStoreName].filePath as string
    const loadedModule = await import(modulePath)
    const vectorStoreNode = new loadedModule.nodeClass()
    if (upsertHistory) {
        upsertHistory['flowData'] = saveUpsertFlowData(vStoreNodeData, upsertHistory)
    }
    return vectorStoreNode
}
export default {
updateDocumentStoreUsage,
deleteDocumentStore,
@@ -714,5 +1216,13 @@ export default {
processAndSaveChunks,
deleteDocumentStoreFileChunk,
editDocumentStoreFileChunk,
getDocumentLoaders
getDocumentLoaders,
insertIntoVectorStore,
getEmbeddingProviders,
getVectorStoreProviders,
getRecordManagerProviders,
saveVectorStoreConfig,
queryVectorStore,
deleteVectorStoreFromStore,
updateVectorStoreConfigOnly
}
+2 -1
View File
@@ -547,7 +547,8 @@ export const buildFlow = async ({
uploads,
baseURL,
socketIO,
socketIOClientId
socketIOClientId,
componentNodes: componentNodes as ICommonObject
})
// Save dynamic variables