Bugfix/Remove postgres vector store data when deletion (#5536)

Remove Postgres vector store data on deletion

- Introduced a new `doc_id` column in MySQL, Postgres, and SQLite record managers to support document identification.
- Updated the `update` method to handle both string and object formats for keys, allowing for better flexibility in document updates.
- Enhanced the `listKeys` method to filter by `doc_id` when provided in options.
- Updated vector store integrations to utilize the new `doc_id` filtering capability.
This commit is contained in:
Henry Heng
2025-11-30 12:01:36 +00:00
committed by GitHub
parent e6e0c2d07b
commit 465005a503
20 changed files with 620 additions and 217 deletions
@@ -465,9 +465,10 @@ const insertIntoVectorStore = async (req: Request, res: Response, next: NextFunc
}
const subscriptionId = req.user?.activeOrganizationSubscriptionId || ''
const body = req.body
const isStrictSave = body.isStrictSave ?? false
const apiResponse = await documentStoreService.insertIntoVectorStoreMiddleware(
body,
false,
isStrictSave,
orgId,
workspaceId,
subscriptionId,
@@ -513,7 +514,11 @@ const deleteVectorStoreFromStore = async (req: Request, res: Response, next: Nex
`Error: documentStoreController.deleteVectorStoreFromStore - workspaceId not provided!`
)
}
const apiResponse = await documentStoreService.deleteVectorStoreFromStore(req.params.storeId, workspaceId)
const apiResponse = await documentStoreService.deleteVectorStoreFromStore(
req.params.storeId,
workspaceId,
(req.query.docId as string) || undefined
)
return res.json(apiResponse)
} catch (error) {
next(error)
@@ -391,7 +391,7 @@ const deleteDocumentStoreFileChunk = async (storeId: string, docId: string, chun
}
}
const deleteVectorStoreFromStore = async (storeId: string, workspaceId: string) => {
const deleteVectorStoreFromStore = async (storeId: string, workspaceId: string, docId?: string) => {
try {
const appServer = getRunningExpressApp()
const componentNodes = appServer.nodesPool.componentNodes
@@ -461,7 +461,7 @@ const deleteVectorStoreFromStore = async (storeId: string, workspaceId: string)
// Call the delete method of the vector store
if (vectorStoreObj.vectorStoreMethods.delete) {
await vectorStoreObj.vectorStoreMethods.delete(vStoreNodeData, idsToDelete, options)
await vectorStoreObj.vectorStoreMethods.delete(vStoreNodeData, idsToDelete, { ...options, docId })
}
} catch (error) {
throw new InternalFlowiseError(
@@ -1157,6 +1157,18 @@ const updateVectorStoreConfigOnly = async (data: ICommonObject, workspaceId: str
)
}
}
/**
* Saves vector store configuration to the document store entity.
* Handles embedding, vector store, and record manager configurations.
*
* @example
* // Strict mode: Only save what's provided, clear the rest
* await saveVectorStoreConfig(ds, { storeId, embeddingName, embeddingConfig }, true, wsId)
*
* @example
* // Lenient mode: Reuse existing configs if not provided
* await saveVectorStoreConfig(ds, { storeId, vectorStoreName, vectorStoreConfig }, false, wsId)
*/
const saveVectorStoreConfig = async (appDataSource: DataSource, data: ICommonObject, isStrictSave = true, workspaceId: string) => {
try {
const entity = await appDataSource.getRepository(DocumentStore).findOneBy({
@@ -1221,6 +1233,15 @@ const saveVectorStoreConfig = async (appDataSource: DataSource, data: ICommonObj
}
}
/**
* Inserts documents from document store into the configured vector store.
*
* Process:
* 1. Saves vector store configuration (embedding, vector store, record manager)
* 2. Sets document store status to UPSERTING
* 3. Performs the actual vector store upsert operation
* 4. Updates status to UPSERTED upon completion
*/
export const insertIntoVectorStore = async ({
appDataSource,
componentNodes,
@@ -1231,19 +1252,16 @@ export const insertIntoVectorStore = async ({
workspaceId
}: IExecuteVectorStoreInsert) => {
try {
// Step 1: Save configuration based on isStrictSave mode
const entity = await saveVectorStoreConfig(appDataSource, data, isStrictSave, workspaceId)
// Step 2: Mark as UPSERTING before starting the operation
entity.status = DocumentStoreStatus.UPSERTING
await appDataSource.getRepository(DocumentStore).save(entity)
const indexResult = await _insertIntoVectorStoreWorkerThread(
appDataSource,
componentNodes,
telemetry,
data,
isStrictSave,
orgId,
workspaceId
)
// Step 3: Perform the actual vector store upsert
// Note: Configuration already saved above, worker thread just retrieves and uses it
const indexResult = await _insertIntoVectorStoreWorkerThread(appDataSource, componentNodes, telemetry, data, orgId, workspaceId)
return indexResult
} catch (error) {
throw new InternalFlowiseError(
@@ -1308,12 +1326,18 @@ const _insertIntoVectorStoreWorkerThread = async (
componentNodes: IComponentNodes,
telemetry: Telemetry,
data: ICommonObject,
isStrictSave = true,
orgId: string,
workspaceId: string
) => {
try {
const entity = await saveVectorStoreConfig(appDataSource, data, isStrictSave, workspaceId)
// Configuration already saved by insertIntoVectorStore, just retrieve the entity
const entity = await appDataSource.getRepository(DocumentStore).findOneBy({
id: data.storeId,
workspaceId: workspaceId
})
if (!entity) {
throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${data.storeId} not found`)
}
let upsertHistory: Record<string, any> = {}
const chatflowid = data.storeId // fake chatflowid because this is not tied to any chatflow
@@ -1350,7 +1374,10 @@ const _insertIntoVectorStoreWorkerThread = async (
const docs: Document[] = chunks.map((chunk: DocumentStoreFileChunk) => {
return new Document({
pageContent: chunk.pageContent,
metadata: JSON.parse(chunk.metadata)
metadata: {
...JSON.parse(chunk.metadata),
docId: chunk.docId
}
})
})
vStoreNodeData.inputs.document = docs
@@ -1911,6 +1938,8 @@ const upsertDocStore = async (
recordManagerConfig
}
// Use isStrictSave: false to preserve existing configurations during upsert
// This allows the operation to reuse existing embedding/vector store/record manager configs
const res = await insertIntoVectorStore({
appDataSource,
componentNodes,