Bugfix/Remove postgres vector store data when deletion (#5536)

Remove postgres vector store data when deletion

- Introduced a new `doc_id` column in MySQL, Postgres, and SQLite record managers to support document identification.
- Updated the `update` method to handle both string and object formats for keys, allowing for better flexibility in document updates.
- Enhanced `listKeys` method to filter by `doc_id` when provided in options.
- Updated vector store integrations to utilize the new `doc_id` filtering capability
This commit is contained in:
Henry Heng
2025-11-30 12:01:36 +00:00
committed by GitHub
parent e6e0c2d07b
commit 465005a503
20 changed files with 620 additions and 217 deletions
@@ -78,7 +78,6 @@ class PostgresRecordManager_RecordManager implements INode {
label: 'Namespace',
name: 'namespace',
type: 'string',
description: 'If not specified, chatflowid will be used',
additionalParams: true,
optional: true
},
@@ -241,6 +240,19 @@ class PostgresRecordManager implements RecordManagerInterface {
CREATE INDEX IF NOT EXISTS namespace_index ON "${tableName}" (namespace);
CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`)
// Add doc_id column if it doesn't exist (migration for existing tables)
await queryRunner.manager.query(`
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = '${tableName}' AND column_name = 'doc_id'
) THEN
ALTER TABLE "${tableName}" ADD COLUMN doc_id TEXT;
CREATE INDEX IF NOT EXISTS doc_id_index ON "${tableName}" (doc_id);
END IF;
END $$;`)
await queryRunner.release()
} catch (e: any) {
// This error indicates that the table already exists
@@ -286,7 +298,7 @@ class PostgresRecordManager implements RecordManagerInterface {
return `(${placeholders.join(', ')})`
}
async update(keys: string[], updateOptions?: UpdateOptions): Promise<void> {
async update(keys: Array<{ uid: string; docId: string }> | string[], updateOptions?: UpdateOptions): Promise<void> {
if (keys.length === 0) {
return
}
@@ -302,17 +314,22 @@ class PostgresRecordManager implements RecordManagerInterface {
throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`)
}
const groupIds = _groupIds ?? keys.map(() => null)
// Handle both new format (objects with uid and docId) and old format (strings)
const isNewFormat = keys.length > 0 && typeof keys[0] === 'object' && 'uid' in keys[0]
const keyStrings = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.uid) : (keys as string[])
const docIds = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.docId) : keys.map(() => null)
if (groupIds.length !== keys.length) {
throw new Error(`Number of keys (${keys.length}) does not match number of group_ids ${groupIds.length})`)
const groupIds = _groupIds ?? keyStrings.map(() => null)
if (groupIds.length !== keyStrings.length) {
throw new Error(`Number of keys (${keyStrings.length}) does not match number of group_ids ${groupIds.length})`)
}
const recordsToUpsert = keys.map((key, i) => [key, this.namespace, updatedAt, groupIds[i]])
const recordsToUpsert = keyStrings.map((key, i) => [key, this.namespace, updatedAt, groupIds[i], docIds[i]])
const valuesPlaceholders = recordsToUpsert.map((_, j) => this.generatePlaceholderForRowAt(j, recordsToUpsert[0].length)).join(', ')
const query = `INSERT INTO "${tableName}" (key, namespace, updated_at, group_id) VALUES ${valuesPlaceholders} ON CONFLICT (key, namespace) DO UPDATE SET updated_at = EXCLUDED.updated_at;`
const query = `INSERT INTO "${tableName}" (key, namespace, updated_at, group_id, doc_id) VALUES ${valuesPlaceholders} ON CONFLICT (key, namespace) DO UPDATE SET updated_at = EXCLUDED.updated_at, doc_id = EXCLUDED.doc_id;`
try {
await queryRunner.manager.query(query, recordsToUpsert.flat())
await queryRunner.release()
@@ -351,8 +368,8 @@ class PostgresRecordManager implements RecordManagerInterface {
}
}
async listKeys(options?: ListKeyOptions): Promise<string[]> {
const { before, after, limit, groupIds } = options ?? {}
async listKeys(options?: ListKeyOptions & { docId?: string }): Promise<string[]> {
const { before, after, limit, groupIds, docId } = options ?? {}
const tableName = this.sanitizeTableName(this.tableName)
let query = `SELECT key FROM "${tableName}" WHERE namespace = $1`
@@ -383,6 +400,12 @@ class PostgresRecordManager implements RecordManagerInterface {
index += 1
}
if (docId) {
values.push(docId)
query += ` AND doc_id = $${index}`
index += 1
}
query += ';'
const dataSource = await this.getDataSource()