Bugfix/Remove postgres vector store data when deletion (#5536)

Remove postgres vector store data when deletion

- Introduced a new `doc_id` column in MySQL, Postgres, and SQLite record managers to support document identification.
- Updated the `update` method to handle both string and object formats for keys, allowing for better flexibility in document updates.
- Enhanced `listKeys` method to filter by `doc_id` when provided in options.
- Updated vector store integrations to utilize the new `doc_id` filtering capability
This commit is contained in:
Henry Heng
2025-11-30 12:01:36 +00:00
committed by GitHub
parent e6e0c2d07b
commit 465005a503
20 changed files with 620 additions and 217 deletions
@@ -62,7 +62,6 @@ class MySQLRecordManager_RecordManager implements INode {
label: 'Namespace',
name: 'namespace',
type: 'string',
description: 'If not specified, chatflowid will be used',
additionalParams: true,
optional: true
},
@@ -219,7 +218,16 @@ class MySQLRecordManager implements RecordManagerInterface {
unique key \`unique_key_namespace\` (\`key\`,
\`namespace\`));`)
const columns = [`updated_at`, `key`, `namespace`, `group_id`]
// Add doc_id column if it doesn't exist (migration for existing tables)
const checkColumn = await queryRunner.manager.query(
`SELECT COUNT(1) ColumnExists FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_schema=DATABASE() AND table_name='${tableName}' AND column_name='doc_id';`
)
if (checkColumn[0].ColumnExists === 0) {
await queryRunner.manager.query(`ALTER TABLE \`${tableName}\` ADD COLUMN \`doc_id\` longtext;`)
}
const columns = [`updated_at`, `key`, `namespace`, `group_id`, `doc_id`]
for (const column of columns) {
// MySQL does not support 'IF NOT EXISTS' function for Index
const Check = await queryRunner.manager.query(
@@ -261,7 +269,7 @@ class MySQLRecordManager implements RecordManagerInterface {
}
}
async update(keys: string[], updateOptions?: UpdateOptions): Promise<void> {
async update(keys: Array<{ uid: string; docId: string }> | string[], updateOptions?: UpdateOptions): Promise<void> {
if (keys.length === 0) {
return
}
@@ -277,23 +285,23 @@ class MySQLRecordManager implements RecordManagerInterface {
throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`)
}
const groupIds = _groupIds ?? keys.map(() => null)
// Handle both new format (objects with uid and docId) and old format (strings)
const isNewFormat = keys.length > 0 && typeof keys[0] === 'object' && 'uid' in keys[0]
const keyStrings = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.uid) : (keys as string[])
const docIds = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.docId) : keys.map(() => null)
if (groupIds.length !== keys.length) {
throw new Error(`Number of keys (${keys.length}) does not match number of group_ids (${groupIds.length})`)
const groupIds = _groupIds ?? keyStrings.map(() => null)
if (groupIds.length !== keyStrings.length) {
throw new Error(`Number of keys (${keyStrings.length}) does not match number of group_ids (${groupIds.length})`)
}
const recordsToUpsert = keys.map((key, i) => [
key,
this.namespace,
updatedAt,
groupIds[i] ?? null // Ensure groupIds[i] is null if undefined
])
const recordsToUpsert = keyStrings.map((key, i) => [key, this.namespace, updatedAt, groupIds[i] ?? null, docIds[i] ?? null])
const query = `
INSERT INTO \`${tableName}\` (\`key\`, \`namespace\`, \`updated_at\`, \`group_id\`)
VALUES (?, ?, ?, ?)
ON DUPLICATE KEY UPDATE \`updated_at\` = VALUES(\`updated_at\`)`
INSERT INTO \`${tableName}\` (\`key\`, \`namespace\`, \`updated_at\`, \`group_id\`, \`doc_id\`)
VALUES (?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE \`updated_at\` = VALUES(\`updated_at\`), \`doc_id\` = VALUES(\`doc_id\`)`
// To handle multiple files upsert
try {
@@ -349,13 +357,13 @@ class MySQLRecordManager implements RecordManagerInterface {
}
}
async listKeys(options?: ListKeyOptions): Promise<string[]> {
async listKeys(options?: ListKeyOptions & { docId?: string }): Promise<string[]> {
const dataSource = await this.getDataSource()
const queryRunner = dataSource.createQueryRunner()
const tableName = this.sanitizeTableName(this.tableName)
try {
const { before, after, limit, groupIds } = options ?? {}
const { before, after, limit, groupIds, docId } = options ?? {}
let query = `SELECT \`key\` FROM \`${tableName}\` WHERE \`namespace\` = ?`
const values: (string | number | string[])[] = [this.namespace]
@@ -382,6 +390,11 @@ class MySQLRecordManager implements RecordManagerInterface {
values.push(...groupIds.filter((gid): gid is string => gid !== null))
}
if (docId) {
query += ` AND \`doc_id\` = ?`
values.push(docId)
}
query += ';'
// Directly using try/catch with async/await for cleaner flow