Bugfix/Remove Postgres vector store data on deletion (#5536)

Remove Postgres vector store data on deletion

- Introduced a new `doc_id` column in the MySQL, Postgres, and SQLite record managers to support document identification.
- Updated the `update` method to handle both string and object formats for keys, allowing more flexible document updates.
- Enhanced the `listKeys` method to filter by `doc_id` when one is provided in the options.
- Updated the vector store integrations to use the new `doc_id` filtering capability.
This commit is contained in:
Henry Heng
2025-11-30 12:01:36 +00:00
committed by GitHub
parent e6e0c2d07b
commit 465005a503
20 changed files with 620 additions and 217 deletions
@@ -186,7 +186,11 @@ class Chroma_VectorStores implements INode {
const vectorStoreName = collectionName
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
const chromaStore = new ChromaExtended(embeddings, obj)
@@ -198,7 +198,11 @@ class Elasticsearch_VectorStores implements INode {
const vectorStoreName = indexName
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await vectorStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)
@@ -212,7 +212,11 @@ class Pinecone_VectorStores implements INode {
const vectorStoreName = pineconeNamespace
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await pineconeStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)
@@ -49,7 +49,7 @@ class Postgres_VectorStores implements INode {
constructor() {
this.label = 'Postgres'
this.name = 'postgres'
this.version = 7.0
this.version = 7.1
this.type = 'Postgres'
this.icon = 'postgres.svg'
this.category = 'Vector Stores'
@@ -173,6 +173,15 @@ class Postgres_VectorStores implements INode {
additionalParams: true,
optional: true
},
{
label: 'Upsert Batch Size',
name: 'batchSize',
type: 'number',
step: 1,
description: 'Upsert in batches of size N',
additionalParams: true,
optional: true
},
{
label: 'Additional Configuration',
name: 'additionalConfig',
@@ -232,6 +241,7 @@ class Postgres_VectorStores implements INode {
const docs = nodeData.inputs?.document as Document[]
const recordManager = nodeData.inputs?.recordManager
const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean
const _batchSize = nodeData.inputs?.batchSize
const vectorStoreDriver: VectorStoreDriver = Postgres_VectorStores.getDriverFromConfig(nodeData, options)
const flattenDocs = docs && docs.length ? flatten(docs) : []
@@ -265,7 +275,15 @@ class Postgres_VectorStores implements INode {
return res
} else {
await vectorStoreDriver.fromDocuments(finalDocs)
if (_batchSize) {
const batchSize = parseInt(_batchSize, 10)
for (let i = 0; i < finalDocs.length; i += batchSize) {
const batch = finalDocs.slice(i, i + batchSize)
await vectorStoreDriver.fromDocuments(batch)
}
} else {
await vectorStoreDriver.fromDocuments(finalDocs)
}
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
@@ -285,7 +303,11 @@ class Postgres_VectorStores implements INode {
const vectorStoreName = tableName
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await vectorStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)
@@ -5,6 +5,11 @@ import { TypeORMVectorStore, TypeORMVectorStoreArgs, TypeORMVectorStoreDocument
import { VectorStore } from '@langchain/core/vectorstores'
import { Document } from '@langchain/core/documents'
import { Pool } from 'pg'
import { v4 as uuid } from 'uuid'
/**
 * Optional settings accepted when adding documents/vectors.
 * `ids` is an optional list of caller-supplied identifiers for the documents being added.
 */
type TypeORMAddDocumentOptions = { ids?: Array<string> }
export class TypeORMDriver extends VectorStoreDriver {
protected _postgresConnectionOptions: DataSourceOptions
@@ -95,15 +100,45 @@ export class TypeORMDriver extends VectorStoreDriver {
try {
instance.appDataSource.getRepository(instance.documentEntity).delete(ids)
} catch (e) {
console.error('Failed to delete')
console.error('Failed to delete', e)
}
}
}
const baseAddVectorsFn = instance.addVectors.bind(instance)
instance.addVectors = async (
vectors: number[][],
documents: Document[],
documentOptions?: TypeORMAddDocumentOptions
): Promise<void> => {
const rows = vectors.map((embedding, idx) => {
const embeddingString = `[${embedding.join(',')}]`
const documentRow = {
id: documentOptions?.ids?.length ? documentOptions.ids[idx] : uuid(),
pageContent: documents[idx].pageContent,
embedding: embeddingString,
metadata: documents[idx].metadata
}
return documentRow
})
instance.addVectors = async (vectors, documents) => {
return baseAddVectorsFn(vectors, this.sanitizeDocuments(documents))
const documentRepository = instance.appDataSource.getRepository(instance.documentEntity)
const _batchSize = this.nodeData.inputs?.batchSize
const chunkSize = _batchSize ? parseInt(_batchSize, 10) : 500
for (let i = 0; i < rows.length; i += chunkSize) {
const chunk = rows.slice(i, i + chunkSize)
try {
await documentRepository.save(chunk)
} catch (e) {
console.error(e)
throw new Error(`Error inserting: ${chunk[0].pageContent}`)
}
}
}
// Embed the page content of every document, then hand the vectors, the documents and the
// caller's options (including any `ids`) to `instance.addVectors`.
instance.addDocuments = async (documents: Document[], options?: { ids?: string[] }): Promise<void> => {
    const pageContents = documents.map((doc) => doc.pageContent)
    const embeddedVectors = await this.getEmbeddings().embedDocuments(pageContents)
    // Cast to `any` so the third `options` argument can be passed through.
    return (instance.addVectors as any)(embeddedVectors, documents, options)
}
return instance
@@ -385,7 +385,11 @@ class Qdrant_VectorStores implements INode {
const vectorStoreName = collectionName
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await vectorStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)
@@ -197,7 +197,11 @@ class Supabase_VectorStores implements INode {
const vectorStoreName = tableName + '_' + queryName
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await supabaseStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)
@@ -187,7 +187,11 @@ class Upstash_VectorStores implements INode {
const vectorStoreName = UPSTASH_VECTOR_REST_URL
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await upstashStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)
@@ -252,7 +252,11 @@ class Weaviate_VectorStores implements INode {
const vectorStoreName = weaviateTextKey ? weaviateIndex + '_' + weaviateTextKey : weaviateIndex
await recordManager.createSchema()
;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
const keys: string[] = await recordManager.listKeys({})
const filterKeys: ICommonObject = {}
if (options.docId) {
filterKeys.docId = options.docId
}
const keys: string[] = await recordManager.listKeys(filterKeys)
await weaviateStore.delete({ ids: keys })
await recordManager.deleteKeys(keys)