Document Store - Phase 2 (#2912)

* Document Store - Phase 2

* Adding additional columns for vector store config, document store phase 2

* Adding additional columns for vector store config, document store phase 2

* Document Store - Phase 2 - Upsert and Query

* UX cleanup

* retrieval settings and more UX changes

* adding MMR params to execution

* Making the upsert process async.

* add upsert history changes

* making the searchParams dynamic

* removing unnecessary params

* add ability to delete data from vector store

* update margin for vector store query

* adding option to save config in the retrieval playground

* adding chunk number for query return chunks

* Adding a Document Store node in the VectorStore category

* update doc store status, UI touch-up

---------

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Vinod Kiran
2024-08-07 23:29:52 +05:30
committed by GitHub
parent c7306c93d7
commit c0bae635b0
36 changed files with 3589 additions and 91 deletions
@@ -150,6 +150,42 @@ class Chroma_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Chroma collection configured on this node.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with the collection name, every key it lists is deleted from
 * Chroma, and the same keys are then removed from the record manager. Without a record
 * manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (collectionName, embeddings, chromaURL, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const inputs = nodeData.inputs
    const collectionName = inputs?.collectionName as string
    const embeddings = inputs?.embeddings as Embeddings
    const chromaURL = inputs?.chromaURL as string
    const recordManager = inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const chromaApiKey = getCredentialParam('chromaApiKey', credentialData, nodeData)

    // Optional connection settings are only set when a value was provided
    const storeArgs: {
        collectionName: string
        url?: string
        chromaApiKey?: string
    } = { collectionName }
    if (chromaURL) storeArgs.url = chromaURL
    if (chromaApiKey) storeArgs.chromaApiKey = chromaApiKey

    try {
        if (!recordManager) {
            const directStore = new ChromaExtended(embeddings, storeArgs)
            await directStore.delete({ ids })
            return
        }

        await recordManager.createSchema()
        // The record manager's namespace is extended in place with the collection name
        const managed = recordManager as any
        managed.namespace = managed.namespace + '_' + collectionName
        const trackedKeys: string[] = await recordManager.listKeys({})

        const managedStore = new ChromaExtended(embeddings, storeArgs)
        await managedStore.delete({ ids: trackedKeys })
        await recordManager.deleteKeys(trackedKeys)
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -0,0 +1,174 @@
import { ICommonObject, IDatabaseEntity, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { DataSource } from 'typeorm'
/**
 * Node that exposes a stored Document Store as a retriever or vector store.
 *
 * The node offers a single "Select Store" dropdown populated from the DocumentStore
 * repository. On init it reads the selected store's saved embedding and vector store
 * configuration, instantiates those component nodes, and returns whatever the
 * vector store node's own `init` produces for the requested output.
 */
class DocStore_VectorStores implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]
    badge: string

    constructor() {
        this.label = 'Document Store (Vector)'
        this.name = 'documentStoreVS'
        this.version = 1.0
        this.type = 'DocumentStoreVS'
        this.icon = 'dstore.svg'
        this.badge = 'New'
        this.category = 'Vector Stores'
        this.description = `Search and retrieve documents from Document Store`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Select Store',
                name: 'selectedStore',
                type: 'asyncOptions',
                loadMethod: 'listStores'
            }
        ]
        this.outputs = [
            {
                label: 'Retriever',
                name: 'retriever',
                baseClasses: ['BaseRetriever']
            },
            {
                label: 'Vector Store',
                name: 'vectorStore',
                baseClasses: ['VectorStore']
            }
        ]
    }

    //@ts-ignore
    loadMethods = {
        /**
         * Builds the dropdown options for "Select Store".
         *
         * @param _ - unused node data
         * @param options - must carry `appDataSource` and `databaseEntities`
         * @returns one option per store whose status is 'UPSERTED'; an empty list when
         *          no data source is available
         */
        async listStores(_: INodeData, options: ICommonObject): Promise<INodeOptionsValue[]> {
            const returnData: INodeOptionsValue[] = []

            const appDataSource = options.appDataSource as DataSource
            const databaseEntities = options.databaseEntities as IDatabaseEntity
            if (!appDataSource) {
                return returnData
            }

            const stores = await appDataSource.getRepository(databaseEntities['DocumentStore']).find()
            for (const store of stores) {
                // Only stores with the 'UPSERTED' status are offered for selection
                if (store.status !== 'UPSERTED') continue
                returnData.push({
                    name: store.id,
                    label: store.name,
                    description: store.description
                })
            }
            return returnData
        }
    }

    /**
     * Resolves the selected document store into a retriever or vector store instance.
     *
     * @param nodeData - provides `inputs.selectedStore` (store id) and `outputs.output`
     * @param _ - unused
     * @param options - must carry `appDataSource`, `databaseEntities` and `componentNodes`
     * @returns the value produced by the vector store node's `init`, or an
     *          `{ error: string }` object when the store, embeddings or vector store
     *          could not be resolved
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const selectedStore = nodeData.inputs?.selectedStore as string
        const appDataSource = options.appDataSource as DataSource
        const databaseEntities = options.databaseEntities as IDatabaseEntity
        const output = nodeData.outputs?.output as string

        const entity = await appDataSource.getRepository(databaseEntities['DocumentStore']).findOneBy({ id: selectedStore })
        if (!entity) {
            return { error: 'Store not found' }
        }

        const data: ICommonObject = {}
        data.output = output

        // Prepare Embeddings Instance
        const embeddingConfig = JSON.parse(entity.embeddingConfig)
        data.embeddingName = embeddingConfig.name
        data.embeddingConfig = embeddingConfig.config
        const embeddingObj = await _createEmbeddingsObject(options.componentNodes, data, options)
        if (!embeddingObj) {
            return { error: 'Failed to create EmbeddingObj' }
        }

        // Prepare Vector Store Instance
        const vsConfig = JSON.parse(entity.vectorStoreConfig)
        data.vectorStoreName = vsConfig.name
        // NOTE(review): a former `if (data.inputs)` overlay was removed here. `data` is
        // created in this method and never receives an `inputs` key, so the overlay
        // never ran. Confirm whether per-node overrides were intended.
        data.vectorStoreConfig = vsConfig.config

        // Prepare Vector Store Node Data
        const vStoreNodeData = _createVectorStoreNodeData(options.componentNodes, data, embeddingObj)

        // Finally create the Vector Store or Retriever object (data.output)
        const vectorStoreObj = await _createVectorStoreObject(options.componentNodes, data)
        const retrieverOrVectorStore = await vectorStoreObj.init(vStoreNodeData, '', options)
        if (!retrieverOrVectorStore) {
            return { error: 'Failed to create vectorStore' }
        }
        return retrieverOrVectorStore
    }
}
/**
 * Instantiates the embeddings node named by `data.embeddingName` and initialises it.
 *
 * @param componentNodes - map from component name to its definition (name, label, category, inputs, filePath)
 * @param data - carries `embeddingName` and `embeddingConfig`
 * @param options - forwarded unchanged to the embedding node's `init`
 * @returns whatever the embedding node's `init` resolves to
 * @throws Error when no component is registered under `data.embeddingName`
 */
const _createEmbeddingsObject = async (componentNodes: ICommonObject, data: ICommonObject, options: ICommonObject): Promise<any> => {
    // prepare embedding node data
    const embeddingComponent = componentNodes[data.embeddingName]
    if (!embeddingComponent) {
        // Fail with a descriptive message instead of a TypeError on the property reads below
        throw new Error(`Embedding component "${data.embeddingName}" not found`)
    }

    const embeddingNodeData: any = {
        inputs: { ...data.embeddingConfig },
        outputs: { output: 'document' },
        id: `${embeddingComponent.name}_0`,
        label: embeddingComponent.label,
        name: embeddingComponent.name,
        category: embeddingComponent.category,
        inputParams: embeddingComponent.inputs || []
    }
    // A missing embeddingConfig simply means there is no credential to forward
    if (data.embeddingConfig?.credential) {
        embeddingNodeData.credential = data.embeddingConfig.credential
    }

    // init embedding object
    const embeddingNodeInstanceFilePath = embeddingComponent.filePath as string
    const embeddingNodeModule = await import(embeddingNodeInstanceFilePath)
    const embeddingNodeInstance = new embeddingNodeModule.nodeClass()
    return await embeddingNodeInstance.init(embeddingNodeData, '', options)
}
/**
 * Builds the node-data object handed to a vector store node's `init`.
 *
 * @param componentNodes - map from component name to its definition (name, label, category, inputs)
 * @param data - carries `vectorStoreName`, `vectorStoreConfig` and `output`
 * @param embeddingObj - embeddings instance placed on `inputs.embeddings` when truthy
 * @returns node data with id, inputs, outputs, label, name, category, optional
 *          credential, and `inputParams` (always an array)
 * @throws Error when no component is registered under `data.vectorStoreName`
 */
const _createVectorStoreNodeData = (componentNodes: ICommonObject, data: ICommonObject, embeddingObj: any) => {
    const vectorStoreComponent = componentNodes[data.vectorStoreName]
    if (!vectorStoreComponent) {
        // Fail with a descriptive message instead of a TypeError on the property reads below
        throw new Error(`Vector store component "${data.vectorStoreName}" not found`)
    }

    const vStoreNodeData: any = {
        id: `${vectorStoreComponent.name}_0`,
        inputs: { ...data.vectorStoreConfig },
        outputs: { output: data.output },
        label: vectorStoreComponent.label,
        name: vectorStoreComponent.name,
        category: vectorStoreComponent.category
    }
    if (data.vectorStoreConfig?.credential) {
        vStoreNodeData.credential = data.vectorStoreConfig.credential
    }

    if (embeddingObj) {
        vStoreNodeData.inputs.embeddings = embeddingObj
    }

    // Get all input params except the ones that are anchor points to avoid JSON stringify circular error
    const filterInputParams = ['document', 'embeddings', 'recordManager']
    const inputParams = vectorStoreComponent.inputs?.filter((input: any) => !filterInputParams.includes(input.name))
    // Default to an empty list, matching how the embeddings node data is built
    vStoreNodeData.inputParams = inputParams ?? []
    return vStoreNodeData
}
/**
 * Loads the module registered for `data.vectorStoreName` and returns a fresh,
 * un-initialised instance of the class it exposes as `nodeClass`.
 *
 * @param componentNodes - map from component name to its definition (must include `filePath`)
 * @param data - carries `vectorStoreName`
 * @returns a new instance of the vector store node class
 */
const _createVectorStoreObject = async (componentNodes: ICommonObject, data: ICommonObject) => {
    const modulePath = componentNodes[data.vectorStoreName].filePath as string
    const loadedModule = await import(modulePath)
    return new loadedModule.nodeClass()
}
// Export the node under the `nodeClass` key, the same key this file's helpers read when instantiating other node modules
module.exports = { nodeClass: DocStore_VectorStores }
@@ -0,0 +1,15 @@
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
>
<path d="M12 4l-8 4l8 4l8 -4l-8 -4" />
<path d="M4 12l8 4l8 -4" />
<path d="M4 16l8 4l8 -4" />
</svg>

After

Width:  |  Height:  |  Size: 305 B

@@ -163,6 +163,35 @@ class Elasticsearch_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Elasticsearch index configured on this node.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with the index name, every key it lists is deleted from the
 * vector store, and the same keys are then removed from the record manager. Without a
 * record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (indexName, embeddings, similarityMeasure, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const inputs = nodeData.inputs
    const indexName = inputs?.indexName as string
    const embeddings = inputs?.embeddings as Embeddings
    const similarityMeasure = inputs?.similarityMeasure as string
    const recordManager = inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const endPoint = getCredentialParam('endpoint', credentialData, nodeData)
    const cloudId = getCredentialParam('cloudId', credentialData, nodeData)

    const clientArgs = prepareClientArgs(endPoint, cloudId, credentialData, nodeData, similarityMeasure, indexName)
    const vectorStore = new ElasticVectorSearch(embeddings, clientArgs)

    try {
        if (!recordManager) {
            await vectorStore.delete({ ids })
            return
        }

        await recordManager.createSchema()
        // The record manager's namespace is extended in place with the index name
        const managed = recordManager as any
        managed.namespace = managed.namespace + '_' + indexName
        const trackedKeys: string[] = await recordManager.listKeys({})

        await vectorStore.delete({ ids: trackedKeys })
        await recordManager.deleteKeys(trackedKeys)
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -184,6 +184,45 @@ class Pinecone_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Pinecone index configured on this node.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with the Pinecone namespace input, every key it lists is deleted
 * from the store, and the same keys are then removed from the record manager. Without a
 * record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (pineconeIndex, pineconeNamespace, embeddings, pineconeTextKey, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const _index = nodeData.inputs?.pineconeIndex as string
    const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
    const embeddings = nodeData.inputs?.embeddings as Embeddings
    const pineconeTextKey = nodeData.inputs?.pineconeTextKey as string
    const recordManager = nodeData.inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const pineconeApiKey = getCredentialParam('pineconeApiKey', credentialData, nodeData)

    const client = getPineconeClient({ apiKey: pineconeApiKey })
    const pineconeIndex = client.Index(_index)

    const obj: PineconeStoreParams = {
        pineconeIndex,
        textKey: pineconeTextKey || 'text'
    }
    if (pineconeNamespace) obj.namespace = pineconeNamespace

    // One store instance serves both branches below
    const pineconeStore = new PineconeStore(embeddings, obj)

    try {
        if (recordManager) {
            const vectorStoreName = pineconeNamespace
            await recordManager.createSchema()
            // The record manager's namespace is extended in place with the Pinecone namespace
            ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
            const keys: string[] = await recordManager.listKeys({})

            await pineconeStore.delete({ ids: keys })
            await recordManager.deleteKeys(keys)
        } else {
            await pineconeStore.delete({ ids })
        }
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -201,6 +201,58 @@ class Postgres_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Postgres table configured on this node.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with the table name, every key it lists is deleted from the
 * vector store, and the same keys are then removed from the record manager. Without a
 * record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (tableName, embeddings, additionalConfig, host, port, database, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error when the additional configuration is not valid JSON, or wrapping any
 *         failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const user = getCredentialParam('user', credentialData, nodeData)
    const password = getCredentialParam('password', credentialData, nodeData)

    const inputs = nodeData.inputs
    // An empty table name falls back to 'documents'
    const tableName = (inputs?.tableName as string) || 'documents'
    const embeddings = inputs?.embeddings as Embeddings
    const additionalConfig = inputs?.additionalConfig as string
    const recordManager = inputs?.recordManager

    let extraOptions = {}
    if (additionalConfig) {
        try {
            // The value is used as-is when already an object, otherwise parsed as JSON
            if (typeof additionalConfig === 'object') {
                extraOptions = additionalConfig
            } else {
                extraOptions = JSON.parse(additionalConfig)
            }
        } catch (exception) {
            throw new Error('Invalid JSON in the Additional Configuration: ' + exception)
        }
    }

    // Explicit connection fields come after the spread so they override the extra options
    const connectionOptions = {
        ...extraOptions,
        type: 'postgres',
        host: inputs?.host as string,
        port: inputs?.port as number,
        username: user,
        password: password,
        database: inputs?.database as string
    }

    const storeArgs = {
        postgresConnectionOptions: connectionOptions as DataSourceOptions,
        tableName: tableName
    }

    const vectorStore = await TypeORMVectorStore.fromDataSource(embeddings, storeArgs)

    try {
        if (!recordManager) {
            await vectorStore.delete({ ids })
            return
        }

        await recordManager.createSchema()
        // The record manager's namespace is extended in place with the table name
        const managed = recordManager as any
        managed.namespace = managed.namespace + '_' + tableName
        const trackedKeys: string[] = await recordManager.listKeys({})

        await vectorStore.delete({ ids: trackedKeys })
        await recordManager.deleteKeys(trackedKeys)
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -291,6 +291,69 @@ class Qdrant_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes points from the Qdrant collection configured on this node.
 *
 * The vector store's `delete` is replaced with one that calls the Qdrant client
 * directly. With a record manager connected, the `ids` argument is not used: the record
 * manager's namespace is suffixed with the collection name, every key it lists is
 * deleted from Qdrant, and the same keys are then removed from the record manager.
 * Without a record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (qdrantServerUrl, qdrantCollection, embeddings, qdrantSimilarity, qdrantVectorDimension, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised by the record manager calls
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const qdrantServerUrl = nodeData.inputs?.qdrantServerUrl as string
    const collectionName = nodeData.inputs?.qdrantCollection as string
    const embeddings = nodeData.inputs?.embeddings as Embeddings
    const qdrantSimilarity = nodeData.inputs?.qdrantSimilarity
    const qdrantVectorDimension = nodeData.inputs?.qdrantVectorDimension
    const recordManager = nodeData.inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const qdrantApiKey = getCredentialParam('qdrantApiKey', credentialData, nodeData)

    const port = Qdrant_VectorStores.determinePortByUrl(qdrantServerUrl)

    const client = new QdrantClient({
        url: qdrantServerUrl,
        apiKey: qdrantApiKey,
        port: port
    })

    const dbConfig: QdrantLibArgs = {
        client,
        url: qdrantServerUrl,
        collectionName,
        collectionConfig: {
            vectors: {
                size: qdrantVectorDimension ? parseInt(qdrantVectorDimension, 10) : 1536,
                distance: qdrantSimilarity ?? 'Cosine'
            }
        }
    }

    const vectorStore = new QdrantVectorStore(embeddings, dbConfig)

    vectorStore.delete = async (params: { ids: string[] }): Promise<void> => {
        const pointIds = params.ids
        if (!pointIds?.length) return
        try {
            // Await the request so a rejection is caught here rather than escaping as an
            // unhandled rejection, and so callers only continue once deletion has finished
            await client.delete(collectionName, {
                points: pointIds
            })
        } catch (e) {
            // Deletion stays best-effort: the failure is logged, not rethrown.
            // NOTE(review): on the record manager path the keys are still removed
            // after a failed delete here; confirm that is intended.
            console.error('Failed to delete', e)
        }
    }

    try {
        if (recordManager) {
            const vectorStoreName = collectionName
            await recordManager.createSchema()
            // The record manager's namespace is extended in place with the collection name
            ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
            const keys: string[] = await recordManager.listKeys({})

            await vectorStore.delete({ ids: keys })
            await recordManager.deleteKeys(keys)
        } else {
            await vectorStore.delete({ ids })
        }
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -171,6 +171,40 @@ class Supabase_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Supabase table configured on this node.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with `<tableName>_<queryName>`, every key it lists is deleted
 * from the store, and the same keys are then removed from the record manager. Without a
 * record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (supabaseProjUrl, tableName, queryName, embeddings, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const inputs = nodeData.inputs
    const supabaseProjUrl = inputs?.supabaseProjUrl as string
    const tableName = inputs?.tableName as string
    const queryName = inputs?.queryName as string
    const embeddings = inputs?.embeddings as Embeddings
    const recordManager = inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const supabaseApiKey = getCredentialParam('supabaseApiKey', credentialData, nodeData)

    const client = createClient(supabaseProjUrl, supabaseApiKey)
    const storeArgs = {
        client,
        tableName: tableName,
        queryName: queryName
    }
    const supabaseStore = new SupabaseVectorStore(embeddings, storeArgs)

    try {
        if (!recordManager) {
            await supabaseStore.delete({ ids })
            return
        }

        const namespaceSuffix = tableName + '_' + queryName
        await recordManager.createSchema()
        // The record manager's namespace is extended in place with the table and query names
        const managed = recordManager as any
        managed.namespace = managed.namespace + '_' + namespaceSuffix
        const trackedKeys: string[] = await recordManager.listKeys({})

        await supabaseStore.delete({ ids: trackedKeys })
        await recordManager.deleteKeys(trackedKeys)
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -145,6 +145,41 @@ class Upstash_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Upstash index identified by this node's credential.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with the Upstash REST URL, every key it lists is deleted from
 * the store, and the same keys are then removed from the record manager. Without a
 * record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (embeddings, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const embeddings = nodeData.inputs?.embeddings as Embeddings
    const recordManager = nodeData.inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const UPSTASH_VECTOR_REST_URL = getCredentialParam('UPSTASH_VECTOR_REST_URL', credentialData, nodeData)
    const UPSTASH_VECTOR_REST_TOKEN = getCredentialParam('UPSTASH_VECTOR_REST_TOKEN', credentialData, nodeData)

    const upstashIndex = new UpstashIndex({
        url: UPSTASH_VECTOR_REST_URL,
        token: UPSTASH_VECTOR_REST_TOKEN
    })
    const upstashStore = new UpstashVectorStore(embeddings, { index: upstashIndex })

    try {
        if (!recordManager) {
            await upstashStore.delete({ ids })
            return
        }

        await recordManager.createSchema()
        // The record manager's namespace is extended in place with the REST URL
        const managed = recordManager as any
        managed.namespace = managed.namespace + '_' + UPSTASH_VECTOR_REST_URL
        const trackedKeys: string[] = await recordManager.listKeys({})

        await upstashStore.delete({ ids: trackedKeys })
        await recordManager.deleteKeys(trackedKeys)
    } catch (e) {
        throw new Error(e)
    }
}
}
@@ -200,6 +200,53 @@ class Weaviate_VectorStores implements INode {
} catch (e) {
throw new Error(e)
}
},
/**
 * Deletes vectors from the Weaviate index configured on this node.
 *
 * With a record manager connected, the `ids` argument is not used: the record manager's
 * namespace is suffixed with the index name (plus `_<textKey>` when a text key is set),
 * every key it lists is deleted from the store, and the same keys are then removed from
 * the record manager. Without a record manager, the given `ids` are deleted directly.
 *
 * @param nodeData - node inputs (weaviateScheme, weaviateHost, weaviateIndex, weaviateTextKey, weaviateMetadataKeys, embeddings, recordManager) and credential
 * @param ids - ids to delete when no record manager is connected
 * @param options - forwarded to getCredentialData
 * @throws Error wrapping any failure raised inside the deletion step
 */
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
    const inputs = nodeData.inputs
    const weaviateScheme = inputs?.weaviateScheme as string
    const weaviateHost = inputs?.weaviateHost as string
    const weaviateIndex = inputs?.weaviateIndex as string
    const weaviateTextKey = inputs?.weaviateTextKey as string
    const weaviateMetadataKeys = inputs?.weaviateMetadataKeys as string
    const embeddings = inputs?.embeddings as Embeddings
    const recordManager = inputs?.recordManager

    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    const weaviateApiKey = getCredentialParam('weaviateApiKey', credentialData, nodeData)

    const clientConfig: any = {
        scheme: weaviateScheme,
        host: weaviateHost
    }
    if (weaviateApiKey) clientConfig.apiKey = new ApiKey(weaviateApiKey)

    const client: WeaviateClient = weaviate.client(clientConfig)

    const storeArgs: WeaviateLibArgs = {
        //@ts-ignore
        client,
        indexName: weaviateIndex
    }
    if (weaviateTextKey) storeArgs.textKey = weaviateTextKey
    // Whitespace is stripped from the metadata keys input before it is parsed as JSON
    if (weaviateMetadataKeys) storeArgs.metadataKeys = JSON.parse(weaviateMetadataKeys.replace(/\s/g, ''))

    const weaviateStore = new WeaviateStore(embeddings, storeArgs)

    try {
        if (!recordManager) {
            await weaviateStore.delete({ ids })
            return
        }

        let namespaceSuffix = weaviateIndex
        if (weaviateTextKey) {
            namespaceSuffix = weaviateIndex + '_' + weaviateTextKey
        }
        await recordManager.createSchema()
        // The record manager's namespace is extended in place with the index (and text key)
        const managed = recordManager as any
        managed.namespace = managed.namespace + '_' + namespaceSuffix
        const trackedKeys: string[] = await recordManager.listKeys({})

        await weaviateStore.delete({ ids: trackedKeys })
        await recordManager.deleteKeys(trackedKeys)
    } catch (e) {
        throw new Error(e)
    }
}
}