Feature/Indexing (#1802)

* indexing

* fix for multiple files upsert

* fix default Postgres port

* fix SQLite node description

* add MySQLRecordManager node

* fix MySQL unique index

* add upsert history

* update jsx ui

* lint-fix

* update dialog details

* update llamaindex pinecone

---------

Co-authored-by: chungyau97 <chungyau97@gmail.com>
This commit is contained in:
Henry Heng
2024-04-02 23:47:19 +01:00
committed by GitHub
parent 957694a912
commit e422ce287b
67 changed files with 3006 additions and 246 deletions
@@ -0,0 +1,31 @@
import { INodeParams, INodeCredential } from '../src/Interface'
/**
 * Credential definition that collects the username and password
 * used to authenticate against a MySQL server.
 */
class MySQLApi implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        // Plain-text username field
        const userField: INodeParams = {
            label: 'User',
            name: 'user',
            type: 'string',
            placeholder: '<MYSQL_USERNAME>'
        }
        // Password field; its type is 'password' rather than 'string'
        const passwordField: INodeParams = {
            label: 'Password',
            name: 'password',
            type: 'password',
            placeholder: '<MYSQL_PASSWORD>'
        }

        this.label = 'MySQL API'
        this.name = 'MySQLApi'
        this.version = 1.0
        this.inputs = [userField, passwordField]
    }
}
// CommonJS export: exposes the credential class under the `credClass` key.
module.exports = { credClass: MySQLApi }
@@ -0,0 +1,361 @@
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { ListKeyOptions, RecordManagerInterface, UpdateOptions } from '@langchain/community/indexes/base'
import { DataSource, QueryRunner } from 'typeorm'
/**
 * Node definition for a MySQL-backed record manager.
 *
 * The constructor declares the node metadata and its configurable inputs;
 * `init` turns the configured inputs and credential into a `MySQLRecordManager` instance.
 */
class MySQLRecordManager_RecordManager implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    badge: string
    baseClasses: string[]
    credential: INodeParams
    inputs: INodeParams[]

    constructor() {
        // Choices offered by the "Cleanup" dropdown
        const cleanupModes = [
            {
                label: 'None',
                name: 'none',
                description: 'No clean up of old content'
            },
            {
                label: 'Incremental',
                name: 'incremental',
                description:
                    'Delete previous versions of the content if content of the source document has changed. Important!! SourceId Key must be specified and document metadata must contains the specified key'
            },
            {
                label: 'Full',
                name: 'full',
                description:
                    'Same as incremental, but if the source document has been deleted, it will be deleted from vector store as well, incremental mode will not.'
            }
        ]

        this.label = 'MySQL Record Manager'
        this.name = 'MySQLRecordManager'
        this.version = 1.0
        this.type = 'MySQL RecordManager'
        this.icon = 'mysql.png'
        this.category = 'Record Manager'
        this.description = 'Use MySQL to keep track of document writes into the vector databases'
        this.baseClasses = [this.type, 'RecordManager', ...getBaseClasses(MySQLRecordManager)]
        this.badge = 'NEW'
        this.inputs = [
            { label: 'Host', name: 'host', type: 'string' },
            { label: 'Database', name: 'database', type: 'string' },
            { label: 'Port', name: 'port', type: 'number', placeholder: '3306', optional: true },
            {
                label: 'Additional Connection Configuration',
                name: 'additionalConfig',
                type: 'json',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Table Name',
                name: 'tableName',
                type: 'string',
                placeholder: 'upsertion_records',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Namespace',
                name: 'namespace',
                type: 'string',
                description: 'If not specified, chatflowid will be used',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Cleanup',
                name: 'cleanup',
                type: 'options',
                description:
                    'Read more on the difference between different cleanup methods <a target="_blank" href="https://js.langchain.com/docs/modules/data_connection/indexing/#deletion-modes">here</a>',
                options: cleanupModes,
                additionalParams: true,
                default: 'none'
            },
            {
                label: 'SourceId Key',
                name: 'sourceIdKey',
                type: 'string',
                description:
                    'Key used to get the true source of document, to be compared against the record. Document metadata must contains SourceId Key',
                default: 'source',
                placeholder: 'source',
                additionalParams: true,
                optional: true
            }
        ]
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['MySQLApi']
        }
    }

    /**
     * Builds a `MySQLRecordManager` from the node inputs and the attached credential.
     *
     * @param nodeData - node configuration; `inputs` and `credential` are read from it
     * @param _ - unused
     * @param options - runtime options; `chatflowid` is the fallback namespace
     * @returns the record manager, with `cleanup` and `sourceIdKey` attached as extra properties
     * @throws Error when the additional configuration is a string that is not valid JSON
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const username = getCredentialParam('user', credentialData, nodeData)
        const password = getCredentialParam('password', credentialData, nodeData)

        const inputs = nodeData.inputs
        const tableName = (inputs?.tableName as string) || 'upsertion_records'
        const rawConfig = inputs?.additionalConfig as string
        const namespace = (inputs?.namespace as string) || options.chatflowid
        const cleanup = inputs?.cleanup as string
        const sourceIdKey = (inputs?.sourceIdKey as string) || 'source'

        // The extra configuration may arrive already parsed (object) or as a JSON string
        let extraOptions = {}
        if (rawConfig) {
            if (typeof rawConfig === 'object') {
                extraOptions = rawConfig
            } else {
                try {
                    extraOptions = JSON.parse(rawConfig)
                } catch (exception) {
                    throw new Error('Invalid JSON in the Additional Configuration: ' + exception)
                }
            }
        }

        // Explicit fields come after the spread so they win over user-supplied extras
        const recordManager = new MySQLRecordManager(namespace, {
            mysqlOptions: {
                ...extraOptions,
                type: 'mysql',
                host: inputs?.host as string,
                port: inputs?.port as number,
                username,
                password,
                database: inputs?.database as string
            },
            tableName
        })
        Object.assign(recordManager, { cleanup, sourceIdKey })
        return recordManager
    }
}
type MySQLRecordManagerOptions = {
    mysqlOptions: any
    tableName?: string
}

/**
 * Record manager that keeps upsertion records (key, namespace, timestamp, group id)
 * in a MySQL table, accessed through a TypeORM `DataSource`.
 *
 * `createSchema()` must be called before any other method: it opens the connection
 * and creates the query runner the other methods rely on.
 */
class MySQLRecordManager implements RecordManagerInterface {
    lc_namespace = ['langchain', 'recordmanagers', 'mysql']
    datasource: DataSource
    queryRunner: QueryRunner
    tableName: string
    namespace: string

    /**
     * @param namespace - value stored in the `namespace` column and used to scope every query
     * @param config - connection options handed to `DataSource`, plus an optional table name
     *                 (defaults to `upsertion_records`)
     */
    constructor(namespace: string, config: MySQLRecordManagerOptions) {
        const { mysqlOptions, tableName } = config
        this.namespace = namespace
        this.tableName = tableName || 'upsertion_records'
        this.datasource = new DataSource(mysqlOptions)
    }

    /**
     * Opens the connection (if not already open) and creates the records table and its
     * indexes when they are missing. Safe to call more than once on the same instance.
     */
    async createSchema(): Promise<void> {
        try {
            // initialize() must not be called on an already initialized DataSource
            const appDataSource = this.datasource.isInitialized ? this.datasource : await this.datasource.initialize()
            if (!this.queryRunner) {
                this.queryRunner = appDataSource.createQueryRunner()
            }

            await this.queryRunner.manager.query(`create table if not exists \`${this.tableName}\` (
                \`uuid\` varchar(36) primary key default (UUID()),
                \`key\` varchar(36) not null,
                \`namespace\` varchar(36) not null,
                \`updated_at\` DOUBLE precision not null,
                \`group_id\` varchar(36),
                unique key \`unique_key_namespace\` (\`key\`,
                \`namespace\`));`)

            const columns = [`updated_at`, `key`, `namespace`, `group_id`]
            for (const column of columns) {
                // MySQL does not support 'IF NOT EXISTS' for indexes, so look the index up first.
                // Table and index names are passed as parameters instead of being interpolated.
                const check = await this.queryRunner.manager.query(
                    `SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS
                    WHERE table_schema=DATABASE() AND table_name=? AND index_name=?;`,
                    [this.tableName, `${column}_index`]
                )
                // Number() because the driver may return the count as a string
                if (Number(check[0].IndexIsThere) === 0) {
                    await this.queryRunner.manager.query(`CREATE INDEX \`${column}_index\`
                        ON \`${this.tableName}\` (\`${column}\`);`)
                }
            }
        } catch (e: any) {
            // A concurrent caller may create the table or an index between our existence
            // check and our create statement; that outcome can be safely ignored.
            // '23505' is kept from the original code; the ER_* names are the MySQL
            // "table already exists" / "duplicate key name" error codes.
            const ignorable = ['23505', 'ER_TABLE_EXISTS_ERROR', 'ER_DUP_KEYNAME']
            if (ignorable.includes(e?.code)) {
                return
            }
            throw e
        }
    }

    /**
     * @returns the database server's current time as a Unix epoch in seconds
     */
    async getTime(): Promise<number> {
        try {
            const res = await this.queryRunner.manager.query(`SELECT UNIX_TIMESTAMP(NOW()) AS epoch`)
            return Number.parseFloat(res[0].epoch)
        } catch (error) {
            console.error('Error getting time in MySQLRecordManager:')
            throw error
        }
    }

    /**
     * Inserts the given keys, or refreshes `updated_at` for keys that already exist
     * in this namespace.
     *
     * @param keys - record keys to upsert
     * @param updateOptions - optional `groupIds` (one per key) and `timeAtLeast` guard
     * @throws Error when the database clock is behind `timeAtLeast`, or when the number
     *         of group ids differs from the number of keys
     */
    async update(keys: string[], updateOptions?: UpdateOptions): Promise<void> {
        if (keys.length === 0) {
            return
        }

        const updatedAt = await this.getTime()
        const { timeAtLeast, groupIds: _groupIds } = updateOptions ?? {}

        if (timeAtLeast && updatedAt < timeAtLeast) {
            throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`)
        }

        const groupIds = _groupIds ?? keys.map(() => null)
        if (groupIds.length !== keys.length) {
            throw new Error(`Number of keys (${keys.length}) does not match number of group_ids (${groupIds.length})`)
        }

        // VALUES(`updated_at`) refers to the value of the row being inserted. The previous
        // `updated_at = updated_at` assigned the column to itself, so existing records
        // never had their timestamp refreshed.
        const query = `
            INSERT INTO \`${this.tableName}\` (\`key\`, \`namespace\`, \`updated_at\`, \`group_id\`)
            VALUES (?, ?, ?, ?)
            ON DUPLICATE KEY UPDATE \`updated_at\` = VALUES(\`updated_at\`);`

        // One statement per record, to handle multiple files upsert
        for (let i = 0; i < keys.length; i += 1) {
            // group id falls back to null when undefined
            await this.queryRunner.manager.query(query, [keys[i], this.namespace, updatedAt, groupIds[i] ?? null])
        }
    }

    /**
     * @param keys - record keys to look up in this namespace
     * @returns one boolean per input key, in the same order, true when the key is stored
     */
    async exists(keys: string[]): Promise<boolean[]> {
        if (keys.length === 0) {
            return []
        }

        const placeholders = keys.map(() => `?`).join(', ')
        const query = `
            SELECT \`key\`
            FROM \`${this.tableName}\`
            WHERE \`namespace\` = ? AND \`key\` IN (${placeholders})`

        try {
            const rows = await this.queryRunner.manager.query(query, [this.namespace, ...keys])
            // Set gives constant-time membership checks when mapping back to the input order
            const existingKeys = new Set(rows.map((row: { key: string }) => row.key))
            return keys.map((key) => existingKeys.has(key))
        } catch (error) {
            console.error('Error checking existence of keys')
            throw error // Allow the caller to handle the error
        }
    }

    /**
     * Lists keys stored in this namespace, optionally filtered.
     *
     * @param options - `before` / `after` bound `updated_at` (exclusive), `groupIds` restricts
     *                  to the given group ids (null entries are dropped), `limit` caps the row count
     * @returns the matching keys; an empty array when `groupIds` is given but holds no usable id
     */
    async listKeys(options?: ListKeyOptions): Promise<string[]> {
        try {
            const { before, after, limit, groupIds } = options ?? {}
            let query = `SELECT \`key\` FROM \`${this.tableName}\` WHERE \`namespace\` = ?`
            const values: (string | number)[] = [this.namespace]

            if (before) {
                query += ` AND \`updated_at\` < ?`
                values.push(before)
            }

            if (after) {
                query += ` AND \`updated_at\` > ?`
                values.push(after)
            }

            if (groupIds && Array.isArray(groupIds)) {
                const usableIds = groupIds.filter((gid): gid is string => gid != null)
                // `IN ()` is not valid SQL; an empty filter can match nothing
                if (usableIds.length === 0) {
                    return []
                }
                query += ` AND \`group_id\` IN (${usableIds.map(() => '?').join(', ')})`
                values.push(...usableIds)
            }

            // LIMIT has to come after every WHERE condition
            if (limit) {
                query += ` LIMIT ?`
                values.push(limit)
            }

            query += ';'

            const result = await this.queryRunner.manager.query(query, values)
            return result.map((row: { key: string }) => row.key)
        } catch (error) {
            console.error('MySQLRecordManager listKeys Error: ')
            throw error // Re-throw the error to be handled by the caller
        }
    }

    /**
     * Deletes the given keys from this namespace.
     *
     * @param keys - record keys to delete; nothing is executed for an empty list
     */
    async deleteKeys(keys: string[]): Promise<void> {
        if (keys.length === 0) {
            return
        }

        const placeholders = keys.map(() => '?').join(', ')
        const query = `DELETE FROM \`${this.tableName}\` WHERE \`namespace\` = ? AND \`key\` IN (${placeholders});`
        // Coerce every parameter to a string before binding
        const values = [this.namespace, ...keys].map((v) => (typeof v !== 'string' ? `${v}` : v))

        try {
            await this.queryRunner.manager.query(query, values)
        } catch (error) {
            console.error('Error deleting keys')
            throw error // Re-throw the error to be handled by the caller
        }
    }

    /**
     * Closes the underlying data source if it was initialized.
     * @returns {Promise<void>}
     */
    async end(): Promise<void> {
        if (this.datasource && this.datasource.isInitialized) await this.datasource.destroy()
    }
}
// CommonJS export: exposes the node class under the `nodeClass` key.
module.exports = { nodeClass: MySQLRecordManager_RecordManager }
Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

@@ -0,0 +1,332 @@
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { ListKeyOptions, RecordManagerInterface, UpdateOptions } from '@langchain/community/indexes/base'
import { DataSource, QueryRunner } from 'typeorm'
/**
 * Node definition for a Postgres-backed record manager.
 *
 * The constructor declares the node metadata and its configurable inputs;
 * `init` turns the configured inputs and credential into a `PostgresRecordManager` instance.
 */
class PostgresRecordManager_RecordManager implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    badge: string
    baseClasses: string[]
    credential: INodeParams
    inputs: INodeParams[]

    constructor() {
        // Choices offered by the "Cleanup" dropdown
        const cleanupModes = [
            {
                label: 'None',
                name: 'none',
                description: 'No clean up of old content'
            },
            {
                label: 'Incremental',
                name: 'incremental',
                description:
                    'Delete previous versions of the content if content of the source document has changed. Important!! SourceId Key must be specified and document metadata must contains the specified key'
            },
            {
                label: 'Full',
                name: 'full',
                description:
                    'Same as incremental, but if the source document has been deleted, it will be deleted from vector store as well, incremental mode will not.'
            }
        ]

        this.label = 'Postgres Record Manager'
        this.name = 'postgresRecordManager'
        this.version = 1.0
        this.type = 'Postgres RecordManager'
        this.icon = 'postgres.svg'
        this.category = 'Record Manager'
        this.description = 'Use Postgres to keep track of document writes into the vector databases'
        this.baseClasses = [this.type, 'RecordManager', ...getBaseClasses(PostgresRecordManager)]
        this.badge = 'NEW'
        this.inputs = [
            { label: 'Host', name: 'host', type: 'string' },
            { label: 'Database', name: 'database', type: 'string' },
            { label: 'Port', name: 'port', type: 'number', placeholder: '5432', optional: true },
            {
                label: 'Additional Connection Configuration',
                name: 'additionalConfig',
                type: 'json',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Table Name',
                name: 'tableName',
                type: 'string',
                placeholder: 'upsertion_records',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Namespace',
                name: 'namespace',
                type: 'string',
                description: 'If not specified, chatflowid will be used',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Cleanup',
                name: 'cleanup',
                type: 'options',
                description:
                    'Read more on the difference between different cleanup methods <a target="_blank" href="https://js.langchain.com/docs/modules/data_connection/indexing/#deletion-modes">here</a>',
                options: cleanupModes,
                additionalParams: true,
                default: 'none'
            },
            {
                label: 'SourceId Key',
                name: 'sourceIdKey',
                type: 'string',
                description:
                    'Key used to get the true source of document, to be compared against the record. Document metadata must contains SourceId Key',
                default: 'source',
                placeholder: 'source',
                additionalParams: true,
                optional: true
            }
        ]
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['PostgresApi']
        }
    }

    /**
     * Builds a `PostgresRecordManager` from the node inputs and the attached credential.
     *
     * @param nodeData - node configuration; `inputs` and `credential` are read from it
     * @param _ - unused
     * @param options - runtime options; `chatflowid` is the fallback namespace
     * @returns the record manager, with `cleanup` and `sourceIdKey` attached as extra properties
     * @throws Error when the additional configuration is a string that is not valid JSON
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const username = getCredentialParam('user', credentialData, nodeData)
        const password = getCredentialParam('password', credentialData, nodeData)

        const inputs = nodeData.inputs
        const tableName = (inputs?.tableName as string) || 'upsertion_records'
        const rawConfig = inputs?.additionalConfig as string
        const namespace = (inputs?.namespace as string) || options.chatflowid
        const cleanup = inputs?.cleanup as string
        const sourceIdKey = (inputs?.sourceIdKey as string) || 'source'

        // The extra configuration may arrive already parsed (object) or as a JSON string
        let extraOptions = {}
        if (rawConfig) {
            if (typeof rawConfig === 'object') {
                extraOptions = rawConfig
            } else {
                try {
                    extraOptions = JSON.parse(rawConfig)
                } catch (exception) {
                    throw new Error('Invalid JSON in the Additional Configuration: ' + exception)
                }
            }
        }

        // Explicit fields come after the spread so they win over user-supplied extras
        const recordManager = new PostgresRecordManager(namespace, {
            postgresConnectionOptions: {
                ...extraOptions,
                type: 'postgres',
                host: inputs?.host as string,
                port: inputs?.port as number,
                username,
                password,
                database: inputs?.database as string
            },
            tableName
        })
        Object.assign(recordManager, { cleanup, sourceIdKey })
        return recordManager
    }
}
type PostgresRecordManagerOptions = {
    postgresConnectionOptions: any
    tableName?: string
}

/**
 * Record manager that keeps upsertion records (key, namespace, timestamp, group id)
 * in a Postgres table, accessed through a TypeORM `DataSource`.
 *
 * `createSchema()` must be called before any other method: it opens the connection
 * and creates the query runner the other methods rely on.
 */
class PostgresRecordManager implements RecordManagerInterface {
    lc_namespace = ['langchain', 'recordmanagers', 'postgres']
    datasource: DataSource
    queryRunner: QueryRunner
    tableName: string
    namespace: string

    /**
     * @param namespace - value stored in the `namespace` column and used to scope every query
     * @param config - connection options handed to `DataSource`, plus an optional table name
     *                 (defaults to `upsertion_records`)
     */
    constructor(namespace: string, config: PostgresRecordManagerOptions) {
        const { postgresConnectionOptions, tableName } = config
        this.namespace = namespace
        this.datasource = new DataSource(postgresConnectionOptions)
        this.tableName = tableName || 'upsertion_records'
    }

    /**
     * Opens the connection (if not already open) and creates the records table and its
     * indexes when they are missing. Safe to call more than once on the same instance.
     */
    async createSchema(): Promise<void> {
        try {
            // initialize() must not be called on an already initialized DataSource
            const appDataSource = this.datasource.isInitialized ? this.datasource : await this.datasource.initialize()
            if (!this.queryRunner) {
                this.queryRunner = appDataSource.createQueryRunner()
            }

            await this.queryRunner.manager.query(`
                CREATE TABLE IF NOT EXISTS "${this.tableName}" (
                uuid UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                key TEXT NOT NULL,
                namespace TEXT NOT NULL,
                updated_at Double PRECISION NOT NULL,
                group_id TEXT,
                UNIQUE (key, namespace)
                );
                CREATE INDEX IF NOT EXISTS updated_at_index ON "${this.tableName}" (updated_at);
                CREATE INDEX IF NOT EXISTS key_index ON "${this.tableName}" (key);
                CREATE INDEX IF NOT EXISTS namespace_index ON "${this.tableName}" (namespace);
                CREATE INDEX IF NOT EXISTS group_id_index ON "${this.tableName}" (group_id);`)
        } catch (e: any) {
            // This error indicates that the table already exists.
            // Due to asynchronous nature of the code, it is possible that
            // the table is created between the time we check if it exists
            // and the time we try to create it. It can be safely ignored.
            if (e?.code === '23505') {
                return
            }
            throw e
        }
    }

    /**
     * @returns the database server's current time as a Unix epoch in seconds
     */
    async getTime(): Promise<number> {
        // The column is aliased explicitly so the result does not depend on the
        // server's implicit column name for EXTRACT.
        const res = await this.queryRunner.manager.query('SELECT EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) AS epoch')
        return Number.parseFloat(res[0].epoch)
    }

    /**
     * Generates the SQL placeholders for a specific row at the provided index.
     *
     * @param index - The index of the row for which placeholders need to be generated.
     * @param numOfColumns - The number of columns we are inserting data into.
     * @returns The SQL placeholders for the row values.
     */
    private generatePlaceholderForRowAt(index: number, numOfColumns: number): string {
        const placeholders = []
        for (let i = 0; i < numOfColumns; i += 1) {
            placeholders.push(`$${index * numOfColumns + i + 1}`)
        }
        return `(${placeholders.join(', ')})`
    }

    /**
     * Inserts the given keys in a single statement, or refreshes `updated_at` for keys
     * that already exist in this namespace.
     *
     * @param keys - record keys to upsert
     * @param updateOptions - optional `groupIds` (one per key) and `timeAtLeast` guard
     * @throws Error when the database clock is behind `timeAtLeast`, or when the number
     *         of group ids differs from the number of keys
     */
    async update(keys: string[], updateOptions?: UpdateOptions): Promise<void> {
        if (keys.length === 0) {
            return
        }

        const updatedAt = await this.getTime()
        const { timeAtLeast, groupIds: _groupIds } = updateOptions ?? {}

        if (timeAtLeast && updatedAt < timeAtLeast) {
            throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`)
        }

        const groupIds = _groupIds ?? keys.map(() => null)
        if (groupIds.length !== keys.length) {
            throw new Error(`Number of keys (${keys.length}) does not match number of group_ids (${groupIds.length})`)
        }

        const numOfColumns = 4
        const recordsToUpsert = keys.map((key, i) => [key, this.namespace, updatedAt, groupIds[i] ?? null])
        const valuesPlaceholders = recordsToUpsert.map((_, j) => this.generatePlaceholderForRowAt(j, numOfColumns)).join(', ')

        const query = `INSERT INTO "${this.tableName}" (key, namespace, updated_at, group_id) VALUES ${valuesPlaceholders} ON CONFLICT (key, namespace) DO UPDATE SET updated_at = EXCLUDED.updated_at;`
        await this.queryRunner.manager.query(query, recordsToUpsert.flat())
    }

    /**
     * @param keys - record keys to look up in this namespace
     * @returns one boolean per row returned by the lookup query, true when the key is stored
     */
    async exists(keys: string[]): Promise<boolean[]> {
        if (keys.length === 0) {
            return []
        }

        // $1 is the namespace, so key placeholders start at $2
        const startIndex = 2
        const arrayPlaceholders = keys.map((_, i) => `$${i + startIndex}`).join(', ')

        const query = `
            SELECT k, (key is not null) ex from unnest(ARRAY[${arrayPlaceholders}]) k left join "${this.tableName}" on k=key and namespace = $1;
        `
        const res = await this.queryRunner.manager.query(query, [this.namespace, ...keys])
        return res.map((row: { ex: boolean }) => row.ex)
    }

    /**
     * Lists keys stored in this namespace, optionally filtered.
     *
     * @param options - `before` / `after` bound `updated_at` (exclusive), `groupIds` restricts
     *                  to the given group ids, `limit` caps the row count
     * @returns the matching keys
     */
    async listKeys(options?: ListKeyOptions): Promise<string[]> {
        const { before, after, limit, groupIds } = options ?? {}
        let query = `SELECT key FROM "${this.tableName}" WHERE namespace = $1`
        const values: (string | number | (string | null)[])[] = [this.namespace]

        // Next positional placeholder number
        let index = 2

        if (before) {
            values.push(before)
            query += ` AND updated_at < $${index}`
            index += 1
        }

        if (after) {
            values.push(after)
            query += ` AND updated_at > $${index}`
            index += 1
        }

        if (groupIds) {
            values.push(groupIds)
            query += ` AND group_id = ANY($${index})`
            index += 1
        }

        // LIMIT has to come after every WHERE condition
        if (limit) {
            values.push(limit)
            query += ` LIMIT $${index}`
            index += 1
        }

        query += ';'
        const res = await this.queryRunner.manager.query(query, values)
        return res.map((row: { key: string }) => row.key)
    }

    /**
     * Deletes the given keys from this namespace.
     *
     * @param keys - record keys to delete; nothing is executed for an empty list
     */
    async deleteKeys(keys: string[]): Promise<void> {
        if (keys.length === 0) {
            return
        }

        const query = `DELETE FROM "${this.tableName}" WHERE namespace = $1 AND key = ANY($2);`
        await this.queryRunner.manager.query(query, [this.namespace, keys])
    }

    /**
     * Terminates the connection pool.
     * @returns {Promise<void>}
     */
    async end(): Promise<void> {
        if (this.datasource && this.datasource.isInitialized) await this.datasource.destroy()
    }
}
// CommonJS export: exposes the node class under the `nodeClass` key.
module.exports = { nodeClass: PostgresRecordManager_RecordManager }
File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 6.8 KiB

@@ -0,0 +1,332 @@
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { ListKeyOptions, RecordManagerInterface, UpdateOptions } from '@langchain/community/indexes/base'
import { DataSource, QueryRunner } from 'typeorm'
import path from 'path'
/**
 * Node definition for a SQLite-backed record manager.
 *
 * The constructor declares the node metadata and its configurable inputs;
 * `init` turns the configured inputs into a `SQLiteRecordManager` instance.
 */
class SQLiteRecordManager_RecordManager implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    badge: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        // Choices offered by the "Cleanup" dropdown
        const cleanupModes = [
            {
                label: 'None',
                name: 'none',
                description: 'No clean up of old content'
            },
            {
                label: 'Incremental',
                name: 'incremental',
                description:
                    'Delete previous versions of the content if content of the source document has changed. Important!! SourceId Key must be specified and document metadata must contains the specified key'
            },
            {
                label: 'Full',
                name: 'full',
                description:
                    'Same as incremental, but if the source document has been deleted, it will be deleted from vector store as well, incremental mode will not.'
            }
        ]

        this.label = 'SQLite Record Manager'
        this.name = 'SQLiteRecordManager'
        this.version = 1.0
        this.type = 'SQLite RecordManager'
        this.icon = 'sqlite.png'
        this.category = 'Record Manager'
        this.description = 'Use SQLite to keep track of document writes into the vector databases'
        this.baseClasses = [this.type, 'RecordManager', ...getBaseClasses(SQLiteRecordManager)]
        this.badge = 'NEW'
        this.inputs = [
            {
                label: 'Database File Path',
                name: 'databaseFilePath',
                type: 'string',
                placeholder: 'C:\\Users\\User\\.flowise\\database.sqlite'
            },
            {
                label: 'Additional Connection Configuration',
                name: 'additionalConfig',
                type: 'json',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Table Name',
                name: 'tableName',
                type: 'string',
                placeholder: 'upsertion_records',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Namespace',
                name: 'namespace',
                type: 'string',
                description: 'If not specified, chatflowid will be used',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Cleanup',
                name: 'cleanup',
                type: 'options',
                description:
                    'Read more on the difference between different cleanup methods <a target="_blank" href="https://js.langchain.com/docs/modules/data_connection/indexing/#deletion-modes">here</a>',
                options: cleanupModes,
                additionalParams: true,
                default: 'none'
            },
            {
                label: 'SourceId Key',
                name: 'sourceIdKey',
                type: 'string',
                description:
                    'Key used to get the true source of document, to be compared against the record. Document metadata must contains SourceId Key',
                default: 'source',
                placeholder: 'source',
                additionalParams: true,
                optional: true
            }
        ]
    }

    /**
     * Builds a `SQLiteRecordManager` from the node inputs.
     *
     * @param nodeData - node configuration; `inputs` is read from it
     * @param _ - unused
     * @param options - runtime options; `chatflowid` is the fallback namespace
     * @returns the record manager, with `cleanup` and `sourceIdKey` attached as extra properties
     * @throws Error when the additional configuration is a string that is not valid JSON
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const inputs = nodeData.inputs
        const tableName = (inputs?.tableName as string) || 'upsertion_records'
        const rawConfig = inputs?.additionalConfig as string
        const namespace = (inputs?.namespace as string) || options.chatflowid
        const cleanup = inputs?.cleanup as string
        const sourceIdKey = (inputs?.sourceIdKey as string) || 'source'
        const databaseFilePath = inputs?.databaseFilePath as string

        // The extra configuration may arrive already parsed (object) or as a JSON string
        let extraOptions = {}
        if (rawConfig) {
            if (typeof rawConfig === 'object') {
                extraOptions = rawConfig
            } else {
                try {
                    extraOptions = JSON.parse(rawConfig)
                } catch (exception) {
                    throw new Error('Invalid JSON in the Additional Configuration: ' + exception)
                }
            }
        }

        // Explicit fields come after the spread so they win over user-supplied extras;
        // the database path is resolved to an absolute path.
        const recordManager = new SQLiteRecordManager(namespace, {
            sqliteOptions: {
                ...extraOptions,
                type: 'sqlite',
                database: path.resolve(databaseFilePath)
            },
            tableName
        })
        Object.assign(recordManager, { cleanup, sourceIdKey })
        return recordManager
    }
}
type SQLiteRecordManagerOptions = {
    sqliteOptions: any
    tableName?: string
}

/**
 * Record manager that keeps upsertion records (key, namespace, timestamp, group id)
 * in a SQLite table, accessed through a TypeORM `DataSource`.
 *
 * `createSchema()` must be called before any other method: it opens the connection
 * and creates the query runner the other methods rely on.
 */
class SQLiteRecordManager implements RecordManagerInterface {
    lc_namespace = ['langchain', 'recordmanagers', 'sqlite']
    datasource: DataSource
    queryRunner: QueryRunner
    tableName: string
    namespace: string

    /**
     * @param namespace - value stored in the `namespace` column and used to scope every query
     * @param config - connection options handed to `DataSource`, plus an optional table name
     *                 (defaults to `upsertion_records`)
     */
    constructor(namespace: string, config: SQLiteRecordManagerOptions) {
        const { sqliteOptions, tableName } = config
        this.namespace = namespace
        this.tableName = tableName || 'upsertion_records'
        this.datasource = new DataSource(sqliteOptions)
    }

    /**
     * Opens the connection (if not already open) and creates the records table and its
     * indexes when they are missing. Safe to call more than once on the same instance.
     */
    async createSchema(): Promise<void> {
        try {
            // initialize() must not be called on an already initialized DataSource
            const appDataSource = this.datasource.isInitialized ? this.datasource : await this.datasource.initialize()
            if (!this.queryRunner) {
                this.queryRunner = appDataSource.createQueryRunner()
            }

            // Each statement is sent on its own.
            // NOTE(review): the sqlite3 driver appears to prepare only the first statement
            // of a multi-statement string, which would have skipped the indexes — confirm.
            const statements = [
                `CREATE TABLE IF NOT EXISTS "${this.tableName}" (
                    uuid TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
                    key TEXT NOT NULL,
                    namespace TEXT NOT NULL,
                    updated_at REAL NOT NULL,
                    group_id TEXT,
                    UNIQUE (key, namespace)
                    )`,
                `CREATE INDEX IF NOT EXISTS updated_at_index ON "${this.tableName}" (updated_at)`,
                `CREATE INDEX IF NOT EXISTS key_index ON "${this.tableName}" (key)`,
                `CREATE INDEX IF NOT EXISTS namespace_index ON "${this.tableName}" (namespace)`,
                `CREATE INDEX IF NOT EXISTS group_id_index ON "${this.tableName}" (group_id)`
            ]
            for (const statement of statements) {
                await this.queryRunner.manager.query(statement)
            }
        } catch (e: any) {
            // This error indicates that the table already exists.
            // Due to asynchronous nature of the code, it is possible that
            // the table is created between the time we check if it exists
            // and the time we try to create it. It can be safely ignored.
            if (e?.code === '23505') {
                return
            }
            throw e
        }
    }

    /**
     * @returns the database's current time as a Unix epoch in seconds
     */
    async getTime(): Promise<number> {
        try {
            const res = await this.queryRunner.manager.query(`SELECT strftime('%s', 'now') AS epoch`)
            return Number.parseFloat(res[0].epoch)
        } catch (error) {
            console.error('Error getting time in SQLiteRecordManager:')
            throw error
        }
    }

    /**
     * Inserts the given keys, or refreshes `updated_at` for keys that already exist
     * in this namespace.
     *
     * @param keys - record keys to upsert
     * @param updateOptions - optional `groupIds` (one per key) and `timeAtLeast` guard
     * @throws Error when the database clock is behind `timeAtLeast`, or when the number
     *         of group ids differs from the number of keys
     */
    async update(keys: string[], updateOptions?: UpdateOptions): Promise<void> {
        if (keys.length === 0) {
            return
        }

        const updatedAt = await this.getTime()
        const { timeAtLeast, groupIds: _groupIds } = updateOptions ?? {}

        if (timeAtLeast && updatedAt < timeAtLeast) {
            throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`)
        }

        const groupIds = _groupIds ?? keys.map(() => null)
        if (groupIds.length !== keys.length) {
            throw new Error(`Number of keys (${keys.length}) does not match number of group_ids (${groupIds.length})`)
        }

        const query = `
            INSERT INTO "${this.tableName}" (key, namespace, updated_at, group_id)
            VALUES (?, ?, ?, ?)
            ON CONFLICT (key, namespace) DO UPDATE SET updated_at = excluded.updated_at`

        // One statement per record, to handle multiple files upsert
        for (let i = 0; i < keys.length; i += 1) {
            // group id falls back to null when undefined
            await this.queryRunner.manager.query(query, [keys[i], this.namespace, updatedAt, groupIds[i] ?? null])
        }
    }

    /**
     * @param keys - record keys to look up in this namespace
     * @returns one boolean per input key, in the same order, true when the key is stored
     */
    async exists(keys: string[]): Promise<boolean[]> {
        if (keys.length === 0) {
            return []
        }

        const placeholders = keys.map(() => `?`).join(', ')
        const sql = `
            SELECT key
            FROM "${this.tableName}"
            WHERE namespace = ? AND key IN (${placeholders})`

        try {
            const rows = await this.queryRunner.manager.query(sql, [this.namespace, ...keys])
            // Set gives constant-time membership checks when mapping back to the input order
            const existingKeys = new Set(rows.map((row: { key: string }) => row.key))
            return keys.map((key) => existingKeys.has(key))
        } catch (error) {
            console.error('Error checking existence of keys')
            throw error // Allow the caller to handle the error
        }
    }

    /**
     * Lists keys stored in this namespace, optionally filtered.
     *
     * @param options - `before` / `after` bound `updated_at` (exclusive), `groupIds` restricts
     *                  to the given group ids (null entries are dropped), `limit` caps the row count
     * @returns the matching keys; an empty array when `groupIds` is given but holds no usable id
     */
    async listKeys(options?: ListKeyOptions): Promise<string[]> {
        const { before, after, limit, groupIds } = options ?? {}
        let query = `SELECT key FROM "${this.tableName}" WHERE namespace = ?`
        const values: (string | number)[] = [this.namespace]

        if (before) {
            query += ` AND updated_at < ?`
            values.push(before)
        }

        if (after) {
            query += ` AND updated_at > ?`
            values.push(after)
        }

        if (groupIds && Array.isArray(groupIds)) {
            const usableIds = groupIds.filter((gid): gid is string => gid != null)
            // `IN ()` is not valid SQL; an empty filter can match nothing
            if (usableIds.length === 0) {
                return []
            }
            query += ` AND group_id IN (${usableIds.map(() => '?').join(', ')})`
            values.push(...usableIds)
        }

        // LIMIT has to come after every WHERE condition
        if (limit) {
            query += ` LIMIT ?`
            values.push(limit)
        }

        query += ';'

        try {
            const result = await this.queryRunner.manager.query(query, values)
            return result.map((row: { key: string }) => row.key)
        } catch (error) {
            console.error('Error listing keys.')
            throw error // Re-throw the error to be handled by the caller
        }
    }

    /**
     * Deletes the given keys from this namespace.
     *
     * @param keys - record keys to delete; nothing is executed for an empty list
     */
    async deleteKeys(keys: string[]): Promise<void> {
        if (keys.length === 0) {
            return
        }

        const placeholders = keys.map(() => '?').join(', ')
        const query = `DELETE FROM "${this.tableName}" WHERE namespace = ? AND key IN (${placeholders});`
        // Coerce every parameter to a string before binding
        const values = [this.namespace, ...keys].map((v) => (typeof v !== 'string' ? `${v}` : v))

        try {
            await this.queryRunner.manager.query(query, values)
        } catch (error) {
            console.error('Error deleting keys')
            throw error // Re-throw the error to be handled by the caller
        }
    }

    /**
     * Closes the underlying data source if it was initialized.
     * @returns {Promise<void>}
     */
    async end(): Promise<void> {
        if (this.datasource && this.datasource.isInitialized) await this.datasource.destroy()
    }
}
// CommonJS export: exposes the node class under the `nodeClass` key.
module.exports = { nodeClass: SQLiteRecordManager_RecordManager }
Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

@@ -2,7 +2,7 @@ import { flatten } from 'lodash'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { AstraDBVectorStore, AstraLibArgs } from '@langchain/community/vectorstores/astradb'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
@@ -101,7 +101,7 @@ class Astra_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const vectorDimension = nodeData.inputs?.vectorDimension as number
@@ -142,6 +142,7 @@ class Astra_VectorStores implements INode {
try {
await AstraDBVectorStore.fromDocuments(finalDocs, embeddings, astraConfig)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -2,9 +2,10 @@ import { flatten } from 'lodash'
import { Chroma } from '@langchain/community/vectorstores/chroma'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { ChromaExtended } from './core'
import { index } from '../../../src/indexing'
class Chroma_VectorStores implements INode {
label: string
@@ -23,7 +24,7 @@ class Chroma_VectorStores implements INode {
constructor() {
this.label = 'Chroma'
this.name = 'chroma'
this.version = 1.0
this.version = 2.0
this.type = 'Chroma'
this.icon = 'chroma.svg'
this.category = 'Vector Stores'
@@ -51,6 +52,13 @@ class Chroma_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Collection Name',
name: 'collectionName',
@@ -95,11 +103,12 @@ class Chroma_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const collectionName = nodeData.inputs?.collectionName as string
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const chromaURL = nodeData.inputs?.chromaURL as string
const recordManager = nodeData.inputs?.recordManager
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const chromaApiKey = getCredentialParam('chromaApiKey', credentialData, nodeData)
@@ -121,7 +130,24 @@ class Chroma_VectorStores implements INode {
if (chromaApiKey) obj.chromaApiKey = chromaApiKey
try {
await ChromaExtended.fromDocuments(finalDocs, embeddings, obj)
if (recordManager) {
const vectorStore = await ChromaExtended.fromExistingCollection(embeddings, obj)
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: collectionName
}
})
return res
} else {
await ChromaExtended.fromDocuments(finalDocs, embeddings, obj)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
}
@@ -3,8 +3,9 @@ import { Client, ClientOptions } from '@elastic/elasticsearch'
import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings'
import { ElasticClientArgs, ElasticVectorSearch } from '@langchain/community/vectorstores/elasticsearch'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { index } from '../../../src/indexing'
class Elasticsearch_VectorStores implements INode {
label: string
@@ -23,7 +24,7 @@ class Elasticsearch_VectorStores implements INode {
constructor() {
this.label = 'Elasticsearch'
this.name = 'elasticsearch'
this.version = 1.0
this.version = 2.0
this.description =
'Upsert embedded data and perform similarity search upon query using Elasticsearch, a distributed search and analytics engine'
this.type = 'Elasticsearch'
@@ -50,6 +51,13 @@ class Elasticsearch_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Index Name',
name: 'indexName',
@@ -105,13 +113,14 @@ class Elasticsearch_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const endPoint = getCredentialParam('endpoint', credentialData, nodeData)
const cloudId = getCredentialParam('cloudId', credentialData, nodeData)
const indexName = nodeData.inputs?.indexName as string
const embeddings = nodeData.inputs?.embeddings as Embeddings
const similarityMeasure = nodeData.inputs?.similarityMeasure as string
const recordManager = nodeData.inputs?.recordManager
const docs = nodeData.inputs?.document as Document[]
const flattenDocs = docs && docs.length ? flatten(docs) : []
@@ -134,7 +143,24 @@ class Elasticsearch_VectorStores implements INode {
const vectorStore = new ElasticVectorSearch(embeddings, elasticSearchClientArgs)
try {
await vectorStore.addDocuments(finalDocs)
if (recordManager) {
const vectorStore = await ElasticVectorSearch.fromExistingIndex(embeddings, elasticSearchClientArgs)
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: indexName
}
})
return res
} else {
await vectorStore.addDocuments(finalDocs)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
}
@@ -2,7 +2,7 @@ import { flatten } from 'lodash'
import { Document } from '@langchain/core/documents'
import { FaissStore } from '@langchain/community/vectorstores/faiss'
import { Embeddings } from '@langchain/core/embeddings'
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
class Faiss_VectorStores implements INode {
@@ -74,7 +74,7 @@ class Faiss_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData): Promise<void> {
async upsert(nodeData: INodeData): Promise<Partial<IndexingResult>> {
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const basePath = nodeData.inputs?.basePath as string
@@ -95,6 +95,8 @@ class Faiss_VectorStores implements INode {
vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number) => {
return await similaritySearchVectorWithScore(query, k, vectorStore)
}
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -2,7 +2,7 @@ import { flatten } from 'lodash'
import { MemoryVectorStore } from 'langchain/vectorstores/memory'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
class InMemoryVectorStore_VectorStores implements INode {
@@ -64,7 +64,7 @@ class InMemoryVectorStore_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData): Promise<void> {
async upsert(nodeData: INodeData): Promise<Partial<IndexingResult>> {
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
@@ -78,6 +78,7 @@ class InMemoryVectorStore_VectorStores implements INode {
try {
await MemoryVectorStore.fromDocuments(finalDocs, embeddings)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -3,7 +3,7 @@ import { DataType, ErrorCode, MetricType, IndexType } from '@zilliz/milvus2-sdk-
import { Document } from '@langchain/core/documents'
import { MilvusLibArgs, Milvus } from '@langchain/community/vectorstores/milvus'
import { Embeddings } from '@langchain/core/embeddings'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
interface InsertRow {
@@ -109,7 +109,7 @@ class Milvus_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
// server setup
const address = nodeData.inputs?.milvusServerUrl as string
const collectionName = nodeData.inputs?.milvusCollection as string
@@ -147,6 +147,8 @@ class Milvus_VectorStores implements INode {
vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number, filter?: string) => {
return await similaritySearchVectorWithScore(query, k, vectorStore, undefined, filter)
}
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -3,7 +3,7 @@ import { MongoClient } from 'mongodb'
import { MongoDBAtlasVectorSearch } from '@langchain/mongodb'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
@@ -113,7 +113,7 @@ class MongoDBAtlas_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const databaseName = nodeData.inputs?.databaseName as string
const collectionName = nodeData.inputs?.collectionName as string
@@ -149,6 +149,7 @@ class MongoDBAtlas_VectorStores implements INode {
embeddingKey
})
await mongoDBAtlasVectorSearch.addDocuments(finalDocs)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -3,7 +3,7 @@ import { Client } from '@opensearch-project/opensearch'
import { Document } from '@langchain/core/documents'
import { OpenSearchVectorStore } from '@langchain/community/vectorstores/opensearch'
import { Embeddings } from '@langchain/core/embeddings'
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
class OpenSearch_VectorStores implements INode {
@@ -79,7 +79,7 @@ class OpenSearch_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData): Promise<void> {
async upsert(nodeData: INodeData): Promise<Partial<IndexingResult>> {
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const opensearchURL = nodeData.inputs?.opensearchURL as string
@@ -102,6 +102,7 @@ class OpenSearch_VectorStores implements INode {
client,
indexName: indexName
})
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -3,9 +3,10 @@ import { Pinecone } from '@pinecone-database/pinecone'
import { PineconeStoreParams, PineconeStore } from '@langchain/pinecone'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
import { index } from '../../../src/indexing'
class Pinecone_VectorStores implements INode {
label: string
@@ -24,7 +25,7 @@ class Pinecone_VectorStores implements INode {
constructor() {
this.label = 'Pinecone'
this.name = 'pinecone'
this.version = 2.0
this.version = 3.0
this.type = 'Pinecone'
this.icon = 'pinecone.svg'
this.category = 'Vector Stores'
@@ -50,6 +51,13 @@ class Pinecone_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Pinecone Index',
name: 'pineconeIndex',
@@ -97,11 +105,12 @@ class Pinecone_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
const index = nodeData.inputs?.pineconeIndex as string
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const _index = nodeData.inputs?.pineconeIndex as string
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const recordManager = nodeData.inputs?.recordManager
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const pineconeApiKey = getCredentialParam('pineconeApiKey', credentialData, nodeData)
@@ -110,7 +119,7 @@ class Pinecone_VectorStores implements INode {
apiKey: pineconeApiKey
})
const pineconeIndex = client.Index(index)
const pineconeIndex = client.Index(_index)
const flattenDocs = docs && docs.length ? flatten(docs) : []
const finalDocs = []
@@ -127,7 +136,25 @@ class Pinecone_VectorStores implements INode {
if (pineconeNamespace) obj.namespace = pineconeNamespace
try {
await PineconeStore.fromDocuments(finalDocs, embeddings, obj)
if (recordManager) {
const vectorStore = await PineconeStore.fromExistingIndex(embeddings, obj)
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: pineconeNamespace
}
})
return res
} else {
await PineconeStore.fromDocuments(finalDocs, embeddings, obj)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
}
@@ -13,7 +13,7 @@ import {
import { FetchResponse, Index, Pinecone, ScoredPineconeRecord } from '@pinecone-database/pinecone'
import { flatten } from 'lodash'
import { Document as LCDocument } from 'langchain/document'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { flattenObject, getCredentialData, getCredentialParam } from '../../../src/utils'
class PineconeLlamaIndex_VectorStores implements INode {
@@ -110,7 +110,7 @@ class PineconeLlamaIndex_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const indexName = nodeData.inputs?.pineconeIndex as string
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
const docs = nodeData.inputs?.document as LCDocument[]
@@ -144,6 +144,7 @@ class PineconeLlamaIndex_VectorStores implements INode {
try {
await VectorStoreIndex.fromDocuments(llamadocs, { serviceContext, storageContext })
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -4,8 +4,9 @@ import { DataSourceOptions } from 'typeorm'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { TypeORMVectorStore, TypeORMVectorStoreDocument } from '@langchain/community/vectorstores/typeorm'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { index } from '../../../src/indexing'
class Postgres_VectorStores implements INode {
label: string
@@ -24,7 +25,7 @@ class Postgres_VectorStores implements INode {
constructor() {
this.label = 'Postgres'
this.name = 'postgres'
this.version = 3.0
this.version = 4.0
this.type = 'Postgres'
this.icon = 'postgres.svg'
this.category = 'Vector Stores'
@@ -50,6 +51,13 @@ class Postgres_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Host',
name: 'host',
@@ -108,7 +116,7 @@ class Postgres_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const user = getCredentialParam('user', credentialData, nodeData)
const password = getCredentialParam('password', credentialData, nodeData)
@@ -117,6 +125,7 @@ class Postgres_VectorStores implements INode {
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const additionalConfig = nodeData.inputs?.additionalConfig as string
const recordManager = nodeData.inputs?.recordManager
let additionalConfiguration = {}
if (additionalConfig) {
@@ -151,11 +160,37 @@ class Postgres_VectorStores implements INode {
}
try {
const vectorStore = await TypeORMVectorStore.fromDocuments(finalDocs, embeddings, args)
if (recordManager) {
const vectorStore = await TypeORMVectorStore.fromDataSource(embeddings, args)
// Avoid Illegal invocation error
vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number, filter?: any) => {
return await similaritySearchVectorWithScore(query, k, tableName, postgresConnectionOptions, filter)
// Avoid Illegal invocation error
vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number, filter?: any) => {
return await similaritySearchVectorWithScore(query, k, tableName, postgresConnectionOptions, filter)
}
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: tableName
}
})
return res
} else {
const vectorStore = await TypeORMVectorStore.fromDocuments(finalDocs, embeddings, args)
// Avoid Illegal invocation error
vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number, filter?: any) => {
return await similaritySearchVectorWithScore(query, k, tableName, postgresConnectionOptions, filter)
}
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
@@ -1,13 +1,19 @@
import { flatten } from 'lodash'
import { v4 as uuid } from 'uuid'
import { QdrantClient } from '@qdrant/js-client-rest'
import { VectorStoreRetrieverInput } from '@langchain/core/vectorstores'
import { Document } from '@langchain/core/documents'
import { QdrantVectorStore, QdrantLibArgs } from '@langchain/community/vectorstores/qdrant'
import { Embeddings } from '@langchain/core/embeddings'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { index } from '../../../src/indexing'
type RetrieverConfig = Partial<VectorStoreRetrieverInput<QdrantVectorStore>>
/**
 * Optional extras that can accompany a batch of documents written to Qdrant.
 * Both arrays are looked up by position: entry `i` is used for document `i`.
 */
type QdrantAddDocumentOptions = {
    /** Explicit point ids to use for the written documents. */
    ids?: Array<string>
    /** Additional payload objects stored next to each document's content and metadata. */
    customPayload?: Array<Record<string, any>>
}
class Qdrant_VectorStores implements INode {
label: string
@@ -26,7 +32,7 @@ class Qdrant_VectorStores implements INode {
constructor() {
this.label = 'Qdrant'
this.name = 'qdrant'
this.version = 1.0
this.version = 2.0
this.type = 'Qdrant'
this.icon = 'qdrant.png'
this.category = 'Vector Stores'
@@ -55,6 +61,13 @@ class Qdrant_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Qdrant Server URL',
name: 'qdrantServerUrl',
@@ -138,13 +151,14 @@ class Qdrant_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const qdrantServerUrl = nodeData.inputs?.qdrantServerUrl as string
const collectionName = nodeData.inputs?.qdrantCollection as string
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const qdrantSimilarity = nodeData.inputs?.qdrantSimilarity
const qdrantVectorDimension = nodeData.inputs?.qdrantVectorDimension
const recordManager = nodeData.inputs?.recordManager
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const qdrantApiKey = getCredentialParam('qdrantApiKey', credentialData, nodeData)
@@ -178,7 +192,74 @@ class Qdrant_VectorStores implements INode {
}
try {
await QdrantVectorStore.fromDocuments(finalDocs, embeddings, dbConfig)
if (recordManager) {
const vectorStore = new QdrantVectorStore(embeddings, dbConfig)
await vectorStore.ensureCollection()
/**
 * Replacement for the store's `addVectors` that upserts points through the
 * Qdrant client, using caller-supplied ids when provided.
 *
 * @param vectors embedding vectors, one per document
 * @param documents documents whose content/metadata become the point payload
 * @param documentOptions optional ids and custom payloads, matched by position
 * @throws Error wrapping the client's status, message and error detail when the upsert fails
 */
vectorStore.addVectors = async (
    vectors: number[][],
    documents: Document[],
    documentOptions?: QdrantAddDocumentOptions
): Promise<void> => {
    // Nothing to write.
    if (!vectors.length) return

    await vectorStore.ensureCollection()

    const suppliedIds = documentOptions?.ids
    const extraPayloads = documentOptions?.customPayload

    const points = vectors.map((vector, position) => {
        const doc = documents[position]
        return {
            // Use the supplied id for this position when ids were given, otherwise generate one.
            id: suppliedIds?.length ? suppliedIds[position] : uuid(),
            vector,
            payload: {
                content: doc.pageContent,
                metadata: doc.metadata,
                customPayload: extraPayloads?.length ? extraPayloads[position] : undefined
            }
        }
    })

    try {
        await client.upsert(collectionName, { wait: true, points })
    } catch (e: any) {
        // Surface the client's status code, message and error detail in a single message.
        throw new Error(`${e?.status ?? 'Undefined error code'} ${e?.message}: ${e?.data?.status?.error}`)
    }
}
/**
 * Replacement for the store's `delete` that removes the points with the given
 * ids from the collection through the Qdrant client.
 *
 * Failures are logged and not re-thrown (best-effort, as before).
 *
 * @param params object carrying the `ids` of the points to delete; a missing
 *               or empty list is a no-op
 */
vectorStore.delete = async (params: { ids: string[] }): Promise<void> => {
    const { ids } = params
    // Nothing to delete.
    if (!ids?.length) return

    try {
        // Await the request: without it the returned promise settles before the
        // deletion has finished, and a rejected request bypasses this try/catch
        // and surfaces as an unhandled promise rejection.
        await client.delete(collectionName, {
            points: ids
        })
    } catch (e) {
        // Keep the best-effort behaviour, but log the cause so the failure can be diagnosed.
        console.error('Failed to delete', e)
    }
}
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: collectionName
}
})
return res
} else {
await QdrantVectorStore.fromDocuments(finalDocs, embeddings, dbConfig)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
}
@@ -3,7 +3,7 @@ import { createClient, SearchOptions, RedisClientOptions } from 'redis'
import { Embeddings } from '@langchain/core/embeddings'
import { RedisVectorStore, RedisVectorStoreConfig } from '@langchain/community/vectorstores/redis'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { escapeAllStrings, escapeSpecialChars, unEscapeSpecialChars } from './utils'
@@ -138,7 +138,7 @@ class Redis_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const indexName = nodeData.inputs?.indexName as string
let contentKey = nodeData.inputs?.contentKey as string
@@ -203,6 +203,8 @@ class Redis_VectorStores implements INode {
filter
)
}
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -2,7 +2,7 @@ import path from 'path'
import { flatten } from 'lodash'
import { storageContextFromDefaults, serviceContextFromDefaults, VectorStoreIndex, Document } from 'llamaindex'
import { Document as LCDocument } from 'langchain/document'
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getUserHome } from '../../../src'
class SimpleStoreUpsert_LlamaIndex_VectorStores implements INode {
@@ -79,7 +79,7 @@ class SimpleStoreUpsert_LlamaIndex_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData): Promise<void> {
async upsert(nodeData: INodeData): Promise<Partial<IndexingResult>> {
const basePath = nodeData.inputs?.basePath as string
const docs = nodeData.inputs?.document as LCDocument[]
const embeddings = nodeData.inputs?.embeddings
@@ -105,6 +105,7 @@ class SimpleStoreUpsert_LlamaIndex_VectorStores implements INode {
try {
await VectorStoreIndex.fromDocuments(llamadocs, { serviceContext, storageContext })
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -2,7 +2,7 @@ import { flatten } from 'lodash'
import { Embeddings } from '@langchain/core/embeddings'
import { SingleStoreVectorStore, SingleStoreVectorStoreConfig } from '@langchain/community/vectorstores/singlestore'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
class SingleStore_VectorStores implements INode {
@@ -118,7 +118,7 @@ class SingleStore_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const user = getCredentialParam('user', credentialData, nodeData)
const password = getCredentialParam('password', credentialData, nodeData)
@@ -151,6 +151,7 @@ class SingleStore_VectorStores implements INode {
try {
const vectorStore = new SingleStoreVectorStore(embeddings, singleStoreConnectionConfig)
vectorStore.addDocuments.bind(vectorStore)(finalDocs)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -3,9 +3,10 @@ import { createClient } from '@supabase/supabase-js'
import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings'
import { SupabaseVectorStore, SupabaseLibArgs } from '@langchain/community/vectorstores/supabase'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
import { index } from '../../../src/indexing'
class Supabase_VectorStores implements INode {
label: string
@@ -24,7 +25,7 @@ class Supabase_VectorStores implements INode {
constructor() {
this.label = 'Supabase'
this.name = 'supabase'
this.version = 2.0
this.version = 3.0
this.type = 'Supabase'
this.icon = 'supabase.svg'
this.category = 'Vector Stores'
@@ -50,6 +51,13 @@ class Supabase_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Supabase Project URL',
name: 'supabaseProjUrl',
@@ -99,12 +107,13 @@ class Supabase_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const supabaseProjUrl = nodeData.inputs?.supabaseProjUrl as string
const tableName = nodeData.inputs?.tableName as string
const queryName = nodeData.inputs?.queryName as string
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const recordManager = nodeData.inputs?.recordManager
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const supabaseApiKey = getCredentialParam('supabaseApiKey', credentialData, nodeData)
@@ -120,11 +129,32 @@ class Supabase_VectorStores implements INode {
}
try {
await SupabaseVectorStore.fromDocuments(finalDocs, embeddings, {
client,
tableName: tableName,
queryName: queryName
})
if (recordManager) {
const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, {
client,
tableName: tableName,
queryName: queryName
})
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: tableName + '_' + queryName
}
})
return res
} else {
await SupabaseVectorStore.fromDocuments(finalDocs, embeddings, {
client,
tableName: tableName,
queryName: queryName
})
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
}
@@ -9,7 +9,7 @@ import {
} from '@langchain/community/vectorstores/vectara'
import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
class Vectara_VectorStores implements INode {
@@ -144,7 +144,7 @@ class Vectara_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
const customerId = getCredentialParam('customerID', credentialData, nodeData)
@@ -204,6 +204,7 @@ class Vectara_VectorStores implements INode {
const vectorStore = new VectaraStore(vectaraArgs)
await vectorStore.addFiles(vectaraFiles)
}
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -1,11 +1,12 @@
import { flatten } from 'lodash'
import weaviate, { WeaviateClient, ApiKey } from 'weaviate-ts-client'
import { WeaviateLibArgs, WeaviateStore } from '@langchain/community/vectorstores/weaviate'
import { WeaviateLibArgs, WeaviateStore } from '@langchain/weaviate'
import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
import { index } from '../../../src/indexing'
class Weaviate_VectorStores implements INode {
label: string
@@ -24,7 +25,7 @@ class Weaviate_VectorStores implements INode {
constructor() {
this.label = 'Weaviate'
this.name = 'weaviate'
this.version = 2.0
this.version = 3.0
this.type = 'Weaviate'
this.icon = 'weaviate.png'
this.category = 'Vector Stores'
@@ -53,6 +54,13 @@ class Weaviate_VectorStores implements INode {
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Record Manager',
name: 'recordManager',
type: 'RecordManager',
description: 'Keep track of the record to prevent duplication',
optional: true
},
{
label: 'Weaviate Scheme',
name: 'weaviateScheme',
@@ -125,7 +133,7 @@ class Weaviate_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const weaviateScheme = nodeData.inputs?.weaviateScheme as string
const weaviateHost = nodeData.inputs?.weaviateHost as string
const weaviateIndex = nodeData.inputs?.weaviateIndex as string
@@ -133,6 +141,7 @@ class Weaviate_VectorStores implements INode {
const weaviateMetadataKeys = nodeData.inputs?.weaviateMetadataKeys as string
const docs = nodeData.inputs?.document as Document[]
const embeddings = nodeData.inputs?.embeddings as Embeddings
const recordManager = nodeData.inputs?.recordManager
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const weaviateApiKey = getCredentialParam('weaviateApiKey', credentialData, nodeData)
@@ -154,6 +163,7 @@ class Weaviate_VectorStores implements INode {
}
const obj: WeaviateLibArgs = {
//@ts-ignore
client,
indexName: weaviateIndex
}
@@ -162,7 +172,24 @@ class Weaviate_VectorStores implements INode {
if (weaviateMetadataKeys) obj.metadataKeys = JSON.parse(weaviateMetadataKeys.replace(/\s/g, ''))
try {
await WeaviateStore.fromDocuments(finalDocs, embeddings, obj)
if (recordManager) {
const vectorStore = await WeaviateStore.fromExistingIndex(embeddings, obj)
await recordManager.createSchema()
const res = await index({
docsSource: finalDocs,
recordManager,
vectorStore,
options: {
cleanup: recordManager?.cleanup,
sourceIdKey: recordManager?.sourceIdKey ?? 'source',
vectorStoreName: weaviateTextKey ? weaviateIndex + '_' + weaviateTextKey : weaviateIndex
}
})
return res
} else {
await WeaviateStore.fromDocuments(finalDocs, embeddings, obj)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
}
} catch (e) {
throw new Error(e)
}
@@ -189,6 +216,7 @@ class Weaviate_VectorStores implements INode {
const client: WeaviateClient = weaviate.client(clientConfig)
const obj: WeaviateLibArgs = {
//@ts-ignore
client,
indexName: weaviateIndex
}
@@ -3,7 +3,7 @@ import { IDocument, ZepClient } from '@getzep/zep-js'
import { ZepVectorStore, IZepConfig } from '@langchain/community/vectorstores/zep'
import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
@@ -106,7 +106,7 @@ class Zep_VectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const baseURL = nodeData.inputs?.baseURL as string
const zepCollection = nodeData.inputs?.zepCollection as string
const dimension = (nodeData.inputs?.dimension as number) ?? 1536
@@ -134,6 +134,7 @@ class Zep_VectorStores implements INode {
try {
await ZepVectorStore.fromDocuments(finalDocs, embeddings, zepConfig)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
@@ -3,7 +3,7 @@ import { IDocument, ZepClient } from '@getzep/zep-cloud'
import { IZepConfig, ZepVectorStore } from '@getzep/zep-cloud/langchain'
import { Embeddings } from 'langchain/embeddings/base'
import { Document } from 'langchain/document'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
import { FakeEmbeddings } from 'langchain/embeddings/fake'
@@ -89,7 +89,7 @@ class Zep_CloudVectorStores implements INode {
//@ts-ignore
vectorStoreMethods = {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
const zepCollection = nodeData.inputs?.zepCollection as string
const docs = nodeData.inputs?.document as Document[]
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
@@ -109,6 +109,7 @@ class Zep_CloudVectorStores implements INode {
}
try {
await ZepVectorStore.fromDocuments(finalDocs, new FakeEmbeddings(), zepConfig)
return { numAdded: finalDocs.length, addedDocs: finalDocs }
} catch (e) {
throw new Error(e)
}
+1
View File
@@ -42,6 +42,7 @@
"@langchain/mongodb": "^0.0.1",
"@langchain/openai": "^0.0.14",
"@langchain/pinecone": "^0.0.3",
"@langchain/weaviate": "^0.0.1",
"@mistralai/mistralai": "0.1.3",
"@notionhq/client": "^2.2.8",
"@opensearch-project/opensearch": "^1.2.0",
+11 -1
View File
@@ -113,7 +113,7 @@ export interface INode extends INodeProperties {
[key: string]: (nodeData: INodeData, options?: ICommonObject) => Promise<INodeOptionsValue[]>
}
vectorStoreMethods?: {
upsert: (nodeData: INodeData, options?: ICommonObject) => Promise<void>
upsert: (nodeData: INodeData, options?: ICommonObject) => Promise<IndexingResult | void>
search: (nodeData: INodeData, options?: ICommonObject) => Promise<any>
delete: (nodeData: INodeData, options?: ICommonObject) => Promise<void>
}
@@ -181,6 +181,7 @@ export type MessageContentImageUrl = {
import { PromptTemplate as LangchainPromptTemplate, PromptTemplateInput } from '@langchain/core/prompts'
import { VectorStore } from '@langchain/core/vectorstores'
import { Document } from '@langchain/core/documents'
export class PromptTemplate extends LangchainPromptTemplate {
promptValues: ICommonObject
@@ -271,6 +272,15 @@ export abstract class FlowiseSummaryMemory extends ConversationSummaryMemory imp
abstract clearChatMessages(overrideSessionId?: string): Promise<void>
}
/**
 * Summary of one upsert/indexing run, as produced by `index()` in `src/indexing`.
 * Vector store nodes that upsert without a record manager return only a
 * `Partial` of this shape (`numAdded` and `addedDocs`).
 */
export type IndexingResult = {
// Documents written to the vector store that the record manager did not already track.
numAdded: number
// Keys removed from the vector store and the record manager during cleanup.
numDeleted: number
// Already-tracked documents that were written again because `forceUpdate` was set.
numUpdated: number
// Already-tracked documents that were not re-written (only their record timestamp was refreshed).
numSkipped: number
// Number of keys the record manager lists once the run has finished.
totalKeys: number
// The documents that were handed to the vector store during this run.
addedDocs: Document[]
}
export interface IVisionChatModal {
id: string
configuredModel: string
+355
View File
@@ -0,0 +1,355 @@
import { VectorStore } from '@langchain/core/vectorstores'
import { v5 as uuidv5 } from 'uuid'
import { RecordManagerInterface, UUIDV5_NAMESPACE } from '@langchain/community/indexes/base'
import { insecureHash } from '@langchain/core/utils/hash'
import { Document, DocumentInterface } from '@langchain/core/documents'
import { BaseDocumentLoader } from 'langchain/document_loaders/base.js'
import { IndexingResult } from './Interface'
/** Free-form document metadata; values are opaque to the indexing code. */
type Metadata = Record<string, unknown>
/**
 * Either the name of a metadata key that holds a document's source id, or a
 * function that derives the source id from the document itself.
 */
type StringOrDocFunc = string | ((doc: DocumentInterface) => string)
/**
 * A document augmented with the identifiers that `index()` uses to track it.
 */
export interface HashedDocumentInterface extends DocumentInterface {
// Identifier used as the record manager key and as the vector store id.
uid: string
// Combined hash derived from `contentHash` and `metadataHash`; unset until `calculateHashes()` runs.
hash_?: string
// Hash of `pageContent`; unset until `calculateHashes()` runs.
contentHash?: string
// Hash of `metadata`; unset until `calculateHashes()` runs.
metadataHash?: string
pageContent: string
metadata: Metadata
// Populates the hash fields (and `uid` when it is empty).
calculateHashes(): void
// Returns a plain document carrying only `pageContent` and `metadata`.
toDocument(): DocumentInterface
}
/** Constructor arguments for `_HashedDocument`. */
interface HashedDocumentArgs {
pageContent: string
metadata: Metadata
// May be empty/undefined at runtime; `calculateHashes()` then falls back to the combined hash.
uid: string
}
/**
 * A document paired with deterministic fingerprints of its content and metadata.
 *
 * The fingerprints are UUIDv5 values derived from `pageContent` and from the
 * JSON form of `metadata`; their combination (`hash_`) doubles as the `uid`
 * whenever no explicit uid was supplied. Used by `index()` to detect
 * documents that are already tracked by a record manager.
 */
export class _HashedDocument implements HashedDocumentInterface {
    uid: string

    hash_?: string

    contentHash?: string

    metadataHash?: string

    pageContent: string

    metadata: Metadata

    constructor(fields: HashedDocumentArgs) {
        const { uid, pageContent, metadata } = fields
        this.uid = uid
        this.pageContent = pageContent
        this.metadata = metadata
    }

    /**
     * Compute `contentHash`, `metadataHash` and `hash_`, and default `uid` to
     * `hash_` when it is empty.
     *
     * @throws {Error} If the metadata contains one of the reserved keys, or if
     * hashing the metadata fails (e.g. it cannot be serialized to JSON).
     */
    calculateHashes(): void {
        const forbiddenKeys = ['hash_', 'content_hash', 'metadata_hash']
        const clash = forbiddenKeys.find((reserved) => reserved in this.metadata)
        if (clash !== undefined) {
            throw new Error(
                `Metadata cannot contain key ${clash} as it is reserved for internal use. Restricted keys: [${forbiddenKeys.join(', ')}]`
            )
        }

        const contentDigest = this._hashStringToUUID(this.pageContent)
        try {
            // Hash the metadata first so that neither field is set when serialization fails.
            const metadataDigest = this._hashNestedDictToUUID(this.metadata)
            this.contentHash = contentDigest
            this.metadataHash = metadataDigest
        } catch (e) {
            throw new Error(`Failed to hash metadata: ${e}. Please use a dict that can be serialized using json.`)
        }

        this.hash_ = this._hashStringToUUID(this.contentHash + this.metadataHash)

        // An explicitly supplied uid wins; otherwise the combined hash identifies the document.
        if (!this.uid) {
            this.uid = this.hash_
        }
    }

    /** Build a plain `Document` holding only the page content and metadata. */
    toDocument(): DocumentInterface {
        const { pageContent, metadata } = this
        return new Document({ pageContent, metadata })
    }

    /**
     * Wrap an existing document and immediately calculate its hashes.
     *
     * @param document The document to wrap.
     * @param uid Optional uid; falls back to a `uid` property on the document, if any.
     */
    static fromDocument(document: DocumentInterface, uid?: string): _HashedDocument {
        const hashed = new this({
            pageContent: document.pageContent,
            metadata: document.metadata,
            uid: uid || (document as DocumentInterface & { uid: string }).uid
        })
        hashed.calculateHashes()
        return hashed
    }

    /** Hash a string and map the digest onto a UUIDv5 in the shared namespace. */
    private _hashStringToUUID(inputString: string): string {
        return uuidv5(insecureHash(inputString), UUIDV5_NAMESPACE)
    }

    /**
     * Hash a metadata object via its JSON form, with top-level keys in sorted order.
     *
     * NOTE(review): an array passed as the `JSON.stringify` replacer is a
     * property allowlist that applies at every nesting depth, so nested keys
     * that are not also top-level keys are left out of the hashed string.
     * Changing this would change the uids of already-indexed documents.
     */
    private _hashNestedDictToUUID(data: Record<string, unknown>): string {
        const sortedTopLevelKeys = Object.keys(data).sort()
        const serialized = JSON.stringify(data, sortedTopLevelKeys)
        return uuidv5(insecureHash(serialized), UUIDV5_NAMESPACE)
    }
}
/** Strategy `index()` uses to remove stale entries; leave the option undefined to delete nothing. */
export type CleanupMode = 'full' | 'incremental'
/** Tuning options accepted by `index()`. */
export type IndexOptions = {
/**
 * The number of documents to index in one batch.
 * `index()` uses 100 when this is not set.
 */
batchSize?: number
/**
 * The cleanup mode to use. Can be "full", "incremental" or undefined.
 * - **Incremental**: Cleans up all documents that haven't been updated AND
 *   that are associated with source ids that were seen
 *   during indexing.
 *   Cleanup is done continuously during indexing, helping
 *   to minimize the probability of users seeing duplicated
 *   content.
 * - **Full**: Deletes all documents that have not been returned by the loader.
 *   Cleanup runs after all documents have been indexed.
 *   This means that users may see duplicated content during indexing.
 * - **undefined**: Do not delete any documents.
 */
cleanup?: CleanupMode
/**
 * Optional key that helps identify the original source of the document.
 * Must either be a string representing the key of the source in the metadata
 * or a function that takes a document and returns a string representing the source.
 * **Required when cleanup is incremental**.
 */
sourceIdKey?: StringOrDocFunc
/**
 * Batch size to use when cleaning up documents.
 * `index()` uses 1000 when this is not set.
 */
cleanupBatchSize?: number
/**
 * Force update documents even if they are present in the
 * record manager. Useful if you are re-indexing with updated embeddings.
 */
forceUpdate?: boolean
/**
 * Optional name of the target vector store. When set, `index()` appends
 * `'_' + vectorStoreName` to the record manager's namespace before indexing.
 */
vectorStoreName?: string
}
/**
 * Split a list into consecutive groups, preserving the original order.
 *
 * A group is closed as soon as it holds at least `size` items; whatever is
 * left over at the end forms a final, shorter group.
 *
 * @param size Number of items after which a group is closed.
 * @param iterable The items to group.
 * @returns The groups, in input order. Empty input yields an empty list.
 */
export function _batch<T>(size: number, iterable: T[]): T[][] {
    const groups: T[][] = []

    const remainder = iterable.reduce<T[]>((pending, entry) => {
        pending.push(entry)
        if (pending.length >= size) {
            groups.push(pending)
            return []
        }
        return pending
    }, [])

    if (remainder.length > 0) {
        groups.push(remainder)
    }
    return groups
}
/**
 * Drop hashed documents whose `hash_` has already been seen, keeping the
 * first occurrence of each and preserving input order.
 *
 * @param hashedDocuments Documents whose hashes have already been calculated.
 * @returns The first document for every distinct `hash_`.
 * @throws {Error} If any document has no `hash_`.
 */
export function _deduplicateInOrder(hashedDocuments: HashedDocumentInterface[]): HashedDocumentInterface[] {
    const knownHashes = new Set<string>()
    const unique: HashedDocumentInterface[] = []

    for (let position = 0; position < hashedDocuments.length; position += 1) {
        const candidate = hashedDocuments[position]
        const fingerprint = candidate.hash_
        if (!fingerprint) {
            throw new Error('Hashed document does not have a hash')
        }
        if (knownHashes.has(fingerprint)) {
            continue
        }
        knownHashes.add(fingerprint)
        unique.push(candidate)
    }

    return unique
}
/**
 * Build the function that extracts a source id from a document.
 *
 * @param sourceIdKey `null` for "no source ids" (the assigner always returns
 * `null`), a metadata key to look the id up under, or a custom function which
 * is returned unchanged.
 * @returns A function mapping a document to its source id.
 * @throws {Error} If `sourceIdKey` is neither `null`, a string nor a function.
 */
export function _getSourceIdAssigner(sourceIdKey: StringOrDocFunc | null): (doc: DocumentInterface) => string | null {
    if (sourceIdKey === null) {
        return (_doc: DocumentInterface) => null
    }

    switch (typeof sourceIdKey) {
        case 'string': {
            const metadataKey = sourceIdKey
            return (doc: DocumentInterface) => doc.metadata[metadataKey]
        }
        case 'function':
            return sourceIdKey
        default:
            throw new Error(`sourceIdKey should be null, a string or a function, got ${typeof sourceIdKey}`)
    }
}
/**
 * Type guard: does `arg` look like a document loader?
 *
 * A value qualifies when it exposes both `load` and `loadAndSplit` as
 * functions. `null`, `undefined` and primitives are rejected up front; the
 * `in` operator used previously throws a TypeError on such values instead of
 * letting the guard answer `false`.
 *
 * @param arg Any value.
 * @returns `true` when `arg` has callable `load` and `loadAndSplit` members.
 */
export const _isBaseDocumentLoader = (arg: any): arg is BaseDocumentLoader => {
    const kind = typeof arg
    if (arg === null || (kind !== 'object' && kind !== 'function')) {
        return false
    }
    return typeof arg.load === 'function' && typeof arg.loadAndSplit === 'function'
}
/** Arguments accepted by `index()`. */
interface IndexArgs {
// Either a loader (its `load()` result is indexed) or an already-loaded list of documents.
docsSource: BaseDocumentLoader | DocumentInterface[]
// Tracks which document uids have been written, and when.
recordManager: RecordManagerInterface
// Destination for documents that are new or force-updated.
vectorStore: VectorStore
// Optional tuning; see `IndexOptions`.
options?: IndexOptions
}
/**
 * Index data from the doc source into the vector store.
 *
 * Indexing functionality uses a record manager to keep track of which
 * documents are in the vector store. This makes it possible to tell which
 * documents are new, which are already present and can be skipped, and which
 * are stale and should be deleted.
 *
 * For the time being, documents are indexed using their hashes, and users
 * are not able to specify the uid of the document.
 *
 * When `options.vectorStoreName` is set, `'_' + vectorStoreName` is appended
 * to the record manager's `namespace` before any record is read or written.
 * NOTE(review): the suffix is applied to the passed-in instance and is not
 * reverted, so calling this twice with the same record manager instance
 * appends it twice — confirm callers always pass a fresh instance.
 *
 * @param {IndexArgs} args
 * @param {BaseDocumentLoader | DocumentInterface[]} args.docsSource The source of documents to index. Can be a DocumentLoader or a list of Documents.
 * @param {RecordManagerInterface} args.recordManager The record manager to use for keeping track of indexed documents.
 * @param {VectorStore} args.vectorStore The vector store to use for storing the documents.
 * @param {IndexOptions | undefined} args.options Options for indexing.
 * @returns {Promise<IndexingResult>} Counters for the run plus the documents written to the vector store.
 * @throws {Error} If cleanup is incremental and `sourceIdKey` is missing, or any document has no source id.
 */
export async function index(args: IndexArgs): Promise<IndexingResult> {
    const { docsSource, recordManager, vectorStore, options } = args
    const { batchSize = 100, cleanup, sourceIdKey, cleanupBatchSize = 1000, forceUpdate = false, vectorStoreName } = options ?? {}

    if (cleanup === 'incremental' && !sourceIdKey) {
        throw new Error("sourceIdKey is required when cleanup mode is incremental. Please provide through 'options.sourceIdKey'.")
    }

    if (vectorStoreName) {
        ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName
    }

    const docs = _isBaseDocumentLoader(docsSource) ? await docsSource.load() : docsSource
    const sourceIdAssigner = _getSourceIdAssigner(sourceIdKey ?? null)

    // Every record touched by this run gets a timestamp >= indexStartDt;
    // anything older is considered stale by the cleanup steps below.
    const indexStartDt = await recordManager.getTime()

    let numAdded = 0
    const addedDocs: Document[] = []
    let numDeleted = 0
    let numUpdated = 0
    let numSkipped = 0

    const batches = _batch<DocumentInterface>(batchSize ?? 100, docs)

    for (const batch of batches) {
        const hashedDocs = _deduplicateInOrder(batch.map((doc) => _HashedDocument.fromDocument(doc)))
        const sourceIds = hashedDocs.map((doc) => sourceIdAssigner(doc))

        if (cleanup === 'incremental') {
            // Validate every source id BEFORE anything is written. A document
            // whose metadata lacks the key yields `undefined` rather than
            // `null`; rejecting it only after the vector store and record
            // manager were updated would leave a half-applied batch behind.
            for (const sourceId of sourceIds) {
                if (sourceId === null) {
                    throw new Error('sourceIdKey must be provided when cleanup is incremental')
                }
                if (!sourceId) {
                    throw new Error('Source id cannot be null')
                }
            }
        }

        const batchExists = await recordManager.exists(hashedDocs.map((doc) => doc.uid))

        const uids: string[] = []
        const docsToIndex: DocumentInterface[] = []
        const docsToUpdate: string[] = []
        const seenDocs = new Set<string>()

        hashedDocs.forEach((hashedDoc, i) => {
            if (batchExists[i]) {
                if (!forceUpdate) {
                    // Already tracked: only its record timestamp is refreshed.
                    docsToUpdate.push(hashedDoc.uid)
                    return
                }
                // Already tracked but re-written on request; counted as an update.
                seenDocs.add(hashedDoc.uid)
            }
            uids.push(hashedDoc.uid)
            docsToIndex.push(hashedDoc.toDocument())
        })

        if (docsToUpdate.length > 0) {
            await recordManager.update(docsToUpdate, { timeAtLeast: indexStartDt })
            numSkipped += docsToUpdate.length
        }

        if (docsToIndex.length > 0) {
            await vectorStore.addDocuments(docsToIndex, { ids: uids })
            const newDocs = docsToIndex.map((indexedDoc) => ({
                pageContent: indexedDoc.pageContent,
                metadata: indexedDoc.metadata
            }))
            addedDocs.push(...newDocs)
            numAdded += docsToIndex.length - seenDocs.size
            numUpdated += seenDocs.size
        }

        await recordManager.update(
            hashedDocs.map((doc) => doc.uid),
            { timeAtLeast: indexStartDt, groupIds: sourceIds }
        )

        if (cleanup === 'incremental') {
            const uidsToDelete = await recordManager.listKeys({
                before: indexStartDt,
                groupIds: sourceIds
            })
            // Skip the round trips when nothing is stale; this also avoids
            // sending an empty id list to the vector store's delete call.
            if (uidsToDelete.length > 0) {
                await vectorStore.delete({ ids: uidsToDelete })
                await recordManager.deleteKeys(uidsToDelete)
                numDeleted += uidsToDelete.length
            }
        }
    }

    if (cleanup === 'full') {
        // Delete stale keys page by page until none are left.
        let uidsToDelete = await recordManager.listKeys({
            before: indexStartDt,
            limit: cleanupBatchSize
        })
        while (uidsToDelete.length > 0) {
            await vectorStore.delete({ ids: uidsToDelete })
            await recordManager.deleteKeys(uidsToDelete)
            numDeleted += uidsToDelete.length
            uidsToDelete = await recordManager.listKeys({
                before: indexStartDt,
                limit: cleanupBatchSize
            })
        }
    }

    const totalKeys = (await recordManager.listKeys({})).length

    return {
        numAdded,
        numDeleted,
        numUpdated,
        numSkipped,
        totalKeys,
        addedDocs
    }
}