diff --git a/packages/components/credentials/ElasticsearchAPI.credential.ts b/packages/components/credentials/ElasticsearchAPI.credential.ts
new file mode 100644
index 00000000..fbba76f4
--- /dev/null
+++ b/packages/components/credentials/ElasticsearchAPI.credential.ts
@@ -0,0 +1,34 @@
+import { INodeParams, INodeCredential } from '../src/Interface'
+
+/**
+ * Credential definition for connecting to Elasticsearch with an endpoint URL and an API key.
+ */
+class ElasticsearchAPI implements INodeCredential {
+    label: string
+    name: string
+    version: number
+    description: string
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Elasticsearch API'
+        this.name = 'elasticsearchApi'
+        this.version = 1.0
+        this.description =
+            'Refer to official guide on how to get an API Key from ElasticSearch'
+        this.inputs = [
+            {
+                label: 'Elasticsearch Endpoint',
+                name: 'endpoint',
+                type: 'string'
+            },
+            {
+                label: 'Elasticsearch API Key',
+                name: 'apiKey',
+                type: 'password'
+            }
+        ]
+    }
+}
+
+module.exports = { credClass: ElasticsearchAPI }
diff --git a/packages/components/credentials/ElectricsearchUserPassword.credential.ts b/packages/components/credentials/ElectricsearchUserPassword.credential.ts
new file mode 100644
index 00000000..6c47f7b1
--- /dev/null
+++ b/packages/components/credentials/ElectricsearchUserPassword.credential.ts
@@ -0,0 +1,39 @@
+import { INodeParams, INodeCredential } from '../src/Interface'
+
+/**
+ * Credential definition for connecting to Elasticsearch with a Cloud ID, user name and password.
+ */
+class ElasticSearchUserPassword implements INodeCredential {
+    label: string
+    name: string
+    version: number
+    description: string
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'ElasticSearch User Password'
+        this.name = 'elasticSearchUserPassword'
+        this.version = 1.0
+        this.description =
+            'Refer to official guide on how to get User Password from ElasticSearch'
+        this.inputs = [
+            {
+                label: 'Cloud ID',
+                name: 'cloudId',
+                type: 'string'
+            },
+            {
+                label: 'ElasticSearch User',
+                name: 'username',
+                type: 'string'
+            },
+            {
+                label: 'ElasticSearch Password',
+                name: 'password',
+                type: 'password'
+            }
+        ]
+    }
+}
+
+module.exports = { credClass: ElasticSearchUserPassword }
diff --git a/packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts b/packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts
new file mode 100644
index 00000000..59294b7e
--- /dev/null
+++ b/packages/components/nodes/vectorstores/Elasticsearch/ElasticSearchBase.ts
@@ -0,0 +1,232 @@
+import {
+    getBaseClasses,
+    getCredentialData,
+    getCredentialParam,
+    ICommonObject,
+    INodeData,
+    INodeOutputsValue,
+    INodeParams
+} from '../../../src'
+import { Client, ClientOptions } from '@elastic/elasticsearch'
+import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch'
+import { Embeddings } from 'langchain/embeddings/base'
+import { VectorStore } from 'langchain/vectorstores/base'
+import { Document } from 'langchain/document'
+
+/**
+ * Shared base for the Elasticsearch vector store nodes. Declares the common node metadata,
+ * credential and inputs, and builds the Elasticsearch client from the selected credential.
+ */
+export abstract class ElasticSearchBase {
+    label: string
+    name: string
+    version: number
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+    credential: INodeParams
+    outputs: INodeOutputsValue[]
+
+    /** Sets up the node metadata, inputs and outputs shared by every Elasticsearch node. */
+    protected constructor() {
+        this.type = 'Elasticsearch'
+        this.icon = 'elasticsearch.png'
+        this.category = 'Vector Stores'
+        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword']
+        }
+        this.inputs = [
+            {
+                label: 'Embeddings',
+                name: 'embeddings',
+                type: 'Embeddings'
+            },
+            {
+                label: 'Index Name',
+                name: 'indexName',
+                placeholder: '',
+                type: 'string'
+            },
+            {
+                label: 'Top K',
+                name: 'topK',
+                description: 'Number of top results to fetch. Default to 4',
+                placeholder: '4',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Similarity',
+                name: 'similarity',
+                description: 'Similarity measure used in Elasticsearch.',
+                type: 'options',
+                default: 'l2_norm',
+                options: [
+                    {
+                        label: 'l2_norm',
+                        name: 'l2_norm'
+                    },
+                    {
+                        label: 'dot_product',
+                        name: 'dot_product'
+                    },
+                    {
+                        label: 'cosine',
+                        name: 'cosine'
+                    }
+                ],
+                additionalParams: true,
+                optional: true
+            }
+        ]
+        this.outputs = [
+            {
+                label: 'Elasticsearch Retriever',
+                name: 'retriever',
+                baseClasses: this.baseClasses
+            },
+            {
+                label: 'Elasticsearch Vector Store',
+                name: 'vectorStore',
+                baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)]
+            }
+        ]
+    }
+
+    /**
+     * Builds the vector store backing this node.
+     * @param embeddings embeddings passed through to the vector store
+     * @param elasticSearchClientArgs client, index name and vector search options for the store
+     * @param docs documents to add to the store, or undefined when none are supplied
+     * @returns the vector store that `init` turns into the selected output
+     */
+    abstract constructVectorStore(
+        embeddings: Embeddings,
+        elasticSearchClientArgs: ElasticClientArgs,
+        docs: Document<Record<string, any>>[] | undefined
+    ): Promise<VectorStore>
+
+    /**
+     * Reads the credential and node inputs, builds the vector store and returns the selected output.
+     * @param nodeData node inputs, outputs and credential reference
+     * @param _ unused
+     * @param options passed to `getCredentialData` to load the credential
+     * @param docs documents forwarded to `constructVectorStore`
+     * @returns the result of `asRetriever(k)` when the output is 'retriever', otherwise the vector store
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject, docs: Document<Record<string, any>>[] | undefined): Promise<any> {
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const endPoint = getCredentialParam('endpoint', credentialData, nodeData)
+        const cloudId = getCredentialParam('cloudId', credentialData, nodeData)
+        const indexName = nodeData.inputs?.indexName as string
+        const embeddings = nodeData.inputs?.embeddings as Embeddings
+        const topK = nodeData.inputs?.topK as string
+        // The input is declared with name 'similarity' in the constructor, so read it under that key
+        const similarityMeasure = nodeData.inputs?.similarity as string
+        const k = topK ? parseFloat(topK) : 4
+        const output = nodeData.outputs?.output as string
+
+        const elasticSearchClientArgs = this.prepareClientArgs(endPoint, cloudId, credentialData, nodeData, similarityMeasure, indexName)
+
+        const vectorStore = await this.constructVectorStore(embeddings, elasticSearchClientArgs, docs)
+
+        if (output === 'retriever') {
+            return vectorStore.asRetriever(k)
+        } else if (output === 'vectorStore') {
+            ;(vectorStore as any).k = k
+            return vectorStore
+        }
+        return vectorStore
+    }
+
+    /**
+     * Builds the Elasticsearch client options from whichever credential is in use.
+     * @param endPoint node URL from the API key credential; takes precedence when set
+     * @param cloudId cloud id from the user/password credential
+     * @param credentialData credential data the secrets are read from
+     * @param nodeData node data passed through to `getCredentialParam`
+     * @returns client options, or an empty object when neither endPoint nor cloudId is set
+     */
+    protected prepareConnectionOptions(
+        endPoint: string | undefined,
+        cloudId: string | undefined,
+        credentialData: ICommonObject,
+        nodeData: INodeData
+    ) {
+        let elasticSearchClientOptions: ClientOptions = {}
+        if (endPoint) {
+            const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
+            elasticSearchClientOptions = {
+                node: endPoint,
+                auth: {
+                    apiKey: apiKey
+                }
+            }
+        } else if (cloudId) {
+            const username = getCredentialParam('username', credentialData, nodeData)
+            const password = getCredentialParam('password', credentialData, nodeData)
+            elasticSearchClientOptions = {
+                cloud: {
+                    id: cloudId
+                },
+                auth: {
+                    username: username,
+                    password: password
+                }
+            }
+        }
+        return elasticSearchClientOptions
+    }
+
+    /**
+     * Assembles the `ElasticClientArgs`: a new client, the index name and the similarity option.
+     * @param endPoint node URL forwarded to `prepareConnectionOptions`
+     * @param cloudId cloud id forwarded to `prepareConnectionOptions`
+     * @param credentialData credential data forwarded to `prepareConnectionOptions`
+     * @param nodeData node data forwarded to `prepareConnectionOptions`
+     * @param similarityMeasure 'dot_product' or 'cosine'; any other value falls back to 'l2_norm'
+     * @param indexName name of the Elasticsearch index
+     * @returns arguments holding the client, index name and vector search options
+     */
+    protected prepareClientArgs(
+        endPoint: string | undefined,
+        cloudId: string | undefined,
+        credentialData: ICommonObject,
+        nodeData: INodeData,
+        similarityMeasure: string,
+        indexName: string
+    ) {
+        const elasticSearchClientOptions = this.prepareConnectionOptions(endPoint, cloudId, credentialData, nodeData)
+        let vectorSearchOptions = {}
+        switch (similarityMeasure) {
+            case 'dot_product':
+                vectorSearchOptions = {
+                    similarity: 'dot_product'
+                }
+                break
+            case 'cosine':
+                vectorSearchOptions = {
+                    similarity: 'cosine'
+                }
+                break
+            default:
+                vectorSearchOptions = {
+                    similarity: 'l2_norm'
+                }
+        }
+        const elasticSearchClientArgs: ElasticClientArgs = {
+            client: new Client(elasticSearchClientOptions),
+            indexName: indexName,
+            vectorSearchOptions: vectorSearchOptions
+        }
+        return elasticSearchClientArgs
+    }
+}
diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts
new file mode 100644
index 00000000..94e45d74
--- /dev/null
+++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts
@@ -0,0 +1,36 @@
+import { ICommonObject, INode, INodeData } from '../../../src/Interface'
+import { Embeddings } from 'langchain/embeddings/base'
+
+import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch'
+import { ElasticSearchBase } from './ElasticSearchBase'
+import { VectorStore } from 'langchain/vectorstores/base'
+import { Document } from 'langchain/document'
+
+/**
+ * Node that loads an existing Elasticsearch index as a vector store; it adds no documents.
+ */
+class ElasticsearchExisting_VectorStores extends ElasticSearchBase implements INode {
+    constructor() {
+        super()
+        this.label = 'Elasticsearch Load Existing Index'
+        this.name = 'ElasticsearchIndex'
+        this.version = 1.0
+        this.description = 'Load existing index from Elasticsearch (i.e: Document has been upserted)'
+    }
+
+    /** Opens the existing index through `ElasticVectorSearch.fromExistingIndex`; `docs` is not used. */
+    async constructVectorStore(
+        embeddings: Embeddings,
+        elasticSearchClientArgs: ElasticClientArgs,
+        docs: Document<Record<string, any>>[] | undefined
+    ): Promise<VectorStore> {
+        return await ElasticVectorSearch.fromExistingIndex(embeddings, elasticSearchClientArgs)
+    }
+
+    /** Delegates to the base `init` without any documents. */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        return super.init(nodeData, _, options, undefined)
+    }
+}
+
+module.exports = { nodeClass: ElasticsearchExisting_VectorStores }
diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts
new file mode 100644
index 00000000..d4b79a5d
--- /dev/null
+++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts
@@ -0,0 +1,61 @@
+import { ICommonObject, INode, INodeData } from '../../../src/Interface'
+import { Embeddings } from 'langchain/embeddings/base'
+import { Document } from 'langchain/document'
+
+import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch'
+import { flatten } from 'lodash'
+import { ElasticSearchBase } from './ElasticSearchBase'
+import { VectorStore } from 'langchain/vectorstores/base'
+
+/**
+ * Node that upserts the connected documents into an Elasticsearch index.
+ */
+class ElasticsearchUpsert_VectorStores extends ElasticSearchBase implements INode {
+    constructor() {
+        super()
+        this.label = 'Elasticsearch Upsert Document'
+        this.name = 'ElasticsearchUpsert'
+        this.version = 1.0
+        this.description = 'Upsert documents to Elasticsearch'
+        this.inputs.unshift({
+            label: 'Document',
+            name: 'document',
+            type: 'Document',
+            list: true
+        })
+    }
+
+    /** Creates an `ElasticVectorSearch` store and adds `docs` to it before returning it. */
+    async constructVectorStore(
+        embeddings: Embeddings,
+        elasticSearchClientArgs: ElasticClientArgs,
+        docs: Document<Record<string, any>>[]
+    ): Promise<VectorStore> {
+        const vectorStore = new ElasticVectorSearch(embeddings, elasticSearchClientArgs)
+        await vectorStore.addDocuments(docs)
+        return vectorStore
+    }
+
+    /**
+     * Collects the connected documents, strips metadata the store cannot hold, and upserts them
+     * through the base `init`.
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        const docs = nodeData.inputs?.document as Document[]
+
+        const flattenDocs = docs && docs.length ? flatten(docs) : []
+        const finalDocs = flattenDocs.map((doc) => new Document(doc))
+
+        // The following code is a workaround for a bug (Langchain Issue #1589) in the underlying library.
+        // Store does not support object in metadata and fail silently
+        finalDocs.forEach((d) => {
+            delete d.metadata.pdf
+            delete d.metadata.loc
+        })
+        // end of workaround
+        // Pass the rebuilt Document instances on, not the raw flattened input
+        return super.init(nodeData, _, options, finalDocs)
+    }
+}
+
+module.exports = { nodeClass: ElasticsearchUpsert_VectorStores }
diff --git a/packages/components/nodes/vectorstores/Elasticsearch/elasticsearch.png b/packages/components/nodes/vectorstores/Elasticsearch/elasticsearch.png
new file mode 100644
index 00000000..fdb66863
Binary files /dev/null and b/packages/components/nodes/vectorstores/Elasticsearch/elasticsearch.png differ
diff --git a/packages/components/package.json b/packages/components/package.json
index 830e0381..b52a5d92 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -20,6 +20,7 @@
         "@aws-sdk/client-dynamodb": "^3.360.0",
         "@aws-sdk/client-s3": "^3.427.0",
         "@dqbd/tiktoken": "^1.0.7",
+        "@elastic/elasticsearch": "^8.9.0",
         "@getzep/zep-js": "^0.6.3",
         "@gomomento/sdk": "^1.40.2",
         "@google-ai/generativelanguage": "^0.2.1",