From 261e45d74a2f912dd0ab6e356fef90c97b420f3f Mon Sep 17 00:00:00 2001
From: vinodkiran
Date: Mon, 13 Nov 2023 21:56:18 +0530
Subject: [PATCH] MongoDB Atlas Integration: Adding MongoDB as a Vector Store

---
 Review note: the file contents below were revised after this patch was generated,
 so the abbreviated blob ids on the `index` lines no longer match them.

 .../vectorstores/MongoDB/MongoDBSearchBase.ts | 179 ++++++++++++++++++++++
 .../vectorstores/MongoDB/MongoDB_Existing.ts  |  43 +++++
 .../vectorstores/MongoDB/MongoDB_Upsert.ts    |  65 ++++++++
 .../nodes/vectorstores/MongoDB/mongodb.png    | Bin 0 -> 3741 bytes
 4 files changed, 287 insertions(+)
 create mode 100644 packages/components/nodes/vectorstores/MongoDB/MongoDBSearchBase.ts
 create mode 100644 packages/components/nodes/vectorstores/MongoDB/MongoDB_Existing.ts
 create mode 100644 packages/components/nodes/vectorstores/MongoDB/MongoDB_Upsert.ts
 create mode 100644 packages/components/nodes/vectorstores/MongoDB/mongodb.png

diff --git a/packages/components/nodes/vectorstores/MongoDB/MongoDBSearchBase.ts b/packages/components/nodes/vectorstores/MongoDB/MongoDBSearchBase.ts
new file mode 100644
index 00000000..e9ef8e9a
--- /dev/null
+++ b/packages/components/nodes/vectorstores/MongoDB/MongoDBSearchBase.ts
@@ -0,0 +1,179 @@
+import {
+    getBaseClasses,
+    getCredentialData,
+    getCredentialParam,
+    ICommonObject,
+    INodeData,
+    INodeOutputsValue,
+    INodeParams
+} from '../../../src'
+
+import { Embeddings } from 'langchain/embeddings/base'
+import { VectorStore } from 'langchain/vectorstores/base'
+import { Document } from 'langchain/document'
+import { MongoDBAtlasVectorSearch } from 'langchain/vectorstores/mongodb_atlas'
+import { Collection, MongoClient } from 'mongodb'
+
+/**
+ * Common base for the MongoDB Atlas vector store nodes.
+ *
+ * The constructor declares the credential, inputs and outputs shared by the nodes, and `init`
+ * opens the configured collection and returns a retriever or the vector store itself.
+ * Subclasses implement `constructVectorStore` to decide how the store is created.
+ */
+export abstract class MongoDBSearchBase {
+    label: string
+    name: string
+    version: number
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+    credential: INodeParams
+    outputs: INodeOutputsValue[]
+    // Client created by the most recent `init` call; nothing in this class closes it.
+    mongoClient: MongoClient
+
+    protected constructor() {
+        this.type = 'MongoDB Atlas'
+        this.icon = 'mongodb.png'
+        this.category = 'Vector Stores'
+        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['mongoDBUrlApi']
+        }
+        this.inputs = [
+            {
+                label: 'Embeddings',
+                name: 'embeddings',
+                type: 'Embeddings'
+            },
+            {
+                label: 'Database',
+                name: 'databaseName',
+                placeholder: '',
+                type: 'string'
+            },
+            {
+                label: 'Collection Name',
+                name: 'collectionName',
+                placeholder: '',
+                type: 'string'
+            },
+            {
+                label: 'Index Name',
+                name: 'indexName',
+                placeholder: '',
+                type: 'string'
+            },
+            {
+                label: 'Content Field',
+                name: 'textKey',
+                description: 'Name of the field (column) that contains the actual content',
+                type: 'string',
+                default: 'text',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Embedded Field',
+                name: 'embeddingKey',
+                description: 'Name of the field (column) that contains the Embedding',
+                type: 'string',
+                default: 'embedding',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Top K',
+                name: 'topK',
+                description: 'Number of top results to fetch. Default to 4',
+                placeholder: '4',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            }
+        ]
+        this.outputs = [
+            {
+                label: 'MongoDB Retriever',
+                name: 'retriever',
+                baseClasses: this.baseClasses
+            },
+            {
+                label: 'MongoDB Vector Store',
+                name: 'vectorStore',
+                baseClasses: [this.type, ...getBaseClasses(MongoDBAtlasVectorSearch)]
+            }
+        ]
+    }
+
+    /**
+     * Builds the vector store used by `init`.
+     *
+     * @param embeddings - Embeddings instance taken from the node inputs.
+     * @param collection - Collection resolved from the configured database and collection name.
+     * @param indexName - Value of the `Index Name` input.
+     * @param textKey - Name of the field holding the document content.
+     * @param embeddingKey - Name of the field holding the embedding.
+     * @param docs - Documents supplied by the subclass `init`, or `undefined` when there are none.
+     * @returns The vector store that `init` exposes through the selected output.
+     */
+    abstract constructVectorStore(
+        embeddings: Embeddings,
+        collection: Collection,
+        indexName: string,
+        textKey: string,
+        embeddingKey: string,
+        docs: Document<Record<string, any>>[] | undefined
+    ): Promise<VectorStore>
+
+    /**
+     * Resolves the credential and inputs, opens the collection and returns the selected output.
+     *
+     * @param nodeData - Node configuration carrying the credential, inputs and selected output.
+     * @param _ - Unused.
+     * @param options - Passed through to `getCredentialData`.
+     * @param docs - Forwarded unchanged to `constructVectorStore`.
+     * @returns The result of `asRetriever(k)` when the `retriever` output is selected, otherwise
+     * the vector store (with `k` assigned on it when the `vectorStore` output is selected).
+     * @throws Error when no `mongoDBConnectUrl` value can be resolved.
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject, docs: Document<Record<string, any>>[] | undefined): Promise<any> {
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const mongoDBConnectUrl = getCredentialParam('mongoDBConnectUrl', credentialData, nodeData)
+        if (!mongoDBConnectUrl) {
+            throw new Error('MongoDB Atlas connection URL is missing from the credential')
+        }
+
+        const databaseName = nodeData.inputs?.databaseName as string
+        const collectionName = nodeData.inputs?.collectionName as string
+        const indexName = nodeData.inputs?.indexName as string
+        const embeddings = nodeData.inputs?.embeddings as Embeddings
+        const output = nodeData.outputs?.output as string
+        // An empty string counts as "not provided", so `||` rather than `??` is intended here.
+        const textKey = (nodeData.inputs?.textKey as string) || 'text'
+        const embeddingKey = (nodeData.inputs?.embeddingKey as string) || 'embedding'
+
+        // Top K is a result count: accept only positive integers and fall back to 4 otherwise.
+        const parsedTopK = Number.parseInt(String(nodeData.inputs?.topK ?? ''), 10)
+        const k = Number.isInteger(parsedTopK) && parsedTopK > 0 ? parsedTopK : 4
+
+        this.mongoClient = new MongoClient(mongoDBConnectUrl)
+        const collection = this.mongoClient.db(databaseName).collection(collectionName)
+        const vectorStore = await this.constructVectorStore(embeddings, collection, indexName, textKey, embeddingKey, docs)
+
+        if (output === 'retriever') {
+            return vectorStore.asRetriever(k)
+        }
+        if (output === 'vectorStore') {
+            ;(vectorStore as any).k = k
+        }
+        return vectorStore
+    }
+}
diff --git a/packages/components/nodes/vectorstores/MongoDB/MongoDB_Existing.ts b/packages/components/nodes/vectorstores/MongoDB/MongoDB_Existing.ts
new file mode 100644
index 00000000..3cbb36b8
--- /dev/null
+++ b/packages/components/nodes/vectorstores/MongoDB/MongoDB_Existing.ts
@@ -0,0 +1,43 @@
+import { ICommonObject, INode, INodeData } from '../../../src/Interface'
+import { Embeddings } from 'langchain/embeddings/base'
+import { VectorStore } from 'langchain/vectorstores/base'
+import { Document } from 'langchain/document'
+
+import { MongoDBSearchBase } from './MongoDBSearchBase'
+import { Collection } from 'mongodb'
+import { MongoDBAtlasVectorSearch } from 'langchain/vectorstores/mongodb_atlas'
+
+/** Node that wraps an existing MongoDB Atlas collection and index without a document input. */
+class MongoDBExisting_VectorStores extends MongoDBSearchBase implements INode {
+    constructor() {
+        super()
+        this.label = 'MongoDB Atlas Load Existing Index'
+        this.name = 'MongoDBIndex'
+        this.version = 1.0
+        this.description = 'Load existing data from MongoDB Atlas (i.e: Document has been upserted)'
+    }
+
+    /** Runs the shared `init` with `undefined` documents, since this node declares no document input. */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        return super.init(nodeData, _, options, undefined)
+    }
+
+    /** Creates the vector store directly on the collection; the trailing documents argument is ignored. */
+    async constructVectorStore(
+        embeddings: Embeddings,
+        collection: Collection,
+        indexName: string,
+        textKey: string,
+        embeddingKey: string,
+        _: Document<Record<string, any>>[] | undefined
+    ): Promise<VectorStore> {
+        return new MongoDBAtlasVectorSearch(embeddings, {
+            collection,
+            indexName,
+            textKey,
+            embeddingKey
+        })
+    }
+}
+
+module.exports = { nodeClass: MongoDBExisting_VectorStores }
diff --git a/packages/components/nodes/vectorstores/MongoDB/MongoDB_Upsert.ts b/packages/components/nodes/vectorstores/MongoDB/MongoDB_Upsert.ts
new file mode 100644
index 00000000..80dfbf19
--- /dev/null
+++ b/packages/components/nodes/vectorstores/MongoDB/MongoDB_Upsert.ts
@@ -0,0 +1,65 @@
+import { ICommonObject, INode, INodeData } from '../../../src/Interface'
+import { Embeddings } from 'langchain/embeddings/base'
+import { Document } from 'langchain/document'
+
+import { flatten } from 'lodash'
+import { VectorStore } from 'langchain/vectorstores/base'
+import { MongoDBSearchBase } from './MongoDBSearchBase'
+import { Collection } from 'mongodb'
+import { MongoDBAtlasVectorSearch } from 'langchain/vectorstores/mongodb_atlas'
+
+/** Node that passes the connected documents to MongoDB Atlas and exposes the resulting vector store. */
+class MongoDBUpsert_VectorStores extends MongoDBSearchBase implements INode {
+    constructor() {
+        super()
+        this.label = 'MongoDB Upsert Document'
+        this.name = 'MongoDBUpsert'
+        this.version = 1.0
+        this.description = 'Upsert documents to MongoDB Atlas'
+        // The document input goes first, ahead of the inputs declared by the base class.
+        this.inputs.unshift({
+            label: 'Document',
+            name: 'document',
+            type: 'Document',
+            list: true
+        })
+    }
+
+    /** Creates the vector store through `fromDocuments`, treating missing `docs` as an empty list. */
+    constructVectorStore(
+        embeddings: Embeddings,
+        collection: Collection,
+        indexName: string,
+        textKey: string,
+        embeddingKey: string,
+        docs: Document<Record<string, any>>[] | undefined
+    ): Promise<VectorStore> {
+        return MongoDBAtlasVectorSearch.fromDocuments(docs ?? [], embeddings, {
+            collection,
+            indexName,
+            textKey,
+            embeddingKey
+        })
+    }
+
+    /**
+     * Flattens the `document` input, drops entries without `pageContent` and passes the
+     * remaining documents to the shared `init`.
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        const docs = nodeData.inputs?.document as Document[]
+        const flattenDocs = docs && docs.length ? flatten(docs) : []
+
+        const finalDocs: Document[] = []
+        for (const doc of flattenDocs) {
+            if (doc && doc.pageContent) {
+                finalDocs.push(new Document(doc))
+            }
+        }
+
+        // Hand over the filtered list; the unfiltered one may still contain empty entries.
+        return super.init(nodeData, _, options, finalDocs)
+    }
+}
+
+module.exports = { nodeClass: MongoDBUpsert_VectorStores }
diff --git a/packages/components/nodes/vectorstores/MongoDB/mongodb.png b/packages/components/nodes/vectorstores/MongoDB/mongodb.png
new file mode 100644
index 0000000000000000000000000000000000000000..5586fe0ac672f7997014d814389c1d6c436d9d0c
GIT binary patch
literal 3741
zcmb`Jc{J4R`^U$YrHqk1Gck`mMA>7^WSwCm4MMgVON56fyHH5hDNHC^_9aWUvCB4O z8H(&h8tVuN^<;@+e)IkLp80jo@0{;n-_N&iOnv!JxUh&Ts($ z0B(aTsO$81EC9fCfrEukvneL|008rziIKS;+S};wO8sMVEzz+`Q>L?XktQA7i?7g` z4#!S9-+HcfJoPC3m&8yR0hXysbjD@Fj@91-{(6BtcDfX69~WV$#}dQI@c(CEs2-Ji z3})=awe;V;{9g9oVd={Dx$4JE9~t0QrfZX~!l-ZzjFG-=Mld6Wk$WuB8?nt$!DXp2 zA{jtW`8~__*`?#Ddj$%csd`+85x@YC3Ozd0jZxDs8+NSP=lov_4!^bIn2(@Wb1KmI zeQ5?C!|_NzH{U9ZTY-Lf&CPp74mVO26Y2xPik)wyDx@_=WH-kby-bNL@t`kP+VU*!
zMIb%Pv(oTIb53&Q1Isj}>b9axVtid|c~O0OV6{b48__O9y|ts!@tIafM@w^Ct!tLP zWvXIytw$HB-7DAhRVOLAA*8#rGq%*Hr?ZRrvLu9v!=|f6mw2XC#nAn|UEO!9ZRkL^ znzqto_?N%4o5BArxOnsMDg8@0^uLZV1DGYbzZVang)ybf%be<_9mO-NZPHei@kg{w zHt0v%mS!o3<|=L5u&?PPf90M<(P1Y@_@@(bP(<<27xj*E(R!&8=pXi-w!S;5z;lpZ zOXG?cjMy*qqtS95zNKLB@E}&n0XL+l!8$}{>@qm%Pwq{-uR7OZQg+G|2+k{V(G`?S`M#-Ug z9ZxcbwE)+;C(f}!X`0gpz=>NABj@G)SBb$r%=e6Ac;X@o=ikBv)h}?xJ?g?5l>N#r ztN^!JlmDOv;vX3jZ&+MhHlz7XOhnWr!~a5{LRo&e|D8;O6_7&Yz-+h@ZvAF)yg1Ct74yeTzy{M z>#J$YPPlzCwCbI%Q@&HXQaf^c2WiWK;8S{ntp76F*PNP|0l7)MDkSk|o%-N++^JJD zK9oCdQBlAiXO~#j{)9}<42mPtv+MmMueqdPSX6KmucixSJ!P$hCO!aN*ZS*_~8$1^IR(^`S8VcS#h;SXc>?vx2| zzALm#0oLp=U$&?{mjXwH%3gME7dD;G6U(RxE}Mcj9xSwoUQ^n#yQ{7hTiVRj)IPQz zDe?JWyrKqkFb}HUe9-#tbO&rug3sG*;QU$~5*x}br)2l|2Ne{_-B0ROE3zl7X7(f2 zEAd{y(cqK&ySp*l!@MN?<+?kDf$d9u(AiMqolS4>R;y2Xq2R(7lNFGyE5F?Z)>z>j z?alZZ-dxCjg5ddm?Sl=F9$){o&ES3Vq)=jc-}$mmojIuMmbCZCaN=v8=fGI2ro@I9 zPuJ50_#CGQA!J3W=w@C|L;9ogR|@ zbrv%)(Dc5J)CaxK@L^=Ue`YVlPuthW}ka%7Fq9OL)Q3Hzx#Mg(c5IIUj=XlvF9vbEivnf z2*ZDc_YgOl&u2g`asw1byEdjiumD)Jd=Ul8025!rjAIx#z$xt|WcT5LVLatquw?kX zzyyLfxA$Wwf9T`N;oFb;y!1x+DVvPH;_4hU>(m6JU2<>6w{tlo09j<^3SH(c89IbeV+;TYkl;SgbcC&MXNDj|}^==SjFPnS%A~5n(-M-C` z8SWqV)0!bX+`mY5iY{cB1F35{p#G403g;Ry)%|p>nrnlNdsU)1-q;HA76856{aN82 zN920}xVsBKcc}z?7MI7P;Qwa@-x=%!Nc@}#<# z04Od_)JCe}q~C;gONEC&`>h*=<5pJXg*VyOWIf-l;g6z>4%LfBM}3(xY^*0&+&tK^ zUqBI&#M9G;b#mx7_URW~9=_Wb#cg{@>((RUnuIR_ULz1H#gXQeeggfg;FTUfC&98c zv2HqQi_rKN)pK8HPUyyzC+h4F_x_hsb)eKg4OhLhNDtZ$%FUlg5Ee`4}lYFP%Qr2a$Q7Y!07Tda0l{0U(CbV9?G z6c)EIDw?^eF5J{*|!GN*G$G4Q5&@sr}iAcZQae|(A89tpiw5< z@zzek`)>q*NropSjqU4TTwKa=>0pU>HWunUq@hLWx3Z4DMnq5V5APnDu0OxEEKm{m zGgGxWILA2O7nb$BO1Mw+?x>*r0=0LdJ8ot_!A|`4PyD#v3D{*&Sc&iUS?f8u2BCfM z{P+$ks#ZoC@j37At~zt>+2RSfZas5Z>nn`L7TJ>u*XzG3^6}-a&86y%Ki&yP^(L`M z45DTnRC1Qkic!=D!4`Z|**uk;J512mkGj0lPyWH$Re1JvpJ2hSTq}Y|$pie~_wWc= z6xS7K%wfO7l*oX!FbhG^YYp8=kFQ+~|1G}D>HG4k% zU<)p3sh|gP=dE2}m0RS_S5GLI6MlkALrS#%CKQzY*h^Q=4m5H74E7dobHJoSYhy;9 
znE6TEDVoZD(ecb1ZtV-i~whn5SBHHG{2nK&~Qi~^#rVj?tBWKPZYVchduFxy5 zy7y5*m^y6#RjhWwzAoUll_Cgby8Yacl7tB4d=e_CtemNn_{;^v78txW%u~<(5-UJ# z#r)SGA!)%WW;bcqjd+MGK-B`tE%S2W~8N{6$9zs%|z zf%q|Jbva4lY4t51YxC_gPXMRr&Qye%&`7K%TS>7U0tHUDV=r-hI_k61zpDLb(UMFd zvc{YjA}y~3Z_(B#oAjxz5`iTB0;Uq2WD5&Do&8y%sE{Z*$)IS0 tLACkG!7qJ7*HW-wMKKxE3+#3P=^7KM_f;;R=|9^513e6?O4lLce*i)hLc9O~ literal 0 HcmV?d00001