Merge branch 'main' into feature/VectorStoreRevamp

This commit is contained in:
Henry
2023-11-22 19:49:55 +00:00
50 changed files with 2154 additions and 159 deletions
@@ -111,7 +111,7 @@ class OpenAIAssistant_Agents implements INode {
const openai = new OpenAI({ apiKey: openAIApiKey })
options.logger.info(`Clearing OpenAI Thread ${sessionId}`)
await openai.beta.threads.del(sessionId)
if (sessionId) await openai.beta.threads.del(sessionId)
options.logger.info(`Successfully cleared OpenAI Thread ${sessionId}`)
}
@@ -135,16 +135,25 @@ class OpenAIAssistant_Agents implements INode {
const openai = new OpenAI({ apiKey: openAIApiKey })
// Retrieve assistant
try {
const assistantDetails = JSON.parse(assistant.details)
const openAIAssistantId = assistantDetails.id
// Retrieve assistant
const retrievedAssistant = await openai.beta.assistants.retrieve(openAIAssistantId)
if (formattedTools.length) {
let filteredTools = uniqWith([...retrievedAssistant.tools, ...formattedTools], isEqual)
let filteredTools = []
for (const tool of retrievedAssistant.tools) {
if (tool.type === 'code_interpreter' || tool.type === 'retrieval') filteredTools.push(tool)
}
filteredTools = uniqWith([...filteredTools, ...formattedTools], isEqual)
// filter out tool with empty function
filteredTools = filteredTools.filter((tool) => !(tool.type === 'function' && !(tool as any).function))
await openai.beta.assistants.update(openAIAssistantId, { tools: filteredTools })
} else {
let filteredTools = retrievedAssistant.tools.filter((tool) => tool.type !== 'function')
await openai.beta.assistants.update(openAIAssistantId, { tools: filteredTools })
}
const chatmessage = await appDataSource.getRepository(databaseEntities['ChatMessage']).findOneBy({
@@ -152,14 +161,45 @@ class OpenAIAssistant_Agents implements INode {
})
let threadId = ''
let isNewThread = false
if (!chatmessage) {
const thread = await openai.beta.threads.create({})
threadId = thread.id
isNewThread = true
} else {
const thread = await openai.beta.threads.retrieve(chatmessage.sessionId)
threadId = thread.id
}
// List all runs
if (!isNewThread) {
const promise = (threadId: string) => {
return new Promise<void>((resolve) => {
const timeout = setInterval(async () => {
const allRuns = await openai.beta.threads.runs.list(threadId)
if (allRuns.data && allRuns.data.length) {
const firstRunId = allRuns.data[0].id
const runStatus = allRuns.data.find((run) => run.id === firstRunId)?.status
if (
runStatus &&
(runStatus === 'cancelled' ||
runStatus === 'completed' ||
runStatus === 'expired' ||
runStatus === 'failed')
) {
clearInterval(timeout)
resolve()
}
} else {
clearInterval(timeout)
resolve()
}
}, 500)
})
}
await promise(threadId)
}
// Add message to thread
await openai.beta.threads.messages.create(threadId, {
role: 'user',
@@ -217,27 +257,41 @@ class OpenAIAssistant_Agents implements INode {
})
resolve(state)
} else {
reject(
new Error(
`Error processing thread: ${state}, Thread ID: ${threadId}, Run ID: ${runId}. submit_tool_outputs.tool_calls are empty`
)
)
await openai.beta.threads.runs.cancel(threadId, runId)
resolve('requires_action_retry')
}
}
} else if (state === 'cancelled' || state === 'expired' || state === 'failed') {
clearInterval(timeout)
reject(new Error(`Error processing thread: ${state}, Thread ID: ${threadId}, Run ID: ${runId}`))
reject(
new Error(`Error processing thread: ${state}, Thread ID: ${threadId}, Run ID: ${runId}, Status: ${state}`)
)
}
}, 500)
})
}
// Polling run status
let runThreadId = runThread.id
let state = await promise(threadId, runThread.id)
while (state === 'requires_action') {
state = await promise(threadId, runThread.id)
}
let retries = 3
while (state === 'requires_action_retry') {
if (retries > 0) {
retries -= 1
const newRunThread = await openai.beta.threads.runs.create(threadId, {
assistant_id: retrievedAssistant.id
})
runThreadId = newRunThread.id
state = await promise(threadId, newRunThread.id)
} else {
throw new Error(`Error processing thread: ${state}, Thread ID: ${threadId}`)
}
}
// List messages
const messages = await openai.beta.threads.messages.list(threadId)
const messageData = messages.data ?? []
@@ -245,12 +299,58 @@ class OpenAIAssistant_Agents implements INode {
if (!assistantMessages.length) return ''
let returnVal = ''
const fileAnnotations = []
for (let i = 0; i < assistantMessages[0].content.length; i += 1) {
if (assistantMessages[0].content[i].type === 'text') {
const content = assistantMessages[0].content[i] as MessageContentText
returnVal += content.text.value
//TODO: handle annotations
if (content.text.annotations) {
const message_content = content.text
const annotations = message_content.annotations
const dirPath = path.join(getUserHome(), '.flowise', 'openai-assistant')
// Iterate over the annotations and add footnotes
for (let index = 0; index < annotations.length; index++) {
const annotation = annotations[index]
let filePath = ''
// Gather citations based on annotation attributes
const file_citation = (annotation as OpenAI.Beta.Threads.Messages.MessageContentText.Text.FileCitation)
.file_citation
if (file_citation) {
const cited_file = await openai.files.retrieve(file_citation.file_id)
// eslint-disable-next-line no-useless-escape
const fileName = cited_file.filename.split(/[\/\\]/).pop() ?? cited_file.filename
filePath = path.join(getUserHome(), '.flowise', 'openai-assistant', fileName)
await downloadFile(cited_file, filePath, dirPath, openAIApiKey)
fileAnnotations.push({
filePath,
fileName
})
} else {
const file_path = (annotation as OpenAI.Beta.Threads.Messages.MessageContentText.Text.FilePath).file_path
if (file_path) {
const cited_file = await openai.files.retrieve(file_path.file_id)
// eslint-disable-next-line no-useless-escape
const fileName = cited_file.filename.split(/[\/\\]/).pop() ?? cited_file.filename
filePath = path.join(getUserHome(), '.flowise', 'openai-assistant', fileName)
await downloadFile(cited_file, filePath, dirPath, openAIApiKey)
fileAnnotations.push({
filePath,
fileName
})
}
}
// Replace the text with a footnote
message_content.value = message_content.value.replace(`${annotation.text}`, `${filePath}`)
}
returnVal += message_content.value
} else {
returnVal += content.text.value
}
} else {
const content = assistantMessages[0].content[i] as MessageContentImageFile
const fileId = content.image_file.file_id
@@ -258,7 +358,7 @@ class OpenAIAssistant_Agents implements INode {
const dirPath = path.join(getUserHome(), '.flowise', 'openai-assistant')
const filePath = path.join(getUserHome(), '.flowise', 'openai-assistant', `${fileObj.filename}.png`)
await downloadFile(fileObj, filePath, dirPath, openAIApiKey)
await downloadImg(openai, fileId, filePath, dirPath)
const bitmap = fsDefault.readFileSync(filePath)
const base64String = Buffer.from(bitmap).toString('base64')
@@ -271,7 +371,8 @@ class OpenAIAssistant_Agents implements INode {
return {
text: returnVal,
usedTools,
assistant: { assistantId: openAIAssistantId, threadId, runId: runThread.id, messages: messageData }
fileAnnotations,
assistant: { assistantId: openAIAssistantId, threadId, runId: runThreadId, messages: messageData }
}
} catch (error) {
throw new Error(error)
@@ -279,6 +380,22 @@ class OpenAIAssistant_Agents implements INode {
}
}
const downloadImg = async (openai: OpenAI, fileId: string, filePath: string, dirPath: string) => {
const response = await openai.files.content(fileId)
// Extract the binary data from the Response object
const image_data = await response.arrayBuffer()
// Convert the binary data to a Buffer
const image_data_buffer = Buffer.from(image_data)
// Save the image to a specific location
if (!fsDefault.existsSync(dirPath)) {
fsDefault.mkdirSync(path.dirname(filePath), { recursive: true })
}
fsDefault.writeFileSync(filePath, image_data_buffer)
}
const downloadFile = async (fileObj: any, filePath: string, dirPath: string, openAIApiKey: string) => {
try {
const response = await fetch(`https://api.openai.com/v1/files/${fileObj.id}/content`, {
@@ -0,0 +1,339 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { VectorDBQAChain } from 'langchain/chains'
import { Document } from 'langchain/document'
import { VectaraStore } from 'langchain/vectorstores/vectara'
import fetch from 'node-fetch'
// functionality based on https://github.com/vectara/vectara-answer
const reorderCitations = (unorderedSummary: string) => {
const allCitations = unorderedSummary.match(/\[\d+\]/g) || []
const uniqueCitations = [...new Set(allCitations)]
const citationToReplacement: { [key: string]: string } = {}
uniqueCitations.forEach((citation, index) => {
citationToReplacement[citation] = `[${index + 1}]`
})
return unorderedSummary.replace(/\[\d+\]/g, (match) => citationToReplacement[match])
}
const applyCitationOrder = (searchResults: any[], unorderedSummary: string) => {
const orderedSearchResults: any[] = []
const allCitations = unorderedSummary.match(/\[\d+\]/g) || []
const addedIndices = new Set<number>()
for (let i = 0; i < allCitations.length; i++) {
const citation = allCitations[i]
const index = Number(citation.slice(1, citation.length - 1)) - 1
if (addedIndices.has(index)) continue
orderedSearchResults.push(searchResults[index])
addedIndices.add(index)
}
return orderedSearchResults
}
class VectaraChain_Chains implements INode {
label: string
name: string
version: number
type: string
icon: string
category: string
baseClasses: string[]
description: string
inputs: INodeParams[]
constructor() {
this.label = 'Vectara QA Chain'
this.name = 'vectaraQAChain'
this.version = 1.0
this.type = 'VectaraQAChain'
this.icon = 'vectara.png'
this.category = 'Chains'
this.description = 'QA chain for Vectara'
this.baseClasses = [this.type, ...getBaseClasses(VectorDBQAChain)]
this.inputs = [
{
label: 'Vectara Store',
name: 'vectaraStore',
type: 'VectorStore'
},
{
label: 'Summarizer Prompt Name',
name: 'summarizerPromptName',
description:
'Summarize the results fetched from Vectara. Read <a target="_blank" href="https://docs.vectara.com/docs/learn/grounded-generation/select-a-summarizer">more</a>',
type: 'options',
options: [
{
label: 'vectara-summary-ext-v1.2.0 (gpt-3.5-turbo)',
name: 'vectara-summary-ext-v1.2.0'
},
{
label: 'vectara-experimental-summary-ext-2023-10-23-small (gpt-3.5-turbo)',
name: 'vectara-experimental-summary-ext-2023-10-23-small',
description: 'In beta, available to both Growth and Scale Vectara users'
},
{
label: 'vectara-summary-ext-v1.3.0 (gpt-4.0)',
name: 'vectara-summary-ext-v1.3.0',
description: 'Only available to paying Scale Vectara users'
},
{
label: 'vectara-experimental-summary-ext-2023-10-23-med (gpt-4.0)',
name: 'vectara-experimental-summary-ext-2023-10-23-med',
description: 'In beta, only available to paying Scale Vectara users'
}
],
default: 'vectara-summary-ext-v1.2.0'
},
{
label: 'Response Language',
name: 'responseLang',
description:
'Return the response in specific language. If not selected, Vectara will automatically detects the language. Read <a target="_blank" href="https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-response-languages">more</a>',
type: 'options',
options: [
{
label: 'English',
name: 'eng'
},
{
label: 'German',
name: 'deu'
},
{
label: 'French',
name: 'fra'
},
{
label: 'Chinese',
name: 'zho'
},
{
label: 'Korean',
name: 'kor'
},
{
label: 'Arabic',
name: 'ara'
},
{
label: 'Russian',
name: 'rus'
},
{
label: 'Thai',
name: 'tha'
},
{
label: 'Dutch',
name: 'nld'
},
{
label: 'Italian',
name: 'ita'
},
{
label: 'Portuguese',
name: 'por'
},
{
label: 'Spanish',
name: 'spa'
},
{
label: 'Japanese',
name: 'jpn'
},
{
label: 'Polish',
name: 'pol'
},
{
label: 'Turkish',
name: 'tur'
},
{
label: 'Vietnamese',
name: 'vie'
},
{
label: 'Indonesian',
name: 'ind'
},
{
label: 'Czech',
name: 'ces'
},
{
label: 'Ukrainian',
name: 'ukr'
},
{
label: 'Greek',
name: 'ell'
},
{
label: 'Hebrew',
name: 'heb'
},
{
label: 'Farsi/Persian',
name: 'fas'
},
{
label: 'Hindi',
name: 'hin'
},
{
label: 'Urdu',
name: 'urd'
},
{
label: 'Swedish',
name: 'swe'
},
{
label: 'Bengali',
name: 'ben'
},
{
label: 'Malay',
name: 'msa'
},
{
label: 'Romanian',
name: 'ron'
}
],
optional: true,
default: 'eng'
},
{
label: 'Max Summarized Results',
name: 'maxSummarizedResults',
description: 'Maximum results used to build the summarized response',
type: 'number',
default: 7
}
]
}
async init(): Promise<any> {
return null
}
async run(nodeData: INodeData, input: string): Promise<object> {
const vectorStore = nodeData.inputs?.vectaraStore as VectaraStore
const responseLang = (nodeData.inputs?.responseLang as string) ?? 'auto'
const summarizerPromptName = nodeData.inputs?.summarizerPromptName as string
const maxSummarizedResultsStr = nodeData.inputs?.maxSummarizedResults as string
const maxSummarizedResults = maxSummarizedResultsStr ? parseInt(maxSummarizedResultsStr, 10) : 7
const topK = (vectorStore as any)?.k ?? 10
const headers = await vectorStore.getJsonHeader()
const vectaraFilter = (vectorStore as any).vectaraFilter ?? {}
const corpusId: number[] = (vectorStore as any).corpusId ?? []
const customerId = (vectorStore as any).customerId ?? ''
const corpusKeys = corpusId.map((corpusId) => ({
customerId,
corpusId,
metadataFilter: vectaraFilter?.filter ?? '',
lexicalInterpolationConfig: { lambda: vectaraFilter?.lambda ?? 0.025 }
}))
const data = {
query: [
{
query: input,
start: 0,
numResults: topK,
contextConfig: {
sentencesAfter: vectaraFilter?.contextConfig?.sentencesAfter ?? 2,
sentencesBefore: vectaraFilter?.contextConfig?.sentencesBefore ?? 2
},
corpusKey: corpusKeys,
summary: [
{
summarizerPromptName,
responseLang,
maxSummarizedResults
}
]
}
]
}
try {
const response = await fetch(`https://api.vectara.io/v1/query`, {
method: 'POST',
headers: headers?.headers,
body: JSON.stringify(data)
})
if (response.status !== 200) {
throw new Error(`Vectara API returned status code ${response.status}`)
}
const result = await response.json()
const responses = result.responseSet[0].response
const documents = result.responseSet[0].document
let rawSummarizedText = ''
for (let i = 0; i < responses.length; i += 1) {
const responseMetadata = responses[i].metadata
const documentMetadata = documents[responses[i].documentIndex].metadata
const combinedMetadata: Record<string, unknown> = {}
responseMetadata.forEach((item: { name: string; value: unknown }) => {
combinedMetadata[item.name] = item.value
})
documentMetadata.forEach((item: { name: string; value: unknown }) => {
combinedMetadata[item.name] = item.value
})
responses[i].metadata = combinedMetadata
}
const summaryStatus = result.responseSet[0].summary[0].status
if (summaryStatus.length > 0 && summaryStatus[0].code === 'BAD_REQUEST') {
throw new Error(
`BAD REQUEST: Too much text for the summarizer to summarize. Please try reducing the number of search results to summarize, or the context of each result by adjusting the 'summary_num_sentences', and 'summary_num_results' parameters respectively.`
)
}
if (
summaryStatus.length > 0 &&
summaryStatus[0].code === 'NOT_FOUND' &&
summaryStatus[0].statusDetail === 'Failed to retrieve summarizer.'
) {
throw new Error(`BAD REQUEST: summarizer ${summarizerPromptName} is invalid for this account.`)
}
rawSummarizedText = result.responseSet[0].summary[0]?.text
let summarizedText = reorderCitations(rawSummarizedText)
let summaryResponses = applyCitationOrder(responses, rawSummarizedText)
const sourceDocuments: Document[] = summaryResponses.map(
(response: { text: string; metadata: Record<string, unknown>; score: number }) =>
new Document({
pageContent: response.text,
metadata: response.metadata
})
)
return { text: summarizedText, sourceDocuments: sourceDocuments }
} catch (error) {
throw new Error(error)
}
}
}
module.exports = { nodeClass: VectaraChain_Chains }
Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

@@ -19,7 +19,7 @@ class ChatAnthropic_ChatModels implements INode {
constructor() {
this.label = 'ChatAnthropic'
this.name = 'chatAnthropic'
this.version = 2.0
this.version = 3.0
this.type = 'ChatAnthropic'
this.icon = 'chatAnthropic.png'
this.category = 'Chat Models'
@@ -48,6 +48,11 @@ class ChatAnthropic_ChatModels implements INode {
name: 'claude-2',
description: 'Claude 2 latest major version, automatically get updates to the model as they are released'
},
{
label: 'claude-2.1',
name: 'claude-2.1',
description: 'Claude 2 latest full version'
},
{
label: 'claude-instant-1',
name: 'claude-instant-1',
@@ -0,0 +1,146 @@
import { getBaseClasses, getCredentialData, getCredentialParam, ICommonObject, INode, INodeData, INodeParams } from '../../../src'
import { MongoDBChatMessageHistory } from 'langchain/stores/message/mongodb'
import { BufferMemory, BufferMemoryInput } from 'langchain/memory'
import { BaseMessage, mapStoredMessageToChatMessage } from 'langchain/schema'
import { MongoClient } from 'mongodb'
class MongoDB_Memory implements INode {
label: string
name: string
version: number
description: string
type: string
icon: string
category: string
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
constructor() {
this.label = 'MongoDB Atlas Chat Memory'
this.name = 'MongoDBAtlasChatMemory'
this.version = 1.0
this.type = 'MongoDBAtlasChatMemory'
this.icon = 'mongodb.png'
this.category = 'Memory'
this.description = 'Stores the conversation in MongoDB Atlas'
this.baseClasses = [this.type, ...getBaseClasses(BufferMemory)]
this.credential = {
label: 'Connect Credential',
name: 'credential',
type: 'credential',
credentialNames: ['mongoDBUrlApi']
}
this.inputs = [
{
label: 'Database',
name: 'databaseName',
placeholder: '<DB_NAME>',
type: 'string'
},
{
label: 'Collection Name',
name: 'collectionName',
placeholder: '<COLLECTION_NAME>',
type: 'string'
},
{
label: 'Session Id',
name: 'sessionId',
type: 'string',
description: 'If not specified, the first CHAT_MESSAGE_ID will be used as sessionId',
default: '',
additionalParams: true,
optional: true
},
{
label: 'Memory Key',
name: 'memoryKey',
type: 'string',
default: 'chat_history',
additionalParams: true
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
return initializeMongoDB(nodeData, options)
}
async clearSessionMemory(nodeData: INodeData, options: ICommonObject): Promise<void> {
const mongodbMemory = await initializeMongoDB(nodeData, options)
const sessionId = nodeData.inputs?.sessionId as string
const chatId = options?.chatId as string
options.logger.info(`Clearing MongoDB memory session ${sessionId ? sessionId : chatId}`)
await mongodbMemory.clear()
options.logger.info(`Successfully cleared MongoDB memory session ${sessionId ? sessionId : chatId}`)
}
}
const initializeMongoDB = async (nodeData: INodeData, options: ICommonObject): Promise<BufferMemory> => {
const databaseName = nodeData.inputs?.databaseName as string
const collectionName = nodeData.inputs?.collectionName as string
const sessionId = nodeData.inputs?.sessionId as string
const memoryKey = nodeData.inputs?.memoryKey as string
const chatId = options?.chatId as string
let isSessionIdUsingChatMessageId = false
if (!sessionId && chatId) isSessionIdUsingChatMessageId = true
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
let mongoDBConnectUrl = getCredentialParam('mongoDBConnectUrl', credentialData, nodeData)
const client = new MongoClient(mongoDBConnectUrl)
await client.connect()
const collection = client.db(databaseName).collection(collectionName)
const mongoDBChatMessageHistory = new MongoDBChatMessageHistory({
collection,
sessionId: sessionId ? sessionId : chatId
})
mongoDBChatMessageHistory.getMessages = async (): Promise<BaseMessage[]> => {
const document = await collection.findOne({
sessionId: (mongoDBChatMessageHistory as any).sessionId
})
const messages = document?.messages || []
return messages.map(mapStoredMessageToChatMessage)
}
mongoDBChatMessageHistory.addMessage = async (message: BaseMessage): Promise<void> => {
const messages = [message].map((msg) => msg.toDict())
await collection.updateOne(
{ sessionId: (mongoDBChatMessageHistory as any).sessionId },
{
$push: { messages: { $each: messages } }
},
{ upsert: true }
)
}
mongoDBChatMessageHistory.clear = async (): Promise<void> => {
await collection.deleteOne({ sessionId: (mongoDBChatMessageHistory as any).sessionId })
}
return new BufferMemoryExtended({
memoryKey,
chatHistory: mongoDBChatMessageHistory,
returnMessages: true,
isSessionIdUsingChatMessageId
})
}
interface BufferMemoryExtendedInput {
isSessionIdUsingChatMessageId: boolean
}
class BufferMemoryExtended extends BufferMemory {
isSessionIdUsingChatMessageId? = false
constructor(fields: BufferMemoryInput & Partial<BufferMemoryExtendedInput>) {
super(fields)
this.isSessionIdUsingChatMessageId = fields.isSessionIdUsingChatMessageId
}
}
module.exports = { nodeClass: MongoDB_Memory }
Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

@@ -146,13 +146,26 @@ export abstract class ElasticSearchBase {
} else if (cloudId) {
let username = getCredentialParam('username', credentialData, nodeData)
let password = getCredentialParam('password', credentialData, nodeData)
elasticSearchClientOptions = {
cloud: {
id: cloudId
},
auth: {
username: username,
password: password
if (cloudId.startsWith('http')) {
elasticSearchClientOptions = {
node: cloudId,
auth: {
username: username,
password: password
},
tls: {
rejectUnauthorized: false
}
}
} else {
elasticSearchClientOptions = {
cloud: {
id: cloudId
},
auth: {
username: username,
password: password
}
}
}
}
@@ -50,7 +50,7 @@ class ElasicsearchUpsert_VectorStores extends ElasticSearchBase implements INode
delete d.metadata.loc
})
// end of workaround
return super.init(nodeData, _, options, flattenDocs)
return super.init(nodeData, _, options, finalDocs)
}
}
@@ -0,0 +1,145 @@
import {
getBaseClasses,
getCredentialData,
getCredentialParam,
ICommonObject,
INodeData,
INodeOutputsValue,
INodeParams
} from '../../../src'
import { Embeddings } from 'langchain/embeddings/base'
import { VectorStore } from 'langchain/vectorstores/base'
import { Document } from 'langchain/document'
import { MongoDBAtlasVectorSearch } from 'langchain/vectorstores/mongodb_atlas'
import { Collection, MongoClient } from 'mongodb'
export abstract class MongoDBSearchBase {
label: string
name: string
version: number
description: string
type: string
icon: string
category: string
baseClasses: string[]
inputs: INodeParams[]
credential: INodeParams
outputs: INodeOutputsValue[]
mongoClient: MongoClient
protected constructor() {
this.type = 'MongoDB Atlas'
this.icon = 'mongodb.png'
this.category = 'Vector Stores'
this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
this.credential = {
label: 'Connect Credential',
name: 'credential',
type: 'credential',
credentialNames: ['mongoDBUrlApi']
}
this.inputs = [
{
label: 'Embeddings',
name: 'embeddings',
type: 'Embeddings'
},
{
label: 'Database',
name: 'databaseName',
placeholder: '<DB_NAME>',
type: 'string'
},
{
label: 'Collection Name',
name: 'collectionName',
placeholder: '<COLLECTION_NAME>',
type: 'string'
},
{
label: 'Index Name',
name: 'indexName',
placeholder: '<VECTOR_INDEX_NAME>',
type: 'string'
},
{
label: 'Content Field',
name: 'textKey',
description: 'Name of the field (column) that contains the actual content',
type: 'string',
default: 'text',
additionalParams: true,
optional: true
},
{
label: 'Embedded Field',
name: 'embeddingKey',
description: 'Name of the field (column) that contains the Embedding',
type: 'string',
default: 'embedding',
additionalParams: true,
optional: true
},
{
label: 'Top K',
name: 'topK',
description: 'Number of top results to fetch. Default to 4',
placeholder: '4',
type: 'number',
additionalParams: true,
optional: true
}
]
this.outputs = [
{
label: 'MongoDB Retriever',
name: 'retriever',
baseClasses: this.baseClasses
},
{
label: 'MongoDB Vector Store',
name: 'vectorStore',
baseClasses: [this.type, ...getBaseClasses(MongoDBAtlasVectorSearch)]
}
]
}
abstract constructVectorStore(
embeddings: Embeddings,
collection: Collection,
indexName: string,
textKey: string,
embeddingKey: string,
docs: Document<Record<string, any>>[] | undefined
): Promise<VectorStore>
async init(nodeData: INodeData, _: string, options: ICommonObject, docs: Document<Record<string, any>>[] | undefined): Promise<any> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const databaseName = nodeData.inputs?.databaseName as string
const collectionName = nodeData.inputs?.collectionName as string
const indexName = nodeData.inputs?.indexName as string
let textKey = nodeData.inputs?.textKey as string
let embeddingKey = nodeData.inputs?.embeddingKey as string
const embeddings = nodeData.inputs?.embeddings as Embeddings
const topK = nodeData.inputs?.topK as string
const k = topK ? parseFloat(topK) : 4
const output = nodeData.outputs?.output as string
let mongoDBConnectUrl = getCredentialParam('mongoDBConnectUrl', credentialData, nodeData)
this.mongoClient = new MongoClient(mongoDBConnectUrl)
const collection = this.mongoClient.db(databaseName).collection(collectionName)
if (!textKey || textKey === '') textKey = 'text'
if (!embeddingKey || embeddingKey === '') embeddingKey = 'embedding'
const vectorStore = await this.constructVectorStore(embeddings, collection, indexName, textKey, embeddingKey, docs)
if (output === 'retriever') {
return vectorStore.asRetriever(k)
} else if (output === 'vectorStore') {
;(vectorStore as any).k = k
return vectorStore
}
return vectorStore
}
}
@@ -0,0 +1,39 @@
import { Collection } from 'mongodb'
import { MongoDBAtlasVectorSearch } from 'langchain/vectorstores/mongodb_atlas'
import { Embeddings } from 'langchain/embeddings/base'
import { VectorStore } from 'langchain/vectorstores/base'
import { Document } from 'langchain/document'
import { MongoDBSearchBase } from './MongoDBSearchBase'
import { ICommonObject, INode, INodeData } from '../../../src/Interface'
class MongoDBExisting_VectorStores extends MongoDBSearchBase implements INode {
constructor() {
super()
this.label = 'MongoDB Atlas Load Existing Index'
this.name = 'MongoDBIndex'
this.version = 1.0
this.description = 'Load existing data from MongoDB Atlas (i.e: Document has been upserted)'
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
return super.init(nodeData, _, options, undefined)
}
async constructVectorStore(
embeddings: Embeddings,
collection: Collection,
indexName: string,
textKey: string,
embeddingKey: string,
_: Document<Record<string, any>>[] | undefined
): Promise<VectorStore> {
return new MongoDBAtlasVectorSearch(embeddings, {
collection: collection,
indexName: indexName,
textKey: textKey,
embeddingKey: embeddingKey
})
}
}
module.exports = { nodeClass: MongoDBExisting_VectorStores }
@@ -0,0 +1,59 @@
import { flatten } from 'lodash'
import { Collection } from 'mongodb'
import { Embeddings } from 'langchain/embeddings/base'
import { Document } from 'langchain/document'
import { VectorStore } from 'langchain/vectorstores/base'
import { MongoDBAtlasVectorSearch } from 'langchain/vectorstores/mongodb_atlas'
import { ICommonObject, INode, INodeData } from '../../../src/Interface'
import { MongoDBSearchBase } from './MongoDBSearchBase'
class MongoDBUpsert_VectorStores extends MongoDBSearchBase implements INode {
constructor() {
super()
this.label = 'MongoDB Upsert Document'
this.name = 'MongoDBUpsert'
this.version = 1.0
this.description = 'Upsert documents to MongoDB Atlas'
this.inputs.unshift({
label: 'Document',
name: 'document',
type: 'Document',
list: true
})
}
async constructVectorStore(
embeddings: Embeddings,
collection: Collection,
indexName: string,
textKey: string,
embeddingKey: string,
docs: Document<Record<string, any>>[]
): Promise<VectorStore> {
const mongoDBAtlasVectorSearch = new MongoDBAtlasVectorSearch(embeddings, {
collection: collection,
indexName: indexName,
textKey: textKey,
embeddingKey: embeddingKey
})
await mongoDBAtlasVectorSearch.addDocuments(docs)
return mongoDBAtlasVectorSearch
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const docs = nodeData.inputs?.document as Document[]
const flattenDocs = docs && docs.length ? flatten(docs) : []
const finalDocs = []
for (let i = 0; i < flattenDocs.length; i += 1) {
if (flattenDocs[i] && flattenDocs[i].pageContent) {
const document = new Document(flattenDocs[i])
finalDocs.push(document)
}
}
return super.init(nodeData, _, options, finalDocs)
}
}
module.exports = { nodeClass: MongoDBUpsert_VectorStores }
Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB