Merge pull request #913 from vectara/vectara-upload-files

Add Vectara upload file component
This commit is contained in:
Henry Heng
2023-09-15 16:00:41 +01:00
committed by GitHub
9 changed files with 335 additions and 222 deletions
@@ -1,7 +1,7 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { BabyAGI } from './core'
import { BaseChatModel } from 'langchain/chat_models/base'
import { VectorStore } from 'langchain/vectorstores'
import { VectorStore } from 'langchain/vectorstores/base'
class BabyAGI_Agents implements INode {
label: string
@@ -2,7 +2,7 @@ import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Inter
import { getBaseClasses } from '../../../src/utils'
import { VectorDBQAChain } from 'langchain/chains'
import { BaseLanguageModel } from 'langchain/base_language'
import { VectorStore } from 'langchain/vectorstores'
import { VectorStore } from 'langchain/vectorstores/base'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
class VectorDBQAChain_Chains implements INode {
@@ -92,7 +92,7 @@ class VectaraExisting_VectorStores implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
const customerId = getCredentialParam('customerID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')
const vectaraMetadataFilter = nodeData.inputs?.filter as string
const sentencesBefore = nodeData.inputs?.sentencesBefore as number
@@ -0,0 +1,176 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig, VectaraFile } from 'langchain/vectorstores/vectara'
class VectaraUpload_VectorStores implements INode {
label: string
name: string
version: number
description: string
type: string
icon: string
category: string
baseClasses: string[]
inputs: INodeParams[]
credential: INodeParams
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Vectara Upload File'
this.name = 'vectaraUpload'
this.version = 1.0
this.type = 'Vectara'
this.icon = 'vectara.png'
this.category = 'Vector Stores'
this.description = 'Upload files to Vectara'
this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
this.credential = {
label: 'Connect Credential',
name: 'credential',
type: 'credential',
credentialNames: ['vectaraApi']
}
this.inputs = [
{
label: 'File',
name: 'file',
description:
'File to upload to Vectara. Supported file types: https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes',
type: 'file'
},
{
label: 'Vectara Metadata Filter',
name: 'filter',
description:
'Filter to apply to Vectara metadata. Refer to the <a target="_blank" href="https://docs.flowiseai.com/vector-stores/vectara">documentation</a> on how to use Vectara filters with Flowise.',
type: 'string',
additionalParams: true,
optional: true
},
{
label: 'Sentences Before',
name: 'sentencesBefore',
description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Sentences After',
name: 'sentencesAfter',
description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Lambda',
name: 'lambda',
description:
'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Top K',
name: 'topK',
description: 'Number of top results to fetch. Defaults to 4',
placeholder: '4',
type: 'number',
additionalParams: true,
optional: true
}
]
this.outputs = [
{
label: 'Vectara Retriever',
name: 'retriever',
baseClasses: this.baseClasses
},
{
label: 'Vectara Vector Store',
name: 'vectorStore',
baseClasses: [this.type, ...getBaseClasses(VectaraStore)]
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
const customerId = getCredentialParam('customerID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')
const fileBase64 = nodeData.inputs?.file
const vectaraMetadataFilter = nodeData.inputs?.filter as string
const sentencesBefore = nodeData.inputs?.sentencesBefore as number
const sentencesAfter = nodeData.inputs?.sentencesAfter as number
const lambda = nodeData.inputs?.lambda as number
const output = nodeData.outputs?.output as string
const topK = nodeData.inputs?.topK as string
const k = topK ? parseInt(topK, 10) : 4
const vectaraArgs: VectaraLibArgs = {
apiKey: apiKey,
customerId: customerId,
corpusId: corpusId
}
const vectaraFilter: VectaraFilter = {}
if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
if (lambda) vectaraFilter.lambda = lambda
const vectaraContextConfig: VectaraContextConfig = {}
if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
vectaraFilter.contextConfig = vectaraContextConfig
let files: string[] = []
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
files = JSON.parse(fileBase64)
} else {
files = [fileBase64]
}
const vectaraFiles: VectaraFile[] = []
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
}
const vectorStore = new VectaraStore(vectaraArgs)
await vectorStore.addFiles(vectaraFiles)
if (output === 'retriever') {
const retriever = vectorStore.asRetriever(k, vectaraFilter)
return retriever
} else if (output === 'vectorStore') {
;(vectorStore as any).k = k
return vectorStore
}
return vectorStore
}
}
const getFileName = (fileBase64: string) => {
let fileNames = []
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
const files = JSON.parse(fileBase64)
for (const file of files) {
const splitDataURI = file.split(',')
const filename = splitDataURI[splitDataURI.length - 1].split(':')[1]
fileNames.push(filename)
}
return fileNames.join(', ')
} else {
const splitDataURI = fileBase64.split(',')
const filename = splitDataURI[splitDataURI.length - 1].split(':')[1]
return filename
}
}
module.exports = { nodeClass: VectaraUpload_VectorStores }
@@ -101,7 +101,7 @@ class VectaraUpsert_VectorStores implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
const customerId = getCredentialParam('customerID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')
const docs = nodeData.inputs?.document as Document[]
const embeddings = {} as Embeddings

Before

Width:  |  Height:  |  Size: 66 KiB

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

+1 -1
View File
@@ -42,7 +42,7 @@
"google-auth-library": "^9.0.0",
"graphql": "^16.6.0",
"html-to-text": "^9.0.5",
"langchain": "^0.0.145",
"langchain": "^0.0.147",
"langfuse-langchain": "^1.0.14-alpha.0",
"langsmith": "^0.0.32",
"linkifyjs": "^4.1.1",