mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 17:01:00 +03:00
Feat/aws kendra vector search (#5088)
* Add AWS Kendra vector store integration * Fix import paths in Kendra vector store * Add AWS Kendra dependencies to package.json * Update pnpm-lock.yaml with AWS Kendra dependencies * Fix linter warnings in Kendra vector store * Fix code formatting in Kendra vector store * Update pnpm-lock.yaml --------- Co-authored-by: Henry Heng <henryheng@flowiseai.com> Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
@@ -0,0 +1,293 @@
|
|||||||
|
import { flatten } from 'lodash'
|
||||||
|
import { AmazonKendraRetriever } from '@langchain/aws'
|
||||||
|
import { KendraClient, BatchPutDocumentCommand, BatchDeleteDocumentCommand } from '@aws-sdk/client-kendra'
|
||||||
|
import { Document } from '@langchain/core/documents'
|
||||||
|
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
|
||||||
|
import { FLOWISE_CHATID, getCredentialData, getCredentialParam } from '../../../src/utils'
|
||||||
|
import { howToUseFileUpload } from '../VectorStoreUtils'
|
||||||
|
import { MODEL_TYPE, getRegions } from '../../../src/modelLoader'
|
||||||
|
|
||||||
|
class Kendra_VectorStores implements INode {
|
||||||
|
label: string
|
||||||
|
name: string
|
||||||
|
version: number
|
||||||
|
description: string
|
||||||
|
type: string
|
||||||
|
icon: string
|
||||||
|
category: string
|
||||||
|
badge: string
|
||||||
|
baseClasses: string[]
|
||||||
|
inputs: INodeParams[]
|
||||||
|
credential: INodeParams
|
||||||
|
outputs: INodeOutputsValue[]
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.label = 'AWS Kendra'
|
||||||
|
this.name = 'kendra'
|
||||||
|
this.version = 1.0
|
||||||
|
this.type = 'Kendra'
|
||||||
|
this.icon = 'kendra.svg'
|
||||||
|
this.category = 'Vector Stores'
|
||||||
|
this.description = `Use AWS Kendra's intelligent search service for document retrieval and semantic search`
|
||||||
|
this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
|
||||||
|
this.credential = {
|
||||||
|
label: 'AWS Credential',
|
||||||
|
name: 'credential',
|
||||||
|
type: 'credential',
|
||||||
|
credentialNames: ['awsApi'],
|
||||||
|
optional: true
|
||||||
|
}
|
||||||
|
this.inputs = [
|
||||||
|
{
|
||||||
|
label: 'Document',
|
||||||
|
name: 'document',
|
||||||
|
type: 'Document',
|
||||||
|
list: true,
|
||||||
|
optional: true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Region',
|
||||||
|
name: 'region',
|
||||||
|
type: 'asyncOptions',
|
||||||
|
loadMethod: 'listRegions',
|
||||||
|
default: 'us-east-1'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Kendra Index ID',
|
||||||
|
name: 'indexId',
|
||||||
|
type: 'string',
|
||||||
|
placeholder: 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
|
||||||
|
description: 'The ID of your AWS Kendra index'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'File Upload',
|
||||||
|
name: 'fileUpload',
|
||||||
|
description: 'Allow file upload on the chat',
|
||||||
|
hint: {
|
||||||
|
label: 'How to use',
|
||||||
|
value: howToUseFileUpload
|
||||||
|
},
|
||||||
|
type: 'boolean',
|
||||||
|
additionalParams: true,
|
||||||
|
optional: true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Top K',
|
||||||
|
name: 'topK',
|
||||||
|
description: 'Number of top results to fetch. Default to 10',
|
||||||
|
placeholder: '10',
|
||||||
|
type: 'number',
|
||||||
|
additionalParams: true,
|
||||||
|
optional: true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Attribute Filter',
|
||||||
|
name: 'attributeFilter',
|
||||||
|
description: 'Optional filter to apply when retrieving documents',
|
||||||
|
type: 'json',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
// Note: Kendra doesn't support MMR search, but keeping the structure consistent
|
||||||
|
this.outputs = [
|
||||||
|
{
|
||||||
|
label: 'Kendra Retriever',
|
||||||
|
name: 'retriever',
|
||||||
|
baseClasses: this.baseClasses
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Kendra Vector Store',
|
||||||
|
name: 'vectorStore',
|
||||||
|
baseClasses: [this.type, 'BaseRetriever']
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
loadMethods = {
|
||||||
|
async listRegions(): Promise<INodeOptionsValue[]> {
|
||||||
|
return await getRegions(MODEL_TYPE.CHAT, 'awsChatBedrock')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//@ts-ignore
|
||||||
|
vectorStoreMethods = {
|
||||||
|
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
|
||||||
|
const indexId = nodeData.inputs?.indexId as string
|
||||||
|
const region = nodeData.inputs?.region as string
|
||||||
|
const docs = nodeData.inputs?.document as Document[]
|
||||||
|
const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean
|
||||||
|
|
||||||
|
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||||
|
let clientConfig: any = { region }
|
||||||
|
|
||||||
|
if (credentialData && Object.keys(credentialData).length !== 0) {
|
||||||
|
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
|
||||||
|
const secretAccessKey = getCredentialParam('awsSecret', credentialData, nodeData)
|
||||||
|
const sessionToken = getCredentialParam('awsSession', credentialData, nodeData)
|
||||||
|
|
||||||
|
if (accessKeyId && secretAccessKey) {
|
||||||
|
clientConfig.credentials = {
|
||||||
|
accessKeyId,
|
||||||
|
secretAccessKey,
|
||||||
|
...(sessionToken && { sessionToken })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const client = new KendraClient(clientConfig)
|
||||||
|
|
||||||
|
const flattenDocs = docs && docs.length ? flatten(docs) : []
|
||||||
|
const finalDocs = []
|
||||||
|
const kendraDocuments = []
|
||||||
|
|
||||||
|
for (let i = 0; i < flattenDocs.length; i += 1) {
|
||||||
|
if (flattenDocs[i] && flattenDocs[i].pageContent) {
|
||||||
|
if (isFileUploadEnabled && options.chatId) {
|
||||||
|
flattenDocs[i].metadata = { ...flattenDocs[i].metadata, [FLOWISE_CHATID]: options.chatId }
|
||||||
|
}
|
||||||
|
finalDocs.push(new Document(flattenDocs[i]))
|
||||||
|
|
||||||
|
// Prepare document for Kendra
|
||||||
|
const docId = `doc_${Date.now()}_${i}`
|
||||||
|
const docTitle = flattenDocs[i].metadata?.title || flattenDocs[i].metadata?.source || `Document ${i + 1}`
|
||||||
|
|
||||||
|
kendraDocuments.push({
|
||||||
|
Id: docId,
|
||||||
|
Title: docTitle,
|
||||||
|
Blob: new Uint8Array(Buffer.from(flattenDocs[i].pageContent, 'utf-8')),
|
||||||
|
ContentType: 'PLAIN_TEXT' as any
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (kendraDocuments.length > 0) {
|
||||||
|
// Kendra has a limit of 10 documents per batch
|
||||||
|
const batchSize = 10
|
||||||
|
for (let i = 0; i < kendraDocuments.length; i += batchSize) {
|
||||||
|
const batch = kendraDocuments.slice(i, i + batchSize)
|
||||||
|
const command = new BatchPutDocumentCommand({
|
||||||
|
IndexId: indexId,
|
||||||
|
Documents: batch
|
||||||
|
})
|
||||||
|
|
||||||
|
const response = await client.send(command)
|
||||||
|
|
||||||
|
if (response.FailedDocuments && response.FailedDocuments.length > 0) {
|
||||||
|
console.error('Failed documents:', response.FailedDocuments)
|
||||||
|
throw new Error(`Failed to index some documents: ${JSON.stringify(response.FailedDocuments)}`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { numAdded: finalDocs.length, addedDocs: finalDocs }
|
||||||
|
} catch (error) {
|
||||||
|
throw new Error(`Failed to index documents to Kendra: ${error}`)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
|
||||||
|
const indexId = nodeData.inputs?.indexId as string
|
||||||
|
const region = nodeData.inputs?.region as string
|
||||||
|
|
||||||
|
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||||
|
let clientConfig: any = { region }
|
||||||
|
|
||||||
|
if (credentialData && Object.keys(credentialData).length !== 0) {
|
||||||
|
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
|
||||||
|
const secretAccessKey = getCredentialParam('awsSecret', credentialData, nodeData)
|
||||||
|
const sessionToken = getCredentialParam('awsSession', credentialData, nodeData)
|
||||||
|
|
||||||
|
if (accessKeyId && secretAccessKey) {
|
||||||
|
clientConfig.credentials = {
|
||||||
|
accessKeyId,
|
||||||
|
secretAccessKey,
|
||||||
|
...(sessionToken && { sessionToken })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const client = new KendraClient(clientConfig)
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Kendra has a limit of 10 documents per batch delete
|
||||||
|
const batchSize = 10
|
||||||
|
for (let i = 0; i < ids.length; i += batchSize) {
|
||||||
|
const batch = ids.slice(i, i + batchSize)
|
||||||
|
const command = new BatchDeleteDocumentCommand({
|
||||||
|
IndexId: indexId,
|
||||||
|
DocumentIdList: batch
|
||||||
|
})
|
||||||
|
await client.send(command)
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
throw new Error(`Failed to delete documents from Kendra: ${error}`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||||
|
const indexId = nodeData.inputs?.indexId as string
|
||||||
|
const region = nodeData.inputs?.region as string
|
||||||
|
const topK = nodeData.inputs?.topK as string
|
||||||
|
const attributeFilter = nodeData.inputs?.attributeFilter
|
||||||
|
const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean
|
||||||
|
|
||||||
|
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||||
|
let clientOptions: any = {}
|
||||||
|
|
||||||
|
if (credentialData && Object.keys(credentialData).length !== 0) {
|
||||||
|
clientOptions.credentials = {
|
||||||
|
accessKeyId: getCredentialParam('awsKey', credentialData, nodeData),
|
||||||
|
secretAccessKey: getCredentialParam('awsSecret', credentialData, nodeData),
|
||||||
|
sessionToken: getCredentialParam('awsSession', credentialData, nodeData)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let filter = undefined
|
||||||
|
if (attributeFilter) {
|
||||||
|
filter = typeof attributeFilter === 'object' ? attributeFilter : JSON.parse(attributeFilter)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add chat-specific filtering if file upload is enabled
|
||||||
|
if (isFileUploadEnabled && options.chatId) {
|
||||||
|
if (!filter) {
|
||||||
|
filter = {}
|
||||||
|
}
|
||||||
|
filter.OrAllFilters = [
|
||||||
|
...(filter.OrAllFilters || []),
|
||||||
|
{
|
||||||
|
EqualsTo: {
|
||||||
|
Key: FLOWISE_CHATID,
|
||||||
|
Value: {
|
||||||
|
StringValue: options.chatId
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
const retriever = new AmazonKendraRetriever({
|
||||||
|
topK: topK ? parseInt(topK) : 10,
|
||||||
|
indexId,
|
||||||
|
region,
|
||||||
|
attributeFilter: filter,
|
||||||
|
clientOptions
|
||||||
|
})
|
||||||
|
|
||||||
|
const output = nodeData.outputs?.output as string
|
||||||
|
|
||||||
|
if (output === 'retriever') {
|
||||||
|
return retriever
|
||||||
|
} else if (output === 'vectorStore') {
|
||||||
|
// Kendra doesn't have a traditional vector store interface,
|
||||||
|
// but we can return the retriever with additional properties
|
||||||
|
;(retriever as any).k = topK ? parseInt(topK) : 10
|
||||||
|
;(retriever as any).filter = filter
|
||||||
|
return retriever
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = { nodeClass: Kendra_VectorStores }
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 80 80">
|
||||||
|
<defs>
|
||||||
|
<linearGradient id="kendraGradient" x1="0%" y1="0%" x2="100%" y2="100%">
|
||||||
|
<stop offset="0%" style="stop-color:#FF9900;stop-opacity:1" />
|
||||||
|
<stop offset="100%" style="stop-color:#FF6600;stop-opacity:1" />
|
||||||
|
</linearGradient>
|
||||||
|
</defs>
|
||||||
|
<!-- Background -->
|
||||||
|
<rect width="80" height="80" rx="4" fill="#232F3E"/>
|
||||||
|
|
||||||
|
<!-- AWS Kendra Icon representation -->
|
||||||
|
<!-- Main search/document icon -->
|
||||||
|
<g transform="translate(42, 35) scale(1.5)">
|
||||||
|
<!-- Document stack -->
|
||||||
|
<rect x="-18" y="-12" width="24" height="28" rx="2" fill="#FF9900" opacity="0.3" transform="translate(3, -3)"/>
|
||||||
|
<rect x="-18" y="-12" width="24" height="28" rx="2" fill="#FF9900" opacity="0.6" transform="translate(1.5, -1.5)"/>
|
||||||
|
<rect x="-18" y="-12" width="24" height="28" rx="2" fill="url(#kendraGradient)"/>
|
||||||
|
|
||||||
|
<!-- Search lines on document -->
|
||||||
|
<line x1="-12" y1="-4" x2="0" y2="-4" stroke="white" stroke-width="2" stroke-linecap="round"/>
|
||||||
|
<line x1="-12" y1="2" x2="-2" y2="2" stroke="white" stroke-width="2" stroke-linecap="round"/>
|
||||||
|
<line x1="-12" y1="8" x2="0" y2="8" stroke="white" stroke-width="2" stroke-linecap="round"/>
|
||||||
|
|
||||||
|
<!-- Magnifying glass -->
|
||||||
|
<circle cx="10" cy="5" r="7" fill="none" stroke="white" stroke-width="2.5"/>
|
||||||
|
<line x1="15" y1="10" x2="20" y2="15" stroke="white" stroke-width="2.5" stroke-linecap="round"/>
|
||||||
|
</g>
|
||||||
|
<!-- AWS Kendra text (optional, small) -->
|
||||||
|
<text x="40" y="74" font-family="Arial, sans-serif" font-size="14" fill="#FF9900" text-anchor="middle">Kendra</text>
|
||||||
|
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.6 KiB |
@@ -25,6 +25,7 @@
|
|||||||
"@arizeai/openinference-instrumentation-langchain": "^2.0.0",
|
"@arizeai/openinference-instrumentation-langchain": "^2.0.0",
|
||||||
"@aws-sdk/client-bedrock-runtime": "3.422.0",
|
"@aws-sdk/client-bedrock-runtime": "3.422.0",
|
||||||
"@aws-sdk/client-dynamodb": "^3.360.0",
|
"@aws-sdk/client-dynamodb": "^3.360.0",
|
||||||
|
"@aws-sdk/client-kendra": "^3.750.0",
|
||||||
"@aws-sdk/client-s3": "^3.844.0",
|
"@aws-sdk/client-s3": "^3.844.0",
|
||||||
"@aws-sdk/client-secrets-manager": "^3.699.0",
|
"@aws-sdk/client-secrets-manager": "^3.699.0",
|
||||||
"@aws-sdk/client-sns": "^3.699.0",
|
"@aws-sdk/client-sns": "^3.699.0",
|
||||||
|
|||||||
Generated
+307
-314
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user