mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 17:01:00 +03:00
Merge branch 'main' into chore/Upgrade-LC-version
# Conflicts: # packages/components/package.json
This commit is contained in:
@@ -0,0 +1,383 @@
|
||||
import {
|
||||
BaseNode,
|
||||
Document,
|
||||
Metadata,
|
||||
VectorStore,
|
||||
VectorStoreQuery,
|
||||
VectorStoreQueryResult,
|
||||
serviceContextFromDefaults,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
BaseEmbedding
|
||||
} from 'llamaindex'
|
||||
import { FetchResponse, Index, Pinecone, ScoredPineconeRecord } from '@pinecone-database/pinecone'
|
||||
import { flatten } from 'lodash'
|
||||
import { Document as LCDocument } from 'langchain/document'
|
||||
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { flattenObject, getCredentialData, getCredentialParam } from '../../../src/utils'
|
||||
|
||||
class PineconeLlamaIndex_VectorStores implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
category: string
|
||||
tags: string[]
|
||||
baseClasses: string[]
|
||||
inputs: INodeParams[]
|
||||
credential: INodeParams
|
||||
outputs: INodeOutputsValue[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'Pinecone'
|
||||
this.name = 'pineconeLlamaIndex'
|
||||
this.version = 1.0
|
||||
this.type = 'Pinecone'
|
||||
this.icon = 'pinecone.svg'
|
||||
this.category = 'Vector Stores'
|
||||
this.description = `Upsert embedded data and perform similarity search upon query using Pinecone, a leading fully managed hosted vector database`
|
||||
this.baseClasses = [this.type, 'VectorIndexRetriever']
|
||||
this.tags = ['LlamaIndex']
|
||||
this.credential = {
|
||||
label: 'Connect Credential',
|
||||
name: 'credential',
|
||||
type: 'credential',
|
||||
credentialNames: ['pineconeApi']
|
||||
}
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Document',
|
||||
name: 'document',
|
||||
type: 'Document',
|
||||
list: true,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Chat Model',
|
||||
name: 'model',
|
||||
type: 'BaseChatModel_LlamaIndex'
|
||||
},
|
||||
{
|
||||
label: 'Embeddings',
|
||||
name: 'embeddings',
|
||||
type: 'BaseEmbedding_LlamaIndex'
|
||||
},
|
||||
{
|
||||
label: 'Pinecone Index',
|
||||
name: 'pineconeIndex',
|
||||
type: 'string'
|
||||
},
|
||||
{
|
||||
label: 'Pinecone Namespace',
|
||||
name: 'pineconeNamespace',
|
||||
type: 'string',
|
||||
placeholder: 'my-first-namespace',
|
||||
additionalParams: true,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Pinecone Metadata Filter',
|
||||
name: 'pineconeMetadataFilter',
|
||||
type: 'json',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Top K',
|
||||
name: 'topK',
|
||||
description: 'Number of top results to fetch. Default to 4',
|
||||
placeholder: '4',
|
||||
type: 'number',
|
||||
additionalParams: true,
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
this.outputs = [
|
||||
{
|
||||
label: 'Pinecone Retriever',
|
||||
name: 'retriever',
|
||||
baseClasses: this.baseClasses
|
||||
},
|
||||
{
|
||||
label: 'Pinecone Vector Store Index',
|
||||
name: 'vectorStore',
|
||||
baseClasses: [this.type, 'VectorStoreIndex']
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
//@ts-ignore
|
||||
vectorStoreMethods = {
|
||||
async upsert(nodeData: INodeData, options: ICommonObject): Promise<void> {
|
||||
const indexName = nodeData.inputs?.pineconeIndex as string
|
||||
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
|
||||
const docs = nodeData.inputs?.document as LCDocument[]
|
||||
const embeddings = nodeData.inputs?.embeddings as BaseEmbedding
|
||||
const model = nodeData.inputs?.model
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const pineconeApiKey = getCredentialParam('pineconeApiKey', credentialData, nodeData)
|
||||
|
||||
const pcvs = new PineconeVectorStore({
|
||||
indexName,
|
||||
apiKey: pineconeApiKey,
|
||||
namespace: pineconeNamespace
|
||||
})
|
||||
|
||||
const flattenDocs = docs && docs.length ? flatten(docs) : []
|
||||
const finalDocs = []
|
||||
for (let i = 0; i < flattenDocs.length; i += 1) {
|
||||
if (flattenDocs[i] && flattenDocs[i].pageContent) {
|
||||
finalDocs.push(new LCDocument(flattenDocs[i]))
|
||||
}
|
||||
}
|
||||
|
||||
const llamadocs: Document[] = []
|
||||
for (const doc of finalDocs) {
|
||||
llamadocs.push(new Document({ text: doc.pageContent, metadata: doc.metadata }))
|
||||
}
|
||||
|
||||
const serviceContext = serviceContextFromDefaults({ llm: model, embedModel: embeddings })
|
||||
const storageContext = await storageContextFromDefaults({ vectorStore: pcvs })
|
||||
|
||||
try {
|
||||
await VectorStoreIndex.fromDocuments(llamadocs, { serviceContext, storageContext })
|
||||
} catch (e) {
|
||||
throw new Error(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const indexName = nodeData.inputs?.pineconeIndex as string
|
||||
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
|
||||
const pineconeMetadataFilter = nodeData.inputs?.pineconeMetadataFilter
|
||||
const embeddings = nodeData.inputs?.embeddings as BaseEmbedding
|
||||
const model = nodeData.inputs?.model
|
||||
const topK = nodeData.inputs?.topK as string
|
||||
const k = topK ? parseFloat(topK) : 4
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const pineconeApiKey = getCredentialParam('pineconeApiKey', credentialData, nodeData)
|
||||
|
||||
const obj: PineconeParams = {
|
||||
indexName,
|
||||
apiKey: pineconeApiKey
|
||||
}
|
||||
|
||||
if (pineconeNamespace) obj.namespace = pineconeNamespace
|
||||
|
||||
let metadatafilter = {}
|
||||
if (pineconeMetadataFilter) {
|
||||
metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter)
|
||||
obj.queryFilter = metadatafilter
|
||||
}
|
||||
|
||||
const pcvs = new PineconeVectorStore(obj)
|
||||
|
||||
const serviceContext = serviceContextFromDefaults({ llm: model, embedModel: embeddings })
|
||||
const storageContext = await storageContextFromDefaults({ vectorStore: pcvs })
|
||||
|
||||
const index = await VectorStoreIndex.init({
|
||||
nodes: [],
|
||||
storageContext,
|
||||
serviceContext
|
||||
})
|
||||
|
||||
const output = nodeData.outputs?.output as string
|
||||
|
||||
if (output === 'retriever') {
|
||||
const retriever = index.asRetriever()
|
||||
retriever.similarityTopK = k
|
||||
;(retriever as any).serviceContext = serviceContext
|
||||
return retriever
|
||||
} else if (output === 'vectorStore') {
|
||||
;(index as any).k = k
|
||||
if (metadatafilter) {
|
||||
;(index as any).metadatafilter = metadatafilter
|
||||
}
|
||||
return index
|
||||
}
|
||||
return index
|
||||
}
|
||||
}
|
||||
|
||||
type PineconeParams = {
|
||||
indexName: string
|
||||
apiKey: string
|
||||
namespace?: string
|
||||
chunkSize?: number
|
||||
queryFilter?: object
|
||||
}
|
||||
|
||||
class PineconeVectorStore implements VectorStore {
|
||||
storesText: boolean = true
|
||||
db?: Pinecone
|
||||
indexName: string
|
||||
apiKey: string
|
||||
chunkSize: number
|
||||
namespace?: string
|
||||
queryFilter?: object
|
||||
|
||||
constructor(params: PineconeParams) {
|
||||
this.indexName = params?.indexName
|
||||
this.apiKey = params?.apiKey
|
||||
this.namespace = params?.namespace ?? ''
|
||||
this.chunkSize = params?.chunkSize ?? Number.parseInt(process.env.PINECONE_CHUNK_SIZE ?? '100')
|
||||
this.queryFilter = params?.queryFilter ?? {}
|
||||
}
|
||||
|
||||
private async getDb(): Promise<Pinecone> {
|
||||
if (!this.db) {
|
||||
this.db = new Pinecone({
|
||||
apiKey: this.apiKey
|
||||
})
|
||||
}
|
||||
return Promise.resolve(this.db)
|
||||
}
|
||||
|
||||
client() {
|
||||
return this.getDb()
|
||||
}
|
||||
|
||||
async index() {
|
||||
const db: Pinecone = await this.getDb()
|
||||
return db.Index(this.indexName)
|
||||
}
|
||||
|
||||
async clearIndex() {
|
||||
const db: Pinecone = await this.getDb()
|
||||
return await db.index(this.indexName).deleteAll()
|
||||
}
|
||||
|
||||
async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
|
||||
if (embeddingResults.length == 0) {
|
||||
return Promise.resolve([])
|
||||
}
|
||||
|
||||
const idx: Index = await this.index()
|
||||
const nodes = embeddingResults.map(this.nodeToRecord)
|
||||
|
||||
for (let i = 0; i < nodes.length; i += this.chunkSize) {
|
||||
const chunk = nodes.slice(i, i + this.chunkSize)
|
||||
const result = await this.saveChunk(idx, chunk)
|
||||
if (!result) {
|
||||
return Promise.reject()
|
||||
}
|
||||
}
|
||||
return Promise.resolve([])
|
||||
}
|
||||
|
||||
protected async saveChunk(idx: Index, chunk: any) {
|
||||
try {
|
||||
const namespace = idx.namespace(this.namespace ?? '')
|
||||
await namespace.upsert(chunk)
|
||||
return true
|
||||
} catch (err) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
async delete(refDocId: string): Promise<void> {
|
||||
const idx = await this.index()
|
||||
const namespace = idx.namespace(this.namespace ?? '')
|
||||
return namespace.deleteOne(refDocId)
|
||||
}
|
||||
|
||||
async query(query: VectorStoreQuery): Promise<VectorStoreQueryResult> {
|
||||
const queryOptions: any = {
|
||||
vector: query.queryEmbedding,
|
||||
topK: query.similarityTopK,
|
||||
filter: this.queryFilter
|
||||
}
|
||||
|
||||
const idx = await this.index()
|
||||
const namespace = idx.namespace(this.namespace ?? '')
|
||||
const results = await namespace.query(queryOptions)
|
||||
|
||||
const idList = results.matches.map((row) => row.id)
|
||||
const records: FetchResponse<any> = await namespace.fetch(idList)
|
||||
const rows = Object.values(records.records)
|
||||
|
||||
const nodes = rows.map((row) => {
|
||||
return new Document({
|
||||
id_: row.id,
|
||||
text: this.textFromResultRow(row),
|
||||
metadata: this.metaWithoutText(row.metadata),
|
||||
embedding: row.values
|
||||
})
|
||||
})
|
||||
|
||||
const result = {
|
||||
nodes: nodes,
|
||||
similarities: results.matches.map((row) => row.score || 999),
|
||||
ids: results.matches.map((row) => row.id)
|
||||
}
|
||||
|
||||
return Promise.resolve(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Required by VectorStore interface. Currently ignored.
|
||||
*/
|
||||
persist(): Promise<void> {
|
||||
return Promise.resolve()
|
||||
}
|
||||
|
||||
textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
|
||||
return row.metadata?.text ?? ''
|
||||
}
|
||||
|
||||
metaWithoutText(meta: Metadata): any {
|
||||
return Object.keys(meta)
|
||||
.filter((key) => key != 'text')
|
||||
.reduce((acc: any, key: string) => {
|
||||
acc[key] = meta[key]
|
||||
return acc
|
||||
}, {})
|
||||
}
|
||||
|
||||
nodeToRecord(node: BaseNode<Metadata>) {
|
||||
let id: any = node.id_.length ? node.id_ : null
|
||||
return {
|
||||
id: id,
|
||||
values: node.getEmbedding(),
|
||||
metadata: {
|
||||
...cleanupMetadata(node.metadata),
|
||||
text: (node as any).text
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cleanupMetadata = (nodeMetadata: ICommonObject) => {
|
||||
// Pinecone doesn't support nested objects, so we flatten them
|
||||
const documentMetadata: any = { ...nodeMetadata }
|
||||
// preserve string arrays which are allowed
|
||||
const stringArrays: Record<string, string[]> = {}
|
||||
for (const key of Object.keys(documentMetadata)) {
|
||||
if (Array.isArray(documentMetadata[key]) && documentMetadata[key].every((el: any) => typeof el === 'string')) {
|
||||
stringArrays[key] = documentMetadata[key]
|
||||
delete documentMetadata[key]
|
||||
}
|
||||
}
|
||||
const metadata: {
|
||||
[key: string]: string | number | boolean | string[] | null
|
||||
} = {
|
||||
...flattenObject(documentMetadata),
|
||||
...stringArrays
|
||||
}
|
||||
// Pinecone doesn't support null values, so we remove them
|
||||
for (const key of Object.keys(metadata)) {
|
||||
if (metadata[key] == null) {
|
||||
delete metadata[key]
|
||||
} else if (typeof metadata[key] === 'object' && Object.keys(metadata[key] as unknown as object).length === 0) {
|
||||
delete metadata[key]
|
||||
}
|
||||
}
|
||||
return metadata
|
||||
}
|
||||
|
||||
module.exports = { nodeClass: PineconeLlamaIndex_VectorStores }
|
||||
@@ -0,0 +1,145 @@
|
||||
import path from 'path'
|
||||
import { flatten } from 'lodash'
|
||||
import { storageContextFromDefaults, serviceContextFromDefaults, VectorStoreIndex, Document } from 'llamaindex'
|
||||
import { Document as LCDocument } from 'langchain/document'
|
||||
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { getUserHome } from '../../../src'
|
||||
|
||||
class SimpleStoreUpsert_LlamaIndex_VectorStores implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
category: string
|
||||
baseClasses: string[]
|
||||
tags: string[]
|
||||
inputs: INodeParams[]
|
||||
outputs: INodeOutputsValue[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'SimpleStore'
|
||||
this.name = 'simpleStoreLlamaIndex'
|
||||
this.version = 1.0
|
||||
this.type = 'SimpleVectorStore'
|
||||
this.icon = 'simplevs.svg'
|
||||
this.category = 'Vector Stores'
|
||||
this.description = 'Upsert embedded data to local path and perform similarity search'
|
||||
this.baseClasses = [this.type, 'VectorIndexRetriever']
|
||||
this.tags = ['LlamaIndex']
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Document',
|
||||
name: 'document',
|
||||
type: 'Document',
|
||||
list: true,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Chat Model',
|
||||
name: 'model',
|
||||
type: 'BaseChatModel_LlamaIndex'
|
||||
},
|
||||
{
|
||||
label: 'Embeddings',
|
||||
name: 'embeddings',
|
||||
type: 'BaseEmbedding_LlamaIndex'
|
||||
},
|
||||
{
|
||||
label: 'Base Path to store',
|
||||
name: 'basePath',
|
||||
description:
|
||||
'Path to store persist embeddings indexes with persistence. If not specified, default to same path where database is stored',
|
||||
type: 'string',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Top K',
|
||||
name: 'topK',
|
||||
description: 'Number of top results to fetch. Default to 4',
|
||||
placeholder: '4',
|
||||
type: 'number',
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
this.outputs = [
|
||||
{
|
||||
label: 'SimpleStore Retriever',
|
||||
name: 'retriever',
|
||||
baseClasses: this.baseClasses
|
||||
},
|
||||
{
|
||||
label: 'SimpleStore Vector Store Index',
|
||||
name: 'vectorStore',
|
||||
baseClasses: [this.type, 'VectorStoreIndex']
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
//@ts-ignore
|
||||
vectorStoreMethods = {
|
||||
async upsert(nodeData: INodeData): Promise<void> {
|
||||
const basePath = nodeData.inputs?.basePath as string
|
||||
const docs = nodeData.inputs?.document as LCDocument[]
|
||||
const embeddings = nodeData.inputs?.embeddings
|
||||
const model = nodeData.inputs?.model
|
||||
|
||||
let filePath = ''
|
||||
if (!basePath) filePath = path.join(getUserHome(), '.flowise', 'llamaindex')
|
||||
else filePath = basePath
|
||||
|
||||
const flattenDocs = docs && docs.length ? flatten(docs) : []
|
||||
const finalDocs = []
|
||||
for (let i = 0; i < flattenDocs.length; i += 1) {
|
||||
finalDocs.push(new LCDocument(flattenDocs[i]))
|
||||
}
|
||||
|
||||
const llamadocs: Document[] = []
|
||||
for (const doc of finalDocs) {
|
||||
llamadocs.push(new Document({ text: doc.pageContent, metadata: doc.metadata }))
|
||||
}
|
||||
|
||||
const serviceContext = serviceContextFromDefaults({ llm: model, embedModel: embeddings })
|
||||
const storageContext = await storageContextFromDefaults({ persistDir: filePath })
|
||||
|
||||
try {
|
||||
await VectorStoreIndex.fromDocuments(llamadocs, { serviceContext, storageContext })
|
||||
} catch (e) {
|
||||
throw new Error(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
const basePath = nodeData.inputs?.basePath as string
|
||||
const embeddings = nodeData.inputs?.embeddings
|
||||
const model = nodeData.inputs?.model
|
||||
const topK = nodeData.inputs?.topK as string
|
||||
const k = topK ? parseFloat(topK) : 4
|
||||
|
||||
let filePath = ''
|
||||
if (!basePath) filePath = path.join(getUserHome(), '.flowise', 'llamaindex')
|
||||
else filePath = basePath
|
||||
|
||||
const serviceContext = serviceContextFromDefaults({ llm: model, embedModel: embeddings })
|
||||
const storageContext = await storageContextFromDefaults({ persistDir: filePath })
|
||||
|
||||
const index = await VectorStoreIndex.init({ storageContext, serviceContext })
|
||||
|
||||
const output = nodeData.outputs?.output as string
|
||||
|
||||
if (output === 'retriever') {
|
||||
const retriever = index.asRetriever()
|
||||
retriever.similarityTopK = k
|
||||
;(retriever as any).serviceContext = serviceContext
|
||||
return retriever
|
||||
} else if (output === 'vectorStore') {
|
||||
;(index as any).k = k
|
||||
return index
|
||||
}
|
||||
return index
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { nodeClass: SimpleStoreUpsert_LlamaIndex_VectorStores }
|
||||
@@ -0,0 +1,6 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="icon icon-tabler icon-tabler-database" width="24" height="24" viewBox="0 0 24 24" stroke-width="2" stroke="currentColor" fill="none" stroke-linecap="round" stroke-linejoin="round">
|
||||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||||
<path d="M12 6m-8 0a8 3 0 1 0 16 0a8 3 0 1 0 -16 0"></path>
|
||||
<path d="M4 6v6a8 3 0 0 0 16 0v-6"></path>
|
||||
<path d="M4 12v6a8 3 0 0 0 16 0v-6"></path>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 451 B |
Reference in New Issue
Block a user