Feature/DocumentStore (#2106)

* datasource: initial commit * datasource: datasource details and chunks * datasource: Document Store Node * more changes * Document Store - Base functionality * Document Store Loader Component * Document Store Loader Component * before merging the modularity PR * after merging the modularity PR * preview mode * initial draft PR * fixes * minor updates and fixes * preview with loader and splitter * preview with credential * show stored chunks * preview update... * edit config * save, preview and other changes * save, preview and other changes * save, process and other changes * save, process and other changes * alpha1 - for internal testing * rerouting urls * bug fix on new leader create * pagination support for chunks * delete document store * Update pnpm-lock.yaml * doc store card view * Update store files to use updated storage functions, Document Store Table View and other changes * ui changes * add expanded chunk dialog, improve ui * change throw Error to InternalError * Bug Fixes and removal of subFolder, adding of view chunks for store * lint fixes * merge changes * DocumentStoreStatus component * ui changes for doc store * add remove metadata key field, add custom document loader * add chatflows used doc store chips * add types/interfaces to DocumentStore Services * document loader list dialog title bar color change * update interfaces * Whereused Chatflow Name and Added chunkNo to retain order of created chunks. * use typeorm order chunkNo, ui changes --------- Co-authored-by: Henry <hzj94@hotmail.com> Co-authored-by: Henry Heng <henryheng@flowiseai.com>
2026-06-29 09:01:06 +03:00 · 2024-05-06 19:53:27 +05:30
parent af4e28aa91
commit 40e36d1b39
91 changed files with 38713 additions and 32791 deletions
@@ -1,9 +1,10 @@
 import axios from 'axios'
+import { omit } from 'lodash'
 import { Document } from '@langchain/core/documents'
 import { TextSplitter } from 'langchain/text_splitter'
 import { BaseDocumentLoader } from 'langchain/document_loaders/base'
 import { getCredentialData, getCredentialParam } from '../../../src/utils'
-import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
+import { IDocument, ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'

 class Airtable_DocumentLoaders implements INode {
    label: string
@@ -93,9 +94,21 @@ class Airtable_DocumentLoaders implements INode {
                description: 'Number of results to return. Ignored when Return All is enabled.'
            },
            {
-                label: 'Metadata',
+                label: 'Additional Metadata',
                name: 'metadata',
                type: 'json',
+                description: 'Additional metadata to be added to the extracted documents',
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Omit Metadata Keys',
+                name: 'omitMetadataKeys',
+                type: 'string',
+                rows: 4,
+                description:
+                    'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
+                placeholder: 'key1, key2, key3.nestedKey1',
                optional: true,
                additionalParams: true
            }
@@ -111,6 +124,12 @@ class Airtable_DocumentLoaders implements INode {
        const limit = nodeData.inputs?.limit as string
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata
+        const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
+
+        let omitMetadataKeys: string[] = []
+        if (_omitMetadataKeys) {
+            omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
+        }

        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
@@ -131,7 +150,7 @@ class Airtable_DocumentLoaders implements INode {
            throw new Error('Base ID and Table ID must be provided.')
        }

-        let docs = []
+        let docs: IDocument[] = []

        if (textSplitter) {
            docs = await loader.loadAndSplit(textSplitter)
@@ -141,18 +160,26 @@ class Airtable_DocumentLoaders implements INode {

        if (metadata) {
            const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
-            let finaldocs = []
-            for (const doc of docs) {
-                const newdoc = {
-                    ...doc,
-                    metadata: {
+            docs = docs.map((doc) => ({
+                ...doc,
+                metadata: omit(
+                    {
                        ...doc.metadata,
                        ...parsedMetadata
-                    }
-                }
-                finaldocs.push(newdoc)
-            }
-            return finaldocs
+                    },
+                    omitMetadataKeys
+                )
+            }))
+        } else {
+            docs = docs.map((doc) => ({
+                ...doc,
+                metadata: omit(
+                    {
+                        ...doc.metadata
+                    },
+                    omitMetadataKeys
+                )
+            }))
        }

        return docs
@@ -213,7 +240,7 @@ class AirtableLoader extends BaseDocumentLoader {
        this.returnAll = returnAll
    }

-    public async load(): Promise<Document[]> {
+    public async load(): Promise<IDocument[]> {
        if (this.returnAll) {
            return this.loadAll()
        }
@@ -238,7 +265,7 @@ class AirtableLoader extends BaseDocumentLoader {
        }
    }

-    private createDocumentFromPage(page: AirtableLoaderPage): Document {
+    private createDocumentFromPage(page: AirtableLoaderPage): IDocument {
        // Generate the URL
        const pageUrl = `https://api.airtable.com/v0/${this.baseId}/${this.tableId}/${page.id}`

@@ -251,7 +278,7 @@ class AirtableLoader extends BaseDocumentLoader {
        })
    }

-    private async loadLimit(): Promise<Document[]> {
+    private async loadLimit(): Promise<IDocument[]> {
        let data: AirtableLoaderRequest = {
            maxRecords: this.limit,
            view: this.viewId
@@ -282,7 +309,7 @@ class AirtableLoader extends BaseDocumentLoader {
        return returnPages.map((page) => this.createDocumentFromPage(page))
    }

-    private async loadAll(): Promise<Document[]> {
+    private async loadAll(): Promise<IDocument[]> {
        let data: AirtableLoaderRequest = {
            view: this.viewId
        }