Feature/Add new doc store upsert and refresh API (#3556)

Add new doc store upsert and refresh API
2026-06-22 09:01:09 +03:00 · 2024-11-25 15:47:13 +00:00
parent 36496b1611
commit a2c36b4447
15 changed files with 1424 additions and 803 deletions
@@ -26,7 +26,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
    constructor() {
        this.label = 'Unstructured File Loader'
        this.name = 'unstructuredFileLoader'
-        this.version = 3.0
+        this.version = 4.0
        this.type = 'Document'
        this.icon = 'unstructured-file.svg'
        this.category = 'Document Loaders'
@@ -40,6 +40,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
            optional: true
        }
        this.inputs = [
+            /** Deprecated
            {
                label: 'File Path',
                name: 'filePath',
@@ -49,6 +50,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
                warning:
                    'Use the File Upload instead of File path. If file is uploaded, this path is ignored. Path will be deprecated in future releases.'
            },
+             */
            {
                label: 'Files Upload',
                name: 'fileObject',
@@ -200,7 +202,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
            {
                label: 'Hi-Res Model Name',
                name: 'hiResModelName',
-                description: 'The name of the inference model used when strategy is hi_res. Default: detectron2_onnx.',
+                description: 'The name of the inference model used when strategy is hi_res',
                type: 'options',
                options: [
                    {
@@ -227,8 +229,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
                    }
                ],
                optional: true,
-                additionalParams: true,
-                default: 'detectron2_onnx'
+                additionalParams: true
            },
            {
                label: 'Chunking Strategy',
@@ -241,9 +242,21 @@ class UnstructuredFile_DocumentLoaders implements INode {
                        label: 'None',
                        name: 'None'
                    },
+                    {
+                        label: 'Basic',
+                        name: 'basic'
+                    },
                    {
                        label: 'By Title',
                        name: 'by_title'
+                    },
+                    {
+                        label: 'By Page',
+                        name: 'by_page'
+                    },
+                    {
+                        label: 'By Similarity',
+                        name: 'by_similarity'
                    }
                ],
                optional: true,
@@ -434,15 +447,15 @@ class UnstructuredFile_DocumentLoaders implements INode {
            : ([] as SkipInferTableTypes[])
        const hiResModelName = nodeData.inputs?.hiResModelName as HiResModelName
        const includePageBreaks = nodeData.inputs?.includePageBreaks as boolean
-        const chunkingStrategy = nodeData.inputs?.chunkingStrategy as 'None' | 'by_title'
+        const chunkingStrategy = nodeData.inputs?.chunkingStrategy as string
        const metadata = nodeData.inputs?.metadata
        const sourceIdKey = (nodeData.inputs?.sourceIdKey as string) || 'source'
        const ocrLanguages = nodeData.inputs?.ocrLanguages ? JSON.parse(nodeData.inputs?.ocrLanguages as string) : ([] as string[])
        const xmlKeepTags = nodeData.inputs?.xmlKeepTags as boolean
        const multiPageSections = nodeData.inputs?.multiPageSections as boolean
-        const combineUnderNChars = nodeData.inputs?.combineUnderNChars as number
-        const newAfterNChars = nodeData.inputs?.newAfterNChars as number
-        const maxCharacters = nodeData.inputs?.maxCharacters as number
+        const combineUnderNChars = nodeData.inputs?.combineUnderNChars as string
+        const newAfterNChars = nodeData.inputs?.newAfterNChars as string
+        const maxCharacters = nodeData.inputs?.maxCharacters as string
        const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string

        let omitMetadataKeys: string[] = []
@@ -471,10 +484,19 @@ class UnstructuredFile_DocumentLoaders implements INode {
            chunkingStrategy,
            ocrLanguages,
            xmlKeepTags,
-            multiPageSections,
-            combineUnderNChars,
-            newAfterNChars,
-            maxCharacters
+            multiPageSections
+        }
+
+        if (combineUnderNChars) {
+            obj.combineUnderNChars = parseInt(combineUnderNChars, 10)
+        }
+
+        if (newAfterNChars) {
+            obj.newAfterNChars = parseInt(newAfterNChars, 10)
+        }
+
+        if (maxCharacters) {
+            obj.maxCharacters = parseInt(maxCharacters, 10)
        }

        const credentialData = await getCredentialData(nodeData.credential ?? '', options)