mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 15:00:57 +03:00
Feature/DocumentStore (#2106)
* datasource: initial commit * datasource: datasource details and chunks * datasource: Document Store Node * more changes * Document Store - Base functionality * Document Store Loader Component * Document Store Loader Component * before merging the modularity PR * after merging the modularity PR * preview mode * initial draft PR * fixes * minor updates and fixes * preview with loader and splitter * preview with credential * show stored chunks * preview update... * edit config * save, preview and other changes * save, preview and other changes * save, process and other changes * save, process and other changes * alpha1 - for internal testing * rerouting urls * bug fix on new leader create * pagination support for chunks * delete document store * Update pnpm-lock.yaml * doc store card view * Update store files to use updated storage functions, Document Store Table View and other changes * ui changes * add expanded chunk dialog, improve ui * change throw Error to InternalError * Bug Fixes and removal of subFolder, adding of view chunks for store * lint fixes * merge changes * DocumentStoreStatus component * ui changes for doc store * add remove metadata key field, add custom document loader * add chatflows used doc store chips * add types/interfaces to DocumentStore Services * document loader list dialog title bar color change * update interfaces * Whereused Chatflow Name and Added chunkNo to retain order of created chunks. * use typeorm order chunkNo, ui changes --------- Co-authored-by: Henry <hzj94@hotmail.com> Co-authored-by: Henry Heng <henryheng@flowiseai.com>
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
import axios, { AxiosRequestConfig } from 'axios'
|
||||
import { omit } from 'lodash'
|
||||
import { Document } from '@langchain/core/documents'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
|
||||
class API_DocumentLoaders implements INode {
|
||||
label: string
|
||||
@@ -66,6 +67,25 @@ class API_DocumentLoaders implements INode {
|
||||
'JSON body for the POST request. If not specified, agent will try to figure out itself from AIPlugin if provided',
|
||||
additionalParams: true,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -76,6 +96,12 @@ class API_DocumentLoaders implements INode {
|
||||
const method = nodeData.inputs?.method as string
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const options: ApiLoaderParams = {
|
||||
url,
|
||||
@@ -94,7 +120,7 @@ class API_DocumentLoaders implements INode {
|
||||
|
||||
const loader = new ApiLoader(options)
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
@@ -104,18 +130,26 @@ class API_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
@@ -146,7 +180,7 @@ class ApiLoader extends BaseDocumentLoader {
|
||||
this.method = method
|
||||
}
|
||||
|
||||
public async load(): Promise<Document[]> {
|
||||
public async load(): Promise<IDocument[]> {
|
||||
if (this.method === 'POST') {
|
||||
return this.executePostRequest(this.url, this.headers, this.body)
|
||||
} else {
|
||||
@@ -154,7 +188,7 @@ class ApiLoader extends BaseDocumentLoader {
|
||||
}
|
||||
}
|
||||
|
||||
protected async executeGetRequest(url: string, headers?: ICommonObject): Promise<Document[]> {
|
||||
protected async executeGetRequest(url: string, headers?: ICommonObject): Promise<IDocument[]> {
|
||||
try {
|
||||
const config: AxiosRequestConfig = {}
|
||||
if (headers) {
|
||||
@@ -174,7 +208,7 @@ class ApiLoader extends BaseDocumentLoader {
|
||||
}
|
||||
}
|
||||
|
||||
protected async executePostRequest(url: string, headers?: ICommonObject, body?: ICommonObject): Promise<Document[]> {
|
||||
protected async executePostRequest(url: string, headers?: ICommonObject, body?: ICommonObject): Promise<IDocument[]> {
|
||||
try {
|
||||
const config: AxiosRequestConfig = {}
|
||||
if (headers) {
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import axios from 'axios'
|
||||
import { omit } from 'lodash'
|
||||
import { Document } from '@langchain/core/documents'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src/utils'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { IDocument, ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
|
||||
class Airtable_DocumentLoaders implements INode {
|
||||
label: string
|
||||
@@ -93,9 +94,21 @@ class Airtable_DocumentLoaders implements INode {
|
||||
description: 'Number of results to return. Ignored when Return All is enabled.'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -111,6 +124,12 @@ class Airtable_DocumentLoaders implements INode {
|
||||
const limit = nodeData.inputs?.limit as string
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
@@ -131,7 +150,7 @@ class Airtable_DocumentLoaders implements INode {
|
||||
throw new Error('Base ID and Table ID must be provided.')
|
||||
}
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
@@ -141,18 +160,26 @@ class Airtable_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
@@ -213,7 +240,7 @@ class AirtableLoader extends BaseDocumentLoader {
|
||||
this.returnAll = returnAll
|
||||
}
|
||||
|
||||
public async load(): Promise<Document[]> {
|
||||
public async load(): Promise<IDocument[]> {
|
||||
if (this.returnAll) {
|
||||
return this.loadAll()
|
||||
}
|
||||
@@ -238,7 +265,7 @@ class AirtableLoader extends BaseDocumentLoader {
|
||||
}
|
||||
}
|
||||
|
||||
private createDocumentFromPage(page: AirtableLoaderPage): Document {
|
||||
private createDocumentFromPage(page: AirtableLoaderPage): IDocument {
|
||||
// Generate the URL
|
||||
const pageUrl = `https://api.airtable.com/v0/${this.baseId}/${this.tableId}/${page.id}`
|
||||
|
||||
@@ -251,7 +278,7 @@ class AirtableLoader extends BaseDocumentLoader {
|
||||
})
|
||||
}
|
||||
|
||||
private async loadLimit(): Promise<Document[]> {
|
||||
private async loadLimit(): Promise<IDocument[]> {
|
||||
let data: AirtableLoaderRequest = {
|
||||
maxRecords: this.limit,
|
||||
view: this.viewId
|
||||
@@ -282,7 +309,7 @@ class AirtableLoader extends BaseDocumentLoader {
|
||||
return returnPages.map((page) => this.createDocumentFromPage(page))
|
||||
}
|
||||
|
||||
private async loadAll(): Promise<Document[]> {
|
||||
private async loadAll(): Promise<IDocument[]> {
|
||||
let data: AirtableLoaderRequest = {
|
||||
view: this.viewId
|
||||
}
|
||||
|
||||
+38
-11
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { INode, INodeData, INodeParams, ICommonObject } from '../../../src/Interface'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src/utils'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
@@ -92,9 +93,21 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -110,6 +123,12 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
// Get input options and merge with additional input
|
||||
const urls = nodeData.inputs?.urls as string
|
||||
@@ -153,18 +172,26 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { omit } from 'lodash'
|
||||
import { CheerioWebBaseLoader, WebBaseLoaderParams } from 'langchain/document_loaders/web/cheerio'
|
||||
import { test } from 'linkifyjs'
|
||||
import { parse } from 'css-what'
|
||||
import { webCrawl, xmlScrape } from '../../../src'
|
||||
import { SelectorType } from 'cheerio'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
|
||||
class Cheerio_DocumentLoaders implements INode {
|
||||
label: string
|
||||
@@ -55,6 +56,7 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
description: 'Scrape relative links from XML sitemap URL'
|
||||
}
|
||||
],
|
||||
default: 'webCrawl',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
@@ -78,9 +80,21 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -94,6 +108,13 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
|
||||
let limit = parseInt(nodeData.inputs?.limit as string)
|
||||
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
url = url.trim()
|
||||
if (!test(url)) {
|
||||
@@ -123,7 +144,8 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
}
|
||||
}
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||
@@ -154,18 +176,26 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { ConfluencePagesLoader, ConfluencePagesLoaderParams } from 'langchain/document_loaders/web/confluence'
|
||||
@@ -59,9 +60,21 @@ class Confluence_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -74,6 +87,12 @@ class Confluence_DocumentLoaders implements INode {
|
||||
const limit = nodeData.inputs?.limit as number
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
@@ -107,18 +126,26 @@ class Confluence_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { CSVLoader } from 'langchain/document_loaders/fs/csv'
|
||||
import { getFileFromStorage } from '../../../src'
|
||||
@@ -45,9 +46,21 @@ class Csv_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -59,8 +72,14 @@ class Csv_DocumentLoaders implements INode {
|
||||
const csvFileBase64 = nodeData.inputs?.csvFile as string
|
||||
const columnName = nodeData.inputs?.columnName as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let alldocs = []
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
if (csvFileBase64.startsWith('FILE-STORAGE::')) {
|
||||
@@ -78,11 +97,9 @@ class Csv_DocumentLoaders implements INode {
|
||||
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
// No text splitter is configured in this branch, so load the documents as-is
// (calling loadAndSplit here would split them even though no splitter was chosen).
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -100,32 +117,38 @@ class Csv_DocumentLoaders implements INode {
|
||||
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return alldocs
|
||||
return docs
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+163
@@ -0,0 +1,163 @@
|
||||
import { ICommonObject, IDatabaseEntity, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { NodeVM } from 'vm2'
|
||||
import { DataSource } from 'typeorm'
|
||||
import { availableDependencies, defaultAllowBuiltInDep, getVars, handleEscapeCharacters, prepareSandboxVars } from '../../../src/utils'
|
||||
|
||||
/**
 * Document loader node that executes a user-supplied JavaScript function inside a
 * vm2 `NodeVM` sandbox and returns whatever that function produces.
 *
 * The sandbox exposes:
 *  - `$input`  : the raw input string passed to `init`
 *  - `$vars`   : the result of `prepareSandboxVars(getVars(...))`
 *  - `$flow`   : `{ chatflowId, sessionId, chatId, input }`
 *  - `$<name>` : one entry per key of the "Input Variables" JSON
 */
class CustomDocumentLoader_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    badge: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Custom Document Loader'
        this.name = 'customDocumentLoader'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'customDocLoader.svg'
        this.category = 'Document Loaders'
        this.badge = 'NEW'
        this.description = `Custom function for loading documents`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Input Variables',
                name: 'functionInputVariables',
                description: 'Input variables can be used in the function with prefix $. For example: $var',
                type: 'json',
                optional: true,
                acceptVariable: true,
                list: true
            },
            {
                label: 'Javascript Function',
                name: 'javascriptFunction',
                type: 'code',
                description: `Must return an array of document objects containing metadata and pageContent if "Document" is selected in the output. If "Text" is selected in the output, it must return a string.`,
                placeholder: `return [
  {
    pageContent: 'Document Content',
    metadata: {
      title: 'Document Title',
    }
  }
]`
            }
        ]
        this.outputs = [
            {
                label: 'Document',
                name: 'document',
                description: 'Array of document objects containing metadata and pageContent',
                baseClasses: [...this.baseClasses, 'json']
            },
            {
                label: 'Text',
                name: 'text',
                description: 'Concatenated string from pageContent of documents',
                baseClasses: ['string', 'json']
            }
        ]
    }

    /**
     * Runs the configured JavaScript function in the sandbox and returns its result.
     *
     * @param nodeData - node configuration; reads `inputs.javascriptFunction`,
     *                   `inputs.functionInputVariables` and `outputs.output`
     * @param input    - raw input string, exposed to the function as `$input`
     * @param options  - runtime options; reads `appDataSource`, `databaseEntities`,
     *                   `chatflowid`, `sessionId` and `chatId`
     * @returns the function's return value. When the output is `text` and the value is a
     *          string it is passed through `handleEscapeCharacters(value, false)` first.
     * @throws Error when the input variables are not valid JSON, when the "document" output
     *         is selected and the first returned element lacks a string `pageContent` or an
     *         object `metadata`, or when the function itself throws.
     */
    async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
        const output = nodeData.outputs?.output as string
        const javascriptFunction = nodeData.inputs?.javascriptFunction as string
        const functionInputVariablesRaw = nodeData.inputs?.functionInputVariables
        const appDataSource = options.appDataSource as DataSource
        const databaseEntities = options.databaseEntities as IDatabaseEntity

        const variables = await getVars(appDataSource, databaseEntities, nodeData)
        const flow = {
            chatflowId: options.chatflowid,
            sessionId: options.sessionId,
            chatId: options.chatId,
            input
        }

        let inputVars: ICommonObject = {}
        if (functionInputVariablesRaw) {
            try {
                inputVars =
                    typeof functionInputVariablesRaw === 'object' ? functionInputVariablesRaw : JSON.parse(functionInputVariablesRaw)
            } catch (exception) {
                throw new Error('Invalid JSON in the Custom Document Loader Input Variables: ' + exception)
            }
        }

        // Some values might be a stringified JSON, parse it
        for (const key in inputVars) {
            let value = inputVars[key]
            if (typeof value === 'string') {
                value = handleEscapeCharacters(value, true)
                if (value.startsWith('{') && value.endsWith('}')) {
                    try {
                        value = JSON.parse(value)
                    } catch (e) {
                        // Not valid JSON after all: keep the (unescaped) string value
                    }
                }
                inputVars[key] = value
            }
        }

        let sandbox: any = { $input: input }
        sandbox['$vars'] = prepareSandboxVars(variables)
        sandbox['$flow'] = flow

        // Each input variable becomes a `$`-prefixed global inside the sandbox
        for (const item in inputVars) {
            sandbox[`$${item}`] = inputVars[item]
        }

        // Environment allow-lists are comma separated; trim entries and drop empty ones so
        // that values such as "fs, path" or a trailing comma still resolve to real module names.
        const parseDepList = (raw: string): string[] =>
            raw
                .split(',')
                .map((dep) => dep.trim())
                .filter((dep) => dep.length > 0)

        const builtinDeps = process.env.TOOL_FUNCTION_BUILTIN_DEP
            ? defaultAllowBuiltInDep.concat(parseDepList(process.env.TOOL_FUNCTION_BUILTIN_DEP))
            : defaultAllowBuiltInDep
        const externalDeps = process.env.TOOL_FUNCTION_EXTERNAL_DEP ? parseDepList(process.env.TOOL_FUNCTION_EXTERNAL_DEP) : []
        const deps = availableDependencies.concat(externalDeps)

        const nodeVMOptions = {
            console: 'inherit',
            sandbox,
            require: {
                external: { modules: deps },
                builtin: builtinDeps
            }
        } as any

        const vm = new NodeVM(nodeVMOptions)
        try {
            // The user code is wrapped in an immediately-invoked async function so it may use `await` and `return`
            const response = await vm.run(`module.exports = async function() {${javascriptFunction}}()`, __dirname)

            if (output === 'document' && Array.isArray(response)) {
                if (response.length === 0) return response
                // Only the first element is inspected. Optional chaining makes a null/undefined
                // first element fall through to the explicit error below instead of a TypeError.
                const first = response[0]
                if (
                    first?.pageContent &&
                    typeof first.pageContent === 'string' &&
                    first.metadata &&
                    typeof first.metadata === 'object'
                )
                    return response
                throw new Error('Document object must contain pageContent and metadata')
            }

            if (output === 'text' && typeof response === 'string') {
                return handleEscapeCharacters(response, false)
            }

            // Any other combination is handed back unchanged
            return response
        } catch (e) {
            throw new Error(e)
        }
    }
}
|
||||
|
||||
module.exports = { nodeClass: CustomDocumentLoader_DocumentLoaders }
|
||||
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-writing"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M20 17v-12c0 -1.121 -.879 -2 -2 -2s-2 .879 -2 2v12l2 2l2 -2z" /><path d="M16 7h4" /><path d="M18 19h-13a2 2 0 1 1 0 -4h4a2 2 0 1 0 0 -4h-3" /></svg>
|
||||
|
After Width: | Height: | Size: 465 B |
@@ -0,0 +1,95 @@
|
||||
import { ICommonObject, IDatabaseEntity, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { DataSource } from 'typeorm'
|
||||
import { Document } from '@langchain/core/documents'
|
||||
|
||||
/**
 * Document loader node that returns the chunks already persisted for a selected
 * document store as LangChain `Document` objects.
 */
class DocStore_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]
    badge: string

    constructor() {
        this.label = 'Document Store'
        this.name = 'documentStore'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'dstore.svg'
        this.badge = 'NEW'
        this.category = 'Document Loaders'
        this.description = `Load data from pre-configured document stores`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Select Store',
                name: 'selectedStore',
                type: 'asyncOptions',
                loadMethod: 'listStores'
            }
        ]
        this.outputs = [
            {
                label: 'Document',
                name: 'document',
                description: 'Array of document objects containing metadata and pageContent',
                baseClasses: [...this.baseClasses, 'json']
            },
            {
                label: 'Text',
                name: 'text',
                description: 'Concatenated string from pageContent of documents',
                baseClasses: ['string', 'json']
            }
        ]
    }

    //@ts-ignore
    loadMethods = {
        /**
         * Lists the document stores that can be selected in the node.
         * Only stores whose `status` is 'SYNC' are offered.
         *
         * @returns one option per matching store (`name` = store id, `label` = store name),
         *          or an empty list when no data source is available
         */
        async listStores(_: INodeData, options: ICommonObject): Promise<INodeOptionsValue[]> {
            const returnData: INodeOptionsValue[] = []

            const appDataSource = options.appDataSource as DataSource
            const databaseEntities = options.databaseEntities as IDatabaseEntity

            if (!appDataSource) {
                return returnData
            }

            const stores = await appDataSource.getRepository(databaseEntities['DocumentStore']).find()
            for (const store of stores) {
                if (store.status === 'SYNC') {
                    returnData.push({
                        name: store.id,
                        label: store.name,
                        description: store.description
                    })
                }
            }
            return returnData
        }
    }

    /**
     * Loads every stored chunk of the selected document store.
     *
     * NOTE(review): a 'Text' output is declared in the constructor, but this method never
     * reads `nodeData.outputs` and always returns Document objects - confirm whether the
     * text output is meant to be handled here.
     *
     * @param nodeData - node configuration; reads `inputs.selectedStore` (the store id)
     * @param options  - runtime options; reads `appDataSource` and `databaseEntities`
     * @returns one `Document` per stored chunk, in the order returned by the repository
     * @throws Error when the data source is unavailable, when no store is selected, or when
     *         a chunk's metadata string is not valid JSON
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const selectedStore = nodeData.inputs?.selectedStore as string
        const appDataSource = options.appDataSource as DataSource
        const databaseEntities = options.databaseEntities as IDatabaseEntity

        if (!appDataSource || !databaseEntities) {
            throw new Error('Document Store: database connection is not available')
        }
        // Guard against an empty selection: an undefined `storeId` condition would be skipped
        // by TypeORM and the query would then match the chunks of every store.
        if (!selectedStore) {
            throw new Error('Document Store: a store must be selected')
        }

        const chunks = await appDataSource
            .getRepository(databaseEntities['DocumentStoreFileChunk'])
            .find({ where: { storeId: selectedStore } })

        const finalDocs: Document[] = []
        for (const chunk of chunks) {
            // Metadata is expected to be a JSON string; accept an already-parsed object too
            // and fall back to an empty object when nothing was stored.
            const rawMetadata = chunk.metadata
            let metadata = {}
            if (rawMetadata && typeof rawMetadata === 'object') {
                metadata = rawMetadata
            } else if (typeof rawMetadata === 'string' && rawMetadata.trim().length > 0) {
                metadata = JSON.parse(rawMetadata)
            }
            finalDocs.push(new Document({ pageContent: chunk.pageContent, metadata }))
        }
        return finalDocs
    }
}
|
||||
|
||||
// CommonJS export: expose the node class under the `nodeClass` key.
module.exports = { nodeClass: DocStore_DocumentLoaders }
|
||||
@@ -0,0 +1,15 @@
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="24"
|
||||
height="24"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
>
|
||||
<path d="M12 4l-8 4l8 4l8 -4l-8 -4" />
|
||||
<path d="M4 12l8 4l8 -4" />
|
||||
<path d="M4 16l8 4l8 -4" />
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 305 B |
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { DocxLoader } from 'langchain/document_loaders/fs/docx'
|
||||
import { getFileFromStorage } from '../../../src'
|
||||
@@ -37,9 +38,21 @@ class Docx_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -50,8 +63,14 @@ class Docx_DocumentLoaders implements INode {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const docxFileBase64 = nodeData.inputs?.docxFile as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let alldocs = []
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
if (docxFileBase64.startsWith('FILE-STORAGE::')) {
|
||||
@@ -69,11 +88,9 @@ class Docx_DocumentLoaders implements INode {
|
||||
const loader = new DocxLoader(blob)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -91,32 +108,38 @@ class Docx_DocumentLoaders implements INode {
|
||||
const loader = new DocxLoader(blob)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return alldocs
|
||||
return docs
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { FigmaFileLoader, FigmaLoaderParams } from 'langchain/document_loaders/web/figma'
|
||||
@@ -60,9 +61,21 @@ class Figma_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -74,6 +87,12 @@ class Figma_DocumentLoaders implements INode {
|
||||
const fileKey = nodeData.inputs?.fileKey as string
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
@@ -86,19 +105,30 @@ class Figma_DocumentLoaders implements INode {
|
||||
|
||||
const loader = new FigmaFileLoader(figmaOptions)
|
||||
|
||||
const docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
let docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
return docs.map((doc) => {
|
||||
return {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { TextLoader } from 'langchain/document_loaders/fs/text'
|
||||
@@ -65,9 +66,21 @@ class Folder_DocumentLoaders implements INode {
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -80,6 +93,12 @@ class Folder_DocumentLoaders implements INode {
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const recursive = nodeData.inputs?.recursive as boolean
|
||||
const pdfUsage = nodeData.inputs?.pdfUsage
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const loader = new DirectoryLoader(
|
||||
folderPath,
|
||||
@@ -141,18 +160,26 @@ class Folder_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { GitbookLoader } from 'langchain/document_loaders/web/gitbook'
|
||||
@@ -44,9 +45,21 @@ class Gitbook_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -57,22 +70,39 @@ class Gitbook_DocumentLoaders implements INode {
|
||||
const shouldLoadAllPaths = nodeData.inputs?.shouldLoadAllPaths as boolean
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const loader = shouldLoadAllPaths ? new GitbookLoader(webPath, { shouldLoadAllPaths }) : new GitbookLoader(webPath)
|
||||
|
||||
const docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
let docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
return docs.map((doc) => {
|
||||
return {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { GithubRepoLoader, GithubRepoLoaderParams } from 'langchain/document_loaders/web/github'
|
||||
@@ -86,9 +87,21 @@ class Github_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -104,6 +117,12 @@ class Github_DocumentLoaders implements INode {
|
||||
const maxConcurrency = nodeData.inputs?.maxConcurrency as string
|
||||
const maxRetries = nodeData.inputs?.maxRetries as string
|
||||
const ignorePath = nodeData.inputs?.ignorePath as string
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
@@ -120,19 +139,30 @@ class Github_DocumentLoaders implements INode {
|
||||
if (ignorePath) githubOptions.ignorePaths = JSON.parse(ignorePath)
|
||||
|
||||
const loader = new GithubRepoLoader(repoLink, githubOptions)
|
||||
const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
|
||||
let docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
return docs.map((doc) => {
|
||||
return {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { JSONLoader } from 'langchain/document_loaders/fs/json'
|
||||
import { getFileFromStorage } from '../../../src'
|
||||
@@ -45,9 +46,21 @@ class Json_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -59,6 +72,12 @@ class Json_DocumentLoaders implements INode {
|
||||
const jsonFileBase64 = nodeData.inputs?.jsonFile as string
|
||||
const pointersName = nodeData.inputs?.pointersName as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let pointers: string[] = []
|
||||
if (pointersName) {
|
||||
@@ -66,7 +85,7 @@ class Json_DocumentLoaders implements INode {
|
||||
pointers = outputString.split(',').map((pointer) => '/' + pointer.trim())
|
||||
}
|
||||
|
||||
let alldocs = []
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
|
||||
@@ -85,11 +104,9 @@ class Json_DocumentLoaders implements INode {
|
||||
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -107,32 +124,38 @@ class Json_DocumentLoaders implements INode {
|
||||
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return alldocs
|
||||
return docs
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { JSONLinesLoader } from 'langchain/document_loaders/fs/json'
|
||||
import { getFileFromStorage } from '../../../src'
|
||||
@@ -44,9 +45,21 @@ class Jsonlines_DocumentLoaders implements INode {
|
||||
optional: false
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -58,8 +71,14 @@ class Jsonlines_DocumentLoaders implements INode {
|
||||
const jsonLinesFileBase64 = nodeData.inputs?.jsonlinesFile as string
|
||||
const pointerName = nodeData.inputs?.pointerName as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let alldocs = []
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
let pointer = '/' + pointerName.trim()
|
||||
@@ -79,11 +98,9 @@ class Jsonlines_DocumentLoaders implements INode {
|
||||
const loader = new JSONLinesLoader(blob, pointer)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -101,32 +118,38 @@ class Jsonlines_DocumentLoaders implements INode {
|
||||
const loader = new JSONLinesLoader(blob, pointer)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return alldocs
|
||||
return docs
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
@@ -44,9 +45,21 @@ class NotionDB_DocumentLoaders implements INode {
|
||||
description: 'If your URL looks like - https://www.notion.so/abcdefh?v=long_hash_2, then abcdefh is the database ID'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -57,6 +70,12 @@ class NotionDB_DocumentLoaders implements INode {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const databaseId = nodeData.inputs?.databaseId as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const notionIntegrationToken = getCredentialParam('notionIntegrationToken', credentialData, nodeData)
|
||||
@@ -74,7 +93,7 @@ class NotionDB_DocumentLoaders implements INode {
|
||||
}
|
||||
const loader = new NotionAPILoader(obj)
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
@@ -83,18 +102,26 @@ class NotionDB_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { NotionLoader } from 'langchain/document_loaders/fs/notion'
|
||||
|
||||
@@ -37,9 +38,21 @@ class NotionFolder_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -50,9 +63,15 @@ class NotionFolder_DocumentLoaders implements INode {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const notionFolder = nodeData.inputs?.notionFolder as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const loader = new NotionLoader(notionFolder)
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
@@ -62,18 +81,26 @@ class NotionFolder_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
@@ -45,9 +46,21 @@ class NotionPage_DocumentLoaders implements INode {
|
||||
'The last The 32 char hex in the url path. For example: https://www.notion.so/skarard/LangChain-Notion-API-b34ca03f219c4420a6046fc4bdfdf7b4, b34ca03f219c4420a6046fc4bdfdf7b4 is the Page ID'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -58,6 +71,12 @@ class NotionPage_DocumentLoaders implements INode {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const pageId = nodeData.inputs?.pageId as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const notionIntegrationToken = getCredentialParam('notionIntegrationToken', credentialData, nodeData)
|
||||
@@ -71,7 +90,7 @@ class NotionPage_DocumentLoaders implements INode {
|
||||
}
|
||||
const loader = new NotionAPILoader(obj)
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
@@ -80,18 +99,26 @@ class NotionPage_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { IDocument, ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { PDFLoader } from 'langchain/document_loaders/fs/pdf'
|
||||
import { getFileFromStorage } from '../../../src'
|
||||
@@ -60,9 +61,21 @@ class Pdf_DocumentLoaders implements INode {
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -75,8 +88,14 @@ class Pdf_DocumentLoaders implements INode {
|
||||
const usage = nodeData.inputs?.usage as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let alldocs: any[] = []
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
|
||||
@@ -92,7 +111,7 @@ class Pdf_DocumentLoaders implements INode {
|
||||
for (const file of files) {
|
||||
const fileData = await getFileFromStorage(file, chatflowid)
|
||||
const bf = Buffer.from(fileData)
|
||||
await this.extractDocs(usage, bf, legacyBuild, textSplitter, alldocs)
|
||||
await this.extractDocs(usage, bf, legacyBuild, textSplitter, docs)
|
||||
}
|
||||
} else {
|
||||
if (pdfFileBase64.startsWith('[') && pdfFileBase64.endsWith(']')) {
|
||||
@@ -105,30 +124,38 @@ class Pdf_DocumentLoaders implements INode {
|
||||
const splitDataURI = file.split(',')
|
||||
splitDataURI.pop()
|
||||
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
|
||||
await this.extractDocs(usage, bf, legacyBuild, textSplitter, alldocs)
|
||||
await this.extractDocs(usage, bf, legacyBuild, textSplitter, docs)
|
||||
}
|
||||
}
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return alldocs
|
||||
return docs
|
||||
}
|
||||
|
||||
private async extractDocs(usage: string, bf: Buffer, legacyBuild: boolean, textSplitter: TextSplitter, alldocs: any[]) {
|
||||
private async extractDocs(usage: string, bf: Buffer, legacyBuild: boolean, textSplitter: TextSplitter, docs: IDocument[]) {
|
||||
if (usage === 'perFile') {
|
||||
const loader = new PDFLoader(new Blob([bf]), {
|
||||
splitPages: false,
|
||||
@@ -137,11 +164,9 @@ class Pdf_DocumentLoaders implements INode {
|
||||
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||
})
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
} else {
|
||||
const loader = new PDFLoader(new Blob([bf]), {
|
||||
@@ -150,11 +175,9 @@ class Pdf_DocumentLoaders implements INode {
|
||||
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||
})
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { Document } from '@langchain/core/documents'
|
||||
import { handleEscapeCharacters } from '../../../src'
|
||||
@@ -40,9 +41,21 @@ class PlainText_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -68,42 +81,54 @@ class PlainText_DocumentLoaders implements INode {
|
||||
const text = nodeData.inputs?.text as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const output = nodeData.outputs?.output as string
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let alldocs: Document<Record<string, any>>[] = []
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let docs: IDocument[] = []
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await textSplitter.createDocuments([text])
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await textSplitter.createDocuments([text])))
|
||||
} else {
|
||||
alldocs.push(
|
||||
docs.push(
|
||||
new Document({
|
||||
pageContent: text
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
let finaldocs: Document<Record<string, any>>[] = []
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
finaldocs = alldocs
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
if (output === 'document') {
|
||||
return finaldocs
|
||||
return docs
|
||||
} else {
|
||||
let finaltext = ''
|
||||
for (const doc of finaldocs) {
|
||||
for (const doc of docs) {
|
||||
finaltext += `${doc.pageContent}\n`
|
||||
}
|
||||
return handleEscapeCharacters(finaltext, false)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright'
|
||||
import { test } from 'linkifyjs'
|
||||
@@ -53,6 +54,7 @@ class Playwright_DocumentLoaders implements INode {
|
||||
description: 'Scrape relative links from XML sitemap URL'
|
||||
}
|
||||
],
|
||||
default: 'webCrawl',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
@@ -106,9 +108,21 @@ class Playwright_DocumentLoaders implements INode {
|
||||
description: 'CSS selectors like .div or #div'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -123,6 +137,12 @@ class Playwright_DocumentLoaders implements INode {
|
||||
let limit = parseInt(nodeData.inputs?.limit as string)
|
||||
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
|
||||
let waitForSelector = nodeData.inputs?.waitForSelector as string
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
url = url.trim()
|
||||
@@ -164,7 +184,7 @@ class Playwright_DocumentLoaders implements INode {
|
||||
}
|
||||
}
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||
@@ -195,18 +215,26 @@ class Playwright_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer'
|
||||
import { test } from 'linkifyjs'
|
||||
@@ -54,6 +55,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
description: 'Scrape relative links from XML sitemap URL'
|
||||
}
|
||||
],
|
||||
default: 'webCrawl',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
@@ -107,9 +109,21 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
description: 'CSS selectors like .div or #div'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -124,6 +138,12 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
let limit = parseInt(nodeData.inputs?.limit as string)
|
||||
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
|
||||
let waitForSelector = nodeData.inputs?.waitForSelector as string
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
url = url.trim()
|
||||
@@ -165,7 +185,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
}
|
||||
}
|
||||
|
||||
let docs = []
|
||||
let docs: IDocument[] = []
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||
@@ -196,18 +216,26 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
let finaldocs = []
|
||||
for (const doc of docs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
return finaldocs
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
|
||||
import { S3Loader } from 'langchain/document_loaders/web/s3'
|
||||
import {
|
||||
@@ -413,9 +414,21 @@ class S3_DocumentLoaders implements INode {
|
||||
default: '500'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -451,6 +464,12 @@ class S3_DocumentLoaders implements INode {
|
||||
const combineUnderNChars = nodeData.inputs?.combineUnderNChars as number
|
||||
const newAfterNChars = nodeData.inputs?.newAfterNChars as number
|
||||
const maxCharacters = nodeData.inputs?.maxCharacters as number
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let credentials: S3ClientConfig['credentials'] | undefined
|
||||
|
||||
@@ -542,19 +561,25 @@ class S3_DocumentLoaders implements INode {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
...parsedMetadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
}
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
}
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { SearchApiLoader } from 'langchain/document_loaders/web/searchapi'
|
||||
@@ -54,9 +55,21 @@ class SearchAPI_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -68,6 +81,12 @@ class SearchAPI_DocumentLoaders implements INode {
|
||||
const query = nodeData.inputs?.query as string
|
||||
const customParameters = nodeData.inputs?.customParameters
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
// Fetch the API credentials for this node
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
@@ -87,19 +106,30 @@ class SearchAPI_DocumentLoaders implements INode {
|
||||
const loader = new SearchApiLoader(loaderConfig)
|
||||
|
||||
// Fetch documents, split if a text splitter is provided
|
||||
const docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
let docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
return docs.map((doc) => {
|
||||
return {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { SerpAPILoader } from 'langchain/document_loaders/web/serpapi'
|
||||
@@ -44,9 +45,21 @@ class SerpAPI_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -57,23 +70,40 @@ class SerpAPI_DocumentLoaders implements INode {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const query = nodeData.inputs?.query as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const serpApiKey = getCredentialParam('serpApiKey', credentialData, nodeData)
|
||||
const loader = new SerpAPILoader({ q: query, apiKey: serpApiKey })
|
||||
const docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
let docs = textSplitter ? await loader.loadAndSplit() : await loader.load()
|
||||
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
return docs.map((doc) => {
|
||||
return {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
return docs
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { TextLoader } from 'langchain/document_loaders/fs/text'
|
||||
import { Document } from '@langchain/core/documents'
|
||||
import { getFileFromStorage, handleEscapeCharacters } from '../../../src'
|
||||
|
||||
class Text_DocumentLoaders implements INode {
|
||||
@@ -40,9 +40,21 @@ class Text_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -68,8 +80,14 @@ class Text_DocumentLoaders implements INode {
|
||||
const txtFileBase64 = nodeData.inputs?.txtFile as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const output = nodeData.outputs?.output as string
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let alldocs = []
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
|
||||
@@ -88,11 +106,9 @@ class Text_DocumentLoaders implements INode {
|
||||
const loader = new TextLoader(blob)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -110,37 +126,42 @@ class Text_DocumentLoaders implements INode {
|
||||
const loader = new TextLoader(blob)
|
||||
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.loadAndSplit(textSplitter)))
|
||||
} else {
|
||||
const docs = await loader.load()
|
||||
alldocs.push(...docs)
|
||||
docs.push(...(await loader.load()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let finaldocs: Document<Record<string, any>>[] = []
|
||||
if (metadata) {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
for (const doc of alldocs) {
|
||||
const newdoc = {
|
||||
...doc,
|
||||
metadata: {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata
|
||||
}
|
||||
}
|
||||
finaldocs.push(newdoc)
|
||||
}
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
finaldocs = alldocs
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
if (output === 'document') {
|
||||
return finaldocs
|
||||
return docs
|
||||
} else {
|
||||
let finaltext = ''
|
||||
for (const doc of finaldocs) {
|
||||
for (const doc of docs) {
|
||||
finaltext += `${doc.pageContent}\n`
|
||||
}
|
||||
return handleEscapeCharacters(finaltext, false)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import {
|
||||
UnstructuredLoaderOptions,
|
||||
UnstructuredLoaderStrategy,
|
||||
@@ -400,9 +401,21 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||
default: '500'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -429,6 +442,12 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||
const combineUnderNChars = nodeData.inputs?.combineUnderNChars as number
|
||||
const newAfterNChars = nodeData.inputs?.newAfterNChars as number
|
||||
const maxCharacters = nodeData.inputs?.maxCharacters as number
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
const fileBase64 = nodeData.inputs?.fileObject as string
|
||||
|
||||
const obj: UnstructuredLoaderOptions = {
|
||||
@@ -452,7 +471,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||
const unstructuredAPIKey = getCredentialParam('unstructuredAPIKey', credentialData, nodeData)
|
||||
if (unstructuredAPIKey) obj.apiKey = unstructuredAPIKey
|
||||
|
||||
let docs: any[] = []
|
||||
let docs: IDocument[] = []
|
||||
let files: string[] = []
|
||||
|
||||
if (fileBase64) {
|
||||
@@ -499,19 +518,25 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
...parsedMetadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
}
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
}
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { omit } from 'lodash'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import {
|
||||
UnstructuredDirectoryLoader,
|
||||
@@ -379,9 +380,21 @@ class UnstructuredFolder_DocumentLoaders implements INode {
|
||||
default: '500'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
label: 'Additional Metadata',
|
||||
name: 'metadata',
|
||||
type: 'json',
|
||||
description: 'Additional metadata to be added to the extracted documents',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Omit Metadata Keys',
|
||||
name: 'omitMetadataKeys',
|
||||
type: 'string',
|
||||
rows: 4,
|
||||
description:
|
||||
'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma',
|
||||
placeholder: 'key1, key2, key3.nestedKey1',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
@@ -408,6 +421,12 @@ class UnstructuredFolder_DocumentLoaders implements INode {
|
||||
const combineUnderNChars = nodeData.inputs?.combineUnderNChars as number
|
||||
const newAfterNChars = nodeData.inputs?.newAfterNChars as number
|
||||
const maxCharacters = nodeData.inputs?.maxCharacters as number
|
||||
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
|
||||
|
||||
let omitMetadataKeys: string[] = []
|
||||
if (_omitMetadataKeys) {
|
||||
omitMetadataKeys = _omitMetadataKeys.split(',').map((key) => key.trim())
|
||||
}
|
||||
|
||||
const obj: UnstructuredLoaderOptions = {
|
||||
apiUrl: unstructuredAPIUrl,
|
||||
@@ -437,19 +456,25 @@ class UnstructuredFolder_DocumentLoaders implements INode {
|
||||
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
...parsedMetadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
}
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
...parsedMetadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
} else {
|
||||
docs = docs.map((doc) => ({
|
||||
...doc,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
}
|
||||
metadata: omit(
|
||||
{
|
||||
...doc.metadata,
|
||||
[sourceIdKey]: doc.metadata[sourceIdKey] || sourceIdKey
|
||||
},
|
||||
omitMetadataKeys
|
||||
)
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ class CharacterTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Size',
|
||||
name: 'chunkSize',
|
||||
type: 'number',
|
||||
description: 'Number of characters in each chunk. Default is 1000.',
|
||||
default: 1000,
|
||||
optional: true
|
||||
},
|
||||
@@ -34,6 +35,8 @@ class CharacterTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Overlap',
|
||||
name: 'chunkOverlap',
|
||||
type: 'number',
|
||||
description: 'Number of characters to overlap between chunks. Default is 200.',
|
||||
default: 200,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
|
||||
@@ -101,6 +101,7 @@ class CodeTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Size',
|
||||
name: 'chunkSize',
|
||||
type: 'number',
|
||||
description: 'Number of characters in each chunk. Default is 1000.',
|
||||
default: 1000,
|
||||
optional: true
|
||||
},
|
||||
@@ -108,6 +109,8 @@ class CodeTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Overlap',
|
||||
name: 'chunkOverlap',
|
||||
type: 'number',
|
||||
description: 'Number of characters to overlap between chunks. Default is 200.',
|
||||
default: 200,
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
|
||||
+3
@@ -28,6 +28,7 @@ class HtmlToMarkdownTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Size',
|
||||
name: 'chunkSize',
|
||||
type: 'number',
|
||||
description: 'Number of characters in each chunk. Default is 1000.',
|
||||
default: 1000,
|
||||
optional: true
|
||||
},
|
||||
@@ -35,6 +36,8 @@ class HtmlToMarkdownTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Overlap',
|
||||
name: 'chunkOverlap',
|
||||
type: 'number',
|
||||
description: 'Number of characters to overlap between chunks. Default is 200.',
|
||||
default: 200,
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
|
||||
@@ -27,6 +27,7 @@ class MarkdownTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Size',
|
||||
name: 'chunkSize',
|
||||
type: 'number',
|
||||
description: 'Number of characters in each chunk. Default is 1000.',
|
||||
default: 1000,
|
||||
optional: true
|
||||
},
|
||||
@@ -34,6 +35,8 @@ class MarkdownTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Overlap',
|
||||
name: 'chunkOverlap',
|
||||
type: 'number',
|
||||
description: 'Number of characters to overlap between chunks. Default is 200.',
|
||||
default: 200,
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
|
||||
+3
@@ -27,6 +27,7 @@ class RecursiveCharacterTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Size',
|
||||
name: 'chunkSize',
|
||||
type: 'number',
|
||||
description: 'Number of characters in each chunk. Default is 1000.',
|
||||
default: 1000,
|
||||
optional: true
|
||||
},
|
||||
@@ -34,6 +35,8 @@ class RecursiveCharacterTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Overlap',
|
||||
name: 'chunkOverlap',
|
||||
type: 'number',
|
||||
description: 'Number of characters to overlap between chunks. Default is 200.',
|
||||
default: 200,
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
|
||||
@@ -56,6 +56,7 @@ class TokenTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Size',
|
||||
name: 'chunkSize',
|
||||
type: 'number',
|
||||
description: 'Number of characters in each chunk. Default is 1000.',
|
||||
default: 1000,
|
||||
optional: true
|
||||
},
|
||||
@@ -63,6 +64,8 @@ class TokenTextSplitter_TextSplitters implements INode {
|
||||
label: 'Chunk Overlap',
|
||||
name: 'chunkOverlap',
|
||||
type: 'number',
|
||||
description: 'Number of characters to overlap between chunks. Default is 200.',
|
||||
default: 200,
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
|
||||
@@ -176,6 +176,11 @@ export type MessageContentImageUrl = {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Minimal document shape: a string `pageContent` paired with a `metadata` object.
 *
 * The `Metadata` type parameter must be a string-keyed record and defaults to
 * `Record<string, any>`, so callers that do not care about the metadata shape
 * can write plain `IDocument`.
 */
export interface IDocument<Metadata extends Record<string, any> = Record<string, any>> {
|
||||
pageContent: string
|
||||
metadata: Metadata
|
||||
}
|
||||
|
||||
/**
|
||||
* Classes
|
||||
*/
|
||||
|
||||
@@ -135,6 +135,21 @@ export const removeFilesFromStorage = async (...paths: string[]) => {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Removes the single item addressed by the given path segments from the
 * configured storage backend.
 *
 * - For any storage type other than `'s3'`, the segments are joined beneath
 *   `getStoragePath()` and the resulting file is unlinked synchronously.
 * - For `'s3'`, the segments are concatenated with `/` separators into a key
 *   without a leading slash, which is then handed to `_deleteS3Folder`.
 *
 * @param paths - Path segments, relative to the storage root, that identify the file.
 */
export const removeSpecificFileFromStorage = async (...paths: string[]) => {
    // Local filesystem (or any non-S3 backend): resolve the file and delete it directly.
    if (getStorageType() !== 's3') {
        fs.unlinkSync(path.join(getStoragePath(), ...paths))
        return
    }

    // Build "/seg1/seg2/..." and then drop the leading separator so the key is relative.
    let objectKey = ''
    for (const segment of paths) {
        objectKey = objectKey + '/' + segment
    }
    if (objectKey.startsWith('/')) {
        objectKey = objectKey.substring(1)
    }

    await _deleteS3Folder(objectKey)
}
|
||||
|
||||
export const removeFolderFromStorage = async (...paths: string[]) => {
|
||||
const storageType = getStorageType()
|
||||
if (storageType === 's3') {
|
||||
|
||||
Reference in New Issue
Block a user