Chore/LC v0.3 (#3517)

* bump langchain version to 0.3 and upgrade all chat models

* update all document loaders to offer both Document and Text output options

* fix pnpm lock file
This commit is contained in:
Henry Heng
2024-11-28 11:06:12 +00:00
committed by GitHub
parent 126808b62a
commit 940c8fd3b0
82 changed files with 2373 additions and 1540 deletions
@@ -3,7 +3,8 @@ import { omit } from 'lodash'
import { Document } from '@langchain/core/documents'
import { TextSplitter } from 'langchain/text_splitter'
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { handleEscapeCharacters } from '../../../src/utils'
class API_DocumentLoaders implements INode {
label: string
@@ -15,11 +16,12 @@ class API_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs?: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'API Loader'
this.name = 'apiLoader'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'api.svg'
this.category = 'Document Loaders'
@@ -88,6 +90,20 @@ class API_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData): Promise<any> {
const headers = nodeData.inputs?.headers as string
@@ -97,6 +113,7 @@ class API_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -161,7 +178,15 @@ class API_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,6 +1,6 @@
import { omit } from 'lodash'
import { INode, INodeData, INodeParams, ICommonObject } from '../../../src/Interface'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { TextSplitter } from 'langchain/text_splitter'
import { ApifyDatasetLoader } from '@langchain/community/document_loaders/web/apify_dataset'
import { Document } from '@langchain/core/documents'
@@ -16,16 +16,23 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
baseClasses: string[]
inputs: INodeParams[]
credential: INodeParams
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Apify Website Content Crawler'
this.name = 'apifyWebsiteContentCrawler'
this.type = 'Document'
this.icon = 'apify-symbol-transparent.svg'
this.version = 2.0
this.version = 3.0
this.category = 'Document Loaders'
this.description = 'Load data from Apify Website Content Crawler'
this.baseClasses = [this.type]
this.credential = {
label: 'Connect Apify API',
name: 'credential',
type: 'credential',
credentialNames: ['apifyApi']
}
this.inputs = [
{
label: 'Text Splitter',
@@ -112,18 +119,27 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
additionalParams: true
}
]
this.credential = {
label: 'Connect Apify API',
name: 'credential',
type: 'credential',
credentialNames: ['apifyApi']
}
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -203,7 +219,15 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,8 +1,8 @@
import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { BraveSearch } from '@langchain/community/tools/brave_search'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { Document } from '@langchain/core/documents'
class BraveSearchAPI_DocumentLoaders implements INode {
@@ -16,11 +16,12 @@ class BraveSearchAPI_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'BraveSearch API Document Loader'
this.name = 'braveSearchApiLoader'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'brave.svg'
this.category = 'Document Loaders'
@@ -65,6 +66,20 @@ class BraveSearchAPI_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -72,6 +87,7 @@ class BraveSearchAPI_DocumentLoaders implements INode {
const query = nodeData.inputs?.query as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -134,7 +150,15 @@ class BraveSearchAPI_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -3,9 +3,9 @@ import { omit } from 'lodash'
import { CheerioWebBaseLoader, WebBaseLoaderParams } from '@langchain/community/document_loaders/web/cheerio'
import { test } from 'linkifyjs'
import { parse } from 'css-what'
import { webCrawl, xmlScrape } from '../../../src'
import { SelectorType } from 'cheerio'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INodeOutputsValue, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { handleEscapeCharacters, webCrawl, xmlScrape } from '../../../src/utils'
class Cheerio_DocumentLoaders implements INode {
label: string
@@ -17,11 +17,12 @@ class Cheerio_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Cheerio Web Scraper'
this.name = 'cheerioWebScraper'
this.version = 1.1
this.version = 2.0
this.type = 'Document'
this.icon = 'cheerio.svg'
this.category = 'Document Loaders'
@@ -99,6 +100,20 @@ class Cheerio_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -107,6 +122,7 @@ class Cheerio_DocumentLoaders implements INode {
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
let limit = parseInt(nodeData.inputs?.limit as string)
const output = nodeData.outputs?.output as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
@@ -212,7 +228,15 @@ class Cheerio_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,8 +1,8 @@
import { omit } from 'lodash'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { ConfluencePagesLoader, ConfluencePagesLoaderParams } from '@langchain/community/document_loaders/web/confluence'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { ICommonObject, INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
class Confluence_DocumentLoaders implements INode {
label: string
@@ -15,11 +15,12 @@ class Confluence_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Confluence'
this.name = 'confluence'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'confluence.svg'
this.category = 'Document Loaders'
@@ -79,6 +80,20 @@ class Confluence_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -88,6 +103,7 @@ class Confluence_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -157,7 +173,15 @@ class Confluence_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,8 +1,8 @@
import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { CSVLoader } from '@langchain/community/document_loaders/fs/csv'
import { getFileFromStorage, handleEscapeCharacters } from '../../../src'
import { ICommonObject, IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
class Csv_DocumentLoaders implements INode {
label: string
@@ -11,7 +11,6 @@ class CustomDocumentLoader_DocumentLoaders implements INode {
type: string
icon: string
category: string
badge: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
@@ -14,7 +14,6 @@ class DocStore_DocumentLoaders implements INode {
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
badge: string
constructor() {
this.label = 'Document Store'
@@ -1,8 +1,8 @@
import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, IDocument, INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'
import { getFileFromStorage } from '../../../src'
import { getFileFromStorage, handleEscapeCharacters } from '../../../src'
class Docx_DocumentLoaders implements INode {
label: string
@@ -14,11 +14,12 @@ class Docx_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Docx File'
this.name = 'docxFile'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'docx.svg'
this.category = 'Document Loaders'
@@ -57,6 +58,20 @@ class Docx_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -64,6 +79,7 @@ class Docx_DocumentLoaders implements INode {
const docxFileBase64 = nodeData.inputs?.docxFile as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -153,7 +169,15 @@ class Docx_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,6 +1,6 @@
import { omit } from 'lodash'
import { getCredentialData, getCredentialParam } from '../../../src'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src'
import { ICommonObject, IDocument, INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
import { FigmaFileLoader, FigmaLoaderParams } from '@langchain/community/document_loaders/web/figma'
import { TextSplitter } from 'langchain/text_splitter'
@@ -15,11 +15,12 @@ class Figma_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Figma'
this.name = 'figma'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'figma.svg'
this.category = 'Document Loaders'
@@ -80,6 +81,20 @@ class Figma_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -88,6 +103,7 @@ class Figma_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -146,7 +162,15 @@ class Figma_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,5 +1,5 @@
import { omit } from 'lodash'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { TextLoader } from 'langchain/document_loaders/fs/text'
import { JSONLinesLoader, JSONLoader } from 'langchain/document_loaders/fs/json'
@@ -9,7 +9,7 @@ import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
import { Document } from '@langchain/core/documents'
import { getFileFromStorage } from '../../../src/storageUtils'
import { mapMimeTypeToExt } from '../../../src/utils'
import { handleEscapeCharacters, mapMimeTypeToExt } from '../../../src/utils'
class File_DocumentLoaders implements INode {
label: string
@@ -21,11 +21,12 @@ class File_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'File Loader'
this.name = 'fileLoader'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'file.svg'
this.category = 'Document Loaders'
@@ -92,6 +93,20 @@ class File_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -101,6 +116,7 @@ class File_DocumentLoaders implements INode {
const pdfUsage = nodeData.inputs?.pdfUsage
const pointerName = nodeData.inputs?.pointerName as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -229,7 +245,15 @@ class File_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,8 +1,8 @@
import { TextSplitter } from 'langchain/text_splitter'
import { Document, DocumentInterface } from '@langchain/core/documents'
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
import { INode, INodeData, INodeParams, ICommonObject } from '../../../src/Interface'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios'
import { z } from 'zod'
import { zodToJsonSchema } from 'zod-to-json-schema'
@@ -259,16 +259,23 @@ class FireCrawl_DocumentLoaders implements INode {
baseClasses: string[]
inputs: INodeParams[]
credential: INodeParams
outputs: INodeOutputsValue[]
constructor() {
this.label = 'FireCrawl'
this.name = 'fireCrawl'
this.type = 'Document'
this.icon = 'firecrawl.png'
this.version = 1.0
this.version = 2.0
this.category = 'Document Loaders'
this.description = 'Load data from URL using FireCrawl'
this.baseClasses = [this.type]
this.credential = {
label: 'FireCrawl API',
name: 'credential',
type: 'credential',
credentialNames: ['fireCrawlApi']
}
this.inputs = [
{
label: 'Text Splitter',
@@ -303,12 +310,20 @@ class FireCrawl_DocumentLoaders implements INode {
}
// ... (other input parameters)
]
this.credential = {
label: 'FireCrawl API',
name: 'credential',
type: 'credential',
credentialNames: ['fireCrawlApi']
}
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -323,6 +338,7 @@ class FireCrawl_DocumentLoaders implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const firecrawlApiToken = getCredentialParam('firecrawlApiToken', credentialData, nodeData)
const firecrawlApiUrl = getCredentialParam('firecrawlApiUrl', credentialData, nodeData, 'https://api.firecrawl.dev')
const output = nodeData.outputs?.output as string
const urlPatternsExcludes = nodeData.inputs?.urlPatternsExcludes
? (nodeData.inputs.urlPatternsExcludes.split(',') as string[])
@@ -375,7 +391,15 @@ class FireCrawl_DocumentLoaders implements INode {
return finaldocs
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,5 +1,5 @@
import { omit } from 'lodash'
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { TextLoader } from 'langchain/document_loaders/fs/text'
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory'
@@ -7,6 +7,7 @@ import { JSONLinesLoader, JSONLoader } from 'langchain/document_loaders/fs/json'
import { CSVLoader } from '@langchain/community/document_loaders/fs/csv'
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'
import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'
import { handleEscapeCharacters } from '../../../src/utils'
class Folder_DocumentLoaders implements INode {
label: string
@@ -18,11 +19,12 @@ class Folder_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Folder with Files'
this.name = 'folderFiles'
this.version = 3.0
this.version = 4.0
this.type = 'Document'
this.icon = 'folder.svg'
this.category = 'Document Loaders'
@@ -95,6 +97,20 @@ class Folder_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData): Promise<any> {
@@ -105,6 +121,7 @@ class Folder_DocumentLoaders implements INode {
const pdfUsage = nodeData.inputs?.pdfUsage
const pointerName = nodeData.inputs?.pointerName as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -206,7 +223,15 @@ class Folder_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,7 +1,8 @@
import { omit } from 'lodash'
import { IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { GitbookLoader } from '@langchain/community/document_loaders/web/gitbook'
import { handleEscapeCharacters } from '../../../src/utils'
class Gitbook_DocumentLoaders implements INode {
label: string
@@ -13,11 +14,12 @@ class Gitbook_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs?: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'GitBook'
this.name = 'gitbook'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'gitbook.svg'
this.category = 'Document Loaders'
@@ -64,6 +66,20 @@ class Gitbook_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData): Promise<any> {
const webPath = nodeData.inputs?.webPath as string
@@ -71,6 +87,7 @@ class Gitbook_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -120,7 +137,15 @@ class Gitbook_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { GithubRepoLoader, GithubRepoLoaderParams } from '@langchain/community/document_loaders/web/github'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
class Github_DocumentLoaders implements INode {
label: string
@@ -15,11 +15,12 @@ class Github_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Github'
this.name = 'github'
this.version = 2.0
this.version = 3.0
this.type = 'Document'
this.icon = 'github.svg'
this.category = 'Document Loaders'
@@ -106,6 +107,20 @@ class Github_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -118,6 +133,7 @@ class Github_DocumentLoaders implements INode {
const maxRetries = nodeData.inputs?.maxRetries as string
const ignorePath = nodeData.inputs?.ignorePath as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -181,7 +197,15 @@ class Github_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { JSONLoader } from 'langchain/document_loaders/fs/json'
import { getFileFromStorage } from '../../../src'
import { getFileFromStorage, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
class Json_DocumentLoaders implements INode {
label: string
@@ -14,11 +14,12 @@ class Json_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Json File'
this.name = 'jsonFile'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'json.svg'
this.category = 'Document Loaders'
@@ -65,6 +66,20 @@ class Json_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -73,6 +88,7 @@ class Json_DocumentLoaders implements INode {
const pointersName = nodeData.inputs?.pointersName as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -169,7 +185,15 @@ class Json_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import jsonpointer from 'jsonpointer'
import { getFileFromStorage } from '../../../src'
import { getFileFromStorage, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
import { Document } from '@langchain/core/documents'
import type { readFile as ReadFileT } from 'node:fs/promises'
@@ -37,11 +37,12 @@ class Jsonlines_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Json Lines File'
this.name = 'jsonlinesFile'
this.version = 2.0
this.version = 3.0
this.type = 'Document'
this.icon = 'jsonlines.svg'
this.category = 'Document Loaders'
@@ -93,6 +94,20 @@ class Jsonlines_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -101,6 +116,7 @@ class Jsonlines_DocumentLoaders implements INode {
const pointerName = nodeData.inputs?.pointerName as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -193,7 +209,15 @@ class Jsonlines_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionAPILoader, NotionAPILoaderOptions } from '@langchain/community/document_loaders/web/notionapi'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
class NotionDB_DocumentLoaders implements INode {
label: string
@@ -15,11 +15,12 @@ class NotionDB_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Notion Database'
this.name = 'notionDB'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'notion-db.svg'
this.category = 'Document Loaders'
@@ -64,6 +65,20 @@ class NotionDB_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -71,6 +86,7 @@ class NotionDB_DocumentLoaders implements INode {
const databaseId = nodeData.inputs?.databaseId as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -133,7 +149,15 @@ class NotionDB_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,7 +1,8 @@
import { omit } from 'lodash'
import { IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { IDocument, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionLoader } from '@langchain/community/document_loaders/fs/notion'
import { handleEscapeCharacters } from '../../../src/utils'
class NotionFolder_DocumentLoaders implements INode {
label: string
@@ -13,11 +14,12 @@ class NotionFolder_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Notion Folder'
this.name = 'notionFolder'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'notion-folder.svg'
this.category = 'Document Loaders'
@@ -57,6 +59,20 @@ class NotionFolder_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData): Promise<any> {
@@ -64,6 +80,7 @@ class NotionFolder_DocumentLoaders implements INode {
const notionFolder = nodeData.inputs?.notionFolder as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -112,7 +129,15 @@ class NotionFolder_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionAPILoader, NotionAPILoaderOptions } from '@langchain/community/document_loaders/web/notionapi'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
class NotionPage_DocumentLoaders implements INode {
label: string
@@ -15,11 +15,12 @@ class NotionPage_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Notion Page'
this.name = 'notionPage'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'notion-page.svg'
this.category = 'Document Loaders'
@@ -65,6 +66,20 @@ class NotionPage_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -72,6 +87,7 @@ class NotionPage_DocumentLoaders implements INode {
const pageId = nodeData.inputs?.pageId as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -130,7 +146,15 @@ class NotionPage_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { IDocument, ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'
import { getFileFromStorage } from '../../../src'
import { getFileFromStorage, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
class Pdf_DocumentLoaders implements INode {
label: string
@@ -14,11 +14,12 @@ class Pdf_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Pdf File'
this.name = 'pdfFile'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'pdf.svg'
this.category = 'Document Loaders'
@@ -80,6 +81,20 @@ class Pdf_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -89,6 +104,7 @@ class Pdf_DocumentLoaders implements INode {
const metadata = nodeData.inputs?.metadata
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -162,7 +178,15 @@ class Pdf_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
private async extractDocs(usage: string, bf: Buffer, legacyBuild: boolean, textSplitter: TextSplitter, docs: IDocument[]) {
@@ -8,7 +8,7 @@ import {
PlaywrightWebBaseLoaderOptions
} from '@langchain/community/document_loaders/web/playwright'
import { test } from 'linkifyjs'
import { webCrawl, xmlScrape } from '../../../src'
import { handleEscapeCharacters, INodeOutputsValue, webCrawl, xmlScrape } from '../../../src'
class Playwright_DocumentLoaders implements INode {
label: string
@@ -20,11 +20,12 @@ class Playwright_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Playwright Web Scraper'
this.name = 'playwrightWebScraper'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'playwright.svg'
this.category = 'Document Loaders'
@@ -132,6 +133,20 @@ class Playwright_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -143,6 +158,7 @@ class Playwright_DocumentLoaders implements INode {
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
let waitForSelector = nodeData.inputs?.waitForSelector as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -251,7 +267,15 @@ class Playwright_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -3,7 +3,7 @@ import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../.
import { TextSplitter } from 'langchain/text_splitter'
import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from '@langchain/community/document_loaders/web/puppeteer'
import { test } from 'linkifyjs'
import { webCrawl, xmlScrape } from '../../../src'
import { handleEscapeCharacters, INodeOutputsValue, webCrawl, xmlScrape } from '../../../src'
import { PuppeteerLifeCycleEvent } from 'puppeteer'
class Puppeteer_DocumentLoaders implements INode {
@@ -16,11 +16,12 @@ class Puppeteer_DocumentLoaders implements INode {
category: string
baseClasses: string[]
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Puppeteer Web Scraper'
this.name = 'puppeteerWebScraper'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'puppeteer.svg'
this.category = 'Document Loaders'
@@ -128,6 +129,20 @@ class Puppeteer_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -139,6 +154,7 @@ class Puppeteer_DocumentLoaders implements INode {
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
let waitForSelector = nodeData.inputs?.waitForSelector as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -247,7 +263,15 @@ class Puppeteer_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,6 +1,6 @@
import { omit } from 'lodash'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { S3Client, GetObjectCommand, S3ClientConfig, ListObjectsV2Command, ListObjectsV2Output } from '@aws-sdk/client-s3'
import { getRegions, MODEL_TYPE } from '../../../src/modelLoader'
import { Readable } from 'node:stream'
@@ -27,11 +27,12 @@ class S3_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs?: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'S3 Directory'
this.name = 's3Directory'
this.version = 3.0
this.version = 4.0
this.type = 'Document'
this.icon = 's3.svg'
this.category = 'Document Loaders'
@@ -117,6 +118,20 @@ class S3_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
loadMethods = {
@@ -134,6 +149,7 @@ class S3_DocumentLoaders implements INode {
const pdfUsage = nodeData.inputs?.pdfUsage
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -313,7 +329,15 @@ class S3_DocumentLoaders implements INode {
// remove the temp directory before returning docs
fsDefault.rmSync(tempDir, { recursive: true })
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
} catch (e: any) {
fsDefault.rmSync(tempDir, { recursive: true })
throw new Error(`Failed to load data from bucket ${bucketName}: ${e.message}`)
@@ -1,5 +1,5 @@
import { omit } from 'lodash'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { S3Loader } from '@langchain/community/document_loaders/web/s3'
import {
UnstructuredLoader,
@@ -8,7 +8,7 @@ import {
SkipInferTableTypes,
HiResModelName
} from '@langchain/community/document_loaders/fs/unstructured'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { S3Client, GetObjectCommand, S3ClientConfig } from '@aws-sdk/client-s3'
import { getRegions, MODEL_TYPE } from '../../../src/modelLoader'
import { Readable } from 'node:stream'
@@ -27,11 +27,12 @@ class S3_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs?: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'S3'
this.name = 'S3'
this.version = 3.0
this.version = 4.0
this.type = 'Document'
this.icon = 's3.svg'
this.category = 'Document Loaders'
@@ -434,6 +435,20 @@ class S3_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
loadMethods = {
@@ -466,6 +481,7 @@ class S3_DocumentLoaders implements INode {
const newAfterNChars = nodeData.inputs?.newAfterNChars as number
const maxCharacters = nodeData.inputs?.maxCharacters as number
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -594,7 +610,15 @@ class S3_DocumentLoaders implements INode {
fsDefault.rmSync(path.dirname(filePath), { recursive: true })
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
} catch {
fsDefault.rmSync(path.dirname(filePath), { recursive: true })
throw new Error(`Failed to load file ${filePath} using unstructured loader.`)
@@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { SearchApiLoader } from '@langchain/community/document_loaders/web/searchapi'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
// Provides access to multiple search engines using the SearchApi.
// For available parameters & engines, refer to: https://www.searchapi.io/docs/google
@@ -17,11 +17,12 @@ class SearchAPI_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'SearchApi For Web Search'
this.name = 'searchApi'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'searchapi.svg'
this.category = 'Document Loaders'
@@ -74,6 +75,20 @@ class SearchAPI_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -82,6 +97,7 @@ class SearchAPI_DocumentLoaders implements INode {
const customParameters = nodeData.inputs?.customParameters
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -146,7 +162,15 @@ class SearchAPI_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,8 +1,8 @@
import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { SerpAPILoader } from '@langchain/community/document_loaders/web/serpapi'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src'
import { ICommonObject, IDocument, INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
class SerpAPI_DocumentLoaders implements INode {
label: string
@@ -15,11 +15,12 @@ class SerpAPI_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'SerpApi For Web Search'
this.name = 'serpApi'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'serp.svg'
this.category = 'Document Loaders'
@@ -64,6 +65,20 @@ class SerpAPI_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -71,6 +86,7 @@ class SerpAPI_DocumentLoaders implements INode {
const query = nodeData.inputs?.query as string
const metadata = nodeData.inputs?.metadata
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -121,7 +137,15 @@ class SerpAPI_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -2,8 +2,8 @@ import { omit } from 'lodash'
import { TextSplitter } from 'langchain/text_splitter'
import { Document, DocumentInterface } from '@langchain/core/documents'
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
import { INode, INodeData, INodeParams, ICommonObject } from '../../../src/Interface'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import SpiderApp from './SpiderApp'
interface SpiderLoaderParameters {
@@ -85,11 +85,12 @@ class Spider_DocumentLoaders implements INode {
baseClasses: string[]
inputs: INodeParams[]
credential: INodeParams
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Spider Document Loaders'
this.name = 'spiderDocumentLoaders'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'spider.svg'
this.category = 'Document Loaders'
@@ -168,6 +169,20 @@ class Spider_DocumentLoaders implements INode {
type: 'credential',
credentialNames: ['spiderApi']
}
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -180,6 +195,7 @@ class Spider_DocumentLoaders implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const spiderApiKey = getCredentialParam('spiderApiKey', credentialData, nodeData)
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -244,7 +260,15 @@ class Spider_DocumentLoaders implements INode {
)
}))
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -7,8 +7,8 @@ import {
HiResModelName,
UnstructuredLoader as LCUnstructuredLoader
} from '@langchain/community/document_loaders/fs/unstructured'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { getFileFromStorage } from '../../../src'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { getFileFromStorage, INodeOutputsValue } from '../../../src'
import { UnstructuredLoader } from './Unstructured'
class UnstructuredFile_DocumentLoaders implements INode {
@@ -22,6 +22,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Unstructured File Loader'
@@ -434,6 +435,20 @@ class UnstructuredFile_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -457,6 +472,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
const newAfterNChars = nodeData.inputs?.newAfterNChars as string
const maxCharacters = nodeData.inputs?.maxCharacters as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -582,7 +598,15 @@ class UnstructuredFile_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}
@@ -1,5 +1,5 @@
import { omit } from 'lodash'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import {
UnstructuredDirectoryLoader,
UnstructuredLoaderOptions,
@@ -7,7 +7,7 @@ import {
SkipInferTableTypes,
HiResModelName
} from '@langchain/community/document_loaders/fs/unstructured'
import { getCredentialData, getCredentialParam } from '../../../src/utils'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
class UnstructuredFolder_DocumentLoaders implements INode {
label: string
@@ -20,11 +20,12 @@ class UnstructuredFolder_DocumentLoaders implements INode {
baseClasses: string[]
credential: INodeParams
inputs: INodeParams[]
outputs: INodeOutputsValue[]
constructor() {
this.label = 'Unstructured Folder Loader'
this.name = 'unstructuredFolderLoader'
this.version = 2.0
this.version = 3.0
this.type = 'Document'
this.icon = 'unstructured-folder.svg'
this.category = 'Document Loaders'
@@ -400,6 +401,20 @@ class UnstructuredFolder_DocumentLoaders implements INode {
additionalParams: true
}
]
this.outputs = [
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
}
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
@@ -423,6 +438,7 @@ class UnstructuredFolder_DocumentLoaders implements INode {
const newAfterNChars = nodeData.inputs?.newAfterNChars as number
const maxCharacters = nodeData.inputs?.maxCharacters as number
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
let omitMetadataKeys: string[] = []
if (_omitMetadataKeys) {
@@ -487,7 +503,15 @@ class UnstructuredFolder_DocumentLoaders implements INode {
}))
}
return docs
if (output === 'document') {
return docs
} else {
let finaltext = ''
for (const doc of docs) {
finaltext += `${doc.pageContent}\n`
}
return handleEscapeCharacters(finaltext, false)
}
}
}