Merge branch 'main' into chore/Upgrade-LC-version

This commit is contained in:
Henry
2024-02-19 17:39:32 +08:00
94 changed files with 2265 additions and 702 deletions
@@ -126,7 +126,9 @@ class Cheerio_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = 10
// A limit of 0 means "no limit" (fetch all the links), so a plain falsy check would wrongly
// turn 0 into 10. Default to 10 only when limit is null or undefined.
if (limit === null || limit === undefined) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
selectedLinks && selectedLinks.length > 0
@@ -143,7 +145,7 @@ class Cheerio_DocumentLoaders implements INode {
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
for (const page of selectedLinks) {
for (const page of selectedLinks.slice(0, limit)) {
docs.push(...(await cheerioLoader(page)))
}
} else {
@@ -51,11 +51,13 @@ class PlainText_DocumentLoaders implements INode {
{
label: 'Document',
name: 'document',
baseClasses: this.baseClasses
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
@@ -167,7 +167,9 @@ class Playwright_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = 10
// A limit of 0 means "no limit" (fetch all the links), so a plain falsy check would wrongly
// turn 0 into 10. Default to 10 only when limit is null or undefined.
if (limit === null || limit === undefined) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
selectedLinks && selectedLinks.length > 0
@@ -184,7 +186,7 @@ class Playwright_DocumentLoaders implements INode {
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
for (const page of selectedLinks) {
for (const page of selectedLinks.slice(0, limit)) {
docs.push(...(await playwrightLoader(page)))
}
} else {
@@ -168,7 +168,9 @@ class Puppeteer_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = 10
// A limit of 0 means "no limit" (fetch all the links), so a plain falsy check would wrongly
// turn 0 into 10. Default to 10 only when limit is null or undefined.
if (limit === null || limit === undefined) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
selectedLinks && selectedLinks.length > 0
@@ -185,7 +187,7 @@ class Puppeteer_DocumentLoaders implements INode {
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
for (const page of selectedLinks) {
for (const page of selectedLinks.slice(0, limit)) {
docs.push(...(await puppeteerLoader(page)))
}
} else {
@@ -51,11 +51,13 @@ class Text_DocumentLoaders implements INode {
{
label: 'Document',
name: 'document',
baseClasses: this.baseClasses
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]
@@ -51,11 +51,13 @@ class VectorStoreToDocument_DocumentLoaders implements INode {
{
label: 'Document',
name: 'document',
description: 'Array of document objects containing metadata and pageContent',
baseClasses: [...this.baseClasses, 'json']
},
{
label: 'Text',
name: 'text',
description: 'Concatenated string from pageContent of documents',
baseClasses: ['string', 'json']
}
]