mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-29 03:01:10 +03:00
Add descriptions to the relative-links options and fall back to a limit of 10 when the limit is left empty
This commit is contained in:
@@ -38,28 +38,31 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
label: 'Get Relative Links Method',
|
label: 'Get Relative Links Method',
|
||||||
name: 'relativeLinksMethod',
|
name: 'relativeLinksMethod',
|
||||||
type: 'options',
|
type: 'options',
|
||||||
|
description: 'Select a method to retrieve relative links',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
label: 'Web Crawl',
|
label: 'Web Crawl',
|
||||||
name: 'webCrawl'
|
name: 'webCrawl',
|
||||||
|
description: 'Crawl relative links from HTML URL'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Scrape XML Sitemap',
|
label: 'Scrape XML Sitemap',
|
||||||
name: 'scrapeXMLSitemap'
|
name: 'scrapeXMLSitemap',
|
||||||
|
description: 'Scrape relative links from XML sitemap URL'
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Crawl/Scrape Links Limit',
|
label: 'Get Relative Links Limit',
|
||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'number',
|
type: 'number',
|
||||||
default: 10,
|
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
description: 'Set 0 to crawl/scrape all relative links',
|
description:
|
||||||
warning: `Scraping all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc) `
|
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||||
|
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Metadata',
|
label: 'Metadata',
|
||||||
@@ -101,7 +104,7 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) throw new Error('Please set a limit to crawl/scrape')
|
if (!limit) limit = '10'
|
||||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
||||||
|
|||||||
@@ -38,28 +38,31 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
label: 'Get Relative Links Method',
|
label: 'Get Relative Links Method',
|
||||||
name: 'relativeLinksMethod',
|
name: 'relativeLinksMethod',
|
||||||
type: 'options',
|
type: 'options',
|
||||||
|
description: 'Select a method to retrieve relative links',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
label: 'Web Crawl',
|
label: 'Web Crawl',
|
||||||
name: 'webCrawl'
|
name: 'webCrawl',
|
||||||
|
description: 'Crawl relative links from HTML URL'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Scrape XML Sitemap',
|
label: 'Scrape XML Sitemap',
|
||||||
name: 'scrapeXMLSitemap'
|
name: 'scrapeXMLSitemap',
|
||||||
|
description: 'Scrape relative links from XML sitemap URL'
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Crawl/Scrape Links Limit',
|
label: 'Get Relative Links Limit',
|
||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'number',
|
type: 'number',
|
||||||
default: 10,
|
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
description: 'Set 0 to crawl/scrape all relative links',
|
description:
|
||||||
warning: `Scraping all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc) `
|
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||||
|
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Metadata',
|
label: 'Metadata',
|
||||||
@@ -101,7 +104,7 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) throw new Error('Please set a limit to crawl/scrape')
|
if (!limit) limit = '10'
|
||||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
||||||
|
|||||||
@@ -38,28 +38,31 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
label: 'Get Relative Links Method',
|
label: 'Get Relative Links Method',
|
||||||
name: 'relativeLinksMethod',
|
name: 'relativeLinksMethod',
|
||||||
type: 'options',
|
type: 'options',
|
||||||
|
description: 'Select a method to retrieve relative links',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
label: 'Web Crawl',
|
label: 'Web Crawl',
|
||||||
name: 'webCrawl'
|
name: 'webCrawl',
|
||||||
|
description: 'Crawl relative links from HTML URL'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Scrape XML Sitemap',
|
label: 'Scrape XML Sitemap',
|
||||||
name: 'scrapeXMLSitemap'
|
name: 'scrapeXMLSitemap',
|
||||||
|
description: 'Scrape relative links from XML sitemap URL'
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Crawl/Scrape Links Limit',
|
label: 'Get Relative Links Limit',
|
||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'number',
|
type: 'number',
|
||||||
default: 10,
|
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
description: 'Set 0 to crawl/scrape all relative links',
|
description:
|
||||||
warning: `Scraping all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc) `
|
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||||
|
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'Metadata',
|
label: 'Metadata',
|
||||||
@@ -106,7 +109,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) throw new Error('Please set a limit to crawl/scrape')
|
if (!limit) limit = '10'
|
||||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
||||||
|
|||||||
Reference in New Issue
Block a user