diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts
index 3eba0ece..48ae85bc 100644
--- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts
+++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts
@@ -126,7 +126,9 @@ class Cheerio_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
- if (!limit) limit = 10
+ // Default to 10 only when no usable limit was supplied (null, undefined or NaN, e.g. a failed numeric parse);
+ // an explicit 0 is kept as-is so that a limit of 0 can mean "fetch all the links"
+ if (limit === null || limit === undefined || Number.isNaN(limit)) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
selectedLinks && selectedLinks.length > 0
@@ -143,7 +145,8 @@ class Cheerio_DocumentLoaders implements INode {
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
- for (const page of selectedLinks) {
+ // Only cap for a positive limit: slice(0, 0) or slice(0, NaN) would drop every link, but a limit of 0 means "all links"
+ for (const page of limit > 0 ? selectedLinks.slice(0, limit) : selectedLinks) {
docs.push(...(await cheerioLoader(page)))
}
} else {
diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts
index 2de166ce..55fa9608 100644
--- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts
+++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts
@@ -167,7 +167,9 @@ class Playwright_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
- if (!limit) limit = 10
+ // Default to 10 only when no usable limit was supplied (null, undefined or NaN, e.g. a failed numeric parse);
+ // an explicit 0 is kept as-is so that a limit of 0 can mean "fetch all the links"
+ if (limit === null || limit === undefined || Number.isNaN(limit)) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
selectedLinks && selectedLinks.length > 0
@@ -184,7 +186,8 @@ class Playwright_DocumentLoaders implements INode {
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
- for (const page of selectedLinks) {
+ // Only cap for a positive limit: slice(0, 0) or slice(0, NaN) would drop every link, but a limit of 0 means "all links"
+ for (const page of limit > 0 ? selectedLinks.slice(0, limit) : selectedLinks) {
docs.push(...(await playwrightLoader(page)))
}
} else {
diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts
index 3d28f310..90b5a277 100644
--- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts
+++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts
@@ -168,7 +168,9 @@ class Puppeteer_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
- if (!limit) limit = 10
+ // Default to 10 only when no usable limit was supplied (null, undefined or NaN, e.g. a failed numeric parse);
+ // an explicit 0 is kept as-is so that a limit of 0 can mean "fetch all the links"
+ if (limit === null || limit === undefined || Number.isNaN(limit)) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
selectedLinks && selectedLinks.length > 0
@@ -185,7 +187,8 @@ class Puppeteer_DocumentLoaders implements INode {
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
- for (const page of selectedLinks) {
+ // Only cap for a positive limit: slice(0, 0) or slice(0, NaN) would drop every link, but a limit of 0 means "all links"
+ for (const page of limit > 0 ? selectedLinks.slice(0, limit) : selectedLinks) {
docs.push(...(await puppeteerLoader(page)))
}
} else {
diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index 973ce1ea..07797f32 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -1149,8 +1149,16 @@ export class App {
this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => {
const url = decodeURIComponent(req.query.url as string)
const relativeLinksMethod = req.query.relativeLinksMethod as string
+ if (!relativeLinksMethod) {
+ return res.status(500).send('Please choose a Relative Links Method in Additional Parameters.')
+ }
+
+ // parseInt gives NaN for a missing or non-numeric limit; fall back to 0, the value this route always passed before
+ const parsedLimit = parseInt(req.query.limit as string, 10)
+ const limit = Number.isNaN(parsedLimit) ? 0 : parsedLimit
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
- const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, 0) : await xmlScrape(url, 0)
+ const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit)
+ if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
res.json({ status: 'OK', links })
})
diff --git a/packages/ui/src/api/scraper.js b/packages/ui/src/api/scraper.js
index 382a9263..89333156 100644
--- a/packages/ui/src/api/scraper.js
+++ b/packages/ui/src/api/scraper.js
@@ -1,8 +1,11 @@
import client from './client'
-const fetchAllLinks = (url, relativeLinksMethod) =>
- client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}`)
+// GET /fetch-links for `url`, passing the chosen relative links method and the link limit as query parameters
+const fetchLinks = (url, relativeLinksMethod, relativeLinksLimit) => {
+ const encodedUrl = encodeURIComponent(url)
+ return client.get(`/fetch-links?url=${encodedUrl}&relativeLinksMethod=${relativeLinksMethod}&limit=${relativeLinksLimit}`)
+}
export default {
- fetchAllLinks
+ fetchLinks
}
diff --git a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js
index a707d82e..a4199504 100644
--- a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js
+++ b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js
@@ -16,7 +16,7 @@ import {
Stack,
Typography
} from '@mui/material'
-import { IconTrash } from '@tabler/icons'
+import { IconTrash, IconX } from '@tabler/icons'
import PerfectScrollbar from 'react-perfect-scrollbar'
import { BackdropLoader } from 'ui-component/loading/BackdropLoader'
@@ -24,12 +24,23 @@ import { StyledButton } from 'ui-component/button/StyledButton'
import scraperApi from 'api/scraper'
-import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from 'store/actions'
+import useNotifier from 'utils/useNotifier'
+
+import {
+ HIDE_CANVAS_DIALOG,
+ SHOW_CANVAS_DIALOG,
+ enqueueSnackbar as enqueueSnackbarAction,
+ closeSnackbar as closeSnackbarAction
+} from 'store/actions'
const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
const portalElement = document.getElementById('portal')
const dispatch = useDispatch()
+ useNotifier()
+ const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args))
+ const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args))
+
const [loading, setLoading] = useState(false)
const [selectedLinks, setSelectedLinks] = useState([])
const [url, setUrl] = useState('')
@@ -53,9 +64,40 @@ const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
const handleFetchLinks = async () => {
setLoading(true)
- const fetchLinksResp = await scraperApi.fetchAllLinks(url, 'webCrawl')
- if (fetchLinksResp.data) {
- setSelectedLinks(fetchLinksResp.data.links)
+ try {
+ const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit)
+ if (fetchLinksResp.data) {
+ setSelectedLinks(fetchLinksResp.data.links)
+ enqueueSnackbar({
+ message: 'Successfully fetched links',
+ options: {
+ key: new Date().getTime() + Math.random(),
+ variant: 'success',
+ action: (key) => (
+ <Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
+ <IconX />
+ </Button>
+ )
+ }
+ })
+ }
+ } catch (error) {
+ // error.response may be missing (any error thrown before an HTTP response was received), so fall back to error.message
+ const { response } = error
+ const errorData = response ? response.data || `${response.status}: ${response.statusText}` : error.message
+ enqueueSnackbar({
+ message: errorData,
+ options: {
+ key: new Date().getTime() + Math.random(),
+ variant: 'error',
+ persist: true,
+ action: (key) => (
+ <Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
+ <IconX />
+ </Button>
+ )
+ }
+ })
}
setLoading(false)
}
diff --git a/packages/ui/src/views/canvas/NodeInputHandler.js b/packages/ui/src/views/canvas/NodeInputHandler.js
index bc877c9f..560fb34e 100644
--- a/packages/ui/src/views/canvas/NodeInputHandler.js
+++ b/packages/ui/src/views/canvas/NodeInputHandler.js
@@ -91,9 +91,11 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
}
}
- const onManageLinksDialogClicked = (url, selectedLinks) => {
+ const onManageLinksDialogClicked = (url, selectedLinks, relativeLinksMethod, limit) => {
const dialogProps = {
url,
+ relativeLinksMethod,
+ limit,
selectedLinks,
confirmButtonName: 'Save',
cancelButtonName: 'Cancel'
@@ -475,7 +477,10 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
onClick={() =>
onManageLinksDialogClicked(
data.inputs[inputParam.name] ?? inputParam.default ?? '',
- data.inputs.selectedLinks
+ data.inputs.selectedLinks,
+ data.inputs['relativeLinksMethod'] ?? 'webCrawl',
+ // parseInt returns NaN, never null/undefined, for a missing limit, so use || (not ??) to fall back to 0
+ parseInt(data.inputs['limit'], 10) || 0
)
}
>