diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts index 3eba0ece..48ae85bc 100644 --- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts +++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts @@ -126,7 +126,9 @@ class Cheerio_DocumentLoaders implements INode { let docs = [] if (relativeLinksMethod) { if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) - if (!limit) limit = 10 + // if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined + // so when limit is 0 we can fetch all the links + if (limit === null || limit === undefined) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = selectedLinks && selectedLinks.length > 0 @@ -143,7 +145,7 @@ class Cheerio_DocumentLoaders implements INode { } else if (selectedLinks && selectedLinks.length > 0) { if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) - for (const page of selectedLinks) { + for (const page of selectedLinks.slice(0, limit)) { docs.push(...(await cheerioLoader(page))) } } else { diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index 2de166ce..55fa9608 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -167,7 +167,9 @@ class Playwright_DocumentLoaders implements INode { let docs = [] if (relativeLinksMethod) { if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) - if (!limit) limit = 10 + // if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined + // so when limit is 0 we can fetch all the links + if (limit === null || limit === undefined) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = selectedLinks && selectedLinks.length > 0 @@ -184,7 +186,7 @@ class Playwright_DocumentLoaders implements INode { } else if (selectedLinks && selectedLinks.length > 0) { if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) - for (const page of selectedLinks) { + for (const page of selectedLinks.slice(0, limit)) { docs.push(...(await playwrightLoader(page))) } } else { diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index 3d28f310..90b5a277 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -168,7 +168,9 @@ class Puppeteer_DocumentLoaders implements INode { let docs = [] if (relativeLinksMethod) { if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) - if (!limit) limit = 10 + // if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined + // so when limit is 0 we can fetch all the links + if (limit === null || limit === undefined) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = selectedLinks && selectedLinks.length > 0 @@ -185,7 +187,7 @@ class Puppeteer_DocumentLoaders implements INode { } else if (selectedLinks && selectedLinks.length > 0) { if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) - for (const page of selectedLinks) { + for (const page of selectedLinks.slice(0, limit)) { docs.push(...(await puppeteerLoader(page))) } } else { diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 973ce1ea..07797f32 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -1149,8 +1149,14 @@ export class App { this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => { const url = decodeURIComponent(req.query.url as string) const relativeLinksMethod = req.query.relativeLinksMethod as string + if (!relativeLinksMethod) { + return res.status(500).send('Please choose a Relative Links Method in Additional Parameters.') + } + + const limit = parseInt(req.query.limit as string) if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) - const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, 0) : await xmlScrape(url, 0) + const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) + if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`) res.json({ status: 'OK', links }) }) diff --git a/packages/ui/src/api/scraper.js b/packages/ui/src/api/scraper.js index 382a9263..89333156 100644 --- a/packages/ui/src/api/scraper.js +++ b/packages/ui/src/api/scraper.js @@ -1,8 +1,8 @@ import client from './client' -const fetchAllLinks = (url, relativeLinksMethod) => - client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}`) +const fetchLinks = (url, relativeLinksMethod, relativeLinksLimit) => + client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}&limit=${relativeLinksLimit}`) export default { - fetchAllLinks + fetchLinks } diff --git a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js index a707d82e..a4199504 100644 --- a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js +++ b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js @@ -16,7 +16,7 @@ import { Stack, Typography } from '@mui/material' -import { IconTrash } from '@tabler/icons' +import { IconTrash, IconX } from '@tabler/icons' import PerfectScrollbar from 'react-perfect-scrollbar' import { BackdropLoader } from 'ui-component/loading/BackdropLoader' @@ -24,12 +24,23 @@ import { StyledButton } from 'ui-component/button/StyledButton' import scraperApi from 'api/scraper' -import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from 'store/actions' +import useNotifier from 'utils/useNotifier' + +import { + HIDE_CANVAS_DIALOG, + SHOW_CANVAS_DIALOG, + enqueueSnackbar as enqueueSnackbarAction, + closeSnackbar as closeSnackbarAction +} from 'store/actions' const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => { const portalElement = document.getElementById('portal') const dispatch = useDispatch() + useNotifier() + const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args)) + const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args)) + const [loading, setLoading] = useState(false) const [selectedLinks, setSelectedLinks] = useState([]) const [url, setUrl] = useState('') @@ -53,9 +64,38 @@ const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => { const handleFetchLinks = async () => { setLoading(true) - const fetchLinksResp = await scraperApi.fetchAllLinks(url, 'webCrawl') - if (fetchLinksResp.data) { - setSelectedLinks(fetchLinksResp.data.links) + try { + const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit) + if (fetchLinksResp.data) { + setSelectedLinks(fetchLinksResp.data.links) + enqueueSnackbar({ + message: 'Successfully fetched links', + options: { + key: new Date().getTime() + Math.random(), + variant: 'success', + action: (key) => ( + + ) + } + }) + } + } catch (error) { + const errorData = error.response.data || `${error.response.status}: ${error.response.statusText}` + enqueueSnackbar({ + message: errorData, + options: { + key: new Date().getTime() + Math.random(), + variant: 'error', + persist: true, + action: (key) => ( + + ) + } + }) } setLoading(false) } diff --git a/packages/ui/src/views/canvas/NodeInputHandler.js b/packages/ui/src/views/canvas/NodeInputHandler.js index bc877c9f..560fb34e 100644 --- a/packages/ui/src/views/canvas/NodeInputHandler.js +++ b/packages/ui/src/views/canvas/NodeInputHandler.js @@ -91,9 +91,11 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA } } - const onManageLinksDialogClicked = (url, selectedLinks) => { + const onManageLinksDialogClicked = (url, selectedLinks, relativeLinksMethod, limit) => { const dialogProps = { url, + relativeLinksMethod, + limit, selectedLinks, confirmButtonName: 'Save', cancelButtonName: 'Cancel' @@ -475,7 +477,9 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA onClick={() => onManageLinksDialogClicked( data.inputs[inputParam.name] ?? inputParam.default ?? '', - data.inputs.selectedLinks + data.inputs.selectedLinks, + data.inputs['relativeLinksMethod'] ?? 'webCrawl', + parseInt(data.inputs['limit']) ?? 0 ) } >