From 7486d33237c46bc0112fb7b3fb3e48f6cd1c89bf Mon Sep 17 00:00:00 2001 From: Ilango Date: Tue, 6 Feb 2024 10:36:27 +0530 Subject: [PATCH 1/4] Fix issue with relativeLinksMethod and limit not applying to manage links --- packages/server/src/index.ts | 3 ++- packages/ui/src/api/scraper.js | 6 +++--- .../src/ui-component/dialog/ManageScrapedLinksDialog.js | 2 +- packages/ui/src/views/canvas/NodeInputHandler.js | 8 ++++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index dbb5717d..7ceba556 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -1148,8 +1148,9 @@ export class App { this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => { const url = decodeURIComponent(req.query.url as string) const relativeLinksMethod = req.query.relativeLinksMethod as string + const limit = parseInt(req.query.limit as string) if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) - const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, 0) : await xmlScrape(url, 0) + const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) res.json({ status: 'OK', links }) }) diff --git a/packages/ui/src/api/scraper.js b/packages/ui/src/api/scraper.js index 382a9263..89333156 100644 --- a/packages/ui/src/api/scraper.js +++ b/packages/ui/src/api/scraper.js @@ -1,8 +1,8 @@ import client from './client' -const fetchAllLinks = (url, relativeLinksMethod) => - client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}`) +const fetchLinks = (url, relativeLinksMethod, relativeLinksLimit) => + client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}&limit=${relativeLinksLimit}`) export default { - fetchAllLinks + fetchLinks } diff --git a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js index a707d82e..9a846ce9 100644 --- a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js +++ b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js @@ -53,7 +53,7 @@ const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => { const handleFetchLinks = async () => { setLoading(true) - const fetchLinksResp = await scraperApi.fetchAllLinks(url, 'webCrawl') + const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit) if (fetchLinksResp.data) { setSelectedLinks(fetchLinksResp.data.links) } diff --git a/packages/ui/src/views/canvas/NodeInputHandler.js b/packages/ui/src/views/canvas/NodeInputHandler.js index bc877c9f..560fb34e 100644 --- a/packages/ui/src/views/canvas/NodeInputHandler.js +++ b/packages/ui/src/views/canvas/NodeInputHandler.js @@ -91,9 +91,11 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA } } - const onManageLinksDialogClicked = (url, selectedLinks) => { + const onManageLinksDialogClicked = (url, selectedLinks, relativeLinksMethod, limit) => { const dialogProps = { url, + relativeLinksMethod, + limit, selectedLinks, confirmButtonName: 'Save', cancelButtonName: 'Cancel' @@ -475,7 +477,9 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA onClick={() => onManageLinksDialogClicked( data.inputs[inputParam.name] ?? inputParam.default ?? '', - data.inputs.selectedLinks + data.inputs.selectedLinks, + data.inputs['relativeLinksMethod'] ?? 'webCrawl', + parseInt(data.inputs['limit']) ?? 0 ) } > From c2ae7e138cbf8a4355cafc109df5b7b5a0eb0a21 Mon Sep 17 00:00:00 2001 From: Ilango Date: Tue, 6 Feb 2024 14:40:19 +0530 Subject: [PATCH 2/4] Apply limit to selectedLinks even when relative links method is not specified --- packages/components/nodes/documentloaders/Cheerio/Cheerio.ts | 2 +- .../components/nodes/documentloaders/Playwright/Playwright.ts | 2 +- .../components/nodes/documentloaders/Puppeteer/Puppeteer.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts index 3eba0ece..6af1f9a9 100644 --- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts +++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts @@ -143,7 +143,7 @@ class Cheerio_DocumentLoaders implements INode { } else if (selectedLinks && selectedLinks.length > 0) { if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) - for (const page of selectedLinks) { + for (const page of selectedLinks.slice(0, limit)) { docs.push(...(await cheerioLoader(page))) } } else { diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index 2de166ce..2ba60d0f 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -184,7 +184,7 @@ class Playwright_DocumentLoaders implements INode { } else if (selectedLinks && selectedLinks.length > 0) { if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) - for (const page of selectedLinks) { + for (const page of selectedLinks.slice(0, limit)) { docs.push(...(await playwrightLoader(page))) } } else { diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index 3d28f310..1f8c8f3f 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -185,7 +185,7 @@ class Puppeteer_DocumentLoaders implements INode { } else if (selectedLinks && selectedLinks.length > 0) { if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) - for (const page of selectedLinks) { + for (const page of selectedLinks.slice(0, limit)) { docs.push(...(await puppeteerLoader(page))) } } else { From 4be28c4050135b6fc2354e43efccf30d3787acbc Mon Sep 17 00:00:00 2001 From: chungyau97 Date: Wed, 7 Feb 2024 19:32:48 +0800 Subject: [PATCH 3/4] add finish log --- packages/server/src/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 7ceba556..bd44b739 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -1151,6 +1151,7 @@ export class App { const limit = parseInt(req.query.limit as string) if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) + if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`) res.json({ status: 'OK', links }) }) From 5471a4c9aa48068a7aecf6433dc7e7da6bd8d973 Mon Sep 17 00:00:00 2001 From: Ilango Date: Mon, 12 Feb 2024 12:01:19 +0530 Subject: [PATCH 4/4] Show error when relative links method is not set and allow 0 as limit value --- .../nodes/documentloaders/Cheerio/Cheerio.ts | 4 +- .../documentloaders/Playwright/Playwright.ts | 4 +- .../documentloaders/Puppeteer/Puppeteer.ts | 4 +- packages/server/src/index.ts | 4 ++ .../dialog/ManageScrapedLinksDialog.js | 50 +++++++++++++++++-- 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts index 6af1f9a9..48ae85bc 100644 --- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts +++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts @@ -126,7 +126,9 @@ class Cheerio_DocumentLoaders implements INode { let docs = [] if (relativeLinksMethod) { if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) - if (!limit) limit = 10 + // if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined + // so when limit is 0 we can fetch all the links + if (limit === null || limit === undefined) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = selectedLinks && selectedLinks.length > 0 diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index 2ba60d0f..55fa9608 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -167,7 +167,9 @@ class Playwright_DocumentLoaders implements INode { let docs = [] if (relativeLinksMethod) { if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) - if (!limit) limit = 10 + // if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined + // so when limit is 0 we can fetch all the links + if (limit === null || limit === undefined) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = selectedLinks && selectedLinks.length > 0 diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index 1f8c8f3f..90b5a277 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -168,7 +168,9 @@ class Puppeteer_DocumentLoaders implements INode { let docs = [] if (relativeLinksMethod) { if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) - if (!limit) limit = 10 + // if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined + // so when limit is 0 we can fetch all the links + if (limit === null || limit === undefined) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = selectedLinks && selectedLinks.length > 0 diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 7ceba556..b994ba62 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -1148,6 +1148,10 @@ export class App { this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => { const url = decodeURIComponent(req.query.url as string) const relativeLinksMethod = req.query.relativeLinksMethod as string + if (!relativeLinksMethod) { + return res.status(500).send('Please choose a Relative Links Method in Additional Parameters.') + } + const limit = parseInt(req.query.limit as string) if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) diff --git a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js index 9a846ce9..a4199504 100644 --- a/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js +++ b/packages/ui/src/ui-component/dialog/ManageScrapedLinksDialog.js @@ -16,7 +16,7 @@ import { Stack, Typography } from '@mui/material' -import { IconTrash } from '@tabler/icons' +import { IconTrash, IconX } from '@tabler/icons' import PerfectScrollbar from 'react-perfect-scrollbar' import { BackdropLoader } from 'ui-component/loading/BackdropLoader' @@ -24,12 +24,23 @@ import { StyledButton } from 'ui-component/button/StyledButton' import scraperApi from 'api/scraper' -import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from 'store/actions' +import useNotifier from 'utils/useNotifier' + +import { + HIDE_CANVAS_DIALOG, + SHOW_CANVAS_DIALOG, + enqueueSnackbar as enqueueSnackbarAction, + closeSnackbar as closeSnackbarAction +} from 'store/actions' const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => { const portalElement = document.getElementById('portal') const dispatch = useDispatch() + useNotifier() + const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args)) + const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args)) + const [loading, setLoading] = useState(false) const [selectedLinks, setSelectedLinks] = useState([]) const [url, setUrl] = useState('') @@ -53,9 +64,38 @@ const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => { const handleFetchLinks = async () => { setLoading(true) - const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit) - if (fetchLinksResp.data) { - setSelectedLinks(fetchLinksResp.data.links) + try { + const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit) + if (fetchLinksResp.data) { + setSelectedLinks(fetchLinksResp.data.links) + enqueueSnackbar({ + message: 'Successfully fetched links', + options: { + key: new Date().getTime() + Math.random(), + variant: 'success', + action: (key) => ( + + ) + } + }) + } + } catch (error) { + const errorData = error.response.data || `${error.response.status}: ${error.response.statusText}` + enqueueSnackbar({ + message: errorData, + options: { + key: new Date().getTime() + Math.random(), + variant: 'error', + persist: true, + action: (key) => ( + + ) + } + }) } setLoading(false) }