mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-22 11:01:22 +03:00
Merge pull request #1687 from 0xi4o/bug/scrap-limit
Fix: relative links method and limit not applying to manage links
This commit is contained in:
@@ -126,7 +126,9 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
let docs = []
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||
if (!limit) limit = 10
|
||||
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||
// so when limit is 0 we can fetch all the links
|
||||
if (limit === null || limit === undefined) limit = 10
|
||||
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||
const pages: string[] =
|
||||
selectedLinks && selectedLinks.length > 0
|
||||
@@ -143,7 +145,7 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||
if (process.env.DEBUG === 'true')
|
||||
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||
for (const page of selectedLinks) {
|
||||
for (const page of selectedLinks.slice(0, limit)) {
|
||||
docs.push(...(await cheerioLoader(page)))
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -167,7 +167,9 @@ class Playwright_DocumentLoaders implements INode {
|
||||
let docs = []
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||
if (!limit) limit = 10
|
||||
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||
// so when limit is 0 we can fetch all the links
|
||||
if (limit === null || limit === undefined) limit = 10
|
||||
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||
const pages: string[] =
|
||||
selectedLinks && selectedLinks.length > 0
|
||||
@@ -184,7 +186,7 @@ class Playwright_DocumentLoaders implements INode {
|
||||
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||
if (process.env.DEBUG === 'true')
|
||||
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||
for (const page of selectedLinks) {
|
||||
for (const page of selectedLinks.slice(0, limit)) {
|
||||
docs.push(...(await playwrightLoader(page)))
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -168,7 +168,9 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
let docs = []
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||
if (!limit) limit = 10
|
||||
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||
// so when limit is 0 we can fetch all the links
|
||||
if (limit === null || limit === undefined) limit = 10
|
||||
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||
const pages: string[] =
|
||||
selectedLinks && selectedLinks.length > 0
|
||||
@@ -185,7 +187,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||
if (process.env.DEBUG === 'true')
|
||||
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||
for (const page of selectedLinks) {
|
||||
for (const page of selectedLinks.slice(0, limit)) {
|
||||
docs.push(...(await puppeteerLoader(page)))
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -1149,8 +1149,14 @@ export class App {
|
||||
this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => {
|
||||
const url = decodeURIComponent(req.query.url as string)
|
||||
const relativeLinksMethod = req.query.relativeLinksMethod as string
|
||||
if (!relativeLinksMethod) {
|
||||
return res.status(500).send('Please choose a Relative Links Method in Additional Parameters.')
|
||||
}
|
||||
|
||||
const limit = parseInt(req.query.limit as string)
|
||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||
const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, 0) : await xmlScrape(url, 0)
|
||||
const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit)
|
||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
||||
|
||||
res.json({ status: 'OK', links })
|
||||
})
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import client from './client'
|
||||
|
||||
const fetchAllLinks = (url, relativeLinksMethod) =>
|
||||
client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}`)
|
||||
const fetchLinks = (url, relativeLinksMethod, relativeLinksLimit) =>
|
||||
client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${relativeLinksMethod}&limit=${relativeLinksLimit}`)
|
||||
|
||||
export default {
|
||||
fetchAllLinks
|
||||
fetchLinks
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ import {
|
||||
Stack,
|
||||
Typography
|
||||
} from '@mui/material'
|
||||
import { IconTrash } from '@tabler/icons'
|
||||
import { IconTrash, IconX } from '@tabler/icons'
|
||||
import PerfectScrollbar from 'react-perfect-scrollbar'
|
||||
|
||||
import { BackdropLoader } from 'ui-component/loading/BackdropLoader'
|
||||
@@ -24,12 +24,23 @@ import { StyledButton } from 'ui-component/button/StyledButton'
|
||||
|
||||
import scraperApi from 'api/scraper'
|
||||
|
||||
import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from 'store/actions'
|
||||
import useNotifier from 'utils/useNotifier'
|
||||
|
||||
import {
|
||||
HIDE_CANVAS_DIALOG,
|
||||
SHOW_CANVAS_DIALOG,
|
||||
enqueueSnackbar as enqueueSnackbarAction,
|
||||
closeSnackbar as closeSnackbarAction
|
||||
} from 'store/actions'
|
||||
|
||||
const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
|
||||
const portalElement = document.getElementById('portal')
|
||||
const dispatch = useDispatch()
|
||||
|
||||
useNotifier()
|
||||
const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args))
|
||||
const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args))
|
||||
|
||||
const [loading, setLoading] = useState(false)
|
||||
const [selectedLinks, setSelectedLinks] = useState([])
|
||||
const [url, setUrl] = useState('')
|
||||
@@ -53,9 +64,38 @@ const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
|
||||
|
||||
const handleFetchLinks = async () => {
|
||||
setLoading(true)
|
||||
const fetchLinksResp = await scraperApi.fetchAllLinks(url, 'webCrawl')
|
||||
if (fetchLinksResp.data) {
|
||||
setSelectedLinks(fetchLinksResp.data.links)
|
||||
try {
|
||||
const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit)
|
||||
if (fetchLinksResp.data) {
|
||||
setSelectedLinks(fetchLinksResp.data.links)
|
||||
enqueueSnackbar({
|
||||
message: 'Successfully fetched links',
|
||||
options: {
|
||||
key: new Date().getTime() + Math.random(),
|
||||
variant: 'success',
|
||||
action: (key) => (
|
||||
<Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
|
||||
<IconX />
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
} catch (error) {
|
||||
const errorData = error.response.data || `${error.response.status}: ${error.response.statusText}`
|
||||
enqueueSnackbar({
|
||||
message: errorData,
|
||||
options: {
|
||||
key: new Date().getTime() + Math.random(),
|
||||
variant: 'error',
|
||||
persist: true,
|
||||
action: (key) => (
|
||||
<Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
|
||||
<IconX />
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
setLoading(false)
|
||||
}
|
||||
|
||||
@@ -91,9 +91,11 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
||||
}
|
||||
}
|
||||
|
||||
const onManageLinksDialogClicked = (url, selectedLinks) => {
|
||||
const onManageLinksDialogClicked = (url, selectedLinks, relativeLinksMethod, limit) => {
|
||||
const dialogProps = {
|
||||
url,
|
||||
relativeLinksMethod,
|
||||
limit,
|
||||
selectedLinks,
|
||||
confirmButtonName: 'Save',
|
||||
cancelButtonName: 'Cancel'
|
||||
@@ -475,7 +477,9 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
||||
onClick={() =>
|
||||
onManageLinksDialogClicked(
|
||||
data.inputs[inputParam.name] ?? inputParam.default ?? '',
|
||||
data.inputs.selectedLinks
|
||||
data.inputs.selectedLinks,
|
||||
data.inputs['relativeLinksMethod'] ?? 'webCrawl',
|
||||
parseInt(data.inputs['limit']) ?? 0
|
||||
)
|
||||
}
|
||||
>
|
||||
|
||||
Reference in New Issue
Block a user