mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 21:00:58 +03:00
Merge pull request #1687 from 0xi4o/bug/scrap-limit
Fix: relative links method and limit not applying to manage links
This commit is contained in:
@@ -126,7 +126,9 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) limit = 10
|
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||||
|
// so when limit is 0 we can fetch all the links
|
||||||
|
if (limit === null || limit === undefined) limit = 10
|
||||||
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
selectedLinks && selectedLinks.length > 0
|
selectedLinks && selectedLinks.length > 0
|
||||||
@@ -143,7 +145,7 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
} else if (selectedLinks && selectedLinks.length > 0) {
|
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||||
if (process.env.DEBUG === 'true')
|
if (process.env.DEBUG === 'true')
|
||||||
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||||
// Load only the links the user selected, capped by `limit`.
// A limit of 0 means "fetch all the links"; slice(0, 0) would silently load nothing,
// so the list is only truncated for a positive limit. Number() also tolerates a limit
// that arrives as a string or is missing (NaN / 0 -> no cap).
const linkCap = Number(limit)
const linksToLoad = linkCap > 0 ? selectedLinks.slice(0, linkCap) : selectedLinks
for (const page of linksToLoad) {
    docs.push(...(await cheerioLoader(page)))
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -167,7 +167,9 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) limit = 10
|
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||||
|
// so when limit is 0 we can fetch all the links
|
||||||
|
if (limit === null || limit === undefined) limit = 10
|
||||||
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
selectedLinks && selectedLinks.length > 0
|
selectedLinks && selectedLinks.length > 0
|
||||||
@@ -184,7 +186,7 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
} else if (selectedLinks && selectedLinks.length > 0) {
|
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||||
if (process.env.DEBUG === 'true')
|
if (process.env.DEBUG === 'true')
|
||||||
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||||
// Load only the links the user selected, capped by `limit`.
// A limit of 0 means "fetch all the links"; slice(0, 0) would silently load nothing,
// so the list is only truncated for a positive limit. Number() also tolerates a limit
// that arrives as a string or is missing (NaN / 0 -> no cap).
const linkCap = Number(limit)
const linksToLoad = linkCap > 0 ? selectedLinks.slice(0, linkCap) : selectedLinks
for (const page of linksToLoad) {
    docs.push(...(await playwrightLoader(page)))
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -168,7 +168,9 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) limit = 10
|
// if limit is 0 we don't want it to default to 10 so we check explicitly for null or undefined
|
||||||
|
// so when limit is 0 we can fetch all the links
|
||||||
|
if (limit === null || limit === undefined) limit = 10
|
||||||
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
selectedLinks && selectedLinks.length > 0
|
selectedLinks && selectedLinks.length > 0
|
||||||
@@ -185,7 +187,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
} else if (selectedLinks && selectedLinks.length > 0) {
|
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||||
if (process.env.DEBUG === 'true')
|
if (process.env.DEBUG === 'true')
|
||||||
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||||
// Load only the links the user selected, capped by `limit`.
// A limit of 0 means "fetch all the links"; slice(0, 0) would silently load nothing,
// so the list is only truncated for a positive limit. Number() also tolerates a limit
// that arrives as a string or is missing (NaN / 0 -> no cap).
const linkCap = Number(limit)
const linksToLoad = linkCap > 0 ? selectedLinks.slice(0, linkCap) : selectedLinks
for (const page of linksToLoad) {
    docs.push(...(await puppeteerLoader(page)))
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1149,8 +1149,14 @@ export class App {
|
|||||||
// GET /api/v1/fetch-links?url=...&relativeLinksMethod=...&limit=...
// Collects links for `url` with webCrawl (when relativeLinksMethod is 'webCrawl') or
// xmlScrape (any other value) and responds with { status: 'OK', links }.
// Validation and scraping failures are answered with status 500 and a plain-text message.
this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => {
    const relativeLinksMethod = req.query.relativeLinksMethod as string
    if (!relativeLinksMethod) {
        return res.status(500).send('Please choose a Relative Links Method in Additional Parameters.')
    }

    // parseInt returns NaN when `limit` is missing or not numeric (e.g. the literal text "NaN" or "undefined").
    // Fall back to 0, the value this endpoint passed to the scrapers before `limit` became a query parameter.
    const parsedLimit = parseInt(req.query.limit as string, 10)
    const limit = Number.isNaN(parsedLimit) ? 0 : parsedLimit
    if (limit < 0) {
        return res.status(500).send('Limit cannot be less than 0')
    }

    try {
        // decodeURIComponent throws a URIError on malformed escapes, so it is kept inside the try
        const url = decodeURIComponent(req.query.url as string)

        if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
        const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit)
        if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)

        return res.json({ status: 'OK', links })
    } catch (error) {
        // An error here would otherwise reject this async handler's promise without any response being sent;
        // answer explicitly so the client is not left waiting.
        const message = error instanceof Error ? error.message : String(error)
        return res.status(500).send(message)
    }
})
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
import client from './client'
|
import client from './client'
|
||||||
|
|
||||||
/**
 * Builds the request path (including the query string) for the fetch-links endpoint.
 *
 * @param url page whose links should be fetched; URL-encoded into the `url` parameter
 * @param relativeLinksMethod scraping method name; URL-encoded into `relativeLinksMethod`
 * @param relativeLinksLimit maximum number of links; a missing or non-numeric value is sent as 0
 *        instead of the literal text "undefined" / "NaN"
 * @returns the path to pass to the API client
 */
const buildFetchLinksPath = (url, relativeLinksMethod, relativeLinksLimit) => {
    const parsedLimit = Number.parseInt(relativeLinksLimit, 10)
    const limit = Number.isNaN(parsedLimit) ? 0 : parsedLimit
    return `/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${encodeURIComponent(relativeLinksMethod)}&limit=${limit}`
}

/**
 * Requests the links found at `url` from the server, using the given relative links method and limit.
 *
 * @returns whatever `client.get` returns for the request
 */
const fetchLinks = (url, relativeLinksMethod, relativeLinksLimit) =>
    client.get(buildFetchLinksPath(url, relativeLinksMethod, relativeLinksLimit))

export default {
    fetchLinks
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ import {
|
|||||||
Stack,
|
Stack,
|
||||||
Typography
|
Typography
|
||||||
} from '@mui/material'
|
} from '@mui/material'
|
||||||
import { IconTrash } from '@tabler/icons'
|
import { IconTrash, IconX } from '@tabler/icons'
|
||||||
import PerfectScrollbar from 'react-perfect-scrollbar'
|
import PerfectScrollbar from 'react-perfect-scrollbar'
|
||||||
|
|
||||||
import { BackdropLoader } from 'ui-component/loading/BackdropLoader'
|
import { BackdropLoader } from 'ui-component/loading/BackdropLoader'
|
||||||
@@ -24,12 +24,23 @@ import { StyledButton } from 'ui-component/button/StyledButton'
|
|||||||
|
|
||||||
import scraperApi from 'api/scraper'
|
import scraperApi from 'api/scraper'
|
||||||
|
|
||||||
import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from 'store/actions'
|
import useNotifier from 'utils/useNotifier'
|
||||||
|
|
||||||
|
import {
|
||||||
|
HIDE_CANVAS_DIALOG,
|
||||||
|
SHOW_CANVAS_DIALOG,
|
||||||
|
enqueueSnackbar as enqueueSnackbarAction,
|
||||||
|
closeSnackbar as closeSnackbarAction
|
||||||
|
} from 'store/actions'
|
||||||
|
|
||||||
const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
|
const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
|
||||||
const portalElement = document.getElementById('portal')
|
const portalElement = document.getElementById('portal')
|
||||||
const dispatch = useDispatch()
|
const dispatch = useDispatch()
|
||||||
|
|
||||||
|
useNotifier()
|
||||||
|
const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args))
|
||||||
|
const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args))
|
||||||
|
|
||||||
const [loading, setLoading] = useState(false)
|
const [loading, setLoading] = useState(false)
|
||||||
const [selectedLinks, setSelectedLinks] = useState([])
|
const [selectedLinks, setSelectedLinks] = useState([])
|
||||||
const [url, setUrl] = useState('')
|
const [url, setUrl] = useState('')
|
||||||
@@ -53,9 +64,38 @@ const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
|
|||||||
|
|
||||||
const handleFetchLinks = async () => {
|
const handleFetchLinks = async () => {
|
||||||
setLoading(true)
|
setLoading(true)
|
||||||
const fetchLinksResp = await scraperApi.fetchAllLinks(url, 'webCrawl')
|
try {
|
||||||
|
const fetchLinksResp = await scraperApi.fetchLinks(url, dialogProps.relativeLinksMethod, dialogProps.limit)
|
||||||
if (fetchLinksResp.data) {
|
if (fetchLinksResp.data) {
|
||||||
setSelectedLinks(fetchLinksResp.data.links)
|
setSelectedLinks(fetchLinksResp.data.links)
|
||||||
|
enqueueSnackbar({
|
||||||
|
message: 'Successfully fetched links',
|
||||||
|
options: {
|
||||||
|
key: new Date().getTime() + Math.random(),
|
||||||
|
variant: 'success',
|
||||||
|
action: (key) => (
|
||||||
|
<Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
|
||||||
|
<IconX />
|
||||||
|
</Button>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
const errorData = error.response.data || `${error.response.status}: ${error.response.statusText}`
|
||||||
|
enqueueSnackbar({
|
||||||
|
message: errorData,
|
||||||
|
options: {
|
||||||
|
key: new Date().getTime() + Math.random(),
|
||||||
|
variant: 'error',
|
||||||
|
persist: true,
|
||||||
|
action: (key) => (
|
||||||
|
<Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
|
||||||
|
<IconX />
|
||||||
|
</Button>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
setLoading(false)
|
setLoading(false)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -91,9 +91,11 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const onManageLinksDialogClicked = (url, selectedLinks) => {
|
const onManageLinksDialogClicked = (url, selectedLinks, relativeLinksMethod, limit) => {
|
||||||
const dialogProps = {
|
const dialogProps = {
|
||||||
url,
|
url,
|
||||||
|
relativeLinksMethod,
|
||||||
|
limit,
|
||||||
selectedLinks,
|
selectedLinks,
|
||||||
confirmButtonName: 'Save',
|
confirmButtonName: 'Save',
|
||||||
cancelButtonName: 'Cancel'
|
cancelButtonName: 'Cancel'
|
||||||
@@ -475,7 +477,9 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
|||||||
onClick={() =>
|
onClick={() =>
|
||||||
onManageLinksDialogClicked(
|
onManageLinksDialogClicked(
|
||||||
data.inputs[inputParam.name] ?? inputParam.default ?? '',
|
data.inputs[inputParam.name] ?? inputParam.default ?? '',
|
||||||
data.inputs.selectedLinks
|
data.inputs.selectedLinks,
|
||||||
|
data.inputs['relativeLinksMethod'] ?? 'webCrawl',
|
||||||
|
parseInt(data.inputs['limit']) ?? 0
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
>
|
>
|
||||||
|
|||||||
Reference in New Issue
Block a user