mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-29 07:01:04 +03:00
Merge pull request #1566 from 0xi4o/feature/scrapped-links
FEATURE: Select which links should be used in web scraper nodes (cheerio, puppeteer, and playwright)
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||||
import { TextSplitter } from 'langchain/text_splitter'
|
import { TextSplitter } from 'langchain/text_splitter'
|
||||||
import { CheerioWebBaseLoader, WebBaseLoaderParams } from 'langchain/document_loaders/web/cheerio'
|
import { CheerioWebBaseLoader, WebBaseLoaderParams } from 'langchain/document_loaders/web/cheerio'
|
||||||
import { test } from 'linkifyjs'
|
import { test } from 'linkifyjs'
|
||||||
@@ -63,6 +63,7 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'number',
|
type: 'number',
|
||||||
optional: true,
|
optional: true,
|
||||||
|
default: '10',
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
description:
|
description:
|
||||||
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||||
@@ -86,11 +87,12 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
async init(nodeData: INodeData): Promise<any> {
|
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||||
const metadata = nodeData.inputs?.metadata
|
const metadata = nodeData.inputs?.metadata
|
||||||
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
||||||
let limit = nodeData.inputs?.limit as string
|
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
|
||||||
|
let limit = parseInt(nodeData.inputs?.limit as string)
|
||||||
|
|
||||||
let url = nodeData.inputs?.url as string
|
let url = nodeData.inputs?.url as string
|
||||||
url = url.trim()
|
url = url.trim()
|
||||||
@@ -117,23 +119,33 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
}
|
}
|
||||||
return docs
|
return docs
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (process.env.DEBUG === 'true') console.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`)
|
if (process.env.DEBUG === 'true') options.logger.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) limit = '10'
|
if (!limit) limit = 10
|
||||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
selectedLinks && selectedLinks.length > 0
|
||||||
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
? selectedLinks.slice(0, limit)
|
||||||
|
: relativeLinksMethod === 'webCrawl'
|
||||||
|
? await webCrawl(url, limit)
|
||||||
|
: await xmlScrape(url, limit)
|
||||||
|
if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
||||||
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
||||||
for (const page of pages) {
|
for (const page of pages) {
|
||||||
docs.push(...(await cheerioLoader(page)))
|
docs.push(...(await cheerioLoader(page)))
|
||||||
}
|
}
|
||||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`)
|
||||||
|
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||||
|
if (process.env.DEBUG === 'true')
|
||||||
|
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||||
|
for (const page of selectedLinks) {
|
||||||
|
docs.push(...(await cheerioLoader(page)))
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
docs = await cheerioLoader(url)
|
docs = await cheerioLoader(url)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||||
import { TextSplitter } from 'langchain/text_splitter'
|
import { TextSplitter } from 'langchain/text_splitter'
|
||||||
import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright'
|
import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright'
|
||||||
import { test } from 'linkifyjs'
|
import { test } from 'linkifyjs'
|
||||||
@@ -61,6 +61,7 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'number',
|
type: 'number',
|
||||||
optional: true,
|
optional: true,
|
||||||
|
default: '10',
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
description:
|
description:
|
||||||
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||||
@@ -114,11 +115,12 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
async init(nodeData: INodeData): Promise<any> {
|
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||||
const metadata = nodeData.inputs?.metadata
|
const metadata = nodeData.inputs?.metadata
|
||||||
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
||||||
let limit = nodeData.inputs?.limit as string
|
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
|
||||||
|
let limit = parseInt(nodeData.inputs?.limit as string)
|
||||||
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
|
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
|
||||||
let waitForSelector = nodeData.inputs?.waitForSelector as string
|
let waitForSelector = nodeData.inputs?.waitForSelector as string
|
||||||
|
|
||||||
@@ -158,23 +160,33 @@ class Playwright_DocumentLoaders implements INode {
|
|||||||
}
|
}
|
||||||
return docs
|
return docs
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (process.env.DEBUG === 'true') console.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`)
|
if (process.env.DEBUG === 'true') options.logger.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) limit = '10'
|
if (!limit) limit = 10
|
||||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
selectedLinks && selectedLinks.length > 0
|
||||||
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
? selectedLinks.slice(0, limit)
|
||||||
|
: relativeLinksMethod === 'webCrawl'
|
||||||
|
? await webCrawl(url, limit)
|
||||||
|
: await xmlScrape(url, limit)
|
||||||
|
if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
||||||
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
||||||
for (const page of pages) {
|
for (const page of pages) {
|
||||||
docs.push(...(await playwrightLoader(page)))
|
docs.push(...(await playwrightLoader(page)))
|
||||||
}
|
}
|
||||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`)
|
||||||
|
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||||
|
if (process.env.DEBUG === 'true')
|
||||||
|
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||||
|
for (const page of selectedLinks) {
|
||||||
|
docs.push(...(await playwrightLoader(page)))
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
docs = await playwrightLoader(url)
|
docs = await playwrightLoader(url)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||||
import { TextSplitter } from 'langchain/text_splitter'
|
import { TextSplitter } from 'langchain/text_splitter'
|
||||||
import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer'
|
import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer'
|
||||||
import { test } from 'linkifyjs'
|
import { test } from 'linkifyjs'
|
||||||
@@ -62,6 +62,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'number',
|
type: 'number',
|
||||||
optional: true,
|
optional: true,
|
||||||
|
default: '10',
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
description:
|
description:
|
||||||
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||||
@@ -115,11 +116,12 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
async init(nodeData: INodeData): Promise<any> {
|
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||||
const metadata = nodeData.inputs?.metadata
|
const metadata = nodeData.inputs?.metadata
|
||||||
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
||||||
let limit = nodeData.inputs?.limit as string
|
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
|
||||||
|
let limit = parseInt(nodeData.inputs?.limit as string)
|
||||||
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
|
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
|
||||||
let waitForSelector = nodeData.inputs?.waitForSelector as string
|
let waitForSelector = nodeData.inputs?.waitForSelector as string
|
||||||
|
|
||||||
@@ -159,23 +161,33 @@ class Puppeteer_DocumentLoaders implements INode {
|
|||||||
}
|
}
|
||||||
return docs
|
return docs
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (process.env.DEBUG === 'true') console.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`)
|
if (process.env.DEBUG === 'true') options.logger.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let docs = []
|
let docs = []
|
||||||
if (relativeLinksMethod) {
|
if (relativeLinksMethod) {
|
||||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
|
||||||
if (!limit) limit = '10'
|
if (!limit) limit = 10
|
||||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
else if (limit < 0) throw new Error('Limit cannot be less than 0')
|
||||||
const pages: string[] =
|
const pages: string[] =
|
||||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
selectedLinks && selectedLinks.length > 0
|
||||||
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
? selectedLinks.slice(0, limit)
|
||||||
|
: relativeLinksMethod === 'webCrawl'
|
||||||
|
? await webCrawl(url, limit)
|
||||||
|
: await xmlScrape(url, limit)
|
||||||
|
if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
||||||
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
||||||
for (const page of pages) {
|
for (const page of pages) {
|
||||||
docs.push(...(await puppeteerLoader(page)))
|
docs.push(...(await puppeteerLoader(page)))
|
||||||
}
|
}
|
||||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`)
|
||||||
|
} else if (selectedLinks && selectedLinks.length > 0) {
|
||||||
|
if (process.env.DEBUG === 'true')
|
||||||
|
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
|
||||||
|
for (const page of selectedLinks) {
|
||||||
|
docs.push(...(await puppeteerLoader(page)))
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
docs = await puppeteerLoader(url)
|
docs = await puppeteerLoader(url)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ import { Tool } from './database/entities/Tool'
|
|||||||
import { Assistant } from './database/entities/Assistant'
|
import { Assistant } from './database/entities/Assistant'
|
||||||
import { ChatflowPool } from './ChatflowPool'
|
import { ChatflowPool } from './ChatflowPool'
|
||||||
import { CachePool } from './CachePool'
|
import { CachePool } from './CachePool'
|
||||||
import { ICommonObject, IMessage, INodeOptionsValue, handleEscapeCharacters } from 'flowise-components'
|
import { ICommonObject, IMessage, INodeOptionsValue, handleEscapeCharacters, webCrawl, xmlScrape } from 'flowise-components'
|
||||||
import { createRateLimiter, getRateLimiter, initializeRateLimiter } from './utils/rateLimit'
|
import { createRateLimiter, getRateLimiter, initializeRateLimiter } from './utils/rateLimit'
|
||||||
import { addAPIKey, compareKeys, deleteAPIKey, getApiKey, getAPIKeys, updateAPIKey } from './utils/apiKey'
|
import { addAPIKey, compareKeys, deleteAPIKey, getApiKey, getAPIKeys, updateAPIKey } from './utils/apiKey'
|
||||||
import { sanitizeMiddleware } from './utils/XSS'
|
import { sanitizeMiddleware } from './utils/XSS'
|
||||||
@@ -1117,6 +1117,19 @@ export class App {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// ----------------------------------------
|
||||||
|
// Scraper
|
||||||
|
// ----------------------------------------
|
||||||
|
|
||||||
|
/**
 * GET /api/v1/fetch-links
 *
 * Query parameters:
 *   - url: address to collect links from (passed through decodeURIComponent before use)
 *   - relativeLinksMethod: 'webCrawl' selects webCrawl; any other value selects xmlScrape
 *
 * Responses:
 *   - 200 { status: 'OK', links } on success
 *   - 400 when `url` is missing, empty, repeated, or contains malformed percent-escapes
 *   - 500 when the crawl/scrape helper throws
 */
this.app.get('/api/v1/fetch-links', async (req: Request, res: Response) => {
    const rawUrl = req.query.url
    // Without this guard decodeURIComponent(undefined) yields the literal string
    // "undefined", which would then be crawled as if it were a real address.
    if (typeof rawUrl !== 'string' || rawUrl.trim() === '') {
        return res.status(400).send('Query parameter "url" is required')
    }

    let url: string
    try {
        // decodeURIComponent throws URIError on malformed escapes such as a lone "%"
        url = decodeURIComponent(rawUrl)
    } catch (err) {
        return res.status(400).send('Query parameter "url" is not a valid URI component')
    }

    const relativeLinksMethod = req.query.relativeLinksMethod as string
    try {
        if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
        // Limit 0 is forwarded unchanged; the scraper nodes describe 0 as "retrieve all relative links"
        const links: string[] = relativeLinksMethod === 'webCrawl' ? await webCrawl(url, 0) : await xmlScrape(url, 0)

        return res.json({ status: 'OK', links })
    } catch (err) {
        // An async handler that rejects would otherwise leave the request unanswered
        const message = err instanceof Error ? err.message : String(err)
        return res.status(500).send(`Error fetching links: ${message}`)
    }
})
|
||||||
|
|
||||||
// ----------------------------------------
|
// ----------------------------------------
|
||||||
// Upsert
|
// Upsert
|
||||||
// ----------------------------------------
|
// ----------------------------------------
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
import client from './client'
|
||||||
|
|
||||||
|
/**
 * Request the list of links discovered from `url` via the server's /fetch-links route.
 *
 * Both values are percent-encoded so that characters such as `&`, `=` or `#`
 * inside either argument cannot break or inject extra query parameters.
 *
 * @param url - address to scan for links
 * @param relativeLinksMethod - discovery strategy name forwarded to the server (the dialog in this change passes 'webCrawl')
 * @returns whatever `client.get` returns for the request (presumably a promise of the HTTP response — confirm against ./client)
 */
const fetchAllLinks = (url, relativeLinksMethod) =>
    client.get(`/fetch-links?url=${encodeURIComponent(url)}&relativeLinksMethod=${encodeURIComponent(relativeLinksMethod)}`)
|
||||||
|
|
||||||
|
export default {
|
||||||
|
fetchAllLinks
|
||||||
|
}
|
||||||
@@ -0,0 +1,184 @@
|
|||||||
|
import PropTypes from 'prop-types'
|
||||||
|
import { createPortal } from 'react-dom'
|
||||||
|
import { useDispatch } from 'react-redux'
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
|
||||||
|
import {
|
||||||
|
Box,
|
||||||
|
Button,
|
||||||
|
Dialog,
|
||||||
|
DialogActions,
|
||||||
|
DialogContent,
|
||||||
|
DialogTitle,
|
||||||
|
FormControl,
|
||||||
|
IconButton,
|
||||||
|
OutlinedInput,
|
||||||
|
Stack,
|
||||||
|
Typography
|
||||||
|
} from '@mui/material'
|
||||||
|
import { IconTrash } from '@tabler/icons'
|
||||||
|
import PerfectScrollbar from 'react-perfect-scrollbar'
|
||||||
|
|
||||||
|
import { BackdropLoader } from 'ui-component/loading/BackdropLoader'
|
||||||
|
import { StyledButton } from 'ui-component/button/StyledButton'
|
||||||
|
|
||||||
|
import scraperApi from 'api/scraper'
|
||||||
|
|
||||||
|
import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from 'store/actions'
|
||||||
|
|
||||||
|
const ManageScrapedLinksDialog = ({ show, dialogProps, onCancel, onSave }) => {
|
||||||
|
const portalElement = document.getElementById('portal')
|
||||||
|
const dispatch = useDispatch()
|
||||||
|
|
||||||
|
const [loading, setLoading] = useState(false)
|
||||||
|
const [selectedLinks, setSelectedLinks] = useState([])
|
||||||
|
const [url, setUrl] = useState('')
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (dialogProps.url) setUrl(dialogProps.url)
|
||||||
|
if (dialogProps.selectedLinks) setSelectedLinks(dialogProps.selectedLinks)
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
setLoading(false)
|
||||||
|
setSelectedLinks([])
|
||||||
|
setUrl('')
|
||||||
|
}
|
||||||
|
}, [dialogProps])
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (show) dispatch({ type: SHOW_CANVAS_DIALOG })
|
||||||
|
else dispatch({ type: HIDE_CANVAS_DIALOG })
|
||||||
|
return () => dispatch({ type: HIDE_CANVAS_DIALOG })
|
||||||
|
}, [show, dispatch])
|
||||||
|
|
||||||
|
const handleFetchLinks = async () => {
|
||||||
|
setLoading(true)
|
||||||
|
const fetchLinksResp = await scraperApi.fetchAllLinks(url, 'webCrawl')
|
||||||
|
if (fetchLinksResp.data) {
|
||||||
|
setSelectedLinks(fetchLinksResp.data.links)
|
||||||
|
}
|
||||||
|
setLoading(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleChangeLink = (index, event) => {
|
||||||
|
const { value } = event.target
|
||||||
|
const links = [...selectedLinks]
|
||||||
|
links[index] = value
|
||||||
|
setSelectedLinks(links)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleRemoveLink = (index) => {
|
||||||
|
const links = [...selectedLinks]
|
||||||
|
links.splice(index, 1)
|
||||||
|
setSelectedLinks(links)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleSaveLinks = () => {
|
||||||
|
onSave(url, selectedLinks)
|
||||||
|
}
|
||||||
|
|
||||||
|
const component = show ? (
|
||||||
|
<Dialog
|
||||||
|
onClose={onCancel}
|
||||||
|
open={show}
|
||||||
|
fullWidth
|
||||||
|
maxWidth='sm'
|
||||||
|
aria-labelledby='manage-scraped-links-dialog-title'
|
||||||
|
aria-describedby='manage-scraped-links-dialog-description'
|
||||||
|
>
|
||||||
|
<DialogTitle sx={{ fontSize: '1rem' }} id='manage-scraped-links-dialog-title'>
|
||||||
|
{dialogProps.title || `Manage Scraped Links - ${url}`}
|
||||||
|
</DialogTitle>
|
||||||
|
<DialogContent>
|
||||||
|
<Box sx={{ mb: 4 }}>
|
||||||
|
<Stack flexDirection='row' gap={1} sx={{ width: '100%' }}>
|
||||||
|
<FormControl sx={{ mt: 1, width: '100%', display: 'flex', flexShrink: 1 }} size='small'>
|
||||||
|
<OutlinedInput
|
||||||
|
id='url'
|
||||||
|
size='small'
|
||||||
|
type='text'
|
||||||
|
value={url}
|
||||||
|
name='url'
|
||||||
|
onChange={(e) => {
|
||||||
|
setUrl(e.target.value)
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</FormControl>
|
||||||
|
<Button
|
||||||
|
sx={{ borderRadius: '12px', mt: 1, display: 'flex', flexShrink: 0 }}
|
||||||
|
size='small'
|
||||||
|
variant='contained'
|
||||||
|
onClick={handleFetchLinks}
|
||||||
|
>
|
||||||
|
Fetch Links
|
||||||
|
</Button>
|
||||||
|
</Stack>
|
||||||
|
</Box>
|
||||||
|
<Typography sx={{ mb: 2, fontWeight: 500 }}>Scraped Links</Typography>
|
||||||
|
<>
|
||||||
|
{loading && <BackdropLoader open={loading} />}
|
||||||
|
{selectedLinks.length > 0 ? (
|
||||||
|
<PerfectScrollbar
|
||||||
|
style={{
|
||||||
|
height: '100%',
|
||||||
|
maxHeight: '320px',
|
||||||
|
overflowX: 'hidden',
|
||||||
|
display: 'flex',
|
||||||
|
flexDirection: 'column',
|
||||||
|
gap: 4
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{selectedLinks.map((link, index) => (
|
||||||
|
<div key={index} style={{ display: 'flex', width: '100%' }}>
|
||||||
|
<Box sx={{ display: 'flex', width: '100%' }}>
|
||||||
|
<OutlinedInput
|
||||||
|
sx={{ width: '100%' }}
|
||||||
|
key={index}
|
||||||
|
type='text'
|
||||||
|
onChange={(e) => handleChangeLink(index, e)}
|
||||||
|
size='small'
|
||||||
|
value={link}
|
||||||
|
name={`link_${index}`}
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
<Box sx={{ width: 'auto', flexGrow: 1 }}>
|
||||||
|
<IconButton
|
||||||
|
sx={{ height: 30, width: 30 }}
|
||||||
|
size='small'
|
||||||
|
color='error'
|
||||||
|
onClick={() => handleRemoveLink(index)}
|
||||||
|
edge='end'
|
||||||
|
>
|
||||||
|
<IconTrash />
|
||||||
|
</IconButton>
|
||||||
|
</Box>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</PerfectScrollbar>
|
||||||
|
) : (
|
||||||
|
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
|
||||||
|
<Typography sx={{ my: 2 }}>Links scraped from the URL will appear here</Typography>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
</DialogContent>
|
||||||
|
<DialogActions>
|
||||||
|
<Button onClick={onCancel}>Cancel</Button>
|
||||||
|
<StyledButton variant='contained' onClick={handleSaveLinks}>
|
||||||
|
Save
|
||||||
|
</StyledButton>
|
||||||
|
</DialogActions>
|
||||||
|
</Dialog>
|
||||||
|
) : null
|
||||||
|
|
||||||
|
return createPortal(component, portalElement)
|
||||||
|
}
|
||||||
|
|
||||||
|
ManageScrapedLinksDialog.propTypes = {
|
||||||
|
show: PropTypes.bool,
|
||||||
|
dialogProps: PropTypes.object,
|
||||||
|
onCancel: PropTypes.func,
|
||||||
|
onSave: PropTypes.func
|
||||||
|
}
|
||||||
|
|
||||||
|
export default ManageScrapedLinksDialog
|
||||||
@@ -28,6 +28,8 @@ import ToolDialog from 'views/tools/ToolDialog'
|
|||||||
import AssistantDialog from 'views/assistants/AssistantDialog'
|
import AssistantDialog from 'views/assistants/AssistantDialog'
|
||||||
import ExpandTextDialog from 'ui-component/dialog/ExpandTextDialog'
|
import ExpandTextDialog from 'ui-component/dialog/ExpandTextDialog'
|
||||||
import FormatPromptValuesDialog from 'ui-component/dialog/FormatPromptValuesDialog'
|
import FormatPromptValuesDialog from 'ui-component/dialog/FormatPromptValuesDialog'
|
||||||
|
import PromptLangsmithHubDialog from 'ui-component/dialog/PromptLangsmithHubDialog'
|
||||||
|
import ManageScrapedLinksDialog from 'ui-component/dialog/ManageScrapedLinksDialog'
|
||||||
import CredentialInputHandler from './CredentialInputHandler'
|
import CredentialInputHandler from './CredentialInputHandler'
|
||||||
|
|
||||||
// utils
|
// utils
|
||||||
@@ -35,7 +37,6 @@ import { getInputVariables } from 'utils/genericHelper'
|
|||||||
|
|
||||||
// const
|
// const
|
||||||
import { FLOWISE_CREDENTIAL_ID } from 'store/constant'
|
import { FLOWISE_CREDENTIAL_ID } from 'store/constant'
|
||||||
import PromptLangsmithHubDialog from '../../ui-component/dialog/PromptLangsmithHubDialog'
|
|
||||||
|
|
||||||
const EDITABLE_OPTIONS = ['selectedTool', 'selectedAssistant']
|
const EDITABLE_OPTIONS = ['selectedTool', 'selectedAssistant']
|
||||||
|
|
||||||
@@ -62,22 +63,25 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
|||||||
const [showFormatPromptValuesDialog, setShowFormatPromptValuesDialog] = useState(false)
|
const [showFormatPromptValuesDialog, setShowFormatPromptValuesDialog] = useState(false)
|
||||||
const [formatPromptValuesDialogProps, setFormatPromptValuesDialogProps] = useState({})
|
const [formatPromptValuesDialogProps, setFormatPromptValuesDialogProps] = useState({})
|
||||||
const [showPromptHubDialog, setShowPromptHubDialog] = useState(false)
|
const [showPromptHubDialog, setShowPromptHubDialog] = useState(false)
|
||||||
|
const [showManageScrapedLinksDialog, setShowManageScrapedLinksDialog] = useState(false)
|
||||||
|
const [manageScrapedLinksDialogProps, setManageScrapedLinksDialogProps] = useState({})
|
||||||
|
|
||||||
const onExpandDialogClicked = (value, inputParam) => {
|
const onExpandDialogClicked = (value, inputParam) => {
|
||||||
const dialogProp = {
|
const dialogProps = {
|
||||||
value,
|
value,
|
||||||
inputParam,
|
inputParam,
|
||||||
disabled,
|
disabled,
|
||||||
confirmButtonName: 'Save',
|
confirmButtonName: 'Save',
|
||||||
cancelButtonName: 'Cancel'
|
cancelButtonName: 'Cancel'
|
||||||
}
|
}
|
||||||
setExpandDialogProps(dialogProp)
|
setExpandDialogProps(dialogProps)
|
||||||
setShowExpandDialog(true)
|
setShowExpandDialog(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
const onShowPromptHubButtonClicked = () => {
|
const onShowPromptHubButtonClicked = () => {
|
||||||
setShowPromptHubDialog(true)
|
setShowPromptHubDialog(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
const onShowPromptHubButtonSubmit = (templates) => {
|
const onShowPromptHubButtonSubmit = (templates) => {
|
||||||
setShowPromptHubDialog(false)
|
setShowPromptHubDialog(false)
|
||||||
for (const t of templates) {
|
for (const t of templates) {
|
||||||
@@ -86,6 +90,24 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Open the "Manage Links" dialog, seeding it with the node's current url and previously selected links
const onManageLinksDialogClicked = (url, selectedLinks) => {
    setManageScrapedLinksDialogProps({
        url,
        selectedLinks,
        confirmButtonName: 'Save',
        cancelButtonName: 'Cancel'
    })
    setShowManageScrapedLinksDialog(true)
}
|
||||||
|
|
||||||
|
// Close the dialog, then write the chosen url and link list back onto the node's inputs
const onManageLinksDialogSave = (url, links) => {
    setShowManageScrapedLinksDialog(false)
    Object.assign(data.inputs, { url, selectedLinks: links })
}
|
||||||
|
|
||||||
const onEditJSONClicked = (value, inputParam) => {
|
const onEditJSONClicked = (value, inputParam) => {
|
||||||
// Preset values if the field is format prompt values
|
// Preset values if the field is format prompt values
|
||||||
let inputValue = value
|
let inputValue = value
|
||||||
@@ -436,6 +458,37 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
|
|||||||
</div>
|
</div>
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
|
{(data.name === 'cheerioWebScraper' ||
|
||||||
|
data.name === 'puppeteerWebScraper' ||
|
||||||
|
data.name === 'playwrightWebScraper') &&
|
||||||
|
inputParam.name === 'url' && (
|
||||||
|
<>
|
||||||
|
<Button
|
||||||
|
style={{
|
||||||
|
display: 'flex',
|
||||||
|
flexDirection: 'row',
|
||||||
|
width: '100%'
|
||||||
|
}}
|
||||||
|
disabled={disabled}
|
||||||
|
sx={{ borderRadius: '12px', width: '100%', mt: 1 }}
|
||||||
|
variant='outlined'
|
||||||
|
onClick={() =>
|
||||||
|
onManageLinksDialogClicked(
|
||||||
|
data.inputs[inputParam.name] ?? inputParam.default ?? '',
|
||||||
|
data.inputs.selectedLinks
|
||||||
|
)
|
||||||
|
}
|
||||||
|
>
|
||||||
|
Manage Links
|
||||||
|
</Button>
|
||||||
|
<ManageScrapedLinksDialog
|
||||||
|
show={showManageScrapedLinksDialog}
|
||||||
|
dialogProps={manageScrapedLinksDialogProps}
|
||||||
|
onCancel={() => setShowManageScrapedLinksDialog(false)}
|
||||||
|
onSave={onManageLinksDialogSave}
|
||||||
|
/>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
|
|||||||
Reference in New Issue
Block a user