mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 23:01:09 +03:00
replace regex with linkifyjs
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||||
import { TextSplitter } from 'langchain/text_splitter'
|
import { TextSplitter } from 'langchain/text_splitter'
|
||||||
import { CheerioWebBaseLoader } from 'langchain/document_loaders/web/cheerio'
|
import { CheerioWebBaseLoader } from 'langchain/document_loaders/web/cheerio'
|
||||||
|
import { test } from 'linkifyjs'
|
||||||
class Cheerio_DocumentLoaders implements INode {
|
class Cheerio_DocumentLoaders implements INode {
|
||||||
label: string
|
label: string
|
||||||
name: string
|
name: string
|
||||||
@@ -47,18 +47,12 @@ class Cheerio_DocumentLoaders implements INode {
|
|||||||
const metadata = nodeData.inputs?.metadata
|
const metadata = nodeData.inputs?.metadata
|
||||||
|
|
||||||
let url = nodeData.inputs?.url as string
|
let url = nodeData.inputs?.url as string
|
||||||
|
url = url.trim()
|
||||||
|
if (!test(url)) {
|
||||||
|
throw new Error('Invalid URL')
|
||||||
|
}
|
||||||
|
|
||||||
var urlPattern = new RegExp(
|
const loader = new CheerioWebBaseLoader(url)
|
||||||
'^(https?:\\/\\/)?' + // validate protocol
|
|
||||||
'((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|' + // validate domain name
|
|
||||||
'((\\d{1,3}\\.){3}\\d{1,3}))' + // validate OR ip (v4) address
|
|
||||||
'(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // validate port and path
|
|
||||||
'(\\?[;&a-z\\d%_.~+=-]*)?' + // validate query string
|
|
||||||
'(\\#[-a-z\\d_]*)?$',
|
|
||||||
'i'
|
|
||||||
) // validate fragment locator
|
|
||||||
|
|
||||||
const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
|
|
||||||
let docs = []
|
let docs = []
|
||||||
|
|
||||||
if (textSplitter) {
|
if (textSplitter) {
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
"form-data": "^4.0.0",
|
"form-data": "^4.0.0",
|
||||||
"graphql": "^16.6.0",
|
"graphql": "^16.6.0",
|
||||||
"langchain": "^0.0.73",
|
"langchain": "^0.0.73",
|
||||||
|
"linkifyjs": "^4.1.1",
|
||||||
"mammoth": "^1.5.1",
|
"mammoth": "^1.5.1",
|
||||||
"moment": "^2.29.3",
|
"moment": "^2.29.3",
|
||||||
"node-fetch": "2",
|
"node-fetch": "2",
|
||||||
|
|||||||
Reference in New Issue
Block a user