mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-29 03:01:10 +03:00
feat: add search functionality to FireCrawl with customizable parameters (#4535)
* feat: add search functionality to FireCrawl with customizable parameters
* refactor: unify request parameters in FireCrawl to include integration identifier
* Update FireCrawl Document Loader to version 4.0, enhancing parameter labels and adding conditional visibility for URL and crawler options based on selected crawler type.
---------
Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
@@ -67,6 +67,29 @@ interface ExtractResponse {
|
|||||||
data?: Record<string, any>
|
data?: Record<string, any>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One entry of the `data` array in a FireCrawl search response.
 *
 * The loader turns each entry into a document, using `description` as the
 * document text and `url` / `title` as metadata.
 */
interface SearchResult {
    /** Address of the matching page. */
    url: string
    /** Title reported for the matching page. */
    title: string
    /** Text describing the matching page. */
    description: string
}
|
||||||
|
|
||||||
|
/**
 * Body of a response from the `/v1/search` endpoint, as read by `FirecrawlApp.search`.
 */
interface SearchResponse {
    /** `false` when the search did not succeed. */
    success: boolean
    /** Search hits; may be absent. */
    data?: SearchResult[]
    /** Optional message; `search` reports it as the failure reason when `success` is `false`. */
    warning?: string
}
|
||||||
|
|
||||||
|
/**
 * Input accepted by `FirecrawlApp.search`.
 *
 * `query` is the only required field; every other field is forwarded to the
 * `/v1/search` endpoint only when it is set.
 */
interface SearchRequest {
    /** Search query to find relevant content. */
    query: string
    /** Maximum number of results to return. */
    limit?: number
    /** NOTE(review): presumably a time-based search filter — confirm against the FireCrawl API docs. */
    tbs?: string
    /** Language code for search results (e.g. en, es, fr). */
    lang?: string
    /** Country code for search results (e.g. us, uk, ca). */
    country?: string
    /** NOTE(review): presumably a location hint for the search — confirm against the FireCrawl API docs. */
    location?: string
    /** Timeout in milliseconds for the search operation. */
    timeout?: number
    /** NOTE(review): semantics are not visible in this file; the flag is forwarded unchanged. */
    ignoreInvalidURLs?: boolean
}
|
||||||
|
|
||||||
interface Params {
|
interface Params {
|
||||||
[key: string]: any
|
[key: string]: any
|
||||||
extractorOptions?: {
|
extractorOptions?: {
|
||||||
@@ -161,7 +184,11 @@ class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/scrape', validParams, headers)
|
const parameters = {
|
||||||
|
...validParams,
|
||||||
|
integration: 'flowise'
|
||||||
|
}
|
||||||
|
const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/scrape', parameters, headers)
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data
|
const responseData = response.data
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
@@ -259,7 +286,11 @@ class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/crawl', validParams, headers)
|
const parameters = {
|
||||||
|
...validParams,
|
||||||
|
integration: 'flowise'
|
||||||
|
}
|
||||||
|
const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/crawl', parameters, headers)
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const crawlResponse = response.data as CrawlResponse
|
const crawlResponse = response.data as CrawlResponse
|
||||||
if (!crawlResponse.success) {
|
if (!crawlResponse.success) {
|
||||||
@@ -367,7 +398,11 @@ class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/extract', validParams, headers)
|
const parameters = {
|
||||||
|
...validParams,
|
||||||
|
integration: 'flowise'
|
||||||
|
}
|
||||||
|
const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/extract', parameters, headers)
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const extractResponse = response.data as ExtractResponse
|
const extractResponse = response.data as ExtractResponse
|
||||||
if (waitUntilDone) {
|
if (waitUntilDone) {
|
||||||
@@ -384,18 +419,55 @@ class FirecrawlApp {
|
|||||||
return { success: false, id: '', url: '' }
|
return { success: false, id: '', url: '' }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs a web search through the `/v1/search` endpoint.
 *
 * Only `query` and the optional search fields listed below are forwarded;
 * any other property on `request` is dropped. An optional field is omitted
 * when it is `undefined`, `null`, a blank string, or `NaN`, so an empty form
 * field or a failed number parse never reaches the API.
 *
 * @param request - Search query plus optional search settings.
 * @returns The response body when the service answers 200 with `success: true`.
 * @throws Error when the request fails, the status is not 200, or the body
 *         reports `success: false` (the body's `warning` is used as the reason).
 */
async search(request: SearchRequest): Promise<SearchResponse> {
    const headers = this.prepareHeaders()

    // Optional fields that may be forwarded to the API.
    const optionalKeys = ['limit', 'tbs', 'lang', 'country', 'location', 'timeout', 'ignoreInvalidURLs'] as const

    // Build a clean payload: the query first, then every optional field that carries a usable value.
    const payload: Record<string, unknown> = { query: request.query }
    for (const key of optionalKeys) {
        const value = request[key]
        if (value === undefined || value === null) continue
        if (typeof value === 'string' && value.trim() === '') continue
        if (typeof value === 'number' && Number.isNaN(value)) continue
        payload[key] = value
    }
    // Same integration marker the other request methods attach.
    payload.integration = 'flowise'

    try {
        const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/search', payload, headers)
        if (response.status !== 200) {
            this.handleError(response, 'perform search')
            // Only reached if handleError returns instead of throwing (its body is outside this view).
            return { success: false }
        }

        const searchResponse = response.data as SearchResponse
        if (!searchResponse.success) {
            throw new Error(`Search request failed: ${searchResponse.warning || 'Unknown error'}`)
        }
        return searchResponse
    } catch (error: unknown) {
        // Re-wrap so callers always receive a plain Error carrying only the message,
        // including when something other than an Error was thrown.
        const message = error instanceof Error ? error.message : String(error)
        throw new Error(message)
    }
}
|
||||||
|
|
||||||
private prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
private prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
||||||
return {
|
return {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
'X-Origin': 'flowise',
|
|
||||||
'X-Origin-Type': 'integration',
|
|
||||||
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {})
|
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {})
|
||||||
} as AxiosRequestHeaders & { 'X-Origin': string; 'X-Origin-Type': string; 'x-idempotency-key'?: string }
|
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string }
|
||||||
}
|
}
|
||||||
|
|
||||||
private postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
private async postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||||
return axios.post(url, data, { headers })
|
const result = await axios.post(url, data, { headers })
|
||||||
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
private getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
private getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||||
@@ -468,29 +540,32 @@ class FirecrawlApp {
|
|||||||
|
|
||||||
// FireCrawl Loader
|
// FireCrawl Loader
|
||||||
interface FirecrawlLoaderParameters {
|
interface FirecrawlLoaderParameters {
|
||||||
url: string
|
url?: string
|
||||||
|
query?: string
|
||||||
apiKey?: string
|
apiKey?: string
|
||||||
apiUrl?: string
|
apiUrl?: string
|
||||||
mode?: 'crawl' | 'scrape' | 'extract'
|
mode?: 'crawl' | 'scrape' | 'extract' | 'search'
|
||||||
params?: Record<string, unknown>
|
params?: Record<string, unknown>
|
||||||
}
|
}
|
||||||
|
|
||||||
export class FireCrawlLoader extends BaseDocumentLoader {
|
export class FireCrawlLoader extends BaseDocumentLoader {
|
||||||
private apiKey: string
|
private apiKey: string
|
||||||
private apiUrl: string
|
private apiUrl: string
|
||||||
private url: string
|
private url?: string
|
||||||
private mode: 'crawl' | 'scrape' | 'extract'
|
private query?: string
|
||||||
|
private mode: 'crawl' | 'scrape' | 'extract' | 'search'
|
||||||
private params?: Record<string, unknown>
|
private params?: Record<string, unknown>
|
||||||
|
|
||||||
constructor(loaderParams: FirecrawlLoaderParameters) {
|
constructor(loaderParams: FirecrawlLoaderParameters) {
|
||||||
super()
|
super()
|
||||||
const { apiKey, apiUrl, url, mode = 'crawl', params } = loaderParams
|
const { apiKey, apiUrl, url, query, mode = 'crawl', params } = loaderParams
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
throw new Error('Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.')
|
throw new Error('Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.')
|
||||||
}
|
}
|
||||||
|
|
||||||
this.apiKey = apiKey
|
this.apiKey = apiKey
|
||||||
this.url = url
|
this.url = url
|
||||||
|
this.query = query
|
||||||
this.mode = mode
|
this.mode = mode
|
||||||
this.params = params
|
this.params = params
|
||||||
this.apiUrl = apiUrl || 'https://api.firecrawl.dev'
|
this.apiUrl = apiUrl || 'https://api.firecrawl.dev'
|
||||||
@@ -500,13 +575,37 @@ export class FireCrawlLoader extends BaseDocumentLoader {
|
|||||||
const app = new FirecrawlApp({ apiKey: this.apiKey, apiUrl: this.apiUrl })
|
const app = new FirecrawlApp({ apiKey: this.apiKey, apiUrl: this.apiUrl })
|
||||||
let firecrawlDocs: FirecrawlDocument[]
|
let firecrawlDocs: FirecrawlDocument[]
|
||||||
|
|
||||||
if (this.mode === 'scrape') {
|
if (this.mode === 'search') {
|
||||||
|
if (!this.query) {
|
||||||
|
throw new Error('Firecrawl: Query is required for search mode')
|
||||||
|
}
|
||||||
|
const response = await app.search({ query: this.query, ...this.params })
|
||||||
|
if (!response.success) {
|
||||||
|
throw new Error(`Firecrawl: Failed to search. Warning: ${response.warning}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert search results to FirecrawlDocument format
|
||||||
|
firecrawlDocs = (response.data || []).map((result) => ({
|
||||||
|
markdown: result.description,
|
||||||
|
metadata: {
|
||||||
|
title: result.title,
|
||||||
|
sourceURL: result.url,
|
||||||
|
description: result.description
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
} else if (this.mode === 'scrape') {
|
||||||
|
if (!this.url) {
|
||||||
|
throw new Error('Firecrawl: URL is required for scrape mode')
|
||||||
|
}
|
||||||
const response = await app.scrapeUrl(this.url, this.params)
|
const response = await app.scrapeUrl(this.url, this.params)
|
||||||
if (!response.success) {
|
if (!response.success) {
|
||||||
throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`)
|
throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`)
|
||||||
}
|
}
|
||||||
firecrawlDocs = [response.data as FirecrawlDocument]
|
firecrawlDocs = [response.data as FirecrawlDocument]
|
||||||
} else if (this.mode === 'crawl') {
|
} else if (this.mode === 'crawl') {
|
||||||
|
if (!this.url) {
|
||||||
|
throw new Error('Firecrawl: URL is required for crawl mode')
|
||||||
|
}
|
||||||
const response = await app.crawlUrl(this.url, this.params)
|
const response = await app.crawlUrl(this.url, this.params)
|
||||||
if ('status' in response) {
|
if ('status' in response) {
|
||||||
if (response.status === 'failed') {
|
if (response.status === 'failed') {
|
||||||
@@ -520,6 +619,9 @@ export class FireCrawlLoader extends BaseDocumentLoader {
|
|||||||
firecrawlDocs = [response.data as FirecrawlDocument]
|
firecrawlDocs = [response.data as FirecrawlDocument]
|
||||||
}
|
}
|
||||||
} else if (this.mode === 'extract') {
|
} else if (this.mode === 'extract') {
|
||||||
|
if (!this.url) {
|
||||||
|
throw new Error('Firecrawl: URL is required for extract mode')
|
||||||
|
}
|
||||||
this.params!.urls = [this.url]
|
this.params!.urls = [this.url]
|
||||||
const response = await app.extract(this.params as any as ExtractRequest)
|
const response = await app.extract(this.params as any as ExtractRequest)
|
||||||
if (!response.success) {
|
if (!response.success) {
|
||||||
@@ -557,7 +659,7 @@ export class FireCrawlLoader extends BaseDocumentLoader {
|
|||||||
}
|
}
|
||||||
return []
|
return []
|
||||||
} else {
|
} else {
|
||||||
throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape', 'extract'.`)
|
throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape', 'extract', 'search'.`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert Firecrawl documents to LangChain documents
|
// Convert Firecrawl documents to LangChain documents
|
||||||
@@ -602,7 +704,7 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||||||
this.name = 'fireCrawl'
|
this.name = 'fireCrawl'
|
||||||
this.type = 'Document'
|
this.type = 'Document'
|
||||||
this.icon = 'firecrawl.png'
|
this.icon = 'firecrawl.png'
|
||||||
this.version = 3.0
|
this.version = 4.0
|
||||||
this.category = 'Document Loaders'
|
this.category = 'Document Loaders'
|
||||||
this.description = 'Load data from URL using FireCrawl'
|
this.description = 'Load data from URL using FireCrawl'
|
||||||
this.baseClasses = [this.type]
|
this.baseClasses = [this.type]
|
||||||
@@ -620,14 +722,7 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||||||
optional: true
|
optional: true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: 'URLs',
|
label: 'Type',
|
||||||
name: 'url',
|
|
||||||
type: 'string',
|
|
||||||
description: 'URL to be crawled/scraped/extracted',
|
|
||||||
placeholder: 'https://docs.flowiseai.com'
|
|
||||||
},
|
|
||||||
{
|
|
||||||
label: 'Crawler type',
|
|
||||||
type: 'options',
|
type: 'options',
|
||||||
name: 'crawlerType',
|
name: 'crawlerType',
|
||||||
options: [
|
options: [
|
||||||
@@ -645,89 +740,179 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||||||
label: 'Extract',
|
label: 'Extract',
|
||||||
name: 'extract',
|
name: 'extract',
|
||||||
description: 'Extract data from a URL'
|
description: 'Extract data from a URL'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Search',
|
||||||
|
name: 'search',
|
||||||
|
description: 'Search the web using FireCrawl'
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
default: 'crawl'
|
default: 'crawl'
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
label: 'URLs',
|
||||||
|
name: 'url',
|
||||||
|
type: 'string',
|
||||||
|
description: 'URL to be crawled/scraped/extracted',
|
||||||
|
placeholder: 'https://docs.flowiseai.com',
|
||||||
|
optional: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['crawl', 'scrape', 'extract']
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
// includeTags
|
// includeTags
|
||||||
label: '[Scrape] Include Tags',
|
label: 'Include Tags',
|
||||||
name: 'includeTags',
|
name: 'includeTags',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'Tags to include in the output. Use comma to separate multiple tags.',
|
description: 'Tags to include in the output. Use comma to separate multiple tags.',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['scrape']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// excludeTags
|
// excludeTags
|
||||||
label: '[Scrape] Exclude Tags',
|
label: 'Exclude Tags',
|
||||||
name: 'excludeTags',
|
name: 'excludeTags',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'Tags to exclude from the output. Use comma to separate multiple tags.',
|
description: 'Tags to exclude from the output. Use comma to separate multiple tags.',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['scrape']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// onlyMainContent
|
// onlyMainContent
|
||||||
label: '[Scrape] Only Main Content',
|
label: 'Only Main Content',
|
||||||
name: 'onlyMainContent',
|
name: 'onlyMainContent',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
description: 'Extract only the main content of the page',
|
description: 'Extract only the main content of the page',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['scrape']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// limit
|
// limit
|
||||||
label: '[Crawl] Limit',
|
label: 'Limit',
|
||||||
name: 'limit',
|
name: 'limit',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'Maximum number of pages to crawl',
|
description: 'Maximum number of pages to crawl',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true,
|
additionalParams: true,
|
||||||
default: '10000'
|
default: '10000',
|
||||||
|
show: {
|
||||||
|
crawlerType: ['crawl']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: '[Crawl] Include Paths',
|
label: 'Include Paths',
|
||||||
name: 'includePaths',
|
name: 'includePaths',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description:
|
description:
|
||||||
'URL pathname regex patterns that include matching URLs in the crawl. Only the paths that match the specified patterns will be included in the response.',
|
'URL pathname regex patterns that include matching URLs in the crawl. Only the paths that match the specified patterns will be included in the response.',
|
||||||
placeholder: `blog/.*, news/.*`,
|
placeholder: `blog/.*, news/.*`,
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['crawl']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: '[Crawl] Exclude Paths',
|
label: 'Exclude Paths',
|
||||||
name: 'excludePaths',
|
name: 'excludePaths',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'URL pathname regex patterns that exclude matching URLs from the crawl.',
|
description: 'URL pathname regex patterns that exclude matching URLs from the crawl.',
|
||||||
placeholder: `blog/.*, news/.*`,
|
placeholder: `blog/.*, news/.*`,
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['crawl']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: '[Extract] Schema',
|
label: 'Schema',
|
||||||
name: 'extractSchema',
|
name: 'extractSchema',
|
||||||
type: 'json',
|
type: 'json',
|
||||||
description: 'JSON schema for data extraction',
|
description: 'JSON schema for data extraction',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['extract']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: '[Extract] Prompt',
|
label: 'Prompt',
|
||||||
name: 'extractPrompt',
|
name: 'extractPrompt',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'Prompt for data extraction',
|
description: 'Prompt for data extraction',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
additionalParams: true,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['extract']
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
label: '[Extract] Job ID',
|
label: 'Query',
|
||||||
name: 'extractJobId',
|
name: 'searchQuery',
|
||||||
type: 'string',
|
type: 'string',
|
||||||
description: 'ID of the extract job',
|
description: 'Search query to find relevant content',
|
||||||
optional: true,
|
optional: true,
|
||||||
additionalParams: true
|
show: {
|
||||||
|
crawlerType: ['search']
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Limit',
|
||||||
|
name: 'searchLimit',
|
||||||
|
type: 'string',
|
||||||
|
description: 'Maximum number of results to return',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true,
|
||||||
|
default: '5',
|
||||||
|
show: {
|
||||||
|
crawlerType: ['search']
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Language',
|
||||||
|
name: 'searchLang',
|
||||||
|
type: 'string',
|
||||||
|
description: 'Language code for search results (e.g., en, es, fr)',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true,
|
||||||
|
default: 'en',
|
||||||
|
show: {
|
||||||
|
crawlerType: ['search']
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Country',
|
||||||
|
name: 'searchCountry',
|
||||||
|
type: 'string',
|
||||||
|
description: 'Country code for search results (e.g., us, uk, ca)',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true,
|
||||||
|
default: 'us',
|
||||||
|
show: {
|
||||||
|
crawlerType: ['search']
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Timeout',
|
||||||
|
name: 'searchTimeout',
|
||||||
|
type: 'number',
|
||||||
|
description: 'Timeout in milliseconds for search operation',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true,
|
||||||
|
default: 60000,
|
||||||
|
show: {
|
||||||
|
crawlerType: ['search']
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
this.outputs = [
|
this.outputs = [
|
||||||
@@ -758,6 +943,11 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||||||
const firecrawlApiUrl = getCredentialParam('firecrawlApiUrl', credentialData, nodeData, 'https://api.firecrawl.dev')
|
const firecrawlApiUrl = getCredentialParam('firecrawlApiUrl', credentialData, nodeData, 'https://api.firecrawl.dev')
|
||||||
const output = nodeData.outputs?.output as string
|
const output = nodeData.outputs?.output as string
|
||||||
|
|
||||||
|
// Validate URL only for non-search methods
|
||||||
|
if (crawlerType !== 'search' && !url) {
|
||||||
|
throw new Error('Firecrawl: URL is required for ' + crawlerType + ' mode')
|
||||||
|
}
|
||||||
|
|
||||||
const includePaths = nodeData.inputs?.includePaths ? (nodeData.inputs.includePaths.split(',') as string[]) : undefined
|
const includePaths = nodeData.inputs?.includePaths ? (nodeData.inputs.includePaths.split(',') as string[]) : undefined
|
||||||
const excludePaths = nodeData.inputs?.excludePaths ? (nodeData.inputs.excludePaths.split(',') as string[]) : undefined
|
const excludePaths = nodeData.inputs?.excludePaths ? (nodeData.inputs.excludePaths.split(',') as string[]) : undefined
|
||||||
|
|
||||||
@@ -767,9 +957,16 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||||||
const extractSchema = nodeData.inputs?.extractSchema
|
const extractSchema = nodeData.inputs?.extractSchema
|
||||||
const extractPrompt = nodeData.inputs?.extractPrompt as string
|
const extractPrompt = nodeData.inputs?.extractPrompt as string
|
||||||
|
|
||||||
|
const searchQuery = nodeData.inputs?.searchQuery as string
|
||||||
|
const searchLimit = nodeData.inputs?.searchLimit as string
|
||||||
|
const searchLang = nodeData.inputs?.searchLang as string
|
||||||
|
const searchCountry = nodeData.inputs?.searchCountry as string
|
||||||
|
const searchTimeout = nodeData.inputs?.searchTimeout as number
|
||||||
|
|
||||||
const input: FirecrawlLoaderParameters = {
|
const input: FirecrawlLoaderParameters = {
|
||||||
url,
|
url,
|
||||||
mode: crawlerType as 'crawl' | 'scrape' | 'extract',
|
query: searchQuery,
|
||||||
|
mode: crawlerType as 'crawl' | 'scrape' | 'extract' | 'search',
|
||||||
apiKey: firecrawlApiToken,
|
apiKey: firecrawlApiToken,
|
||||||
apiUrl: firecrawlApiUrl,
|
apiUrl: firecrawlApiUrl,
|
||||||
params: {
|
params: {
|
||||||
@@ -785,6 +982,19 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add search-specific parameters only when in search mode
|
||||||
|
if (crawlerType === 'search') {
|
||||||
|
if (!searchQuery) {
|
||||||
|
throw new Error('Firecrawl: Search query is required for search mode')
|
||||||
|
}
|
||||||
|
input.params = {
|
||||||
|
limit: searchLimit ? parseInt(searchLimit, 10) : 5,
|
||||||
|
lang: searchLang,
|
||||||
|
country: searchCountry,
|
||||||
|
timeout: searchTimeout
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (onlyMainContent === true) {
|
if (onlyMainContent === true) {
|
||||||
const scrapeOptions = input.params?.scrapeOptions as any
|
const scrapeOptions = input.params?.scrapeOptions as any
|
||||||
input.params!.scrapeOptions = {
|
input.params!.scrapeOptions = {
|
||||||
|
|||||||
Reference in New Issue
Block a user