diff --git a/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts b/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts index a5e6a6e0..9ecaa594 100644 --- a/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts +++ b/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts @@ -21,11 +21,17 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode { this.name = 'apifyWebsiteContentCrawler' this.type = 'Document' this.icon = 'apify-symbol-transparent.svg' - this.version = 1.0 + this.version = 2.0 this.category = 'Document Loaders' this.description = 'Load data from Apify Website Content Crawler' this.baseClasses = [this.type] this.inputs = [ + { + label: 'Text Splitter', + name: 'textSplitter', + type: 'TextSplitter', + optional: true + }, { label: 'Start URLs', name: 'urls', @@ -64,14 +70,16 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode { name: 'maxCrawlDepth', type: 'number', optional: true, - default: 1 + default: 1, + additionalParams: true }, { label: 'Max crawl pages', name: 'maxCrawlPages', type: 'number', optional: true, - default: 3 + default: 3, + additionalParams: true }, { label: 'Additional input', @@ -80,13 +88,15 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode { default: JSON.stringify({}), description: 'For additional input options for the crawler see documentation.', - optional: true + optional: true, + additionalParams: true }, { - label: 'Text Splitter', - name: 'textSplitter', - type: 'TextSplitter', - optional: true + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] this.credential = { @@ -99,6 +109,7 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode { async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const metadata = nodeData.inputs?.metadata // Get input options and merge with additional input const urls = nodeData.inputs?.urls as string @@ -132,7 +143,31 @@ class ApifyWebsiteContentCrawler_DocumentLoaders implements INode { } }) - return textSplitter ? loader.loadAndSplit(textSplitter) : loader.load() + let docs = [] + + if (textSplitter) { + docs = await loader.loadAndSplit(textSplitter) + } else { + docs = await loader.load() + } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } }