add metadata filter

This commit is contained in:
Henry
2023-05-12 13:57:38 +01:00
parent 7313cdd9c6
commit ab875cc1b3
14 changed files with 364 additions and 48 deletions
@@ -31,12 +31,21 @@ class Cheerio_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
let url = nodeData.inputs?.url as string
var urlPattern = new RegExp(
@@ -50,14 +59,31 @@ class Cheerio_DocumentLoaders implements INode {
) // validate fragment locator
const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}