add metadata filter

This commit is contained in:
Henry
2023-05-12 13:57:38 +01:00
parent 7313cdd9c6
commit ab875cc1b3
14 changed files with 364 additions and 48 deletions
@@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
}
],
default: 'perPage'
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -57,30 +64,45 @@ class Pdf_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string
const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(pdfFileBase64))
let docs = []
if (usage === 'perFile') {
// @ts-ignore
const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
} else {
// @ts-ignore
const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}