add metadata filter

This commit is contained in:
Henry
2023-05-12 13:57:38 +01:00
parent 7313cdd9c6
commit ab875cc1b3
14 changed files with 364 additions and 48 deletions
@@ -31,12 +31,21 @@ class Cheerio_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
let url = nodeData.inputs?.url as string
var urlPattern = new RegExp(
@@ -50,14 +59,31 @@ class Cheerio_DocumentLoaders implements INode {
) // validate fragment locator
const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -41,6 +41,13 @@ class Csv_DocumentLoaders implements INode {
description: 'Extracting a single column',
placeholder: 'Enter column name',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -49,17 +56,35 @@ class Csv_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const csvFileBase64 = nodeData.inputs?.csvFile as string
const columnName = nodeData.inputs?.columnName as string
const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(csvFileBase64))
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -33,6 +33,13 @@ class Docx_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -40,17 +47,35 @@ class Docx_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const docxFileBase64 = nodeData.inputs?.docxFile as string
const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(docxFileBase64))
const loader = new DocxLoader(blob)
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -37,6 +37,13 @@ class Folder_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -44,6 +51,7 @@ class Folder_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const folderPath = nodeData.inputs?.folderPath as string
const metadata = nodeData.inputs?.metadata
const loader = new DirectoryLoader(folderPath, {
'.json': (path) => new JSONLoader(path),
@@ -53,14 +61,31 @@ class Folder_DocumentLoaders implements INode {
// @ts-ignore
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
})
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -45,6 +45,13 @@ class Github_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -54,6 +61,7 @@ class Github_DocumentLoaders implements INode {
const branch = nodeData.inputs?.branch as string
const accessToken = nodeData.inputs?.accessToken as string
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const options: GithubRepoLoaderParams = {
branch,
@@ -64,14 +72,31 @@ class Github_DocumentLoaders implements INode {
if (accessToken) options.accessToken = accessToken
const loader = new GithubRepoLoader(repoLink, options)
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -41,6 +41,13 @@ class Json_DocumentLoaders implements INode {
description: 'Extracting multiple pointers',
placeholder: 'Enter pointers name',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -49,6 +56,7 @@ class Json_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const jsonFileBase64 = nodeData.inputs?.jsonFile as string
const pointersName = nodeData.inputs?.pointersName as string
const metadata = nodeData.inputs?.metadata
let pointers: string[] = []
if (pointersName) {
@@ -58,14 +66,31 @@ class Json_DocumentLoaders implements INode {
const blob = new Blob(getBlob(jsonFileBase64))
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -33,6 +33,13 @@ class Notion_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -40,16 +47,34 @@ class Notion_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const notionFolder = nodeData.inputs?.notionFolder as string
const metadata = nodeData.inputs?.metadata
const loader = new NotionLoader(notionFolder)
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
}
],
default: 'perPage'
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -57,30 +64,45 @@ class Pdf_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string
const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(pdfFileBase64))
let docs = []
if (usage === 'perFile') {
// @ts-ignore
const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
} else {
// @ts-ignore
const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -33,6 +33,13 @@ class Text_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -40,17 +47,34 @@ class Text_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const txtFileBase64 = nodeData.inputs?.txtFile as string
const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(txtFileBase64))
const loader = new TextLoader(blob)
let docs = []
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
@@ -50,6 +50,13 @@ class Pinecone_Existing_VectorStores implements INode {
type: 'string',
placeholder: 'my-first-namespace',
optional: true
},
{
label: 'Pinecone Metadata Filter',
name: 'pineconeMetadataFilter',
type: 'json',
optional: true,
additionalParams: true
}
]
this.outputs = [
@@ -71,6 +78,8 @@ class Pinecone_Existing_VectorStores implements INode {
const pineconeEnv = nodeData.inputs?.pineconeEnv as string
const index = nodeData.inputs?.pineconeIndex as string
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
const pineconeMetadataFilter = nodeData.inputs?.pineconeMetadataFilter
const embeddings = nodeData.inputs?.embeddings as Embeddings
const output = nodeData.outputs?.output as string
@@ -87,6 +96,10 @@ class Pinecone_Existing_VectorStores implements INode {
}
if (pineconeNamespace) obj.namespace = pineconeNamespace
if (pineconeMetadataFilter) {
const metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter)
obj.filter = metadatafilter
}
const vectorStore = await PineconeStore.fromExistingIndex(embeddings, obj)
@@ -1,7 +1,7 @@
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses } from '../../../src/utils'
import { SupabaseVectorStore } from 'langchain/vectorstores/supabase'
import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase'
import { createClient } from '@supabase/supabase-js'
class Supabase_Existing_VectorStores implements INode {
@@ -48,6 +48,13 @@ class Supabase_Existing_VectorStores implements INode {
label: 'Query Name',
name: 'queryName',
type: 'string'
},
{
label: 'Supabase Metadata Filter',
name: 'supabaseMetadataFilter',
type: 'json',
optional: true,
additionalParams: true
}
]
this.outputs = [
@@ -70,15 +77,23 @@ class Supabase_Existing_VectorStores implements INode {
const tableName = nodeData.inputs?.tableName as string
const queryName = nodeData.inputs?.queryName as string
const embeddings = nodeData.inputs?.embeddings as Embeddings
const supabaseMetadataFilter = nodeData.inputs?.supabaseMetadataFilter
const output = nodeData.outputs?.output as string
const client = createClient(supabaseProjUrl, supabaseApiKey)
const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, {
const obj: SupabaseLibArgs = {
client,
tableName: tableName,
queryName: queryName
})
tableName,
queryName
}
if (supabaseMetadataFilter) {
const metadatafilter = typeof supabaseMetadataFilter === 'object' ? supabaseMetadataFilter : JSON.parse(supabaseMetadataFilter)
obj.filter = metadatafilter
}
const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj)
if (output === 'retriever') {
const retriever = vectorStore.asRetriever()