Merge branch 'main' into main
@@ -0,0 +1,200 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
|
||||
import { Document } from 'langchain/document'
|
||||
import axios, { AxiosRequestConfig } from 'axios'
|
||||
|
||||
/**
 * Flowise node that loads the response of an HTTP API call as documents.
 *
 * The node issues a GET or POST request through `ApiLoader`, which returns the
 * pretty-printed JSON response as a single document. The result can optionally
 * be split by a connected text splitter and enriched with extra metadata.
 */
class API_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs?: INodeParams[]

    constructor() {
        this.label = 'API Loader'
        this.name = 'apiLoader'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'api-loader.png'
        this.category = 'Document Loaders'
        this.description = `Load data from an API`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Method',
                name: 'method',
                type: 'options',
                options: [
                    {
                        label: 'GET',
                        name: 'GET'
                    },
                    {
                        label: 'POST',
                        name: 'POST'
                    }
                ]
            },
            {
                label: 'URL',
                name: 'url',
                type: 'string'
            },
            {
                label: 'Headers',
                name: 'headers',
                type: 'json',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Body',
                name: 'body',
                type: 'json',
                // The loader posts `{}` when no body is configured, so say so instead of
                // the agent-tool wording this description was originally copied from.
                description: 'JSON body for the POST request. If not specified, an empty JSON object is sent',
                additionalParams: true,
                optional: true
            },
            {
                // init() already reads `inputs.metadata`; expose it so it can actually be set.
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Runs the configured request and returns the resulting documents.
     *
     * @param nodeData - node configuration; reads the `headers`, `url`, `body`,
     *   `method`, `textSplitter` and `metadata` inputs.
     * @returns the loaded (and optionally split) documents; when metadata is
     *   supplied, each document is returned as a copy with the extra keys merged in.
     * @throws if `headers`, `body` or `metadata` is a string that is not valid JSON,
     *   or if the HTTP request fails.
     */
    async init(nodeData: INodeData): Promise<any> {
        const headers = nodeData.inputs?.headers as string
        const url = nodeData.inputs?.url as string
        const body = nodeData.inputs?.body as string
        const method = nodeData.inputs?.method as string
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata

        const options: ApiLoaderParams = {
            url,
            method
        }

        // JSON inputs may arrive either already parsed or as a raw JSON string.
        if (headers) {
            options.headers = typeof headers === 'object' ? headers : JSON.parse(headers)
        }

        if (body) {
            options.body = typeof body === 'object' ? body : JSON.parse(body)
        }

        const loader = new ApiLoader(options)
        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()

        if (!metadata) return docs

        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
        // User-supplied keys win over the loader's own metadata on conflict.
        return docs.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                ...parsedMetadata
            }
        }))
    }
}
|
||||
|
||||
/** Constructor options accepted by `ApiLoader`. */
interface ApiLoaderParams {
    /** HTTP method; `ApiLoader.load()` sends a POST for 'POST' and a GET for anything else. */
    method: string
    /** Endpoint the request is sent to. */
    url: string
    /** Optional request payload, only used for POST requests. */
    body?: ICommonObject
    /** Optional request headers, passed to axios unchanged. */
    headers?: ICommonObject
}
|
||||
|
||||
/**
 * Document loader that performs one HTTP request with axios and wraps the
 * pretty-printed JSON response in a single langchain `Document`.
 */
class ApiLoader extends BaseDocumentLoader {
    public readonly url: string

    public readonly headers?: ICommonObject

    public readonly body?: ICommonObject

    public readonly method: string

    constructor(params: ApiLoaderParams) {
        super()
        this.url = params.url
        this.headers = params.headers
        this.body = params.body
        this.method = params.method
    }

    /**
     * Executes the configured request.
     *
     * @returns a one-element array holding the response document.
     */
    public async load(): Promise<Document[]> {
        // Anything other than an explicit POST is treated as a GET.
        const isPost = this.method === 'POST'
        return isPost ? this.executePostRequest(this.url, this.headers, this.body) : this.executeGetRequest(this.url, this.headers)
    }

    /**
     * Sends a GET request and converts the response into a document.
     *
     * @param url - endpoint to fetch; also stored as the document's `url` metadata.
     * @param headers - optional request headers.
     * @throws Error prefixed with "Failed to fetch" when the request or conversion fails.
     */
    protected async executeGetRequest(url: string, headers?: ICommonObject): Promise<Document[]> {
        const requestConfig: AxiosRequestConfig = headers ? { headers } : {}
        try {
            const response = await axios.get(url, requestConfig)
            const pageContent = JSON.stringify(response.data, null, 2)
            return [new Document({ pageContent, metadata: { url } })]
        } catch (error) {
            throw new Error(`Failed to fetch ${url}: ${error}`)
        }
    }

    /**
     * Sends a POST request and converts the response into a document.
     *
     * @param url - endpoint to post to; also stored as the document's `url` metadata.
     * @param headers - optional request headers.
     * @param body - optional payload; an empty object is posted when omitted.
     * @throws Error prefixed with "Failed to post" when the request or conversion fails.
     */
    protected async executePostRequest(url: string, headers?: ICommonObject, body?: ICommonObject): Promise<Document[]> {
        const requestConfig: AxiosRequestConfig = headers ? { headers } : {}
        try {
            const response = await axios.post(url, body ?? {}, requestConfig)
            const pageContent = JSON.stringify(response.data, null, 2)
            return [new Document({ pageContent, metadata: { url } })]
        } catch (error) {
            throw new Error(`Failed to post ${url}: ${error}`)
        }
    }
}
|
||||
|
||||
// Expose the node class under the `nodeClass` key of the CommonJS exports.
module.exports = { nodeClass: API_DocumentLoaders }
|
||||
|
After Width: | Height: | Size: 1.4 KiB |
@@ -0,0 +1,230 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
|
||||
import { Document } from 'langchain/document'
|
||||
import axios from 'axios'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src/utils'
|
||||
|
||||
/**
 * Flowise node that loads the records of an Airtable table as documents.
 *
 * Records are fetched through `AirtableLoader` using the access token of the
 * connected `airtableApi` credential. The result can optionally be split by a
 * connected text splitter and enriched with extra metadata.
 */
class Airtable_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    credential: INodeParams
    inputs?: INodeParams[]

    constructor() {
        this.label = 'Airtable'
        this.name = 'airtable'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'airtable.svg'
        this.category = 'Document Loaders'
        this.description = `Load data from Airtable table`
        this.baseClasses = [this.type]
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['airtableApi']
        }
        this.inputs = [
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Base Id',
                name: 'baseId',
                type: 'string',
                placeholder: 'app11RobdGoX0YNsC',
                // The id quoted here must match the one in the example URL.
                description:
                    'If your table URL looks like: https://airtable.com/app11RobdGoX0YNsC/tblJdmvbrgizbYICO/viw9UrP77Id0CE4ee, app11RobdGoX0YNsC is the base id'
            },
            {
                label: 'Table Id',
                name: 'tableId',
                type: 'string',
                placeholder: 'tblJdmvbrgizbYICO',
                description:
                    'If your table URL looks like: https://airtable.com/app11RobdGoX0YNsC/tblJdmvbrgizbYICO/viw9UrP77Id0CE4ee, tblJdmvbrgizbYICO is the table id'
            },
            {
                label: 'Return All',
                name: 'returnAll',
                type: 'boolean',
                default: true,
                additionalParams: true,
                description: 'If all results should be returned or only up to a given limit'
            },
            {
                label: 'Limit',
                name: 'limit',
                type: 'number',
                default: 100,
                additionalParams: true,
                description: 'Number of results to return'
            },
            {
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Fetches the configured table and returns its records as documents.
     *
     * @param nodeData - node configuration; reads the `baseId`, `tableId`,
     *   `returnAll`, `limit`, `textSplitter` and `metadata` inputs plus the credential.
     * @param _ - unused.
     * @param options - passed through to `getCredentialData`.
     * @returns the loaded (and optionally split) documents; when metadata is
     *   supplied, each document is returned as a copy with the extra keys merged in.
     * @throws if `metadata` is a string that is not valid JSON, or if the
     *   Airtable request fails.
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const baseId = nodeData.inputs?.baseId as string
        const tableId = nodeData.inputs?.tableId as string
        const returnAll = nodeData.inputs?.returnAll as boolean
        const limit = nodeData.inputs?.limit as string
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata

        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const accessToken = getCredentialParam('accessToken', credentialData, nodeData)

        const airtableOptions: AirtableLoaderParams = {
            baseId,
            tableId,
            returnAll,
            accessToken,
            // Fall back to 100 records when no limit was entered.
            limit: limit ? parseInt(limit, 10) : 100
        }

        const loader = new AirtableLoader(airtableOptions)
        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()

        if (!metadata) return docs

        // The JSON input may arrive either already parsed or as a raw JSON string.
        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
        // User-supplied keys win over the loader's own metadata on conflict.
        return docs.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                ...parsedMetadata
            }
        }))
    }
}
|
||||
|
||||
/** Constructor options accepted by `AirtableLoader`. */
interface AirtableLoaderParams {
    /** Token sent as the `Authorization: Bearer` header. */
    accessToken: string
    /** Id of the Airtable base, used as the first path segment of the API URL. */
    baseId: string
    /** Id of the table inside the base, used as the second path segment. */
    tableId: string
    /** When true every record is fetched and `limit` is not used. */
    returnAll?: boolean
    /** Value sent as `maxRecords` when `returnAll` is not set. */
    limit?: number
}
|
||||
|
||||
/** Shape of the response body that `AirtableLoader` reads from the records endpoint. */
interface AirtableLoaderResponse {
    /** Cursor for the next request; `AirtableLoader` stops paging once it is undefined. */
    offset?: string
    /** Records contained in this response. */
    records: AirtableLoaderPage[]
}
|
||||
|
||||
/** A single Airtable record as read by `AirtableLoader`. */
interface AirtableLoaderPage {
    /** Record id; appended to the API URL to build the document's `url` metadata. */
    id: string
    /** Field values of the record; serialized as the document content. */
    fields: ICommonObject
    /** Creation timestamp string (not read by the loader). */
    createdTime: string
}
|
||||
|
||||
/**
 * Document loader that reads the records of one Airtable table through the
 * Airtable REST API and turns each record into a langchain `Document`.
 */
class AirtableLoader extends BaseDocumentLoader {
    public readonly baseId: string

    public readonly tableId: string

    public readonly accessToken: string

    public readonly limit: number

    public readonly returnAll: boolean

    constructor({ baseId, tableId, accessToken, limit = 100, returnAll = false }: AirtableLoaderParams) {
        super()
        this.baseId = baseId
        this.tableId = tableId
        this.accessToken = accessToken
        this.limit = limit
        this.returnAll = returnAll
    }

    /**
     * Loads the table.
     *
     * @returns one document per record: every record when `returnAll` is set,
     *   otherwise up to `limit` records.
     */
    public async load(): Promise<Document[]> {
        if (this.returnAll) {
            return this.loadAll()
        }
        return this.loadLimit()
    }

    /**
     * Performs one authenticated GET request against the Airtable API.
     *
     * @param url - endpoint to call.
     * @param params - query-string parameters.
     * @returns the parsed response body.
     * @throws Error naming the URL when the request fails.
     */
    protected async fetchAirtableData(url: string, params: ICommonObject): Promise<AirtableLoaderResponse> {
        try {
            const headers = {
                Authorization: `Bearer ${this.accessToken}`,
                'Content-Type': 'application/json',
                Accept: 'application/json'
            }
            const response = await axios.get(url, { params, headers })
            return response.data
        } catch (error) {
            throw new Error(`Failed to fetch ${url} from Airtable: ${error}`)
        }
    }

    /** Converts one record into a document whose content is the record's fields as JSON. */
    private createDocumentFromPage(page: AirtableLoaderPage): Document {
        // Generate the URL
        const pageUrl = `https://api.airtable.com/v0/${this.baseId}/${this.tableId}/${page.id}`

        // Return a langchain document
        return new Document({
            pageContent: JSON.stringify(page.fields, null, 2),
            metadata: {
                url: pageUrl
            }
        })
    }

    /**
     * Requests the table repeatedly, following the `offset` cursor of each
     * response, until no cursor is returned or `maxRecords` records were collected.
     *
     * @param params - query parameters of the first request; `offset` is updated in place.
     * @param maxRecords - stop paging once this many records were collected.
     */
    private async fetchPages(params: ICommonObject, maxRecords = Infinity): Promise<AirtableLoaderPage[]> {
        const url = `https://api.airtable.com/v0/${this.baseId}/${this.tableId}`
        const pages: AirtableLoaderPage[] = []
        let data: AirtableLoaderResponse

        do {
            data = await this.fetchAirtableData(url, params)
            pages.push(...data.records)
            params.offset = data.offset
        } while (data.offset !== undefined && pages.length < maxRecords)

        return pages
    }

    /**
     * Loads up to `limit` records.
     *
     * The list endpoint is paginated (an `offset` is returned while more records
     * remain), so a limit larger than one page needs follow-up requests. A single
     * request would silently truncate the result to the first page.
     */
    private async loadLimit(): Promise<Document[]> {
        const pages = await this.fetchPages({ maxRecords: this.limit }, this.limit)
        return pages.map((page) => this.createDocumentFromPage(page))
    }

    /** Loads every record of the table, 100 records per request. */
    private async loadAll(): Promise<Document[]> {
        const pages = await this.fetchPages({ pageSize: 100 })
        return pages.map((page) => this.createDocumentFromPage(page))
    }
}
|
||||
|
||||
// Expose the node class under the `nodeClass` key of the CommonJS exports.
module.exports = { nodeClass: Airtable_DocumentLoaders }
|
||||
@@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg width="256px" height="215px" viewBox="0 0 256 215" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid">
|
||||
<g>
|
||||
<path d="M114.25873,2.70101695 L18.8604023,42.1756384 C13.5552723,44.3711638 13.6102328,51.9065311 18.9486282,54.0225085 L114.746142,92.0117514 C123.163769,95.3498757 132.537419,95.3498757 140.9536,92.0117514 L236.75256,54.0225085 C242.08951,51.9065311 242.145916,44.3711638 236.83934,42.1756384 L141.442459,2.70101695 C132.738459,-0.900338983 122.961284,-0.900338983 114.25873,2.70101695" fill="#FFBF00"></path>
|
||||
<path d="M136.349071,112.756863 L136.349071,207.659101 C136.349071,212.173089 140.900664,215.263892 145.096461,213.600615 L251.844122,172.166219 C254.281184,171.200072 255.879376,168.845451 255.879376,166.224705 L255.879376,71.3224678 C255.879376,66.8084791 251.327783,63.7176768 247.131986,65.3809537 L140.384325,106.815349 C137.94871,107.781496 136.349071,110.136118 136.349071,112.756863" fill="#26B5F8"></path>
|
||||
<path d="M111.422771,117.65355 L79.742409,132.949912 L76.5257763,134.504714 L9.65047684,166.548104 C5.4112904,168.593211 0.000578531073,165.503855 0.000578531073,160.794612 L0.000578531073,71.7210757 C0.000578531073,70.0173017 0.874160452,68.5463864 2.04568588,67.4384994 C2.53454463,66.9481944 3.08848814,66.5446689 3.66412655,66.2250305 C5.26231864,65.2661153 7.54173107,65.0101153 9.47981017,65.7766689 L110.890522,105.957098 C116.045234,108.002206 116.450206,115.225166 111.422771,117.65355" fill="#ED3049"></path>
|
||||
<path d="M111.422771,117.65355 L79.742409,132.949912 L2.04568588,67.4384994 C2.53454463,66.9481944 3.08848814,66.5446689 3.66412655,66.2250305 C5.26231864,65.2661153 7.54173107,65.0101153 9.47981017,65.7766689 L110.890522,105.957098 C116.045234,108.002206 116.450206,115.225166 111.422771,117.65355" fill-opacity="0.25" fill="#000000"></path>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.9 KiB |
@@ -2,11 +2,12 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { CheerioWebBaseLoader } from 'langchain/document_loaders/web/cheerio'
|
||||
import { test } from 'linkifyjs'
|
||||
import { getAvailableURLs } from '../../../src'
|
||||
import { webCrawl, xmlScrape } from '../../../src'
|
||||
|
||||
class Cheerio_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -17,6 +18,7 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Cheerio Web Scraper'
|
||||
this.name = 'cheerioWebScraper'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'cheerio.svg'
|
||||
this.category = 'Document Loaders'
|
||||
@@ -35,19 +37,34 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Web Scrap for Relative Links',
|
||||
name: 'webScrap',
|
||||
type: 'boolean',
|
||||
label: 'Get Relative Links Method',
|
||||
name: 'relativeLinksMethod',
|
||||
type: 'options',
|
||||
description: 'Select a method to retrieve relative links',
|
||||
options: [
|
||||
{
|
||||
label: 'Web Crawl',
|
||||
name: 'webCrawl',
|
||||
description: 'Crawl relative links from HTML URL'
|
||||
},
|
||||
{
|
||||
label: 'Scrape XML Sitemap',
|
||||
name: 'scrapeXMLSitemap',
|
||||
description: 'Scrape relative links from XML sitemap URL'
|
||||
}
|
||||
],
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Web Scrap Links Limit',
|
||||
label: 'Get Relative Links Limit',
|
||||
name: 'limit',
|
||||
type: 'number',
|
||||
default: 10,
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
additionalParams: true,
|
||||
description:
|
||||
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
@@ -62,7 +79,7 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const webScrap = nodeData.inputs?.webScrap as boolean
|
||||
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
||||
let limit = nodeData.inputs?.limit as string
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
@@ -71,25 +88,34 @@ class Cheerio_DocumentLoaders implements INode {
|
||||
throw new Error('Invalid URL')
|
||||
}
|
||||
|
||||
const cheerioLoader = async (url: string): Promise<any> => {
|
||||
let docs = []
|
||||
const loader = new CheerioWebBaseLoader(url)
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
docs = await loader.load()
|
||||
async function cheerioLoader(url: string): Promise<any> {
|
||||
try {
|
||||
let docs = []
|
||||
const loader = new CheerioWebBaseLoader(url)
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
docs = await loader.load()
|
||||
}
|
||||
return docs
|
||||
} catch (err) {
|
||||
if (process.env.DEBUG === 'true') console.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`)
|
||||
}
|
||||
return docs
|
||||
}
|
||||
|
||||
let availableUrls: string[]
|
||||
let docs = []
|
||||
if (webScrap) {
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||
if (!limit) limit = '10'
|
||||
availableUrls = await getAvailableURLs(url, parseInt(limit))
|
||||
for (let i = 0; i < availableUrls.length; i++) {
|
||||
docs.push(...(await cheerioLoader(availableUrls[i])))
|
||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
||||
const pages: string[] =
|
||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
||||
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
||||
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
||||
for (const page of pages) {
|
||||
docs.push(...(await cheerioLoader(page)))
|
||||
}
|
||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
||||
} else {
|
||||
docs = await cheerioLoader(url)
|
||||
}
|
||||
|
||||
@@ -1,25 +1,35 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { ConfluencePagesLoader, ConfluencePagesLoaderParams } from 'langchain/document_loaders/web/confluence'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
|
||||
class Confluence_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
category: string
|
||||
baseClasses: string[]
|
||||
credential: INodeParams
|
||||
inputs: INodeParams[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'Confluence'
|
||||
this.name = 'confluence'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'confluence.png'
|
||||
this.category = 'Document Loaders'
|
||||
this.description = `Load data from a Confluence Document`
|
||||
this.baseClasses = [this.type]
|
||||
this.credential = {
|
||||
label: 'Connect Credential',
|
||||
name: 'credential',
|
||||
type: 'credential',
|
||||
credentialNames: ['confluenceApi']
|
||||
}
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Text Splitter',
|
||||
@@ -27,18 +37,6 @@ class Confluence_DocumentLoaders implements INode {
|
||||
type: 'TextSplitter',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Username',
|
||||
name: 'username',
|
||||
type: 'string',
|
||||
placeholder: '<CONFLUENCE_USERNAME>'
|
||||
},
|
||||
{
|
||||
label: 'Access Token',
|
||||
name: 'accessToken',
|
||||
type: 'password',
|
||||
placeholder: '<CONFLUENCE_ACCESS_TOKEN>'
|
||||
},
|
||||
{
|
||||
label: 'Base URL',
|
||||
name: 'baseUrl',
|
||||
@@ -49,7 +47,9 @@ class Confluence_DocumentLoaders implements INode {
|
||||
label: 'Space Key',
|
||||
name: 'spaceKey',
|
||||
type: 'string',
|
||||
placeholder: '~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE'
|
||||
placeholder: '~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE',
|
||||
description:
|
||||
'Refer to <a target="_blank" href="https://community.atlassian.com/t5/Confluence-questions/How-to-find-the-key-for-a-space/qaq-p/864760">official guide</a> on how to get Confluence Space Key'
|
||||
},
|
||||
{
|
||||
label: 'Limit',
|
||||
@@ -68,16 +68,18 @@ class Confluence_DocumentLoaders implements INode {
|
||||
]
|
||||
}
|
||||
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
const username = nodeData.inputs?.username as string
|
||||
const accessToken = nodeData.inputs?.accessToken as string
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const spaceKey = nodeData.inputs?.spaceKey as string
|
||||
const baseUrl = nodeData.inputs?.baseUrl as string
|
||||
const limit = nodeData.inputs?.limit as number
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
|
||||
const options: ConfluencePagesLoaderParams = {
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
const username = getCredentialParam('username', credentialData, nodeData)
|
||||
|
||||
const confluenceOptions: ConfluencePagesLoaderParams = {
|
||||
username,
|
||||
accessToken,
|
||||
baseUrl,
|
||||
@@ -85,7 +87,7 @@ class Confluence_DocumentLoaders implements INode {
|
||||
limit
|
||||
}
|
||||
|
||||
const loader = new ConfluencePagesLoader(options)
|
||||
const loader = new ConfluencePagesLoader(confluenceOptions)
|
||||
|
||||
let docs = []
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import { CSVLoader } from 'langchain/document_loaders/fs/csv'
|
||||
class Csv_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Csv_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Csv File'
|
||||
this.name = 'csvFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'Csv.png'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -5,6 +5,7 @@ import { DocxLoader } from 'langchain/document_loaders/fs/docx'
|
||||
class Docx_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Docx_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Docx File'
|
||||
this.name = 'docxFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'Docx.png'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -1,42 +1,50 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { FigmaFileLoader, FigmaLoaderParams } from 'langchain/document_loaders/web/figma'
|
||||
|
||||
class Figma_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
category: string
|
||||
baseClasses: string[]
|
||||
credential: INodeParams
|
||||
inputs: INodeParams[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'Figma'
|
||||
this.name = 'figma'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'figma.png'
|
||||
this.icon = 'figma.svg'
|
||||
this.category = 'Document Loaders'
|
||||
this.description = 'Load data from a Figma file'
|
||||
this.baseClasses = [this.type]
|
||||
this.credential = {
|
||||
label: 'Connect Credential',
|
||||
name: 'credential',
|
||||
type: 'credential',
|
||||
credentialNames: ['figmaApi']
|
||||
}
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Access Token',
|
||||
name: 'accessToken',
|
||||
type: 'password',
|
||||
placeholder: '<FIGMA_ACCESS_TOKEN>'
|
||||
},
|
||||
{
|
||||
label: 'File Key',
|
||||
name: 'fileKey',
|
||||
type: 'string',
|
||||
placeholder: 'key'
|
||||
placeholder: 'key',
|
||||
description:
|
||||
'The file key can be read from any Figma file URL: https://www.figma.com/file/:key/:title. For example, in https://www.figma.com/file/12345/Website, the file key is 12345'
|
||||
},
|
||||
{
|
||||
label: 'Node IDs',
|
||||
name: 'nodeIds',
|
||||
type: 'string',
|
||||
placeholder: '0, 1, 2'
|
||||
placeholder: '0, 1, 2',
|
||||
description:
|
||||
'A list of Node IDs, seperated by comma. Refer to <a target="_blank" href="https://www.figma.com/community/plugin/758276196886757462/Node-Inspector">official guide</a> on how to get Node IDs'
|
||||
},
|
||||
{
|
||||
label: 'Recursive',
|
||||
@@ -60,18 +68,20 @@ class Figma_DocumentLoaders implements INode {
|
||||
]
|
||||
}
|
||||
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
const accessToken = nodeData.inputs?.accessToken as string
|
||||
const nodeIds = (nodeData.inputs?.nodeIds as string)?.split(',') || []
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const nodeIds = (nodeData.inputs?.nodeIds as string)?.trim().split(',') || []
|
||||
const fileKey = nodeData.inputs?.fileKey as string
|
||||
|
||||
const options: FigmaLoaderParams = {
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
|
||||
const figmaOptions: FigmaLoaderParams = {
|
||||
accessToken,
|
||||
nodeIds,
|
||||
fileKey
|
||||
}
|
||||
|
||||
const loader = new FigmaFileLoader(options)
|
||||
const loader = new FigmaFileLoader(figmaOptions)
|
||||
const docs = await loader.load()
|
||||
|
||||
return docs
|
||||
|
||||
|
Before Width: | Height: | Size: 172 KiB |
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="96px" height="96px"><path fill="#e64a19" d="M26,17h-8c-3.866,0-7-3.134-7-7v0c0-3.866,3.134-7,7-7h8V17z"/><path fill="#7c4dff" d="M25,31h-7c-3.866,0-7-3.134-7-7v0c0-3.866,3.134-7,7-7h7V31z"/><path fill="#66bb6a" d="M18,45L18,45c-3.866,0-7-3.134-7-7v0c0-3.866,3.134-7,7-7h7v7C25,41.866,21.866,45,18,45z"/><path fill="#ff7043" d="M32,17h-7V3h7c3.866,0,7,3.134,7,7v0C39,13.866,35.866,17,32,17z"/><circle cx="32" cy="24" r="7" fill="#29b6f6"/></svg>
|
||||
|
After Width: | Height: | Size: 512 B |
@@ -10,6 +10,7 @@ import { DocxLoader } from 'langchain/document_loaders/fs/docx'
|
||||
class Folder_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -20,6 +21,7 @@ class Folder_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Folder with Files'
|
||||
this.name = 'folderFiles'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'folder.svg'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { GitbookLoader } from 'langchain/document_loaders/web/gitbook'
|
||||
|
||||
/**
 * Flowise node that loads pages from a GitBook site as documents.
 *
 * Wraps langchain's `GitbookLoader`. The result can optionally be split by a
 * connected text splitter and enriched with extra metadata.
 */
class Gitbook_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs?: INodeParams[]

    constructor() {
        this.label = 'GitBook'
        this.name = 'gitbook'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'gitbook.svg'
        this.category = 'Document Loaders'
        this.description = `Load data from GitBook`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Web Path',
                name: 'webPath',
                type: 'string',
                placeholder: 'https://docs.gitbook.com/product-tour/navigation',
                description: 'If want to load all paths from the GitBook provide only root path e.g.https://docs.gitbook.com/ '
            },
            {
                label: 'Should Load All Paths',
                name: 'shouldLoadAllPaths',
                type: 'boolean',
                description: 'Load from all paths in a given GitBook',
                optional: true
            },
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Loads the configured GitBook path and returns the resulting documents.
     *
     * @param nodeData - node configuration; reads the `webPath`,
     *   `shouldLoadAllPaths`, `textSplitter` and `metadata` inputs.
     * @returns the loaded (and optionally split) documents; when metadata is
     *   supplied, each document is returned as a copy with the extra keys merged in.
     * @throws if `metadata` is a string that is not valid JSON.
     */
    async init(nodeData: INodeData): Promise<any> {
        const webPath = nodeData.inputs?.webPath as string
        const shouldLoadAllPaths = nodeData.inputs?.shouldLoadAllPaths as boolean
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata

        const loader = shouldLoadAllPaths ? new GitbookLoader(webPath, { shouldLoadAllPaths }) : new GitbookLoader(webPath)

        // Hand the connected splitter to the loader; calling loadAndSplit() without
        // it would ignore the user's splitter configuration.
        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()

        if (metadata) {
            // The JSON input may arrive either already parsed or as a raw JSON string.
            const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
            // User-supplied keys win over the loader's own metadata on conflict.
            return docs.map((doc) => {
                return {
                    ...doc,
                    metadata: {
                        ...doc.metadata,
                        ...parsedMetadata
                    }
                }
            })
        }

        return docs
    }
}
|
||||
|
||||
// Expose the node class under the `nodeClass` key of the CommonJS exports.
module.exports = { nodeClass: Gitbook_DocumentLoaders }
|
||||
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="64" height="64"><switch><g><path d="M28.8 47.4c1 0 1.9.8 1.9 1.9 0 1-.8 1.9-1.9 1.9-1 0-1.9-.8-1.9-1.9 0-1.1.9-1.9 1.9-1.9m29.4-11.6c-1 0-1.9-.8-1.9-1.9 0-1 .8-1.9 1.9-1.9 1 0 1.9.8 1.9 1.9 0 1-.9 1.9-1.9 1.9m0-7.7c-3.2 0-5.8 2.6-5.8 5.8 0 .6.1 1.2.3 1.8L33.6 45.9c-1.1-1.6-2.9-2.5-4.8-2.5-2.2 0-4.2 1.3-5.2 3.2l-17.2-9c-1.8-1-3.2-3.9-3-6.7.1-1.4.6-2.5 1.3-2.9.5-.3 1-.2 1.7.1l.1.1c4.6 2.4 19.5 10.2 20.1 10.5 1 .4 1.5.6 3.2-.2l30.8-16c.5-.2 1-.6 1-1.3 0-.9-.9-1.3-.9-1.3-1.8-.8-4.5-2.1-7.1-3.3C48 14 41.6 11 38.8 9.5c-2.4-1.3-4.4-.2-4.7 0l-.7.3C20.7 16.2 3.9 24.5 2.9 25.1c-1.7 1-2.8 3.1-2.9 5.7-.2 4.1 1.9 8.4 4.9 9.9l18.2 9.4c.4 2.8 2.9 5 5.7 5 3.2 0 5.7-2.5 5.8-5.7l20-10.8c1 .8 2.3 1.2 3.6 1.2 3.2 0 5.8-2.6 5.8-5.8 0-3.3-2.6-5.9-5.8-5.9" fill="#4285fd"/></g></switch></svg>
|
||||
|
After Width: | Height: | Size: 826 B |
@@ -1,25 +1,37 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { GithubRepoLoader, GithubRepoLoaderParams } from 'langchain/document_loaders/web/github'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
|
||||
class Github_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
category: string
|
||||
baseClasses: string[]
|
||||
credential: INodeParams
|
||||
inputs: INodeParams[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'Github'
|
||||
this.name = 'github'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'github.png'
|
||||
this.category = 'Document Loaders'
|
||||
this.description = `Load data from a GitHub repository`
|
||||
this.baseClasses = [this.type]
|
||||
this.credential = {
|
||||
label: 'Connect Credential',
|
||||
name: 'credential',
|
||||
type: 'credential',
|
||||
description: 'Only needed when accessing private repo',
|
||||
optional: true,
|
||||
credentialNames: ['githubApi']
|
||||
}
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Repo Link',
|
||||
@@ -33,13 +45,6 @@ class Github_DocumentLoaders implements INode {
|
||||
type: 'string',
|
||||
default: 'main'
|
||||
},
|
||||
{
|
||||
label: 'Access Token',
|
||||
name: 'accessToken',
|
||||
type: 'password',
|
||||
placeholder: '<GITHUB_ACCESS_TOKEN>',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Recursive',
|
||||
name: 'recursive',
|
||||
@@ -62,23 +67,25 @@ class Github_DocumentLoaders implements INode {
|
||||
]
|
||||
}
|
||||
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const repoLink = nodeData.inputs?.repoLink as string
|
||||
const branch = nodeData.inputs?.branch as string
|
||||
const recursive = nodeData.inputs?.recursive as boolean
|
||||
const accessToken = nodeData.inputs?.accessToken as string
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
|
||||
const options: GithubRepoLoaderParams = {
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
|
||||
|
||||
const githubOptions: GithubRepoLoaderParams = {
|
||||
branch,
|
||||
recursive,
|
||||
unknown: 'warn'
|
||||
}
|
||||
|
||||
if (accessToken) options.accessToken = accessToken
|
||||
if (accessToken) githubOptions.accessToken = accessToken
|
||||
|
||||
const loader = new GithubRepoLoader(repoLink, options)
|
||||
const loader = new GithubRepoLoader(repoLink, githubOptions)
|
||||
const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
|
||||
|
||||
if (metadata) {
|
||||
|
||||
@@ -5,6 +5,7 @@ import { JSONLoader } from 'langchain/document_loaders/fs/json'
|
||||
class Json_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Json_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Json File'
|
||||
this.name = 'jsonFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'json.svg'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -5,6 +5,7 @@ import { JSONLinesLoader } from 'langchain/document_loaders/fs/json'
|
||||
class Jsonlines_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Jsonlines_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Json Lines File'
|
||||
this.name = 'jsonlinesFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'jsonlines.svg'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -1,25 +1,35 @@
|
||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { NotionDBLoader, NotionDBLoaderParams } from 'langchain/document_loaders/web/notiondb'
|
||||
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
|
||||
class NotionDB_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
category: string
|
||||
baseClasses: string[]
|
||||
credential: INodeParams
|
||||
inputs: INodeParams[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'Notion Database'
|
||||
this.name = 'notionDB'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'notion.png'
|
||||
this.category = 'Document Loaders'
|
||||
this.description = 'Load data from Notion Database ID'
|
||||
this.description = 'Load data from Notion Database (each row is a separate document with all properties as metadata)'
|
||||
this.baseClasses = [this.type]
|
||||
this.credential = {
|
||||
label: 'Connect Credential',
|
||||
name: 'credential',
|
||||
type: 'credential',
|
||||
credentialNames: ['notionApi']
|
||||
}
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Text Splitter',
|
||||
@@ -31,21 +41,7 @@ class NotionDB_DocumentLoaders implements INode {
|
||||
label: 'Notion Database Id',
|
||||
name: 'databaseId',
|
||||
type: 'string',
|
||||
description:
|
||||
'If your URL looks like - https://www.notion.so/<long_hash_1>?v=<long_hash_2>, then <long_hash_1> is the database ID'
|
||||
},
|
||||
{
|
||||
label: 'Notion Integration Token',
|
||||
name: 'notionIntegrationToken',
|
||||
type: 'password',
|
||||
description:
|
||||
'You can find integration token <a target="_blank" href="https://developers.notion.com/docs/create-a-notion-integration#step-1-create-an-integration">here</a>'
|
||||
},
|
||||
{
|
||||
label: 'Page Size Limit',
|
||||
name: 'pageSizeLimit',
|
||||
type: 'number',
|
||||
default: 10
|
||||
description: 'If your URL looks like - https://www.notion.so/abcdefh?v=long_hash_2, then abcdefh is the database ID'
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
@@ -57,19 +53,22 @@ class NotionDB_DocumentLoaders implements INode {
|
||||
]
|
||||
}
|
||||
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const databaseId = nodeData.inputs?.databaseId as string
|
||||
const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string
|
||||
const pageSizeLimit = nodeData.inputs?.pageSizeLimit as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
|
||||
const obj: NotionDBLoaderParams = {
|
||||
pageSizeLimit: pageSizeLimit ? parseInt(pageSizeLimit, 10) : 10,
|
||||
databaseId,
|
||||
notionIntegrationToken
|
||||
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
|
||||
const notionIntegrationToken = getCredentialParam('notionIntegrationToken', credentialData, nodeData)
|
||||
|
||||
const obj: NotionAPILoaderOptions = {
|
||||
clientOptions: {
|
||||
auth: notionIntegrationToken
|
||||
},
|
||||
id: databaseId,
|
||||
type: 'database'
|
||||
}
|
||||
const loader = new NotionDBLoader(obj)
|
||||
const loader = new NotionAPILoader(obj)
|
||||
|
||||
let docs = []
|
||||
if (textSplitter) {
|
||||
@@ -5,6 +5,7 @@ import { NotionLoader } from 'langchain/document_loaders/fs/notion'
|
||||
class NotionFolder_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class NotionFolder_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Notion Folder'
|
||||
this.name = 'notionFolder'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'notion.png'
|
||||
this.category = 'Document Loaders'
|
||||
@@ -0,0 +1,101 @@
|
||||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'
|
||||
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||
|
||||
/**
 * Document loader node for a single Notion page.
 *
 * Uses langchain's `NotionAPILoader` in `'page'` mode; per this node's own
 * description string, child pages are returned as separate documents.
 * Authentication comes from the `notionApi` credential attached to the node.
 */
class NotionPage_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    credential: INodeParams
    inputs: INodeParams[]

    /** Declares the node's display metadata, its credential slot and its inputs. */
    constructor() {
        this.label = 'Notion Page'
        this.name = 'notionPage'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'notion.png'
        this.category = 'Document Loaders'
        this.description = 'Load data from Notion Page (including child pages all as separate documents)'
        this.baseClasses = [this.type]
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['notionApi']
        }
        this.inputs = [
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Notion Page Id',
                name: 'pageId',
                type: 'string',
                description:
                    'The last 32 char hex in the url path. For example: https://www.notion.so/skarard/LangChain-Notion-API-b34ca03f219c4420a6046fc4bdfdf7b4, b34ca03f219c4420a6046fc4bdfdf7b4 is the Page ID'
            },
            {
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Loads the configured Notion page as documents.
     *
     * @param nodeData - node configuration; reads `inputs.textSplitter`,
     *   `inputs.pageId`, `inputs.metadata` and `credential`
     * @param _ - unused
     * @param options - passed through to `getCredentialData`
     * @returns the loaded documents, split when a text splitter is connected and
     *   with `metadata` merged over each document's own metadata when supplied
     * @throws SyntaxError if `metadata` is a string that is not valid JSON
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const pageId = nodeData.inputs?.pageId as string
        const metadata = nodeData.inputs?.metadata

        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const notionIntegrationToken = getCredentialParam('notionIntegrationToken', credentialData, nodeData)

        const loaderOptions: NotionAPILoaderOptions = {
            clientOptions: {
                auth: notionIntegrationToken
            },
            id: pageId,
            type: 'page'
        }
        const loader = new NotionAPILoader(loaderOptions)

        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()

        if (!metadata) return docs

        // The JSON input may arrive already parsed or as a raw string.
        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)

        // User-supplied keys override keys produced by the loader.
        return docs.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                ...parsedMetadata
            }
        }))
    }
}
|
||||
|
||||
// Expose the Notion page loader under the `nodeClass` key of the CommonJS export.
module.exports = {
    nodeClass: NotionPage_DocumentLoaders
}
|
||||
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 11 KiB |
@@ -5,6 +5,7 @@ import { PDFLoader } from 'langchain/document_loaders/fs/pdf'
|
||||
class Pdf_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Pdf_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Pdf File'
|
||||
this.name = 'pdfFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'pdf.svg'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -2,11 +2,12 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { PlaywrightWebBaseLoader } from 'langchain/document_loaders/web/playwright'
|
||||
import { test } from 'linkifyjs'
|
||||
import { getAvailableURLs } from '../../../src'
|
||||
import { webCrawl, xmlScrape } from '../../../src'
|
||||
|
||||
class Playwright_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -17,6 +18,7 @@ class Playwright_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Playwright Web Scraper'
|
||||
this.name = 'playwrightWebScraper'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'playwright.svg'
|
||||
this.category = 'Document Loaders'
|
||||
@@ -35,19 +37,34 @@ class Playwright_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Web Scrap for Relative Links',
|
||||
name: 'webScrap',
|
||||
type: 'boolean',
|
||||
label: 'Get Relative Links Method',
|
||||
name: 'relativeLinksMethod',
|
||||
type: 'options',
|
||||
description: 'Select a method to retrieve relative links',
|
||||
options: [
|
||||
{
|
||||
label: 'Web Crawl',
|
||||
name: 'webCrawl',
|
||||
description: 'Crawl relative links from HTML URL'
|
||||
},
|
||||
{
|
||||
label: 'Scrape XML Sitemap',
|
||||
name: 'scrapeXMLSitemap',
|
||||
description: 'Scrape relative links from XML sitemap URL'
|
||||
}
|
||||
],
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Web Scrap Links Limit',
|
||||
label: 'Get Relative Links Limit',
|
||||
name: 'limit',
|
||||
type: 'number',
|
||||
default: 10,
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
additionalParams: true,
|
||||
description:
|
||||
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
@@ -62,7 +79,7 @@ class Playwright_DocumentLoaders implements INode {
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const webScrap = nodeData.inputs?.webScrap as boolean
|
||||
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
||||
let limit = nodeData.inputs?.limit as string
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
@@ -71,25 +88,34 @@ class Playwright_DocumentLoaders implements INode {
|
||||
throw new Error('Invalid URL')
|
||||
}
|
||||
|
||||
const playwrightLoader = async (url: string): Promise<any> => {
|
||||
let docs = []
|
||||
const loader = new PlaywrightWebBaseLoader(url)
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
docs = await loader.load()
|
||||
async function playwrightLoader(url: string): Promise<any> {
|
||||
try {
|
||||
let docs = []
|
||||
const loader = new PlaywrightWebBaseLoader(url)
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
docs = await loader.load()
|
||||
}
|
||||
return docs
|
||||
} catch (err) {
|
||||
if (process.env.DEBUG === 'true') console.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`)
|
||||
}
|
||||
return docs
|
||||
}
|
||||
|
||||
let availableUrls: string[]
|
||||
let docs = []
|
||||
if (webScrap) {
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||
if (!limit) limit = '10'
|
||||
availableUrls = await getAvailableURLs(url, parseInt(limit))
|
||||
for (let i = 0; i < availableUrls.length; i++) {
|
||||
docs.push(...(await playwrightLoader(availableUrls[i])))
|
||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
||||
const pages: string[] =
|
||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
||||
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
||||
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
||||
for (const page of pages) {
|
||||
docs.push(...(await playwrightLoader(page)))
|
||||
}
|
||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
||||
} else {
|
||||
docs = await playwrightLoader(url)
|
||||
}
|
||||
|
||||
@@ -2,11 +2,12 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { PuppeteerWebBaseLoader } from 'langchain/document_loaders/web/puppeteer'
|
||||
import { test } from 'linkifyjs'
|
||||
import { getAvailableURLs } from '../../../src'
|
||||
import { webCrawl, xmlScrape } from '../../../src'
|
||||
|
||||
class Puppeteer_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -17,6 +18,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Puppeteer Web Scraper'
|
||||
this.name = 'puppeteerWebScraper'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'puppeteer.svg'
|
||||
this.category = 'Document Loaders'
|
||||
@@ -35,19 +37,34 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Web Scrape for Relative Links',
|
||||
name: 'webScrape',
|
||||
type: 'boolean',
|
||||
label: 'Get Relative Links Method',
|
||||
name: 'relativeLinksMethod',
|
||||
type: 'options',
|
||||
description: 'Select a method to retrieve relative links',
|
||||
options: [
|
||||
{
|
||||
label: 'Web Crawl',
|
||||
name: 'webCrawl',
|
||||
description: 'Crawl relative links from HTML URL'
|
||||
},
|
||||
{
|
||||
label: 'Scrape XML Sitemap',
|
||||
name: 'scrapeXMLSitemap',
|
||||
description: 'Scrape relative links from XML sitemap URL'
|
||||
}
|
||||
],
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Web Scrape Links Limit',
|
||||
label: 'Get Relative Links Limit',
|
||||
name: 'limit',
|
||||
type: 'number',
|
||||
default: 10,
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
additionalParams: true,
|
||||
description:
|
||||
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
|
||||
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
@@ -62,7 +79,7 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
async init(nodeData: INodeData): Promise<any> {
|
||||
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const webScrape = nodeData.inputs?.webScrape as boolean
|
||||
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
|
||||
let limit = nodeData.inputs?.limit as string
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
@@ -71,30 +88,39 @@ class Puppeteer_DocumentLoaders implements INode {
|
||||
throw new Error('Invalid URL')
|
||||
}
|
||||
|
||||
const puppeteerLoader = async (url: string): Promise<any> => {
|
||||
let docs = []
|
||||
const loader = new PuppeteerWebBaseLoader(url)
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
docs = await loader.load()
|
||||
async function puppeteerLoader(url: string): Promise<any> {
|
||||
try {
|
||||
let docs = []
|
||||
const loader = new PuppeteerWebBaseLoader(url, {
|
||||
launchOptions: {
|
||||
args: ['--no-sandbox'],
|
||||
headless: 'new'
|
||||
}
|
||||
})
|
||||
if (textSplitter) {
|
||||
docs = await loader.loadAndSplit(textSplitter)
|
||||
} else {
|
||||
docs = await loader.load()
|
||||
}
|
||||
return docs
|
||||
} catch (err) {
|
||||
if (process.env.DEBUG === 'true') console.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`)
|
||||
}
|
||||
return docs
|
||||
}
|
||||
|
||||
let availableUrls: string[]
|
||||
let docs = []
|
||||
if (webScrape) {
|
||||
if (relativeLinksMethod) {
|
||||
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
|
||||
if (!limit) limit = '10'
|
||||
availableUrls = await getAvailableURLs(url, parseInt(limit))
|
||||
for (let i = 0; i < availableUrls.length; i++) {
|
||||
try {
|
||||
docs.push(...(await puppeteerLoader(availableUrls[i])))
|
||||
} catch (error) {
|
||||
console.error('Error loading url with puppeteer. URL: ', availableUrls[i], 'Error: ', error)
|
||||
continue
|
||||
}
|
||||
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
|
||||
const pages: string[] =
|
||||
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
|
||||
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
|
||||
if (!pages || pages.length === 0) throw new Error('No relative links found')
|
||||
for (const page of pages) {
|
||||
docs.push(...(await puppeteerLoader(page)))
|
||||
}
|
||||
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
|
||||
} else {
|
||||
docs = await puppeteerLoader(url)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import { SRTLoader } from 'langchain/document_loaders/fs/srt'
|
||||
class Subtitles_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Subtitles_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Subtitles File'
|
||||
this.name = 'subtitlesFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'subtitlesFile.svg'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||
@@ -5,6 +5,7 @@ import { TextLoader } from 'langchain/document_loaders/fs/text'
|
||||
class Text_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
type: string
|
||||
icon: string
|
||||
@@ -15,6 +16,7 @@ class Text_DocumentLoaders implements INode {
|
||||
constructor() {
|
||||
this.label = 'Text File'
|
||||
this.name = 'textFile'
|
||||
this.version = 1.0
|
||||
this.type = 'Document'
|
||||
this.icon = 'textFile.svg'
|
||||
this.category = 'Document Loaders'
|
||||
|
||||