Merge branch 'main' into feature/LlamaIndex

# Conflicts:
#	packages/server/src/index.ts
This commit is contained in:
Henry
2024-01-27 23:22:05 +00:00
47 changed files with 1543 additions and 158 deletions
@@ -19,7 +19,7 @@ class ChatOpenAI_ChatModels implements INode {
constructor() {
this.label = 'ChatOpenAI'
this.name = 'chatOpenAI'
this.version = 2.0
this.version = 3.0
this.type = 'ChatOpenAI'
this.icon = 'openai.svg'
this.category = 'Chat Models'
@@ -47,6 +47,14 @@ class ChatOpenAI_ChatModels implements INode {
label: 'gpt-4',
name: 'gpt-4'
},
{
label: 'gpt-4-turbo-preview',
name: 'gpt-4-turbo-preview'
},
{
label: 'gpt-4-0125-preview',
name: 'gpt-4-0125-preview'
},
{
label: 'gpt-4-1106-preview',
name: 'gpt-4-1106-preview'
@@ -1,4 +1,4 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { CheerioWebBaseLoader, WebBaseLoaderParams } from 'langchain/document_loaders/web/cheerio'
import { test } from 'linkifyjs'
@@ -63,6 +63,7 @@ class Cheerio_DocumentLoaders implements INode {
name: 'limit',
type: 'number',
optional: true,
default: '10',
additionalParams: true,
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
@@ -86,11 +87,12 @@ class Cheerio_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
let limit = nodeData.inputs?.limit as string
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
let limit = parseInt(nodeData.inputs?.limit as string)
let url = nodeData.inputs?.url as string
url = url.trim()
@@ -117,23 +119,33 @@ class Cheerio_DocumentLoaders implements INode {
}
return docs
} catch (err) {
if (process.env.DEBUG === 'true') console.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`)
if (process.env.DEBUG === 'true') options.logger.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`)
}
}
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = '10'
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
selectedLinks && selectedLinks.length > 0
? selectedLinks.slice(0, limit)
: relativeLinksMethod === 'webCrawl'
? await webCrawl(url, limit)
: await xmlScrape(url, limit)
if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
if (!pages || pages.length === 0) throw new Error('No relative links found')
for (const page of pages) {
docs.push(...(await cheerioLoader(page)))
}
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`)
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
for (const page of selectedLinks) {
docs.push(...(await cheerioLoader(page)))
}
} else {
docs = await cheerioLoader(url)
}
@@ -1,4 +1,4 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright'
import { test } from 'linkifyjs'
@@ -61,6 +61,7 @@ class Playwright_DocumentLoaders implements INode {
name: 'limit',
type: 'number',
optional: true,
default: '10',
additionalParams: true,
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
@@ -114,11 +115,12 @@ class Playwright_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
let limit = nodeData.inputs?.limit as string
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
let limit = parseInt(nodeData.inputs?.limit as string)
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
let waitForSelector = nodeData.inputs?.waitForSelector as string
@@ -158,23 +160,33 @@ class Playwright_DocumentLoaders implements INode {
}
return docs
} catch (err) {
if (process.env.DEBUG === 'true') console.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`)
if (process.env.DEBUG === 'true') options.logger.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`)
}
}
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = '10'
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
selectedLinks && selectedLinks.length > 0
? selectedLinks.slice(0, limit)
: relativeLinksMethod === 'webCrawl'
? await webCrawl(url, limit)
: await xmlScrape(url, limit)
if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
if (!pages || pages.length === 0) throw new Error('No relative links found')
for (const page of pages) {
docs.push(...(await playwrightLoader(page)))
}
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`)
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
for (const page of selectedLinks) {
docs.push(...(await playwrightLoader(page)))
}
} else {
docs = await playwrightLoader(url)
}
@@ -1,4 +1,4 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer'
import { test } from 'linkifyjs'
@@ -62,6 +62,7 @@ class Puppeteer_DocumentLoaders implements INode {
name: 'limit',
type: 'number',
optional: true,
default: '10',
additionalParams: true,
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
@@ -115,11 +116,12 @@ class Puppeteer_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
let limit = nodeData.inputs?.limit as string
const selectedLinks = nodeData.inputs?.selectedLinks as string[]
let limit = parseInt(nodeData.inputs?.limit as string)
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
let waitForSelector = nodeData.inputs?.waitForSelector as string
@@ -159,23 +161,33 @@ class Puppeteer_DocumentLoaders implements INode {
}
return docs
} catch (err) {
if (process.env.DEBUG === 'true') console.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`)
if (process.env.DEBUG === 'true') options.logger.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`)
}
}
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = '10'
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`)
if (!limit) limit = 10
else if (limit < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))
if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
selectedLinks && selectedLinks.length > 0
? selectedLinks.slice(0, limit)
: relativeLinksMethod === 'webCrawl'
? await webCrawl(url, limit)
: await xmlScrape(url, limit)
if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`)
if (!pages || pages.length === 0) throw new Error('No relative links found')
for (const page of pages) {
docs.push(...(await puppeteerLoader(page)))
}
if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`)
if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`)
} else if (selectedLinks && selectedLinks.length > 0) {
if (process.env.DEBUG === 'true')
options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`)
for (const page of selectedLinks) {
docs.push(...(await puppeteerLoader(page)))
}
} else {
docs = await puppeteerLoader(url)
}
@@ -17,7 +17,7 @@ class OpenAIEmbedding_Embeddings implements INode {
constructor() {
this.label = 'OpenAI Embeddings'
this.name = 'openAIEmbeddings'
this.version = 1.0
this.version = 2.0
this.type = 'OpenAIEmbeddings'
this.icon = 'openai.svg'
this.category = 'Embeddings'
@@ -30,6 +30,27 @@ class OpenAIEmbedding_Embeddings implements INode {
credentialNames: ['openAIApi']
}
this.inputs = [
{
label: 'Model Name',
name: 'modelName',
type: 'options',
options: [
{
label: 'text-embedding-3-large',
name: 'text-embedding-3-large'
},
{
label: 'text-embedding-3-small',
name: 'text-embedding-3-small'
},
{
label: 'text-embedding-ada-002',
name: 'text-embedding-ada-002'
}
],
default: 'text-embedding-ada-002',
optional: true
},
{
label: 'Strip New Lines',
name: 'stripNewLines',
@@ -66,12 +87,14 @@ class OpenAIEmbedding_Embeddings implements INode {
const batchSize = nodeData.inputs?.batchSize as string
const timeout = nodeData.inputs?.timeout as string
const basePath = nodeData.inputs?.basepath as string
const modelName = nodeData.inputs?.modelName as string
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
const obj: Partial<OpenAIEmbeddingsParams> & { openAIApiKey?: string } = {
openAIApiKey
openAIApiKey,
modelName
}
if (stripNewLines) obj.stripNewLines = stripNewLines
@@ -52,11 +52,19 @@ class CustomFunction_Utilities implements INode {
label: 'Output',
name: 'output',
baseClasses: ['string', 'number', 'boolean', 'json', 'array']
},
{
label: 'Ending Node',
name: 'EndingNode',
baseClasses: [this.type]
}
]
}
async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
const isEndingNode = nodeData?.outputs?.output === 'EndingNode'
if (isEndingNode && !options.isRun) return // prevent running both init and run twice
const javascriptFunction = nodeData.inputs?.javascriptFunction as string
const functionInputVariablesRaw = nodeData.inputs?.functionInputVariables
const appDataSource = options.appDataSource as DataSource
@@ -123,7 +131,8 @@ class CustomFunction_Utilities implements INode {
const vm = new NodeVM(nodeVMOptions)
try {
const response = await vm.run(`module.exports = async function() {${javascriptFunction}}()`, __dirname)
if (typeof response === 'string') {
if (typeof response === 'string' && !isEndingNode) {
return handleEscapeCharacters(response, false)
}
return response
@@ -131,6 +140,10 @@ class CustomFunction_Utilities implements INode {
throw new Error(e)
}
}
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
return await this.init(nodeData, input, { ...options, isRun: true })
}
}
module.exports = { nodeClass: CustomFunction_Utilities }
@@ -1,5 +1,6 @@
import { flatten } from 'lodash'
import { QdrantClient } from '@qdrant/js-client-rest'
import type { Schemas as QdrantSchemas } from '@qdrant/js-client-rest'
import { VectorStoreRetrieverInput } from 'langchain/vectorstores/base'
import { Document } from 'langchain/document'
import { QdrantVectorStore, QdrantLibArgs } from 'langchain/vectorstores/qdrant'
@@ -8,6 +9,12 @@ import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
type RetrieverConfig = Partial<VectorStoreRetrieverInput<QdrantVectorStore>>
type QdrantSearchResponse = QdrantSchemas['ScoredPoint'] & {
payload: {
metadata: object
content: string
}
}
class Qdrant_VectorStores implements INode {
label: string
@@ -194,7 +201,7 @@ class Qdrant_VectorStores implements INode {
const qdrantVectorDimension = nodeData.inputs?.qdrantVectorDimension
const output = nodeData.outputs?.output as string
const topK = nodeData.inputs?.topK as string
let queryFilter = nodeData.inputs?.queryFilter
let queryFilter = nodeData.inputs?.qdrantFilter
const k = topK ? parseFloat(topK) : 4
@@ -135,7 +135,7 @@ class Qdrant_Existing_VectorStores implements INode {
const qdrantVectorDimension = nodeData.inputs?.qdrantVectorDimension
const output = nodeData.outputs?.output as string
const topK = nodeData.inputs?.topK as string
let queryFilter = nodeData.inputs?.queryFilter
let queryFilter = nodeData.inputs?.qdrantFilter
const k = topK ? parseFloat(topK) : 4