Merge branch 'main' into feature/ZepVS

# Conflicts:
#	packages/components/package.json
This commit is contained in:
Henry
2023-08-17 10:23:41 +01:00
11 changed files with 474 additions and 232 deletions
@@ -5,6 +5,8 @@ import { flatten } from 'lodash'
import { BaseChatMemory } from 'langchain/memory'
import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler'
const defaultMessage = `Do your best to answer the questions. Feel free to use any tools available to look up relevant information, only if necessary.`
class ConversationalRetrievalAgent_Agents implements INode {
label: string
name: string
@@ -46,6 +48,7 @@ class ConversationalRetrievalAgent_Agents implements INode {
label: 'System Message',
name: 'systemMessage',
type: 'string',
default: defaultMessage,
rows: 4,
optional: true,
additionalParams: true
@@ -65,7 +68,7 @@ class ConversationalRetrievalAgent_Agents implements INode {
agentType: 'openai-functions',
verbose: process.env.DEBUG === 'true' ? true : false,
agentArgs: {
prefix: systemMessage ?? `You are a helpful AI assistant.`
prefix: systemMessage ?? defaultMessage
},
returnIntermediateSteps: true
})
@@ -5,6 +5,9 @@ import { DataSource } from 'typeorm'
import { SqlDatabase } from 'langchain/sql_db'
import { BaseLanguageModel } from 'langchain/base_language'
import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler'
import { DataSourceOptions } from 'typeorm/data-source'
type DatabaseType = 'sqlite' | 'postgres' | 'mssql' | 'mysql'
class SqlDatabaseChain_Chains implements INode {
label: string
@@ -38,36 +41,48 @@ class SqlDatabaseChain_Chains implements INode {
type: 'options',
options: [
{
label: 'SQlite',
label: 'SQLite',
name: 'sqlite'
},
{
label: 'PostgreSQL',
name: 'postgres'
},
{
label: 'MSSQL',
name: 'mssql'
},
{
label: 'MySQL',
name: 'mysql'
}
],
default: 'sqlite'
},
{
label: 'Database File Path',
name: 'dbFilePath',
label: 'Connection string or file path (sqlite only)',
name: 'url',
type: 'string',
placeholder: 'C:/Users/chinook.db'
placeholder: '127.0.0.1:5432/chinook'
}
]
}
async init(nodeData: INodeData): Promise<any> {
const databaseType = nodeData.inputs?.database as 'sqlite'
const databaseType = nodeData.inputs?.database as DatabaseType
const model = nodeData.inputs?.model as BaseLanguageModel
const dbFilePath = nodeData.inputs?.dbFilePath
const url = nodeData.inputs?.url
const chain = await getSQLDBChain(databaseType, dbFilePath, model)
const chain = await getSQLDBChain(databaseType, url, model)
return chain
}
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
const databaseType = nodeData.inputs?.database as 'sqlite'
const databaseType = nodeData.inputs?.database as DatabaseType
const model = nodeData.inputs?.model as BaseLanguageModel
const dbFilePath = nodeData.inputs?.dbFilePath
const url = nodeData.inputs?.url
const chain = await getSQLDBChain(databaseType, dbFilePath, model)
const chain = await getSQLDBChain(databaseType, url, model)
const loggerHandler = new ConsoleCallbackHandler(options.logger)
if (options.socketIO && options.socketIOClientId) {
@@ -81,11 +96,18 @@ class SqlDatabaseChain_Chains implements INode {
}
}
const getSQLDBChain = async (databaseType: 'sqlite', dbFilePath: string, llm: BaseLanguageModel) => {
const datasource = new DataSource({
type: databaseType,
database: dbFilePath
})
const getSQLDBChain = async (databaseType: DatabaseType, url: string, llm: BaseLanguageModel) => {
const datasource = new DataSource(
databaseType === 'sqlite'
? {
type: databaseType,
database: url
}
: ({
type: databaseType,
url: url
} as DataSourceOptions)
)
const db = await SqlDatabase.fromDataSourceParams({
appDataSource: datasource
@@ -125,6 +125,13 @@ class ChatOpenAI_ChatModels implements INode {
type: 'string',
optional: true,
additionalParams: true
},
{
label: 'BaseOptions',
name: 'baseOptions',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -139,6 +146,7 @@ class ChatOpenAI_ChatModels implements INode {
const timeout = nodeData.inputs?.timeout as string
const streaming = nodeData.inputs?.streaming as boolean
const basePath = nodeData.inputs?.basepath as string
const baseOptions = nodeData.inputs?.baseOptions
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
@@ -156,8 +164,18 @@ class ChatOpenAI_ChatModels implements INode {
if (presencePenalty) obj.presencePenalty = parseFloat(presencePenalty)
if (timeout) obj.timeout = parseInt(timeout, 10)
let parsedBaseOptions: any | undefined = undefined
if (baseOptions) {
try {
parsedBaseOptions = typeof baseOptions === 'object' ? baseOptions : JSON.parse(baseOptions)
} catch (exception) {
throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception)
}
}
const model = new ChatOpenAI(obj, {
basePath
basePath,
baseOptions: parsedBaseOptions
})
return model
}
@@ -64,7 +64,7 @@ class Cheerio_DocumentLoaders implements INode {
additionalParams: true,
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
},
{
label: 'Metadata',
@@ -61,7 +61,40 @@ class Folder_DocumentLoaders implements INode {
'.csv': (path) => new CSVLoader(path),
'.docx': (path) => new DocxLoader(path),
// @ts-ignore
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }),
'.aspx': (path) => new TextLoader(path),
'.asp': (path) => new TextLoader(path),
'.cpp': (path) => new TextLoader(path), // C++
'.c': (path) => new TextLoader(path),
'.cs': (path) => new TextLoader(path),
'.css': (path) => new TextLoader(path),
'.go': (path) => new TextLoader(path), // Go
'.h': (path) => new TextLoader(path), // C++ Header files
'.java': (path) => new TextLoader(path), // Java
'.js': (path) => new TextLoader(path), // JavaScript
'.less': (path) => new TextLoader(path), // Less files
'.ts': (path) => new TextLoader(path), // TypeScript
'.php': (path) => new TextLoader(path), // PHP
'.proto': (path) => new TextLoader(path), // Protocol Buffers
'.python': (path) => new TextLoader(path), // Python
'.py': (path) => new TextLoader(path), // Python
'.rst': (path) => new TextLoader(path), // reStructuredText
'.ruby': (path) => new TextLoader(path), // Ruby
'.rb': (path) => new TextLoader(path), // Ruby
'.rs': (path) => new TextLoader(path), // Rust
'.scala': (path) => new TextLoader(path), // Scala
'.sc': (path) => new TextLoader(path), // Scala
'.scss': (path) => new TextLoader(path), // Sass
'.sol': (path) => new TextLoader(path), // Solidity
'.sql': (path) => new TextLoader(path), //SQL
'.swift': (path) => new TextLoader(path), // Swift
'.markdown': (path) => new TextLoader(path), // Markdown
'.md': (path) => new TextLoader(path), // Markdown
'.tex': (path) => new TextLoader(path), // LaTeX
'.ltx': (path) => new TextLoader(path), // LaTeX
'.html': (path) => new TextLoader(path), // HTML
'.vb': (path) => new TextLoader(path), // Visual Basic
'.xml': (path) => new TextLoader(path) // XML
})
let docs = []
@@ -1,6 +1,6 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { PlaywrightWebBaseLoader } from 'langchain/document_loaders/web/playwright'
import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright'
import { test } from 'linkifyjs'
import { webCrawl, xmlScrape } from '../../../src'
@@ -64,7 +64,45 @@ class Playwright_DocumentLoaders implements INode {
additionalParams: true,
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
},
{
label: 'Wait Until',
name: 'waitUntilGoToOption',
type: 'options',
description: 'Select a go to wait until option',
options: [
{
label: 'Load',
name: 'load',
description: 'Consider operation to be finished when the load event is fired.'
},
{
label: 'DOM Content Loaded',
name: 'domcontentloaded',
description: 'Consider operation to be finished when the DOMContentLoaded event is fired.'
},
{
label: 'Network Idle',
name: 'networkidle',
description: 'Navigation is finished when there are no more connections for at least 500 ms.'
},
{
label: 'Commit',
name: 'commit',
description: 'Consider operation to be finished when network response is received and the document started loading.'
}
],
optional: true,
additionalParams: true
},
{
label: 'Wait for selector to load',
name: 'waitForSelector',
type: 'string',
optional: true,
additionalParams: true,
description: 'CSS selectors like .div or #div'
},
{
label: 'Metadata',
@@ -81,6 +119,8 @@ class Playwright_DocumentLoaders implements INode {
const metadata = nodeData.inputs?.metadata
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
let limit = nodeData.inputs?.limit as string
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
let waitForSelector = nodeData.inputs?.waitForSelector as string
let url = nodeData.inputs?.url as string
url = url.trim()
@@ -91,7 +131,26 @@ class Playwright_DocumentLoaders implements INode {
async function playwrightLoader(url: string): Promise<any> {
try {
let docs = []
const loader = new PlaywrightWebBaseLoader(url)
const config: PlaywrightWebBaseLoaderOptions = {
launchOptions: {
args: ['--no-sandbox'],
headless: true
}
}
if (waitUntilGoToOption) {
config['gotoOptions'] = {
waitUntil: waitUntilGoToOption
}
}
if (waitForSelector) {
config['evaluate'] = async (page: Page, _: Browser): Promise<string> => {
await page.waitForSelector(waitForSelector)
const result = await page.evaluate(() => document.body.innerHTML)
return result
}
}
const loader = new PlaywrightWebBaseLoader(url, config)
if (textSplitter) {
docs = await loader.loadAndSplit(textSplitter)
} else {
@@ -1,8 +1,9 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { PuppeteerWebBaseLoader } from 'langchain/document_loaders/web/puppeteer'
import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer'
import { test } from 'linkifyjs'
import { webCrawl, xmlScrape } from '../../../src'
import { PuppeteerLifeCycleEvent } from 'puppeteer'
class Puppeteer_DocumentLoaders implements INode {
label: string
@@ -64,7 +65,45 @@ class Puppeteer_DocumentLoaders implements INode {
additionalParams: true,
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
},
{
label: 'Wait Until',
name: 'waitUntilGoToOption',
type: 'options',
description: 'Select a go to wait until option',
options: [
{
label: 'Load',
name: 'load',
description: `When the initial HTML document's DOM has been loaded and parsed`
},
{
label: 'DOM Content Loaded',
name: 'domcontentloaded',
description: `When the complete HTML document's DOM has been loaded and parsed`
},
{
label: 'Network Idle 0',
name: 'networkidle0',
description: 'Navigation is finished when there are no more than 0 network connections for at least 500 ms'
},
{
label: 'Network Idle 2',
name: 'networkidle2',
description: 'Navigation is finished when there are no more than 2 network connections for at least 500 ms'
}
],
optional: true,
additionalParams: true
},
{
label: 'Wait for selector to load',
name: 'waitForSelector',
type: 'string',
optional: true,
additionalParams: true,
description: 'CSS selectors like .div or #div'
},
{
label: 'Metadata',
@@ -81,6 +120,8 @@ class Puppeteer_DocumentLoaders implements INode {
const metadata = nodeData.inputs?.metadata
const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
let limit = nodeData.inputs?.limit as string
let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
let waitForSelector = nodeData.inputs?.waitForSelector as string
let url = nodeData.inputs?.url as string
url = url.trim()
@@ -91,12 +132,26 @@ class Puppeteer_DocumentLoaders implements INode {
async function puppeteerLoader(url: string): Promise<any> {
try {
let docs = []
const loader = new PuppeteerWebBaseLoader(url, {
const config: PuppeteerWebBaseLoaderOptions = {
launchOptions: {
args: ['--no-sandbox'],
headless: 'new'
}
})
}
if (waitUntilGoToOption) {
config['gotoOptions'] = {
waitUntil: waitUntilGoToOption
}
}
if (waitForSelector) {
config['evaluate'] = async (page: Page, _: Browser): Promise<string> => {
await page.waitForSelector(waitForSelector)
const result = await page.evaluate(() => document.body.innerHTML)
return result
}
}
const loader = new PuppeteerWebBaseLoader(url, config)
if (textSplitter) {
docs = await loader.loadAndSplit(textSplitter)
} else {
@@ -1,6 +1,6 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara'
import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara'
class VectaraExisting_VectorStores implements INode {
label: string
@@ -40,9 +40,27 @@ class VectaraExisting_VectorStores implements INode {
additionalParams: true,
optional: true
},
{
label: 'Sentences Before',
name: 'sentencesBefore',
description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Sentences After',
name: 'sentencesAfter',
description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Lambda',
name: 'lambda',
description:
'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
type: 'number',
additionalParams: true,
optional: true
@@ -77,6 +95,8 @@ class VectaraExisting_VectorStores implements INode {
const corpusId = getCredentialParam('corpusID', credentialData, nodeData)
const vectaraMetadataFilter = nodeData.inputs?.filter as string
const sentencesBefore = nodeData.inputs?.sentencesBefore as number
const sentencesAfter = nodeData.inputs?.sentencesAfter as number
const lambda = nodeData.inputs?.lambda as number
const output = nodeData.outputs?.output as string
const topK = nodeData.inputs?.topK as string
@@ -92,6 +112,11 @@ class VectaraExisting_VectorStores implements INode {
if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
if (lambda) vectaraFilter.lambda = lambda
const vectaraContextConfig: VectaraContextConfig = {}
if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
vectaraFilter.contextConfig = vectaraContextConfig
const vectorStore = new VectaraStore(vectaraArgs)
if (output === 'retriever') {
@@ -1,7 +1,7 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara'
import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara'
import { Document } from 'langchain/document'
import { flatten } from 'lodash'
@@ -49,9 +49,27 @@ class VectaraUpsert_VectorStores implements INode {
additionalParams: true,
optional: true
},
{
label: 'Sentences Before',
name: 'sentencesBefore',
description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Sentences After',
name: 'sentencesAfter',
description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
type: 'number',
additionalParams: true,
optional: true
},
{
label: 'Lambda',
name: 'lambda',
description:
'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
type: 'number',
additionalParams: true,
optional: true
@@ -88,6 +106,8 @@ class VectaraUpsert_VectorStores implements INode {
const docs = nodeData.inputs?.document as Document[]
const embeddings = {} as Embeddings
const vectaraMetadataFilter = nodeData.inputs?.filter as string
const sentencesBefore = nodeData.inputs?.sentencesBefore as number
const sentencesAfter = nodeData.inputs?.sentencesAfter as number
const lambda = nodeData.inputs?.lambda as number
const output = nodeData.outputs?.output as string
const topK = nodeData.inputs?.topK as string
@@ -103,6 +123,11 @@ class VectaraUpsert_VectorStores implements INode {
if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
if (lambda) vectaraFilter.lambda = lambda
const vectaraContextConfig: VectaraContextConfig = {}
if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
vectaraFilter.contextConfig = vectaraContextConfig
const flattenDocs = docs && docs.length ? flatten(docs) : []
const finalDocs = []
for (let i = 0; i < flattenDocs.length; i += 1) {