Merge branch 'main' into feature/ZepVS

# Conflicts: # packages/components/package.json
2026-06-28 13:00:56 +03:00 · 2023-08-17 10:23:41 +01:00
parent 913b343956 5a8db9a534
commit a7b1c8c8b3
11 changed files with 474 additions and 232 deletions
@@ -5,6 +5,8 @@ import { flatten } from 'lodash'
 import { BaseChatMemory } from 'langchain/memory'
 import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler'

+const defaultMessage = `Do your best to answer the questions. Feel free to use any tools available to look up relevant information, only if necessary.`
+
 class ConversationalRetrievalAgent_Agents implements INode {
    label: string
    name: string
@@ -46,6 +48,7 @@ class ConversationalRetrievalAgent_Agents implements INode {
                label: 'System Message',
                name: 'systemMessage',
                type: 'string',
+                default: defaultMessage,
                rows: 4,
                optional: true,
                additionalParams: true
@@ -65,7 +68,7 @@ class ConversationalRetrievalAgent_Agents implements INode {
            agentType: 'openai-functions',
            verbose: process.env.DEBUG === 'true' ? true : false,
            agentArgs: {
-                prefix: systemMessage ?? `You are a helpful AI assistant.`
+                prefix: systemMessage ?? defaultMessage
            },
            returnIntermediateSteps: true
        })
@@ -5,6 +5,9 @@ import { DataSource } from 'typeorm'
 import { SqlDatabase } from 'langchain/sql_db'
 import { BaseLanguageModel } from 'langchain/base_language'
 import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler'
+import { DataSourceOptions } from 'typeorm/data-source'
+
+type DatabaseType = 'sqlite' | 'postgres' | 'mssql' | 'mysql'

 class SqlDatabaseChain_Chains implements INode {
    label: string
@@ -38,36 +41,48 @@ class SqlDatabaseChain_Chains implements INode {
                type: 'options',
                options: [
                    {
-                        label: 'SQlite',
+                        label: 'SQLite',
                        name: 'sqlite'
+                    },
+                    {
+                        label: 'PostgreSQL',
+                        name: 'postgres'
+                    },
+                    {
+                        label: 'MSSQL',
+                        name: 'mssql'
+                    },
+                    {
+                        label: 'MySQL',
+                        name: 'mysql'
                    }
                ],
                default: 'sqlite'
            },
            {
-                label: 'Database File Path',
-                name: 'dbFilePath',
+                label: 'Connection string or file path (sqlite only)',
+                name: 'url',
                type: 'string',
-                placeholder: 'C:/Users/chinook.db'
+                placeholder: '127.0.0.1:5432/chinook'
            }
        ]
    }

    async init(nodeData: INodeData): Promise<any> {
-        const databaseType = nodeData.inputs?.database as 'sqlite'
+        const databaseType = nodeData.inputs?.database as DatabaseType
        const model = nodeData.inputs?.model as BaseLanguageModel
-        const dbFilePath = nodeData.inputs?.dbFilePath
+        const url = nodeData.inputs?.url

-        const chain = await getSQLDBChain(databaseType, dbFilePath, model)
+        const chain = await getSQLDBChain(databaseType, url, model)
        return chain
    }

    async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
-        const databaseType = nodeData.inputs?.database as 'sqlite'
+        const databaseType = nodeData.inputs?.database as DatabaseType
        const model = nodeData.inputs?.model as BaseLanguageModel
-        const dbFilePath = nodeData.inputs?.dbFilePath
+        const url = nodeData.inputs?.url

-        const chain = await getSQLDBChain(databaseType, dbFilePath, model)
+        const chain = await getSQLDBChain(databaseType, url, model)
        const loggerHandler = new ConsoleCallbackHandler(options.logger)

        if (options.socketIO && options.socketIOClientId) {
@@ -81,11 +96,18 @@ class SqlDatabaseChain_Chains implements INode {
    }
 }

-const getSQLDBChain = async (databaseType: 'sqlite', dbFilePath: string, llm: BaseLanguageModel) => {
-    const datasource = new DataSource({
-        type: databaseType,
-        database: dbFilePath
-    })
+const getSQLDBChain = async (databaseType: DatabaseType, url: string, llm: BaseLanguageModel) => {
+    const datasource = new DataSource(
+        databaseType === 'sqlite'
+            ? {
+                  type: databaseType,
+                  database: url
+              }
+            : ({
+                  type: databaseType,
+                  url: url
+              } as DataSourceOptions)
+    )

    const db = await SqlDatabase.fromDataSourceParams({
        appDataSource: datasource
@@ -125,6 +125,13 @@ class ChatOpenAI_ChatModels implements INode {
                type: 'string',
                optional: true,
                additionalParams: true
+            },
+            {
+                label: 'BaseOptions',
+                name: 'baseOptions',
+                type: 'json',
+                optional: true,
+                additionalParams: true
            }
        ]
    }
@@ -139,6 +146,7 @@ class ChatOpenAI_ChatModels implements INode {
        const timeout = nodeData.inputs?.timeout as string
        const streaming = nodeData.inputs?.streaming as boolean
        const basePath = nodeData.inputs?.basepath as string
+        const baseOptions = nodeData.inputs?.baseOptions

        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
@@ -156,8 +164,18 @@ class ChatOpenAI_ChatModels implements INode {
        if (presencePenalty) obj.presencePenalty = parseFloat(presencePenalty)
        if (timeout) obj.timeout = parseInt(timeout, 10)

+        let parsedBaseOptions: any | undefined = undefined
+
+        if (baseOptions) {
+            try {
+                parsedBaseOptions = typeof baseOptions === 'object' ? baseOptions : JSON.parse(baseOptions)
+            } catch (exception) {
+                throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception)
+            }
+        }
        const model = new ChatOpenAI(obj, {
-            basePath
+            basePath,
+            baseOptions: parsedBaseOptions
        })
        return model
    }
@@ -64,7 +64,7 @@ class Cheerio_DocumentLoaders implements INode {
                additionalParams: true,
                description:
                    'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
-                warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
+                warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
            },
            {
                label: 'Metadata',
@@ -61,7 +61,40 @@ class Folder_DocumentLoaders implements INode {
            '.csv': (path) => new CSVLoader(path),
            '.docx': (path) => new DocxLoader(path),
            // @ts-ignore
-            '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
+            '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }),
+            '.aspx': (path) => new TextLoader(path),
+            '.asp': (path) => new TextLoader(path),
+            '.cpp': (path) => new TextLoader(path), // C++
+            '.c': (path) => new TextLoader(path),
+            '.cs': (path) => new TextLoader(path),
+            '.css': (path) => new TextLoader(path),
+            '.go': (path) => new TextLoader(path), // Go
+            '.h': (path) => new TextLoader(path), // C++ Header files
+            '.java': (path) => new TextLoader(path), // Java
+            '.js': (path) => new TextLoader(path), // JavaScript
+            '.less': (path) => new TextLoader(path), // Less files
+            '.ts': (path) => new TextLoader(path), // TypeScript
+            '.php': (path) => new TextLoader(path), // PHP
+            '.proto': (path) => new TextLoader(path), // Protocol Buffers
+            '.python': (path) => new TextLoader(path), // Python
+            '.py': (path) => new TextLoader(path), // Python
+            '.rst': (path) => new TextLoader(path), // reStructuredText
+            '.ruby': (path) => new TextLoader(path), // Ruby
+            '.rb': (path) => new TextLoader(path), // Ruby
+            '.rs': (path) => new TextLoader(path), // Rust
+            '.scala': (path) => new TextLoader(path), // Scala
+            '.sc': (path) => new TextLoader(path), // Scala
+            '.scss': (path) => new TextLoader(path), // Sass
+            '.sol': (path) => new TextLoader(path), // Solidity
+            '.sql': (path) => new TextLoader(path), //SQL
+            '.swift': (path) => new TextLoader(path), // Swift
+            '.markdown': (path) => new TextLoader(path), // Markdown
+            '.md': (path) => new TextLoader(path), // Markdown
+            '.tex': (path) => new TextLoader(path), // LaTeX
+            '.ltx': (path) => new TextLoader(path), // LaTeX
+            '.html': (path) => new TextLoader(path), // HTML
+            '.vb': (path) => new TextLoader(path), // Visual Basic
+            '.xml': (path) => new TextLoader(path) // XML
        })
        let docs = []

@@ -1,6 +1,6 @@
 import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { TextSplitter } from 'langchain/text_splitter'
-import { PlaywrightWebBaseLoader } from 'langchain/document_loaders/web/playwright'
+import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright'
 import { test } from 'linkifyjs'
 import { webCrawl, xmlScrape } from '../../../src'

@@ -64,7 +64,45 @@ class Playwright_DocumentLoaders implements INode {
                additionalParams: true,
                description:
                    'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
-                warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
+                warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
+            },
+            {
+                label: 'Wait Until',
+                name: 'waitUntilGoToOption',
+                type: 'options',
+                description: 'Select a go to wait until option',
+                options: [
+                    {
+                        label: 'Load',
+                        name: 'load',
+                        description: 'Consider operation to be finished when the load event is fired.'
+                    },
+                    {
+                        label: 'DOM Content Loaded',
+                        name: 'domcontentloaded',
+                        description: 'Consider operation to be finished when the DOMContentLoaded event is fired.'
+                    },
+                    {
+                        label: 'Network Idle',
+                        name: 'networkidle',
+                        description: 'Navigation is finished when there are no more connections for at least 500 ms.'
+                    },
+                    {
+                        label: 'Commit',
+                        name: 'commit',
+                        description: 'Consider operation to be finished when network response is received and the document started loading.'
+                    }
+                ],
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Wait for selector to load',
+                name: 'waitForSelector',
+                type: 'string',
+                optional: true,
+                additionalParams: true,
+                description: 'CSS selectors like .div or #div'
            },
            {
                label: 'Metadata',
@@ -81,6 +119,8 @@ class Playwright_DocumentLoaders implements INode {
        const metadata = nodeData.inputs?.metadata
        const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
        let limit = nodeData.inputs?.limit as string
+        let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined
+        let waitForSelector = nodeData.inputs?.waitForSelector as string

        let url = nodeData.inputs?.url as string
        url = url.trim()
@@ -91,7 +131,26 @@ class Playwright_DocumentLoaders implements INode {
        async function playwrightLoader(url: string): Promise<any> {
            try {
                let docs = []
-                const loader = new PlaywrightWebBaseLoader(url)
+                const config: PlaywrightWebBaseLoaderOptions = {
+                    launchOptions: {
+                        args: ['--no-sandbox'],
+                        headless: true
+                    }
+                }
+                if (waitUntilGoToOption) {
+                    config['gotoOptions'] = {
+                        waitUntil: waitUntilGoToOption
+                    }
+                }
+                if (waitForSelector) {
+                    config['evaluate'] = async (page: Page, _: Browser): Promise<string> => {
+                        await page.waitForSelector(waitForSelector)
+
+                        const result = await page.evaluate(() => document.body.innerHTML)
+                        return result
+                    }
+                }
+                const loader = new PlaywrightWebBaseLoader(url, config)
                if (textSplitter) {
                    docs = await loader.loadAndSplit(textSplitter)
                } else {
@@ -1,8 +1,9 @@
 import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { TextSplitter } from 'langchain/text_splitter'
-import { PuppeteerWebBaseLoader } from 'langchain/document_loaders/web/puppeteer'
+import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer'
 import { test } from 'linkifyjs'
 import { webCrawl, xmlScrape } from '../../../src'
+import { PuppeteerLifeCycleEvent } from 'puppeteer'

 class Puppeteer_DocumentLoaders implements INode {
    label: string
@@ -64,7 +65,45 @@ class Puppeteer_DocumentLoaders implements INode {
                additionalParams: true,
                description:
                    'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
-                warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
+                warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
+            },
+            {
+                label: 'Wait Until',
+                name: 'waitUntilGoToOption',
+                type: 'options',
+                description: 'Select a go to wait until option',
+                options: [
+                    {
+                        label: 'Load',
+                        name: 'load',
+                        description: 'When the whole page has loaded, including dependent resources such as stylesheets and images'
+                    },
+                    {
+                        label: 'DOM Content Loaded',
+                        name: 'domcontentloaded',
+                        description: `When the complete HTML document's DOM has been loaded and parsed`
+                    },
+                    {
+                        label: 'Network Idle 0',
+                        name: 'networkidle0',
+                        description: 'Navigation is finished when there are no more than 0 network connections for at least 500 ms'
+                    },
+                    {
+                        label: 'Network Idle 2',
+                        name: 'networkidle2',
+                        description: 'Navigation is finished when there are no more than 2 network connections for at least 500 ms'
+                    }
+                ],
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Wait for selector to load',
+                name: 'waitForSelector',
+                type: 'string',
+                optional: true,
+                additionalParams: true,
+                description: 'CSS selectors like .div or #div'
            },
            {
                label: 'Metadata',
@@ -81,6 +120,8 @@ class Puppeteer_DocumentLoaders implements INode {
        const metadata = nodeData.inputs?.metadata
        const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string
        let limit = nodeData.inputs?.limit as string
+        let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent
+        let waitForSelector = nodeData.inputs?.waitForSelector as string

        let url = nodeData.inputs?.url as string
        url = url.trim()
@@ -91,12 +132,26 @@ class Puppeteer_DocumentLoaders implements INode {
        async function puppeteerLoader(url: string): Promise<any> {
            try {
                let docs = []
-                const loader = new PuppeteerWebBaseLoader(url, {
+                const config: PuppeteerWebBaseLoaderOptions = {
                    launchOptions: {
                        args: ['--no-sandbox'],
                        headless: 'new'
                    }
-                })
+                }
+                if (waitUntilGoToOption) {
+                    config['gotoOptions'] = {
+                        waitUntil: waitUntilGoToOption
+                    }
+                }
+                if (waitForSelector) {
+                    config['evaluate'] = async (page: Page, _: Browser): Promise<string> => {
+                        await page.waitForSelector(waitForSelector)
+
+                        const result = await page.evaluate(() => document.body.innerHTML)
+                        return result
+                    }
+                }
+                const loader = new PuppeteerWebBaseLoader(url, config)
                if (textSplitter) {
                    docs = await loader.loadAndSplit(textSplitter)
                } else {
@@ -1,6 +1,6 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
 import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
-import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara'
+import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara'

 class VectaraExisting_VectorStores implements INode {
    label: string
@@ -40,9 +40,27 @@ class VectaraExisting_VectorStores implements INode {
                additionalParams: true,
                optional: true
            },
+            {
+                label: 'Sentences Before',
+                name: 'sentencesBefore',
+                description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Sentences After',
+                name: 'sentencesAfter',
+                description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
            {
                label: 'Lambda',
                name: 'lambda',
+                description:
+                    'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
                type: 'number',
                additionalParams: true,
                optional: true
@@ -77,6 +95,8 @@ class VectaraExisting_VectorStores implements INode {
        const corpusId = getCredentialParam('corpusID', credentialData, nodeData)

        const vectaraMetadataFilter = nodeData.inputs?.filter as string
+        const sentencesBefore = nodeData.inputs?.sentencesBefore as number
+        const sentencesAfter = nodeData.inputs?.sentencesAfter as number
        const lambda = nodeData.inputs?.lambda as number
        const output = nodeData.outputs?.output as string
        const topK = nodeData.inputs?.topK as string
@@ -92,6 +112,11 @@ class VectaraExisting_VectorStores implements INode {
        if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
        if (lambda) vectaraFilter.lambda = lambda

+        const vectaraContextConfig: VectaraContextConfig = {}
+        if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
+        if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
+        vectaraFilter.contextConfig = vectaraContextConfig
+
        const vectorStore = new VectaraStore(vectaraArgs)

        if (output === 'retriever') {
@@ -1,7 +1,7 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
 import { Embeddings } from 'langchain/embeddings/base'
 import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
-import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara'
+import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara'
 import { Document } from 'langchain/document'
 import { flatten } from 'lodash'

@@ -49,9 +49,27 @@ class VectaraUpsert_VectorStores implements INode {
                additionalParams: true,
                optional: true
            },
+            {
+                label: 'Sentences Before',
+                name: 'sentencesBefore',
+                description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Sentences After',
+                name: 'sentencesAfter',
+                description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
            {
                label: 'Lambda',
                name: 'lambda',
+                description:
+                    'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
                type: 'number',
                additionalParams: true,
                optional: true
@@ -88,6 +106,8 @@ class VectaraUpsert_VectorStores implements INode {
        const docs = nodeData.inputs?.document as Document[]
        const embeddings = {} as Embeddings
        const vectaraMetadataFilter = nodeData.inputs?.filter as string
+        const sentencesBefore = nodeData.inputs?.sentencesBefore as number
+        const sentencesAfter = nodeData.inputs?.sentencesAfter as number
        const lambda = nodeData.inputs?.lambda as number
        const output = nodeData.outputs?.output as string
        const topK = nodeData.inputs?.topK as string
@@ -103,6 +123,11 @@ class VectaraUpsert_VectorStores implements INode {
        if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
        if (lambda) vectaraFilter.lambda = lambda

+        const vectaraContextConfig: VectaraContextConfig = {}
+        if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
+        if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
+        vectaraFilter.contextConfig = vectaraContextConfig
+
        const flattenDocs = docs && docs.length ? flatten(docs) : []
        const finalDocs = []
        for (let i = 0; i < flattenDocs.length; i += 1) {