From 177e7f5c0fa83ecbae2509f09c3101cb20381c27 Mon Sep 17 00:00:00 2001 From: rkeshwani Date: Fri, 11 Aug 2023 00:20:04 +0000 Subject: [PATCH 01/15] Add additional optional input parameter for adding additional file loaders. --- .../nodes/documentloaders/Folder/Folder.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/components/nodes/documentloaders/Folder/Folder.ts b/packages/components/nodes/documentloaders/Folder/Folder.ts index 83bffd18..4ffdce91 100644 --- a/packages/components/nodes/documentloaders/Folder/Folder.ts +++ b/packages/components/nodes/documentloaders/Folder/Folder.ts @@ -46,6 +46,13 @@ class Folder_DocumentLoaders implements INode { type: 'json', optional: true, additionalParams: true + }, + { + label: 'Additional File Loaders', + name: 'additionalLoaders', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -54,6 +61,8 @@ class Folder_DocumentLoaders implements INode { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const folderPath = nodeData.inputs?.folderPath as string const metadata = nodeData.inputs?.metadata + const additionalLoaders = nodeData.inputs?.additionalLoaders + const parsedLoaders = additionalLoaders ? ( typeof metadata === 'object' ? additionalLoaders: JSON.parse( additionalLoaders ) ) : [] const loader = new DirectoryLoader(folderPath, { '.json': (path) => new JSONLoader(path), @@ -61,7 +70,8 @@ class Folder_DocumentLoaders implements INode { '.csv': (path) => new CSVLoader(path), '.docx': (path) => new DocxLoader(path), // @ts-ignore - '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) + '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }), + ...parsedLoaders }) let docs = [] From ff7ee41758dd4fc24893cadb8c7551706f111529 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 14 Aug 2023 00:31:07 +0200 Subject: [PATCH 02/15] Added postgres, cockroachdb, mssql, mysql, mariadb, mongodb and oracle to SqlDatabaseChain_Chains --- .../SqlDatabaseChain/SqlDatabaseChain.ts | 68 +++++++++++++++---- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts b/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts index 9416371b..6bfc2f8a 100644 --- a/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts +++ b/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts @@ -5,6 +5,9 @@ import { DataSource } from 'typeorm' import { SqlDatabase } from 'langchain/sql_db' import { BaseLanguageModel } from 'langchain/base_language' import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler' +import { DataSourceOptions } from 'typeorm/data-source' + +type DatabaseType = 'sqlite' | 'postgres' | 'cockroachdb' | 'mssql' | 'mysql' | 'mariadb' | 'mongodb' | 'oracle' class SqlDatabaseChain_Chains implements INode { label: string @@ -38,36 +41,64 @@ class SqlDatabaseChain_Chains implements INode { type: 'options', options: [ { - label: 'SQlite', + label: 'SQLite', name: 'sqlite' + }, + { + label: 'PostgreSQL', + name: 'postgres' + }, + { + label: 'CockroachDB', + name: 'cockroachdb' + }, + { + label: 'MSSQL', + name: 'mssql' + }, + { + label: 'MySQL', + name: 'mysql' + }, + { + label: 'MariaDB', + name: 'mariadb' + }, + { + label: 'MongoDB', + name: 'mongodb' + }, + { + label: 'Oracle', + name: 'oracle' } ], default: 'sqlite' }, { - label: 'Database File Path', - name: 'dbFilePath', + label: 'Connection string or file path (sqlite only)', + name: 'url', type: 'string', - placeholder: 'C:/Users/chinook.db' + placeholder: '1270.0.0.1:5432/chinook' } ] } async init(nodeData: INodeData): Promise { - const databaseType = nodeData.inputs?.database as 'sqlite' + const databaseType = nodeData.inputs?.database as DatabaseType const model = nodeData.inputs?.model as BaseLanguageModel - const dbFilePath = nodeData.inputs?.dbFilePath + const url = nodeData.inputs?.url - const chain = await getSQLDBChain(databaseType, dbFilePath, model) + const chain = await getSQLDBChain(databaseType, url, model) return chain } async run(nodeData: INodeData, input: string, options: ICommonObject): Promise { - const databaseType = nodeData.inputs?.database as 'sqlite' + const databaseType = nodeData.inputs?.database as DatabaseType const model = nodeData.inputs?.model as BaseLanguageModel - const dbFilePath = nodeData.inputs?.dbFilePath + const url = nodeData.inputs?.url - const chain = await getSQLDBChain(databaseType, dbFilePath, model) + const chain = await getSQLDBChain(databaseType, url, model) const loggerHandler = new ConsoleCallbackHandler(options.logger) if (options.socketIO && options.socketIOClientId) { @@ -81,11 +112,18 @@ class SqlDatabaseChain_Chains implements INode { } } -const getSQLDBChain = async (databaseType: 'sqlite', dbFilePath: string, llm: BaseLanguageModel) => { - const datasource = new DataSource({ - type: databaseType, - database: dbFilePath - }) +const getSQLDBChain = async (databaseType: DatabaseType, url: string, llm: BaseLanguageModel) => { + const datasource = new DataSource( + databaseType === 'sqlite' + ? { + type: databaseType, + database: url + } + : ({ + type: databaseType, + url: url + } as DataSourceOptions) + ) const db = await SqlDatabase.fromDataSourceParams({ appDataSource: datasource From 1e90db79890b53bb1b81747acb3021d87c917765 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 14 Aug 2023 16:18:10 +0200 Subject: [PATCH 03/15] Remove unsupported databases --- .../SqlDatabaseChain/SqlDatabaseChain.ts | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts b/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts index 6bfc2f8a..2a0c71cf 100644 --- a/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts +++ b/packages/components/nodes/chains/SqlDatabaseChain/SqlDatabaseChain.ts @@ -7,7 +7,7 @@ import { BaseLanguageModel } from 'langchain/base_language' import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler' import { DataSourceOptions } from 'typeorm/data-source' -type DatabaseType = 'sqlite' | 'postgres' | 'cockroachdb' | 'mssql' | 'mysql' | 'mariadb' | 'mongodb' | 'oracle' +type DatabaseType = 'sqlite' | 'postgres' | 'mssql' | 'mysql' class SqlDatabaseChain_Chains implements INode { label: string @@ -48,10 +48,6 @@ class SqlDatabaseChain_Chains implements INode { label: 'PostgreSQL', name: 'postgres' }, - { - label: 'CockroachDB', - name: 'cockroachdb' - }, { label: 'MSSQL', name: 'mssql' @@ -59,18 +55,6 @@ class SqlDatabaseChain_Chains implements INode { { label: 'MySQL', name: 'mysql' - }, - { - label: 'MariaDB', - name: 'mariadb' - }, - { - label: 'MongoDB', - name: 'mongodb' - }, - { - label: 'Oracle', - name: 'oracle' } ], default: 'sqlite' From f6933b592d934fc2530ddd621066c0a7a30fea3c Mon Sep 17 00:00:00 2001 From: rkeshwani Date: Tue, 15 Aug 2023 00:13:38 +0000 Subject: [PATCH 04/15] Added additional file extensions and removed abstracted inputs. --- .../nodes/documentloaders/Folder/Folder.ts | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/packages/components/nodes/documentloaders/Folder/Folder.ts b/packages/components/nodes/documentloaders/Folder/Folder.ts index 4ffdce91..7b90d9ed 100644 --- a/packages/components/nodes/documentloaders/Folder/Folder.ts +++ b/packages/components/nodes/documentloaders/Folder/Folder.ts @@ -47,13 +47,6 @@ class Folder_DocumentLoaders implements INode { optional: true, additionalParams: true }, - { - label: 'Additional File Loaders', - name: 'additionalLoaders', - type: 'json', - optional: true, - additionalParams: true - } ] } @@ -61,8 +54,6 @@ class Folder_DocumentLoaders implements INode { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const folderPath = nodeData.inputs?.folderPath as string const metadata = nodeData.inputs?.metadata - const additionalLoaders = nodeData.inputs?.additionalLoaders - const parsedLoaders = additionalLoaders ? ( typeof metadata === 'object' ? additionalLoaders: JSON.parse( additionalLoaders ) ) : [] const loader = new DirectoryLoader(folderPath, { '.json': (path) => new JSONLoader(path), @@ -71,7 +62,39 @@ class Folder_DocumentLoaders implements INode { '.docx': (path) => new DocxLoader(path), // @ts-ignore '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }), - ...parsedLoaders + '.aspx': (path) => new TextLoader(path), + '.asp': (path) => new TextLoader(path), + '.cpp': (path) => new TextLoader(path), // C++ + '.c': (path) => new TextLoader(path), + '.cs': (path) => new TextLoader(path), + '.css': (path) => new TextLoader(path), + '.go': (path) => new TextLoader(path), // Go + '.h': (path) => new TextLoader(path), // C++ Header files + '.java': (path) => new TextLoader(path), // Java + '.js': (path) => new TextLoader(path), // JavaScript + '.less': (path) => new TextLoader(path), // Less files + '.ts': (path) => new TextLoader(path), // TypeScript + '.php': (path) => new TextLoader(path), // PHP + '.proto': (path) => new TextLoader(path), // Protocol Buffers + '.python': (path) => new TextLoader(path), // Python + '.py': (path) => new TextLoader(path), // Python + '.rst': (path) => new TextLoader(path), // reStructuredText + '.ruby': (path) => new TextLoader(path), // Ruby + '.rb': (path) => new TextLoader(path), // Ruby + '.rs': (path) => new TextLoader(path), // Rust + '.scala': (path) => new TextLoader(path), // Scala + '.sc': (path) => new TextLoader(path), // Scala + '.scss': (path) => new TextLoader(path),// Sass + '.sol': (path) => new TextLoader(path), // Solidity + '.sql': (path) => new TextLoader(path),//SQL + '.swift': (path) => new TextLoader(path), // Swift + '.markdown': (path) => new TextLoader(path), // Markdown + '.md': (path) => new TextLoader(path), // Markdown + '.tex': (path) => new TextLoader(path), // LaTeX + '.ltx': (path) => new TextLoader(path), // LaTeX + '.html': (path) => new TextLoader(path), // HTML + '.vb': (path) => new TextLoader(path), // Visual Basic + '.xml': (path) => new TextLoader(path)// XML }) let docs = [] From d37bc6acf6a5936a892a5810db6093c726fcbb7a Mon Sep 17 00:00:00 2001 From: Henry Date: Tue, 15 Aug 2023 17:20:53 +0100 Subject: [PATCH 05/15] fix conversation retrieval qa agent original prompt --- .../ConversationalRetrievalAgent.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/components/nodes/agents/ConversationalRetrievalAgent/ConversationalRetrievalAgent.ts b/packages/components/nodes/agents/ConversationalRetrievalAgent/ConversationalRetrievalAgent.ts index ed39fbc8..c0cef052 100644 --- a/packages/components/nodes/agents/ConversationalRetrievalAgent/ConversationalRetrievalAgent.ts +++ b/packages/components/nodes/agents/ConversationalRetrievalAgent/ConversationalRetrievalAgent.ts @@ -5,6 +5,8 @@ import { flatten } from 'lodash' import { BaseChatMemory } from 'langchain/memory' import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler' +const defaultMessage = `Do your best to answer the questions. Feel free to use any tools available to look up relevant information, only if necessary.` + class ConversationalRetrievalAgent_Agents implements INode { label: string name: string @@ -46,6 +48,7 @@ class ConversationalRetrievalAgent_Agents implements INode { label: 'System Message', name: 'systemMessage', type: 'string', + default: defaultMessage, rows: 4, optional: true, additionalParams: true @@ -65,7 +68,7 @@ class ConversationalRetrievalAgent_Agents implements INode { agentType: 'openai-functions', verbose: process.env.DEBUG === 'true' ? true : false, agentArgs: { - prefix: systemMessage ?? `You are a helpful AI assistant.` + prefix: systemMessage ?? defaultMessage }, returnIntermediateSteps: true }) From f80547af60f9d5a54b2e916b83888abf4e016004 Mon Sep 17 00:00:00 2001 From: Seif Date: Tue, 15 Aug 2023 11:45:31 -0700 Subject: [PATCH 06/15] Add sentence config to Flowise --- .../Vectara_Existing/Vectara_Existing.ts | 23 ++++++++++++++++++- .../Vectara_Upsert/Vectara_Upsert.ts | 23 ++++++++++++++++++- packages/components/package.json | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts b/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts index f344338a..80fd0639 100644 --- a/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts +++ b/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts @@ -1,6 +1,6 @@ import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' -import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara' +import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara' class VectaraExisting_VectorStores implements INode { label: string @@ -40,6 +40,20 @@ class VectaraExisting_VectorStores implements INode { additionalParams: true, optional: true }, + { + label: 'Sentences Before', + name: 'sentencesBefore', + type: 'number', + additionalParams: true, + optional: true + }, + { + label: 'Sentences After', + name: 'sentencesAfter', + type: 'number', + additionalParams: true, + optional: true + }, { label: 'Lambda', name: 'lambda', @@ -77,6 +91,8 @@ class VectaraExisting_VectorStores implements INode { const corpusId = getCredentialParam('corpusID', credentialData, nodeData) const vectaraMetadataFilter = nodeData.inputs?.filter as string + const sentencesBefore = nodeData.inputs?.sentencesBefore as number + const sentencesAfter = nodeData.inputs?.sentencesAfter as number const lambda = nodeData.inputs?.lambda as number const output = nodeData.outputs?.output as string const topK = nodeData.inputs?.topK as string @@ -92,6 +108,11 @@ class VectaraExisting_VectorStores implements INode { if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter if (lambda) vectaraFilter.lambda = lambda + const vectaraContextConfig: VectaraContextConfig = {} + if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore + if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter + vectaraFilter.contextConfig = vectaraContextConfig + const vectorStore = new VectaraStore(vectaraArgs) if (output === 'retriever') { diff --git a/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts b/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts index b2ee79e7..cda03240 100644 --- a/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts +++ b/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts @@ -1,7 +1,7 @@ import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' import { Embeddings } from 'langchain/embeddings/base' import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' -import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara' +import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara' import { Document } from 'langchain/document' import { flatten } from 'lodash' @@ -49,6 +49,20 @@ class VectaraUpsert_VectorStores implements INode { additionalParams: true, optional: true }, + { + label: 'Sentences Before', + name: 'sentencesBefore', + type: 'number', + additionalParams: true, + optional: true + }, + { + label: 'Sentences After', + name: 'sentencesAfter', + type: 'number', + additionalParams: true, + optional: true + }, { label: 'Lambda', name: 'lambda', @@ -88,6 +102,8 @@ class VectaraUpsert_VectorStores implements INode { const docs = nodeData.inputs?.document as Document[] const embeddings = {} as Embeddings const vectaraMetadataFilter = nodeData.inputs?.filter as string + const sentencesBefore = nodeData.inputs?.sentencesBefore as number + const sentencesAfter = nodeData.inputs?.sentencesAfter as number const lambda = nodeData.inputs?.lambda as number const output = nodeData.outputs?.output as string const topK = nodeData.inputs?.topK as string @@ -103,6 +119,11 @@ class VectaraUpsert_VectorStores implements INode { if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter if (lambda) vectaraFilter.lambda = lambda + const vectaraContextConfig: VectaraContextConfig = {} + if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore + if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter + vectaraFilter.contextConfig = vectaraContextConfig + const flattenDocs = docs && docs.length ? flatten(docs) : [] const finalDocs = [] for (let i = 0; i < flattenDocs.length; i += 1) { diff --git a/packages/components/package.json b/packages/components/package.json index bad9fb74..da4d0971 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -40,7 +40,7 @@ "google-auth-library": "^9.0.0", "graphql": "^16.6.0", "html-to-text": "^9.0.5", - "langchain": "^0.0.122", + "langchain": "^0.0.126", "linkifyjs": "^4.1.1", "mammoth": "^1.5.1", "moment": "^2.29.3", From 2e8bbaeab1fb1e8bedab6afe77272eac126a1d94 Mon Sep 17 00:00:00 2001 From: Seif Date: Tue, 15 Aug 2023 11:50:37 -0700 Subject: [PATCH 07/15] Add descriptions --- .../nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts | 4 ++++ .../nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts b/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts index 80fd0639..3ef04f07 100644 --- a/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts +++ b/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts @@ -43,6 +43,7 @@ class VectaraExisting_VectorStores implements INode { { label: 'Sentences Before', name: 'sentencesBefore', + description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.', type: 'number', additionalParams: true, optional: true @@ -50,6 +51,7 @@ class VectaraExisting_VectorStores implements INode { { label: 'Sentences After', name: 'sentencesAfter', + description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.', type: 'number', additionalParams: true, optional: true @@ -57,6 +59,8 @@ class VectaraExisting_VectorStores implements INode { { label: 'Lambda', name: 'lambda', + description: + 'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.', type: 'number', additionalParams: true, optional: true diff --git a/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts b/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts index cda03240..51fb67ed 100644 --- a/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts +++ b/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts @@ -52,6 +52,7 @@ class VectaraUpsert_VectorStores implements INode { { label: 'Sentences Before', name: 'sentencesBefore', + description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.', type: 'number', additionalParams: true, optional: true @@ -59,6 +60,7 @@ class VectaraUpsert_VectorStores implements INode { { label: 'Sentences After', name: 'sentencesAfter', + description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.', type: 'number', additionalParams: true, optional: true @@ -66,6 +68,8 @@ class VectaraUpsert_VectorStores implements INode { { label: 'Lambda', name: 'lambda', + description: + 'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.', type: 'number', additionalParams: true, optional: true From d10f3800e6a66cf201f63f993865bcb5fb3f96fa Mon Sep 17 00:00:00 2001 From: rkeshwani Date: Tue, 15 Aug 2023 23:36:50 +0000 Subject: [PATCH 08/15] Fixed spaces and comma issue. --- .../components/nodes/documentloaders/Folder/Folder.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/components/nodes/documentloaders/Folder/Folder.ts b/packages/components/nodes/documentloaders/Folder/Folder.ts index 7b90d9ed..f5d0c640 100644 --- a/packages/components/nodes/documentloaders/Folder/Folder.ts +++ b/packages/components/nodes/documentloaders/Folder/Folder.ts @@ -46,7 +46,7 @@ class Folder_DocumentLoaders implements INode { type: 'json', optional: true, additionalParams: true - }, + } ] } @@ -84,9 +84,9 @@ class Folder_DocumentLoaders implements INode { '.rs': (path) => new TextLoader(path), // Rust '.scala': (path) => new TextLoader(path), // Scala '.sc': (path) => new TextLoader(path), // Scala - '.scss': (path) => new TextLoader(path),// Sass + '.scss': (path) => new TextLoader(path), // Sass '.sol': (path) => new TextLoader(path), // Solidity - '.sql': (path) => new TextLoader(path),//SQL + '.sql': (path) => new TextLoader(path), //SQL '.swift': (path) => new TextLoader(path), // Swift '.markdown': (path) => new TextLoader(path), // Markdown '.md': (path) => new TextLoader(path), // Markdown @@ -94,7 +94,7 @@ class Folder_DocumentLoaders implements INode { '.ltx': (path) => new TextLoader(path), // LaTeX '.html': (path) => new TextLoader(path), // HTML '.vb': (path) => new TextLoader(path), // Visual Basic - '.xml': (path) => new TextLoader(path)// XML + '.xml': (path) => new TextLoader(path) // XML }) let docs = [] From 8034076361d82dd12dae8b36f36f7ad27702e88d Mon Sep 17 00:00:00 2001 From: Henry Date: Wed, 16 Aug 2023 02:02:49 +0100 Subject: [PATCH 09/15] update Vectara template --- .../chatflows/Vectara LLM Chain Upload.json | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json b/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json index 2146aa12..0758ec9a 100644 --- a/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json +++ b/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json @@ -4,14 +4,14 @@ { "width": 300, "height": 408, - "id": "vectaraExisting_0", + "id": "vectaraUpsert_0", "position": { "x": 438, "y": 214 }, "type": "customNode", "data": { - "id": "vectaraExisting_0", + "id": "vectaraUpsert_0", "label": "Vectara Upsert Document", "version": 1, - "name": "vectaraExisting", + "name": "vectaraUpsert", "type": "Vectara", "baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"], "category": "Vector Stores", @@ -22,7 +22,7 @@ "name": "credential", "type": "credential", "credentialNames": ["vectaraApi"], - "id": "vectaraExisting_0-input-credential-credential" + "id": "vectaraUpsert_0-input-credential-credential" }, { "label": "Filter", @@ -30,7 +30,7 @@ "type": "json", "additionalParams": true, "optional": true, - "id": "vectaraExisting_0-input-filter-json" + "id": "vectaraUpsert_0-input-filter-json" }, { "label": "Lambda", @@ -38,7 +38,7 @@ "type": "number", "additionalParams": true, "optional": true, - "id": "vectaraExisting_0-input-lambda-number" + "id": "vectaraUpsert_0-input-lambda-number" }, { "label": "Top K", @@ -48,7 +48,7 @@ "type": "number", "additionalParams": true, "optional": true, - "id": "vectaraExisting_0-input-topK-number" + "id": "vectaraUpsert_0-input-topK-number" } ], "inputAnchors": [ @@ -57,7 +57,7 @@ "name": "document", "type": "Document", "list": true, - "id": "vectaraExisting_0-input-document-Document" + "id": "vectaraUpsert_0-input-document-Document" } ], "inputs": { @@ -73,13 +73,13 @@ "type": "options", "options": [ { - "id": "vectaraExisting_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever", + "id": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever", "name": "retriever", "label": "Vectara Retriever", "type": "Vectara | VectorStoreRetriever | BaseRetriever" }, { - "id": "vectaraExisting_0-output-vectorStore-Vectara|VectorStore", + "id": "vectaraUpsert_0-output-vectorStore-Vectara|VectorStore", "name": "vectorStore", "label": "Vectara Vector Store", "type": "Vectara | VectorStore" @@ -392,7 +392,7 @@ ], "inputs": { "model": "{{chatOpenAI_0.data.instance}}", - "vectorStoreRetriever": "{{vectaraExisting_0.data.instance}}", + "vectorStoreRetriever": "{{vectaraUpsert_0.data.instance}}", "memory": "", "returnSourceDocuments": "", "systemMessagePrompt": "", @@ -418,19 +418,19 @@ { "source": "pdfFile_0", "sourceHandle": "pdfFile_0-output-pdfFile-Document", - "target": "vectaraExisting_0", - "targetHandle": "vectaraExisting_0-input-document-Document", + "target": "vectaraUpsert_0", + "targetHandle": "vectaraUpsert_0-input-document-Document", "type": "buttonedge", - "id": "pdfFile_0-pdfFile_0-output-pdfFile-Document-vectaraExisting_0-vectaraExisting_0-input-document-Document", + "id": "pdfFile_0-pdfFile_0-output-pdfFile-Document-vectaraUpsert_0-vectaraUpsert_0-input-document-Document", "data": { "label": "" } }, { - "source": "vectaraExisting_0", - "sourceHandle": "vectaraExisting_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever", + "source": "vectaraUpsert_0", + "sourceHandle": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever", "target": "conversationalRetrievalQAChain_0", "targetHandle": "conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever", "type": "buttonedge", - "id": "vectaraExisting_0-vectaraExisting_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever", + "id": "vectaraUpsert_0-vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever", "data": { "label": "" } }, { From f0f5585cac26ff7a8252b4761a5da6dc923de2d1 Mon Sep 17 00:00:00 2001 From: Seif Date: Wed, 16 Aug 2023 09:56:14 -0700 Subject: [PATCH 10/15] Update template --- .../chatflows/Vectara LLM Chain Upload.json | 388 +++++++++--------- 1 file changed, 195 insertions(+), 193 deletions(-) diff --git a/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json b/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json index 0758ec9a..784ad240 100644 --- a/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json +++ b/packages/server/marketplaces/chatflows/Vectara LLM Chain Upload.json @@ -1,186 +1,11 @@ { "description": "A simple LLM chain that uses Vectara to enable conversations with uploaded documents", "nodes": [ - { - "width": 300, - "height": 408, - "id": "vectaraUpsert_0", - "position": { "x": 438, "y": 214 }, - "type": "customNode", - "data": { - "id": "vectaraUpsert_0", - "label": "Vectara Upsert Document", - "version": 1, - "name": "vectaraUpsert", - "type": "Vectara", - "baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"], - "category": "Vector Stores", - "description": "Upsert documents to Vectara", - "inputParams": [ - { - "label": "Connect Credential", - "name": "credential", - "type": "credential", - "credentialNames": ["vectaraApi"], - "id": "vectaraUpsert_0-input-credential-credential" - }, - { - "label": "Filter", - "name": "filter", - "type": "json", - "additionalParams": true, - "optional": true, - "id": "vectaraUpsert_0-input-filter-json" - }, - { - "label": "Lambda", - "name": "lambda", - "type": "number", - "additionalParams": true, - "optional": true, - "id": "vectaraUpsert_0-input-lambda-number" - }, - { - "label": "Top K", - "name": "topK", - "description": "Number of top results to fetch. Defaults to 4", - "placeholder": "4", - "type": "number", - "additionalParams": true, - "optional": true, - "id": "vectaraUpsert_0-input-topK-number" - } - ], - "inputAnchors": [ - { - "label": "Document", - "name": "document", - "type": "Document", - "list": true, - "id": "vectaraUpsert_0-input-document-Document" - } - ], - "inputs": { - "document": ["{{pdfFile_0.data.instance}}"], - "filter": "", - "lambda": "", - "topK": "" - }, - "outputAnchors": [ - { - "name": "output", - "label": "Output", - "type": "options", - "options": [ - { - "id": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever", - "name": "retriever", - "label": "Vectara Retriever", - "type": "Vectara | VectorStoreRetriever | BaseRetriever" - }, - { - "id": "vectaraUpsert_0-output-vectorStore-Vectara|VectorStore", - "name": "vectorStore", - "label": "Vectara Vector Store", - "type": "Vectara | VectorStore" - } - ], - "default": "retriever" - } - ], - "outputs": { "output": "retriever" }, - "selected": false - }, - "selected": false, - "dragging": false, - "positionAbsolute": { "x": 438, "y": 214 } - }, - { - "width": 300, - "height": 509, - "id": "pdfFile_0", - "position": { "x": 68.3013317598369, "y": 199.60454731299677 }, - "type": "customNode", - "data": { - "id": "pdfFile_0", - "label": "Pdf File", - "version": 1, - "name": "pdfFile", - "type": "Document", - "baseClasses": ["Document"], - "category": "Document Loaders", - "description": "Load data from PDF files", - "inputParams": [ - { - "label": "Pdf File", - "name": "pdfFile", - "type": "file", - "fileType": ".pdf", - "id": "pdfFile_0-input-pdfFile-file" - }, - { - "label": "Usage", - "name": "usage", - "type": "options", - "options": [ - { "label": "One document per page", "name": "perPage" }, - { "label": "One document per file", "name": "perFile" } - ], - "default": "perPage", - "id": "pdfFile_0-input-usage-options" - }, - { - "label": "Use Legacy Build", - "name": "legacyBuild", - "type": "boolean", - "optional": true, - "additionalParams": true, - "id": "pdfFile_0-input-legacyBuild-boolean" - }, - { - "label": "Metadata", - "name": "metadata", - "type": "json", - "optional": true, - "additionalParams": true, - "id": "pdfFile_0-input-metadata-json" - } - ], - "inputAnchors": [ - { - "label": "Text Splitter", - "name": "textSplitter", - "type": "TextSplitter", - "optional": true, - "id": "pdfFile_0-input-textSplitter-TextSplitter" - } - ], - "inputs": { - "textSplitter": "", - "usage": "perPage", - "legacyBuild": "", - "metadata": "" - }, - "outputAnchors": [ - { - "id": "pdfFile_0-output-pdfFile-Document", - "name": "pdfFile", - "label": "Document", - "type": "Document" - } - ], - "outputs": {}, - "selected": false - }, - "selected": false, - "positionAbsolute": { "x": 68.3013317598369, "y": 199.60454731299677 }, - "dragging": false - }, { "width": 300, "height": 525, "id": "chatOpenAI_0", - "position": { "x": 804.3889791707068, "y": 195.11620799951592 }, + "position": { "x": 514.1088940275924, "y": 199.574479681537 }, "type": "customNode", "data": { "id": "chatOpenAI_0", @@ -211,10 +36,7 @@ { "label": "gpt-3.5-turbo", "name": "gpt-3.5-turbo" }, { "label": "gpt-3.5-turbo-0613", "name": "gpt-3.5-turbo-0613" }, { "label": "gpt-3.5-turbo-16k", "name": "gpt-3.5-turbo-16k" }, - { - "label": "gpt-3.5-turbo-16k-0613", - "name": "gpt-3.5-turbo-16k-0613" - } + { "label": "gpt-3.5-turbo-16k-0613", "name": "gpt-3.5-turbo-16k-0613" } ], "default": "gpt-3.5-turbo", "optional": true, @@ -286,7 +108,7 @@ "inputAnchors": [], "inputs": { "modelName": "gpt-3.5-turbo", - "temperature": "0.2", + "temperature": "0.5", "maxTokens": "", "topP": "", "frequencyPenalty": "", @@ -306,14 +128,14 @@ "selected": false }, "selected": false, - "positionAbsolute": { "x": 804.3889791707068, "y": 195.11620799951592 }, + "positionAbsolute": { "x": 514.1088940275924, "y": 199.574479681537 }, "dragging": false }, { "width": 300, "height": 481, "id": "conversationalRetrievalQAChain_0", - "position": { "x": 1160.4877473512795, "y": 259.2799138505109 }, + "position": { "x": 900.4793407261002, "y": 205.9476004518217 }, "type": "customNode", "data": { "id": "conversationalRetrievalQAChain_0", @@ -410,11 +232,200 @@ "selected": false }, "selected": false, - "positionAbsolute": { "x": 1160.4877473512795, "y": 259.2799138505109 }, + "positionAbsolute": { "x": 900.4793407261002, "y": 205.9476004518217 }, "dragging": false + }, + { + "width": 300, + "height": 509, + "id": "pdfFile_0", + "position": { "x": -210.44158723479913, "y": 236.6627524951051 }, + "type": "customNode", + "data": { + "id": "pdfFile_0", + "label": "Pdf File", + "version": 1, + "name": "pdfFile", + "type": "Document", + "baseClasses": ["Document"], + "category": "Document Loaders", + "description": "Load data from PDF files", + "inputParams": [ + { "label": "Pdf File", "name": "pdfFile", "type": "file", "fileType": ".pdf", "id": "pdfFile_0-input-pdfFile-file" }, + { + "label": "Usage", + "name": "usage", + "type": "options", + "options": [ + { "label": "One document per page", "name": "perPage" }, + { "label": "One document per file", "name": "perFile" } + ], + "default": "perPage", + "id": "pdfFile_0-input-usage-options" + }, + { + "label": "Use Legacy Build", + "name": "legacyBuild", + "type": "boolean", + "optional": true, + "additionalParams": true, + "id": "pdfFile_0-input-legacyBuild-boolean" + }, + { + "label": "Metadata", + "name": "metadata", + "type": "json", + "optional": true, + "additionalParams": true, + "id": "pdfFile_0-input-metadata-json" + } + ], + "inputAnchors": [ + { + "label": "Text Splitter", + "name": "textSplitter", + "type": "TextSplitter", + "optional": true, + "id": "pdfFile_0-input-textSplitter-TextSplitter" + } + ], + "inputs": { "textSplitter": "", "usage": "perPage", "legacyBuild": "", "metadata": "" }, + "outputAnchors": [ + { "id": "pdfFile_0-output-pdfFile-Document", "name": "pdfFile", "label": "Document", "type": "Document" } + ], + "outputs": {}, + "selected": false + }, + "selected": false, + "positionAbsolute": { "x": -210.44158723479913, "y": 236.6627524951051 }, + "dragging": false + }, + { + "width": 300, + "height": 408, + "id": "vectaraUpsert_0", + "position": { "x": 172.06946164914868, "y": 373.11406233089934 }, + "type": "customNode", + "data": { + "id": "vectaraUpsert_0", + "label": "Vectara Upsert Document", + "version": 1, + "name": "vectaraUpsert", + "type": "Vectara", + "baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"], + "category": "Vector Stores", + "description": "Upsert documents to Vectara", + "inputParams": [ + { + "label": "Connect Credential", + "name": "credential", + "type": "credential", + "credentialNames": ["vectaraApi"], + "id": "vectaraUpsert_0-input-credential-credential" + }, + { + "label": "Vectara Metadata Filter", + "name": "filter", + "description": "Filter to apply to Vectara metadata. Refer to the documentation on how to use Vectara filters with Flowise.", + "type": "string", + "additionalParams": true, + "optional": true, + "id": "vectaraUpsert_0-input-filter-string" + }, + { + "label": "Sentences Before", + "name": "sentencesBefore", + "description": "Number of sentences to fetch before the matched sentence. Defaults to 2.", + "type": "number", + "additionalParams": true, + "optional": true, + "id": "vectaraUpsert_0-input-sentencesBefore-number" + }, + { + "label": "Sentences After", + "name": "sentencesAfter", + "description": "Number of sentences to fetch after the matched sentence. Defaults to 2.", + "type": "number", + "additionalParams": true, + "optional": true, + "id": "vectaraUpsert_0-input-sentencesAfter-number" + }, + { + "label": "Lambda", + "name": "lambda", + "description": "Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.", + "type": "number", + "additionalParams": true, + "optional": true, + "id": "vectaraUpsert_0-input-lambda-number" + }, + { + "label": "Top K", + "name": "topK", + "description": "Number of top results to fetch. Defaults to 4", + "placeholder": "4", + "type": "number", + "additionalParams": true, + "optional": true, + "id": "vectaraUpsert_0-input-topK-number" + } + ], + "inputAnchors": [ + { + "label": "Document", + "name": "document", + "type": "Document", + "list": true, + "id": "vectaraUpsert_0-input-document-Document" + } + ], + "inputs": { + "document": ["{{pdfFile_0.data.instance}}"], + "filter": "", + "sentencesBefore": "", + "sentencesAfter": "", + "lambda": "", + "topK": "" + }, + "outputAnchors": [ + { + "name": "output", + "label": "Output", + "type": "options", + "options": [ + { + "id": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever", + "name": "retriever", + "label": "Vectara Retriever", + "type": "Vectara | VectorStoreRetriever | BaseRetriever" + }, + { + "id": "vectaraUpsert_0-output-vectorStore-Vectara|VectorStore", + "name": "vectorStore", + "label": "Vectara Vector Store", + "type": "Vectara | VectorStore" + } + ], + "default": "retriever" + } + ], + "outputs": { "output": "retriever" }, + "selected": false + }, + "positionAbsolute": { "x": 172.06946164914868, "y": 373.11406233089934 }, + "selected": false } ], "edges": [ + { + "source": "chatOpenAI_0", + "sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel", + "target": "conversationalRetrievalQAChain_0", + "targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel", + "type": "buttonedge", + "id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel", + "data": { "label": "" } + }, { "source": "pdfFile_0", "sourceHandle": "pdfFile_0-output-pdfFile-Document", @@ -432,15 +443,6 @@ "type": "buttonedge", "id": "vectaraUpsert_0-vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever", "data": { "label": "" } - }, - { - "source": "chatOpenAI_0", - "sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel", - "target": "conversationalRetrievalQAChain_0", - "targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel", - "type": "buttonedge", - "id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel", - "data": { "label": "" } } ] } From c83d0ab3205739dc5fa9e289ed609c2241041a36 Mon Sep 17 00:00:00 2001 From: Atish Amte Date: Thu, 17 Aug 2023 00:33:01 +0530 Subject: [PATCH 11/15] added puppeteer options --- .../documentloaders/Puppeteer/Puppeteer.ts | 64 +++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index ea6280db..036e4053 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -1,8 +1,9 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' -import { PuppeteerWebBaseLoader } from 'langchain/document_loaders/web/puppeteer' +import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer' import { test } from 'linkifyjs' import { webCrawl, xmlScrape } from '../../../src' +import { PuppeteerLifeCycleEvent } from 'puppeteer' class Puppeteer_DocumentLoaders implements INode { label: string @@ -62,10 +63,47 @@ class Puppeteer_DocumentLoaders implements INode { type: 'number', optional: true, additionalParams: true, - description: - 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', + description: 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` }, + { + label: 'Wait Until', + name: 'waitUntilGoToOption', + type: 'options', + description: 'Select a go to wait until option', + options: [ + { + label: 'Load', + name: 'load', + description: `When the initial HTML document\'s DOM has been loaded and parsed` + }, + { + label: 'DOM Content Loaded', + name: 'domcontentloaded', + description: `When the complete HTML document\'s DOM has been loaded and parsed` + }, + { + label: 'Network Idle 0', + name: 'networkidle0', + description: 'Navigation is finished when there are no more than 0 network connections for at least 500 ms' + }, + { + label: 'Network Idle 2', + name: 'networkidle2', + description: 'Navigation is finished when there are no more than 2 network connections for at least 500 ms' + } + ], + optional: true, + additionalParams: true + }, + { + label: 'Wait for selector to load', + name: 'waitForSelector', + type: 'string', + optional: true, + additionalParams: true, + description: 'CSS selectors like .div or #div', + }, { label: 'Metadata', name: 'metadata', @@ -81,6 +119,8 @@ class Puppeteer_DocumentLoaders implements INode { const metadata = nodeData.inputs?.metadata const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string let limit = nodeData.inputs?.limit as string + let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as PuppeteerLifeCycleEvent + let waitForSelector = nodeData.inputs?.waitForSelector as string let url = nodeData.inputs?.url as string url = url.trim() @@ -91,12 +131,26 @@ class Puppeteer_DocumentLoaders implements INode { async function puppeteerLoader(url: string): Promise { try { let docs = [] - const loader = new PuppeteerWebBaseLoader(url, { + const config: PuppeteerWebBaseLoaderOptions = { launchOptions: { args: ['--no-sandbox'], headless: 'new' } - }) + }; + if (waitUntilGoToOption) { + config['gotoOptions'] = { + waitUntil: waitUntilGoToOption + } + } + if (waitForSelector) { + config['evaluate'] = async (page: Page, browser: Browser): Promise => { + await page.waitForSelector(waitForSelector) + + const result = await page.evaluate(() => document.body.innerHTML) + return result + } + } + const loader = new PuppeteerWebBaseLoader(url, config) if (textSplitter) { docs = await loader.loadAndSplit(textSplitter) } else { From 8414f347def05fe405744562c28615e5bc9952d8 Mon Sep 17 00:00:00 2001 From: Atish Amte Date: Thu, 17 Aug 2023 00:36:03 +0530 Subject: [PATCH 12/15] spelling correction --- .../nodes/documentloaders/Cheerio/Cheerio.ts | 2 +- .../documentloaders/Playwright/Playwright.ts | 2 +- .../documentloaders/Puppeteer/Puppeteer.ts | 2 +- .../marketplaces/chatflows/WebPage QnA.json | 56 ++++++++++++++----- 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts index 310aa9e6..1c21c1ea 100644 --- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts +++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts @@ -64,7 +64,7 @@ class Cheerio_DocumentLoaders implements INode { additionalParams: true, description: 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', - warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` + warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` }, { label: 'Metadata', diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index 3399574d..2ddd6a8d 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -64,7 +64,7 @@ class Playwright_DocumentLoaders implements INode { additionalParams: true, description: 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', - warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` + warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` }, { label: 'Metadata', diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index 036e4053..c3b61a2b 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -64,7 +64,7 @@ class Puppeteer_DocumentLoaders implements INode { optional: true, additionalParams: true, description: 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', - warning: `Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` + warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` }, { label: 'Wait Until', diff --git a/packages/server/marketplaces/chatflows/WebPage QnA.json b/packages/server/marketplaces/chatflows/WebPage QnA.json index 8197c20a..09246150 100644 --- a/packages/server/marketplaces/chatflows/WebPage QnA.json +++ b/packages/server/marketplaces/chatflows/WebPage QnA.json @@ -16,7 +16,11 @@ "version": 1, "name": "chatOpenAI", "type": "ChatOpenAI", - "baseClasses": ["ChatOpenAI", "BaseChatModel", "BaseLanguageModel"], + "baseClasses": [ + "ChatOpenAI", + "BaseChatModel", + "BaseLanguageModel" + ], "category": "Chat Models", "description": "Wrapper around OpenAI large language models that use the Chat endpoint", "inputParams": [ @@ -24,7 +28,9 @@ "label": "Connect Credential", "name": "credential", "type": "credential", - "credentialNames": ["openAIApi"], + "credentialNames": [ + "openAIApi" + ], "id": "chatOpenAI_0-input-credential-credential" }, { @@ -170,7 +176,10 @@ "version": 1, "name": "openAIEmbeddings", "type": "OpenAIEmbeddings", - "baseClasses": ["OpenAIEmbeddings", "Embeddings"], + "baseClasses": [ + "OpenAIEmbeddings", + "Embeddings" + ], "category": "Embeddings", "description": "OpenAI API to generate embeddings for a given text", "inputParams": [ @@ -178,7 +187,9 @@ "label": "Connect Credential", "name": "credential", "type": "credential", - "credentialNames": ["openAIApi"], + "credentialNames": [ + "openAIApi" + ], "id": "openAIEmbeddings_0-input-credential-credential" }, { @@ -318,7 +329,10 @@ "version": 1, "name": "conversationalRetrievalQAChain", "type": "ConversationalRetrievalQAChain", - "baseClasses": ["ConversationalRetrievalQAChain", "BaseChain"], + "baseClasses": [ + "ConversationalRetrievalQAChain", + "BaseChain" + ], "category": "Chains", "description": "Document QA - built on RetrievalQAChain to provide a chat history component", "inputParams": [ @@ -428,7 +442,9 @@ "version": 1, "name": "cheerioWebScraper", "type": "Document", - "baseClasses": ["Document"], + "baseClasses": [ + "Document" + ], "category": "Document Loaders", "description": "Load data from webpages", "inputParams": [ @@ -466,7 +482,7 @@ "optional": true, "additionalParams": true, "description": "Only used when \"Get Relative Links Method\" is selected. Set 0 to retrieve all relative links, default limit is 10.", - "warning": "Retreiving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)", + "warning": "Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)", "id": "cheerioWebScraper_0-input-limit-number" }, { @@ -527,7 +543,11 @@ "version": 1, "name": "pineconeUpsert", "type": "Pinecone", - "baseClasses": ["Pinecone", "VectorStoreRetriever", "BaseRetriever"], + "baseClasses": [ + "Pinecone", + "VectorStoreRetriever", + "BaseRetriever" + ], "category": "Vector Stores", "description": "Upsert documents to Pinecone", "inputParams": [ @@ -535,7 +555,9 @@ "label": "Connect Credential", "name": "credential", "type": "credential", - "credentialNames": ["pineconeApi"], + "credentialNames": [ + "pineconeApi" + ], "id": "pineconeUpsert_0-input-credential-credential" }, { @@ -580,7 +602,9 @@ } ], "inputs": { - "document": ["{{cheerioWebScraper_0.data.instance}}"], + "document": [ + "{{cheerioWebScraper_0.data.instance}}" + ], "embeddings": "{{openAIEmbeddings_0.data.instance}}", "pineconeIndex": "", "pineconeNamespace": "", @@ -635,7 +659,11 @@ "version": 1, "name": "motorheadMemory", "type": "MotorheadMemory", - "baseClasses": ["MotorheadMemory", "BaseChatMemory", "BaseMemory"], + "baseClasses": [ + "MotorheadMemory", + "BaseChatMemory", + "BaseMemory" + ], "category": "Memory", "description": "Use Motorhead Memory to store chat conversations", "inputParams": [ @@ -645,7 +673,9 @@ "type": "credential", "optional": true, "description": "Only needed when using hosted solution - https://getmetal.io", - "credentialNames": ["motorheadMemoryApi"], + "credentialNames": [ + "motorheadMemoryApi" + ], "id": "motorheadMemory_0-input-credential-credential" }, { @@ -768,4 +798,4 @@ } } ] -} +} \ No newline at end of file From 338082f0aa6e7bfc7d61077d03b0ff10253c3d9b Mon Sep 17 00:00:00 2001 From: Atish Amte Date: Thu, 17 Aug 2023 00:52:35 +0530 Subject: [PATCH 13/15] playwright config --- .../documentloaders/Playwright/Playwright.ts | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index 2ddd6a8d..b376c05b 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -1,6 +1,6 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' -import { PlaywrightWebBaseLoader } from 'langchain/document_loaders/web/playwright' +import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright' import { test } from 'linkifyjs' import { webCrawl, xmlScrape } from '../../../src' @@ -66,6 +66,44 @@ class Playwright_DocumentLoaders implements INode { 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` }, + { + label: 'Wait Until', + name: 'waitUntilGoToOption', + type: 'options', + description: 'Select a go to wait until option', + options: [ + { + label: 'Load', + name: 'load', + description: 'Consider operation to be finished when the load event is fired.' + }, + { + label: 'DOM Content Loaded', + name: 'domcontentloaded', + description: 'Consider operation to be finished when the DOMContentLoaded event is fired.' + }, + { + label: 'Network Idle', + name: 'networkidle', + description: 'Navigation is finished when there are no more connections for at least 500 ms.' + }, + { + label: 'Commit', + name: 'commit', + description: 'Consider operation to be finished when network response is received and the document started loading.' + } + ], + optional: true, + additionalParams: true + }, + { + label: 'Wait for selector to load', + name: 'waitForSelector', + type: 'string', + optional: true, + additionalParams: true, + description: 'CSS selectors like .div or #div', + }, { label: 'Metadata', name: 'metadata', @@ -81,6 +119,8 @@ class Playwright_DocumentLoaders implements INode { const metadata = nodeData.inputs?.metadata const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string let limit = nodeData.inputs?.limit as string + let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as "load" | "domcontentloaded" | "networkidle" | "commit" | undefined + let waitForSelector = nodeData.inputs?.waitForSelector as string let url = nodeData.inputs?.url as string url = url.trim() @@ -91,7 +131,26 @@ class Playwright_DocumentLoaders implements INode { async function playwrightLoader(url: string): Promise { try { let docs = [] - const loader = new PlaywrightWebBaseLoader(url) + const config: PlaywrightWebBaseLoaderOptions = { + launchOptions: { + args: ['--no-sandbox'], + headless: true + } + }; + if (waitUntilGoToOption) { + config['gotoOptions'] = { + waitUntil: waitUntilGoToOption + } + } + if (waitForSelector) { + config['evaluate'] = async (page: Page, browser: Browser): Promise => { + await page.waitForSelector(waitForSelector) + + const result = await page.evaluate(() => document.body.innerHTML) + return result + } + } + const loader = new PlaywrightWebBaseLoader(url, config) if (textSplitter) { docs = await loader.loadAndSplit(textSplitter) } else { From 888fa356b93d2d0d2ff3b11addd11c839c5b225f Mon Sep 17 00:00:00 2001 From: Atish Amte Date: Thu, 17 Aug 2023 01:11:31 +0530 Subject: [PATCH 14/15] lint fixes --- .../documentloaders/Playwright/Playwright.ts | 8 +-- .../documentloaders/Puppeteer/Puppeteer.ts | 13 ++--- .../marketplaces/chatflows/WebPage QnA.json | 54 +++++-------------- 3 files changed, 23 insertions(+), 52 deletions(-) diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index b376c05b..eb246045 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -102,7 +102,7 @@ class Playwright_DocumentLoaders implements INode { type: 'string', optional: true, additionalParams: true, - description: 'CSS selectors like .div or #div', + description: 'CSS selectors like .div or #div' }, { label: 'Metadata', @@ -119,7 +119,7 @@ class Playwright_DocumentLoaders implements INode { const metadata = nodeData.inputs?.metadata const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string let limit = nodeData.inputs?.limit as string - let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as "load" | "domcontentloaded" | "networkidle" | "commit" | undefined + let waitUntilGoToOption = nodeData.inputs?.waitUntilGoToOption as 'load' | 'domcontentloaded' | 'networkidle' | 'commit' | undefined let waitForSelector = nodeData.inputs?.waitForSelector as string let url = nodeData.inputs?.url as string @@ -136,14 +136,14 @@ class Playwright_DocumentLoaders implements INode { args: ['--no-sandbox'], headless: true } - }; + } if (waitUntilGoToOption) { config['gotoOptions'] = { waitUntil: waitUntilGoToOption } } if (waitForSelector) { - config['evaluate'] = async (page: Page, browser: Browser): Promise => { + config['evaluate'] = async (page: Page, _: Browser): Promise => { await page.waitForSelector(waitForSelector) const result = await page.evaluate(() => document.body.innerHTML) diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index c3b61a2b..4691eb94 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -63,7 +63,8 @@ class Puppeteer_DocumentLoaders implements INode { type: 'number', optional: true, additionalParams: true, - description: 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', + description: + 'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.', warning: `Retrieving all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)` }, { @@ -75,12 +76,12 @@ class Puppeteer_DocumentLoaders implements INode { { label: 'Load', name: 'load', - description: `When the initial HTML document\'s DOM has been loaded and parsed` + description: `When the initial HTML document's DOM has been loaded and parsed` }, { label: 'DOM Content Loaded', name: 'domcontentloaded', - description: `When the complete HTML document\'s DOM has been loaded and parsed` + description: `When the complete HTML document's DOM has been loaded and parsed` }, { label: 'Network Idle 0', @@ -102,7 +103,7 @@ class Puppeteer_DocumentLoaders implements INode { type: 'string', optional: true, additionalParams: true, - description: 'CSS selectors like .div or #div', + description: 'CSS selectors like .div or #div' }, { label: 'Metadata', @@ -136,14 +137,14 @@ class Puppeteer_DocumentLoaders implements INode { args: ['--no-sandbox'], headless: 'new' } - }; + } if (waitUntilGoToOption) { config['gotoOptions'] = { waitUntil: waitUntilGoToOption } } if (waitForSelector) { - config['evaluate'] = async (page: Page, browser: Browser): Promise => { + config['evaluate'] = async (page: Page, _: Browser): Promise => { await page.waitForSelector(waitForSelector) const result = await page.evaluate(() => document.body.innerHTML) diff --git a/packages/server/marketplaces/chatflows/WebPage QnA.json b/packages/server/marketplaces/chatflows/WebPage QnA.json index 09246150..812f0bd5 100644 --- a/packages/server/marketplaces/chatflows/WebPage QnA.json +++ b/packages/server/marketplaces/chatflows/WebPage QnA.json @@ -16,11 +16,7 @@ "version": 1, "name": "chatOpenAI", "type": "ChatOpenAI", - "baseClasses": [ - "ChatOpenAI", - "BaseChatModel", - "BaseLanguageModel" - ], + "baseClasses": ["ChatOpenAI", "BaseChatModel", "BaseLanguageModel"], "category": "Chat Models", "description": "Wrapper around OpenAI large language models that use the Chat endpoint", "inputParams": [ @@ -28,9 +24,7 @@ "label": "Connect Credential", "name": "credential", "type": "credential", - "credentialNames": [ - "openAIApi" - ], + "credentialNames": ["openAIApi"], "id": "chatOpenAI_0-input-credential-credential" }, { @@ -176,10 +170,7 @@ "version": 1, "name": "openAIEmbeddings", "type": "OpenAIEmbeddings", - "baseClasses": [ - "OpenAIEmbeddings", - "Embeddings" - ], + "baseClasses": ["OpenAIEmbeddings", "Embeddings"], "category": "Embeddings", "description": "OpenAI API to generate embeddings for a given text", "inputParams": [ @@ -187,9 +178,7 @@ "label": "Connect Credential", "name": "credential", "type": "credential", - "credentialNames": [ - "openAIApi" - ], + "credentialNames": ["openAIApi"], "id": "openAIEmbeddings_0-input-credential-credential" }, { @@ -329,10 +318,7 @@ "version": 1, "name": "conversationalRetrievalQAChain", "type": "ConversationalRetrievalQAChain", - "baseClasses": [ - "ConversationalRetrievalQAChain", - "BaseChain" - ], + "baseClasses": ["ConversationalRetrievalQAChain", "BaseChain"], "category": "Chains", "description": "Document QA - built on RetrievalQAChain to provide a chat history component", "inputParams": [ @@ -442,9 +428,7 @@ "version": 1, "name": "cheerioWebScraper", "type": "Document", - "baseClasses": [ - "Document" - ], + "baseClasses": ["Document"], "category": "Document Loaders", "description": "Load data from webpages", "inputParams": [ @@ -543,11 +527,7 @@ "version": 1, "name": "pineconeUpsert", "type": "Pinecone", - "baseClasses": [ - "Pinecone", - "VectorStoreRetriever", - "BaseRetriever" - ], + "baseClasses": ["Pinecone", "VectorStoreRetriever", "BaseRetriever"], "category": "Vector Stores", "description": "Upsert documents to Pinecone", "inputParams": [ @@ -555,9 +535,7 @@ "label": "Connect Credential", "name": "credential", "type": "credential", - "credentialNames": [ - "pineconeApi" - ], + "credentialNames": ["pineconeApi"], "id": "pineconeUpsert_0-input-credential-credential" }, { @@ -602,9 +580,7 @@ } ], "inputs": { - "document": [ - "{{cheerioWebScraper_0.data.instance}}" - ], + "document": ["{{cheerioWebScraper_0.data.instance}}"], "embeddings": "{{openAIEmbeddings_0.data.instance}}", "pineconeIndex": "", "pineconeNamespace": "", @@ -659,11 +635,7 @@ "version": 1, "name": "motorheadMemory", "type": "MotorheadMemory", - "baseClasses": [ - "MotorheadMemory", - "BaseChatMemory", - "BaseMemory" - ], + "baseClasses": ["MotorheadMemory", "BaseChatMemory", "BaseMemory"], "category": "Memory", "description": "Use Motorhead Memory to store chat conversations", "inputParams": [ @@ -673,9 +645,7 @@ "type": "credential", "optional": true, "description": "Only needed when using hosted solution - https://getmetal.io", - "credentialNames": [ - "motorheadMemoryApi" - ], + "credentialNames": ["motorheadMemoryApi"], "id": "motorheadMemory_0-input-credential-credential" }, { @@ -798,4 +768,4 @@ } } ] -} \ No newline at end of file +} From 35c3555a655b335f714f7378cb4c844df4553873 Mon Sep 17 00:00:00 2001 From: Rafael Reis Date: Wed, 16 Aug 2023 20:07:22 -0300 Subject: [PATCH 15/15] Added BaseOptions to ChatOpenAI --- .../nodes/chatmodels/ChatOpenAI/ChatOpenAI.ts | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/packages/components/nodes/chatmodels/ChatOpenAI/ChatOpenAI.ts b/packages/components/nodes/chatmodels/ChatOpenAI/ChatOpenAI.ts index 9512da66..ca081ff4 100644 --- a/packages/components/nodes/chatmodels/ChatOpenAI/ChatOpenAI.ts +++ b/packages/components/nodes/chatmodels/ChatOpenAI/ChatOpenAI.ts @@ -125,6 +125,13 @@ class ChatOpenAI_ChatModels implements INode { type: 'string', optional: true, additionalParams: true + }, + { + label: 'BaseOptions', + name: 'baseOptions', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -139,6 +146,7 @@ class ChatOpenAI_ChatModels implements INode { const timeout = nodeData.inputs?.timeout as string const streaming = nodeData.inputs?.streaming as boolean const basePath = nodeData.inputs?.basepath as string + const baseOptions = nodeData.inputs?.baseOptions const credentialData = await getCredentialData(nodeData.credential ?? '', options) const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData) @@ -156,8 +164,18 @@ class ChatOpenAI_ChatModels implements INode { if (presencePenalty) obj.presencePenalty = parseFloat(presencePenalty) if (timeout) obj.timeout = parseInt(timeout, 10) + let parsedBaseOptions: any | undefined = undefined + + if (baseOptions) { + try { + parsedBaseOptions = typeof baseOptions === 'object' ? baseOptions : JSON.parse(baseOptions) + } catch (exception) { + throw new Error("Invalid JSON in the ChatOpenAI's BaseOptions: " + exception) + } + } const model = new ChatOpenAI(obj, { - basePath + basePath, + baseOptions: parsedBaseOptions }) return model }