From feb899ab19e3630535dcba236ee3148ffc5415e3 Mon Sep 17 00:00:00 2001 From: kpj2006 <24ucs074@lnmiit.ac.in> Date: Thu, 7 Aug 2025 21:28:52 +0530 Subject: [PATCH] =?UTF-8?q?Enhance:=20Improve=20'Strip=20New=20Lines'=20fo?= =?UTF-8?q?r=20Gemini/Vertex=20embedding=20effici=E2=80=A6=20(#5010)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Enhance: Improve 'Strip New Lines' for Gemini/Vertex embedding efficiency * Run lint-fix --------- Co-authored-by: Ilango Rajagopal --- .../GoogleGenerativeAIEmbedding.ts | 37 +++++++++++++++++-- .../GoogleVertexAIEmbedding.ts | 37 +++++++++++++++++-- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/packages/components/nodes/embeddings/GoogleGenerativeAIEmbedding/GoogleGenerativeAIEmbedding.ts b/packages/components/nodes/embeddings/GoogleGenerativeAIEmbedding/GoogleGenerativeAIEmbedding.ts index 2aaf53d8..f270d7d0 100644 --- a/packages/components/nodes/embeddings/GoogleGenerativeAIEmbedding/GoogleGenerativeAIEmbedding.ts +++ b/packages/components/nodes/embeddings/GoogleGenerativeAIEmbedding/GoogleGenerativeAIEmbedding.ts @@ -4,6 +4,25 @@ import { GoogleGenerativeAIEmbeddings, GoogleGenerativeAIEmbeddingsParams } from import { TaskType } from '@google/generative-ai' import { MODEL_TYPE, getModels } from '../../../src/modelLoader' +class GoogleGenerativeAIEmbeddingsWithStripNewLines extends GoogleGenerativeAIEmbeddings { + stripNewLines: boolean + + constructor(params: GoogleGenerativeAIEmbeddingsParams & { stripNewLines?: boolean }) { + super(params) + this.stripNewLines = params.stripNewLines ?? false + } + + async embedDocuments(texts: string[]): Promise { + const processedTexts = this.stripNewLines ? texts.map((text) => text.replace(/\n/g, ' ')) : texts + return super.embedDocuments(processedTexts) + } + + async embedQuery(text: string): Promise { + const processedText = this.stripNewLines ? text.replace(/\n/g, ' ') : text + return super.embedQuery(processedText) + } +} + class GoogleGenerativeAIEmbedding_Embeddings implements INode { label: string name: string @@ -24,7 +43,7 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode { this.icon = 'GoogleGemini.svg' this.category = 'Embeddings' this.description = 'Google Generative API to generate embeddings for a given text' - this.baseClasses = [this.type, ...getBaseClasses(GoogleGenerativeAIEmbeddings)] + this.baseClasses = [this.type, ...getBaseClasses(GoogleGenerativeAIEmbeddingsWithStripNewLines)] this.credential = { label: 'Connect Credential', name: 'credential', @@ -55,6 +74,14 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode { { label: 'CLUSTERING', name: 'CLUSTERING' } ], default: 'TASK_TYPE_UNSPECIFIED' + }, + { + label: 'Strip New Lines', + name: 'stripNewLines', + type: 'boolean', + optional: true, + additionalParams: true, + description: 'Remove new lines from input text before embedding to reduce token count' } ] } @@ -71,6 +98,7 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode { const modelName = nodeData.inputs?.modelName as string const credentialData = await getCredentialData(nodeData.credential ?? '', options) const apiKey = getCredentialParam('googleGenerativeAPIKey', credentialData, nodeData) + const stripNewLines = nodeData.inputs?.stripNewLines as boolean let taskType: TaskType switch (nodeData.inputs?.tasktype as string) { @@ -93,13 +121,14 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode { taskType = TaskType.TASK_TYPE_UNSPECIFIED break } - const obj: GoogleGenerativeAIEmbeddingsParams = { + const obj: GoogleGenerativeAIEmbeddingsParams & { stripNewLines?: boolean } = { apiKey: apiKey, modelName: modelName, - taskType: taskType + taskType: taskType, + stripNewLines } - const model = new GoogleGenerativeAIEmbeddings(obj) + const model = new GoogleGenerativeAIEmbeddingsWithStripNewLines(obj) return model } } diff --git a/packages/components/nodes/embeddings/GoogleVertexAIEmbedding/GoogleVertexAIEmbedding.ts b/packages/components/nodes/embeddings/GoogleVertexAIEmbedding/GoogleVertexAIEmbedding.ts index e73ed50e..1744ff24 100644 --- a/packages/components/nodes/embeddings/GoogleVertexAIEmbedding/GoogleVertexAIEmbedding.ts +++ b/packages/components/nodes/embeddings/GoogleVertexAIEmbedding/GoogleVertexAIEmbedding.ts @@ -4,6 +4,25 @@ import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from import { MODEL_TYPE, getModels, getRegions } from '../../../src/modelLoader' import { getBaseClasses } from '../../../src/utils' +class VertexAIEmbeddingsWithStripNewLines extends VertexAIEmbeddings { + stripNewLines: boolean + + constructor(params: GoogleVertexAIEmbeddingsInput & { stripNewLines?: boolean }) { + super(params) + this.stripNewLines = params.stripNewLines ?? false + } + + async embedDocuments(texts: string[]): Promise { + const processedTexts = this.stripNewLines ? texts.map((text) => text.replace(/\n/g, ' ')) : texts + return super.embedDocuments(processedTexts) + } + + async embedQuery(text: string): Promise { + const processedText = this.stripNewLines ? text.replace(/\n/g, ' ') : text + return super.embedQuery(processedText) + } +} + class GoogleVertexAIEmbedding_Embeddings implements INode { label: string name: string @@ -24,7 +43,7 @@ class GoogleVertexAIEmbedding_Embeddings implements INode { this.icon = 'GoogleVertex.svg' this.category = 'Embeddings' this.description = 'Google vertexAI API to generate embeddings for a given text' - this.baseClasses = [this.type, ...getBaseClasses(VertexAIEmbeddings)] + this.baseClasses = [this.type, ...getBaseClasses(VertexAIEmbeddingsWithStripNewLines)] this.credential = { label: 'Connect Credential', name: 'credential', @@ -49,6 +68,14 @@ class GoogleVertexAIEmbedding_Embeddings implements INode { type: 'asyncOptions', loadMethod: 'listRegions', optional: true + }, + { + label: 'Strip New Lines', + name: 'stripNewLines', + type: 'boolean', + optional: true, + additionalParams: true, + description: 'Remove new lines from input text before embedding to reduce token count' } ] } @@ -66,9 +93,11 @@ class GoogleVertexAIEmbedding_Embeddings implements INode { async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const modelName = nodeData.inputs?.modelName as string const region = nodeData.inputs?.region as string + const stripNewLines = nodeData.inputs?.stripNewLines as boolean - const obj: GoogleVertexAIEmbeddingsInput = { - model: modelName + const obj: GoogleVertexAIEmbeddingsInput & { stripNewLines?: boolean } = { + model: modelName, + stripNewLines } const authOptions = await buildGoogleCredentials(nodeData, options) @@ -76,7 +105,7 @@ class GoogleVertexAIEmbedding_Embeddings implements INode { if (region) obj.location = region - const model = new VertexAIEmbeddings(obj) + const model = new VertexAIEmbeddingsWithStripNewLines(obj) return model } }