Enhance: Improve 'Strip New Lines' for Gemini/Vertex embedding efficiency (#5010)

* Enhance: Improve 'Strip New Lines' for Gemini/Vertex embedding efficiency

* Run lint-fix

---------

Co-authored-by: Ilango Rajagopal <rajagopalilango@gmail.com>
This commit is contained in:
kpj2006
2025-08-07 21:28:52 +05:30
committed by GitHub
parent 9e743e4aa1
commit feb899ab19
2 changed files with 66 additions and 8 deletions
@@ -4,6 +4,25 @@ import { GoogleGenerativeAIEmbeddings, GoogleGenerativeAIEmbeddingsParams } from
import { TaskType } from '@google/generative-ai'
import { MODEL_TYPE, getModels } from '../../../src/modelLoader'
/**
 * GoogleGenerativeAIEmbeddings variant that can replace every newline character
 * in the input with a single space before handing the text to the parent class.
 *
 * The behaviour is controlled by the `stripNewLines` flag, which is off unless
 * the caller sets it in the constructor params.
 */
class GoogleGenerativeAIEmbeddingsWithStripNewLines extends GoogleGenerativeAIEmbeddings {
    /** When truthy, "\n" characters are replaced with spaces before embedding. */
    stripNewLines: boolean

    /**
     * @param params - Parent-class params plus the optional `stripNewLines` flag;
     * a missing (null/undefined) flag is stored as `false`.
     */
    constructor(params: GoogleGenerativeAIEmbeddingsParams & { stripNewLines?: boolean }) {
        super(params)
        const requested = params.stripNewLines
        this.stripNewLines = requested ?? false
    }

    /**
     * Embeds a batch of texts via the parent implementation.
     *
     * @param texts - Texts to embed.
     * @returns Whatever the parent `embedDocuments` resolves to.
     */
    async embedDocuments(texts: string[]): Promise<number[][]> {
        // Flag off: forward the caller's array untouched.
        if (!this.stripNewLines) {
            return super.embedDocuments(texts)
        }
        const flattened = texts.map((doc) => doc.replace(/\n/g, ' '))
        return super.embedDocuments(flattened)
    }

    /**
     * Embeds a single query string via the parent implementation.
     *
     * @param text - Query text to embed.
     * @returns Whatever the parent `embedQuery` resolves to.
     */
    async embedQuery(text: string): Promise<number[]> {
        // Flag off: forward the caller's string untouched.
        if (!this.stripNewLines) {
            return super.embedQuery(text)
        }
        const flattened = text.replace(/\n/g, ' ')
        return super.embedQuery(flattened)
    }
}
class GoogleGenerativeAIEmbedding_Embeddings implements INode {
label: string
name: string
@@ -24,7 +43,7 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode {
this.icon = 'GoogleGemini.svg'
this.category = 'Embeddings'
this.description = 'Google Generative API to generate embeddings for a given text'
this.baseClasses = [this.type, ...getBaseClasses(GoogleGenerativeAIEmbeddings)]
this.baseClasses = [this.type, ...getBaseClasses(GoogleGenerativeAIEmbeddingsWithStripNewLines)]
this.credential = {
label: 'Connect Credential',
name: 'credential',
@@ -55,6 +74,14 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode {
{ label: 'CLUSTERING', name: 'CLUSTERING' }
],
default: 'TASK_TYPE_UNSPECIFIED'
},
{
label: 'Strip New Lines',
name: 'stripNewLines',
type: 'boolean',
optional: true,
additionalParams: true,
description: 'Remove new lines from input text before embedding to reduce token count'
}
]
}
@@ -71,6 +98,7 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode {
const modelName = nodeData.inputs?.modelName as string
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('googleGenerativeAPIKey', credentialData, nodeData)
const stripNewLines = nodeData.inputs?.stripNewLines as boolean
let taskType: TaskType
switch (nodeData.inputs?.tasktype as string) {
@@ -93,13 +121,14 @@ class GoogleGenerativeAIEmbedding_Embeddings implements INode {
taskType = TaskType.TASK_TYPE_UNSPECIFIED
break
}
const obj: GoogleGenerativeAIEmbeddingsParams = {
const obj: GoogleGenerativeAIEmbeddingsParams & { stripNewLines?: boolean } = {
apiKey: apiKey,
modelName: modelName,
taskType: taskType
taskType: taskType,
stripNewLines
}
const model = new GoogleGenerativeAIEmbeddings(obj)
const model = new GoogleGenerativeAIEmbeddingsWithStripNewLines(obj)
return model
}
}
@@ -4,6 +4,25 @@ import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from
import { MODEL_TYPE, getModels, getRegions } from '../../../src/modelLoader'
import { getBaseClasses } from '../../../src/utils'
/**
 * VertexAIEmbeddings variant that can replace every newline character in the
 * input with a single space before handing the text to the parent class.
 *
 * The behaviour is controlled by the `stripNewLines` flag, which is off unless
 * the caller sets it in the constructor params.
 */
class VertexAIEmbeddingsWithStripNewLines extends VertexAIEmbeddings {
    /** When truthy, "\n" characters are replaced with spaces before embedding. */
    stripNewLines: boolean

    /**
     * @param params - Parent-class input plus the optional `stripNewLines` flag;
     * a missing (null/undefined) flag is stored as `false`.
     */
    constructor(params: GoogleVertexAIEmbeddingsInput & { stripNewLines?: boolean }) {
        super(params)
        const requested = params.stripNewLines
        this.stripNewLines = requested ?? false
    }

    /**
     * Embeds a batch of texts via the parent implementation.
     *
     * @param texts - Texts to embed.
     * @returns Whatever the parent `embedDocuments` resolves to.
     */
    async embedDocuments(texts: string[]): Promise<number[][]> {
        // Flag off: forward the caller's array untouched.
        if (!this.stripNewLines) {
            return super.embedDocuments(texts)
        }
        const flattened = texts.map((doc) => doc.replace(/\n/g, ' '))
        return super.embedDocuments(flattened)
    }

    /**
     * Embeds a single query string via the parent implementation.
     *
     * @param text - Query text to embed.
     * @returns Whatever the parent `embedQuery` resolves to.
     */
    async embedQuery(text: string): Promise<number[]> {
        // Flag off: forward the caller's string untouched.
        if (!this.stripNewLines) {
            return super.embedQuery(text)
        }
        const flattened = text.replace(/\n/g, ' ')
        return super.embedQuery(flattened)
    }
}
class GoogleVertexAIEmbedding_Embeddings implements INode {
label: string
name: string
@@ -24,7 +43,7 @@ class GoogleVertexAIEmbedding_Embeddings implements INode {
this.icon = 'GoogleVertex.svg'
this.category = 'Embeddings'
this.description = 'Google vertexAI API to generate embeddings for a given text'
this.baseClasses = [this.type, ...getBaseClasses(VertexAIEmbeddings)]
this.baseClasses = [this.type, ...getBaseClasses(VertexAIEmbeddingsWithStripNewLines)]
this.credential = {
label: 'Connect Credential',
name: 'credential',
@@ -49,6 +68,14 @@ class GoogleVertexAIEmbedding_Embeddings implements INode {
type: 'asyncOptions',
loadMethod: 'listRegions',
optional: true
},
{
label: 'Strip New Lines',
name: 'stripNewLines',
type: 'boolean',
optional: true,
additionalParams: true,
description: 'Remove new lines from input text before embedding to reduce token count'
}
]
}
@@ -66,9 +93,11 @@ class GoogleVertexAIEmbedding_Embeddings implements INode {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const modelName = nodeData.inputs?.modelName as string
const region = nodeData.inputs?.region as string
const stripNewLines = nodeData.inputs?.stripNewLines as boolean
const obj: GoogleVertexAIEmbeddingsInput = {
model: modelName
const obj: GoogleVertexAIEmbeddingsInput & { stripNewLines?: boolean } = {
model: modelName,
stripNewLines
}
const authOptions = await buildGoogleCredentials(nodeData, options)
@@ -76,7 +105,7 @@ class GoogleVertexAIEmbedding_Embeddings implements INode {
if (region) obj.location = region
const model = new VertexAIEmbeddings(obj)
const model = new VertexAIEmbeddingsWithStripNewLines(obj)
return model
}
}