From 0923a356834607a728be3ebccc9a534bb2e73c39 Mon Sep 17 00:00:00 2001 From: Henry Date: Fri, 7 Jul 2023 17:36:23 +0100 Subject: [PATCH] add endpoint to HF --- .../ChatHuggingFace/ChatHuggingFace.ts | 13 ++- .../nodes/chatmodels/ChatHuggingFace/core.ts | 109 ++++++++++++++++++ .../HuggingFaceInferenceEmbedding.ts | 12 +- .../HuggingFaceInferenceEmbedding/core.ts | 48 ++++++++ .../HuggingFaceInference.ts | 13 ++- .../nodes/llms/HuggingFaceInference/core.ts | 109 ++++++++++++++++++ packages/components/package.json | 2 +- packages/components/src/utils.ts | 14 +++ 8 files changed, 316 insertions(+), 4 deletions(-) create mode 100644 packages/components/nodes/chatmodels/ChatHuggingFace/core.ts create mode 100644 packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts create mode 100644 packages/components/nodes/llms/HuggingFaceInference/core.ts diff --git a/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts b/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts index 1dae41e4..d92dd1e0 100644 --- a/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts +++ b/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts @@ -1,6 +1,6 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface' import { getBaseClasses } from '../../../src/utils' -import { HFInput, HuggingFaceInference } from 'langchain/llms/hf' +import { HFInput, HuggingFaceInference } from './core' class ChatHuggingFace_ChatModels implements INode { label: string @@ -71,6 +71,15 @@ class ChatHuggingFace_ChatModels implements INode { description: 'Frequency Penalty parameter may not apply to certain model. 
Please check available model parameters', optional: true, additionalParams: true + }, + { + label: 'Endpoint', + name: 'endpoint', + type: 'string', + placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2', + description: 'Using your own inference endpoint', + optional: true, + additionalParams: true } ] } @@ -83,6 +92,7 @@ class ChatHuggingFace_ChatModels implements INode { const topP = nodeData.inputs?.topP as string const hfTopK = nodeData.inputs?.hfTopK as string const frequencyPenalty = nodeData.inputs?.frequencyPenalty as string + const endpoint = nodeData.inputs?.endpoint as string const obj: Partial = { model, @@ -94,6 +104,7 @@ class ChatHuggingFace_ChatModels implements INode { if (topP) obj.topP = parseInt(topP, 10) if (hfTopK) obj.topK = parseInt(hfTopK, 10) if (frequencyPenalty) obj.frequencyPenalty = parseInt(frequencyPenalty, 10) + if (endpoint) obj.endpoint = endpoint const huggingFace = new HuggingFaceInference(obj) return huggingFace diff --git a/packages/components/nodes/chatmodels/ChatHuggingFace/core.ts b/packages/components/nodes/chatmodels/ChatHuggingFace/core.ts new file mode 100644 index 00000000..958e9072 --- /dev/null +++ b/packages/components/nodes/chatmodels/ChatHuggingFace/core.ts @@ -0,0 +1,109 @@ +import { getEnvironmentVariable } from '../../../src/utils' +import { LLM, BaseLLMParams } from 'langchain/llms/base' + +export interface HFInput { + /** Model to use */ + model: string + + /** Sampling temperature to use */ + temperature?: number + + /** + * Maximum number of tokens to generate in the completion. + */ + maxTokens?: number + + /** Total probability mass of tokens to consider at each step */ + topP?: number + + /** Integer to define the top tokens considered within the sample operation to create new text. */ + topK?: number + + /** Penalizes repeated tokens according to frequency */ + frequencyPenalty?: number + + /** API key to use. */ + apiKey?: string + + /** Private endpoint to use. 
*/ + endpoint?: string +} + +export class HuggingFaceInference extends LLM implements HFInput { + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: 'HUGGINGFACEHUB_API_KEY' + } + } + + model = 'gpt2' + + temperature: number | undefined = undefined + + maxTokens: number | undefined = undefined + + topP: number | undefined = undefined + + topK: number | undefined = undefined + + frequencyPenalty: number | undefined = undefined + + apiKey: string | undefined = undefined + + endpoint: string | undefined = undefined + + constructor(fields?: Partial & BaseLLMParams) { + super(fields ?? {}) + + this.model = fields?.model ?? this.model + this.temperature = fields?.temperature ?? this.temperature + this.maxTokens = fields?.maxTokens ?? this.maxTokens + this.topP = fields?.topP ?? this.topP + this.topK = fields?.topK ?? this.topK + this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty + this.endpoint = fields?.endpoint ?? '' + this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY') + if (!this.apiKey) { + throw new Error( + 'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.' 
+ ) + } + } + + _llmType() { + return 'hf' + } + + /** @ignore */ + async _call(prompt: string, options: this['ParsedCallOptions']): Promise { + const { HfInference } = await HuggingFaceInference.imports() + const hf = new HfInference(this.apiKey) + if (this.endpoint) hf.endpoint(this.endpoint) + const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), { + model: this.model, + parameters: { + // make it behave similar to openai, returning only the generated text + return_full_text: false, + temperature: this.temperature, + max_new_tokens: this.maxTokens, + top_p: this.topP, + top_k: this.topK, + repetition_penalty: this.frequencyPenalty + }, + inputs: prompt + }) + return res.generated_text + } + + /** @ignore */ + static async imports(): Promise<{ + HfInference: typeof import('@huggingface/inference').HfInference + }> { + try { + const { HfInference } = await import('@huggingface/inference') + return { HfInference } + } catch (e) { + throw new Error('Please install huggingface as a dependency with, e.g. 
`yarn add @huggingface/inference`') + } + } +} diff --git a/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts index 6f14325a..d77d623f 100644 --- a/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts +++ b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts @@ -1,6 +1,6 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface' import { getBaseClasses } from '../../../src/utils' -import { HuggingFaceInferenceEmbeddings, HuggingFaceInferenceEmbeddingsParams } from 'langchain/embeddings/hf' +import { HuggingFaceInferenceEmbeddings, HuggingFaceInferenceEmbeddingsParams } from './core' class HuggingFaceInferenceEmbedding_Embeddings implements INode { label: string @@ -31,6 +31,14 @@ class HuggingFaceInferenceEmbedding_Embeddings implements INode { name: 'modelName', type: 'string', optional: true + }, + { + label: 'Endpoint', + name: 'endpoint', + type: 'string', + placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/sentence-transformers/all-MiniLM-L6-v2', + description: 'Using your own inference endpoint', + optional: true } ] } @@ -38,12 +46,14 @@ class HuggingFaceInferenceEmbedding_Embeddings implements INode { async init(nodeData: INodeData): Promise { const apiKey = nodeData.inputs?.apiKey as string const modelName = nodeData.inputs?.modelName as string + const endpoint = nodeData.inputs?.endpoint as string const obj: Partial = { apiKey } if (modelName) obj.model = modelName + if (endpoint) obj.endpoint = endpoint const model = new HuggingFaceInferenceEmbeddings(obj) return model diff --git a/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts new file mode 100644 index 00000000..b8d89ebe --- 
/dev/null +++ b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts @@ -0,0 +1,48 @@ +import { HfInference } from '@huggingface/inference' +import { Embeddings, EmbeddingsParams } from 'langchain/embeddings/base' +import { getEnvironmentVariable } from '../../../src/utils' + +export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams { + apiKey?: string + model?: string + endpoint?: string +} + +export class HuggingFaceInferenceEmbeddings extends Embeddings implements HuggingFaceInferenceEmbeddingsParams { + apiKey?: string + + endpoint?: string + + model: string + + client: HfInference + + constructor(fields?: HuggingFaceInferenceEmbeddingsParams) { + super(fields ?? {}) + + this.model = fields?.model ?? 'sentence-transformers/distilbert-base-nli-mean-tokens' + this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY') + this.endpoint = fields?.endpoint ?? '' + this.client = new HfInference(this.apiKey) + if (this.endpoint) this.client.endpoint(this.endpoint) + } + + async _embed(texts: string[]): Promise { + // replace newlines, which can negatively affect performance. 
+ const clean = texts.map((text) => text.replace(/\n/g, ' ')) + return this.caller.call(() => + this.client.featureExtraction({ + model: this.model, + inputs: clean + }) + ) as Promise + } + + embedQuery(document: string): Promise { + return this._embed([document]).then((embeddings) => embeddings[0]) + } + + embedDocuments(documents: string[]): Promise { + return this._embed(documents) + } +} diff --git a/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts b/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts index 291f67c9..92eb46d5 100644 --- a/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts +++ b/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts @@ -1,6 +1,6 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface' import { getBaseClasses } from '../../../src/utils' -import { HFInput, HuggingFaceInference } from 'langchain/llms/hf' +import { HFInput, HuggingFaceInference } from './core' class HuggingFaceInference_LLMs implements INode { label: string @@ -71,6 +71,15 @@ class HuggingFaceInference_LLMs implements INode { description: 'Frequency Penalty parameter may not apply to certain model. 
Please check available model parameters', optional: true, additionalParams: true + }, + { + label: 'Endpoint', + name: 'endpoint', + type: 'string', + placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2', + description: 'Using your own inference endpoint', + optional: true, + additionalParams: true } ] } @@ -83,6 +92,7 @@ class HuggingFaceInference_LLMs implements INode { const topP = nodeData.inputs?.topP as string const hfTopK = nodeData.inputs?.hfTopK as string const frequencyPenalty = nodeData.inputs?.frequencyPenalty as string + const endpoint = nodeData.inputs?.endpoint as string const obj: Partial = { model, @@ -94,6 +104,7 @@ class HuggingFaceInference_LLMs implements INode { if (topP) obj.topP = parseInt(topP, 10) if (hfTopK) obj.topK = parseInt(hfTopK, 10) if (frequencyPenalty) obj.frequencyPenalty = parseInt(frequencyPenalty, 10) + if (endpoint) obj.endpoint = endpoint const huggingFace = new HuggingFaceInference(obj) return huggingFace diff --git a/packages/components/nodes/llms/HuggingFaceInference/core.ts b/packages/components/nodes/llms/HuggingFaceInference/core.ts new file mode 100644 index 00000000..958e9072 --- /dev/null +++ b/packages/components/nodes/llms/HuggingFaceInference/core.ts @@ -0,0 +1,109 @@ +import { getEnvironmentVariable } from '../../../src/utils' +import { LLM, BaseLLMParams } from 'langchain/llms/base' + +export interface HFInput { + /** Model to use */ + model: string + + /** Sampling temperature to use */ + temperature?: number + + /** + * Maximum number of tokens to generate in the completion. + */ + maxTokens?: number + + /** Total probability mass of tokens to consider at each step */ + topP?: number + + /** Integer to define the top tokens considered within the sample operation to create new text. */ + topK?: number + + /** Penalizes repeated tokens according to frequency */ + frequencyPenalty?: number + + /** API key to use. */ + apiKey?: string + + /** Private endpoint to use. 
*/ + endpoint?: string +} + +export class HuggingFaceInference extends LLM implements HFInput { + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: 'HUGGINGFACEHUB_API_KEY' + } + } + + model = 'gpt2' + + temperature: number | undefined = undefined + + maxTokens: number | undefined = undefined + + topP: number | undefined = undefined + + topK: number | undefined = undefined + + frequencyPenalty: number | undefined = undefined + + apiKey: string | undefined = undefined + + endpoint: string | undefined = undefined + + constructor(fields?: Partial & BaseLLMParams) { + super(fields ?? {}) + + this.model = fields?.model ?? this.model + this.temperature = fields?.temperature ?? this.temperature + this.maxTokens = fields?.maxTokens ?? this.maxTokens + this.topP = fields?.topP ?? this.topP + this.topK = fields?.topK ?? this.topK + this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty + this.endpoint = fields?.endpoint ?? '' + this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY') + if (!this.apiKey) { + throw new Error( + 'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.' 
+ ) + } + } + + _llmType() { + return 'hf' + } + + /** @ignore */ + async _call(prompt: string, options: this['ParsedCallOptions']): Promise { + const { HfInference } = await HuggingFaceInference.imports() + const hf = new HfInference(this.apiKey) + if (this.endpoint) hf.endpoint(this.endpoint) + const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), { + model: this.model, + parameters: { + // make it behave similar to openai, returning only the generated text + return_full_text: false, + temperature: this.temperature, + max_new_tokens: this.maxTokens, + top_p: this.topP, + top_k: this.topK, + repetition_penalty: this.frequencyPenalty + }, + inputs: prompt + }) + return res.generated_text + } + + /** @ignore */ + static async imports(): Promise<{ + HfInference: typeof import('@huggingface/inference').HfInference + }> { + try { + const { HfInference } = await import('@huggingface/inference') + return { HfInference } + } catch (e) { + throw new Error('Please install huggingface as a dependency with, e.g. 
`yarn add @huggingface/inference`') + } + } +} diff --git a/packages/components/package.json b/packages/components/package.json index a5f03e10..df0589e7 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -19,7 +19,7 @@ "@aws-sdk/client-dynamodb": "^3.360.0", "@dqbd/tiktoken": "^1.0.7", "@getzep/zep-js": "^0.3.1", - "@huggingface/inference": "1", + "@huggingface/inference": "^2.6.1", "@pinecone-database/pinecone": "^0.0.12", "@qdrant/js-client-rest": "^1.2.2", "@supabase/supabase-js": "^2.21.0", diff --git a/packages/components/src/utils.ts b/packages/components/src/utils.ts index c247ebc2..c87b0831 100644 --- a/packages/components/src/utils.ts +++ b/packages/components/src/utils.ts @@ -201,6 +201,20 @@ export const getAvailableURLs = async (url: string, limit: number) => { } } +/** + * Get the value of an environment variable + * @param {string} name Name of the environment variable + * @returns {string | undefined} The variable's value, or undefined when it is + * not set or `process` is unavailable + */ +export const getEnvironmentVariable = (name: string): string | undefined => { + try { + return typeof process !== 'undefined' ? process.env?.[name] : undefined + } catch (e) { + return undefined + } +} + /** * Custom chain handler class */