From 0923a356834607a728be3ebccc9a534bb2e73c39 Mon Sep 17 00:00:00 2001
From: Henry <hzj94@hotmail.com>
Date: Fri, 7 Jul 2023 17:36:23 +0100
Subject: [PATCH] add endpoint to HF

---
 .../ChatHuggingFace/ChatHuggingFace.ts        |  13 ++-
 .../nodes/chatmodels/ChatHuggingFace/core.ts  | 109 ++++++++++++++++++
 .../HuggingFaceInferenceEmbedding.ts          |  12 +-
 .../HuggingFaceInferenceEmbedding/core.ts     |  48 ++++++++
 .../HuggingFaceInference.ts                   |  13 ++-
 .../nodes/llms/HuggingFaceInference/core.ts   | 109 ++++++++++++++++++
 packages/components/package.json              |   2 +-
 packages/components/src/utils.ts              |  14 +++
 8 files changed, 316 insertions(+), 4 deletions(-)
 create mode 100644 packages/components/nodes/chatmodels/ChatHuggingFace/core.ts
 create mode 100644 packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts
 create mode 100644 packages/components/nodes/llms/HuggingFaceInference/core.ts
diff --git a/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts b/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts
index 1dae41e4..d92dd1e0 100644
--- a/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts
+++ b/packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts
@@ -1,6 +1,6 @@
 import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { getBaseClasses } from '../../../src/utils'
-import { HFInput, HuggingFaceInference } from 'langchain/llms/hf'
+import { HFInput, HuggingFaceInference } from './core'
 
 class ChatHuggingFace_ChatModels implements INode {
     label: string
@@ -71,6 +71,15 @@ class ChatHuggingFace_ChatModels implements INode {
                 description: 'Frequency Penalty parameter may not apply to certain model. Please check available model parameters',
                 optional: true,
                 additionalParams: true
+            },
+            {
+                label: 'Endpoint',
+                name: 'endpoint',
+                type: 'string',
+                placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2',
+                description: 'Using your own inference endpoint',
+                optional: true,
+                additionalParams: true
             }
         ]
     }
@@ -83,6 +92,7 @@ class ChatHuggingFace_ChatModels implements INode {
         const topP = nodeData.inputs?.topP as string
         const hfTopK = nodeData.inputs?.hfTopK as string
         const frequencyPenalty = nodeData.inputs?.frequencyPenalty as string
+        const endpoint = nodeData.inputs?.endpoint as string
 
         const obj: Partial<HFInput> = {
             model,
@@ -94,6 +104,7 @@ class ChatHuggingFace_ChatModels implements INode {
         if (topP) obj.topP = parseInt(topP, 10)
         if (hfTopK) obj.topK = parseInt(hfTopK, 10)
         if (frequencyPenalty) obj.frequencyPenalty = parseInt(frequencyPenalty, 10)
+        if (endpoint) obj.endpoint = endpoint
 
         const huggingFace = new HuggingFaceInference(obj)
         return huggingFace
diff --git a/packages/components/nodes/chatmodels/ChatHuggingFace/core.ts b/packages/components/nodes/chatmodels/ChatHuggingFace/core.ts
new file mode 100644
index 00000000..958e9072
--- /dev/null
+++ b/packages/components/nodes/chatmodels/ChatHuggingFace/core.ts
@@ -0,0 +1,109 @@
+import { getEnvironmentVariable } from '../../../src/utils'
+import { LLM, BaseLLMParams } from 'langchain/llms/base'
+
+export interface HFInput {
+    /** Model id sent as `model` with each text-generation request */
+    model: string
+
+    /** Sampling temperature to use */
+    temperature?: number
+
+    /**
+     * Maximum number of tokens to generate in the completion (sent as `max_new_tokens`).
+     */
+    maxTokens?: number
+
+    /** Total probability mass of tokens to consider at each step */
+    topP?: number
+
+    /** Integer to define the top tokens considered within the sample operation to create new text. */
+    topK?: number
+
+    /** Penalizes repeated tokens; sent with the request as `repetition_penalty` */
+    frequencyPenalty?: number
+
+    /** API key to use; the class falls back to the HUGGINGFACEHUB_API_KEY environment variable when omitted. */
+    apiKey?: string
+
+    /** URL of a private inference endpoint to use (optional). */
+    endpoint?: string
+}
+
+/** LLM wrapper that sends prompts to Hugging Face text generation, optionally through a private inference endpoint. */
+export class HuggingFaceInference extends LLM implements HFInput {
+    get lc_secrets(): { [key: string]: string } | undefined {
+        return { apiKey: 'HUGGINGFACEHUB_API_KEY' }
+    }
+
+    model = 'gpt2'
+
+    temperature: number | undefined = undefined
+
+    maxTokens: number | undefined = undefined
+
+    topP: number | undefined = undefined
+
+    topK: number | undefined = undefined
+
+    frequencyPenalty: number | undefined = undefined
+
+    apiKey: string | undefined = undefined
+
+    endpoint: string | undefined = undefined
+
+    /** Applies `fields` over the defaults; throws when no API key is passed or found in HUGGINGFACEHUB_API_KEY. */
+    constructor(fields?: Partial<HFInput> & BaseLLMParams) {
+        super(fields ?? {})
+
+        this.model = fields?.model ?? this.model
+        this.temperature = fields?.temperature ?? this.temperature
+        this.maxTokens = fields?.maxTokens ?? this.maxTokens
+        this.topP = fields?.topP ?? this.topP
+        this.topK = fields?.topK ?? this.topK
+        this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty
+        this.endpoint = fields?.endpoint ?? ''
+        this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
+        if (!this.apiKey) {
+            throw new Error(
+                'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.'
+            )
+        }
+    }
+
+    /** Returns the LLM type tag `'hf'`. */
+    _llmType() {
+        return 'hf'
+    }
+
+    /** @ignore */
+    async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
+        const { HfInference } = await HuggingFaceInference.imports()
+        // `endpoint()` returns a new client bound to the URL rather than reconfiguring the receiver, so its result must be used
+        const client = new HfInference(this.apiKey)
+        const hf = this.endpoint ? client.endpoint(this.endpoint) : client
+        const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
+            model: this.model,
+            parameters: {
+                // make it behave similar to openai, returning only the generated text
+                return_full_text: false,
+                temperature: this.temperature,
+                max_new_tokens: this.maxTokens,
+                top_p: this.topP,
+                top_k: this.topK,
+                repetition_penalty: this.frequencyPenalty
+            },
+            inputs: prompt
+        })
+        return res.generated_text
+    }
+
+    /** @ignore */
+    static async imports(): Promise<{ HfInference: typeof import('@huggingface/inference').HfInference }> {
+        try {
+            const { HfInference } = await import('@huggingface/inference')
+            return { HfInference }
+        } catch (e) {
+            throw new Error('Please install huggingface as a dependency with, e.g. `yarn add @huggingface/inference`')
+        }
+    }
+}
diff --git a/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts
index 6f14325a..d77d623f 100644
--- a/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts
+++ b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts
@@ -1,6 +1,6 @@
 import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { getBaseClasses } from '../../../src/utils'
-import { HuggingFaceInferenceEmbeddings, HuggingFaceInferenceEmbeddingsParams } from 'langchain/embeddings/hf'
+import { HuggingFaceInferenceEmbeddings, HuggingFaceInferenceEmbeddingsParams } from './core'
 
 class HuggingFaceInferenceEmbedding_Embeddings implements INode {
     label: string
@@ -31,6 +31,14 @@ class HuggingFaceInferenceEmbedding_Embeddings implements INode {
                 name: 'modelName',
                 type: 'string',
                 optional: true
+            },
+            {
+                label: 'Endpoint',
+                name: 'endpoint',
+                type: 'string',
+                placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/sentence-transformers/all-MiniLM-L6-v2',
+                description: 'Using your own inference endpoint',
+                optional: true
             }
         ]
     }
@@ -38,12 +46,14 @@ class HuggingFaceInferenceEmbedding_Embeddings implements INode {
     async init(nodeData: INodeData): Promise<any> {
         const apiKey = nodeData.inputs?.apiKey as string
         const modelName = nodeData.inputs?.modelName as string
+        const endpoint = nodeData.inputs?.endpoint as string
 
         const obj: Partial<HuggingFaceInferenceEmbeddingsParams> = {
             apiKey
         }
 
         if (modelName) obj.model = modelName
+        if (endpoint) obj.endpoint = endpoint
 
         const model = new HuggingFaceInferenceEmbeddings(obj)
         return model
diff --git a/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts
new file mode 100644
index 00000000..b8d89ebe
--- /dev/null
+++ b/packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/core.ts
@@ -0,0 +1,48 @@
+import { HfInference } from '@huggingface/inference'
+import { Embeddings, EmbeddingsParams } from 'langchain/embeddings/base'
+import { getEnvironmentVariable } from '../../../src/utils'
+
+export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams {
+    apiKey?: string // the class falls back to the HUGGINGFACEHUB_API_KEY environment variable when omitted
+    model?: string // the class defaults to 'sentence-transformers/distilbert-base-nli-mean-tokens' when omitted
+    endpoint?: string // URL of a private inference endpoint to use (optional)
+}
+
+/** Embeddings backed by Hugging Face feature extraction, optionally routed through a private inference endpoint. */
+export class HuggingFaceInferenceEmbeddings extends Embeddings implements HuggingFaceInferenceEmbeddingsParams {
+    apiKey?: string
+
+    endpoint?: string
+
+    model: string
+
+    client: HfInference | ReturnType<HfInference['endpoint']>
+
+    constructor(fields?: HuggingFaceInferenceEmbeddingsParams) {
+        super(fields ?? {})
+
+        this.model = fields?.model ?? 'sentence-transformers/distilbert-base-nli-mean-tokens'
+        this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
+        this.endpoint = fields?.endpoint ?? ''
+        // `endpoint()` returns a new client bound to the URL rather than reconfiguring the receiver, so keep its result
+        const hub = new HfInference(this.apiKey)
+        this.client = this.endpoint ? hub.endpoint(this.endpoint) : hub
+    }
+
+    /** Embeds a batch of texts with one feature-extraction request issued via `this.caller`. */
+    async _embed(texts: string[]): Promise<number[][]> {
+        // replace newlines, which can negatively affect performance.
+        const clean = texts.map((text) => text.replace(/\n/g, ' '))
+        return this.caller.call(() =>
+            this.client.featureExtraction({ model: this.model, inputs: clean })
+        ) as Promise<number[][]>
+    }
+
+    embedQuery(document: string): Promise<number[]> {
+        return this._embed([document]).then((embeddings) => embeddings[0])
+    }
+
+    embedDocuments(documents: string[]): Promise<number[][]> {
+        return this._embed(documents)
+    }
+}
diff --git a/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts b/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts
index 291f67c9..92eb46d5 100644
--- a/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts
+++ b/packages/components/nodes/llms/HuggingFaceInference/HuggingFaceInference.ts
@@ -1,6 +1,6 @@
 import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { getBaseClasses } from '../../../src/utils'
-import { HFInput, HuggingFaceInference } from 'langchain/llms/hf'
+import { HFInput, HuggingFaceInference } from './core'
 
 class HuggingFaceInference_LLMs implements INode {
     label: string
@@ -71,6 +71,15 @@ class HuggingFaceInference_LLMs implements INode {
                 description: 'Frequency Penalty parameter may not apply to certain model. Please check available model parameters',
                 optional: true,
                 additionalParams: true
+            },
+            {
+                label: 'Endpoint',
+                name: 'endpoint',
+                type: 'string',
+                placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2',
+                description: 'Using your own inference endpoint',
+                optional: true,
+                additionalParams: true
             }
         ]
     }
@@ -83,6 +92,7 @@ class HuggingFaceInference_LLMs implements INode {
         const topP = nodeData.inputs?.topP as string
         const hfTopK = nodeData.inputs?.hfTopK as string
         const frequencyPenalty = nodeData.inputs?.frequencyPenalty as string
+        const endpoint = nodeData.inputs?.endpoint as string
 
         const obj: Partial<HFInput> = {
             model,
@@ -94,6 +104,7 @@ class HuggingFaceInference_LLMs implements INode {
         if (topP) obj.topP = parseInt(topP, 10)
         if (hfTopK) obj.topK = parseInt(hfTopK, 10)
         if (frequencyPenalty) obj.frequencyPenalty = parseInt(frequencyPenalty, 10)
+        if (endpoint) obj.endpoint = endpoint
 
         const huggingFace = new HuggingFaceInference(obj)
         return huggingFace
diff --git a/packages/components/nodes/llms/HuggingFaceInference/core.ts b/packages/components/nodes/llms/HuggingFaceInference/core.ts
new file mode 100644
index 00000000..958e9072
--- /dev/null
+++ b/packages/components/nodes/llms/HuggingFaceInference/core.ts
@@ -0,0 +1,109 @@
+import { getEnvironmentVariable } from '../../../src/utils'
+import { LLM, BaseLLMParams } from 'langchain/llms/base'
+
+export interface HFInput {
+    /** Model id sent as `model` with each text-generation request */
+    model: string
+
+    /** Sampling temperature to use */
+    temperature?: number
+
+    /**
+     * Maximum number of tokens to generate in the completion (sent as `max_new_tokens`).
+     */
+    maxTokens?: number
+
+    /** Total probability mass of tokens to consider at each step */
+    topP?: number
+
+    /** Integer to define the top tokens considered within the sample operation to create new text. */
+    topK?: number
+
+    /** Penalizes repeated tokens; sent with the request as `repetition_penalty` */
+    frequencyPenalty?: number
+
+    /** API key to use; the class falls back to the HUGGINGFACEHUB_API_KEY environment variable when omitted. */
+    apiKey?: string
+
+    /** URL of a private inference endpoint to use (optional). */
+    endpoint?: string
+}
+
+/** LLM wrapper that sends prompts to Hugging Face text generation, optionally through a private inference endpoint. */
+export class HuggingFaceInference extends LLM implements HFInput {
+    get lc_secrets(): { [key: string]: string } | undefined {
+        return { apiKey: 'HUGGINGFACEHUB_API_KEY' }
+    }
+
+    model = 'gpt2'
+
+    temperature: number | undefined = undefined
+
+    maxTokens: number | undefined = undefined
+
+    topP: number | undefined = undefined
+
+    topK: number | undefined = undefined
+
+    frequencyPenalty: number | undefined = undefined
+
+    apiKey: string | undefined = undefined
+
+    endpoint: string | undefined = undefined
+
+    /** Applies `fields` over the defaults; throws when no API key is passed or found in HUGGINGFACEHUB_API_KEY. */
+    constructor(fields?: Partial<HFInput> & BaseLLMParams) {
+        super(fields ?? {})
+
+        this.model = fields?.model ?? this.model
+        this.temperature = fields?.temperature ?? this.temperature
+        this.maxTokens = fields?.maxTokens ?? this.maxTokens
+        this.topP = fields?.topP ?? this.topP
+        this.topK = fields?.topK ?? this.topK
+        this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty
+        this.endpoint = fields?.endpoint ?? ''
+        this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
+        if (!this.apiKey) {
+            throw new Error(
+                'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.'
+            )
+        }
+    }
+
+    /** Returns the LLM type tag `'hf'`. */
+    _llmType() {
+        return 'hf'
+    }
+
+    /** @ignore */
+    async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
+        const { HfInference } = await HuggingFaceInference.imports()
+        // `endpoint()` returns a new client bound to the URL rather than reconfiguring the receiver, so its result must be used
+        const client = new HfInference(this.apiKey)
+        const hf = this.endpoint ? client.endpoint(this.endpoint) : client
+        const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
+            model: this.model,
+            parameters: {
+                // make it behave similar to openai, returning only the generated text
+                return_full_text: false,
+                temperature: this.temperature,
+                max_new_tokens: this.maxTokens,
+                top_p: this.topP,
+                top_k: this.topK,
+                repetition_penalty: this.frequencyPenalty
+            },
+            inputs: prompt
+        })
+        return res.generated_text
+    }
+
+    /** @ignore */
+    static async imports(): Promise<{ HfInference: typeof import('@huggingface/inference').HfInference }> {
+        try {
+            const { HfInference } = await import('@huggingface/inference')
+            return { HfInference }
+        } catch (e) {
+            throw new Error('Please install huggingface as a dependency with, e.g. `yarn add @huggingface/inference`')
+        }
+    }
+}
diff --git a/packages/components/package.json b/packages/components/package.json
index a5f03e10..df0589e7 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -19,7 +19,7 @@
         "@aws-sdk/client-dynamodb": "^3.360.0",
         "@dqbd/tiktoken": "^1.0.7",
         "@getzep/zep-js": "^0.3.1",
-        "@huggingface/inference": "1",
+        "@huggingface/inference": "^2.6.1",
         "@pinecone-database/pinecone": "^0.0.12",
         "@qdrant/js-client-rest": "^1.2.2",
         "@supabase/supabase-js": "^2.21.0",
diff --git a/packages/components/src/utils.ts b/packages/components/src/utils.ts
index c247ebc2..c87b0831 100644
--- a/packages/components/src/utils.ts
+++ b/packages/components/src/utils.ts
@@ -201,6 +201,20 @@ export const getAvailableURLs = async (url: string, limit: number) => {
     }
 }
 
+/**
+ * Read an environment variable, tolerating runtimes where `process` is not defined
+ * @param {string} name Name of the environment variable to look up
+ * @returns {string | undefined} The value, or undefined when it is unset,
+ * `process` is unavailable, or the lookup throws
+ */
+export const getEnvironmentVariable = (name: string): string | undefined => {
+    try {
+        return typeof process !== 'undefined' ? process.env?.[name] : undefined
+    } catch (e) {
+        return undefined
+    }
+}
+
 /**
  * Custom chain handler class
  */