fix: Upgrade Hugging Face Inference API to support Inference Providers (#5454)

- Upgrade @huggingface/inference from v2.6.1 to v4.13.2
- Update ChatHuggingFace to use InferenceClient with chatCompletion API
- Update HuggingFaceInference (LLM) to use v4 HfInference with Inference Providers
- Update HuggingFaceInferenceEmbedding to use v4 HfInference
- Add endpoint handling logic to ignore custom endpoints for provider-based models
- Add improved error handling and validation for API keys
- Update UI descriptions to guide users on proper configuration

Fixes #5161

Co-authored-by: Henry <hzj94@hotmail.com>
2026-06-28 15:00:57 +03:00 · 2025-11-25 17:13:36 +05:30
parent 097404f24a
commit 0cc7b3036e
9 changed files with 192 additions and 82 deletions
@@ -78,6 +78,8 @@ export class HuggingFaceInference extends LLM implements HFInput {
    async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
        const { HfInference } = await HuggingFaceInference.imports()
        const hf = new HfInference(this.apiKey)
+        // v4 uses Inference Providers by default; only override if custom endpoint provided
+        const hfClient = this.endpoint ? hf.endpoint(this.endpoint) : hf
        const obj: any = {
            parameters: {
                // make it behave similar to openai, returning only the generated text
@@ -90,12 +92,10 @@ export class HuggingFaceInference extends LLM implements HFInput {
            },
            inputs: prompt
        }
-        if (this.endpoint) {
-            hf.endpoint(this.endpoint)
-        } else {
+        if (!this.endpoint) {
            obj.model = this.model
        }
-        const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), obj)
+        const res = await this.caller.callWithOptions({ signal: options.signal }, hfClient.textGeneration.bind(hfClient), obj)
        return res.generated_text
    }