fix: Upgrade Hugging Face Inference API to support Inference Providers (#5454)

- Upgrade @huggingface/inference from v2.6.1 to v4.13.2
- Update ChatHuggingFace to use InferenceClient with chatCompletion API
- Update HuggingFaceInference (LLM) to use v4 HfInference with Inference Providers
- Update HuggingFaceInferenceEmbedding to use v4 HfInference
- Add endpoint handling logic to ignore custom endpoints for provider-based models
- Add improved error handling and validation for API keys
- Update UI descriptions to guide users on proper configuration

Fixes #5161

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Siddharth Chauhan
2025-11-25 17:13:36 +05:30
committed by GitHub
parent 097404f24a
commit 0cc7b3036e
9 changed files with 192 additions and 82 deletions
@@ -78,6 +78,8 @@ export class HuggingFaceInference extends LLM implements HFInput {
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
const { HfInference } = await HuggingFaceInference.imports()
const hf = new HfInference(this.apiKey)
// v4 uses Inference Providers by default; only override if custom endpoint provided
const hfClient = this.endpoint ? hf.endpoint(this.endpoint) : hf
const obj: any = {
parameters: {
// make it behave similar to openai, returning only the generated text
@@ -90,12 +92,10 @@ export class HuggingFaceInference extends LLM implements HFInput {
},
inputs: prompt
}
if (this.endpoint) {
hf.endpoint(this.endpoint)
} else {
if (!this.endpoint) {
obj.model = this.model
}
const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), obj)
const res = await this.caller.callWithOptions({ signal: options.signal }, hfClient.textGeneration.bind(hfClient), obj)
return res.generated_text
}