mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 21:00:58 +03:00
@@ -1,32 +1,19 @@
|
||||
import { LLM, BaseLLMParams } from '@langchain/core/language_models/llms'
|
||||
import { getEnvironmentVariable } from '../../../src/utils'
|
||||
import { GenerationChunk } from '@langchain/core/outputs'
|
||||
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager'
|
||||
|
||||
/**
 * Options accepted by the `HuggingFaceInference` LLM wrapper.
 *
 * Every field except `model` is optional; omitted sampling options are
 * forwarded to the inference client as `undefined`.
 */
export interface HFInput {
    /** Model to use */
    model: string

    /** Sampling temperature to use */
    temperature?: number

    /**
     * Maximum number of tokens to generate in the completion.
     */
    maxTokens?: number

    /**
     * Default stop sequences. Sent as the `stop` generation parameter
     * when the call options carry no `stop` list of their own.
     */
    stopSequences?: string[]

    /** Total probability mass of tokens to consider at each step */
    topP?: number

    /** Integer to define the top tokens considered within the sample operation to create new text. */
    topK?: number

    /** Penalizes repeated tokens according to frequency (sent as `repetition_penalty`). */
    frequencyPenalty?: number

    /** API key to use. */
    apiKey?: string

    /** Private endpoint to use. */
    endpoint?: string

    /** Endpoint URL; when set, the inference client is pointed at it via `endpoint(endpointUrl)`. */
    endpointUrl?: string

    /** Forwarded unchanged as the `includeCredentials` option of the inference client. */
    includeCredentials?: string | boolean
}
|
||||
|
||||
export class HuggingFaceInference extends LLM implements HFInput {
|
||||
@@ -40,6 +27,8 @@ export class HuggingFaceInference extends LLM implements HFInput {
|
||||
|
||||
temperature: number | undefined = undefined
|
||||
|
||||
stopSequences: string[] | undefined = undefined
|
||||
|
||||
maxTokens: number | undefined = undefined
|
||||
|
||||
topP: number | undefined = undefined
|
||||
@@ -50,7 +39,9 @@ export class HuggingFaceInference extends LLM implements HFInput {
|
||||
|
||||
apiKey: string | undefined = undefined
|
||||
|
||||
endpoint: string | undefined = undefined
|
||||
endpointUrl: string | undefined = undefined
|
||||
|
||||
includeCredentials: string | boolean | undefined = undefined
|
||||
|
||||
constructor(fields?: Partial<HFInput> & BaseLLMParams) {
|
||||
super(fields ?? {})
|
||||
@@ -58,11 +49,13 @@ export class HuggingFaceInference extends LLM implements HFInput {
|
||||
this.model = fields?.model ?? this.model
|
||||
this.temperature = fields?.temperature ?? this.temperature
|
||||
this.maxTokens = fields?.maxTokens ?? this.maxTokens
|
||||
this.stopSequences = fields?.stopSequences ?? this.stopSequences
|
||||
this.topP = fields?.topP ?? this.topP
|
||||
this.topK = fields?.topK ?? this.topK
|
||||
this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty
|
||||
this.endpoint = fields?.endpoint ?? ''
|
||||
this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
|
||||
this.endpointUrl = fields?.endpointUrl
|
||||
this.includeCredentials = fields?.includeCredentials
|
||||
if (!this.apiKey) {
|
||||
throw new Error(
|
||||
'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.'
|
||||
@@ -74,31 +67,65 @@ export class HuggingFaceInference extends LLM implements HFInput {
|
||||
return 'hf'
|
||||
}
|
||||
|
||||
/** @ignore */
|
||||
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
|
||||
const { HfInference } = await HuggingFaceInference.imports()
|
||||
const hf = new HfInference(this.apiKey)
|
||||
const obj: any = {
|
||||
/**
 * Builds the request fields shared by the streaming and non-streaming
 * text-generation calls.
 *
 * The prompt is intentionally not part of the result: this method has no
 * `prompt` parameter, and each caller adds `inputs` itself after spreading
 * the returned object.
 *
 * @param options - Parsed call options; `options.stop`, when present,
 *   takes precedence over the instance-level `stopSequences`.
 * @returns An object with the `model` and the generation `parameters`.
 */
invocationParams(options?: this['ParsedCallOptions']) {
    return {
        model: this.model,
        parameters: {
            // make it behave similar to openai, returning only the generated text
            return_full_text: false,
            temperature: this.temperature,
            max_new_tokens: this.maxTokens,
            // per-call stop list wins; fall back to the configured default
            stop: options?.stop ?? this.stopSequences,
            top_p: this.topP,
            top_k: this.topK,
            repetition_penalty: this.frequencyPenalty
        }
    }
}
|
||||
if (this.endpoint) {
|
||||
hf.endpoint(this.endpoint)
|
||||
} else {
|
||||
obj.model = this.model
|
||||
}
|
||||
|
||||
/**
 * Streams a completion for `prompt`, yielding one `GenerationChunk` per
 * token received from the inference client's `textGenerationStream`.
 *
 * @param prompt - Text sent as the `inputs` field of the request.
 * @param options - Parsed call options, forwarded to `invocationParams`.
 * @param runManager - Optional callback manager; notified of every token
 *   through `handleLLMNewToken`.
 * @returns An async generator of chunks. When a stream event carries
 *   `generated_text`, an extra empty chunk with
 *   `generationInfo: { finished: true }` is yielded after that token.
 */
async *_streamResponseChunks(
    prompt: string,
    options: this['ParsedCallOptions'],
    runManager?: CallbackManagerForLLMRun
): AsyncGenerator<GenerationChunk> {
    const hfi = await this._prepareHFInference()
    const stream = await this.caller.call(async () =>
        hfi.textGenerationStream({
            ...this.invocationParams(options),
            inputs: prompt
        })
    )
    for await (const chunk of stream) {
        const token = chunk.token.text
        yield new GenerationChunk({ text: token, generationInfo: chunk })
        await runManager?.handleLLMNewToken(token ?? '')

        // stream is done
        if (chunk.generated_text) {
            yield new GenerationChunk({
                text: '',
                generationInfo: { finished: true }
            })
        }
    }
    // No follow-up non-streaming request here: the former trailing
    // `textGeneration` call used `hf`/`obj`, which do not exist in this
    // method, and its result was never read.
}
|
||||
|
||||
/** @ignore */
|
||||
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
    // Obtain the inference client (already pointed at the endpoint URL if one is set).
    const client = await this._prepareHFInference()

    // Shared generation parameters plus the prompt for this request.
    const request = {
        ...this.invocationParams(options),
        inputs: prompt
    }

    // Run through the caller so the abort signal from the call options is honoured.
    const callOptions = { signal: options.signal }
    const generate = client.textGeneration.bind(client)
    const result = await this.caller.callWithOptions(callOptions, generate, request)

    return result.generated_text
}
|
||||
|
||||
/** @ignore */
|
||||
private async _prepareHFInference() {
    // Resolve the client class through the static `imports()` helper.
    const { HfInference } = await HuggingFaceInference.imports()

    const clientOptions = { includeCredentials: this.includeCredentials }
    const client = new HfInference(this.apiKey, clientOptions)

    // With an endpoint URL configured, hand back the endpoint-bound client instead.
    if (this.endpointUrl) {
        return client.endpoint(this.endpointUrl)
    }
    return client
}
|
||||
|
||||
/** @ignore */
|
||||
static async imports(): Promise<{
|
||||
HfInference: typeof import('@huggingface/inference').HfInference
|
||||
|
||||
Reference in New Issue
Block a user