feature/Add ChatOllama Function (#2403)

* add chat ollama function * update description * update tool system prompt description
2026-06-28 17:01:00 +03:00 · 2024-05-15 19:41:56 +01:00
parent b5e502f3b6
commit 0de7fb8509
3 changed files with 995 additions and 0 deletions
@@ -0,0 +1,185 @@
+import { IterableReadableStream } from '@langchain/core/utils/stream'
+import type { StringWithAutocomplete } from '@langchain/core/utils/types'
+import { BaseLanguageModelCallOptions } from '@langchain/core/language_models/base'
+
+/**
+ * Caller-facing Ollama configuration, written in camelCase. Every field is optional.
+ *
+ * Most fields have a snake_case counterpart in `OllamaRequestParams['options']`
+ * (e.g. `numCtx` / `num_ctx`, `topK` / `top_k`); `model` and `format` correspond to the
+ * top-level fields of `OllamaRequestParams`. `headers`, `keepAlive` and `baseUrl` have no
+ * counterpart in the request types declared in this file.
+ *
+ * NOTE(review): the code that maps these fields onto a request is not in this file —
+ * confirm defaults and field semantics against the consuming class and the Ollama API docs.
+ */
+export interface OllamaInput {
+    embeddingOnly?: boolean
+    f16KV?: boolean
+    frequencyPenalty?: number
+    headers?: Record<string, string>
+    keepAlive?: string
+    logitsAll?: boolean
+    lowVram?: boolean
+    mainGpu?: number
+    model?: string
+    baseUrl?: string
+    mirostat?: number
+    mirostatEta?: number
+    mirostatTau?: number
+    numBatch?: number
+    numCtx?: number
+    numGpu?: number
+    numGqa?: number
+    numKeep?: number
+    numPredict?: number
+    numThread?: number
+    penalizeNewline?: boolean
+    presencePenalty?: number
+    repeatLastN?: number
+    repeatPenalty?: number
+    ropeFrequencyBase?: number
+    ropeFrequencyScale?: number
+    temperature?: number
+    stop?: string[]
+    tfsZ?: number
+    topK?: number
+    topP?: number
+    typicalP?: number
+    useMLock?: boolean
+    useMMap?: boolean
+    vocabOnly?: boolean
+    format?: StringWithAutocomplete<'json'>
+}
+
+/**
+ * Fields shared by every request body in this file. `createOllamaStream` serializes a value
+ * of this type with `JSON.stringify` and sends it as the POST body, so the property names
+ * here are the names that go over the wire.
+ *
+ * `model` and `options` are required; every entry inside `options` is optional.
+ */
+export interface OllamaRequestParams {
+    model: string
+    format?: StringWithAutocomplete<'json'>
+    images?: string[]
+    // Snake_case tuning options; most mirror a camelCase field of `OllamaInput`.
+    options: {
+        embedding_only?: boolean
+        f16_kv?: boolean
+        frequency_penalty?: number
+        logits_all?: boolean
+        low_vram?: boolean
+        main_gpu?: number
+        mirostat?: number
+        mirostat_eta?: number
+        mirostat_tau?: number
+        num_batch?: number
+        num_ctx?: number
+        num_gpu?: number
+        num_gqa?: number
+        num_keep?: number
+        num_thread?: number
+        num_predict?: number
+        penalize_newline?: boolean
+        presence_penalty?: number
+        repeat_last_n?: number
+        repeat_penalty?: number
+        rope_frequency_base?: number
+        rope_frequency_scale?: number
+        temperature?: number
+        stop?: string[]
+        tfs_z?: number
+        top_k?: number
+        top_p?: number
+        typical_p?: number
+        use_mlock?: boolean
+        use_mmap?: boolean
+        vocab_only?: boolean
+    }
+}
+
+/**
+ * One chat message, used both in `OllamaChatRequestParams.messages` and in
+ * `OllamaChatGenerationChunk.message`.
+ */
+export type OllamaMessage = {
+    // Suggests 'user' | 'assistant' | 'system'; presumably any other string is also accepted
+    // by `StringWithAutocomplete` — confirm against @langchain/core.
+    role: StringWithAutocomplete<'user' | 'assistant' | 'system'>
+    content: string
+    // NOTE(review): encoding of each entry is not shown here — confirm against the Ollama API docs.
+    images?: string[]
+}
+
+/**
+ * Request body accepted by `createOllamaGenerateStream` (posted to `/api/generate`):
+ * the shared request fields plus a single `prompt` string.
+ */
+export interface OllamaGenerateRequestParams extends OllamaRequestParams {
+    prompt: string
+}
+
+/**
+ * Request body accepted by `createOllamaChatStream` (posted to `/api/chat`):
+ * the shared request fields plus the list of chat `messages`.
+ */
+export interface OllamaChatRequestParams extends OllamaRequestParams {
+    messages: OllamaMessage[]
+}
+
+/**
+ * Fields common to both streamed chunk types (`OllamaGenerationChunk` and
+ * `OllamaChatGenerationChunk` each intersect with this type).
+ *
+ * NOTE(review): the optional duration/count fields are presumably only populated on the
+ * final chunk (`done: true`), and their units are not shown here — confirm against the
+ * Ollama API docs.
+ */
+export type BaseOllamaGenerationChunk = {
+    model: string
+    created_at: string
+    done: boolean
+    total_duration?: number
+    load_duration?: number
+    prompt_eval_count?: number
+    prompt_eval_duration?: number
+    eval_count?: number
+    eval_duration?: number
+}
+
+/**
+ * Chunk type yielded by `createOllamaGenerateStream`: the common chunk fields plus a
+ * `response` string.
+ */
+export type OllamaGenerationChunk = BaseOllamaGenerationChunk & {
+    response: string
+}
+
+/**
+ * Chunk type yielded by `createOllamaChatStream`: the common chunk fields plus a
+ * `message` object.
+ */
+export type OllamaChatGenerationChunk = BaseOllamaGenerationChunk & {
+    message: OllamaMessage
+}
+
+/**
+ * Per-call options taken by the stream helpers in this file: the base language-model call
+ * options (whose `signal` is forwarded to `fetch`) plus optional extra HTTP headers.
+ */
+export type OllamaCallOptions = BaseLanguageModelCallOptions & {
+    // Spread after the default 'Content-Type' header in `createOllamaStream`, so an entry
+    // here can override it.
+    headers?: Record<string, string>
+}
+
+/**
+ * POSTs `params` as JSON to `url` and yields every newline-delimited JSON value found in
+ * the response body, in arrival order.
+ *
+ * Lines that are not valid JSON are reported with `console.warn` and skipped. A final line
+ * that is not terminated by a newline is still parsed once the stream ends.
+ *
+ * @param url - Full endpoint URL. A leading `http://localhost:` is rewritten to `http://127.0.0.1:`.
+ * @param params - Request payload; serialized with `JSON.stringify`.
+ * @param options - `headers` are merged over the default `Content-Type`; `signal` is passed to `fetch`.
+ * @returns An async generator of parsed JSON values (untyped; the exported wrappers narrow them).
+ * @throws Error if the response status is not OK (the `Response` is attached as `error.response`),
+ *         or if the response has no body.
+ */
+async function* createOllamaStream(url: string, params: OllamaRequestParams, options: OllamaCallOptions) {
+    let formattedUrl = url
+    if (formattedUrl.startsWith('http://localhost:')) {
+        // Node 18 has issues with resolving "localhost"
+        // See https://github.com/node-fetch/node-fetch/issues/1624
+        formattedUrl = formattedUrl.replace('http://localhost:', 'http://127.0.0.1:')
+    }
+    const response = await fetch(formattedUrl, {
+        method: 'POST',
+        body: JSON.stringify(params),
+        headers: {
+            'Content-Type': 'application/json',
+            ...options.headers
+        },
+        signal: options.signal
+    })
+    if (!response.ok) {
+        const responseText = await response.text()
+        // Use the server's `error` string when the body is JSON that carries one; otherwise
+        // fall back to the raw body so the message never ends in "undefined".
+        let detail = responseText
+        try {
+            const json = JSON.parse(responseText)
+            if (json !== null && typeof json === 'object' && typeof json.error === 'string') {
+                detail = json.error
+            }
+        } catch (e) {
+            // Body is not JSON; keep the raw text.
+        }
+        throw Object.assign(new Error(`Ollama call failed with status code ${response.status}: ${detail}`), { response })
+    }
+    if (!response.body) {
+        throw new Error('Could not begin Ollama stream. Please check the given URL and try again.')
+    }
+
+    const stream = IterableReadableStream.fromReadableStream(response.body)
+
+    // Parses one line. Returns undefined (after warning) when the line is not valid JSON.
+    // The result is boxed so that a legitimately parsed `null` is not mistaken for a failure.
+    // Parsing happens here, outside any `yield`, so an error injected by the consumer through
+    // `generator.throw()` is not swallowed and misreported as a bad chunk.
+    const parseLine = (line: string): { value: any } | undefined => {
+        try {
+            return { value: JSON.parse(line) }
+        } catch (e) {
+            console.warn(`Received a non-JSON parseable chunk: ${line}`)
+            return undefined
+        }
+    }
+
+    const decoder = new TextDecoder()
+    let extra = ''
+    for await (const chunk of stream) {
+        // `stream: true` makes the decoder hold back an incomplete multi-byte UTF-8 sequence
+        // until the next chunk instead of emitting U+FFFD replacement characters.
+        const decoded = extra + decoder.decode(chunk, { stream: true })
+        const lines = decoded.split('\n')
+        // The last element is the (possibly empty) partial line after the final newline.
+        extra = lines.pop() || ''
+        for (const line of lines) {
+            const parsed = parseLine(line)
+            if (parsed) {
+                yield parsed.value
+            }
+        }
+    }
+
+    // Flush any bytes still buffered in the decoder, then parse a trailing line that was not
+    // terminated by a newline so the last object of the stream is not dropped.
+    extra += decoder.decode()
+    if (extra.trim() !== '') {
+        const parsed = parseLine(extra)
+        if (parsed) {
+            yield parsed.value
+        }
+    }
+}
+
+/**
+ * Streams generation chunks from the `/api/generate` endpoint under `baseUrl`.
+ *
+ * @param baseUrl - Server base URL; `/api/generate` is appended verbatim.
+ * @param params - Request body, including the `prompt`.
+ * @param options - Call options forwarded unchanged to the underlying stream helper.
+ * @returns An async generator that re-yields every chunk produced by `createOllamaStream`.
+ */
+export async function* createOllamaGenerateStream(
+    baseUrl: string,
+    params: OllamaGenerateRequestParams,
+    options: OllamaCallOptions
+): AsyncGenerator<OllamaGenerationChunk> {
+    const generateEndpoint = baseUrl + '/api/generate'
+    const chunks = createOllamaStream(generateEndpoint, params, options)
+    yield* chunks
+}
+
+/**
+ * Streams chat chunks from the `/api/chat` endpoint under `baseUrl`.
+ *
+ * @param baseUrl - Server base URL; `/api/chat` is appended verbatim.
+ * @param params - Request body, including the chat `messages`.
+ * @param options - Call options forwarded unchanged to the underlying stream helper.
+ * @returns An async generator that re-yields every chunk produced by `createOllamaStream`.
+ */
+export async function* createOllamaChatStream(
+    baseUrl: string,
+    params: OllamaChatRequestParams,
+    options: OllamaCallOptions
+): AsyncGenerator<OllamaChatGenerationChunk> {
+    const chatEndpoint = baseUrl + '/api/chat'
+    const chunks = createOllamaStream(chatEndpoint, params, options)
+    yield* chunks
+}