Adding support for LLM Caching.

2026-06-22 09:01:09 +03:00 · 2023-09-29 07:54:16 +05:30
parent 113d90e5dc
commit d588ac0480
6 changed files with 87 additions and 5 deletions
@@ -0,0 +1,44 @@
+import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
+import { InMemoryCache } from 'langchain/cache'
+import { getBaseClasses } from '../../../src'
+
+/** Node that exposes langchain's shared `InMemoryCache.global()` instance as an 'LLMCache' output. */
+class LocalMemoryCache implements INode {
+    label: string
+    name: string
+    version: number
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+    outputs: INodeOutputsValue[]
+    inMemoryCache?: InMemoryCache // unset until init() runs, then reused on later calls
+
+    constructor() {
+        this.label = 'Local (Builtin) Cache'
+        this.name = 'localCache'
+        this.version = 1.0
+        this.description = 'Cache LLM responses in local memory; the cache is cleared when the app restarts'
+        this.type = 'LLMCache'
+        this.icon = 'memorycache.png'
+        this.category = 'LLM Cache'
+        this.baseClasses = [this.type, 'LLMCacheBase']
+        this.inputs = [] // this node takes no inputs
+        this.outputs = [
+            {
+                label: 'LLM Cache',
+                name: 'cache',
+                baseClasses: [this.type, ...getBaseClasses(InMemoryCache)]
+            }
+        ]
+    }
+
+    async init(nodeData: INodeData): Promise<any> { // returns the cache instance; nodeData is not read
+        if (!this.inMemoryCache) this.inMemoryCache = InMemoryCache.global()
+        return this.inMemoryCache
+    }
+}
+
+module.exports = { nodeClass: LocalMemoryCache } // CommonJS export; the class is exposed under the 'nodeClass' key
@@ -1,6 +1,8 @@
 import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
 import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
 import { OpenAI, OpenAIInput } from 'langchain/llms/openai'
+import { BaseLLMParams } from 'langchain/dist/llms/base'
+import { BaseCache } from 'langchain/schema'

 class OpenAI_LLMs implements INode {
    label: string
@@ -17,7 +19,7 @@ class OpenAI_LLMs implements INode {
    constructor() {
        this.label = 'OpenAI'
        this.name = 'openAI'
-        this.version = 2.0
+        this.version = 3.0
        this.type = 'OpenAI'
        this.icon = 'openai.png'
        this.category = 'LLMs'
@@ -30,6 +32,12 @@ class OpenAI_LLMs implements INode {
            credentialNames: ['openAIApi']
        }
        this.inputs = [
+            {
+                label: 'Cache',
+                name: 'llmCache',
+                type: 'LLMCache',
+                optional: true
+            },
            {
                label: 'Model Name',
                name: 'modelName',
@@ -149,7 +157,9 @@ class OpenAI_LLMs implements INode {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)

-        const obj: Partial<OpenAIInput> & { openAIApiKey?: string } = {
+        const llmCache = nodeData.inputs?.llmCache as BaseCache
+
+        const obj: Partial<OpenAIInput> & BaseLLMParams & { openAIApiKey?: string } = {
            temperature: parseFloat(temperature),
            modelName,
            openAIApiKey,
@@ -164,8 +174,9 @@ class OpenAI_LLMs implements INode {
        if (batchSize) obj.batchSize = parseInt(batchSize, 10)
        if (bestOf) obj.bestOf = parseInt(bestOf, 10)

-        let parsedBaseOptions: any | undefined = undefined
+        if (llmCache) obj.cache = llmCache

+        let parsedBaseOptions: any | undefined = undefined
        if (baseOptions) {
            try {
                parsedBaseOptions = typeof baseOptions === 'object' ? baseOptions : JSON.parse(baseOptions)
@@ -42,7 +42,7 @@
        "google-auth-library": "^9.0.0",
        "graphql": "^16.6.0",
        "html-to-text": "^9.0.5",
-        "langchain": "^0.0.152",
+        "langchain": "^0.0.154",
        "langfuse-langchain": "^1.0.14-alpha.0",
        "langsmith": "^0.0.32",
        "linkifyjs": "^4.1.1",
@@ -1,6 +1,7 @@
 /**
 * Types
 */
+import { BaseCache } from 'langchain/schema'

 export type NodeParamsType =
    | 'asyncOptions'
@@ -176,3 +177,9 @@ export class VectorStoreRetriever {
        this.vectorStore = fields.vectorStore
    }
 }
+
+export interface LLMCacheBase { // describes an LLM cache: a name, a description and the underlying cache object
+    name: string
+    description: string
+    baseCache: BaseCache // `BaseCache` is the type imported from 'langchain/schema'
+}
@@ -151,6 +151,7 @@ export class CustomChainHandler extends BaseCallbackHandler {
    socketIOClientId = ''
    skipK = 0 // Skip streaming for first K numbers of handleLLMStart
    returnSourceDocuments = false
+    cachedResponse = true // Stays true unless handleLLMStart runs; handleChainEnd then replays outputs.text as tokens

    constructor(socketIO: Server, socketIOClientId: string, skipK?: number, returnSourceDocuments?: boolean) {
        super()
@@ -161,6 +162,7 @@ export class CustomChainHandler extends BaseCallbackHandler {
    }

    handleLLMStart() {
+        this.cachedResponse = false // an LLM call actually started, so this response is not coming from the cache
        if (this.skipK > 0) this.skipK -= 1
    }

@@ -175,13 +177,31 @@ export class CustomChainHandler extends BaseCallbackHandler {
    }

    handleLLMEnd() {
-        this.socketIO.to(this.socketIOClientId).emit('end')
+        /* Intentionally a no-op: the 'end' event is emitted from handleChainEnd, which also runs for cached responses. */
+        // Previously: this.socketIO.to(this.socketIOClientId).emit('end')
    }

    handleChainEnd(outputs: ChainValues): void | Promise<void> {
        if (this.returnSourceDocuments) {
            this.socketIO.to(this.socketIOClientId).emit('sourceDocuments', outputs?.sourceDocuments)
        }
+        /*
+            Langchain does not call handleLLMStart, handleLLMEnd, handleLLMNewToken when the response is cached.
+            Callback order is "Chain Start -> LLM Start -> LLM Token -> LLM End -> Chain End" for normal responses.
+            Callback order is "Chain Start -> Chain End" for cached responses, so the cached text is replayed
+            below as 'start'/'token' events, and 'end' is emitted from this handler in both cases.
+         */
+        const cachedValue: unknown = this.cachedResponse ? outputs?.text : undefined
+        // Replay only a string `text`: split() on a missing/non-string output would throw and 'end' would never be sent.
+        if (typeof cachedValue === 'string') {
+            // Split at whitespace and keep the whitespace (capture group) to preserve the original formatting.
+            const tokens = cachedValue.split(/(\s+)/)
+            this.socketIO.to(this.socketIOClientId).emit('start', tokens[0])
+            for (const token of tokens) {
+                this.socketIO.to(this.socketIOClientId).emit('token', token)
+            }
+        }
+        this.socketIO.to(this.socketIOClientId).emit('end')
    }
 }