From d588ac0480846271e00324caf413cd408eb5d19e Mon Sep 17 00:00:00 2001 From: vinodkiran Date: Fri, 29 Sep 2023 07:54:16 +0530 Subject: [PATCH] Adding support for LLM Caching. --- .../LocalMemoryCache/LocalMemoryCache.ts | 44 ++++++++++++++++++ .../llmcache/LocalMemoryCache/memorycache.png | Bin 0 -> 2717 bytes .../components/nodes/llms/OpenAI/OpenAI.ts | 17 +++++-- packages/components/package.json | 2 +- packages/components/src/Interface.ts | 7 +++ packages/components/src/handler.ts | 22 ++++++++- 6 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 packages/components/nodes/llmcache/LocalMemoryCache/LocalMemoryCache.ts create mode 100644 packages/components/nodes/llmcache/LocalMemoryCache/memorycache.png diff --git a/packages/components/nodes/llmcache/LocalMemoryCache/LocalMemoryCache.ts b/packages/components/nodes/llmcache/LocalMemoryCache/LocalMemoryCache.ts new file mode 100644 index 00000000..73f4415e --- /dev/null +++ b/packages/components/nodes/llmcache/LocalMemoryCache/LocalMemoryCache.ts @@ -0,0 +1,44 @@ +import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' +import { InMemoryCache } from 'langchain/cache' +import { getBaseClasses } from '../../../src' + +class LocalMemoryCache implements INode { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + outputs: INodeOutputsValue[] + inMemoryCache: any + + constructor() { + this.label = 'Local (Builtin) Cache' + this.name = 'localCache' + this.version = 1.0 + this.type = 'LLMCache' + this.icon = 'memorycache.png' + this.category = 'LLM Cache' + this.baseClasses = [this.type, 'LLMCacheBase'] + this.inputs = [] + this.outputs = [ + { + label: 'LLM Cache', + name: 'cache', + baseClasses: [this.type, ...getBaseClasses(InMemoryCache)] + } + ] + } + + async init(nodeData: INodeData): Promise { + if (!this.inMemoryCache) { + this.inMemoryCache = InMemoryCache.global() + } + return this.inMemoryCache + } +} + +module.exports = { nodeClass: LocalMemoryCache } diff --git a/packages/components/nodes/llmcache/LocalMemoryCache/memorycache.png b/packages/components/nodes/llmcache/LocalMemoryCache/memorycache.png new file mode 100644 index 0000000000000000000000000000000000000000..aaeecd6fb100be3a10b3d1faca046f9fa409f018 GIT binary patch literal 2717 zcmV;O3S#w%P)Px#1ZP1_K>z@;j|==^1poj532;bRa{vG=N&o;XN&$CzbWH#N3NlGVK~!i%-C283 zROc1{vCA&(gIyj10)qh}C=crxZQ5yO635s{6UQ-WYc%!I^ba*Q6JNDcCz}4D@m1TI zDEO$24-_rdM;v3EJghMqry4;Ul?7k;Ho7411s0a2=bU@*x*~*YY%KOSbGUoI``zz5 z_q@+XNIL;4s%vTxYBZ{MJrB%kXlN*`R;%0#^WWUOiTMi_;ABw|!otI8mOSfmVCD^> zM$CKX9Sk2nTw#9c(xsR)XATl$tbE&FA1gRoC+yb-NOwtd7(PR=8ndoY!dz0A|AS0PpIBCcn zri@!?F;-76UK}kgK0Bo z!qeEOjy^b8>rSTwKOH&@&CSj7K}MrNVV<~_32pv1?B2BtrLGd3JA01q{n)fQAJI|K zICku)GL=wUU5&gIEBG~(Uo3}-?2r*`9=slJQgplsx$iAUTU(pVe^Br%Y%Xo>ZNhl4 z`HIcYeS02Ou6Pgeaq(y*X|;G;#8fP#s)mLJ`3N=v4Qx)H`g*j}LTny<-zj6XPfJUS zXy9RCpuKE%yD)$2)-8N}=Cnwwea7?|y!B&6B_*zQk$h=cDO|2na#oY_s3(IY-{|pR z#E21!*;G-qsjaOg+f`klhl9gqT52i@nlSCHw?q}>cI{-Q4@4AeJ01VNi1}o4m6Y(_ zk1rl;<^!Q}Ro)6TH#JMF=9o+>>K1;|U{qw3WIcZGQGoZkefu^}m7Gx|&#|+H;`a8P zJJ9TFMQghsEjq|Y3ypf^Wq2qTJW6PSQY1fj9{#jwAzt{|ix@a)5Sl2Jv*eXYA@Xrl zUdam#9_KO6dHz%pnR0XJJGZ(A#a#2mv*8`_b$#jf}hzKd>9uljYbzQ_TlKU?U0t2z z429}YM^~oWI=u~bK2~<{AqPC_MUawNR@>;x_(O?vZci0_>h;pJ& zaxz}|#m_Nn)XSu)-V&`|{pDz7al-Lq$KY@})w_GqHS@8=d{9&o(B#_sIxPFkpVa~2 z)iGl+W%6X~-mwb}-X=Jxld_ZJvUUvABv2e8=9Rg8k=By0q(!!ME|aUPs?kcBB0cR% zEL^e}qetImKBr$M>Kp3CpE@F#`ED~TZ)>7XD=T|IfUaD=gn}(wgz;EvRH9>Iq%D*V zhoo#UDB)Ei5)s=o6Mr1rxShuFS&AuvuvM{jA?niPw&C<0fEhVWBj~Jm=#13)Dioq(N@? zw~NDagU-_C^Pr6W{gIxYE)L1Ke(f5LQhS-dF(2#Juahx`Dbyt9)*W;)&kM0BXyB-{ zV96qE+K`W!=okzbI1nkRDa2F^o}eo`VWcw>5e^4@lmj?KdKx`A{q-q)`pGA_b@Lku z!&k3f!S(Amkd%}t#~BZbZXK4@*XU`)$NN81M}XawsB$T^hFQXqkd%b75*MuD7SaYs z7(UNwho4l>j>^5KX7>!f=d&JmY%McKMMvW3SI6ZyV?1f}%P$Vzqiqe$k8X$U46K8C z3U8C9yn&+PVr-;1%^5T@(kT@tOP!sV%_EAgoP>CcpK%C`a70KB363T#GL7^?Kl7>t znQCflSOhqKwhV4}1&$m(3@b4c0MAnq_;q*aJ~Y&O&_d>=kzVL#o-0j_CjWM-oL+6) zwhd{?eXw!;1~}NhDVecanL2{UIL`B&77BwNN_rfDuHC$WpFHynob`E1e$9mwtcRJ1`ip6 zDO0E7`i+~|w534ZMT0)$(G_hNY@-?$zq<&RE|p{AZ(hgW*1s=fEX6CAFT-8orjE=@ zq#QJH0N}Gtn_8$PB_<)Q?~`O%ArgfOHgCqELx-_$!+M#|FdFr-rJH$<4jMT8ZKVvm zch4R)QmW6%%tU5(4zjbesJcu>X8%mYQ=ukiPJevfR-B>uUsiS&A5mg+opGV0)I}w8 z4g6#xadELQ2NiEYF7vI>@32APfTM(+B$<$yh}S1h5L4N+`vbf?e?ERaegd8vGMFTs zh(xA@MD$5{0x!MvBBs6hCf2U`D~gMb6VpCKI-KGw;TDT%B2ZH3gf8Ygv%sdtCXAmj z5v$j(Mc$g#l1&q+mQP7ZL3(N$GN|ARroJhuNTzBshC(MtCa!b3X7s-Duv6%ld=JF` zKl5Cnvzj$(@YKt(_rZe)q)J>v%}FF>PLAWj@jRHiivL`ZvdQ`Q9hHpgd4G4%#XMCh zxNzY@fZia-OYheYpa1g!)#bhM*~g#4VzUZ!n$u}DyhhqM{{w^1>p8S?jlv%`a^y&< z@in@7`D&+)NJn%VDtSYW(|?>dR~-Qg3jYpQS*e^PaQjzWTm)B1sZ@zRbxv1^u-K5% zF9SIPaxm}*1Mvbyp{%Sd%$xhRIs;U3mLZCo`R>qt8>oVtW(^J0OguMqD3&f;MvY0P zjIlJi7vhHY=;|b{$E#FSs4yf zG3WKQ!eTNbp1La5t{yx*^D0C-v>0oJCC?>+r={!O(?lP^O$R3>{$Cc`wdUk{aUb(@ z{xAot@^ZU8`p^URjo@on(ANL1`|x|$sC_IU?FsYRf4~!Z#XXSZA2nDotYJPM{pv)q z!aR5I3l}ZIN$QHs<}fjZo(Mlh0IM||vq{j;4;!X1uRfE*QN51yzW|>|goT?G0R#9C XQt;1#;KI))00000NkvXXu0mjfl~Wzs literal 0 HcmV?d00001 diff --git a/packages/components/nodes/llms/OpenAI/OpenAI.ts b/packages/components/nodes/llms/OpenAI/OpenAI.ts index 2960ad2a..9fa61653 100644 --- a/packages/components/nodes/llms/OpenAI/OpenAI.ts +++ b/packages/components/nodes/llms/OpenAI/OpenAI.ts @@ -1,6 +1,8 @@ import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' import { OpenAI, OpenAIInput } from 'langchain/llms/openai' +import { BaseLLMParams } from 'langchain/dist/llms/base' +import { BaseCache } from 'langchain/schema' class OpenAI_LLMs implements INode { label: string @@ -17,7 +19,7 @@ class OpenAI_LLMs implements INode { constructor() { this.label = 'OpenAI' this.name = 'openAI' - this.version = 2.0 + this.version = 3.0 this.type = 'OpenAI' this.icon = 'openai.png' this.category = 'LLMs' @@ -30,6 +32,12 @@ class OpenAI_LLMs implements INode { credentialNames: ['openAIApi'] } this.inputs = [ + { + label: 'Cache', + name: 'llmCache', + type: 'LLMCache', + optional: true + }, { label: 'Model Name', name: 'modelName', @@ -149,7 +157,9 @@ class OpenAI_LLMs implements INode { const credentialData = await getCredentialData(nodeData.credential ?? '', options) const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData) - const obj: Partial & { openAIApiKey?: string } = { + const llmCache = nodeData.inputs?.llmCache as BaseCache + + const obj: Partial & BaseLLMParams & { openAIApiKey?: string } = { temperature: parseFloat(temperature), modelName, openAIApiKey, @@ -164,8 +174,9 @@ class OpenAI_LLMs implements INode { if (batchSize) obj.batchSize = parseInt(batchSize, 10) if (bestOf) obj.bestOf = parseInt(bestOf, 10) - let parsedBaseOptions: any | undefined = undefined + if (llmCache) obj.cache = llmCache + let parsedBaseOptions: any | undefined = undefined if (baseOptions) { try { parsedBaseOptions = typeof baseOptions === 'object' ? baseOptions : JSON.parse(baseOptions) diff --git a/packages/components/package.json b/packages/components/package.json index 93609106..f8498e31 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -42,7 +42,7 @@ "google-auth-library": "^9.0.0", "graphql": "^16.6.0", "html-to-text": "^9.0.5", - "langchain": "^0.0.152", + "langchain": "^0.0.154", "langfuse-langchain": "^1.0.14-alpha.0", "langsmith": "^0.0.32", "linkifyjs": "^4.1.1", diff --git a/packages/components/src/Interface.ts b/packages/components/src/Interface.ts index e883d056..76dc7354 100644 --- a/packages/components/src/Interface.ts +++ b/packages/components/src/Interface.ts @@ -1,6 +1,7 @@ /** * Types */ +import { BaseCache } from 'langchain/schema' export type NodeParamsType = | 'asyncOptions' @@ -176,3 +177,9 @@ export class VectorStoreRetriever { this.vectorStore = fields.vectorStore } } + +export interface LLMCacheBase { + name: string + description: string + baseCache: BaseCache +} diff --git a/packages/components/src/handler.ts b/packages/components/src/handler.ts index 10f9a214..a102b473 100644 --- a/packages/components/src/handler.ts +++ b/packages/components/src/handler.ts @@ -151,6 +151,7 @@ export class CustomChainHandler extends BaseCallbackHandler { socketIOClientId = '' skipK = 0 // Skip streaming for first K numbers of handleLLMStart returnSourceDocuments = false + cachedResponse = true constructor(socketIO: Server, socketIOClientId: string, skipK?: number, returnSourceDocuments?: boolean) { super() @@ -161,6 +162,7 @@ export class CustomChainHandler extends BaseCallbackHandler { } handleLLMStart() { + this.cachedResponse = false if (this.skipK > 0) this.skipK -= 1 } @@ -175,13 +177,31 @@ export class CustomChainHandler extends BaseCallbackHandler { } handleLLMEnd() { - this.socketIO.to(this.socketIOClientId).emit('end') + /* send the end event from handleChainEnd */ + // this.socketIO.to(this.socketIOClientId).emit('end') } handleChainEnd(outputs: ChainValues): void | Promise { if (this.returnSourceDocuments) { this.socketIO.to(this.socketIOClientId).emit('sourceDocuments', outputs?.sourceDocuments) } + /* + Langchain does not call handleLLMStart, handleLLMEnd, handleLLMNewToken when the chain is cached. + Callback Order is "Chain Start -> LLM Start --> LLM Token --> LLM End -> Chain End" for normal responses. + Callback Order is "Chain Start -> Chain End" for cached responses. + */ + if (this.cachedResponse) { + const cachedValue = outputs.text as string + //split at whitespace, and keep the whitespace. This is to preserve the original formatting. + const result = cachedValue.split(/(\s+)/) + result.forEach((token: string, index: number) => { + if (index === 0) { + this.socketIO.to(this.socketIOClientId).emit('start', token) + } + this.socketIO.to(this.socketIOClientId).emit('token', token) + }) + } + this.socketIO.to(this.socketIOClientId).emit('end') } }