Chore/Update langchain version, openai, mistral, vertex, anthropic (#2180)

* update langchain version, openai, mistral, vertex, anthropic, introduced toolagent

* upgrade @google/generative-ai 0.7.0, replicate and faiss-node

* update cohere ver

* adding chatCohere to streaming

* update gemini to have image upload

* update google genai, remove aiplugin
This commit is contained in:
Henry Heng
2024-04-20 02:20:30 +01:00
committed by GitHub
parent f5be889ea8
commit 95beaba9d9
27 changed files with 1541 additions and 1008 deletions
@@ -1,10 +1,10 @@
import { HarmBlockThreshold, HarmCategory } from '@google/generative-ai'
import type { SafetySetting } from '@google/generative-ai'
import { ChatGoogleGenerativeAI, GoogleGenerativeAIChatInput } from '@langchain/google-genai'
import { BaseCache } from '@langchain/core/caches'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, IMultiModalOption, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
import { convertMultiOptionsToStringArray, getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { getModels, MODEL_TYPE } from '../../../src/modelLoader'
import { ChatGoogleGenerativeAI, GoogleGenerativeAIChatInput } from './FlowiseChatGoogleGenerativeAI'
class GoogleGenerativeAI_ChatModels implements INode {
label: string
@@ -139,6 +139,15 @@ class GoogleGenerativeAI_ChatModels implements INode {
],
optional: true,
additionalParams: true
},
{
label: 'Allow Image Uploads',
name: 'allowImageUploads',
type: 'boolean',
description:
'Automatically uses vision model when image is being uploaded from chat. Only works with LLMChain, Conversation Chain, ReAct Agent, and Conversational Agent',
default: false,
optional: true
}
]
}
@@ -162,20 +171,21 @@ class GoogleGenerativeAI_ChatModels implements INode {
const harmCategory = nodeData.inputs?.harmCategory as string
const harmBlockThreshold = nodeData.inputs?.harmBlockThreshold as string
const cache = nodeData.inputs?.cache as BaseCache
const streaming = nodeData.inputs?.streaming as boolean
const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
const obj: Partial<GoogleGenerativeAIChatInput> = {
apiKey: apiKey,
modelName: modelName,
maxOutputTokens: 2048
streaming: streaming ?? true
}
if (maxOutputTokens) obj.maxOutputTokens = parseInt(maxOutputTokens, 10)
const model = new ChatGoogleGenerativeAI(obj)
if (topP) model.topP = parseFloat(topP)
if (topK) model.topK = parseFloat(topK)
if (cache) model.cache = cache
if (temperature) model.temperature = parseFloat(temperature)
if (topP) obj.topP = parseFloat(topP)
if (topK) obj.topK = parseFloat(topK)
if (cache) obj.cache = cache
if (temperature) obj.temperature = parseFloat(temperature)
// Safety Settings
let harmCategories: string[] = convertMultiOptionsToStringArray(harmCategory)
@@ -188,7 +198,16 @@ class GoogleGenerativeAI_ChatModels implements INode {
threshold: harmBlockThresholds[index] as HarmBlockThreshold
}
})
if (safetySettings.length > 0) model.safetySettings = safetySettings
if (safetySettings.length > 0) obj.safetySettings = safetySettings
const multiModalOption: IMultiModalOption = {
image: {
allowImageUploads: allowImageUploads ?? false
}
}
const model = new ChatGoogleGenerativeAI(nodeData.id, obj)
model.setMultiModalOption(multiModalOption)
return model
}
@@ -0,0 +1,550 @@
import { BaseMessage, AIMessage, AIMessageChunk, isBaseMessage, ChatMessage, MessageContent } from '@langchain/core/messages'
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager'
import { BaseChatModel, type BaseChatModelParams } from '@langchain/core/language_models/chat_models'
import { ChatGeneration, ChatGenerationChunk, ChatResult } from '@langchain/core/outputs'
import { ToolCall } from '@langchain/core/messages/tool'
import { NewTokenIndices } from '@langchain/core/callbacks/base'
import {
EnhancedGenerateContentResponse,
Content,
Part,
Tool,
GenerativeModel,
GoogleGenerativeAI as GenerativeAI
} from '@google/generative-ai'
import type { SafetySetting } from '@google/generative-ai'
import { ICommonObject, IMultiModalOption, IVisionChatModal } from '../../../src'
import { StructuredToolInterface } from '@langchain/core/tools'
import { isStructuredTool } from '@langchain/core/utils/function_calling'
import { zodToJsonSchema } from 'zod-to-json-schema'
interface TokenUsage {
completionTokens?: number
promptTokens?: number
totalTokens?: number
}
export interface GoogleGenerativeAIChatInput extends BaseChatModelParams {
modelName?: string
model?: string
temperature?: number
maxOutputTokens?: number
topP?: number
topK?: number
stopSequences?: string[]
safetySettings?: SafetySetting[]
apiKey?: string
streaming?: boolean
}
class LangchainChatGoogleGenerativeAI extends BaseChatModel implements GoogleGenerativeAIChatInput {
modelName = 'gemini-pro'
temperature?: number
maxOutputTokens?: number
topP?: number
topK?: number
stopSequences: string[] = []
safetySettings?: SafetySetting[]
apiKey?: string
streaming = false
private client: GenerativeModel
get _isMultimodalModel() {
return this.modelName.includes('vision') || this.modelName.startsWith('gemini-1.5')
}
constructor(fields?: GoogleGenerativeAIChatInput) {
super(fields ?? {})
this.modelName = fields?.model?.replace(/^models\//, '') ?? fields?.modelName?.replace(/^models\//, '') ?? 'gemini-pro'
this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens
if (this.maxOutputTokens && this.maxOutputTokens < 0) {
throw new Error('`maxOutputTokens` must be a positive integer')
}
this.temperature = fields?.temperature ?? this.temperature
if (this.temperature && (this.temperature < 0 || this.temperature > 1)) {
throw new Error('`temperature` must be in the range of [0.0,1.0]')
}
this.topP = fields?.topP ?? this.topP
if (this.topP && this.topP < 0) {
throw new Error('`topP` must be a positive integer')
}
if (this.topP && this.topP > 1) {
throw new Error('`topP` must be below 1.')
}
this.topK = fields?.topK ?? this.topK
if (this.topK && this.topK < 0) {
throw new Error('`topK` must be a positive integer')
}
this.stopSequences = fields?.stopSequences ?? this.stopSequences
this.apiKey = fields?.apiKey ?? process.env['GOOGLE_API_KEY']
if (!this.apiKey) {
throw new Error(
'Please set an API key for Google GenerativeAI ' +
'in the environment variable GOOGLE_API_KEY ' +
'or in the `apiKey` field of the ' +
'ChatGoogleGenerativeAI constructor'
)
}
this.safetySettings = fields?.safetySettings ?? this.safetySettings
if (this.safetySettings && this.safetySettings.length > 0) {
const safetySettingsSet = new Set(this.safetySettings.map((s) => s.category))
if (safetySettingsSet.size !== this.safetySettings.length) {
throw new Error('The categories in `safetySettings` array must be unique')
}
}
this.streaming = fields?.streaming ?? this.streaming
this.getClient()
}
getClient(tools?: Tool[]) {
this.client = new GenerativeAI(this.apiKey ?? '').getGenerativeModel({
model: this.modelName,
tools,
safetySettings: this.safetySettings as SafetySetting[],
generationConfig: {
candidateCount: 1,
stopSequences: this.stopSequences,
maxOutputTokens: this.maxOutputTokens,
temperature: this.temperature,
topP: this.topP,
topK: this.topK
}
})
}
_combineLLMOutput() {
return []
}
_llmType() {
return 'googlegenerativeai'
}
override bindTools(tools: (StructuredToolInterface | Record<string, unknown>)[], kwargs?: Partial<ICommonObject>) {
//@ts-ignore
return this.bind({ tools: convertToGeminiTools(tools), ...kwargs })
}
convertFunctionResponse(prompts: Content[]) {
for (let i = 0; i < prompts.length; i += 1) {
if (prompts[i].role === 'function') {
if (prompts[i - 1].role === 'model') {
const toolName = prompts[i - 1].parts[0].functionCall?.name ?? ''
prompts[i].parts = [
{
functionResponse: {
name: toolName,
response: {
name: toolName,
content: prompts[i].parts[0].text
}
}
}
]
}
}
}
}
async _generateNonStreaming(
prompt: Content[],
options: this['ParsedCallOptions'],
_runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
//@ts-ignore
const tools = options.tools ?? []
this.convertFunctionResponse(prompt)
if (tools.length > 0) {
this.getClient(tools)
} else {
this.getClient()
}
const res = await this.caller.callWithOptions({ signal: options?.signal }, async () => {
let output
try {
output = await this.client.generateContent({
contents: prompt
})
} catch (e: any) {
if (e.message?.includes('400 Bad Request')) {
e.status = 400
}
throw e
}
return output
})
const generationResult = mapGenerateContentResultToChatResult(res.response)
await _runManager?.handleLLMNewToken(generationResult.generations?.length ? generationResult.generations[0].text : '')
return generationResult
}
async _generate(
messages: BaseMessage[],
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
const prompt = convertBaseMessagesToContent(messages, this._isMultimodalModel)
// Handle streaming
if (this.streaming) {
const tokenUsage: TokenUsage = {}
const stream = this._streamResponseChunks(messages, options, runManager)
const finalChunks: Record<number, ChatGenerationChunk> = {}
for await (const chunk of stream) {
const index = (chunk.generationInfo as NewTokenIndices)?.completion ?? 0
if (finalChunks[index] === undefined) {
finalChunks[index] = chunk
} else {
finalChunks[index] = finalChunks[index].concat(chunk)
}
}
const generations = Object.entries(finalChunks)
.sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
.map(([_, value]) => value)
return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } }
}
return this._generateNonStreaming(prompt, options, runManager)
}
async *_streamResponseChunks(
messages: BaseMessage[],
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): AsyncGenerator<ChatGenerationChunk> {
const prompt = convertBaseMessagesToContent(messages, this._isMultimodalModel)
//@ts-ignore
if (options.tools !== undefined && options.tools.length > 0) {
const result = await this._generateNonStreaming(prompt, options, runManager)
const generationMessage = result.generations[0].message as AIMessage
if (generationMessage === undefined) {
throw new Error('Could not parse Groq output.')
}
const toolCallChunks = generationMessage.tool_calls?.map((toolCall, i) => ({
name: toolCall.name,
args: JSON.stringify(toolCall.args),
id: toolCall.id,
index: i
}))
yield new ChatGenerationChunk({
message: new AIMessageChunk({
content: generationMessage.content,
additional_kwargs: generationMessage.additional_kwargs,
tool_call_chunks: toolCallChunks
}),
text: generationMessage.tool_calls?.length ? '' : (generationMessage.content as string)
})
} else {
const stream = await this.caller.callWithOptions({ signal: options?.signal }, async () => {
this.getClient()
const { stream } = await this.client.generateContentStream({
contents: prompt
})
return stream
})
for await (const response of stream) {
const chunk = convertResponseContentToChatGenerationChunk(response)
if (!chunk) {
continue
}
yield chunk
await runManager?.handleLLMNewToken(chunk.text ?? '')
}
}
}
}
export class ChatGoogleGenerativeAI extends LangchainChatGoogleGenerativeAI implements IVisionChatModal {
configuredModel: string
configuredMaxToken?: number
multiModalOption: IMultiModalOption
id: string
constructor(id: string, fields?: GoogleGenerativeAIChatInput) {
super(fields)
this.id = id
this.configuredModel = fields?.modelName ?? ''
this.configuredMaxToken = fields?.maxOutputTokens
}
revertToOriginalModel(): void {
super.modelName = this.configuredModel
super.maxOutputTokens = this.configuredMaxToken
}
setMultiModalOption(multiModalOption: IMultiModalOption): void {
this.multiModalOption = multiModalOption
}
setVisionModel(): void {
if (this.modelName !== 'gemini-pro-vision' && this.modelName !== 'gemini-1.5-pro-latest') {
super.modelName = 'gemini-1.5-pro-latest'
super.maxOutputTokens = this.configuredMaxToken ? this.configuredMaxToken : 8192
}
}
}
function getMessageAuthor(message: BaseMessage) {
const type = message._getType()
if (ChatMessage.isInstance(message)) {
return message.role
}
return message.name ?? type
}
function convertAuthorToRole(author: string) {
switch (author) {
/**
* Note: Gemini currently is not supporting system messages
* we will convert them to human messages and merge with following
* */
case 'ai':
case 'model': // getMessageAuthor returns message.name. code ex.: return message.name ?? type;
return 'model'
case 'system':
case 'human':
return 'user'
case 'function':
case 'tool':
return 'function'
default:
throw new Error(`Unknown / unsupported author: ${author}`)
}
}
function convertMessageContentToParts(content: MessageContent, isMultimodalModel: boolean): Part[] {
if (typeof content === 'string') {
return [{ text: content }]
}
return content.map((c) => {
if (c.type === 'text') {
return {
text: c.text
}
}
if (c.type === 'tool_use') {
return {
functionCall: c.functionCall
}
}
/*if (c.type === "tool_use" || c.type === "tool_result") {
// TODO: Fix when SDK types are fixed
return {
...contentPart,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} as any;
}*/
if (c.type === 'image_url') {
if (!isMultimodalModel) {
throw new Error(`This model does not support images`)
}
let source
if (typeof c.image_url === 'string') {
source = c.image_url
} else if (typeof c.image_url === 'object' && 'url' in c.image_url) {
source = c.image_url.url
} else {
throw new Error('Please provide image as base64 encoded data URL')
}
const [dm, data] = source.split(',')
if (!dm.startsWith('data:')) {
throw new Error('Please provide image as base64 encoded data URL')
}
const [mimeType, encoding] = dm.replace(/^data:/, '').split(';')
if (encoding !== 'base64') {
throw new Error('Please provide image as base64 encoded data URL')
}
return {
inlineData: {
data,
mimeType
}
}
}
throw new Error(`Unknown content type ${(c as { type: string }).type}`)
})
}
function convertBaseMessagesToContent(messages: BaseMessage[], isMultimodalModel: boolean) {
return messages.reduce<{
content: Content[]
mergeWithPreviousContent: boolean
}>(
(acc, message, index) => {
if (!isBaseMessage(message)) {
throw new Error('Unsupported message input')
}
const author = getMessageAuthor(message)
if (author === 'system' && index !== 0) {
throw new Error('System message should be the first one')
}
const role = convertAuthorToRole(author)
const prevContent = acc.content[acc.content.length]
if (!acc.mergeWithPreviousContent && prevContent && prevContent.role === role) {
throw new Error('Google Generative AI requires alternate messages between authors')
}
const parts = convertMessageContentToParts(message.content, isMultimodalModel)
if (acc.mergeWithPreviousContent) {
const prevContent = acc.content[acc.content.length - 1]
if (!prevContent) {
throw new Error('There was a problem parsing your system message. Please try a prompt without one.')
}
prevContent.parts.push(...parts)
return {
mergeWithPreviousContent: false,
content: acc.content
}
}
const content: Content = {
role,
parts
}
return {
mergeWithPreviousContent: author === 'system',
content: [...acc.content, content]
}
},
{ content: [], mergeWithPreviousContent: false }
).content
}
function mapGenerateContentResultToChatResult(response: EnhancedGenerateContentResponse): ChatResult {
// if rejected or error, return empty generations with reason in filters
if (!response.candidates || response.candidates.length === 0 || !response.candidates[0]) {
return {
generations: [],
llmOutput: {
filters: response?.promptFeedback
}
}
}
const [candidate] = response.candidates
const { content, ...generationInfo } = candidate
const text = content.parts.map(({ text }) => text).join('')
if (content.parts.some((part) => part.functionCall)) {
const toolCalls: ToolCall[] = []
for (const fcPart of content.parts) {
const fc = fcPart.functionCall
if (fc) {
const { name, args } = fc
toolCalls.push({ name, args })
}
}
const functionCalls = toolCalls.map((tool) => {
return { functionCall: { name: tool.name, args: tool.args }, type: 'tool_use' }
})
const generation: ChatGeneration = {
text,
message: new AIMessage({
content: functionCalls,
name: !content ? undefined : content.role,
additional_kwargs: generationInfo,
tool_calls: toolCalls
}),
generationInfo
}
return {
generations: [generation]
}
} else {
const generation: ChatGeneration = {
text,
message: new AIMessage({
content: text,
name: !content ? undefined : content.role,
additional_kwargs: generationInfo
}),
generationInfo
}
return {
generations: [generation]
}
}
}
function convertResponseContentToChatGenerationChunk(response: EnhancedGenerateContentResponse): ChatGenerationChunk | null {
if (!response.candidates || response.candidates.length === 0) {
return null
}
const [candidate] = response.candidates
const { content, ...generationInfo } = candidate
const text = content?.parts[0]?.text ?? ''
return new ChatGenerationChunk({
text,
message: new AIMessageChunk({
content: text,
name: !content ? undefined : content.role,
// Each chunk can have unique "generationInfo", and merging strategy is unclear,
// so leave blank for now.
additional_kwargs: {}
}),
generationInfo
})
}
function zodToGeminiParameters(zodObj: any) {
// Gemini doesn't accept either the $schema or additionalProperties
// attributes, so we need to explicitly remove them.
const jsonSchema: any = zodToJsonSchema(zodObj)
// eslint-disable-next-line unused-imports/no-unused-vars
const { $schema, additionalProperties, ...rest } = jsonSchema
return rest
}
function convertToGeminiTools(structuredTools: (StructuredToolInterface | Record<string, unknown>)[]) {
return [
{
functionDeclarations: structuredTools.map((structuredTool) => {
if (isStructuredTool(structuredTool)) {
const jsonSchema = zodToGeminiParameters(structuredTool.schema)
return {
name: structuredTool.name,
description: structuredTool.description,
parameters: jsonSchema
}
}
return structuredTool
})
}
]
}
@@ -1,6 +1,5 @@
import { GoogleAuthOptions } from 'google-auth-library'
import { BaseCache } from '@langchain/core/caches'
import { ChatGoogleVertexAI, GoogleVertexAIChatInput } from '@langchain/community/chat_models/googlevertexai'
import { ChatVertexAI, ChatVertexAIInput } from '@langchain/google-vertexai'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { getModels, MODEL_TYPE } from '../../../src/modelLoader'
@@ -20,12 +19,12 @@ class GoogleVertexAI_ChatModels implements INode {
constructor() {
this.label = 'ChatGoogleVertexAI'
this.name = 'chatGoogleVertexAI'
this.version = 3.0
this.version = 4.0
this.type = 'ChatGoogleVertexAI'
this.icon = 'GoogleVertex.svg'
this.category = 'Chat Models'
this.description = 'Wrapper around VertexAI large language models that use the Chat endpoint'
this.baseClasses = [this.type, ...getBaseClasses(ChatGoogleVertexAI)]
this.baseClasses = [this.type, ...getBaseClasses(ChatVertexAI)]
this.credential = {
label: 'Connect Credential',
name: 'credential',
@@ -72,6 +71,15 @@ class GoogleVertexAI_ChatModels implements INode {
step: 0.1,
optional: true,
additionalParams: true
},
{
label: 'Top Next Highest Probability Tokens',
name: 'topK',
type: 'number',
description: `Decode using top-k sampling: consider the set of top_k most probable tokens. Must be positive`,
step: 1,
optional: true,
additionalParams: true
}
]
}
@@ -89,7 +97,7 @@ class GoogleVertexAI_ChatModels implements INode {
const googleApplicationCredential = getCredentialParam('googleApplicationCredential', credentialData, nodeData)
const projectID = getCredentialParam('projectID', credentialData, nodeData)
const authOptions: GoogleAuthOptions = {}
const authOptions: ICommonObject = {}
if (Object.keys(credentialData).length !== 0) {
if (!googleApplicationCredentialFilePath && !googleApplicationCredential)
throw new Error('Please specify your Google Application Credential')
@@ -111,8 +119,9 @@ class GoogleVertexAI_ChatModels implements INode {
const maxOutputTokens = nodeData.inputs?.maxOutputTokens as string
const topP = nodeData.inputs?.topP as string
const cache = nodeData.inputs?.cache as BaseCache
const topK = nodeData.inputs?.topK as string
const obj: GoogleVertexAIChatInput<GoogleAuthOptions> = {
const obj: ChatVertexAIInput = {
temperature: parseFloat(temperature),
model: modelName
}
@@ -121,8 +130,9 @@ class GoogleVertexAI_ChatModels implements INode {
if (maxOutputTokens) obj.maxOutputTokens = parseInt(maxOutputTokens, 10)
if (topP) obj.topP = parseFloat(topP)
if (cache) obj.cache = cache
if (topK) obj.topK = parseFloat(topK)
const model = new ChatGoogleVertexAI(obj)
const model = new ChatVertexAI(obj)
return model
}
}
@@ -1,37 +1,9 @@
import { ChatCompletionResponse, ToolCalls as MistralAIToolCalls } from '@mistralai/mistralai'
import { BaseCache } from '@langchain/core/caches'
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager'
import { NewTokenIndices } from '@langchain/core/callbacks/base'
import { ChatGeneration, ChatGenerationChunk, ChatResult } from '@langchain/core/outputs'
import {
MessageType,
type BaseMessage,
MessageContent,
AIMessage,
HumanMessage,
HumanMessageChunk,
AIMessageChunk,
ToolMessageChunk,
ChatMessageChunk
} from '@langchain/core/messages'
import { ChatMistralAI as LangchainChatMistralAI, ChatMistralAIInput } from '@langchain/mistralai'
import { ChatMistralAI, ChatMistralAIInput } from '@langchain/mistralai'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { getModels, MODEL_TYPE } from '../../../src/modelLoader'
interface TokenUsage {
completionTokens?: number
promptTokens?: number
totalTokens?: number
}
type MistralAIInputMessage = {
role: string
name?: string
content: string | string[]
tool_calls?: MistralAIToolCalls[] | any[]
}
class ChatMistral_ChatModels implements INode {
label: string
name: string
@@ -170,243 +142,4 @@ class ChatMistral_ChatModels implements INode {
}
}
class ChatMistralAI extends LangchainChatMistralAI {
async _generate(
messages: BaseMessage[],
options?: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
const tokenUsage: TokenUsage = {}
const params = this.invocationParams(options)
const mistralMessages = this.convertMessagesToMistralMessages(messages)
const input = {
...params,
messages: mistralMessages
}
// Handle streaming
if (this.streaming) {
const stream = this._streamResponseChunks(messages, options, runManager)
const finalChunks: Record<number, ChatGenerationChunk> = {}
for await (const chunk of stream) {
const index = (chunk.generationInfo as NewTokenIndices)?.completion ?? 0
if (finalChunks[index] === undefined) {
finalChunks[index] = chunk
} else {
finalChunks[index] = finalChunks[index].concat(chunk)
}
}
const generations = Object.entries(finalChunks)
.sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
.map(([_, value]) => value)
return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } }
}
// Not streaming, so we can just call the API once.
const response = await this.completionWithRetry(input, false)
const { completion_tokens: completionTokens, prompt_tokens: promptTokens, total_tokens: totalTokens } = response?.usage ?? {}
if (completionTokens) {
tokenUsage.completionTokens = (tokenUsage.completionTokens ?? 0) + completionTokens
}
if (promptTokens) {
tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens
}
if (totalTokens) {
tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens
}
const generations: ChatGeneration[] = []
for (const part of response?.choices ?? []) {
if ('delta' in part) {
throw new Error('Delta not supported in non-streaming mode.')
}
if (!('message' in part)) {
throw new Error('No message found in the choice.')
}
const text = part.message?.content ?? ''
const generation: ChatGeneration = {
text,
message: this.mistralAIResponseToChatMessage(part)
}
if (part.finish_reason) {
generation.generationInfo = { finish_reason: part.finish_reason }
}
generations.push(generation)
}
return {
generations,
llmOutput: { tokenUsage }
}
}
async *_streamResponseChunks(
messages: BaseMessage[],
options?: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): AsyncGenerator<ChatGenerationChunk> {
const mistralMessages = this.convertMessagesToMistralMessages(messages)
const params = this.invocationParams(options)
const input = {
...params,
messages: mistralMessages
}
const streamIterable = await this.completionWithRetry(input, true)
for await (const data of streamIterable) {
const choice = data?.choices[0]
if (!choice || !('delta' in choice)) {
continue
}
const { delta } = choice
if (!delta) {
continue
}
const newTokenIndices = {
prompt: 0,
completion: choice.index ?? 0
}
const message = this._convertDeltaToMessageChunk(delta)
if (message === null) {
// Do not yield a chunk if the message is empty
continue
}
const generationChunk = new ChatGenerationChunk({
message,
text: delta.content ?? '',
generationInfo: newTokenIndices
})
yield generationChunk
void runManager?.handleLLMNewToken(generationChunk.text ?? '', newTokenIndices, undefined, undefined, undefined, {
chunk: generationChunk
})
}
if (options?.signal?.aborted) {
throw new Error('AbortError')
}
}
_convertDeltaToMessageChunk(delta: {
role?: string | undefined
content?: string | undefined
tool_calls?: MistralAIToolCalls[] | undefined
}) {
if (!delta.content && !delta.tool_calls) {
return null
}
// Our merge additional kwargs util function will throw unless there
// is an index key in each tool object (as seen in OpenAI's) so we
// need to insert it here.
const toolCallsWithIndex = delta.tool_calls?.length
? delta.tool_calls?.map((toolCall, index) => ({
...toolCall,
index
}))
: undefined
let role = 'assistant'
if (delta.role) {
role = delta.role
} else if (toolCallsWithIndex) {
role = 'tool'
}
const content = delta.content ?? ''
let additional_kwargs
if (toolCallsWithIndex) {
additional_kwargs = {
tool_calls: toolCallsWithIndex
}
} else {
additional_kwargs = {}
}
if (role === 'user') {
return new HumanMessageChunk({ content })
} else if (role === 'assistant') {
return new AIMessageChunk({ content, additional_kwargs })
} else if (role === 'tool') {
return new ToolMessageChunk({
content,
additional_kwargs,
tool_call_id: toolCallsWithIndex?.[0].id ?? ''
})
} else {
return new ChatMessageChunk({ content, role })
}
}
convertMessagesToMistralMessages(messages: Array<BaseMessage>): Array<MistralAIInputMessage> {
const getRole = (role: MessageType) => {
switch (role) {
case 'human':
return 'user'
case 'ai':
return 'assistant'
case 'tool':
return 'tool'
case 'function':
return 'function'
case 'system':
return 'system'
default:
throw new Error(`Unknown message type: ${role}`)
}
}
const getContent = (content: MessageContent): string => {
if (typeof content === 'string') {
return content
}
throw new Error(`ChatMistralAI does not support non text message content. Received: ${JSON.stringify(content, null, 2)}`)
}
const mistralMessages = []
for (const msg of messages) {
const msgObj: MistralAIInputMessage = {
role: getRole(msg._getType()),
content: getContent(msg.content)
}
if (getRole(msg._getType()) === 'tool') {
msgObj.role = 'assistant'
msgObj.tool_calls = msg.additional_kwargs?.tool_calls ?? []
} else if (getRole(msg._getType()) === 'function') {
msgObj.role = 'tool'
msgObj.name = msg.name
}
mistralMessages.push(msgObj)
}
return mistralMessages
}
mistralAIResponseToChatMessage(choice: ChatCompletionResponse['choices'][0]): BaseMessage {
const { message } = choice
// MistralAI SDK does not include tool_calls in the non
// streaming return type, so we need to extract it like this
// to satisfy typescript.
let toolCalls: MistralAIToolCalls[] = []
if ('tool_calls' in message) {
toolCalls = message.tool_calls as MistralAIToolCalls[]
}
switch (message.role) {
case 'assistant':
return new AIMessage({
content: message.content ?? '',
additional_kwargs: {
tool_calls: toolCalls
}
})
default:
return new HumanMessage(message.content ?? '')
}
}
}
module.exports = { nodeClass: ChatMistral_ChatModels }
@@ -33,7 +33,9 @@ export class ChatOpenAI extends LangchainChatOpenAI implements IVisionChatModal
}
setVisionModel(): void {
super.modelName = 'gpt-4-vision-preview'
super.maxTokens = this.configuredMaxToken ? this.configuredMaxToken : 1024
if (this.modelName !== 'gpt-4-turbo' && !this.modelName.includes('vision')) {
super.modelName = 'gpt-4-turbo'
super.maxTokens = this.configuredMaxToken ? this.configuredMaxToken : 1024
}
}
}