diff --git a/packages/components/evaluation/EvaluationRunner.ts b/packages/components/evaluation/EvaluationRunner.ts index 73f22b70..acde7944 100644 --- a/packages/components/evaluation/EvaluationRunner.ts +++ b/packages/components/evaluation/EvaluationRunner.ts @@ -6,6 +6,26 @@ import { getModelConfigByModelName, MODEL_TYPE } from '../src/modelLoader' export class EvaluationRunner { static metrics = new Map() + + static getCostMetrics = async (selectedProvider: string, selectedModel: string) => { + let modelConfig = await getModelConfigByModelName(MODEL_TYPE.CHAT, selectedProvider, selectedModel) + if (modelConfig) { + if (modelConfig['cost_values']) { + return modelConfig.cost_values + } + return { cost_values: modelConfig } + } else { + modelConfig = await getModelConfigByModelName(MODEL_TYPE.LLM, selectedProvider, selectedModel) + if (modelConfig) { + if (modelConfig['cost_values']) { + return modelConfig.cost_values + } + return { cost_values: modelConfig } + } + } + return undefined + } + static async getAndDeleteMetrics(id: string) { const val = EvaluationRunner.metrics.get(id) if (val) { @@ -34,11 +54,8 @@ export class EvaluationRunner { } } } - let modelConfig = await getModelConfigByModelName(MODEL_TYPE.CHAT, selectedProvider, selectedModel) - if (modelConfig) { - val.push(JSON.stringify({ cost_values: modelConfig })) - } else { - modelConfig = await getModelConfigByModelName(MODEL_TYPE.LLM, selectedProvider, selectedModel) + if (selectedProvider && selectedModel) { + const modelConfig = await EvaluationRunner.getCostMetrics(selectedProvider, selectedModel) if (modelConfig) { val.push(JSON.stringify({ cost_values: modelConfig })) } @@ -116,6 +133,40 @@ export class EvaluationRunner { } try { let response = await axios.post(`${this.baseURL}/api/v1/prediction/${chatflowId}`, postData, axiosConfig) + let agentFlowMetrics: any[] = [] + if (response?.data?.agentFlowExecutedData) { + for (let i = 0; i < response.data.agentFlowExecutedData.length; i++) { + const agentFlowExecutedData = response.data.agentFlowExecutedData[i] + const input_tokens = agentFlowExecutedData?.data?.output?.usageMetadata?.input_tokens || 0 + const output_tokens = agentFlowExecutedData?.data?.output?.usageMetadata?.output_tokens || 0 + const total_tokens = + agentFlowExecutedData?.data?.output?.usageMetadata?.total_tokens || input_tokens + output_tokens + const metrics: any = { + promptTokens: input_tokens, + completionTokens: output_tokens, + totalTokens: total_tokens, + provider: + agentFlowExecutedData.data?.input?.llmModelConfig?.llmModel || + agentFlowExecutedData.data?.input?.agentModelConfig?.agentModel, + model: + agentFlowExecutedData.data?.input?.llmModelConfig?.modelName || + agentFlowExecutedData.data?.input?.agentModelConfig?.modelName, + nodeLabel: agentFlowExecutedData?.nodeLabel, + nodeId: agentFlowExecutedData?.nodeId + } + if (metrics.provider && metrics.model) { + const modelConfig = await EvaluationRunner.getCostMetrics(metrics.provider, metrics.model) + if (modelConfig) { + metrics.cost_values = { + input_cost: (modelConfig.cost_values.input_cost || 0) * (input_tokens / 1000), + output_cost: (modelConfig.cost_values.output_cost || 0) * (output_tokens / 1000) + } + metrics.cost_values.total_cost = metrics.cost_values.input_cost + metrics.cost_values.output_cost + } + } + agentFlowMetrics.push(metrics) + } + } const endTime = performance.now() const timeTaken = (endTime - startTime).toFixed(2) if (response?.data?.metrics) { @@ -130,6 +181,9 @@ export class EvaluationRunner { } ] } + if (agentFlowMetrics.length > 0) { + runData.nested_metrics = agentFlowMetrics + } runData.status = 'complete' let resultText = '' if (response.data.text) resultText = response.data.text diff --git a/packages/server/src/enterprise/middleware/passport/index.ts b/packages/server/src/enterprise/middleware/passport/index.ts index 3381018e..b93d2413 100644 --- a/packages/server/src/enterprise/middleware/passport/index.ts +++ b/packages/server/src/enterprise/middleware/passport/index.ts @@ -218,7 +218,7 @@ export const initializeJwtCookieMiddleware = async (app: express.Application, id if (!refreshToken) return res.sendStatus(401) jwt.verify(refreshToken, jwtRefreshSecret, async (err: any, payload: any) => { - if (err || !payload) return res.status(403).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) + if (err || !payload) return res.status(401).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) // @ts-ignore const loggedInUser = req.user as LoggedInUser let isSSO = false @@ -227,16 +227,16 @@ export const initializeJwtCookieMiddleware = async (app: express.Application, id try { newTokenResponse = await identityManager.getRefreshToken(loggedInUser.ssoProvider, loggedInUser.ssoRefreshToken) if (newTokenResponse.error) { - return res.status(403).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) + return res.status(401).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) } isSSO = true } catch (error) { - return res.status(403).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) + return res.status(401).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) } } const meta = decryptToken(payload.meta) if (!meta) { - return res.status(403).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) + return res.status(401).json({ message: ErrorMessage.REFRESH_TOKEN_EXPIRED }) } if (isSSO) { loggedInUser.ssoToken = newTokenResponse.access_token diff --git a/packages/server/src/services/evaluations/CostCalculator.ts b/packages/server/src/services/evaluations/CostCalculator.ts index 8813ade9..4013706e 100644 --- a/packages/server/src/services/evaluations/CostCalculator.ts +++ b/packages/server/src/services/evaluations/CostCalculator.ts @@ -18,39 +18,29 @@ export const calculateCost = (metricsArray: ICommonObject[]) => { let completionTokensCost: string = '0' let totalTokensCost = '0' if (metric.cost_values) { - const costValues = metric.cost_values + let costValues: any = {} + if (metric.cost_values?.cost_values) { + costValues = metric.cost_values.cost_values + } else { + costValues = metric.cost_values + } + if (costValues.total_price > 0) { let cost = costValues.total_cost * (totalTokens / 1000) - if (cost < 0.01) { - totalTokensCost = '$ <0.01' - } else { - totalTokensCost = '$ ' + cost.toFixed(fractionDigits) - } + totalTokensCost = formatCost(cost) } else { let totalCost = 0 if (promptTokens) { const cost = costValues.input_cost * (promptTokens / 1000) totalCost += cost - if (cost < 0.01) { - promptTokensCost = '$ <0.01' - } else { - promptTokensCost = '$ ' + cost.toFixed(fractionDigits) - } + promptTokensCost = formatCost(cost) } if (completionTokens) { const cost = costValues.output_cost * (completionTokens / 1000) totalCost += cost - if (cost < 0.01) { - completionTokensCost = '$ <0.01' - } else { - completionTokensCost = '$ ' + cost.toFixed(fractionDigits) - } - } - if (totalCost < 0.01) { - totalTokensCost = '$ <0.01' - } else { - totalTokensCost = '$ ' + totalCost.toFixed(fractionDigits) + completionTokensCost = formatCost(cost) } + totalTokensCost = formatCost(totalCost) } } metric['totalCost'] = totalTokensCost @@ -58,3 +48,10 @@ export const calculateCost = (metricsArray: ICommonObject[]) => { metric['completionCost'] = completionTokensCost } } + +export const formatCost = (cost: number) => { + if (cost == 0) { + return '$ 0' + } + return cost < 0.01 ? '$ <0.01' : '$ ' + cost.toFixed(fractionDigits) +} diff --git a/packages/server/src/services/evaluations/index.ts b/packages/server/src/services/evaluations/index.ts index acca639c..2d44b3ce 100644 --- a/packages/server/src/services/evaluations/index.ts +++ b/packages/server/src/services/evaluations/index.ts @@ -15,10 +15,11 @@ import { getAppVersion } from '../../utils' import { In } from 'typeorm' import { getWorkspaceSearchOptions } from '../../enterprise/utils/ControllerServiceUtils' import { v4 as uuidv4 } from 'uuid' -import { calculateCost } from './CostCalculator' +import { calculateCost, formatCost } from './CostCalculator' import { runAdditionalEvaluators } from './EvaluatorRunner' import evaluatorsService from '../evaluator' import { LLMEvaluationRunner } from './LLMEvaluationRunner' +import { Assistant } from '../../database/entities/Assistant' const runAgain = async (id: string, baseURL: string, orgId: string) => { try { @@ -27,7 +28,7 @@ const runAgain = async (id: string, baseURL: string, orgId: string) => { id: id }) if (!evaluation) throw new Error(`Evaluation ${id} not found`) - const additionalConfig: any = JSON.parse(evaluation.additionalConfig) + const additionalConfig = evaluation.additionalConfig ? JSON.parse(evaluation.additionalConfig) : {} const data: ICommonObject = { chatflowId: evaluation.chatflowId, chatflowName: evaluation.chatflowName, @@ -35,7 +36,8 @@ const runAgain = async (id: string, baseURL: string, orgId: string) => { datasetId: evaluation.datasetId, evaluationType: evaluation.evaluationType, selectedSimpleEvaluators: JSON.stringify(additionalConfig.simpleEvaluators), - datasetAsOneConversation: additionalConfig.datasetAsOneConversation + datasetAsOneConversation: additionalConfig.datasetAsOneConversation, + chatflowType: JSON.stringify(additionalConfig.chatflowTypes ? additionalConfig.chatflowTypes : []) } data.name = evaluation.name data.workspaceId = evaluation.workspaceId @@ -69,7 +71,8 @@ const createEvaluation = async (body: ICommonObject, baseURL: string, orgId: str const row = appServer.AppDataSource.getRepository(Evaluation).create(newEval) row.average_metrics = JSON.stringify({}) - const additionalConfig: any = { + const additionalConfig: ICommonObject = { + chatflowTypes: body.chatflowType ? JSON.parse(body.chatflowType) : [], datasetAsOneConversation: body.datasetAsOneConversation, simpleEvaluators: body.selectedSimpleEvaluators.length > 0 ? JSON.parse(body.selectedSimpleEvaluators) : [] } @@ -152,7 +155,7 @@ const createEvaluation = async (body: ICommonObject, baseURL: string, orgId: str let evalMetrics = { passCount: 0, failCount: 0, errorCount: 0 } evalRunner .runEvaluations(data) - .then(async (result: any) => { + .then(async (result) => { let totalTime = 0 // let us assume that the eval is successful let allRowsSuccessful = true @@ -171,8 +174,48 @@ const createEvaluation = async (body: ICommonObject, baseURL: string, orgId: str totalTime += parseFloat(evaluationRow.latency) let metricsObjFromRun: ICommonObject = {} + let nested_metrics = evaluationRow.nested_metrics + + let promptTokens = 0, + completionTokens = 0, + totalTokens = 0 + let inputCost = 0, + outputCost = 0, + totalCost = 0 + if (nested_metrics && nested_metrics.length > 0) { + for (let i = 0; i < nested_metrics.length; i++) { + const nested_metric = nested_metrics[i] + if (nested_metric.model && nested_metric.promptTokens > 0) { + promptTokens += nested_metric.promptTokens + completionTokens += nested_metric.completionTokens + totalTokens += nested_metric.totalTokens + + inputCost += nested_metric.cost_values.input_cost + outputCost += nested_metric.cost_values.output_cost + totalCost += nested_metric.cost_values.total_cost + + nested_metric['totalCost'] = formatCost(nested_metric.cost_values.total_cost) + nested_metric['promptCost'] = formatCost(nested_metric.cost_values.input_cost) + nested_metric['completionCost'] = formatCost(nested_metric.cost_values.output_cost) + } + } + nested_metrics = nested_metrics.filter((metric: any) => { + return metric.model && metric.provider + }) + } const metrics = evaluationRow.metrics if (metrics) { + if (nested_metrics && nested_metrics.length > 0) { + metrics.push({ + promptTokens: promptTokens, + completionTokens: completionTokens, + totalTokens: totalTokens, + totalCost: formatCost(totalCost), + promptCost: formatCost(inputCost), + completionCost: formatCost(outputCost) + }) + metricsObjFromRun.nested_metrics = nested_metrics + } metrics.map((metric: any) => { if (metric) { const json = typeof metric === 'object' ? metric : JSON.parse(metric) @@ -211,7 +254,7 @@ const createEvaluation = async (body: ICommonObject, baseURL: string, orgId: str if (body.evaluationType === 'llm') { resultRow.llmConfig = additionalConfig.llmConfig resultRow.LLMEvaluators = body.selectedLLMEvaluators.length > 0 ? JSON.parse(body.selectedLLMEvaluators) : [] - const llmEvaluatorMap: any = [] + const llmEvaluatorMap: { evaluatorId: string; evaluator: any }[] = [] for (let i = 0; i < resultRow.LLMEvaluators.length; i++) { const evaluatorId = resultRow.LLMEvaluators[i] const evaluator = await evaluatorsService.getEvaluator(evaluatorId) @@ -243,23 +286,27 @@ const createEvaluation = async (body: ICommonObject, baseURL: string, orgId: str } appServer.AppDataSource.getRepository(Evaluation) .findOneBy({ id: newEvaluation.id }) - .then((evaluation: any) => { - evaluation.status = allRowsSuccessful ? EvaluationStatus.COMPLETED : EvaluationStatus.ERROR - evaluation.average_metrics = JSON.stringify({ - averageLatency: (totalTime / result.rows.length).toFixed(3), - totalRuns: result.rows.length, - ...evalMetrics, - passPcnt: passPercent.toFixed(2) - }) - appServer.AppDataSource.getRepository(Evaluation).save(evaluation) + .then((evaluation) => { + if (evaluation) { + evaluation.status = allRowsSuccessful ? EvaluationStatus.COMPLETED : EvaluationStatus.ERROR + evaluation.average_metrics = JSON.stringify({ + averageLatency: (totalTime / result.rows.length).toFixed(3), + totalRuns: result.rows.length, + ...evalMetrics, + passPcnt: passPercent.toFixed(2) + }) + appServer.AppDataSource.getRepository(Evaluation).save(evaluation) + } }) } catch (error) { //update the evaluation with status as error appServer.AppDataSource.getRepository(Evaluation) .findOneBy({ id: newEvaluation.id }) - .then((evaluation: any) => { - evaluation.status = EvaluationStatus.ERROR - appServer.AppDataSource.getRepository(Evaluation).save(evaluation) + .then((evaluation) => { + if (evaluation) { + evaluation.status = EvaluationStatus.ERROR + appServer.AppDataSource.getRepository(Evaluation).save(evaluation) + } }) } }) @@ -268,12 +315,14 @@ const createEvaluation = async (body: ICommonObject, baseURL: string, orgId: str console.error('Error running evaluations:', getErrorMessage(error)) appServer.AppDataSource.getRepository(Evaluation) .findOneBy({ id: newEvaluation.id }) - .then((evaluation: any) => { - evaluation.status = EvaluationStatus.ERROR - evaluation.average_metrics = JSON.stringify({ - error: getErrorMessage(error) - }) - appServer.AppDataSource.getRepository(Evaluation).save(evaluation) + .then((evaluation) => { + if (evaluation) { + evaluation.status = EvaluationStatus.ERROR + evaluation.average_metrics = JSON.stringify({ + error: getErrorMessage(error) + }) + appServer.AppDataSource.getRepository(Evaluation).save(evaluation) + } }) .catch((dbError) => { console.error('Error updating evaluation status:', getErrorMessage(dbError)) @@ -378,18 +427,31 @@ const isOutdated = async (id: string) => { returnObj.dataset = dataset } } else { - returnObj.errors.push(`Dataset ${evaluation.datasetName} not found`) + returnObj.errors.push({ + message: `Dataset ${evaluation.datasetName} not found`, + id: evaluation.datasetId + }) isOutdated = true } - const chatflows = JSON.parse(evaluation.chatflowId) - const chatflowNames = JSON.parse(evaluation.chatflowName) - - for (let i = 0; i < chatflows.length; i++) { + const chatflowIds = evaluation.chatflowId ? JSON.parse(evaluation.chatflowId) : [] + const chatflowNames = evaluation.chatflowName ? JSON.parse(evaluation.chatflowName) : [] + const chatflowTypes = evaluation.additionalConfig ? JSON.parse(evaluation.additionalConfig).chatflowTypes : [] + for (let i = 0; i < chatflowIds.length; i++) { + // check for backward compatibility, as previous versions did not the types in additionalConfig + if (chatflowTypes && chatflowTypes.length >= 0) { + if (chatflowTypes[i] === 'Custom Assistant') { + // if the chatflow type is custom assistant, then we should NOT check in the chatflows table + continue + } + } const chatflow = await appServer.AppDataSource.getRepository(ChatFlow).findOneBy({ - id: chatflows[i] + id: chatflowIds[i] }) if (!chatflow) { - returnObj.errors.push(`Chatflow ${chatflowNames[i]} not found`) + returnObj.errors.push({ + message: `Chatflow ${chatflowNames[i]} not found`, + id: chatflowIds[i] + }) isOutdated = true } else { const chatflowLastUpdated = chatflow.updatedDate.getTime() @@ -397,12 +459,42 @@ const isOutdated = async (id: string) => { isOutdated = true returnObj.chatflows.push({ chatflowName: chatflowNames[i], - chatflowId: chatflows[i], + chatflowId: chatflowIds[i], + chatflowType: chatflow.type === 'AGENTFLOW' ? 'Agentflow v2' : 'Chatflow', isOutdated: true }) } } } + if (chatflowTypes && chatflowTypes.length > 0) { + for (let i = 0; i < chatflowIds.length; i++) { + if (chatflowTypes[i] !== 'Custom Assistant') { + // if the chatflow type is NOT custom assistant, then bail out for this item + continue + } + const assistant = await appServer.AppDataSource.getRepository(Assistant).findOneBy({ + id: chatflowIds[i] + }) + if (!assistant) { + returnObj.errors.push({ + message: `Custom Assistant ${chatflowNames[i]} not found`, + id: chatflowIds[i] + }) + isOutdated = true + } else { + const chatflowLastUpdated = assistant.updatedDate.getTime() + if (chatflowLastUpdated > evaluationRunDate) { + isOutdated = true + returnObj.chatflows.push({ + chatflowName: chatflowNames[i], + chatflowId: chatflowIds[i], + chatflowType: 'Custom Assistant', + isOutdated: true + }) + } + } + } + } returnObj.isOutdated = isOutdated return returnObj } catch (error) { @@ -424,7 +516,7 @@ const getEvaluation = async (id: string) => { where: { evaluationId: id } }) const versions = (await getVersions(id)).versions - const versionNo = versions.findIndex((version: any) => version.id === id) + 1 + const versionNo = versions.findIndex((version) => version.id === id) + 1 return { ...evaluation, versionCount: versionCount, @@ -451,7 +543,7 @@ const getVersions = async (id: string) => { runDate: 'ASC' } }) - const returnResults: any[] = [] + const returnResults: { id: string; runDate: Date; version: number }[] = [] versions.map((version, index) => { returnResults.push({ id: version.id, diff --git a/packages/server/src/utils/buildAgentflow.ts b/packages/server/src/utils/buildAgentflow.ts index 37805497..40953bf3 100644 --- a/packages/server/src/utils/buildAgentflow.ts +++ b/packages/server/src/utils/buildAgentflow.ts @@ -1805,7 +1805,7 @@ export const executeAgentFlow = async ({ role: 'userMessage', content: finalUserInput, chatflowid, - chatType: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + chatType: evaluationRunId ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, chatId, sessionId, createdDate: userMessageDateTime, @@ -1820,7 +1820,7 @@ export const executeAgentFlow = async ({ role: 'apiMessage', content: content, chatflowid, - chatType: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + chatType: evaluationRunId ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, chatId, sessionId, executionId: newExecution.id @@ -1856,7 +1856,7 @@ export const executeAgentFlow = async ({ version: await getAppVersion(), chatflowId: chatflowid, chatId, - type: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + type: evaluationRunId ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, flowGraph: getTelemetryFlowObj(nodes, edges) }, orgId diff --git a/packages/server/src/utils/buildChatflow.ts b/packages/server/src/utils/buildChatflow.ts index b138922e..df5b64e6 100644 --- a/packages/server/src/utils/buildChatflow.ts +++ b/packages/server/src/utils/buildChatflow.ts @@ -551,7 +551,7 @@ export const executeFlow = async ({ role: 'userMessage', content: incomingInput.question, chatflowid: agentflow.id, - chatType: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + chatType: isEvaluation ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, chatId, memoryType, sessionId, @@ -566,7 +566,7 @@ export const executeFlow = async ({ role: 'apiMessage', content: finalResult, chatflowid: agentflow.id, - chatType: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + chatType: isEvaluation ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, chatId, memoryType, sessionId @@ -598,7 +598,7 @@ export const executeFlow = async ({ version: await getAppVersion(), agentflowId: agentflow.id, chatId, - type: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + type: isEvaluation ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, flowGraph: getTelemetryFlowObj(nodes, edges) }, orgId @@ -807,7 +807,7 @@ export const executeFlow = async ({ version: await getAppVersion(), chatflowId: chatflowid, chatId, - type: isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, + type: isEvaluation ? ChatType.EVALUATION : isInternal ? ChatType.INTERNAL : ChatType.EXTERNAL, flowGraph: getTelemetryFlowObj(nodes, edges) }, orgId @@ -905,17 +905,17 @@ export const utilBuildChatflow = async (req: Request, isInternal: boolean = fals const isTool = req.get('flowise-tool') === 'true' const isEvaluation: boolean = req.headers['X-Flowise-Evaluation'] || req.body.evaluation let evaluationRunId = '' - if (isEvaluation) { - evaluationRunId = req.body.evaluationRunId - if (evaluationRunId) { - const newEval = { - evaluation: { - status: true, - evaluationRunId - } + evaluationRunId = req.body.evaluationRunId + if (isEvaluation && chatflow.type !== 'AGENTFLOW' && req.body.evaluationRunId) { + // this is needed for the collection of token metrics for non-agent flows, + // for agentflows the execution trace has the info needed + const newEval = { + evaluation: { + status: true, + evaluationRunId } - chatflow.analytic = JSON.stringify(newEval) } + chatflow.analytic = JSON.stringify(newEval) } try { diff --git a/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx b/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx index de36dd17..7e336bfa 100644 --- a/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx +++ b/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx @@ -149,7 +149,7 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { const [sourceDialogProps, setSourceDialogProps] = useState({}) const [hardDeleteDialogOpen, setHardDeleteDialogOpen] = useState(false) const [hardDeleteDialogProps, setHardDeleteDialogProps] = useState({}) - const [chatTypeFilter, setChatTypeFilter] = useState([]) + const [chatTypeFilter, setChatTypeFilter] = useState(['INTERNAL', 'EXTERNAL']) const [feedbackTypeFilter, setFeedbackTypeFilter] = useState([]) const [startDate, setStartDate] = useState(new Date(new Date().setMonth(new Date().getMonth() - 1))) const [endDate, setEndDate] = useState(new Date()) @@ -310,6 +310,15 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { } } + const getChatType = (chatType) => { + if (chatType === 'INTERNAL') { + return 'UI' + } else if (chatType === 'EVALUATION') { + return 'Evaluation' + } + return 'API/Embed' + } + const exportMessages = async () => { if (!storagePath && getStoragePathFromServer.data) { storagePath = getStoragePathFromServer.data.storagePath @@ -356,7 +365,7 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { if (!Object.prototype.hasOwnProperty.call(obj, chatPK)) { obj[chatPK] = { id: chatmsg.chatId, - source: chatmsg.chatType === 'INTERNAL' ? 'UI' : 'API/Embed', + source: getChatType(chatmsg.chatType), sessionId: chatmsg.sessionId ?? null, memoryType: chatmsg.memoryType ?? null, email: chatmsg.leadEmail ?? null, @@ -716,7 +725,7 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { setChatLogs([]) setAllChatLogs([]) setChatMessages([]) - setChatTypeFilter([]) + setChatTypeFilter(['INTERNAL', 'EXTERNAL']) setFeedbackTypeFilter([]) setSelectedMessageIndex(0) setSelectedChatId('') @@ -880,6 +889,10 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { { label: 'API/Embed', name: 'EXTERNAL' + }, + { + label: 'Evaluations', + name: 'EVALUATION' } ]} onSelect={(newValue) => onChatTypeSelected(newValue)} @@ -1016,7 +1029,7 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { )} {chatMessages[1].chatType && (
- Source: {chatMessages[1].chatType === 'INTERNAL' ? 'UI' : 'API/Embed'} + Source: {getChatType(chatMessages[1].chatType)}
)} {chatMessages[1].memoryType && ( diff --git a/packages/ui/src/views/evaluations/CreateEvaluationDialog.jsx b/packages/ui/src/views/evaluations/CreateEvaluationDialog.jsx index a1845398..1eb8df65 100644 --- a/packages/ui/src/views/evaluations/CreateEvaluationDialog.jsx +++ b/packages/ui/src/views/evaluations/CreateEvaluationDialog.jsx @@ -21,7 +21,8 @@ import { Switch, StepLabel, IconButton, - FormControlLabel + FormControlLabel, + Checkbox } from '@mui/material' import { useTheme } from '@mui/material/styles' @@ -42,6 +43,7 @@ import useApi from '@/hooks/useApi' import datasetsApi from '@/api/dataset' import evaluatorsApi from '@/api/evaluators' import nodesApi from '@/api/nodes' +import assistantsApi from '@/api/assistants' // utils import useNotifier from '@/utils/useNotifier' @@ -57,14 +59,18 @@ const CreateEvaluationDialog = ({ show, dialogProps, onCancel, onConfirm }) => { useNotifier() const getAllChatflowsApi = useApi(chatflowsApi.getAllChatflows) + const getAllAgentflowsApi = useApi(chatflowsApi.getAllAgentflows) + const getAllDatasetsApi = useApi(datasetsApi.getAllDatasets) const getAllEvaluatorsApi = useApi(evaluatorsApi.getAllEvaluators) const getNodesByCategoryApi = useApi(nodesApi.getNodesByCategory) const getModelsApi = useApi(nodesApi.executeNodeLoadMethod) + const getAssistantsApi = useApi(assistantsApi.getAllAssistants) const [chatflow, setChatflow] = useState([]) const [dataset, setDataset] = useState('') const [datasetAsOneConversation, setDatasetAsOneConversation] = useState(false) + const [flowTypes, setFlowTypes] = useState([]) const [flows, setFlows] = useState([]) const [datasets, setDatasets] = useState([]) @@ -163,6 +169,10 @@ const CreateEvaluationDialog = ({ show, dialogProps, onCancel, onConfirm }) => { for (let i = 0; i < selectedChatflows.length; i += 1) { selectedChatflowNames.push(flows.find((f) => f.name === selectedChatflows[i])?.label) } + const selectedChatflowTypes = [] + for (let i = 0; i < selectedChatflows.length; i += 1) { + selectedChatflowTypes.push(flows.find((f) => f.name === selectedChatflows[i])?.type) + } const chatflowName = JSON.stringify(selectedChatflowNames) const datasetName = datasets.find((f) => f.name === dataset)?.label const obj = { @@ -173,6 +183,7 @@ const CreateEvaluationDialog = ({ show, dialogProps, onCancel, onConfirm }) => { datasetName: datasetName, chatflowId: chatflow, chatflowName: chatflowName, + chatflowType: JSON.stringify(selectedChatflowTypes), selectedSimpleEvaluators: selectedSimpleEvaluators, selectedLLMEvaluators: selectedLLMEvaluators, model: selectedModel, @@ -216,6 +227,8 @@ const CreateEvaluationDialog = ({ show, dialogProps, onCancel, onConfirm }) => { getNodesByCategoryApi.request('Chat Models') if (flows.length === 0) { getAllChatflowsApi.request() + getAssistantsApi.request('CUSTOM') + getAllAgentflowsApi.request('AGENTFLOW') } if (datasets.length === 0) { getAllDatasetsApi.request() @@ -225,23 +238,18 @@ const CreateEvaluationDialog = ({ show, dialogProps, onCancel, onConfirm }) => { }, []) useEffect(() => { - if (getAllChatflowsApi.data) { + if (getAllAgentflowsApi.data && getAllChatflowsApi.data && getAssistantsApi.data) { try { - const chatflows = getAllChatflowsApi.data - let flowNames = [] - for (let i = 0; i < chatflows.length; i += 1) { - const flow = chatflows[i] - flowNames.push({ - label: flow.name, - name: flow.id - }) - } - setFlows(flowNames) + const agentFlows = populateFlowNames(getAllAgentflowsApi.data, 'Agentflow v2') + const chatFlows = populateFlowNames(getAllChatflowsApi.data, 'Chatflow') + const assistants = populateAssistants(getAssistantsApi.data) + setFlows([...agentFlows, ...chatFlows, ...assistants]) + setFlowTypes(['Agentflow v2', 'Chatflow', 'Custom Assistant']) } catch (e) { console.error(e) } } - }, [getAllChatflowsApi.data]) + }, [getAllAgentflowsApi.data, getAllChatflowsApi.data, getAssistantsApi.data]) useEffect(() => { if (getNodesByCategoryApi.data) { @@ -337,6 +345,44 @@ const CreateEvaluationDialog = ({ show, dialogProps, onCancel, onConfirm }) => { if (llm !== 'no_grading') getModelsApi.request(llm, { loadMethod: 'listModels' }) } + const onChangeFlowType = (flowType) => { + const selected = flowType.target.checked + const flowTypeValue = flowType.target.value + if (selected) { + setFlowTypes([...flowTypes, flowTypeValue]) + } else { + setFlowTypes(flowTypes.filter((f) => f !== flowTypeValue)) + } + } + + const populateFlowNames = (data, type) => { + let flowNames = [] + for (let i = 0; i < data.length; i += 1) { + const flow = data[i] + flowNames.push({ + label: flow.name, + name: flow.id, + type: type, + description: type + }) + } + return flowNames + } + + const populateAssistants = (assistants) => { + let assistantNames = [] + for (let i = 0; i < assistants.length; i += 1) { + const assistant = assistants[i] + assistantNames.push({ + label: JSON.parse(assistant.details).name || '', + name: assistant.id, + type: 'Custom Assistant', + description: 'Custom Assistant' + }) + } + return assistantNames + } + const component = show ? ( { Treat all dataset rows as one conversation ? } value={datasetAsOneConversation} onChange={() => setDatasetAsOneConversation(!datasetAsOneConversation)} /> - - Chatflow(s) to Evaluate * - +
+ + Select your flows to Evaluate +  * + + + {' '} + Chatflows + {' '} + Agentflows (v2) + {' '} + Custom Assistants + +
flowTypes.includes(f.type))} onSelect={(newValue) => setChatflow(newValue)} value={chatflow ?? chatflow ?? 'choose an option'} /> diff --git a/packages/ui/src/views/evaluations/EvalsResultDialog.jsx b/packages/ui/src/views/evaluations/EvalsResultDialog.jsx index 3fb013f0..a74c2b76 100644 --- a/packages/ui/src/views/evaluations/EvalsResultDialog.jsx +++ b/packages/ui/src/views/evaluations/EvalsResultDialog.jsx @@ -2,7 +2,6 @@ import React from 'react' import { createPortal } from 'react-dom' import PropTypes from 'prop-types' import { useSelector } from 'react-redux' -import { useNavigate } from 'react-router-dom' // Material import { @@ -36,7 +35,6 @@ const EvalsResultDialog = ({ show, dialogProps, onCancel, openDetailsDrawer }) = const portalElement = document.getElementById('portal') const customization = useSelector((state) => state.customization) const theme = useTheme() - const navigate = useNavigate() const getColSpan = (evaluationsShown, llmEvaluations) => { let colSpan = 1 @@ -45,6 +43,23 @@ const EvalsResultDialog = ({ show, dialogProps, onCancel, openDetailsDrawer }) = return colSpan } + const getOpenLink = (index) => { + if (index === undefined) { + return '' + } + if (dialogProps.data?.additionalConfig?.chatflowTypes) { + switch (dialogProps.data.additionalConfig.chatflowTypes[index]) { + case 'Chatflow': + return '/canvas/' + dialogProps.data.evaluation.chatflowId[index] + case 'Custom Assistant': + return '/assistants/custom/' + dialogProps.data.evaluation.chatflowId[index] + case 'Agentflow v2': + return '/v2/agentcanvas/' + dialogProps.data.evaluation.chatflowId[index] + } + } + return '/canvas/' + dialogProps.data.evaluation.chatflowId[index] + } + const component = show ? ( @@ -65,7 +80,7 @@ const EvalsResultDialog = ({ show, dialogProps, onCancel, openDetailsDrawer }) = }} > - Chatflows Used: + Flows Used: {(dialogProps.data.evaluation.chatflowName || []).map((chatflowUsed, index) => ( navigate('/canvas/' + dialogProps.data.evaluation.chatflowId[index])} + onClick={() => window.open(getOpenLink(index), '_blank')} > ))} diff --git a/packages/ui/src/views/evaluations/EvaluationResult.jsx b/packages/ui/src/views/evaluations/EvaluationResult.jsx index e6d35079..6fdde95b 100644 --- a/packages/ui/src/views/evaluations/EvaluationResult.jsx +++ b/packages/ui/src/views/evaluations/EvaluationResult.jsx @@ -25,6 +25,7 @@ import { import { useTheme } from '@mui/material/styles' import moment from 'moment' import PaidIcon from '@mui/icons-material/Paid' +import { IconHierarchy, IconUsersGroup, IconRobot } from '@tabler/icons-react' import LLMIcon from '@mui/icons-material/ModelTraining' import AlarmIcon from '@mui/icons-material/AlarmOn' import TokensIcon from '@mui/icons-material/AutoAwesomeMotion' @@ -116,10 +117,13 @@ const EvalEvaluationRows = () => { const [expandTableProps, setExpandTableProps] = useState({}) const [isTableLoading, setTableLoading] = useState(false) + const [additionalConfig, setAdditionalConfig] = useState({}) + const openDetailsDrawer = (item) => { setSideDrawerDialogProps({ type: 'View', data: item, + additionalConfig: additionalConfig, evaluationType: evaluation.evaluationType, evaluationChatflows: evaluation.chatflowName }) @@ -169,7 +173,8 @@ const EvalEvaluationRows = () => { showCustomEvals, showTokenMetrics, showLatencyMetrics, - showCostMetrics + showCostMetrics, + additionalConfig } }) setShowExpandTableDialog(true) @@ -239,6 +244,9 @@ const EvalEvaluationRows = () => { const data = getEvaluation.data setSelectedEvaluationName(data.name) getIsOutdatedApi.request(data.id) + if (data.additionalConfig) { + setAdditionalConfig(JSON.parse(data.additionalConfig)) + } data.chatflowId = typeof data.chatflowId === 'object' ? data.chatflowId : JSON.parse(data.chatflowId) data.chatflowName = typeof data.chatflowName === 'object' ? data.chatflowName : JSON.parse(data.chatflowName) const rows = getEvaluation.data.rows @@ -314,6 +322,51 @@ const EvalEvaluationRows = () => { // eslint-disable-next-line react-hooks/exhaustive-deps }, [getEvaluation.data]) + const getOpenLink = (index) => { + if (index === undefined) { + return undefined + } + const id = evaluation.chatflowId[index] + // this is to check if the evaluation is deleted! + if (outdated?.errors?.length > 0 && outdated.errors.find((e) => e.id === id)) { + return undefined + } + if (additionalConfig.chatflowTypes) { + switch (additionalConfig.chatflowTypes[index]) { + case 'Chatflow': + return '/canvas/' + evaluation.chatflowId[index] + case 'Custom Assistant': + return '/assistants/custom/' + evaluation.chatflowId[index] + case 'Agentflow v2': + return '/v2/agentcanvas/' + evaluation.chatflowId[index] + } + } + return '/canvas/' + evaluation.chatflowId[index] + } + + const openFlow = (index) => { + const url = getOpenLink(index) + if (url) { + window.open(getOpenLink(index), '_blank') + } + } + + const getFlowIcon = (index) => { + if (index === undefined) { + return + } + if (additionalConfig.chatflowTypes) { + switch (additionalConfig.chatflowTypes[index]) { + case 'Chatflow': + return + case 'Custom Assistant': + return + case 'Agentflow v2': + return + } + } + return + } return ( <> @@ -405,14 +458,14 @@ const EvalEvaluationRows = () => { }} variant='outlined' label={outdated.dataset.name} - onClick={() => navigate(`/dataset_rows/${outdated.dataset.id}`)} + onClick={() => window.open(`/dataset_rows/${outdated.dataset.id}`, '_blank')} > )} {outdated.chatflows && outdated?.errors?.length === 0 && outdated.chatflows.length > 0 && ( <>
- Chatflows: + Flows: {outdated.chatflows.map((chatflow, index) => ( { }} variant='outlined' label={chatflow.chatflowName} - onClick={() => navigate(`/canvas/${chatflow.chatflowId}`)} + onClick={() => + window.open( + chatflow.chatflowType === 'Chatflow' + ? '/canvas/' + chatflow.chatflowId + : chatflow.chatflowType === 'Custom Assistant' + ? '/assistants/custom/' + chatflow.chatflowId + : '/v2/agentcanvas/' + chatflow.chatflowId, + '_blank' + ) + } > ))} )} {outdated.errors.length > 0 && - outdated.errors.map((error, index) => {error})} + outdated.errors.map((error, index) => {error.message})} { {showCharts && ( {customEvalsDefined && ( - + { }} > - Chatflows Used: + Flows Used: {(evaluation.chatflowName || []).map((chatflowUsed, index) => ( { : '0 2px 14px 0 rgb(32 40 45 / 10%)' }} label={chatflowUsed} - onClick={() => navigate('/canvas/' + evaluation.chatflowId[index])} + onClick={() => openFlow(index)} > ))} diff --git a/packages/ui/src/views/evaluations/EvaluationResultSideDrawer.jsx b/packages/ui/src/views/evaluations/EvaluationResultSideDrawer.jsx index 9a3ada22..c415fb98 100644 --- a/packages/ui/src/views/evaluations/EvaluationResultSideDrawer.jsx +++ b/packages/ui/src/views/evaluations/EvaluationResultSideDrawer.jsx @@ -1,8 +1,25 @@ import PropTypes from 'prop-types' -import { CardContent, Card, Box, SwipeableDrawer, Stack, Button, Chip, Divider, Typography } from '@mui/material' +import { + CardContent, + Card, + Box, + SwipeableDrawer, + Stack, + Button, + Chip, + Divider, + Typography, + Table, + TableHead, + TableRow, + TableBody +} from '@mui/material' +import { IconHierarchy, IconUsersGroup, IconRobot } from '@tabler/icons-react' + import { useSelector } from 'react-redux' -import { IconSquareRoundedChevronsRight } from '@tabler/icons-react' import { evaluators as evaluatorsOptions, numericOperators } from '../evaluators/evaluatorConstant' +import TableCell from '@mui/material/TableCell' +import { Close } from '@mui/icons-material' const EvaluationResultSideDrawer = ({ show, dialogProps, onClickFunction }) => { const onOpen = () => {} @@ -19,12 +36,32 @@ const EvaluationResultSideDrawer = ({ show, dialogProps, onClickFunction }) => { return '' } + const getFlowIcon = (index) => { + if (index === undefined) { + return + } + if (dialogProps.additionalConfig.chatflowTypes) { + switch (dialogProps.additionalConfig.chatflowTypes[index]) { + case 'Chatflow': + return + case 'Custom Assistant': + return + case 'Agentflow v2': + return + } + } + return + } + return ( onClickFunction()} onOpen={onOpen}> - - +
+ + Evaluation Details + +
+ Evaluation Id @@ -61,13 +98,19 @@ const EvaluationResultSideDrawer = ({ show, dialogProps, onClickFunction }) => { {dialogProps.evaluationChatflows?.length > 0 && ( <> - - - Chatflow +
+ {getFlowIcon(index)} + + {dialogProps.evaluationChatflows[index]} - {dialogProps.evaluationChatflows[index]} - -
+
)} @@ -153,79 +196,222 @@ const EvaluationResultSideDrawer = ({ show, dialogProps, onClickFunction }) => {

- - - Tokens - - - - - - - - - + {dialogProps.data.metrics[index]?.nested_metrics ? ( + + + Tokens + + + + + + Node + + + Provider & Model + + + Input + + + Output + + + Total + + + + + {dialogProps.data.metrics[index]?.nested_metrics?.map((metric, index) => ( + + + {metric.nodeLabel} + + + {metric.provider} +
+ {metric.model} +
+ + {metric.promptTokens} + + + {metric.completionTokens} + + + {metric.totalTokens} + +
+ ))} + + + Total + + + {dialogProps.data.metrics[index].promptTokens} + + + {dialogProps.data.metrics[index].completionTokens} + + + {dialogProps.data.metrics[index].totalTokens} + + +
+
+
+ ) : ( + + + Tokens + + + + + + + + + + )}
- - - Cost - - - - - - - - - + {dialogProps.data.metrics[index]?.nested_metrics ? ( + + + Cost + + + + + + Node + + + Provider & Model + + + Input + + + Output + + + Total + + + + + {dialogProps.data.metrics[index]?.nested_metrics?.map((metric, index) => ( + + + {metric.nodeLabel} + + + {metric.provider}
+ {metric.model} +
+ + {metric.promptCost} + + + {metric.completionCost} + + + {metric.totalCost} + +
+ ))} + + + Total + + + {dialogProps.data.metrics[index].promptCost} + + + {dialogProps.data.metrics[index].completionCost} + + + {dialogProps.data.metrics[index].totalCost} + + +
+
+
+ ) : ( + + + Cost + + + + + + + + + + )}

diff --git a/packages/ui/src/views/evaluations/MetricsItemCard.jsx b/packages/ui/src/views/evaluations/MetricsItemCard.jsx index a8bdca82..380f19c7 100644 --- a/packages/ui/src/views/evaluations/MetricsItemCard.jsx +++ b/packages/ui/src/views/evaluations/MetricsItemCard.jsx @@ -11,7 +11,7 @@ import SkeletonChatflowCard from '@/ui-component/cards/Skeleton/ChatflowCard' const CardWrapper = styled(MainCard)(({ theme }) => ({ background: theme.palette.card.main, color: theme.darkTextPrimary, - overflow: 'auto', + overflow: 'hidden', position: 'relative', boxShadow: '0 2px 14px 0 rgb(32 40 45 / 8%)', cursor: 'pointer', diff --git a/packages/ui/src/views/evaluations/index.jsx b/packages/ui/src/views/evaluations/index.jsx index 2f28e1b1..6f7c0931 100644 --- a/packages/ui/src/views/evaluations/index.jsx +++ b/packages/ui/src/views/evaluations/index.jsx @@ -1,4 +1,4 @@ -import React, { useEffect, useState } from 'react' +import React, { useEffect, useState, useCallback } from 'react' import * as PropTypes from 'prop-types' import moment from 'moment/moment' import { useNavigate } from 'react-router-dom' @@ -20,7 +20,8 @@ import { TableBody, TableContainer, TableHead, - TableRow + TableRow, + ToggleButton } from '@mui/material' import { useTheme } from '@mui/material/styles' import { closeSnackbar as closeSnackbarAction, enqueueSnackbar as enqueueSnackbarAction } from '@/store/actions' @@ -35,7 +36,6 @@ import useNotifier from '@/utils/useNotifier' // project import MainCard from '@/ui-component/cards/MainCard' -import { StyledButton } from '@/ui-component/button/StyledButton' import { BackdropLoader } from '@/ui-component/loading/BackdropLoader' import ConfirmDialog from '@/ui-component/dialog/ConfirmDialog' import ErrorBoundary from '@/ErrorBoundary' @@ -53,7 +53,9 @@ import { IconTrash, IconX, IconChevronsUp, - IconChevronsDown + IconChevronsDown, + IconPlayerPlay, + IconPlayerPause } from '@tabler/icons-react' import empty_evalSVG from '@/assets/images/empty_evals.svg' @@ -79,6 +81,7 @@ const EvalsEvaluation = () => { const [loading, setLoading] = useState(false) const [isTableLoading, setTableLoading] = useState(false) const [selected, setSelected] = useState([]) + const [autoRefresh, setAutoRefresh] = useState(false) const onSelectAllClick = (event) => { if (event.target.checked) { @@ -240,14 +243,34 @@ const EvalsEvaluation = () => { // eslint-disable-next-line react-hooks/exhaustive-deps }, [createNewEvaluation.error]) - const onRefresh = () => { + const onRefresh = useCallback(() => { getAllEvaluations.request() - } + }, [getAllEvaluations]) useEffect(() => { setTableLoading(getAllEvaluations.loading) }, [getAllEvaluations.loading]) + useEffect(() => { + let intervalId = null + + if (autoRefresh) { + intervalId = setInterval(() => { + onRefresh() + }, 5000) + } + + return () => { + if (intervalId) { + clearInterval(intervalId) + } + } + }, [autoRefresh, onRefresh]) + + const toggleAutoRefresh = () => { + setAutoRefresh(!autoRefresh) + } + return ( <> @@ -256,15 +279,52 @@ const EvalsEvaluation = () => { ) : ( - } + - Refresh - + {autoRefresh ? : } + + + + { Latest Version Average Metrics Last Evaluated - Chatflow(s) + Flow(s) Dataset @@ -438,7 +498,7 @@ function EvaluationRunRow(props) { } const goToDataset = (id) => { - navigate(`/dataset_rows/${id}`) + window.open(`/dataset_rows/${id}`, '_blank') } const onSelectAllChildClick = (event) => { @@ -513,10 +573,6 @@ function EvaluationRunRow(props) { } } - const goToCanvas = (id) => { - navigate(`/canvas/${id}`) - } - const getStatusColor = (status) => { switch (status) { case 'pending': @@ -619,16 +675,11 @@ function EvaluationRunRow(props) { {props.item?.usedFlows?.map((usedFlow, index) => ( goToCanvas(props.item.chatIds[index])} > ))} @@ -637,6 +688,7 @@ function EvaluationRunRow(props) {