Chore/refactor (#4454)

* markdown files and env examples cleanup

* components update

* update jsonlines description

* server refactor

* update telemetry

* add execute custom node

* add UI refactor

* add username and password authenticate

* correctly retrieve past images in agentflowv2

* disable e2e temporarily

* add existing username and password authenticate

* update migration to default workspace

* update todo

* blob storage migrating

* throw error on agent tool call error

* add missing execution import

* add referral

* chore: add error message when importData is undefined

* migrate api keys to db

* fix: data too long for column executionData

* migrate api keys from json to db at init

* add info on account setup

* update docstore missing fields

---------

Co-authored-by: chungyau97 <chungyau97@gmail.com>
This commit is contained in:
Henry Heng
2025-05-27 14:29:42 +08:00
committed by GitHub
parent e35a126b46
commit 5a37227d14
560 changed files with 62127 additions and 4100 deletions
@@ -0,0 +1,165 @@
import { RunCollectorCallbackHandler } from '@langchain/core/tracers/run_collector'
import { Run } from '@langchain/core/tracers/base'
import { EvaluationRunner } from './EvaluationRunner'
import { encoding_for_model, get_encoding } from '@dqbd/tiktoken'
/**
 * LangChain run collector used while an evaluation is running.
 *
 * Every metric it derives from a traced run (elapsed time per run type, provider, model and
 * token usage) is forwarded to `EvaluationRunner.addMetrics` under the evaluation run id that
 * was passed to the constructor.
 */
export class EvaluationRunTracer extends RunCollectorCallbackHandler {
    /** Key under which this tracer stores its metrics in EvaluationRunner. */
    evaluationRunId: string
    /** Last model name seen by onRunUpdate; fallback for ended runs that carry no model name. */
    model: string

    /**
     * @param id evaluation run id used as the metrics key
     */
    constructor(id: string) {
        super()
        this.evaluationRunId = id
    }

    /** Delegates to the base collector unchanged. */
    async persistRun(run: Run): Promise<void> {
        return super.persistRun(run)
    }

    /**
     * Estimates the number of prompt tokens of a run.
     *
     * Counts the content of every message in `run.inputs.messages[0]` plus `run.inputs.prompts[0]`.
     *
     * @param encoding tokenizer exposing `encode(text)`; a falsy value yields 0
     * @param run traced run whose inputs are inspected
     * @returns estimated prompt token count
     */
    countPromptTokens = (encoding: any, run: Run): number => {
        if (!encoding) return 0
        let promptTokenCount = 0
        const firstMessageBatch = run.inputs?.messages?.[0]
        if (Array.isArray(firstMessageBatch)) {
            for (const message of firstMessageBatch) {
                // messages arrive either flat ({ content }) or wrapped by their role class name
                const content =
                    message?.content || message?.SystemMessage?.content || message?.HumanMessage?.content || message?.AIMessage?.content
                promptTokenCount += EvaluationRunTracer.encodedLength(encoding, content)
            }
        }
        promptTokenCount += EvaluationRunTracer.encodedLength(encoding, run.inputs?.prompts?.[0])
        return promptTokenCount
    }

    /**
     * Estimates the number of completion tokens of a run from `run.outputs.generations[0]`,
     * using each chunk's `text` or, failing that, its `message.content`.
     *
     * @param encoding tokenizer exposing `encode(text)`; a falsy value yields 0
     * @param run traced run whose outputs are inspected
     * @returns estimated completion token count
     */
    countCompletionTokens = (encoding: any, run: Run): number => {
        if (!encoding) return 0
        const firstGeneration = run.outputs?.generations?.[0]
        if (!Array.isArray(firstGeneration)) return 0
        let completionTokenCount = 0
        for (const chunk of firstGeneration) {
            completionTokenCount += EvaluationRunTracer.encodedLength(encoding, chunk?.text || chunk?.message?.content)
        }
        return completionTokenCount
    }

    /**
     * Reads the model name from the serialized kwargs (`model`, `model_name`) or from the
     * run metadata (`ls_model_name`, `fw_model_name`), in that order.
     */
    extractModelName = (run: Run): string => {
        const kwargs = (run?.serialized as any)?.kwargs
        const metadata = (run?.extra as any)?.metadata
        return kwargs?.model || kwargs?.model_name || metadata?.ls_model_name || metadata?.fw_model_name
    }

    /**
     * Records provider and token usage once an LLM run has ended.
     *
     * Token usage comes from, in order of preference: `llmOutput.tokenUsage`, the first
     * generation's `usage_metadata`, or a tiktoken-based estimate.
     */
    onLLMEnd?(run: Run): void | Promise<void> {
        if (run.name) {
            // Bedrock chat runs are reported under the provider key 'awsChatBedrock'
            const provider = run.name === 'BedrockChat' ? 'awsChatBedrock' : run.name
            EvaluationRunner.addMetrics(this.evaluationRunId, JSON.stringify({ provider: provider }))
        }

        // fall back to the model captured in onRunUpdate when the ended run carries none
        const model = this.extractModelName(run) || this.model
        const tokenUsage = run.outputs?.llmOutput?.tokenUsage
        const usageMetadata = run.outputs?.generations?.[0]?.[0]?.message?.usage_metadata

        if (tokenUsage) {
            const metric = {
                completionTokens: tokenUsage.completionTokens,
                promptTokens: tokenUsage.promptTokens,
                model: model,
                totalTokens: tokenUsage.totalTokens
            }
            EvaluationRunner.addMetrics(this.evaluationRunId, JSON.stringify(metric))
        } else if (usageMetadata?.total_tokens) {
            const metric = {
                completionTokens: usageMetadata.output_tokens,
                promptTokens: usageMetadata.input_tokens,
                model: model,
                totalTokens: usageMetadata.total_tokens
            }
            EvaluationRunner.addMetrics(this.evaluationRunId, JSON.stringify(metric))
        } else {
            const estimate = this.estimateTokenUsage(model, run)
            const metric = {
                completionTokens: estimate.completionTokens,
                promptTokens: estimate.promptTokens,
                model: model,
                totalTokens: estimate.promptTokens + estimate.completionTokens
            }
            EvaluationRunner.addMetrics(this.evaluationRunId, JSON.stringify(metric))
            //cleanup
            this.model = ''
        }
    }

    /**
     * Records the elapsed time of every updated run under its run type and, for LLM runs,
     * the model name and any `estimatedTokenUsage` reported in the LLM output.
     */
    async onRunUpdate(run: Run): Promise<void> {
        EvaluationRunner.addMetrics(this.evaluationRunId, JSON.stringify({ [run.run_type]: elapsed(run) }))
        if (run.run_type !== 'llm') return

        const model = this.extractModelName(run)
        if (model) {
            EvaluationRunner.addMetrics(this.evaluationRunId, JSON.stringify({ model: model }))
            this.model = model
        }
        // OpenAI non streaming models
        const estimatedTokenUsage = run.outputs?.llmOutput?.estimatedTokenUsage
        if (estimatedTokenUsage && typeof estimatedTokenUsage === 'object' && Object.keys(estimatedTokenUsage).length > 0) {
            // forwarded as-is (an object, not a JSON string)
            EvaluationRunner.addMetrics(this.evaluationRunId, estimatedTokenUsage)
        }
    }

    /**
     * Estimates prompt/completion tokens with tiktoken, trying the model-specific encoding
     * first and 'cl100k_base' second. Every encoder is released with `free()` after use.
     * Returns zeros when neither attempt succeeds.
     */
    private estimateTokenUsage(model: string | undefined, run: Run): { promptTokens: number; completionTokens: number } {
        const countWith = (encoding: any) => {
            try {
                return {
                    promptTokens: this.countPromptTokens(encoding, run),
                    completionTokens: this.countCompletionTokens(encoding, run)
                }
            } finally {
                // tiktoken encoders hold WASM memory that is only released explicitly
                encoding?.free?.()
            }
        }
        try {
            return countWith(encoding_for_model(model as any))
        } catch (e) {
            try {
                // as tiktoken will fail for non openai models, assume that is 'cl100k_base'
                return countWith(get_encoding('cl100k_base'))
            } catch (e) {
                // stay silent
                return { promptTokens: 0, completionTokens: 0 }
            }
        }
    }

    /**
     * Token length of a message content value: a plain string, or an array of parts of which
     * only strings and `{ type: 'text', text }` entries are counted. Anything else counts as 0.
     */
    private static encodedLength(encoding: any, content: unknown): number {
        if (typeof content === 'string') {
            return content ? encoding.encode(content).length : 0
        }
        if (Array.isArray(content)) {
            let total = 0
            for (const part of content) {
                const text = typeof part === 'string' ? part : part?.type === 'text' ? part.text : undefined
                if (typeof text === 'string' && text) total += encoding.encode(text).length
            }
            return total
        }
        return 0
    }
}
/**
 * Formats the duration of a run (`end_time - start_time`) with two decimals.
 * Returns an empty string while the run has no end time.
 */
function elapsed(run: Run) {
    const { start_time: startedAt, end_time: finishedAt } = run
    if (finishedAt) {
        return (finishedAt - startedAt).toFixed(2)
    }
    return ''
}
@@ -0,0 +1,186 @@
import { ChatMessage, LLMEndEvent, LLMStartEvent, LLMStreamEvent, MessageContentTextDetail, RetrievalEndEvent, Settings } from 'llamaindex'
import { EvaluationRunner } from './EvaluationRunner'
import { additionalCallbacks, ICommonObject, INodeData } from '../src'
import { RetrievalStartEvent } from 'llamaindex/dist/type/llm/types'
import { AgentEndEvent, AgentStartEvent } from 'llamaindex/dist/type/agent/types'
import { encoding_for_model } from '@dqbd/tiktoken'
import { MessageContent } from '@langchain/core/messages'
/**
 * Collects evaluation metrics for LlamaIndex based flows.
 *
 * Listeners are attached to `Settings.callbackManager` only once (guarded by the static
 * `cbInit` flag). Each listener looks for an `evaluationRunId` on the event's caller (or the
 * caller of its parent) and ignores events that do not carry one. Metrics are forwarded to
 * `EvaluationRunner.addMetrics` under that id.
 */
export class EvaluationRunTracerLlama {
    evaluationRunId: string
    /** True once the callback listeners have been registered. */
    static cbInit = false
    /** Start timestamps keyed by `<evalID>_<label>` (label: llm | retriever | agent). */
    static startTimes = new Map<string, number>()
    /** Model name per evaluation run id, captured on 'llm-start'. */
    static models = new Map<string, string>()
    /** Estimated token counts keyed by `<evalID>_promptTokens` / `<evalID>_outputTokens`. */
    static tokenCounts = new Map<string, number>()

    /**
     * @param id evaluation run id this tracer belongs to
     */
    constructor(id: string) {
        this.evaluationRunId = id
        EvaluationRunTracerLlama.constructCallBacks()
    }

    /** Registers the LlamaIndex listeners; subsequent calls are no-ops. */
    static constructCallBacks = () => {
        if (EvaluationRunTracerLlama.cbInit) return
        const { startTimes, models, tokenCounts } = EvaluationRunTracerLlama

        Settings.callbackManager.on('llm-start', (event: LLMStartEvent) => {
            const evalID = EvaluationRunTracerLlama.resolveEvaluationRunId(event)
            if (!evalID) return
            const model = (event as any).reason?.caller?.model
            if (model) {
                models.set(evalID, model)
                const promptTokens = EvaluationRunTracerLlama.withEncoding(model, (encoding) => {
                    const { messages } = event.detail.payload
                    return messages.reduce((count: number, message: ChatMessage) => {
                        return count + encoding.encode(extractText(message.content)).length
                    }, 0)
                })
                if (promptTokens !== undefined) {
                    tokenCounts.set(evalID + '_promptTokens', promptTokens)
                    tokenCounts.set(evalID + '_outputTokens', 0)
                }
            }
            startTimes.set(evalID + '_llm', event.timeStamp)
        })

        Settings.callbackManager.on('llm-end', (event: LLMEndEvent) => {
            EvaluationRunTracerLlama.calculateAndSetMetrics(event, 'llm')
        })

        Settings.callbackManager.on('llm-stream', (event: LLMStreamEvent) => {
            const evalID = EvaluationRunTracerLlama.resolveEvaluationRunId(event)
            if (!evalID) return
            const model = (event as any).reason?.caller?.model
            const deltaTokens = EvaluationRunTracerLlama.withEncoding(model, (encoding) => {
                const { chunk } = event.detail.payload
                return encoding.encode(extractText(chunk.delta)).length
            })
            if (deltaTokens !== undefined) {
                const outputKey = evalID + '_outputTokens'
                tokenCounts.set(outputKey, (tokenCounts.get(outputKey) || 0) + deltaTokens)
            }
        })

        Settings.callbackManager.on('retrieve-start', (event: RetrievalStartEvent) => {
            const evalID = EvaluationRunTracerLlama.resolveEvaluationRunId(event)
            if (evalID) {
                startTimes.set(evalID + '_retriever', event.timeStamp)
            }
        })

        Settings.callbackManager.on('retrieve-end', (event: RetrievalEndEvent) => {
            EvaluationRunTracerLlama.calculateAndSetMetrics(event, 'retriever')
        })

        Settings.callbackManager.on('agent-start', (event: AgentStartEvent) => {
            const evalID = EvaluationRunTracerLlama.resolveEvaluationRunId(event)
            if (evalID) {
                startTimes.set(evalID + '_agent', event.timeStamp)
            }
        })

        Settings.callbackManager.on('agent-end', (event: AgentEndEvent) => {
            EvaluationRunTracerLlama.calculateAndSetMetrics(event, 'agent')
        })

        EvaluationRunTracerLlama.cbInit = true
    }

    /**
     * Returns the evaluation run id attached to the event's parent caller or, failing that,
     * to its direct caller. Null-safe: an event without `reason` yields undefined.
     */
    private static resolveEvaluationRunId(event: any): string | undefined {
        const reason = event?.reason
        return reason?.parent?.caller?.evaluationRunId || reason?.caller?.evaluationRunId
    }

    /**
     * Runs `use` with the tiktoken encoder for `model` and frees the encoder afterwards.
     * Returns undefined when the encoder cannot be created or `use` throws, so token
     * estimation stays best-effort and never breaks the event listener.
     */
    private static withEncoding<T>(model: unknown, use: (encoding: ReturnType<typeof encoding_for_model>) => T): T | undefined {
        let encoding: ReturnType<typeof encoding_for_model> | undefined
        try {
            encoding = encoding_for_model(model as Parameters<typeof encoding_for_model>[0])
            return use(encoding)
        } catch (e) {
            // catch the error and continue to work.
            return undefined
        } finally {
            // tiktoken encoders hold WASM memory that is only released explicitly
            encoding?.free()
        }
    }

    /**
     * Emits the metrics for an '*-end' event.
     *
     * Usage reported by the provider (`response.raw.usage` or the Bedrock invocation metrics)
     * wins; otherwise the elapsed time for `label` and the locally estimated token counts are
     * emitted.
     *
     * @param event the LlamaIndex end event
     * @param label one of 'llm', 'retriever' or 'agent'; selects the start time entry
     */
    private static calculateAndSetMetrics(event: any, label: string) {
        const { startTimes, models, tokenCounts } = EvaluationRunTracerLlama
        const evalID = EvaluationRunTracerLlama.resolveEvaluationRunId(event)
        if (!evalID) return

        const startTime = startTimes.get(evalID + '_' + label) as number
        const caller = event.reason?.caller
        const model = caller?.model || caller?.llm?.model || models.get(evalID)
        const response = event.detail?.payload?.response
        const outputKey = evalID + '_outputTokens'
        const promptKey = evalID + '_promptTokens'

        if (response?.message && model) {
            const messageTokens = EvaluationRunTracerLlama.withEncoding(model, (encoding) => {
                return encoding.encode(response.message?.content || '').length
            })
            if (messageTokens !== undefined) {
                tokenCounts.set(outputKey, (tokenCounts.get(outputKey) || 0) + messageTokens)
            }
        }

        // `raw` may be absent, so both lookups are null-safe
        const usage = response?.raw?.usage
        const bedrockMetrics = response?.raw?.['amazon-bedrock-invocationMetrics']

        if (usage) {
            // Anthropic
            if (usage.output_tokens) {
                const metric = {
                    completionTokens: usage.output_tokens,
                    promptTokens: usage.input_tokens,
                    model: model,
                    totalTokens: usage.input_tokens + usage.output_tokens
                }
                EvaluationRunner.addMetrics(evalID, JSON.stringify(metric))
            } else if (usage.completion_tokens) {
                const metric = {
                    completionTokens: usage.completion_tokens,
                    promptTokens: usage.prompt_tokens,
                    model: model,
                    totalTokens: usage.total_tokens
                }
                EvaluationRunner.addMetrics(evalID, JSON.stringify(metric))
            }
        } else if (bedrockMetrics) {
            const metric = {
                completionTokens: bedrockMetrics.outputTokenCount,
                promptTokens: bedrockMetrics.inputTokenCount,
                model: response.raw.model,
                totalTokens: bedrockMetrics.inputTokenCount + bedrockMetrics.outputTokenCount
            }
            EvaluationRunner.addMetrics(evalID, JSON.stringify(metric))
        } else {
            const metric = {
                [label]: (event.timeStamp - startTime).toFixed(2),
                completionTokens: tokenCounts.get(outputKey),
                promptTokens: tokenCounts.get(promptKey),
                model: model || '',
                totalTokens: (tokenCounts.get(outputKey) || 0) + (tokenCounts.get(promptKey) || 0)
            }
            EvaluationRunner.addMetrics(evalID, JSON.stringify(metric))
        }

        //cleanup
        startTimes.delete(evalID + '_' + label)
        // NOTE(review): the next two deletes target startTimes, which never holds
        // '_outputTokens' / '_promptTokens' keys (those live in tokenCounts), so they are
        // no-ops and tokenCounts entries are never removed. Left as-is because a later end
        // event for the same run still reads tokenCounts in the fallback branch above;
        // confirm the intended lifecycle before moving this cleanup to tokenCounts.
        startTimes.delete(outputKey)
        startTimes.delete(promptKey)
        models.delete(evalID)
    }

    /**
     * Prepares a LlamaIndex node for an evaluation run.
     *
     * When `options.evaluationRunId` and `callerObj` are both set, flags the options with
     * `llamaIndex = true`, awaits `additionalCallbacks(nodeData, options)` and defines an
     * `evaluationRunId` property on `callerObj`, which is the property the registered
     * listeners look up on the event caller.
     */
    static async injectEvaluationMetadata(nodeData: INodeData, options: ICommonObject, callerObj: any) {
        if (options.evaluationRunId && callerObj) {
            // these are needed for evaluation runs
            options.llamaIndex = true
            await additionalCallbacks(nodeData, options)
            Object.defineProperty(callerObj, 'evaluationRunId', {
                enumerable: true,
                configurable: true,
                writable: true,
                value: options.evaluationRunId
            })
        }
    }
}
// from https://github.com/run-llama/LlamaIndexTS/blob/main/packages/core/src/llm/utils.ts
/**
 * Flattens message content to plain text.
 *
 * A string is returned unchanged. For an array of content parts only the parts with
 * `type === 'text'` are kept and their texts are joined with a blank line. Any other value
 * triggers a warning and is returned stringified.
 */
export function extractText(message: MessageContent): string {
    if (typeof message === 'string') {
        return message
    }
    if (Array.isArray(message)) {
        // keep just the textual parts so they can be passed on as one string
        const textParts: string[] = []
        for (const part of message) {
            if (part.type === 'text') {
                textParts.push((part as MessageContentTextDetail).text)
            }
        }
        return textParts.join('\n\n')
    }
    console.warn('extractText called with non-MessageContent message, this is likely a bug.')
    return `${message}`
}
@@ -0,0 +1,172 @@
import axios from 'axios'
import { v4 as uuidv4 } from 'uuid'
import { ICommonObject } from '../src'
import { getModelConfigByModelName, MODEL_TYPE } from '../src/modelLoader'
/**
 * Runs dataset rows against chatflows through the prediction API and keeps the per-run
 * metrics that tracers report while a prediction is executing.
 */
export class EvaluationRunner {
    /** Metrics reported so far, keyed by evaluation run id. */
    static metrics = new Map<string, string[]>()

    /**
     * Returns the metrics collected for `id` and removes them from the store.
     *
     * Before returning, the last provider and model found in the metrics are used to look up
     * a model config (chat models first, then LLMs); when one is found it is appended as a
     * `{ cost_values }` metric. Lookup failures are ignored on purpose.
     *
     * @param id evaluation run id
     * @returns the collected metrics, or undefined when none were recorded
     */
    static async getAndDeleteMetrics(id: string) {
        const val = EvaluationRunner.metrics.get(id)
        if (val) {
            try {
                //first lets get the provider and model
                let selectedModel: string | undefined
                let selectedProvider: string | undefined
                for (const metric of val) {
                    // a metric that cannot be parsed is skipped instead of aborting the lookup
                    const parsed = EvaluationRunner.parseMetric(metric)
                    if (parsed?.model) selectedModel = parsed.model
                    if (parsed?.provider) selectedProvider = parsed.provider
                }
                const modelConfig =
                    (await getModelConfigByModelName(MODEL_TYPE.CHAT, selectedProvider, selectedModel)) ||
                    (await getModelConfigByModelName(MODEL_TYPE.LLM, selectedProvider, selectedModel))
                if (modelConfig) {
                    val.push(JSON.stringify({ cost_values: modelConfig }))
                }
            } catch (error) {
                //stay silent
            }
        }
        EvaluationRunner.metrics.delete(id)
        return val
    }

    /**
     * Appends a metric to the list stored for `id`, creating the list on first use.
     */
    static addMetrics(id: string, metric: string) {
        const existing = EvaluationRunner.metrics.get(id)
        if (existing) {
            existing.push(metric)
        } else {
            EvaluationRunner.metrics.set(id, [metric])
        }
    }

    /**
     * Normalises a stored metric: objects are returned as they are, strings are JSON-parsed.
     * Returns undefined for anything that does not end up as an object.
     */
    private static parseMetric(metric: unknown): ICommonObject | undefined {
        if (metric && typeof metric === 'object') return metric as ICommonObject
        if (typeof metric !== 'string') return undefined
        try {
            const parsed: unknown = JSON.parse(metric)
            return parsed && typeof parsed === 'object' ? (parsed as ICommonObject) : undefined
        } catch (error) {
            return undefined
        }
    }

    baseURL = ''

    /**
     * @param baseURL base URL the prediction endpoint is resolved against
     */
    constructor(baseURL: string) {
        this.baseURL = baseURL
    }

    /**
     * Returns the API key registered for `chatflowId`, or '' when there is none.
     */
    getChatflowApiKey(chatflowId: string, apiKeys: { chatflowId: string; apiKey: string }[] = []) {
        const match = apiKeys.find((item) => item.chatflowId === chatflowId)
        return match?.apiKey || ''
    }

    /**
     * Evaluates every dataset row against every chatflow listed in `data.chatflowId`
     * (a JSON encoded array of ids). Chatflows are processed one after another.
     *
     * @returns an object with `evaluationId`, `runDate` and one entry in `rows` per dataset
     * row; each row's `evaluations` array receives one result per chatflow
     */
    public async runEvaluations(data: ICommonObject) {
        const chatflowIds = JSON.parse(data.chatflowId)
        const returnData: ICommonObject = {}
        returnData.evaluationId = data.evaluationId
        returnData.runDate = new Date()
        returnData.rows = []
        for (const row of data.dataset.rows) {
            returnData.rows.push({
                input: row.input,
                expectedOutput: row.output,
                itemNo: row.sequenceNo,
                evaluations: [],
                status: 'pending'
            })
        }
        for (const chatflowId of chatflowIds) {
            await this.evaluateChatflow(chatflowId, this.getChatflowApiKey(chatflowId, data.apiKeys), data, returnData)
        }
        return returnData
    }

    /**
     * Posts every dataset row to the prediction endpoint of one chatflow and appends the
     * outcome to `returnData.rows[i].evaluations`.
     *
     * A failed request does not stop the loop: the row gets an entry with status 'error'.
     *
     * @param chatflowId chatflow to call
     * @param apiKey sent as bearer token when not empty
     * @param data evaluation request; `dataset.rows` and the optional `sessionId` are read
     * @param returnData result object created by runEvaluations; mutated and returned
     */
    async evaluateChatflow(chatflowId: string, apiKey: string, data: any, returnData: any) {
        for (let i = 0; i < data.dataset.rows.length; i++) {
            const item = data.dataset.rows[i]
            const uuid = uuidv4()
            const headers: any = {
                'X-Request-ID': uuid,
                'X-Flowise-Evaluation': 'true'
            }
            if (apiKey) {
                headers['Authorization'] = `Bearer ${apiKey}`
            }
            const startTime = performance.now()
            const runData: any = { chatflowId: chatflowId, startTime: startTime }
            const postData: any = { question: item.input, evaluationRunId: uuid, evaluation: true }
            if (data.sessionId) {
                postData.overrideConfig = { sessionId: data.sessionId }
            }
            try {
                const response = await axios.post(`${this.baseURL}/api/v1/prediction/${chatflowId}`, postData, { headers: headers })
                const timeTaken = (performance.now() - startTime).toFixed(2)
                const latencyMetric = { apiLatency: timeTaken }
                const reportedMetrics = response?.data?.metrics
                if (Array.isArray(reportedMetrics)) {
                    reportedMetrics.push(latencyMetric)
                    runData.metrics = reportedMetrics
                } else {
                    // no usable metrics in the response: keep at least the latency
                    runData.metrics = [latencyMetric]
                }
                runData.status = 'complete'
                let resultText = ''
                if (response.data.text) resultText = response.data.text
                else if (response.data.json) resultText = '```json\n' + JSON.stringify(response.data.json, null, 2)
                else resultText = JSON.stringify(response.data, null, 2)
                runData.actualOutput = resultText
                runData.latency = timeTaken
                runData.error = ''
            } catch (error: any) {
                runData.status = 'error'
                runData.actualOutput = ''
                runData.error = error?.response?.data?.message || error?.message || 'Unknown error'
                if (typeof runData.error === 'string') {
                    const dashIndex = runData.error.indexOf('-')
                    if (dashIndex > -1) {
                        // if there is a dash, remove all content before
                        runData.error = 'Error: ' + runData.error.slice(dashIndex + 1).trim()
                    }
                }
                const timeTaken = (performance.now() - startTime).toFixed(2)
                runData.metrics = [{ apiLatency: timeTaken }]
                runData.latency = timeTaken
            }
            runData.uuid = uuid
            returnData.rows[i].evaluations.push(runData)
        }
        return returnData
    }
}