Bugfix/Prevent duplicated metrics on prometheus and opentel (#4269)

Prevent duplicated metrics on Prometheus and OpenTelemetry
This commit is contained in:
Henry Heng
2025-04-08 17:01:26 +08:00
committed by GitHub
parent f963e5aa48
commit 3098c8e75f
2 changed files with 287 additions and 129 deletions
+107 -7
View File
@@ -6,6 +6,9 @@ import { diag, DiagLogLevel, DiagConsoleLogger, Attributes, Counter } from '@ope
import { getVersion } from 'flowise-components' import { getVersion } from 'flowise-components'
import express from 'express' import express from 'express'
// Create a static map to track created metrics and prevent duplicates
const createdMetrics = new Map<string, boolean>()
export class OpenTelemetry implements IMetricsProvider { export class OpenTelemetry implements IMetricsProvider {
private app: express.Application private app: express.Application
private resource: Resource private resource: Resource
@@ -30,6 +33,9 @@ export class OpenTelemetry implements IMetricsProvider {
if (process.env.METRICS_OPEN_TELEMETRY_DEBUG === 'true') { if (process.env.METRICS_OPEN_TELEMETRY_DEBUG === 'true') {
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG) diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG)
} }
// Clear metrics tracking on new instance
createdMetrics.clear()
} }
public getName(): string { public getName(): string {
@@ -37,6 +43,7 @@ export class OpenTelemetry implements IMetricsProvider {
} }
async initializeCounters(): Promise<void> { async initializeCounters(): Promise<void> {
try {
// Define the resource with the service name for trace grouping // Define the resource with the service name for trace grouping
const flowiseVersion = await getVersion() const flowiseVersion = await getVersion()
@@ -59,14 +66,42 @@ export class OpenTelemetry implements IMetricsProvider {
process.exit(1) // Exit if invalid protocol type is specified process.exit(1) // Exit if invalid protocol type is specified
} }
// Handle any existing metric exporter
if (this.otlpMetricExporter) {
try {
await this.otlpMetricExporter.shutdown()
} catch (error) {
// Ignore shutdown errors
}
}
this.otlpMetricExporter = new OTLPMetricExporter({ this.otlpMetricExporter = new OTLPMetricExporter({
url: process.env.METRICS_OPEN_TELEMETRY_METRIC_ENDPOINT // OTLP endpoint for metrics url: process.env.METRICS_OPEN_TELEMETRY_METRIC_ENDPOINT // OTLP endpoint for metrics
}) })
// Clean up any existing metric reader
if (this.metricReader) {
try {
await this.metricReader.shutdown()
} catch (error) {
// Ignore shutdown errors
}
}
this.metricReader = new PeriodicExportingMetricReader({ this.metricReader = new PeriodicExportingMetricReader({
exporter: this.otlpMetricExporter, exporter: this.otlpMetricExporter,
exportIntervalMillis: 5000 // Export metrics every 5 seconds exportIntervalMillis: 5000 // Export metrics every 5 seconds
}) })
// Clean up any existing meter provider
if (this.meterProvider) {
try {
await this.meterProvider.shutdown()
} catch (error) {
// Ignore shutdown errors
}
}
this.meterProvider = new MeterProvider({ resource: this.resource, readers: [this.metricReader] }) this.meterProvider = new MeterProvider({ resource: this.resource, readers: [this.metricReader] })
const meter = this.meterProvider.getMeter('flowise-metrics') const meter = this.meterProvider.getMeter('flowise-metrics')
@@ -74,6 +109,9 @@ export class OpenTelemetry implements IMetricsProvider {
// for each counter in the enum, create a new promClient.Counter and add it to the registry // for each counter in the enum, create a new promClient.Counter and add it to the registry
const enumEntries = Object.entries(FLOWISE_METRIC_COUNTERS) const enumEntries = Object.entries(FLOWISE_METRIC_COUNTERS)
enumEntries.forEach(([name, value]) => { enumEntries.forEach(([name, value]) => {
try {
// Check if we've already created this metric
if (!createdMetrics.has(value)) {
// derive proper counter name from the enum value (chatflow_created = Chatflow Created) // derive proper counter name from the enum value (chatflow_created = Chatflow Created)
const properCounterName: string = name.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase()) const properCounterName: string = name.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase())
this.counters.set( this.counters.set(
@@ -82,51 +120,102 @@ export class OpenTelemetry implements IMetricsProvider {
description: properCounterName description: properCounterName
}) })
) )
createdMetrics.set(value, true)
}
} catch (error) {
// Log error but continue with other metrics
console.error(`Error creating metric ${value}:`, error)
}
}) })
// in addition to the enum counters, add a few more custom counters try {
// Add version gauge if not already created
if (!createdMetrics.has('flowise_version')) {
const versionGuage = meter.createGauge('flowise_version', { const versionGuage = meter.createGauge('flowise_version', {
description: 'Flowise version' description: 'Flowise version'
}) })
// remove the last dot from the version string, e.g. 2.1.3 -> 2.13 (guage needs a number - float) // remove the last dot from the version string, e.g. 2.1.3 -> 2.13 (gauge needs a number - float)
const formattedVersion = flowiseVersion.version.replace(/\.(\d+)$/, '$1') const formattedVersion = flowiseVersion.version.replace(/\.(\d+)$/, '$1')
versionGuage.record(parseFloat(formattedVersion)) versionGuage.record(parseFloat(formattedVersion))
createdMetrics.set('flowise_version', true)
}
} catch (error) {
console.error('Error creating version gauge:', error)
}
// Counter for HTTP requests with method, path, and status as labels try {
// HTTP requests counter
if (!createdMetrics.has('http_requests_total')) {
this.httpRequestCounter = meter.createCounter('http_requests_total', { this.httpRequestCounter = meter.createCounter('http_requests_total', {
description: 'Counts the number of HTTP requests received' description: 'Counts the number of HTTP requests received'
}) })
createdMetrics.set('http_requests_total', true)
}
} catch (error) {
console.error('Error creating HTTP request counter:', error)
}
// Histogram to measure HTTP request duration in milliseconds try {
// HTTP request duration histogram
if (!createdMetrics.has('http_request_duration_ms')) {
this.httpRequestDuration = meter.createHistogram('http_request_duration_ms', { this.httpRequestDuration = meter.createHistogram('http_request_duration_ms', {
description: 'Records the duration of HTTP requests in ms' description: 'Records the duration of HTTP requests in ms'
}) })
createdMetrics.set('http_request_duration_ms', true)
}
} catch (error) {
console.error('Error creating HTTP request duration histogram:', error)
}
await this.setupMetricsEndpoint()
} catch (error) {
console.error('Error initializing OpenTelemetry metrics:', error)
// Don't throw - allow app to continue without metrics
}
} }
/**
 * Records a single HTTP request duration sample.
 *
 * Does nothing when `this.httpRequestDuration` has not been set. Any error raised
 * while recording is logged to the console and never rethrown.
 *
 * @param durationMs - request duration in milliseconds
 * @param method - HTTP method, attached as the `method` attribute
 * @param path - request path, attached as the `path` attribute
 * @param status - HTTP status code, attached (stringified) as the `status` attribute
 */
private recordHttpRequestDuration(durationMs: number, method: string, path: string, status: number) {
    try {
        const histogram = this.httpRequestDuration
        if (!histogram) return

        const attributes = { method, path, status: status.toString() }
        histogram.record(durationMs, attributes)
    } catch (error) {
        // Metrics are best-effort: report the failure but keep the application running
        console.error('Error recording HTTP request duration:', error)
    }
}
/**
 * Adds one to the HTTP request counter for the given method/path/status combination.
 *
 * Does nothing when `this.httpRequestCounter` has not been set. Any error raised
 * while counting is logged to the console and never rethrown.
 *
 * @param method - HTTP method, attached as the `method` attribute
 * @param path - request path, attached as the `path` attribute
 * @param status - HTTP status code, attached (stringified) as the `status` attribute
 */
private recordHttpRequest(method: string, path: string, status: number) {
    try {
        const requestCounter = this.httpRequestCounter
        if (!requestCounter) return

        const attributes = { method, path, status: status.toString() }
        requestCounter.add(1, attributes)
    } catch (error) {
        // Metrics are best-effort: report the failure but keep the application running
        console.error('Error recording HTTP request:', error)
    }
}
async setupMetricsEndpoint(): Promise<void> { async setupMetricsEndpoint(): Promise<void> {
try {
// Graceful shutdown for telemetry data flushing // Graceful shutdown for telemetry data flushing
process.on('SIGTERM', async () => { process.on('SIGTERM', async () => {
await this.metricReader.shutdown() try {
await this.meterProvider.shutdown() if (this.metricReader) await this.metricReader.shutdown()
if (this.meterProvider) await this.meterProvider.shutdown()
} catch (error) {
console.error('Error during metrics shutdown:', error)
}
}) })
// Runs before each requests // Runs before each requests
@@ -138,20 +227,31 @@ export class OpenTelemetry implements IMetricsProvider {
// Runs after each requests // Runs after each requests
this.app.use((req, res, next) => { this.app.use((req, res, next) => {
res.on('finish', async () => { res.on('finish', async () => {
try {
if (res.locals.startEpoch) { if (res.locals.startEpoch) {
const responseTimeInMs = Date.now() - res.locals.startEpoch const responseTimeInMs = Date.now() - res.locals.startEpoch
this.recordHttpRequest(req.method, req.path, res.statusCode) this.recordHttpRequest(req.method, req.path, res.statusCode)
this.recordHttpRequestDuration(responseTimeInMs, req.method, req.path, res.statusCode) this.recordHttpRequestDuration(responseTimeInMs, req.method, req.path, res.statusCode)
} }
} catch (error) {
console.error('Error in metrics middleware:', error)
}
}) })
next() next()
}) })
} catch (error) {
console.error('Error setting up metrics endpoint:', error)
}
} }
/**
 * Increments the named counter by one, passing `payload` as the attributes.
 *
 * Names that are not present in `this.counters` are ignored. Any error raised
 * while incrementing is logged to the console and never rethrown.
 *
 * @param counter - key of the counter in `this.counters`
 * @param payload - attributes forwarded to the counter's `add` call
 */
async incrementCounter(counter: string, payload: any): Promise<void> {
    try {
        // Unknown counter names are silently skipped
        if (!this.counters.has(counter)) return

        const target = this.counters.get(counter) as Counter<Attributes>
        target.add(1, payload)
    } catch (error) {
        console.error(`Error incrementing counter ${counter}:`, error)
    }
}
} }
+65 -7
View File
@@ -12,6 +12,9 @@ export class Prometheus implements IMetricsProvider {
/**
 * Builds the Prometheus provider around the given express application.
 *
 * @param app - express application kept on the instance as `this.app`
 */
constructor(app: express.Application) {
    // Wipe whatever is currently held in prom-client's default registry to avoid conflicts
    promClient.register.clear()

    // All metrics owned by this provider live in their own dedicated registry
    this.register = new promClient.Registry()
    this.app = app
}
@@ -27,48 +30,87 @@ export class Prometheus implements IMetricsProvider {
// look at the FLOWISE_COUNTER enum in Interface.Metrics.ts and get all values // look at the FLOWISE_COUNTER enum in Interface.Metrics.ts and get all values
// for each counter in the enum, create a new promClient.Counter and add it to the registry // for each counter in the enum, create a new promClient.Counter and add it to the registry
this.counters = new Map<string, promClient.Counter<string>>() this.counters = new Map<string, promClient.Counter<string> | promClient.Gauge<string> | promClient.Histogram<string>>()
const enumEntries = Object.entries(FLOWISE_METRIC_COUNTERS) const enumEntries = Object.entries(FLOWISE_METRIC_COUNTERS)
enumEntries.forEach(([name, value]) => { enumEntries.forEach(([name, value]) => {
// derive proper counter name from the enum value (chatflow_created = Chatflow Created) // derive proper counter name from the enum value (chatflow_created = Chatflow Created)
const properCounterName: string = name.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase()) const properCounterName: string = name.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase())
try {
this.counters.set( this.counters.set(
value, value,
new promClient.Counter({ new promClient.Counter({
name: value, name: value,
help: `Total number of ${properCounterName}`, help: `Total number of ${properCounterName}`,
labelNames: ['status'] labelNames: ['status'],
registers: [this.register] // Explicitly set the registry
}) })
) )
} catch (error) {
// If metric already exists, get it from the registry instead
const existingMetrics = this.register.getSingleMetric(value)
if (existingMetrics) {
this.counters.set(value, existingMetrics as promClient.Counter<string>)
}
}
}) })
// in addition to the enum counters, add a few more custom counters // in addition to the enum counters, add a few more custom counters
// version, http_request_duration_ms, http_requests_total // version, http_request_duration_ms, http_requests_total
try {
const versionGaugeCounter = new promClient.Gauge({ const versionGaugeCounter = new promClient.Gauge({
name: 'flowise_version_info', name: 'flowise_version_info',
help: 'Flowise version info.', help: 'Flowise version info.',
labelNames: ['version'] labelNames: ['version'],
registers: [this.register] // Explicitly set the registry
}) })
const { version } = await getVersion() const { version } = await getVersion()
versionGaugeCounter.set({ version: 'v' + version }, 1) versionGaugeCounter.set({ version: 'v' + version }, 1)
this.counters.set('flowise_version', versionGaugeCounter) this.counters.set('flowise_version', versionGaugeCounter)
} catch (error) {
// If metric already exists, get it from the registry
const existingMetric = this.register.getSingleMetric('flowise_version')
if (existingMetric) {
this.counters.set('flowise_version', existingMetric as promClient.Gauge<string>)
}
}
try {
this.httpRequestDurationMicroseconds = new promClient.Histogram({ this.httpRequestDurationMicroseconds = new promClient.Histogram({
name: 'http_request_duration_ms', name: 'http_request_duration_ms',
help: 'Duration of HTTP requests in ms', help: 'Duration of HTTP requests in ms',
labelNames: ['method', 'route', 'code'], labelNames: ['method', 'route', 'code'],
buckets: [1, 5, 15, 50, 100, 200, 300, 400, 500] // buckets for response time from 0.1ms to 500ms buckets: [1, 5, 15, 50, 100, 200, 300, 400, 500], // buckets for response time from 0.1ms to 500ms
registers: [this.register] // Explicitly set the registry
}) })
this.counters.set('http_request_duration_ms', this.httpRequestDurationMicroseconds) this.counters.set('http_request_duration_ms', this.httpRequestDurationMicroseconds)
} catch (error) {
// If metric already exists, get it from the registry
const existingMetric = this.register.getSingleMetric('http_request_duration_ms')
if (existingMetric) {
this.httpRequestDurationMicroseconds = existingMetric as Histogram<string>
this.counters.set('http_request_duration_ms', this.httpRequestDurationMicroseconds)
}
}
try {
this.requestCounter = new Counter({ this.requestCounter = new Counter({
name: 'http_requests_total', name: 'http_requests_total',
help: 'Total number of HTTP requests', help: 'Total number of HTTP requests',
labelNames: ['method', 'path', 'status'] labelNames: ['method', 'path', 'status'],
registers: [this.register] // Explicitly set the registry
}) })
this.counters.set('http_requests_total', this.requestCounter) this.counters.set('http_requests_total', this.requestCounter)
} catch (error) {
// If metric already exists, get it from the registry
const existingMetric = this.register.getSingleMetric('http_requests_total')
if (existingMetric) {
this.requestCounter = existingMetric as Counter<string>
this.counters.set('http_requests_total', this.requestCounter)
}
}
// Only register metrics that aren't already in the registry
this.registerMetrics() this.registerMetrics()
await this.setupMetricsEndpoint() await this.setupMetricsEndpoint()
} }
@@ -111,12 +153,28 @@ export class Prometheus implements IMetricsProvider {
/**
 * Enables prom-client's default metrics (unless disabled) and makes sure every
 * metric held in `this.counters` is present in this provider's registry.
 *
 * Default metrics are collected unless the `METRICS_INCLUDE_NODE_METRICS`
 * environment variable is exactly `'false'`; they are registered on `this.register`
 * with a `flowise_` prefix.
 *
 * A failure to register one custom metric is logged and does not stop the
 * remaining metrics from being processed.
 */
private registerMetrics() {
    if (process.env.METRICS_INCLUDE_NODE_METRICS !== 'false') {
        // Clear any existing default metrics to avoid conflicts.
        // NOTE(review): this clears prom-client's default registry, not `this.register` -
        // confirm nothing else in the process relies on metrics registered there.
        promClient.register.clear()

        // Enable default metrics like CPU usage, memory usage, etc.
        // and ensure they're only registered with our custom registry
        promClient.collectDefaultMetrics({
            register: this.register,
            prefix: 'flowise_' // Add a prefix to avoid conflicts
        })
    }

    // Add only the custom metrics that haven't been registered yet
    for (const counter of this.counters.values()) {
        try {
            // The `name` property is read through a narrow cast, as the original code did
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const metricName: string = (counter as any).name
            if (!this.register.getSingleMetric(metricName)) {
                this.register.registerMetric(counter)
            }
        } catch (error) {
            // The guard above already skips metrics that are registered, so reaching this
            // point is unexpected: report it instead of hiding it, then carry on.
            console.error('Error registering Prometheus metric:', error)
        }
    }
}
} }