Bugfix/Chroma Metadata (#5552)

* update Chroma metadata handling with sanitization function

* Update packages/components/nodes/vectorstores/Chroma/core.ts

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Henry Heng
2025-12-05 12:55:45 +00:00
committed by GitHub
parent 47de40761f
commit 5cdaf3c494
@@ -5,6 +5,7 @@ import type {
Collection, Collection,
CollectionConfiguration, CollectionConfiguration,
CollectionMetadata, CollectionMetadata,
Metadata,
Where Where
} from 'chromadb' } from 'chromadb'
@@ -145,7 +146,7 @@ export class Chroma extends VectorStore {
const documentIds = options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v1()) const documentIds = options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v1())
const collection = await this.ensureCollection() const collection = await this.ensureCollection()
const mappedMetadatas = documents.map(({ metadata }) => { const mappedMetadatas: Metadata[] = documents.map(({ metadata }) => {
let locFrom let locFrom
let locTo let locTo
@@ -162,7 +163,7 @@ export class Chroma extends VectorStore {
if (newMetadata.loc) delete newMetadata.loc if (newMetadata.loc) delete newMetadata.loc
return newMetadata return sanitizeMetadata(newMetadata)
}) })
await collection.upsert({ await collection.upsert({
@@ -343,3 +344,27 @@ function ensureCollectionName(collectionName?: string) {
} }
return collectionName return collectionName
} }
/**
 * Sanitizes metadata to only include Chroma-compatible primitive values.
 * Chroma metadata only supports boolean, number, string, and null values.
 *
 * Conversion rules, per entry:
 * - `null`, booleans, numbers and strings are copied through unchanged.
 * - `bigint` values are stored as their decimal string, because JSON.stringify
 *   throws on BigInt and the value would otherwise be lost.
 * - Arrays and objects are JSON stringified to preserve the data; bigint values
 *   nested inside them are likewise written as decimal strings.
 * - `undefined`, and values JSON cannot represent (functions, symbols,
 *   circular structures), are omitted from the result.
 *
 * @param metadata - Document metadata to convert; a missing (null/undefined)
 *   metadata object is treated as empty.
 * @returns A new object holding only primitive values; the input is not mutated.
 */
function sanitizeMetadata(metadata: Document['metadata']): Metadata {
    const sanitized: Metadata = {}

    // JSON.stringify raises a TypeError for BigInt, so nested bigint values are
    // swapped for their string form instead of discarding the whole entry.
    const bigintAsString = (_key: string, nested: unknown): unknown => (typeof nested === 'bigint' ? nested.toString() : nested)

    // `?? {}` guards against documents that carry no metadata object at all.
    for (const [key, value] of Object.entries(metadata ?? {})) {
        if (value === null || typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string') {
            sanitized[key] = value
        } else if (typeof value === 'bigint') {
            // Stored without JSON quoting so it reads as a plain number string.
            sanitized[key] = value.toString()
        } else if (value !== undefined) {
            try {
                const stringified = JSON.stringify(value, bigintAsString)
                // JSON.stringify returns undefined for functions and symbols.
                if (stringified !== undefined) {
                    sanitized[key] = stringified
                }
            } catch {
                // Skip values that cannot be stringified (e.g. circular references)
            }
        }
    }
    return sanitized
}