From 7dfa269502705060cd60295034bd6b20f103852b Mon Sep 17 00:00:00 2001 From: Ahmed Rowaihi Date: Sat, 7 Jun 2025 02:19:02 +0300 Subject: [PATCH] fix(components/weaviate): fix metadata special chars upsertion failure (#4436) * Enhancement: Add recursive key normalization for metadata in Weaviate vector store - Introduced `normalizeKeysRecursively` utility to standardize metadata keys. - Updated Weaviate vector store to apply normalization on document metadata before processing. * format(compnonents/utils): format for ci * Update utils.ts --------- Co-authored-by: Henry Heng --- .../nodes/vectorstores/Weaviate/Weaviate.ts | 8 +++-- packages/components/src/utils.ts | 29 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts b/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts index ae2c0164..5d83eaa9 100644 --- a/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts +++ b/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts @@ -4,7 +4,7 @@ import { WeaviateLibArgs, WeaviateStore } from '@langchain/weaviate' import { Document } from '@langchain/core/documents' import { Embeddings } from '@langchain/core/embeddings' import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface' -import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' +import { getBaseClasses, getCredentialData, getCredentialParam, normalizeKeysRecursively } from '../../../src/utils' import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils' import { index } from '../../../src/indexing' import { VectorStore } from '@langchain/core/vectorstores' @@ -175,7 +175,11 @@ class Weaviate_VectorStores implements INode { const finalDocs = [] for (let i = 0; i < flattenDocs.length; i += 1) { if (flattenDocs[i] && flattenDocs[i].pageContent) { - finalDocs.push(new Document(flattenDocs[i])) 
+ const doc = { ...flattenDocs[i] } + if (doc.metadata) { + doc.metadata = normalizeKeysRecursively(doc.metadata) + } + finalDocs.push(new Document(doc)) } } diff --git a/packages/components/src/utils.ts b/packages/components/src/utils.ts index c8e9fe00..5a62f93c 100644 --- a/packages/components/src/utils.ts +++ b/packages/components/src/utils.ts @@ -1216,6 +1216,35 @@ export const handleDocumentLoaderDocuments = async (loader: DocumentLoader, text return docs } +/** + * Normalize special characters in key to be used in vector store + * @param str - Key to normalize + * @returns Normalized key + */ +export const normalizeSpecialChars = (str: string) => { + return str.replace(/[^a-zA-Z0-9_]/g, '_') +} + +/** + * recursively normalize object keys + * @param data - Object to normalize + * @returns Normalized object + */ +export const normalizeKeysRecursively = (data: any): any => { + if (Array.isArray(data)) { + return data.map(normalizeKeysRecursively) + } + + if (data !== null && typeof data === 'object') { + return Object.entries(data).reduce((acc, [key, value]) => { + const newKey = normalizeSpecialChars(key) + acc[newKey] = normalizeKeysRecursively(value) + return acc + }, {} as Record<string, any>) + } + return data +} + /** * Check if OAuth2 token is expired and refresh if needed * @param {string} credentialId