Bugfix/supabase upsert ids (#2561)

* fix upserting same id with supabase

* remove dedicated addVectors logic for ids

* Update pnpm-lock.yaml

* add fix for null id column
This commit is contained in:
Henry Heng
2024-06-03 13:09:59 +01:00
committed by GitHub
parent f2a0ffe542
commit 272fd914bd
@@ -1,4 +1,5 @@
import { flatten } from 'lodash' import { flatten } from 'lodash'
import { v4 as uuidv4 } from 'uuid'
import { createClient } from '@supabase/supabase-js' import { createClient } from '@supabase/supabase-js'
import { Document } from '@langchain/core/documents' import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings' import { Embeddings } from '@langchain/core/embeddings'
@@ -213,7 +214,7 @@ class Supabase_VectorStores implements INode {
} }
class SupabaseUpsertVectorStore extends SupabaseVectorStore { class SupabaseUpsertVectorStore extends SupabaseVectorStore {
async addVectors(vectors: number[][], documents: Document[]): Promise<string[]> { async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] | number[] }): Promise<string[]> {
if (vectors.length === 0) { if (vectors.length === 0) {
return [] return []
} }
@@ -223,23 +224,36 @@ class SupabaseUpsertVectorStore extends SupabaseVectorStore {
metadata: documents[idx].metadata metadata: documents[idx].metadata
})) }))
let idx = 0
const { count } = await this.client.from(this.tableName).select('*', { count: 'exact', head: true })
if (count) {
idx = count
}
let returnedIds: string[] = [] let returnedIds: string[] = []
for (let i = 0; i < rows.length; i += this.upsertBatchSize) { for (let i = 0; i < rows.length; i += this.upsertBatchSize) {
const chunk = rows.slice(i, i + this.upsertBatchSize).map((row) => { const chunk = rows.slice(i, i + this.upsertBatchSize).map((row, j) => {
idx = idx += 1 if (options?.ids) {
return { id: idx, ...row } return { id: options.ids[i + j], ...row }
}
return row
}) })
const res = await this.client.from(this.tableName).upsert(chunk).select() let res = await this.client.from(this.tableName).upsert(chunk).select()
if (res.error) {
// If the error is due to null value in column "id", we will generate a new id for the row
if (res.error.message.includes(`null value in column "id"`)) {
const chunk = rows.slice(i, i + this.upsertBatchSize).map((row, y) => {
if (options?.ids) {
return { id: options.ids[i + y], ...row }
}
return { id: uuidv4(), ...row }
})
res = await this.client.from(this.tableName).upsert(chunk).select()
if (res.error) { if (res.error) {
throw new Error(`Error inserting: ${res.error.message} ${res.status} ${res.statusText}`) throw new Error(`Error inserting: ${res.error.message} ${res.status} ${res.statusText}`)
} }
} else {
throw new Error(`Error inserting: ${res.error.message} ${res.status} ${res.statusText}`)
}
}
if (res.data) { if (res.data) {
returnedIds = returnedIds.concat(res.data.map((row) => row.id)) returnedIds = returnedIds.concat(res.data.map((row) => row.id))
} }