Feature/DocumentStore (#2106)

* datasource: initial commit

* datasource: datasource details and chunks

* datasource: Document Store Node

* more changes

* Document Store - Base functionality

* Document Store Loader Component

* Document Store Loader Component

* before merging the modularity PR

* after merging the modularity PR

* preview mode

* initial draft PR

* fixes

* minor updates and fixes

* preview with loader and splitter

* preview with credential

* show stored chunks

* preview update...

* edit config

* save, preview and other changes

* save, preview and other changes

* save, process and other changes

* save, process and other changes

* alpha1 - for internal testing

* rerouting urls

* bug fix on new loader create

* pagination support for chunks

* delete document store

* Update pnpm-lock.yaml

* doc store card view

* Update store files to use updated storage functions, Document Store Table View and other changes

* ui changes

* add expanded chunk dialog, improve ui

* change throw Error to InternalError

* Bug Fixes and removal of subFolder, adding of view chunks for store

* lint fixes

* merge changes

* DocumentStoreStatus component

* ui changes for doc store

* add remove metadata key field, add custom document loader

* add chatflows used doc store chips

* add types/interfaces to DocumentStore Services

* document loader list dialog title bar color change

* update interfaces

* Whereused Chatflow Name and Added chunkNo to retain order of created chunks.

* use typeorm order chunkNo, ui changes

---------

Co-authored-by: Henry <hzj94@hotmail.com>
Co-authored-by: Henry Heng <henryheng@flowiseai.com>
This commit is contained in:
Vinod Kiran
2024-05-06 19:53:27 +05:30
committed by GitHub
parent af4e28aa91
commit 40e36d1b39
91 changed files with 38713 additions and 32791 deletions
@@ -0,0 +1,165 @@
import { DocumentStore } from './database/entities/DocumentStore'
/**
 * Status values used on a document store, on each of its loaders and on loader files.
 * Note that `EMPTY_SYNC` is serialized as the string 'EMPTY', not 'EMPTY_SYNC'.
 */
export enum DocumentStoreStatus {
// Serialized as 'EMPTY'; DocumentStoreDTO.toEntity assigns it to every newly created store.
EMPTY_SYNC = 'EMPTY',
SYNC = 'SYNC',
SYNCING = 'SYNCING',
// DocumentStoreDTO.fromEntity reports a store as STALE when any of its loaders is not 'SYNC'.
STALE = 'STALE',
NEW = 'NEW'
}
/**
 * Persisted shape of a document store; implemented by the DocumentStore entity.
 * `loaders` and `whereUsed` are stored as JSON text and are parsed by
 * DocumentStoreDTO.fromEntity before being returned to API clients.
 */
export interface IDocumentStore {
id: string
name: string
description: string
loaders: string // JSON string
whereUsed: string // JSON string
updatedDate: Date
createdDate: Date
status: DocumentStoreStatus
}
/**
 * Persisted shape of a single stored chunk; implemented by the DocumentStoreFileChunk entity.
 */
export interface IDocumentStoreFileChunk {
id: string
// Position of the chunk; presumably used to keep chunks in creation order — confirm against the query code.
chunkNo: number
// Id of the loader that produced the chunk: the service deletes a loader's chunks by matching docId to the loader id.
docId: string
storeId: string
pageContent: string
// NOTE(review): stored as text; looks like serialized JSON metadata — confirm.
metadata: string
}
/**
 * Shape of a paged chunk listing.
 * NOTE(review): presumably what documentStoreService.getDocumentStoreFileChunks returns — confirm.
 */
export interface IDocumentStoreFileChunkPagedResponse {
chunks: IDocumentStoreFileChunk[]
// NOTE(review): looks like the total number of chunks rather than the page size — confirm.
count: number
file?: IDocumentStoreLoader
currentPage: number
storeName: string
description: string
}
/**
 * One entry of the JSON array kept in DocumentStore.loaders
 * (DocumentStoreDTO.fromEntity parses that column into IDocumentStoreLoader[]).
 */
export interface IDocumentStoreLoader {
id: string
// Loader type name; DocumentStoreDTO.fromEntity switches on values such as 'pdfFile' or 'apiLoader'.
loaderId: string
loaderName: string
// Typed `any`; read as an object (e.g. loaderConfig.pdfFile, loaderConfig.url) by DocumentStoreDTO.fromEntity.
loaderConfig: any // JSON string
splitterId: string
splitterName: string
splitterConfig: any // JSON string
totalChunks: number
totalChars: number
status: DocumentStoreStatus
storeId?: string
files?: IDocumentStoreLoaderFile[]
// Display value derived from loaderConfig by DocumentStoreDTO.fromEntity.
source?: string
credential?: string
}
/**
 * Loader description extended with preview-only flags.
 * NOTE(review): presumably the body accepted by the preview endpoint, whose controller
 * forces `preview = true` on the request body — confirm against the service.
 */
export interface IDocumentStoreLoaderForPreview extends IDocumentStoreLoader {
rehydrated: boolean
preview: boolean
previewChunkCount: number
}
/**
 * Description of a single file attached to a loader (see IDocumentStoreLoader.files).
 */
export interface IDocumentStoreLoaderFile {
id: string
name: string
mimePrefix: string
// NOTE(review): unit is not visible here; presumably bytes — confirm.
size: number
status: DocumentStoreStatus
uploaded: Date
}
/**
 * Reference to a chatflow that uses a document store: the chatflow's id and name,
 * as assembled by documentStoreService.getUsedChatflowNames.
 */
export interface IDocumentStoreWhereUsed {
id: string
name: string
}
/**
 * API-facing view of a `DocumentStore` entity.
 *
 * The entity keeps `loaders` and `whereUsed` as JSON text; `fromEntity` parses them into
 * arrays and derives `totalChars`, `totalChunks`, a display `source` for every loader and
 * an aggregated `status`.
 */
export class DocumentStoreDTO {
    id: string
    name: string
    description: string
    files: IDocumentStoreLoaderFile[]
    whereUsed: IDocumentStoreWhereUsed[]
    createdDate: Date
    updatedDate: Date
    status: DocumentStoreStatus
    chunkOverlap: number
    splitter: string
    totalChunks: number
    totalChars: number
    chunkSize: number
    loaders: IDocumentStoreLoader[]

    constructor() {}

    /**
     * Builds a DTO from a persisted entity.
     *
     * @param entity - the stored document store
     * @returns a DTO whose `loaders` and `whereUsed` are always arrays (empty when the
     *          corresponding column is empty) and whose `status` is STALE as soon as one
     *          loader is not in SYNC
     */
    static fromEntity(entity: DocumentStore): DocumentStoreDTO {
        const documentStoreDTO = new DocumentStoreDTO()
        // Copies every column (createdDate, updatedDate, ...); the JSON columns are replaced below.
        Object.assign(documentStoreDTO, entity)
        documentStoreDTO.id = entity.id
        documentStoreDTO.name = entity.name
        documentStoreDTO.description = entity.description
        documentStoreDTO.status = entity.status
        documentStoreDTO.totalChars = 0
        documentStoreDTO.totalChunks = 0
        documentStoreDTO.whereUsed = entity.whereUsed ? JSON.parse(entity.whereUsed) : []
        // Without this default the raw (empty/null) column value copied by Object.assign would
        // leak into a field that is declared as an array.
        documentStoreDTO.loaders = entity.loaders ? JSON.parse(entity.loaders) : []

        for (const loader of documentStoreDTO.loaders) {
            documentStoreDTO.totalChars += loader.totalChars || 0
            documentStoreDTO.totalChunks += loader.totalChunks || 0
            loader.source = DocumentStoreDTO.resolveLoaderSource(loader)
            if (loader.status !== DocumentStoreStatus.SYNC) {
                documentStoreDTO.status = DocumentStoreStatus.STALE
            }
        }
        return documentStoreDTO
    }

    /**
     * Converts a list of entities; see `fromEntity`.
     */
    static fromEntities(entities: DocumentStore[]): DocumentStoreDTO[] {
        // Reference the class explicitly so the method keeps working when it is passed around detached.
        return entities.map((entity) => DocumentStoreDTO.fromEntity(entity))
    }

    /**
     * Creates the entity for a brand new document store from a request body.
     *
     * NOTE(review): every key of `body` is copied onto the entity (only `loaders`,
     * `whereUsed` and `status` are overwritten afterwards), so callers can set any other
     * column, including `id` — confirm this is intended.
     */
    static toEntity(body: any): DocumentStore {
        const docStore = new DocumentStore()
        Object.assign(docStore, body)
        docStore.loaders = '[]'
        docStore.whereUsed = '[]'
        // when a new document store is created, it is empty and in sync
        docStore.status = DocumentStoreStatus.EMPTY_SYNC
        return docStore
    }

    /** Derives the human readable source shown for a loader from its configuration. */
    private static resolveLoaderSource(loader: IDocumentStoreLoader): string | undefined {
        const config = loader.loaderConfig || {}
        switch (loader.loaderId) {
            case 'pdfFile':
                return DocumentStoreDTO.stripStoragePrefix(config.pdfFile)
            case 'apiLoader':
                return config.url + ' (' + config.method + ')'
            case 'cheerioWebScraper':
                return config.url
            case 'jsonFile':
                return DocumentStoreDTO.stripStoragePrefix(config.jsonFile)
            case 'docxFile':
                return DocumentStoreDTO.stripStoragePrefix(config.docxFile)
            case 'textFile':
                // the text loader keeps its file under the `txtFile` key
                return DocumentStoreDTO.stripStoragePrefix(config.txtFile)
            case 'unstructuredFileLoader':
                return config.filePath
            default:
                return 'None'
        }
    }

    /** Removes the storage marker from a stored file reference; falls back to 'None' when the value is not a string. */
    private static stripStoragePrefix(value: unknown): string {
        return typeof value === 'string' ? value.replace('FILE-STORAGE::', '') : 'None'
    }
}
+3
View File
@@ -253,3 +253,6 @@ export interface IUploadFileSizeAndTypes {
fileTypes: string[]
maxUploadSize: number
}
// DocumentStore related
export * from './Interface.DocumentStore'
@@ -0,0 +1,263 @@
import { NextFunction, Request, Response } from 'express'
import { StatusCodes } from 'http-status-codes'
import documentStoreService from '../../services/documentstore'
import { DocumentStore } from '../../database/entities/DocumentStore'
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
import { DocumentStoreDTO } from '../../Interface'
/**
 * Creates a document store from the request body and responds with what the service returns.
 * A missing body is reported as PRECONDITION_FAILED; every error is handed to `next`.
 */
const createDocumentStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const payload = req.body
        if (payload === undefined) {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.createDocumentStore - body not provided!`
            )
        }
        // toEntity resets loaders/whereUsed/status, so a new store always starts empty
        const newStore = DocumentStoreDTO.toEntity(payload)
        const created = await documentStoreService.createDocumentStore(newStore)
        return res.json(created)
    } catch (error) {
        next(error)
    }
}
/**
 * Responds with every document store, converted to its DTO form.
 * Errors are handed to `next`.
 */
const getAllDocumentStores = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const stores = await documentStoreService.getAllDocumentStores()
        const payload = DocumentStoreDTO.fromEntities(stores)
        return res.json(payload)
    } catch (error) {
        next(error)
    }
}
/**
 * Removes one loader from a document store and responds with the updated store as a DTO.
 * Route params: `id` (the store) and `loaderId`; both are required.
 */
const deleteLoaderFromDocumentStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const { id: storeId, loaderId } = req.params
        if (!(storeId && loaderId)) {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.deleteLoaderFromDocumentStore - missing storeId or loaderId.`
            )
        }
        const updatedStore = await documentStoreService.deleteLoaderFromDocumentStore(storeId, loaderId)
        return res.json(DocumentStoreDTO.fromEntity(updatedStore))
    } catch (error) {
        next(error)
    }
}
/**
 * Responds with a single document store as a DTO.
 * When the store records chatflow usage, `whereUsed` is first replaced with the
 * id/name pairs returned by `getUsedChatflowNames`.
 */
const getDocumentStoreById = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const storeId = req.params.id
        if (storeId === undefined || storeId === '') {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.getDocumentStoreById - id not provided!`
            )
        }
        const store = await documentStoreService.getDocumentStoreById(storeId)
        if (store && store.whereUsed) {
            const usedBy = await documentStoreService.getUsedChatflowNames(store)
            // re-serialize because DocumentStoreDTO.fromEntity expects the column's JSON text
            store.whereUsed = JSON.stringify(usedBy)
        }
        return res.json(DocumentStoreDTO.fromEntity(store))
    } catch (error) {
        next(error)
    }
}
/**
 * Responds with one page of the chunks stored for a file/loader of a document store.
 *
 * Route params: `storeId` and `fileId` are required; `pageNo` selects the page and
 * falls back to 1 when it is absent or not a number.
 */
const getDocumentStoreFileChunks = async (req: Request, res: Response, next: NextFunction) => {
    try {
        for (const key of ['storeId', 'fileId']) {
            const value = req.params[key]
            if (typeof value === 'undefined' || value === '') {
                throw new InternalFlowiseError(
                    StatusCodes.PRECONDITION_FAILED,
                    `Error: documentStoreController.getDocumentStoreFileChunks - ${key} not provided!`
                )
            }
        }
        // Explicit radix keeps the parse decimal; a non-numeric page would otherwise reach the service as NaN.
        const parsedPage = req.params.pageNo ? parseInt(req.params.pageNo, 10) : 1
        const page = Number.isNaN(parsedPage) ? 1 : parsedPage
        const apiResponse = await documentStoreService.getDocumentStoreFileChunks(req.params.storeId, req.params.fileId, page)
        return res.json(apiResponse)
    } catch (error) {
        next(error)
    }
}
/**
 * Deletes a single chunk and responds with the service result.
 * Route params `storeId`, `loaderId` and `chunkId` are all required; the first one that is
 * missing or empty is reported as PRECONDITION_FAILED.
 */
const deleteDocumentStoreFileChunk = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const params = req.params
        // checked in this order so the reported parameter matches the first missing one
        for (const key of ['storeId', 'loaderId', 'chunkId']) {
            if (params[key] === undefined || params[key] === '') {
                throw new InternalFlowiseError(
                    StatusCodes.PRECONDITION_FAILED,
                    `Error: documentStoreController.deleteDocumentStoreFileChunk - ${key} not provided!`
                )
            }
        }
        const outcome = await documentStoreService.deleteDocumentStoreFileChunk(params.storeId, params.loaderId, params.chunkId)
        return res.json(outcome)
    } catch (error) {
        next(error)
    }
}
/**
 * Replaces the content of a single chunk and responds with the service result.
 *
 * Route params `storeId`, `loaderId` and `chunkId` are required, and the body must carry a
 * non-empty `pageContent`; anything else is reported as PRECONDITION_FAILED via `next`.
 */
const editDocumentStoreFileChunk = async (req: Request, res: Response, next: NextFunction) => {
    try {
        for (const key of ['storeId', 'loaderId', 'chunkId']) {
            const value = req.params[key]
            if (typeof value === 'undefined' || value === '') {
                throw new InternalFlowiseError(
                    StatusCodes.PRECONDITION_FAILED,
                    `Error: documentStoreController.editDocumentStoreFileChunk - ${key} not provided!`
                )
            }
        }
        const body = req.body
        // `typeof` is required here: comparing the value with the string 'undefined' let a
        // missing pageContent through and rejected a chunk whose text is literally "undefined".
        if (!body || typeof body.pageContent === 'undefined' || body.pageContent === '') {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.editDocumentStoreFileChunk - body not provided!`
            )
        }
        const apiResponse = await documentStoreService.editDocumentStoreFileChunk(
            req.params.storeId,
            req.params.loaderId,
            req.params.chunkId,
            body.pageContent
        )
        return res.json(apiResponse)
    } catch (error) {
        next(error)
    }
}
/**
 * Passes the request body to `processAndSaveChunks` and responds with its result.
 * A missing body is reported as PRECONDITION_FAILED; every error is handed to `next`.
 */
const processFileChunks = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const loaderRequest = req.body
        if (loaderRequest === undefined) {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.processFileChunks - body not provided!`
            )
        }
        const result = await documentStoreService.processAndSaveChunks(loaderRequest)
        return res.json(result)
    } catch (error) {
        next(error)
    }
}
/**
 * Updates an existing document store with the fields of the request body and responds with
 * the updated store as a DTO.
 * Requires the `id` route param and a body; an unknown store is reported as NOT_FOUND.
 */
const updateDocumentStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const storeId = req.params.id
        if (storeId === undefined || storeId === '') {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.updateDocumentStore - storeId not provided!`
            )
        }
        if (req.body === undefined) {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.updateDocumentStore - body not provided!`
            )
        }
        const existingStore = await documentStoreService.getDocumentStoreById(storeId)
        if (!existingStore) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Error: documentStoreController.updateDocumentStore - DocumentStore ${storeId} not found in the database`
            )
        }
        // the body's fields are copied onto a fresh entity that carries the requested changes
        const changes = Object.assign(new DocumentStore(), req.body)
        const saved = await documentStoreService.updateDocumentStore(existingStore, changes)
        return res.json(DocumentStoreDTO.fromEntity(saved))
    } catch (error) {
        next(error)
    }
}
/**
 * Deletes the document store named by the `id` route param and responds with the service result.
 * A missing or empty id is reported as PRECONDITION_FAILED.
 */
const deleteDocumentStore = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const storeId = req.params.id
        if (storeId === undefined || storeId === '') {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.deleteDocumentStore - storeId not provided!`
            )
        }
        const outcome = await documentStoreService.deleteDocumentStore(storeId)
        return res.json(outcome)
    } catch (error) {
        next(error)
    }
}
/**
 * Runs a chunking preview for the loader described in the request body.
 * The body is flagged with `preview = true` (in place) before it is passed to `previewChunks`.
 */
const previewFileChunks = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const previewRequest = req.body
        if (previewRequest === undefined) {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: documentStoreController.previewFileChunks - body not provided!`
            )
        }
        // always forced on, whatever the client sent
        previewRequest.preview = true
        const preview = await documentStoreService.previewChunks(previewRequest)
        return res.json(preview)
    } catch (error) {
        next(error)
    }
}
/**
 * Responds with whatever `documentStoreService.getDocumentLoaders` returns.
 * Errors are handed to `next`.
 */
const getDocumentLoaders = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const loaders = await documentStoreService.getDocumentLoaders()
        return res.json(loaders)
    } catch (error) {
        next(error)
    }
}
// Handlers exposed to the document store router; each one forwards errors to Express via next().
export default {
deleteDocumentStore,
createDocumentStore,
getAllDocumentStores,
deleteLoaderFromDocumentStore,
getDocumentStoreById,
getDocumentStoreFileChunks,
updateDocumentStore,
processFileChunks,
previewFileChunks,
getDocumentLoaders,
deleteDocumentStoreFileChunk,
editDocumentStoreFileChunk
}
+19 -1
View File
@@ -1,4 +1,5 @@
import { Request, Response, NextFunction } from 'express'
import _ from 'lodash'
import nodesService from '../../services/nodes'
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
import { StatusCodes } from 'http-status-codes'
@@ -24,6 +25,22 @@ const getNodeByName = async (req: Request, res: Response, next: NextFunction) =>
}
}
/**
 * Responds with the nodes of the category named by the `name` route param.
 * The name is passed through lodash `unescape` before the lookup; a missing or empty
 * name is reported as PRECONDITION_FAILED.
 */
const getNodesByCategory = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const rawName = req.params.name
        if (rawName === undefined || rawName === '') {
            throw new InternalFlowiseError(
                StatusCodes.PRECONDITION_FAILED,
                `Error: nodesController.getNodesByCategory - name not provided!`
            )
        }
        const categoryNodes = await nodesService.getAllNodesForCategory(_.unescape(rawName))
        return res.json(categoryNodes)
    } catch (error) {
        next(error)
    }
}
const getSingleNodeIcon = async (req: Request, res: Response, next: NextFunction) => {
try {
if (typeof req.params === 'undefined' || !req.params.name) {
@@ -77,5 +94,6 @@ export default {
getNodeByName,
getSingleNodeIcon,
getSingleNodeAsyncOptions,
executeCustomFunction
executeCustomFunction,
getNodesByCategory
}
@@ -0,0 +1,31 @@
import { Column, CreateDateColumn, Entity, PrimaryGeneratedColumn, UpdateDateColumn } from 'typeorm'
import { DocumentStoreStatus, IDocumentStore } from '../../Interface'
/**
 * TypeORM entity for a document store.
 * `loaders` and `whereUsed` are text columns that hold JSON (see IDocumentStore).
 */
@Entity()
export class DocumentStore implements IDocumentStore {
@PrimaryGeneratedColumn('uuid')
id: string
@Column({ nullable: false, type: 'text' })
name: string
@Column({ nullable: true, type: 'text' })
description: string
// JSON text: array of loader descriptions
@Column({ nullable: true, type: 'text' })
loaders: string
// JSON text: chatflows that use this store
@Column({ nullable: true, type: 'text' })
whereUsed: string
// NOTE(review): both @Column and @CreateDateColumn are applied to this property — confirm which one wins.
@Column({ type: 'timestamp' })
@CreateDateColumn()
createdDate: Date
// NOTE(review): same decorator stacking as createdDate.
@Column({ type: 'timestamp' })
@UpdateDateColumn()
updatedDate: Date
@Column({ nullable: false, type: 'text' })
status: DocumentStoreStatus
}
@@ -0,0 +1,25 @@
import { Column, Entity, Index, PrimaryGeneratedColumn } from 'typeorm'
import { IDocumentStoreFileChunk } from '../../Interface'
/**
 * TypeORM entity for one stored chunk of a document store loader.
 * `docId` and `storeId` are indexed.
 */
@Entity()
export class DocumentStoreFileChunk implements IDocumentStoreFileChunk {
@PrimaryGeneratedColumn('uuid')
id: string
// Id of the loader that produced the chunk (the service deletes a loader's chunks by docId).
@Index()
@Column({ type: 'uuid' })
docId: string
@Index()
@Column({ type: 'uuid' })
storeId: string
@Column()
chunkNo: number
// NOTE(review): declared NOT NULL here, while the migrations create "pageContent" as a nullable text column.
@Column({ nullable: false, type: 'text' })
pageContent: string
@Column({ nullable: true, type: 'text' })
metadata: string
}
@@ -5,6 +5,8 @@ import { Credential } from './Credential'
import { Tool } from './Tool'
import { Assistant } from './Assistant'
import { Variable } from './Variable'
import { DocumentStore } from './DocumentStore'
import { DocumentStoreFileChunk } from './DocumentStoreFileChunk'
import { Lead } from './Lead'
import { UpsertHistory } from './UpsertHistory'
@@ -16,6 +18,8 @@ export const entities = {
Tool,
Assistant,
Variable,
DocumentStore,
DocumentStoreFileChunk,
Lead,
UpsertHistory
}
@@ -0,0 +1,37 @@
import { MigrationInterface, QueryRunner } from 'typeorm'
/**
 * MySQL migration that creates the `document_store` and `document_store_file_chunk` tables.
 * The chunk table gets secondary keys on `docId` and `storeId`.
 */
export class AddDocumentStore1711637331047 implements MigrationInterface {
// Both CREATE statements use IF NOT EXISTS, so existing tables are left untouched.
// NOTE(review): the tables are pinned to the utf8mb4_0900_ai_ci collation — confirm every supported server version knows it.
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(
`CREATE TABLE IF NOT EXISTS \`document_store\` (
\`id\` varchar(36) NOT NULL,
\`name\` varchar(255) NOT NULL,
\`description\` varchar(255),
\`loaders\` text,
\`whereUsed\` text,
\`status\` varchar(20) NOT NULL,
\`createdDate\` datetime(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
\`updatedDate\` datetime(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6),
PRIMARY KEY (\`id\`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;`
)
await queryRunner.query(
`CREATE TABLE IF NOT EXISTS \`document_store_file_chunk\` (
\`id\` varchar(36) NOT NULL,
\`docId\` varchar(36) NOT NULL,
\`storeId\` varchar(36) NOT NULL,
\`chunkNo\` INT NOT NULL,
\`pageContent\` text,
\`metadata\` text,
PRIMARY KEY (\`id\`),
KEY \`IDX_e76bae1780b77e56aab1h2asd4\` (\`docId\`),
KEY \`IDX_e213b811b01405a42309a6a410\` (\`storeId\`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;`
)
}
// Drops both tables; neither statement uses IF EXISTS, so reverting fails when a table is already gone.
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`DROP TABLE document_store`)
await queryRunner.query(`DROP TABLE document_store_file_chunk`)
}
}
@@ -15,6 +15,7 @@ import { AddVariableEntity1699325775451 } from './1702200925471-AddVariableEntit
import { AddSpeechToText1706364937060 } from './1706364937060-AddSpeechToText'
import { AddUpsertHistoryEntity1709814301358 } from './1709814301358-AddUpsertHistoryEntity'
import { AddFeedback1707213626553 } from './1707213626553-AddFeedback'
import { AddDocumentStore1711637331047 } from './1711637331047-AddDocumentStore'
import { AddLead1710832127079 } from './1710832127079-AddLead'
import { AddLeadToChatMessage1711538023578 } from './1711538023578-AddLeadToChatMessage'
@@ -36,6 +37,7 @@ export const mysqlMigrations = [
AddSpeechToText1706364937060,
AddUpsertHistoryEntity1709814301358,
AddFeedback1707213626553,
AddDocumentStore1711637331047,
AddLead1710832127079,
AddLeadToChatMessage1711538023578
]
@@ -0,0 +1,41 @@
import { MigrationInterface, QueryRunner } from 'typeorm'
/**
 * Postgres migration that creates the `document_store` and `document_store_file_chunk`
 * tables plus btree indexes on the chunk table's "docId" and "storeId" columns.
 */
export class AddDocumentStore1711637331047 implements MigrationInterface {
    /**
     * Creates both tables and both indexes; every statement is guarded with IF NOT EXISTS.
     */
    public async up(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(
            `CREATE TABLE IF NOT EXISTS document_store (
id uuid NOT NULL DEFAULT uuid_generate_v4(),
"name" varchar NOT NULL,
"description" varchar,
"loaders" text,
"whereUsed" text,
"status" varchar NOT NULL,
"createdDate" timestamp NOT NULL DEFAULT now(),
"updatedDate" timestamp NOT NULL DEFAULT now(),
CONSTRAINT "PK_98495043dd774f54-9830ab78f9" PRIMARY KEY (id)
);`
        )
        await queryRunner.query(
            `CREATE TABLE IF NOT EXISTS document_store_file_chunk (
id uuid NOT NULL DEFAULT uuid_generate_v4(),
"docId" uuid NOT NULL,
"chunkNo" integer NOT NULL,
"storeId" uuid NOT NULL,
"pageContent" text,
"metadata" text,
CONSTRAINT "PK_90005043dd774f54-9830ab78f9" PRIMARY KEY (id)
);`
        )
        // The column names must be quoted: the columns were created as "docId" / "storeId",
        // and an unquoted identifier is folded to lower case (docid / storeid), which does not exist.
        await queryRunner.query(
            `CREATE INDEX IF NOT EXISTS "IDX_e76bae1780b77e56aab1h2asd4" ON document_store_file_chunk USING btree ("docId");`
        )
        await queryRunner.query(
            `CREATE INDEX IF NOT EXISTS "IDX_e213b811b01405a42309a6a410" ON document_store_file_chunk USING btree ("storeId");`
        )
    }

    /**
     * Drops both tables (their indexes are removed together with the chunk table).
     */
    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`DROP TABLE document_store`)
        await queryRunner.query(`DROP TABLE document_store_file_chunk`)
    }
}
@@ -16,6 +16,7 @@ import { AddSpeechToText1706364937060 } from './1706364937060-AddSpeechToText'
import { AddUpsertHistoryEntity1709814301358 } from './1709814301358-AddUpsertHistoryEntity'
import { AddFeedback1707213601923 } from './1707213601923-AddFeedback'
import { FieldTypes1710497452584 } from './1710497452584-FieldTypes'
import { AddDocumentStore1711637331047 } from './1711637331047-AddDocumentStore'
import { AddLead1710832137905 } from './1710832137905-AddLead'
import { AddLeadToChatMessage1711538016098 } from './1711538016098-AddLeadToChatMessage'
@@ -38,6 +39,7 @@ export const postgresMigrations = [
AddUpsertHistoryEntity1709814301358,
AddFeedback1707213601923,
FieldTypes1710497452584,
AddDocumentStore1711637331047,
AddLead1710832137905,
AddLeadToChatMessage1711538016098
]
@@ -0,0 +1,34 @@
import { MigrationInterface, QueryRunner } from 'typeorm'
/**
 * SQLite migration that creates the `document_store` and `document_store_file_chunk`
 * tables plus indexes on the chunk table's "docId" and "storeId" columns.
 */
export class AddDocumentStore1711637331047 implements MigrationInterface {
    /**
     * Creates both tables and both indexes; every statement is guarded with IF NOT EXISTS
     * so running it against a database that already has the objects does not fail.
     */
    public async up(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(
            `CREATE TABLE IF NOT EXISTS "document_store" (
"id" varchar PRIMARY KEY NOT NULL,
"name" varchar NOT NULL,
"description" varchar,
"status" varchar NOT NULL,
"loaders" text,
"whereUsed" text,
"updatedDate" datetime NOT NULL DEFAULT (datetime('now')),
"createdDate" datetime NOT NULL DEFAULT (datetime('now')));`
        )
        await queryRunner.query(
            `CREATE TABLE IF NOT EXISTS "document_store_file_chunk" (
"id" varchar PRIMARY KEY NOT NULL,
"docId" varchar NOT NULL,
"storeId" varchar NOT NULL,
"chunkNo" INTEGER NOT NULL,
"pageContent" text,
"metadata" text
);`
        )
        // Guarded like the tables above: an unguarded CREATE INDEX aborts the migration when the index is already there.
        await queryRunner.query(
            `CREATE INDEX IF NOT EXISTS "IDX_e76bae1780b77e56aab1h2asd4" ON "document_store_file_chunk" ("docId") ;`
        )
        await queryRunner.query(
            `CREATE INDEX IF NOT EXISTS "IDX_e213b811b01405a42309a6a410" ON "document_store_file_chunk" ("storeId") ;`
        )
    }

    /**
     * Drops both tables if they exist.
     */
    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`DROP TABLE IF EXISTS "document_store";`)
        await queryRunner.query(`DROP TABLE IF EXISTS "document_store_file_chunk";`)
    }
}
@@ -15,6 +15,7 @@ import { AddVariableEntity1699325775451 } from './1702200925471-AddVariableEntit
import { AddSpeechToText1706364937060 } from './1706364937060-AddSpeechToText'
import { AddUpsertHistoryEntity1709814301358 } from './1709814301358-AddUpsertHistoryEntity'
import { AddFeedback1707213619308 } from './1707213619308-AddFeedback'
import { AddDocumentStore1711637331047 } from './1711637331047-AddDocumentStore'
import { AddLead1710832117612 } from './1710832117612-AddLead'
import { AddLeadToChatMessage1711537986113 } from './1711537986113-AddLeadToChatMessage'
@@ -36,6 +37,7 @@ export const sqliteMigrations = [
AddSpeechToText1706364937060,
AddUpsertHistoryEntity1709814301358,
AddFeedback1707213619308,
AddDocumentStore1711637331047,
AddLead1710832117612,
AddLeadToChatMessage1711537986113
]
+1 -2
View File
@@ -5,11 +5,10 @@ import cors from 'cors'
import http from 'http'
import basicAuth from 'express-basic-auth'
import { Server } from 'socket.io'
import logger from './utils/logger'
import { expressRequestLogger } from './utils/logger'
import { DataSource } from 'typeorm'
import { IChatFlow } from './Interface'
import { getNodeModulesPackagePath, getEncryptionKey } from './utils'
import logger, { expressRequestLogger } from './utils/logger'
import { getDataSource } from './DataSource'
import { NodesPool } from './NodesPool'
import { ChatFlow } from './database/entities/ChatFlow'
@@ -0,0 +1,36 @@
import express from 'express'
import documentStoreController from '../../controllers/documentstore'
// Router for the document store API; every route delegates to documentStoreController.
const router = express.Router()
/** Document Store Routes */
// Create document store
router.post('/store', documentStoreController.createDocumentStore)
// List all stores
router.get('/stores', documentStoreController.getAllDocumentStores)
// Get specific store
router.get('/store/:id', documentStoreController.getDocumentStoreById)
// Update documentStore
router.put('/store/:id', documentStoreController.updateDocumentStore)
// Delete documentStore
router.delete('/store/:id', documentStoreController.deleteDocumentStore)
/** Component Nodes = Document Store - Loaders */
// Get all loaders
router.get('/loaders', documentStoreController.getDocumentLoaders)
// delete loader from document store (:id is the store id, :loaderId the loader to remove)
router.delete('/loader/:id/:loaderId', documentStoreController.deleteLoaderFromDocumentStore)
// chunking preview
router.post('/loader/preview', documentStoreController.previewFileChunks)
// chunking process
router.post('/loader/process', documentStoreController.processFileChunks)
/** Document Store - Loaders - Chunks */
// delete specific file chunk from the store
router.delete('/chunks/:storeId/:loaderId/:chunkId', documentStoreController.deleteDocumentStoreFileChunk)
// edit specific file chunk from the store
router.put('/chunks/:storeId/:loaderId/:chunkId', documentStoreController.editDocumentStoreFileChunk)
// Get one page (:pageNo) of the file chunks from the store
router.get('/chunks/:storeId/:fileId/:pageNo', documentStoreController.getDocumentStoreFileChunks)
export default router
+2
View File
@@ -8,6 +8,7 @@ import chatMessageRouter from './chat-messages'
import componentsCredentialsRouter from './components-credentials'
import componentsCredentialsIconRouter from './components-credentials-icon'
import credentialsRouter from './credentials'
import documentStoreRouter from './documentstore'
import feedbackRouter from './feedback'
import fetchLinksRouter from './fetch-links'
import flowConfigRouter from './flow-config'
@@ -49,6 +50,7 @@ router.use('/components-credentials', componentsCredentialsRouter)
router.use('/components-credentials-icon', componentsCredentialsIconRouter)
router.use('/chatflows-uploads', chatflowsUploadsRouter)
router.use('/credentials', credentialsRouter)
router.use('/document-store', documentStoreRouter)
router.use('/feedback', feedbackRouter)
router.use('/fetch-links', fetchLinksRouter)
router.use('/flow-config', flowConfigRouter)
@@ -5,5 +5,6 @@ const router = express.Router()
// READ
router.get('/', nodesController.getAllNodes)
router.get(['/', '/:name'], nodesController.getNodeByName)
router.get('/category/:name', nodesController.getNodesByCategory)
export default router
@@ -13,6 +13,7 @@ import { ChatMessageFeedback } from '../../database/entities/ChatMessageFeedback
import { UpsertHistory } from '../../database/entities/UpsertHistory'
import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fileRepository'
import { getErrorMessage } from '../../errors/utils'
import documentStoreService from '../../services/documentstore'
// Check if chatflow valid for streaming
const checkIfChatflowIsValidForStreaming = async (chatflowId: string): Promise<any> => {
@@ -76,6 +77,7 @@ const deleteChatflow = async (chatflowId: string): Promise<any> => {
try {
// Delete all uploads corresponding to this chatflow
await removeFolderFromStorage(chatflowId)
await documentStoreService.updateDocumentStoreUsage(chatflowId, undefined)
// Delete all chat messages
await appServer.AppDataSource.getRepository(ChatMessage).delete({ chatflowid: chatflowId })
@@ -166,6 +168,7 @@ const saveChatflow = async (newChatFlow: ChatFlow): Promise<any> => {
// step 2 - convert base64 to file paths and update the chatflow
step1Results.flowData = await updateFlowDataWithFilePaths(step1Results.id, incomingFlowData)
await _checkAndUpdateDocumentStoreUsage(step1Results)
dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(step1Results)
} else {
const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
@@ -192,6 +195,7 @@ const updateChatflow = async (chatflow: ChatFlow, updateChatFlow: ChatFlow): Pro
updateChatFlow.flowData = await updateFlowDataWithFilePaths(chatflow.id, updateChatFlow.flowData)
}
const newDbChatflow = appServer.AppDataSource.getRepository(ChatFlow).merge(chatflow, updateChatFlow)
await _checkAndUpdateDocumentStoreUsage(newDbChatflow)
const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow)
// chatFlowPool is initialized only when a flow is opened
@@ -261,6 +265,18 @@ const getSinglePublicChatbotConfig = async (chatflowId: string): Promise<any> =>
}
}
/**
 * Keeps the document store usage records in step with a chatflow.
 *
 * Looks for the first node named 'documentStore' in the chatflow's flow data and reports
 * its `selectedStore` input to `updateDocumentStoreUsage`; when there is no such node, or
 * it has no selection, `undefined` is reported instead.
 */
const _checkAndUpdateDocumentStoreUsage = async (chatflow: ChatFlow) => {
    const { nodes }: IReactFlowObject = JSON.parse(chatflow.flowData)
    const storeNode = nodes.find((candidate) => candidate.data.name === 'documentStore')

    let selectedStore: string | undefined
    if (storeNode && storeNode.data && storeNode.data.inputs) {
        selectedStore = storeNode.data.inputs['selectedStore']
    }
    await documentStoreService.updateDocumentStoreUsage(chatflow.id, selectedStore)
}
export default {
checkIfChatflowIsValidForStreaming,
checkIfChatflowIsValidForUploads,
@@ -0,0 +1,710 @@
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { DocumentStore } from '../../database/entities/DocumentStore'
// @ts-ignore
import {
addFileToStorage,
getFileFromStorage,
ICommonObject,
IDocument,
removeFilesFromStorage,
removeSpecificFileFromStorage
} from 'flowise-components'
import {
DocumentStoreStatus,
IDocumentStoreFileChunkPagedResponse,
IDocumentStoreLoader,
IDocumentStoreLoaderFile,
IDocumentStoreLoaderForPreview,
IDocumentStoreWhereUsed
} from '../../Interface'
import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk'
import { v4 as uuidv4 } from 'uuid'
import { databaseEntities } from '../../utils'
import logger from '../../utils/logger'
import nodesService from '../nodes'
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
import { StatusCodes } from 'http-status-codes'
import { getErrorMessage } from '../../errors/utils'
import { ChatFlow } from '../../database/entities/ChatFlow'
// First path segment passed to the storage helpers for document store files (followed by the store id).
const DOCUMENT_STORE_BASE_FOLDER = 'docustore'
/**
 * Persists a new document store and returns the saved entity.
 * Any failure is rethrown as an InternalFlowiseError with INTERNAL_SERVER_ERROR.
 */
const createDocumentStore = async (newDocumentStore: DocumentStore) => {
    try {
        const repository = getRunningExpressApp().AppDataSource.getRepository(DocumentStore)
        const pendingStore = repository.create(newDocumentStore)
        // awaited inside the try so a failed save is wrapped below
        return await repository.save(pendingStore)
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.createDocumentStore - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Returns every stored DocumentStore entity.
 * Any failure is rethrown as an InternalFlowiseError with INTERNAL_SERVER_ERROR.
 */
const getAllDocumentStores = async () => {
    try {
        const repository = getRunningExpressApp().AppDataSource.getRepository(DocumentStore)
        // awaited inside the try so a failed query is wrapped below
        return await repository.find()
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getAllDocumentStores - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Removes one loader from a document store: deletes the loader's stored file (when it has
 * a `path`), deletes its chunks and saves the store without the loader entry.
 *
 * @param storeId - id of the document store
 * @param loaderId - id of the loader entry inside the store's `loaders` JSON
 * @returns the saved store entity
 * @throws InternalFlowiseError NOT_FOUND when the store does not exist,
 *         INTERNAL_SERVER_ERROR when the loader is unknown or anything else fails
 */
const deleteLoaderFromDocumentStore = async (storeId: string, loaderId: string) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Error: documentStoreServices.deleteLoaderFromDocumentStore - Document store ${storeId} not found`
            )
        }
        // `loaders` is a nullable column; an empty value simply means the store has no loaders
        const existingLoaders = entity.loaders ? JSON.parse(entity.loaders) : []
        const index = existingLoaders.findIndex((uFile: IDocumentStoreLoader) => uFile.id === loaderId)
        if (index === -1) {
            throw new InternalFlowiseError(
                StatusCodes.INTERNAL_SERVER_ERROR,
                `Error: documentStoreServices.deleteLoaderFromDocumentStore - Unable to locate loader in Document Store ${entity.name}`
            )
        }
        const found = existingLoaders[index]
        if (found.path) {
            //remove the existing files, if any of the file loaders were used.
            await removeSpecificFileFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id, found.path)
        }
        existingLoaders.splice(index, 1)
        // remove the chunks
        await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).delete({ docId: found.id })
        entity.loaders = JSON.stringify(existingLoaders)
        return await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
    } catch (error) {
        // Errors raised deliberately above already carry the right status code and prefix;
        // wrapping them again would turn the 404 into a 500 and double the message prefix.
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.deleteLoaderFromDocumentStore - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Fetches a single document store by id.
 *
 * @param storeId - id of the document store
 * @returns the matching DocumentStore entity
 * @throws InternalFlowiseError - NOT_FOUND when no store has this id; 500 for any other failure
 */
const getDocumentStoreById = async (storeId: string) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Error: documentStoreServices.getDocumentStoreById - Document store ${storeId} not found`
            )
        }
        return entity
    } catch (error) {
        // Keep the NOT_FOUND status raised above instead of masking it as a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getDocumentStoreById - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Resolves the chatflow ids stored in `entity.whereUsed` to `{ id, name }` pairs.
 * Ids that no longer match a chatflow are left out of the result.
 *
 * @param entity - document store whose `whereUsed` JSON should be resolved
 * @returns the resolved usages, or an empty array when `whereUsed` is empty
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const getUsedChatflowNames = async (entity: DocumentStore) => {
    try {
        const appServer = getRunningExpressApp()
        if (!entity.whereUsed) {
            return []
        }
        const chatflowRepository = appServer.AppDataSource.getRepository(ChatFlow)
        const namedUsages: IDocumentStoreWhereUsed[] = []
        // One lookup per id, in stored order, so the result keeps the order of whereUsed
        for (const chatflowId of JSON.parse(entity.whereUsed)) {
            const chatflow = await chatflowRepository.findOne({
                where: { id: chatflowId },
                select: ['id', 'name']
            })
            if (!chatflow) {
                continue
            }
            namedUsages.push({ id: chatflowId, name: chatflow.name })
        }
        return namedUsages
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getUsedChatflowNames - ${reason}`
        )
    }
}
/**
 * Get one page (50 chunks, ordered by `chunkNo`) of chunks for a specific loader or a whole store.
 *
 * When `fileId` is `'all'` the chunks of the entire store are paged and `file` in the
 * response is undefined. Otherwise the loader with that id must exist in the store; the
 * returned `file` is that loader entry with `totalChars` replaced by the sum over all
 * loaders of the store, and `id` / `status` replaced by the store's id and status.
 *
 * @param storeId - id of the document store
 * @param fileId - loader id, or `'all'` for every chunk of the store
 * @param pageNo - 1-based page number
 * @returns paged chunks plus total count, store name and description
 * @throws InternalFlowiseError - NOT_FOUND when the store or loader does not exist; 500 otherwise
 */
const getDocumentStoreFileChunks = async (storeId: string, fileId: string, pageNo: number = 1) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Error: documentStoreServices.getDocumentStoreFileChunks - Document store ${storeId} not found`
            )
        }
        const loaders = JSON.parse(entity.loaders)
        let found: IDocumentStoreLoader | undefined
        if (fileId !== 'all') {
            found = loaders.find((loader: IDocumentStoreLoader) => loader.id === fileId)
            if (!found) {
                throw new InternalFlowiseError(
                    StatusCodes.NOT_FOUND,
                    `Error: documentStoreServices.getDocumentStoreFileChunks - Document file ${fileId} not found`
                )
            }
        }
        let totalChars = 0
        loaders.forEach((loader: IDocumentStoreLoader) => {
            totalChars += loader.totalChars
        })
        if (found) {
            // NOTE(review): the loader entry is reported with store-level totals/id/status — confirm the UI relies on this
            found.totalChars = totalChars
            found.id = entity.id
            found.status = entity.status
        }
        const PAGE_SIZE = 50
        const skip = (pageNo - 1) * PAGE_SIZE
        const take = PAGE_SIZE
        const whereCondition: { storeId: string } | { docId: string } = fileId === 'all' ? { storeId: storeId } : { docId: fileId }
        const chunkRepository = appServer.AppDataSource.getRepository(DocumentStoreFileChunk)
        const count = await chunkRepository.count({
            where: whereCondition
        })
        // find() resolves to an array (possibly empty), so no null check is needed here
        const chunks = await chunkRepository.find({
            skip,
            take,
            where: whereCondition,
            order: {
                chunkNo: 'ASC'
            }
        })
        const response: IDocumentStoreFileChunkPagedResponse = {
            chunks: chunks,
            count: count,
            file: found,
            currentPage: pageNo,
            storeName: entity.name,
            description: entity.description
        }
        return response
    } catch (error) {
        // Keep the NOT_FOUND status raised above instead of masking it as a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getDocumentStoreFileChunks - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Deletes a document store: first its chunks, then its stored files, then the store row.
 *
 * @param storeId - id of the document store
 * @returns `{ deleted }` carrying the `affected` value reported by the store delete
 * @throws InternalFlowiseError (500) wrapping any failure, including a missing store
 */
const deleteDocumentStore = async (storeId: string) => {
    try {
        const dataSource = getRunningExpressApp().AppDataSource
        // chunks go first; this runs even when the store itself turns out to be missing
        await dataSource.getRepository(DocumentStoreFileChunk).delete({ storeId })
        const storeRepository = dataSource.getRepository(DocumentStore)
        const entity = await storeRepository.findOneBy({ id: storeId })
        if (!entity) {
            throw new Error(`Document store ${storeId} not found`)
        }
        // then the files kept in storage for this store
        await removeFilesFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id)
        // finally the store record
        const { affected } = await storeRepository.delete({ id: storeId })
        return { deleted: affected }
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.deleteDocumentStore - ${reason}`
        )
    }
}
/**
 * Deletes a single chunk and updates the owning loader's `totalChunks` / `totalChars`.
 *
 * @param storeId - id of the document store
 * @param docId - id of the loader the chunk is counted against
 * @param chunkId - id of the chunk to delete
 * @returns the first page of chunks for the loader after the deletion
 * @throws InternalFlowiseError - NOT_FOUND when the store, loader or chunk does not exist; 500 otherwise
 */
const deleteDocumentStoreFileChunk = async (storeId: string, docId: string, chunkId: string) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${storeId} not found`)
        }
        const loaders = JSON.parse(entity.loaders)
        const found = loaders.find((ldr: IDocumentStoreLoader) => ldr.id === docId)
        if (!found) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store loader ${docId} not found`)
        }
        const chunkRepository = appServer.AppDataSource.getRepository(DocumentStoreFileChunk)
        const tbdChunk = await chunkRepository.findOneBy({
            id: chunkId
        })
        if (!tbdChunk) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document Chunk ${chunkId} not found`)
        }
        await chunkRepository.delete(chunkId)
        // keep the loader metrics in step with the stored chunks
        found.totalChunks--
        found.totalChars -= tbdChunk.pageContent.length
        entity.loaders = JSON.stringify(loaders)
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        return getDocumentStoreFileChunks(storeId, docId)
    } catch (error) {
        // Keep the NOT_FOUND status raised above instead of masking it as a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.deleteDocumentStoreFileChunk - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Replaces the content of a single chunk and adjusts the owning loader's `totalChars`
 * by the difference in content length.
 *
 * @param storeId - id of the document store
 * @param docId - id of the loader the chunk is counted against
 * @param chunkId - id of the chunk to edit
 * @param content - new page content for the chunk
 * @returns the first page of chunks for the loader after the edit
 * @throws InternalFlowiseError - NOT_FOUND when the store, loader or chunk does not exist; 500 otherwise
 */
const editDocumentStoreFileChunk = async (storeId: string, docId: string, chunkId: string, content: string) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${storeId} not found`)
        }
        const loaders = JSON.parse(entity.loaders)
        const found = loaders.find((ldr: IDocumentStoreLoader) => ldr.id === docId)
        if (!found) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store loader ${docId} not found`)
        }
        const chunkRepository = appServer.AppDataSource.getRepository(DocumentStoreFileChunk)
        const editChunk = await chunkRepository.findOneBy({
            id: chunkId
        })
        if (!editChunk) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document Chunk ${chunkId} not found`)
        }
        // swap the old content length for the new one in the loader's character total
        found.totalChars -= editChunk.pageContent.length
        editChunk.pageContent = content
        found.totalChars += content.length
        await chunkRepository.save(editChunk)
        entity.loaders = JSON.stringify(loaders)
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        return getDocumentStoreFileChunks(storeId, docId)
    } catch (error) {
        // Keep the NOT_FOUND status raised above instead of masking it as a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.editDocumentStoreFileChunk - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Update documentStore: combines `updatedDocumentStore` with `documentStore` through the
 * repository's `merge` and saves the result.
 *
 * @param documentStore - the existing entity
 * @param updatedDocumentStore - the values to apply
 * @returns the entity as returned by the repository save
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const updateDocumentStore = async (documentStore: DocumentStore, updatedDocumentStore: DocumentStore) => {
    try {
        const repository = getRunningExpressApp().AppDataSource.getRepository(DocumentStore)
        const merged = repository.merge(documentStore, updatedDocumentStore)
        return await repository.save(merged)
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.updateDocumentStore - ${reason}`
        )
    }
}
/**
 * Stores one uploaded file, given as a comma-separated string whose segments are read
 * from the end: `<mime part>,<base64 payload>,<label>:<filename>`.
 *
 * @param fileBase64 - the encoded upload string
 * @param entity - document store whose id is passed to the storage helper
 * @returns metadata for the stored file (new uuid, name, mime, byte size, NEW status, upload time)
 */
const _saveFileToStorage = async (fileBase64: string, entity: DocumentStore) => {
    const segments = fileBase64.split(',')
    // segments are consumed right-to-left: filename, payload, then mime prefix
    const filenameSegment = segments.pop()
    const filename = filenameSegment?.split(':')[1] ?? ''
    const payload = segments.pop() || ''
    const bf = Buffer.from(payload, 'base64')
    const mimeSegment = segments.pop()
    const mime = mimeSegment ? mimeSegment.split(';')[0].split(':')[1] : ''
    await addFileToStorage(mime, bf, filename, DOCUMENT_STORE_BASE_FOLDER, entity.id)
    return {
        id: uuidv4(),
        name: filename,
        mimePrefix: mime,
        size: bf.length,
        status: DocumentStoreStatus.NEW,
        uploaded: new Date()
    }
}
/**
 * Runs the configured document loader node (optionally with a text splitter node) and
 * returns the documents it produces.
 *
 * The splitter is only instantiated when `data.splitterConfig` has at least one key;
 * otherwise the loader receives `textSplitter: null`.
 *
 * @param data - loader/splitter ids and configs, plus an optional credential
 * @returns the documents returned by the loader node's `init`
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const _splitIntoChunks = async (data: IDocumentStoreLoaderForPreview) => {
    try {
        const appServer = getRunningExpressApp()
        const componentNodes = appServer.nodesPool.componentNodes

        let splitterInstance = null
        if (data.splitterConfig && Object.keys(data.splitterConfig).length > 0) {
            const splitterFilePath = componentNodes[data.splitterId].filePath as string
            const splitterModule = await import(splitterFilePath)
            const splitterNode = new splitterModule.nodeClass()
            splitterInstance = await splitterNode.init({
                inputs: { ...data.splitterConfig },
                id: 'splitter_0'
            })
        }

        const loaderFilePath = componentNodes[data.loaderId].filePath as string
        const loaderModule = await import(loaderFilePath)
        // doc loader configs
        const loaderNodeData = {
            credential: data.credential || undefined,
            inputs: { ...data.loaderConfig, textSplitter: splitterInstance },
            outputs: { output: 'document' }
        }
        const loaderOptions: ICommonObject = {
            chatflowid: uuidv4(),
            appDataSource: appServer.AppDataSource,
            databaseEntities,
            logger
        }
        const loaderNode = new loaderModule.nodeClass()
        const docs: IDocument[] = await loaderNode.init(loaderNodeData, '', loaderOptions)
        return docs
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.splitIntoChunks - ${reason}`
        )
    }
}
/**
 * Replaces every `FILE-STORAGE::` reference in `data.loaderConfig` with a JSON array of
 * base64 strings (`data:<mime>;base64,<payload>,filename:<name>`) read back from storage.
 *
 * Mutates `data`: rewritten config values are stored in place and `data.rehydrated` is set
 * to whether at least one value was rewritten. Values are only rewritten when the store
 * has a loader whose id equals `data.id`.
 *
 * @param data - loader description whose config may contain storage references
 * @param entity - the document store, or null to have it looked up by `data.storeId` on demand
 * @throws InternalFlowiseError (NOT_FOUND) when the store cannot be found, or when a
 *         referenced file has no matching entry in the loader's `files`
 */
const _normalizeFilePaths = async (data: IDocumentStoreLoaderForPreview, entity: DocumentStore | null) => {
    const FILE_STORAGE_PREFIX = 'FILE-STORAGE::'
    // Looked up lazily and at most once, instead of once per storage-backed key
    let documentStoreEntity: DocumentStore | null = entity
    let rehydrated = false
    for (const key of Object.getOwnPropertyNames(data.loaderConfig)) {
        const input = data.loaderConfig[key]
        if (!input || typeof input !== 'string' || !input.startsWith(FILE_STORAGE_PREFIX)) {
            continue
        }
        if (!documentStoreEntity) {
            const appServer = getRunningExpressApp()
            documentStoreEntity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
                id: data.storeId
            })
            if (!documentStoreEntity) {
                throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${data.storeId} not found`)
            }
        }
        const store: DocumentStore = documentStoreEntity
        const fileName = input.replace(FILE_STORAGE_PREFIX, '')
        // the reference is either a JSON array of names or a single bare name
        const files: string[] = fileName.startsWith('[') && fileName.endsWith(']') ? JSON.parse(fileName) : [fileName]
        const loaders = JSON.parse(store.loaders)
        const currentLoader = loaders.find((ldr: IDocumentStoreLoader) => ldr.id === data.id)
        if (!currentLoader) {
            continue
        }
        const loaderFiles: IDocumentStoreLoaderFile[] = currentLoader.files ?? []
        const base64Files: string[] = []
        for (const file of files) {
            // find the file entry that has the same name as the file
            const uploadedFile = loaderFiles.find((uFile: IDocumentStoreLoaderFile) => uFile.name === file)
            if (!uploadedFile) {
                // without the entry the mime type is unknown; fail clearly instead of with a TypeError
                throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `File ${file} not found in document store loader ${data.id}`)
            }
            const bf = await getFileFromStorage(file, DOCUMENT_STORE_BASE_FOLDER, store.id)
            const mimePrefix = 'data:' + uploadedFile.mimePrefix + ';base64'
            const base64String = mimePrefix + ',' + bf.toString('base64') + `,filename:${file}`
            base64Files.push(base64String)
        }
        data.loaderConfig[key] = JSON.stringify(base64Files)
        rehydrated = true
    }
    data.rehydrated = rehydrated
}
/**
 * Loads and splits the documents described by `data` and returns at most
 * `data.previewChunkCount` of them.
 *
 * Mutates `data`: in preview mode the three web scraper loaders get `loaderConfig.limit = 3`,
 * storage references are rehydrated when not done yet, and `previewChunkCount` is set to
 * the total when it is -1 or not smaller than the total.
 *
 * @param data - loader description and preview settings
 * @returns the (possibly truncated) chunks, the untruncated total, and the effective preview count
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const previewChunks = async (data: IDocumentStoreLoaderForPreview) => {
    try {
        if (data.preview) {
            switch (data.loaderId) {
                case 'cheerioWebScraper':
                case 'puppeteerWebScraper':
                case 'playwrightWebScraper':
                    data.loaderConfig['limit'] = 3
                    break
            }
        }
        if (!data.rehydrated) {
            await _normalizeFilePaths(data, null)
        }
        let docs = await _splitIntoChunks(data)
        const totalChunks = docs.length
        if (data.previewChunkCount === -1 || totalChunks <= data.previewChunkCount) {
            // -1 means "everything"; asking for more than exists is capped to what exists
            data.previewChunkCount = totalChunks
        } else if (totalChunks > data.previewChunkCount) {
            // keep only the first n chunks
            docs = docs.slice(0, data.previewChunkCount)
        }
        return { chunks: docs, totalChunks: totalChunks, previewChunkCount: data.previewChunkCount }
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.previewChunks - ${reason}`
        )
    }
}
/**
 * Registers (or resets) a loader on a document store, marks it SYNCING, and starts the
 * chunking/saving work in the background.
 *
 * An existing loader (matched by `data.id`) has its chunk/char totals reset; otherwise a
 * new loader entry is appended, using `data.id` or a fresh uuid as its id.
 *
 * @param data - loader description; `storeId` identifies the target store
 * @returns the first page of chunks for the loader; because saving is not awaited, the
 *          page may not yet contain the newly produced chunks
 * @throws InternalFlowiseError - NOT_FOUND when the store does not exist; 500 otherwise
 */
const processAndSaveChunks = async (data: IDocumentStoreLoaderForPreview) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: data.storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Error: documentStoreServices.processAndSaveChunks - Document store ${data.storeId} not found`
            )
        }
        const newLoaderId = data.id ?? uuidv4()
        const existingLoaders = JSON.parse(entity.loaders)
        const found = existingLoaders.find((ldr: IDocumentStoreLoader) => ldr.id === newLoaderId)
        if (found) {
            // clean up the current status and mark the loader as pending_sync
            found.totalChunks = 0
            found.totalChars = 0
            found.status = DocumentStoreStatus.SYNCING
            entity.loaders = JSON.stringify(existingLoaders)
        } else {
            let loader: IDocumentStoreLoader = {
                id: newLoaderId,
                loaderId: data.loaderId,
                loaderName: data.loaderName,
                loaderConfig: data.loaderConfig,
                splitterId: data.splitterId,
                splitterName: data.splitterName,
                splitterConfig: data.splitterConfig,
                totalChunks: 0,
                totalChars: 0,
                status: DocumentStoreStatus.SYNCING
            }
            if (data.credential) {
                loader.credential = data.credential
            }
            existingLoaders.push(loader)
            entity.loaders = JSON.stringify(existingLoaders)
        }
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        // this method will run async, will have to be moved to a worker thread.
        // It is deliberately not awaited, so its rejection must be handled here;
        // otherwise a failed sync surfaces as an unhandled promise rejection.
        _saveChunksToStorage(data, entity, newLoaderId).catch((error) => {
            logger.error(
                `Error: documentStoreServices.processAndSaveChunks - saving chunks for loader ${newLoaderId} failed: ${getErrorMessage(
                    error
                )}`
            )
        })
        return getDocumentStoreFileChunks(data.storeId as string, newLoaderId)
    } catch (error) {
        // Keep the NOT_FOUND status raised above instead of masking it as a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.processAndSaveChunks - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Produces the chunks for a loader and persists them, moving uploaded base64 files into
 * storage and updating the loader entry and store status on the way.
 *
 * Every step is awaited, so the store is only saved as synced after the chunks have been
 * written, and any failure rejects the returned promise instead of escaping the try/catch.
 *
 * Mutates `data.loaderConfig` (base64 values become `FILE-STORAGE::` references) and `entity`.
 *
 * @param data - loader description; `data.id` being set means an existing loader is re-processed
 * @param entity - the document store the loader belongs to
 * @param newLoaderId - id of the loader entry inside `entity.loaders`
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const _saveChunksToStorage = async (data: IDocumentStoreLoaderForPreview, entity: DocumentStore, newLoaderId: string) => {
    const re = new RegExp('^data.*;base64', 'i')
    try {
        const appServer = getRunningExpressApp()
        //step 1: restore the full paths, if any
        await _normalizeFilePaths(data, entity)
        //step 2: split the file into chunks
        const response = await previewChunks(data)
        //step 3: remove base64 files and save them to storage, this needs to be rewritten
        const filesWithMetadata = []
        const storedFileNames = new Set<string>()
        const keys = Object.getOwnPropertyNames(data.loaderConfig)
        for (let i = 0; i < keys.length; i++) {
            const input = data.loaderConfig[keys[i]]
            if (!input || typeof input !== 'string') {
                continue
            }
            if (input.startsWith('[') && input.endsWith(']')) {
                const files = JSON.parse(input)
                const fileNames: string[] = []
                for (let j = 0; j < files.length; j++) {
                    const file = files[j]
                    if (re.test(file)) {
                        const fileMetadata = await _saveFileToStorage(file, entity)
                        fileNames.push(fileMetadata.name)
                        storedFileNames.add(fileMetadata.name)
                        filesWithMetadata.push(fileMetadata)
                    }
                }
                data.loaderConfig[keys[i]] = 'FILE-STORAGE::' + JSON.stringify(fileNames)
            } else if (re.test(input)) {
                const fileMetadata = await _saveFileToStorage(input, entity)
                storedFileNames.add(fileMetadata.name)
                filesWithMetadata.push(fileMetadata)
                data.loaderConfig[keys[i]] = 'FILE-STORAGE::' + JSON.stringify([fileMetadata.name])
                break
            }
        }
        const existingLoaders = JSON.parse(entity.loaders)
        const loader = existingLoaders.find((ldr: IDocumentStoreLoader) => ldr.id === newLoaderId)
        if (!loader) {
            // nothing below can work without the loader entry; fail with a readable message
            throw new Error(`Loader ${newLoaderId} not found in document store ${entity.id}`)
        }
        if (data.id) {
            //step 4: remove all files and chunks associated with the previous loader
            const index = existingLoaders.indexOf(loader)
            if (index > -1) {
                existingLoaders.splice(index, 1)
                if (!data.rehydrated && loader.files) {
                    // Files stored in step 3 under the same name must survive this cleanup
                    // (assumes storage addresses files by name within the store — confirm)
                    const staleFiles = loader.files.filter((file: IDocumentStoreLoaderFile) => !storedFileNames.has(file.name))
                    for (const file of staleFiles) {
                        try {
                            await removeSpecificFileFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id, file.name)
                        } catch (cleanupError) {
                            // a leftover file must not prevent the new chunks from being saved
                            logger.error(
                                `Error: documentStoreServices._saveChunksToStorage - unable to remove file ${
                                    file.name
                                }: ${getErrorMessage(cleanupError)}`
                            )
                        }
                    }
                }
            }
        }
        //step 5: upload with the new files and loaderConfig
        if (filesWithMetadata.length > 0) {
            loader.loaderConfig = data.loaderConfig
            loader.files = filesWithMetadata
        }
        //step 6: update the loaders with the new loaderConfig
        if (data.id) {
            existingLoaders.push(loader)
        }
        //step 7: remove all previous chunks
        const chunkRepository = appServer.AppDataSource.getRepository(DocumentStoreFileChunk)
        await chunkRepository.delete({ docId: newLoaderId })
        if (response.chunks) {
            //step 8: now save the new chunks, waiting for every save to finish
            const totalChars = response.chunks.reduce((acc: number, chunk) => acc + chunk.pageContent.length, 0)
            await Promise.all(
                response.chunks.map((chunk: IDocument, index: number) => {
                    const docChunk: DocumentStoreFileChunk = {
                        docId: newLoaderId,
                        storeId: data.storeId || '',
                        id: uuidv4(),
                        chunkNo: index + 1,
                        pageContent: chunk.pageContent,
                        metadata: JSON.stringify(chunk.metadata)
                    }
                    return chunkRepository.save(chunkRepository.create(docChunk))
                })
            )
            // update the loader with the new metrics
            loader.totalChunks = response.totalChunks
            loader.totalChars = totalChars
        }
        loader.status = 'SYNC'
        // the store is SYNC only when every loader is; otherwise it is STALE
        const allSynced = existingLoaders.every((ldr: IDocumentStoreLoader) => ldr.status === 'SYNC')
        entity.status = allSynced ? DocumentStoreStatus.SYNC : DocumentStoreStatus.STALE
        entity.loaders = JSON.stringify(existingLoaders)
        //step 9: update the entity in the database
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices._saveChunksToStorage - ${getErrorMessage(error)}`
        )
    }
}
/**
 * Lists the component nodes of the 'Document Loaders' category, leaving out the loaders
 * whose names are on the exclusion list below.
 *
 * @returns the remaining document loader nodes
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const getDocumentLoaders = async () => {
    const excludedLoaderNames = new Set(['documentStore', 'vectorStoreToDocument', 'unstructuredFolderLoader', 'folderFiles'])
    try {
        const loaderNodes = await nodesService.getAllNodesForCategory('Document Loaders')
        return loaderNodes.filter((node) => !excludedLoaderNames.has(node.name))
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getDocumentLoaders - ${reason}`
        )
    }
}
/**
 * Makes `storeId` the only document store whose `whereUsed` list contains `chatId`.
 *
 * For every store: the chat id is removed when the store is not the target (or no target
 * is given), and added when the store is the target and does not list it yet. Stores that
 * need no change are not saved.
 *
 * Saves are awaited one after another, so failures are reported through the thrown error
 * and the function only resolves once all updates are written.
 *
 * @param chatId - id recorded in the stores' `whereUsed` lists
 * @param storeId - id of the store now in use, or undefined when none is used any more
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const updateDocumentStoreUsage = async (chatId: string, storeId: string | undefined) => {
    try {
        const appServer = getRunningExpressApp()
        const repository = appServer.AppDataSource.getRepository(DocumentStore)
        const entities = await repository.find()
        for (const entity of entities) {
            // a store that was never used may have no whereUsed value at all
            const whereUsed: string[] = entity.whereUsed ? JSON.parse(entity.whereUsed) : []
            const index = whereUsed.indexOf(chatId)
            const isTargetStore = entity.id === storeId
            if (index > -1 && !isTargetStore) {
                // remove the chatId, as the store is no longer used by it
                whereUsed.splice(index, 1)
            } else if (index === -1 && isTargetStore) {
                // add the chatId to the whereUsed
                whereUsed.push(chatId)
            } else {
                // already in the desired state
                continue
            }
            entity.whereUsed = JSON.stringify(whereUsed)
            await repository.save(entity)
        }
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.updateDocumentStoreUsage - ${getErrorMessage(error)}`
        )
    }
}
// Public surface of the document store service; the underscore-prefixed helpers above stay module-private
export default {
    updateDocumentStoreUsage,
    deleteDocumentStore,
    createDocumentStore,
    deleteLoaderFromDocumentStore,
    getAllDocumentStores,
    getDocumentStoreById,
    getUsedChatflowNames,
    getDocumentStoreFileChunks,
    updateDocumentStore,
    previewChunks,
    processAndSaveChunks,
    deleteDocumentStoreFileChunk,
    editDocumentStoreFileChunk,
    getDocumentLoaders
}
+23 -1
View File
@@ -23,6 +23,27 @@ const getAllNodes = async () => {
}
}
/**
 * Get all component nodes for a specific category.
 *
 * @param category - category value a node must match exactly
 * @returns deep clones of the matching component nodes
 * @throws InternalFlowiseError (500) wrapping any failure
 */
const getAllNodesForCategory = async (category: string) => {
    try {
        const appServer = getRunningExpressApp()
        const matchingNodes = []
        for (const nodeName in appServer.nodesPool.componentNodes) {
            const candidate = appServer.nodesPool.componentNodes[nodeName]
            if (candidate.category !== category) {
                continue
            }
            // hand out copies so callers cannot alter the pooled node
            matchingNodes.push(cloneDeep(candidate))
        }
        return matchingNodes
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: nodesService.getAllNodesForCategory - ${reason}`
        )
    }
}
// Get specific component node via name
const getNodeByName = async (nodeName: string) => {
try {
@@ -138,5 +159,6 @@ export default {
getNodeByName,
getSingleNodeIcon,
getSingleNodeAsyncOptions,
executeCustomFunction
executeCustomFunction,
getAllNodesForCategory
}
+13 -1
View File
@@ -41,6 +41,8 @@ import { Assistant } from '../database/entities/Assistant'
import { DataSource } from 'typeorm'
import { CachePool } from '../CachePool'
import { Variable } from '../database/entities/Variable'
import { DocumentStore } from '../database/entities/DocumentStore'
import { DocumentStoreFileChunk } from '../database/entities/DocumentStoreFileChunk'
import { InternalFlowiseError } from '../errors/internalFlowiseError'
import { StatusCodes } from 'http-status-codes'
@@ -54,7 +56,9 @@ export const databaseEntities: IDatabaseEntity = {
Tool: Tool,
Credential: Credential,
Assistant: Assistant,
Variable: Variable
Variable: Variable,
DocumentStore: DocumentStore,
DocumentStoreFileChunk: DocumentStoreFileChunk
}
/**
@@ -471,6 +475,7 @@ export const buildFlow = async (
appDataSource,
databaseEntities,
cachePool,
isUpsert,
dynamicVariables,
uploads
})
@@ -1384,3 +1389,10 @@ export const getAppVersion = async () => {
return ''
}
}
/**
 * Derives a lowercase file name from `word`.
 *
 * Each of the characters / | \ : * ? " < > is replaced by a space, then the FIRST space
 * of the result is removed (later spaces are kept), and the string is lowercased.
 * NOTE(review): only one space is dropped because `replace` is given a string pattern —
 * confirm whether removing every space was intended before changing it, since existing
 * stored names may depend on the current output.
 *
 * @param word - the text to convert
 * @returns the converted name
 */
export const convertToValidFilename = (word: string) => {
    const withoutReservedChars = word.replace(/[/|\\:*?"<>]/g, ' ')
    const withoutFirstSpace = withoutReservedChars.replace(' ', '')
    return withoutFirstSpace.toLowerCase()
}