diff --git a/packages/api-documentation/src/yml/swagger.yml b/packages/api-documentation/src/yml/swagger.yml index 3b37cc52..9cca7597 100644 --- a/packages/api-documentation/src/yml/swagger.yml +++ b/packages/api-documentation/src/yml/swagger.yml @@ -679,6 +679,11 @@ paths: type: string format: binary description: Files to be uploaded + docId: + type: string + nullable: true + example: '603a7b51-ae7c-4b0a-8865-e454ed2f6766' + description: Document ID to use existing configuration loader: type: string nullable: true @@ -704,6 +709,32 @@ paths: nullable: true example: '{"name":"postgresRecordManager"}' description: Record Manager configurations + metadata: + type: object + nullable: true + description: Metadata associated with the document + example: { 'foo': 'bar' } + replaceExisting: + type: boolean + nullable: true + description: Whether to replace existing document loader with the new upserted chunks. However this does not delete the existing embeddings in the vector store + createNewDocStore: + type: boolean + nullable: true + description: Whether to create a new document store + docStore: + type: object + nullable: true + description: Only when createNewDocStore is true, pass in the new document store configuration + properties: + name: + type: string + example: plainText + description: Name of the new document store to be created + description: + type: string + example: plainText + description: Description of the new document store to be created required: - files required: true @@ -2350,16 +2381,37 @@ components: docId: type: string format: uuid + nullable: true description: Document ID within the store. If provided, existing configuration from the document will be used for the new document metadata: type: object + nullable: true description: Metadata associated with the document example: { 'foo': 'bar' } replaceExisting: type: boolean + nullable: true description: Whether to replace existing document loader with the new upserted chunks. 
However this does not delete the existing embeddings in the vector store + createNewDocStore: + type: boolean + nullable: true + description: Whether to create a new document store + docStore: + type: object + nullable: true + description: Only when createNewDocStore is true, pass in the new document store configuration + properties: + name: + type: string + example: plainText + description: Name of the new document store to be created + description: + type: string + example: plainText + description: Description of the new document store to be created loader: type: object + nullable: true properties: name: type: string @@ -2370,6 +2422,7 @@ components: description: Configuration for the loader splitter: type: object + nullable: true properties: name: type: string @@ -2380,6 +2433,7 @@ components: description: Configuration for the text splitter embedding: type: object + nullable: true properties: name: type: string @@ -2390,6 +2444,7 @@ components: description: Configuration for the embedding generator vectorStore: type: object + nullable: true properties: name: type: string @@ -2400,6 +2455,7 @@ components: description: Configuration for the vector store recordManager: type: object + nullable: true properties: name: type: string diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts index 04d0efbc..a2707d13 100644 --- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts +++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts @@ -266,7 +266,7 @@ class FireCrawl_DocumentLoaders implements INode { this.name = 'fireCrawl' this.type = 'Document' this.icon = 'firecrawl.png' - this.version = 2.0 + this.version = 2.1 this.category = 'Document Loaders' this.description = 'Load data from URL using FireCrawl' this.baseClasses = [this.type] @@ -307,6 +307,42 @@ class FireCrawl_DocumentLoaders implements INode { } ], default: 'crawl' + }, + { + // 
maxCrawlPages + label: 'Max Crawl Pages', + name: 'maxCrawlPages', + type: 'string', + description: 'Maximum number of pages to crawl', + optional: true, + additionalParams: true + }, + { + // generateImgAltText + label: 'Generate Image Alt Text', + name: 'generateImgAltText', + type: 'boolean', + description: 'Generate alt text for images', + optional: true, + additionalParams: true + }, + { + // returnOnlyUrls + label: 'Return Only URLs', + name: 'returnOnlyUrls', + type: 'boolean', + description: 'Return only URLs of the crawled pages', + optional: true, + additionalParams: true + }, + { + // onlyMainContent + label: 'Only Main Content', + name: 'onlyMainContent', + type: 'boolean', + description: 'Extract only the main content of the page', + optional: true, + additionalParams: true } // ... (other input parameters) ] diff --git a/packages/server/src/Interface.DocumentStore.ts b/packages/server/src/Interface.DocumentStore.ts index a2c158af..e882e005 100644 --- a/packages/server/src/Interface.DocumentStore.ts +++ b/packages/server/src/Interface.DocumentStore.ts @@ -76,6 +76,8 @@ export interface IDocumentStoreUpsertData { docId: string metadata?: string | object replaceExisting?: boolean + createNewDocStore?: boolean + docStore?: IDocumentStore loader?: { name: string config: ICommonObject diff --git a/packages/server/src/services/documentstore/index.ts b/packages/server/src/services/documentstore/index.ts index a195dacb..1d893cc6 100644 --- a/packages/server/src/services/documentstore/index.ts +++ b/packages/server/src/services/documentstore/index.ts @@ -32,7 +32,8 @@ import { INodeData, MODE, IOverrideConfig, - IExecutePreviewLoader + IExecutePreviewLoader, + DocumentStoreDTO } from '../../Interface' import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk' import { v4 as uuidv4 } from 'uuid' @@ -1464,6 +1465,7 @@ const upsertDocStore = async ( } } const replaceExisting = data.replaceExisting ?? 
false + const createNewDocStore = String(data.createNewDocStore ?? false).toLowerCase() === 'true' const newLoader = typeof data.loader === 'string' ? JSON.parse(data.loader) : data.loader const newSplitter = typeof data.splitter === 'string' ? JSON.parse(data.splitter) : data.splitter const newVectorStore = typeof data.vectorStore === 'string' ? JSON.parse(data.vectorStore) : data.vectorStore @@ -1533,6 +1535,15 @@ const upsertDocStore = async ( recordManagerConfig = JSON.parse(entity.recordManagerConfig || '{}')?.config } + if (createNewDocStore) { + const docStoreBody = typeof data.docStore === 'string' ? JSON.parse(data.docStore) : data.docStore + const newDocumentStore = docStoreBody ?? { name: `Document Store ${Date.now().toString()}` } + const docStore = DocumentStoreDTO.toEntity(newDocumentStore) + const documentStore = appDataSource.getRepository(DocumentStore).create(docStore) + const dbResponse = await appDataSource.getRepository(DocumentStore).save(documentStore) + storeId = dbResponse.id + } + // Step 2: Replace with new values loaderName = newLoader?.name ? 
getComponentLabelFromName(newLoader?.name) : loaderName loaderId = newLoader?.name || loaderId @@ -1687,6 +1698,7 @@ const upsertDocStore = async ( isVectorStoreInsert: true }) res.docId = newDocId + if (createNewDocStore) res.storeId = storeId return res } catch (error) { diff --git a/packages/ui/src/views/docstore/DocStoreAPIDialog.jsx b/packages/ui/src/views/docstore/DocStoreAPIDialog.jsx index 0cf56496..8cf06e75 100644 --- a/packages/ui/src/views/docstore/DocStoreAPIDialog.jsx +++ b/packages/ui/src/views/docstore/DocStoreAPIDialog.jsx @@ -41,11 +41,13 @@ body_data = { "docId": "${dialogProps.loaderId}", "metadata": {}, # Add additional metadata to the document chunks "replaceExisting": True, # Replace existing document with the new upserted chunks + "createNewDocStore": False, # Create a new document store "splitter": json.dumps({"config":{"chunkSize":20000}}) # Override existing configuration # "loader": "", # "vectorStore": "", # "embedding": "", # "recordManager": "", + # "docStore": "" } headers = { @@ -71,11 +73,14 @@ formData.append("splitter", JSON.stringify({"config":{"chunkSize":20000}})); formData.append("metadata", "{}"); // Replace existing document with the new upserted chunks formData.append("replaceExisting", "true"); +// Create a new document store +formData.append("createNewDocStore", "false"); // Override existing configuration // formData.append("loader", ""); // formData.append("embedding", ""); // formData.append("vectorStore", ""); // formData.append("recordManager", ""); +// formData.append("docStore", ""); async function query(formData) { const response = await fetch( @@ -105,11 +110,13 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st -F "splitter={"config":{"chunkSize":20000}}" \\ -F "metadata={}" \\ -F "replaceExisting=true" \\ + -F "createNewDocStore=false" \\ # Override existing configuration: # -F "loader=" \\ # -F "embedding=" \\ # -F "vectorStore=" \\ - # -F "recordManager=" + # -F 
"recordManager=" \\ + # -F "docStore=" \`\`\` ` } @@ -135,6 +142,7 @@ output = query({ "docId": "${dialogProps.loaderId}", "metadata": "{}", # Add additional metadata to the document chunks "replaceExisting": True, # Replace existing document with the new upserted chunks + "createNewDocStore": False, # Create a new document store # Override existing configuration "loader": { "config": { @@ -149,6 +157,7 @@ output = query({ # embedding: {}, # vectorStore: {}, # recordManager: {} + # docStore: {} }) print(output) \`\`\` @@ -174,6 +183,7 @@ query({ "docId": "${dialogProps.loaderId}, "metadata": "{}", // Add additional metadata to the document chunks "replaceExisting": true, // Replace existing document with the new upserted chunks + "createNewDocStore": false, // Create a new document store // Override existing configuration "loader": { "config": { @@ -188,6 +198,7 @@ query({ // embedding: {}, // vectorStore: {}, // recordManager: {} + // docStore: {} }).then((response) => { console.log(response); }); @@ -201,6 +212,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st "docId": "${dialogProps.loaderId}", "metadata": "{}", "replaceExisting": true, + "createNewDocStore": false, "loader": { "config": { "text": "This is a new text" @@ -215,6 +227,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st // "embedding": {}, // "vectorStore": {}, // "recordManager": {} + // "docStore": {} }' \`\`\`