diff --git a/packages/components/nodes/documentloaders/PlainText/PlainText.ts b/packages/components/nodes/documentloaders/PlainText/PlainText.ts new file mode 100644 index 00000000..261f2d98 --- /dev/null +++ b/packages/components/nodes/documentloaders/PlainText/PlainText.ts @@ -0,0 +1,88 @@ +import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { TextSplitter } from 'langchain/text_splitter' +import { Document } from 'langchain/document' + +class PlainText_DocumentLoaders implements INode { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + + constructor() { + this.label = 'Plain Text' + this.name = 'plainText' + this.version = 1.0 + this.type = 'Document' + this.icon = 'plaintext.svg' + this.category = 'Document Loaders' + this.description = `Load data from plain text` + this.baseClasses = [this.type] + this.inputs = [ + { + label: 'Text', + name: 'text', + type: 'string', + rows: 4, + placeholder: + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua...' + }, + { + label: 'Text Splitter', + name: 'textSplitter', + type: 'TextSplitter', + optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true + } + ] + } + + async init(nodeData: INodeData): Promise { + const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const text = nodeData.inputs?.text as string + const metadata = nodeData.inputs?.metadata + + let alldocs: Document>[] = [] + + if (textSplitter) { + const docs = await textSplitter.createDocuments([text]) + alldocs.push(...docs) + } else { + alldocs.push( + new Document({ + pageContent: text + }) + ) + } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs: Document>[] = [] + for (const doc of alldocs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return alldocs + } +} + +module.exports = { nodeClass: PlainText_DocumentLoaders } diff --git a/packages/components/nodes/documentloaders/PlainText/plaintext.svg b/packages/components/nodes/documentloaders/PlainText/plaintext.svg new file mode 100644 index 00000000..b9fec035 --- /dev/null +++ b/packages/components/nodes/documentloaders/PlainText/plaintext.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/packages/components/nodes/retrievers/SimilarityThresholdRetriever/SimilarityThresholdRetriever.ts b/packages/components/nodes/retrievers/SimilarityThresholdRetriever/SimilarityThresholdRetriever.ts new file mode 100644 index 00000000..a9f4b3d8 --- /dev/null +++ b/packages/components/nodes/retrievers/SimilarityThresholdRetriever/SimilarityThresholdRetriever.ts @@ -0,0 +1,107 @@ +import { VectorStore } from 'langchain/vectorstores/base' +import { INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface' +import { handleEscapeCharacters } from '../../../src' +import { ScoreThresholdRetriever } from 'langchain/retrievers/score_threshold' + +class SimilarityThresholdRetriever_Retrievers implements INode { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + outputs: INodeOutputsValue[] + + constructor() { + this.label = 'Similarity Score Threshold Retriever' + this.name = 'similarityThresholdRetriever' + this.version = 1.0 + this.type = 'SimilarityThresholdRetriever' + this.icon = 'similaritythreshold.svg' + this.category = 'Retrievers' + this.description = 'Return results based on the minimum similarity percentage' + this.baseClasses = [this.type, 'BaseRetriever'] + this.inputs = [ + { + label: 'Vector Store', + name: 'vectorStore', + type: 'VectorStore' + }, + { + label: 'Minimum Similarity Score (%)', + name: 'minSimilarityScore', + description: 'Finds results with at least this similarity score', + type: 'number', + default: 80, + step: 1 + }, + { + label: 'Max K', + name: 'maxK', + description: `The maximum number of results to fetch`, + type: 'number', + default: 20, + step: 1 + }, + { + label: 'K Increment', + name: 'kIncrement', + description: `How much to increase K by each time. It'll fetch N results, then N + kIncrement, then N + kIncrement * 2, etc.`, + type: 'number', + default: 2, + step: 1 + } + ] + this.outputs = [ + { + label: 'Similarity Threshold Retriever', + name: 'retriever', + baseClasses: this.baseClasses + }, + { + label: 'Document', + name: 'document', + baseClasses: ['Document'] + }, + { + label: 'Text', + name: 'text', + baseClasses: ['string', 'json'] + } + ] + } + + async init(nodeData: INodeData, input: string): Promise { + const vectorStore = nodeData.inputs?.vectorStore as VectorStore + const minSimilarityScore = nodeData.inputs?.minSimilarityScore as number + const maxK = nodeData.inputs?.maxK as string + const kIncrement = nodeData.inputs?.kIncrement as string + + const output = nodeData.outputs?.output as string + + const retriever = ScoreThresholdRetriever.fromVectorStore(vectorStore, { + minSimilarityScore: minSimilarityScore ? minSimilarityScore / 100 : 0.9, + maxK: maxK ? parseInt(maxK, 10) : 100, + kIncrement: kIncrement ? parseInt(kIncrement, 10) : 2 + }) + + if (output === 'retriever') return retriever + else if (output === 'document') return await retriever.getRelevantDocuments(input) + else if (output === 'text') { + let finaltext = '' + + const docs = await retriever.getRelevantDocuments(input) + + for (const doc of docs) finaltext += `${doc.pageContent}\n` + + return handleEscapeCharacters(finaltext, false) + } + + return retriever + } +} + +module.exports = { nodeClass: SimilarityThresholdRetriever_Retrievers } diff --git a/packages/components/nodes/retrievers/SimilarityThresholdRetriever/similaritythreshold.svg b/packages/components/nodes/retrievers/SimilarityThresholdRetriever/similaritythreshold.svg new file mode 100644 index 00000000..6b918fd8 --- /dev/null +++ b/packages/components/nodes/retrievers/SimilarityThresholdRetriever/similaritythreshold.svg @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/packages/ui/src/views/chatmessage/ChatMessage.js b/packages/ui/src/views/chatmessage/ChatMessage.js index 836a29c3..7b186358 100644 --- a/packages/ui/src/views/chatmessage/ChatMessage.js +++ b/packages/ui/src/views/chatmessage/ChatMessage.js @@ -65,27 +65,10 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { window.open(data, '_blank') } - const handleVectaraMetadata = (message) => { - if (message.sourceDocuments && message.sourceDocuments[0].metadata.length) - message.sourceDocuments = message.sourceDocuments.map((docs) => { - const newMetadata = docs.metadata.reduce((newMetadata, metadata) => { - newMetadata[metadata.name] = metadata.value - return newMetadata - }, {}) - return { - pageContent: docs.pageContent, - metadata: newMetadata - } - }) - return message - } - const removeDuplicateURL = (message) => { const visitedURLs = [] const newSourceDocuments = [] - message = handleVectaraMetadata(message) - message.sourceDocuments.forEach((source) => { if (isValidURL(source.metadata.source) && !visitedURLs.includes(source.metadata.source)) { visitedURLs.push(source.metadata.source) @@ -159,8 +142,6 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => { if (response.data) { let data = response.data - data = handleVectaraMetadata(data) - if (!chatId) { setChatId(data.chatId) localStorage.setItem(`${chatflowid}_INTERNAL`, data.chatId)