Merge pull request #1080 from FlowiseAI/feature/RecursiveCharacterTextSplitter

Feature/Add custom separators
This commit is contained in:
Henry Heng
2023-10-18 01:16:18 +01:00
committed by GitHub
2 changed files with 26 additions and 7 deletions
@@ -23,12 +23,6 @@ class CharacterTextSplitter_TextSplitters implements INode {
this.description = `splits only on one type of character (defaults to "\\n\\n").` this.description = `splits only on one type of character (defaults to "\\n\\n").`
this.baseClasses = [this.type, ...getBaseClasses(CharacterTextSplitter)] this.baseClasses = [this.type, ...getBaseClasses(CharacterTextSplitter)]
this.inputs = [ this.inputs = [
{
label: 'Separator',
name: 'separator',
type: 'string',
optional: true
},
{ {
label: 'Chunk Size', label: 'Chunk Size',
name: 'chunkSize', name: 'chunkSize',
@@ -41,6 +35,14 @@ class CharacterTextSplitter_TextSplitters implements INode {
name: 'chunkOverlap', name: 'chunkOverlap',
type: 'number', type: 'number',
optional: true optional: true
},
{
label: 'Custom Separator',
name: 'separator',
type: 'string',
placeholder: `" "`,
description: 'Separator to determine when to split the text, will override the default separator',
optional: true
} }
] ]
} }
@@ -16,7 +16,7 @@ class RecursiveCharacterTextSplitter_TextSplitters implements INode {
constructor() { constructor() {
this.label = 'Recursive Character Text Splitter' this.label = 'Recursive Character Text Splitter'
this.name = 'recursiveCharacterTextSplitter' this.name = 'recursiveCharacterTextSplitter'
this.version = 1.0 this.version = 2.0
this.type = 'RecursiveCharacterTextSplitter' this.type = 'RecursiveCharacterTextSplitter'
this.icon = 'textsplitter.svg' this.icon = 'textsplitter.svg'
this.category = 'Text Splitters' this.category = 'Text Splitters'
@@ -35,6 +35,15 @@ class RecursiveCharacterTextSplitter_TextSplitters implements INode {
name: 'chunkOverlap', name: 'chunkOverlap',
type: 'number', type: 'number',
optional: true optional: true
},
{
label: 'Custom Separators',
name: 'separators',
type: 'string',
rows: 4,
description: 'Array of custom separators to determine when to split the text, will override the default separators',
placeholder: `["|", "##", ">", "-"]`,
optional: true
} }
] ]
} }
@@ -42,11 +51,19 @@ class RecursiveCharacterTextSplitter_TextSplitters implements INode {
async init(nodeData: INodeData): Promise<any> { async init(nodeData: INodeData): Promise<any> {
const chunkSize = nodeData.inputs?.chunkSize as string const chunkSize = nodeData.inputs?.chunkSize as string
const chunkOverlap = nodeData.inputs?.chunkOverlap as string const chunkOverlap = nodeData.inputs?.chunkOverlap as string
const separators = nodeData.inputs?.separators as string
const obj = {} as RecursiveCharacterTextSplitterParams const obj = {} as RecursiveCharacterTextSplitterParams
if (chunkSize) obj.chunkSize = parseInt(chunkSize, 10) if (chunkSize) obj.chunkSize = parseInt(chunkSize, 10)
if (chunkOverlap) obj.chunkOverlap = parseInt(chunkOverlap, 10) if (chunkOverlap) obj.chunkOverlap = parseInt(chunkOverlap, 10)
if (separators) {
try {
obj.separators = JSON.parse(separators)
} catch (e) {
throw new Error(e)
}
}
const splitter = new RecursiveCharacterTextSplitter(obj) const splitter = new RecursiveCharacterTextSplitter(obj)