Bugfix/Update file loader (#4420)

* update file loader

* delete temperature param for o1 models

* fix Tavily tool: remove the Query input and send falsy include flags as undefined
This commit is contained in:
Henry Heng
2025-05-14 10:30:33 +08:00
committed by GitHub
parent eadf1b11b3
commit 82d16458e4
6 changed files with 119 additions and 28 deletions
@@ -237,7 +237,7 @@ class AzureChatOpenAI_ChatModels implements INode {
console.error('Error parsing base options', exception) console.error('Error parsing base options', exception)
} }
} }
if (modelName === 'o3-mini') { if (modelName === 'o3-mini' || modelName.includes('o1')) {
delete obj.temperature delete obj.temperature
} }
if ((modelName.includes('o1') || modelName.includes('o3')) && reasoningEffort) { if ((modelName.includes('o1') || modelName.includes('o3')) && reasoningEffort) {
@@ -243,7 +243,7 @@ class ChatOpenAI_ChatModels implements INode {
streaming: streaming ?? true streaming: streaming ?? true
} }
if (modelName.includes('o3')) { if (modelName.includes('o3') || modelName.includes('o1')) {
delete obj.temperature delete obj.temperature
} }
if ((modelName.includes('o1') || modelName.includes('o3')) && reasoningEffort) { if ((modelName.includes('o1') || modelName.includes('o3')) && reasoningEffort) {
@@ -47,7 +47,7 @@ class File_DocumentLoaders implements INode {
}, },
{ {
label: 'Pdf Usage', label: 'Pdf Usage',
name: 'pdfUsage', name: 'usage',
type: 'options', type: 'options',
description: 'Only when loading PDF files', description: 'Only when loading PDF files',
options: [ options: [
@@ -64,6 +64,14 @@ class File_DocumentLoaders implements INode {
optional: true, optional: true,
additionalParams: true additionalParams: true
}, },
{
label: 'Use Legacy Build',
name: 'legacyBuild',
type: 'boolean',
description: 'Use legacy build for PDF compatibility issues',
optional: true,
additionalParams: true
},
{ {
label: 'JSONL Pointer Extraction', label: 'JSONL Pointer Extraction',
name: 'pointerName', name: 'pointerName',
@@ -113,7 +121,8 @@ class File_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const fileBase64 = nodeData.inputs?.file as string const fileBase64 = nodeData.inputs?.file as string
const metadata = nodeData.inputs?.metadata const metadata = nodeData.inputs?.metadata
const pdfUsage = nodeData.inputs?.pdfUsage const pdfUsage = nodeData.inputs?.pdfUsage || nodeData.inputs?.usage
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
const pointerName = nodeData.inputs?.pointerName as string const pointerName = nodeData.inputs?.pointerName as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string const output = nodeData.outputs?.output as string
@@ -173,10 +182,21 @@ class File_DocumentLoaders implements INode {
const match = file.match(/^data:([A-Za-z-+\/]+);base64,/) const match = file.match(/^data:([A-Za-z-+\/]+);base64,/)
if (!match) { if (!match) {
// Fallback: check if there's a filename pattern at the end
const filenameMatch = file.match(/,filename:(.+\.\w+)$/)
if (filenameMatch && filenameMatch[1]) {
const filename = filenameMatch[1]
const fileExt = filename.split('.').pop() || ''
fileBlobs.push({
blob,
ext: fileExt
})
} else {
fileBlobs.push({ fileBlobs.push({
blob, blob,
ext: extension ext: extension
}) })
}
} else { } else {
const mimeType = match[1] const mimeType = match[1]
fileBlobs.push({ fileBlobs.push({
@@ -199,9 +219,18 @@ class File_DocumentLoaders implements INode {
pdf: (blob) => pdf: (blob) =>
pdfUsage === 'perFile' pdfUsage === 'perFile'
? // @ts-ignore ? // @ts-ignore
new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) new PDFLoader(blob, {
splitPages: false,
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
: // @ts-ignore : // @ts-ignore
new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }), new PDFLoader(blob, {
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
}),
'': (blob) => new TextLoader(blob) '': (blob) => new TextLoader(blob)
}) })
let docs = [] let docs = []
@@ -18,20 +18,12 @@ class TavilyAPI_Tools implements INode {
constructor() { constructor() {
this.label = 'Tavily API' this.label = 'Tavily API'
this.name = 'tavilyAPI' this.name = 'tavilyAPI'
this.version = 1.1 this.version = 1.2
this.type = 'TavilyAPI' this.type = 'TavilyAPI'
this.icon = 'tavily.svg' this.icon = 'tavily.svg'
this.category = 'Tools' this.category = 'Tools'
this.description = 'Wrapper around TavilyAPI - A specialized search engine designed for LLMs and AI agents' this.description = 'Wrapper around TavilyAPI - A specialized search engine designed for LLMs and AI agents'
this.inputs = [ this.inputs = [
{
label: 'Query',
name: 'query',
type: 'string',
optional: false,
description: 'The search query to execute with Tavily',
additionalParams: true
},
{ {
label: 'Topic', label: 'Topic',
name: 'topic', name: 'topic',
@@ -165,7 +157,6 @@ class TavilyAPI_Tools implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options) const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const tavilyApiKey = getCredentialParam('tavilyApiKey', credentialData, nodeData) const tavilyApiKey = getCredentialParam('tavilyApiKey', credentialData, nodeData)
const query = nodeData.inputs?.query as string
const topic = nodeData.inputs?.topic as string const topic = nodeData.inputs?.topic as string
const searchDepth = nodeData.inputs?.searchDepth as string const searchDepth = nodeData.inputs?.searchDepth as string
const chunksPerSource = nodeData.inputs?.chunksPerSource as number const chunksPerSource = nodeData.inputs?.chunksPerSource as number
@@ -181,14 +172,13 @@ class TavilyAPI_Tools implements INode {
const config: any = { const config: any = {
apiKey: tavilyApiKey, apiKey: tavilyApiKey,
query,
topic, topic,
searchDepth, searchDepth,
maxResults, maxResults,
includeAnswer, includeAnswer: includeAnswer || undefined,
includeRawContent, includeRawContent: includeRawContent || undefined,
includeImages, includeImages: includeImages || undefined,
includeImageDescriptions includeImageDescriptions: includeImageDescriptions || undefined
} }
if (chunksPerSource) config.chunksPerSource = chunksPerSource if (chunksPerSource) config.chunksPerSource = chunksPerSource
@@ -46,6 +46,28 @@ export const createFileAttachment = async (req: Request) => {
throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Chatflow ${chatflowid} not found`) throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Chatflow ${chatflowid} not found`)
} }
// Parse chatbot configuration to get file upload settings.
// Defaults apply when the chatflow has no chatbotConfig, when it has no
// fullFileUpload.pdfFile entry, or when the JSON cannot be parsed.
let pdfConfig = {
usage: 'perPage',
legacyBuild: false
}
if (chatflow.chatbotConfig) {
try {
// chatflow.chatbotConfig is treated as a JSON string here
const chatbotConfig = JSON.parse(chatflow.chatbotConfig)
if (chatbotConfig?.fullFileUpload?.pdfFile) {
// Each setting overrides its default independently; a missing or empty
// usage keeps 'perPage'.
if (chatbotConfig.fullFileUpload.pdfFile.usage) {
pdfConfig.usage = chatbotConfig.fullFileUpload.pdfFile.usage
}
// Compared against undefined (not truthiness) so an explicit `false` is applied.
if (chatbotConfig.fullFileUpload.pdfFile.legacyBuild !== undefined) {
pdfConfig.legacyBuild = chatbotConfig.fullFileUpload.pdfFile.legacyBuild
}
// NOTE(review): the parsed values are assigned without any type or
// allowed-value check — confirm downstream code tolerates unexpected values.
}
} catch (e) {
// Use default PDF config if parsing fails
}
}
// Find FileLoader node // Find FileLoader node
const fileLoaderComponent = appServer.nodesPool.componentNodes['fileLoader'] const fileLoaderComponent = appServer.nodesPool.componentNodes['fileLoader']
const fileLoaderNodeInstanceFilePath = fileLoaderComponent.filePath as string const fileLoaderNodeInstanceFilePath = fileLoaderComponent.filePath as string
@@ -93,6 +115,12 @@ export const createFileAttachment = async (req: Request) => {
outputs: { output: 'document' } outputs: { output: 'document' }
} }
// Apply PDF specific configuration if this is a PDF file
if (fileInputField === 'pdfFile') {
nodeData.inputs.usage = pdfConfig.usage
nodeData.inputs.legacyBuild = pdfConfig.legacyBuild as unknown as string
}
let content = '' let content = ''
if (isBase64) { if (isBase64) {
@@ -5,7 +5,7 @@ import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackba
import parser from 'html-react-parser' import parser from 'html-react-parser'
// material-ui // material-ui
import { Button, Box, Typography } from '@mui/material' import { Button, Box, Typography, FormControl, RadioGroup, FormControlLabel, Radio } from '@mui/material'
import { IconX, IconBulb } from '@tabler/icons-react' import { IconX, IconBulb } from '@tabler/icons-react'
// Project import // Project import
@@ -31,7 +31,9 @@ const availableFileTypes = [
{ name: 'PDF', ext: 'application/pdf' }, { name: 'PDF', ext: 'application/pdf' },
{ name: 'SQL', ext: 'application/sql' }, { name: 'SQL', ext: 'application/sql' },
{ name: 'Text File', ext: 'text/plain' }, { name: 'Text File', ext: 'text/plain' },
{ name: 'XML', ext: 'application/xml' } { name: 'XML', ext: 'application/xml' },
{ name: 'DOC', ext: 'application/msword' },
{ name: 'DOCX', ext: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
] ]
const FileUpload = ({ dialogProps }) => { const FileUpload = ({ dialogProps }) => {
@@ -45,6 +47,8 @@ const FileUpload = ({ dialogProps }) => {
const [fullFileUpload, setFullFileUpload] = useState(false) const [fullFileUpload, setFullFileUpload] = useState(false)
const [allowedFileTypes, setAllowedFileTypes] = useState([]) const [allowedFileTypes, setAllowedFileTypes] = useState([])
const [chatbotConfig, setChatbotConfig] = useState({}) const [chatbotConfig, setChatbotConfig] = useState({})
const [pdfUsage, setPdfUsage] = useState('perPage')
const [pdfLegacyBuild, setPdfLegacyBuild] = useState(false)
const handleChange = (value) => { const handleChange = (value) => {
setFullFileUpload(value) setFullFileUpload(value)
@@ -59,11 +63,23 @@ const FileUpload = ({ dialogProps }) => {
} }
} }
// Change handler for the PDF usage selection: copies the event target's
// current value into the pdfUsage state.
const handlePdfUsageChange = (event) => {
    const { value: selectedUsage } = event.target
    setPdfUsage(selectedUsage)
}
// Change handler for the "Use Legacy Build" toggle: stores the value it is
// called with in the pdfLegacyBuild state.
const handleLegacyBuildChange = (isLegacyBuildEnabled) => {
    setPdfLegacyBuild(isLegacyBuildEnabled)
}
const onSave = async () => { const onSave = async () => {
try { try {
const value = { const value = {
status: fullFileUpload, status: fullFileUpload,
allowedUploadFileTypes: allowedFileTypes.join(',') allowedUploadFileTypes: allowedFileTypes.join(','),
pdfFile: {
usage: pdfUsage,
legacyBuild: pdfLegacyBuild
}
} }
chatbotConfig.fullFileUpload = value chatbotConfig.fullFileUpload = value
@@ -120,6 +136,14 @@ const FileUpload = ({ dialogProps }) => {
const allowedFileTypes = chatbotConfig.fullFileUpload.allowedUploadFileTypes.split(',') const allowedFileTypes = chatbotConfig.fullFileUpload.allowedUploadFileTypes.split(',')
setAllowedFileTypes(allowedFileTypes) setAllowedFileTypes(allowedFileTypes)
} }
if (chatbotConfig.fullFileUpload?.pdfFile) {
if (chatbotConfig.fullFileUpload.pdfFile.usage) {
setPdfUsage(chatbotConfig.fullFileUpload.pdfFile.usage)
}
if (chatbotConfig.fullFileUpload.pdfFile.legacyBuild !== undefined) {
setPdfLegacyBuild(chatbotConfig.fullFileUpload.pdfFile.legacyBuild)
}
}
} catch (e) { } catch (e) {
setChatbotConfig({}) setChatbotConfig({})
} }
@@ -202,6 +226,26 @@ const FileUpload = ({ dialogProps }) => {
</div> </div>
))} ))}
</div> </div>
<Box sx={{ marginBottom: 3 }}>
<Typography sx={{ fontSize: 14, fontWeight: 500, marginBottom: 1 }}>PDF Usage</Typography>
<FormControl disabled={!fullFileUpload}>
<RadioGroup name='pdf-usage' value={pdfUsage} onChange={handlePdfUsageChange}>
<FormControlLabel value='perPage' control={<Radio />} label='One document per page' />
<FormControlLabel value='perFile' control={<Radio />} label='One document per file' />
</RadioGroup>
</FormControl>
</Box>
<Box sx={{ marginBottom: 3 }}>
<SwitchInput
label='Use Legacy Build (for PDF compatibility issues)'
onChange={handleLegacyBuildChange}
value={pdfLegacyBuild}
disabled={!fullFileUpload}
/>
</Box>
<StyledButton style={{ marginBottom: 10, marginTop: 20 }} variant='contained' onClick={onSave}> <StyledButton style={{ marginBottom: 10, marginTop: 20 }} variant='contained' onClick={onSave}>
Save Save
</StyledButton> </StyledButton>