Bugfix/Update file loader (#4420)

* update file loader

* delete temperature for o1 models

* fix tavily (remove query input, omit unset boolean options)
This commit is contained in:
Henry Heng
2025-05-14 10:30:33 +08:00
committed by GitHub
parent eadf1b11b3
commit 82d16458e4
6 changed files with 119 additions and 28 deletions
@@ -237,7 +237,7 @@ class AzureChatOpenAI_ChatModels implements INode {
console.error('Error parsing base options', exception)
}
}
if (modelName === 'o3-mini') {
if (modelName === 'o3-mini' || modelName.includes('o1')) {
delete obj.temperature
}
if ((modelName.includes('o1') || modelName.includes('o3')) && reasoningEffort) {
@@ -243,7 +243,7 @@ class ChatOpenAI_ChatModels implements INode {
streaming: streaming ?? true
}
if (modelName.includes('o3')) {
if (modelName.includes('o3') || modelName.includes('o1')) {
delete obj.temperature
}
if ((modelName.includes('o1') || modelName.includes('o3')) && reasoningEffort) {
@@ -47,7 +47,7 @@ class File_DocumentLoaders implements INode {
},
{
label: 'Pdf Usage',
name: 'pdfUsage',
name: 'usage',
type: 'options',
description: 'Only when loading PDF files',
options: [
@@ -64,6 +64,14 @@ class File_DocumentLoaders implements INode {
optional: true,
additionalParams: true
},
{
label: 'Use Legacy Build',
name: 'legacyBuild',
type: 'boolean',
description: 'Use legacy build for PDF compatibility issues',
optional: true,
additionalParams: true
},
{
label: 'JSONL Pointer Extraction',
name: 'pointerName',
@@ -113,7 +121,8 @@ class File_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const fileBase64 = nodeData.inputs?.file as string
const metadata = nodeData.inputs?.metadata
const pdfUsage = nodeData.inputs?.pdfUsage
const pdfUsage = nodeData.inputs?.pdfUsage || nodeData.inputs?.usage
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
const pointerName = nodeData.inputs?.pointerName as string
const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
const output = nodeData.outputs?.output as string
@@ -173,10 +182,21 @@ class File_DocumentLoaders implements INode {
const match = file.match(/^data:([A-Za-z-+\/]+);base64,/)
if (!match) {
fileBlobs.push({
blob,
ext: extension
})
// Fallback: check if there's a filename pattern at the end
const filenameMatch = file.match(/,filename:(.+\.\w+)$/)
if (filenameMatch && filenameMatch[1]) {
const filename = filenameMatch[1]
const fileExt = filename.split('.').pop() || ''
fileBlobs.push({
blob,
ext: fileExt
})
} else {
fileBlobs.push({
blob,
ext: extension
})
}
} else {
const mimeType = match[1]
fileBlobs.push({
@@ -199,9 +219,18 @@ class File_DocumentLoaders implements INode {
pdf: (blob) =>
pdfUsage === 'perFile'
? // @ts-ignore
new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
new PDFLoader(blob, {
splitPages: false,
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
: // @ts-ignore
new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }),
new PDFLoader(blob, {
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
}),
'': (blob) => new TextLoader(blob)
})
let docs = []
@@ -18,20 +18,12 @@ class TavilyAPI_Tools implements INode {
constructor() {
this.label = 'Tavily API'
this.name = 'tavilyAPI'
this.version = 1.1
this.version = 1.2
this.type = 'TavilyAPI'
this.icon = 'tavily.svg'
this.category = 'Tools'
this.description = 'Wrapper around TavilyAPI - A specialized search engine designed for LLMs and AI agents'
this.inputs = [
{
label: 'Query',
name: 'query',
type: 'string',
optional: false,
description: 'The search query to execute with Tavily',
additionalParams: true
},
{
label: 'Topic',
name: 'topic',
@@ -165,7 +157,6 @@ class TavilyAPI_Tools implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const tavilyApiKey = getCredentialParam('tavilyApiKey', credentialData, nodeData)
const query = nodeData.inputs?.query as string
const topic = nodeData.inputs?.topic as string
const searchDepth = nodeData.inputs?.searchDepth as string
const chunksPerSource = nodeData.inputs?.chunksPerSource as number
@@ -181,14 +172,13 @@ class TavilyAPI_Tools implements INode {
const config: any = {
apiKey: tavilyApiKey,
query,
topic,
searchDepth,
maxResults,
includeAnswer,
includeRawContent,
includeImages,
includeImageDescriptions
includeAnswer: includeAnswer || undefined,
includeRawContent: includeRawContent || undefined,
includeImages: includeImages || undefined,
includeImageDescriptions: includeImageDescriptions || undefined
}
if (chunksPerSource) config.chunksPerSource = chunksPerSource
@@ -46,6 +46,28 @@ export const createFileAttachment = async (req: Request) => {
throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Chatflow ${chatflowid} not found`)
}
// Parse chatbot configuration to get file upload settings
let pdfConfig = {
usage: 'perPage',
legacyBuild: false
}
if (chatflow.chatbotConfig) {
try {
const chatbotConfig = JSON.parse(chatflow.chatbotConfig)
if (chatbotConfig?.fullFileUpload?.pdfFile) {
if (chatbotConfig.fullFileUpload.pdfFile.usage) {
pdfConfig.usage = chatbotConfig.fullFileUpload.pdfFile.usage
}
if (chatbotConfig.fullFileUpload.pdfFile.legacyBuild !== undefined) {
pdfConfig.legacyBuild = chatbotConfig.fullFileUpload.pdfFile.legacyBuild
}
}
} catch (e) {
// Use default PDF config if parsing fails
}
}
// Find FileLoader node
const fileLoaderComponent = appServer.nodesPool.componentNodes['fileLoader']
const fileLoaderNodeInstanceFilePath = fileLoaderComponent.filePath as string
@@ -93,6 +115,12 @@ export const createFileAttachment = async (req: Request) => {
outputs: { output: 'document' }
}
// Apply PDF specific configuration if this is a PDF file
if (fileInputField === 'pdfFile') {
nodeData.inputs.usage = pdfConfig.usage
nodeData.inputs.legacyBuild = pdfConfig.legacyBuild as unknown as string
}
let content = ''
if (isBase64) {
@@ -5,7 +5,7 @@ import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackba
import parser from 'html-react-parser'
// material-ui
import { Button, Box, Typography } from '@mui/material'
import { Button, Box, Typography, FormControl, RadioGroup, FormControlLabel, Radio } from '@mui/material'
import { IconX, IconBulb } from '@tabler/icons-react'
// Project import
@@ -31,7 +31,9 @@ const availableFileTypes = [
{ name: 'PDF', ext: 'application/pdf' },
{ name: 'SQL', ext: 'application/sql' },
{ name: 'Text File', ext: 'text/plain' },
{ name: 'XML', ext: 'application/xml' }
{ name: 'XML', ext: 'application/xml' },
{ name: 'DOC', ext: 'application/msword' },
{ name: 'DOCX', ext: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }
]
const FileUpload = ({ dialogProps }) => {
@@ -45,6 +47,8 @@ const FileUpload = ({ dialogProps }) => {
const [fullFileUpload, setFullFileUpload] = useState(false)
const [allowedFileTypes, setAllowedFileTypes] = useState([])
const [chatbotConfig, setChatbotConfig] = useState({})
const [pdfUsage, setPdfUsage] = useState('perPage')
const [pdfLegacyBuild, setPdfLegacyBuild] = useState(false)
const handleChange = (value) => {
setFullFileUpload(value)
@@ -59,11 +63,23 @@ const FileUpload = ({ dialogProps }) => {
}
}
const handlePdfUsageChange = (event) => {
setPdfUsage(event.target.value)
}
const handleLegacyBuildChange = (value) => {
setPdfLegacyBuild(value)
}
const onSave = async () => {
try {
const value = {
status: fullFileUpload,
allowedUploadFileTypes: allowedFileTypes.join(',')
allowedUploadFileTypes: allowedFileTypes.join(','),
pdfFile: {
usage: pdfUsage,
legacyBuild: pdfLegacyBuild
}
}
chatbotConfig.fullFileUpload = value
@@ -120,6 +136,14 @@ const FileUpload = ({ dialogProps }) => {
const allowedFileTypes = chatbotConfig.fullFileUpload.allowedUploadFileTypes.split(',')
setAllowedFileTypes(allowedFileTypes)
}
if (chatbotConfig.fullFileUpload?.pdfFile) {
if (chatbotConfig.fullFileUpload.pdfFile.usage) {
setPdfUsage(chatbotConfig.fullFileUpload.pdfFile.usage)
}
if (chatbotConfig.fullFileUpload.pdfFile.legacyBuild !== undefined) {
setPdfLegacyBuild(chatbotConfig.fullFileUpload.pdfFile.legacyBuild)
}
}
} catch (e) {
setChatbotConfig({})
}
@@ -202,6 +226,26 @@ const FileUpload = ({ dialogProps }) => {
</div>
))}
</div>
<Box sx={{ marginBottom: 3 }}>
<Typography sx={{ fontSize: 14, fontWeight: 500, marginBottom: 1 }}>PDF Usage</Typography>
<FormControl disabled={!fullFileUpload}>
<RadioGroup name='pdf-usage' value={pdfUsage} onChange={handlePdfUsageChange}>
<FormControlLabel value='perPage' control={<Radio />} label='One document per page' />
<FormControlLabel value='perFile' control={<Radio />} label='One document per file' />
</RadioGroup>
</FormControl>
</Box>
<Box sx={{ marginBottom: 3 }}>
<SwitchInput
label='Use Legacy Build (for PDF compatibility issues)'
onChange={handleLegacyBuildChange}
value={pdfLegacyBuild}
disabled={!fullFileUpload}
/>
</Box>
<StyledButton style={{ marginBottom: 10, marginTop: 20 }} variant='contained' onClick={onSave}>
Save
</StyledButton>