mirror of
https://github.com/farcasclaudiu/Flowise.git
synced 2026-06-28 19:00:59 +03:00
Merge pull request #322 from FlowiseAI/bugfix/Weaviate-PDF
Bugfix/pdf loader add legacy option
This commit is contained in:
@@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
FROM node:18-alpine
|
FROM node:18-alpine
|
||||||
RUN apk add --update libc6-compat python3 make g++
|
RUN apk add --update libc6-compat python3 make g++
|
||||||
|
# needed for pdfjs-dist
|
||||||
|
RUN apk add --no-cache build-base cairo-dev pango-dev
|
||||||
|
|
||||||
WORKDIR /usr/src/packages
|
WORKDIR /usr/src/packages
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ USER root
|
|||||||
|
|
||||||
RUN apk add --no-cache git
|
RUN apk add --no-cache git
|
||||||
RUN apk add --no-cache python3 py3-pip make g++
|
RUN apk add --no-cache python3 py3-pip make g++
|
||||||
|
# needed for pdfjs-dist
|
||||||
|
RUN apk add --no-cache build-base cairo-dev pango-dev
|
||||||
|
|
||||||
# You can install a specific version like: flowise@1.0.0
|
# You can install a specific version like: flowise@1.0.0
|
||||||
RUN npm install -g flowise
|
RUN npm install -g flowise
|
||||||
|
|||||||
@@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
|
|||||||
],
|
],
|
||||||
default: 'perPage'
|
default: 'perPage'
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
label: 'Use Legacy Build',
|
||||||
|
name: 'legacyBuild',
|
||||||
|
type: 'boolean',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
label: 'Metadata',
|
label: 'Metadata',
|
||||||
name: 'metadata',
|
name: 'metadata',
|
||||||
@@ -64,6 +71,7 @@ class Pdf_DocumentLoaders implements INode {
|
|||||||
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
|
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
|
||||||
const usage = nodeData.inputs?.usage as string
|
const usage = nodeData.inputs?.usage as string
|
||||||
const metadata = nodeData.inputs?.metadata
|
const metadata = nodeData.inputs?.metadata
|
||||||
|
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
|
||||||
|
|
||||||
let alldocs = []
|
let alldocs = []
|
||||||
let files: string[] = []
|
let files: string[] = []
|
||||||
@@ -81,8 +89,9 @@ class Pdf_DocumentLoaders implements INode {
|
|||||||
if (usage === 'perFile') {
|
if (usage === 'perFile') {
|
||||||
const loader = new PDFLoader(new Blob([bf]), {
|
const loader = new PDFLoader(new Blob([bf]), {
|
||||||
splitPages: false,
|
splitPages: false,
|
||||||
// @ts-ignore
|
pdfjs: () =>
|
||||||
pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
// @ts-ignore
|
||||||
|
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||||
})
|
})
|
||||||
if (textSplitter) {
|
if (textSplitter) {
|
||||||
const docs = await loader.loadAndSplit(textSplitter)
|
const docs = await loader.loadAndSplit(textSplitter)
|
||||||
@@ -92,8 +101,11 @@ class Pdf_DocumentLoaders implements INode {
|
|||||||
alldocs.push(...docs)
|
alldocs.push(...docs)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// @ts-ignore
|
const loader = new PDFLoader(new Blob([bf]), {
|
||||||
const loader = new PDFLoader(new Blob([bf]), { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
|
pdfjs: () =>
|
||||||
|
// @ts-ignore
|
||||||
|
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||||
|
})
|
||||||
if (textSplitter) {
|
if (textSplitter) {
|
||||||
const docs = await loader.loadAndSplit(textSplitter)
|
const docs = await loader.loadAndSplit(textSplitter)
|
||||||
alldocs.push(...docs)
|
alldocs.push(...docs)
|
||||||
|
|||||||
@@ -39,9 +39,10 @@
|
|||||||
"moment": "^2.29.3",
|
"moment": "^2.29.3",
|
||||||
"node-fetch": "^2.6.11",
|
"node-fetch": "^2.6.11",
|
||||||
"pdf-parse": "^1.1.1",
|
"pdf-parse": "^1.1.1",
|
||||||
|
"pdfjs-dist": "^3.7.107",
|
||||||
"playwright": "^1.35.0",
|
"playwright": "^1.35.0",
|
||||||
"srt-parser-2": "^1.2.3",
|
|
||||||
"puppeteer": "^20.7.1",
|
"puppeteer": "^20.7.1",
|
||||||
|
"srt-parser-2": "^1.2.3",
|
||||||
"weaviate-ts-client": "^1.1.0",
|
"weaviate-ts-client": "^1.1.0",
|
||||||
"ws": "^8.9.0"
|
"ws": "^8.9.0"
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user