pdf upload fix
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 21s
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 21s
This commit is contained in:
@@ -1,13 +1,58 @@
|
||||
// This assumes pdfjsLib is available on the window, loaded from index.html
|
||||
declare const pdfjsLib: any;
|
||||
// Tell TypeScript that `window.pdfjsWorker` exists; it is expected to be
// provided by a script loaded in index.html.
declare global {
  interface Window {
    pdfjsWorker: string;
  }
}

/**
 * Renders a single PDF page to an off-screen canvas and returns it as a JPEG File.
 *
 * The output file name is derived from `originalFileName`: a trailing `.pdf`
 * (case-insensitive) is removed if present, then `_page_<pageNumber>.jpeg` is
 * appended. The suffix is always appended, so names without a `.pdf` extension
 * still produce distinct, correctly-suffixed files per page.
 *
 * @param pdfPage The PDF page object from pdf.js.
 * @param pageNumber The page number (1-based); used in the file name and error text.
 * @param originalFileName The name of the original PDF file.
 * @param scale The scale at which to render the page.
 * @returns A promise that resolves to an image File object of type `image/jpeg`.
 * @throws Error if a 2D canvas context cannot be obtained, or if the canvas
 *   cannot be encoded to a blob.
 */
const renderPageToImageFile = async (
  pdfPage: any, // pdf.js page object
  pageNumber: number,
  originalFileName: string,
  scale: number
): Promise<File> => {
  const viewport = pdfPage.getViewport({ scale });

  const canvas = document.createElement('canvas');
  const context = canvas.getContext('2d');
  if (!context) {
    throw new Error('Could not get canvas context');
  }

  // Size the canvas to the scaled page so the whole page is rendered.
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  await pdfPage.render({ canvasContext: context, viewport }).promise;

  // Promisify canvas.toBlob for async/await usage (JPEG, quality 0.9).
  const blob = await new Promise<Blob | null>((resolve) => {
    canvas.toBlob(resolve, 'image/jpeg', 0.9);
  });

  if (!blob) {
    throw new Error(`Failed to convert page ${pageNumber} of PDF to blob.`);
  }

  // Strip an optional ".pdf" suffix first, then always append the page suffix.
  const baseName = originalFileName.replace(/\.pdf$/i, '');
  const newFileName = `${baseName}_page_${pageNumber}.jpeg`;
  return new File([blob], newFileName, { type: 'image/jpeg' });
};
|
||||
|
||||
/**
 * Loads a PDF document from a File object using the globally loaded pdf.js library.
 *
 * @param pdfFile The PDF file.
 * @returns A promise that resolves to the pdf.js document object.
 * @throws Error if the global `pdfjsLib` is not defined (pdf.js script not loaded).
 */
const getPdfDocument = async (pdfFile: File): Promise<any> => {
  // `pdfjsLib` is an ambient global expected to come from a script tag in
  // index.html; the `typeof` guard is safe even if that script never ran.
  if (typeof pdfjsLib === 'undefined') {
    throw new Error('pdf.js library is not loaded. Please check the script tag in index.html.');
  }

  const arrayBuffer = await pdfFile.arrayBuffer();

  // getDocument() returns a loading task; its `promise` resolves to the document.
  const pdf = await pdfjsLib.getDocument(arrayBuffer).promise;
  return pdf;
};
|
||||
|
||||
/**
|
||||
* Converts all pages of a PDF file into an array of image File objects.
|
||||
@@ -19,48 +64,30 @@ export const convertPdfToImageFiles = async (
|
||||
pdfFile: File,
|
||||
onProgress?: (currentPage: number, totalPages: number) => void
|
||||
): Promise<{ imageFiles: File[], pageCount: number }> => {
|
||||
if (typeof pdfjsLib === 'undefined' || !pdfjsLib.GlobalWorkerOptions.workerSrc) {
|
||||
// Set workerSrc if not already set by the script in index.html
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = window.pdfjsWorker;
|
||||
}
|
||||
const pdf = await getPdfDocument(pdfFile);
|
||||
const pageCount = pdf.numPages;
|
||||
const imageFiles: File[] = [];
|
||||
const scale = 1.5;
|
||||
|
||||
// Create an array of promises, one for each page rendering task.
|
||||
const pagePromises = Array.from({ length: pageCount }, async (_, i) => {
|
||||
const pageNumber = i + 1;
|
||||
const page = await pdf.getPage(pageNumber);
|
||||
const imageFile = await renderPageToImageFile(page, pageNumber, pdfFile.name, scale);
|
||||
|
||||
const arrayBuffer = await pdfFile.arrayBuffer();
|
||||
const pdf = await pdfjsLib.getDocument(arrayBuffer).promise;
|
||||
const pageCount = pdf.numPages;
|
||||
const imageFiles: File[] = [];
|
||||
|
||||
for (let i = 1; i <= pageCount; i++) {
|
||||
onProgress?.(i, pageCount);
|
||||
const page = await pdf.getPage(i);
|
||||
const viewport = page.getViewport({ scale: 1.5 });
|
||||
|
||||
const canvas = document.createElement('canvas');
|
||||
const context = canvas.getContext('2d');
|
||||
canvas.height = viewport.height;
|
||||
canvas.width = viewport.width;
|
||||
|
||||
if (!context) {
|
||||
throw new Error('Could not get canvas context');
|
||||
}
|
||||
|
||||
await page.render({ canvasContext: context, viewport: viewport }).promise;
|
||||
|
||||
const blob = await new Promise<Blob | null>((resolve) => {
|
||||
canvas.toBlob(resolve, 'image/jpeg', 0.9);
|
||||
});
|
||||
|
||||
if (blob) {
|
||||
const newFileName = pdfFile.name.replace(/\.pdf$/i, `_page_${i}.jpeg`);
|
||||
const imageFile = new File([blob], newFileName, { type: 'image/jpeg' });
|
||||
imageFiles.push(imageFile);
|
||||
} else {
|
||||
console.warn(`Failed to convert page ${i} of PDF to blob.`);
|
||||
}
|
||||
}
|
||||
// Report progress as each page finishes.
|
||||
onProgress?.(pageNumber, pageCount);
|
||||
|
||||
if (imageFiles.length === 0 && pageCount > 0) {
|
||||
throw new Error('PDF conversion resulted in zero images, though the PDF has pages. It might be corrupted or contain non-standard content.');
|
||||
}
|
||||
return imageFile;
|
||||
});
|
||||
|
||||
return { imageFiles, pageCount };
|
||||
// Process all pages in parallel and collect the results.
|
||||
const results = await Promise.all(pagePromises);
|
||||
imageFiles.push(...results);
|
||||
|
||||
if (imageFiles.length === 0 && pageCount > 0) {
|
||||
throw new Error('PDF conversion resulted in zero images, though the PDF has pages. It might be corrupted or contain non-standard content.');
|
||||
}
|
||||
|
||||
return { imageFiles, pageCount };
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user