move AI flyer processing to background BullMQ jobs using redis for storage

2025-12-02 17:15:10 -08:00
parent 12f4a1be64
commit d028511d38
3 changed files with 35 additions and 21 deletions
--- a/src/services/queueService.server.ts
+++ b/src/services/queueService.server.ts
@@ -3,7 +3,9 @@ import { Queue, Worker, Job } from 'bullmq';
 import IORedis from 'ioredis'; // Correctly imported
 import path from 'path';
 import fs from 'fs/promises';
-import Poppler from 'pdf-poppler';
+// Use pdfjs-dist for PDF parsing and sharp for image processing.
+import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
+import sharp from 'sharp';

 import { logger } from './logger.server';
 import * as aiService from './aiService.server';
@@ -92,19 +94,39 @@ export const flyerWorker = new Worker<FlyerJobData>(
      const fileExt = path.extname(filePath).toLowerCase();

      if (fileExt === '.pdf') {
-        const poppler = new Poppler();
-        const outputDir = path.dirname(filePath);
-        const outputFilePrefix = path.basename(filePath, '.pdf');
-        await poppler.pdfToCairo(filePath, path.join(outputDir, outputFilePrefix), {
-          jpegFile: true,
-          resolution: 150,
-        });
+        await job.updateProgress({ message: 'Converting PDF to images...' });
+        
+        // Load the PDF document using pdfjs-dist
+        const data = new Uint8Array(await fs.readFile(filePath));
+        const pdfDocument = await pdfjs.getDocument({ data }).promise;

-        const files = await fs.readdir(outputDir);
-        const generatedImages = files.filter(f => f.startsWith(outputFilePrefix) && f.endsWith('.jpg'));
-        generatedImages.sort(); // Ensure pages are in order
-        for (const img of generatedImages) {
-            imagePaths.push({ path: path.join(outputDir, img), mimetype: 'image/jpeg' });
+        const outputDir = path.dirname(filePath);
+
+        for (let i = 1; i <= pdfDocument.numPages; i++) {
+          const page = await pdfDocument.getPage(i);
+          const viewport = page.getViewport({ scale: 1.5 }); // NOTE(review): scale 1.5 is ~108 DPI (PDF units are 1/72 inch), not 150; ~150 DPI needs scale ≈ 2.08
+
+          // Stub canvas and context objects passed to pdf.js in place of a real canvas.
+          // NOTE(review): getImageData() below returns a new zero-filled buffer on every call, so the pixels handed to sharp are not the rendered page — confirm.
+          const canvas = {
+            width: viewport.width,
+            height: viewport.height,
+            getContext: () => context,
+          };
+          const context = {
+            canvas: canvas, // The context needs a back-reference to its canvas.
+            getImageData: () => ({ data: new Uint8ClampedArray(viewport.width * viewport.height * 4) }),
+          };
+
+          const renderContext = { canvasContext: context as any, viewport, canvas: canvas as any };
+          const renderTask = page.render(renderContext);
+          await renderTask.promise;
+
+          const rawPixelData = context.getImageData().data;
+          const imageFileName = `${path.basename(filePath, '.pdf')}_page_${i}.jpeg`;
+          const imageOutputPath = path.join(outputDir, imageFileName);
+          await sharp(rawPixelData, { raw: { width: viewport.width, height: viewport.height, channels: 4 } }).jpeg().toFile(imageOutputPath);
+          imagePaths.push({ path: imageOutputPath, mimetype: 'image/jpeg' });
        }
        logger.info(`[Worker] Converted PDF to ${imagePaths.length} images.`);
      } else {