Files
flyer-crawler.projectium.com/src/services/flyerFileHandler.server.ts
Torben Sorensen 1af8be3f15
Some checks failed
Deploy to Test Environment / deploy-to-test (push) Failing after 38s
more fixings
2025-12-28 22:20:28 -08:00

205 lines
7.7 KiB
TypeScript

// src/services/flyerFileHandler.server.ts
import path from 'path';
import sharp from 'sharp';
import type { Dirent } from 'node:fs';
import type { Job } from 'bullmq';
import type { Logger } from 'pino';
import { ImageConversionError, PdfConversionError, UnsupportedFileTypeError } from './processingErrors';
import type { FlyerJobData } from '../types/job-data';
// Image formats (lower-case extension, leading dot included) that are handed
// to the AI model as-is, without any conversion.
const SUPPORTED_IMAGE_EXTENSIONS: readonly string[] = [
  '.jpg',
  '.jpeg',
  '.png',
  '.webp',
  '.heic',
  '.heif',
];
// Image formats that are not directly supported but can be converted to PNG.
// TIFF is listed under both of its common extensions (`.tif` and `.tiff`) so
// that the short form is not rejected as an unsupported file type.
const CONVERTIBLE_IMAGE_EXTENSIONS: readonly string[] = ['.gif', '.tif', '.tiff', '.svg', '.bmp'];
/**
 * The asynchronous file-system operations that are injected into
 * `FlyerFileHandler`, kept as an interface so a stub can be supplied instead
 * of a real file system.
 */
export interface IFileSystem {
  /** Lists the entries of `dirPath`, resolving to `Dirent` objects rather than bare names. */
  readdir(dirPath: string, options: { withFileTypes: true }): Promise<Dirent[]>;
  /** Removes the file at `filePath`. */
  unlink(filePath: string): Promise<void>;
}
/**
 * A callable that runs a single command line and resolves with the text the
 * command wrote to its standard output and standard error streams.
 */
export interface ICommandExecutor {
  (commandLine: string): Promise<{ stdout: string; stderr: string }>;
}
/**
 * Prepares an uploaded flyer file (PDF or image) for AI processing.
 *
 * PDFs are rasterized to one JPEG per page with the external `pdftocairo`
 * command, directly supported images are passed through untouched, and other
 * convertible images are re-encoded as PNG with `sharp`. File-system access
 * and command execution are injected through the constructor.
 */
export class FlyerFileHandler {
  constructor(
    private fs: IFileSystem,
    private exec: ICommandExecutor,
  ) {}

  /**
   * Wraps a value in double quotes for use as one argument of a shell command.
   *
   * On POSIX platforms the characters that stay special inside double quotes
   * (backslash, double quote, dollar sign, backtick) are backslash-escaped so a
   * file name cannot terminate the quoting or trigger expansion. Values without
   * those characters come out exactly as `"value"`.
   */
  private _quoteShellArg(value: string): string {
    // Windows paths use backslashes as separators and its shell does not treat
    // them as escapes, so the plain quoting is kept there.
    if (path.sep === '\\') {
      return `"${value}"`;
    }
    const escaped = value.replace(/[\\"$`]/g, '\\$&');
    return `"${escaped}"`;
  }

  /**
   * Executes the pdftocairo command to convert the PDF.
   *
   * @param filePath Path of the PDF to convert.
   * @param outputFilePrefix Path prefix that pdftocairo extends per page.
   * @param logger Logger receiving the command, its output and any failure.
   * @returns The stdout and stderr captured from the command.
   * @throws PdfConversionError when the injected executor rejects.
   */
  private async _executePdfConversion(
    filePath: string,
    outputFilePrefix: string,
    logger: Logger,
  ): Promise<{ stdout: string; stderr: string }> {
    const quotedInput = this._quoteShellArg(filePath);
    const quotedOutput = this._quoteShellArg(outputFilePrefix);
    const command = `pdftocairo -jpeg -r 150 ${quotedInput} ${quotedOutput}`;
    logger.info(`Executing PDF conversion command`);
    logger.debug({ command });
    try {
      const { stdout, stderr } = await this.exec(command);
      if (stdout) logger.debug({ stdout }, `[Worker] pdftocairo stdout for ${filePath}:`);
      // A successful run can still write to stderr, so it is only a warning.
      if (stderr) logger.warn({ stderr }, `[Worker] pdftocairo stderr for ${filePath}:`);
      return { stdout, stderr };
    } catch (error) {
      const execError = error as Error & { stderr?: string };
      const errorMessage = `The pdftocairo command failed for file: ${filePath}.`;
      logger.error({ err: execError, stderr: execError.stderr }, errorMessage);
      throw new PdfConversionError(errorMessage, execError.stderr);
    }
  }

  /**
   * Scans the output directory for generated JPEG images and returns their paths.
   *
   * Only entries named `<prefix>-<digits>.jpg` are accepted, so files that
   * merely share the prefix (for example pages of `flyer2.pdf` while collecting
   * for `flyer.pdf`) are not mistaken for pages of this document.
   *
   * @param outputDir Directory that is listed.
   * @param outputFilePrefix Prefix given to pdftocairo; only its base name is used.
   * @param logger Logger for debug output.
   * @returns Full paths of the page images, ordered by page number.
   */
  private async _collectGeneratedImages(
    outputDir: string,
    outputFilePrefix: string,
    logger: Logger,
  ): Promise<string[]> {
    logger.debug(`[Worker] Reading contents of output directory: ${outputDir}`);
    const filesInDir = await this.fs.readdir(outputDir, { withFileTypes: true });
    logger.debug(`[Worker] Found ${filesInDir.length} total entries in output directory.`);

    const pageStem = `${path.basename(outputFilePrefix)}-`;
    const pageSuffix = '.jpg';
    const isPageImage = (name: string): boolean => {
      if (!name.startsWith(pageStem) || !name.endsWith(pageSuffix)) return false;
      // What remains between stem and suffix must be the page number alone.
      return /^\d+$/.test(name.slice(pageStem.length, -pageSuffix.length));
    };

    const generatedImages = filesInDir
      .filter((f) => isPageImage(f.name))
      // Numeric collation keeps page 10 after page 2 even without zero padding.
      .sort((a, b) => a.name.localeCompare(b.name, undefined, { numeric: true }));
    logger.debug(
      { imageNames: generatedImages.map((f) => f.name) },
      `Filtered down to ${generatedImages.length} generated JPGs.`,
    );
    return generatedImages.map((img) => path.join(outputDir, img.name));
  }

  /**
   * Converts a PDF file to a series of JPEG images using an external tool.
   *
   * The images are written next to the PDF, named after the PDF without its
   * extension (whatever the extension's letter case).
   *
   * @param filePath Path of the PDF.
   * @param job The queue job; currently unused by this method.
   * @param logger Logger for progress and errors.
   * @returns Paths of the generated page images.
   * @throws PdfConversionError when the command fails or yields no images.
   */
  private async _convertPdfToImages(
    filePath: string,
    job: Job<FlyerJobData>,
    logger: Logger,
  ): Promise<string[]> {
    logger.info(`Starting PDF conversion for: ${filePath}`);
    const outputDir = path.dirname(filePath);
    // path.parse drops the extension regardless of case (".pdf", ".PDF", ...).
    const outputFilePrefix = path.join(outputDir, path.parse(filePath).name);
    logger.debug({ outputDir, outputFilePrefix }, `PDF output details`);

    const { stderr } = await this._executePdfConversion(filePath, outputFilePrefix, logger);
    const imagePaths = await this._collectGeneratedImages(outputDir, outputFilePrefix, logger);

    if (imagePaths.length === 0) {
      const errorMessage = `PDF conversion resulted in 0 images for file: ${filePath}. The PDF might be blank or corrupt.`;
      logger.error({ stderr }, `PdfConversionError: ${errorMessage}`);
      throw new PdfConversionError(errorMessage, stderr);
    }
    return imagePaths;
  }

  /**
   * Converts an image file (e.g., GIF, TIFF) to a PNG format that the AI can process.
   *
   * The PNG is written next to the source as `<name>-converted.png`.
   *
   * @param filePath Path of the image to convert.
   * @param logger Logger for progress and errors.
   * @returns Path of the PNG that was written.
   * @throws ImageConversionError when sharp fails to produce the PNG.
   */
  private async _convertImageToPng(filePath: string, logger: Logger): Promise<string> {
    const outputDir = path.dirname(filePath);
    const originalFileName = path.parse(path.basename(filePath)).name;
    const newFileName = `${originalFileName}-converted.png`;
    const outputPath = path.join(outputDir, newFileName);
    logger.info({ from: filePath, to: outputPath }, 'Converting unsupported image format to PNG.');
    try {
      await sharp(filePath).png().toFile(outputPath);
      return outputPath;
    } catch (error) {
      logger.error({ err: error, filePath }, 'Failed to convert image to PNG using sharp.');
      throw new ImageConversionError(`Image conversion to PNG failed for ${path.basename(filePath)}.`);
    }
  }

  /**
   * Handles PDF files by converting them to a series of JPEG images.
   *
   * @returns Every generated page both as an AI input (`imagePaths`) and as a
   *   created file (`createdImagePaths`).
   */
  private async _handlePdfInput(
    filePath: string,
    job: Job<FlyerJobData>,
    logger: Logger,
  ): Promise<{ imagePaths: { path: string; mimetype: string }[]; createdImagePaths: string[] }> {
    const createdImagePaths = await this._convertPdfToImages(filePath, job, logger);
    const imagePaths = createdImagePaths.map((p) => ({ path: p, mimetype: 'image/jpeg' }));
    logger.info(`Converted PDF to ${imagePaths.length} images.`);
    return { imagePaths, createdImagePaths };
  }

  /**
   * Handles image files that are directly supported by the AI.
   *
   * The file is used as-is, so `createdImagePaths` is empty.
   *
   * @param fileExt Lower-case extension including the leading dot.
   */
  private async _handleSupportedImageInput(
    filePath: string,
    fileExt: string,
    logger: Logger,
  ): Promise<{ imagePaths: { path: string; mimetype: string }[]; createdImagePaths: string[] }> {
    logger.info(`Processing as a single image file: ${filePath}`);
    // ".jpg" has no MIME subtype of its own; every other extension maps 1:1.
    const mimetype =
      fileExt === '.jpg' || fileExt === '.jpeg' ? 'image/jpeg' : `image/${fileExt.slice(1)}`;
    const imagePaths = [{ path: filePath, mimetype }];
    return { imagePaths, createdImagePaths: [] };
  }

  /**
   * Handles image files that need to be converted to PNG before AI processing.
   *
   * @returns The converted PNG as the only AI input and the only created file.
   */
  private async _handleConvertibleImageInput(
    filePath: string,
    logger: Logger,
  ): Promise<{ imagePaths: { path: string; mimetype: string }[]; createdImagePaths: string[] }> {
    const createdPngPath = await this._convertImageToPng(filePath, logger);
    const imagePaths = [{ path: createdPngPath, mimetype: 'image/png' }];
    const createdImagePaths = [createdPngPath];
    return { imagePaths, createdImagePaths };
  }

  /**
   * Throws an error for unsupported file types.
   *
   * @throws UnsupportedFileTypeError always.
   */
  private _handleUnsupportedInput(
    fileExt: string,
    originalFileName: string,
    logger: Logger,
  ): never {
    const errorMessage = `Unsupported file type: ${fileExt}. Supported types are PDF, JPG, PNG, WEBP, HEIC, HEIF, GIF, TIFF, SVG, BMP.`;
    logger.error({ originalFileName, fileExt }, errorMessage);
    throw new UnsupportedFileTypeError(errorMessage);
  }

  /**
   * Prepares the input images for the AI service. If the input is a PDF, it's converted to images.
   *
   * The handler is chosen from the lower-cased file extension.
   *
   * @param filePath Path of the uploaded file.
   * @param job Queue job; `job.data.originalFileName` is logged for unsupported files.
   * @param logger Logger passed on to the individual handlers.
   * @returns `imagePaths` are the images (with MIME type) for the AI;
   *   `createdImagePaths` are the files this call generated on disk.
   * @throws UnsupportedFileTypeError for any other extension, plus the
   *   conversion errors raised by the individual handlers.
   */
  public async prepareImageInputs(
    filePath: string,
    job: Job<FlyerJobData>,
    logger: Logger,
  ): Promise<{ imagePaths: { path: string; mimetype: string }[]; createdImagePaths: string[] }> {
    const fileExt = path.extname(filePath).toLowerCase();
    if (fileExt === '.pdf') {
      return this._handlePdfInput(filePath, job, logger);
    }
    if (SUPPORTED_IMAGE_EXTENSIONS.includes(fileExt)) {
      return this._handleSupportedImageInput(filePath, fileExt, logger);
    }
    if (CONVERTIBLE_IMAGE_EXTENSIONS.includes(fileExt)) {
      return this._handleConvertibleImageInput(filePath, logger);
    }
    return this._handleUnsupportedInput(fileExt, job.data.originalFileName, logger);
  }
}