Files
flyer-crawler.projectium.com/src/services/flyerProcessingService.server.test.ts
Torben Sorensen 2489ec8d2d
All checks were successful
Deploy to Test Environment / deploy-to-test (push) Successful in 12m3s
fix unit tests
2025-12-29 17:57:40 -08:00

608 lines
25 KiB
TypeScript

// src/services/flyerProcessingService.server.test.ts
import { describe, it, expect, vi, beforeEach, type Mocked } from 'vitest';
import sharp from 'sharp';
import { Job, UnrecoverableError } from 'bullmq';
import type { Dirent } from 'node:fs';
import type { Logger } from 'pino';
import { z } from 'zod';
import { AiFlyerDataSchema } from './flyerAiProcessor.server';
import type { Flyer, FlyerInsert, FlyerItemInsert } from '../types';
import type { CleanupJobData, FlyerJobData } from '../types/job-data';
// 1. Create the shared mock functions inside `vi.hoisted` FIRST, so the
//    `vi.mock` factories declared further down can reference them.
const mocks = vi.hoisted(() => {
  const unlink = vi.fn();
  const readdir = vi.fn();
  const execAsync = vi.fn();
  return { unlink, readdir, execAsync };
});
// 2. Mock modules using the hoisted variables
// `promisify` is replaced wholesale: whatever function is passed in, the caller
// receives the shared `execAsync` mock.
vi.mock('util', () => ({ promisify: () => mocks.execAsync }));
// Partial mock of `node:fs/promises`: every real export is kept and only
// `unlink` and `readdir` are swapped for the hoisted mocks.
vi.mock('node:fs/promises', async (importOriginal) => {
  const actual = await importOriginal<typeof import('node:fs/promises')>();
  const withOverrides = {
    ...actual,
    unlink: mocks.unlink,
    readdir: mocks.readdir,
  };
  // The default export must carry the same overrides as the named exports;
  // pointing it at the untouched `actual` would let
  // `import fs from 'node:fs/promises'` reach the real `unlink`/`readdir`.
  return { ...withOverrides, default: withOverrides };
});
// Import service and dependencies (FlyerJobData already imported from types above)
import { FlyerProcessingService } from './flyerProcessingService.server';
import * as db from './db/index.db';
import { createFlyerAndItems } from './db/flyer.db';
import { createMockFlyer } from '../tests/utils/mockFactories';
import { FlyerDataTransformer } from './flyerDataTransformer';
import {
AiDataValidationError,
PdfConversionError,
UnsupportedFileTypeError,
} from './processingErrors';
import { FlyerFileHandler } from './flyerFileHandler.server';
import { FlyerAiProcessor } from './flyerAiProcessor.server';
import type { IFileSystem, ICommandExecutor } from './flyerFileHandler.server';
import type { AIService } from './aiService.server';
// Mock dependencies
// NOTE: keep these as literal top-level `vi.mock(...)` calls with inline factories.
// Vitest hoists them above the imports, so the factories may only reference
// values that exist at that point (e.g. `vi`, or things created via `vi.hoisted`).

// AI service: only `extractCoreDataFromFlyerImage` is provided, as a bare mock function.
vi.mock('./aiService.server', () => ({
aiService: {
extractCoreDataFromFlyerImage: vi.fn(),
},
}));
// Flyer persistence: `createFlyerAndItems` is a mock whose result is configured in `beforeEach`.
vi.mock('./db/flyer.db', () => ({
createFlyerAndItems: vi.fn(),
}));
// DB index module: only the two repository methods stubbed here exist on the mocked module.
vi.mock('./db/index.db', () => ({
personalizationRepo: { getAllMasterItems: vi.fn() },
adminRepo: { logActivity: vi.fn() },
}));
// Logger: every level is a spy. `child` returns `this`, so a logger obtained via
// `logger.child(...)` is the same mock object and its calls land on the same spies.
vi.mock('./logger.server', () => ({
logger: {
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
child: vi.fn().mockReturnThis(),
},
}));
// No factory is given for these two modules, so Vitest supplies an automock
// (or a `__mocks__` implementation if the project has one).
vi.mock('./flyerFileHandler.server');
vi.mock('./flyerAiProcessor.server');
// Typed view of the mocked DB module so its members expose the mock API to the type checker.
const mockedDb = db as Mocked<typeof db>;
describe('FlyerProcessingService', () => {
// Fixtures shared by every test in this suite. The `let` bindings are
// reassigned in `beforeEach`; the queue stub is a single object whose call
// history is cleared by `vi.clearAllMocks()`.
let service: FlyerProcessingService;
let mockFileHandler: Mocked<FlyerFileHandler>;
let mockAiProcessor: Mocked<FlyerAiProcessor>;
const mockCleanupQueue = { add: vi.fn() };

beforeEach(() => {
  vi.clearAllMocks();

  // Stub `transform` on the real transformer prototype instead of mocking the
  // whole class; every transformer instance then resolves with this canned result.
  const cannedFlyerData = {
    file_name: 'test.jpg',
    image_url: 'test.jpg',
    icon_url: 'icon.webp',
    checksum: 'checksum-123',
    store_name: 'Mock Store',
    // Remaining fields required by the FlyerInsert type
    status: 'processed',
    item_count: 0,
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    store_address: '123 Mock St',
  } as FlyerInsert; // The cast is acceptable because this is only a mock value.
  vi.spyOn(FlyerDataTransformer.prototype, 'transform').mockResolvedValue({
    flyerData: cannedFlyerData,
    itemsForDb: [],
  });

  // By default `readdir` reports an empty directory.
  mocks.readdir.mockResolvedValue([]);

  // File-system adapter handed to both the file handler and the service.
  const fsAdapter: IFileSystem = {
    readdir: mocks.readdir,
    unlink: mocks.unlink,
  };

  mockFileHandler = new FlyerFileHandler(fsAdapter, vi.fn()) as Mocked<FlyerFileHandler>;
  mockAiProcessor = new FlyerAiProcessor(
    {} as AIService,
    mockedDb.personalizationRepo,
  ) as Mocked<FlyerAiProcessor>;

  // Build the service under test from mocked collaborators.
  service = new FlyerProcessingService(
    mockFileHandler,
    mockAiProcessor,
    mockedDb,
    fsAdapter,
    mockCleanupQueue,
    new FlyerDataTransformer(),
  );

  // Happy-path defaults; individual tests override whichever stage they want to fail.
  mockAiProcessor.extractAndValidateData.mockResolvedValue({
    data: {
      store_name: 'Mock Store',
      valid_from: '2024-01-01',
      valid_to: '2024-01-07',
      store_address: '123 Mock St',
      items: [
        {
          item: 'Test Item',
          price_display: '$1.99',
          price_in_cents: 199,
          quantity: 'each',
          category_name: 'Test Category',
          master_item_id: 1,
        },
      ],
    },
    needsReview: false,
  });
  mockFileHandler.prepareImageInputs.mockResolvedValue({
    imagePaths: [{ path: '/tmp/flyer.jpg', mimetype: 'image/jpeg' }],
    createdImagePaths: [],
  });
  vi.mocked(createFlyerAndItems).mockResolvedValue({
    flyer: createMockFlyer({
      flyer_id: 1,
      file_name: 'test.jpg',
      image_url: 'test.jpg',
      item_count: 1,
    }),
    items: [],
  });
  vi.mocked(mockedDb.adminRepo.logActivity).mockResolvedValue();
  // Resolve with an empty list so code reading `.length` on the result does not hit a TypeError.
  vi.mocked(mockedDb.personalizationRepo.getAllMasterItems).mockResolvedValue([]);
});
/**
 * Builds a minimal stand-in for a flyer-processing job.
 *
 * @param data Fields that override the default job payload
 *   (`/tmp/flyer.jpg`, `flyer.jpg`, `checksum-123`).
 * @returns An object carrying only `id`, `data`, `updateProgress`, `opts` and
 *   `attemptsMade`, cast to `Job<FlyerJobData>`.
 */
const createMockJob = (data: Partial<FlyerJobData>): Job<FlyerJobData> => {
  const defaultPayload = {
    filePath: '/tmp/flyer.jpg',
    originalFileName: 'flyer.jpg',
    checksum: 'checksum-123',
  };
  const jobStub = {
    id: 'job-1',
    // Caller-supplied fields win over the defaults.
    data: Object.assign({}, defaultPayload, data),
    updateProgress: vi.fn(),
    opts: { attempts: 3 },
    attemptsMade: 1,
  };
  return jobStub as unknown as Job<FlyerJobData>;
};
/**
 * Builds a minimal stand-in for a cleanup job.
 *
 * @param data Payload stored on the job as-is; its `flyerId` is embedded in the job id.
 * @returns An object carrying only `id`, `data`, `opts`, `attemptsMade` and
 *   `updateProgress`, cast to `Job<CleanupJobData>`.
 */
const createMockCleanupJob = (data: CleanupJobData): Job<CleanupJobData> => {
  const jobId = `cleanup-job-${data.flyerId}`;
  const jobStub = {
    id: jobId,
    data,
    updateProgress: vi.fn(),
    attemptsMade: 1,
    opts: { attempts: 3 },
  };
  return jobStub as unknown as Job<CleanupJobData>;
};
describe('processJob (Orchestrator)', () => {
it('should process an image file successfully and enqueue a cleanup job', async () => {
  // Arrange
  const imageJob = createMockJob({ filePath: '/tmp/flyer.jpg', originalFileName: 'flyer.jpg' });

  // Act
  const outcome = await service.processJob(imageJob);

  // Assert: the job resolves with the id of the flyer returned by the DB mock...
  expect(outcome).toEqual({ flyerId: 1 });

  // ...each collaborator was used...
  expect(mockFileHandler.prepareImageInputs).toHaveBeenCalledWith(
    imageJob.data.filePath,
    imageJob,
    expect.any(Object),
  );
  expect(mockAiProcessor.extractAndValidateData).toHaveBeenCalledTimes(1);
  expect(createFlyerAndItems).toHaveBeenCalledTimes(1);
  expect(mockedDb.adminRepo.logActivity).toHaveBeenCalledTimes(1);

  // ...and a cleanup job was queued for the uploaded file.
  const expectedCleanupPayload = { flyerId: 1, paths: ['/tmp/flyer.jpg'] };
  expect(mockCleanupQueue.add).toHaveBeenCalledWith(
    'cleanup-flyer-files',
    expectedCleanupPayload,
    expect.any(Object),
  );
});
it('should convert a PDF, process its images, and enqueue a cleanup job for all files', async () => {
  // Arrange: the file handler reports two page images generated from the PDF.
  const pdfJob = createMockJob({ filePath: '/tmp/flyer.pdf', originalFileName: 'flyer.pdf' });
  const pageImages = ['/tmp/flyer-1.jpg', '/tmp/flyer-2.jpg'];
  mockFileHandler.prepareImageInputs.mockResolvedValue({
    imagePaths: pageImages.map((pagePath) => ({ path: pagePath, mimetype: 'image/jpeg' })),
    createdImagePaths: pageImages,
  });

  // Act
  await service.processJob(pdfJob);

  // Assert
  expect(mockFileHandler.prepareImageInputs).toHaveBeenCalledWith(
    '/tmp/flyer.pdf',
    pdfJob,
    expect.any(Object),
  );
  expect(mockAiProcessor.extractAndValidateData).toHaveBeenCalledTimes(1);
  expect(createFlyerAndItems).toHaveBeenCalledTimes(1);

  // The cleanup job must list the original PDF followed by both generated images.
  const expectedCleanupPayload = {
    flyerId: 1,
    paths: ['/tmp/flyer.pdf', '/tmp/flyer-1.jpg', '/tmp/flyer-2.jpg'],
  };
  expect(mockCleanupQueue.add).toHaveBeenCalledWith(
    'cleanup-flyer-files',
    expectedCleanupPayload,
    expect.any(Object),
  );
});
it('should throw an error and not enqueue cleanup if the AI service fails', async () => {
  // Arrange: the AI stage rejects with an ordinary Error.
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const failureMessage = 'AI model exploded';
  mockAiProcessor.extractAndValidateData.mockRejectedValue(new Error(failureMessage));

  // Act + Assert: the failure propagates to the caller.
  await expect(service.processJob(job)).rejects.toThrow(failureMessage);

  // The progress report marks the AI stage as failed and the later stages as skipped.
  const expectedStages = [
    { name: 'Preparing Inputs', status: 'completed', critical: true, detail: '1 page(s) ready for AI.' },
    { name: 'Extracting Data with AI', status: 'failed', critical: true, detail: failureMessage },
    { name: 'Transforming AI Data', status: 'skipped', critical: true },
    { name: 'Saving to Database', status: 'skipped', critical: true },
  ];
  expect(job.updateProgress).toHaveBeenCalledWith({
    errorCode: 'UNKNOWN_ERROR',
    message: failureMessage,
    stages: expectedStages,
  });

  // Nothing is queued for cleanup, and the retained files are called out in a warning.
  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
it('should throw UnrecoverableError for quota issues and not enqueue cleanup', async () => {
  // Arrange: the AI stage rejects with a message mentioning an exceeded quota.
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const rawMessage = 'AI model quota exceeded';
  mockAiProcessor.extractAndValidateData.mockRejectedValue(new Error(rawMessage));

  // Act + Assert: the service rejects with BullMQ's UnrecoverableError.
  await expect(service.processJob(job)).rejects.toThrow(UnrecoverableError);

  // The top-level message is the quota notice; the failed stage keeps the raw error text.
  const expectedStages = [
    { name: 'Preparing Inputs', status: 'completed', critical: true, detail: '1 page(s) ready for AI.' },
    { name: 'Extracting Data with AI', status: 'failed', critical: true, detail: rawMessage },
    { name: 'Transforming AI Data', status: 'skipped', critical: true },
    { name: 'Saving to Database', status: 'skipped', critical: true },
  ];
  expect(job.updateProgress).toHaveBeenCalledWith({
    errorCode: 'QUOTA_EXCEEDED',
    message: 'An AI quota has been exceeded. Please try again later.',
    stages: expectedStages,
  });

  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
it('should throw PdfConversionError and not enqueue cleanup if PDF conversion fails', async () => {
  // Arrange: input preparation rejects with a PdfConversionError carrying stderr output.
  const { logger } = await import('./logger.server');
  const pdfJob = createMockJob({ filePath: '/tmp/bad.pdf', originalFileName: 'bad.pdf' });
  const conversionError = new PdfConversionError('Conversion failed', 'pdftocairo error');
  mockFileHandler.prepareImageInputs.mockRejectedValue(conversionError);

  // Act + Assert
  await expect(service.processJob(pdfJob)).rejects.toThrow(conversionError);

  // Only the final progress update is inspected, so earlier updates are ignored.
  const userMessage =
    'The uploaded PDF could not be processed. It might be blank, corrupt, or password-protected.';
  expect(pdfJob.updateProgress).toHaveBeenLastCalledWith({
    errorCode: 'PDF_CONVERSION_FAILED',
    message: userMessage,
    stderr: 'pdftocairo error',
    stages: [
      { name: 'Preparing Inputs', status: 'failed', critical: true, detail: userMessage },
      { name: 'Extracting Data with AI', status: 'skipped', critical: true },
      { name: 'Transforming AI Data', status: 'skipped', critical: true },
      { name: 'Saving to Database', status: 'skipped', critical: true },
    ],
  });

  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
it('should throw AiDataValidationError and not enqueue cleanup if AI validation fails', async () => {
  // Arrange: the AI stage rejects with a validation error holding empty details.
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const validationError = new AiDataValidationError('Validation failed', {}, {});
  mockAiProcessor.extractAndValidateData.mockRejectedValue(validationError);

  // Act + Assert
  await expect(service.processJob(job)).rejects.toThrow(validationError);

  const userMessage =
    "The AI couldn't read the flyer's format. Please try a clearer image or a different flyer.";

  // The error is logged together with the payload fields and the stage list.
  expect(logger.error).toHaveBeenCalledWith(
    {
      err: validationError,
      errorCode: 'AI_VALIDATION_FAILED',
      message: userMessage,
      validationErrors: {},
      rawData: {},
      stages: expect.any(Array), // The stage list is produced at runtime.
    },
    'A known processing error occurred: AiDataValidationError',
  );

  // The final progress update carries validationErrors and rawData alongside the stages.
  expect(job.updateProgress).toHaveBeenLastCalledWith({
    errorCode: 'AI_VALIDATION_FAILED',
    message: userMessage,
    validationErrors: {},
    rawData: {},
    stages: [
      { name: 'Preparing Inputs', status: 'completed', critical: true, detail: '1 page(s) ready for AI.' },
      { name: 'Extracting Data with AI', status: 'failed', critical: true, detail: userMessage },
      { name: 'Transforming AI Data', status: 'skipped', critical: true },
      { name: 'Saving to Database', status: 'skipped', critical: true },
    ],
  });

  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
it('should handle convertible image types and include original and converted files in cleanup', async () => {
  // Arrange: the file handler reports a PNG produced from the uploaded GIF.
  const gifJob = createMockJob({ filePath: '/tmp/flyer.gif', originalFileName: 'flyer.gif' });
  const pngPath = '/tmp/flyer-converted.png';
  mockFileHandler.prepareImageInputs.mockResolvedValue({
    imagePaths: [{ path: pngPath, mimetype: 'image/png' }],
    createdImagePaths: [pngPath],
  });

  // Act
  await service.processJob(gifJob);

  // Assert
  expect(mockFileHandler.prepareImageInputs).toHaveBeenCalledWith(
    '/tmp/flyer.gif',
    gifJob,
    expect.any(Object),
  );
  expect(mockAiProcessor.extractAndValidateData).toHaveBeenCalledTimes(1);

  // Both the original upload and the converted file are scheduled for deletion.
  const expectedCleanupPayload = { flyerId: 1, paths: ['/tmp/flyer.gif', pngPath] };
  expect(mockCleanupQueue.add).toHaveBeenCalledWith(
    'cleanup-flyer-files',
    expectedCleanupPayload,
    expect.any(Object),
  );
});
it('should throw an error and not enqueue cleanup if the database service fails', async () => {
  // Arrange: every stage succeeds until the flyer is saved.
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const failureMessage = 'Database transaction failed';
  vi.mocked(createFlyerAndItems).mockRejectedValue(new Error(failureMessage));

  // Act + Assert
  await expect(service.processJob(job)).rejects.toThrow(failureMessage);

  // The first three stages are reported as completed; only the save stage failed.
  const expectedStages = [
    { name: 'Preparing Inputs', status: 'completed', critical: true, detail: '1 page(s) ready for AI.' },
    { name: 'Extracting Data with AI', status: 'completed', critical: true, detail: 'Communicating with AI model...' },
    { name: 'Transforming AI Data', status: 'completed', critical: true },
    { name: 'Saving to Database', status: 'failed', critical: true, detail: failureMessage },
  ];
  expect(job.updateProgress).toHaveBeenCalledWith({
    errorCode: 'UNKNOWN_ERROR',
    message: failureMessage,
    stages: expectedStages,
  });

  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
it('should throw UnsupportedFileTypeError for an unsupported file type', async () => {
  // Arrange: input preparation rejects the .txt upload.
  const { logger } = await import('./logger.server');
  const textJob = createMockJob({
    filePath: '/tmp/document.txt',
    originalFileName: 'document.txt',
  });
  const unsupportedError = new UnsupportedFileTypeError(
    'Unsupported file type: .txt. Supported types are PDF, JPG, PNG, WEBP, HEIC, HEIF, GIF, TIFF, SVG, BMP.',
  );
  mockFileHandler.prepareImageInputs.mockRejectedValue(unsupportedError);
  // Spy on the private reporter to confirm the orchestrator hands the error to it.
  const reporterSpy = vi.spyOn(service as any, '_reportErrorAndThrow');

  // Act + Assert
  await expect(service.processJob(textJob)).rejects.toThrow(UnsupportedFileTypeError);

  expect(reporterSpy).toHaveBeenCalledWith(
    unsupportedError,
    textJob,
    expect.any(Object),
    expect.any(Array),
  );
  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
it('should delegate to _reportErrorAndThrow if icon generation fails', async () => {
  // Arrange: `beforeEach` stubs `transform` to succeed; here it is re-stubbed to
  // reject, standing in for an icon-generation failure surfacing from the transformer.
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const iconFailure = new Error('Icon generation failed.');
  vi.spyOn(FlyerDataTransformer.prototype, 'transform').mockRejectedValue(iconFailure);
  const reporterSpy = vi.spyOn(service as any, '_reportErrorAndThrow');

  // Act + Assert
  await expect(service.processJob(job)).rejects.toThrow('Icon generation failed.');

  expect(reporterSpy).toHaveBeenCalledWith(
    iconFailure,
    job,
    expect.any(Object),
    expect.any(Array),
  );
  expect(mockCleanupQueue.add).not.toHaveBeenCalled();
  expect(logger.warn).toHaveBeenCalledWith(
    'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
  );
});
});
describe('_reportErrorAndThrow (Error Reporting Logic)', () => {
it('should update progress with a generic error and re-throw', async () => {
  // Arrange
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const genericError = new Error('A standard failure');
  const initialStages = [
    { name: 'Stage 1', status: 'completed', critical: true, detail: 'Done' },
    { name: 'Stage 2', status: 'in-progress', critical: true, detail: 'Working...' },
    { name: 'Stage 3', status: 'pending', critical: true, detail: 'Waiting...' },
  ];
  // Bind the private method to the service: a detached reference would run with
  // `this === undefined` and break as soon as the method reads instance state.
  const reportErrorAndThrow = (service as any)._reportErrorAndThrow.bind(service);

  // Act + Assert: the original error is re-thrown.
  await expect(reportErrorAndThrow(genericError, job, logger, initialStages)).rejects.toThrow(
    genericError,
  );

  // The in-progress stage becomes 'failed'; the pending stage becomes 'skipped' and loses its detail.
  expect(job.updateProgress).toHaveBeenCalledWith({
    errorCode: 'UNKNOWN_ERROR',
    message: 'A standard failure',
    stages: [
      { name: 'Stage 1', status: 'completed', critical: true, detail: 'Done' },
      { name: 'Stage 2', status: 'failed', critical: true, detail: 'A standard failure' },
      { name: 'Stage 3', status: 'skipped', critical: true },
    ],
  });
});
it('should use toErrorPayload for FlyerProcessingError instances', async () => {
  // Arrange
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const validationError = new AiDataValidationError(
    'Validation failed',
    { foo: 'bar' },
    { raw: 'data' },
  );
  const initialStages = [
    { name: 'Extracting Data with AI', status: 'in-progress', critical: true, detail: '...' },
  ];
  // Bind the private method to the service: a detached reference would run with
  // `this === undefined` and break as soon as the method reads instance state.
  const reportErrorAndThrow = (service as any)._reportErrorAndThrow.bind(service);

  // Act + Assert
  await expect(
    reportErrorAndThrow(validationError, job, logger, initialStages),
  ).rejects.toThrow(validationError);

  // The progress payload carries the error's own fields (validationErrors, rawData).
  expect(job.updateProgress).toHaveBeenCalledWith({
    errorCode: 'AI_VALIDATION_FAILED',
    message: "The AI couldn't read the flyer's format. Please try a clearer image or a different flyer.",
    validationErrors: { foo: 'bar' },
    rawData: { raw: 'data' },
    stages: [
      { name: 'Extracting Data with AI', status: 'failed', critical: true, detail: "The AI couldn't read the flyer's format. Please try a clearer image or a different flyer." },
    ],
  });
});
it('should throw UnrecoverableError for quota messages', async () => {
  // Arrange
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const quotaError = new Error('RESOURCE_EXHAUSTED');
  // Bind the private method to the service: a detached reference would run with
  // `this === undefined` and break as soon as the method reads instance state.
  const reportErrorAndThrow = (service as any)._reportErrorAndThrow.bind(service);

  // Act + Assert: the rejection is BullMQ's UnrecoverableError.
  await expect(reportErrorAndThrow(quotaError, job, logger, [])).rejects.toThrow(
    UnrecoverableError,
  );

  expect(job.updateProgress).toHaveBeenCalledWith({
    errorCode: 'QUOTA_EXCEEDED',
    message: 'An AI quota has been exceeded. Please try again later.',
    stages: [],
  });
});
it('should wrap and throw non-Error objects', async () => {
  // Arrange: a bare string stands in for a non-Error rejection value.
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const nonError = 'just a string error';
  // Bind the private method to the service: a detached reference would run with
  // `this === undefined` and break as soon as the method reads instance state.
  const reportErrorAndThrow = (service as any)._reportErrorAndThrow.bind(service);

  // Act + Assert: the rejection message matches the original string.
  await expect(reportErrorAndThrow(nonError, job, logger, [])).rejects.toThrow(
    'just a string error',
  );
});
it('should correctly identify the failed stage based on error code', async () => {
  // Arrange
  const { logger } = await import('./logger.server');
  const job = createMockJob({});
  const pdfError = new PdfConversionError('PDF failed');
  const initialStages = [
    { name: 'Preparing Inputs', status: 'in-progress', critical: true, detail: '...' },
    { name: 'Extracting Data with AI', status: 'pending', critical: true, detail: '...' },
  ];
  // Bind the private method to the service: a detached reference would run with
  // `this === undefined` and break as soon as the method reads instance state.
  const reportErrorAndThrow = (service as any)._reportErrorAndThrow.bind(service);

  // Act + Assert
  await expect(reportErrorAndThrow(pdfError, job, logger, initialStages)).rejects.toThrow(
    pdfError,
  );

  // Only the stage list is pinned: 'Preparing Inputs' failed, the following stage was skipped.
  expect(job.updateProgress).toHaveBeenCalledWith(
    expect.objectContaining({
      stages: [
        { name: 'Preparing Inputs', status: 'failed', critical: true, detail: expect.any(String) },
        { name: 'Extracting Data with AI', status: 'skipped', critical: true },
      ],
    }),
  );
});
});
describe('processCleanupJob', () => {
it('should delete all files successfully', async () => {
  // Arrange: every unlink call resolves.
  mocks.unlink.mockResolvedValue(undefined);
  const cleanupJob = createMockCleanupJob({ flyerId: 1, paths: ['/tmp/file1', '/tmp/file2'] });

  // Act
  const outcome = await service.processCleanupJob(cleanupJob);

  // Assert: one unlink per path, and the result reports both deletions.
  expect(mocks.unlink).toHaveBeenCalledTimes(2);
  expect(mocks.unlink).toHaveBeenCalledWith('/tmp/file1');
  expect(mocks.unlink).toHaveBeenCalledWith('/tmp/file2');
  expect(outcome).toEqual({ status: 'success', deletedCount: 2 });
});
it('should handle ENOENT errors gracefully and still succeed', async () => {
  // Arrange: the first unlink resolves, the second rejects with ENOENT.
  const { logger } = await import('./logger.server');
  const cleanupJob = createMockCleanupJob({ flyerId: 1, paths: ['/tmp/file1', '/tmp/file2'] });
  const missingFileError: NodeJS.ErrnoException = Object.assign(new Error('File not found'), {
    code: 'ENOENT',
  });
  mocks.unlink.mockResolvedValueOnce(undefined).mockRejectedValueOnce(missingFileError);

  // Act
  const outcome = await service.processCleanupJob(cleanupJob);

  // Assert: both paths were attempted and the job still counts two deletions.
  expect(mocks.unlink).toHaveBeenCalledTimes(2);
  expect(outcome).toEqual({ status: 'success', deletedCount: 2 });
  // The missing file is reported as a warning.
  expect(logger.warn).toHaveBeenCalledWith(
    'File not found during cleanup (already deleted?): /tmp/file2',
  );
});
it('should throw an aggregate error if a non-ENOENT error occurs', async () => {
  // Arrange: the second unlink rejects with EACCES.
  const { logger } = await import('./logger.server');
  const cleanupJob = createMockCleanupJob({
    flyerId: 1,
    paths: ['/tmp/file1', '/tmp/permission-denied'],
  });
  const accessError: NodeJS.ErrnoException = Object.assign(new Error('Permission denied'), {
    code: 'EACCES',
  });
  mocks.unlink.mockResolvedValueOnce(undefined).mockRejectedValueOnce(accessError);

  // Act + Assert: the job rejects and names the file that could not be removed.
  await expect(service.processCleanupJob(cleanupJob)).rejects.toThrow(
    'Failed to delete 1 file(s): /tmp/permission-denied',
  );

  // The underlying error is logged together with the offending path.
  expect(logger.error).toHaveBeenCalledWith(
    expect.objectContaining({ err: accessError, path: '/tmp/permission-denied' }),
    'Failed to delete temporary file.',
  );
});
it('should skip processing and return "skipped" if paths array is empty', async () => {
  // Arrange: a cleanup job with nothing to delete.
  const { logger } = await import('./logger.server');
  const emptyJob = createMockCleanupJob({ flyerId: 1, paths: [] });

  // Act
  const outcome = await service.processCleanupJob(emptyJob);

  // Assert: no file-system call is made and the skip is both returned and logged.
  expect(mocks.unlink).not.toHaveBeenCalled();
  expect(outcome).toEqual({ status: 'skipped', reason: 'no paths' });
  expect(logger.warn).toHaveBeenCalledWith('Job received no paths to clean. Skipping.');
});
});
});