/*
 * Files
 * flyer-crawler.projectium.com/src/services/flyerAiProcessor.server.test.ts
 * Torben Sorensen 941626004e
 * All checks were successful
 * Deploy to Test Environment / deploy-to-test (push) Successful in 16m51s
 * test fixes to align with latest tests
 * 2026-01-18 14:27:20 -08:00
 *
 * 667 lines
 * 22 KiB
 * TypeScript
 */

// src/services/flyerAiProcessor.server.test.ts
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { FlyerAiProcessor } from './flyerAiProcessor.server';
import { AiDataValidationError } from './processingErrors';
import { logger } from './logger.server'; // Keep this import for the logger instance
import type { AIService } from './aiService.server';
import type { PersonalizationRepository } from './db/personalization.db';
import type { FlyerJobData } from '../types/job-data';
// Swap the real logger module for a set of spies so tests can assert on log calls.
vi.mock('./logger.server', () => {
  const logger = {
    debug: vi.fn(),
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    // `child()` hands back whatever it was called on, so chained child loggers
    // resolve to this same mock object.
    child: vi.fn().mockReturnThis(),
  };
  return { logger };
});
/**
 * Builds a `FlyerJobData` fixture for tests.
 *
 * Starts from a fixed set of default values (file path, original file name,
 * checksum and base URL) and spreads `data` on top, so any field supplied by
 * the caller takes precedence over the default.
 *
 * @param data - Fields to override or add. Defaults to no overrides, so callers
 *   that only need the defaults can omit the argument.
 * @returns The default values merged with `data`.
 */
const createMockJobData = (data: Partial<FlyerJobData> = {}): FlyerJobData => ({
  filePath: '/tmp/flyer.jpg',
  originalFileName: 'flyer.jpg',
  checksum: 'checksum-123',
  baseUrl: 'https://example.com',
  ...data,
});
describe('FlyerAiProcessor', () => {
// Shared fixtures, rebuilt before every test.
let service: FlyerAiProcessor;
let mockAiService: AIService;
let mockPersonalizationRepo: PersonalizationRepository;

beforeEach(() => {
  // Drop call history recorded by earlier tests.
  vi.clearAllMocks();

  // Only the methods exercised by these tests are stubbed; the casts paper over the rest.
  const aiServiceStub = { extractCoreDataFromFlyerImage: vi.fn() };
  const personalizationRepoStub = {
    getAllMasterItems: vi.fn().mockResolvedValue({ items: [], total: 0 }),
  };
  mockAiService = aiServiceStub as unknown as AIService;
  mockPersonalizationRepo = personalizationRepoStub as unknown as PersonalizationRepository;

  service = new FlyerAiProcessor(mockAiService, mockPersonalizationRepo);
});

afterEach(() => {
  // Undo any `vi.stubEnv` overrides so they cannot leak into the next test.
  vi.unstubAllEnvs();
});
it('should call AI service and return validated data on success', async () => {
  // Arrange: a complete payload. The single item is there so the "must have items"
  // quality check passes; `quantity` and `category_name` satisfy the item type.
  const extracted = {
    store_name: 'AI Store',
    store_address: '123 AI St',
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [
      {
        item: 'Test Item',
        quantity: 'each',
        category_name: 'Grocery',
        price_display: '$1.99',
        price_in_cents: 199,
      },
    ],
  };
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  extractSpy.mockResolvedValue(extracted);
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];
  const job = createMockJobData({});

  // Act
  const outcome = await service.extractAndValidateData(pages, job, logger);

  // Assert: one AI call, one master-items lookup, payload returned unchanged and not flagged.
  expect(extractSpy).toHaveBeenCalledTimes(1);
  expect(mockPersonalizationRepo.getAllMasterItems).toHaveBeenCalledTimes(1);
  expect(outcome.data).toEqual(extracted);
  expect(outcome.needsReview).toBe(false);
});
it('should throw an error if getAllMasterItems fails', async () => {
  // Arrange: make the master-items lookup reject.
  const lookupFailure = new Error('Database connection failed');
  vi.mocked(mockPersonalizationRepo.getAllMasterItems).mockRejectedValue(lookupFailure);
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];
  const job = createMockJobData({});

  // Act
  const pending = service.extractAndValidateData(pages, job, logger);

  // Assert: the rejection reaches the caller...
  await expect(pending).rejects.toThrow(lookupFailure);
  // ...and the AI service is never invoked.
  expect(mockAiService.extractCoreDataFromFlyerImage).not.toHaveBeenCalled();
});
describe('Validation and Quality Checks', () => {
it('should pass validation and not flag for review with good quality data', async () => {
  // Arrange: all metadata present and every item carries a price.
  const pricedItem = (
    item: string,
    price_in_cents: number,
    price_display: string,
    category_name: string,
  ) => ({ item, price_in_cents, price_display, quantity: '1', category_name });
  const aiPayload = {
    store_name: 'Good Store',
    store_address: '123 Good St',
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [
      pricedItem('Priced Item 1', 199, '$1.99', 'A'),
      pricedItem('Priced Item 2', 299, '$2.99', 'B'),
    ],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: nothing to review and no warning logged.
  expect(outcome.needsReview).toBe(false);
  expect(mockedLogger.warn).not.toHaveBeenCalled();
});
it('should throw AiDataValidationError if AI response has incorrect data structure', async () => {
  // Arrange: `items` is a string rather than an array, so the payload is structurally invalid.
  const malformedPayload = {
    store_name: 'Invalid Store',
    store_address: null,
    valid_from: null,
    valid_to: null,
    items: 'not-an-array',
  };
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  // The cast is deliberate: the payload intentionally violates the declared return type.
  extractSpy.mockResolvedValue(malformedPayload as any);
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];
  const job = createMockJobData({});

  // Act
  const attempt = service.extractAndValidateData(pages, job, logger);

  // Assert
  await expect(attempt).rejects.toThrow(AiDataValidationError);
});
it('should flag for review if store_name is missing', async () => {
  // Arrange: everything needed is present except the store name.
  const aiPayload = {
    store_name: null,
    store_address: null,
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [
      {
        item: 'Test Item',
        quantity: 'each',
        category_name: 'Grocery',
        price_in_cents: 199,
        price_display: '$1.99',
      },
    ],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: flagged, with exactly the store-name issue reported in the warning.
  expect(outcome.needsReview).toBe(true);
  const expectedContext = expect.objectContaining({ qualityIssues: ['Missing store name'] });
  const expectedMessage = expect.stringContaining('AI response has quality issues.');
  expect(mockedLogger.warn).toHaveBeenCalledWith(expectedContext, expectedMessage);
});
it('should flag for review if items array is empty', async () => {
  // Arrange: valid metadata but no extracted items at all.
  const aiPayload = {
    store_name: 'Test Store',
    store_address: null,
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: flagged, with the empty-items issue reported in the warning.
  expect(outcome.needsReview).toBe(true);
  const expectedContext = expect.objectContaining({ qualityIssues: ['No items were extracted'] });
  const expectedMessage = expect.stringContaining('AI response has quality issues.');
  expect(mockedLogger.warn).toHaveBeenCalledWith(expectedContext, expectedMessage);
});
it('should flag for review if item price quality is low', async () => {
  // Arrange: only one of three items has a numeric price (33%), which is under 50%.
  const flyerItem = (
    item: string,
    price_in_cents: number | null,
    price_display: string,
    category_name: string,
  ) => ({ item, price_in_cents, price_display, quantity: '1', category_name });
  const aiPayload = {
    store_name: 'Test Store',
    store_address: '123 Test St',
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [
      flyerItem('Priced Item', 199, '$1.99', 'A'),
      flyerItem('Unpriced Item 1', null, 'See store', 'B'),
      flyerItem('Unpriced Item 2', null, 'FREE', 'C'),
    ],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: flagged, and the warning reports the rounded price coverage.
  expect(outcome.needsReview).toBe(true);
  const expectedContext = expect.objectContaining({
    qualityIssues: ['Low price quality (33% of items have a price)'],
  });
  const expectedMessage = expect.stringContaining('AI response has quality issues.');
  expect(mockedLogger.warn).toHaveBeenCalledWith(expectedContext, expectedMessage);
});
it('should use a custom price quality threshold from an environment variable', async () => {
  // Arrange: raise the threshold to 80% for this test only (undone in afterEach).
  vi.stubEnv('AI_PRICE_QUALITY_THRESHOLD', '0.8');

  // Three of four items are priced (75%): above the 50% default, below the custom 80%.
  const flyerItem = (
    item: string,
    price_in_cents: number | null,
    price_display: string,
    category_name: string,
  ) => ({ item, price_in_cents, price_display, quantity: '1', category_name });
  const aiPayload = {
    store_name: 'Test Store',
    store_address: '123 Test St',
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [
      flyerItem('Priced Item 1', 199, '$1.99', 'A'),
      flyerItem('Priced Item 2', 299, '$2.99', 'B'),
      flyerItem('Priced Item 3', 399, '$3.99', 'C'),
      flyerItem('Unpriced Item 1', null, 'See store', 'D'),
    ],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: 75% < 80%, so the flyer is flagged and the warning reports 75%.
  expect(outcome.needsReview).toBe(true);
  const expectedContext = expect.objectContaining({
    qualityIssues: ['Low price quality (75% of items have a price)'],
  });
  const expectedMessage = expect.stringContaining('AI response has quality issues.');
  expect(mockedLogger.warn).toHaveBeenCalledWith(expectedContext, expectedMessage);
});
it('should flag for review if validity dates are missing', async () => {
  // Arrange: both validity dates are null; everything else is fine.
  const aiPayload = {
    store_name: 'Test Store',
    store_address: '123 Test St',
    valid_from: null,
    valid_to: null,
    items: [
      {
        item: 'Test Item',
        quantity: '1',
        category_name: 'A',
        price_display: '$1.99',
        price_in_cents: 199,
      },
    ],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: flagged, with the missing-dates issue reported in the warning.
  expect(outcome.needsReview).toBe(true);
  const expectedContext = expect.objectContaining({
    qualityIssues: ['Missing both valid_from and valid_to dates'],
  });
  const expectedMessage = expect.stringContaining('AI response has quality issues.');
  expect(mockedLogger.warn).toHaveBeenCalledWith(expectedContext, expectedMessage);
});
it('should combine multiple quality issues in the log', async () => {
  // Arrange: three separate problems at once - no store name, no items, no dates.
  const aiPayload = {
    store_name: null,
    store_address: null,
    valid_from: null,
    valid_to: null,
    items: [],
  };
  vi.mocked(mockAiService.extractCoreDataFromFlyerImage).mockResolvedValue(aiPayload);
  const loggerModule = await import('./logger.server');
  const mockedLogger = loggerModule.logger;
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), mockedLogger);

  // Assert: flagged, and the warning carries the raw payload plus every issue, in order.
  expect(outcome.needsReview).toBe(true);
  const expectedContext = {
    rawData: aiPayload,
    qualityIssues: [
      'Missing store name',
      'No items were extracted',
      'Missing both valid_from and valid_to dates',
    ],
  };
  const expectedMessage =
    'AI response has quality issues. Flagging for review. Issues: Missing store name, No items were extracted, Missing both valid_from and valid_to dates';
  expect(mockedLogger.warn).toHaveBeenCalledWith(expectedContext, expectedMessage);
});
});
it('should pass the userProfileAddress from jobData to the AI service', async () => {
  // Arrange: the job carries a profile address that should reach the AI call.
  const job = createMockJobData({ userProfileAddress: '456 Fallback Ave' });
  const aiPayload = {
    store_name: 'Test Store',
    store_address: '123 Test St',
    valid_from: '2024-01-01',
    valid_to: '2024-01-07',
    items: [
      {
        item: 'Test Item',
        quantity: '1',
        category_name: 'A',
        price_display: '$1.99',
        price_in_cents: 199,
      },
    ],
  };
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  extractSpy.mockResolvedValue(aiPayload);
  const pages = [{ path: 'page1.jpg', mimetype: 'image/jpeg' }];

  // Act
  await service.extractAndValidateData(pages, job, logger);

  // Assert: the address is forwarded as the fourth argument of the AI call.
  const expectedArgs = [pages, [], undefined, '456 Fallback Ave', logger] as const;
  expect(extractSpy).toHaveBeenCalledWith(...expectedArgs);
});
describe('Batching Logic', () => {
it('should process images in batches and merge the results correctly', async () => {
  // Arrange: five pages. The expectations below assume four pages per AI call, i.e. two batches.
  const pages = ['page1.jpg', 'page2.jpg', 'page3.jpg', 'page4.jpg', 'page5.jpg'].map((path) => ({
    path,
    mimetype: 'image/jpeg',
  }));
  const batchItem = (
    item: string,
    price_display: string,
    price_in_cents: number,
    category_name: string,
    master_item_id: number,
  ) => ({ item, price_display, price_in_cents, quantity: '1', category_name, master_item_id });
  const firstBatchResponse = {
    store_name: 'Batch 1 Store',
    store_address: '123 Batch St',
    valid_from: '2025-01-01',
    valid_to: '2025-01-07',
    items: [batchItem('Item A', '$1', 100, 'Cat A', 1), batchItem('Item B', '$2', 200, 'Cat B', 2)],
  };
  // The second batch's store name must not override the first batch's.
  const secondBatchResponse = {
    store_name: 'Batch 2 Store',
    store_address: null,
    valid_from: null,
    valid_to: null,
    items: [batchItem('Item C', '$3', 300, 'Cat C', 3)],
  };
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  extractSpy.mockResolvedValueOnce(firstBatchResponse).mockResolvedValueOnce(secondBatchResponse);

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), logger);

  // Assert 1: one AI call per batch.
  expect(extractSpy).toHaveBeenCalledTimes(2);

  // Assert 2: each call received its own slice of pages.
  const expectedBatches = [pages.slice(0, 4), pages.slice(4)];
  expectedBatches.forEach((batch, index) => {
    expect(extractSpy).toHaveBeenNthCalledWith(index + 1, batch, [], undefined, undefined, logger);
  });

  // Assert 3: metadata comes from the first batch.
  const merged = outcome.data;
  expect(merged.store_name).toBe('Batch 1 Store');
  expect(merged.valid_from).toBe('2025-01-01');
  expect(merged.valid_to).toBe('2025-01-07');
  expect(merged.store_address).toBe('123 Batch St');

  // Assert 4: items from both batches are present.
  expect(merged.items).toHaveLength(3);
  const expectedItems = ['Item A', 'Item B', 'Item C'].map((item) => expect.objectContaining({ item }));
  expect(merged.items).toEqual(expect.arrayContaining(expectedItems));

  // Assert 5: nothing to review.
  expect(outcome.needsReview).toBe(false);
});
it('should handle an empty object response from a batch without crashing', async () => {
  // Arrange: five pages produce two AI calls; the second one answers with `{}`.
  const pages = ['page1.jpg', 'page2.jpg', 'page3.jpg', 'page4.jpg', 'page5.jpg'].map((path) => ({
    path,
    mimetype: 'image/jpeg',
  }));
  const goodResponse = {
    store_name: 'Good Store',
    store_address: '123 Good St',
    valid_from: '2025-01-01',
    valid_to: '2025-01-07',
    items: [
      {
        item: 'Item A',
        quantity: '1',
        category_name: 'Cat A',
        master_item_id: 1,
        price_in_cents: 100,
        price_display: '$1',
      },
    ],
  };
  const emptyResponse = {};
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  extractSpy.mockResolvedValueOnce(goodResponse);
  // `as any` bypasses the strict return type for this deliberately empty mock.
  extractSpy.mockResolvedValueOnce(emptyResponse as any);

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), logger);

  // Assert 1: both batches were attempted.
  expect(extractSpy).toHaveBeenCalledTimes(2);

  // Assert 2: only the first batch contributed data.
  const merged = outcome.data;
  expect(merged.store_name).toBe('Good Store');
  expect(merged.items).toHaveLength(1);
  expect(merged.items[0].item).toBe('Item A');

  // Assert 3: the run completes and is not flagged, since the first batch was good.
  expect(outcome.needsReview).toBe(false);
});
it('should fill in missing metadata from subsequent batches', async () => {
  // Arrange: the first batch lacks store name and address; the second batch supplies them.
  const pages = ['page1.jpg', 'page2.jpg', 'page3.jpg', 'page4.jpg', 'page5.jpg'].map((path) => ({
    path,
    mimetype: 'image/jpeg',
  }));
  const batchItem = (
    item: string,
    price_display: string,
    price_in_cents: number,
    category_name: string,
    master_item_id: number,
  ) => ({ item, price_display, price_in_cents, quantity: '1', category_name, master_item_id });
  const firstBatchResponse = {
    store_name: null,
    store_address: null,
    valid_from: '2025-01-01',
    valid_to: '2025-01-07',
    items: [batchItem('Item A', '$1', 100, 'Cat A', 1)],
  };
  const secondBatchResponse = {
    store_name: 'Batch 2 Store',
    store_address: '456 Subsequent St',
    valid_from: '2025-01-02',
    valid_to: null,
    items: [batchItem('Item C', '$3', 300, 'Cat C', 3)],
  };
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  extractSpy.mockResolvedValueOnce(firstBatchResponse).mockResolvedValueOnce(secondBatchResponse);

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), logger);

  // Assert: gaps are filled from batch 2, while values already set by batch 1 are kept.
  const merged = outcome.data;
  expect(merged.store_name).toBe('Batch 2 Store');
  expect(merged.store_address).toBe('456 Subsequent St');
  expect(merged.valid_from).toBe('2025-01-01');
  expect(merged.valid_to).toBe('2025-01-07');
  expect(merged.items).toHaveLength(2);
});
});
it('should handle a single batch correctly when image count is less than BATCH_SIZE', async () => {
  // Arrange: two pages, expected to go out in a single AI call.
  const pages = ['page1.jpg', 'page2.jpg'].map((path) => ({ path, mimetype: 'image/jpeg' }));
  const aiPayload = {
    store_name: 'Single Batch Store',
    store_address: '789 Single St',
    valid_from: '2025-02-01',
    valid_to: '2025-02-07',
    items: [
      {
        item: 'Item X',
        quantity: '1',
        category_name: 'Cat X',
        master_item_id: 10,
        price_in_cents: 1000,
        price_display: '$10',
      },
    ],
  };
  // Only one response is queued because only one call is expected.
  const extractSpy = vi.mocked(mockAiService.extractCoreDataFromFlyerImage);
  extractSpy.mockResolvedValueOnce(aiPayload);

  // Act
  const outcome = await service.extractAndValidateData(pages, createMockJobData({}), logger);

  // Assert 1: exactly one AI call.
  expect(extractSpy).toHaveBeenCalledTimes(1);

  // Assert 2: that call received every page.
  const expectedArgs = [pages, [], undefined, undefined, logger] as const;
  expect(extractSpy).toHaveBeenCalledWith(...expectedArgs);

  // Assert 3: the result is the single batch's payload.
  expect(outcome.data).toEqual(aiPayload);
});
});