Files
flyer-crawler.projectium.com/src/services/flyerDataTransformer.ts
Torben Sorensen 6ab473f5f0
Some checks failed
Deploy to Test Environment / deploy-to-test (push) Failing after 58s
huge linting fixes
2026-01-09 18:50:04 -08:00

146 lines
5.9 KiB
TypeScript

// src/services/flyerDataTransformer.ts
import type { z } from 'zod';
import type { Logger } from 'pino';
import type { FlyerInsert, FlyerItemInsert } from '../types';
import type { AiProcessorResult } from './flyerAiProcessor.server'; // Keep this import for AiProcessorResult
import { AiFlyerDataSchema } from '../types/ai'; // Import consolidated schema
import { TransformationError } from './processingErrors';
import { parsePriceToCents } from '../utils/priceParser';
import { getBaseUrl } from '../utils/serverUtils';
/**
 * Transforms the validated data from the AI service into the structured format
 * required for database insertion (`FlyerInsert` and `FlyerItemInsert`).
 */
export class FlyerDataTransformer {
  /**
   * Normalizes a single raw item from the AI, providing default values for nullable fields.
   *
   * All fields of the raw item are copied through; the fields listed in the returned
   * object literal below are then overwritten with normalized values.
   *
   * @param item The raw item object from the AI.
   * @returns A normalized item object ready for database insertion.
   */
  private _normalizeItem(
    item: z.infer<typeof AiFlyerDataSchema>['items'][number],
  ): FlyerItemInsert {
    // If the AI fails to provide `price_in_cents` but provides a parsable `price_display`,
    // our own parser is used as a fallback to improve data quality.
    const priceFromDisplay = parsePriceToCents(item.price_display ?? '');
    // Prefer the AI's direct `price_in_cents`; only a null/undefined value triggers the
    // fallback (`??` keeps a legitimate price of 0).
    const finalPriceInCents = item.price_in_cents ?? priceFromDisplay;

    return {
      ...item,
      // An empty or whitespace-only name falls back to 'Unknown Item'.
      item: String(item.item ?? '').trim() || 'Unknown Item',
      // Null/undefined become an empty string; surrounding whitespace is removed.
      price_display: String(item.price_display ?? '').trim(),
      quantity: String(item.quantity ?? '').trim(),
      // An empty or whitespace-only category falls back to 'Other/Miscellaneous'.
      category_name: String(item.category_name ?? '').trim() || 'Other/Miscellaneous',
      // Overwrite price_in_cents with the resolved value.
      price_in_cents: finalPriceInCents,
      // Convert null to undefined for the database layer.
      master_item_id: item.master_item_id ?? undefined,
      // Counters always start at zero for a freshly created item.
      view_count: 0,
      click_count: 0,
    };
  }

  /**
   * Constructs the full public URLs for the flyer image and its icon.
   *
   * Trailing slashes on the base URL are removed before concatenation so that a base
   * such as `https://host/` does not produce `https://host//flyer-images/...`.
   *
   * @param imageFileName The filename of the main processed image.
   * @param iconFileName The filename of the generated icon.
   * @param baseUrl The base URL from the job payload. When empty, `getBaseUrl(logger)` is used instead.
   * @param logger The logger instance.
   * @returns An object containing the full `imageUrl` and `iconUrl`.
   */
  private _buildUrls(
    imageFileName: string,
    iconFileName: string,
    baseUrl: string,
    logger: Logger,
  ): { imageUrl: string; iconUrl: string } {
    logger.debug({ imageFileName, iconFileName, baseUrl }, 'Building URLs');

    // Fall back to the server-derived base URL when the caller supplied none, then
    // strip trailing slashes so the path segments below join with exactly one '/'.
    const finalBaseUrl = (baseUrl || getBaseUrl(logger)).replace(/\/+$/, '');

    const imageUrl = `${finalBaseUrl}/flyer-images/${imageFileName}`;
    const iconUrl = `${finalBaseUrl}/flyer-images/icons/${iconFileName}`;

    logger.debug({ imageUrl, iconUrl }, 'Constructed URLs');
    return { imageUrl, iconUrl };
  }

  /**
   * Transforms AI-extracted data into database-ready flyer and item records.
   *
   * @param aiResult The AI processor result; its `data` holds the extracted flyer fields and
   *   items, and `needsReview` selects the resulting flyer status.
   * @param originalFileName The original name of the uploaded file.
   * @param imageFileName The filename of the main processed image.
   * @param iconFileName The filename of the generated icon.
   * @param checksum The checksum of the file.
   * @param userId The ID of the user who uploaded the file, if any. A missing or empty
   *   value is stored as `null`.
   * @param logger The request-scoped or job-scoped logger instance.
   * @param baseUrl The base URL used to build the public image URLs.
   * @returns A promise that resolves to an object containing the prepared flyer and item data.
   * @throws {TransformationError} If any step of the transformation fails. The underlying
   *   error is logged before the wrapper is thrown.
   */
  async transform(
    aiResult: AiProcessorResult,
    originalFileName: string,
    imageFileName: string,
    iconFileName: string,
    checksum: string,
    userId: string | undefined,
    logger: Logger,
    baseUrl: string,
  ): Promise<{ flyerData: FlyerInsert; itemsForDb: FlyerItemInsert[] }> {
    logger.info('Starting data transformation from AI output to database format.');
    try {
      const { data: extractedData, needsReview } = aiResult;
      const { imageUrl, iconUrl } = this._buildUrls(imageFileName, iconFileName, baseUrl, logger);

      const itemsForDb: FlyerItemInsert[] = extractedData.items.map((item) =>
        this._normalizeItem(item),
      );

      // A missing or empty store name is replaced with a recognizable placeholder.
      const storeName = extractedData.store_name || 'Unknown Store (auto)';
      if (!extractedData.store_name) {
        logger.warn('AI did not return a store name. Using fallback "Unknown Store (auto)".');
      }

      const flyerData: FlyerInsert = {
        file_name: originalFileName,
        image_url: imageUrl,
        icon_url: iconUrl,
        checksum,
        store_name: storeName,
        valid_from: extractedData.valid_from,
        valid_to: extractedData.valid_to,
        store_address: extractedData.store_address,
        item_count: itemsForDb.length,
        // Both undefined and an empty string are stored as null.
        uploaded_by: userId ? userId : null,
        status: needsReview ? 'needs_review' : 'processed',
      };

      logger.info(
        { itemCount: itemsForDb.length, storeName: flyerData.store_name },
        'Data transformation complete.',
      );
      return { flyerData, itemsForDb };
    } catch (err) {
      // The original error is recorded in the log; callers only see the wrapper below.
      logger.error({ err }, 'Transformation process failed');
      throw new TransformationError('Flyer Data Transformation Failed');
    }
  }
}