// src/routes/ai.routes.ts
import { Router, Request, Response, NextFunction } from 'express';
import multer from 'multer';
import path from 'path';
import fs from 'node:fs';
import { z } from 'zod';
import passport, { optionalAuth } from './passport.routes';
import * as db from '../services/db/index.db';
import { createFlyerAndItems } from '../services/db/flyer.db';
import * as aiService from '../services/aiService.server'; // Server-side AI service
import { generateFlyerIcon } from '../utils/imageProcessor';
import { sanitizeFilename } from '../utils/stringUtils';
import { logger } from '../services/logger.server';
import { UserProfile, ExtractedCoreData, ExtractedFlyerItem } from '../types';
import { flyerQueue } from '../services/queueService.server';
import { validateRequest } from '../middleware/validation.middleware';
const router = Router();
interface FlyerProcessPayload extends Partial<ExtractedCoreData> {
checksum?: string;
originalFileName?: string;
extractedData?: Partial<ExtractedCoreData>;
data?: FlyerProcessPayload; // For nested data structures
}
// --- Zod Schemas for AI Routes (as per ADR-003) ---
// Helper for consistent required string validation (handles missing/null/empty)
const requiredString = (message: string) =>
z.preprocess((val) => val ?? '', z.string().min(1, message));
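// Example: requiredString('Name is required.') coerces undefined/null to '' before the
// min-length check, so missing, null, and empty values all fail with the same message.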
const uploadAndProcessSchema = z.object({
body: z.object({
checksum: requiredString('File checksum is required.'),
// Potential improvement: If checksum is always a specific format (e.g., SHA-256),
// you could add `.length(64).regex(/^[a-f0-9]+$/)` for stricter validation.
}),
});
const jobIdParamSchema = z.object({
params: z.object({
jobId: requiredString('A valid Job ID is required.'),
}),
});
// Helper to safely extract an error message from unknown `catch` values.
const errMsg = (e: unknown) => {
if (e instanceof Error) return e.message;
if (typeof e === 'object' && e !== null && 'message' in e)
return String((e as { message: unknown }).message);
return String(e || 'An unknown error occurred.');
};
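// Example: errMsg(new Error('boom')) -> 'boom'; errMsg({ message: 404 }) -> '404';
// errMsg(undefined) -> 'An unknown error occurred.'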
const cropAreaObjectSchema = z.object({
x: z.number(),
y: z.number(),
width: z.number().positive('Crop area width must be positive.'),
height: z.number().positive('Crop area height must be positive.'),
});
const rescanAreaSchema = z.object({
body: z.object({
cropArea: requiredString('cropArea must be a valid JSON string.')
.transform((val, ctx) => {
try {
return JSON.parse(val);
} catch (err) {
// Log the actual parsing error for better debugging if invalid JSON is sent.
logger.warn(
{ error: errMsg(err), receivedValue: val },
'Failed to parse cropArea in rescanAreaSchema',
);
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: 'cropArea must be a valid JSON string.',
});
return z.NEVER;
}
})
.pipe(cropAreaObjectSchema), // Further validate the structure of the parsed object
extractionType: z.enum(['store_name', 'dates', 'item_details'], {
message: "extractionType must be one of 'store_name', 'dates', or 'item_details'.",
}),
}),
});
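// Example multipart fields (sketch): cropArea='{"x":10,"y":20,"width":300,"height":150}',
// extractionType='store_name'; cropArea is parsed from JSON and then validated by cropAreaObjectSchema.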
const flyerItemForAnalysisSchema = z
.object({
name: requiredString('Item name is required.'),
// Allow other properties to pass through without validation
})
.passthrough();
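// Example: { name: 'Milk' } is valid; extra keys (e.g. a hypothetical `price_text`) pass through unvalidated.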
const insightsSchema = z.object({
body: z.object({
items: z.array(flyerItemForAnalysisSchema).nonempty("The 'items' array cannot be empty."),
}),
});
const comparePricesSchema = z.object({
body: z.object({
items: z.array(flyerItemForAnalysisSchema).nonempty("The 'items' array cannot be empty."),
}),
});
const planTripSchema = z.object({
body: z.object({
items: z.array(flyerItemForAnalysisSchema),
store: z.object({ name: requiredString('Store name is required.') }),
userLocation: z.object({
latitude: z
.number()
.min(-90, 'Latitude must be between -90 and 90.')
.max(90, 'Latitude must be between -90 and 90.'),
longitude: z
.number()
.min(-180, 'Longitude must be between -180 and 180.')
.max(180, 'Longitude must be between -180 and 180.'),
}),
}),
});
const generateImageSchema = z.object({
body: z.object({ prompt: requiredString('A prompt is required.') }),
});
const generateSpeechSchema = z.object({
body: z.object({ text: requiredString('Text is required.') }),
});
const searchWebSchema = z.object({
body: z.object({ query: requiredString('A search query is required.') }),
});
// --- Multer Configuration for File Uploads ---
const storagePath =
process.env.STORAGE_PATH || '/var/www/flyer-crawler.projectium.com/flyer-images';
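// The default can be overridden via the STORAGE_PATH environment variable
// (e.g. pointing at a local scratch directory during development).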
// Ensure the storage path exists at startup so multer can write files there.
try {
fs.mkdirSync(storagePath, { recursive: true });
logger.debug(`AI upload storage path ready: ${storagePath}`);
} catch (err) {
logger.error(
{ error: errMsg(err) },
`Failed to create storage path (${storagePath}). File uploads may fail.`,
);
}
const diskStorage = multer.diskStorage({
destination: function (req, file, cb) {
cb(null, storagePath);
},
filename: function (req, file, cb) {
// If in a test environment, use a predictable filename for easy cleanup.
if (process.env.NODE_ENV === 'test') {
return cb(null, `${file.fieldname}-test-flyer-image.jpg`);
} else {
const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1e9);
// Sanitize the original filename to remove spaces and special characters
return cb(
null,
file.fieldname + '-' + uniqueSuffix + '-' + sanitizeFilename(file.originalname),
);
}
},
});
const uploadToDisk = multer({ storage: diskStorage });
// Diagnostic middleware: log incoming AI route requests (headers and sizes)
router.use((req: Request, res: Response, next: NextFunction) => {
try {
const contentType = req.headers['content-type'] || '';
const contentLength = req.headers['content-length'] || 'unknown';
const authPresent = !!req.headers['authorization'];
logger.debug(
{ method: req.method, url: req.originalUrl, contentType, contentLength, authPresent },
'[API /ai] Incoming request',
);
} catch (e: unknown) {
logger.error({ error: e }, 'Failed to log incoming AI request headers');
}
next();
});
/**
* NEW ENDPOINT: Accepts a single flyer file (PDF or image), enqueues it for
* background processing, and immediately returns a job ID.
*/
router.post(
'/upload-and-process',
optionalAuth,
uploadToDisk.single('flyerFile'),
validateRequest(uploadAndProcessSchema),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'A flyer file (PDF or image) is required.' });
}
logger.debug(
{ filename: req.file.originalname, size: req.file.size, checksum: req.body.checksum },
'Handling /upload-and-process',
);
const { checksum } = req.body;
// Check for duplicate flyer using checksum before even creating a job
const existingFlyer = await db.flyerRepo.findFlyerByChecksum(checksum, req.log);
if (existingFlyer) {
logger.warn(`Duplicate flyer upload attempt blocked for checksum: ${checksum}`);
// Use 409 Conflict for duplicates
return res.status(409).json({
message: 'This flyer has already been processed.',
flyerId: existingFlyer.flyer_id,
});
}
const userProfile = req.user as UserProfile | undefined;
// Construct a user address string from their profile if they are logged in.
let userProfileAddress: string | undefined = undefined;
if (userProfile?.address) {
userProfileAddress = [
userProfile.address.address_line_1,
userProfile.address.address_line_2,
userProfile.address.city,
userProfile.address.province_state,
userProfile.address.postal_code,
userProfile.address.country,
]
.filter(Boolean)
.join(', ');
}
// Add job to the queue
const job = await flyerQueue.add('process-flyer', {
filePath: req.file.path,
originalFileName: req.file.originalname,
checksum: checksum,
userId: userProfile?.user.user_id,
submitterIp: req.ip, // Capture the submitter's IP address
userProfileAddress: userProfileAddress, // Pass the user's profile address
});
logger.info(
`Enqueued flyer for processing. File: ${req.file.originalname}, Job ID: ${job.id}`,
);
// Respond immediately to the client with 202 Accepted
res.status(202).json({
message: 'Flyer accepted for processing.',
jobId: job.id,
});
} catch (error) {
next(error);
}
},
);
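/*
 * Typical client flow (sketch): POST multipart/form-data with a `flyerFile` part and a
 * `checksum` field, receive 202 with { jobId }, then poll GET /jobs/:jobId/status below
 * until the job reports a completed or failed state.
 */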
/**
* NEW ENDPOINT: Checks the status of a background job.
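 * The response mirrors the BullMQ job record: { id, state, progress, returnValue, failedReason }.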
*/
router.get(
'/jobs/:jobId/status',
validateRequest(jobIdParamSchema),
async (req, res, next: NextFunction) => {
type JobIdRequest = z.infer<typeof jobIdParamSchema>;
const {
params: { jobId },
} = req as unknown as JobIdRequest;
try {
const job = await flyerQueue.getJob(jobId);
if (!job) {
// Job not found: respond with 404 so the client can stop polling.
return res.status(404).json({ message: 'Job not found.' });
}
const state = await job.getState();
const progress = job.progress;
const returnValue = job.returnvalue;
const failedReason = job.failedReason;
logger.debug(`[API /ai/jobs] Status check for job ${jobId}: ${state}`);
res.json({ id: job.id, state, progress, returnValue, failedReason });
} catch (error) {
next(error);
}
},
);
/**
* This endpoint saves the processed flyer data to the database. It is the final step
* in the flyer upload workflow after the AI has extracted the data.
* It uses `optionalAuth` to handle submissions from both anonymous and authenticated users.
*/
router.post(
'/flyers/process',
optionalAuth,
uploadToDisk.single('flyerImage'),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Flyer image file is required.' });
}
// Diagnostic & tolerant parsing for flyers/process
logger.debug(
{ keys: Object.keys(req.body || {}) },
'[API /ai/flyers/process] Processing legacy upload',
);
logger.debug({ filePresent: !!req.file }, '[API /ai/flyers/process] file present:');
// Try several ways to obtain the payload so we are tolerant to client variations.
let parsed: FlyerProcessPayload = {};
let extractedData: Partial<ExtractedCoreData> = {};
try {
// If the client sent a top-level `data` field (stringified JSON), parse it.
if (req.body && (req.body.data || req.body.extractedData)) {
const raw = req.body.data ?? req.body.extractedData;
logger.debug(
{ type: typeof raw, length: raw?.length ?? 0 },
'[API /ai/flyers/process] raw extractedData',
);
try {
parsed = typeof raw === 'string' ? JSON.parse(raw) : raw;
} catch (err) {
logger.warn(
{ error: errMsg(err) },
'[API /ai/flyers/process] Failed to JSON.parse raw extractedData; falling back to an empty payload',
);
// Re-parsing a truncated copy of an invalid JSON string would only throw again,
// so fall back to the raw value when it is already an object, or an empty payload otherwise.
parsed = (typeof raw === 'string' ? {} : raw) as FlyerProcessPayload;
}
// If parsed itself contains an `extractedData` field, use that, otherwise assume parsed is the extractedData
extractedData = parsed.extractedData ?? (parsed as Partial<ExtractedCoreData>);
} else {
// No explicit `data` field found. Attempt to interpret req.body as an object (Express may have parsed multipart fields differently).
try {
parsed = typeof req.body === 'string' ? JSON.parse(req.body) : req.body;
} catch (err) {
logger.warn(
{ error: errMsg(err) },
'[API /ai/flyers/process] Failed to JSON.parse req.body; using empty object',
);
parsed = (req.body as FlyerProcessPayload) || {};
}
// extractedData might be nested under `data` or `extractedData`, or the body itself may be the extracted data.
if (parsed.data) {
try {
const inner = typeof parsed.data === 'string' ? JSON.parse(parsed.data) : parsed.data;
extractedData = inner.extractedData ?? inner;
} catch (err) {
logger.warn(
{ error: errMsg(err) },
'[API /ai/flyers/process] Failed to parse parsed.data; falling back',
);
extractedData = parsed.data as unknown as Partial<ExtractedCoreData>;
}
} else if (parsed.extractedData) {
extractedData = parsed.extractedData;
} else {
// Assume the body itself is the extracted data if it looks like it (has items or store_name keys)
if ('items' in parsed || 'store_name' in parsed || 'valid_from' in parsed) {
extractedData = parsed as Partial<ExtractedCoreData>;
} else {
extractedData = {};
}
}
}
} catch (err) {
logger.error(
{ error: err },
'[API /ai/flyers/process] Unexpected error while parsing request body',
);
parsed = {};
extractedData = {};
}
// Pull common metadata fields (checksum, originalFileName) from whichever shape we parsed.
const checksum = parsed.checksum ?? parsed?.data?.checksum ?? '';
const originalFileName =
parsed.originalFileName ?? parsed?.data?.originalFileName ?? req.file.originalname;
const userProfile = req.user as UserProfile | undefined;
// Validate extractedData to avoid database errors (e.g., null store_name)
if (!extractedData || typeof extractedData !== 'object') {
logger.warn(
{ bodyData: parsed },
'Missing extractedData in /api/ai/flyers/process payload.',
);
// Don't fail hard here; proceed with empty items and fallback store name so the upload can be saved for manual review.
extractedData = {};
}
// Transform the extracted items into the format required for database insertion.
// This adds default values for fields like `view_count` and `click_count`
// and makes this legacy endpoint consistent with the newer FlyerDataTransformer service.
const rawItems = extractedData.items ?? [];
const itemsArray = Array.isArray(rawItems)
? rawItems
: typeof rawItems === 'string'
? JSON.parse(rawItems)
: [];
const itemsForDb = itemsArray.map((item: Partial<ExtractedFlyerItem>) => ({
...item,
master_item_id: item.master_item_id === null ? undefined : item.master_item_id,
view_count: 0,
click_count: 0,
updated_at: new Date().toISOString(),
}));
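// Example: { name: 'Eggs', master_item_id: null } becomes
// { ...item, master_item_id: undefined, view_count: 0, click_count: 0, updated_at: <ISO timestamp> }.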
// Ensure we have a valid store name; the DB requires a non-null store name.
const hasStoreName =
!!extractedData.store_name && String(extractedData.store_name).trim().length > 0;
const storeName = hasStoreName ? String(extractedData.store_name) : 'Unknown Store (auto)';
if (!hasStoreName) {
logger.warn(
'extractedData.store_name missing; using fallback store name to avoid DB constraint error.',
);
}
// 1. Check for duplicate flyer using checksum
const existingFlyer = await db.flyerRepo.findFlyerByChecksum(checksum, req.log);
if (existingFlyer) {
logger.warn(`Duplicate flyer upload attempt blocked for checksum: ${checksum}`);
return res.status(409).json({ message: 'This flyer has already been processed.' });
}
// Generate a 64x64 icon from the uploaded flyer image.
const iconsDir = path.join(path.dirname(req.file.path), 'icons');
const iconFileName = await generateFlyerIcon(req.file.path, iconsDir, req.log);
const iconUrl = `/flyer-images/icons/${iconFileName}`;
// 2. Prepare flyer data for insertion
const flyerData = {
file_name: originalFileName,
image_url: `/flyer-images/${req.file.filename}`, // Store the full URL path
icon_url: iconUrl,
checksum: checksum,
// Use normalized store name (fallback applied above).
store_name: storeName,
valid_from: extractedData.valid_from ?? null,
valid_to: extractedData.valid_to ?? null,
store_address: extractedData.store_address ?? null,
item_count: 0, // Set default to 0; the trigger will update it.
uploaded_by: userProfile?.user.user_id, // Associate with user if logged in
};
// 3. Create flyer and its items in a transaction
const { flyer: newFlyer, items: newItems } = await createFlyerAndItems(
flyerData,
itemsForDb,
req.log,
);
logger.info(
`Successfully processed and saved new flyer: ${newFlyer.file_name} (ID: ${newFlyer.flyer_id}) with ${newItems.length} items.`,
);
// Log this significant event
await db.adminRepo.logActivity(
{
userId: userProfile?.user.user_id,
action: 'flyer_processed',
displayText: `Processed a new flyer for ${flyerData.store_name}.`,
details: { flyerId: newFlyer.flyer_id, storeName: flyerData.store_name },
},
req.log,
);
res.status(201).json({ message: 'Flyer processed and saved successfully.', flyer: newFlyer });
} catch (error) {
next(error);
}
},
);
/**
* This endpoint checks if an image is a flyer. It uses `optionalAuth` to allow
* both authenticated and anonymous users to perform this check.
*/
router.post(
'/check-flyer',
optionalAuth,
uploadToDisk.single('image'),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Image file is required.' });
}
logger.info(`Server-side flyer check for file: ${req.file.originalname}`);
res.status(200).json({ is_flyer: true }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/extract-address',
optionalAuth,
uploadToDisk.single('image'),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Image file is required.' });
}
logger.info(`Server-side address extraction for file: ${req.file.originalname}`);
res.status(200).json({ address: 'not identified' }); // Updated stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/extract-logo',
optionalAuth,
uploadToDisk.array('images'),
async (req, res, next: NextFunction) => {
try {
if (!req.files || !Array.isArray(req.files) || req.files.length === 0) {
return res.status(400).json({ message: 'Image files are required.' });
}
logger.info(`Server-side logo extraction for ${req.files.length} image(s).`);
res.status(200).json({ store_logo_base_64: null }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/quick-insights',
passport.authenticate('jwt', { session: false }),
validateRequest(insightsSchema),
async (req, res, next: NextFunction) => {
try {
logger.info(`Server-side quick insights requested.`);
res
.status(200)
.json({ text: 'This is a server-generated quick insight: buy the cheap stuff!' }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/deep-dive',
passport.authenticate('jwt', { session: false }),
validateRequest(insightsSchema),
async (req, res, next: NextFunction) => {
try {
logger.info(`Server-side deep dive requested.`);
res
.status(200)
.json({ text: 'This is a server-generated deep dive analysis. It is very detailed.' }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/search-web',
passport.authenticate('jwt', { session: false }),
validateRequest(searchWebSchema),
async (req, res, next: NextFunction) => {
try {
logger.info(`Server-side web search requested.`);
res.status(200).json({ text: 'The web says this is good.', sources: [] }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/compare-prices',
passport.authenticate('jwt', { session: false }),
validateRequest(comparePricesSchema),
async (req, res, next: NextFunction) => {
try {
const { items } = req.body;
logger.info(`Server-side price comparison requested for ${items.length} items.`);
res.status(200).json({
text: 'This is a server-generated price comparison. Milk is cheaper at SuperMart.',
sources: [],
}); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/plan-trip',
passport.authenticate('jwt', { session: false }),
validateRequest(planTripSchema),
async (req, res, next: NextFunction) => {
try {
const { items, store, userLocation } = req.body;
logger.debug({ itemCount: items.length, storeName: store.name }, 'Trip planning requested.');
const result = await aiService.aiService.planTripWithMaps(items, store, userLocation);
res.status(200).json(result);
} catch (error) {
logger.error({ error: errMsg(error) }, 'Error in /api/ai/plan-trip endpoint:');
next(error);
}
},
);
// --- STUBBED AI Routes for Future Features ---
router.post(
'/generate-image',
passport.authenticate('jwt', { session: false }),
validateRequest(generateImageSchema),
(req: Request, res: Response) => {
// This endpoint is a placeholder for a future feature.
// Returning 501 Not Implemented is the correct HTTP response for this case.
logger.info('Request received for unimplemented endpoint: /api/ai/generate-image');
res.status(501).json({ message: 'Image generation is not yet implemented.' });
},
);
router.post(
'/generate-speech',
passport.authenticate('jwt', { session: false }),
validateRequest(generateSpeechSchema),
(req: Request, res: Response) => {
// This endpoint is a placeholder for a future feature.
// Returning 501 Not Implemented is the correct HTTP response for this case.
logger.info('Request received for unimplemented endpoint: /api/ai/generate-speech');
res.status(501).json({ message: 'Speech generation is not yet implemented.' });
},
);
/**
* POST /api/ai/rescan-area - Performs a targeted AI scan on a specific area of an image.
* Requires authentication.
*/
router.post(
'/rescan-area',
passport.authenticate('jwt', { session: false }),
uploadToDisk.single('image'),
validateRequest(rescanAreaSchema),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Image file is required.' });
}
// validateRequest transforms the cropArea JSON string into an object in req.body.
// So we use it directly instead of JSON.parse().
const cropArea = req.body.cropArea;
const { extractionType } = req.body;
const { path, mimetype } = req.file;
logger.debug(
{ extractionType, cropArea, filename: req.file.originalname },
'Rescan area requested',
);
const result = await aiService.aiService.extractTextFromImageArea(
path,
mimetype,
cropArea,
extractionType,
req.log,
);
res.status(200).json(result);
} catch (error) {
next(error);
}
},
);
export default router;