Files
flyer-crawler.projectium.com/src/routes/ai.routes.ts
Torben Sorensen 11aeac5edd
Some checks failed
Deploy to Test Environment / deploy-to-test (push) Failing after 1m10s
whoa - so much - new features (UPC,etc) - Sentry for app logging! so much more !
2026-01-11 19:07:02 -08:00

1002 lines
30 KiB
TypeScript

// src/routes/ai.routes.ts
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { Router, Request, Response, NextFunction } from 'express';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { z } from 'zod';
import passport, { optionalAuth } from '../config/passport';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { aiService, DuplicateFlyerError } from '../services/aiService.server';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { createUploadMiddleware, handleMulterError } from '../middleware/multer.middleware';
import { logger } from '../services/logger.server'; // Needed for module-level logging (e.g., Zod schema transforms)
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { UserProfile } from '../types'; // This was a duplicate, fixed.
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { validateRequest } from '../middleware/validation.middleware';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { requiredString } from '../utils/zodUtils';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { cleanupUploadedFile, cleanupUploadedFiles } from '../utils/fileUtils';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { monitoringService } from '../services/monitoringService.server';
// All route handlers now use req.log (request-scoped logger) as per ADR-004
import { aiUploadLimiter, aiGenerationLimiter } from '../config/rateLimiters';
import { sendSuccess, sendError, ErrorCode } from '../utils/apiResponse';
// Express router that every AI endpoint in this file is registered on; it is the module's default export.
const router = Router();
// --- Zod Schemas for AI Routes (as per ADR-003) ---
// A SHA-256 checksum must be exactly 64 characters of lowercase hexadecimal.
const sha256ChecksumSchema = z
  .string()
  .length(64, 'Checksum must be 64 characters long.')
  .regex(/^[a-f0-9]+$/, 'Checksum must be a valid hexadecimal string.');

// Body fields accepted by POST /upload-and-process.
const uploadAndProcessBodySchema = z.object({
  // Presence is checked first by requiredString; the strict checksum rules run afterwards.
  checksum: requiredString('File checksum is required.').pipe(sha256ChecksumSchema),
  // Optional; when supplied it has to be a well-formed URL.
  baseUrl: z.string().url().optional(),
});

// Request envelope ({ body }) validated by the /upload-and-process handler.
const uploadAndProcessSchema = z.object({
  body: uploadAndProcessBodySchema,
});
// Path parameters of the job status route: a required `jobId`.
const jobStatusRouteParamsSchema = z.object({
  jobId: requiredString('A valid Job ID is required.'),
});

// Request envelope ({ params }) used with validateRequest on GET /jobs/:jobId/status.
const jobIdParamSchema = z.object({ params: jobStatusRouteParamsSchema });
/**
 * Produces a printable message from whatever value a `catch` clause received.
 *
 * - `Error` instances yield their `message`.
 * - Any other non-null object that has a `message` property yields that property, stringified.
 * - Everything else is stringified directly; falsy values (null, undefined, 0, '', false)
 *   fall back to a generic message.
 *
 * @param e - The caught value, of unknown type.
 * @returns A string describing the error.
 */
const errMsg = (e: unknown): string => {
  if (e instanceof Error) {
    return e.message;
  }

  const carriesMessage = typeof e === 'object' && e !== null && 'message' in e;
  if (carriesMessage) {
    const { message } = e as { message: unknown };
    return String(message);
  }

  return String(e || 'An unknown error occurred.');
};
// Builds a number schema that only accepts values greater than zero, reporting `message` otherwise.
const positiveCropDimension = (message: string) => z.number().positive(message);

// Shape of a crop rectangle: numeric x/y plus strictly positive width and height.
const cropAreaObjectSchema = z.object({
  x: z.number(),
  y: z.number(),
  width: positiveCropDimension('Crop area width must be positive.'),
  height: positiveCropDimension('Crop area height must be positive.'),
});
// `cropArea` arrives as a JSON string: require it, parse it, then validate the parsed
// value against cropAreaObjectSchema.
const cropAreaFromJsonSchema = requiredString('cropArea must be a valid JSON string.')
  .transform((rawCropArea, ctx) => {
    try {
      return JSON.parse(rawCropArea);
    } catch (parseError) {
      // Record the real parse failure to aid debugging. Zod transforms have no access to the
      // request, so the module-level logger is used rather than req.log.
      logger.warn(
        { error: errMsg(parseError), receivedValue: rawCropArea },
        'Failed to parse cropArea in rescanAreaSchema',
      );
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: 'cropArea must be a valid JSON string.',
      });
      return z.NEVER;
    }
  })
  .pipe(cropAreaObjectSchema);

// The kinds of data a rescan may be asked to extract.
const rescanExtractionTypeSchema = z.enum(['store_name', 'dates', 'item_details'], {
  message: "extractionType must be one of 'store_name', 'dates', or 'item_details'.",
});

// Request envelope ({ body }) for POST /rescan-area.
const rescanAreaSchema = z.object({
  body: z.object({
    cropArea: cropAreaFromJsonSchema,
    extractionType: rescanExtractionTypeSchema,
  }),
});
// Creates a string field that may be null or undefined. Non-empty strings are trimmed;
// null, undefined and '' are passed through unchanged.
const trimmedNullishString = () =>
  z
    .string()
    .nullish()
    .transform((value) => (value ? value.trim() : value));

// A flyer item submitted for analysis. It must be identifiable by a non-empty `item` or `name`.
const flyerItemForAnalysisSchema = z
  .object({
    item: trimmedNullishString(),
    name: trimmedNullishString(),
  })
  // passthrough() keeps any additional properties on the item. If only known properties
  // should be accepted, .strict() would be the tighter alternative.
  .passthrough()
  .refine(
    // Runs on already-trimmed values, so a whitespace-only identifier counts as missing.
    ({ item, name }) =>
      [item, name].some(
        (identifier) => typeof identifier === 'string' && identifier.length > 0,
      ),
    {
      message: "Item identifier is required (either 'item' or 'name').",
    },
  );
// Body for the insight endpoints: a non-empty list of flyer items.
const insightsBodySchema = z.object({
  items: z.array(flyerItemForAnalysisSchema).nonempty("The 'items' array cannot be empty."),
});

// Request envelope ({ body }) shared by /quick-insights and /deep-dive.
const insightsSchema = z.object({ body: insightsBodySchema });
// Body for price comparison: a non-empty list of flyer items.
const comparePricesBodySchema = z.object({
  items: z.array(flyerItemForAnalysisSchema).nonempty("The 'items' array cannot be empty."),
});

// Request envelope ({ body }) for POST /compare-prices.
const comparePricesSchema = z.object({ body: comparePricesBodySchema });
// Latitude in degrees, limited to [-90, 90].
const latitudeSchema = z
  .number()
  .min(-90, 'Latitude must be between -90 and 90.')
  .max(90, 'Latitude must be between -90 and 90.');

// Longitude in degrees, limited to [-180, 180].
const longitudeSchema = z
  .number()
  .min(-180, 'Longitude must be between -180 and 180.')
  .max(180, 'Longitude must be between -180 and 180.');

// Request envelope ({ body }) for POST /plan-trip.
const planTripSchema = z.object({
  body: z.object({
    // An empty array is currently accepted. If a trip needs at least one item, add
    // `.nonempty('At least one item is required to plan a trip.')`.
    items: z.array(flyerItemForAnalysisSchema),
    store: z.object({ name: requiredString('Store name is required.') }),
    userLocation: z.object({
      latitude: latitudeSchema,
      longitude: longitudeSchema,
    }),
  }),
});
// Request envelope ({ body }) for POST /generate-image: a required `prompt` string.
const generateImageBodySchema = z.object({
  prompt: requiredString('A prompt is required.'),
});
const generateImageSchema = z.object({ body: generateImageBodySchema });
// Request envelope ({ body }) for POST /generate-speech: a required `text` string.
const generateSpeechBodySchema = z.object({
  text: requiredString('Text is required.'),
});
const generateSpeechSchema = z.object({ body: generateSpeechBodySchema });
// Request envelope ({ body }) for POST /search-web: a required `query` string.
const searchWebBodySchema = z.object({
  query: requiredString('A search query is required.'),
});
const searchWebSchema = z.object({ body: searchWebBodySchema });
// Upload middleware created with the 'flyer' storage type. Routes in this file call
// `.single(field)` / `.array(field)` on it to accept multipart file fields.
// NOTE(review): presumably files are written to disk (handlers read `req.file.path` and clean
// uploads up afterwards) — confirm in multer.middleware.
const uploadToDisk = createUploadMiddleware({ storageType: 'flyer' });
// Diagnostic middleware: emits one debug entry per AI request with the method, URL,
// content type/length headers and whether an Authorization header was sent.
// A logging failure is reported but never blocks the request — next() is always called.
router.use((req: Request, res: Response, next: NextFunction) => {
  try {
    const { headers } = req;
    const requestSummary = {
      method: req.method,
      url: req.originalUrl,
      contentType: headers['content-type'] || '',
      contentLength: headers['content-length'] || 'unknown',
      // Only the presence of the header is logged, never its value.
      authPresent: Boolean(headers['authorization']),
    };
    req.log.debug(requestSummary, '[API /ai] Incoming request');
  } catch (e: unknown) {
    req.log.error({ error: errMsg(e) }, 'Failed to log incoming AI request headers');
  }
  next();
});
/**
* @openapi
* /ai/upload-and-process:
* post:
* tags: [AI]
* summary: Upload and process flyer
* description: Accepts a single flyer file (PDF or image), enqueues it for background processing, and immediately returns a job ID.
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - flyerFile
* - checksum
* properties:
* flyerFile:
* type: string
* format: binary
* description: Flyer file (PDF or image)
* checksum:
* type: string
* pattern: ^[a-f0-9]{64}$
* description: SHA-256 checksum of the file
* baseUrl:
* type: string
* format: uri
* description: Optional base URL
* responses:
* 202:
* description: Flyer accepted for processing
* 400:
* description: Missing file or invalid checksum
* 409:
* description: Duplicate flyer detected
*/
// POST /upload-and-process
// Validates the body, enqueues the uploaded flyer for background processing and replies
// 202 with the job id. On any failure the uploaded file is removed before responding.
router.post(
  '/upload-and-process',
  aiUploadLimiter,
  optionalAuth,
  uploadToDisk.single('flyerFile'),
  // Validation happens inside the handler (instead of validateRequest middleware)
  // so the catch block can clean up the uploaded file when validation fails.
  async (req, res, next: NextFunction) => {
    try {
      // parse() throws when the body is invalid; the catch block below handles it.
      const validated = uploadAndProcessSchema.parse({ body: req.body });
      const { checksum, baseUrl } = validated.body;

      const uploadedFile = req.file;
      if (!uploadedFile) {
        return sendError(
          res,
          ErrorCode.BAD_REQUEST,
          'A flyer file (PDF or image) is required.',
          400,
        );
      }

      req.log.debug(
        { filename: uploadedFile.originalname, size: uploadedFile.size, checksum },
        'Handling /upload-and-process',
      );

      // In the test environment a request without an Authorization header is treated as
      // anonymous, so mockAuth cannot inject a non-existent user ID for it.
      const isAnonymousTestRequest =
        process.env.NODE_ENV === 'test' && !req.headers['authorization'];
      const userProfile = isAnonymousTestRequest
        ? undefined
        : (req.user as UserProfile | undefined);

      const job = await aiService.enqueueFlyerProcessing(
        uploadedFile,
        checksum,
        userProfile,
        req.ip ?? 'unknown',
        req.log,
        baseUrl,
      );

      // Reply right away with 202 Accepted; processing continues in the background.
      const accepted = {
        message: 'Flyer accepted for processing.',
        jobId: job.id,
      };
      sendSuccess(res, accepted, 202);
    } catch (error) {
      await cleanupUploadedFile(req.file);
      if (error instanceof DuplicateFlyerError) {
        req.log.warn(`Duplicate flyer upload attempt blocked for checksum: ${req.body?.checksum}`);
        return sendError(res, ErrorCode.CONFLICT, error.message, 409, { flyerId: error.flyerId });
      }
      next(error);
    }
  },
);
/**
* @openapi
* /ai/upload-legacy:
* post:
* tags: [AI]
* summary: Legacy flyer upload (deprecated)
* description: Process a flyer upload synchronously. Deprecated - use /upload-and-process instead.
* deprecated: true
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - flyerFile
* properties:
* flyerFile:
* type: string
* format: binary
* description: Flyer file (PDF or image)
* responses:
* 200:
* description: Flyer processed successfully
* 400:
* description: No flyer file uploaded
* 401:
* description: Unauthorized
* 409:
* description: Duplicate flyer detected
*/
// POST /upload-legacy (deprecated)
// JWT-protected upload that awaits aiService.processLegacyFlyerUpload and returns its result.
// The uploaded file is removed if processing fails.
router.post(
  '/upload-legacy',
  aiUploadLimiter,
  passport.authenticate('jwt', { session: false }),
  uploadToDisk.single('flyerFile'),
  async (req: Request, res: Response, next: NextFunction) => {
    try {
      const uploadedFile = req.file;
      if (!uploadedFile) {
        return sendError(res, ErrorCode.BAD_REQUEST, 'No flyer file uploaded.', 400);
      }

      const flyer = await aiService.processLegacyFlyerUpload(
        uploadedFile,
        req.body,
        req.user as UserProfile,
        req.log,
      );
      sendSuccess(res, flyer);
    } catch (error) {
      await cleanupUploadedFile(req.file);
      if (error instanceof DuplicateFlyerError) {
        // A duplicate is reported as 409 along with the id carried by the error.
        req.log.warn('Duplicate legacy flyer upload attempt blocked.');
        return sendError(res, ErrorCode.CONFLICT, error.message, 409, { flyerId: error.flyerId });
      }
      next(error);
    }
  },
);
/**
* @openapi
* /ai/jobs/{jobId}/status:
* get:
* tags: [AI]
* summary: Check job status
* description: Checks the status of a background flyer processing job.
* parameters:
* - in: path
* name: jobId
* required: true
* schema:
* type: string
* description: Job ID returned from upload-and-process
* responses:
* 200:
* description: Job status information
* 404:
* description: Job not found
*/
// GET /jobs/:jobId/status
// Looks up a flyer job through monitoringService and returns whatever status it reports.
router.get(
  '/jobs/:jobId/status',
  validateRequest(jobIdParamSchema),
  async (req, res, next: NextFunction) => {
    // View the request through the schema's inferred type to read the validated jobId.
    const validatedRequest = req as unknown as z.infer<typeof jobIdParamSchema>;
    const jobId = validatedRequest.params.jobId;

    try {
      const status = await monitoringService.getFlyerJobStatus(jobId);
      req.log.debug(`[API /ai/jobs] Status check for job ${jobId}: ${status.state}`);
      sendSuccess(res, status);
    } catch (error) {
      next(error);
    }
  },
);
/**
* @openapi
* /ai/flyers/process:
* post:
* tags: [AI]
* summary: Process flyer data (deprecated)
* description: Saves processed flyer data to the database. Deprecated - use /upload-and-process instead.
* deprecated: true
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - flyerImage
* properties:
* flyerImage:
* type: string
* format: binary
* description: Flyer image file
* responses:
* 201:
* description: Flyer processed and saved successfully
* 400:
* description: Flyer image file is required
* 409:
* description: Duplicate flyer detected
*/
// POST /flyers/process (deprecated)
// Optionally-authenticated upload that awaits aiService.processLegacyFlyerUpload and replies
// 201 with the resulting flyer. The uploaded file is removed if processing fails.
router.post(
  '/flyers/process',
  aiUploadLimiter,
  optionalAuth,
  uploadToDisk.single('flyerImage'),
  async (req, res, next: NextFunction) => {
    try {
      const uploadedImage = req.file;
      if (!uploadedImage) {
        return sendError(res, ErrorCode.BAD_REQUEST, 'Flyer image file is required.', 400);
      }

      // req.user may be absent because authentication is optional on this route.
      const flyer = await aiService.processLegacyFlyerUpload(
        uploadedImage,
        req.body,
        req.user as UserProfile | undefined,
        req.log,
      );

      const created = { message: 'Flyer processed and saved successfully.', flyer };
      sendSuccess(res, created, 201);
    } catch (error) {
      await cleanupUploadedFile(req.file);
      if (error instanceof DuplicateFlyerError) {
        req.log.warn('Duplicate flyer upload attempt blocked.');
        return sendError(res, ErrorCode.CONFLICT, error.message, 409, { flyerId: error.flyerId });
      }
      next(error);
    }
  },
);
/**
* @openapi
* /ai/check-flyer:
* post:
* tags: [AI]
* summary: Check if image is a flyer
* description: Analyzes an image to determine if it's a grocery store flyer.
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - image
* properties:
* image:
* type: string
* format: binary
* description: Image file to check
* responses:
* 200:
* description: Flyer check result
* 400:
* description: Image file is required
*/
// POST /check-flyer
// Stub: always answers { is_flyer: true } once an image has been supplied.
// The uploaded file is removed in every case.
router.post(
  '/check-flyer',
  aiUploadLimiter,
  optionalAuth,
  uploadToDisk.single('image'),
  async (req, res, next: NextFunction) => {
    try {
      const uploadedImage = req.file;
      if (!uploadedImage) {
        return sendError(res, ErrorCode.BAD_REQUEST, 'Image file is required.', 400);
      }

      req.log.info(`Server-side flyer check for file: ${uploadedImage.originalname}`);
      const stubbedResult = { is_flyer: true };
      sendSuccess(res, stubbedResult);
    } catch (error) {
      next(error);
    } finally {
      // Runs for success, the 400 early return and errors alike.
      await cleanupUploadedFile(req.file);
    }
  },
);
/**
* @openapi
* /ai/extract-address:
* post:
* tags: [AI]
* summary: Extract address from image
* description: Extracts store address information from a flyer image.
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - image
* properties:
* image:
* type: string
* format: binary
* description: Image file to extract address from
* responses:
* 200:
* description: Extracted address information
* 400:
* description: Image file is required
*/
// POST /extract-address
// Stub: always answers { address: 'not identified' } once an image has been supplied.
// The uploaded file is removed in every case.
router.post(
  '/extract-address',
  aiUploadLimiter,
  optionalAuth,
  uploadToDisk.single('image'),
  async (req, res, next: NextFunction) => {
    try {
      const uploadedImage = req.file;
      if (!uploadedImage) {
        return sendError(res, ErrorCode.BAD_REQUEST, 'Image file is required.', 400);
      }

      req.log.info(`Server-side address extraction for file: ${uploadedImage.originalname}`);
      const stubbedResult = { address: 'not identified' };
      sendSuccess(res, stubbedResult);
    } catch (error) {
      next(error);
    } finally {
      // Runs for success, the 400 early return and errors alike.
      await cleanupUploadedFile(req.file);
    }
  },
);
/**
* @openapi
* /ai/extract-logo:
* post:
* tags: [AI]
* summary: Extract store logo
* description: Extracts store logo from flyer images.
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - images
* properties:
* images:
* type: array
* items:
* type: string
* format: binary
* description: Image files to extract logo from
* responses:
* 200:
* description: Extracted logo as base64
* 400:
* description: Image files are required
*/
// POST /extract-logo
// Stub: always answers { store_logo_base_64: null } once at least one image has been supplied.
// The uploaded files are removed in every case.
router.post(
  '/extract-logo',
  aiUploadLimiter,
  optionalAuth,
  uploadToDisk.array('images'),
  async (req, res, next: NextFunction) => {
    try {
      const uploadedImages = req.files;
      // Reject a missing value, a non-array value, or an empty array.
      if (!Array.isArray(uploadedImages) || uploadedImages.length === 0) {
        return sendError(res, ErrorCode.BAD_REQUEST, 'Image files are required.', 400);
      }

      req.log.info(`Server-side logo extraction for ${uploadedImages.length} image(s).`);
      const stubbedResult = { store_logo_base_64: null };
      sendSuccess(res, stubbedResult);
    } catch (error) {
      next(error);
    } finally {
      await cleanupUploadedFiles(req.files as Express.Multer.File[]);
    }
  },
);
/**
* @openapi
* /ai/quick-insights:
* post:
* tags: [AI]
* summary: Get quick insights
* description: Get AI-generated quick insights about flyer items.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - items
* properties:
* items:
* type: array
* items:
* type: object
* minItems: 1
* description: List of flyer items to analyze
* responses:
* 200:
* description: AI-generated quick insights
* 401:
* description: Unauthorized
*/
// POST /quick-insights
// Stub: JWT-protected, validates the items list, then returns a fixed insight text.
router.post(
  '/quick-insights',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(insightsSchema),
  async (req, res, next: NextFunction) => {
    try {
      req.log.info('Server-side quick insights requested.');
      const stubbedInsight = {
        text: 'This is a server-generated quick insight: buy the cheap stuff!',
      };
      sendSuccess(res, stubbedInsight);
    } catch (error) {
      next(error);
    }
  },
);
/**
* @openapi
* /ai/deep-dive:
* post:
* tags: [AI]
* summary: Get deep dive analysis
* description: Get detailed AI-generated analysis about flyer items.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - items
* properties:
* items:
* type: array
* items:
* type: object
* minItems: 1
* description: List of flyer items to analyze
* responses:
* 200:
* description: Detailed AI analysis
* 401:
* description: Unauthorized
*/
// POST /deep-dive
// Stub: JWT-protected, validates the items list, then returns a fixed analysis text.
router.post(
  '/deep-dive',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(insightsSchema),
  async (req, res, next: NextFunction) => {
    try {
      req.log.info('Server-side deep dive requested.');
      const stubbedAnalysis = {
        text: 'This is a server-generated deep dive analysis. It is very detailed.',
      };
      sendSuccess(res, stubbedAnalysis);
    } catch (error) {
      next(error);
    }
  },
);
/**
* @openapi
* /ai/search-web:
* post:
* tags: [AI]
* summary: Search web for information
* description: Search the web for product or deal information.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - query
* properties:
* query:
* type: string
* description: Search query
* responses:
* 200:
* description: Search results with sources
* 401:
* description: Unauthorized
*/
// POST /search-web
// Stub: JWT-protected, validates the query, then returns a fixed text with no sources.
router.post(
  '/search-web',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(searchWebSchema),
  async (req, res, next: NextFunction) => {
    try {
      req.log.info('Server-side web search requested.');
      const stubbedSearch = { text: 'The web says this is good.', sources: [] };
      sendSuccess(res, stubbedSearch);
    } catch (error) {
      next(error);
    }
  },
);
/**
* @openapi
* /ai/compare-prices:
* post:
* tags: [AI]
* summary: Compare prices across stores
* description: Compare prices for items across different stores.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - items
* properties:
* items:
* type: array
* items:
* type: object
* minItems: 1
* description: List of items to compare
* responses:
* 200:
* description: Price comparison results
* 401:
* description: Unauthorized
*/
// POST /compare-prices
// Stub: JWT-protected, validates the items list, logs how many items were sent and
// returns a fixed comparison text with no sources.
router.post(
  '/compare-prices',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(comparePricesSchema),
  async (req, res, next: NextFunction) => {
    try {
      const itemCount = req.body.items.length;
      req.log.info(`Server-side price comparison requested for ${itemCount} items.`);

      const stubbedComparison = {
        text: 'This is a server-generated price comparison. Milk is cheaper at SuperMart.',
        sources: [],
      };
      sendSuccess(res, stubbedComparison);
    } catch (error) {
      next(error);
    }
  },
);
/**
* @openapi
* /ai/plan-trip:
* post:
* tags: [AI]
* summary: Plan shopping trip
* description: Plan an optimized shopping trip to a store based on items and location.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - items
* - store
* - userLocation
* properties:
* items:
* type: array
* items:
* type: object
* description: List of items to buy
* store:
* type: object
* required:
* - name
* properties:
* name:
* type: string
* description: Store name
* userLocation:
* type: object
* required:
* - latitude
* - longitude
* properties:
* latitude:
* type: number
* minimum: -90
* maximum: 90
* longitude:
* type: number
* minimum: -180
* maximum: 180
* responses:
* 200:
* description: Trip plan with directions
* 401:
* description: Unauthorized
*/
// POST /plan-trip
// JWT-protected. Passes the validated items, store and user location to
// aiService.planTripWithMaps and returns its result.
router.post(
  '/plan-trip',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(planTripSchema),
  async (req, res, next: NextFunction) => {
    try {
      const { items, store, userLocation } = req.body;
      const requestSummary = { itemCount: items.length, storeName: store.name };
      req.log.debug(requestSummary, 'Trip planning requested.');

      const tripPlan = await aiService.planTripWithMaps(items, store, userLocation);
      sendSuccess(res, tripPlan);
    } catch (error) {
      // Log here with route context, then let the shared error handling respond.
      req.log.error({ error: errMsg(error) }, 'Error in /api/ai/plan-trip endpoint:');
      next(error);
    }
  },
);
// --- STUBBED AI Routes for Future Features ---
/**
* @openapi
* /ai/generate-image:
* post:
* tags: [AI]
* summary: Generate image (not implemented)
* description: Generate an image from a prompt. Currently not implemented.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - prompt
* properties:
* prompt:
* type: string
* description: Image generation prompt
* responses:
* 501:
* description: Not implemented
* 401:
* description: Unauthorized
*/
// POST /generate-image
// Placeholder for a future feature: after auth and validation it always answers
// 501 Not Implemented.
router.post(
  '/generate-image',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(generateImageSchema),
  (req: Request, res: Response) => {
    req.log.info('Request received for unimplemented endpoint: /api/ai/generate-image');

    const notImplementedMessage = 'Image generation is not yet implemented.';
    sendError(res, ErrorCode.NOT_IMPLEMENTED, notImplementedMessage, 501);
  },
);
/**
* @openapi
* /ai/generate-speech:
* post:
* tags: [AI]
* summary: Generate speech (not implemented)
* description: Generate speech from text. Currently not implemented.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required:
* - text
* properties:
* text:
* type: string
* description: Text to convert to speech
* responses:
* 501:
* description: Not implemented
* 401:
* description: Unauthorized
*/
// POST /generate-speech
// Placeholder for a future feature: after auth and validation it always answers
// 501 Not Implemented.
router.post(
  '/generate-speech',
  aiGenerationLimiter,
  passport.authenticate('jwt', { session: false }),
  validateRequest(generateSpeechSchema),
  (req: Request, res: Response) => {
    req.log.info('Request received for unimplemented endpoint: /api/ai/generate-speech');

    const notImplementedMessage = 'Speech generation is not yet implemented.';
    sendError(res, ErrorCode.NOT_IMPLEMENTED, notImplementedMessage, 501);
  },
);
/**
* @openapi
* /ai/rescan-area:
* post:
* tags: [AI]
* summary: Rescan area of image
* description: Performs a targeted AI scan on a specific area of an image.
* security:
* - bearerAuth: []
* requestBody:
* required: true
* content:
* multipart/form-data:
* schema:
* type: object
* required:
* - image
* - cropArea
* - extractionType
* properties:
* image:
* type: string
* format: binary
* description: Image file to scan
* cropArea:
* type: string
* description: JSON string with x, y, width, height
* extractionType:
* type: string
* enum: [store_name, dates, item_details]
* description: Type of data to extract
* responses:
* 200:
* description: Extracted data from image area
* 400:
* description: Image file is required
* 401:
* description: Unauthorized
*/
// POST /rescan-area
// JWT-protected. Runs a targeted extraction on a cropped region of the uploaded image via
// aiService.extractTextFromImageArea and returns its result. The uploaded file is removed
// in every case, including when body validation fails.
router.post(
  '/rescan-area',
  aiUploadLimiter,
  passport.authenticate('jwt', { session: false }),
  uploadToDisk.single('image'),
  // Validation is done inside the handler rather than with validateRequest middleware.
  // As middleware it ran after the upload had been stored but outside this handler's
  // try/finally, so an invalid body left the uploaded file behind. This follows the same
  // approach as /upload-and-process.
  async (req, res, next: NextFunction) => {
    try {
      // parse() throws on an invalid body; the error is forwarded by the catch block.
      // NOTE(review): assumes the shared error handler treats a thrown ZodError as a
      // validation failure, as /upload-and-process already relies on — confirm.
      const { body } = rescanAreaSchema.parse({ body: req.body });

      if (!req.file) {
        return sendError(res, ErrorCode.BAD_REQUEST, 'Image file is required.', 400);
      }

      // The schema has already turned the cropArea JSON string into a validated object.
      const { cropArea, extractionType } = body;
      const { path, mimetype } = req.file;

      req.log.debug(
        { extractionType, cropArea, filename: req.file.originalname },
        'Rescan area requested',
      );

      const result = await aiService.extractTextFromImageArea(
        path,
        mimetype,
        cropArea,
        extractionType,
        req.log,
      );
      sendSuccess(res, result);
    } catch (error) {
      next(error);
    } finally {
      // Covers success, the missing-file early return, validation errors and service errors.
      await cleanupUploadedFile(req.file);
    }
  },
);
/* Catches errors from multer (e.g., file size, file filter) */
// Registered after every route above so that errors raised by their upload middleware reach it.
router.use(handleMulterError);
// The configured AI router is this module's default export.
export default router;