Files
flyer-crawler.projectium.com/src/routes/ai.routes.ts
Torben Sorensen 68bfaa50e6
All checks were successful
Deploy to Test Environment / deploy-to-test (push) Successful in 26m5s
more baseurl work - hopefully that does it for now
2026-01-05 17:33:00 -08:00

534 lines
17 KiB
TypeScript

// src/routes/ai.routes.ts
import { Router, Request, Response, NextFunction } from 'express';
import { z } from 'zod';
import passport from './passport.routes';
import { optionalAuth } from './passport.routes';
import { aiService, DuplicateFlyerError } from '../services/aiService.server';
import {
createUploadMiddleware,
handleMulterError,
} from '../middleware/multer.middleware';
import { logger } from '../services/logger.server'; // This was a duplicate, fixed.
import { UserProfile } from '../types'; // This was a duplicate, fixed.
import { validateRequest } from '../middleware/validation.middleware';
import { requiredString } from '../utils/zodUtils';
import { cleanupUploadedFile, cleanupUploadedFiles } from '../utils/fileUtils';
import { monitoringService } from '../services/monitoringService.server';
import { aiUploadLimiter, aiGenerationLimiter } from '../config/rateLimiters';
const router = Router();
// --- Zod Schemas for AI Routes (as per ADR-003) ---
const uploadAndProcessSchema = z.object({
body: z.object({
// Stricter validation for SHA-256 checksum. It must be a 64-character hexadecimal string.
checksum: requiredString('File checksum is required.').pipe(
z.string()
.length(64, 'Checksum must be 64 characters long.')
.regex(/^[a-f0-9]+$/, 'Checksum must be a valid hexadecimal string.'),
),
baseUrl: z.string().url().optional(),
}),
});
const jobIdParamSchema = z.object({
params: z.object({
jobId: requiredString('A valid Job ID is required.'),
}),
});
// Helper to safely extract an error message from unknown `catch` values.
const errMsg = (e: unknown) => {
if (e instanceof Error) return e.message;
if (typeof e === 'object' && e !== null && 'message' in e)
return String((e as { message: unknown }).message);
return String(e || 'An unknown error occurred.');
};
const cropAreaObjectSchema = z.object({
x: z.number(),
y: z.number(),
width: z.number().positive('Crop area width must be positive.'),
height: z.number().positive('Crop area height must be positive.'),
});
const rescanAreaSchema = z.object({
body: z.object({
cropArea: requiredString('cropArea must be a valid JSON string.')
.transform((val, ctx) => {
try {
return JSON.parse(val);
} catch (err) {
// Log the actual parsing error for better debugging if invalid JSON is sent.
logger.warn(
{ error: errMsg(err), receivedValue: val },
'Failed to parse cropArea in rescanAreaSchema',
);
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: 'cropArea must be a valid JSON string.',
});
return z.NEVER;
}
})
.pipe(cropAreaObjectSchema), // Further validate the structure of the parsed object
extractionType: z.enum(['store_name', 'dates', 'item_details'], {
message: "extractionType must be one of 'store_name', 'dates', or 'item_details'.",
}),
}),
});
const flyerItemForAnalysisSchema = z
.object({
// Sanitize item and name by trimming whitespace.
// The transform ensures that null/undefined values are preserved
// while trimming any actual string values.
item: z.string().nullish().transform(val => (val ? val.trim() : val)),
name: z.string().nullish().transform(val => (val ? val.trim() : val)),
})
// Using .passthrough() allows extra properties on the item object.
// If the intent is to strictly enforce only 'item' and 'name' (and other known properties),
// consider using .strict() instead for tighter security and data integrity.
.passthrough()
.refine(
(data) =>
// After the transform, the values are already trimmed.
(data.item && data.item.length > 0) || (data.name && data.name.length > 0),
{
message: "Item identifier is required (either 'item' or 'name').",
},
);
const insightsSchema = z.object({
body: z.object({
items: z.array(flyerItemForAnalysisSchema).nonempty("The 'items' array cannot be empty."),
}),
});
const comparePricesSchema = z.object({
body: z.object({
items: z.array(flyerItemForAnalysisSchema).nonempty("The 'items' array cannot be empty."),
}),
});
const planTripSchema = z.object({
body: z.object({
// Consider if this array should be non-empty. If a trip plan requires at least one item,
// you could add `.nonempty('At least one item is required to plan a trip.')`
items: z.array(flyerItemForAnalysisSchema),
store: z.object({ name: requiredString('Store name is required.') }),
userLocation: z.object({
latitude: z
.number()
.min(-90, 'Latitude must be between -90 and 90.')
.max(90, 'Latitude must be between -90 and 90.'),
longitude: z
.number()
.min(-180, 'Longitude must be between -180 and 180.')
.max(180, 'Longitude must be between -180 and 180.'),
}),
}),
});
const generateImageSchema = z.object({
body: z.object({ prompt: requiredString('A prompt is required.') }),
});
const generateSpeechSchema = z.object({
body: z.object({ text: requiredString('Text is required.') }),
});
const searchWebSchema = z.object({
body: z.object({ query: requiredString('A search query is required.') }),
});
const uploadToDisk = createUploadMiddleware({ storageType: 'flyer' });
// Diagnostic middleware: log incoming AI route requests (headers and sizes)
router.use((req: Request, res: Response, next: NextFunction) => {
try {
const contentType = req.headers['content-type'] || '';
const contentLength = req.headers['content-length'] || 'unknown';
const authPresent = !!req.headers['authorization'];
logger.debug(
{ method: req.method, url: req.originalUrl, contentType, contentLength, authPresent },
'[API /ai] Incoming request',
);
} catch (e: unknown) {
logger.error({ error: errMsg(e) }, 'Failed to log incoming AI request headers');
}
next();
});
/**
* NEW ENDPOINT: Accepts a single flyer file (PDF or image), enqueues it for
* background processing, and immediately returns a job ID.
*/
router.post(
'/upload-and-process',
aiUploadLimiter,
optionalAuth,
uploadToDisk.single('flyerFile'),
// Validation is now handled inside the route to ensure file cleanup on failure.
// validateRequest(uploadAndProcessSchema),
async (req, res, next: NextFunction) => {
try {
// Manually validate the request body. This will throw if validation fails.
const { body } = uploadAndProcessSchema.parse({ body: req.body });
if (!req.file) {
return res.status(400).json({ message: 'A flyer file (PDF or image) is required.' });
}
logger.debug(
{ filename: req.file.originalname, size: req.file.size, checksum: body.checksum },
'Handling /upload-and-process',
);
// Fix: Explicitly clear userProfile if no auth header is present in test env
// This prevents mockAuth from injecting a non-existent user ID for anonymous requests.
let userProfile = req.user as UserProfile | undefined;
if (process.env.NODE_ENV === 'test' && !req.headers['authorization']) {
userProfile = undefined;
}
const job = await aiService.enqueueFlyerProcessing(
req.file,
body.checksum,
userProfile,
req.ip ?? 'unknown',
req.log,
body.baseUrl,
);
// Respond immediately to the client with 202 Accepted
res.status(202).json({
message: 'Flyer accepted for processing.',
jobId: job.id,
});
} catch (error) {
await cleanupUploadedFile(req.file);
if (error instanceof DuplicateFlyerError) {
logger.warn(`Duplicate flyer upload attempt blocked for checksum: ${req.body?.checksum}`);
return res.status(409).json({ message: error.message, flyerId: error.flyerId });
}
next(error);
}
},
);
/**
* POST /api/ai/upload-legacy - Process a flyer upload from a legacy client.
* This is an authenticated route that processes the flyer synchronously.
* This is used for integration testing the legacy upload flow.
*/
router.post(
'/upload-legacy',
aiUploadLimiter,
passport.authenticate('jwt', { session: false }),
uploadToDisk.single('flyerFile'),
async (req: Request, res: Response, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'No flyer file uploaded.' });
}
const userProfile = req.user as UserProfile;
const newFlyer = await aiService.processLegacyFlyerUpload(req.file, req.body, userProfile, req.log);
res.status(200).json(newFlyer);
} catch (error) {
await cleanupUploadedFile(req.file);
if (error instanceof DuplicateFlyerError) {
logger.warn(`Duplicate legacy flyer upload attempt blocked.`);
return res.status(409).json({ message: error.message, flyerId: error.flyerId });
}
next(error);
}
},
);
/**
* NEW ENDPOINT: Checks the status of a background job.
*/
router.get(
'/jobs/:jobId/status',
validateRequest(jobIdParamSchema),
async (req, res, next: NextFunction) => {
type JobIdRequest = z.infer<typeof jobIdParamSchema>;
const {
params: { jobId },
} = req as unknown as JobIdRequest;
try {
const jobStatus = await monitoringService.getFlyerJobStatus(jobId); // This was a duplicate, fixed.
logger.debug(`[API /ai/jobs] Status check for job ${jobId}: ${jobStatus.state}`);
res.json(jobStatus);
} catch (error) {
next(error);
}
},
);
/**
* This endpoint saves the processed flyer data to the database. It is the final step
* in the flyer upload workflow after the AI has extracted the data.
* It uses `optionalAuth` to handle submissions from both anonymous and authenticated users.
*/
router.post(
'/flyers/process',
aiUploadLimiter,
optionalAuth,
uploadToDisk.single('flyerImage'),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Flyer image file is required.' });
}
const userProfile = req.user as UserProfile | undefined;
const newFlyer = await aiService.processLegacyFlyerUpload(
req.file,
req.body,
userProfile,
req.log,
);
res.status(201).json({ message: 'Flyer processed and saved successfully.', flyer: newFlyer });
} catch (error) {
await cleanupUploadedFile(req.file);
if (error instanceof DuplicateFlyerError) {
logger.warn(`Duplicate flyer upload attempt blocked.`);
return res.status(409).json({ message: error.message, flyerId: error.flyerId });
}
next(error);
}
},
);
/**
* This endpoint checks if an image is a flyer. It uses `optionalAuth` to allow
* both authenticated and anonymous users to perform this check.
*/
router.post(
'/check-flyer',
aiUploadLimiter,
optionalAuth,
uploadToDisk.single('image'),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Image file is required.' });
}
logger.info(`Server-side flyer check for file: ${req.file.originalname}`);
res.status(200).json({ is_flyer: true }); // Stubbed response
} catch (error) {
next(error);
} finally {
await cleanupUploadedFile(req.file);
}
},
);
router.post(
'/extract-address',
aiUploadLimiter,
optionalAuth,
uploadToDisk.single('image'),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Image file is required.' });
}
logger.info(`Server-side address extraction for file: ${req.file.originalname}`);
res.status(200).json({ address: 'not identified' }); // Updated stubbed response
} catch (error) {
next(error);
} finally {
await cleanupUploadedFile(req.file);
}
},
);
router.post(
'/extract-logo',
aiUploadLimiter,
optionalAuth,
uploadToDisk.array('images'),
async (req, res, next: NextFunction) => {
try {
if (!req.files || !Array.isArray(req.files) || req.files.length === 0) {
return res.status(400).json({ message: 'Image files are required.' });
}
logger.info(`Server-side logo extraction for ${req.files.length} image(s).`);
res.status(200).json({ store_logo_base_64: null }); // Stubbed response
} catch (error) {
next(error);
} finally {
await cleanupUploadedFiles(req.files as Express.Multer.File[]);
}
},
);
router.post(
'/quick-insights',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(insightsSchema),
async (req, res, next: NextFunction) => {
try {
logger.info(`Server-side quick insights requested.`);
res
.status(200)
.json({ text: 'This is a server-generated quick insight: buy the cheap stuff!' }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/deep-dive',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(insightsSchema),
async (req, res, next: NextFunction) => {
try {
logger.info(`Server-side deep dive requested.`);
res
.status(200)
.json({ text: 'This is a server-generated deep dive analysis. It is very detailed.' }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/search-web',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(searchWebSchema),
async (req, res, next: NextFunction) => {
try {
logger.info(`Server-side web search requested.`);
res.status(200).json({ text: 'The web says this is good.', sources: [] }); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/compare-prices',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(comparePricesSchema),
async (req, res, next: NextFunction) => {
try {
const { items } = req.body;
logger.info(`Server-side price comparison requested for ${items.length} items.`);
res.status(200).json({
text: 'This is a server-generated price comparison. Milk is cheaper at SuperMart.',
sources: [],
}); // Stubbed response
} catch (error) {
next(error);
}
},
);
router.post(
'/plan-trip',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(planTripSchema),
async (req, res, next: NextFunction) => {
try {
const { items, store, userLocation } = req.body;
logger.debug({ itemCount: items.length, storeName: store.name }, 'Trip planning requested.');
const result = await aiService.planTripWithMaps(items, store, userLocation);
res.status(200).json(result);
} catch (error) {
logger.error({ error: errMsg(error) }, 'Error in /api/ai/plan-trip endpoint:');
next(error);
}
},
);
// --- STUBBED AI Routes for Future Features ---
router.post(
'/generate-image',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(generateImageSchema),
(req: Request, res: Response) => {
// This endpoint is a placeholder for a future feature.
// Returning 501 Not Implemented is the correct HTTP response for this case.
logger.info('Request received for unimplemented endpoint: /api/ai/generate-image');
res.status(501).json({ message: 'Image generation is not yet implemented.' });
},
);
router.post(
'/generate-speech',
aiGenerationLimiter,
passport.authenticate('jwt', { session: false }),
validateRequest(generateSpeechSchema),
(req: Request, res: Response) => {
// This endpoint is a placeholder for a future feature.
// Returning 501 Not Implemented is the correct HTTP response for this case.
logger.info('Request received for unimplemented endpoint: /api/ai/generate-speech');
res.status(501).json({ message: 'Speech generation is not yet implemented.' });
},
);
/**
* POST /api/ai/rescan-area - Performs a targeted AI scan on a specific area of an image.
* Requires authentication.
*/
router.post(
'/rescan-area',
aiUploadLimiter,
passport.authenticate('jwt', { session: false }),
uploadToDisk.single('image'),
validateRequest(rescanAreaSchema),
async (req, res, next: NextFunction) => {
try {
if (!req.file) {
return res.status(400).json({ message: 'Image file is required.' });
}
// validateRequest transforms the cropArea JSON string into an object in req.body.
// So we use it directly instead of JSON.parse().
const cropArea = req.body.cropArea;
const { extractionType } = req.body;
const { path, mimetype } = req.file;
logger.debug(
{ extractionType, cropArea, filename: req.file.originalname },
'Rescan area requested',
);
const result = await aiService.extractTextFromImageArea(
path,
mimetype,
cropArea,
extractionType,
req.log,
);
res.status(200).json(result);
} catch (error) {
next(error);
} finally {
await cleanupUploadedFile(req.file);
}
},
);
/* Catches errors from multer (e.g., file size, file filter) */
router.use(handleMulterError);
export default router;