Compare commits

...

9 Commits

Author SHA1 Message Date
Gitea Actions
d4557e13fb ci: Bump version to 0.7.2 [skip ci] 2025-12-31 13:32:58 +05:00
3e41130c69 again
All checks were successful
Deploy to Test Environment / deploy-to-test (push) Successful in 18m59s
2025-12-31 00:31:18 -08:00
Gitea Actions
d9034563d6 ci: Bump version to 0.7.1 [skip ci] 2025-12-31 13:21:54 +05:00
5836a75157 flyer upload (anon) issues
Some checks failed
Deploy to Test Environment / deploy-to-test (push) Failing after 42s
2025-12-31 00:21:19 -08:00
Gitea Actions
790008ae0d ci: Bump version to 0.7.0 for production release [skip ci] 2025-12-31 12:43:41 +05:00
Gitea Actions
b5b91eb968 ci: Bump version to 0.6.6 [skip ci] 2025-12-31 12:29:43 +05:00
38eb810e7a logging the frontend loop
All checks were successful
Deploy to Test Environment / deploy-to-test (push) Successful in 11m55s
2025-12-30 23:28:38 -08:00
Gitea Actions
458588a6e7 ci: Bump version to 0.6.5 [skip ci] 2025-12-31 11:34:23 +05:00
0b4113417f flyer upload (anon) issues
All checks were successful
Deploy to Test Environment / deploy-to-test (push) Successful in 11m56s
2025-12-30 22:33:55 -08:00
16 changed files with 207 additions and 31 deletions

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "flyer-crawler",
"version": "0.6.4",
"version": "0.7.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "flyer-crawler",
"version": "0.6.4",
"version": "0.7.2",
"dependencies": {
"@bull-board/api": "^6.14.2",
"@bull-board/express": "^6.14.2",

View File

@@ -1,7 +1,7 @@
{
"name": "flyer-crawler",
"private": true,
"version": "0.6.4",
"version": "0.7.2",
"type": "module",
"scripts": {
"dev": "concurrently \"npm:start:dev\" \"vite\"",

View File

@@ -15,7 +15,7 @@ export const AuthProvider: React.FC<{ children: ReactNode }> = ({ children }) =>
// FIX: Stabilize the apiFunction passed to useApi.
// By wrapping this in useCallback, we ensure the same function instance is passed to
// useApi on every render. This prevents the `execute` function returned by `useApi`
// from being recreated, which in turn breaks the infinite re-render loop in the useEffect below.
// from being recreated, which in turn breaks the infinite re-render loop in the useEffect.
const getProfileCallback = useCallback(() => apiClient.getAuthenticatedUserProfile(), []);
const { execute: checkTokenApi } = useApi<UserProfile, []>(getProfileCallback);

View File

@@ -4,17 +4,21 @@ import { FlyersContext, FlyersContextType } from '../contexts/FlyersContext';
import type { Flyer } from '../types';
import * as apiClient from '../services/apiClient';
import { useInfiniteQuery } from '../hooks/useInfiniteQuery';
import { useCallback } from 'react';
export const FlyersProvider: React.FC<{ children: ReactNode }> = ({ children }) => {
// Memoize the fetch function to ensure stability for the useInfiniteQuery hook.
const fetchFlyersFn = useCallback(apiClient.fetchFlyers, []);
const {
data: flyers,
isLoading: isLoadingFlyers,
isLoading: isLoadingFlyers,
error: flyersError,
fetchNextPage: fetchNextFlyersPage,
hasNextPage: hasNextFlyersPage,
refetch: refetchFlyers,
isRefetching: isRefetchingFlyers,
} = useInfiniteQuery<Flyer>(apiClient.fetchFlyers);
} = useInfiniteQuery<Flyer>(fetchFlyersFn);
const value: FlyersContextType = {
flyers: flyers || [],
@@ -26,5 +30,5 @@ export const FlyersProvider: React.FC<{ children: ReactNode }> = ({ children })
refetchFlyers,
};
return <FlyersContext.Provider value={value}>{children}</FlyersContext.Provider>;
return <FlyersContext.Provider value={value}>{children}</FlyersContext.Provider>;
};

View File

@@ -1,14 +1,22 @@
// src/providers/MasterItemsProvider.tsx
import React, { ReactNode, useMemo } from 'react';
import React, { ReactNode, useMemo, useEffect, useCallback } from 'react';
import { MasterItemsContext } from '../contexts/MasterItemsContext';
import type { MasterGroceryItem } from '../types';
import * as apiClient from '../services/apiClient';
import { useApiOnMount } from '../hooks/useApiOnMount';
import { logger } from '../services/logger.client';
export const MasterItemsProvider: React.FC<{ children: ReactNode }> = ({ children }) => {
const { data, loading, error } = useApiOnMount<MasterGroceryItem[], []>(() =>
apiClient.fetchMasterItems(),
);
// LOGGING: Check if the provider is unmounting/remounting repeatedly
useEffect(() => {
logger.debug('MasterItemsProvider: MOUNTED');
return () => logger.debug('MasterItemsProvider: UNMOUNTED');
}, []);
// Memoize the fetch function to ensure stability for the useApiOnMount hook.
const fetchFn = useCallback(() => apiClient.fetchMasterItems(), []);
const { data, loading, error } = useApiOnMount<MasterGroceryItem[], []>(fetchFn);
const value = useMemo(
() => ({

View File

@@ -1,5 +1,6 @@
// src/providers/UserDataProvider.tsx
import React, { useState, useEffect, useMemo, ReactNode } from 'react';
import { logger } from '../services/logger.client';
import React, { useState, useEffect, useMemo, ReactNode, useCallback } from 'react';
import { UserDataContext } from '../contexts/UserDataContext';
import type { MasterGroceryItem, ShoppingList } from '../types';
import * as apiClient from '../services/apiClient';
@@ -9,18 +10,25 @@ import { useAuth } from '../hooks/useAuth';
export const UserDataProvider: React.FC<{ children: ReactNode }> = ({ children }) => {
const { userProfile } = useAuth();
// Wrap the API calls in useCallback to prevent unnecessary re-renders.
const fetchWatchedItemsFn = useCallback(
() => apiClient.fetchWatchedItems(),
[],
);
const fetchShoppingListsFn = useCallback(() => apiClient.fetchShoppingLists(), []);
const {
data: watchedItemsData,
loading: isLoadingWatched,
error: watchedItemsError,
} = useApiOnMount<MasterGroceryItem[], []>(() => apiClient.fetchWatchedItems(), [userProfile], {
} = useApiOnMount<MasterGroceryItem[], []>(fetchWatchedItemsFn, [userProfile], {
enabled: !!userProfile,
});
const {
data: shoppingListsData,
loading: isLoadingShoppingLists,
loading: isLoadingShoppingLists,
error: shoppingListsError,
} = useApiOnMount<ShoppingList[], []>(() => apiClient.fetchShoppingLists(), [userProfile], {
} = useApiOnMount<ShoppingList[], []>(fetchShoppingListsFn, [userProfile], {
enabled: !!userProfile,
});
@@ -32,7 +40,7 @@ export const UserDataProvider: React.FC<{ children: ReactNode }> = ({ children }
useEffect(() => {
// When the user logs out (user becomes null), immediately clear all user-specific data.
// This also serves to clear out old data when a new user logs in, before their new data arrives.
if (!userProfile) {
if (!userProfile) {
setWatchedItems([]);
setShoppingLists([]);
return;
@@ -60,7 +68,7 @@ export const UserDataProvider: React.FC<{ children: ReactNode }> = ({ children }
watchedItemsError,
shoppingListsError,
],
);
);
return <UserDataContext.Provider value={value}>{children}</UserDataContext.Provider>;
};

View File

@@ -19,6 +19,12 @@ router.get(
validateRequest(emptySchema),
async (req: Request, res: Response, next: NextFunction) => {
try {
// LOGGING: Track how often this heavy DB call is actually made vs served from cache
req.log.info('Fetching master items list from database...');
// Optimization: This list changes rarely. Instruct clients to cache it for 1 hour (3600s).
res.set('Cache-Control', 'public, max-age=3600');
const masterItems = await db.personalizationRepo.getAllMasterItems(req.log);
res.json(masterItems);
} catch (error) {

View File

@@ -43,7 +43,7 @@ const ExtractedFlyerItemSchema = z.object({
});
export const AiFlyerDataSchema = z.object({
store_name: requiredString('Store name cannot be empty'),
store_name: z.string().nullable(),
valid_from: z.string().nullable(),
valid_to: z.string().nullable(),
store_address: z.string().nullable(),
@@ -507,7 +507,7 @@ export class AIService {
userProfileAddress?: string,
logger: Logger = this.logger,
): Promise<{
store_name: string;
store_name: string | null;
valid_from: string | null;
valid_to: string | null;
store_address: string | null;

View File

@@ -283,7 +283,10 @@ export const fetchFlyerById = (flyerId: number): Promise<Response> =>
* Fetches all master grocery items from the backend.
* @returns A promise that resolves to an array of MasterGroceryItem objects.
*/
export const fetchMasterItems = (): Promise<Response> => publicGet('/personalization/master-items');
export const fetchMasterItems = (): Promise<Response> => {
logger.debug('apiClient: fetchMasterItems called');
return publicGet('/personalization/master-items');
};
/**
* Fetches all categories from the backend.

View File

@@ -127,4 +127,98 @@ describe('FlyerAiProcessor', () => {
expect(result.needsReview).toBe(true);
expect(logger.warn).toHaveBeenCalledWith(expect.any(Object), expect.stringContaining('contains no items. The flyer will be saved with an item_count of 0. Flagging for review.'));
});
describe('Batching Logic', () => {
it('should process images in batches and merge the results correctly', async () => {
// Arrange
const jobData = createMockJobData({});
// 5 images, with BATCH_SIZE = 4, should result in 2 batches.
const imagePaths = [
{ path: 'page1.jpg', mimetype: 'image/jpeg' },
{ path: 'page2.jpg', mimetype: 'image/jpeg' },
{ path: 'page3.jpg', mimetype: 'image/jpeg' },
{ path: 'page4.jpg', mimetype: 'image/jpeg' },
{ path: 'page5.jpg', mimetype: 'image/jpeg' },
];
const mockAiResponseBatch1 = {
store_name: 'Batch 1 Store',
valid_from: '2025-01-01',
valid_to: '2025-01-07',
store_address: '123 Batch St',
items: [
{ item: 'Item A', price_display: '$1', price_in_cents: 100, quantity: '1', category_name: 'Cat A', master_item_id: 1 },
{ item: 'Item B', price_display: '$2', price_in_cents: 200, quantity: '1', category_name: 'Cat B', master_item_id: 2 },
],
};
const mockAiResponseBatch2 = {
store_name: 'Batch 2 Store', // This should be ignored in the merge
valid_from: null,
valid_to: null,
store_address: null,
items: [
{ item: 'Item C', price_display: '$3', price_in_cents: 300, quantity: '1', category_name: 'Cat C', master_item_id: 3 },
],
};
// Mock the AI service to return different results for each batch call
vi.mocked(mockAiService.extractCoreDataFromFlyerImage)
.mockResolvedValueOnce(mockAiResponseBatch1)
.mockResolvedValueOnce(mockAiResponseBatch2);
// Act
const result = await service.extractAndValidateData(imagePaths, jobData, logger);
// Assert
// 1. AI service was called twice (for 2 batches)
expect(mockAiService.extractCoreDataFromFlyerImage).toHaveBeenCalledTimes(2);
// 2. Check the arguments for each call
expect(mockAiService.extractCoreDataFromFlyerImage).toHaveBeenNthCalledWith(1, imagePaths.slice(0, 4), [], undefined, undefined, logger);
expect(mockAiService.extractCoreDataFromFlyerImage).toHaveBeenNthCalledWith(2, imagePaths.slice(4, 5), [], undefined, undefined, logger);
// 3. Check the merged data
expect(result.data.store_name).toBe('Batch 1 Store'); // Metadata from the first batch
expect(result.data.valid_from).toBe('2025-01-01');
expect(result.data.valid_to).toBe('2025-01-07');
expect(result.data.store_address).toBe('123 Batch St');
// 4. Check that items from both batches are merged
expect(result.data.items).toHaveLength(3);
expect(result.data.items).toEqual(expect.arrayContaining([
expect.objectContaining({ item: 'Item A' }),
expect.objectContaining({ item: 'Item B' }),
expect.objectContaining({ item: 'Item C' }),
]));
// 5. Check that the job is not flagged for review
expect(result.needsReview).toBe(false);
});
it('should fill in missing metadata from subsequent batches', async () => {
// Arrange
const jobData = createMockJobData({});
const imagePaths = [
{ path: 'page1.jpg', mimetype: 'image/jpeg' }, { path: 'page2.jpg', mimetype: 'image/jpeg' }, { path: 'page3.jpg', mimetype: 'image/jpeg' }, { path: 'page4.jpg', mimetype: 'image/jpeg' }, { path: 'page5.jpg', mimetype: 'image/jpeg' },
];
const mockAiResponseBatch1 = { store_name: null, valid_from: '2025-01-01', valid_to: '2025-01-07', store_address: null, items: [{ item: 'Item A', price_display: '$1', price_in_cents: 100, quantity: '1', category_name: 'Cat A', master_item_id: 1 }] };
const mockAiResponseBatch2 = { store_name: 'Batch 2 Store', valid_from: '2025-01-02', valid_to: null, store_address: '456 Subsequent St', items: [{ item: 'Item C', price_display: '$3', price_in_cents: 300, quantity: '1', category_name: 'Cat C', master_item_id: 3 }] };
vi.mocked(mockAiService.extractCoreDataFromFlyerImage)
.mockResolvedValueOnce(mockAiResponseBatch1)
.mockResolvedValueOnce(mockAiResponseBatch2);
// Act
const result = await service.extractAndValidateData(imagePaths, jobData, logger);
// Assert
expect(result.data.store_name).toBe('Batch 2 Store'); // Filled from batch 2
expect(result.data.valid_from).toBe('2025-01-01'); // Kept from batch 1
expect(result.data.valid_to).toBe('2025-01-07'); // Kept from batch 1
expect(result.data.store_address).toBe('456 Subsequent St'); // Filled from batch 2
expect(result.data.items).toHaveLength(2);
});
});
});

View File

@@ -94,19 +94,64 @@ export class FlyerAiProcessor {
jobData: FlyerJobData,
logger: Logger,
): Promise<AiProcessorResult> {
logger.info(`Starting AI data extraction.`);
logger.info(`Starting AI data extraction for ${imagePaths.length} pages.`);
const { submitterIp, userProfileAddress } = jobData;
const masterItems = await this.personalizationRepo.getAllMasterItems(logger);
logger.debug(`Retrieved ${masterItems.length} master items for AI matching.`);
const extractedData = await this.ai.extractCoreDataFromFlyerImage(
imagePaths,
masterItems,
submitterIp,
userProfileAddress,
logger,
);
// BATCHING LOGIC: Process images in chunks to avoid hitting AI payload/token limits.
const BATCH_SIZE = 4;
const batches = [];
for (let i = 0; i < imagePaths.length; i += BATCH_SIZE) {
batches.push(imagePaths.slice(i, i + BATCH_SIZE));
}
return this._validateAiData(extractedData, logger);
// Initialize container for merged data
const mergedData: ValidatedAiDataType = {
store_name: null,
valid_from: null,
valid_to: null,
store_address: null,
items: [],
};
logger.info(`Processing ${imagePaths.length} pages in ${batches.length} batches (Batch Size: ${BATCH_SIZE}).`);
for (const [index, batch] of batches.entries()) {
logger.info(`Processing batch ${index + 1}/${batches.length} (${batch.length} pages)...`);
// The AI service handles rate limiting internally (e.g., max 5 RPM).
// Processing these sequentially ensures we respect that limit.
const batchResult = await this.ai.extractCoreDataFromFlyerImage(
batch,
masterItems,
submitterIp,
userProfileAddress,
logger,
);
// MERGE LOGIC:
// 1. Metadata (Store Name, Dates): Prioritize the first batch (usually the cover page).
// If subsequent batches have data and the current is null, fill it in.
if (index === 0) {
mergedData.store_name = batchResult.store_name;
mergedData.valid_from = batchResult.valid_from;
mergedData.valid_to = batchResult.valid_to;
mergedData.store_address = batchResult.store_address;
} else {
if (!mergedData.store_name && batchResult.store_name) mergedData.store_name = batchResult.store_name;
if (!mergedData.valid_from && batchResult.valid_from) mergedData.valid_from = batchResult.valid_from;
if (!mergedData.valid_to && batchResult.valid_to) mergedData.valid_to = batchResult.valid_to;
if (!mergedData.store_address && batchResult.store_address) mergedData.store_address = batchResult.store_address;
}
// 2. Items: Append all found items to the master list.
mergedData.items.push(...batchResult.items);
}
logger.info(`Batch processing complete. Total items extracted: ${mergedData.items.length}`);
// Validate the final merged dataset
return this._validateAiData(mergedData, logger);
}
}

View File

@@ -133,6 +133,12 @@ export class FlyerProcessingService {
return { flyerId: flyer.flyer_id };
} catch (error) {
logger.warn('Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.');
// Add detailed logging of the raw error object
if (error instanceof Error) {
logger.error({ err: error, stack: error.stack }, 'Raw error object in processJob catch block');
} else {
logger.error({ error }, 'Raw non-Error object in processJob catch block');
}
// This private method handles error reporting and re-throwing.
await this._reportErrorAndThrow(error, job, logger, stages);
// This line is technically unreachable because the above method always throws,

View File

@@ -1,3 +1,4 @@
// src/services/workers.server.ts
import { Worker, Job, UnrecoverableError } from 'bullmq';
import fsPromises from 'node:fs/promises';
import { exec } from 'child_process';

View File

@@ -82,6 +82,7 @@ describe('Flyer Processing Background Job Integration Test', () => {
let jobStatus;
const maxRetries = 30; // Poll for up to 90 seconds (30 * 3s)
for (let i = 0; i < maxRetries; i++) {
console.log(`Polling attempt ${i + 1}...`);
await new Promise((resolve) => setTimeout(resolve, 3000)); // Wait 3 seconds between polls
const statusReq = request.get(`/api/ai/jobs/${jobId}/status`);
if (token) {
@@ -89,6 +90,7 @@ describe('Flyer Processing Background Job Integration Test', () => {
}
const statusResponse = await statusReq;
jobStatus = statusResponse.body;
console.log(`Job status: ${JSON.stringify(jobStatus)}`);
if (jobStatus.state === 'completed' || jobStatus.state === 'failed') {
break;
}

View File

@@ -38,6 +38,7 @@ describe('Public API Routes Integration Tests', () => {
email: userEmail,
password: 'a-Very-Strong-Password-123!',
fullName: 'Public Routes Test User',
request,
});
testUser = createdUser;

View File

@@ -43,8 +43,6 @@ export const cleanupDb = async (ids: TestResourceIds) => {
await pool.query('DELETE FROM public.user_watched_items WHERE user_id = ANY($1::uuid[])', [userIds]);
await pool.query('DELETE FROM public.user_achievements WHERE user_id = ANY($1::uuid[])', [userIds]);
await pool.query('DELETE FROM public.activity_log WHERE user_id = ANY($1::uuid[])', [userIds]);
await pool.query('DELETE FROM public.user_refresh_tokens WHERE user_id = ANY($1::uuid[])', [userIds]);
await pool.query('DELETE FROM public.password_reset_tokens WHERE user_id = ANY($1::uuid[])', [userIds]);
}
// --- Stage 2: Delete parent records that other things depend on ---