ADR work, dockerfile work, integration test fixes

commit e75054b1ab (parent 639313485a)
2026-01-09 11:43:35 -08:00
30 changed files with 3934 additions and 108 deletions


@@ -63,7 +63,15 @@
"Bash(npm install:*)", "Bash(npm install:*)",
"Bash(git grep:*)", "Bash(git grep:*)",
"Bash(findstr:*)", "Bash(findstr:*)",
"Bash(git add:*)" "Bash(git add:*)",
"mcp__filesystem__write_file",
"mcp__podman__container_list",
"Bash(podman cp:*)",
"mcp__podman__container_inspect",
"mcp__podman__network_list",
"Bash(podman network connect:*)",
"Bash(npm run build:*)",
"Bash(set NODE_ENV=test)"
] ]
} }
} }


@@ -1,18 +1,96 @@
{
// ============================================================================
// VS CODE DEV CONTAINER CONFIGURATION
// ============================================================================
// This file configures VS Code's Dev Containers extension to provide a
// consistent, fully-configured development environment.
//
// Features:
// - Automatic PostgreSQL + Redis startup with healthchecks
// - Automatic npm install
// - Automatic database schema initialization and seeding
// - Pre-configured VS Code extensions (ESLint, Prettier)
// - Podman support for Windows users
//
// Usage:
// 1. Install the "Dev Containers" extension in VS Code
// 2. Open this project folder
// 3. Click "Reopen in Container" when prompted (or use Command Palette)
// 4. Wait for container build and initialization
// 5. Development server starts automatically
// ============================================================================
"name": "Flyer Crawler Dev (Ubuntu 22.04)", "name": "Flyer Crawler Dev (Ubuntu 22.04)",
// Use Docker Compose for multi-container setup
"dockerComposeFile": ["../compose.dev.yml"], "dockerComposeFile": ["../compose.dev.yml"],
"service": "app", "service": "app",
"workspaceFolder": "/app", "workspaceFolder": "/app",
// VS Code customizations
"customizations": { "customizations": {
"vscode": { "vscode": {
"extensions": ["dbaeumer.vscode-eslint", "esbenp.prettier-vscode"] "extensions": [
// Code quality
"dbaeumer.vscode-eslint",
"esbenp.prettier-vscode",
// TypeScript
"ms-vscode.vscode-typescript-next",
// Database
"mtxr.sqltools",
"mtxr.sqltools-driver-pg",
// Utilities
"eamodio.gitlens",
"streetsidesoftware.code-spell-checker"
],
"settings": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "esbenp.prettier-vscode",
"typescript.preferences.importModuleSpecifier": "relative"
}
}
},
// Run as root (required for npm global installs)
"remoteUser": "root", "remoteUser": "root",
// ============================================================================
// Lifecycle Commands
// ============================================================================
// initializeCommand: Runs on the HOST before the container is created.
// Starts Podman machine on Windows (no-op if already running or using Docker).
"initializeCommand": "powershell -Command \"podman machine start; exit 0\"",
// postCreateCommand: Runs ONCE when the container is first created.
// This is where we do full initialization: npm install + database setup.
"postCreateCommand": "chmod +x scripts/docker-init.sh && ./scripts/docker-init.sh",
// postAttachCommand: Runs EVERY TIME VS Code attaches to the container.
// Starts the development server automatically.
"postAttachCommand": "npm run dev:container", "postAttachCommand": "npm run dev:container",
// ============================================================================
// Port Forwarding
// ============================================================================
// Automatically forward these ports from the container to the host
"forwardPorts": [3000, 3001],
// Labels for forwarded ports in VS Code's Ports panel
"portsAttributes": {
"3000": {
"label": "Frontend (Vite)",
"onAutoForward": "notify"
},
"3001": {
"label": "Backend API",
"onAutoForward": "notify"
}
},
// ============================================================================
// Features
// ============================================================================
// Additional dev container features (optional)
"features": {}
}

.env.example (new file)

@@ -0,0 +1,77 @@
# .env.example
# ============================================================================
# ENVIRONMENT VARIABLES TEMPLATE
# ============================================================================
# Copy this file to .env and fill in your values.
# For local development with Docker/Podman, these defaults should work out of the box.
#
# IMPORTANT: Never commit .env files with real credentials to version control!
# ============================================================================
# ===================
# Database Configuration
# ===================
# PostgreSQL connection settings
# For container development, use the service name "postgres"
DB_HOST=postgres
DB_PORT=5432
DB_USER=postgres
DB_PASSWORD=postgres
DB_NAME=flyer_crawler_dev
# ===================
# Redis Configuration
# ===================
# Redis URL for caching and job queues
# For container development, use the service name "redis"
REDIS_URL=redis://redis:6379
# Optional: Redis password (leave empty if not required)
REDIS_PASSWORD=
# ===================
# Application Settings
# ===================
NODE_ENV=development
# Frontend URL for CORS and email links
FRONTEND_URL=http://localhost:3000
# ===================
# Authentication
# ===================
# REQUIRED: Secret key for signing JWT tokens (generate a random 64+ character string)
JWT_SECRET=your-super-secret-jwt-key-change-this-in-production
# ===================
# AI/ML Services
# ===================
# REQUIRED: Google Gemini API key for flyer OCR processing
GEMINI_API_KEY=your-gemini-api-key
# ===================
# External APIs
# ===================
# Optional: Google Maps API key for geocoding store addresses
GOOGLE_MAPS_API_KEY=
# ===================
# Email Configuration (Optional)
# ===================
# SMTP settings for sending emails (deal notifications, password reset)
SMTP_HOST=
SMTP_PORT=587
SMTP_SECURE=false
SMTP_USER=
SMTP_PASS=
SMTP_FROM_EMAIL=noreply@example.com
# ===================
# Worker Configuration (Optional)
# ===================
# Concurrency settings for background job workers
WORKER_CONCURRENCY=1
EMAIL_WORKER_CONCURRENCY=10
ANALYTICS_WORKER_CONCURRENCY=1
CLEANUP_WORKER_CONCURRENCY=10
# Worker lock duration in milliseconds (default: 2 minutes)
WORKER_LOCK_DURATION=120000

.env.test (new file)

@@ -0,0 +1,6 @@
DB_HOST=10.89.0.4
DB_USER=flyer
DB_PASSWORD=flyer
DB_NAME=flyer_crawler_test
REDIS_URL=redis://redis:6379
NODE_ENV=test


@@ -137,6 +137,13 @@ jobs:
VITE_API_BASE_URL: 'http://localhost:3001/api'
GEMINI_API_KEY: ${{ secrets.VITE_GOOGLE_GENAI_API_KEY }}
# --- Storage path for flyer images ---
# CRITICAL: Use an absolute path in the test runner's working directory for file storage.
# This ensures tests can read processed files to verify their contents (e.g., EXIF stripping).
# Without this, multer and flyerProcessingService default to /var/www/.../flyer-images.
# NOTE: We use ${{ github.workspace }} which resolves to the checkout directory.
STORAGE_PATH: '${{ github.workspace }}/flyer-images'
# --- JWT Secret for Passport authentication in tests ---
JWT_SECRET: ${{ secrets.JWT_SECRET }}


@@ -1,31 +1,60 @@
# Dockerfile.dev
# ============================================================================
# DEVELOPMENT DOCKERFILE
# ============================================================================
# This Dockerfile creates a development environment that matches production
# as closely as possible while providing the tools needed for development.
#
# Base: Ubuntu 22.04 (LTS) - matches production server
# Node: v20.x (LTS) - matches production
# Includes: PostgreSQL client, Redis CLI, build tools
# ============================================================================

FROM ubuntu:22.04

# Set environment variables to non-interactive to avoid prompts during installation
ENV DEBIAN_FRONTEND=noninteractive

# ============================================================================
# Install System Dependencies
# ============================================================================
# - curl: for downloading Node.js setup script and health checks
# - git: for version control operations
# - build-essential: for compiling native Node.js modules (node-gyp)
# - python3: required by some Node.js build tools
# - postgresql-client: for psql CLI (database initialization)
# - redis-tools: for redis-cli (health checks)
RUN apt-get update && apt-get install -y \
    curl \
    git \
    build-essential \
    python3 \
    postgresql-client \
    redis-tools \
    && rm -rf /var/lib/apt/lists/*

# ============================================================================
# Install Node.js 20.x (LTS)
# ============================================================================
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get install -y nodejs

# ============================================================================
# Set Working Directory
# ============================================================================
WORKDIR /app

# ============================================================================
# Environment Configuration
# ============================================================================
# Default environment variables for development
ENV NODE_ENV=development

# Increase Node.js memory limit for large builds
ENV NODE_OPTIONS='--max-old-space-size=8192'

# ============================================================================
# Default Command
# ============================================================================
# Keep container running so VS Code can attach.
# Actual commands (npm run dev, etc.) are run via devcontainer.json.
CMD ["bash"]


@@ -1,6 +1,31 @@
# compose.dev.yml
# ============================================================================
# DEVELOPMENT DOCKER COMPOSE CONFIGURATION
# ============================================================================
# This file defines the local development environment using Docker/Podman.
#
# Services:
# - app: Node.js application (API + Frontend)
# - postgres: PostgreSQL 15 with PostGIS extension
# - redis: Redis for caching and job queues
#
# Usage:
# Start all services: podman-compose -f compose.dev.yml up -d
# Stop all services: podman-compose -f compose.dev.yml down
# View logs: podman-compose -f compose.dev.yml logs -f
# Reset everything: podman-compose -f compose.dev.yml down -v
#
# VS Code Dev Containers:
# This file is referenced by .devcontainer/devcontainer.json for seamless
# VS Code integration. Open the project in VS Code and use "Reopen in Container".
# ============================================================================
version: '3.8'

services:
  # ===================
  # Application Service
  # ===================
  app:
    container_name: flyer-crawler-dev
    build:
@@ -16,19 +41,42 @@ services:
      - '3000:3000' # Frontend (Vite default)
      - '3001:3001' # Backend API
    environment:
      # Core settings
      - NODE_ENV=development
      # Database - use service name for Docker networking
      - DB_HOST=postgres
      - DB_PORT=5432
      - DB_USER=postgres
      - DB_PASSWORD=postgres
      - DB_NAME=flyer_crawler_dev
      # Redis - use service name for Docker networking
      - REDIS_URL=redis://redis:6379
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      # Frontend URL for CORS
      - FRONTEND_URL=http://localhost:3000
      # Default JWT secret for development (override in production!)
      - JWT_SECRET=dev-jwt-secret-change-in-production
      # Worker settings
      - WORKER_LOCK_DURATION=120000
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    # Keep container running so VS Code can attach
    command: tail -f /dev/null
    # Healthcheck for the app (once it's running)
    healthcheck:
      # Use CMD-SHELL so the shell interprets the "||" fallback (exec form would pass it to curl literally)
      test: ['CMD-SHELL', 'curl -f http://localhost:3001/api/health || exit 0']
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # ===================
  # PostgreSQL Database
  # ===================
  postgres:
    image: docker.io/library/postgis/postgis:15-3.4
    container_name: flyer-crawler-postgres
@@ -38,15 +86,53 @@ services:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: flyer_crawler_dev
      # Optimize for development
      POSTGRES_INITDB_ARGS: '--encoding=UTF8 --locale=C'
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Mount SQL files for manual initialization if needed
      - ./sql:/docker-entrypoint-initdb.d/sql:ro
    # Healthcheck ensures postgres is ready before app starts
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U postgres -d flyer_crawler_dev']
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 10s

  # ===================
  # Redis Cache/Queue
  # ===================
  redis:
    image: docker.io/library/redis:alpine
    container_name: flyer-crawler-redis
    ports:
      - '6379:6379'
    volumes:
      - redis_data:/data
    # Healthcheck ensures redis is ready before app starts
    healthcheck:
      test: ['CMD', 'redis-cli', 'ping']
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 5s
    # Enable persistence for development data
    command: redis-server --appendonly yes

# ===================
# Named Volumes
# ===================
volumes:
  postgres_data:
    name: flyer-crawler-postgres-data
  redis_data:
    name: flyer-crawler-redis-data
  node_modules_data:
    name: flyer-crawler-node-modules

# ===================
# Network Configuration
# ===================
# All services are on the default bridge network.
# Use service names (postgres, redis) as hostnames.


@@ -4,6 +4,8 @@
**Status**: Accepted
**Implemented**: 2026-01-07
## Context
Our application has experienced a recurring pattern of bugs and brittle tests related to error handling, specifically for "resource not found" scenarios. The root causes identified are:
@@ -41,3 +43,86 @@ We will adopt a strict, consistent error-handling contract for the service and r
**Initial Refactoring**: Requires a one-time effort to audit and refactor all existing repository methods to conform to this new standard.
**Convention Adherence**: Developers must be aware of and adhere to this convention. This ADR serves as the primary documentation for this pattern.
## Implementation Details
### Custom Error Types
All custom errors are defined in `src/services/db/errors.db.ts`:
| Error Class | HTTP Status | PostgreSQL Code | Use Case |
| -------------------------------- | ----------- | --------------- | ------------------------------- |
| `NotFoundError` | 404 | - | Resource not found |
| `UniqueConstraintError` | 409 | 23505 | Duplicate key violation |
| `ForeignKeyConstraintError` | 400 | 23503 | Referenced record doesn't exist |
| `NotNullConstraintError` | 400 | 23502 | Required field is null |
| `CheckConstraintError` | 400 | 23514 | Check constraint violated |
| `InvalidTextRepresentationError` | 400 | 22P02 | Invalid data type format |
| `NumericValueOutOfRangeError` | 400 | 22003 | Numeric overflow |
| `ValidationError` | 400 | - | Request validation failed |
| `ForbiddenError` | 403 | - | Access denied |
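The class definitions themselves are not reproduced in this ADR. A minimal sketch of the pattern (the `AppError` base class is an assumption for illustration — only the subclass names in the table are confirmed) might look like:

```typescript
// Hypothetical sketch; the real definitions live in src/services/db/errors.db.ts.
// A base error that carries the HTTP status the error handler should emit.
export class AppError extends Error {
  constructor(
    message: string,
    public readonly statusCode: number,
  ) {
    super(message);
    this.name = new.target.name; // keep the subclass name in logs
  }
}

export class NotFoundError extends AppError {
  constructor(message = 'Resource not found.') {
    super(message, 404); // maps to HTTP 404 per the table above
  }
}
```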
### Error Handler Middleware
The centralized error handler in `src/middleware/errorHandler.ts` (a sketch follows the list):
1. Catches all errors from route handlers
2. Maps custom error types to HTTP status codes
3. Logs errors with appropriate severity (warn for 4xx, error for 5xx)
4. Returns consistent JSON error responses
5. Includes error ID for server errors (for support correlation)
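A hedged sketch of such a handler, reusing the hypothetical `AppError` base sketched above (the actual implementation in `src/middleware/errorHandler.ts` is not shown in this commit):

```typescript
import { randomUUID } from 'crypto';
import type { NextFunction, Request, Response } from 'express';
// Assumes the AppError sketch above is exported from errors.db.ts
import { AppError } from '../services/db/errors.db';

// Express recognizes error middleware by its four-argument signature.
export function errorHandler(err: Error, req: Request, res: Response, _next: NextFunction) {
  const statusCode = err instanceof AppError ? err.statusCode : 500;

  if (statusCode >= 500) {
    // Server errors get an ID for support correlation (step 5 above)
    const errorId = randomUUID();
    req.log.error({ err, errorId }, 'Unhandled server error');
    return res.status(statusCode).json({ message: 'An internal error occurred.', errorId });
  }

  // Client errors are logged at warn severity (step 3 above)
  req.log.warn({ err }, 'Request failed');
  return res.status(statusCode).json({ message: err.message });
}
```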
### Usage Pattern
```typescript
// In repository (throws NotFoundError)
async function getUserById(id: number): Promise<User> {
const result = await pool.query('SELECT * FROM users WHERE id = $1', [id]);
if (result.rows.length === 0) {
throw new NotFoundError(`User with ID ${id} not found.`);
}
return result.rows[0];
}
// In route handler (simple try/catch)
router.get('/:id', async (req, res, next) => {
try {
const user = await getUserById(req.params.id);
res.json(user);
} catch (error) {
next(error); // errorHandler maps NotFoundError to 404
}
});
```
### Centralized Error Handler Helper
The `handleDbError` function in `src/services/db/errors.db.ts` provides centralized PostgreSQL error handling:
```typescript
import { handleDbError } from './errors.db';
try {
await pool.query('INSERT INTO users (email) VALUES ($1)', [email]);
} catch (error) {
handleDbError(
error,
logger,
'Failed to create user',
{ email },
{
uniqueMessage: 'A user with this email already exists.',
defaultMessage: 'Failed to create user.',
},
);
}
```
## Key Files
- `src/services/db/errors.db.ts` - Custom error classes and `handleDbError` utility
- `src/middleware/errorHandler.ts` - Centralized Express error handling middleware
## Related ADRs
- [ADR-034](./0034-repository-pattern-standards.md) - Repository Pattern Standards (extends this ADR)


@@ -60,3 +60,109 @@ async function registerUserAndCreateDefaultList(userData) {
**Learning Curve**: Developers will need to learn and adopt the `withTransaction` pattern for all transactional database work.
**Refactoring Effort**: Existing methods that manually manage transactions (`createUser`, `createBudget`, etc.) will need to be refactored to use the new pattern.
## Implementation Details
### The `withTransaction` Helper
Located in `src/services/db/connection.db.ts`:
```typescript
export async function withTransaction<T>(callback: (client: PoolClient) => Promise<T>): Promise<T> {
const client = await getPool().connect();
try {
await client.query('BEGIN');
const result = await callback(client);
await client.query('COMMIT');
return result;
} catch (error) {
await client.query('ROLLBACK');
logger.error({ err: error }, 'Transaction failed, rolling back.');
throw error;
} finally {
client.release();
}
}
```
### Repository Pattern for Transaction Support
Repository methods accept an optional `PoolClient` parameter:
```typescript
// Function-based approach
export async function createUser(userData: CreateUserInput, client?: PoolClient): Promise<User> {
const queryable = client || getPool();
const result = await queryable.query<User>(
'INSERT INTO users (email, password_hash) VALUES ($1, $2) RETURNING *',
[userData.email, userData.passwordHash],
);
return result.rows[0];
}
```
### Transactional Service Example
```typescript
// src/services/authService.ts
import { withTransaction } from './db/connection.db';
import { createUser, createProfile } from './db';
export async function registerUserWithProfile(
email: string,
password: string,
profileData: ProfileInput,
): Promise<UserWithProfile> {
return withTransaction(async (client) => {
// All operations use the same transactional client
const user = await createUser({ email, password }, client);
const profile = await createProfile(
{
userId: user.user_id,
...profileData,
},
client,
);
return { user, profile };
});
}
```
### Services Using `withTransaction`
| Service | Function | Operations |
| ------------------------- | ----------------------- | ----------------------------------- |
| `authService` | `registerAndLoginUser` | Create user + profile + preferences |
| `userService` | `updateUserWithProfile` | Update user + profile atomically |
| `flyerPersistenceService` | `saveFlyer` | Create flyer + items + metadata |
| `shoppingService` | `createListWithItems` | Create list + initial items |
| `gamificationService` | `awardAchievement` | Create achievement + update points |
### Connection Pool Configuration
```typescript
const poolConfig: PoolConfig = {
max: 20, // Max clients in pool
idleTimeoutMillis: 30000, // Close idle clients after 30s
connectionTimeoutMillis: 2000, // Fail connect after 2s
};
```
### Pool Status Monitoring
```typescript
import { getPoolStatus } from './db/connection.db';
const status = getPoolStatus();
// { totalCount: 20, idleCount: 15, waitingCount: 0 }
```
## Key Files
- `src/services/db/connection.db.ts` - `getPool()`, `withTransaction()`, `getPoolStatus()`
## Related ADRs
- [ADR-001](./0001-standardized-error-handling.md) - Error handling within transactions
- [ADR-034](./0034-repository-pattern-standards.md) - Repository patterns for transaction participation


@@ -79,3 +79,140 @@ router.get('/:id', validateRequest(getFlyerSchema), async (req, res, next) => {
**New Dependency**: Introduces `zod` as a new project dependency.
**Learning Curve**: Developers need to learn the `zod` schema definition syntax.
**Refactoring Effort**: Requires a one-time effort to create schemas and refactor all existing routes to use the `validateRequest` middleware.
## Implementation Details
### The `validateRequest` Middleware
Located in `src/middleware/validation.middleware.ts`:
```typescript
export const validateRequest =
(schema: ZodObject<z.ZodRawShape>) => async (req: Request, res: Response, next: NextFunction) => {
try {
const { params, query, body } = await schema.parseAsync({
params: req.params,
query: req.query,
body: req.body,
});
// Merge parsed data back into request
Object.keys(req.params).forEach((key) => delete req.params[key]);
Object.assign(req.params, params);
Object.keys(req.query).forEach((key) => delete req.query[key]);
Object.assign(req.query, query);
req.body = body;
return next();
} catch (error) {
if (error instanceof ZodError) {
const validationIssues = error.issues.map((issue) => ({
...issue,
path: issue.path.map((p) => String(p)),
}));
return next(new ValidationError(validationIssues));
}
return next(error);
}
};
```
### Common Zod Patterns
```typescript
import { z } from 'zod';
import { requiredString } from '../utils/zodUtils';
// String that coerces to positive integer (for ID params)
const idParam = z.string().pipe(z.coerce.number().int().positive());
// Pagination query params with defaults
const paginationQuery = z.object({
limit: z.coerce.number().int().positive().max(100).default(20),
offset: z.coerce.number().int().nonnegative().default(0),
});
// Email with sanitization
const emailSchema = z.string().trim().toLowerCase().email('A valid email is required.');
// Password with strength validation
const passwordSchema = z
.string()
.trim()
.min(8, 'Password must be at least 8 characters long.')
.superRefine((password, ctx) => {
const strength = validatePasswordStrength(password);
if (!strength.isValid) ctx.addIssue({ code: 'custom', message: strength.feedback });
});
// Optional string that converts empty string to undefined
const optionalString = z.preprocess(
(val) => (val === '' ? undefined : val),
z.string().trim().optional(),
);
```
### Routes Using `validateRequest`
All API routes use the validation middleware:
| Router | Schemas Defined | Validated Endpoints |
| ------------------------ | --------------- | -------------------------------------------------------------------------------- |
| `auth.routes.ts` | 5 | `/register`, `/login`, `/forgot-password`, `/reset-password`, `/change-password` |
| `user.routes.ts` | 4 | `/profile`, `/address`, `/preferences`, `/notifications` |
| `flyer.routes.ts` | 6 | `GET /:id`, `GET /`, `GET /:id/items`, `DELETE /:id` |
| `budget.routes.ts` | 5 | `/`, `/:id`, `/batch`, `/categories` |
| `recipe.routes.ts` | 4 | `GET /`, `GET /:id`, `POST /`, `PATCH /:id` |
| `admin.routes.ts` | 8 | Various admin endpoints |
| `ai.routes.ts` | 3 | `/upload-and-process`, `/analyze`, `/jobs/:jobId/status` |
| `gamification.routes.ts` | 3 | `/achievements`, `/leaderboard`, `/points` |
### Validation Error Response Format
When validation fails, the `errorHandler` returns:
```json
{
"message": "The request data is invalid.",
"errors": [
{
"path": ["body", "email"],
"message": "A valid email is required."
},
{
"path": ["body", "password"],
"message": "Password must be at least 8 characters long."
}
]
}
```
HTTP Status: `400 Bad Request`
### Zod Utility Functions
Located in `src/utils/zodUtils.ts`:
```typescript
// String that rejects empty strings
export const requiredString = (message?: string) =>
z.string().min(1, message || 'This field is required.');
// Number from string with validation
export const numericString = z.string().pipe(z.coerce.number());
// Boolean from string ('true'/'false')
export const booleanString = z.enum(['true', 'false']).transform((v) => v === 'true');
```
## Key Files
- `src/middleware/validation.middleware.ts` - The `validateRequest` middleware
- `src/services/db/errors.db.ts` - `ValidationError` class definition
- `src/middleware/errorHandler.ts` - Error formatting for validation errors
- `src/utils/zodUtils.ts` - Reusable Zod schema utilities
## Related ADRs
- [ADR-001](./0001-standardized-error-handling.md) - Error handling for validation errors
- [ADR-032](./0032-rate-limiting-strategy.md) - Rate limiting applied alongside validation


@@ -86,3 +86,219 @@ router.get('/:id', async (req, res, next) => {
**Refactoring Effort**: Requires adding the `requestLogger` middleware and refactoring all routes and services to use `req.log` instead of the global `logger`.
**Slight Performance Overhead**: Creating a child logger for every request adds a minor performance cost, though this is negligible for most modern logging libraries.
## Implementation Details
### Logger Configuration
Located in `src/services/logger.server.ts`:
```typescript
import pino from 'pino';
const isProduction = process.env.NODE_ENV === 'production';
const isTest = process.env.NODE_ENV === 'test';
export const logger = pino({
level: isProduction ? 'info' : 'debug',
transport:
isProduction || isTest
? undefined
: {
target: 'pino-pretty',
options: {
colorize: true,
translateTime: 'SYS:standard',
ignore: 'pid,hostname',
},
},
redact: {
paths: [
'req.headers.authorization',
'req.headers.cookie',
'*.body.password',
'*.body.newPassword',
'*.body.currentPassword',
'*.body.confirmPassword',
'*.body.refreshToken',
'*.body.token',
],
censor: '[REDACTED]',
},
});
```
### Request Logger Middleware
Located in `server.ts`:
```typescript
const requestLogger = (req: Request, res: Response, next: NextFunction) => {
const requestId = randomUUID();
const user = req.user as UserProfile | undefined;
const start = process.hrtime();
// Create request-scoped logger
req.log = logger.child({
request_id: requestId,
user_id: user?.user.user_id,
ip_address: req.ip,
});
req.log.debug({ method: req.method, originalUrl: req.originalUrl }, 'INCOMING');
res.on('finish', () => {
const duration = getDurationInMilliseconds(start);
const { statusCode, statusMessage } = res;
const logDetails: Record<string, unknown> = {
user_id: (req.user as UserProfile | undefined)?.user.user_id,
method: req.method,
originalUrl: req.originalUrl,
statusCode,
statusMessage,
duration: duration.toFixed(2),
};
// Include request details for failed requests (for debugging)
if (statusCode >= 400) {
logDetails.req = { headers: req.headers, body: req.body };
}
if (statusCode >= 500) req.log.error(logDetails, 'Request completed with server error');
else if (statusCode >= 400) req.log.warn(logDetails, 'Request completed with client error');
else req.log.info(logDetails, 'Request completed successfully');
});
next();
};
app.use(requestLogger);
```
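The middleware references a `getDurationInMilliseconds` helper that is not shown in this commit; a plausible implementation over `process.hrtime` tuples (an assumption, not the confirmed source) is:

```typescript
// Assumed helper: convert a process.hrtime() start tuple to elapsed milliseconds.
const getDurationInMilliseconds = (start: [number, number]): number => {
  const [seconds, nanoseconds] = process.hrtime(start);
  return seconds * 1000 + nanoseconds / 1e6;
};
```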
### TypeScript Support
The `req.log` property is typed via declaration merging in `src/types/express.d.ts`:
```typescript
import { Logger } from 'pino';
declare global {
namespace Express {
export interface Request {
log: Logger;
}
}
}
```
### Automatic Sensitive Data Redaction
The Pino logger automatically redacts sensitive fields:
```json
// Before redaction
{
"body": {
"email": "user@example.com",
"password": "secret123",
"newPassword": "newsecret456"
}
}
// After redaction (in logs)
{
"body": {
"email": "user@example.com",
"password": "[REDACTED]",
"newPassword": "[REDACTED]"
}
}
```
### Log Levels by Scenario
| Level | HTTP Status | Scenario |
| ----- | ----------- | -------------------------------------------------- |
| DEBUG | Any | Request incoming, internal state, development info |
| INFO | 2xx | Successful requests, business events |
| WARN | 4xx | Client errors, validation failures, not found |
| ERROR | 5xx | Server errors, unhandled exceptions |
### Service Layer Logging
Services accept the request-scoped logger as an optional parameter:
```typescript
export async function registerUser(email: string, password: string, reqLog?: Logger) {
const log = reqLog || logger; // Fall back to global logger
log.info({ email }, 'Registering new user');
// ... implementation
log.debug({ userId: user.user_id }, 'User created successfully');
return user;
}
// In route handler
router.post('/register', async (req, res, next) => {
await authService.registerUser(req.body.email, req.body.password, req.log);
});
```
### Log Output Format
**Development** (pino-pretty):
```text
[2026-01-09 12:34:56.789] INFO (request_id=abc123): Request completed successfully
method: "GET"
originalUrl: "/api/flyers"
statusCode: 200
duration: "45.23"
```
**Production** (JSON):
```json
{
"level": 30,
"time": 1704812096789,
"request_id": "abc123",
"user_id": "user_456",
"ip_address": "192.168.1.1",
"method": "GET",
"originalUrl": "/api/flyers",
"statusCode": 200,
"duration": "45.23",
"msg": "Request completed successfully"
}
```
### Routes Using `req.log`
All route files have been migrated to use the request-scoped logger:
- `src/routes/auth.routes.ts`
- `src/routes/user.routes.ts`
- `src/routes/flyer.routes.ts`
- `src/routes/ai.routes.ts`
- `src/routes/admin.routes.ts`
- `src/routes/budget.routes.ts`
- `src/routes/recipe.routes.ts`
- `src/routes/gamification.routes.ts`
- `src/routes/personalization.routes.ts`
- `src/routes/stats.routes.ts`
- `src/routes/health.routes.ts`
- `src/routes/system.routes.ts`
## Key Files
- `src/services/logger.server.ts` - Pino logger configuration
- `src/services/logger.client.ts` - Client-side logger (for frontend)
- `src/types/express.d.ts` - TypeScript declaration for `req.log`
- `server.ts` - Request logger middleware
## Related ADRs
- [ADR-001](./0001-standardized-error-handling.md) - Error handler uses `req.log` for error logging
- [ADR-026](./0026-standardized-client-side-structured-logging.md) - Client-side logging strategy


@@ -2,17 +2,288 @@
**Date**: 2025-12-12
**Status**: Implemented
**Implemented**: 2026-01-09
## Context
The project is currently run using `pm2`, and the `README.md` contains manual setup instructions. While functional, this lacks the portability, scalability, and consistency of modern deployment practices. Local development environments also suffered from inconsistency issues.
## Decision
We will standardize the deployment process using a hybrid approach:
1. **PM2 for Production**: Use PM2 cluster mode for process management, load balancing, and zero-downtime reloads.
2. **Docker/Podman for Development**: Provide a complete containerized development environment with automatic initialization.
3. **VS Code Dev Containers**: Enable one-click development environment setup.
4. **Gitea Actions for CI/CD**: Automated deployment pipelines handle builds and deployments.
## Consequences
- **Positive**: Ensures consistency between development and production environments. Simplifies the setup for new developers to a single "Reopen in Container" action. Improves portability and scalability of the application.
- **Negative**: Requires Docker/Podman installation. Container builds take time on first setup.
## Implementation Details
### Quick Start (Development)
```bash
# Prerequisites:
# - Docker Desktop or Podman installed
# - VS Code with "Dev Containers" extension
# Option 1: VS Code Dev Containers (Recommended)
# 1. Open project in VS Code
# 2. Click "Reopen in Container" when prompted
# 3. Wait for initialization to complete
# 4. Development server starts automatically
# Option 2: Manual Docker Compose
podman-compose -f compose.dev.yml up -d
podman exec -it flyer-crawler-dev bash
./scripts/docker-init.sh
npm run dev:container
```
### Container Services Architecture
```text
┌─────────────────────────────────────────────────────────────┐
│ Development Environment │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ app │ │ postgres │ │ redis │ │
│ │ (Node.js) │───▶│ (PostGIS) │ │ (Cache) │ │
│ │ │───▶│ │ │ │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ :3000/:3001 :5432 :6379 │
│ │
└─────────────────────────────────────────────────────────────┘
```
### compose.dev.yml Services
| Service | Image | Purpose | Healthcheck |
| ---------- | ----------------------- | ---------------------- | ---------------- |
| `app` | Custom (Dockerfile.dev) | Node.js application | HTTP /api/health |
| `postgres` | postgis/postgis:15-3.4 | Database with PostGIS | pg_isready |
| `redis` | redis:alpine | Caching and job queues | redis-cli ping |
### Automatic Initialization
The container initialization script (`scripts/docker-init.sh`) performs the following steps (a condensed sketch follows the list):
1. **npm install** - Installs dependencies into isolated volume
2. **Wait for PostgreSQL** - Polls until database is ready
3. **Wait for Redis** - Polls until Redis is responding
4. **Schema Check** - Detects if database needs initialization
5. **Database Setup** - Runs `npm run db:reset:dev` if needed (schema + seed data)
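The script itself is not reproduced in this ADR; a condensed sketch of what `scripts/docker-init.sh` plausibly does (the wait loops and the empty-schema check are assumptions based on the steps above) is:

```bash
#!/usr/bin/env bash
# Condensed, assumed sketch of scripts/docker-init.sh - not the verbatim script.
set -euo pipefail

npm install

# Wait for PostgreSQL (service name "postgres" from compose.dev.yml)
until pg_isready -h "${DB_HOST:-postgres}" -U "${DB_USER:-postgres}"; do
  echo "Waiting for PostgreSQL..." && sleep 2
done

# Wait for Redis
until redis-cli -h redis ping | grep -q PONG; do
  echo "Waiting for Redis..." && sleep 2
done

# Initialize schema + seed data only if the database looks empty
export PGPASSWORD="${DB_PASSWORD:-postgres}"
TABLES=$(psql -h "${DB_HOST:-postgres}" -U "${DB_USER:-postgres}" -d "${DB_NAME:-flyer_crawler_dev}" \
  -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema = 'public'")
if [ "$TABLES" -eq 0 ]; then
  npm run db:reset:dev
fi
```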
### Development Dockerfile
Located in `Dockerfile.dev`:
```dockerfile
FROM ubuntu:22.04
ENV DEBIAN_FRONTEND=noninteractive
# Install Node.js 20.x LTS + database clients
RUN apt-get update && apt-get install -y \
curl git build-essential python3 \
postgresql-client redis-tools \
&& rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs
WORKDIR /app
ENV NODE_ENV=development
ENV NODE_OPTIONS='--max-old-space-size=8192'
CMD ["bash"]
```
### Environment Configuration
Copy `.env.example` to `.env` for local overrides (optional for containers):
```bash
# Container defaults (set in compose.dev.yml)
DB_HOST=postgres # Use Docker service name, not IP
DB_PORT=5432
DB_USER=postgres
DB_PASSWORD=postgres
DB_NAME=flyer_crawler_dev
REDIS_URL=redis://redis:6379
```
### VS Code Dev Container Configuration
Located in `.devcontainer/devcontainer.json`:
| Lifecycle Hook | Timing | Action |
| ------------------- | ----------------- | ------------------------------ |
| `initializeCommand` | Before container | Start Podman machine (Windows) |
| `postCreateCommand` | Container created | Run `docker-init.sh` |
| `postAttachCommand` | VS Code attached | Start dev server |
### Default Test Accounts
After initialization, these accounts are available:
| Role | Email | Password |
| ----- | ------------------- | --------- |
| Admin | `admin@example.com` | adminpass |
| User | `user@example.com` | userpass |
---
## Production Deployment (PM2)
### PM2 Ecosystem Configuration
Located in `ecosystem.config.cjs`:
```javascript
module.exports = {
apps: [
{
// API Server - Cluster mode for load balancing
name: 'flyer-crawler-api',
script: './node_modules/.bin/tsx',
args: 'server.ts',
max_memory_restart: '500M',
instances: 'max', // Use all CPU cores
exec_mode: 'cluster', // Enable cluster mode
kill_timeout: 5000, // Graceful shutdown timeout
// Restart configuration
max_restarts: 40,
exp_backoff_restart_delay: 100,
min_uptime: '10s',
env_production: {
NODE_ENV: 'production',
cwd: '/var/www/flyer-crawler.projectium.com',
},
env_test: {
NODE_ENV: 'test',
cwd: '/var/www/flyer-crawler-test.projectium.com',
},
},
{
// Background Worker - Single instance
name: 'flyer-crawler-worker',
script: './node_modules/.bin/tsx',
args: 'src/services/worker.ts',
max_memory_restart: '1G',
kill_timeout: 10000, // Workers need more time for jobs
// ... similar config
},
],
};
```
### Deployment Directory Structure
```text
/var/www/
├── flyer-crawler.projectium.com/ # Production
│ ├── server.ts
│ ├── ecosystem.config.cjs
│ ├── package.json
│ ├── flyer-images/
│ │ ├── icons/
│ │ └── archive/
│ └── ...
└── flyer-crawler-test.projectium.com/ # Test environment
└── ... (same structure)
```
### Environment-Specific Configuration
| Environment | Port | Redis DB | PM2 Process Suffix |
| ----------- | ---- | -------- | ------------------ |
| Production | 3000 | 0 | (none) |
| Test | 3001 | 1 | `-test` |
| Development | 3000 | 0 | `-dev` |
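The ADR does not show how the Redis DB index is applied; one illustrative way to derive it from `NODE_ENV` (the names below are assumptions, not the project's confirmed code) is:

```typescript
// Illustrative sketch: select the Redis logical database from the table above.
const redisDbByEnv: Record<string, number> = { production: 0, test: 1, development: 0 };
const redisDb = redisDbByEnv[process.env.NODE_ENV ?? 'development'] ?? 0;

// Yields e.g. redis://redis:6379/1 for the test environment.
const redisUrl = `${process.env.REDIS_URL ?? 'redis://redis:6379'}/${redisDb}`;
```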
### PM2 Commands Reference
```bash
# Start/reload with environment
pm2 startOrReload ecosystem.config.cjs --env production --update-env
# Save process list for startup
pm2 save
# View logs
pm2 logs flyer-crawler-api --lines 50
# Monitor processes
pm2 monit
# List all processes
pm2 list
# Describe process details
pm2 describe flyer-crawler-api
```
### Resource Limits
| Process | Memory Limit | Restart Delay | Kill Timeout |
| ---------------- | ------------ | ------------------------ | ------------ |
| API Server | 500MB | Exponential (100ms base) | 5s |
| Worker | 1GB | Exponential (100ms base) | 10s |
| Analytics Worker | 1GB | Exponential (100ms base) | 10s |
---
## Troubleshooting
### Container Issues
```bash
# Reset everything and start fresh
podman-compose -f compose.dev.yml down -v
podman-compose -f compose.dev.yml up -d --build
# View container logs
podman-compose -f compose.dev.yml logs -f app
# Connect to database manually
podman exec -it flyer-crawler-postgres psql -U postgres -d flyer_crawler_dev
# Rebuild just the app container
podman-compose -f compose.dev.yml build app
```
### Common Issues
| Issue | Solution |
| ------------------------ | --------------------------------------------------------------- |
| "Database not ready" | Wait for postgres healthcheck, or run `docker-init.sh` manually |
| "node_modules not found" | Run `npm install` inside container |
| "Permission denied" | Ensure scripts have execute permission: `chmod +x scripts/*.sh` |
| "Network unreachable" | Use service names (postgres, redis) not IPs |
## Key Files
- `compose.dev.yml` - Docker Compose configuration
- `Dockerfile.dev` - Development container definition
- `.devcontainer/devcontainer.json` - VS Code Dev Container config
- `scripts/docker-init.sh` - Container initialization script
- `.env.example` - Environment variable template
- `ecosystem.config.cjs` - PM2 production configuration
- `.gitea/workflows/deploy-to-prod.yml` - Production deployment pipeline
- `.gitea/workflows/deploy-to-test.yml` - Test deployment pipeline
## Related ADRs
- [ADR-017](./0017-ci-cd-and-branching-strategy.md) - CI/CD Strategy
- [ADR-038](./0038-graceful-shutdown-pattern.md) - Graceful Shutdown Pattern


@@ -2,7 +2,9 @@
**Date**: 2025-12-12
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
@@ -10,9 +12,186 @@ The project has Gitea workflows but lacks a documented standard for how code mov
## Decision
We will formalize the end-to-end CI/CD process using:
1. **Trunk-Based Development**: All work is merged to `main` branch.
2. **Automated Test Deployment**: Every push to `main` triggers deployment to test environment.
3. **Manual Production Deployment**: Production deployments require explicit confirmation.
4. **Semantic Versioning**: Automated version bumping on deployments.
## Consequences
- **Positive**: Automates quality control and creates a safe, repeatable path to production. Increases development velocity and reduces deployment-related errors.
- **Negative**: Initial setup effort for the CI/CD pipeline. May slightly increase the time to merge code due to mandatory checks.
## Implementation Details
### Branching Strategy
**Trunk-Based Development**:
```text
main ─────●─────●─────●─────●─────●─────▶
│ │ │ │ │
│ │ │ │ └── Deploy to Prod (manual)
│ │ │ └── v0.9.70 (patch bump)
│ │ └── Deploy to Test (auto)
│ └── v0.9.69 (patch bump)
└── Feature complete
```
- All development happens on `main` branch
- Feature branches are short-lived (< 1 day)
- Every merge to `main` triggers test deployment
- Production deploys are manual with confirmation
### Pipeline Stages
**Deploy to Test** (Automatic on push to `main`):
```yaml
jobs:
deploy-to-test:
steps:
- Checkout code
- Setup Node.js 20
- Install dependencies (npm ci)
- Bump patch version (npm version patch)
- TypeScript type-check
- Prettier check
- ESLint check
- Run unit tests with coverage
- Run integration tests with coverage
- Run E2E tests with coverage
- Merge coverage reports
- Check database schema hash
- Build React application
- Deploy to test server (rsync)
- Install production dependencies
- Reload PM2 processes
- Update schema hash in database
```
**Deploy to Production** (Manual trigger):
```yaml
on:
workflow_dispatch:
inputs:
confirmation:
description: 'Type "deploy-to-prod" to confirm'
required: true
jobs:
deploy-production:
steps:
- Verify confirmation phrase
- Checkout main branch
- Install dependencies
- Bump minor version (npm version minor)
- Check production schema hash
- Build React application
- Deploy to production server
- Reload PM2 processes
- Update schema hash
```
### Version Bumping Strategy
| Trigger | Version Change | Example |
| -------------------------- | -------------- | --------------- |
| Push to main (test deploy) | Patch bump | 0.9.69 → 0.9.70 |
| Production deploy | Minor bump | 0.9.70 → 0.10.0 |
| Major release | Manual | 0.10.0 → 1.0.0 |
**Commit Message Format**:
```text
ci: Bump version to 0.9.70 [skip ci]
```
The `[skip ci]` tag prevents version bump commits from triggering another workflow.
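Concretely, the bump and the tagged commit can come from a single `npm version` invocation (a sketch; the exact flags used in the workflow are not shown here):

```bash
# npm substitutes %s with the new version and creates the commit + tag.
npm version patch -m "ci: Bump version to %s [skip ci]"
git push origin main --follow-tags
```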
### Database Schema Management
Schema changes are tracked via SHA-256 hash:
```sql
CREATE TABLE public.schema_info (
environment VARCHAR(50) PRIMARY KEY,
schema_hash VARCHAR(64) NOT NULL,
deployed_at TIMESTAMP DEFAULT NOW()
);
```
**Deployment Checks** (a sketch of the hash comparison follows this list):
1. Calculate hash of `sql/master_schema_rollup.sql`
2. Compare with hash in target database
3. If mismatch: **FAIL** deployment (manual migration required)
4. If match: Continue deployment
5. After deploy: Update hash in database
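A minimal sketch of that comparison (the query follows the `schema_info` table above; the surrounding workflow script is not reproduced in this ADR):

```bash
# Compute the local schema hash
LOCAL_HASH=$(sha256sum sql/master_schema_rollup.sql | cut -d' ' -f1)

# Fetch the hash recorded for this environment (assumes the DB_* variables are set)
DB_HASH=$(PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" \
  -tAc "SELECT schema_hash FROM public.schema_info WHERE environment = 'test'")

if [ "$LOCAL_HASH" != "$DB_HASH" ]; then
  echo "Schema hash mismatch - manual migration required." >&2
  exit 1
fi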
### Quality Gates
| Check | Required | Blocking |
| --------------------- | -------- | ---------------------- |
| TypeScript type-check | ✅ | No (continue-on-error) |
| Prettier formatting | ✅ | No |
| ESLint | ✅ | No |
| Unit tests | ✅ | No |
| Integration tests | ✅ | No |
| E2E tests | ✅ | No |
| Schema hash check | ✅ | **Yes** |
| Build | ✅ | **Yes** |
### Environment Variables
Secrets are injected from Gitea repository settings:
| Secret | Test | Production |
| -------------------------------------------------------------- | ------------------ | ------------- |
| `DB_DATABASE_TEST` / `DB_DATABASE_PROD` | flyer-crawler-test | flyer-crawler |
| `REDIS_PASSWORD_TEST` / `REDIS_PASSWORD_PROD` | \*\*\* | \*\*\* |
| `VITE_GOOGLE_GENAI_API_KEY_TEST` / `VITE_GOOGLE_GENAI_API_KEY` | \*\*\* | \*\*\* |
### Coverage Reporting
Coverage reports are generated and published:
```text
https://flyer-crawler-test.projectium.com/coverage/
```
Coverage merging combines the following reports (one possible merge step is sketched after this list):
- Unit test coverage (Vitest)
- Integration test coverage (Vitest)
- E2E test coverage (Vitest)
- Server V8 coverage (c8)
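The merge step itself is not shown in this ADR; one common approach for Istanbul-format reports (the directory names below are assumptions) is to pool the per-suite `coverage-final.json` files and let `nyc` merge and re-report them:

```bash
# Pool per-suite Istanbul JSON reports (paths are assumed, not confirmed)
mkdir -p .nyc_merge
cp coverage-unit/coverage-final.json .nyc_merge/unit.json
cp coverage-integration/coverage-final.json .nyc_merge/integration.json
cp coverage-e2e/coverage-final.json .nyc_merge/e2e.json

# Merge into a single file, then render a combined HTML report
npx nyc merge .nyc_merge .nyc_output/out.json
npx nyc report --reporter=html --report-dir=coverage
```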
### Gitea Workflows
| Workflow | Trigger | Purpose |
| ----------------------------- | ------------ | ------------------------- |
| `deploy-to-test.yml` | Push to main | Automated test deployment |
| `deploy-to-prod.yml` | Manual | Production deployment |
| `manual-db-backup.yml` | Manual | Create database backup |
| `manual-db-restore.yml` | Manual | Restore from backup |
| `manual-db-reset-test.yml` | Manual | Reset test database |
| `manual-db-reset-prod.yml` | Manual | Reset production database |
| `manual-deploy-major.yml` | Manual | Major version release |
| `manual-redis-flush-prod.yml` | Manual | Flush Redis cache |
## Key Files
- `.gitea/workflows/deploy-to-test.yml` - Test deployment pipeline
- `.gitea/workflows/deploy-to-prod.yml` - Production deployment pipeline
- `.gitea/workflows/manual-db-backup.yml` - Database backup workflow
- `ecosystem.config.cjs` - PM2 configuration
## Related ADRs
- [ADR-014](./0014-containerization-and-deployment-strategy.md) - Containerization Strategy
- [ADR-010](./0010-testing-strategy-and-standards.md) - Testing Strategy
- [ADR-019](./0019-data-backup-and-recovery-strategy.md) - Backup Strategy


@@ -2,7 +2,9 @@
**Date**: 2025-12-12
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
@@ -16,3 +18,210 @@ We will implement a formal data backup and recovery strategy. This will involve
- **Positive**: Protects against catastrophic data loss, ensuring business continuity. Provides a clear, tested plan for disaster recovery.
- **Negative**: Requires setup and maintenance of backup scripts and secure storage. Incurs storage costs for backup files.
## Implementation Details
### Backup Workflow
Located in `.gitea/workflows/manual-db-backup.yml`:
```yaml
name: Manual - Backup Production Database
on:
workflow_dispatch:
inputs:
confirmation:
description: 'Type "backup-production-db" to confirm'
required: true
jobs:
backup-database:
runs-on: projectium.com
env:
DB_HOST: ${{ secrets.DB_HOST }}
DB_PORT: ${{ secrets.DB_PORT }}
DB_USER: ${{ secrets.DB_USER }}
DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
DB_NAME: ${{ secrets.DB_NAME_PROD }}
steps:
- name: Validate Secrets
run: |
if [ -z "$DB_HOST" ] || [ -z "$DB_USER" ]; then
echo "ERROR: Database secrets not configured."
exit 1
fi
- name: Create Database Backup
run: |
TIMESTAMP=$(date +'%Y%m%d-%H%M%S')
BACKUP_FILENAME="flyer-crawler-prod-backup-${TIMESTAMP}.sql.gz"
# Create compressed backup
PGPASSWORD="$DB_PASSWORD" pg_dump \
-h "$DB_HOST" -p "$DB_PORT" \
-U "$DB_USER" -d "$DB_NAME" \
--clean --if-exists | gzip > "$BACKUP_FILENAME"
echo "backup_filename=$BACKUP_FILENAME" >> $GITEA_ENV
- name: Upload Backup as Artifact
uses: actions/upload-artifact@v3
with:
name: database-backup
path: ${{ env.backup_filename }}
```
### Restore Workflow
Located in `.gitea/workflows/manual-db-restore.yml`:
```yaml
name: Manual - Restore Database from Backup
on:
workflow_dispatch:
inputs:
confirmation:
description: 'Type "restore-from-backup" to confirm'
required: true
backup_file:
description: 'Path to backup file on server'
required: true
jobs:
restore-database:
steps:
- name: Verify Confirmation
run: |
if [ "${{ inputs.confirmation }}" != "restore-from-backup" ]; then
exit 1
fi
- name: Restore Database
run: |
# Decompress and restore
gunzip -c "${{ inputs.backup_file }}" | \
PGPASSWORD="$DB_PASSWORD" psql \
-h "$DB_HOST" -p "$DB_PORT" \
-U "$DB_USER" -d "$DB_NAME"
```
### Backup Command Reference
**Manual Backup**:
```bash
# Create compressed backup
PGPASSWORD="password" pg_dump \
-h localhost -p 5432 \
-U dbuser -d flyer-crawler \
--clean --if-exists | gzip > backup-$(date +%Y%m%d).sql.gz
# List backup contents (without restoring)
gunzip -c backup-20260109.sql.gz | head -100
```
**Manual Restore**:
```bash
# Restore from compressed backup
gunzip -c backup-20260109.sql.gz | \
PGPASSWORD="password" psql \
-h localhost -p 5432 \
-U dbuser -d flyer-crawler
```
### pg_dump Options
| Option | Purpose |
| ----------------- | ------------------------------ |
| `--clean` | Drop objects before recreating |
| `--if-exists` | Use IF EXISTS when dropping |
| `--no-owner` | Skip ownership commands |
| `--no-privileges` | Skip access privilege commands |
| `-F c` | Custom format (for pg_restore) |
| `-F p` | Plain text SQL (default) |
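For the custom format, a hedged example pairing `-F c` with `pg_restore` (mirroring the plain-text commands above):

```bash
# Custom-format dump: compressed and restorable object-by-object
PGPASSWORD="password" pg_dump \
  -h localhost -p 5432 \
  -U dbuser -d flyer-crawler \
  -F c -f backup-20260109.dump

# Restore with pg_restore; --clean/--if-exists mirror the plain-text options
PGPASSWORD="password" pg_restore \
  -h localhost -p 5432 \
  -U dbuser -d flyer-crawler \
  --clean --if-exists backup-20260109.dump
```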
### Recovery Objectives
| Metric | Target | Current |
| ---------------------------------- | -------- | -------------- |
| **RPO** (Recovery Point Objective) | 24 hours | Manual trigger |
| **RTO** (Recovery Time Objective) | 1 hour | ~15 minutes |
### Backup Retention Policy
| Type | Retention | Storage |
| --------------- | --------- | ---------------- |
| Daily backups | 7 days | Gitea artifacts |
| Weekly backups | 4 weeks | Gitea artifacts |
| Monthly backups | 12 months | Off-site storage |
### Backup Verification
Periodically test backup integrity:
```bash
# Verify backup can be read
gunzip -t backup-20260109.sql.gz
# Test restore to a temporary database
createdb flyer-crawler-restore-test
gunzip -c backup-20260109.sql.gz | psql -d flyer-crawler-restore-test
# Verify data integrity...
dropdb flyer-crawler-restore-test
```
### Disaster Recovery Checklist
1. **Identify the Issue**
- Data corruption?
- Accidental deletion?
- Full database loss?
2. **Select Backup**
- Find most recent valid backup
- Download from Gitea artifacts or off-site storage
3. **Stop Application**
```bash
pm2 stop all
```
4. **Restore Database**
```bash
gunzip -c backup.sql.gz | psql -d flyer-crawler
```
5. **Verify Data**
- Check table row counts
- Verify recent data exists
- Test critical queries
6. **Restart Application**
```bash
pm2 start all
```
7. **Post-Mortem**
- Document incident
- Update procedures if needed
## Key Files
- `.gitea/workflows/manual-db-backup.yml` - Backup workflow
- `.gitea/workflows/manual-db-restore.yml` - Restore workflow
- `.gitea/workflows/manual-db-reset-test.yml` - Reset test database
- `.gitea/workflows/manual-db-reset-prod.yml` - Reset production database
- `sql/master_schema_rollup.sql` - Current schema definition
## Related ADRs
- [ADR-013](./0013-database-schema-migration-strategy.md) - Schema Migration Strategy
- [ADR-017](./0017-ci-cd-and-branching-strategy.md) - CI/CD Strategy


@@ -0,0 +1,147 @@
# ADR-032: Rate Limiting Strategy
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
Public-facing APIs are vulnerable to abuse through excessive requests, whether from malicious actors attempting denial-of-service attacks, automated scrapers, or accidental loops in client code. Without proper rate limiting, the application could:
1. **Experience degraded performance**: Excessive requests can overwhelm database connections and server resources
2. **Incur unexpected costs**: AI service calls (Gemini API) and external APIs (Google Maps) are billed per request
3. **Allow credential stuffing**: Login endpoints without limits enable brute-force attacks
4. **Suffer from data scraping**: Public endpoints could be scraped at high volume
## Decision
We will implement a tiered rate limiting strategy using `express-rate-limit` middleware, with different limits based on endpoint sensitivity and resource cost.
### Tier System
| Tier | Window | Max Requests | Use Case |
| --------------------------- | ------ | ------------ | -------------------------------- |
| **Authentication (Strict)** | 15 min | 5 | Login, registration |
| **Sensitive Operations** | 1 hour | 5 | Password changes, email updates |
| **AI/Costly Operations** | 15 min | 10-20 | Gemini API calls, geocoding |
| **File Uploads** | 15 min | 10-20 | Flyer uploads, avatar uploads |
| **Batch Operations** | 15 min | 50 | Bulk updates |
| **User Read** | 15 min | 100 | Standard authenticated endpoints |
| **Public Read** | 15 min | 100 | Public data endpoints |
| **Tracking/High-Volume** | 15 min | 150-200 | Analytics, reactions |
### Rate Limiter Configuration
All rate limiters share a standard configuration:
```typescript
const standardConfig = {
standardHeaders: true, // Return rate limit info in headers
legacyHeaders: false, // Disable deprecated X-RateLimit headers
skip: shouldSkipRateLimit, // Allow bypassing in test environment
};
```
### Test Environment Bypass
Rate limiting is bypassed during integration and E2E tests to avoid test flakiness:
```typescript
export const shouldSkipRateLimit = (req: Request): boolean => {
return process.env.NODE_ENV === 'test';
};
```
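Putting the pieces together, a sketch of how an individual limiter such as `loginLimiter` is plausibly defined with `express-rate-limit` (option names follow v6 of the library; the message text matches the example response below):

```typescript
import rateLimit from 'express-rate-limit';
import { shouldSkipRateLimit } from '../utils/rateLimit';

// Strict authentication tier: 5 requests per 15 minutes per IP.
export const loginLimiter = rateLimit({
  windowMs: 15 * 60 * 1000,
  max: 5, // renamed to `limit` in express-rate-limit v7
  message: {
    message: 'Too many login attempts from this IP, please try again after 15 minutes.',
  },
  standardHeaders: true,
  legacyHeaders: false,
  skip: shouldSkipRateLimit,
});
```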
## Implementation Details
### Available Rate Limiters
| Limiter | Window | Max | Endpoint Examples |
| ---------------------------- | ------ | --- | --------------------------------- |
| `loginLimiter` | 15 min | 5 | POST /api/auth/login |
| `registerLimiter` | 1 hour | 5 | POST /api/auth/register |
| `forgotPasswordLimiter` | 15 min | 5 | POST /api/auth/forgot-password |
| `resetPasswordLimiter` | 15 min | 10 | POST /api/auth/reset-password |
| `refreshTokenLimiter` | 15 min | 20 | POST /api/auth/refresh |
| `logoutLimiter` | 15 min | 10 | POST /api/auth/logout |
| `publicReadLimiter` | 15 min | 100 | GET /api/flyers, GET /api/recipes |
| `userReadLimiter` | 15 min | 100 | GET /api/users/profile |
| `userUpdateLimiter` | 15 min | 100 | PUT /api/users/profile |
| `userSensitiveUpdateLimiter` | 1 hour | 5 | PUT /api/auth/change-password |
| `adminTriggerLimiter` | 15 min | 30 | POST /api/admin/jobs/\* |
| `aiGenerationLimiter` | 15 min | 20 | POST /api/ai/analyze |
| `aiUploadLimiter` | 15 min | 10 | POST /api/ai/upload-and-process |
| `geocodeLimiter` | 1 hour | 100 | GET /api/users/geocode |
| `priceHistoryLimiter` | 15 min | 50 | GET /api/price-history/\* |
| `reactionToggleLimiter` | 15 min | 150 | POST /api/reactions/toggle |
| `trackingLimiter` | 15 min | 200 | POST /api/personalization/track |
| `batchLimiter` | 15 min | 50 | PATCH /api/budgets/batch |
### Usage Pattern
```typescript
import { loginLimiter, userReadLimiter } from '../config/rateLimiters';
// Apply to individual routes
router.post('/login', loginLimiter, validateRequest(loginSchema), async (req, res, next) => {
// handler
});
// Or apply to entire router for consistent limits
router.use(userReadLimiter);
router.get('/me', async (req, res, next) => {
/* handler */
});
```
### Response Headers
When rate limiting is active, responses include standard headers:
```
RateLimit-Limit: 100
RateLimit-Remaining: 95
RateLimit-Reset: 900
```
### Rate Limit Exceeded Response
When a client exceeds their limit:
```json
{
"message": "Too many login attempts from this IP, please try again after 15 minutes."
}
```
HTTP Status: `429 Too Many Requests`
## Key Files
- `src/config/rateLimiters.ts` - Rate limiter definitions
- `src/utils/rateLimit.ts` - Helper functions (test bypass)
## Consequences
### Positive
- **Security**: Protects against brute-force and credential stuffing attacks
- **Cost Control**: Prevents runaway costs from AI/external API abuse
- **Fair Usage**: Ensures all users get reasonable service access
- **DDoS Mitigation**: Provides basic protection against request flooding
### Negative
- **Legitimate User Impact**: Aggressive users may hit limits during normal use
- **IP-Based Limitations**: Shared IPs (offices, VPNs) may cause false positives
- **No Distributed State**: Rate limits are per-instance, not cluster-wide (would need Redis store for that)
## Future Enhancements
1. **Redis Store**: Implement distributed rate limiting with Redis for multi-instance deployments
2. **User-Based Limits**: Track limits per authenticated user rather than just IP
3. **Dynamic Limits**: Adjust limits based on user tier (free vs premium)
4. **Monitoring Dashboard**: Track rate limit hits in admin dashboard
5. **Allowlisting**: Allow specific IPs (monitoring services) to bypass limits

@@ -0,0 +1,196 @@
# ADR-033: File Upload and Storage Strategy
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
The application handles file uploads for flyer images and user avatars. Without a consistent strategy, file uploads can introduce security vulnerabilities (path traversal, malicious file types), performance issues (unbounded file sizes), and maintenance challenges (inconsistent storage locations).
Key concerns:
1. **Security**: Preventing malicious file uploads, path traversal attacks, and unsafe filenames
2. **Storage Organization**: Consistent directory structure for uploaded files
3. **Size Limits**: Preventing resource exhaustion from oversized uploads
4. **File Type Validation**: Ensuring only expected file types are accepted
5. **Cleanup**: Managing temporary and orphaned files
## Decision
We will implement a centralized file upload strategy using `multer` middleware with custom storage configurations, file type validation, and size limits.
### Storage Types
| Type | Directory | Purpose | Size Limit |
| -------- | ------------------------------ | ------------------------------ | ---------- |
| `flyer` | `$STORAGE_PATH` (configurable) | Flyer images for AI processing | 100MB |
| `avatar` | `public/uploads/avatars/` | User profile pictures | 5MB |
### Filename Strategy
All uploaded files are renamed to prevent:
- Path traversal attacks
- Filename collisions
- Problematic characters in filenames
**Pattern**: `{fieldname}-{timestamp}-{random}-{sanitized-original}`
Example: `flyer-1704825600000-829461742-grocery-flyer.jpg`
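A minimal sketch of a `multer.diskStorage` filename callback that yields this pattern (the actual implementation lives in `multer.middleware.ts`; treat the details here as illustrative):

```typescript
import multer from 'multer';
import { sanitizeFilename } from '../utils/stringUtils';

// Sketch: produces `{fieldname}-{timestamp}-{random}-{sanitized-original}`.
const storage = multer.diskStorage({
  destination: flyerStoragePath, // chosen per storage type in the real middleware
  filename: (_req, file, cb) => {
    const unique = `${Date.now()}-${Math.round(Math.random() * 1e9)}`;
    cb(null, `${file.fieldname}-${unique}-${sanitizeFilename(file.originalname)}`);
  },
});
```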
### File Type Validation
Only image files (`image/*` MIME type) are accepted. Non-image uploads are rejected with a structured `ValidationError`.
## Implementation Details
### Multer Configuration Factory
```typescript
import { createUploadMiddleware } from '../middleware/multer.middleware';
// For flyer uploads (100MB limit)
const flyerUpload = createUploadMiddleware({
storageType: 'flyer',
fileSize: 100 * 1024 * 1024, // 100MB
fileFilter: 'image',
});
// For avatar uploads (5MB limit)
const avatarUpload = createUploadMiddleware({
storageType: 'avatar',
fileSize: 5 * 1024 * 1024, // 5MB
fileFilter: 'image',
});
```
### Storage Configuration
```typescript
// Configurable via environment variable
export const flyerStoragePath =
process.env.STORAGE_PATH || '/var/www/flyer-crawler.projectium.com/flyer-images';
// Relative to project root
export const avatarStoragePath = path.join(process.cwd(), 'public', 'uploads', 'avatars');
```
### Filename Sanitization
The `sanitizeFilename` utility removes dangerous characters:
```typescript
// Removes: path separators, null bytes, special characters
// Keeps: alphanumeric, dots, hyphens, underscores
const sanitized = sanitizeFilename(file.originalname);
```
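The real implementation lives in `src/utils/stringUtils.ts`; a minimal sketch consistent with the rules above:

```typescript
import path from 'path';

// Sketch: basename() drops directory components (and with them, path separators);
// the replacements keep only alphanumerics, dots, hyphens, and underscores.
export function sanitizeFilename(original: string): string {
  return path
    .basename(original)
    .replace(/\0/g, '')
    .replace(/[^a-zA-Z0-9._-]/g, '_');
}
```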
### Required File Validation Middleware
Ensures a file was uploaded before processing:
```typescript
import { requireFileUpload } from '../middleware/fileUpload.middleware';
router.post(
'/upload',
flyerUpload.single('flyerImage'),
requireFileUpload('flyerImage'), // 400 error if missing
handleMulterError,
async (req, res) => {
// req.file is guaranteed to exist
},
);
```
### Error Handling
```typescript
import { handleMulterError } from '../middleware/multer.middleware';
// Catches multer-specific errors (file too large, etc.)
router.use(handleMulterError);
```
### Directory Initialization
Storage directories are created automatically at application startup:
```typescript
import { promises as fs } from 'fs';

(async () => {
  await fs.mkdir(flyerStoragePath, { recursive: true });
  await fs.mkdir(avatarStoragePath, { recursive: true });
})().catch((err) => {
  // Fail visibly: uploads cannot work without these directories.
  logger.error({ err }, 'Failed to create upload storage directories');
});
```
### Test Environment Handling
In test environments, files use predictable names for easy cleanup:
```typescript
if (process.env.NODE_ENV === 'test') {
return cb(null, `test-avatar${path.extname(file.originalname) || '.png'}`);
}
```
## Usage Example
```typescript
import { createUploadMiddleware, handleMulterError } from '../middleware/multer.middleware';
import { requireFileUpload } from '../middleware/fileUpload.middleware';
import { validateRequest } from '../middleware/validation.middleware';
import { aiUploadLimiter } from '../config/rateLimiters';
const flyerUpload = createUploadMiddleware({
storageType: 'flyer',
fileSize: 100 * 1024 * 1024,
fileFilter: 'image',
});
router.post(
'/upload-and-process',
aiUploadLimiter,
validateRequest(uploadSchema),
flyerUpload.single('flyerImage'),
requireFileUpload('flyerImage'),
handleMulterError,
async (req, res, next) => {
const filePath = req.file!.path;
// Process the uploaded file...
},
);
```
## Key Files
- `src/middleware/multer.middleware.ts` - Multer configuration and storage handlers
- `src/middleware/fileUpload.middleware.ts` - File requirement validation
- `src/utils/stringUtils.ts` - Filename sanitization utilities
- `src/utils/fileUtils.ts` - File system utilities (deletion, etc.)
## Consequences
### Positive
- **Security**: Prevents path traversal and malicious uploads through sanitization and validation
- **Consistency**: All uploads follow the same patterns and storage organization
- **Predictability**: Test environments use predictable filenames for cleanup
- **Extensibility**: Factory pattern allows easy addition of new upload types
### Negative
- **Disk Storage**: Files stored on disk require backup and cleanup strategies
- **Single Server**: Current implementation doesn't support cloud storage (S3, etc.)
- **No Virus Scanning**: Files aren't scanned for malware before processing
## Future Enhancements
1. **Cloud Storage**: Support for S3/GCS as storage backend
2. **Virus Scanning**: Integrate ClamAV or cloud-based scanning
3. **Image Optimization**: Automatic resizing/compression before storage
4. **CDN Integration**: Serve uploaded files through CDN
5. **Cleanup Job**: Scheduled job to remove orphaned/temporary files
6. **Presigned URLs**: Direct upload to cloud storage to reduce server load

@@ -0,0 +1,345 @@
# ADR-034: Repository Pattern Standards
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
The application uses a repository pattern to abstract database access from business logic. However, without clear standards, repository implementations can diverge in:
1. **Method naming**: Inconsistent verbs (get vs find vs fetch)
2. **Return types**: Some methods return `undefined`, others throw errors
3. **Error handling**: Varied approaches to database error handling
4. **Transaction participation**: Unclear how methods participate in transactions
5. **Logging patterns**: Inconsistent logging context and messages
This ADR establishes standards for all repository implementations, complementing ADR-001 (Error Handling) and ADR-002 (Transaction Management).
## Decision
All repository implementations MUST follow these standards:
### Method Naming Conventions
| Prefix | Returns | Behavior on Not Found |
| --------- | ---------------------- | ------------------------------------ |
| `get*` | Single entity | Throws `NotFoundError` |
| `find*` | Entity or `null` | Returns `null` |
| `list*` | Array (possibly empty) | Returns `[]` |
| `create*` | Created entity | Throws on constraint violation |
| `update*` | Updated entity | Throws `NotFoundError` if not exists |
| `delete*` | `void` or `boolean` | Throws `NotFoundError` if not exists |
| `exists*` | `boolean` | Returns true/false |
| `count*` | `number` | Returns count |
### Error Handling Pattern
All repository methods MUST use the centralized `handleDbError` function:
```typescript
import { handleDbError, NotFoundError } from './errors.db';
async getById(id: number): Promise<Entity> {
try {
const result = await this.pool.query('SELECT * FROM entities WHERE id = $1', [id]);
if (result.rows.length === 0) {
throw new NotFoundError(`Entity with ID ${id} not found.`);
}
return result.rows[0];
} catch (error) {
handleDbError(error, this.logger, 'Database error in getById', { id }, {
entityName: 'Entity',
defaultMessage: 'Failed to fetch entity.',
});
}
}
```
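Note that the `catch` block neither returns nor rethrows explicitly, so this pattern only type-checks if `handleDbError` is declared to return `never`, i.e. it always throws. A minimal sketch of that contract (the real signature lives in `errors.db.ts`; the `UniqueConstraintError` constructor and the pg error-code translation are assumptions based on the options used later in this ADR):

```typescript
import type { Logger } from 'pino';
import { NotFoundError, UniqueConstraintError } from './errors.db';

interface DbErrorOptions {
  entityName: string;
  uniqueMessage?: string;
  defaultMessage: string;
}

// Sketch: always throws, so callers' catch blocks need no return statement.
export function handleDbError(
  error: unknown,
  log: Logger,
  message: string,
  context: Record<string, unknown>,
  options: DbErrorOptions,
): never {
  // Domain errors raised inside the try block pass through unchanged.
  if (error instanceof NotFoundError) throw error;
  log.error({ err: error, ...context }, message);
  // Hypothetical translation of pg's unique-violation code (23505).
  if ((error as { code?: string }).code === '23505' && options.uniqueMessage) {
    throw new UniqueConstraintError(options.uniqueMessage);
  }
  throw new Error(options.defaultMessage);
}
```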
### Transaction Participation
Repository methods that need to participate in transactions MUST accept an optional `PoolClient`:
```typescript
class UserRepository {
  private pool?: Pool;
  private client?: PoolClient;

  constructor(poolOrClient?: Pool | PoolClient) {
    // A PoolClient is distinguished by its release() method, which Pool lacks.
    if (poolOrClient && 'release' in poolOrClient) {
      this.client = poolOrClient as PoolClient;
    } else {
      this.pool = (poolOrClient as Pool | undefined) ?? getPool();
    }
  }

  private get queryable() {
    // Prefer the transactional client when one was supplied.
    return this.client ?? this.pool!;
  }
}
```
Or using the function-based pattern:
```typescript
async function createUser(userData: CreateUserInput, client?: PoolClient): Promise<User> {
const queryable = client || getPool();
// ...
}
```
## Implementation Details
### Repository File Structure
```
src/services/db/
├── connection.db.ts # Pool management, withTransaction
├── errors.db.ts # Custom error types, handleDbError
├── index.db.ts # Barrel exports
├── user.db.ts # User repository
├── user.db.test.ts # User repository tests
├── flyer.db.ts # Flyer repository
├── flyer.db.test.ts # Flyer repository tests
└── ... # Other domain repositories
```
### Standard Repository Template
```typescript
// src/services/db/example.db.ts
import { Pool, PoolClient } from 'pg';
import { getPool } from './connection.db';
import { handleDbError, NotFoundError } from './errors.db';
import { logger } from '../logger.server';
import type { Example, CreateExampleInput, UpdateExampleInput } from '../../types';
const log = logger.child({ module: 'example.db' });
/**
* Gets an example by ID.
* @throws {NotFoundError} If the example doesn't exist.
*/
export async function getExampleById(id: number, client?: PoolClient): Promise<Example> {
const queryable = client || getPool();
try {
const result = await queryable.query<Example>('SELECT * FROM examples WHERE id = $1', [id]);
if (result.rows.length === 0) {
throw new NotFoundError(`Example with ID ${id} not found.`);
}
return result.rows[0];
} catch (error) {
handleDbError(
error,
log,
'Database error in getExampleById',
{ id },
{
entityName: 'Example',
defaultMessage: 'Failed to fetch example.',
},
);
}
}
/**
* Finds an example by slug, returns null if not found.
*/
export async function findExampleBySlug(
slug: string,
client?: PoolClient,
): Promise<Example | null> {
const queryable = client || getPool();
try {
const result = await queryable.query<Example>('SELECT * FROM examples WHERE slug = $1', [slug]);
return result.rows[0] || null;
} catch (error) {
handleDbError(
error,
log,
'Database error in findExampleBySlug',
{ slug },
{
entityName: 'Example',
defaultMessage: 'Failed to find example.',
},
);
}
}
/**
* Lists all examples with optional pagination.
*/
export async function listExamples(
options: { limit?: number; offset?: number } = {},
client?: PoolClient,
): Promise<Example[]> {
const queryable = client || getPool();
const { limit = 100, offset = 0 } = options;
try {
const result = await queryable.query<Example>(
'SELECT * FROM examples ORDER BY created_at DESC LIMIT $1 OFFSET $2',
[limit, offset],
);
return result.rows;
} catch (error) {
handleDbError(
error,
log,
'Database error in listExamples',
{ limit, offset },
{
entityName: 'Example',
defaultMessage: 'Failed to list examples.',
},
);
}
}
/**
* Creates a new example.
* @throws {UniqueConstraintError} If slug already exists.
*/
export async function createExample(
input: CreateExampleInput,
client?: PoolClient,
): Promise<Example> {
const queryable = client || getPool();
try {
const result = await queryable.query<Example>(
`INSERT INTO examples (name, slug, description)
VALUES ($1, $2, $3)
RETURNING *`,
[input.name, input.slug, input.description],
);
return result.rows[0];
} catch (error) {
handleDbError(
error,
log,
'Database error in createExample',
{ input },
{
entityName: 'Example',
uniqueMessage: 'An example with this slug already exists.',
defaultMessage: 'Failed to create example.',
},
);
}
}
/**
* Updates an existing example.
* @throws {NotFoundError} If the example doesn't exist.
*/
export async function updateExample(
id: number,
input: UpdateExampleInput,
client?: PoolClient,
): Promise<Example> {
const queryable = client || getPool();
try {
const result = await queryable.query<Example>(
`UPDATE examples
SET name = COALESCE($2, name), description = COALESCE($3, description)
WHERE id = $1
RETURNING *`,
[id, input.name, input.description],
);
if (result.rows.length === 0) {
throw new NotFoundError(`Example with ID ${id} not found.`);
}
return result.rows[0];
} catch (error) {
handleDbError(
error,
log,
'Database error in updateExample',
{ id, input },
{
entityName: 'Example',
defaultMessage: 'Failed to update example.',
},
);
}
}
/**
* Deletes an example.
* @throws {NotFoundError} If the example doesn't exist.
*/
export async function deleteExample(id: number, client?: PoolClient): Promise<void> {
const queryable = client || getPool();
try {
const result = await queryable.query('DELETE FROM examples WHERE id = $1', [id]);
if (result.rowCount === 0) {
throw new NotFoundError(`Example with ID ${id} not found.`);
}
} catch (error) {
handleDbError(
error,
log,
'Database error in deleteExample',
{ id },
{
entityName: 'Example',
defaultMessage: 'Failed to delete example.',
},
);
}
}
```
### Using with Transactions
```typescript
import { withTransaction } from './connection.db';
import { createExample, updateExample } from './example.db';
import { createRelated } from './related.db';
async function createExampleWithRelated(data: ComplexInput): Promise<Example> {
return withTransaction(async (client) => {
const example = await createExample(data.example, client);
await createRelated({ exampleId: example.id, ...data.related }, client);
return example;
});
}
```
## Key Files
- `src/services/db/connection.db.ts` - `getPool()`, `withTransaction()`
- `src/services/db/errors.db.ts` - `handleDbError()`, custom error classes
- `src/services/db/index.db.ts` - Barrel exports for all repositories
- `src/services/db/*.db.ts` - Individual domain repositories
## Consequences
### Positive
- **Consistency**: All repositories follow the same patterns
- **Predictability**: Method names clearly indicate behavior
- **Testability**: Consistent interfaces make mocking straightforward
- **Error Handling**: Centralized error handling prevents inconsistent responses
- **Transaction Safety**: Clear pattern for transaction participation
### Negative
- **Learning Curve**: Developers must learn and follow conventions
- **Boilerplate**: Each method requires similar error handling structure
- **Refactoring**: Existing repositories may need updates to conform
## Compliance Checklist
For new repository methods:
- [ ] Method name follows prefix convention (get/find/list/create/update/delete)
- [ ] Throws `NotFoundError` for `get*` methods when entity not found
- [ ] Returns `null` for `find*` methods when entity not found
- [ ] Uses `handleDbError` for database error handling
- [ ] Accepts optional `PoolClient` parameter for transaction support
- [ ] Includes JSDoc with `@throws` documentation
- [ ] Has corresponding unit tests

@@ -0,0 +1,328 @@
# ADR-035: Service Layer Architecture
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
The application has evolved to include multiple service types:
1. **Repository services** (`*.db.ts`): Direct database access
2. **Business services** (`*Service.ts`): Business logic orchestration
3. **External services** (`*Service.server.ts`): Integration with external APIs
4. **Infrastructure services** (`logger`, `redis`, `queues`): Cross-cutting concerns
Without clear boundaries, business logic can leak into routes, repositories can contain business rules, and services can become tightly coupled.
## Decision
We will establish a clear layered architecture with defined responsibilities for each layer:
### Layer Responsibilities
```
┌─────────────────────────────────────────────────────────────────┐
│ Routes Layer │
│ - Request/response handling │
│ - Input validation (via middleware) │
│ - Authentication/authorization │
│ - Rate limiting │
│ - Response formatting │
└─────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ Services Layer │
│ - Business logic orchestration │
│ - Transaction coordination │
│ - External API integration │
│ - Cross-repository operations │
│ - Event publishing │
└─────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ Repository Layer │
│ - Direct database access │
│ - Query construction │
│ - Entity mapping │
│ - Error translation │
└─────────────────────────────────────────────────────────────────┘
```
### Service Types and Naming
| Type | Pattern | Suffix | Example |
| ------------------- | ------------------------------- | ------------- | --------------------- |
| Business Service | Orchestrates business logic | `*Service.ts` | `authService.ts` |
| Server-Only Service | External APIs, server-side only | `*.server.ts` | `aiService.server.ts` |
| Database Repository | Direct DB access | `*.db.ts` | `user.db.ts` |
| Infrastructure | Cross-cutting concerns | Descriptive | `logger.server.ts` |
### Service Dependencies
```
Routes → Business Services → Repositories
                           → External Services
                           → Infrastructure (logger, redis, queues)
```
**Rules**:
- Routes MUST NOT directly access repositories (except simple CRUD)
- Repositories MUST NOT call other repositories (use services)
- Services MAY call other services
- Infrastructure services MAY be called from any layer
## Implementation Details
### Business Service Pattern
```typescript
// src/services/authService.ts
import { withTransaction } from './db/connection.db';
import * as userRepo from './db/user.db';
import * as profileRepo from './db/personalization.db';
import { emailService } from './emailService.server';
import { logger } from './logger.server';
const log = logger.child({ service: 'auth' });
interface LoginResult {
user: UserProfile;
accessToken: string;
refreshToken: string;
}
export const authService = {
/**
* Registers a new user and sends welcome email.
* Orchestrates multiple repositories in a transaction.
*/
async registerAndLoginUser(
email: string,
password: string,
fullName?: string,
avatarUrl?: string,
reqLog?: Logger,
): Promise<LoginResult> {
const log = reqLog || logger;
return withTransaction(async (client) => {
// 1. Create user (repository)
const user = await userRepo.createUser({ email, password }, client);
// 2. Create profile (repository)
await profileRepo.createProfile(
{
userId: user.user_id,
fullName,
avatarUrl,
},
client,
);
// 3. Generate tokens (business logic)
const { accessToken, refreshToken } = this.generateTokens(user);
// 4. Send welcome email (external service, non-blocking)
emailService.sendWelcomeEmail(email, fullName).catch((err) => {
log.warn({ err, email }, 'Failed to send welcome email');
});
log.info({ userId: user.user_id }, 'User registered successfully');
return {
user: await this.buildUserProfile(user.user_id, client),
accessToken,
refreshToken,
};
});
},
// ... other methods
};
```
### Server-Only Service Pattern
```typescript
// src/services/aiService.server.ts
// This file MUST only be imported by server-side code
import { GenAI } from '@google/genai';
import { config } from '../config/env';
import { logger } from './logger.server';
const log = logger.child({ service: 'ai' });
class AiService {
private client: GenAI;
constructor() {
this.client = new GenAI({ apiKey: config.ai.geminiApiKey });
}
async analyzeImage(imagePath: string): Promise<AnalysisResult> {
log.info({ imagePath }, 'Starting image analysis');
// ... implementation
}
}
export const aiService = new AiService();
```
### Route Handler Pattern
```typescript
// src/routes/auth.routes.ts
import { Router } from 'express';
import { validateRequest } from '../middleware/validation.middleware';
import { loginLimiter } from '../config/rateLimiters';
import { authService } from '../services/authService';
const router = Router();
// Route is thin - delegates to service
router.post(
'/register',
registerLimiter,
validateRequest(registerSchema),
async (req, res, next) => {
try {
const { email, password, full_name } = req.body;
// Delegate to service
const result = await authService.registerAndLoginUser(
email,
password,
full_name,
undefined,
req.log, // Pass request-scoped logger
);
// Format response
res.status(201).json({
message: 'Registration successful',
user: result.user,
accessToken: result.accessToken,
});
} catch (error) {
next(error); // Let error handler deal with it
}
},
);
```
### Service File Organization
```
src/services/
├── db/ # Repository layer
│ ├── connection.db.ts # Pool, transactions
│ ├── errors.db.ts # DB error types
│ ├── user.db.ts # User repository
│ ├── flyer.db.ts # Flyer repository
│ └── index.db.ts # Barrel exports
├── authService.ts # Authentication business logic
├── userService.ts # User management business logic
├── gamificationService.ts # Gamification business logic
├── aiService.server.ts # AI API integration (server-only)
├── emailService.server.ts # Email sending (server-only)
├── geocodingService.server.ts # Geocoding API (server-only)
├── cacheService.server.ts # Redis caching (server-only)
├── queueService.server.ts # BullMQ queues (server-only)
├── logger.server.ts # Pino logger (server-only)
└── logger.client.ts # Client-side logger
```
### Dependency Injection for Testing
Services should support dependency injection for easier testing:
```typescript
// Production: use singleton
export const authService = createAuthService();
// Testing: inject mocks
export function createAuthService(deps?: Partial<AuthServiceDeps>) {
const userRepo = deps?.userRepo || defaultUserRepo;
const emailService = deps?.emailService || defaultEmailService;
return {
async registerAndLoginUser(...) { /* ... */ },
};
}
```
## Key Files
### Infrastructure Services
- `src/services/logger.server.ts` - Server-side structured logging
- `src/services/logger.client.ts` - Client-side logging
- `src/services/redis.server.ts` - Redis connection management
- `src/services/queueService.server.ts` - BullMQ queue management
- `src/services/cacheService.server.ts` - Caching abstraction
### Business Services
- `src/services/authService.ts` - Authentication flows
- `src/services/userService.ts` - User management
- `src/services/gamificationService.ts` - Achievements, leaderboards
- `src/services/flyerProcessingService.server.ts` - Flyer pipeline
### External Integration Services
- `src/services/aiService.server.ts` - Gemini AI integration
- `src/services/emailService.server.ts` - Email sending
- `src/services/geocodingService.server.ts` - Address geocoding
## Consequences
### Positive
- **Separation of Concerns**: Clear boundaries between layers
- **Testability**: Services can be tested in isolation with mocked dependencies
- **Reusability**: Business logic in services can be used by multiple routes
- **Maintainability**: Changes to one layer don't ripple through others
- **Transaction Safety**: Services coordinate transactions across repositories
### Negative
- **Indirection**: More layers mean more code to navigate
- **Potential Over-Engineering**: Simple CRUD operations don't need full service layer
- **Coordination Overhead**: Team must agree on layer boundaries
## Guidelines
### When to Create a Service
Create a business service when:
- Logic spans multiple repositories
- External APIs need to be called
- Complex business rules exist
- The same logic is needed by multiple routes
- Transaction coordination is required
### When Direct Repository Access is OK
Routes can directly use repositories for:
- Simple single-entity CRUD operations
- Read-only queries with no business logic
- Operations that don't need transaction coordination
### Service Method Guidelines
- Accept a request-scoped logger as an optional parameter
- Return domain objects, not HTTP-specific responses
- Throw domain errors, let routes handle HTTP status codes
- Use `withTransaction` for multi-repository operations
- Log business events (user registered, order placed, etc.)

@@ -0,0 +1,212 @@
# ADR-036: Event Bus and Pub/Sub Pattern
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
Modern web applications often need to handle cross-component communication without creating tight coupling between modules. In our application, several scenarios require broadcasting events across the system:
1. **Session Expiry**: When a user's session expires, multiple components need to respond (auth state, UI notifications, API client).
2. **Real-time Updates**: When data changes on the server, multiple UI components may need to update.
3. **Cross-Component Communication**: Independent components need to communicate without direct references to each other.
Traditional approaches like prop drilling or global state management can lead to tightly coupled code that is difficult to maintain and test.
## Decision
We will implement a lightweight, in-memory event bus pattern using a publish/subscribe (pub/sub) architecture. This provides:
1. **Decoupled Communication**: Publishers and subscribers don't need to know about each other.
2. **Event-Driven Architecture**: Components react to events rather than polling for changes.
3. **Testability**: Events can be easily mocked and verified in tests.
### Design Principles
- **Singleton Pattern**: A single event bus instance is shared across the application.
- **Type-Safe Events**: Event names are string constants to prevent typos.
- **Memory Management**: Subscribers must unsubscribe when components unmount to prevent memory leaks.
## Implementation Details
### EventBus Class
Located in `src/services/eventBus.ts`:
```typescript
type EventCallback = (data?: any) => void;
export class EventBus {
private listeners: { [key: string]: EventCallback[] } = {};
on(event: string, callback: EventCallback): void {
if (!this.listeners[event]) {
this.listeners[event] = [];
}
this.listeners[event].push(callback);
}
off(event: string, callback: EventCallback): void {
if (!this.listeners[event]) return;
this.listeners[event] = this.listeners[event].filter((l) => l !== callback);
}
dispatch(event: string, data?: any): void {
if (!this.listeners[event]) return;
this.listeners[event].forEach((callback) => callback(data));
}
}
// Singleton instance
export const eventBus = new EventBus();
```
### Event Constants
Define event names as constants to prevent typos:
```typescript
// src/constants/events.ts
export const EVENTS = {
SESSION_EXPIRED: 'session:expired',
SESSION_REFRESHED: 'session:refreshed',
USER_LOGGED_OUT: 'user:loggedOut',
DATA_UPDATED: 'data:updated',
NOTIFICATION_RECEIVED: 'notification:received',
} as const;
```
### React Hook for Event Subscription
```typescript
// src/hooks/useEventBus.ts
import { useEffect } from 'react';
import { eventBus } from '../services/eventBus';
export function useEventBus(event: string, callback: (data?: any) => void) {
useEffect(() => {
eventBus.on(event, callback);
// Cleanup on unmount
return () => {
eventBus.off(event, callback);
};
}, [event, callback]);
}
```
### Usage Examples
**Publishing Events**:
```typescript
import { eventBus } from '../services/eventBus';
import { EVENTS } from '../constants/events';
// In API client when session expires
function handleSessionExpiry() {
eventBus.dispatch(EVENTS.SESSION_EXPIRED, { reason: 'token_expired' });
}
```
**Subscribing in Components**:
```typescript
import { useCallback } from 'react';
import { useEventBus } from '../hooks/useEventBus';
import { EVENTS } from '../constants/events';
function AuthenticatedComponent() {
const handleSessionExpired = useCallback((data) => {
console.log('Session expired:', data.reason);
// Redirect to login, show notification, etc.
}, []);
useEventBus(EVENTS.SESSION_EXPIRED, handleSessionExpired);
return <div>Protected Content</div>;
}
```
**Subscribing in Non-React Code**:
```typescript
import { eventBus } from '../services/eventBus';
import { EVENTS } from '../constants/events';
// In API client
const handleLogout = () => {
clearAuthToken();
};
eventBus.on(EVENTS.USER_LOGGED_OUT, handleLogout);
```
### Testing
The EventBus is fully tested in `src/services/eventBus.test.ts`:
```typescript
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { EventBus } from './eventBus';
describe('EventBus', () => {
let bus: EventBus;
beforeEach(() => {
bus = new EventBus();
});
it('should call registered listeners when event is dispatched', () => {
const callback = vi.fn();
bus.on('test', callback);
bus.dispatch('test', { value: 42 });
expect(callback).toHaveBeenCalledWith({ value: 42 });
});
it('should unsubscribe listeners correctly', () => {
const callback = vi.fn();
bus.on('test', callback);
bus.off('test', callback);
bus.dispatch('test');
expect(callback).not.toHaveBeenCalled();
});
it('should handle multiple listeners for the same event', () => {
const callback1 = vi.fn();
const callback2 = vi.fn();
bus.on('test', callback1);
bus.on('test', callback2);
bus.dispatch('test');
expect(callback1).toHaveBeenCalled();
expect(callback2).toHaveBeenCalled();
});
});
```
## Consequences
### Positive
- **Loose Coupling**: Components don't need direct references to communicate.
- **Flexibility**: New subscribers can be added without modifying publishers.
- **Testability**: Easy to mock events and verify interactions.
- **Simplicity**: Minimal code footprint compared to full state management solutions.
### Negative
- **Debugging Complexity**: Event-driven flows can be harder to trace than direct function calls.
- **Memory Leaks**: Forgetting to unsubscribe can cause memory leaks (mitigated by the React hook).
- **No Type Safety for Payloads**: Event data is typed as `any`; generics could narrow this (see the sketch below).
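A minimal sketch of a generically typed bus, offered as an illustration rather than the current implementation (the event-to-payload map is hypothetical):

```typescript
// Hypothetical event → payload map; the real app uses string constants with `any` payloads.
interface EventPayloads {
  'session:expired': { reason: string };
  'notification:received': { id: number; title: string };
}

// Sketch: same pub/sub mechanics as EventBus, but payloads are checked at compile time.
class TypedEventBus<Events extends Record<string, unknown>> {
  private listeners: { [K in keyof Events]?: Array<(data: Events[K]) => void> } = {};

  on<K extends keyof Events>(event: K, cb: (data: Events[K]) => void): void {
    const list = this.listeners[event] ?? [];
    list.push(cb);
    this.listeners[event] = list;
  }

  off<K extends keyof Events>(event: K, cb: (data: Events[K]) => void): void {
    this.listeners[event] = this.listeners[event]?.filter((l) => l !== cb);
  }

  dispatch<K extends keyof Events>(event: K, data: Events[K]): void {
    this.listeners[event]?.forEach((callback) => callback(data));
  }
}

const typedBus = new TypedEventBus<EventPayloads>();
typedBus.dispatch('session:expired', { reason: 'token_expired' }); // payload type-checked
```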
## Key Files
- `src/services/eventBus.ts` - EventBus implementation
- `src/services/eventBus.test.ts` - EventBus tests
## Related ADRs
- [ADR-005](./0005-frontend-state-management-and-server-cache-strategy.md) - State Management Strategy
- [ADR-022](./0022-real-time-notification-system.md) - Real-time Notification System

@@ -0,0 +1,265 @@
# ADR-037: Scheduled Jobs and Cron Pattern
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
Many business operations need to run on a recurring schedule without user intervention:
1. **Daily Deal Checks**: Scan watched items for price drops and notify users.
2. **Analytics Generation**: Compile daily and weekly statistics reports.
3. **Token Cleanup**: Remove expired password reset tokens from the database.
4. **Data Maintenance**: Archive old data, clean up temporary files.
These scheduled operations require:
- Reliable execution at specific times
- Protection against overlapping runs
- Graceful error handling that doesn't crash the server
- Integration with the existing job queue system (BullMQ)
## Decision
We will use `node-cron` for scheduling jobs and integrate with BullMQ for job execution. This provides:
1. **Cron Expressions**: Standard, well-understood scheduling syntax.
2. **Job Queue Integration**: Scheduled jobs enqueue work to BullMQ for reliable processing.
3. **Idempotency**: Jobs use predictable IDs to prevent duplicate runs.
4. **Overlap Protection**: In-memory locks prevent concurrent execution of the same job.
### Architecture
```text
┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
│   node-cron     │────▶│  BullMQ Queue   │────▶│     Worker      │
│  (Scheduler)    │     │  (Job Store)    │     │  (Processor)    │
└─────────────────┘     └─────────────────┘     └─────────────────┘
                                 │
                                 ▼
                        ┌─────────────────┐
                        │      Redis      │
                        │  (Persistence)  │
                        └─────────────────┘
```
## Implementation Details
### BackgroundJobService
Located in `src/services/backgroundJobService.ts`:
```typescript
import cron from 'node-cron';
import type { Logger } from 'pino';
import type { Queue } from 'bullmq';
export class BackgroundJobService {
constructor(
private personalizationRepo: PersonalizationRepository,
private notificationRepo: NotificationRepository,
private emailQueue: Queue<EmailJobData>,
private logger: Logger,
) {}
async runDailyDealCheck(): Promise<void> {
this.logger.info('[BackgroundJob] Starting daily deal check...');
// 1. Fetch all deals for all users in one efficient query
const allDeals = await this.personalizationRepo.getBestSalePricesForAllUsers(this.logger);
// 2. Group deals by user
const dealsByUser = this.groupDealsByUser(allDeals);
// 3. Process each user's deals in parallel
const results = await Promise.allSettled(
Array.from(dealsByUser.values()).map((userGroup) => this._processDealsForUser(userGroup)),
);
// 4. Bulk insert notifications
await this.bulkCreateNotifications(results);
this.logger.info('[BackgroundJob] Daily deal check completed.');
}
async triggerAnalyticsReport(): Promise<string> {
const reportDate = getCurrentDateISOString();
const jobId = `manual-report-${reportDate}-${Date.now()}`;
const job = await analyticsQueue.add('generate-daily-report', { reportDate }, { jobId });
return job.id;
}
}
```
### Cron Job Initialization
```typescript
// In-memory lock to prevent job overlap
let isDailyDealCheckRunning = false;
export function startBackgroundJobs(
backgroundJobService: BackgroundJobService,
analyticsQueue: Queue,
weeklyAnalyticsQueue: Queue,
tokenCleanupQueue: Queue,
logger: Logger,
): void {
// Daily deal check at 2:00 AM
cron.schedule('0 2 * * *', () => {
(async () => {
if (isDailyDealCheckRunning) {
logger.warn('[BackgroundJob] Daily deal check already running. Skipping.');
return;
}
isDailyDealCheckRunning = true;
try {
await backgroundJobService.runDailyDealCheck();
} catch (error) {
logger.error({ err: error }, '[BackgroundJob] Daily deal check failed.');
} finally {
isDailyDealCheckRunning = false;
}
})().catch((error) => {
logger.error({ err: error }, '[BackgroundJob] Unhandled rejection in cron wrapper.');
isDailyDealCheckRunning = false;
});
});
// Daily analytics at 3:00 AM
cron.schedule('0 3 * * *', () => {
(async () => {
const reportDate = getCurrentDateISOString();
await analyticsQueue.add(
'generate-daily-report',
{ reportDate },
{ jobId: `daily-report-${reportDate}` }, // Prevents duplicates
);
})().catch((error) => {
logger.error({ err: error }, '[BackgroundJob] Analytics job enqueue failed.');
});
});
// Weekly analytics at 4:00 AM on Sundays
cron.schedule('0 4 * * 0', () => {
(async () => {
const { year, week } = getSimpleWeekAndYear();
await weeklyAnalyticsQueue.add(
'generate-weekly-report',
{ reportYear: year, reportWeek: week },
{ jobId: `weekly-report-${year}-${week}` },
);
})().catch((error) => {
logger.error({ err: error }, '[BackgroundJob] Weekly analytics enqueue failed.');
});
});
// Token cleanup at 5:00 AM
cron.schedule('0 5 * * *', () => {
(async () => {
const timestamp = new Date().toISOString();
await tokenCleanupQueue.add(
'cleanup-tokens',
{ timestamp },
{ jobId: `token-cleanup-${timestamp.split('T')[0]}` },
);
})().catch((error) => {
logger.error({ err: error }, '[BackgroundJob] Token cleanup enqueue failed.');
});
});
logger.info('[BackgroundJob] All cron jobs scheduled successfully.');
}
```
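The consuming side of these queues is a BullMQ worker (see ADR-006 and `workers.server.ts`). As a hedged sketch of how the daily-report jobs enqueued above might be processed (the queue name and the `connection` export are assumptions):

```typescript
import { Worker } from 'bullmq';
import { connection } from './queueService.server'; // shared Redis connection (assumed export)
import { logger } from './logger.server';

// Sketch: processes the 'generate-daily-report' jobs the 3:00 AM cron enqueues.
const analyticsWorker = new Worker<{ reportDate: string }>(
  'analyticsQueue', // assumed to match the queue's registered name
  async (job) => {
    logger.info({ jobId: job.id, reportDate: job.data.reportDate }, '[Worker] Generating daily report');
    // ... compile and persist the daily statistics report
  },
  { connection },
);

analyticsWorker.on('failed', (job, err) => {
  logger.error({ err, jobId: job?.id }, '[Worker] Daily report job failed');
});
```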
### Job Schedule Reference
| Job | Schedule | Queue | Purpose |
| ---------------- | ---------------------------- | ---------------------- | --------------------------------- |
| Daily Deal Check | `0 2 * * *` (2:00 AM) | Direct execution | Find price drops on watched items |
| Daily Analytics | `0 3 * * *` (3:00 AM) | `analyticsQueue` | Generate daily statistics |
| Weekly Analytics | `0 4 * * 0` (4:00 AM Sunday) | `weeklyAnalyticsQueue` | Generate weekly reports |
| Token Cleanup | `0 5 * * *` (5:00 AM) | `tokenCleanupQueue` | Remove expired tokens |
### Cron Expression Reference
```text
┌───────────── minute (0 - 59)
│ ┌───────────── hour (0 - 23)
│ │ ┌───────────── day of month (1 - 31)
│ │ │ ┌───────────── month (1 - 12)
│ │ │ │ ┌───────────── day of week (0 - 7, Sun = 0 or 7)
│ │ │ │ │
* * * * *
Examples:
0 2 * * * = 2:00 AM every day
0 4 * * 0 = 4:00 AM every Sunday
*/15 * * * * = Every 15 minutes
0 0 1 * * = Midnight on the 1st of each month
```
### Error Handling Pattern
The async IIFE wrapper with `.catch()` ensures that:
1. Errors in the job don't crash the cron scheduler
2. Unhandled promise rejections are logged
3. The lock is always released in the `finally` block
```typescript
cron.schedule('0 2 * * *', () => {
(async () => {
// Job logic here
})().catch((error) => {
// Handle unhandled rejections from the async wrapper
logger.error({ err: error }, 'Unhandled rejection');
});
});
```
### Manual Trigger API
Admin endpoints allow manual triggering of scheduled jobs:
```typescript
// src/routes/admin.routes.ts
router.post('/jobs/daily-deals', isAdmin, async (req, res, next) => {
  try {
    await backgroundJobService.runDailyDealCheck();
    res.json({ message: 'Daily deal check triggered' });
  } catch (error) {
    next(error);
  }
});
router.post('/jobs/analytics', isAdmin, async (req, res, next) => {
  try {
    const jobId = await backgroundJobService.triggerAnalyticsReport();
    res.json({ message: 'Analytics report queued', jobId });
  } catch (error) {
    next(error);
  }
});
```
## Consequences
### Positive
- **Reliability**: Jobs run at predictable times without manual intervention.
- **Idempotency**: Duplicate job prevention via job IDs.
- **Observability**: All job activity is logged with structured logging.
- **Flexibility**: Jobs can be triggered manually for testing or urgent runs.
- **Separation**: Scheduling is decoupled from job execution (cron vs BullMQ).
### Negative
- **Single Server**: Cron runs on a single server instance. For multi-server deployments, consider distributed scheduling.
- **Time Zone Dependency**: Cron times are server-local; consider UTC for distributed systems.
- **In-Memory Locks**: Overlap protection is per-process, not cluster-wide.
## Key Files
- `src/services/backgroundJobService.ts` - BackgroundJobService class and `startBackgroundJobs`
- `src/services/queueService.server.ts` - BullMQ queue definitions
- `src/services/workers.server.ts` - BullMQ worker processors
## Related ADRs
- [ADR-006](./0006-background-job-processing-and-task-queues.md) - Background Job Processing
- [ADR-004](./0004-standardized-application-wide-structured-logging.md) - Structured Logging

@@ -0,0 +1,290 @@
# ADR-038: Graceful Shutdown Pattern
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
When deploying or restarting the application, abrupt termination can cause:
1. **Lost Jobs**: BullMQ jobs in progress may be marked as failed or stalled.
2. **Connection Leaks**: Database and Redis connections may not be properly closed.
3. **Incomplete Requests**: HTTP requests in flight may receive no response.
4. **Data Corruption**: Transactions may be left in an inconsistent state.
Kubernetes and PM2 send termination signals (SIGTERM, SIGINT) to processes before forcefully killing them. The application must handle these signals to shut down gracefully.
## Decision
We will implement a coordinated graceful shutdown pattern that:
1. **Stops Accepting New Work**: Closes HTTP server, pauses job queues.
2. **Completes In-Flight Work**: Waits for active requests and jobs to finish.
3. **Releases Resources**: Closes database pools, Redis connections, and queues.
4. **Logs Shutdown Progress**: Provides visibility into the shutdown process.
### Signal Handling
| Signal | Source | Behavior |
| ------- | ------------------ | --------------------------------------- |
| SIGTERM | Kubernetes, PM2 | Graceful shutdown with resource cleanup |
| SIGINT | Ctrl+C in terminal | Same as SIGTERM |
| SIGKILL | Force kill | Cannot be caught; immediate termination |
## Implementation Details
### Queue and Worker Shutdown
Located in `src/services/queueService.server.ts`:
```typescript
import { logger } from './logger.server';
export const gracefulShutdown = async (signal: string): Promise<void> => {
logger.info(`[Shutdown] Received ${signal}. Closing all queues and workers...`);
const resources = [
{ name: 'flyerQueue', close: () => flyerQueue.close() },
{ name: 'emailQueue', close: () => emailQueue.close() },
{ name: 'analyticsQueue', close: () => analyticsQueue.close() },
{ name: 'weeklyAnalyticsQueue', close: () => weeklyAnalyticsQueue.close() },
{ name: 'cleanupQueue', close: () => cleanupQueue.close() },
{ name: 'tokenCleanupQueue', close: () => tokenCleanupQueue.close() },
{ name: 'redisConnection', close: () => connection.quit() },
];
const results = await Promise.allSettled(
resources.map(async (resource) => {
try {
await resource.close();
logger.info(`[Shutdown] ${resource.name} closed successfully.`);
} catch (error) {
logger.error({ err: error }, `[Shutdown] Error closing ${resource.name}`);
throw error;
}
}),
);
const failures = results.filter((r) => r.status === 'rejected');
if (failures.length > 0) {
logger.error(`[Shutdown] ${failures.length} resources failed to close.`);
}
logger.info('[Shutdown] All resources closed. Process can now exit.');
};
// Register signal handlers
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
```
### HTTP Server Shutdown
Located in `server.ts`:
```typescript
import { gracefulShutdown as shutdownQueues } from './src/services/queueService.server';
import { closePool } from './src/services/db/connection.db';
const server = app.listen(PORT, () => {
logger.info(`Server listening on port ${PORT}`);
});
const gracefulShutdown = async (signal: string): Promise<void> => {
logger.info(`[Shutdown] Received ${signal}. Starting graceful shutdown...`);
// 1. Stop accepting new connections
server.close((err) => {
if (err) {
logger.error({ err }, '[Shutdown] Error closing HTTP server');
} else {
logger.info('[Shutdown] HTTP server closed.');
}
});
// 2. Wait for in-flight requests (with timeout)
await new Promise((resolve) => setTimeout(resolve, 5000));
// 3. Close queues and workers
await shutdownQueues(signal);
// 4. Close database pool
await closePool();
logger.info('[Shutdown] Database pool closed.');
// 5. Exit process
process.exit(0);
};
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
```
### Database Pool Shutdown
Located in `src/services/db/connection.db.ts`:
```typescript
let pool: Pool | null = null;
export function getPool(): Pool {
if (!pool) {
pool = new Pool({
max: 20,
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
});
}
return pool;
}
export async function closePool(): Promise<void> {
if (pool) {
await pool.end();
pool = null;
logger.info('[Database] Connection pool closed.');
}
}
export function getPoolStatus(): { totalCount: number; idleCount: number; waitingCount: number } {
const p = getPool();
return {
totalCount: p.totalCount,
idleCount: p.idleCount,
waitingCount: p.waitingCount,
};
}
```
### PM2 Ecosystem Configuration
Located in `ecosystem.config.cjs`:
```javascript
module.exports = {
apps: [
{
name: 'flyer-crawler-api',
script: 'server.ts',
interpreter: 'tsx',
// Graceful shutdown settings
kill_timeout: 10000, // 10 seconds to cleanup before SIGKILL
wait_ready: true, // Wait for 'ready' signal before considering app started
listen_timeout: 10000, // Timeout for ready signal
// Cluster mode for zero-downtime reloads
instances: 1,
exec_mode: 'fork',
// Environment variables
env_production: {
NODE_ENV: 'production',
PORT: 3000,
},
env_test: {
NODE_ENV: 'test',
PORT: 3001,
},
},
],
};
```
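One subtlety: with `wait_ready: true`, PM2 holds the reload until the process sends a ready signal, so the server must emit it once it is actually listening. A minimal sketch of the corresponding code in `server.ts`:

```typescript
const server = app.listen(PORT, () => {
  logger.info(`Server listening on port ${PORT}`);
  // Required when PM2 runs with wait_ready: true; signals the app can take traffic.
  if (process.send) {
    process.send('ready');
  }
});
```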
### Worker Graceful Shutdown
BullMQ workers can be configured to wait for active jobs:
```typescript
import { Worker } from 'bullmq';
const worker = new Worker('flyerQueue', processor, {
connection,
// Graceful shutdown: wait for active jobs before closing
settings: {
lockDuration: 30000, // Time before job is considered stalled
stalledInterval: 5000, // Check for stalled jobs every 5s
},
});
// Workers auto-close when connection closes
worker.on('closing', () => {
logger.info('[Worker] flyerQueue worker is closing...');
});
worker.on('closed', () => {
logger.info('[Worker] flyerQueue worker closed.');
});
```
### Shutdown Sequence Diagram
```text
SIGTERM Received
┌──────────────────────┐
│ Stop HTTP Server │ ← No new connections accepted
│ (server.close()) │
└──────────────────────┘
┌──────────────────────┐
│ Wait for In-Flight │ ← 5-second grace period
│ Requests │
└──────────────────────┘
┌──────────────────────┐
│ Close BullMQ Queues │ ← Stop processing new jobs
│ and Workers │
└──────────────────────┘
┌──────────────────────┐
│ Close Redis │ ← Disconnect from Redis
│ Connection │
└──────────────────────┘
┌──────────────────────┐
│ Close Database Pool │ ← Release all DB connections
│ (pool.end()) │
└──────────────────────┘
┌──────────────────────┐
│ process.exit(0) │ ← Clean exit
└──────────────────────┘
```
## Consequences
### Positive
- **Zero Lost Work**: In-flight requests and jobs complete before shutdown.
- **Clean Resource Cleanup**: All connections are properly closed.
- **Zero-Downtime Deploys**: PM2 can reload without dropping requests.
- **Observability**: Shutdown progress is logged for debugging.
### Negative
- **Shutdown Delay**: Takes 5-15 seconds to shut down fully.
- **Complexity**: Multiple shutdown handlers must be coordinated.
- **Edge Cases**: Very long-running jobs may be killed if they exceed the grace period.
## Key Files
- `server.ts` - HTTP server shutdown and signal handling
- `src/services/queueService.server.ts` - Queue shutdown (`gracefulShutdown`)
- `src/services/db/connection.db.ts` - Database pool shutdown (`closePool`)
- `ecosystem.config.cjs` - PM2 configuration with `kill_timeout`
## Related ADRs
- [ADR-006](./0006-background-job-processing-and-task-queues.md) - Background Job Processing
- [ADR-020](./0020-health-checks-and-liveness-readiness-probes.md) - Health Checks
- [ADR-014](./0014-containerization-and-deployment-strategy.md) - Containerization

@@ -0,0 +1,278 @@
# ADR-039: Dependency Injection Pattern
**Date**: 2026-01-09
**Status**: Accepted
**Implemented**: 2026-01-09
## Context
As the application grows, tightly coupled components become difficult to test and maintain. Common issues include:
1. **Hard-to-Test Code**: Components that instantiate their own dependencies cannot be easily unit tested with mocks.
2. **Rigid Architecture**: Changing one implementation requires modifying all consumers.
3. **Hidden Dependencies**: It's unclear what a component needs to function.
4. **Circular Dependencies**: Tight coupling can lead to circular import issues.
Dependency Injection (DI) addresses these issues by inverting the control of dependency creation.
## Decision
We will adopt a constructor-based dependency injection pattern for all services and repositories. This approach:
1. **Explicit Dependencies**: All dependencies are declared in the constructor.
2. **Default Values**: Production dependencies have sensible defaults.
3. **Testability**: Test code can inject mocks without modifying source code.
4. **Loose Coupling**: Components depend on interfaces, not implementations.
### Design Principles
- **Constructor Injection**: Dependencies are passed through constructors, not looked up globally.
- **Default Production Dependencies**: Use default parameter values for production instances.
- **Interface Segregation**: Depend on the minimal interface needed (e.g., `Pick<Pool, 'query'>`).
- **Composition Root**: Wire dependencies at the application entry point.
## Implementation Details
### Repository Pattern with DI
Located in `src/services/db/flyer.db.ts`:
```typescript
import { Pool, PoolClient } from 'pg';
import type { Logger } from 'pino';
import { getPool } from './connection.db';
import { NotFoundError } from './errors.db';
export class FlyerRepository {
// Accept any object with a 'query' method - Pool or PoolClient
private db: Pick<Pool | PoolClient, 'query'>;
constructor(db: Pick<Pool | PoolClient, 'query'> = getPool()) {
this.db = db;
}
async getFlyerById(flyerId: number, logger: Logger): Promise<Flyer> {
const result = await this.db.query<Flyer>('SELECT * FROM flyers WHERE flyer_id = $1', [
flyerId,
]);
if (result.rows.length === 0) {
throw new NotFoundError(`Flyer with ID ${flyerId} not found.`);
}
return result.rows[0];
}
async insertFlyer(flyer: FlyerDbInsert, logger: Logger): Promise<Flyer> {
// Implementation
}
}
```
**Usage in Production**:
```typescript
// Uses default pool
const flyerRepo = new FlyerRepository();
```
**Usage in Tests**:
```typescript
const mockDb = {
query: vi.fn().mockResolvedValue({ rows: [mockFlyer] }),
};
const flyerRepo = new FlyerRepository(mockDb);
```
**Usage in Transactions**:
```typescript
import { withTransaction } from './connection.db';
await withTransaction(async (client) => {
// Pass transactional client to repository
const flyerRepo = new FlyerRepository(client);
const flyer = await flyerRepo.insertFlyer(flyerData, logger);
// ... more operations in the same transaction
});
```
### Service Layer with DI
Located in `src/services/backgroundJobService.ts`:
```typescript
export class BackgroundJobService {
constructor(
private personalizationRepo: PersonalizationRepository,
private notificationRepo: NotificationRepository,
private emailQueue: Queue<EmailJobData>,
private logger: Logger,
) {}
async runDailyDealCheck(): Promise<void> {
this.logger.info('[BackgroundJob] Starting daily deal check...');
const deals = await this.personalizationRepo.getBestSalePricesForAllUsers(this.logger);
// ... process deals
}
}
// Composition root - wire production dependencies
import { personalizationRepo, notificationRepo } from './db/index.db';
import { logger } from './logger.server';
import { emailQueue } from './queueService.server';
export const backgroundJobService = new BackgroundJobService(
personalizationRepo,
notificationRepo,
emailQueue,
logger,
);
```
**Testing with Mocks**:
```typescript
describe('BackgroundJobService', () => {
it('should process deals for all users', async () => {
const mockPersonalizationRepo = {
getBestSalePricesForAllUsers: vi.fn().mockResolvedValue([mockDeal]),
};
const mockNotificationRepo = {
createBulkNotifications: vi.fn().mockResolvedValue([]),
};
const mockEmailQueue = {
add: vi.fn().mockResolvedValue({ id: 'job-1' }),
};
const mockLogger = {
info: vi.fn(),
error: vi.fn(),
};
const service = new BackgroundJobService(
mockPersonalizationRepo as any,
mockNotificationRepo as any,
mockEmailQueue as any,
mockLogger as any,
);
await service.runDailyDealCheck();
expect(mockPersonalizationRepo.getBestSalePricesForAllUsers).toHaveBeenCalled();
expect(mockEmailQueue.add).toHaveBeenCalled();
});
});
```
### Processing Service with DI
Located in `src/services/flyer/flyerProcessingService.ts`:
```typescript
export class FlyerProcessingService {
constructor(
private fileHandler: FlyerFileHandler,
private aiProcessor: FlyerAiProcessor,
private fsAdapter: FileSystemAdapter,
private cleanupQueue: Queue<CleanupJobData>,
private dataTransformer: FlyerDataTransformer,
private persistenceService: FlyerPersistenceService,
) {}
async processFlyer(filePath: string, logger: Logger): Promise<ProcessedFlyer> {
// Use injected dependencies
const fileInfo = await this.fileHandler.extractMetadata(filePath);
const aiResult = await this.aiProcessor.analyze(filePath, logger);
const transformed = this.dataTransformer.transform(aiResult);
const saved = await this.persistenceService.save(transformed, logger);
// Queue cleanup
await this.cleanupQueue.add('cleanup', { filePath });
return saved;
}
}
// Composition root
const flyerProcessingService = new FlyerProcessingService(
new FlyerFileHandler(fsAdapter, execAsync),
new FlyerAiProcessor(aiService, db.personalizationRepo),
fsAdapter,
cleanupQueue,
new FlyerDataTransformer(),
new FlyerPersistenceService(),
);
```
### Interface Segregation
Use the minimum interface required:
```typescript
// Bad - depends on full Pool
constructor(pool: Pool) {}
// Good - depends only on what's needed
constructor(db: Pick<Pool | PoolClient, 'query'>) {}
```
This allows injecting either a `Pool`, `PoolClient` (for transactions), or a mock object with just a `query` method.
### Composition Root Pattern
Wire all dependencies at application startup:
```typescript
// src/services/db/index.db.ts - Composition root for repositories
import { getPool } from './connection.db';
export const userRepo = new UserRepository(getPool());
export const flyerRepo = new FlyerRepository(getPool());
export const adminRepo = new AdminRepository(getPool());
export const personalizationRepo = new PersonalizationRepository(getPool());
export const notificationRepo = new NotificationRepository(getPool());
export const db = {
userRepo,
flyerRepo,
adminRepo,
personalizationRepo,
notificationRepo,
};
```
## Consequences
### Positive
- **Testability**: Unit tests can inject mocks without modifying production code.
- **Flexibility**: Swap implementations (e.g., different database adapters) easily.
- **Explicit Dependencies**: Clear contract of what a component needs.
- **Transaction Support**: Repositories can participate in transactions by accepting a client.
### Negative
- **More Boilerplate**: Constructors become longer with many dependencies.
- **Composition Complexity**: Must wire dependencies somewhere (composition root).
- **No Runtime Type Checking**: TypeScript types are erased at runtime.
### Mitigation
For complex services with many dependencies, consider:
1. **Factory Functions**: Encapsulate construction logic (see the sketch after this list).
2. **Dependency Groups**: Pass related dependencies as a single object.
3. **DI Containers**: For very large applications, consider a DI library like `tsyringe` or `inversify`.
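As a hedged example of the factory-function mitigation, reusing the names from the composition root above:

```typescript
// Sketch: a factory that hides the wiring of FlyerProcessingService's dependencies.
// Callers get the default production graph; tests override only what they need.
export function createFlyerProcessingService(
  overrides: Partial<{ fileHandler: FlyerFileHandler; aiProcessor: FlyerAiProcessor }> = {},
): FlyerProcessingService {
  return new FlyerProcessingService(
    overrides.fileHandler ?? new FlyerFileHandler(fsAdapter, execAsync),
    overrides.aiProcessor ?? new FlyerAiProcessor(aiService, db.personalizationRepo),
    fsAdapter,
    cleanupQueue,
    new FlyerDataTransformer(),
    new FlyerPersistenceService(),
  );
}
```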
## Key Files
- `src/services/db/*.db.ts` - Repository classes with constructor DI
- `src/services/db/index.db.ts` - Composition root for repositories
- `src/services/backgroundJobService.ts` - Service class with constructor DI
- `src/services/flyer/flyerProcessingService.ts` - Complex service with multiple dependencies
## Related ADRs
- [ADR-002](./0002-standardized-transaction-management.md) - Transaction Management
- [ADR-034](./0034-repository-pattern-standards.md) - Repository Pattern Standards
- [ADR-035](./0035-service-layer-architecture.md) - Service Layer Architecture

View File

@@ -11,9 +11,9 @@ This directory contains a log of the architectural decisions made for the Flyer
 ## 2. Data Management
-**[ADR-009](./0009-caching-strategy-for-read-heavy-operations.md)**: Caching Strategy for Read-Heavy Operations (Partially Implemented)
+**[ADR-009](./0009-caching-strategy-for-read-heavy-operations.md)**: Caching Strategy for Read-Heavy Operations (Accepted)
 **[ADR-013](./0013-database-schema-migration-strategy.md)**: Database Schema Migration Strategy (Proposed)
-**[ADR-019](./0019-data-backup-and-recovery-strategy.md)**: Data Backup and Recovery Strategy (Proposed)
+**[ADR-019](./0019-data-backup-and-recovery-strategy.md)**: Data Backup and Recovery Strategy (Accepted)
 **[ADR-023](./0023-database-schema-migration-strategy.md)**: Database Schema Migration Strategy (Proposed)
 **[ADR-031](./0031-data-retention-and-privacy-compliance.md)**: Data Retention and Privacy Compliance (Proposed)
@@ -31,6 +31,8 @@ This directory contains a log of the architectural decisions made for the Flyer
 **[ADR-011](./0011-advanced-authorization-and-access-control-strategy.md)**: Advanced Authorization and Access Control Strategy (Proposed)
 **[ADR-016](./0016-api-security-hardening.md)**: API Security Hardening (Accepted)
 **[ADR-029](./0029-secret-rotation-and-key-management.md)**: Secret Rotation and Key Management Strategy (Proposed)
+**[ADR-032](./0032-rate-limiting-strategy.md)**: Rate Limiting Strategy (Accepted)
+**[ADR-033](./0033-file-upload-and-storage-strategy.md)**: File Upload and Storage Strategy (Accepted)
 ## 5. Observability & Monitoring
@@ -39,10 +41,12 @@ This directory contains a log of the architectural decisions made for the Flyer
 ## 6. Deployment & Operations
-**[ADR-006](./0006-background-job-processing-and-task-queues.md)**: Background Job Processing and Task Queues (Partially Implemented)
+**[ADR-006](./0006-background-job-processing-and-task-queues.md)**: Background Job Processing and Task Queues (Accepted)
-**[ADR-014](./0014-containerization-and-deployment-strategy.md)**: Containerization and Deployment Strategy (Proposed)
+**[ADR-014](./0014-containerization-and-deployment-strategy.md)**: Containerization and Deployment Strategy (Partially Implemented)
-**[ADR-017](./0017-ci-cd-and-branching-strategy.md)**: CI/CD and Branching Strategy (Proposed)
+**[ADR-017](./0017-ci-cd-and-branching-strategy.md)**: CI/CD and Branching Strategy (Accepted)
 **[ADR-024](./0024-feature-flagging-strategy.md)**: Feature Flagging Strategy (Proposed)
+**[ADR-037](./0037-scheduled-jobs-and-cron-pattern.md)**: Scheduled Jobs and Cron Pattern (Accepted)
+**[ADR-038](./0038-graceful-shutdown-pattern.md)**: Graceful Shutdown Pattern (Accepted)
 ## 7. Frontend / User Interface
@@ -56,3 +60,10 @@ This directory contains a log of the architectural decisions made for the Flyer
 **[ADR-010](./0010-testing-strategy-and-standards.md)**: Testing Strategy and Standards (Accepted)
 **[ADR-021](./0021-code-formatting-and-linting-unification.md)**: Code Formatting and Linting Unification (Accepted)
 **[ADR-027](./0027-standardized-naming-convention-for-ai-and-database-types.md)**: Standardized Naming Convention for AI and Database Types (Accepted)
+## 9. Architecture Patterns
+**[ADR-034](./0034-repository-pattern-standards.md)**: Repository Pattern Standards (Accepted)
+**[ADR-035](./0035-service-layer-architecture.md)**: Service Layer Architecture (Accepted)
+**[ADR-036](./0036-event-bus-and-pub-sub-pattern.md)**: Event Bus and Pub/Sub Pattern (Accepted)
+**[ADR-039](./0039-dependency-injection-pattern.md)**: Dependency Injection Pattern (Accepted)

Binary file not shown (new image, 160 KiB).

Binary file not shown (new image, 189 KiB).

scripts/docker-init.sh (new file, 150 lines)
View File

@@ -0,0 +1,150 @@
#!/bin/bash
# scripts/docker-init.sh
# ============================================================================
# CONTAINER INITIALIZATION SCRIPT
# ============================================================================
# Purpose:
# This script is run when the dev container is created for the first time.
# It handles all first-run setup tasks to ensure a fully working environment.
#
# Tasks performed:
# 1. Install npm dependencies (if not already done)
# 2. Wait for PostgreSQL to be ready
# 3. Wait for Redis to be ready
# 4. Initialize the database schema
# 5. Seed the database with development data
#
# Usage:
# This script is called automatically by devcontainer.json's postCreateCommand.
# It can also be run manually: ./scripts/docker-init.sh
# ============================================================================
set -e # Exit immediately on error
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
log_info() {
  echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
  echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
  echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $1"
}
# ============================================================================
# 1. Install npm dependencies
# ============================================================================
log_info "Step 1/5: Installing npm dependencies..."
if [ -d "node_modules" ] && [ -f "node_modules/.package-lock.json" ]; then
log_info "node_modules exists, running npm install to sync..."
fi
npm install
log_success "npm dependencies installed."
# ============================================================================
# 2. Wait for PostgreSQL to be ready
# ============================================================================
log_info "Step 2/5: Waiting for PostgreSQL to be ready..."
POSTGRES_HOST="${DB_HOST:-postgres}"
POSTGRES_PORT="${DB_PORT:-5432}"
POSTGRES_USER="${DB_USER:-postgres}"
POSTGRES_DB="${DB_NAME:-flyer_crawler_dev}"
MAX_RETRIES=30
RETRY_COUNT=0
until PGPASSWORD="${DB_PASSWORD:-postgres}" psql -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -d "postgres" -c '\q' 2>/dev/null; do
  RETRY_COUNT=$((RETRY_COUNT + 1))
  if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
    log_error "PostgreSQL did not become ready after $MAX_RETRIES attempts. Exiting."
    exit 1
  fi
  log_warning "PostgreSQL is not ready yet (attempt $RETRY_COUNT/$MAX_RETRIES). Waiting 2 seconds..."
  sleep 2
done
log_success "PostgreSQL is ready."
# ============================================================================
# 3. Wait for Redis to be ready
# ============================================================================
log_info "Step 3/5: Waiting for Redis to be ready..."
REDIS_HOST="${REDIS_HOST:-redis}"
REDIS_PORT="${REDIS_PORT:-6379}"
MAX_RETRIES=30
RETRY_COUNT=0
# Extract host (and port, if present) from REDIS_URL if set
if [ -n "$REDIS_URL" ]; then
  # Parse the redis://host[:port][/db] format (assumes no auth component in the URL)
  REDIS_HOST=$(echo "$REDIS_URL" | sed -E 's|redis://([^:/]+).*|\1|')
  PARSED_PORT=$(echo "$REDIS_URL" | sed -nE 's|redis://[^:/]+:([0-9]+).*|\1|p')
  [ -n "$PARSED_PORT" ] && REDIS_PORT="$PARSED_PORT"
fi
until redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping 2>/dev/null | grep -q PONG; do
  RETRY_COUNT=$((RETRY_COUNT + 1))
  if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
    log_error "Redis did not become ready after $MAX_RETRIES attempts. Exiting."
    exit 1
  fi
  log_warning "Redis is not ready yet (attempt $RETRY_COUNT/$MAX_RETRIES). Waiting 2 seconds..."
  sleep 2
done
log_success "Redis is ready."
# ============================================================================
# 4. Check if database needs initialization
# ============================================================================
log_info "Step 4/5: Checking database state..."
# Check if the users table exists (indicator of initialized schema)
TABLE_EXISTS=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -d "$POSTGRES_DB" -t -c "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'users');" 2>/dev/null | tr -d '[:space:]' || echo "f")
if [ "$TABLE_EXISTS" = "t" ]; then
log_info "Database schema already exists. Skipping initialization."
log_info "To reset the database, run: npm run db:reset:dev"
else
log_info "Database schema not found. Initializing..."
# ============================================================================
# 5. Initialize and seed the database
# ============================================================================
log_info "Step 5/5: Running database initialization and seed..."
# The db:reset:dev script handles both schema creation and seeding
npm run db:reset:dev
log_success "Database initialized and seeded successfully."
fi
# ============================================================================
# Done!
# ============================================================================
echo ""
log_success "=========================================="
log_success "Container initialization complete!"
log_success "=========================================="
echo ""
log_info "Default test accounts:"
echo " Admin: admin@example.com / adminpass"
echo " User: user@example.com / userpass"
echo ""
log_info "To start the development server, run:"
echo " npm run dev:container"
echo ""

View File

@@ -1,13 +1,31 @@
// src/services/flyerPersistenceService.server.ts // src/services/flyerPersistenceService.server.ts
import type { Logger } from 'pino'; import type { Logger } from 'pino';
import { withTransaction } from './db/connection.db'; import type { PoolClient } from 'pg';
import { withTransaction as defaultWithTransaction } from './db/connection.db';
import { createFlyerAndItems } from './db/flyer.db'; import { createFlyerAndItems } from './db/flyer.db';
import { AdminRepository } from './db/admin.db'; import { AdminRepository } from './db/admin.db';
import { GamificationRepository } from './db/gamification.db'; import { GamificationRepository } from './db/gamification.db';
import { cacheService } from './cacheService.server'; import { cacheService } from './cacheService.server';
import type { FlyerInsert, FlyerItemInsert, Flyer } from '../types'; import type { FlyerInsert, FlyerItemInsert, Flyer } from '../types';
export type WithTransactionFn = <T>(callback: (client: PoolClient) => Promise<T>) => Promise<T>;
export class FlyerPersistenceService { export class FlyerPersistenceService {
private withTransaction: WithTransactionFn;
constructor(withTransactionFn: WithTransactionFn = defaultWithTransaction) {
this.withTransaction = withTransactionFn;
}
/**
* Allows replacing the withTransaction function at runtime.
* This is primarily used for testing to inject mock implementations.
* @internal
*/
_setWithTransaction(fn: WithTransactionFn): void {
this.withTransaction = fn;
}
/** /**
* Saves the flyer and its items to the database within a transaction. * Saves the flyer and its items to the database within a transaction.
* Also logs the activity and invalidates related cache entries. * Also logs the activity and invalidates related cache entries.
@@ -18,7 +36,7 @@ export class FlyerPersistenceService {
userId: string | undefined, userId: string | undefined,
logger: Logger, logger: Logger,
): Promise<Flyer> { ): Promise<Flyer> {
const flyer = await withTransaction(async (client) => { const flyer = await this.withTransaction(async (client) => {
const { flyer, items } = await createFlyerAndItems(flyerData, itemsForDb, logger, client); const { flyer, items } = await createFlyerAndItems(flyerData, itemsForDb, logger, client);
logger.info( logger.info(

View File

@@ -2,7 +2,7 @@
 import { UnrecoverableError, type Job, type Queue } from 'bullmq';
 import path from 'path';
 import type { Logger } from 'pino';
-import type { FlyerFileHandler, IFileSystem, ICommandExecutor } from './flyerFileHandler.server';
+import type { FlyerFileHandler, IFileSystem } from './flyerFileHandler.server';
 import type { FlyerAiProcessor } from './flyerAiProcessor.server';
 import * as db from './db/index.db';
 import { FlyerDataTransformer } from './flyerDataTransformer';
@@ -11,7 +11,6 @@ import {
   FlyerProcessingError,
   PdfConversionError,
   AiDataValidationError,
-  UnsupportedFileTypeError,
 } from './processingErrors';
 import { NotFoundError } from './db/errors.db';
 import { logger as globalLogger } from './logger.server'; // This was a duplicate, fixed.
@@ -44,6 +43,14 @@ export class FlyerProcessingService {
     private persistenceService: FlyerPersistenceService,
   ) {}
+
+  /**
+   * Provides access to the persistence service for testing purposes.
+   * @internal
+   */
+  _getPersistenceService(): FlyerPersistenceService {
+    return this.persistenceService;
+  }
   /**
    * Orchestrates the processing of a flyer job.
    * @param job The BullMQ job containing flyer data.
@@ -55,9 +62,24 @@
     logger.info('Picked up flyer processing job.');
     const stages: ProcessingStage[] = [
-      { name: 'Preparing Inputs', status: 'pending', critical: true, detail: 'Validating and preparing file...' },
-      { name: 'Image Optimization', status: 'pending', critical: true, detail: 'Compressing and resizing images...' },
-      { name: 'Extracting Data with AI', status: 'pending', critical: true, detail: 'Communicating with AI model...' },
+      {
+        name: 'Preparing Inputs',
+        status: 'pending',
+        critical: true,
+        detail: 'Validating and preparing file...',
+      },
+      {
+        name: 'Image Optimization',
+        status: 'pending',
+        critical: true,
+        detail: 'Compressing and resizing images...',
+      },
+      {
+        name: 'Extracting Data with AI',
+        status: 'pending',
+        critical: true,
+        detail: 'Communicating with AI model...',
+      },
       { name: 'Transforming AI Data', status: 'pending', critical: true },
       { name: 'Saving to Database', status: 'pending', critical: true },
     ];
@@ -69,7 +91,9 @@
     // Stage 1: Prepare Inputs (e.g., convert PDF to images)
     stages[0].status = 'in-progress';
     await job.updateProgress({ stages });
-    console.error(`[WORKER DEBUG] ProcessingService: Calling fileHandler.prepareImageInputs for ${job.data.filePath}`);
+    console.error(
+      `[WORKER DEBUG] ProcessingService: Calling fileHandler.prepareImageInputs for ${job.data.filePath}`,
+    );
     const { imagePaths, createdImagePaths } = await this.fileHandler.prepareImageInputs(
       job.data.filePath,
@@ -77,7 +101,9 @@
       logger,
     );
     allFilePaths.push(...createdImagePaths);
-    console.error(`[WORKER DEBUG] ProcessingService: fileHandler returned ${imagePaths.length} images.`);
+    console.error(
+      `[WORKER DEBUG] ProcessingService: fileHandler returned ${imagePaths.length} images.`,
+    );
     stages[0].status = 'completed';
     stages[0].detail = `${imagePaths.length} page(s) ready for AI.`;
     await job.updateProgress({ stages });
@@ -96,7 +122,9 @@
     console.error(`[WORKER DEBUG] ProcessingService: Calling aiProcessor.extractAndValidateData`);
     const aiResult = await this.aiProcessor.extractAndValidateData(imagePaths, job.data, logger);
-    console.error(`[WORKER DEBUG] ProcessingService: aiProcessor returned data for store: ${aiResult.data.store_name}`);
+    console.error(
+      `[WORKER DEBUG] ProcessingService: aiProcessor returned data for store: ${aiResult.data.store_name}`,
+    );
     stages[2].status = 'completed';
     await job.updateProgress({ stages });
@@ -109,7 +137,9 @@
     const primaryImagePath = imagePaths[0].path;
     const imageFileName = path.basename(primaryImagePath);
     const iconsDir = path.join(path.dirname(primaryImagePath), 'icons');
-    console.error(`[WORKER DEBUG] ProcessingService: Generating icon from ${primaryImagePath} to ${iconsDir}`);
+    console.error(
+      `[WORKER DEBUG] ProcessingService: Generating icon from ${primaryImagePath} to ${iconsDir}`,
+    );
     const iconFileName = await generateFlyerIcon(primaryImagePath, iconsDir, logger);
     console.error(`[WORKER DEBUG] ProcessingService: Icon generated: ${iconFileName}`);
@@ -120,8 +150,16 @@
     // Ensure we have a valid base URL, preferring the one from the job data.
     // This is critical for workers where process.env.FRONTEND_URL might be undefined.
     const baseUrl = job.data.baseUrl || process.env.FRONTEND_URL || 'http://localhost:3000';
-    console.error(`[DEBUG] FlyerProcessingService resolved baseUrl: "${baseUrl}" (job.data.baseUrl: "${job.data.baseUrl}", env.FRONTEND_URL: "${process.env.FRONTEND_URL}")`);
-    console.error('[DEBUG] FlyerProcessingService calling transformer with:', { originalFileName: job.data.originalFileName, imageFileName, iconFileName, checksum: job.data.checksum, baseUrl });
+    console.error(
+      `[DEBUG] FlyerProcessingService resolved baseUrl: "${baseUrl}" (job.data.baseUrl: "${job.data.baseUrl}", env.FRONTEND_URL: "${process.env.FRONTEND_URL}")`,
+    );
+    console.error('[DEBUG] FlyerProcessingService calling transformer with:', {
+      originalFileName: job.data.originalFileName,
+      imageFileName,
+      iconFileName,
+      checksum: job.data.checksum,
+      baseUrl,
+    });
     const { flyerData, itemsForDb } = await this.transformer.transform(
       aiResult,
@@ -133,7 +171,10 @@
       logger,
       baseUrl,
     );
-    console.error('[DEBUG] FlyerProcessingService transformer output URLs:', { imageUrl: flyerData.image_url, iconUrl: flyerData.icon_url });
+    console.error('[DEBUG] FlyerProcessingService transformer output URLs:', {
+      imageUrl: flyerData.image_url,
+      iconUrl: flyerData.icon_url,
+    });
     console.error('[DEBUG] Full Flyer Data to be saved:', JSON.stringify(flyerData, null, 2));
     stages[3].status = 'completed';
     await job.updateProgress({ stages });
@@ -170,10 +211,15 @@
       return { flyerId };
     } catch (error) {
-      logger.warn('Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.');
+      logger.warn(
+        'Job failed. Temporary files will NOT be cleaned up to allow for manual inspection.',
+      );
       // Add detailed logging of the raw error object
       if (error instanceof Error) {
-        logger.error({ err: error, stack: error.stack }, 'Raw error object in processJob catch block');
+        logger.error(
+          { err: error, stack: error.stack },
+          'Raw error object in processJob catch block',
+        );
       } else {
         logger.error({ error }, 'Raw non-Error object in processJob catch block');
       }
@@ -190,7 +236,9 @@
    * @param job The BullMQ job containing cleanup data.
    * @returns An object indicating the status of the cleanup operation.
    */
-  async processCleanupJob(job: Job<CleanupJobData>): Promise<{ status: string; deletedCount?: number; reason?: string }> {
+  async processCleanupJob(
+    job: Job<CleanupJobData>,
+  ): Promise<{ status: string; deletedCount?: number; reason?: string }> {
     const logger = globalLogger.child({ jobId: job.id, jobName: job.name, ...job.data });
     logger.info('Picked up file cleanup job.');
@@ -199,19 +247,25 @@
     // If no paths are provided (e.g., from a manual trigger), attempt to derive them from the database.
     if (!pathsToDelete || pathsToDelete.length === 0) {
-      logger.warn(`Cleanup job for flyer ${flyerId} received no paths. Attempting to derive paths from DB.`);
+      logger.warn(
+        `Cleanup job for flyer ${flyerId} received no paths. Attempting to derive paths from DB.`,
+      );
       try {
         const flyer = await db.flyerRepo.getFlyerById(flyerId);
         const derivedPaths: string[] = [];
         // This path needs to be configurable and match where multer saves files.
-        const storagePath = process.env.STORAGE_PATH || '/var/www/flyer-crawler.projectium.com/flyer-images';
+        const storagePath =
+          process.env.STORAGE_PATH || '/var/www/flyer-crawler.projectium.com/flyer-images';
         if (flyer.image_url) {
           try {
             const imageName = path.basename(new URL(flyer.image_url).pathname);
             derivedPaths.push(path.join(storagePath, imageName));
           } catch (urlError) {
-            logger.error({ err: urlError, url: flyer.image_url }, 'Failed to parse flyer.image_url to derive file path.');
+            logger.error(
+              { err: urlError, url: flyer.image_url },
+              'Failed to parse flyer.image_url to derive file path.',
+            );
           }
         }
         if (flyer.icon_url) {
@@ -219,13 +273,19 @@
             const iconName = path.basename(new URL(flyer.icon_url).pathname);
             derivedPaths.push(path.join(storagePath, 'icons', iconName));
           } catch (urlError) {
-            logger.error({ err: urlError, url: flyer.icon_url }, 'Failed to parse flyer.icon_url to derive file path.');
+            logger.error(
+              { err: urlError, url: flyer.icon_url },
+              'Failed to parse flyer.icon_url to derive file path.',
+            );
           }
         }
         pathsToDelete = derivedPaths;
       } catch (error) {
         if (error instanceof NotFoundError) {
-          logger.error({ flyerId }, 'Cannot derive cleanup paths because flyer was not found in DB.');
+          logger.error(
+            { flyerId },
+            'Cannot derive cleanup paths because flyer was not found in DB.',
+          );
           // Do not throw. Allow the job to be marked as skipped if no paths are found.
         } else {
           throw error; // Re-throw other DB errors to allow for retries.
@@ -259,7 +319,9 @@
     const failedDeletions = results.filter((r) => r.status === 'rejected');
     if (failedDeletions.length > 0) {
       const failedPaths = pathsToDelete.filter((_, i) => results[i].status === 'rejected');
-      throw new Error(`Failed to delete ${failedDeletions.length} file(s): ${failedPaths.join(', ')}`);
+      throw new Error(
+        `Failed to delete ${failedDeletions.length} file(s): ${failedPaths.join(', ')}`,
+      );
     }
     logger.info(`Successfully deleted all ${pathsToDelete.length} temporary files.`);
@@ -290,8 +352,13 @@
       ['DATABASE_ERROR', 'Saving to Database'],
     ]);
     const normalizedError = error instanceof Error ? error : new Error(String(error));
-    let errorPayload: { errorCode: string; message: string; [key: string]: any };
-    let stagesToReport: ProcessingStage[] = [...initialStages]; // Create a mutable copy
+    let errorPayload: {
+      errorCode: string;
+      message: string;
+      stages?: ProcessingStage[];
+      [key: string]: unknown;
+    };
+    const stagesToReport: ProcessingStage[] = [...initialStages]; // Create a mutable copy
     if (normalizedError instanceof FlyerProcessingError) {
       errorPayload = normalizedError.toErrorPayload();
@@ -302,11 +369,13 @@
     // Determine which stage failed
     const failedStageName = errorCodeToStageMap.get(errorPayload.errorCode);
-    let errorStageIndex = failedStageName ? stagesToReport.findIndex(s => s.name === failedStageName) : -1;
+    let errorStageIndex = failedStageName
+      ? stagesToReport.findIndex((s) => s.name === failedStageName)
+      : -1;
     // 2. If not mapped, find the currently running stage
     if (errorStageIndex === -1) {
-      errorStageIndex = stagesToReport.findIndex(s => s.status === 'in-progress');
+      errorStageIndex = stagesToReport.findIndex((s) => s.status === 'in-progress');
     }
     // 3. Fallback to the last stage
@@ -324,10 +393,12 @@
     // Mark subsequent critical stages as skipped
     for (let i = errorStageIndex + 1; i < stagesToReport.length; i++) {
       if (stagesToReport[i].critical) {
-        // When a stage is skipped, we don't need its previous 'detail' property.
-        // This creates a clean 'skipped' state object by removing `detail` and keeping the rest.
-        const { detail, ...restOfStage } = stagesToReport[i];
-        stagesToReport[i] = { ...restOfStage, status: 'skipped' };
+        // When a stage is skipped, we create a clean 'skipped' state object without the 'detail' property.
+        stagesToReport[i] = {
+          name: stagesToReport[i].name,
+          status: 'skipped',
+          critical: stagesToReport[i].critical,
+        };
       }
     }
@@ -337,7 +408,7 @@
     // Logging logic
     if (normalizedError instanceof FlyerProcessingError) {
       // Simplify log object creation
-      const logDetails: Record<string, any> = { ...errorPayload, err: normalizedError };
+      const logDetails: Record<string, unknown> = { ...errorPayload, err: normalizedError };
       if (normalizedError instanceof AiDataValidationError) {
         logDetails.validationErrors = normalizedError.validationErrors;
@@ -349,12 +420,22 @@
       logger.error(logDetails, `A known processing error occurred: ${normalizedError.name}`);
     } else {
-      logger.error({ err: normalizedError, ...errorPayload }, `An unknown error occurred: ${errorPayload.message}`);
+      logger.error(
+        { err: normalizedError, ...errorPayload },
+        `An unknown error occurred: ${errorPayload.message}`,
+      );
     }
     // Check for specific error messages that indicate a non-retriable failure, like quota exhaustion.
-    if (errorPayload.message.toLowerCase().includes('quota') || errorPayload.message.toLowerCase().includes('resource_exhausted')) {
-      const unrecoverablePayload = { errorCode: 'QUOTA_EXCEEDED', message: 'An AI quota has been exceeded. Please try again later.', stages: errorPayload.stages };
+    if (
+      errorPayload.message.toLowerCase().includes('quota') ||
+      errorPayload.message.toLowerCase().includes('resource_exhausted')
+    ) {
+      const unrecoverablePayload = {
+        errorCode: 'QUOTA_EXCEEDED',
+        message: 'An AI quota has been exceeded. Please try again later.',
+        stages: errorPayload.stages,
+      };
       await job.updateProgress(unrecoverablePayload);
       throw new UnrecoverableError(unrecoverablePayload.message);
     }

View File

@@ -1,5 +1,5 @@
 // src/services/workers.server.ts
-import { Worker, Job, UnrecoverableError } from 'bullmq';
+import { Worker, Job } from 'bullmq';
 import fsPromises from 'node:fs/promises';
 import { exec } from 'child_process';
 import { promisify } from 'util';
@@ -15,6 +15,7 @@ import { FlyerProcessingService } from './flyerProcessingService.server';
 import { FlyerAiProcessor } from './flyerAiProcessor.server';
 import { FlyerDataTransformer } from './flyerDataTransformer';
 import { FlyerPersistenceService } from './flyerPersistenceService.server';
+import { withTransaction } from './db/connection.db';
 import {
   cleanupQueue,
   flyerQueue,
@@ -43,13 +44,13 @@ export const fsAdapter: IFileSystem = {
   rename: (oldPath: string, newPath: string) => fsPromises.rename(oldPath, newPath),
 };
-const flyerProcessingService = new FlyerProcessingService(
+export const flyerProcessingService = new FlyerProcessingService(
   new FlyerFileHandler(fsAdapter, execAsync),
   new FlyerAiProcessor(aiService, db.personalizationRepo),
   fsAdapter,
   cleanupQueue,
   new FlyerDataTransformer(),
-  new FlyerPersistenceService(),
+  new FlyerPersistenceService(withTransaction),
 );
 const normalizeError = (error: unknown): Error => {
@@ -63,7 +64,7 @@
  * @param processor The core logic for the worker.
  * @returns An async function that takes a job and executes the processor.
  */
-const createWorkerProcessor = <T>(processor: (job: Job<T>) => Promise<any>) => {
+const createWorkerProcessor = <T, R>(processor: (job: Job<T>) => Promise<R>) => {
   return async (job: Job<T>) => {
     try {
       return await processor(job);
@@ -179,7 +180,10 @@ export const gracefulShutdown = async (signal: string) => {
   let hasErrors = false;
   // Helper function to close a group of resources and log results
-  const closeResources = async (resources: { name: string; close: () => Promise<any> }[], type: string) => {
+  const closeResources = async (
+    resources: { name: string; close: () => Promise<void> }[],
+    type: string,
+  ) => {
     logger.info(`[Shutdown] Closing all ${type}...`);
     const results = await Promise.allSettled(resources.map((r) => r.close()));
     let groupHasErrors = false;
@@ -230,7 +234,10 @@ export const gracefulShutdown = async (signal: string) => {
       logger.info('[Shutdown] Redis connection closed successfully.');
     } catch (err) {
       hasErrors = true;
-      logger.error({ err, resource: 'redisConnection' }, `[Shutdown] Error closing Redis connection.`);
+      logger.error(
+        { err, resource: 'redisConnection' },
+        `[Shutdown] Error closing Redis connection.`,
+      );
     }
     return hasErrors;

View File

@@ -15,6 +15,13 @@ import piexif from 'piexifjs';
 import exifParser from 'exif-parser';
 import sharp from 'sharp';
+
+// NOTE: STORAGE_PATH is set via the CI environment (deploy-to-test.yml).
+// This ensures multer and flyerProcessingService use the test runner's directory
+// instead of the production path (/var/www/.../flyer-images).
+// The testStoragePath variable is used for constructing paths in test assertions.
+const testStoragePath =
+  process.env.STORAGE_PATH || path.resolve(__dirname, '../../../flyer-images');
 // Mock the image processor to ensure safe filenames for DB constraints
 vi.mock('../../utils/imageProcessor', async () => {
   const actual = await vi.importActual<typeof import('../../utils/imageProcessor')>(
@@ -92,10 +99,9 @@
 // CRITICAL: These mock functions must be declared with vi.hoisted() to ensure they're available
 // at the module level BEFORE any imports are resolved.
-const { mockExtractCoreData, mockWithTransaction } = vi.hoisted(() => {
+const { mockExtractCoreData } = vi.hoisted(() => {
   return {
     mockExtractCoreData: vi.fn(),
-    mockWithTransaction: vi.fn(),
   };
 });
@@ -122,20 +128,9 @@
   };
 });
-// Mock the connection DB service to intercept withTransaction.
-// This is crucial because FlyerPersistenceService imports directly from connection.db,
-// so mocking index.db is insufficient.
-// CRITICAL: We use the hoisted mockWithTransaction function so tests can manipulate the same
-// function instance that workers are using.
-vi.mock('../../services/db/connection.db', async (importOriginal) => {
-  const actual = await importOriginal<typeof import('../../services/db/connection.db')>();
-  // Initialize the hoisted mock to use the real implementation by default
-  mockWithTransaction.mockImplementation(actual.withTransaction);
-  return {
-    ...actual,
-    withTransaction: mockWithTransaction,
-  };
-});
+// NOTE: We no longer mock connection.db at the module level because vi.mock() doesn't work
+// across module boundaries (the worker imports the real module before our mock is applied).
+// Instead, we use dependency injection via FlyerPersistenceService._setWithTransaction().
 describe('Flyer Processing Background Job Integration Test', () => {
   let request: ReturnType<typeof supertest>;
@@ -152,6 +147,12 @@
     // for the database, satisfying the 'url_check' constraint.
     // IMPORTANT: This must run BEFORE the app is imported so workers inherit the env var.
     vi.stubEnv('FRONTEND_URL', 'https://example.com');
+    // STORAGE_PATH is primarily set via CI environment (deploy-to-test.yml).
+    // This stubEnv call serves as a fallback for local development runs.
+    // It ensures multer and flyerProcessingService use the test directory, not production path.
+    vi.stubEnv('STORAGE_PATH', testStoragePath);
+    console.error('[TEST SETUP] STORAGE_PATH:', testStoragePath);
     process.env.FRONTEND_URL = 'https://example.com';
     console.error('[TEST SETUP] FRONTEND_URL stubbed to:', process.env.FRONTEND_URL);
@@ -190,15 +191,15 @@
       ],
     });
-    // 2. Restore DB Service Mock to real implementation
-    // This ensures that unless a test specifically mocks a failure, the DB logic works as expected.
-    // CRITICAL: Use the hoisted mockWithTransaction directly so we're manipulating the same instance
-    // that the workers are using.
-    const actualDb = await vi.importActual<typeof import('../../services/db/connection.db')>(
-      '../../services/db/connection.db',
-    );
-    mockWithTransaction.mockReset();
-    mockWithTransaction.mockImplementation(actualDb.withTransaction);
+    // 2. Restore withTransaction to real implementation via dependency injection
+    // This ensures that unless a test specifically injects a mock, the DB logic works as expected.
+    if (workersModule) {
+      const { withTransaction } = await import('../../services/db/connection.db');
+      workersModule.flyerProcessingService
+        ._getPersistenceService()
+        ._setWithTransaction(withTransaction);
+      console.error('[TEST SETUP] withTransaction restored to real implementation via DI');
+    }
   });
   afterAll(async () => {
@@ -263,7 +264,7 @@
     console.error('[TEST DATA] Generated checksum for test:', checksum);
     // Track created files for cleanup
-    const uploadDir = path.resolve(__dirname, '../../../flyer-images');
+    const uploadDir = testStoragePath;
     createdFilePaths.push(path.join(uploadDir, uniqueFileName));
     console.error('[TEST] createdFilesPaths after 1st push: ', createdFilePaths);
     // The icon name is derived from the original filename.
@@ -393,7 +394,7 @@
     const checksum = await generateFileChecksum(mockImageFile);
     // Track original and derived files for cleanup
-    const uploadDir = path.resolve(__dirname, '../../../flyer-images');
+    const uploadDir = testStoragePath;
     createdFilePaths.push(path.join(uploadDir, uniqueFileName));
     const iconFileName = `icon-${path.parse(uniqueFileName).name}.webp`;
     createdFilePaths.push(path.join(uploadDir, 'icons', iconFileName));
@@ -484,7 +485,7 @@
     const checksum = await generateFileChecksum(mockImageFile);
     // Track files for cleanup
-    const uploadDir = path.resolve(__dirname, '../../../flyer-images');
+    const uploadDir = testStoragePath;
     createdFilePaths.push(path.join(uploadDir, uniqueFileName));
     const iconFileName = `icon-${path.parse(uniqueFileName).name}.webp`;
     createdFilePaths.push(path.join(uploadDir, 'icons', iconFileName));
@@ -559,7 +560,7 @@
     const checksum = await generateFileChecksum(mockImageFile);
     // Track created files for cleanup
-    const uploadDir = path.resolve(__dirname, '../../../flyer-images');
+    const uploadDir = testStoragePath;
     createdFilePaths.push(path.join(uploadDir, uniqueFileName));
     // Act 1: Upload the file to start the background job.
@@ -596,11 +597,14 @@
   }, 240000);
   it('should handle a database failure during flyer creation', async () => {
-    // Arrange: Mock the database transaction function to throw an error.
-    // This is a more realistic simulation of a DB failure than mocking the inner createFlyerAndItems function.
-    // CRITICAL: Use the hoisted mockWithTransaction directly - this is the same instance the workers use.
+    // Arrange: Inject a failing withTransaction function via dependency injection.
+    // This is the correct approach because vi.mock() doesn't work across module boundaries -
+    // the worker imports the real module before our mock is applied.
     const dbError = new Error('DB transaction failed');
-    mockWithTransaction.mockRejectedValue(dbError);
+    const failingWithTransaction = vi.fn().mockRejectedValue(dbError);
+    workersModule.flyerProcessingService
+      ._getPersistenceService()
+      ._setWithTransaction(failingWithTransaction);
     // Arrange: Prepare a unique flyer file for upload.
     const imagePath = path.resolve(__dirname, '../assets/test-flyer-image.jpg');
@@ -613,7 +617,7 @@
     const checksum = await generateFileChecksum(mockImageFile);
     // Track created files for cleanup
-    const uploadDir = path.resolve(__dirname, '../../../flyer-images');
+    const uploadDir = testStoragePath;
     createdFilePaths.push(path.join(uploadDir, uniqueFileName));
     // Act 1: Upload the file to start the background job.
@@ -661,7 +665,7 @@
     const checksum = await generateFileChecksum(mockImageFile);
     // Track the path of the file that will be created in the uploads directory.
-    const uploadDir = path.resolve(__dirname, '../../../flyer-images');
+    const uploadDir = testStoragePath;
     const tempFilePath = path.join(uploadDir, uniqueFileName);
     createdFilePaths.push(tempFilePath);