Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4346332bbf | ||
| 61cfb518e6 | |||
|
|
e86ce51b6c | ||
| 840a7a62d3 | |||
| 5720820d95 | |||
|
|
e5cdb54308 | ||
| a3f212ff81 | |||
|
|
de263f74b0 | ||
| a71e41302b | |||
|
|
3575803252 | ||
| d03900cefe | |||
|
|
6d49639845 | ||
| d4543cf4b9 | |||
|
|
4f08238698 | ||
| 38b35f87aa | |||
|
|
dd067183ed | ||
| 9f3a070612 | |||
| 8a38befb1c |
@@ -114,7 +114,19 @@
|
||||
"Bash(git rm:*)",
|
||||
"Bash(git -C \"C:\\\\Users\\\\games3\\\\.claude\\\\plugins\\\\marketplaces\\\\claude-plugins-official\" log -1 --format=\"%H %ci %s\")",
|
||||
"Bash(git -C \"C:\\\\Users\\\\games3\\\\.claude\\\\plugins\\\\marketplaces\\\\claude-plugins-official\" config --get remote.origin.url)",
|
||||
"Bash(git -C \"C:\\\\Users\\\\games3\\\\.claude\\\\plugins\\\\marketplaces\\\\claude-plugins-official\" fetch --dry-run -v)"
|
||||
"Bash(git -C \"C:\\\\Users\\\\games3\\\\.claude\\\\plugins\\\\marketplaces\\\\claude-plugins-official\" fetch --dry-run -v)",
|
||||
"mcp__localerrors__get_project",
|
||||
"mcp__localerrors__get_issue",
|
||||
"mcp__localerrors__get_event",
|
||||
"mcp__localerrors__list_teams",
|
||||
"WebSearch",
|
||||
"Bash(for trigger in update_price_history_on_flyer_item_insert update_recipe_rating_aggregates log_new_recipe log_new_flyer)",
|
||||
"Bash(do echo \"=== $trigger ===\")"
|
||||
]
|
||||
}
|
||||
},
|
||||
"enabledMcpjsonServers": [
|
||||
"localerrors",
|
||||
"devdb",
|
||||
"gitea-projectium"
|
||||
]
|
||||
}
|
||||
|
||||
21
.env.example
21
.env.example
@@ -36,10 +36,10 @@ NODE_ENV=development
|
||||
FRONTEND_URL=http://localhost:3000
|
||||
|
||||
# Flyer Base URL - used for seed data and flyer image URLs
|
||||
# Dev container: http://127.0.0.1
|
||||
# Dev container: https://localhost (NOT 127.0.0.1 - avoids SSL mixed-origin issues)
|
||||
# Test: https://flyer-crawler-test.projectium.com
|
||||
# Production: https://flyer-crawler.projectium.com
|
||||
FLYER_BASE_URL=http://127.0.0.1
|
||||
FLYER_BASE_URL=https://localhost
|
||||
|
||||
# ===================
|
||||
# Authentication
|
||||
@@ -94,11 +94,18 @@ WORKER_LOCK_DURATION=120000
|
||||
# Error Tracking (ADR-015)
|
||||
# ===================
|
||||
# Sentry-compatible error tracking via Bugsink (self-hosted)
|
||||
# DSNs are created in Bugsink UI at http://localhost:8000 (dev) or your production URL
|
||||
# Backend DSN - for Express/Node.js errors
|
||||
SENTRY_DSN=
|
||||
# Frontend DSN - for React/browser errors (uses VITE_ prefix)
|
||||
VITE_SENTRY_DSN=
|
||||
# DSNs are created in Bugsink UI at https://localhost:8443 (dev) or your production URL
|
||||
#
|
||||
# Dev container projects:
|
||||
# - Project 1: Backend API (Dev) - receives Pino, PostgreSQL errors
|
||||
# - Project 2: Frontend (Dev) - receives browser errors via Sentry SDK
|
||||
# - Project 4: Infrastructure (Dev) - receives Redis, NGINX, Vite errors
|
||||
#
|
||||
# Backend DSN - for Express/Node.js errors (internal container URL)
|
||||
SENTRY_DSN=http://<key>@localhost:8000/1
|
||||
# Frontend DSN - for React/browser errors (uses nginx proxy for browser access)
|
||||
# Note: Browsers cannot reach localhost:8000 directly, so we use nginx proxy at /bugsink-api/
|
||||
VITE_SENTRY_DSN=https://<key>@localhost/bugsink-api/2
|
||||
# Environment name for error grouping (defaults to NODE_ENV)
|
||||
SENTRY_ENVIRONMENT=development
|
||||
VITE_SENTRY_ENVIRONMENT=development
|
||||
|
||||
@@ -15,6 +15,14 @@
|
||||
"@modelcontextprotocol/server-postgres",
|
||||
"postgresql://postgres:postgres@127.0.0.1:5432/flyer_crawler_dev"
|
||||
]
|
||||
},
|
||||
"gitea-projectium": {
|
||||
"command": "d:\\gitea-mcp\\gitea-mcp.exe",
|
||||
"args": ["run", "-t", "stdio"],
|
||||
"env": {
|
||||
"GITEA_HOST": "https://gitea.projectium.com",
|
||||
"GITEA_ACCESS_TOKEN": "b111259253aa3cadcb6a37618de03bf388f6235a"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
136
CLAUDE.md
136
CLAUDE.md
@@ -40,6 +40,7 @@ Ask before assuming. Never assume:
|
||||
1. **Memory**: `mcp__memory__read_graph` - Recall project context, credentials, known issues
|
||||
2. **Git**: `git log --oneline -10` - Recent changes
|
||||
3. **Containers**: `mcp__podman__container_list` - Running state
|
||||
4. **PM2 Status**: `podman exec flyer-crawler-dev pm2 status` - Process health (API, Worker, Vite)
|
||||
|
||||
---
|
||||
|
||||
@@ -47,12 +48,15 @@ Ask before assuming. Never assume:
|
||||
|
||||
### Essential Commands
|
||||
|
||||
| Command | Description |
|
||||
| ------------------------------------------------------------ | ----------------- |
|
||||
| `podman exec -it flyer-crawler-dev npm test` | Run all tests |
|
||||
| `podman exec -it flyer-crawler-dev npm run test:unit` | Unit tests only |
|
||||
| `podman exec -it flyer-crawler-dev npm run type-check` | TypeScript check |
|
||||
| `podman exec -it flyer-crawler-dev npm run test:integration` | Integration tests |
|
||||
| Command | Description |
|
||||
| ------------------------------------------------------------ | --------------------- |
|
||||
| `podman exec -it flyer-crawler-dev npm test` | Run all tests |
|
||||
| `podman exec -it flyer-crawler-dev npm run test:unit` | Unit tests only |
|
||||
| `podman exec -it flyer-crawler-dev npm run type-check` | TypeScript check |
|
||||
| `podman exec -it flyer-crawler-dev npm run test:integration` | Integration tests |
|
||||
| `podman exec -it flyer-crawler-dev pm2 status` | PM2 process status |
|
||||
| `podman exec -it flyer-crawler-dev pm2 logs` | View all PM2 logs |
|
||||
| `podman exec -it flyer-crawler-dev pm2 restart all` | Restart all processes |
|
||||
|
||||
### Key Patterns (with file locations)
|
||||
|
||||
@@ -65,14 +69,16 @@ Ask before assuming. Never assume:
|
||||
|
||||
### Key Files Quick Access
|
||||
|
||||
| Purpose | File |
|
||||
| ------------ | -------------------------------- |
|
||||
| Express app | `server.ts` |
|
||||
| Environment | `src/config/env.ts` |
|
||||
| Routes | `src/routes/*.routes.ts` |
|
||||
| Repositories | `src/services/db/*.db.ts` |
|
||||
| Workers | `src/services/workers.server.ts` |
|
||||
| Queues | `src/services/queues.server.ts` |
|
||||
| Purpose | File |
|
||||
| ----------------- | -------------------------------- |
|
||||
| Express app | `server.ts` |
|
||||
| Environment | `src/config/env.ts` |
|
||||
| Routes | `src/routes/*.routes.ts` |
|
||||
| Repositories | `src/services/db/*.db.ts` |
|
||||
| Workers | `src/services/workers.server.ts` |
|
||||
| Queues | `src/services/queues.server.ts` |
|
||||
| PM2 Config (Dev) | `ecosystem.dev.config.cjs` |
|
||||
| PM2 Config (Prod) | `ecosystem.config.cjs` |
|
||||
|
||||
---
|
||||
|
||||
@@ -96,6 +102,56 @@ Routes → Services → Repositories → Database
|
||||
|
||||
---
|
||||
|
||||
## Dev Container Architecture (ADR-014)
|
||||
|
||||
The dev container now matches production by using PM2 for process management.
|
||||
|
||||
### Process Management
|
||||
|
||||
| Component | Production | Dev Container |
|
||||
| ---------- | ---------------------- | ------------------------- |
|
||||
| API Server | PM2 cluster mode | PM2 fork mode + tsx watch |
|
||||
| Worker | PM2 process | PM2 process + tsx watch |
|
||||
| Frontend | Static files via NGINX | PM2 + Vite dev server |
|
||||
| Logs | PM2 logs -> Logstash | PM2 logs -> Logstash |
|
||||
|
||||
**PM2 Processes in Dev Container**:
|
||||
|
||||
- `flyer-crawler-api-dev` - API server (port 3001)
|
||||
- `flyer-crawler-worker-dev` - Background job worker
|
||||
- `flyer-crawler-vite-dev` - Vite frontend dev server (port 5173)
|
||||
|
||||
### Log Aggregation (ADR-050)
|
||||
|
||||
All logs flow to Bugsink via Logstash with 3-project routing:
|
||||
|
||||
| Source | Log Location | Bugsink Project |
|
||||
| ----------------- | --------------------------------- | ------------------ |
|
||||
| Backend (Pino) | `/var/log/pm2/api-*.log` | Backend API (1) |
|
||||
| Worker (Pino) | `/var/log/pm2/worker-*.log` | Backend API (1) |
|
||||
| PostgreSQL | `/var/log/postgresql/*.log` | Backend API (1) |
|
||||
| Vite | `/var/log/pm2/vite-*.log` | Infrastructure (4) |
|
||||
| Redis | `/var/log/redis/redis-server.log` | Infrastructure (4) |
|
||||
| NGINX | `/var/log/nginx/*.log` | Infrastructure (4) |
|
||||
| Frontend (Sentry) | Browser -> nginx proxy | Frontend (2) |
|
||||
|
||||
**Bugsink Projects (Dev Container)**:
|
||||
|
||||
- Project 1: Backend API (Dev) - Application errors
|
||||
- Project 2: Frontend (Dev) - Browser errors via nginx proxy
|
||||
- Project 4: Infrastructure (Dev) - Redis, NGINX, Vite errors
|
||||
|
||||
**Key Files**:
|
||||
|
||||
- `ecosystem.dev.config.cjs` - PM2 development configuration
|
||||
- `scripts/dev-entrypoint.sh` - Container startup script
|
||||
- `docker/logstash/bugsink.conf` - Logstash pipeline configuration
|
||||
- `docker/nginx/dev.conf` - NGINX config with Bugsink API proxy
|
||||
|
||||
**Full Dev Container Guide**: See [docs/development/DEV-CONTAINER.md](docs/development/DEV-CONTAINER.md)
|
||||
|
||||
---
|
||||
|
||||
## Common Workflows
|
||||
|
||||
### Adding a New API Endpoint
|
||||
@@ -166,6 +222,7 @@ Common issues with solutions:
|
||||
4. **Filename collisions** - Multer predictable names → Use `${Date.now()}-${Math.round(Math.random() * 1e9)}`
|
||||
5. **Response format mismatches** - API format changes → Log response bodies, update assertions
|
||||
6. **External service failures** - PM2/Redis unavailable → try/catch with graceful degradation
|
||||
7. **TZ environment variable breaks async hooks** - `TZ=America/Los_Angeles` causes `RangeError: Invalid triggerAsyncId value: NaN` → Tests now explicitly set `TZ=` (empty) in package.json scripts
|
||||
|
||||
**Full Details**: See test issues section at end of this document or [docs/development/TESTING.md](docs/development/TESTING.md)
|
||||
|
||||
@@ -251,18 +308,20 @@ Log aggregation: PostgreSQL + PM2 + Redis + NGINX → Bugsink (ADR-050)
|
||||
|
||||
## Documentation Quick Links
|
||||
|
||||
| Topic | Document |
|
||||
| ------------------- | ----------------------------------------------------- |
|
||||
| **Getting Started** | [QUICKSTART.md](docs/getting-started/QUICKSTART.md) |
|
||||
| **Architecture** | [OVERVIEW.md](docs/architecture/OVERVIEW.md) |
|
||||
| **Code Patterns** | [CODE-PATTERNS.md](docs/development/CODE-PATTERNS.md) |
|
||||
| **Testing** | [TESTING.md](docs/development/TESTING.md) |
|
||||
| **Debugging** | [DEBUGGING.md](docs/development/DEBUGGING.md) |
|
||||
| **Database** | [DATABASE.md](docs/architecture/DATABASE.md) |
|
||||
| **Deployment** | [DEPLOYMENT.md](docs/operations/DEPLOYMENT.md) |
|
||||
| **Monitoring** | [MONITORING.md](docs/operations/MONITORING.md) |
|
||||
| **ADRs** | [docs/adr/index.md](docs/adr/index.md) |
|
||||
| **All Docs** | [docs/README.md](docs/README.md) |
|
||||
| Topic | Document |
|
||||
| ------------------- | -------------------------------------------------------------- |
|
||||
| **Getting Started** | [QUICKSTART.md](docs/getting-started/QUICKSTART.md) |
|
||||
| **Dev Container** | [DEV-CONTAINER.md](docs/development/DEV-CONTAINER.md) |
|
||||
| **Architecture** | [OVERVIEW.md](docs/architecture/OVERVIEW.md) |
|
||||
| **Code Patterns** | [CODE-PATTERNS.md](docs/development/CODE-PATTERNS.md) |
|
||||
| **Testing** | [TESTING.md](docs/development/TESTING.md) |
|
||||
| **Debugging** | [DEBUGGING.md](docs/development/DEBUGGING.md) |
|
||||
| **Database** | [DATABASE.md](docs/architecture/DATABASE.md) |
|
||||
| **Deployment** | [DEPLOYMENT.md](docs/operations/DEPLOYMENT.md) |
|
||||
| **Monitoring** | [MONITORING.md](docs/operations/MONITORING.md) |
|
||||
| **Logstash** | [LOGSTASH-QUICK-REF.md](docs/operations/LOGSTASH-QUICK-REF.md) |
|
||||
| **ADRs** | [docs/adr/index.md](docs/adr/index.md) |
|
||||
| **All Docs** | [docs/README.md](docs/README.md) |
|
||||
|
||||
---
|
||||
|
||||
@@ -319,3 +378,28 @@ API formats change: `data.jobId` vs `data.job.id`, nested vs flat, string vs num
|
||||
PM2/Redis health checks fail when unavailable.
|
||||
|
||||
**Solution**: try/catch with graceful degradation or mock
|
||||
|
||||
### 7. TZ Environment Variable Breaking Async Hooks
|
||||
|
||||
**Problem**: When `TZ=America/Los_Angeles` (or other timezone values) is set in the environment, Node.js async_hooks module can produce `RangeError: Invalid triggerAsyncId value: NaN`. This breaks React Testing Library's `render()` function which uses async hooks internally.
|
||||
|
||||
**Root Cause**: Setting `TZ` to certain timezone values interferes with Node.js's internal async tracking mechanism, causing invalid async IDs to be generated.
|
||||
|
||||
**Symptoms**:
|
||||
|
||||
```text
|
||||
RangeError: Invalid triggerAsyncId value: NaN
|
||||
❯ process.env.NODE_ENV.queueSeveralMicrotasks node_modules/react/cjs/react.development.js:751:15
|
||||
❯ process.env.NODE_ENV.exports.act node_modules/react/cjs/react.development.js:886:11
|
||||
❯ node_modules/@testing-library/react/dist/act-compat.js:46:25
|
||||
❯ renderRoot node_modules/@testing-library/react/dist/pure.js:189:26
|
||||
```
|
||||
|
||||
**Solution**: Explicitly unset `TZ` in all test scripts by adding `TZ=` (empty value) to cross-env:
|
||||
|
||||
```json
|
||||
"test:unit": "cross-env NODE_ENV=test TZ= tsx ..."
|
||||
"test:integration": "cross-env NODE_ENV=test TZ= tsx ..."
|
||||
```
|
||||
|
||||
**Context**: This issue was introduced in commit `d03900c` which added `TZ: 'America/Los_Angeles'` to PM2 ecosystem configs for consistent log timestamps in production/dev environments. Tests must explicitly override this to prevent the async hooks error.
|
||||
|
||||
161
Dockerfile.dev
161
Dockerfile.dev
@@ -54,6 +54,13 @@ RUN apt-get update && apt-get install -y \
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y nodejs
|
||||
|
||||
# ============================================================================
|
||||
# Install PM2 Globally (ADR-014 Parity)
|
||||
# ============================================================================
|
||||
# Install PM2 to match production architecture. This allows dev container to
|
||||
# run the same process management as production (cluster mode, workers, etc.)
|
||||
RUN npm install -g pm2
|
||||
|
||||
# ============================================================================
|
||||
# Install mkcert and Generate Self-Signed Certificates
|
||||
# ============================================================================
|
||||
@@ -63,6 +70,27 @@ RUN wget -O /usr/local/bin/mkcert https://github.com/FiloSottile/mkcert/releases
|
||||
&& chmod +x /usr/local/bin/mkcert
|
||||
|
||||
# Create certificates directory and generate localhost certificates
|
||||
# ============================================================================
|
||||
# IMPORTANT: Certificate includes MULTIPLE hostnames (SANs)
|
||||
# ============================================================================
|
||||
# The certificate is generated for 'localhost', '127.0.0.1', AND '::1' because:
|
||||
#
|
||||
# 1. Users may access the site via https://localhost/ OR https://127.0.0.1/
|
||||
# 2. Database stores image URLs using one hostname (typically 127.0.0.1)
|
||||
# 3. The seed script uses https://127.0.0.1 for image URLs (database constraint)
|
||||
# 4. NGINX is configured to accept BOTH hostnames (see docker/nginx/dev.conf)
|
||||
#
|
||||
# Without all hostnames in the certificate's Subject Alternative Names (SANs),
|
||||
# browsers would show ERR_CERT_AUTHORITY_INVALID when loading images or other
|
||||
# resources that use a different hostname than the one in the address bar.
|
||||
#
|
||||
# The mkcert command below creates a certificate valid for all three:
|
||||
# - localhost (IPv4 hostname)
|
||||
# - 127.0.0.1 (IPv4 address)
|
||||
# - ::1 (IPv6 loopback)
|
||||
#
|
||||
# See also: docker/nginx/dev.conf, docs/FLYER-URL-CONFIGURATION.md
|
||||
# ============================================================================
|
||||
RUN mkdir -p /app/certs \
|
||||
&& cd /app/certs \
|
||||
&& mkcert -install \
|
||||
@@ -146,6 +174,21 @@ BUGSINK = {\n\
|
||||
}\n\
|
||||
\n\
|
||||
ALLOWED_HOSTS = deduce_allowed_hosts(BUGSINK["BASE_URL"])\n\
|
||||
# Also allow 127.0.0.1 access (both localhost and 127.0.0.1 should work)\n\
|
||||
if "127.0.0.1" not in ALLOWED_HOSTS:\n\
|
||||
ALLOWED_HOSTS.append("127.0.0.1")\n\
|
||||
if "localhost" not in ALLOWED_HOSTS:\n\
|
||||
ALLOWED_HOSTS.append("localhost")\n\
|
||||
\n\
|
||||
# CSRF Trusted Origins (Django 4.0+ requires full origin for HTTPS POST requests)\n\
|
||||
# This fixes "CSRF verification failed" errors when accessing Bugsink via HTTPS\n\
|
||||
# Both localhost and 127.0.0.1 must be trusted to support different access patterns\n\
|
||||
CSRF_TRUSTED_ORIGINS = [\n\
|
||||
"https://localhost:8443",\n\
|
||||
"https://127.0.0.1:8443",\n\
|
||||
"http://localhost:8000",\n\
|
||||
"http://127.0.0.1:8000",\n\
|
||||
]\n\
|
||||
\n\
|
||||
# Console email backend for dev\n\
|
||||
EMAIL_BACKEND = "bugsink.email_backends.QuietConsoleEmailBackend"\n\
|
||||
@@ -211,119 +254,25 @@ exec /opt/bugsink/bin/gunicorn \\\n\
|
||||
&& chmod +x /usr/local/bin/start-bugsink.sh
|
||||
|
||||
# ============================================================================
|
||||
# Create Logstash Pipeline Configuration
|
||||
# Copy Logstash Pipeline Configuration
|
||||
# ============================================================================
|
||||
# ADR-015: Pino and Redis logs → Bugsink
|
||||
# ADR-015 + ADR-050: Multi-source log aggregation to Bugsink
|
||||
# Configuration file includes:
|
||||
# - Pino application logs (Backend API errors)
|
||||
# - PostgreSQL logs (including fn_log() structured output)
|
||||
# - NGINX access and error logs
|
||||
# See docker/logstash/bugsink.conf for full configuration
|
||||
RUN mkdir -p /etc/logstash/conf.d /app/logs
|
||||
|
||||
RUN echo 'input {\n\
|
||||
# Pino application logs\n\
|
||||
file {\n\
|
||||
path => "/app/logs/*.log"\n\
|
||||
codec => json\n\
|
||||
type => "pino"\n\
|
||||
tags => ["app"]\n\
|
||||
start_position => "beginning"\n\
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pino"\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Redis logs\n\
|
||||
file {\n\
|
||||
path => "/var/log/redis/*.log"\n\
|
||||
type => "redis"\n\
|
||||
tags => ["redis"]\n\
|
||||
start_position => "beginning"\n\
|
||||
sincedb_path => "/var/lib/logstash/sincedb_redis"\n\
|
||||
}\n\
|
||||
\n\
|
||||
# PostgreSQL function logs (ADR-050)\n\
|
||||
file {\n\
|
||||
path => "/var/log/postgresql/*.log"\n\
|
||||
type => "postgres"\n\
|
||||
tags => ["postgres", "database"]\n\
|
||||
start_position => "beginning"\n\
|
||||
sincedb_path => "/var/lib/logstash/sincedb_postgres"\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
filter {\n\
|
||||
# Pino error detection (level 50 = error, 60 = fatal)\n\
|
||||
if [type] == "pino" and [level] >= 50 {\n\
|
||||
mutate { add_tag => ["error"] }\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Redis log parsing\n\
|
||||
if [type] == "redis" {\n\
|
||||
grok {\n\
|
||||
match => { "message" => "%%{POSINT:pid}:%%{WORD:role} %%{MONTHDAY} %%{MONTH} %%{TIME} %%{WORD:loglevel} %%{GREEDYDATA:redis_message}" }\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Tag errors (WARNING/ERROR) for Bugsink forwarding\n\
|
||||
if [loglevel] in ["WARNING", "ERROR"] {\n\
|
||||
mutate { add_tag => ["error"] }\n\
|
||||
}\n\
|
||||
# Tag INFO-level operational events (startup, config, persistence)\n\
|
||||
else if [loglevel] == "INFO" {\n\
|
||||
mutate { add_tag => ["redis_operational"] }\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
# PostgreSQL function log parsing (ADR-050)\n\
|
||||
if [type] == "postgres" {\n\
|
||||
# Extract timestamp and process ID from PostgreSQL log prefix\n\
|
||||
# Format: "2026-01-18 10:30:00 PST [12345] user@database "\n\
|
||||
grok {\n\
|
||||
match => { "message" => "%%{TIMESTAMP_ISO8601:pg_timestamp} \\\\[%%{POSINT:pg_pid}\\\\] %%{USERNAME:pg_user}@%%{WORD:pg_database} %%{GREEDYDATA:pg_message}" }\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Check if this is a structured JSON log from fn_log()\n\
|
||||
# fn_log() emits JSON like: {"timestamp":"...","level":"WARNING","source":"postgresql","function":"award_achievement",...}\n\
|
||||
if [pg_message] =~ /^\\{.*"source":"postgresql".*\\}$/ {\n\
|
||||
json {\n\
|
||||
source => "pg_message"\n\
|
||||
target => "fn_log"\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Mark as error if level is WARNING or ERROR\n\
|
||||
if [fn_log][level] in ["WARNING", "ERROR"] {\n\
|
||||
mutate { add_tag => ["error", "db_function"] }\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Also catch native PostgreSQL errors\n\
|
||||
if [pg_message] =~ /^ERROR:/ or [pg_message] =~ /^FATAL:/ {\n\
|
||||
mutate { add_tag => ["error", "postgres_native"] }\n\
|
||||
}\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
output {\n\
|
||||
# Forward errors to Bugsink\n\
|
||||
if "error" in [tags] {\n\
|
||||
http {\n\
|
||||
url => "http://localhost:8000/api/store/"\n\
|
||||
http_method => "post"\n\
|
||||
format => "json"\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Store Redis operational logs (INFO level) to file\n\
|
||||
if "redis_operational" in [tags] {\n\
|
||||
file {\n\
|
||||
path => "/var/log/logstash/redis-operational-%%{+YYYY-MM-dd}.log"\n\
|
||||
codec => json_lines\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
# Debug output (comment out in production)\n\
|
||||
stdout { codec => rubydebug }\n\
|
||||
}\n\
|
||||
' > /etc/logstash/conf.d/bugsink.conf
|
||||
COPY docker/logstash/bugsink.conf /etc/logstash/conf.d/bugsink.conf
|
||||
|
||||
# Create Logstash directories
|
||||
RUN mkdir -p /var/lib/logstash && chown -R logstash:logstash /var/lib/logstash
|
||||
RUN mkdir -p /var/log/logstash && chown -R logstash:logstash /var/log/logstash
|
||||
|
||||
# Create PM2 log directory (ADR-014 Parity)
|
||||
# Logs written here will be picked up by Logstash (ADR-050)
|
||||
RUN mkdir -p /var/log/pm2
|
||||
|
||||
# ============================================================================
|
||||
# Configure Nginx
|
||||
# ============================================================================
|
||||
|
||||
105
certs/README.md
Normal file
105
certs/README.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# Development SSL Certificates
|
||||
|
||||
This directory contains SSL certificates for the development container HTTPS setup.
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose | Generated By |
|
||||
| --------------- | ---------------------------------------------------- | -------------------------- |
|
||||
| `localhost.crt` | SSL certificate for localhost and 127.0.0.1 | mkcert (in Dockerfile.dev) |
|
||||
| `localhost.key` | Private key for localhost.crt | mkcert (in Dockerfile.dev) |
|
||||
| `mkcert-ca.crt` | Root CA certificate for trusting mkcert certificates | mkcert |
|
||||
|
||||
## Certificate Details
|
||||
|
||||
The `localhost.crt` certificate includes the following Subject Alternative Names (SANs):
|
||||
|
||||
- `DNS:localhost`
|
||||
- `IP Address:127.0.0.1`
|
||||
- `IP Address:::1` (IPv6 localhost)
|
||||
|
||||
This allows the development server to be accessed via both `https://localhost/` and `https://127.0.0.1/` without SSL errors.
|
||||
|
||||
## Installing the CA Certificate (Recommended)
|
||||
|
||||
To avoid SSL certificate warnings in your browser, install the mkcert CA certificate on your system.
|
||||
|
||||
### Windows
|
||||
|
||||
1. Double-click `mkcert-ca.crt`
|
||||
2. Click **"Install Certificate..."**
|
||||
3. Select **"Local Machine"** > Next
|
||||
4. Select **"Place all certificates in the following store"**
|
||||
5. Click **Browse** > Select **"Trusted Root Certification Authorities"** > OK
|
||||
6. Click **Next** > **Finish**
|
||||
7. Restart your browser
|
||||
|
||||
### macOS
|
||||
|
||||
```bash
|
||||
sudo security add-trusted-cert -d -r trustRoot -k /Library/Keychains/System.keychain certs/mkcert-ca.crt
|
||||
```
|
||||
|
||||
### Linux
|
||||
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo cp certs/mkcert-ca.crt /usr/local/share/ca-certificates/mkcert-ca.crt
|
||||
sudo update-ca-certificates
|
||||
|
||||
# Fedora/RHEL
|
||||
sudo cp certs/mkcert-ca.crt /etc/pki/ca-trust/source/anchors/
|
||||
sudo update-ca-trust
|
||||
```
|
||||
|
||||
### Firefox (All Platforms)
|
||||
|
||||
Firefox uses its own certificate store:
|
||||
|
||||
1. Open Firefox Settings
|
||||
2. Search for "Certificates"
|
||||
3. Click **"View Certificates"**
|
||||
4. Go to **"Authorities"** tab
|
||||
5. Click **"Import..."**
|
||||
6. Select `certs/mkcert-ca.crt`
|
||||
7. Check **"Trust this CA to identify websites"**
|
||||
8. Click **OK**
|
||||
|
||||
## After Installation
|
||||
|
||||
Once the CA certificate is installed:
|
||||
|
||||
- Your browser will trust all mkcert certificates without warnings
|
||||
- Access `https://localhost/` with no security warnings
|
||||
- Images from `https://127.0.0.1/flyer-images/` will load without SSL errors
|
||||
|
||||
## Regenerating Certificates
|
||||
|
||||
If you need to regenerate the certificates (e.g., after rebuilding the container):
|
||||
|
||||
```bash
|
||||
# Inside the container
|
||||
cd /app/certs
|
||||
mkcert localhost 127.0.0.1 ::1
|
||||
mv localhost+2.pem localhost.crt
|
||||
mv localhost+2-key.pem localhost.key
|
||||
nginx -s reload
|
||||
|
||||
# Copy the new CA to the host
|
||||
podman cp flyer-crawler-dev:/app/certs/mkcert-ca.crt ./certs/mkcert-ca.crt
|
||||
```
|
||||
|
||||
Then reinstall the CA certificate as described above.
|
||||
|
||||
## Security Note
|
||||
|
||||
**DO NOT** commit the private key (`localhost.key`) to version control in production projects. For this development-only project, the certificates are checked in for convenience since they're only used locally with self-signed certificates.
|
||||
|
||||
The certificates in this directory are automatically generated by the Dockerfile.dev and should not be used in production.
|
||||
|
||||
## See Also
|
||||
|
||||
- [Dockerfile.dev](../Dockerfile.dev) - Certificate generation (line ~69)
|
||||
- [docker/nginx/dev.conf](../docker/nginx/dev.conf) - NGINX SSL configuration
|
||||
- [docs/FLYER-URL-CONFIGURATION.md](../docs/FLYER-URL-CONFIGURATION.md) - URL configuration details
|
||||
- [docs/development/DEBUGGING.md](../docs/development/DEBUGGING.md) - SSL troubleshooting
|
||||
@@ -1,19 +1,25 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDCTCCAfGgAwIBAgIUHhZUK1vmww2wCepWPuVcU6d27hMwDQYJKoZIhvcNAQEL
|
||||
BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTI2MDExODAyMzM0NFoXDTI3MDEx
|
||||
ODAyMzM0NFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF
|
||||
AAOCAQ8AMIIBCgKCAQEAuUJGtSZzd+ZpLi+efjrkxJJNfVxVz2VLhknNM2WKeOYx
|
||||
JTK/VaTYq5hrczy6fEUnMhDAJCgEPUFlOK3vn1gFJKNMN8m7arkLVk6PYtrx8CTw
|
||||
w78Q06FLITr6hR0vlJNpN4MsmGxYwUoUpn1j5JdfZF7foxNAZRiwoopf7ZJxltDu
|
||||
PIuFjmVZqdzR8c6vmqIqdawx/V6sL9fizZr+CDH3oTsTUirn2qM+1ibBtPDiBvfX
|
||||
omUsr6MVOcTtvnMvAdy9NfV88qwF7MEWBGCjXkoT1bKCLD8hjn8l7GjRmPcmMFE2
|
||||
GqWEvfJiFkBK0CgSHYEUwzo0UtVNeQr0k0qkDRub6QIDAQABo1MwUTAdBgNVHQ4E
|
||||
FgQU5VeD67yFLV0QNYbHaJ6u9cM6UbkwHwYDVR0jBBgwFoAU5VeD67yFLV0QNYbH
|
||||
aJ6u9cM6UbkwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEABueA
|
||||
8ujAD+yjeP5dTgqQH1G0hlriD5LmlJYnktaLarFU+y+EZlRFwjdORF/vLPwSG+y7
|
||||
CLty/xlmKKQop70QzQ5jtJcsWzUjww8w1sO3AevfZlIF3HNhJmt51ihfvtJ7DVCv
|
||||
CNyMeYO0pBqRKwOuhbG3EtJgyV7MF8J25UEtO4t+GzX3jcKKU4pWP+kyLBVfeDU3
|
||||
MQuigd2LBwBQQFxZdpYpcXVKnAJJlHZIt68ycO1oSBEJO9fIF0CiAlC6ITxjtYtz
|
||||
oCjd6cCLKMJiC6Zg7t1Q17vGl+FdGyQObSsiYsYO9N3CVaeDdpyGCH0Rfa0+oZzu
|
||||
a5U9/l1FHlvpX980bw==
|
||||
MIIEJDCCAoygAwIBAgIQfbdj1KSREvW82wxWZcDRsDANBgkqhkiG9w0BAQsFADBf
|
||||
MR4wHAYDVQQKExVta2NlcnQgZGV2ZWxvcG1lbnQgQ0ExGjAYBgNVBAsMEXJvb3RA
|
||||
OTIxZjA1YmRkNDk4MSEwHwYDVQQDDBhta2NlcnQgcm9vdEA5MjFmMDViZGQ0OTgw
|
||||
HhcNMjYwMTIyMjAwMzIwWhcNMjgwNDIyMjAwMzIwWjBFMScwJQYDVQQKEx5ta2Nl
|
||||
cnQgZGV2ZWxvcG1lbnQgY2VydGlmaWNhdGUxGjAYBgNVBAsMEXJvb3RANzk2OWU1
|
||||
ZjA2MGM4MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2u+HpnTr+Ecj
|
||||
X6Ur4I8YHVKiEIgOFozJWWAeCPSZg0lr9/z3UDl7QPfRKaDk11OMN7DB0Y/ZAIHj
|
||||
kMFdYc+ODOUAKGv/MvlM41OeRUXhrt0Gr/TJoDe9RLzs9ffASpqTHUNNrmyj/fLP
|
||||
M5VZU+4Y2POFq8qji6Otkvqr6wp+2CTS/fkAtenFS2X+Z5u6BBq1/KBSWZhUFSuJ
|
||||
sAGN9u+l20Cj/Sxp2nD1npvMEvPehFKEK2tZGgFr+X426jJ4Znd19EyZoI/xetG6
|
||||
ybSzBdQk1KbhWFa3LPuOG814m9Qh21vaL0Hj2hpqC3KEQ2jiuCovjRS+RBGatltl
|
||||
5m47Cj0UrQIDAQABo3YwdDAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYB
|
||||
BQUHAwEwHwYDVR0jBBgwFoAUjf0NOmUuoqSSD1Mbu/3G+wYxvjEwLAYDVR0RBCUw
|
||||
I4IJbG9jYWxob3N0hwR/AAABhxAAAAAAAAAAAAAAAAAAAAABMA0GCSqGSIb3DQEB
|
||||
CwUAA4IBgQAorThUyu4FxHkOafMp/4CmvptfNvYeYP81DW0wpJxGL68Bz+idvXKv
|
||||
JopX/ZAr+dDhS/TLeSnzt83W7TaBBHY+usvsiBx9+pZOVZIpreXRamPu7utmuC46
|
||||
dictMNGlRNX9bwAApOJ24NCpOVSIIKtjHcjl4idwUHqLVGS+3wsmxIILYxigzkuT
|
||||
fcK5vs0ItZWeuunsBAGb/U/Iu9zZ71rtmBejxNPyEvd8+bZC0m2mtV8C0Lpn58jZ
|
||||
FiEf5OHiOdWG9O/uh3QeVWkuKLmaH6a8VdKRSIlOxEEZdkUYlwuVvzeWgFw4kjm8
|
||||
rNWz0aIPovmcgLXoUG1d1D8klveGd3plF7N2p3xWqKmS6R6FJHx7MIxH6XmBATii
|
||||
x/193Sgzqe8mwXQr14ulg2M/B3ZWleNdD6SeieADSgvRKjnlO7xwUmcFrffnEKEG
|
||||
WcSPoGIuZ8V2pkYLh7ipPN+tFUbhmrWnsao5kQ0sqLlfscyO9hYQeQRtTjtC3P8r
|
||||
FJYOdBMOCzE=
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
@@ -1,28 +1,28 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC5Qka1JnN35mku
|
||||
L55+OuTEkk19XFXPZUuGSc0zZYp45jElMr9VpNirmGtzPLp8RScyEMAkKAQ9QWU4
|
||||
re+fWAUko0w3ybtquQtWTo9i2vHwJPDDvxDToUshOvqFHS+Uk2k3gyyYbFjBShSm
|
||||
fWPkl19kXt+jE0BlGLCiil/tknGW0O48i4WOZVmp3NHxzq+aoip1rDH9Xqwv1+LN
|
||||
mv4IMfehOxNSKufaoz7WJsG08OIG99eiZSyvoxU5xO2+cy8B3L019XzyrAXswRYE
|
||||
YKNeShPVsoIsPyGOfyXsaNGY9yYwUTYapYS98mIWQErQKBIdgRTDOjRS1U15CvST
|
||||
SqQNG5vpAgMBAAECggEAAnv0Dw1Mv+rRy4ZyxtObEVPXPRzoxnDDXzHP4E16BTye
|
||||
Fc/4pSBUIAUn2bPvLz0/X8bMOa4dlDcIv7Eu9Pvns8AY70vMaUReA80fmtHVD2xX
|
||||
1PCT0X3InnxRAYKstSIUIGs+aHvV5Z+iJ8F82soOStN1MU56h+JLWElL5deCPHq3
|
||||
tLZT8wM9aOZlNG72kJ71+DlcViahynQj8+VrionOLNjTJ2Jv/ByjM3GMIuSdBrgd
|
||||
Sl4YAcdn6ontjJGoTgI+e+qkBAPwMZxHarNGQgbS0yNVIJe7Lq4zIKHErU/ZSmpD
|
||||
GzhdVNzhrjADNIDzS7G+pxtz+aUxGtmRvOyopy8GAQKBgQDEPp2mRM+uZVVT4e1j
|
||||
pkKO1c3O8j24I5mGKwFqhhNs3qGy051RXZa0+cQNx63GokXQan9DIXzc/Il7Y72E
|
||||
z9bCFbcSWnlP8dBIpWiJm+UmqLXRyY4N8ecNnzL5x+Tuxm5Ij+ixJwXgdz/TLNeO
|
||||
MBzu+Qy738/l/cAYxwcF7mR7AQKBgQDxq1F95HzCxBahRU9OGUO4s3naXqc8xKCC
|
||||
m3vbbI8V0Exse2cuiwtlPPQWzTPabLCJVvCGXNru98sdeOu9FO9yicwZX0knOABK
|
||||
QfPyDeITsh2u0C63+T9DNn6ixI/T68bTs7DHawEYbpS7bR50BnbHbQrrOAo6FSXF
|
||||
yC7+Te+o6QKBgQCXEWSmo/4D0Dn5Usg9l7VQ40GFd3EPmUgLwntal0/I1TFAyiom
|
||||
gpcLReIogXhCmpSHthO1h8fpDfZ/p+4ymRRHYBQH6uHMKugdpEdu9zVVpzYgArp5
|
||||
/afSEqVZJwoSzWoELdQA23toqiPV2oUtDdiYFdw5nDccY1RHPp8nb7amAQKBgQDj
|
||||
f4DhYDxKJMmg21xCiuoDb4DgHoaUYA0xpii8cL9pq4KmBK0nVWFO1kh5Robvsa2m
|
||||
PB+EfNjkaIPepLxWbOTUEAAASoDU2JT9UoTQcl1GaUAkFnpEWfBB14TyuNMkjinH
|
||||
lLpvn72SQFbm8VvfoU4jgfTrZP/LmajLPR1v6/IWMQKBgBh9qvOTax/GugBAWNj3
|
||||
ZvF99rHOx0rfotEdaPcRN66OOiSWILR9yfMsTvwt1V0VEj7OqO9juMRFuIyB57gd
|
||||
Hs/zgbkuggqjr1dW9r22P/UpzpodAEEN2d52RSX8nkMOkH61JXlH2MyRX65kdExA
|
||||
VkTDq6KwomuhrU3z0+r/MSOn
|
||||
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDa74emdOv4RyNf
|
||||
pSvgjxgdUqIQiA4WjMlZYB4I9JmDSWv3/PdQOXtA99EpoOTXU4w3sMHRj9kAgeOQ
|
||||
wV1hz44M5QAoa/8y+UzjU55FReGu3Qav9MmgN71EvOz198BKmpMdQ02ubKP98s8z
|
||||
lVlT7hjY84WryqOLo62S+qvrCn7YJNL9+QC16cVLZf5nm7oEGrX8oFJZmFQVK4mw
|
||||
AY3276XbQKP9LGnacPWem8wS896EUoQra1kaAWv5fjbqMnhmd3X0TJmgj/F60brJ
|
||||
tLMF1CTUpuFYVrcs+44bzXib1CHbW9ovQePaGmoLcoRDaOK4Ki+NFL5EEZq2W2Xm
|
||||
bjsKPRStAgMBAAECggEBALFhpHwe+xh7OpPBlR0pkpYfXyMZuKBYjMIW9/61frM6
|
||||
B3oywIWFLPFkV1js/Lvg+xgb48zQSTb6BdBAelJHAYY8+7XEWk2IYt1D4FWr2r/8
|
||||
X/Cr2bgvsO9CSpK2mltXhZ4N66BIcU3NLkdS178Ch6svErwvP/ZhNL6Czktug3rG
|
||||
S2fxpKqoVQhqWiEBV0vBWpw2oskvcpP2Btev2eaJeDmP1IkCaKIU9jJzmSg8UKj3
|
||||
AxJJ8lJvlDi2Z8mfB0BVIFZSI1s44LqbuPdITsWvG49srdAhyLkifcbY2r7eH8+s
|
||||
rFNswCaqqNwmzZXHMFedfvgVJHtCTGX/U8G55dcG/YECgYEA6PvcX+axT4aSaG+/
|
||||
fQpyW8TmNmIwS/8HD8rA5dSBB2AE+RnqbxxB1SWWk4w47R/riYOJCy6C88XUZWLn
|
||||
05cYotR9lA6tZOYqIPUIxfCHl4vDDBnOGAB0ga3z5vF3X7HV6uspPFcrGVJ9k42S
|
||||
BK1gk7Kj8RxVQMqXSqkR/pXJ4l0CgYEA8JBkZ4MRi1D74SAU+TY1rnUCTs2EnP1U
|
||||
TKAI9qHyvNJOFZaObWPzegkZryZTm+yTblwvYkryMjvsiuCdv6ro/VKc667F3OBs
|
||||
dJ/8+ylO0lArP9a23QHViNSvQmyi3bsw2UpIGQRGq2C4LxDceo5cYoVOWIlLl8u3
|
||||
LUuL0IfxdpECgYEAyMd0DPljyGLyfSoAXaPJFajDtA4+DOAEl+lk/yt43oAzCPD6
|
||||
hTJW0XcJIrJuxHsDoohGa+pzU90iwxTPMBtAUeLJLfTQHOn1WF2SZ/J3B3ScbCs4
|
||||
3ppVzQO580YYV9GLxl1ONf/w1muuaKBSO9GmLuJ+QeTm22U7qE23gixXxMkCgYEA
|
||||
3R7cK4l+huBZpgUnQitiDInhJS4jx2nUItq3YnxZ8tYckBtjr4lAM9xJj4VbNOew
|
||||
XLC/nUnmdeY+9yif153xq2hUdQ6hMPXYuxqUHwlJOmgWWQez7lHRRYS51ASnb8iw
|
||||
jgqJWvVjQAQXSKvm/X/9y1FdQmRw54aJSUk3quZKPQECgYA5DqFXn/jYOUCGfCUD
|
||||
RENWe+3fNZSJCWmr4yvjEy9NUT5VcA+/hZHbbLUnVHfAHxJmdGhTzFe+ZGWFAGwi
|
||||
Wo62BDu1fqHHegCWaFFluDnz8iZTxXtHMEGSBZnXC4f5wylcxRtCPUbrLMRTPo4O
|
||||
t85qeBu1dMq1XQtU6ab3w3W0nw==
|
||||
-----END PRIVATE KEY-----
|
||||
|
||||
27
certs/mkcert-ca.crt
Normal file
27
certs/mkcert-ca.crt
Normal file
@@ -0,0 +1,27 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIEjjCCAvagAwIBAgIRAJtq2Z0+W981Ct2dMVPb3bQwDQYJKoZIhvcNAQELBQAw
|
||||
XzEeMBwGA1UEChMVbWtjZXJ0IGRldmVsb3BtZW50IENBMRowGAYDVQQLDBFyb290
|
||||
QDkyMWYwNWJkZDQ5ODEhMB8GA1UEAwwYbWtjZXJ0IHJvb3RAOTIxZjA1YmRkNDk4
|
||||
MB4XDTI2MDEyMjA5NDcxNVoXDTM2MDEyMjA5NDcxNVowXzEeMBwGA1UEChMVbWtj
|
||||
ZXJ0IGRldmVsb3BtZW50IENBMRowGAYDVQQLDBFyb290QDkyMWYwNWJkZDQ5ODEh
|
||||
MB8GA1UEAwwYbWtjZXJ0IHJvb3RAOTIxZjA1YmRkNDk4MIIBojANBgkqhkiG9w0B
|
||||
AQEFAAOCAY8AMIIBigKCAYEAxXR5gXDwv5cfQSud1eEhwDuaSaf5kf8NtPnucZXY
|
||||
AN+/QW1+OEKJFuawj/YrSbL/yIB8sUSJToEYNJ4LAgzIZ4+TPYVvOIPqQnimfj98
|
||||
2AKCt73U/AMvoEpts7U0s37f5wF8o+BHXfChxyN//z96+wsQ+2+Q9QBGjirvScF+
|
||||
8FRnupcygDeGZ8x3JQVaEfEV6iYyXFl/4tEDVr9QX4avyUlf0vp1Y90TG3L42JYQ
|
||||
xDU37Ct9dqsxPCPOPjmkQi9HV5TeqLTs/4NdrEYOSk7bOVMzL8EHs2prRL7sWzYJ
|
||||
gRT+VXFPpoSCkZs1gS3FNXukTGx5LNsstyJZRa99cGgDcqvNseig06KUzZrRnCig
|
||||
kARLF/n8VTpHETEuTdxdnXJO3i2N/99mG/2/lej9HNDMaqg45ur5EhaFhHarXMtc
|
||||
wy7nfGoThzscZvpFbVHorhgRxzsUqRHTMHa9mUOYtShMA0IwFccHcczY3CDxXLg9
|
||||
hC+24pdiCxtRmi23JB10Th+nAgMBAAGjRTBDMA4GA1UdDwEB/wQEAwICBDASBgNV
|
||||
HRMBAf8ECDAGAQH/AgEAMB0GA1UdDgQWBBSN/Q06ZS6ipJIPUxu7/cb7BjG+MTAN
|
||||
BgkqhkiG9w0BAQsFAAOCAYEAghQL80XpPs70EbCmETYri5ryjJPbsxK3Z3QgBYoy
|
||||
/P/021eK5woLx4EDeEUyrRayOtLlDWiIMdV7FmlY4GpEdonVbqFJY2gghkIuQpkw
|
||||
lWqbk08F+iXe8PSGD1qz4y6enioCRAx+RaZ6jnVJtaC8AR257FVhGIzDBiOA+LDM
|
||||
L1yq6Bxxij17q9s5HL9KuzxWRMuXACHmaGBXHpl/1n4dIxi2lXRgp+1xCR/VPNPt
|
||||
/ZRy29kncd8Fxx+VEtc0muoJvRo4ttVWhBvAVJrkAeukjYKpcDDfRU6Y22o54jD/
|
||||
mseDb+0UPwSxaKbnGJlCcRbbh6i14bA4KfPq93bZX+Tlq9VCp8LvQSl3oU+23RVc
|
||||
KjBB9EJnoBBNGIY7VmRI+QowrnlP2wtg2fTNaPqULtjqA9frbMTP0xTlumLzGB+6
|
||||
9Da7/+AE2in3Aa8Xyry4BiXbk2L6c1xz/Cd1ZpFrSXAOEi1Xt7so/Ck0yM3c2KWK
|
||||
5aSfCjjOzONMPJyY1oxodJ1p
|
||||
-----END CERTIFICATE-----
|
||||
@@ -46,6 +46,10 @@ services:
|
||||
- node_modules_data:/app/node_modules
|
||||
# Mount PostgreSQL logs for Logstash access (ADR-050)
|
||||
- postgres_logs:/var/log/postgresql:ro
|
||||
# Mount Redis logs for Logstash access (ADR-050)
|
||||
- redis_logs:/var/log/redis:ro
|
||||
# Mount PM2 logs for Logstash access (ADR-050, ADR-014)
|
||||
- pm2_logs:/var/log/pm2
|
||||
ports:
|
||||
- '80:80' # HTTP redirect to HTTPS (matches production)
|
||||
- '443:443' # Frontend HTTPS (nginx proxies Vite 5173 → 443)
|
||||
@@ -53,6 +57,8 @@ services:
|
||||
- '8000:8000' # Bugsink error tracking HTTP (ADR-015)
|
||||
- '8443:8443' # Bugsink error tracking HTTPS (ADR-015)
|
||||
environment:
|
||||
# Timezone: PST (America/Los_Angeles) for consistent log timestamps
|
||||
- TZ=America/Los_Angeles
|
||||
# Core settings
|
||||
- NODE_ENV=development
|
||||
# Database - use service name for Docker networking
|
||||
@@ -118,6 +124,10 @@ services:
|
||||
ports:
|
||||
- '5432:5432'
|
||||
environment:
|
||||
# Timezone: PST (America/Los_Angeles) for consistent log timestamps
|
||||
TZ: America/Los_Angeles
|
||||
# PostgreSQL timezone setting (used by log_timezone and timezone parameters)
|
||||
PGTZ: America/Los_Angeles
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: flyer_crawler_dev
|
||||
@@ -138,6 +148,8 @@ services:
|
||||
postgres
|
||||
-c config_file=/var/lib/postgresql/data/postgresql.conf
|
||||
-c hba_file=/var/lib/postgresql/data/pg_hba.conf
|
||||
-c timezone=America/Los_Angeles
|
||||
-c log_timezone=America/Los_Angeles
|
||||
-c log_min_messages=notice
|
||||
-c client_min_messages=notice
|
||||
-c logging_collector=on
|
||||
@@ -166,10 +178,20 @@ services:
|
||||
redis:
|
||||
image: docker.io/library/redis:alpine
|
||||
container_name: flyer-crawler-redis
|
||||
# Run as root to allow entrypoint to set up log directory permissions
|
||||
# Redis will drop privileges after setup via gosu in entrypoint
|
||||
user: root
|
||||
ports:
|
||||
- '6379:6379'
|
||||
environment:
|
||||
# Timezone: PST (America/Los_Angeles) for consistent log timestamps
|
||||
TZ: America/Los_Angeles
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
# Create log volume for Logstash access (ADR-050)
|
||||
- redis_logs:/var/log/redis
|
||||
# Mount custom entrypoint for log directory setup (ADR-050)
|
||||
- ./docker/redis/docker-entrypoint.sh:/usr/local/bin/docker-entrypoint.sh:ro
|
||||
# Healthcheck ensures redis is ready before app starts
|
||||
healthcheck:
|
||||
test: ['CMD', 'redis-cli', 'ping']
|
||||
@@ -177,8 +199,17 @@ services:
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
start_period: 5s
|
||||
# Enable persistence for development data
|
||||
command: redis-server --appendonly yes
|
||||
# Enable persistence and file logging for Logstash (ADR-050)
|
||||
# Redis log levels: debug, verbose, notice (default), warning
|
||||
# Custom entrypoint sets up log directory with correct permissions
|
||||
entrypoint: ['/usr/local/bin/docker-entrypoint.sh']
|
||||
command:
|
||||
- --appendonly
|
||||
- 'yes'
|
||||
- --logfile
|
||||
- /var/log/redis/redis-server.log
|
||||
- --loglevel
|
||||
- notice
|
||||
|
||||
# ===================
|
||||
# Named Volumes
|
||||
@@ -190,6 +221,10 @@ volumes:
|
||||
name: flyer-crawler-postgres-logs
|
||||
redis_data:
|
||||
name: flyer-crawler-redis-data
|
||||
redis_logs:
|
||||
name: flyer-crawler-redis-logs
|
||||
pm2_logs:
|
||||
name: flyer-crawler-pm2-logs
|
||||
node_modules_data:
|
||||
name: flyer-crawler-node-modules
|
||||
|
||||
|
||||
574
docker/logstash/bugsink.conf
Normal file
574
docker/logstash/bugsink.conf
Normal file
@@ -0,0 +1,574 @@
|
||||
# docker/logstash/bugsink.conf
|
||||
# ============================================================================
|
||||
# Logstash Pipeline Configuration for Bugsink Error Tracking
|
||||
# ============================================================================
|
||||
# This configuration aggregates logs from multiple sources and forwards errors
|
||||
# to Bugsink (Sentry-compatible error tracking) in the dev container.
|
||||
#
|
||||
# Sources:
|
||||
# - PM2 managed logs (/var/log/pm2/*.log) - API, Worker, Vite (ADR-014)
|
||||
# - Pino application logs (/app/logs/*.log) - JSON format (fallback)
|
||||
# - PostgreSQL logs (/var/log/postgresql/*.log) - Including fn_log() output
|
||||
# - NGINX logs (/var/log/nginx/*.log) - Access and error logs
|
||||
# - Redis logs (/var/log/redis/*.log) - Via shared volume (ADR-050)
|
||||
#
|
||||
# Bugsink Projects (3-project architecture):
|
||||
# - Project 1: Backend API (Dev) - Pino/PM2 app errors, PostgreSQL errors
|
||||
# DSN Key: cea01396c56246adb5878fa5ee6b1d22
|
||||
# - Project 2: Frontend (Dev) - Configured via Sentry SDK in browser
|
||||
# DSN Key: d92663cb73cf4145b677b84029e4b762
|
||||
# - Project 4: Infrastructure (Dev) - Redis, NGINX, PM2 operational logs
|
||||
# DSN Key: 14e8791da3d347fa98073261b596cab9
|
||||
#
|
||||
# Routing Logic:
|
||||
# - Backend logs (type: pm2_api, pm2_worker, pino, postgres) -> Project 1
|
||||
# - Infrastructure logs (type: redis, nginx_error, nginx_5xx) -> Project 4
|
||||
# - Vite errors (type: pm2_vite with errors) -> Project 4 (build tooling)
|
||||
#
|
||||
# Related Documentation:
|
||||
# - docs/adr/0050-postgresql-function-observability.md
|
||||
# - docs/adr/0015-application-performance-monitoring-and-error-tracking.md
|
||||
# - docs/operations/LOGSTASH-QUICK-REF.md
|
||||
# ============================================================================
|
||||
|
||||
input {
|
||||
# ============================================================================
|
||||
# PM2 Managed Process Logs (ADR-014, ADR-050)
|
||||
# ============================================================================
|
||||
# PM2 manages all Node.js processes in the dev container, matching production.
|
||||
# Logs are written to /var/log/pm2 for Logstash integration.
|
||||
#
|
||||
# Process logs:
|
||||
# - api-out.log / api-error.log: API server (Pino JSON format)
|
||||
# - worker-out.log / worker-error.log: Background worker (Pino JSON format)
|
||||
# - vite-out.log / vite-error.log: Vite dev server (plain text)
|
||||
# ============================================================================
|
||||
|
||||
# PM2 API Server Logs (Pino JSON format)
|
||||
file {
|
||||
path => "/var/log/pm2/api-out.log"
|
||||
codec => json_lines
|
||||
type => "pm2_api"
|
||||
tags => ["app", "backend", "pm2", "api"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pm2_api_out"
|
||||
}
|
||||
|
||||
file {
|
||||
path => "/var/log/pm2/api-error.log"
|
||||
codec => json_lines
|
||||
type => "pm2_api"
|
||||
tags => ["app", "backend", "pm2", "api", "stderr"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pm2_api_error"
|
||||
}
|
||||
|
||||
# PM2 Worker Logs (Pino JSON format)
|
||||
file {
|
||||
path => "/var/log/pm2/worker-out.log"
|
||||
codec => json_lines
|
||||
type => "pm2_worker"
|
||||
tags => ["app", "worker", "pm2"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pm2_worker_out"
|
||||
}
|
||||
|
||||
file {
|
||||
path => "/var/log/pm2/worker-error.log"
|
||||
codec => json_lines
|
||||
type => "pm2_worker"
|
||||
tags => ["app", "worker", "pm2", "stderr"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pm2_worker_error"
|
||||
}
|
||||
|
||||
# PM2 Vite Logs (plain text format)
|
||||
file {
|
||||
path => "/var/log/pm2/vite-out.log"
|
||||
codec => plain
|
||||
type => "pm2_vite"
|
||||
tags => ["app", "frontend", "pm2", "vite"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pm2_vite_out"
|
||||
}
|
||||
|
||||
file {
|
||||
path => "/var/log/pm2/vite-error.log"
|
||||
codec => plain
|
||||
type => "pm2_vite"
|
||||
tags => ["app", "frontend", "pm2", "vite", "stderr"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pm2_vite_error"
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Pino Application Logs (Fallback Path)
|
||||
# ============================================================================
|
||||
# JSON-formatted logs from the Node.js application using Pino logger.
|
||||
# Note: Primary logs now go to /var/log/pm2. This is a fallback.
|
||||
# Log levels: 10=trace, 20=debug, 30=info, 40=warn, 50=error, 60=fatal
|
||||
file {
|
||||
path => "/app/logs/*.log"
|
||||
codec => json
|
||||
type => "pino"
|
||||
tags => ["app", "backend"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_pino"
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PostgreSQL Function Logs (ADR-050)
|
||||
# ============================================================================
|
||||
# Captures PostgreSQL log output including fn_log() structured JSON messages.
|
||||
# PostgreSQL is configured to write logs to /var/log/postgresql/ (shared volume).
|
||||
# Log format: "2026-01-22 14:30:00 PST [5724] postgres@flyer_crawler_dev LOG: message"
|
||||
# Note: Timestamps are in PST (America/Los_Angeles) timezone as configured in compose.dev.yml
|
||||
file {
|
||||
path => "/var/log/postgresql/*.log"
|
||||
type => "postgres"
|
||||
tags => ["postgres", "database"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_postgres"
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# NGINX Logs
|
||||
# ============================================================================
|
||||
# Access logs for request monitoring and error logs for proxy/SSL issues.
|
||||
file {
|
||||
path => "/var/log/nginx/access.log"
|
||||
type => "nginx_access"
|
||||
tags => ["nginx", "access"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_nginx_access"
|
||||
}
|
||||
|
||||
file {
|
||||
path => "/var/log/nginx/error.log"
|
||||
type => "nginx_error"
|
||||
tags => ["nginx", "error"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_nginx_error"
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Redis Logs (ADR-050)
|
||||
# ============================================================================
|
||||
# Redis logs from the shared volume. Redis uses its own log format:
|
||||
# "<pid>:<role> <day> <month> <time> <loglevel> <message>"
|
||||
# Example: "1:M 22 Jan 2026 14:30:00.123 * Ready to accept connections"
|
||||
# Log levels: . (debug), - (verbose), * (notice), # (warning)
|
||||
file {
|
||||
path => "/var/log/redis/redis-server.log"
|
||||
type => "redis"
|
||||
tags => ["redis", "infra"]
|
||||
start_position => "beginning"
|
||||
sincedb_path => "/var/lib/logstash/sincedb_redis"
|
||||
}
|
||||
}
|
||||
|
||||
filter {
|
||||
# ============================================================================
|
||||
# PM2 API/Worker Log Processing (ADR-014)
|
||||
# ============================================================================
|
||||
# PM2 API and Worker logs are in Pino JSON format.
|
||||
# Process them the same way as direct Pino logs.
|
||||
if [type] in ["pm2_api", "pm2_worker"] {
|
||||
# Tag errors (level 50 = error, 60 = fatal)
|
||||
if [level] >= 50 {
|
||||
mutate { add_tag => ["error", "pm2_pino_error"] }
|
||||
|
||||
# Map Pino level to Sentry level and set error_message field
|
||||
if [level] == 60 {
|
||||
mutate { add_field => { "sentry_level" => "fatal" } }
|
||||
} else {
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
}
|
||||
|
||||
# Copy msg to error_message for consistent access in output
|
||||
mutate { add_field => { "error_message" => "%{msg}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PM2 Vite Log Processing (ADR-014)
|
||||
# ============================================================================
|
||||
# Vite logs are plain text. Look for error patterns.
|
||||
if [type] == "pm2_vite" {
|
||||
# Detect Vite build/compilation errors
|
||||
if [message] =~ /(?i)(error|failed|exception|cannot|enoent|eperm|EACCES|ECONNREFUSED)/ {
|
||||
mutate { add_tag => ["error", "vite_error"] }
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
mutate { add_field => { "error_message" => "Vite: %{message}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Pino Log Processing (Fallback)
|
||||
# ============================================================================
|
||||
if [type] == "pino" {
|
||||
# Tag errors (level 50 = error, 60 = fatal)
|
||||
if [level] >= 50 {
|
||||
mutate { add_tag => ["error", "pino_error"] }
|
||||
|
||||
# Map Pino level to Sentry level and set error_message field
|
||||
if [level] == 60 {
|
||||
mutate { add_field => { "sentry_level" => "fatal" } }
|
||||
} else {
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
}
|
||||
|
||||
# Copy msg to error_message for consistent access in output
|
||||
mutate { add_field => { "error_message" => "%{msg}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PostgreSQL Log Processing (ADR-050)
|
||||
# ============================================================================
|
||||
# PostgreSQL log format in dev container:
|
||||
# "2026-01-22 14:30:00 PST [5724] postgres@flyer_crawler_dev LOG: message"
|
||||
# "2026-01-22 15:06:03 PST [19851] postgres@flyer_crawler_dev ERROR: column "id" does not exist"
|
||||
# Note: Timestamps are in PST (America/Los_Angeles) timezone
|
||||
if [type] == "postgres" {
|
||||
# Parse PostgreSQL log prefix with timezone (PST in dev, may vary in prod)
|
||||
grok {
|
||||
match => { "message" => "%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{TIME} %{WORD:pg_timezone} \[%{POSINT:pg_pid}\] %{DATA:pg_user}@%{DATA:pg_database} %{WORD:pg_level}: ?%{GREEDYDATA:pg_message}" }
|
||||
tag_on_failure => ["_postgres_grok_failure"]
|
||||
}
|
||||
|
||||
# Check if this is a structured JSON log from fn_log()
|
||||
if [pg_message] =~ /^\{.*"source":"postgresql".*\}$/ {
|
||||
json {
|
||||
source => "pg_message"
|
||||
target => "fn_log"
|
||||
}
|
||||
|
||||
# Mark as error if level is WARNING or ERROR
|
||||
if [fn_log][level] in ["WARNING", "ERROR"] {
|
||||
mutate { add_tag => ["error", "db_function"] }
|
||||
mutate { add_field => { "sentry_level" => "warning" } }
|
||||
if [fn_log][level] == "ERROR" {
|
||||
mutate { replace => { "sentry_level" => "error" } }
|
||||
}
|
||||
# Use fn_log message for error_message
|
||||
mutate { add_field => { "error_message" => "%{[fn_log][message]}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# Catch native PostgreSQL errors (ERROR: or FATAL: prefix in pg_level)
|
||||
if [pg_level] == "ERROR" or [pg_level] == "FATAL" {
|
||||
mutate { add_tag => ["error", "postgres_native"] }
|
||||
|
||||
if [pg_level] == "FATAL" {
|
||||
mutate { add_field => { "sentry_level" => "fatal" } }
|
||||
} else {
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
}
|
||||
|
||||
# Use the full pg_message for error_message
|
||||
mutate { add_field => { "error_message" => "PostgreSQL %{pg_level}: %{pg_message}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# NGINX Access Log Processing
|
||||
# ============================================================================
|
||||
if [type] == "nginx_access" {
|
||||
grok {
|
||||
match => { "message" => '%{IPORHOST:client_ip} - %{DATA:user} \[%{HTTPDATE:timestamp}\] "%{WORD:method} %{URIPATHPARAM:request} HTTP/%{NUMBER:http_version}" %{NUMBER:status} %{NUMBER:bytes} "%{DATA:referrer}" "%{DATA:user_agent}"' }
|
||||
tag_on_failure => ["_nginx_access_grok_failure"]
|
||||
}
|
||||
|
||||
# Tag 5xx errors for Bugsink
|
||||
if [status] =~ /^5/ {
|
||||
mutate { add_tag => ["error", "nginx_5xx"] }
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
mutate { add_field => { "error_message" => "HTTP %{status} %{method} %{request}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# NGINX Error Log Processing
|
||||
# ============================================================================
|
||||
# NGINX error log format: "2026/01/22 17:55:01 [error] 16#16: *3 message..."
|
||||
if [type] == "nginx_error" {
|
||||
grok {
|
||||
match => { "message" => "%{YEAR}/%{MONTHNUM}/%{MONTHDAY} %{TIME} \[%{LOGLEVEL:nginx_level}\] %{POSINT:pid}#%{POSINT}: (\*%{POSINT:connection} )?%{GREEDYDATA:nginx_message}" }
|
||||
tag_on_failure => ["_nginx_error_grok_failure"]
|
||||
}
|
||||
|
||||
# Only process actual errors, not notices (like "signal process started")
|
||||
if [nginx_level] in ["error", "crit", "alert", "emerg"] {
|
||||
mutate { add_tag => ["error", "nginx_error"] }
|
||||
|
||||
# Map NGINX log level to Sentry level
|
||||
if [nginx_level] in ["crit", "alert", "emerg"] {
|
||||
mutate { add_field => { "sentry_level" => "fatal" } }
|
||||
} else {
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
}
|
||||
|
||||
mutate { add_field => { "error_message" => "NGINX [%{nginx_level}]: %{nginx_message}" } }
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Redis Log Processing (ADR-050)
|
||||
# ============================================================================
|
||||
# Redis log format: "<pid>:<role> <day> <month> <time>.<ms> <level> <message>"
|
||||
# Example: "1:M 22 Jan 14:30:00.123 * Ready to accept connections"
|
||||
# Roles: M=master, S=slave, C=sentinel, X=cluster
|
||||
# Levels: . (debug), - (verbose), * (notice), # (warning)
|
||||
if [type] == "redis" {
|
||||
# Parse Redis log format
|
||||
grok {
|
||||
match => { "message" => "%{POSINT:redis_pid}:%{WORD:redis_role} %{MONTHDAY} %{MONTH} %{YEAR}? ?%{TIME} %{DATA:redis_level_char} %{GREEDYDATA:redis_message}" }
|
||||
tag_on_failure => ["_redis_grok_failure"]
|
||||
}
|
||||
|
||||
# Map Redis level characters to human-readable levels
|
||||
# . = debug, - = verbose, * = notice, # = warning
|
||||
if [redis_level_char] == "#" {
|
||||
mutate {
|
||||
add_field => { "redis_level" => "warning" }
|
||||
add_tag => ["error", "redis_warning"]
|
||||
}
|
||||
mutate { add_field => { "sentry_level" => "warning" } }
|
||||
mutate { add_field => { "error_message" => "Redis WARNING: %{redis_message}" } }
|
||||
} else if [redis_level_char] == "*" {
|
||||
mutate { add_field => { "redis_level" => "notice" } }
|
||||
} else if [redis_level_char] == "-" {
|
||||
mutate { add_field => { "redis_level" => "verbose" } }
|
||||
} else if [redis_level_char] == "." {
|
||||
mutate { add_field => { "redis_level" => "debug" } }
|
||||
}
|
||||
|
||||
# Also detect error keywords in message content (e.g., ECONNREFUSED, OOM, etc.)
|
||||
if [redis_message] =~ /(?i)(error|failed|refused|denied|timeout|oom|crash|fatal|exception)/ {
|
||||
if "error" not in [tags] {
|
||||
mutate { add_tag => ["error", "redis_error"] }
|
||||
mutate { add_field => { "sentry_level" => "error" } }
|
||||
mutate { add_field => { "error_message" => "Redis ERROR: %{redis_message}" } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Generate Sentry Event ID and Ensure Required Fields for all errors
|
||||
# ============================================================================
|
||||
# CRITICAL: sentry_level MUST be set for all errors before output.
|
||||
# Bugsink's PostgreSQL schema limits level to varchar(7), so valid values are:
|
||||
# fatal, error, warning, info, debug (all <= 7 chars)
|
||||
# If sentry_level is not set, the literal "%{sentry_level}" (16 chars) is sent,
|
||||
# causing PostgreSQL insertion failures.
|
||||
# ============================================================================
|
||||
if "error" in [tags] {
|
||||
# Use Ruby for robust field handling - handles all edge cases
|
||||
ruby {
|
||||
code => '
|
||||
require "securerandom"
|
||||
|
||||
# Generate unique event ID for Sentry
|
||||
event.set("sentry_event_id", SecureRandom.hex(16))
|
||||
|
||||
# =====================================================================
|
||||
# CRITICAL: Validate and set sentry_level
|
||||
# =====================================================================
|
||||
# Valid Sentry levels (max 7 chars for Bugsink PostgreSQL schema):
|
||||
# fatal, error, warning, info, debug
|
||||
# Default to "error" if missing, empty, or invalid.
|
||||
# =====================================================================
|
||||
valid_levels = ["fatal", "error", "warning", "info", "debug"]
|
||||
current_level = event.get("sentry_level")
|
||||
|
||||
if current_level.nil? || current_level.to_s.strip.empty? || !valid_levels.include?(current_level.to_s.downcase)
|
||||
event.set("sentry_level", "error")
|
||||
else
|
||||
# Normalize to lowercase
|
||||
event.set("sentry_level", current_level.to_s.downcase)
|
||||
end
|
||||
|
||||
# =====================================================================
|
||||
# Ensure error_message has a fallback value
|
||||
# =====================================================================
|
||||
error_msg = event.get("error_message")
|
||||
if error_msg.nil? || error_msg.to_s.strip.empty?
|
||||
fallback_msg = event.get("message") || event.get("msg") || "Unknown error"
|
||||
event.set("error_message", fallback_msg.to_s)
|
||||
end
|
||||
'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output {
|
||||
# ============================================================================
|
||||
# Forward Errors to Bugsink (Project Routing)
|
||||
# ============================================================================
|
||||
# Bugsink uses Sentry-compatible API. Events must include:
|
||||
# - event_id: 32 hex characters (UUID without dashes)
|
||||
# - message: Human-readable error description
|
||||
# - level: fatal, error, warning, info, debug
|
||||
# - timestamp: Unix epoch in seconds
|
||||
# - platform: "node" for backend, "javascript" for frontend
|
||||
#
|
||||
# Authentication via X-Sentry-Auth header with project's public key.
|
||||
#
|
||||
# Project Routing:
|
||||
# - Project 1 (Backend): Pino app logs, PostgreSQL errors
|
||||
# - Project 4 (Infrastructure): Redis, NGINX, Vite build errors
|
||||
# ============================================================================
|
||||
|
||||
# ============================================================================
|
||||
# Infrastructure Errors -> Project 4
|
||||
# ============================================================================
|
||||
# Redis warnings/errors, NGINX errors, and Vite build errors go to
|
||||
# the Infrastructure project for separation from application code errors.
|
||||
if "error" in [tags] and ([type] == "redis" or [type] == "nginx_error" or [type] == "nginx_access" or [type] == "pm2_vite") {
|
||||
http {
|
||||
url => "http://localhost:8000/api/4/store/"
|
||||
http_method => "post"
|
||||
format => "json"
|
||||
headers => {
|
||||
"X-Sentry-Auth" => "Sentry sentry_key=14e8791da3d347fa98073261b596cab9, sentry_version=7"
|
||||
"Content-Type" => "application/json"
|
||||
}
|
||||
mapping => {
|
||||
"event_id" => "%{sentry_event_id}"
|
||||
"timestamp" => "%{@timestamp}"
|
||||
"level" => "%{sentry_level}"
|
||||
"platform" => "other"
|
||||
"logger" => "%{type}"
|
||||
"message" => "%{error_message}"
|
||||
"extra" => {
|
||||
"hostname" => "%{[host][name]}"
|
||||
"source_type" => "%{type}"
|
||||
"tags" => "%{tags}"
|
||||
"original_message" => "%{message}"
|
||||
"project" => "infrastructure"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Backend Application Errors -> Project 1
|
||||
# ============================================================================
|
||||
# Pino application logs (API, Worker), PostgreSQL function errors, and
|
||||
# native PostgreSQL errors go to the Backend API project.
|
||||
else if "error" in [tags] and ([type] in ["pm2_api", "pm2_worker", "pino", "postgres"]) {
|
||||
http {
|
||||
url => "http://localhost:8000/api/1/store/"
|
||||
http_method => "post"
|
||||
format => "json"
|
||||
headers => {
|
||||
"X-Sentry-Auth" => "Sentry sentry_key=cea01396c56246adb5878fa5ee6b1d22, sentry_version=7"
|
||||
"Content-Type" => "application/json"
|
||||
}
|
||||
mapping => {
|
||||
"event_id" => "%{sentry_event_id}"
|
||||
"timestamp" => "%{@timestamp}"
|
||||
"level" => "%{sentry_level}"
|
||||
"platform" => "node"
|
||||
"logger" => "%{type}"
|
||||
"message" => "%{error_message}"
|
||||
"extra" => {
|
||||
"hostname" => "%{[host][name]}"
|
||||
"source_type" => "%{type}"
|
||||
"tags" => "%{tags}"
|
||||
"original_message" => "%{message}"
|
||||
"project" => "backend"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Fallback: Any other errors -> Project 1
|
||||
# ============================================================================
|
||||
# Catch-all for any errors that don't match specific routing rules.
|
||||
else if "error" in [tags] {
|
||||
http {
|
||||
url => "http://localhost:8000/api/1/store/"
|
||||
http_method => "post"
|
||||
format => "json"
|
||||
headers => {
|
||||
"X-Sentry-Auth" => "Sentry sentry_key=cea01396c56246adb5878fa5ee6b1d22, sentry_version=7"
|
||||
"Content-Type" => "application/json"
|
||||
}
|
||||
mapping => {
|
||||
"event_id" => "%{sentry_event_id}"
|
||||
"timestamp" => "%{@timestamp}"
|
||||
"level" => "%{sentry_level}"
|
||||
"platform" => "node"
|
||||
"logger" => "%{type}"
|
||||
"message" => "%{error_message}"
|
||||
"extra" => {
|
||||
"hostname" => "%{[host][name]}"
|
||||
"source_type" => "%{type}"
|
||||
"tags" => "%{tags}"
|
||||
"original_message" => "%{message}"
|
||||
"project" => "backend-fallback"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Store Operational Logs to Files (for debugging/audit)
|
||||
# ============================================================================
|
||||
# NGINX access logs (all requests, not just errors)
|
||||
if [type] == "nginx_access" and "error" not in [tags] {
|
||||
file {
|
||||
path => "/var/log/logstash/nginx-access-%{+YYYY-MM-dd}.log"
|
||||
codec => json_lines
|
||||
}
|
||||
}
|
||||
|
||||
# PostgreSQL operational logs (non-error)
|
||||
if [type] == "postgres" and "error" not in [tags] {
|
||||
file {
|
||||
path => "/var/log/logstash/postgres-operational-%{+YYYY-MM-dd}.log"
|
||||
codec => json_lines
|
||||
}
|
||||
}
|
||||
|
||||
# Redis operational logs (non-error)
|
||||
if [type] == "redis" and "error" not in [tags] {
|
||||
file {
|
||||
path => "/var/log/logstash/redis-operational-%{+YYYY-MM-dd}.log"
|
||||
codec => json_lines
|
||||
}
|
||||
}
|
||||
|
||||
# PM2 API operational logs (non-error) - ADR-014
|
||||
if [type] == "pm2_api" and "error" not in [tags] {
|
||||
file {
|
||||
path => "/var/log/logstash/pm2-api-%{+YYYY-MM-dd}.log"
|
||||
codec => json_lines
|
||||
}
|
||||
}
|
||||
|
||||
# PM2 Worker operational logs (non-error) - ADR-014
|
||||
if [type] == "pm2_worker" and "error" not in [tags] {
|
||||
file {
|
||||
path => "/var/log/logstash/pm2-worker-%{+YYYY-MM-dd}.log"
|
||||
codec => json_lines
|
||||
}
|
||||
}
|
||||
|
||||
# PM2 Vite operational logs (non-error) - ADR-014
|
||||
if [type] == "pm2_vite" and "error" not in [tags] {
|
||||
file {
|
||||
path => "/var/log/logstash/pm2-vite-%{+YYYY-MM-dd}.log"
|
||||
codec => json_lines
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Debug Output (for development only)
|
||||
# ============================================================================
|
||||
# Uncomment to see all processed events in Logstash stdout
|
||||
# stdout { codec => rubydebug }
|
||||
}
|
||||
@@ -9,13 +9,39 @@
|
||||
# - Frontend accessible on https://localhost (port 443)
|
||||
# - Backend API on http://localhost:3001
|
||||
# - Port 80 redirects to HTTPS
|
||||
#
|
||||
# IMPORTANT: Dual Hostname Configuration (localhost AND 127.0.0.1)
|
||||
# ============================================================================
|
||||
# The server_name directive includes BOTH 'localhost' and '127.0.0.1' to
|
||||
# prevent SSL certificate errors when resources use different hostnames.
|
||||
#
|
||||
# Problem Scenario:
|
||||
# 1. User accesses site via https://localhost/
|
||||
# 2. Database stores image URLs as https://127.0.0.1/flyer-images/...
|
||||
# 3. Browser treats these as different origins, showing ERR_CERT_AUTHORITY_INVALID
|
||||
#
|
||||
# Solution:
|
||||
# - mkcert generates certificates valid for: localhost, 127.0.0.1, ::1
|
||||
# - NGINX accepts requests to BOTH hostnames using the same certificate
|
||||
# - Users can access via either hostname without SSL warnings
|
||||
#
|
||||
# The self-signed certificate is generated in Dockerfile.dev with:
|
||||
# mkcert localhost 127.0.0.1 ::1
|
||||
#
|
||||
# This creates a certificate with Subject Alternative Names (SANs) for all
|
||||
# three hostnames, allowing NGINX to serve valid HTTPS for each.
|
||||
#
|
||||
# See also:
|
||||
# - Dockerfile.dev (certificate generation, ~line 69)
|
||||
# - docs/FLYER-URL-CONFIGURATION.md (URL configuration details)
|
||||
# - docs/development/DEBUGGING.md (SSL troubleshooting)
|
||||
# ============================================================================
|
||||
|
||||
# HTTPS Server (main)
|
||||
server {
|
||||
listen 443 ssl;
|
||||
listen [::]:443 ssl;
|
||||
server_name localhost;
|
||||
server_name localhost 127.0.0.1;
|
||||
|
||||
# SSL Configuration (self-signed certificates from mkcert)
|
||||
ssl_certificate /app/certs/localhost.crt;
|
||||
@@ -34,6 +60,37 @@ server {
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# Bugsink Sentry API Proxy (for frontend error reporting)
|
||||
# ============================================================================
|
||||
# The frontend Sentry SDK cannot reach localhost:8000 directly from the browser
|
||||
# because port 8000 is only accessible within the container network.
|
||||
# This proxy allows the browser to send errors to https://localhost/bugsink-api/
|
||||
# which NGINX forwards to the Bugsink container on port 8000.
|
||||
#
|
||||
# Frontend DSN format: https://localhost/bugsink-api/<project_id>
|
||||
# Example: https://localhost/bugsink-api/2 for Frontend (Dev) project
|
||||
#
|
||||
# The Sentry SDK sends POST requests to /bugsink-api/<project>/store/
|
||||
# This proxy strips /bugsink-api and forwards to http://localhost:8000/api/
|
||||
# ============================================================================
|
||||
location /bugsink-api/ {
|
||||
proxy_pass http://localhost:8000/api/;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Allow large error payloads with stack traces
|
||||
client_max_body_size 10M;
|
||||
|
||||
# Timeouts for error reporting (should be fast)
|
||||
proxy_connect_timeout 10s;
|
||||
proxy_send_timeout 30s;
|
||||
proxy_read_timeout 30s;
|
||||
}
|
||||
|
||||
# Proxy WebSocket connections for real-time notifications
|
||||
location /ws {
|
||||
proxy_pass http://localhost:3001;
|
||||
@@ -80,7 +137,7 @@ server {
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
server_name localhost;
|
||||
server_name localhost 127.0.0.1;
|
||||
|
||||
return 301 https://$host$request_uri;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
# This file is mounted into the PostgreSQL container to enable structured logging
|
||||
# from database functions via fn_log()
|
||||
|
||||
# Timezone: PST (America/Los_Angeles) for consistent log timestamps
|
||||
timezone = 'America/Los_Angeles'
|
||||
log_timezone = 'America/Los_Angeles'
|
||||
|
||||
# Enable logging to files for Logstash pickup
|
||||
logging_collector = on
|
||||
log_destination = 'stderr'
|
||||
|
||||
24
docker/redis/docker-entrypoint.sh
Normal file
24
docker/redis/docker-entrypoint.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/sh
|
||||
# docker/redis/docker-entrypoint.sh
|
||||
# ============================================================================
|
||||
# Redis Entrypoint Script for Dev Container
|
||||
# ============================================================================
|
||||
# This script ensures the Redis log directory exists and is writable before
|
||||
# starting Redis. This is needed because the redis_logs volume is mounted
|
||||
# at /var/log/redis but Redis Alpine runs as a non-root user.
|
||||
#
|
||||
# Related: ADR-050 (Log aggregation via Logstash)
|
||||
# ============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
# Create log directory if it doesn't exist
|
||||
mkdir -p /var/log/redis
|
||||
|
||||
# Ensure redis user can write to the log directory
|
||||
# Note: In Alpine Redis image, redis runs as uid 999
|
||||
chown -R redis:redis /var/log/redis
|
||||
chmod 755 /var/log/redis
|
||||
|
||||
# Start Redis with the provided arguments
|
||||
exec redis-server "$@"
|
||||
@@ -28,9 +28,28 @@ The `.env.local` file uses `localhost` while `compose.dev.yml` uses `127.0.0.1`.
|
||||
## HTTPS Setup
|
||||
|
||||
- Self-signed certificates auto-generated with mkcert on container startup
|
||||
- CSRF Protection: Django configured with `SECURE_PROXY_SSL_HEADER` to trust `X-Forwarded-Proto` from nginx
|
||||
- CSRF Protection: Django configured with `CSRF_TRUSTED_ORIGINS` for both `localhost` and `127.0.0.1` (see below)
|
||||
- HTTPS proxy: nginx on port 8443 proxies to Bugsink on port 8000
|
||||
- HTTPS is for UI access only - Sentry SDK uses HTTP directly
|
||||
|
||||
### CSRF Configuration
|
||||
|
||||
Django 4.0+ requires `CSRF_TRUSTED_ORIGINS` for HTTPS POST requests. The Bugsink configuration (`Dockerfile.dev`) includes:
|
||||
|
||||
```python
|
||||
CSRF_TRUSTED_ORIGINS = [
|
||||
"https://localhost:8443",
|
||||
"https://127.0.0.1:8443",
|
||||
"http://localhost:8000",
|
||||
"http://127.0.0.1:8000",
|
||||
]
|
||||
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
|
||||
```
|
||||
|
||||
**Both hostnames are required** because browsers treat `localhost` and `127.0.0.1` as different origins.
|
||||
|
||||
If you get "CSRF verification failed" errors, see [BUGSINK-SETUP.md](tools/BUGSINK-SETUP.md#csrf-verification-failed) for troubleshooting.
|
||||
|
||||
## Isolation Benefits
|
||||
|
||||
- Dev errors stay local, don't pollute production/test dashboards
|
||||
|
||||
@@ -12,6 +12,87 @@ Flyer image and icon URLs are environment-specific to ensure they point to the c
|
||||
| Test | `https://flyer-crawler-test.projectium.com` | `https://flyer-crawler-test.projectium.com/flyer-images/safeway-flyer.jpg` |
|
||||
| Production | `https://flyer-crawler.projectium.com` | `https://flyer-crawler.projectium.com/flyer-images/safeway-flyer.jpg` |
|
||||
|
||||
**Note:** The dev container accepts connections to **both** `https://localhost/` and `https://127.0.0.1/` thanks to the SSL certificate and NGINX configuration. See [SSL Certificate Configuration](#ssl-certificate-configuration-dev-container) below.
|
||||
|
||||
## SSL Certificate Configuration (Dev Container)
|
||||
|
||||
The dev container uses self-signed certificates generated by [mkcert](https://github.com/FiloSottile/mkcert) to enable HTTPS locally. This configuration solves a common mixed-origin SSL issue.
|
||||
|
||||
### The Problem
|
||||
|
||||
When users access the site via `https://localhost/` but image URLs in the database use `https://127.0.0.1/...`, browsers treat these as different origins. This causes `ERR_CERT_AUTHORITY_INVALID` errors when loading images, even though both hostnames point to the same server.
|
||||
|
||||
### The Solution
|
||||
|
||||
1. **Certificate Generation** (`Dockerfile.dev`):
|
||||
|
||||
```bash
|
||||
mkcert localhost 127.0.0.1 ::1
|
||||
```
|
||||
|
||||
This creates a certificate with Subject Alternative Names (SANs) for all three hostnames.
|
||||
|
||||
2. **NGINX Configuration** (`docker/nginx/dev.conf`):
|
||||
|
||||
```nginx
|
||||
server_name localhost 127.0.0.1;
|
||||
```
|
||||
|
||||
NGINX accepts requests to both hostnames using the same SSL certificate.
|
||||
|
||||
### How It Works
|
||||
|
||||
| Component | Configuration |
|
||||
| -------------------- | ---------------------------------------------------- |
|
||||
| SSL Certificate SANs | `localhost`, `127.0.0.1`, `::1` |
|
||||
| NGINX `server_name` | `localhost 127.0.0.1` |
|
||||
| Seed Script URLs | Uses `https://127.0.0.1` (works with DB constraints) |
|
||||
| User Access | Either `https://localhost/` or `https://127.0.0.1/` |
|
||||
|
||||
### Why This Matters
|
||||
|
||||
- **Database Constraints**: The `flyers` table has CHECK constraints requiring URLs to start with `http://` or `https://`. Relative URLs are not allowed.
|
||||
- **Consistent Behavior**: Users can access the site using either hostname without SSL warnings.
|
||||
- **Same Certificate**: Both hostnames use the same self-signed certificate, eliminating mixed-content errors.
|
||||
|
||||
### Verifying the Configuration
|
||||
|
||||
```bash
|
||||
# Check certificate SANs
|
||||
podman exec flyer-crawler-dev openssl x509 -in /app/certs/localhost.crt -text -noout | grep -A1 "Subject Alternative Name"
|
||||
|
||||
# Expected output:
|
||||
# X509v3 Subject Alternative Name:
|
||||
# DNS:localhost, IP Address:127.0.0.1, IP Address:0:0:0:0:0:0:0:1
|
||||
|
||||
# Test both hostnames respond
|
||||
curl -k https://localhost/health
|
||||
curl -k https://127.0.0.1/health
|
||||
```
|
||||
|
||||
### Troubleshooting SSL Issues
|
||||
|
||||
If you encounter `ERR_CERT_AUTHORITY_INVALID`:
|
||||
|
||||
1. **Check NGINX is running**: `podman exec flyer-crawler-dev nginx -t`
|
||||
2. **Verify certificate exists**: `podman exec flyer-crawler-dev ls -la /app/certs/`
|
||||
3. **Ensure both hostnames are in server_name**: Check `/etc/nginx/sites-available/default`
|
||||
4. **Rebuild container if needed**: The certificate is generated at build time
|
||||
|
||||
### Permanent Fix: Install CA Certificate (Recommended)
|
||||
|
||||
To permanently eliminate SSL certificate warnings, install the mkcert CA certificate on your system. This is optional but provides a better development experience.
|
||||
|
||||
The CA certificate is located at `certs/mkcert-ca.crt` in the project root. See [`certs/README.md`](../certs/README.md) for platform-specific installation instructions (Windows, macOS, Linux, Firefox).
|
||||
|
||||
After installation:
|
||||
|
||||
- Your browser will trust all mkcert certificates without warnings
|
||||
- Both `https://localhost/` and `https://127.0.0.1/` will work without SSL errors
|
||||
- Flyer images will load without `ERR_CERT_AUTHORITY_INVALID` errors
|
||||
|
||||
See also: [Debugging Guide - SSL Issues](development/DEBUGGING.md#ssl-certificate-issues)
|
||||
|
||||
## NGINX Static File Serving
|
||||
|
||||
All environments serve flyer images as static files with browser caching:
|
||||
@@ -41,7 +122,7 @@ Set `FLYER_BASE_URL` in your environment configuration:
|
||||
|
||||
```bash
|
||||
# Dev container (.env)
|
||||
FLYER_BASE_URL=https://127.0.0.1
|
||||
FLYER_BASE_URL=https://localhost
|
||||
|
||||
# Test environment
|
||||
FLYER_BASE_URL=https://flyer-crawler-test.projectium.com
|
||||
@@ -58,7 +139,7 @@ The seed script ([src/db/seed.ts](../src/db/seed.ts)) automatically uses the cor
|
||||
2. `NODE_ENV` value:
|
||||
- `production` → `https://flyer-crawler.projectium.com`
|
||||
- `test` → `https://flyer-crawler-test.projectium.com`
|
||||
- Default → `https://127.0.0.1`
|
||||
- Default → `https://localhost`
|
||||
|
||||
The seed script also copies test images from `src/tests/assets/` to `public/flyer-images/`:
|
||||
|
||||
@@ -78,8 +159,8 @@ podman exec -it flyer-crawler-postgres psql -U postgres -d flyer_crawler_dev
|
||||
# Run the update (dev container uses HTTPS with self-signed certs)
|
||||
UPDATE flyers
|
||||
SET
|
||||
image_url = REPLACE(image_url, 'example.com', '127.0.0.1'),
|
||||
icon_url = REPLACE(icon_url, 'example.com', '127.0.0.1')
|
||||
image_url = REPLACE(image_url, 'example.com', 'localhost'),
|
||||
icon_url = REPLACE(icon_url, 'example.com', 'localhost')
|
||||
WHERE
|
||||
image_url LIKE '%example.com%'
|
||||
OR icon_url LIKE '%example.com%';
|
||||
@@ -131,8 +212,10 @@ export const getFlyerBaseUrl = (): string => {
|
||||
}
|
||||
|
||||
// Check if we're in dev container (DB_HOST=postgres is typical indicator)
|
||||
// Use 'localhost' instead of '127.0.0.1' to match the hostname users access
|
||||
// This avoids SSL certificate mixed-origin issues in browsers
|
||||
if (process.env.DB_HOST === 'postgres' || process.env.DB_HOST === '127.0.0.1') {
|
||||
return 'https://127.0.0.1';
|
||||
return 'https://localhost';
|
||||
}
|
||||
|
||||
if (process.env.NODE_ENV === 'production') {
|
||||
@@ -169,13 +252,13 @@ This approach ensures tests work correctly in all environments (dev container, C
|
||||
| `docker/nginx/dev.conf` | Added `/flyer-images/` location block for static file serving |
|
||||
| `.env.example` | Added `FLYER_BASE_URL` variable |
|
||||
| `sql/update_flyer_urls.sql` | SQL script for updating existing data |
|
||||
| Test files | Updated mock data to use `https://127.0.0.1` |
|
||||
| Test files | Updated mock data to use `https://localhost` |
|
||||
|
||||
## Summary
|
||||
|
||||
- Seed script now uses environment-specific HTTPS URLs
|
||||
- Seed script copies test images from `src/tests/assets/` to `public/flyer-images/`
|
||||
- NGINX serves `/flyer-images/` as static files with 7-day cache
|
||||
- Test files updated with `https://127.0.0.1`
|
||||
- Test files updated with `https://localhost` (not `127.0.0.1` to avoid SSL mixed-origin issues)
|
||||
- SQL script provided for updating existing data
|
||||
- Documentation updated for each environment
|
||||
|
||||
@@ -1,324 +0,0 @@
|
||||
# ADR-015: Application Performance Monitoring (APM) and Error Tracking
|
||||
|
||||
**Date**: 2025-12-12
|
||||
|
||||
**Status**: Accepted
|
||||
|
||||
**Updated**: 2026-01-11
|
||||
|
||||
## Context
|
||||
|
||||
While `ADR-004` established structured logging with Pino, the application lacks a high-level, aggregated view of its health, performance, and errors. It's difficult to spot trends, identify slow API endpoints, or be proactively notified of new types of errors.
|
||||
|
||||
Key requirements:
|
||||
|
||||
1. **Self-hosted**: No external SaaS dependencies for error tracking
|
||||
2. **Sentry SDK compatible**: Leverage mature, well-documented SDKs
|
||||
3. **Lightweight**: Minimal resource overhead in the dev container
|
||||
4. **Production-ready**: Same architecture works on bare-metal production servers
|
||||
5. **AI-accessible**: MCP server integration for Claude Code and other AI tools
|
||||
|
||||
## Decision
|
||||
|
||||
We will implement a self-hosted error tracking stack using **Bugsink** as the Sentry-compatible backend, with the following components:
|
||||
|
||||
### 1. Error Tracking Backend: Bugsink
|
||||
|
||||
**Bugsink** is a lightweight, self-hosted Sentry alternative that:
|
||||
|
||||
- Runs as a single process (no Kafka, Redis, ClickHouse required)
|
||||
- Is fully compatible with Sentry SDKs
|
||||
- Supports ARM64 and AMD64 architectures
|
||||
- Can use SQLite (dev) or PostgreSQL (production)
|
||||
|
||||
**Deployment**:
|
||||
|
||||
- **Dev container**: Installed as a systemd service inside the container
|
||||
- **Production**: Runs as a systemd service on bare-metal, listening on localhost only
|
||||
- **Database**: Uses PostgreSQL with a dedicated `bugsink` user and `bugsink` database (same PostgreSQL instance as the main application)
|
||||
|
||||
### 2. Backend Integration: @sentry/node
|
||||
|
||||
The Express backend will integrate `@sentry/node` SDK to:
|
||||
|
||||
- Capture unhandled exceptions before PM2/process manager restarts
|
||||
- Report errors with full stack traces and context
|
||||
- Integrate with Pino logger for breadcrumbs
|
||||
- Track transaction performance (optional)
|
||||
|
||||
### 3. Frontend Integration: @sentry/react
|
||||
|
||||
The React frontend will integrate `@sentry/react` SDK to:
|
||||
|
||||
- Wrap the app in a Sentry Error Boundary
|
||||
- Capture unhandled JavaScript errors
|
||||
- Report errors with component stack traces
|
||||
- Track user session context
|
||||
- **Frontend Error Correlation**: The global API client (Axios/Fetch wrapper) MUST intercept 4xx/5xx responses. It MUST extract the `x-request-id` header (if present) and attach it to the Sentry scope as a tag `api_request_id` before re-throwing the error. This allows developers to copy the ID from Sentry and search for it in backend logs.
|
||||
|
||||
### 4. Log Aggregation: Logstash
|
||||
|
||||
**Logstash** parses application and infrastructure logs, forwarding error patterns to Bugsink:
|
||||
|
||||
- **Installation**: Installed inside the dev container (and on bare-metal prod servers)
|
||||
- **Inputs**:
|
||||
- Pino JSON logs from the Node.js application
|
||||
- Redis logs (connection errors, memory warnings, slow commands)
|
||||
- PostgreSQL function logs (future - see Implementation Steps)
|
||||
- **Filter**: Identifies error-level logs (5xx responses, unhandled exceptions, Redis errors)
|
||||
- **Output**: Sends to Bugsink via Sentry-compatible HTTP API
|
||||
|
||||
This provides a secondary error capture path for:
|
||||
|
||||
- Errors that occur before Sentry SDK initialization
|
||||
- Log-based errors that don't throw exceptions
|
||||
- Redis connection/performance issues
|
||||
- Database function errors and slow queries
|
||||
- Historical error analysis from log files
|
||||
|
||||
### 5. MCP Server Integration: bugsink-mcp
|
||||
|
||||
For AI tool integration (Claude Code, Cursor, etc.), we use the open-source [bugsink-mcp](https://github.com/j-shelfwood/bugsink-mcp) server:
|
||||
|
||||
- **No code changes required**: Configurable via environment variables
|
||||
- **Capabilities**: List projects, get issues, view events, get stacktraces, manage releases
|
||||
- **Configuration**:
|
||||
- `BUGSINK_URL`: Points to Bugsink instance (`http://localhost:8000` for dev, `https://bugsink.projectium.com` for prod)
|
||||
- `BUGSINK_API_TOKEN`: API token from Bugsink (created via Django management command)
|
||||
- `BUGSINK_ORG_SLUG`: Organization identifier (usually "sentry")
|
||||
|
||||
**Note:** Despite the name `sentry-selfhosted-mcp` mentioned in earlier drafts of this ADR, the actual MCP server used is `bugsink-mcp` which is specifically designed for Bugsink's API structure.
|
||||
|
||||
## Architecture
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Dev Container / Production Server │
|
||||
├─────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────────────┐ ┌──────────────────┐ │
|
||||
│ │ Frontend │ │ Backend │ │
|
||||
│ │ (React) │ │ (Express) │ │
|
||||
│ │ @sentry/react │ │ @sentry/node │ │
|
||||
│ └────────┬─────────┘ └────────┬─────────┘ │
|
||||
│ │ │ │
|
||||
│ │ Sentry SDK Protocol │ │
|
||||
│ └───────────┬───────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────────┐ │
|
||||
│ │ Bugsink │ │
|
||||
│ │ (localhost:8000) │◄──────────────────┐ │
|
||||
│ │ │ │ │
|
||||
│ │ PostgreSQL backend │ │ │
|
||||
│ └──────────────────────┘ │ │
|
||||
│ │ │
|
||||
│ ┌──────────────────────┐ │ │
|
||||
│ │ Logstash │───────────────────┘ │
|
||||
│ │ (Log Aggregator) │ Sentry Output │
|
||||
│ │ │ │
|
||||
│ │ Inputs: │ │
|
||||
│ │ - Pino app logs │ │
|
||||
│ │ - Redis logs │ │
|
||||
│ │ - PostgreSQL (future) │
|
||||
│ └──────────────────────┘ │
|
||||
│ ▲ ▲ ▲ │
|
||||
│ │ │ │ │
|
||||
│ ┌───────────┘ │ └───────────┐ │
|
||||
│ │ │ │ │
|
||||
│ ┌────┴─────┐ ┌─────┴────┐ ┌──────┴─────┐ │
|
||||
│ │ Pino │ │ Redis │ │ PostgreSQL │ │
|
||||
│ │ Logs │ │ Logs │ │ Logs (TBD) │ │
|
||||
│ └──────────┘ └──────────┘ └────────────┘ │
|
||||
│ │
|
||||
│ ┌──────────────────────┐ │
|
||||
│ │ PostgreSQL │ │
|
||||
│ │ ┌────────────────┐ │ │
|
||||
│ │ │ flyer_crawler │ │ (main app database) │
|
||||
│ │ ├────────────────┤ │ │
|
||||
│ │ │ bugsink │ │ (error tracking database) │
|
||||
│ │ └────────────────┘ │ │
|
||||
│ └──────────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
External (Developer Machine):
|
||||
┌──────────────────────────────────────┐
|
||||
│ Claude Code / Cursor / VS Code │
|
||||
│ ┌────────────────────────────────┐ │
|
||||
│ │ bugsink-mcp │ │
|
||||
│ │ (MCP Server) │ │
|
||||
│ │ │ │
|
||||
│ │ BUGSINK_URL=http://localhost:8000
|
||||
│ │ BUGSINK_API_TOKEN=... │ │
|
||||
│ │ BUGSINK_ORG_SLUG=... │ │
|
||||
│ └────────────────────────────────┘ │
|
||||
└──────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Description | Default (Dev) |
|
||||
| ------------------ | ------------------------------ | -------------------------- |
|
||||
| `BUGSINK_DSN` | Sentry-compatible DSN for SDKs | Set after project creation |
|
||||
| `BUGSINK_ENABLED` | Enable/disable error reporting | `true` |
|
||||
| `BUGSINK_BASE_URL` | Bugsink web UI URL (internal) | `http://localhost:8000` |
|
||||
|
||||
### PostgreSQL Setup
|
||||
|
||||
```sql
|
||||
-- Create dedicated Bugsink database and user
|
||||
CREATE USER bugsink WITH PASSWORD 'bugsink_dev_password';
|
||||
CREATE DATABASE bugsink OWNER bugsink;
|
||||
GRANT ALL PRIVILEGES ON DATABASE bugsink TO bugsink;
|
||||
```
|
||||
|
||||
### Bugsink Configuration
|
||||
|
||||
```bash
|
||||
# Environment variables for Bugsink service
|
||||
SECRET_KEY=<random-50-char-string>
|
||||
DATABASE_URL=postgresql://bugsink:bugsink_dev_password@localhost:5432/bugsink
|
||||
BASE_URL=http://localhost:8000
|
||||
PORT=8000
|
||||
```
|
||||
|
||||
### Logstash Pipeline
|
||||
|
||||
```conf
|
||||
# /etc/logstash/conf.d/bugsink.conf
|
||||
|
||||
# === INPUTS ===
|
||||
input {
|
||||
# Pino application logs
|
||||
file {
|
||||
path => "/app/logs/*.log"
|
||||
codec => json
|
||||
type => "pino"
|
||||
tags => ["app"]
|
||||
}
|
||||
|
||||
# Redis logs
|
||||
file {
|
||||
path => "/var/log/redis/*.log"
|
||||
type => "redis"
|
||||
tags => ["redis"]
|
||||
}
|
||||
|
||||
# PostgreSQL logs (for function logging - future)
|
||||
# file {
|
||||
# path => "/var/log/postgresql/*.log"
|
||||
# type => "postgres"
|
||||
# tags => ["postgres"]
|
||||
# }
|
||||
}
|
||||
|
||||
# === FILTERS ===
|
||||
filter {
|
||||
# Pino error detection (level 50 = error, 60 = fatal)
|
||||
if [type] == "pino" and [level] >= 50 {
|
||||
mutate { add_tag => ["error"] }
|
||||
}
|
||||
|
||||
# Redis error detection
|
||||
if [type] == "redis" {
|
||||
grok {
|
||||
match => { "message" => "%{POSINT:pid}:%{WORD:role} %{MONTHDAY} %{MONTH} %{TIME} %{WORD:loglevel} %{GREEDYDATA:redis_message}" }
|
||||
}
|
||||
if [loglevel] in ["WARNING", "ERROR"] {
|
||||
mutate { add_tag => ["error"] }
|
||||
}
|
||||
}
|
||||
|
||||
# PostgreSQL function error detection (future)
|
||||
# if [type] == "postgres" {
|
||||
# # Parse PostgreSQL log format and detect ERROR/FATAL levels
|
||||
# }
|
||||
}
|
||||
|
||||
# === OUTPUT ===
|
||||
output {
|
||||
if "error" in [tags] {
|
||||
http {
|
||||
url => "http://localhost:8000/api/store/"
|
||||
http_method => "post"
|
||||
format => "json"
|
||||
# Sentry envelope format
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. **Update Dockerfile.dev**:
|
||||
- Install Bugsink (pip package or binary)
|
||||
- Install Logstash (Elastic APT repository)
|
||||
- Add systemd service files for both
|
||||
|
||||
2. **PostgreSQL initialization**:
|
||||
- Add Bugsink user/database creation to `sql/00-init-extensions.sql`
|
||||
|
||||
3. **Backend SDK integration**:
|
||||
- Install `@sentry/node`
|
||||
- Initialize in `server.ts` before Express app
|
||||
- Configure error handler middleware integration
|
||||
|
||||
4. **Frontend SDK integration**:
|
||||
- Install `@sentry/react`
|
||||
- Wrap `App` component with `Sentry.ErrorBoundary`
|
||||
- Configure in `src/index.tsx`
|
||||
|
||||
5. **Environment configuration**:
|
||||
- Add Bugsink variables to `src/config/env.ts`
|
||||
- Update `.env.example` and `compose.dev.yml`
|
||||
|
||||
6. **Logstash configuration**:
|
||||
- Create pipeline config for Pino → Bugsink
|
||||
- Configure Pino to write to log file in addition to stdout
|
||||
- Configure Redis log monitoring (connection errors, slow commands)
|
||||
|
||||
7. **MCP server documentation**:
|
||||
- Document `bugsink-mcp` setup in CLAUDE.md
|
||||
|
||||
8. **PostgreSQL function logging** (future):
|
||||
- Configure PostgreSQL to log function execution errors
|
||||
- Add Logstash input for PostgreSQL logs
|
||||
- Define filter rules for function-level error detection
|
||||
- _Note: Ask for implementation details when this step is reached_
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- **Full observability**: Aggregated view of errors, trends, and performance
|
||||
- **Self-hosted**: No external SaaS dependencies or subscription costs
|
||||
- **SDK compatibility**: Leverages mature Sentry SDKs with excellent documentation
|
||||
- **AI integration**: MCP server enables Claude Code to query and analyze errors
|
||||
- **Unified architecture**: Same setup works in dev container and production
|
||||
- **Lightweight**: Bugsink runs in a single process, unlike full Sentry (16GB+ RAM)
|
||||
|
||||
### Negative
|
||||
|
||||
- **Additional services**: Bugsink and Logstash add complexity to the container
|
||||
- **PostgreSQL overhead**: Additional database for error tracking
|
||||
- **Initial setup**: Requires configuration of multiple components
|
||||
- **Logstash learning curve**: Pipeline configuration requires Logstash knowledge
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
1. **Full Sentry self-hosted**: Rejected due to complexity (Kafka, Redis, ClickHouse, 16GB+ RAM minimum)
|
||||
2. **GlitchTip**: Considered, but Bugsink is lighter weight and easier to deploy
|
||||
3. **Sentry SaaS**: Rejected due to self-hosted requirement
|
||||
4. **Custom error aggregation**: Rejected in favor of proven Sentry SDK ecosystem
|
||||
|
||||
## References
|
||||
|
||||
- [Bugsink Documentation](https://www.bugsink.com/docs/)
|
||||
- [Bugsink Docker Install](https://www.bugsink.com/docs/docker-install/)
|
||||
- [@sentry/node Documentation](https://docs.sentry.io/platforms/javascript/guides/node/)
|
||||
- [@sentry/react Documentation](https://docs.sentry.io/platforms/javascript/guides/react/)
|
||||
- [bugsink-mcp](https://github.com/j-shelfwood/bugsink-mcp)
|
||||
- [Logstash Reference](https://www.elastic.co/guide/en/logstash/current/index.html)
|
||||
272
docs/adr/0015-error-tracking-and-observability.md
Normal file
272
docs/adr/0015-error-tracking-and-observability.md
Normal file
@@ -0,0 +1,272 @@
|
||||
# ADR-015: Error Tracking and Observability
|
||||
|
||||
**Date**: 2025-12-12
|
||||
|
||||
**Status**: Accepted (Fully Implemented)
|
||||
|
||||
**Updated**: 2026-01-26 (user context integration completed)
|
||||
|
||||
**Related**: [ADR-056](./0056-application-performance-monitoring.md) (Application Performance Monitoring)
|
||||
|
||||
## Context
|
||||
|
||||
While ADR-004 established structured logging with Pino, the application lacks a high-level, aggregated view of its health and errors. It's difficult to spot trends, identify recurring issues, or be proactively notified of new types of errors.
|
||||
|
||||
Key requirements:
|
||||
|
||||
1. **Self-hosted**: No external SaaS dependencies for error tracking
|
||||
2. **Sentry SDK compatible**: Leverage mature, well-documented SDKs
|
||||
3. **Lightweight**: Minimal resource overhead in the dev container
|
||||
4. **Production-ready**: Same architecture works on bare-metal production servers
|
||||
5. **AI-accessible**: MCP server integration for Claude Code and other AI tools
|
||||
|
||||
**Note**: Application Performance Monitoring (APM) and distributed tracing are covered separately in [ADR-056](./0056-application-performance-monitoring.md).
|
||||
|
||||
## Decision
|
||||
|
||||
We implement a self-hosted error tracking stack using **Bugsink** as the Sentry-compatible backend, with the following components:
|
||||
|
||||
### 1. Error Tracking Backend: Bugsink
|
||||
|
||||
**Bugsink** is a lightweight, self-hosted Sentry alternative that:
|
||||
|
||||
- Runs as a single process (no Kafka, Redis, ClickHouse required)
|
||||
- Is fully compatible with Sentry SDKs
|
||||
- Supports ARM64 and AMD64 architectures
|
||||
- Can use SQLite (dev) or PostgreSQL (production)
|
||||
|
||||
**Deployment**:
|
||||
|
||||
- **Dev container**: Installed as a systemd service inside the container
|
||||
- **Production**: Runs as a systemd service on bare-metal, listening on localhost only
|
||||
- **Database**: Uses PostgreSQL with a dedicated `bugsink` user and `bugsink` database (same PostgreSQL instance as the main application)
|
||||
|
||||
### 2. Backend Integration: @sentry/node
|
||||
|
||||
The Express backend integrates `@sentry/node` SDK to:
|
||||
|
||||
- Capture unhandled exceptions before PM2/process manager restarts
|
||||
- Report errors with full stack traces and context
|
||||
- Integrate with Pino logger for breadcrumbs
|
||||
- Filter errors by severity (only 5xx errors sent by default)
|
||||
|
||||
### 3. Frontend Integration: @sentry/react
|
||||
|
||||
The React frontend integrates `@sentry/react` SDK to:
|
||||
|
||||
- Wrap the app in an Error Boundary for graceful error handling
|
||||
- Capture unhandled JavaScript errors
|
||||
- Report errors with component stack traces
|
||||
- Filter out browser extension errors
|
||||
- **Frontend Error Correlation**: The global API client intercepts 4xx/5xx responses and can attach the `x-request-id` header to Sentry scope for correlation with backend logs
|
||||
|
||||
### 4. Log Aggregation: Logstash
|
||||
|
||||
**Logstash** parses application and infrastructure logs, forwarding error patterns to Bugsink:
|
||||
|
||||
- **Installation**: Installed inside the dev container (and on bare-metal prod servers)
|
||||
- **Inputs**:
|
||||
- Pino JSON logs from the Node.js application (PM2 managed)
|
||||
- Redis logs (connection errors, memory warnings, slow commands)
|
||||
- PostgreSQL function logs (via `fn_log()` - see ADR-050)
|
||||
- NGINX access/error logs
|
||||
- **Filter**: Identifies error-level logs (5xx responses, unhandled exceptions, Redis errors)
|
||||
- **Output**: Sends to Bugsink via Sentry-compatible HTTP API
|
||||
|
||||
This provides a secondary error capture path for:
|
||||
|
||||
- Errors that occur before Sentry SDK initialization
|
||||
- Log-based errors that don't throw exceptions
|
||||
- Redis connection/performance issues
|
||||
- Database function errors and slow queries
|
||||
- Historical error analysis from log files
|
||||
|
||||
### 5. MCP Server Integration: bugsink-mcp
|
||||
|
||||
For AI tool integration (Claude Code, Cursor, etc.), we use the open-source [bugsink-mcp](https://github.com/j-shelfwood/bugsink-mcp) server:
|
||||
|
||||
- **No code changes required**: Configurable via environment variables
|
||||
- **Capabilities**: List projects, get issues, view events, get stacktraces, manage releases
|
||||
- **Configuration**:
|
||||
- `BUGSINK_URL`: Points to Bugsink instance (`http://localhost:8000` for dev, `https://bugsink.projectium.com` for prod)
|
||||
- `BUGSINK_API_TOKEN`: API token from Bugsink (created via Django management command)
|
||||
- `BUGSINK_ORG_SLUG`: Organization identifier (usually "sentry")
|
||||
|
||||
## Architecture
|
||||
|
||||
```text
|
||||
+---------------------------------------------------------------------------+
|
||||
| Dev Container / Production Server |
|
||||
+---------------------------------------------------------------------------+
|
||||
| |
|
||||
| +------------------+ +------------------+ |
|
||||
| | Frontend | | Backend | |
|
||||
| | (React) | | (Express) | |
|
||||
| | @sentry/react | | @sentry/node | |
|
||||
| +--------+---------+ +--------+---------+ |
|
||||
| | | |
|
||||
| | Sentry SDK Protocol | |
|
||||
| +-----------+---------------+ |
|
||||
| | |
|
||||
| v |
|
||||
| +----------------------+ |
|
||||
| | Bugsink | |
|
||||
| | (localhost:8000) |<------------------+ |
|
||||
| | | | |
|
||||
| | PostgreSQL backend | | |
|
||||
| +----------------------+ | |
|
||||
| | |
|
||||
| +----------------------+ | |
|
||||
| | Logstash |-------------------+ |
|
||||
| | (Log Aggregator) | Sentry Output |
|
||||
| | | |
|
||||
| | Inputs: | |
|
||||
| | - PM2/Pino logs | |
|
||||
| | - Redis logs | |
|
||||
| | - PostgreSQL logs | |
|
||||
| | - NGINX logs | |
|
||||
| +----------------------+ |
|
||||
| ^ ^ ^ ^ |
|
||||
| | | | | |
|
||||
| +-----------+ | | +-----------+ |
|
||||
| | | | | |
|
||||
| +----+-----+ +-----+----+ +-----+----+ +-----+----+ |
|
||||
| | PM2 | | Redis | | PostgreSQL| | NGINX | |
|
||||
| | Logs | | Logs | | Logs | | Logs | |
|
||||
| +----------+ +----------+ +-----------+ +---------+ |
|
||||
| |
|
||||
| +----------------------+ |
|
||||
| | PostgreSQL | |
|
||||
| | +----------------+ | |
|
||||
| | | flyer_crawler | | (main app database) |
|
||||
| | +----------------+ | |
|
||||
| | | bugsink | | (error tracking database) |
|
||||
| | +----------------+ | |
|
||||
| +----------------------+ |
|
||||
| |
|
||||
+---------------------------------------------------------------------------+
|
||||
|
||||
External (Developer Machine):
|
||||
+--------------------------------------+
|
||||
| Claude Code / Cursor / VS Code |
|
||||
| +--------------------------------+ |
|
||||
| | bugsink-mcp | |
|
||||
| | (MCP Server) | |
|
||||
| | | |
|
||||
| | BUGSINK_URL=http://localhost:8000
|
||||
| | BUGSINK_API_TOKEN=... | |
|
||||
| | BUGSINK_ORG_SLUG=... | |
|
||||
| +--------------------------------+ |
|
||||
+--------------------------------------+
|
||||
```
|
||||
|
||||
## Implementation Status
|
||||
|
||||
### Completed
|
||||
|
||||
- [x] Bugsink installed and configured in dev container
|
||||
- [x] PostgreSQL `bugsink` database and user created
|
||||
- [x] `@sentry/node` SDK integrated in backend (`src/services/sentry.server.ts`)
|
||||
- [x] `@sentry/react` SDK integrated in frontend (`src/services/sentry.client.ts`)
|
||||
- [x] ErrorBoundary component created (`src/components/ErrorBoundary.tsx`)
|
||||
- [x] ErrorBoundary wrapped around app (`src/providers/AppProviders.tsx`)
|
||||
- [x] Logstash pipeline configured for PM2/Pino, Redis, PostgreSQL, NGINX logs
|
||||
- [x] MCP server (`bugsink-mcp`) documented and configured
|
||||
- [x] Environment variables added to `src/config/env.ts` and frontend `src/config.ts`
|
||||
- [x] Browser extension errors filtered in `beforeSend`
|
||||
- [x] 5xx error filtering in backend error handler
|
||||
|
||||
### Recently Completed (2026-01-26)
|
||||
|
||||
- [x] **User context after authentication**: Integrated `setUser()` calls in `AuthProvider.tsx` to associate errors with authenticated users
|
||||
- Called on profile fetch from query (line 44-49)
|
||||
- Called on direct login with profile (line 94-99)
|
||||
- Called on login with profile fetch (line 124-129)
|
||||
- Cleared on logout (line 76-77)
|
||||
- Maps `user_id` → `id`, `email` → `email`, `full_name` → `username`
|
||||
|
||||
This completes the error tracking implementation - all errors are now associated with the authenticated user who encountered them, enabling user-specific error analysis and debugging.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Description | Default (Dev) |
|
||||
| -------------------- | -------------------------------- | -------------------------- |
|
||||
| `SENTRY_DSN` | Sentry-compatible DSN (backend) | Set after project creation |
|
||||
| `VITE_SENTRY_DSN` | Sentry-compatible DSN (frontend) | Set after project creation |
|
||||
| `SENTRY_ENVIRONMENT` | Environment name | `development` |
|
||||
| `SENTRY_DEBUG` | Enable debug logging | `false` |
|
||||
| `SENTRY_ENABLED` | Enable/disable error reporting | `true` |
|
||||
|
||||
### PostgreSQL Setup
|
||||
|
||||
```sql
|
||||
-- Create dedicated Bugsink database and user
|
||||
CREATE USER bugsink WITH PASSWORD 'bugsink_dev_password';
|
||||
CREATE DATABASE bugsink OWNER bugsink;
|
||||
GRANT ALL PRIVILEGES ON DATABASE bugsink TO bugsink;
|
||||
```
|
||||
|
||||
### Bugsink Configuration
|
||||
|
||||
```bash
|
||||
# Environment variables for Bugsink service
|
||||
SECRET_KEY=<random-50-char-string>
|
||||
DATABASE_URL=postgresql://bugsink:bugsink_dev_password@localhost:5432/bugsink
|
||||
BASE_URL=http://localhost:8000
|
||||
PORT=8000
|
||||
```
|
||||
|
||||
### Logstash Pipeline
|
||||
|
||||
See `docker/logstash/bugsink.conf` for the full pipeline configuration.
|
||||
|
||||
Key routing:
|
||||
|
||||
| Source | Bugsink Project |
|
||||
| --------------- | --------------- |
|
||||
| Backend (Pino) | Backend API |
|
||||
| Worker (Pino) | Backend API |
|
||||
| PostgreSQL logs | Backend API |
|
||||
| Vite logs | Infrastructure |
|
||||
| Redis logs | Infrastructure |
|
||||
| NGINX logs | Infrastructure |
|
||||
| Frontend errors | Frontend |
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- **Full observability**: Aggregated view of errors and trends
|
||||
- **Self-hosted**: No external SaaS dependencies or subscription costs
|
||||
- **SDK compatibility**: Leverages mature Sentry SDKs with excellent documentation
|
||||
- **AI integration**: MCP server enables Claude Code to query and analyze errors
|
||||
- **Unified architecture**: Same setup works in dev container and production
|
||||
- **Lightweight**: Bugsink runs in a single process, unlike full Sentry (16GB+ RAM)
|
||||
- **Error correlation**: Request IDs allow correlation between frontend errors and backend logs
|
||||
|
||||
### Negative
|
||||
|
||||
- **Additional services**: Bugsink and Logstash add complexity to the container
|
||||
- **PostgreSQL overhead**: Additional database for error tracking
|
||||
- **Initial setup**: Requires configuration of multiple components
|
||||
- **Logstash learning curve**: Pipeline configuration requires Logstash knowledge
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
1. **Full Sentry self-hosted**: Rejected due to complexity (Kafka, Redis, ClickHouse, 16GB+ RAM minimum)
|
||||
2. **GlitchTip**: Considered, but Bugsink is lighter weight and easier to deploy
|
||||
3. **Sentry SaaS**: Rejected due to self-hosted requirement
|
||||
4. **Custom error aggregation**: Rejected in favor of proven Sentry SDK ecosystem
|
||||
|
||||
## References
|
||||
|
||||
- [Bugsink Documentation](https://www.bugsink.com/docs/)
|
||||
- [Bugsink Docker Install](https://www.bugsink.com/docs/docker-install/)
|
||||
- [@sentry/node Documentation](https://docs.sentry.io/platforms/javascript/guides/node/)
|
||||
- [@sentry/react Documentation](https://docs.sentry.io/platforms/javascript/guides/react/)
|
||||
- [bugsink-mcp](https://github.com/j-shelfwood/bugsink-mcp)
|
||||
- [Logstash Reference](https://www.elastic.co/guide/en/logstash/current/index.html)
|
||||
- [ADR-050: PostgreSQL Function Observability](./0050-postgresql-function-observability.md)
|
||||
- [ADR-056: Application Performance Monitoring](./0056-application-performance-monitoring.md)
|
||||
@@ -2,22 +2,22 @@
|
||||
|
||||
**Date**: 2026-01-09
|
||||
|
||||
**Status**: Partially Implemented
|
||||
**Status**: Accepted (Fully Implemented)
|
||||
|
||||
**Implemented**: 2026-01-09 (Local auth only)
|
||||
**Implemented**: 2026-01-09 (Local auth + JWT), 2026-01-26 (OAuth enabled)
|
||||
|
||||
## Context
|
||||
|
||||
The application requires a secure authentication system that supports both traditional email/password login and social OAuth providers (Google, GitHub). The system must handle user sessions, token refresh, account security (lockout after failed attempts), and integrate seamlessly with the existing Express middleware pipeline.
|
||||
|
||||
Currently, **only local authentication is enabled**. OAuth strategies are fully implemented but commented out, pending configuration of OAuth provider credentials.
|
||||
**All authentication methods are now fully implemented**: Local authentication (email/password), JWT tokens, and OAuth (Google + GitHub). OAuth strategies use conditional registration - they activate automatically when the corresponding environment variables are configured.
|
||||
|
||||
## Decision
|
||||
|
||||
We will implement a stateless JWT-based authentication system with the following components:
|
||||
|
||||
1. **Local Authentication**: Email/password login with bcrypt hashing.
|
||||
2. **OAuth Authentication**: Google and GitHub OAuth 2.0 (currently disabled).
|
||||
2. **OAuth Authentication**: Google and GitHub OAuth 2.0 (conditionally enabled via environment variables).
|
||||
3. **JWT Access Tokens**: Short-lived tokens (15 minutes) for API authentication.
|
||||
4. **Refresh Tokens**: Long-lived tokens (7 days) stored in HTTP-only cookies.
|
||||
5. **Account Security**: Lockout after 5 failed login attempts for 15 minutes.
|
||||
@@ -59,7 +59,7 @@ We will implement a stateless JWT-based authentication system with the following
|
||||
│ │ │ │ │
|
||||
│ │ ┌──────────┐ │ │ │
|
||||
│ └────────>│ OAuth │─────────────┘ │ │
|
||||
│ (disabled) │ Provider │ │ │
|
||||
│ │ Provider │ │ │
|
||||
│ └──────────┘ │ │
|
||||
│ │ │
|
||||
│ ┌──────────┐ ┌──────────┐ │ │
|
||||
@@ -130,72 +130,139 @@ passport.use(
|
||||
- Refresh token: 7 days expiry, 64-byte random hex
|
||||
- Refresh token stored in HTTP-only cookie with `secure` flag in production
|
||||
|
||||
### OAuth Strategies (Disabled)
|
||||
### OAuth Strategies (Conditionally Enabled)
|
||||
|
||||
OAuth strategies are **fully implemented** and activate automatically when the corresponding environment variables are set. The strategies use conditional registration to gracefully handle missing credentials.
|
||||
|
||||
#### Google OAuth
|
||||
|
||||
Located in `src/routes/passport.routes.ts` (lines 167-217, commented):
|
||||
Located in `src/config/passport.ts` (lines 167-235):
|
||||
|
||||
```typescript
|
||||
// passport.use(new GoogleStrategy({
|
||||
// clientID: process.env.GOOGLE_CLIENT_ID!,
|
||||
// clientSecret: process.env.GOOGLE_CLIENT_SECRET!,
|
||||
// callbackURL: '/api/auth/google/callback',
|
||||
// scope: ['profile', 'email']
|
||||
// },
|
||||
// async (accessToken, refreshToken, profile, done) => {
|
||||
// const email = profile.emails?.[0]?.value;
|
||||
// const user = await db.findUserByEmail(email);
|
||||
// if (user) {
|
||||
// return done(null, user);
|
||||
// }
|
||||
// // Create new user with null password_hash
|
||||
// const newUser = await db.createUser(email, null, {
|
||||
// full_name: profile.displayName,
|
||||
// avatar_url: profile.photos?.[0]?.value
|
||||
// });
|
||||
// return done(null, newUser);
|
||||
// }
|
||||
// ));
|
||||
// Only register the strategy if the required environment variables are set.
|
||||
if (process.env.GOOGLE_CLIENT_ID && process.env.GOOGLE_CLIENT_SECRET) {
|
||||
passport.use(
|
||||
new GoogleStrategy(
|
||||
{
|
||||
clientID: process.env.GOOGLE_CLIENT_ID,
|
||||
clientSecret: process.env.GOOGLE_CLIENT_SECRET,
|
||||
callbackURL: '/api/auth/google/callback',
|
||||
scope: ['profile', 'email'],
|
||||
},
|
||||
async (_accessToken, _refreshToken, profile, done) => {
|
||||
const email = profile.emails?.[0]?.value;
|
||||
if (!email) {
|
||||
return done(new Error('No email found in Google profile.'), false);
|
||||
}
|
||||
|
||||
const existingUserProfile = await db.userRepo.findUserWithProfileByEmail(email, logger);
|
||||
if (existingUserProfile) {
|
||||
// User exists, log them in (strip sensitive fields)
|
||||
return done(null, cleanUserProfile);
|
||||
} else {
|
||||
// Create new user with null password_hash for OAuth users
|
||||
const newUserProfile = await db.userRepo.createUser(
|
||||
email,
|
||||
null,
|
||||
{
|
||||
full_name: profile.displayName,
|
||||
avatar_url: profile.photos?.[0]?.value,
|
||||
},
|
||||
logger,
|
||||
);
|
||||
return done(null, newUserProfile);
|
||||
}
|
||||
},
|
||||
),
|
||||
);
|
||||
logger.info('[Passport] Google OAuth strategy registered.');
|
||||
} else {
|
||||
logger.warn('[Passport] Google OAuth strategy NOT registered: credentials not set.');
|
||||
}
|
||||
```
|
||||
|
||||
#### GitHub OAuth
|
||||
|
||||
Located in `src/routes/passport.routes.ts` (lines 219-269, commented):
|
||||
Located in `src/config/passport.ts` (lines 237-310):
|
||||
|
||||
```typescript
|
||||
// passport.use(new GitHubStrategy({
|
||||
// clientID: process.env.GITHUB_CLIENT_ID!,
|
||||
// clientSecret: process.env.GITHUB_CLIENT_SECRET!,
|
||||
// callbackURL: '/api/auth/github/callback',
|
||||
// scope: ['user:email']
|
||||
// },
|
||||
// async (accessToken, refreshToken, profile, done) => {
|
||||
// const email = profile.emails?.[0]?.value;
|
||||
// // Similar flow to Google OAuth
|
||||
// }
|
||||
// ));
|
||||
// Only register the strategy if the required environment variables are set.
|
||||
if (process.env.GITHUB_CLIENT_ID && process.env.GITHUB_CLIENT_SECRET) {
|
||||
passport.use(
|
||||
new GitHubStrategy(
|
||||
{
|
||||
clientID: process.env.GITHUB_CLIENT_ID,
|
||||
clientSecret: process.env.GITHUB_CLIENT_SECRET,
|
||||
callbackURL: '/api/auth/github/callback',
|
||||
scope: ['user:email'],
|
||||
},
|
||||
async (_accessToken, _refreshToken, profile, done) => {
|
||||
const email = profile.emails?.[0]?.value;
|
||||
if (!email) {
|
||||
return done(new Error('No public email found in GitHub profile.'), false);
|
||||
}
|
||||
// Same flow as Google OAuth - find or create user
|
||||
},
|
||||
),
|
||||
);
|
||||
logger.info('[Passport] GitHub OAuth strategy registered.');
|
||||
} else {
|
||||
logger.warn('[Passport] GitHub OAuth strategy NOT registered: credentials not set.');
|
||||
}
|
||||
```
|
||||
|
||||
#### OAuth Routes (Disabled)
|
||||
#### OAuth Routes (Active)
|
||||
|
||||
Located in `src/routes/auth.routes.ts` (lines 289-315, commented):
|
||||
Located in `src/routes/auth.routes.ts` (lines 587-609):
|
||||
|
||||
```typescript
|
||||
// const handleOAuthCallback = (req, res) => {
|
||||
// const user = req.user;
|
||||
// const accessToken = jwt.sign(payload, JWT_SECRET, { expiresIn: '15m' });
|
||||
// const refreshToken = crypto.randomBytes(64).toString('hex');
|
||||
//
|
||||
// await db.saveRefreshToken(user.user_id, refreshToken);
|
||||
// res.cookie('refreshToken', refreshToken, { httpOnly: true, secure: true });
|
||||
// res.redirect(`${FRONTEND_URL}/auth/callback?token=${accessToken}`);
|
||||
// };
|
||||
// Google OAuth routes
|
||||
router.get('/google', passport.authenticate('google', { session: false }));
|
||||
router.get(
|
||||
'/google/callback',
|
||||
passport.authenticate('google', {
|
||||
session: false,
|
||||
failureRedirect: '/?error=google_auth_failed',
|
||||
}),
|
||||
createOAuthCallbackHandler('google'),
|
||||
);
|
||||
|
||||
// router.get('/google', passport.authenticate('google', { session: false }));
|
||||
// router.get('/google/callback', passport.authenticate('google', { ... }), handleOAuthCallback);
|
||||
// router.get('/github', passport.authenticate('github', { session: false }));
|
||||
// router.get('/github/callback', passport.authenticate('github', { ... }), handleOAuthCallback);
|
||||
// GitHub OAuth routes
|
||||
router.get('/github', passport.authenticate('github', { session: false }));
|
||||
router.get(
|
||||
'/github/callback',
|
||||
passport.authenticate('github', {
|
||||
session: false,
|
||||
failureRedirect: '/?error=github_auth_failed',
|
||||
}),
|
||||
createOAuthCallbackHandler('github'),
|
||||
);
|
||||
```
|
||||
|
||||
#### OAuth Callback Handler
|
||||
|
||||
The callback handler generates tokens and redirects to the frontend:
|
||||
|
||||
```typescript
|
||||
const createOAuthCallbackHandler = (provider: 'google' | 'github') => {
|
||||
return async (req: Request, res: Response) => {
|
||||
const userProfile = req.user as UserProfile;
|
||||
const { accessToken, refreshToken } = await authService.handleSuccessfulLogin(
|
||||
userProfile,
|
||||
req.log,
|
||||
);
|
||||
|
||||
res.cookie('refreshToken', refreshToken, {
|
||||
httpOnly: true,
|
||||
secure: process.env.NODE_ENV === 'production',
|
||||
maxAge: 30 * 24 * 60 * 60 * 1000, // 30 days
|
||||
});
|
||||
|
||||
// Redirect to frontend with provider-specific token param
|
||||
const tokenParam = provider === 'google' ? 'googleAuthToken' : 'githubAuthToken';
|
||||
res.redirect(`${process.env.FRONTEND_URL}/?${tokenParam}=${accessToken}`);
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
### Database Schema
|
||||
@@ -248,11 +315,13 @@ export const mockAuth = (req, res, next) => {
|
||||
};
|
||||
```
|
||||
|
||||
## Enabling OAuth
|
||||
## Configuring OAuth Providers
|
||||
|
||||
OAuth is fully implemented and activates automatically when credentials are provided. No code changes are required.
|
||||
|
||||
### Step 1: Set Environment Variables
|
||||
|
||||
Add to `.env`:
|
||||
Add to your environment (`.env.local` for development, Gitea secrets for production):
|
||||
|
||||
```bash
|
||||
# Google OAuth
|
||||
@@ -283,54 +352,29 @@ GITHUB_CLIENT_SECRET=your-github-client-secret
|
||||
- Development: `http://localhost:3001/api/auth/github/callback`
|
||||
- Production: `https://your-domain.com/api/auth/github/callback`
|
||||
|
||||
### Step 3: Uncomment Backend Code
|
||||
### Step 3: Restart the Application
|
||||
|
||||
**In `src/routes/passport.routes.ts`**:
|
||||
After setting the environment variables, restart PM2:
|
||||
|
||||
1. Uncomment import statements (lines 5-6):
|
||||
|
||||
```typescript
|
||||
import { Strategy as GoogleStrategy } from 'passport-google-oauth20';
|
||||
import { Strategy as GitHubStrategy } from 'passport-github2';
|
||||
```
|
||||
|
||||
2. Uncomment Google strategy (lines 167-217)
|
||||
3. Uncomment GitHub strategy (lines 219-269)
|
||||
|
||||
**In `src/routes/auth.routes.ts`**:
|
||||
|
||||
1. Uncomment `handleOAuthCallback` function (lines 291-309)
|
||||
2. Uncomment OAuth routes (lines 311-315)
|
||||
|
||||
### Step 4: Add Frontend OAuth Buttons
|
||||
|
||||
Create login buttons that redirect to:
|
||||
|
||||
- Google: `GET /api/auth/google`
|
||||
- GitHub: `GET /api/auth/github`
|
||||
|
||||
Handle callback at `/auth/callback?token=<accessToken>`:
|
||||
|
||||
1. Extract token from URL
|
||||
2. Store in client-side token storage
|
||||
3. Redirect to dashboard
|
||||
|
||||
### Step 5: Handle OAuth Callback Page
|
||||
|
||||
Create `src/pages/AuthCallback.tsx`:
|
||||
|
||||
```typescript
|
||||
const AuthCallback = () => {
|
||||
const token = new URLSearchParams(location.search).get('token');
|
||||
if (token) {
|
||||
setToken(token);
|
||||
navigate('/dashboard');
|
||||
} else {
|
||||
navigate('/login?error=auth_failed');
|
||||
}
|
||||
};
|
||||
```bash
|
||||
podman exec -it flyer-crawler-dev pm2 restart all
|
||||
```
|
||||
|
||||
The Passport configuration will automatically register the OAuth strategies when it detects the credentials. Check the logs for confirmation:
|
||||
|
||||
```text
|
||||
[Passport] Google OAuth strategy registered.
|
||||
[Passport] GitHub OAuth strategy registered.
|
||||
```
|
||||
|
||||
### Frontend Integration
|
||||
|
||||
OAuth login buttons are implemented in `src/client/pages/AuthView.tsx`. The frontend:
|
||||
|
||||
1. Redirects users to `/api/auth/google` or `/api/auth/github`
|
||||
2. Handles the callback via the `useAppInitialization` hook which looks for `googleAuthToken` or `githubAuthToken` query parameters
|
||||
3. Stores the token and redirects to the dashboard
|
||||
|
||||
## Known Limitations
|
||||
|
||||
1. **No OAuth Provider ID Mapping**: Users are identified by email only. If a user has accounts with different emails on Google and GitHub, they create separate accounts.
|
||||
@@ -372,31 +416,32 @@ const AuthCallback = () => {
|
||||
- **Stateless Architecture**: No session storage required; scales horizontally.
|
||||
- **Secure by Default**: HTTP-only cookies, short token expiry, bcrypt hashing.
|
||||
- **Account Protection**: Lockout prevents brute-force attacks.
|
||||
- **Flexible OAuth**: Can enable/disable OAuth without code changes (just env vars + uncommenting).
|
||||
- **Graceful Degradation**: System works with local auth only.
|
||||
- **Flexible OAuth**: OAuth activates automatically when credentials are set - no code changes needed.
|
||||
- **Graceful Degradation**: System works with local auth only when OAuth credentials are not configured.
|
||||
- **Full Feature Set**: Both local and OAuth authentication are production-ready.
|
||||
|
||||
### Negative
|
||||
|
||||
- **OAuth Disabled by Default**: Requires manual uncommenting to enable.
|
||||
- **No Account Linking**: Multiple OAuth providers create separate accounts.
|
||||
- **Frontend Work Required**: OAuth login buttons don't exist yet.
|
||||
- **Token in URL**: OAuth callback passes token in URL (visible in browser history).
|
||||
- **No Account Linking**: Multiple OAuth providers create separate accounts if emails differ.
|
||||
- **Token in URL**: OAuth callback passes token in URL query parameter (visible in browser history).
|
||||
- **Email-Based Identity**: OAuth users are identified by email only, not provider-specific IDs.
|
||||
|
||||
### Mitigation
|
||||
|
||||
- Document OAuth enablement steps clearly (see [../architecture/AUTHENTICATION.md](../architecture/AUTHENTICATION.md)).
|
||||
- Document OAuth configuration steps clearly (see [../architecture/AUTHENTICATION.md](../architecture/AUTHENTICATION.md)).
|
||||
- Consider adding OAuth provider ID columns for future account linking.
|
||||
- Use URL fragment (`#token=`) instead of query parameter for callback.
|
||||
- Consider using URL fragment (`#token=`) instead of query parameter for callback in future enhancement.
|
||||
|
||||
## Key Files
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------------------------------------------ | ------------------------------------------------ |
|
||||
| `src/routes/passport.routes.ts` | Passport strategies (local, JWT, OAuth) |
|
||||
| `src/config/passport.ts` | Passport strategies (local, JWT, OAuth) |
|
||||
| `src/routes/auth.routes.ts` | Auth endpoints (login, register, refresh, OAuth) |
|
||||
| `src/services/authService.ts` | Auth business logic |
|
||||
| `src/services/db/user.db.ts` | User database operations |
|
||||
| `src/config/env.ts` | Environment variable validation |
|
||||
| `src/client/pages/AuthView.tsx` | Frontend login/register UI with OAuth buttons |
|
||||
| [AUTHENTICATION.md](../architecture/AUTHENTICATION.md) | OAuth setup guide |
|
||||
| `.env.example` | Environment variable template |
|
||||
|
||||
@@ -409,11 +454,11 @@ const AuthCallback = () => {
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Enable OAuth**: Uncomment strategies and configure providers.
|
||||
2. **Add OAuth Provider Mapping Table**: Store `googleId`, `githubId` for account linking.
|
||||
3. **Implement Account Linking**: Allow users to connect multiple OAuth providers.
|
||||
4. **Add Password to OAuth Users**: Allow OAuth users to set a password.
|
||||
5. **Implement PKCE**: Add PKCE flow for enhanced OAuth security.
|
||||
6. **Token in Fragment**: Use URL fragment for OAuth callback token.
|
||||
7. **OAuth Token Storage**: Store OAuth refresh tokens for provider API access.
|
||||
8. **Magic Link Login**: Add passwordless email login option.
|
||||
1. **Add OAuth Provider Mapping Table**: Store `googleId`, `githubId` for account linking.
|
||||
2. **Implement Account Linking**: Allow users to connect multiple OAuth providers.
|
||||
3. **Add Password to OAuth Users**: Allow OAuth users to set a password for local login.
|
||||
4. **Implement PKCE**: Add PKCE flow for enhanced OAuth security.
|
||||
5. **Token in Fragment**: Use URL fragment for OAuth callback token instead of query parameter.
|
||||
6. **OAuth Token Storage**: Store OAuth refresh tokens for provider API access.
|
||||
7. **Magic Link Login**: Add passwordless email login option.
|
||||
8. **Additional OAuth Providers**: Support for Apple, Microsoft, or other providers.
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
**Date**: 2026-01-11
|
||||
|
||||
**Status**: Proposed
|
||||
**Status**: Accepted (Fully Implemented)
|
||||
|
||||
**Related**: [ADR-015](0015-application-performance-monitoring-and-error-tracking.md), [ADR-004](0004-standardized-application-wide-structured-logging.md)
|
||||
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
|
||||
**Date**: 2026-01-11
|
||||
|
||||
**Status**: Proposed
|
||||
**Status**: Accepted (Fully Implemented)
|
||||
|
||||
**Related**: [ADR-004](0004-standardized-application-wide-structured-logging.md)
|
||||
|
||||
## Context
|
||||
|
||||
@@ -17,7 +19,9 @@ We will adopt a namespace-based debug filter pattern, similar to the `debug` npm
|
||||
|
||||
## Implementation
|
||||
|
||||
In `src/services/logger.server.ts`:
|
||||
### Core Implementation (Completed 2026-01-11)
|
||||
|
||||
Implemented in [src/services/logger.server.ts:140-150](src/services/logger.server.ts#L140-L150):
|
||||
|
||||
```typescript
|
||||
const debugModules = (process.env.DEBUG_MODULES || '').split(',').map((s) => s.trim());
|
||||
@@ -33,10 +37,100 @@ export const createScopedLogger = (moduleName: string) => {
|
||||
};
|
||||
```
|
||||
|
||||
### Adopted Services (Completed 2026-01-26)
|
||||
|
||||
Services currently using `createScopedLogger`:
|
||||
|
||||
- `ai-service` - AI/Gemini integration ([src/services/aiService.server.ts:1020](src/services/aiService.server.ts#L1020))
|
||||
- `flyer-processing-service` - Flyer upload and processing ([src/services/flyerProcessingService.server.ts:20](src/services/flyerProcessingService.server.ts#L20))
|
||||
|
||||
## Usage
|
||||
|
||||
To debug only AI and Database interactions:
|
||||
### Enable Debug Logging for Specific Modules
|
||||
|
||||
To debug only AI and flyer processing:
|
||||
|
||||
```bash
|
||||
DEBUG_MODULES=ai-service,db-repo npm run dev
|
||||
DEBUG_MODULES=ai-service,flyer-processing-service npm run dev
|
||||
```
|
||||
|
||||
### Enable All Debug Logging
|
||||
|
||||
Use wildcard to enable debug logging for all modules:
|
||||
|
||||
```bash
|
||||
DEBUG_MODULES=* npm run dev
|
||||
```
|
||||
|
||||
### Common Module Names
|
||||
|
||||
| Module Name | Purpose | File |
|
||||
| -------------------------- | ---------------------------------------- | ----------------------------------------------- |
|
||||
| `ai-service` | AI/Gemini API interactions | `src/services/aiService.server.ts` |
|
||||
| `flyer-processing-service` | Flyer upload, validation, and processing | `src/services/flyerProcessingService.server.ts` |
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use Scoped Loggers for Long-Running Services**: Services with complex workflows or external API calls should use `createScopedLogger` to allow targeted debugging.
|
||||
|
||||
2. **Use Child Loggers for Contextual Data**: Even within scoped loggers, create child loggers with job/request-specific context:
|
||||
|
||||
```typescript
|
||||
const logger = createScopedLogger('my-service');
|
||||
|
||||
async function processJob(job: Job) {
|
||||
const jobLogger = logger.child({ jobId: job.id, jobName: job.name });
|
||||
jobLogger.debug('Starting job processing');
|
||||
}
|
||||
```
|
||||
|
||||
3. **Module Naming Convention**: Use kebab-case suffixed with `-service` or `-worker` (e.g., `ai-service`, `email-worker`).
|
||||
|
||||
4. **Production Usage**: `DEBUG_MODULES` can be set in production for temporary debugging, but should not be used continuously due to increased log volume.
|
||||
|
||||
## Examples
|
||||
|
||||
### Development Debugging
|
||||
|
||||
Debug AI service issues during development:
|
||||
|
||||
```bash
|
||||
# Dev container
|
||||
DEBUG_MODULES=ai-service npm run dev
|
||||
|
||||
# Or via PM2
|
||||
DEBUG_MODULES=ai-service pm2 restart flyer-crawler-api-dev
|
||||
```
|
||||
|
||||
### Production Troubleshooting
|
||||
|
||||
Temporarily enable debug logging for a specific subsystem:
|
||||
|
||||
```bash
|
||||
# SSH into production server
|
||||
ssh root@projectium.com
|
||||
|
||||
# Set environment variable and restart
|
||||
DEBUG_MODULES=ai-service pm2 restart flyer-crawler-api
|
||||
|
||||
# View logs
|
||||
pm2 logs flyer-crawler-api --lines 100
|
||||
|
||||
# Disable debug logging
|
||||
pm2 unset DEBUG_MODULES flyer-crawler-api
|
||||
pm2 restart flyer-crawler-api
|
||||
```
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive**:
|
||||
|
||||
- Developers can inspect detailed logs for specific subsystems without log flooding
|
||||
- Production debugging becomes more targeted and efficient
|
||||
- No performance impact when debug logging is disabled
|
||||
- Compatible with existing Pino logging infrastructure
|
||||
|
||||
**Negative**:
|
||||
|
||||
- Requires developers to know module names (mitigated by documentation above)
|
||||
- Not all services have adopted scoped loggers yet (gradual migration)
|
||||
|
||||
@@ -2,7 +2,14 @@
|
||||
|
||||
**Date**: 2026-01-11
|
||||
|
||||
**Status**: Proposed
|
||||
**Status**: Accepted (Fully Implemented)
|
||||
|
||||
**Implementation Status**:
|
||||
|
||||
- ✅ BullMQ worker stall configuration (complete)
|
||||
- ✅ Basic health endpoints (/live, /ready, /redis, etc.)
|
||||
- ✅ /health/queues endpoint (complete)
|
||||
- ✅ Worker heartbeat mechanism (complete)
|
||||
|
||||
## Context
|
||||
|
||||
@@ -60,3 +67,76 @@ The `/health/queues` endpoint will:
|
||||
**Negative**:
|
||||
|
||||
- Requires configuring external monitoring to poll the new endpoint.
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
### Completed (2026-01-11)
|
||||
|
||||
1. **BullMQ Stall Configuration** - `src/config/workerOptions.ts`
|
||||
- All workers use `defaultWorkerOptions` with:
|
||||
- `stalledInterval: 30000` (30s)
|
||||
- `maxStalledCount: 3`
|
||||
- `lockDuration: 30000` (30s)
|
||||
- Applied to all 9 workers: flyer, email, analytics, cleanup, weekly-analytics, token-cleanup, receipt, expiry-alert, barcode
|
||||
|
||||
2. **Basic Health Endpoints** - `src/routes/health.routes.ts`
|
||||
- `/health/live` - Liveness probe
|
||||
- `/health/ready` - Readiness probe (checks DB, Redis, storage)
|
||||
- `/health/startup` - Startup probe
|
||||
- `/health/redis` - Redis connectivity
|
||||
- `/health/db-pool` - Database connection pool status
|
||||
|
||||
### Implementation Completed (2026-01-26)
|
||||
|
||||
1. **`/health/queues` Endpoint** ✅
|
||||
- Added route to `src/routes/health.routes.ts:511-674`
|
||||
- Iterates through all 9 queues from `src/services/queues.server.ts`
|
||||
- Fetches job counts using BullMQ Queue API: `getJobCounts()`
|
||||
- Returns structured response including both queue metrics and worker heartbeats:
|
||||
|
||||
```typescript
|
||||
{
|
||||
status: 'healthy' | 'unhealthy',
|
||||
timestamp: string,
|
||||
queues: {
|
||||
[queueName]: {
|
||||
waiting: number,
|
||||
active: number,
|
||||
failed: number,
|
||||
delayed: number
|
||||
}
|
||||
},
|
||||
workers: {
|
||||
[workerName]: {
|
||||
alive: boolean,
|
||||
lastSeen?: string,
|
||||
pid?: number,
|
||||
host?: string
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- Returns 200 OK if all healthy, 503 if any queue/worker unavailable
|
||||
- Full OpenAPI documentation included
|
||||
|
||||
2. **Worker Heartbeat Mechanism** ✅
|
||||
- Added `updateWorkerHeartbeat()` and `startWorkerHeartbeat()` in `src/services/workers.server.ts:100-149`
|
||||
- Key pattern: `worker:heartbeat:<worker-name>`
|
||||
- Stores: `{ timestamp: ISO8601, pid: number, host: string }`
|
||||
- Updates every 30s with 90s TTL
|
||||
- Integrated with `/health/queues` endpoint (checks if heartbeat < 60s old)
|
||||
- Heartbeat intervals properly cleaned up in `closeWorkers()` and `gracefulShutdown()`
|
||||
|
||||
3. **Comprehensive Tests** ✅
|
||||
- Added 5 test cases in `src/routes/health.routes.test.ts:623-858`
|
||||
- Tests cover: healthy state, queue failures, stale heartbeats, missing heartbeats, Redis errors
|
||||
- All tests follow existing patterns with proper mocking
|
||||
|
||||
### Future Enhancements (Not Implemented)
|
||||
|
||||
1. **Queue Depth Alerting** (Low Priority)
|
||||
- Add configurable thresholds per queue type
|
||||
- Return 500 if `waiting` count exceeds threshold for extended period
|
||||
- Consider using Redis for storing threshold breach timestamps
|
||||
- **Estimate**: 1-2 hours
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# ADR-023: Database Normalization and Referential Integrity
|
||||
# ADR-055: Database Normalization and Referential Integrity
|
||||
|
||||
**Date:** 2026-01-19
|
||||
**Status:** Accepted
|
||||
262
docs/adr/0056-application-performance-monitoring.md
Normal file
262
docs/adr/0056-application-performance-monitoring.md
Normal file
@@ -0,0 +1,262 @@
|
||||
# ADR-056: Application Performance Monitoring (APM)
|
||||
|
||||
**Date**: 2026-01-26
|
||||
|
||||
**Status**: Proposed
|
||||
|
||||
**Related**: [ADR-015](./0015-error-tracking-and-observability.md) (Error Tracking and Observability)
|
||||
|
||||
## Context
|
||||
|
||||
Application Performance Monitoring (APM) provides visibility into application behavior through:
|
||||
|
||||
- **Distributed Tracing**: Track requests across services, queues, and database calls
|
||||
- **Performance Metrics**: Response times, throughput, error rates
|
||||
- **Resource Monitoring**: Memory usage, CPU, database connections
|
||||
- **Transaction Analysis**: Identify slow endpoints and bottlenecks
|
||||
|
||||
While ADR-015 covers error tracking and observability, APM is a distinct concern focused on performance rather than errors. The Sentry SDK supports APM through its tracing features, but this capability is currently **intentionally disabled** in our application.
|
||||
|
||||
### Current State
|
||||
|
||||
The Sentry SDK is installed and configured for error tracking (see ADR-015), but APM features are disabled:
|
||||
|
||||
```typescript
|
||||
// src/services/sentry.client.ts
|
||||
Sentry.init({
|
||||
dsn: config.sentry.dsn,
|
||||
environment: config.sentry.environment,
|
||||
// Performance monitoring - disabled for now to keep it simple
|
||||
tracesSampleRate: 0,
|
||||
// ...
|
||||
});
|
||||
```
|
||||
|
||||
```typescript
|
||||
// src/services/sentry.server.ts
|
||||
Sentry.init({
|
||||
dsn: config.sentry.dsn,
|
||||
environment: config.sentry.environment || config.server.nodeEnv,
|
||||
// Performance monitoring - disabled for now to keep it simple
|
||||
tracesSampleRate: 0,
|
||||
// ...
|
||||
});
|
||||
```
|
||||
|
||||
### Why APM is Currently Disabled
|
||||
|
||||
1. **Complexity**: APM adds overhead and complexity to debugging
|
||||
2. **Bugsink Limitations**: Bugsink's APM support is less mature than its error tracking
|
||||
3. **Resource Overhead**: Tracing adds memory and CPU overhead
|
||||
4. **Focus**: Error tracking provides more immediate value for our current scale
|
||||
5. **Cost**: High sample rates can significantly increase storage requirements
|
||||
|
||||
## Decision
|
||||
|
||||
We propose a **staged approach** to APM implementation:
|
||||
|
||||
### Phase 1: Selective Backend Tracing (Low Priority)
|
||||
|
||||
Enable tracing for specific high-value operations:
|
||||
|
||||
```typescript
|
||||
// Enable tracing for specific transactions only
|
||||
Sentry.init({
|
||||
dsn: config.sentry.dsn,
|
||||
tracesSampleRate: 0, // Keep default at 0
|
||||
|
||||
// Trace only specific high-value transactions
|
||||
tracesSampler: (samplingContext) => {
|
||||
const transactionName = samplingContext.transactionContext?.name;
|
||||
|
||||
// Always trace flyer processing jobs
|
||||
if (transactionName?.includes('flyer-processing')) {
|
||||
return 0.1; // 10% sample rate
|
||||
}
|
||||
|
||||
// Always trace AI/Gemini calls
|
||||
if (transactionName?.includes('gemini')) {
|
||||
return 0.5; // 50% sample rate
|
||||
}
|
||||
|
||||
// Trace slow endpoints (determined by custom logic)
|
||||
if (samplingContext.parentSampled) {
|
||||
return 0.1; // 10% for child transactions
|
||||
}
|
||||
|
||||
return 0; // Don't trace other transactions
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Phase 2: Custom Performance Metrics
|
||||
|
||||
Add custom metrics without full tracing overhead:
|
||||
|
||||
```typescript
|
||||
// Custom metric for slow database queries
|
||||
import { metrics } from '@sentry/node';
|
||||
|
||||
// In repository methods
|
||||
const startTime = performance.now();
|
||||
const result = await pool.query(sql, params);
|
||||
const duration = performance.now() - startTime;
|
||||
|
||||
metrics.distribution('db.query.duration', duration, {
|
||||
tags: { query_type: 'select', table: 'flyers' },
|
||||
});
|
||||
|
||||
if (duration > 1000) {
|
||||
logger.warn({ duration, sql }, 'Slow query detected');
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Full APM Integration (Future)
|
||||
|
||||
When/if full APM is needed:
|
||||
|
||||
```typescript
|
||||
Sentry.init({
|
||||
dsn: config.sentry.dsn,
|
||||
tracesSampleRate: 0.1, // 10% of transactions
|
||||
profilesSampleRate: 0.1, // 10% of traced transactions get profiled
|
||||
|
||||
integrations: [
|
||||
// Database tracing
|
||||
Sentry.postgresIntegration(),
|
||||
// Redis tracing
|
||||
Sentry.redisIntegration(),
|
||||
// BullMQ job tracing
|
||||
Sentry.prismaIntegration(), // or custom BullMQ integration
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
### To Enable Basic APM
|
||||
|
||||
1. **Update Sentry Configuration**:
|
||||
- Set `tracesSampleRate` > 0 in `src/services/sentry.server.ts`
|
||||
- Set `tracesSampleRate` > 0 in `src/services/sentry.client.ts`
|
||||
- Add environment variable `SENTRY_TRACES_SAMPLE_RATE` (default: 0)
|
||||
|
||||
2. **Add Instrumentation**:
|
||||
- Enable automatic Express instrumentation
|
||||
- Add manual spans for BullMQ job processing
|
||||
- Add database query instrumentation
|
||||
|
||||
3. **Frontend Tracing**:
|
||||
- Add Browser Tracing integration
|
||||
- Configure page load and navigation tracing
|
||||
|
||||
4. **Environment Variables**:
|
||||
|
||||
```bash
|
||||
SENTRY_TRACES_SAMPLE_RATE=0.1 # 10% sampling
|
||||
SENTRY_PROFILES_SAMPLE_RATE=0 # Profiling disabled
|
||||
```
|
||||
|
||||
5. **Bugsink Configuration**:
|
||||
- Verify Bugsink supports performance data ingestion
|
||||
- Configure retention policies for performance data
|
||||
|
||||
### Configuration Changes Required
|
||||
|
||||
```typescript
|
||||
// src/config/env.ts - Add new config
|
||||
sentry: {
|
||||
dsn: env.SENTRY_DSN,
|
||||
environment: env.SENTRY_ENVIRONMENT,
|
||||
debug: env.SENTRY_DEBUG === 'true',
|
||||
tracesSampleRate: parseFloat(env.SENTRY_TRACES_SAMPLE_RATE || '0'),
|
||||
profilesSampleRate: parseFloat(env.SENTRY_PROFILES_SAMPLE_RATE || '0'),
|
||||
},
|
||||
```
|
||||
|
||||
```typescript
|
||||
// src/services/sentry.server.ts - Updated init
|
||||
Sentry.init({
|
||||
dsn: config.sentry.dsn,
|
||||
environment: config.sentry.environment,
|
||||
tracesSampleRate: config.sentry.tracesSampleRate,
|
||||
profilesSampleRate: config.sentry.profilesSampleRate,
|
||||
// ... rest of config
|
||||
});
|
||||
```
|
||||
|
||||
## Trade-offs
|
||||
|
||||
### Enabling APM
|
||||
|
||||
**Benefits**:
|
||||
|
||||
- Identify performance bottlenecks
|
||||
- Track distributed transactions across services
|
||||
- Profile slow endpoints
|
||||
- Monitor resource utilization trends
|
||||
|
||||
**Costs**:
|
||||
|
||||
- Increased memory usage (~5-15% overhead)
|
||||
- Additional CPU for trace processing
|
||||
- Increased storage in Bugsink/Sentry
|
||||
- More complex debugging (noise in traces)
|
||||
- Potential latency from tracing overhead
|
||||
|
||||
### Keeping APM Disabled
|
||||
|
||||
**Benefits**:
|
||||
|
||||
- Simpler operation and debugging
|
||||
- Lower resource overhead
|
||||
- Focused on error tracking (higher priority)
|
||||
- No additional storage costs
|
||||
|
||||
**Costs**:
|
||||
|
||||
- No automated performance insights
|
||||
- Manual profiling required for bottleneck detection
|
||||
- Limited visibility into slow transactions
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
1. **OpenTelemetry**: More vendor-neutral, but adds another dependency and complexity
|
||||
2. **Prometheus + Grafana**: Good for metrics, but doesn't provide distributed tracing
|
||||
3. **Jaeger/Zipkin**: Purpose-built for tracing, but requires additional infrastructure
|
||||
4. **New Relic/Datadog SaaS**: Full-featured but conflicts with self-hosted requirement
|
||||
|
||||
## Current Recommendation
|
||||
|
||||
**Keep APM disabled** (`tracesSampleRate: 0`) until:
|
||||
|
||||
1. Specific performance issues are identified that require tracing
|
||||
2. Bugsink's APM support is verified and tested
|
||||
3. Infrastructure can support the additional overhead
|
||||
4. There is a clear business need for performance visibility
|
||||
|
||||
When enabling APM becomes necessary, start with Phase 1 (selective tracing) to minimize overhead while gaining targeted insights.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive (When Implemented)
|
||||
|
||||
- Automated identification of slow endpoints
|
||||
- Distributed trace visualization across async operations
|
||||
- Correlation between errors and performance issues
|
||||
- Proactive alerting on performance degradation
|
||||
|
||||
### Negative
|
||||
|
||||
- Additional infrastructure complexity
|
||||
- Storage overhead for trace data
|
||||
- Potential performance impact from tracing itself
|
||||
- Learning curve for trace analysis
|
||||
|
||||
## References
|
||||
|
||||
- [Sentry Performance Monitoring](https://docs.sentry.io/product/performance/)
|
||||
- [@sentry/node Performance](https://docs.sentry.io/platforms/javascript/guides/node/performance/)
|
||||
- [@sentry/react Performance](https://docs.sentry.io/platforms/javascript/guides/react/performance/)
|
||||
- [OpenTelemetry](https://opentelemetry.io/) (alternative approach)
|
||||
- [ADR-015: Error Tracking and Observability](./0015-error-tracking-and-observability.md)
|
||||
@@ -15,9 +15,9 @@ This document tracks the implementation status and estimated effort for all Arch
|
||||
|
||||
| Status | Count |
|
||||
| ---------------------------- | ----- |
|
||||
| Accepted (Fully Implemented) | 30 |
|
||||
| Accepted (Fully Implemented) | 39 |
|
||||
| Partially Implemented | 2 |
|
||||
| Proposed (Not Started) | 16 |
|
||||
| Proposed (Not Started) | 15 |
|
||||
|
||||
---
|
||||
|
||||
@@ -49,7 +49,7 @@ This document tracks the implementation status and estimated effort for all Arch
|
||||
| [ADR-003](./0003-standardized-input-validation-using-middleware.md) | Input Validation | Accepted | - | Fully implemented |
|
||||
| [ADR-008](./0008-api-versioning-strategy.md) | API Versioning | Proposed | L | Major URL/routing changes |
|
||||
| [ADR-018](./0018-api-documentation-strategy.md) | API Documentation | Accepted | - | OpenAPI/Swagger implemented |
|
||||
| [ADR-022](./0022-real-time-notification-system.md) | Real-time Notifications | Proposed | XL | WebSocket infrastructure |
|
||||
| [ADR-022](./0022-real-time-notification-system.md) | Real-time Notifications | Accepted | - | Fully implemented |
|
||||
| [ADR-028](./0028-api-response-standardization.md) | Response Standardization | Implemented | L | Completed (routes, middleware, tests) |
|
||||
|
||||
### Category 4: Security & Compliance
|
||||
@@ -62,25 +62,31 @@ This document tracks the implementation status and estimated effort for all Arch
|
||||
| [ADR-029](./0029-secret-rotation-and-key-management.md) | Secret Rotation | Proposed | L | Infrastructure changes needed |
|
||||
| [ADR-032](./0032-rate-limiting-strategy.md) | Rate Limiting | Accepted | - | Fully implemented |
|
||||
| [ADR-033](./0033-file-upload-and-storage-strategy.md) | File Upload & Storage | Accepted | - | Fully implemented |
|
||||
| [ADR-048](./0048-authentication-strategy.md) | Authentication | Accepted | - | Fully implemented |
|
||||
|
||||
### Category 5: Observability & Monitoring
|
||||
|
||||
| ADR | Title | Status | Effort | Notes |
|
||||
| -------------------------------------------------------------------------- | --------------------------- | -------- | ------ | --------------------------------- |
|
||||
| [ADR-004](./0004-standardized-application-wide-structured-logging.md) | Structured Logging | Accepted | - | Fully implemented |
|
||||
| [ADR-015](./0015-application-performance-monitoring-and-error-tracking.md) | APM & Error Tracking | Proposed | M | Third-party integration |
|
||||
| [ADR-050](./0050-postgresql-function-observability.md) | PostgreSQL Fn Observability | Proposed | M | Depends on ADR-015 implementation |
|
||||
| ADR | Title | Status | Effort | Notes |
|
||||
| --------------------------------------------------------------------- | --------------------------- | -------- | ------ | ------------------------------------------ |
|
||||
| [ADR-004](./0004-standardized-application-wide-structured-logging.md) | Structured Logging | Accepted | - | Fully implemented |
|
||||
| [ADR-015](./0015-error-tracking-and-observability.md) | Error Tracking | Accepted | - | Fully implemented |
|
||||
| [ADR-050](./0050-postgresql-function-observability.md) | PostgreSQL Fn Observability | Accepted | - | Fully implemented |
|
||||
| [ADR-051](./0051-asynchronous-context-propagation.md) | Context Propagation | Accepted | - | Fully implemented |
|
||||
| [ADR-052](./0052-granular-debug-logging-strategy.md) | Granular Debug Logging | Accepted | - | Fully implemented |
|
||||
| [ADR-056](./0056-application-performance-monitoring.md) | APM (Performance) | Proposed | M | tracesSampleRate=0, intentionally disabled |
|
||||
|
||||
### Category 6: Deployment & Operations
|
||||
|
||||
| ADR | Title | Status | Effort | Notes |
|
||||
| -------------------------------------------------------------- | ----------------- | -------- | ------ | -------------------------- |
|
||||
| [ADR-006](./0006-background-job-processing-and-task-queues.md) | Background Jobs | Accepted | - | Fully implemented |
|
||||
| [ADR-014](./0014-containerization-and-deployment-strategy.md) | Containerization | Partial | M | Docker done, K8s pending |
|
||||
| [ADR-017](./0017-ci-cd-and-branching-strategy.md) | CI/CD & Branching | Accepted | - | Fully implemented |
|
||||
| [ADR-024](./0024-feature-flagging-strategy.md) | Feature Flags | Proposed | M | New service/library needed |
|
||||
| [ADR-037](./0037-scheduled-jobs-and-cron-pattern.md) | Scheduled Jobs | Accepted | - | Fully implemented |
|
||||
| [ADR-038](./0038-graceful-shutdown-pattern.md) | Graceful Shutdown | Accepted | - | Fully implemented |
|
||||
| ADR | Title | Status | Effort | Notes |
|
||||
| -------------------------------------------------------------- | ------------------ | -------- | ------ | -------------------------- |
|
||||
| [ADR-006](./0006-background-job-processing-and-task-queues.md) | Background Jobs | Accepted | - | Fully implemented |
|
||||
| [ADR-014](./0014-containerization-and-deployment-strategy.md) | Containerization | Partial | M | Docker done, K8s pending |
|
||||
| [ADR-017](./0017-ci-cd-and-branching-strategy.md) | CI/CD & Branching | Accepted | - | Fully implemented |
|
||||
| [ADR-024](./0024-feature-flagging-strategy.md) | Feature Flags | Proposed | M | New service/library needed |
|
||||
| [ADR-037](./0037-scheduled-jobs-and-cron-pattern.md) | Scheduled Jobs | Accepted | - | Fully implemented |
|
||||
| [ADR-038](./0038-graceful-shutdown-pattern.md) | Graceful Shutdown | Accepted | - | Fully implemented |
|
||||
| [ADR-053](./0053-worker-health-checks.md) | Worker Health | Accepted | - | Fully implemented |
|
||||
| [ADR-054](./0054-bugsink-gitea-issue-sync.md) | Bugsink-Gitea Sync | Proposed | L | Automated issue creation |
|
||||
|
||||
### Category 7: Frontend / User Interface
|
||||
|
||||
@@ -99,61 +105,78 @@ This document tracks the implementation status and estimated effort for all Arch
|
||||
| [ADR-010](./0010-testing-strategy-and-standards.md) | Testing Strategy | Accepted | - | Fully implemented |
|
||||
| [ADR-021](./0021-code-formatting-and-linting-unification.md) | Formatting & Linting | Accepted | - | Fully implemented |
|
||||
| [ADR-027](./0027-standardized-naming-convention-for-ai-and-database-types.md) | Naming Conventions | Accepted | - | Fully implemented |
|
||||
| [ADR-040](./0040-testing-economics-and-priorities.md) | Testing Economics | Accepted | - | Fully implemented |
|
||||
| [ADR-045](./0045-test-data-factories-and-fixtures.md) | Test Data Factories | Accepted | - | Fully implemented |
|
||||
| [ADR-047](./0047-project-file-and-folder-organization.md) | Project Organization | Proposed | XL | Major reorganization |
|
||||
|
||||
### Category 9: Architecture Patterns
|
||||
|
||||
| ADR | Title | Status | Effort | Notes |
|
||||
| -------------------------------------------------------- | --------------------- | -------- | ------ | ----------------- |
|
||||
| [ADR-034](./0034-repository-pattern-standards.md) | Repository Pattern | Accepted | - | Fully implemented |
|
||||
| [ADR-035](./0035-service-layer-architecture.md) | Service Layer | Accepted | - | Fully implemented |
|
||||
| [ADR-036](./0036-event-bus-and-pub-sub-pattern.md) | Event Bus | Accepted | - | Fully implemented |
|
||||
| [ADR-039](./0039-dependency-injection-pattern.md) | Dependency Injection | Accepted | - | Fully implemented |
|
||||
| [ADR-041](./0041-ai-gemini-integration-architecture.md) | AI/Gemini Integration | Accepted | - | Fully implemented |
|
||||
| [ADR-042](./0042-email-and-notification-architecture.md) | Email & Notifications | Accepted | - | Fully implemented |
|
||||
| [ADR-043](./0043-express-middleware-pipeline.md) | Middleware Pipeline | Accepted | - | Fully implemented |
|
||||
| [ADR-046](./0046-image-processing-pipeline.md) | Image Processing | Accepted | - | Fully implemented |
|
||||
| [ADR-049](./0049-gamification-and-achievement-system.md) | Gamification System | Accepted | - | Fully implemented |
|
||||
| ADR | Title | Status | Effort | Notes |
|
||||
| --------------------------------------------------------------------- | --------------------- | -------- | ------ | ------------------------- |
|
||||
| [ADR-034](./0034-repository-pattern-standards.md) | Repository Pattern | Accepted | - | Fully implemented |
|
||||
| [ADR-035](./0035-service-layer-architecture.md) | Service Layer | Accepted | - | Fully implemented |
|
||||
| [ADR-036](./0036-event-bus-and-pub-sub-pattern.md) | Event Bus | Accepted | - | Fully implemented |
|
||||
| [ADR-039](./0039-dependency-injection-pattern.md) | Dependency Injection | Accepted | - | Fully implemented |
|
||||
| [ADR-041](./0041-ai-gemini-integration-architecture.md) | AI/Gemini Integration | Accepted | - | Fully implemented |
|
||||
| [ADR-042](./0042-email-and-notification-architecture.md) | Email & Notifications | Accepted | - | Fully implemented |
|
||||
| [ADR-043](./0043-express-middleware-pipeline.md) | Middleware Pipeline | Accepted | - | Fully implemented |
|
||||
| [ADR-046](./0046-image-processing-pipeline.md) | Image Processing | Accepted | - | Fully implemented |
|
||||
| [ADR-049](./0049-gamification-and-achievement-system.md) | Gamification System | Accepted | - | Fully implemented |
|
||||
| [ADR-055](./0055-database-normalization-and-referential-integrity.md) | DB Normalization | Accepted | M | API uses IDs, not strings |
|
||||
|
||||
---
|
||||
|
||||
## Work Still To Be Completed (Priority Order)
|
||||
|
||||
These ADRs are proposed but not yet implemented, ordered by suggested implementation priority:
|
||||
These ADRs are proposed or partially implemented, ordered by suggested implementation priority:
|
||||
|
||||
| Priority | ADR | Title | Effort | Rationale |
|
||||
| -------- | ------- | --------------------------- | ------ | ------------------------------------------------- |
|
||||
| 1 | ADR-015 | APM & Error Tracking | M | Production visibility, debugging |
|
||||
| 1b | ADR-050 | PostgreSQL Fn Observability | M | Database function visibility (depends on ADR-015) |
|
||||
| 2 | ADR-024 | Feature Flags | M | Safer deployments, A/B testing |
|
||||
| 3 | ADR-023 | Schema Migrations v2 | L | Database evolution support |
|
||||
| 4 | ADR-029 | Secret Rotation | L | Security improvement |
|
||||
| 5 | ADR-008 | API Versioning | L | Future API evolution |
|
||||
| 6 | ADR-030 | Circuit Breaker | L | Resilience improvement |
|
||||
| 7 | ADR-022 | Real-time Notifications | XL | Major feature enhancement |
|
||||
| 8 | ADR-011 | Authorization & RBAC | XL | Advanced permission system |
|
||||
| 9 | ADR-025 | i18n & l10n | XL | Multi-language support |
|
||||
| 10 | ADR-031 | Data Retention & Privacy | XL | Compliance requirements |
|
||||
| Priority | ADR | Title | Status | Effort | Rationale |
|
||||
| -------- | ------- | ------------------------ | -------- | ------ | ------------------------------------ |
|
||||
| 1 | ADR-024 | Feature Flags | Proposed | M | Safer deployments, A/B testing |
|
||||
| 2 | ADR-054 | Bugsink-Gitea Sync | Proposed | L | Automated issue tracking from errors |
|
||||
| 3 | ADR-023 | Schema Migrations v2 | Proposed | L | Database evolution support |
|
||||
| 4 | ADR-029 | Secret Rotation | Proposed | L | Security improvement |
|
||||
| 5 | ADR-008 | API Versioning | Proposed | L | Future API evolution |
|
||||
| 6 | ADR-030 | Circuit Breaker | Proposed | L | Resilience improvement |
|
||||
| 7 | ADR-056 | APM (Performance) | Proposed | M | Enable when performance issues arise |
|
||||
| 8 | ADR-011 | Authorization & RBAC | Proposed | XL | Advanced permission system |
|
||||
| 9 | ADR-025 | i18n & l10n | Proposed | XL | Multi-language support |
|
||||
| 10 | ADR-031 | Data Retention & Privacy | Proposed | XL | Compliance requirements |
|
||||
|
||||
---
|
||||
|
||||
## Recent Implementation History
|
||||
|
||||
| Date | ADR | Change |
|
||||
| ---------- | ------- | ---------------------------------------------------------------------- |
|
||||
| 2026-01-11 | ADR-050 | Created - PostgreSQL function observability with fn_log() and Logstash |
|
||||
| 2026-01-11 | ADR-018 | Implemented - OpenAPI/Swagger documentation at /docs/api-docs |
|
||||
| 2026-01-11 | ADR-049 | Created - Gamification system, achievements, and testing requirements |
|
||||
| 2026-01-09 | ADR-047 | Created - Project file/folder organization with migration plan |
|
||||
| 2026-01-09 | ADR-041 | Created - AI/Gemini integration with model fallback and rate limiting |
|
||||
| 2026-01-09 | ADR-042 | Created - Email and notification architecture with BullMQ queuing |
|
||||
| 2026-01-09 | ADR-043 | Created - Express middleware pipeline ordering and patterns |
|
||||
| 2026-01-09 | ADR-044 | Created - Frontend feature-based folder organization |
|
||||
| 2026-01-09 | ADR-045 | Created - Test data factory pattern for mock generation |
|
||||
| 2026-01-09 | ADR-046 | Created - Image processing pipeline with Sharp and EXIF stripping |
|
||||
| 2026-01-09 | ADR-026 | Fully implemented - client-side structured logger |
|
||||
| 2026-01-09 | ADR-028 | Fully implemented - all routes, middleware, and tests updated |
|
||||
| Date | ADR | Change |
|
||||
| ---------- | ------- | ---------------------------------------------------------------------------- |
|
||||
| 2026-01-26 | ADR-015 | Completed - Added Sentry user context in AuthProvider, now fully implemented |
|
||||
| 2026-01-26 | ADR-056 | Created - APM split from ADR-015, status Proposed (tracesSampleRate=0) |
|
||||
| 2026-01-26 | ADR-015 | Refactored to focus on error tracking only, temporarily status Partial |
|
||||
| 2026-01-26 | ADR-048 | Verified as fully implemented - JWT + OAuth authentication complete |
|
||||
| 2026-01-26 | ADR-022 | Verified as fully implemented - WebSocket notifications complete |
|
||||
| 2026-01-26 | ADR-052 | Marked as fully implemented - createScopedLogger complete |
|
||||
| 2026-01-26 | ADR-053 | Marked as fully implemented - /health/queues endpoint complete |
|
||||
| 2026-01-26 | ADR-050 | Marked as fully implemented - PostgreSQL function observability |
|
||||
| 2026-01-26 | ADR-055 | Created (renumbered from duplicate ADR-023) - DB normalization |
|
||||
| 2026-01-26 | ADR-054 | Added to tracker - Bugsink to Gitea issue synchronization |
|
||||
| 2026-01-26 | ADR-053 | Added to tracker - Worker health checks and monitoring |
|
||||
| 2026-01-26 | ADR-052 | Added to tracker - Granular debug logging strategy |
|
||||
| 2026-01-26 | ADR-051 | Added to tracker - Asynchronous context propagation |
|
||||
| 2026-01-26 | ADR-048 | Added to tracker - Authentication strategy |
|
||||
| 2026-01-26 | ADR-040 | Added to tracker - Testing economics and priorities |
|
||||
| 2026-01-17 | ADR-054 | Created - Bugsink-Gitea sync worker proposal |
|
||||
| 2026-01-11 | ADR-050 | Created - PostgreSQL function observability with fn_log() |
|
||||
| 2026-01-11 | ADR-018 | Implemented - OpenAPI/Swagger documentation at /docs/api-docs |
|
||||
| 2026-01-11 | ADR-049 | Created - Gamification system, achievements, and testing |
|
||||
| 2026-01-09 | ADR-047 | Created - Project file/folder organization with migration plan |
|
||||
| 2026-01-09 | ADR-041 | Created - AI/Gemini integration with model fallback |
|
||||
| 2026-01-09 | ADR-042 | Created - Email and notification architecture with BullMQ |
|
||||
| 2026-01-09 | ADR-043 | Created - Express middleware pipeline ordering and patterns |
|
||||
| 2026-01-09 | ADR-044 | Created - Frontend feature-based folder organization |
|
||||
| 2026-01-09 | ADR-045 | Created - Test data factory pattern for mock generation |
|
||||
| 2026-01-09 | ADR-046 | Created - Image processing pipeline with Sharp and EXIF stripping |
|
||||
| 2026-01-09 | ADR-026 | Fully implemented - client-side structured logger |
|
||||
| 2026-01-09 | ADR-028 | Fully implemented - all routes, middleware, and tests updated |
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ This directory contains a log of the architectural decisions made for the Flyer
|
||||
|
||||
**[ADR-003](./0003-standardized-input-validation-using-middleware.md)**: Standardized Input Validation using Middleware (Accepted)
|
||||
**[ADR-008](./0008-api-versioning-strategy.md)**: API Versioning Strategy (Proposed)
|
||||
**[ADR-018](./0018-api-documentation-strategy.md)**: API Documentation Strategy (Proposed)
|
||||
**[ADR-018](./0018-api-documentation-strategy.md)**: API Documentation Strategy (Accepted)
|
||||
**[ADR-022](./0022-real-time-notification-system.md)**: Real-time Notification System (Proposed)
|
||||
**[ADR-028](./0028-api-response-standardization.md)**: API Response Standardization and Envelope Pattern (Implemented)
|
||||
|
||||
@@ -38,7 +38,11 @@ This directory contains a log of the architectural decisions made for the Flyer
|
||||
## 5. Observability & Monitoring
|
||||
|
||||
**[ADR-004](./0004-standardized-application-wide-structured-logging.md)**: Standardized Application-Wide Structured Logging (Accepted)
|
||||
**[ADR-015](./0015-application-performance-monitoring-and-error-tracking.md)**: Application Performance Monitoring (APM) and Error Tracking (Proposed)
|
||||
**[ADR-015](./0015-error-tracking-and-observability.md)**: Error Tracking and Observability (Partial)
|
||||
**[ADR-050](./0050-postgresql-function-observability.md)**: PostgreSQL Function Observability (Accepted)
|
||||
**[ADR-051](./0051-asynchronous-context-propagation.md)**: Asynchronous Context Propagation (Accepted)
|
||||
**[ADR-052](./0052-granular-debug-logging-strategy.md)**: Granular Debug Logging Strategy (Accepted)
|
||||
**[ADR-056](./0056-application-performance-monitoring.md)**: Application Performance Monitoring (Proposed)
|
||||
|
||||
## 6. Deployment & Operations
|
||||
|
||||
@@ -48,13 +52,15 @@ This directory contains a log of the architectural decisions made for the Flyer
|
||||
**[ADR-024](./0024-feature-flagging-strategy.md)**: Feature Flagging Strategy (Proposed)
|
||||
**[ADR-037](./0037-scheduled-jobs-and-cron-pattern.md)**: Scheduled Jobs and Cron Pattern (Accepted)
|
||||
**[ADR-038](./0038-graceful-shutdown-pattern.md)**: Graceful Shutdown Pattern (Accepted)
|
||||
**[ADR-053](./0053-worker-health-checks-and-monitoring.md)**: Worker Health Checks and Monitoring (Proposed)
|
||||
**[ADR-054](./0054-bugsink-gitea-issue-sync.md)**: Bugsink to Gitea Issue Synchronization (Proposed)
|
||||
|
||||
## 7. Frontend / User Interface
|
||||
|
||||
**[ADR-005](./0005-frontend-state-management-and-server-cache-strategy.md)**: Frontend State Management and Server Cache Strategy (Accepted)
|
||||
**[ADR-012](./0012-frontend-component-library-and-design-system.md)**: Frontend Component Library and Design System (Partially Implemented)
|
||||
**[ADR-025](./0025-internationalization-and-localization-strategy.md)**: Internationalization (i18n) and Localization (l10n) Strategy (Proposed)
|
||||
**[ADR-026](./0026-standardized-client-side-structured-logging.md)**: Standardized Client-Side Structured Logging (Proposed)
|
||||
**[ADR-026](./0026-standardized-client-side-structured-logging.md)**: Standardized Client-Side Structured Logging (Accepted)
|
||||
**[ADR-044](./0044-frontend-feature-organization.md)**: Frontend Feature Organization Pattern (Accepted)
|
||||
|
||||
## 8. Development Workflow & Quality
|
||||
@@ -76,3 +82,5 @@ This directory contains a log of the architectural decisions made for the Flyer
|
||||
**[ADR-042](./0042-email-and-notification-architecture.md)**: Email and Notification Architecture (Accepted)
|
||||
**[ADR-043](./0043-express-middleware-pipeline.md)**: Express Middleware Pipeline Architecture (Accepted)
|
||||
**[ADR-046](./0046-image-processing-pipeline.md)**: Image Processing Pipeline (Accepted)
|
||||
**[ADR-049](./0049-gamification-and-achievement-system.md)**: Gamification and Achievement System (Accepted)
|
||||
**[ADR-055](./0055-database-normalization-and-referential-integrity.md)**: Database Normalization and Referential Integrity (Accepted)
|
||||
|
||||
@@ -293,7 +293,23 @@ Google Gemini powers the AI extraction capabilities.
|
||||
|
||||
### Background Workers (BullMQ/PM2)
|
||||
|
||||
BullMQ workers handle asynchronous processing tasks.
|
||||
BullMQ workers handle asynchronous processing tasks. PM2 manages both the API server and worker processes in production and development environments (ADR-014).
|
||||
|
||||
**Dev Container PM2 Processes**:
|
||||
|
||||
| Process | Script | Purpose |
|
||||
| -------------------------- | ---------------------------------- | -------------------------- |
|
||||
| `flyer-crawler-api-dev` | `tsx watch server.ts` | API server with hot reload |
|
||||
| `flyer-crawler-worker-dev` | `tsx watch src/services/worker.ts` | Background job processing |
|
||||
| `flyer-crawler-vite-dev` | `vite --host` | Frontend dev server |
|
||||
|
||||
**Production PM2 Processes**:
|
||||
|
||||
| Process | Script | Purpose |
|
||||
| -------------------------------- | ------------------ | --------------------------- |
|
||||
| `flyer-crawler-api` | `tsx server.ts` | API server (cluster mode) |
|
||||
| `flyer-crawler-worker` | `tsx worker.ts` | Background job processing |
|
||||
| `flyer-crawler-analytics-worker` | `tsx analytics.ts` | Analytics report generation |
|
||||
|
||||
**Job Queues**:
|
||||
|
||||
@@ -651,9 +667,11 @@ Attempt 4: 4x delay (e.g., 20 seconds)
|
||||
| | Windows Host Machine | | Linux Dev Container | |
|
||||
| | - VS Code | | (flyer-crawler-dev) | |
|
||||
| | - Podman Desktop +---->+ - Node.js 22 | |
|
||||
| | - Git | | - PostgreSQL 16 | |
|
||||
| +-----------------------------------+ | - Redis 7 | |
|
||||
| | - Git | | - PM2 (process manager) | |
|
||||
| +-----------------------------------+ | - PostgreSQL 16 | |
|
||||
| | - Redis 7 | |
|
||||
| | - Bugsink (local) | |
|
||||
| | - Logstash (log aggregation) | |
|
||||
| +-----------------------------------+ |
|
||||
+-----------------------------------------------------------------------------------+
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ Common debugging strategies and troubleshooting patterns for Flyer Crawler.
|
||||
- [API Errors](#api-errors)
|
||||
- [Authentication Problems](#authentication-problems)
|
||||
- [Background Job Issues](#background-job-issues)
|
||||
- [SSL Certificate Issues](#ssl-certificate-issues)
|
||||
- [Frontend Issues](#frontend-issues)
|
||||
- [Performance Problems](#performance-problems)
|
||||
- [Debugging Tools](#debugging-tools)
|
||||
@@ -43,14 +44,24 @@ When something breaks, check these first:
|
||||
4. **Are there recent errors in logs?**
|
||||
|
||||
```bash
|
||||
# Application logs
|
||||
# PM2 logs (dev container)
|
||||
podman exec -it flyer-crawler-dev pm2 logs
|
||||
|
||||
# Container logs
|
||||
podman logs -f flyer-crawler-dev
|
||||
|
||||
# PM2 logs (production)
|
||||
pm2 logs flyer-crawler-api
|
||||
```
|
||||
|
||||
5. **Is Redis accessible?**
|
||||
5. **Are PM2 processes running?**
|
||||
|
||||
```bash
|
||||
podman exec -it flyer-crawler-dev pm2 status
|
||||
```
|
||||
|
||||
6. **Is Redis accessible?**
|
||||
|
||||
```bash
|
||||
podman exec flyer-crawler-redis redis-cli ping
|
||||
```
|
||||
@@ -426,6 +437,92 @@ console.log('Expired:', decoded.exp < Date.now() / 1000);
|
||||
|
||||
---
|
||||
|
||||
## PM2 Process Issues (Dev Container)
|
||||
|
||||
### PM2 Process Not Starting
|
||||
|
||||
**Symptom**: `pm2 status` shows process as "errored" or "stopped"
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Check process status
|
||||
podman exec -it flyer-crawler-dev pm2 status
|
||||
|
||||
# View process logs
|
||||
podman exec -it flyer-crawler-dev pm2 logs flyer-crawler-api-dev --lines 50
|
||||
|
||||
# Show detailed process info
|
||||
podman exec -it flyer-crawler-dev pm2 show flyer-crawler-api-dev
|
||||
|
||||
# Check if port is in use
|
||||
podman exec -it flyer-crawler-dev netstat -tlnp | grep 3001
|
||||
```
|
||||
|
||||
**Common Causes**:
|
||||
|
||||
- Port already in use by another process
|
||||
- Missing environment variables
|
||||
- Syntax error in code
|
||||
- Database connection failure
|
||||
|
||||
**Solutions**:
|
||||
|
||||
```bash
|
||||
# Restart specific process
|
||||
podman exec -it flyer-crawler-dev pm2 restart flyer-crawler-api-dev
|
||||
|
||||
# Restart all processes
|
||||
podman exec -it flyer-crawler-dev pm2 restart all
|
||||
|
||||
# Delete and recreate processes
|
||||
podman exec -it flyer-crawler-dev pm2 delete all
|
||||
podman exec -it flyer-crawler-dev pm2 start /app/ecosystem.dev.config.cjs
|
||||
```
|
||||
|
||||
### PM2 Log Access
|
||||
|
||||
**View logs in real-time**:
|
||||
|
||||
```bash
|
||||
# All processes
|
||||
podman exec -it flyer-crawler-dev pm2 logs
|
||||
|
||||
# Specific process
|
||||
podman exec -it flyer-crawler-dev pm2 logs flyer-crawler-api-dev
|
||||
|
||||
# Last 100 lines
|
||||
podman exec -it flyer-crawler-dev pm2 logs --lines 100
|
||||
```
|
||||
|
||||
**View log files directly**:
|
||||
|
||||
```bash
|
||||
# API logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/pm2/api-out.log
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/pm2/api-error.log
|
||||
|
||||
# Worker logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/pm2/worker-out.log
|
||||
|
||||
# Vite logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/pm2/vite-out.log
|
||||
```
|
||||
|
||||
### Redis Log Access
|
||||
|
||||
**View Redis logs**:
|
||||
|
||||
```bash
|
||||
# View Redis log file
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-redis cat /var/log/redis/redis-server.log
|
||||
|
||||
# View from dev container (shared volume)
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/redis/redis-server.log
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Background Job Issues
|
||||
|
||||
### Jobs Not Processing
|
||||
@@ -433,10 +530,16 @@ console.log('Expired:', decoded.exp < Date.now() / 1000);
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Check if worker is running
|
||||
# Check if worker is running (dev container)
|
||||
podman exec -it flyer-crawler-dev pm2 status
|
||||
|
||||
# Check worker logs (dev container)
|
||||
podman exec -it flyer-crawler-dev pm2 logs flyer-crawler-worker-dev
|
||||
|
||||
# Check if worker is running (production)
|
||||
pm2 list
|
||||
|
||||
# Check worker logs
|
||||
# Check worker logs (production)
|
||||
pm2 logs flyer-crawler-worker
|
||||
|
||||
# Check Redis connection
|
||||
@@ -494,6 +597,77 @@ pm2 logs flyer-crawler-worker --lines 100
|
||||
|
||||
---
|
||||
|
||||
## SSL Certificate Issues
|
||||
|
||||
### Images Not Loading (ERR_CERT_AUTHORITY_INVALID)
|
||||
|
||||
**Symptom**: Flyer images fail to load with `ERR_CERT_AUTHORITY_INVALID` in browser console
|
||||
|
||||
**Cause**: Mixed hostname origins - user accesses via `localhost` but images use `127.0.0.1` (or vice versa)
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Check which hostname images are using
|
||||
podman exec flyer-crawler-dev psql -U postgres -d flyer_crawler_dev \
|
||||
-c "SELECT image_url FROM flyers LIMIT 1;"
|
||||
|
||||
# Verify certificate includes both hostnames
|
||||
podman exec flyer-crawler-dev openssl x509 -in /app/certs/localhost.crt -text -noout | grep -A1 "Subject Alternative Name"
|
||||
|
||||
# Check NGINX accepts both hostnames
|
||||
podman exec flyer-crawler-dev grep "server_name" /etc/nginx/sites-available/default
|
||||
```
|
||||
|
||||
**Solution**: The dev container is configured to handle both hostnames:
|
||||
|
||||
1. Certificate includes SANs for `localhost`, `127.0.0.1`, and `::1`
|
||||
2. NGINX `server_name` directive includes both `localhost` and `127.0.0.1`
|
||||
|
||||
If you still see errors:
|
||||
|
||||
```bash
|
||||
# Rebuild container to regenerate certificate
|
||||
podman-compose down
|
||||
podman-compose build --no-cache flyer-crawler-dev
|
||||
podman-compose up -d
|
||||
```
|
||||
|
||||
See [FLYER-URL-CONFIGURATION.md](../FLYER-URL-CONFIGURATION.md#ssl-certificate-configuration-dev-container) for full details.
|
||||
|
||||
### Self-Signed Certificate Not Trusted
|
||||
|
||||
**Symptom**: Browser shows security warning for `https://localhost`
|
||||
|
||||
**Temporary Workaround**: Accept the warning by clicking "Advanced" > "Proceed to localhost"
|
||||
|
||||
**Permanent Fix (Recommended)**: Install the mkcert CA certificate to eliminate all SSL warnings.
|
||||
|
||||
The CA certificate is located at `certs/mkcert-ca.crt` in the project root. See [`certs/README.md`](../../certs/README.md) for platform-specific installation instructions (Windows, macOS, Linux, Firefox).
|
||||
|
||||
After installation:
|
||||
|
||||
- Your browser will trust all mkcert certificates without warnings
|
||||
- Both `https://localhost/` and `https://127.0.0.1/` will work without SSL errors
|
||||
- Flyer images will load without `ERR_CERT_AUTHORITY_INVALID` errors
|
||||
|
||||
### NGINX SSL Configuration Test
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Test NGINX configuration
|
||||
podman exec flyer-crawler-dev nginx -t
|
||||
|
||||
# Check if NGINX is listening on 443
|
||||
podman exec flyer-crawler-dev netstat -tlnp | grep 443
|
||||
|
||||
# Verify certificate files exist
|
||||
podman exec flyer-crawler-dev ls -la /app/certs/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Frontend Issues
|
||||
|
||||
### Hot Reload Not Working
|
||||
@@ -661,8 +835,10 @@ podman exec flyer-crawler-redis redis-cli info stats
|
||||
|
||||
## See Also
|
||||
|
||||
- [DEV-CONTAINER.md](DEV-CONTAINER.md) - Dev container guide (PM2, Logstash)
|
||||
- [TESTING.md](TESTING.md) - Testing strategies
|
||||
- [CODE-PATTERNS.md](CODE-PATTERNS.md) - Common patterns
|
||||
- [MONITORING.md](../operations/MONITORING.md) - Production monitoring
|
||||
- [LOGSTASH-QUICK-REF.md](../operations/LOGSTASH-QUICK-REF.md) - Log aggregation
|
||||
- [Bugsink Setup](../tools/BUGSINK-SETUP.md) - Error tracking
|
||||
- [DevOps Guide](../subagents/DEVOPS-GUIDE.md) - Container debugging
|
||||
|
||||
408
docs/development/DEV-CONTAINER.md
Normal file
408
docs/development/DEV-CONTAINER.md
Normal file
@@ -0,0 +1,408 @@
|
||||
# Dev Container Guide
|
||||
|
||||
Comprehensive documentation for the Flyer Crawler development container.
|
||||
|
||||
**Last Updated**: 2026-01-22
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Overview](#overview)
|
||||
2. [Architecture](#architecture)
|
||||
3. [PM2 Process Management](#pm2-process-management)
|
||||
4. [Log Aggregation](#log-aggregation)
|
||||
5. [Quick Reference](#quick-reference)
|
||||
6. [Troubleshooting](#troubleshooting)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The dev container provides a production-like environment for local development. Key features:
|
||||
|
||||
- **PM2 Process Management** - Matches production architecture (ADR-014)
|
||||
- **Logstash Integration** - All logs forwarded to Bugsink (ADR-050)
|
||||
- **HTTPS by Default** - Self-signed certificates via mkcert
|
||||
- **Hot Reloading** - tsx watch mode for API and worker processes
|
||||
|
||||
### Container Services
|
||||
|
||||
| Service | Container Name | Purpose |
|
||||
| ----------- | ------------------------ | ------------------------------ |
|
||||
| Application | `flyer-crawler-dev` | Node.js app, Bugsink, Logstash |
|
||||
| PostgreSQL | `flyer-crawler-postgres` | Primary database with PostGIS |
|
||||
| Redis | `flyer-crawler-redis` | Cache and job queue backing |
|
||||
|
||||
### Access Points
|
||||
|
||||
| Service | URL | Notes |
|
||||
| ----------- | ------------------------ | ------------------------- |
|
||||
| Frontend | `https://localhost` | NGINX proxies Vite (5173) |
|
||||
| Backend API | `http://localhost:3001` | Express server |
|
||||
| Bugsink | `https://localhost:8443` | Error tracking UI |
|
||||
| PostgreSQL | `localhost:5432` | Direct database access |
|
||||
| Redis | `localhost:6379` | Direct Redis access |
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### Production vs Development Parity
|
||||
|
||||
The dev container was updated to match production architecture (ADR-014):
|
||||
|
||||
| Component | Production | Dev Container (OLD) | Dev Container (NEW) |
|
||||
| ------------ | ---------------------- | ---------------------- | ----------------------- |
|
||||
| API Server | PM2 cluster mode | `npm run dev` (inline) | PM2 fork + tsx watch |
|
||||
| Worker | PM2 process | Inline with API | PM2 process + tsx watch |
|
||||
| Frontend | Static files via NGINX | Vite standalone | PM2 + Vite dev server |
|
||||
| Logs | PM2 logs -> Logstash | Console only | PM2 logs -> Logstash |
|
||||
| Process Mgmt | PM2 | None | PM2 |
|
||||
|
||||
### Container Startup Flow
|
||||
|
||||
When the container starts (`scripts/dev-entrypoint.sh`):
|
||||
|
||||
1. **NGINX** starts (HTTPS proxy for Vite and Bugsink)
|
||||
2. **Bugsink** starts (error tracking on port 8000)
|
||||
3. **Logstash** starts (log aggregation)
|
||||
4. **PM2** starts with `ecosystem.dev.config.cjs`:
|
||||
- `flyer-crawler-api-dev` - API server
|
||||
- `flyer-crawler-worker-dev` - Background worker
|
||||
- `flyer-crawler-vite-dev` - Vite dev server
|
||||
5. Container tails PM2 logs to stay alive
|
||||
|
||||
### Key Configuration Files
|
||||
|
||||
| File | Purpose |
|
||||
| ------------------------------ | ---------------------------------- |
|
||||
| `ecosystem.dev.config.cjs` | PM2 development configuration |
|
||||
| `ecosystem.config.cjs` | PM2 production configuration |
|
||||
| `scripts/dev-entrypoint.sh` | Container startup script |
|
||||
| `docker/logstash/bugsink.conf` | Logstash pipeline configuration |
|
||||
| `docker/nginx/dev.conf` | NGINX development configuration |
|
||||
| `compose.dev.yml` | Docker Compose service definitions |
|
||||
| `Dockerfile.dev` | Container image definition |
|
||||
|
||||
---
|
||||
|
||||
## PM2 Process Management
|
||||
|
||||
### Process Overview
|
||||
|
||||
PM2 manages three processes in the dev container:
|
||||
|
||||
```
|
||||
+--------------------+ +------------------------+ +--------------------+
|
||||
| flyer-crawler- | | flyer-crawler- | | flyer-crawler- |
|
||||
| api-dev | | worker-dev | | vite-dev |
|
||||
+--------------------+ +------------------------+ +--------------------+
|
||||
| tsx watch | | tsx watch | | vite --host |
|
||||
| server.ts | | src/services/worker.ts | | |
|
||||
| Port: 3001 | | No port | | Port: 5173 |
|
||||
+--------------------+ +------------------------+ +--------------------+
|
||||
| | |
|
||||
v v v
|
||||
+------------------------------------------------------------------------+
|
||||
| /var/log/pm2/*.log |
|
||||
| (Logstash picks up for Bugsink) |
|
||||
+------------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
### PM2 Commands
|
||||
|
||||
All commands should be run inside the container:
|
||||
|
||||
```bash
|
||||
# View process status
|
||||
podman exec -it flyer-crawler-dev pm2 status
|
||||
|
||||
# View all logs (tail -f style)
|
||||
podman exec -it flyer-crawler-dev pm2 logs
|
||||
|
||||
# View specific process logs
|
||||
podman exec -it flyer-crawler-dev pm2 logs flyer-crawler-api-dev
|
||||
|
||||
# Restart all processes
|
||||
podman exec -it flyer-crawler-dev pm2 restart all
|
||||
|
||||
# Restart specific process
|
||||
podman exec -it flyer-crawler-dev pm2 restart flyer-crawler-api-dev
|
||||
|
||||
# Stop all processes
|
||||
podman exec -it flyer-crawler-dev pm2 delete all
|
||||
|
||||
# Show detailed process info
|
||||
podman exec -it flyer-crawler-dev pm2 show flyer-crawler-api-dev
|
||||
|
||||
# Monitor processes in real-time
|
||||
podman exec -it flyer-crawler-dev pm2 monit
|
||||
```
|
||||
|
||||
### PM2 Log Locations
|
||||
|
||||
| Process | stdout Log | stderr Log |
|
||||
| -------------------------- | ----------------------------- | ------------------------------- |
|
||||
| `flyer-crawler-api-dev` | `/var/log/pm2/api-out.log` | `/var/log/pm2/api-error.log` |
|
||||
| `flyer-crawler-worker-dev` | `/var/log/pm2/worker-out.log` | `/var/log/pm2/worker-error.log` |
|
||||
| `flyer-crawler-vite-dev` | `/var/log/pm2/vite-out.log` | `/var/log/pm2/vite-error.log` |
|
||||
|
||||
### NPM Scripts for PM2
|
||||
|
||||
The following npm scripts are available for PM2 management:
|
||||
|
||||
```bash
|
||||
# Start PM2 with dev config (inside container)
|
||||
npm run dev:pm2
|
||||
|
||||
# Restart all PM2 processes
|
||||
npm run dev:pm2:restart
|
||||
|
||||
# Stop all PM2 processes
|
||||
npm run dev:pm2:stop
|
||||
|
||||
# View PM2 status
|
||||
npm run dev:pm2:status
|
||||
|
||||
# View PM2 logs
|
||||
npm run dev:pm2:logs
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Log Aggregation
|
||||
|
||||
### Log Flow Architecture (ADR-050)
|
||||
|
||||
All application logs flow through Logstash to Bugsink using a 3-project architecture:
|
||||
|
||||
```text
|
||||
+------------------+ +------------------+ +------------------+
|
||||
| PM2 Logs | | PostgreSQL | | Redis/NGINX |
|
||||
| /var/log/pm2/ | | /var/log/ | | /var/log/redis/ |
|
||||
| (API + Worker) | | postgresql/ | | /var/log/nginx/ |
|
||||
+--------+---------+ +--------+---------+ +--------+---------+
|
||||
| | |
|
||||
v v v
|
||||
+------------------------------------------------------------------------+
|
||||
| LOGSTASH |
|
||||
| /etc/logstash/conf.d/bugsink.conf |
|
||||
| (Routes by log type) |
|
||||
+------------------------------------------------------------------------+
|
||||
| | |
|
||||
v v v
|
||||
+------------------+ +------------------+ +------------------+
|
||||
| Backend API | | Frontend (Dev) | | Infrastructure |
|
||||
| (Project 1) | | (Project 2) | | (Project 4) |
|
||||
| - Pino errors | | - Browser SDK | | - Redis warnings |
|
||||
| - PostgreSQL | | (not Logstash) | | - NGINX errors |
|
||||
+------------------+ +------------------+ | - Vite errors |
|
||||
+------------------+
|
||||
```
|
||||
|
||||
### Log Sources
|
||||
|
||||
| Source | Log Path | Format | Errors To Bugsink |
|
||||
| ------------ | --------------------------------- | ---------- | ----------------- |
|
||||
| API Server | `/var/log/pm2/api-*.log` | Pino JSON | Yes |
|
||||
| Worker | `/var/log/pm2/worker-*.log` | Pino JSON | Yes |
|
||||
| Vite | `/var/log/pm2/vite-*.log` | Plain text | Yes (if error) |
|
||||
| PostgreSQL | `/var/log/postgresql/*.log` | PostgreSQL | Yes (ERROR/FATAL) |
|
||||
| Redis | `/var/log/redis/redis-server.log` | Redis | Yes (warnings) |
|
||||
| NGINX Access | `/var/log/nginx/access.log` | Combined | Yes (5xx only) |
|
||||
| NGINX Error | `/var/log/nginx/error.log` | NGINX | Yes |
|
||||
|
||||
### Viewing Logs
|
||||
|
||||
```bash
|
||||
# View Logstash processed logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/pm2-api-$(date +%Y-%m-%d).log
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/redis-operational-$(date +%Y-%m-%d).log
|
||||
|
||||
# View raw Redis logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-redis cat /var/log/redis/redis-server.log
|
||||
|
||||
# Check Logstash status
|
||||
podman exec flyer-crawler-dev curl -s localhost:9600/_node/stats/pipelines?pretty
|
||||
```
|
||||
|
||||
### Bugsink Access
|
||||
|
||||
- **URL**: `https://localhost:8443`
|
||||
- **Login**: `admin@localhost` / `admin`
|
||||
- **Projects**:
|
||||
- Project 1: Backend API (Dev) - Pino app errors, PostgreSQL errors
|
||||
- Project 2: Frontend (Dev) - Browser errors via Sentry SDK
|
||||
- Project 4: Infrastructure (Dev) - Redis warnings, NGINX errors, Vite build errors
|
||||
|
||||
**Note**: Frontend DSN uses nginx proxy (`/bugsink-api/`) because browsers cannot reach `localhost:8000` directly. See [BUGSINK-SETUP.md](../tools/BUGSINK-SETUP.md#frontend-nginx-proxy) for details.
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
### Starting the Dev Container
|
||||
|
||||
```bash
|
||||
# Start all services
|
||||
podman-compose -f compose.dev.yml up -d
|
||||
|
||||
# View container logs
|
||||
podman-compose -f compose.dev.yml logs -f
|
||||
|
||||
# Stop all services
|
||||
podman-compose -f compose.dev.yml down
|
||||
```
|
||||
|
||||
### Common Tasks
|
||||
|
||||
| Task | Command |
|
||||
| -------------------- | ------------------------------------------------------------------------------ |
|
||||
| Run tests | `podman exec -it flyer-crawler-dev npm test` |
|
||||
| Run type check | `podman exec -it flyer-crawler-dev npm run type-check` |
|
||||
| View PM2 status | `podman exec -it flyer-crawler-dev pm2 status` |
|
||||
| View PM2 logs | `podman exec -it flyer-crawler-dev pm2 logs` |
|
||||
| Restart API | `podman exec -it flyer-crawler-dev pm2 restart flyer-crawler-api-dev` |
|
||||
| Access PostgreSQL | `podman exec -it flyer-crawler-postgres psql -U postgres -d flyer_crawler_dev` |
|
||||
| Access Redis CLI | `podman exec -it flyer-crawler-redis redis-cli` |
|
||||
| Shell into container | `podman exec -it flyer-crawler-dev bash` |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Key environment variables are set in `compose.dev.yml`:
|
||||
|
||||
| Variable | Value | Purpose |
|
||||
| ----------------- | ----------------------------- | --------------------------- |
|
||||
| `TZ` | `America/Los_Angeles` | Timezone (PST) for all logs |
|
||||
| `NODE_ENV` | `development` | Environment mode |
|
||||
| `DB_HOST` | `postgres` | PostgreSQL hostname |
|
||||
| `REDIS_URL` | `redis://redis:6379` | Redis connection URL |
|
||||
| `FRONTEND_URL` | `https://localhost` | CORS origin |
|
||||
| `SENTRY_DSN` | `http://...@127.0.0.1:8000/1` | Backend Bugsink DSN |
|
||||
| `VITE_SENTRY_DSN` | `http://...@127.0.0.1:8000/2` | Frontend Bugsink DSN |
|
||||
|
||||
### Timezone Configuration
|
||||
|
||||
All dev container services are configured to use PST (America/Los_Angeles) timezone for consistent log timestamps:
|
||||
|
||||
| Service | Configuration | Notes |
|
||||
| ---------- | ------------------------------------------------ | ------------------------------ |
|
||||
| App | `TZ=America/Los_Angeles` in compose.dev.yml | Also set via dev-entrypoint.sh |
|
||||
| PostgreSQL | `timezone` and `log_timezone` in postgres config | Logs timestamps in PST |
|
||||
| Redis | `TZ=America/Los_Angeles` in compose.dev.yml | Alpine uses TZ env var |
|
||||
| PM2 | `TZ` in ecosystem.dev.config.cjs | Pino timestamps use local time |
|
||||
|
||||
**Verifying Timezone**:
|
||||
|
||||
```bash
|
||||
# Check container timezone
|
||||
podman exec flyer-crawler-dev date
|
||||
|
||||
# Check PostgreSQL timezone
|
||||
podman exec flyer-crawler-postgres psql -U postgres -c "SHOW timezone;"
|
||||
|
||||
# Check Redis log timestamps
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-redis cat /var/log/redis/redis-server.log | head -5
|
||||
```
|
||||
|
||||
**Note**: If you need UTC timestamps for production compatibility, change `TZ=UTC` in compose.dev.yml and restart containers.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### PM2 Process Not Starting
|
||||
|
||||
**Symptom**: `pm2 status` shows process as "errored" or "stopped"
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Check process logs
|
||||
podman exec -it flyer-crawler-dev pm2 logs flyer-crawler-api-dev --lines 50
|
||||
|
||||
# Check if port is in use
|
||||
podman exec -it flyer-crawler-dev netstat -tlnp | grep 3001
|
||||
|
||||
# Try manual start to see errors
|
||||
podman exec -it flyer-crawler-dev tsx server.ts
|
||||
```
|
||||
|
||||
**Common Causes**:
|
||||
|
||||
- Port already in use
|
||||
- Missing environment variables
|
||||
- Syntax error in code
|
||||
|
||||
### Logs Not Appearing in Bugsink
|
||||
|
||||
**Symptom**: Errors in application but nothing in Bugsink UI
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Check Logstash is running
|
||||
podman exec flyer-crawler-dev curl -s localhost:9600/_node/stats/pipelines?pretty
|
||||
|
||||
# Check Logstash logs for errors
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/logstash.log
|
||||
|
||||
# Verify PM2 logs exist
|
||||
podman exec flyer-crawler-dev ls -la /var/log/pm2/
|
||||
```
|
||||
|
||||
**Common Causes**:
|
||||
|
||||
- Logstash not started
|
||||
- Log file permissions
|
||||
- Bugsink not running
|
||||
|
||||
### Redis Logs Not Captured
|
||||
|
||||
**Symptom**: Redis warnings not appearing in Bugsink
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Verify Redis logs exist
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-redis cat /var/log/redis/redis-server.log
|
||||
|
||||
# Verify shared volume is mounted
|
||||
podman exec flyer-crawler-dev ls -la /var/log/redis/
|
||||
```
|
||||
|
||||
**Common Causes**:
|
||||
|
||||
- `redis_logs` volume not mounted
|
||||
- Redis not configured to write to file
|
||||
|
||||
### Hot Reload Not Working
|
||||
|
||||
**Symptom**: Code changes not reflected in running application
|
||||
|
||||
**Debug**:
|
||||
|
||||
```bash
|
||||
# Check tsx watch is running
|
||||
podman exec -it flyer-crawler-dev pm2 logs flyer-crawler-api-dev
|
||||
|
||||
# Manually restart process
|
||||
podman exec -it flyer-crawler-dev pm2 restart flyer-crawler-api-dev
|
||||
```
|
||||
|
||||
**Common Causes**:
|
||||
|
||||
- File watcher limit reached
|
||||
- Volume mount issues on Windows
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [QUICKSTART.md](../getting-started/QUICKSTART.md) - Getting started guide
|
||||
- [DEBUGGING.md](DEBUGGING.md) - Debugging strategies
|
||||
- [LOGSTASH-QUICK-REF.md](../operations/LOGSTASH-QUICK-REF.md) - Logstash quick reference
|
||||
- [DEV-CONTAINER-BUGSINK.md](../DEV-CONTAINER-BUGSINK.md) - Bugsink setup in dev container
|
||||
- [ADR-014](../adr/0014-linux-only-platform.md) - Linux-only platform decision
|
||||
- [ADR-050](../adr/0050-postgresql-function-observability.md) - PostgreSQL function observability
|
||||
@@ -119,6 +119,31 @@ npm run dev
|
||||
- **Frontend**: http://localhost:5173
|
||||
- **Backend API**: http://localhost:3001
|
||||
|
||||
### Dev Container with HTTPS (Full Stack)
|
||||
|
||||
When using the full dev container stack with NGINX (via `compose.dev.yml`), access the application over HTTPS:
|
||||
|
||||
- **Frontend**: https://localhost or https://127.0.0.1
|
||||
- **Backend API**: http://localhost:3001
|
||||
|
||||
**SSL Certificate Notes:**
|
||||
|
||||
- The dev container uses self-signed certificates generated by mkcert
|
||||
- Both `localhost` and `127.0.0.1` are valid hostnames (certificate includes both as SANs)
|
||||
- If images fail to load with SSL errors, see [FLYER-URL-CONFIGURATION.md](../FLYER-URL-CONFIGURATION.md#ssl-certificate-configuration-dev-container)
|
||||
|
||||
**Eliminate SSL Warnings (Recommended):**
|
||||
|
||||
To avoid browser security warnings for self-signed certificates, install the mkcert CA certificate on your system. The CA certificate is located at `certs/mkcert-ca.crt` in the project root.
|
||||
|
||||
See [`certs/README.md`](../../certs/README.md) for platform-specific installation instructions (Windows, macOS, Linux, Firefox).
|
||||
|
||||
After installation:
|
||||
|
||||
- Your browser will trust all mkcert certificates without warnings
|
||||
- Both `https://localhost/` and `https://127.0.0.1/` will work without SSL errors
|
||||
- Flyer images will load without `ERR_CERT_AUTHORITY_INVALID` errors
|
||||
|
||||
### Managing the Container
|
||||
|
||||
| Action | Command |
|
||||
|
||||
@@ -86,6 +86,44 @@ npm run dev
|
||||
- **Backend API**: http://localhost:3001
|
||||
- **Health Check**: http://localhost:3001/health
|
||||
|
||||
### Dev Container (HTTPS)
|
||||
|
||||
When using the full dev container with NGINX, access via HTTPS:
|
||||
|
||||
- **Frontend**: https://localhost or https://127.0.0.1
|
||||
- **Backend API**: http://localhost:3001
|
||||
- **Bugsink**: `https://localhost:8443` (error tracking)
|
||||
|
||||
**Note:** The dev container accepts both `localhost` and `127.0.0.1` for HTTPS connections. The self-signed certificate is valid for both hostnames.
|
||||
|
||||
**SSL Certificate Warnings:** To eliminate browser security warnings for self-signed certificates, install the mkcert CA certificate. See [`certs/README.md`](../../certs/README.md) for platform-specific installation instructions. This is optional but recommended for a better development experience.
|
||||
|
||||
### Dev Container Architecture
|
||||
|
||||
The dev container uses PM2 for process management, matching production (ADR-014):
|
||||
|
||||
| Process | Description | Port |
|
||||
| -------------------------- | ------------------------ | ---- |
|
||||
| `flyer-crawler-api-dev` | API server (tsx watch) | 3001 |
|
||||
| `flyer-crawler-worker-dev` | Background job worker | - |
|
||||
| `flyer-crawler-vite-dev` | Vite frontend dev server | 5173 |
|
||||
|
||||
**PM2 Commands** (run inside container):
|
||||
|
||||
```bash
|
||||
# View process status
|
||||
podman exec -it flyer-crawler-dev pm2 status
|
||||
|
||||
# View logs (all processes)
|
||||
podman exec -it flyer-crawler-dev pm2 logs
|
||||
|
||||
# Restart all processes
|
||||
podman exec -it flyer-crawler-dev pm2 restart all
|
||||
|
||||
# Restart specific process
|
||||
podman exec -it flyer-crawler-dev pm2 restart flyer-crawler-api-dev
|
||||
```
|
||||
|
||||
## Verify Installation
|
||||
|
||||
```bash
|
||||
|
||||
@@ -11,6 +11,22 @@ This guide covers deploying Flyer Crawler to a production server.
|
||||
- NGINX (reverse proxy)
|
||||
- PM2 (process manager)
|
||||
|
||||
## Dev Container Parity (ADR-014)
|
||||
|
||||
The development container now matches production architecture:
|
||||
|
||||
| Component | Production | Dev Container |
|
||||
| ------------ | ---------------- | -------------------------- |
|
||||
| Process Mgmt | PM2 | PM2 |
|
||||
| API Server | PM2 cluster mode | PM2 fork + tsx watch |
|
||||
| Worker | PM2 process | PM2 process + tsx watch |
|
||||
| Logs | PM2 -> Logstash | PM2 -> Logstash -> Bugsink |
|
||||
| HTTPS | Let's Encrypt | Self-signed (mkcert) |
|
||||
|
||||
This ensures issues caught in dev will behave the same in production.
|
||||
|
||||
**Dev Container Guide**: See [docs/development/DEV-CONTAINER.md](../development/DEV-CONTAINER.md)
|
||||
|
||||
---
|
||||
|
||||
## Server Setup
|
||||
|
||||
@@ -31,6 +31,8 @@ Aggregates logs from PostgreSQL, PM2, Redis, NGINX; forwards errors to Bugsink.
|
||||
|
||||
## Commands
|
||||
|
||||
### Production (Bare Metal)
|
||||
|
||||
```bash
|
||||
# Status and control
|
||||
systemctl status logstash
|
||||
@@ -55,6 +57,36 @@ tail -f /var/log/logstash/nginx-access-$(date +%Y-%m-%d).log
|
||||
du -sh /var/log/logstash/
|
||||
```
|
||||
|
||||
### Dev Container
|
||||
|
||||
```bash
|
||||
# View Logstash logs (inside container)
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/logstash.log
|
||||
|
||||
# Check PM2 API logs are being processed (ADR-014)
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/pm2-api-$(date +%Y-%m-%d).log
|
||||
|
||||
# Check PM2 Worker logs are being processed
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/pm2-worker-$(date +%Y-%m-%d).log
|
||||
|
||||
# Check Redis logs are being processed
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/logstash/redis-operational-$(date +%Y-%m-%d).log
|
||||
|
||||
# View raw PM2 logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/pm2/api-out.log
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev cat /var/log/pm2/worker-out.log
|
||||
|
||||
# View raw Redis logs
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-redis cat /var/log/redis/redis-server.log
|
||||
|
||||
# Check Logstash stats
|
||||
podman exec flyer-crawler-dev curl -s localhost:9600/_node/stats/pipelines?pretty
|
||||
|
||||
# Verify shared volume mounts
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev ls -la /var/log/pm2/
|
||||
MSYS_NO_PATHCONV=1 podman exec flyer-crawler-dev ls -la /var/log/redis/
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Issue | Check | Solution |
|
||||
@@ -64,12 +96,16 @@ du -sh /var/log/logstash/
|
||||
| Grok pattern failures | Stats endpoint | `curl localhost:9600/_node/stats/pipelines?pretty \| jq '.pipelines.main.plugins.filters'` |
|
||||
| Wrong Bugsink project | Env detection | Check tags in logs match expected environment |
|
||||
| Permission denied | Logstash groups | `groups logstash` should include `postgres`, `adm` |
|
||||
| PM2 not captured | File paths | `ls /home/gitea-runner/.pm2/logs/flyer-crawler-worker-*.log` |
|
||||
| PM2 not captured | File paths | Dev: `ls /var/log/pm2/`; Prod: `ls /home/gitea-runner/.pm2/logs/` |
|
||||
| PM2 logs empty | PM2 running | Dev: `podman exec flyer-crawler-dev pm2 status`; Prod: `pm2 status` |
|
||||
| NGINX logs missing | Output directory | `ls -lh /var/log/logstash/nginx-access-*.log` |
|
||||
| Redis logs missing | Shared volume | Dev: Check `redis_logs` volume mounted; Prod: Check `/var/log/redis/redis-server.log` exists |
|
||||
| High disk usage | Log rotation | Verify `/etc/logrotate.d/logstash` configured |
|
||||
| varchar(7) error | Level validation | Add Ruby filter to validate/normalize `sentry_level` before output |
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- **Dev Container Guide**: [DEV-CONTAINER.md](../development/DEV-CONTAINER.md) - PM2 and log aggregation in dev
|
||||
- **Full setup**: [BARE-METAL-SETUP.md](BARE-METAL-SETUP.md) - PostgreSQL Function Observability section
|
||||
- **Architecture**: [adr/0050-postgresql-function-observability.md](adr/0050-postgresql-function-observability.md)
|
||||
- **Architecture**: [adr/0050-postgresql-function-observability.md](../adr/0050-postgresql-function-observability.md)
|
||||
- **Troubleshooting details**: [LOGSTASH-TROUBLESHOOTING.md](LOGSTASH-TROUBLESHOOTING.md)
|
||||
|
||||
@@ -11,6 +11,7 @@ This runbook provides step-by-step diagnostics and solutions for common Logstash
|
||||
| Wrong Bugsink project | Environment detection failed | Verify `pg_database` field extraction |
|
||||
| 403 authentication error | Missing/wrong DSN key | Check `X-Sentry-Auth` header |
|
||||
| 500 error from Bugsink | Invalid event format | Verify `event_id` and required fields |
|
||||
| varchar(7) constraint | Unresolved `%{sentry_level}` | Add Ruby filter for level validation |
|
||||
|
||||
---
|
||||
|
||||
@@ -385,7 +386,88 @@ systemctl status logstash
|
||||
|
||||
---
|
||||
|
||||
### Issue 7: Log File Rotation Issues
|
||||
### Issue 7: Level Field Constraint Violation (varchar(7))
|
||||
|
||||
**Symptoms:**
|
||||
|
||||
- Bugsink returns HTTP 500 errors
|
||||
- PostgreSQL errors: `value too long for type character varying(7)`
|
||||
- Events fail to insert with literal `%{sentry_level}` string (16 characters)
|
||||
|
||||
**Root Cause:**
|
||||
|
||||
When Logstash cannot determine the log level (no error patterns matched), the `sentry_level` field remains as the unresolved placeholder `%{sentry_level}`. Bugsink's PostgreSQL schema has a `varchar(7)` constraint on the level field.
|
||||
|
||||
Valid Sentry levels (all <= 7 characters): `fatal`, `error`, `warning`, `info`, `debug`
|
||||
|
||||
**Diagnosis:**
|
||||
|
||||
```bash
|
||||
# Check for HTTP 500 responses in Logstash logs
|
||||
podman exec flyer-crawler-dev cat /var/log/logstash/logstash.log | grep "500"
|
||||
|
||||
# Check Bugsink for constraint violation errors
|
||||
# Via MCP:
|
||||
mcp__localerrors__list_issues({ project_id: 1, status: 'unresolved' })
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
|
||||
Add a Ruby filter block in `docker/logstash/bugsink.conf` to validate and normalize the `sentry_level` field before sending to Bugsink:
|
||||
|
||||
```ruby
|
||||
# Add this AFTER all mutate filters that set sentry_level
|
||||
# and BEFORE the output section
|
||||
|
||||
ruby {
|
||||
code => '
|
||||
level = event.get("sentry_level")
|
||||
# Check if level is invalid (nil, empty, contains placeholder, or too long)
|
||||
if level.nil? || level.to_s.empty? || level.to_s.include?("%{") || level.to_s.length > 7
|
||||
# Default to "error" for error-tagged events, "info" otherwise
|
||||
if event.get("tags")&.include?("error")
|
||||
event.set("sentry_level", "error")
|
||||
else
|
||||
event.set("sentry_level", "info")
|
||||
end
|
||||
else
|
||||
# Normalize to lowercase and validate
|
||||
normalized = level.to_s.downcase
|
||||
valid_levels = ["fatal", "error", "warning", "info", "debug"]
|
||||
unless valid_levels.include?(normalized)
|
||||
normalized = "error"
|
||||
end
|
||||
event.set("sentry_level", normalized)
|
||||
end
|
||||
'
|
||||
}
|
||||
```
|
||||
|
||||
**Key validations performed:**
|
||||
|
||||
1. Checks for nil or empty values
|
||||
2. Detects unresolved placeholders (`%{...}`)
|
||||
3. Enforces 7-character maximum length
|
||||
4. Normalizes to lowercase
|
||||
5. Validates against allowed Sentry levels
|
||||
6. Defaults to "error" for error-tagged events, "info" otherwise
|
||||
|
||||
**Verification:**
|
||||
|
||||
```bash
|
||||
# Restart Logstash
|
||||
podman exec flyer-crawler-dev systemctl restart logstash
|
||||
|
||||
# Generate a test log that triggers the filter
|
||||
podman exec flyer-crawler-dev pm2 restart flyer-crawler-api-dev
|
||||
|
||||
# Check no new HTTP 500 errors
|
||||
podman exec flyer-crawler-dev cat /var/log/logstash/logstash.log | tail -50 | grep -E "(500|error)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Issue 8: Log File Rotation Issues
|
||||
|
||||
**Symptoms:**
|
||||
|
||||
|
||||
@@ -49,18 +49,24 @@ Bugsink is a lightweight, self-hosted error tracking platform that is fully comp
|
||||
| Web UI | `https://localhost:8443` (nginx proxy) |
|
||||
| Internal URL | `http://localhost:8000` (direct) |
|
||||
| Credentials | `admin@localhost` / `admin` |
|
||||
| Backend Project | Project ID 1 - `flyer-crawler-dev-backend` |
|
||||
| Frontend Project | Project ID 2 - `flyer-crawler-dev-frontend` |
|
||||
| Backend Project | Project ID 1 - `Backend API (Dev)` |
|
||||
| Frontend Project | Project ID 2 - `Frontend (Dev)` |
|
||||
| Infra Project | Project ID 4 - `Infrastructure (Dev)` |
|
||||
| Backend DSN | `http://<key>@localhost:8000/1` |
|
||||
| Frontend DSN | `http://<key>@localhost:8000/2` |
|
||||
| Frontend DSN | `https://<key>@localhost/bugsink-api/2` (via nginx proxy) |
|
||||
| Infra DSN | `http://<key>@localhost:8000/4` (Logstash only) |
|
||||
| Database | `postgresql://bugsink:bugsink_dev_password@postgres:5432/bugsink` |
|
||||
|
||||
**Important:** The Frontend DSN uses an nginx proxy (`/bugsink-api/`) because the browser cannot reach `localhost:8000` directly (container-internal port). See [Frontend Nginx Proxy](#frontend-nginx-proxy) for details.
|
||||
|
||||
**Configuration Files:**
|
||||
|
||||
| File | Purpose |
|
||||
| ----------------- | ----------------------------------------------------------------- |
|
||||
| `compose.dev.yml` | Initial DSNs using `127.0.0.1:8000` (container startup) |
|
||||
| `.env.local` | **OVERRIDES** compose.dev.yml with `localhost:8000` (app runtime) |
|
||||
| File | Purpose |
|
||||
| ------------------------------ | ------------------------------------------------------- |
|
||||
| `compose.dev.yml` | Initial DSNs using `127.0.0.1:8000` (container startup) |
|
||||
| `.env.local` | **OVERRIDES** compose.dev.yml (app runtime) |
|
||||
| `docker/nginx/dev.conf` | Nginx proxy for Bugsink API (frontend error reporting) |
|
||||
| `docker/logstash/bugsink.conf` | Log routing to Backend/Infrastructure projects |
|
||||
|
||||
**Note:** `.env.local` takes precedence over `compose.dev.yml` environment variables.
|
||||
|
||||
@@ -360,75 +366,127 @@ const config = {
|
||||
|
||||
---
|
||||
|
||||
## Frontend Nginx Proxy
|
||||
|
||||
The frontend Sentry SDK runs in the browser, which cannot directly reach `localhost:8000` (the Bugsink container-internal port). To solve this, we use an nginx proxy.
|
||||
|
||||
### How It Works
|
||||
|
||||
```text
|
||||
Browser --HTTPS--> https://localhost/bugsink-api/2/store/
|
||||
|
|
||||
v (nginx proxy)
|
||||
http://localhost:8000/api/2/store/
|
||||
|
|
||||
v
|
||||
Bugsink (internal)
|
||||
```
|
||||
|
||||
### Nginx Configuration
|
||||
|
||||
Location: `docker/nginx/dev.conf`
|
||||
|
||||
```nginx
|
||||
# Proxy Bugsink Sentry API for frontend error reporting
|
||||
location /bugsink-api/ {
|
||||
proxy_pass http://localhost:8000/api/;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Allow large error payloads with stack traces
|
||||
client_max_body_size 10M;
|
||||
|
||||
# Timeouts for error reporting
|
||||
proxy_connect_timeout 10s;
|
||||
proxy_send_timeout 30s;
|
||||
proxy_read_timeout 30s;
|
||||
}
|
||||
```
|
||||
|
||||
### Frontend DSN Format
|
||||
|
||||
```bash
|
||||
# .env.local
|
||||
# Uses nginx proxy path instead of direct port
|
||||
VITE_SENTRY_DSN=https://<key>@localhost/bugsink-api/2
|
||||
```
|
||||
|
||||
### Testing Frontend Error Reporting
|
||||
|
||||
1. Open browser console at `https://localhost`
|
||||
|
||||
2. Trigger a test error:
|
||||
|
||||
```javascript
|
||||
throw new Error('Test frontend error from browser');
|
||||
```
|
||||
|
||||
3. Check Bugsink Frontend (Dev) project for the error
|
||||
|
||||
4. Verify browser console shows Sentry SDK activity (if VITE_SENTRY_DEBUG=true)
|
||||
|
||||
---
|
||||
|
||||
## Logstash Integration
|
||||
|
||||
Logstash aggregates logs from multiple sources and forwards error patterns to Bugsink.
|
||||
|
||||
**Note:** See [ADR-015](../adr/0015-application-performance-monitoring-and-error-tracking.md) for the full architecture.
|
||||
|
||||
### 3-Project Architecture
|
||||
|
||||
Logstash routes errors to different Bugsink projects based on log source:
|
||||
|
||||
| Project | ID | Receives |
|
||||
| -------------------- | --- | --------------------------------------------- |
|
||||
| Backend API (Dev) | 1 | Pino app errors, PostgreSQL errors |
|
||||
| Frontend (Dev) | 2 | Browser errors (via Sentry SDK, not Logstash) |
|
||||
| Infrastructure (Dev) | 4 | Redis warnings, NGINX errors, Vite errors |
|
||||
|
||||
### Log Sources
|
||||
|
||||
| Source | Log Path | Error Detection |
|
||||
| ---------- | ---------------------- | ------------------------- |
|
||||
| Pino (app) | `/app/logs/*.log` | level >= 50 (error/fatal) |
|
||||
| Redis | `/var/log/redis/*.log` | WARNING/ERROR log levels |
|
||||
| PostgreSQL | (future) | ERROR/FATAL log levels |
|
||||
| Source | Log Path | Project Destination | Error Detection |
|
||||
| ---------- | --------------------------- | ------------------- | ------------------------- |
|
||||
| PM2 API | `/var/log/pm2/api-*.log` | Backend (1) | level >= 50 (error/fatal) |
|
||||
| PM2 Worker | `/var/log/pm2/worker-*.log` | Backend (1) | level >= 50 (error/fatal) |
|
||||
| PM2 Vite | `/var/log/pm2/vite-*.log` | Infrastructure (4) | error keyword patterns |
|
||||
| PostgreSQL | `/var/log/postgresql/*.log` | Backend (1) | ERROR/FATAL log levels |
|
||||
| Redis | `/var/log/redis/*.log` | Infrastructure (4) | WARNING level (`#`) |
|
||||
| NGINX | `/var/log/nginx/error.log` | Infrastructure (4) | error/crit/alert/emerg |
|
||||
|
||||
### Pipeline Configuration
|
||||
|
||||
**Location:** `/etc/logstash/conf.d/bugsink.conf`
|
||||
**Location:** `/etc/logstash/conf.d/bugsink.conf` (or `docker/logstash/bugsink.conf` in project)
|
||||
|
||||
```conf
|
||||
# === INPUTS ===
|
||||
input {
|
||||
file {
|
||||
path => "/app/logs/*.log"
|
||||
codec => json
|
||||
type => "pino"
|
||||
tags => ["app"]
|
||||
}
|
||||
The configuration:
|
||||
|
||||
file {
|
||||
path => "/var/log/redis/*.log"
|
||||
type => "redis"
|
||||
tags => ["redis"]
|
||||
}
|
||||
1. **Inputs**: Reads from PM2 logs, PostgreSQL logs, Redis logs, NGINX logs
|
||||
2. **Filters**: Detects errors and assigns tags based on log type
|
||||
3. **Outputs**: Routes to appropriate Bugsink project based on log source
|
||||
|
||||
**Key Routing Logic:**
|
||||
|
||||
```ruby
|
||||
# Infrastructure logs -> Project 4
|
||||
if "error" in [tags] and ([type] == "redis" or [type] == "nginx_error" or [type] == "pm2_vite") {
|
||||
http { url => "http://localhost:8000/api/4/store/" ... }
|
||||
}
|
||||
|
||||
# === FILTERS ===
|
||||
filter {
|
||||
if [type] == "pino" and [level] >= 50 {
|
||||
mutate { add_tag => ["error"] }
|
||||
}
|
||||
|
||||
if [type] == "redis" {
|
||||
grok {
|
||||
match => { "message" => "%{POSINT:pid}:%{WORD:role} %{MONTHDAY} %{MONTH} %{TIME} %{WORD:loglevel} %{GREEDYDATA:redis_message}" }
|
||||
}
|
||||
if [loglevel] in ["WARNING", "ERROR"] {
|
||||
mutate { add_tag => ["error"] }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# === OUTPUT ===
|
||||
output {
|
||||
if "error" in [tags] {
|
||||
http {
|
||||
url => "http://localhost:8000/api/store/"
|
||||
http_method => "post"
|
||||
format => "json"
|
||||
}
|
||||
}
|
||||
# Backend logs -> Project 1
|
||||
else if "error" in [tags] and ([type] in ["pm2_api", "pm2_worker", "pino", "postgres"]) {
|
||||
http { url => "http://localhost:8000/api/1/store/" ... }
|
||||
}
|
||||
```
|
||||
|
||||
### Benefits
|
||||
|
||||
1. **Secondary Capture Path**: Catches errors before SDK initialization
|
||||
2. **Log-Based Errors**: Captures errors that don't throw exceptions
|
||||
3. **Infrastructure Monitoring**: Redis connection issues, slow commands
|
||||
4. **Historical Analysis**: Process existing log files
|
||||
1. **Separation of Concerns**: Application errors separate from infrastructure issues
|
||||
2. **Secondary Capture Path**: Catches errors before SDK initialization
|
||||
3. **Log-Based Errors**: Captures errors that don't throw exceptions
|
||||
4. **Infrastructure Monitoring**: Redis, NGINX, build tooling issues
|
||||
5. **Historical Analysis**: Process existing log files
|
||||
|
||||
---
|
||||
|
||||
@@ -743,6 +801,228 @@ podman exec flyer-crawler-dev psql -U postgres -h postgres -c "\l" | grep bugsin
|
||||
ssh root@projectium.com "cd /opt/bugsink && bugsink-manage check"
|
||||
```
|
||||
|
||||
### PostgreSQL Sequence Out of Sync (Duplicate Key Errors)
|
||||
|
||||
**Symptoms:**
|
||||
|
||||
- Bugsink throws `duplicate key value violates unique constraint "projects_project_pkey"`
|
||||
- Error detail shows: `Key (id)=(1) already exists`
|
||||
- New projects or other entities fail to create
|
||||
|
||||
**Root Cause:**
|
||||
|
||||
PostgreSQL sequences can become out of sync with actual data after:
|
||||
|
||||
- Manual data insertion or database seeding
|
||||
- Restoring from backup
|
||||
- Copying data between environments
|
||||
|
||||
The sequence generates IDs that already exist in the table.
|
||||
|
||||
**Diagnosis:**
|
||||
|
||||
```bash
|
||||
# Dev Container - Check sequence vs max ID
|
||||
podman exec flyer-crawler-dev psql -U bugsink -h postgres -d bugsink -c "
|
||||
SELECT
|
||||
(SELECT MAX(id) FROM projects_project) as max_id,
|
||||
(SELECT last_value FROM projects_project_id_seq) as seq_last_value,
|
||||
CASE
|
||||
WHEN (SELECT MAX(id) FROM projects_project) <= (SELECT last_value FROM projects_project_id_seq)
|
||||
THEN 'OK'
|
||||
ELSE 'OUT OF SYNC - Needs reset'
|
||||
END as status;
|
||||
"
|
||||
|
||||
# Production
|
||||
ssh root@projectium.com "cd /opt/bugsink && bugsink-manage dbshell" <<< "
|
||||
SELECT MAX(id) as max_id, (SELECT last_value FROM projects_project_id_seq) as seq_value FROM projects_project;
|
||||
"
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
|
||||
Reset the sequence to the maximum existing ID:
|
||||
|
||||
```bash
|
||||
# Dev Container
|
||||
podman exec flyer-crawler-dev psql -U bugsink -h postgres -d bugsink -c "
|
||||
SELECT setval('projects_project_id_seq', COALESCE((SELECT MAX(id) FROM projects_project), 1), true);
|
||||
"
|
||||
|
||||
# Production
|
||||
ssh root@projectium.com "cd /opt/bugsink && bugsink-manage dbshell" <<< "
|
||||
SELECT setval('projects_project_id_seq', COALESCE((SELECT MAX(id) FROM projects_project), 1), true);
|
||||
"
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
|
||||
After running the fix, verify:
|
||||
|
||||
```bash
|
||||
# Next ID should be max_id + 1
|
||||
podman exec flyer-crawler-dev psql -U bugsink -h postgres -d bugsink -c "
|
||||
SELECT nextval('projects_project_id_seq') - 1 as current_seq_value;
|
||||
"
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
|
||||
When manually inserting data or restoring backups, always reset sequences:
|
||||
|
||||
```sql
|
||||
-- Generic pattern for any table/sequence
|
||||
SELECT setval('SEQUENCE_NAME', COALESCE((SELECT MAX(id) FROM TABLE_NAME), 1), true);
|
||||
|
||||
-- Common Bugsink sequences that may need reset:
|
||||
SELECT setval('projects_project_id_seq', COALESCE((SELECT MAX(id) FROM projects_project), 1), true);
|
||||
SELECT setval('teams_team_id_seq', COALESCE((SELECT MAX(id) FROM teams_team), 1), true);
|
||||
SELECT setval('releases_release_id_seq', COALESCE((SELECT MAX(id) FROM releases_release), 1), true);
|
||||
```
|
||||
|
||||
### Logstash Level Field Constraint Violation
|
||||
|
||||
**Symptoms:**
|
||||
|
||||
- Bugsink errors: `value too long for type character varying(7)`
|
||||
- Errors in Backend API project from Logstash
|
||||
- Log shows `%{sentry_level}` literal string being sent
|
||||
|
||||
**Root Cause:**
|
||||
|
||||
Logstash sends the literal placeholder `%{sentry_level}` (16 characters) to Bugsink when:
|
||||
|
||||
- No error pattern is detected in the log message
|
||||
- The `sentry_level` field is not properly initialized
|
||||
- Bugsink's `level` column has a `varchar(7)` constraint
|
||||
|
||||
Valid Sentry levels are: `fatal`, `error`, `warning`, `info`, `debug` (all <= 7 characters).
|
||||
|
||||
**Diagnosis:**
|
||||
|
||||
```bash
|
||||
# Check for recent level constraint errors in Bugsink
|
||||
# Via MCP:
|
||||
mcp__localerrors__list_issues({ project_id: 1, status: 'unresolved' })
|
||||
|
||||
# Or check Logstash logs for HTTP 500 responses
|
||||
podman exec flyer-crawler-dev cat /var/log/logstash/logstash.log | grep "500"
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
|
||||
The fix requires updating the Logstash configuration (`docker/logstash/bugsink.conf`) to:
|
||||
|
||||
1. Validate `sentry_level` is not nil, empty, or contains placeholder text
|
||||
2. Set a default value of "error" for any error-tagged event without a valid level
|
||||
3. Normalize levels to lowercase
|
||||
|
||||
**Key filter block (Ruby):**
|
||||
|
||||
```ruby
|
||||
ruby {
|
||||
code => '
|
||||
level = event.get("sentry_level")
|
||||
# Check if level is invalid (nil, empty, contains placeholder, or invalid value)
|
||||
if level.nil? || level.to_s.empty? || level.to_s.include?("%{") || level.to_s.length > 7
|
||||
# Default to "error" for error-tagged events, "info" otherwise
|
||||
if event.get("tags")&.include?("error")
|
||||
event.set("sentry_level", "error")
|
||||
else
|
||||
event.set("sentry_level", "info")
|
||||
end
|
||||
else
|
||||
# Normalize to lowercase and validate
|
||||
normalized = level.to_s.downcase
|
||||
valid_levels = ["fatal", "error", "warning", "info", "debug"]
|
||||
unless valid_levels.include?(normalized)
|
||||
normalized = "error"
|
||||
end
|
||||
event.set("sentry_level", normalized)
|
||||
end
|
||||
'
|
||||
}
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
|
||||
After applying the fix:
|
||||
|
||||
1. Restart Logstash: `podman exec flyer-crawler-dev systemctl restart logstash`
|
||||
2. Generate a test error and verify it appears in Bugsink without level errors
|
||||
3. Check no new "value too long" errors appear in the project
|
||||
|
||||
### CSRF Verification Failed
|
||||
|
||||
**Symptoms:** "CSRF verification failed. Request aborted." error when performing actions in Bugsink UI (resolving issues, changing settings, etc.)
|
||||
|
||||
**Root Cause:**
|
||||
|
||||
Django 4.0+ requires `CSRF_TRUSTED_ORIGINS` to be explicitly configured for HTTPS POST requests. The error occurs because:
|
||||
|
||||
1. Bugsink is accessed via `https://localhost:8443` (nginx HTTPS proxy)
|
||||
2. Django's CSRF protection validates the `Origin` header against `CSRF_TRUSTED_ORIGINS`
|
||||
3. Without explicit configuration, Django rejects POST requests from HTTPS origins
|
||||
|
||||
**Why localhost vs 127.0.0.1 Matters:**
|
||||
|
||||
- `localhost` and `127.0.0.1` are treated as DIFFERENT origins by browsers
|
||||
- If you access Bugsink via `https://localhost:8443`, Django must trust `https://localhost:8443`
|
||||
- If you access via `https://127.0.0.1:8443`, Django must trust `https://127.0.0.1:8443`
|
||||
- The fix includes BOTH to allow either access pattern
|
||||
|
||||
**Configuration (Already Applied):**
|
||||
|
||||
The Bugsink Django configuration in `Dockerfile.dev` includes:
|
||||
|
||||
```python
|
||||
# CSRF Trusted Origins (Django 4.0+ requires full origin for HTTPS POST requests)
|
||||
CSRF_TRUSTED_ORIGINS = [
|
||||
"https://localhost:8443",
|
||||
"https://127.0.0.1:8443",
|
||||
"http://localhost:8000",
|
||||
"http://127.0.0.1:8000",
|
||||
]
|
||||
|
||||
# HTTPS proxy support (nginx reverse proxy on port 8443)
|
||||
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
|
||||
```
|
||||
|
||||
**Verification:**
|
||||
|
||||
```bash
|
||||
# Verify CSRF_TRUSTED_ORIGINS is configured
|
||||
podman exec flyer-crawler-dev sh -c 'cat /opt/bugsink/conf/bugsink_conf.py | grep -A 6 CSRF_TRUSTED'
|
||||
|
||||
# Expected output:
|
||||
# CSRF_TRUSTED_ORIGINS = [
|
||||
# "https://localhost:8443",
|
||||
# "https://127.0.0.1:8443",
|
||||
# "http://localhost:8000",
|
||||
# "http://127.0.0.1:8000",
|
||||
# ]
|
||||
```
|
||||
|
||||
**If Issue Persists After Fix:**
|
||||
|
||||
1. **Rebuild the container image** (configuration is baked into the image):
|
||||
|
||||
```bash
|
||||
podman-compose -f compose.dev.yml down
|
||||
podman build -f Dockerfile.dev -t localhost/flyer-crawler-dev:latest .
|
||||
podman-compose -f compose.dev.yml up -d
|
||||
```
|
||||
|
||||
2. **Clear browser cookies** for localhost:8443
|
||||
|
||||
3. **Check nginx X-Forwarded-Proto header** - the nginx config must set this header for Django to recognize HTTPS:
|
||||
|
||||
```bash
|
||||
podman exec flyer-crawler-dev cat /etc/nginx/sites-available/bugsink | grep X-Forwarded-Proto
|
||||
# Should show: proxy_set_header X-Forwarded-Proto $scheme;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
176
ecosystem.dev.config.cjs
Normal file
176
ecosystem.dev.config.cjs
Normal file
@@ -0,0 +1,176 @@
|
||||
// ecosystem.dev.config.cjs
|
||||
// ============================================================================
|
||||
// DEVELOPMENT PM2 CONFIGURATION
|
||||
// ============================================================================
|
||||
// This file mirrors the production ecosystem.config.cjs but with settings
|
||||
// appropriate for the development environment inside the dev container.
|
||||
//
|
||||
// Key differences from production:
|
||||
// - Single instance for API (not cluster mode) to allow debugger attachment
|
||||
// - tsx watch mode for hot reloading
|
||||
// - Development-specific environment variables
|
||||
// - Logs written to /var/log/pm2 for Logstash integration (ADR-050)
|
||||
//
|
||||
// Usage:
|
||||
// pm2 start ecosystem.dev.config.cjs
|
||||
// pm2 logs # View all logs
|
||||
// pm2 logs flyer-crawler-api-dev # View API logs only
|
||||
// pm2 restart all # Restart all processes
|
||||
// pm2 delete all # Stop all processes
|
||||
//
|
||||
// Related:
|
||||
// - ecosystem.config.cjs (production configuration)
|
||||
// - docs/adr/0014-linux-only-platform.md
|
||||
// - docs/adr/0050-postgresql-function-observability.md
|
||||
// ============================================================================
|
||||
|
||||
// --- Environment Variable Validation ---
|
||||
// In dev, we warn but don't fail - variables come from compose.dev.yml
|
||||
const requiredVars = ['DB_HOST', 'JWT_SECRET'];
|
||||
const missingVars = requiredVars.filter((key) => !process.env[key]);
|
||||
|
||||
if (missingVars.length > 0) {
|
||||
console.warn(
|
||||
'\n[ecosystem.dev.config.cjs] WARNING: The following environment variables are MISSING:',
|
||||
);
|
||||
missingVars.forEach((key) => console.warn(` - ${key}`));
|
||||
console.warn(
|
||||
'[ecosystem.dev.config.cjs] These should be set in compose.dev.yml or .env.local\n',
|
||||
);
|
||||
} else {
|
||||
console.log('[ecosystem.dev.config.cjs] Required environment variables are present.');
|
||||
}
|
||||
|
||||
// --- Shared Environment Variables ---
|
||||
// These come from compose.dev.yml environment section
|
||||
const sharedEnv = {
|
||||
// Timezone: PST (America/Los_Angeles) for consistent log timestamps
|
||||
TZ: process.env.TZ || 'America/Los_Angeles',
|
||||
NODE_ENV: 'development',
|
||||
DB_HOST: process.env.DB_HOST || 'postgres',
|
||||
DB_PORT: process.env.DB_PORT || '5432',
|
||||
DB_USER: process.env.DB_USER || 'postgres',
|
||||
DB_PASSWORD: process.env.DB_PASSWORD || 'postgres',
|
||||
DB_NAME: process.env.DB_NAME || 'flyer_crawler_dev',
|
||||
REDIS_URL: process.env.REDIS_URL || 'redis://redis:6379',
|
||||
REDIS_HOST: process.env.REDIS_HOST || 'redis',
|
||||
REDIS_PORT: process.env.REDIS_PORT || '6379',
|
||||
FRONTEND_URL: process.env.FRONTEND_URL || 'https://localhost',
|
||||
JWT_SECRET: process.env.JWT_SECRET || 'dev-jwt-secret-change-in-production',
|
||||
WORKER_LOCK_DURATION: process.env.WORKER_LOCK_DURATION || '120000',
|
||||
// Sentry/Bugsink (ADR-015)
|
||||
SENTRY_DSN: process.env.SENTRY_DSN,
|
||||
SENTRY_ENVIRONMENT: process.env.SENTRY_ENVIRONMENT || 'development',
|
||||
SENTRY_ENABLED: process.env.SENTRY_ENABLED || 'true',
|
||||
SENTRY_DEBUG: process.env.SENTRY_DEBUG || 'true',
|
||||
// Optional API keys (may not be set in dev)
|
||||
GEMINI_API_KEY: process.env.GEMINI_API_KEY,
|
||||
GOOGLE_MAPS_API_KEY: process.env.GOOGLE_MAPS_API_KEY,
|
||||
GOOGLE_CLIENT_ID: process.env.GOOGLE_CLIENT_ID,
|
||||
GOOGLE_CLIENT_SECRET: process.env.GOOGLE_CLIENT_SECRET,
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
apps: [
|
||||
// =========================================================================
|
||||
// API SERVER (Development)
|
||||
// =========================================================================
|
||||
// Single instance with tsx watch for hot reloading.
|
||||
// Unlike production, we don't use cluster mode to allow:
|
||||
// - Debugger attachment
|
||||
// - Simpler log output
|
||||
// - Hot reloading via tsx watch
|
||||
{
|
||||
name: 'flyer-crawler-api-dev',
|
||||
script: './node_modules/.bin/tsx',
|
||||
args: 'watch server.ts',
|
||||
cwd: '/app',
|
||||
watch: false, // tsx watch handles this
|
||||
instances: 1, // Single instance for debugging
|
||||
exec_mode: 'fork', // Fork mode (not cluster) for debugging
|
||||
max_memory_restart: '1G', // More generous in dev
|
||||
kill_timeout: 5000,
|
||||
// PM2 log configuration (ADR-050 - Logstash integration)
|
||||
output: '/var/log/pm2/api-out.log',
|
||||
error: '/var/log/pm2/api-error.log',
|
||||
log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
|
||||
merge_logs: true,
|
||||
// Restart configuration
|
||||
max_restarts: 10,
|
||||
exp_backoff_restart_delay: 500,
|
||||
min_uptime: '5s',
|
||||
// Environment
|
||||
env: {
|
||||
...sharedEnv,
|
||||
PORT: '3001',
|
||||
},
|
||||
},
|
||||
|
||||
// =========================================================================
|
||||
// BACKGROUND WORKER (Development)
|
||||
// =========================================================================
|
||||
// Processes background jobs (flyer processing, cleanup, etc.)
|
||||
// Runs as a separate process like in production.
|
||||
{
|
||||
name: 'flyer-crawler-worker-dev',
|
||||
script: './node_modules/.bin/tsx',
|
||||
args: 'watch src/services/worker.ts',
|
||||
cwd: '/app',
|
||||
watch: false, // tsx watch handles this
|
||||
instances: 1,
|
||||
exec_mode: 'fork',
|
||||
max_memory_restart: '1G',
|
||||
kill_timeout: 10000, // Workers need more time to finish jobs
|
||||
// PM2 log configuration (ADR-050)
|
||||
output: '/var/log/pm2/worker-out.log',
|
||||
error: '/var/log/pm2/worker-error.log',
|
||||
log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
|
||||
merge_logs: true,
|
||||
// Restart configuration
|
||||
max_restarts: 10,
|
||||
exp_backoff_restart_delay: 500,
|
||||
min_uptime: '5s',
|
||||
// Environment
|
||||
env: {
|
||||
...sharedEnv,
|
||||
},
|
||||
},
|
||||
|
||||
// =========================================================================
|
||||
// VITE FRONTEND DEV SERVER (Development)
|
||||
// =========================================================================
|
||||
// Vite dev server for frontend hot module replacement.
|
||||
// Listens on 5173 and is proxied by nginx to 443.
|
||||
{
|
||||
name: 'flyer-crawler-vite-dev',
|
||||
script: './node_modules/.bin/vite',
|
||||
args: '--host',
|
||||
cwd: '/app',
|
||||
watch: false, // Vite handles its own watching
|
||||
instances: 1,
|
||||
exec_mode: 'fork',
|
||||
max_memory_restart: '2G', // Vite can use significant memory
|
||||
kill_timeout: 5000,
|
||||
// PM2 log configuration (ADR-050)
|
||||
output: '/var/log/pm2/vite-out.log',
|
||||
error: '/var/log/pm2/vite-error.log',
|
||||
log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
|
||||
merge_logs: true,
|
||||
// Restart configuration
|
||||
max_restarts: 10,
|
||||
exp_backoff_restart_delay: 500,
|
||||
min_uptime: '5s',
|
||||
// Environment
|
||||
env: {
|
||||
// Timezone: PST (America/Los_Angeles) for consistent log timestamps
|
||||
TZ: process.env.TZ || 'America/Los_Angeles',
|
||||
NODE_ENV: 'development',
|
||||
// Vite-specific env vars (VITE_ prefix)
|
||||
VITE_SENTRY_DSN: process.env.VITE_SENTRY_DSN,
|
||||
VITE_SENTRY_ENVIRONMENT: process.env.VITE_SENTRY_ENVIRONMENT || 'development',
|
||||
VITE_SENTRY_ENABLED: process.env.VITE_SENTRY_ENABLED || 'true',
|
||||
VITE_SENTRY_DEBUG: process.env.VITE_SENTRY_DEBUG || 'true',
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
4
package-lock.json
generated
4
package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "flyer-crawler",
|
||||
"version": "0.12.7",
|
||||
"version": "0.12.15",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "flyer-crawler",
|
||||
"version": "0.12.7",
|
||||
"version": "0.12.15",
|
||||
"dependencies": {
|
||||
"@bull-board/api": "^6.14.2",
|
||||
"@bull-board/express": "^6.14.2",
|
||||
|
||||
17
package.json
17
package.json
@@ -1,20 +1,25 @@
|
||||
{
|
||||
"name": "flyer-crawler",
|
||||
"private": true,
|
||||
"version": "0.12.7",
|
||||
"version": "0.12.15",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "concurrently \"npm:start:dev\" \"vite\"",
|
||||
"dev:container": "concurrently \"npm:start:dev\" \"vite --host\"",
|
||||
"dev:pm2": "pm2 start ecosystem.dev.config.cjs && pm2 logs",
|
||||
"dev:pm2:restart": "pm2 restart ecosystem.dev.config.cjs",
|
||||
"dev:pm2:stop": "pm2 delete all",
|
||||
"dev:pm2:status": "pm2 status",
|
||||
"dev:pm2:logs": "pm2 logs",
|
||||
"start": "npm run start:prod",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview",
|
||||
"test": "node scripts/check-linux.js && cross-env NODE_ENV=test tsx ./node_modules/vitest/vitest.mjs run",
|
||||
"test-wsl": "cross-env NODE_ENV=test vitest run",
|
||||
"test": "node scripts/check-linux.js && cross-env NODE_ENV=test TZ= tsx ./node_modules/vitest/vitest.mjs run",
|
||||
"test-wsl": "cross-env NODE_ENV=test TZ= vitest run",
|
||||
"test:coverage": "npm run clean && npm run test:unit -- --coverage && npm run test:integration -- --coverage",
|
||||
"test:unit": "node scripts/check-linux.js && cross-env NODE_ENV=test tsx --max-old-space-size=8192 ./node_modules/vitest/vitest.mjs run --project unit -c vite.config.ts",
|
||||
"test:integration": "node scripts/check-linux.js && cross-env NODE_ENV=test tsx --max-old-space-size=8192 ./node_modules/vitest/vitest.mjs run --project integration -c vitest.config.integration.ts",
|
||||
"test:e2e": "node scripts/check-linux.js && cross-env NODE_ENV=test tsx --max-old-space-size=8192 ./node_modules/vitest/vitest.mjs run --config vitest.config.e2e.ts",
|
||||
"test:unit": "node scripts/check-linux.js && cross-env NODE_ENV=test TZ= tsx --max-old-space-size=8192 ./node_modules/vitest/vitest.mjs run --project unit -c vite.config.ts",
|
||||
"test:integration": "node scripts/check-linux.js && cross-env NODE_ENV=test TZ= tsx --max-old-space-size=8192 ./node_modules/vitest/vitest.mjs run --project integration -c vitest.config.integration.ts",
|
||||
"test:e2e": "node scripts/check-linux.js && cross-env NODE_ENV=test TZ= tsx --max-old-space-size=8192 ./node_modules/vitest/vitest.mjs run --config vitest.config.e2e.ts",
|
||||
"format": "prettier --write .",
|
||||
"lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
|
||||
"type-check": "tsc --noEmit",
|
||||
|
||||
@@ -3,22 +3,48 @@
|
||||
# ============================================================================
|
||||
# Development Container Entrypoint
|
||||
# ============================================================================
|
||||
# This script starts the development server automatically when the container
|
||||
# starts, both with VS Code Dev Containers and with plain podman-compose.
|
||||
# This script starts all development services when the container starts,
|
||||
# both with VS Code Dev Containers and with plain podman-compose.
|
||||
#
|
||||
# Services started:
|
||||
# - Nginx (proxies Vite 5173 → 3000)
|
||||
# - Nginx (HTTPS proxy: Vite 5173 -> 443, Bugsink 8000 -> 8443, API 3001)
|
||||
# - Bugsink (error tracking) on port 8000
|
||||
# - Logstash (log aggregation)
|
||||
# - Node.js dev server (API + Frontend) on ports 3001 and 5173
|
||||
# - PM2 (process manager) running:
|
||||
# - flyer-crawler-api-dev: API server (port 3001)
|
||||
# - flyer-crawler-worker-dev: Background job worker
|
||||
# - flyer-crawler-vite-dev: Vite frontend dev server (port 5173)
|
||||
#
|
||||
# ADR-014: This architecture matches production (PM2 managing processes)
|
||||
# ADR-050: Logs are written to /var/log/pm2 for Logstash integration
|
||||
# ============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
echo "🚀 Starting Flyer Crawler Dev Container..."
|
||||
echo "Starting Flyer Crawler Dev Container..."
|
||||
|
||||
# ============================================================================
|
||||
# Timezone Configuration
|
||||
# ============================================================================
|
||||
# Ensure TZ is set for consistent log timestamps across all services.
|
||||
# TZ should be set via compose.dev.yml environment (default: America/Los_Angeles)
|
||||
# ============================================================================
|
||||
if [ -n "$TZ" ]; then
|
||||
echo "Timezone configured: $TZ"
|
||||
# Link timezone data if available (for date command and other tools)
|
||||
if [ -f "/usr/share/zoneinfo/$TZ" ]; then
|
||||
ln -sf "/usr/share/zoneinfo/$TZ" /etc/localtime
|
||||
echo "$TZ" > /etc/timezone
|
||||
echo "System timezone set to: $(date +%Z) ($(date))"
|
||||
else
|
||||
echo "Warning: Timezone data not found for $TZ, using TZ environment variable only"
|
||||
fi
|
||||
else
|
||||
echo "Warning: TZ environment variable not set, using container default timezone"
|
||||
fi
|
||||
|
||||
# Configure Bugsink HTTPS (ADR-015)
|
||||
echo "🔒 Configuring Bugsink HTTPS..."
|
||||
echo "Configuring Bugsink HTTPS..."
|
||||
mkdir -p /etc/bugsink/ssl
|
||||
if [ ! -f "/etc/bugsink/ssl/localhost+2.pem" ]; then
|
||||
cd /etc/bugsink/ssl && mkcert localhost 127.0.0.1 ::1 > /dev/null 2>&1
|
||||
@@ -53,52 +79,92 @@ NGINX_EOF
|
||||
ln -sf /etc/nginx/sites-available/bugsink /etc/nginx/sites-enabled/bugsink
|
||||
|
||||
# Copy the dev nginx config from mounted volume to nginx sites-available
|
||||
echo "📋 Copying nginx dev config..."
|
||||
echo "Copying nginx dev config..."
|
||||
cp /app/docker/nginx/dev.conf /etc/nginx/sites-available/default
|
||||
|
||||
# Ensure PM2 log directory exists with correct permissions
|
||||
echo "Setting up PM2 log directory..."
|
||||
mkdir -p /var/log/pm2
|
||||
chmod 755 /var/log/pm2
|
||||
|
||||
# Start nginx in background (if installed)
|
||||
if command -v nginx &> /dev/null; then
|
||||
echo "🌐 Starting nginx (HTTPS: Vite 5173 → 443, Bugsink 8000 → 8443, API 3001 → /api/)..."
|
||||
echo "Starting nginx (HTTPS: Vite 5173 -> 443, Bugsink 8000 -> 8443, API 3001 -> /api/)..."
|
||||
nginx &
|
||||
fi
|
||||
|
||||
# Start Bugsink in background
|
||||
echo "📊 Starting Bugsink error tracking..."
|
||||
echo "Starting Bugsink error tracking..."
|
||||
/usr/local/bin/start-bugsink.sh > /var/log/bugsink/server.log 2>&1 &
|
||||
|
||||
# Wait for Bugsink to initialize, then run snappea migrations
|
||||
echo "⏳ Waiting for Bugsink to initialize..."
|
||||
echo "Waiting for Bugsink to initialize..."
|
||||
sleep 5
|
||||
echo "🔧 Running Bugsink snappea database migrations..."
|
||||
echo "Running Bugsink snappea database migrations..."
|
||||
cd /opt/bugsink/conf && \
|
||||
export DATABASE_URL="postgresql://bugsink:bugsink_dev_password@postgres:5432/bugsink" && \
|
||||
export SECRET_KEY="dev-bugsink-secret-key-minimum-50-characters-for-security" && \
|
||||
/opt/bugsink/bin/bugsink-manage migrate --database=snappea > /dev/null 2>&1
|
||||
|
||||
# Start Snappea task worker
|
||||
echo "🔄 Starting Snappea task worker..."
|
||||
echo "Starting Snappea task worker..."
|
||||
cd /opt/bugsink/conf && \
|
||||
export DATABASE_URL="postgresql://bugsink:bugsink_dev_password@postgres:5432/bugsink" && \
|
||||
export SECRET_KEY="dev-bugsink-secret-key-minimum-50-characters-for-security" && \
|
||||
/opt/bugsink/bin/bugsink-manage runsnappea > /var/log/bugsink/snappea.log 2>&1 &
|
||||
|
||||
# Start Logstash in background
|
||||
echo "📝 Starting Logstash..."
|
||||
echo "Starting Logstash..."
|
||||
/usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/bugsink.conf > /var/log/logstash/logstash.log 2>&1 &
|
||||
|
||||
# Wait a few seconds for services to initialize
|
||||
# Wait for services to initialize
|
||||
sleep 3
|
||||
|
||||
# Change to app directory
|
||||
cd /app
|
||||
|
||||
# Start development server
|
||||
echo "💻 Starting development server..."
|
||||
echo " - Frontend: https://localhost (nginx HTTPS → Vite on 5173)"
|
||||
echo " - Backend API: http://localhost:3001"
|
||||
echo " - Bugsink: https://localhost:8443 (nginx HTTPS → Bugsink on 8000)"
|
||||
echo " - Note: Accept the self-signed certificate warnings in your browser"
|
||||
# ============================================================================
|
||||
# Start PM2 (ADR-014: Production-like process management)
|
||||
# ============================================================================
|
||||
# PM2 manages all Node.js processes:
|
||||
# - API server (tsx watch server.ts)
|
||||
# - Background worker (tsx watch src/services/worker.ts)
|
||||
# - Vite dev server (vite --host)
|
||||
#
|
||||
# Logs are written to /var/log/pm2 for Logstash integration (ADR-050)
|
||||
# ============================================================================
|
||||
|
||||
echo "Starting PM2 with development configuration..."
|
||||
echo " - API Server: http://localhost:3001 (proxied via nginx)"
|
||||
echo " - Frontend: https://localhost (nginx HTTPS -> Vite on 5173)"
|
||||
echo " - Bugsink: https://localhost:8443 (nginx HTTPS -> Bugsink on 8000)"
|
||||
echo " - PM2 Logs: /var/log/pm2/*.log"
|
||||
echo ""
|
||||
echo "Note: Accept the self-signed certificate warnings in your browser"
|
||||
echo ""
|
||||
|
||||
# Run npm dev server (this will block and keep container alive)
|
||||
exec npm run dev:container
|
||||
# Delete any existing PM2 processes (clean start)
|
||||
pm2 delete all 2>/dev/null || true
|
||||
|
||||
# Start PM2 with the dev ecosystem config
|
||||
pm2 start /app/ecosystem.dev.config.cjs
|
||||
|
||||
# Show PM2 status
|
||||
echo ""
|
||||
echo "--- PM2 Process Status ---"
|
||||
pm2 status
|
||||
echo "-------------------------"
|
||||
echo ""
|
||||
echo "PM2 Commands:"
|
||||
echo " pm2 logs # View all logs (tail -f style)"
|
||||
echo " pm2 logs api # View API logs only"
|
||||
echo " pm2 restart all # Restart all processes"
|
||||
echo " pm2 status # Show process status"
|
||||
echo ""
|
||||
|
||||
# Keep the container running by tailing PM2 logs
|
||||
# This ensures:
|
||||
# 1. Container stays alive
|
||||
# 2. Logs are visible in docker logs / podman logs
|
||||
# 3. PM2 processes continue running
|
||||
exec pm2 logs --raw
|
||||
|
||||
@@ -1,21 +1,31 @@
|
||||
-- Update flyer URLs from example.com to environment-specific URLs
|
||||
--
|
||||
-- This script should be run after determining the correct base URL for the environment:
|
||||
-- - Dev container: http://127.0.0.1
|
||||
-- - Dev container: https://localhost (NOT 127.0.0.1 - avoids SSL mixed-origin issues)
|
||||
-- - Test environment: https://flyer-crawler-test.projectium.com
|
||||
-- - Production: https://flyer-crawler.projectium.com
|
||||
|
||||
-- For dev container (run in dev database):
|
||||
-- Uses 'localhost' instead of '127.0.0.1' to match how users access the site.
|
||||
-- This avoids ERR_CERT_AUTHORITY_INVALID errors when images are loaded from a
|
||||
-- different origin than the page.
|
||||
UPDATE flyers
|
||||
SET
|
||||
image_url = REPLACE(image_url, 'example.com', '127.0.0.1'),
|
||||
image_url = REPLACE(image_url, 'https://', 'http://'),
|
||||
icon_url = REPLACE(icon_url, 'example.com', '127.0.0.1'),
|
||||
icon_url = REPLACE(icon_url, 'https://', 'http://')
|
||||
image_url = REPLACE(image_url, 'example.com', 'localhost'),
|
||||
icon_url = REPLACE(icon_url, 'example.com', 'localhost')
|
||||
WHERE
|
||||
image_url LIKE '%example.com%'
|
||||
OR icon_url LIKE '%example.com%';
|
||||
|
||||
-- Also fix any existing 127.0.0.1 URLs to use localhost:
|
||||
UPDATE flyers
|
||||
SET
|
||||
image_url = REPLACE(image_url, '127.0.0.1', 'localhost'),
|
||||
icon_url = REPLACE(icon_url, '127.0.0.1', 'localhost')
|
||||
WHERE
|
||||
image_url LIKE '%127.0.0.1%'
|
||||
OR icon_url LIKE '%127.0.0.1%';
|
||||
|
||||
-- For test environment (run in test database):
|
||||
-- UPDATE flyers
|
||||
-- SET
|
||||
@@ -37,7 +47,7 @@ WHERE
|
||||
-- Verify the changes:
|
||||
SELECT flyer_id, image_url, icon_url
|
||||
FROM flyers
|
||||
WHERE image_url LIKE '%127.0.0.1%'
|
||||
OR icon_url LIKE '%127.0.0.1%'
|
||||
WHERE image_url LIKE '%localhost%'
|
||||
OR icon_url LIKE '%localhost%'
|
||||
OR image_url LIKE '%flyer-crawler%'
|
||||
OR icon_url LIKE '%flyer-crawler%';
|
||||
|
||||
@@ -112,6 +112,15 @@ const googleSchema = z.object({
|
||||
clientSecret: z.string().optional(),
|
||||
});
|
||||
|
||||
/**
|
||||
* GitHub OAuth configuration schema.
|
||||
* Used for GitHub social login functionality.
|
||||
*/
|
||||
const githubSchema = z.object({
|
||||
clientId: z.string().optional(),
|
||||
clientSecret: z.string().optional(),
|
||||
});
|
||||
|
||||
/**
|
||||
* Worker concurrency configuration schema.
|
||||
*/
|
||||
@@ -157,6 +166,7 @@ const envSchema = z.object({
|
||||
ai: aiSchema,
|
||||
upc: upcSchema,
|
||||
google: googleSchema,
|
||||
github: githubSchema,
|
||||
worker: workerSchema,
|
||||
server: serverSchema,
|
||||
sentry: sentrySchema,
|
||||
@@ -209,6 +219,10 @@ function loadEnvVars(): unknown {
|
||||
clientId: process.env.GOOGLE_CLIENT_ID,
|
||||
clientSecret: process.env.GOOGLE_CLIENT_SECRET,
|
||||
},
|
||||
github: {
|
||||
clientId: process.env.GITHUB_CLIENT_ID,
|
||||
clientSecret: process.env.GITHUB_CLIENT_SECRET,
|
||||
},
|
||||
worker: {
|
||||
concurrency: process.env.WORKER_CONCURRENCY,
|
||||
lockDuration: process.env.WORKER_LOCK_DURATION,
|
||||
@@ -367,3 +381,13 @@ export const isUpcItemDbConfigured = !!config.upc.upcItemDbApiKey;
|
||||
* Returns true if Barcode Lookup API is configured.
|
||||
*/
|
||||
export const isBarcodeLookupConfigured = !!config.upc.barcodeLookupApiKey;
|
||||
|
||||
/**
|
||||
* Returns true if Google OAuth is configured (both client ID and secret present).
|
||||
*/
|
||||
export const isGoogleOAuthConfigured = !!config.google.clientId && !!config.google.clientSecret;
|
||||
|
||||
/**
|
||||
* Returns true if GitHub OAuth is configured (both client ID and secret present).
|
||||
*/
|
||||
export const isGithubOAuthConfigured = !!config.github.clientId && !!config.github.clientSecret;
|
||||
|
||||
@@ -12,8 +12,8 @@ import path from 'node:path';
|
||||
import bcrypt from 'bcrypt';
|
||||
import { logger } from '../services/logger.server';
|
||||
|
||||
// Determine base URL based on environment
|
||||
// Dev container: https://127.0.0.1
|
||||
// Determine base URL for flyer images based on environment
|
||||
// Dev container: https://127.0.0.1 (NGINX now accepts both localhost and 127.0.0.1)
|
||||
// Test: https://flyer-crawler-test.projectium.com
|
||||
// Production: https://flyer-crawler.projectium.com
|
||||
const BASE_URL =
|
||||
|
||||
@@ -159,7 +159,7 @@ export const MainLayout: React.FC<MainLayoutProps> = ({
|
||||
<>
|
||||
<PriceHistoryChart />
|
||||
<Leaderboard />
|
||||
{userProfile && (
|
||||
{userProfile?.role === 'admin' && (
|
||||
<ActivityLog userProfile={userProfile} onLogClick={handleActivityLogClick} />
|
||||
)}
|
||||
</>
|
||||
|
||||
@@ -27,9 +27,13 @@ const defaultProps = {
|
||||
};
|
||||
|
||||
const setupSuccessMocks = () => {
|
||||
// The API returns {success, data: {userprofile, token}}, and the mutation extracts .data
|
||||
const mockAuthResponse = {
|
||||
userprofile: createMockUserProfile({ user: { user_id: '123', email: 'test@example.com' } }),
|
||||
token: 'mock-token',
|
||||
success: true,
|
||||
data: {
|
||||
userprofile: createMockUserProfile({ user: { user_id: '123', email: 'test@example.com' } }),
|
||||
token: 'mock-token',
|
||||
},
|
||||
};
|
||||
(mockedApiClient.loginUser as Mock).mockResolvedValue(
|
||||
new Response(JSON.stringify(mockAuthResponse)),
|
||||
|
||||
@@ -82,7 +82,11 @@ const defaultAuthenticatedProps = {
|
||||
};
|
||||
|
||||
const setupSuccessMocks = () => {
|
||||
const mockAuthResponse = { userprofile: authenticatedProfile, token: 'mock-token' };
|
||||
// The API returns {success, data: {userprofile, token}}, and the mutation extracts .data
|
||||
const mockAuthResponse = {
|
||||
success: true,
|
||||
data: { userprofile: authenticatedProfile, token: 'mock-token' },
|
||||
};
|
||||
(mockedApiClient.loginUser as Mock).mockResolvedValue(
|
||||
new Response(JSON.stringify(mockAuthResponse)),
|
||||
);
|
||||
|
||||
@@ -7,6 +7,7 @@ import * as apiClient from '../services/apiClient';
|
||||
import { useAuthProfileQuery, AUTH_PROFILE_QUERY_KEY } from '../hooks/queries/useAuthProfileQuery';
|
||||
import { getToken, setToken, removeToken } from '../services/tokenStorage';
|
||||
import { logger } from '../services/logger.client';
|
||||
import { setUser as setSentryUser } from '../services/sentry.client';
|
||||
|
||||
/**
|
||||
* AuthProvider component that manages authentication state.
|
||||
@@ -40,6 +41,12 @@ export const AuthProvider: React.FC<{ children: ReactNode }> = ({ children }) =>
|
||||
logger.info('[AuthProvider] Profile received from query, setting state to AUTHENTICATED.');
|
||||
setUserProfile(fetchedProfile);
|
||||
setAuthStatus('AUTHENTICATED');
|
||||
// Set Sentry user context for error tracking (ADR-015)
|
||||
setSentryUser({
|
||||
id: fetchedProfile.user.user_id,
|
||||
email: fetchedProfile.user.email,
|
||||
username: fetchedProfile.full_name || fetchedProfile.user.email,
|
||||
});
|
||||
} else if (token && isError) {
|
||||
logger.warn('[AuthProvider] Token was present but validation failed. Signing out.');
|
||||
removeToken();
|
||||
@@ -66,6 +73,8 @@ export const AuthProvider: React.FC<{ children: ReactNode }> = ({ children }) =>
|
||||
setAuthStatus('SIGNED_OUT');
|
||||
// Clear the auth profile cache on logout
|
||||
queryClient.removeQueries({ queryKey: AUTH_PROFILE_QUERY_KEY });
|
||||
// Clear Sentry user context (ADR-015)
|
||||
setSentryUser(null);
|
||||
}, [queryClient]);
|
||||
|
||||
const login = useCallback(
|
||||
@@ -82,6 +91,12 @@ export const AuthProvider: React.FC<{ children: ReactNode }> = ({ children }) =>
|
||||
setAuthStatus('AUTHENTICATED');
|
||||
// Update the query cache with the provided profile
|
||||
queryClient.setQueryData(AUTH_PROFILE_QUERY_KEY, profileData);
|
||||
// Set Sentry user context for error tracking (ADR-015)
|
||||
setSentryUser({
|
||||
id: profileData.user.user_id,
|
||||
email: profileData.user.email,
|
||||
username: profileData.full_name || profileData.user.email,
|
||||
});
|
||||
logger.info('[AuthProvider-Login] Login successful. State set to AUTHENTICATED.', {
|
||||
user: profileData.user,
|
||||
});
|
||||
@@ -106,6 +121,12 @@ export const AuthProvider: React.FC<{ children: ReactNode }> = ({ children }) =>
|
||||
setAuthStatus('AUTHENTICATED');
|
||||
// Update the query cache with the fetched profile
|
||||
queryClient.setQueryData(AUTH_PROFILE_QUERY_KEY, fetchedProfileData);
|
||||
// Set Sentry user context for error tracking (ADR-015)
|
||||
setSentryUser({
|
||||
id: fetchedProfileData.user.user_id,
|
||||
email: fetchedProfileData.user.email,
|
||||
username: fetchedProfileData.full_name || fetchedProfileData.user.email,
|
||||
});
|
||||
logger.info('[AuthProvider-Login] Profile fetch successful. State set to AUTHENTICATED.');
|
||||
} catch (e) {
|
||||
const errorMessage = e instanceof Error ? e.message : String(e);
|
||||
|
||||
@@ -619,4 +619,240 @@ describe('Health Routes (/api/health)', () => {
|
||||
expect(response.body.error.details.database.message).toBe('Database connection failed');
|
||||
});
|
||||
});
|
||||
|
||||
// =============================================================================
|
||||
// QUEUE HEALTH MONITORING (ADR-053)
|
||||
// =============================================================================
|
||||
|
||||
describe('GET /queues', () => {
|
||||
// Mock the queues module
|
||||
beforeEach(async () => {
|
||||
vi.resetModules();
|
||||
// Re-import after mocks are set up
|
||||
});
|
||||
|
||||
it('should return 200 OK with queue metrics and worker heartbeats when all healthy', async () => {
|
||||
// Arrange: Mock queue getJobCounts() and Redis heartbeats
|
||||
const mockQueues = await import('../services/queues.server');
|
||||
const mockQueue = {
|
||||
getJobCounts: vi.fn().mockResolvedValue({
|
||||
waiting: 5,
|
||||
active: 2,
|
||||
failed: 1,
|
||||
delayed: 0,
|
||||
}),
|
||||
};
|
||||
|
||||
// Mock all queues
|
||||
vi.spyOn(mockQueues, 'flyerQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'emailQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'analyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'weeklyAnalyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'cleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'tokenCleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'receiptQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'expiryAlertQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'barcodeQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
|
||||
// Mock Redis heartbeat responses (all healthy, last seen < 60s ago)
|
||||
const recentTimestamp = new Date(Date.now() - 10000).toISOString(); // 10 seconds ago
|
||||
const heartbeatValue = JSON.stringify({
|
||||
timestamp: recentTimestamp,
|
||||
pid: 1234,
|
||||
host: 'test-host',
|
||||
});
|
||||
|
||||
mockedRedisConnection.get = vi.fn().mockResolvedValue(heartbeatValue);
|
||||
|
||||
// Act
|
||||
const response = await supertest(app).get('/api/health/queues');
|
||||
|
||||
// Assert
|
||||
expect(response.status).toBe(200);
|
||||
expect(response.body.success).toBe(true);
|
||||
expect(response.body.data.status).toBe('healthy');
|
||||
expect(response.body.data.queues).toBeDefined();
|
||||
expect(response.body.data.workers).toBeDefined();
|
||||
|
||||
// Verify queue metrics structure
|
||||
expect(response.body.data.queues['flyer-processing']).toEqual({
|
||||
waiting: 5,
|
||||
active: 2,
|
||||
failed: 1,
|
||||
delayed: 0,
|
||||
});
|
||||
|
||||
// Verify worker heartbeat structure
|
||||
expect(response.body.data.workers['flyer-processing']).toEqual({
|
||||
alive: true,
|
||||
lastSeen: recentTimestamp,
|
||||
pid: 1234,
|
||||
host: 'test-host',
|
||||
});
|
||||
});
|
||||
|
||||
it('should return 503 when a queue is unavailable', async () => {
|
||||
// Arrange: Mock one queue to fail
|
||||
const mockQueues = await import('../services/queues.server');
|
||||
const healthyQueue = {
|
||||
getJobCounts: vi.fn().mockResolvedValue({
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
failed: 0,
|
||||
delayed: 0,
|
||||
}),
|
||||
};
|
||||
const failingQueue = {
|
||||
getJobCounts: vi.fn().mockRejectedValue(new Error('Redis connection lost')),
|
||||
};
|
||||
|
||||
vi.spyOn(mockQueues, 'flyerQueue', 'get').mockReturnValue(failingQueue as never);
|
||||
vi.spyOn(mockQueues, 'emailQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'analyticsQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'weeklyAnalyticsQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'cleanupQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'tokenCleanupQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'receiptQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'expiryAlertQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
vi.spyOn(mockQueues, 'barcodeQueue', 'get').mockReturnValue(healthyQueue as never);
|
||||
|
||||
mockedRedisConnection.get = vi.fn().mockResolvedValue(null);
|
||||
|
||||
// Act
|
||||
const response = await supertest(app).get('/api/health/queues');
|
||||
|
||||
// Assert
|
||||
expect(response.status).toBe(503);
|
||||
expect(response.body.success).toBe(false);
|
||||
expect(response.body.error.message).toBe('One or more queues or workers unavailable');
|
||||
expect(response.body.error.details.status).toBe('unhealthy');
|
||||
expect(response.body.error.details.queues['flyer-processing']).toEqual({
|
||||
error: 'Redis connection lost',
|
||||
});
|
||||
});
|
||||
|
||||
it('should return 503 when a worker heartbeat is stale', async () => {
|
||||
// Arrange: Mock queues as healthy but one worker heartbeat as stale
|
||||
const mockQueues = await import('../services/queues.server');
|
||||
const mockQueue = {
|
||||
getJobCounts: vi.fn().mockResolvedValue({
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
failed: 0,
|
||||
delayed: 0,
|
||||
}),
|
||||
};
|
||||
|
||||
vi.spyOn(mockQueues, 'flyerQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'emailQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'analyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'weeklyAnalyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'cleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'tokenCleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'receiptQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'expiryAlertQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'barcodeQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
|
||||
// Mock heartbeat - one worker is stale (> 60s ago)
|
||||
const staleTimestamp = new Date(Date.now() - 120000).toISOString(); // 120 seconds ago
|
||||
const staleHeartbeat = JSON.stringify({
|
||||
timestamp: staleTimestamp,
|
||||
pid: 1234,
|
||||
host: 'test-host',
|
||||
});
|
||||
|
||||
// First call returns stale heartbeat for flyer-processing, rest return null (no heartbeat)
|
||||
let callCount = 0;
|
||||
mockedRedisConnection.get = vi.fn().mockImplementation(() => {
|
||||
callCount++;
|
||||
return Promise.resolve(callCount === 1 ? staleHeartbeat : null);
|
||||
});
|
||||
|
||||
// Act
|
||||
const response = await supertest(app).get('/api/health/queues');
|
||||
|
||||
// Assert
|
||||
expect(response.status).toBe(503);
|
||||
expect(response.body.success).toBe(false);
|
||||
expect(response.body.error.details.status).toBe('unhealthy');
|
||||
expect(response.body.error.details.workers['flyer-processing']).toEqual({ alive: false });
|
||||
});
|
||||
|
||||
it('should return 503 when worker heartbeat is missing', async () => {
|
||||
// Arrange: Mock queues as healthy but no worker heartbeats in Redis
|
||||
const mockQueues = await import('../services/queues.server');
|
||||
const mockQueue = {
|
||||
getJobCounts: vi.fn().mockResolvedValue({
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
failed: 0,
|
||||
delayed: 0,
|
||||
}),
|
||||
};
|
||||
|
||||
vi.spyOn(mockQueues, 'flyerQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'emailQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'analyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'weeklyAnalyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'cleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'tokenCleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'receiptQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'expiryAlertQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'barcodeQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
|
||||
// Mock Redis to return null (no heartbeat found)
|
||||
mockedRedisConnection.get = vi.fn().mockResolvedValue(null);
|
||||
|
||||
// Act
|
||||
const response = await supertest(app).get('/api/health/queues');
|
||||
|
||||
// Assert
|
||||
expect(response.status).toBe(503);
|
||||
expect(response.body.success).toBe(false);
|
||||
expect(response.body.error.details.status).toBe('unhealthy');
|
||||
expect(response.body.error.details.workers['flyer-processing']).toEqual({ alive: false });
|
||||
});
|
||||
|
||||
it('should handle Redis connection errors gracefully', async () => {
|
||||
// Arrange: Mock queues to succeed but Redis get() to fail
|
||||
const mockQueues = await import('../services/queues.server');
|
||||
const mockQueue = {
|
||||
getJobCounts: vi.fn().mockResolvedValue({
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
failed: 0,
|
||||
delayed: 0,
|
||||
}),
|
||||
};
|
||||
|
||||
vi.spyOn(mockQueues, 'flyerQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'emailQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'analyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'weeklyAnalyticsQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'cleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'tokenCleanupQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'receiptQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'expiryAlertQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
vi.spyOn(mockQueues, 'barcodeQueue', 'get').mockReturnValue(mockQueue as never);
|
||||
|
||||
// Mock Redis get() to throw error
|
||||
mockedRedisConnection.get = vi.fn().mockRejectedValue(new Error('Redis connection lost'));
|
||||
|
||||
// Act
|
||||
const response = await supertest(app).get('/api/health/queues');
|
||||
|
||||
// Assert: Should still return queue metrics but mark workers as unhealthy
|
||||
expect(response.status).toBe(503);
|
||||
expect(response.body.error.details.queues['flyer-processing']).toEqual({
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
failed: 0,
|
||||
delayed: 0,
|
||||
});
|
||||
expect(response.body.error.details.workers['flyer-processing']).toEqual({
|
||||
alive: false,
|
||||
error: 'Redis connection lost',
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -15,6 +15,17 @@ import fs from 'node:fs/promises';
|
||||
import { getSimpleWeekAndYear } from '../utils/dateUtils';
|
||||
import { validateRequest } from '../middleware/validation.middleware';
|
||||
import { sendSuccess, sendError, ErrorCode } from '../utils/apiResponse';
|
||||
import {
|
||||
flyerQueue,
|
||||
emailQueue,
|
||||
analyticsQueue,
|
||||
weeklyAnalyticsQueue,
|
||||
cleanupQueue,
|
||||
tokenCleanupQueue,
|
||||
receiptQueue,
|
||||
expiryAlertQueue,
|
||||
barcodeQueue,
|
||||
} from '../services/queues.server';
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -442,4 +453,224 @@ router.get(
|
||||
},
|
||||
);
|
||||
|
||||
// =============================================================================
|
||||
// QUEUE HEALTH MONITORING (ADR-053)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* @openapi
|
||||
* /health/queues:
|
||||
* get:
|
||||
* summary: Queue health and metrics with worker heartbeats
|
||||
* description: |
|
||||
* Returns job counts for all BullMQ queues and worker heartbeat status.
|
||||
* Use this endpoint to monitor queue depths and detect stuck/frozen workers.
|
||||
* Implements ADR-053: Worker Health Checks and Stalled Job Monitoring.
|
||||
* tags:
|
||||
* - Health
|
||||
* responses:
|
||||
* 200:
|
||||
* description: Queue metrics and worker heartbeats retrieved successfully
|
||||
* content:
|
||||
* application/json:
|
||||
* schema:
|
||||
* type: object
|
||||
* properties:
|
||||
* success:
|
||||
* type: boolean
|
||||
* example: true
|
||||
* data:
|
||||
* type: object
|
||||
* properties:
|
||||
* status:
|
||||
* type: string
|
||||
* enum: [healthy, unhealthy]
|
||||
* timestamp:
|
||||
* type: string
|
||||
* format: date-time
|
||||
* queues:
|
||||
* type: object
|
||||
* additionalProperties:
|
||||
* type: object
|
||||
* properties:
|
||||
* waiting:
|
||||
* type: integer
|
||||
* active:
|
||||
* type: integer
|
||||
* failed:
|
||||
* type: integer
|
||||
* delayed:
|
||||
* type: integer
|
||||
* workers:
|
||||
* type: object
|
||||
* additionalProperties:
|
||||
* type: object
|
||||
* properties:
|
||||
* alive:
|
||||
* type: boolean
|
||||
* lastSeen:
|
||||
* type: string
|
||||
* format: date-time
|
||||
* pid:
|
||||
* type: integer
|
||||
* host:
|
||||
* type: string
|
||||
* 503:
|
||||
* description: Redis unavailable or workers not responding
|
||||
* content:
|
||||
* application/json:
|
||||
* schema:
|
||||
* $ref: '#/components/schemas/ErrorResponse'
|
||||
*/
|
||||
router.get(
|
||||
'/queues',
|
||||
validateRequest(emptySchema),
|
||||
async (req: Request, res: Response, next: NextFunction) => {
|
||||
try {
|
||||
// Define all queues to monitor
|
||||
const queues = [
|
||||
{ name: 'flyer-processing', queue: flyerQueue },
|
||||
{ name: 'email-sending', queue: emailQueue },
|
||||
{ name: 'analytics-reporting', queue: analyticsQueue },
|
||||
{ name: 'weekly-analytics-reporting', queue: weeklyAnalyticsQueue },
|
||||
{ name: 'file-cleanup', queue: cleanupQueue },
|
||||
{ name: 'token-cleanup', queue: tokenCleanupQueue },
|
||||
{ name: 'receipt-processing', queue: receiptQueue },
|
||||
{ name: 'expiry-alerts', queue: expiryAlertQueue },
|
||||
{ name: 'barcode-detection', queue: barcodeQueue },
|
||||
];
|
||||
|
||||
// Fetch job counts for all queues in parallel
|
||||
const queueMetrics = await Promise.all(
|
||||
queues.map(async ({ name, queue }) => {
|
||||
try {
|
||||
const counts = await queue.getJobCounts();
|
||||
return {
|
||||
name,
|
||||
counts: {
|
||||
waiting: counts.waiting || 0,
|
||||
active: counts.active || 0,
|
||||
failed: counts.failed || 0,
|
||||
delayed: counts.delayed || 0,
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
// If individual queue fails, return error state
|
||||
return {
|
||||
name,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
};
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
// Fetch worker heartbeats in parallel
|
||||
const workerNames = queues.map((q) => q.name);
|
||||
const workerHeartbeats = await Promise.all(
|
||||
workerNames.map(async (name) => {
|
||||
try {
|
||||
const key = `worker:heartbeat:${name}`;
|
||||
const value = await redisConnection.get(key);
|
||||
|
||||
if (!value) {
|
||||
return { name, alive: false };
|
||||
}
|
||||
|
||||
const heartbeat = JSON.parse(value) as {
|
||||
timestamp: string;
|
||||
pid: number;
|
||||
host: string;
|
||||
};
|
||||
const lastSeenMs = new Date(heartbeat.timestamp).getTime();
|
||||
const nowMs = Date.now();
|
||||
const ageSeconds = (nowMs - lastSeenMs) / 1000;
|
||||
|
||||
// Consider alive if last heartbeat < 60 seconds ago
|
||||
const alive = ageSeconds < 60;
|
||||
|
||||
return {
|
||||
name,
|
||||
alive,
|
||||
lastSeen: heartbeat.timestamp,
|
||||
pid: heartbeat.pid,
|
||||
host: heartbeat.host,
|
||||
};
|
||||
} catch (error) {
|
||||
// If heartbeat check fails, mark as unknown
|
||||
return {
|
||||
name,
|
||||
alive: false,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
};
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
// Build response objects
|
||||
const queuesData: Record<
|
||||
string,
|
||||
{ waiting: number; active: number; failed: number; delayed: number } | { error: string }
|
||||
> = {};
|
||||
const workersData: Record<
|
||||
string,
|
||||
| { alive: boolean; lastSeen?: string; pid?: number; host?: string }
|
||||
| { alive: boolean; error: string }
|
||||
> = {};
|
||||
let hasErrors = false;
|
||||
|
||||
for (const metric of queueMetrics) {
|
||||
if ('error' in metric) {
|
||||
queuesData[metric.name] = { error: metric.error };
|
||||
hasErrors = true;
|
||||
} else {
|
||||
queuesData[metric.name] = metric.counts;
|
||||
}
|
||||
}
|
||||
|
||||
for (const heartbeat of workerHeartbeats) {
|
||||
if ('error' in heartbeat) {
|
||||
workersData[heartbeat.name] = { alive: false, error: heartbeat.error };
|
||||
} else if (!heartbeat.alive) {
|
||||
workersData[heartbeat.name] = { alive: false };
|
||||
hasErrors = true;
|
||||
} else {
|
||||
workersData[heartbeat.name] = {
|
||||
alive: heartbeat.alive,
|
||||
lastSeen: heartbeat.lastSeen,
|
||||
pid: heartbeat.pid,
|
||||
host: heartbeat.host,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const response = {
|
||||
status: hasErrors ? ('unhealthy' as const) : ('healthy' as const),
|
||||
timestamp: new Date().toISOString(),
|
||||
queues: queuesData,
|
||||
workers: workersData,
|
||||
};
|
||||
|
||||
if (hasErrors) {
|
||||
return sendError(
|
||||
res,
|
||||
ErrorCode.SERVICE_UNAVAILABLE,
|
||||
'One or more queues or workers unavailable',
|
||||
503,
|
||||
response,
|
||||
);
|
||||
}
|
||||
|
||||
return sendSuccess(res, response);
|
||||
} catch (error: unknown) {
|
||||
// Redis connection error or other unexpected failure
|
||||
if (error instanceof Error) {
|
||||
return next(error);
|
||||
}
|
||||
const message =
|
||||
(error as { message?: string })?.message || 'Failed to retrieve queue metrics';
|
||||
return next(new Error(message));
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -132,7 +132,8 @@ describe('API Client', () => {
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: () => Promise.resolve({ token: 'new-refreshed-token' }),
|
||||
// The API returns {success, data: {token}} wrapper format
|
||||
json: () => Promise.resolve({ success: true, data: { token: 'new-refreshed-token' } }),
|
||||
} as Response)
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
@@ -218,7 +219,7 @@ describe('API Client', () => {
|
||||
localStorage.setItem('authToken', 'expired-token');
|
||||
// Mock the global fetch to return a sequence of responses:
|
||||
// 1. 401 Unauthorized (initial API call)
|
||||
// 2. 200 OK (token refresh call)
|
||||
// 2. 200 OK (token refresh call) - uses API wrapper format {success, data: {token}}
|
||||
// 3. 200 OK (retry of the initial API call)
|
||||
vi.mocked(global.fetch)
|
||||
.mockResolvedValueOnce({
|
||||
@@ -229,7 +230,8 @@ describe('API Client', () => {
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: () => Promise.resolve({ token: 'new-refreshed-token' }),
|
||||
// The API returns {success, data: {token}} wrapper format
|
||||
json: () => Promise.resolve({ success: true, data: { token: 'new-refreshed-token' } }),
|
||||
} as Response)
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
|
||||
@@ -21,6 +21,8 @@ const isProduction = process.env.NODE_ENV === 'production';
|
||||
const isTest = process.env.NODE_ENV === 'test';
|
||||
const isStaging = process.env.NODE_ENV === 'staging';
|
||||
const isDevelopment = !isProduction && !isTest && !isStaging;
|
||||
// PM2_HOME is set when running under PM2 process manager
|
||||
const isRunningUnderPM2 = !!process.env.PM2_HOME;
|
||||
|
||||
// Determine log directory based on environment
|
||||
// In production/test, use the application directory's logs folder
|
||||
@@ -73,8 +75,9 @@ const redactConfig = {
|
||||
const createLogger = (): pino.Logger => {
|
||||
const logDir = ensureLogDirectory();
|
||||
|
||||
// Development: Use pino-pretty for human-readable output
|
||||
if (isDevelopment) {
|
||||
// Development WITHOUT PM2: Use pino-pretty for human-readable console output
|
||||
// This is for local development outside of containers (rare case now)
|
||||
if (isDevelopment && !isRunningUnderPM2) {
|
||||
return pino({
|
||||
level: 'debug',
|
||||
transport: {
|
||||
@@ -89,6 +92,15 @@ const createLogger = (): pino.Logger => {
|
||||
});
|
||||
}
|
||||
|
||||
// Development WITH PM2 (ADR-014): Output JSON for Logstash integration
|
||||
// PM2 captures stdout to /var/log/pm2/*.log files which Logstash parses
|
||||
if (isDevelopment && isRunningUnderPM2) {
|
||||
return pino({
|
||||
level: 'debug',
|
||||
redact: redactConfig,
|
||||
});
|
||||
}
|
||||
|
||||
// Production/Test: Write to both stdout and file
|
||||
if (logDir) {
|
||||
const logFilePath = path.join(logDir, 'app.log');
|
||||
|
||||
@@ -62,12 +62,33 @@ vi.mock('./logger.server', () => ({
|
||||
vi.mock('bullmq', () => ({
|
||||
Worker: mocks.MockWorker,
|
||||
Queue: vi.fn(function () {
|
||||
return { add: vi.fn() };
|
||||
return { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) };
|
||||
}),
|
||||
// Add UnrecoverableError to the mock so it can be used in tests
|
||||
UnrecoverableError: class UnrecoverableError extends Error {},
|
||||
}));
|
||||
|
||||
// Mock redis.server to prevent real Redis connection attempts
|
||||
vi.mock('./redis.server', () => ({
|
||||
connection: {
|
||||
on: vi.fn(),
|
||||
quit: vi.fn().mockResolvedValue(undefined),
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock queues.server to provide mock queue instances
|
||||
vi.mock('./queues.server', () => ({
|
||||
flyerQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
emailQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
analyticsQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
cleanupQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
weeklyAnalyticsQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
tokenCleanupQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
receiptQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
expiryAlertQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
barcodeQueue: { add: vi.fn(), close: vi.fn().mockResolvedValue(undefined) },
|
||||
}));
|
||||
|
||||
// Mock flyerProcessingService.server as flyerWorker and cleanupWorker depend on it
|
||||
vi.mock('./flyerProcessingService.server', () => {
|
||||
// Mock the constructor to return an object with the mocked methods
|
||||
@@ -88,6 +109,67 @@ vi.mock('./flyerDataTransformer', () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock aiService.server to prevent initialization issues
|
||||
vi.mock('./aiService.server', () => ({
|
||||
aiService: {
|
||||
extractAndValidateData: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock db/index.db to prevent database connections
|
||||
vi.mock('./db/index.db', () => ({
|
||||
personalizationRepo: {},
|
||||
}));
|
||||
|
||||
// Mock flyerAiProcessor.server
|
||||
vi.mock('./flyerAiProcessor.server', () => ({
|
||||
FlyerAiProcessor: vi.fn().mockImplementation(function () {
|
||||
return { processFlyer: vi.fn() };
|
||||
}),
|
||||
}));
|
||||
|
||||
// Mock flyerPersistenceService.server
|
||||
vi.mock('./flyerPersistenceService.server', () => ({
|
||||
FlyerPersistenceService: vi.fn().mockImplementation(function () {
|
||||
return { persistFlyerData: vi.fn() };
|
||||
}),
|
||||
}));
|
||||
|
||||
// Mock db/connection.db to prevent database connections
|
||||
vi.mock('./db/connection.db', () => ({
|
||||
withTransaction: vi.fn(),
|
||||
}));
|
||||
|
||||
// Mock receiptService.server
|
||||
vi.mock('./receiptService.server', () => ({
|
||||
processReceiptJob: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
// Mock expiryService.server
|
||||
vi.mock('./expiryService.server', () => ({
|
||||
processExpiryAlertJob: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
// Mock barcodeService.server
|
||||
vi.mock('./barcodeService.server', () => ({
|
||||
processBarcodeDetectionJob: vi.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
// Mock flyerFileHandler.server
|
||||
vi.mock('./flyerFileHandler.server', () => ({
|
||||
FlyerFileHandler: vi.fn().mockImplementation(function () {
|
||||
return { handleFile: vi.fn() };
|
||||
}),
|
||||
}));
|
||||
|
||||
// Mock workerOptions config
|
||||
vi.mock('../config/workerOptions', () => ({
|
||||
defaultWorkerOptions: {
|
||||
lockDuration: 30000,
|
||||
stalledInterval: 30000,
|
||||
},
|
||||
}));
|
||||
|
||||
// Helper to create a mock BullMQ Job object
|
||||
const createMockJob = <T>(data: T): Job<T> => {
|
||||
return {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { Worker, Job } from 'bullmq';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import { exec } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import os from 'os';
|
||||
|
||||
import { logger } from './logger.server';
|
||||
import { connection } from './redis.server';
|
||||
@@ -91,6 +92,45 @@ const createWorkerProcessor = <T, R>(processor: (job: Job<T>) => Promise<R>) =>
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Updates the worker heartbeat in Redis.
|
||||
* Stores timestamp, PID, and hostname to detect frozen/hung workers.
|
||||
* TTL is 90s, so if heartbeat isn't updated for 90s, the key expires.
|
||||
* Implements ADR-053: Worker Health Checks.
|
||||
*/
|
||||
const updateWorkerHeartbeat = async (workerName: string) => {
|
||||
const key = `worker:heartbeat:${workerName}`;
|
||||
const value = JSON.stringify({
|
||||
timestamp: new Date().toISOString(),
|
||||
pid: process.pid,
|
||||
host: os.hostname(),
|
||||
});
|
||||
|
||||
try {
|
||||
await connection.set(key, value, 'EX', 90);
|
||||
} catch (error) {
|
||||
logger.error({ err: error, workerName }, `Failed to update heartbeat for worker ${workerName}`);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Starts periodic heartbeat updates for a worker.
|
||||
* Updates every 30 seconds with 90s TTL.
|
||||
*/
|
||||
const startWorkerHeartbeat = (worker: Worker) => {
|
||||
// Initial heartbeat
|
||||
updateWorkerHeartbeat(worker.name);
|
||||
|
||||
// Periodic heartbeat updates
|
||||
const heartbeatInterval = setInterval(() => {
|
||||
updateWorkerHeartbeat(worker.name);
|
||||
}, 30000); // 30 seconds
|
||||
|
||||
// Store interval on worker for cleanup
|
||||
(worker as unknown as { heartbeatInterval?: NodeJS.Timeout }).heartbeatInterval =
|
||||
heartbeatInterval;
|
||||
};
|
||||
|
||||
const attachWorkerEventListeners = (worker: Worker) => {
|
||||
worker.on('completed', (job: Job, returnValue: unknown) => {
|
||||
logger.info({ returnValue }, `[${worker.name}] Job ${job.id} completed successfully.`);
|
||||
@@ -102,6 +142,9 @@ const attachWorkerEventListeners = (worker: Worker) => {
|
||||
`[${worker.name}] Job ${job?.id} has ultimately failed after all attempts.`,
|
||||
);
|
||||
});
|
||||
|
||||
// Start heartbeat monitoring for this worker
|
||||
startWorkerHeartbeat(worker);
|
||||
};
|
||||
|
||||
export const flyerWorker = new Worker<FlyerJobData>(
|
||||
@@ -219,17 +262,28 @@ const SHUTDOWN_TIMEOUT = 30000; // 30 seconds
|
||||
* without exiting the process.
|
||||
*/
|
||||
export const closeWorkers = async () => {
|
||||
await Promise.all([
|
||||
flyerWorker.close(),
|
||||
emailWorker.close(),
|
||||
analyticsWorker.close(),
|
||||
cleanupWorker.close(),
|
||||
weeklyAnalyticsWorker.close(),
|
||||
tokenCleanupWorker.close(),
|
||||
receiptWorker.close(),
|
||||
expiryAlertWorker.close(),
|
||||
barcodeWorker.close(),
|
||||
]);
|
||||
// Clear heartbeat intervals
|
||||
const workers = [
|
||||
flyerWorker,
|
||||
emailWorker,
|
||||
analyticsWorker,
|
||||
cleanupWorker,
|
||||
weeklyAnalyticsWorker,
|
||||
tokenCleanupWorker,
|
||||
receiptWorker,
|
||||
expiryAlertWorker,
|
||||
barcodeWorker,
|
||||
];
|
||||
|
||||
workers.forEach((worker) => {
|
||||
const interval = (worker as unknown as { heartbeatInterval?: NodeJS.Timeout })
|
||||
.heartbeatInterval;
|
||||
if (interval) {
|
||||
clearInterval(interval);
|
||||
}
|
||||
});
|
||||
|
||||
await Promise.all(workers.map((w) => w.close()));
|
||||
};
|
||||
|
||||
export const gracefulShutdown = async (signal: string) => {
|
||||
|
||||
@@ -15,7 +15,7 @@ export const getTestBaseUrl = (): string => {
|
||||
/**
|
||||
* Get the flyer base URL for test data based on environment.
|
||||
* Uses FLYER_BASE_URL if set, otherwise detects environment:
|
||||
* - Dev container: http://127.0.0.1
|
||||
* - Dev container: https://localhost (NOT 127.0.0.1 - avoids SSL mixed-origin issues)
|
||||
* - Test: https://flyer-crawler-test.projectium.com
|
||||
* - Production: https://flyer-crawler.projectium.com
|
||||
* - Default: https://example.com (for unit tests)
|
||||
@@ -26,8 +26,10 @@ export const getFlyerBaseUrl = (): string => {
|
||||
}
|
||||
|
||||
// Check if we're in dev container (DB_HOST=postgres is typical indicator)
|
||||
// Use 'localhost' instead of '127.0.0.1' to match the hostname users access
|
||||
// This avoids SSL certificate mixed-origin issues in browsers
|
||||
if (process.env.DB_HOST === 'postgres' || process.env.DB_HOST === '127.0.0.1') {
|
||||
return 'http://127.0.0.1';
|
||||
return 'https://localhost';
|
||||
}
|
||||
|
||||
if (process.env.NODE_ENV === 'production') {
|
||||
|
||||
Reference in New Issue
Block a user