Files
flyer-crawler.projectium.com/Dockerfile.dev

322 lines
12 KiB
Docker

# Dockerfile.dev
# ============================================================================
# DEVELOPMENT DOCKERFILE
# ============================================================================
# This Dockerfile creates a development environment that matches production
# as closely as possible while providing the tools needed for development.
#
# Base: Ubuntu 22.04 (LTS) - matches production server
# Node: v20.x (LTS) - matches production
# Includes: PostgreSQL client, Redis CLI, build tools, Bugsink, Logstash
# ============================================================================
FROM ubuntu:22.04
# Set environment variables to non-interactive to avoid prompts during installation
ENV DEBIAN_FRONTEND=noninteractive
# ============================================================================
# Install System Dependencies
# ============================================================================
# - curl: for downloading Node.js setup script and health checks
# - git: for version control operations
# - build-essential: for compiling native Node.js modules (node-gyp)
# - python3, python3-pip, python3-venv: for Bugsink
# - postgresql-client: for psql CLI (database initialization)
# - redis-tools: for redis-cli (health checks)
# - gnupg, apt-transport-https: for Elastic APT repository (Logstash)
# - openjdk-17-jre-headless: required by Logstash
RUN apt-get update && apt-get install -y \
curl \
git \
build-essential \
python3 \
python3-pip \
python3-venv \
postgresql-client \
redis-tools \
gnupg \
apt-transport-https \
openjdk-17-jre-headless \
&& rm -rf /var/lib/apt/lists/*
# ============================================================================
# Install Node.js 20.x (LTS)
# ============================================================================
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs
# ============================================================================
# Install Logstash (Elastic APT Repository)
# ============================================================================
# ADR-015: Log aggregation for Pino and Redis logs → Bugsink
RUN curl -fsSL https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg \
&& echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee /etc/apt/sources.list.d/elastic-8.x.list \
&& apt-get update \
&& apt-get install -y logstash \
&& rm -rf /var/lib/apt/lists/*
# ============================================================================
# Install Bugsink (Python Package)
# ============================================================================
# ADR-015: Self-hosted Sentry-compatible error tracking
# Create a virtual environment for Bugsink to avoid conflicts
RUN python3 -m venv /opt/bugsink \
&& /opt/bugsink/bin/pip install --upgrade pip \
&& /opt/bugsink/bin/pip install bugsink gunicorn psycopg2-binary
# Create Bugsink directories and configuration
RUN mkdir -p /var/log/bugsink /var/lib/bugsink /opt/bugsink/conf
# Create Bugsink configuration file (Django settings module)
# This file is imported by bugsink-manage via DJANGO_SETTINGS_MODULE
# Based on bugsink/conf_templates/docker.py.template but customized for our setup
RUN echo 'import os\n\
from urllib.parse import urlparse\n\
\n\
from bugsink.settings.default import *\n\
from bugsink.settings.default import DATABASES, SILENCED_SYSTEM_CHECKS\n\
from bugsink.conf_utils import deduce_allowed_hosts, deduce_script_name\n\
\n\
IS_DOCKER = True\n\
\n\
# Security settings\n\
SECRET_KEY = os.getenv("SECRET_KEY")\n\
DEBUG = os.getenv("DEBUG", "False").lower() in ("true", "1", "yes")\n\
\n\
# Silence cookie security warnings for dev (no HTTPS)\n\
SILENCED_SYSTEM_CHECKS += ["security.W012", "security.W016"]\n\
\n\
# Database configuration from DATABASE_URL environment variable\n\
if os.getenv("DATABASE_URL"):\n\
DATABASE_URL = os.getenv("DATABASE_URL")\n\
parsed = urlparse(DATABASE_URL)\n\
\n\
if parsed.scheme in ["postgres", "postgresql"]:\n\
DATABASES["default"] = {\n\
"ENGINE": "django.db.backends.postgresql",\n\
"NAME": parsed.path.lstrip("/"),\n\
"USER": parsed.username,\n\
"PASSWORD": parsed.password,\n\
"HOST": parsed.hostname,\n\
"PORT": parsed.port or "5432",\n\
}\n\
\n\
# Snappea (background task runner) settings\n\
SNAPPEA = {\n\
"TASK_ALWAYS_EAGER": False,\n\
"WORKAHOLIC": True,\n\
"NUM_WORKERS": 2,\n\
"PID_FILE": None,\n\
}\n\
DATABASES["snappea"]["NAME"] = "/tmp/snappea.sqlite3"\n\
\n\
# Site settings\n\
_PORT = os.getenv("PORT", "8000")\n\
BUGSINK = {\n\
"BASE_URL": os.getenv("BASE_URL", f"http://localhost:{_PORT}"),\n\
"SITE_TITLE": os.getenv("SITE_TITLE", "Flyer Crawler Error Tracking"),\n\
"SINGLE_USER": os.getenv("SINGLE_USER", "True").lower() in ("true", "1", "yes"),\n\
"SINGLE_TEAM": os.getenv("SINGLE_TEAM", "True").lower() in ("true", "1", "yes"),\n\
"PHONEHOME": False,\n\
}\n\
\n\
ALLOWED_HOSTS = deduce_allowed_hosts(BUGSINK["BASE_URL"])\n\
\n\
# Console email backend for dev\n\
EMAIL_BACKEND = "bugsink.email_backends.QuietConsoleEmailBackend"\n\
' > /opt/bugsink/conf/bugsink_conf.py
# Create Bugsink startup script
# Uses DATABASE_URL environment variable (standard Docker approach per docs)
RUN echo '#!/bin/bash\n\
set -e\n\
\n\
# Build DATABASE_URL from individual env vars for flexibility\n\
export DATABASE_URL="postgresql://${BUGSINK_DB_USER:-bugsink}:${BUGSINK_DB_PASSWORD:-bugsink_dev_password}@${BUGSINK_DB_HOST:-postgres}:${BUGSINK_DB_PORT:-5432}/${BUGSINK_DB_NAME:-bugsink}"\n\
# SECRET_KEY is required by Bugsink/Django\n\
export SECRET_KEY="${BUGSINK_SECRET_KEY:-dev-bugsink-secret-key-minimum-50-characters-for-security}"\n\
\n\
# Create superuser if not exists (for dev convenience)\n\
if [ -n "$BUGSINK_ADMIN_EMAIL" ] && [ -n "$BUGSINK_ADMIN_PASSWORD" ]; then\n\
export CREATE_SUPERUSER="${BUGSINK_ADMIN_EMAIL}:${BUGSINK_ADMIN_PASSWORD}"\n\
fi\n\
\n\
# Wait for PostgreSQL to be ready\n\
until pg_isready -h ${BUGSINK_DB_HOST:-postgres} -p ${BUGSINK_DB_PORT:-5432} -U ${BUGSINK_DB_USER:-bugsink}; do\n\
echo "Waiting for PostgreSQL..."\n\
sleep 2\n\
done\n\
\n\
echo "PostgreSQL is ready. Starting Bugsink..."\n\
echo "DATABASE_URL: postgresql://${BUGSINK_DB_USER}:***@${BUGSINK_DB_HOST}:${BUGSINK_DB_PORT}/${BUGSINK_DB_NAME}"\n\
\n\
# Change to config directory so bugsink_conf.py can be found\n\
cd /opt/bugsink/conf\n\
\n\
# Run migrations\n\
echo "Running database migrations..."\n\
/opt/bugsink/bin/bugsink-manage migrate --noinput\n\
\n\
# Create superuser if CREATE_SUPERUSER is set (format: email:password)\n\
if [ -n "$CREATE_SUPERUSER" ]; then\n\
IFS=":" read -r ADMIN_EMAIL ADMIN_PASS <<< "$CREATE_SUPERUSER"\n\
/opt/bugsink/bin/bugsink-manage shell -c "\n\
from django.contrib.auth import get_user_model\n\
User = get_user_model()\n\
if not User.objects.filter(email='"'"'$ADMIN_EMAIL'"'"').exists():\n\
User.objects.create_superuser('"'"'$ADMIN_EMAIL'"'"', '"'"'$ADMIN_PASS'"'"')\n\
print('"'"'Superuser created'"'"')\n\
else:\n\
print('"'"'Superuser already exists'"'"')\n\
" || true\n\
fi\n\
\n\
# Start Bugsink with Gunicorn\n\
echo "Starting Gunicorn on port ${BUGSINK_PORT:-8000}..."\n\
exec /opt/bugsink/bin/gunicorn \\\n\
--bind 0.0.0.0:${BUGSINK_PORT:-8000} \\\n\
--workers ${BUGSINK_WORKERS:-2} \\\n\
--access-logfile - \\\n\
--error-logfile - \\\n\
bugsink.wsgi:application\n\
' > /usr/local/bin/start-bugsink.sh \
&& chmod +x /usr/local/bin/start-bugsink.sh
# ============================================================================
# Create Logstash Pipeline Configuration
# ============================================================================
# ADR-015: Pino and Redis logs → Bugsink
RUN mkdir -p /etc/logstash/conf.d /app/logs
RUN echo 'input {\n\
# Pino application logs\n\
file {\n\
path => "/app/logs/*.log"\n\
codec => json\n\
type => "pino"\n\
tags => ["app"]\n\
start_position => "beginning"\n\
sincedb_path => "/var/lib/logstash/sincedb_pino"\n\
}\n\
\n\
# Redis logs\n\
file {\n\
path => "/var/log/redis/*.log"\n\
type => "redis"\n\
tags => ["redis"]\n\
start_position => "beginning"\n\
sincedb_path => "/var/lib/logstash/sincedb_redis"\n\
}\n\
\n\
# PostgreSQL function logs (ADR-050)\n\
file {\n\
path => "/var/log/postgresql/*.log"\n\
type => "postgres"\n\
tags => ["postgres", "database"]\n\
start_position => "beginning"\n\
sincedb_path => "/var/lib/logstash/sincedb_postgres"\n\
}\n\
}\n\
\n\
filter {\n\
# Pino error detection (level 50 = error, 60 = fatal)\n\
if [type] == "pino" and [level] >= 50 {\n\
mutate { add_tag => ["error"] }\n\
}\n\
\n\
# Redis error detection\n\
if [type] == "redis" {\n\
grok {\n\
match => { "message" => "%%{POSINT:pid}:%%{WORD:role} %%{MONTHDAY} %%{MONTH} %%{TIME} %%{WORD:loglevel} %%{GREEDYDATA:redis_message}" }\n\
}\n\
if [loglevel] in ["WARNING", "ERROR"] {\n\
mutate { add_tag => ["error"] }\n\
}\n\
}\n\
\n\
# PostgreSQL function log parsing (ADR-050)\n\
if [type] == "postgres" {\n\
# Extract timestamp and process ID from PostgreSQL log prefix\n\
# Format: "2026-01-18 10:30:00 PST [12345] user@database "\n\
grok {\n\
match => { "message" => "%%{TIMESTAMP_ISO8601:pg_timestamp} \\\\[%%{POSINT:pg_pid}\\\\] %%{USERNAME:pg_user}@%%{WORD:pg_database} %%{GREEDYDATA:pg_message}" }\n\
}\n\
\n\
# Check if this is a structured JSON log from fn_log()\n\
# fn_log() emits JSON like: {"timestamp":"...","level":"WARNING","source":"postgresql","function":"award_achievement",...}\n\
if [pg_message] =~ /^\\{.*"source":"postgresql".*\\}$/ {\n\
json {\n\
source => "pg_message"\n\
target => "fn_log"\n\
}\n\
\n\
# Mark as error if level is WARNING or ERROR\n\
if [fn_log][level] in ["WARNING", "ERROR"] {\n\
mutate { add_tag => ["error", "db_function"] }\n\
}\n\
}\n\
\n\
# Also catch native PostgreSQL errors\n\
if [pg_message] =~ /^ERROR:/ or [pg_message] =~ /^FATAL:/ {\n\
mutate { add_tag => ["error", "postgres_native"] }\n\
}\n\
}\n\
}\n\
\n\
output {\n\
if "error" in [tags] {\n\
http {\n\
url => "http://localhost:8000/api/store/"\n\
http_method => "post"\n\
format => "json"\n\
}\n\
}\n\
\n\
# Debug output (comment out in production)\n\
stdout { codec => rubydebug }\n\
}\n\
' > /etc/logstash/conf.d/bugsink.conf
# Create Logstash sincedb directory
RUN mkdir -p /var/lib/logstash && chown -R logstash:logstash /var/lib/logstash
# ============================================================================
# Set Working Directory
# ============================================================================
WORKDIR /app
# ============================================================================
# Environment Configuration
# ============================================================================
# Default environment variables for development
ENV NODE_ENV=development
# Increase Node.js memory limit for large builds
ENV NODE_OPTIONS='--max-old-space-size=8192'
# Bugsink defaults (ADR-015)
ENV BUGSINK_DB_HOST=postgres
ENV BUGSINK_DB_PORT=5432
ENV BUGSINK_DB_NAME=bugsink
ENV BUGSINK_DB_USER=bugsink
ENV BUGSINK_DB_PASSWORD=bugsink_dev_password
ENV BUGSINK_PORT=8000
ENV BUGSINK_BASE_URL=http://localhost:8000
ENV BUGSINK_ADMIN_EMAIL=admin@localhost
ENV BUGSINK_ADMIN_PASSWORD=admin
# ============================================================================
# Expose Ports
# ============================================================================
# 3000 - Vite frontend
# 3001 - Express backend
# 8000 - Bugsink error tracking
EXPOSE 3000 3001 8000
# ============================================================================
# Default Command
# ============================================================================
# Keep container running so VS Code can attach.
# Actual commands (npm run dev, etc.) are run via devcontainer.json.
CMD ["bash"]