- Updated TypeScript configuration to include test files. - Modified Vitest configuration to include test files from both src and tests directories. - Added ADR-063 documenting the decision and implementation of PM2 namespaces. - Created implementation report detailing the migration to PM2 namespaces. - Developed migration script for transitioning to namespaced PM2 processes. - Updated ecosystem configuration files to define namespaces for production, test, and development environments. - Enhanced workflow files to include namespace flags in all PM2 commands. - Verified migration with comprehensive tests ensuring all processes are correctly namespaced.
259 lines
12 KiB
YAML
259 lines
12 KiB
YAML
# .gitea/workflows/pm2-diagnostics.yml
#
# Manually dispatched PM2 diagnostics: snapshots process state, inspects the
# daemon log and dump file, and looks for crash causes and problematic projects.
name: PM2 Diagnostics

# Runs only when started by hand; both inputs are optional strings.
on:
  workflow_dispatch:
    inputs:
      # Pause between two consecutive state captures in the monitoring step.
      capture_interval:
        description: 'Seconds between PM2 state captures (default: 5)'
        default: '5'
        required: false
      # Overall length of the monitoring step.
      duration:
        description: 'Total monitoring duration in seconds (default: 60)'
        default: '60'
        required: false

jobs:
  pm2-diagnostics:
    # Runner label. NOTE(review): presumably the self-hosted machine whose PM2
    # daemon is being inspected — confirm.
    runs-on: projectium.com

    steps:
|
|
- name: PM2 Current State Snapshot
  run: |
    # Show the human-readable and JSON process lists for one namespace.
    #   $1 = heading label, $2 = PM2 namespace, $3 = file that keeps the JSON
    # NOTE(review): the JSON contains each process's pm2_env; check that
    # nothing sensitive ends up in the job log.
    snapshot_namespace() {
      printf '%s\n' "=== $1 Namespace ($2) ===" "--- PM2 List (Human Readable) ---"
      pm2 list --namespace "$2"
      printf '%s\n' "" "--- PM2 List (JSON) ---"
      pm2 jlist --namespace "$2" > "$3"
      jq '.' "$3"
      printf '\n'
    }

    printf '%s\n' \
      "=========================================" \
      "PM2 CURRENT STATE SNAPSHOT" \
      "=========================================" \
      ""

    snapshot_namespace "Production" flyer-crawler-prod /tmp/pm2-state-initial-prod.json
    snapshot_namespace "Test" flyer-crawler-test /tmp/pm2-state-initial-test.json

    # Unfiltered view plus tool versions, for context.
    printf '%s\n' "=== All Namespaces Combined ===" "--- PM2 List (All) ---"
    pm2 list
    printf '%s\n' "" "--- PM2 Daemon Info ---"
    pm2 info pm2-logrotate || printf '%s\n' "pm2-logrotate not found"
    printf '%s\n' "" "--- PM2 Version ---"
    pm2 --version
    printf '%s\n' "" "--- Node Version ---"
    node --version
|
|
|
|
- name: PM2 Process Working Directories
  run: |
    echo "========================================="
    echo "PROCESS WORKING DIRECTORIES"
    echo "========================================="

    # Emit, as one JSON array, the PM2 processes that belong to namespace $1.
    # NOTE(review): it is unclear whether `pm2 jlist` honours --namespace, so
    # the list is filtered again in jq on pm2_env.namespace — confirm.
    ns_procs() {
      pm2 jlist --namespace "$1" |
        jq -c --arg ns "$1" '[.[] | select((.pm2_env.namespace // "default") == $ns)]'
    }

    for ns in flyer-crawler-prod flyer-crawler-test; do
      case "$ns" in
        *-prod) label="Production" ;;
        *)      label="Test" ;;
      esac

      echo ""
      echo "=== $label Namespace ($ns) ==="

      # Query PM2 once so the listing and the existence check below describe
      # the same state; fall back to an empty list if the query fails.
      procs="$(ns_procs "$ns")" || {
        echo "⚠️ could not read PM2 state for $ns" >&2
        procs='[]'
      }

      printf '%s\n' "$procs" |
        jq -r '.[] | "Process: \(.name) | CWD: \(.pm2_env.pm_cwd) | Exists: \(if .pm2_env.pm_cwd then "checking..." else "N/A" end)"'

      echo ""
      echo "--- Checking if CWDs still exist ---"
      # `// empty` skips processes without a recorded cwd. IFS= and -r keep
      # backslashes and leading/trailing whitespace in a path intact.
      printf '%s\n' "$procs" | jq -r '.[].pm2_env.pm_cwd // empty' |
        while IFS= read -r cwd; do
          [ -n "$cwd" ] || continue
          if [ -d "$cwd" ]; then
            echo "✅ EXISTS: $cwd"
          else
            echo "❌ MISSING: $cwd (THIS WILL CAUSE CRASHES!)"
          fi
        done
    done
|
|
|
|
- name: PM2 Log Analysis
  run: |
    # Daemon log location. PM2_HOME, when set, overrides the runner
    # account's default PM2 directory.
    PM2_LOG="${PM2_HOME:-/home/gitea-runner/.pm2}/pm2.log"

    echo "========================================="
    echo "PM2 LOG ANALYSIS"
    echo "========================================="
    echo ""

    # A missing log is itself a finding; report it instead of failing the
    # step so the remaining diagnostics still run.
    if [ ! -r "$PM2_LOG" ]; then
      echo "PM2 daemon log not found or unreadable: $PM2_LOG"
      exit 0
    fi

    echo "--- PM2 Daemon Log (Last 100 Lines) ---"
    tail -n 100 "$PM2_LOG"
    echo ""

    echo "--- Searching for ENOENT errors ---"
    grep -iE 'ENOENT|no such file or directory|uv_cwd' "$PM2_LOG" || echo "No ENOENT errors found"
    echo ""

    echo "--- Searching for crash patterns ---"
    # Collect the matches first. In `grep | tail || echo`, the fallback is
    # driven by tail's exit status unless pipefail is on, so the
    # "No crashes found" message would never be printed.
    crash_lines="$(grep -iE 'crash|error|exception' "$PM2_LOG" | tail -n 50 || true)"
    if [ -n "$crash_lines" ]; then
      printf '%s\n' "$crash_lines"
    else
      echo "No crashes found"
    fi
|
|
|
|
- name: Identify All PM2-Managed Projects
  run: |
    # Print the process table and the distinct working directories of one
    # namespace.  $1 = heading label, $2 = PM2 namespace.
    list_namespace() {
      echo "=== $1 Namespace ($2) ==="
      pm2 jlist --namespace "$2" |
        jq -r '.[] | "[\(.pm_id)] \(.name) - v\(.pm2_env.version // "N/A") - \(.pm2_env.status) - CWD: \(.pm2_env.pm_cwd)"'
      echo ""
      echo "--- Projects by CWD ---"
      pm2 jlist --namespace "$2" | jq -r '.[].pm2_env.pm_cwd' | sort -u
      echo ""
    }

    printf '%s\n' \
      "=========================================" \
      "ALL PM2-MANAGED PROJECTS" \
      "=========================================" \
      ""

    list_namespace "Production" flyer-crawler-prod
    list_namespace "Test" flyer-crawler-test

    # Every process regardless of namespace, tagged with the one it runs in.
    echo "=== All Namespaces (for reference) ==="
    pm2 jlist |
      jq -r '.[] | "[\(.pm_id)] \(.name) [ns: \(.pm2_env.namespace // "default")] - \(.pm2_env.status)"'
    echo ""

    # Look through each directory under /var/www for PM2/Node marker entries.
    echo "--- Checking which projects might interfere ---"
    for candidate in /var/www/*; do
      # Also skips the unexpanded glob when /var/www is empty.
      [ -d "$candidate" ] || continue
      echo ""
      echo "Directory: $candidate"
      ls -la "$candidate" | grep -E "ecosystem|package.json|node_modules" || echo " No PM2/Node files"
    done
|
|
|
|
- name: Monitor PM2 State Over Time
  env:
    # Pass the dispatch inputs to the script as data instead of expanding
    # them into the script text, so a crafted value cannot inject commands.
    CAPTURE_INTERVAL: "${{ gitea.event.inputs.capture_interval }}"
    MONITOR_DURATION: "${{ gitea.event.inputs.duration }}"
  run: |
    # Fall back to the documented defaults when an input arrives empty.
    INTERVAL="${CAPTURE_INTERVAL:-5}"
    DURATION="${MONITOR_DURATION:-60}"

    # Accept only positive integers without leading zeros: anything else
    # would break the arithmetic below (syntax error, octal parsing or
    # division by zero).
    for value in "$INTERVAL" "$DURATION"; do
      case "$value" in
        ''|0*|*[!0-9]*)
          echo "❌ capture_interval and duration must be positive integers (got '$value')"
          exit 1
          ;;
      esac
    done

    echo "========================================="
    echo "PM2 STATE MONITORING"
    echo "========================================="
    echo "Monitoring PM2 for $DURATION seconds..."
    echo "Capturing state every $INTERVAL seconds"
    echo ""

    # Always take at least one capture, even if duration < interval.
    COUNT=$((DURATION / INTERVAL))
    if [ "$COUNT" -lt 1 ]; then
      COUNT=1
    fi

    # Emit, as one JSON array, the PM2 processes that belong to namespace $1.
    # NOTE(review): it is unclear whether `pm2 jlist` honours --namespace, so
    # the list is filtered again in jq on pm2_env.namespace — confirm.
    ns_procs() {
      pm2 jlist --namespace "$1" |
        jq -c --arg ns "$1" '[.[] | select((.pm2_env.namespace // "default") == $ns)]'
    }

    for i in $(seq 1 "$COUNT"); do
      echo "--- Capture $i at $(date) ---"

      for ns in flyer-crawler-prod flyer-crawler-test; do
        case "$ns" in
          *-prod) label="Production"; tag="PRODUCTION" ;;
          *)      label="Test";       tag="TEST" ;;
        esac

        echo ""
        echo "=== $label Namespace ($ns) ==="

        # One query per namespace per capture, so the status lines and the
        # crash count come from the same state. A failed query must not end
        # the whole monitoring run.
        if ! procs="$(ns_procs "$ns")"; then
          echo "⚠️ WARNING: could not read PM2 state for $ns"
          continue
        fi

        printf '%s\n' "$procs" |
          jq -r '.[] | "\(.name): \(.pm2_env.status) (restarts: \(.pm2_env.restart_time))"'

        # Processes that are errored or stopped are reported as crashed.
        crashed="$(printf '%s\n' "$procs" |
          jq '[.[] | select(.pm2_env.status == "errored" or .pm2_env.status == "stopped")] | length')"
        # ${crashed:-0} keeps the test valid if jq printed nothing.
        if [ "${crashed:-0}" -gt 0 ]; then
          echo "⚠️ WARNING: $crashed $tag process(es) in crashed state!"
          printf '%s\n' "$procs" |
            jq -r '.[] | select(.pm2_env.status == "errored" or .pm2_env.status == "stopped") | " - \(.name): \(.pm2_env.status)"'
        fi
      done

      echo ""
      sleep "$INTERVAL"
    done
|
|
|
|
- name: PM2 Dump File Analysis
  run: |
    # Dump file location. PM2_HOME, when set, overrides the runner
    # account's default PM2 directory.
    DUMP_FILE="${PM2_HOME:-/home/gitea-runner/.pm2}/dump.pm2"

    echo "========================================="
    echo "PM2 DUMP FILE ANALYSIS"
    echo "========================================="

    # Report a missing dump instead of failing the step, so the remaining
    # diagnostics still run.
    if [ ! -r "$DUMP_FILE" ]; then
      echo "PM2 dump file not found or unreadable: $DUMP_FILE"
      exit 0
    fi

    echo "--- Dump file location ---"
    ls -lh "$DUMP_FILE"
    echo ""

    # NOTE(review): the dump holds each process's saved environment; check
    # that nothing sensitive ends up in the job log.
    echo "--- Dump file contents ---"
    jq '.' "$DUMP_FILE"
    echo ""

    echo "--- Processes in dump ---"
    # Accept both a top-level array of process entries and an object with
    # an `apps` array; `.apps[]` alone errors on an array.
    # NOTE(review): PM2 appears to write the dump as a top-level array — confirm.
    jq -r '(if type == "array" then . else (.apps // []) end)[] | "\(.name) at \(.pm_cwd)"' "$DUMP_FILE"
|
|
|
|
- name: Check for Rogue Deployment Scripts
  run: |
    echo "========================================="
    echo "DEPLOYMENT SCRIPT ANALYSIS"
    echo "========================================="
    echo "Checking for scripts that might delete directories..."
    echo ""

    # Visit the production ("") and test ("-test") checkout of each project.
    for project in flyer-crawler stock-alert; do
      for env in "" "-test"; do
        DIR="/var/www/${project}${env}.projectium.com"
        [ -d "$DIR" ] || continue

        echo "--- Project: ${project}${env} ---"
        echo "Location: $DIR"

        for target in test prod; do
          workflow="$DIR/.gitea/workflows/deploy-to-$target.yml"
          [ -f "$workflow" ] || continue

          echo "Has deploy-to-$target workflow"
          # Collect the matches first. In `grep | head || echo`, the
          # fallback is driven by head's exit status unless pipefail is on,
          # so "No dangerous commands found" would never be printed.
          hits="$(grep -nE 'rsync.*--delete|rm -rf' "$workflow" | head -n 5 || true)"
          if [ -n "$hits" ]; then
            printf '%s\n' "$hits"
          else
            echo "No dangerous commands found"
          fi
        done

        echo ""
      done
    done
|
|
|
|
- name: Generate Diagnostic Report
  run: |
    echo "========================================="
    echo "DIAGNOSTIC SUMMARY"
    echo "========================================="

    # Emit, as one JSON array, the PM2 processes that belong to namespace $1.
    # NOTE(review): it is unclear whether `pm2 jlist` honours --namespace, so
    # the list is filtered again in jq on pm2_env.namespace — confirm.
    ns_procs() {
      pm2 jlist --namespace "$1" |
        jq -c --arg ns "$1" '[.[] | select((.pm2_env.namespace // "default") == $ns)]'
    }

    # Count the processes in the JSON array on stdin whose status equals $1.
    count_status() {
      jq --arg status "$1" '[.[] | select(.pm2_env.status == $status)] | length'
    }

    for ns in flyer-crawler-prod flyer-crawler-test; do
      case "$ns" in
        *-prod) label="Production"; tag="PROD" ;;
        *)      label="Test";       tag="TEST" ;;
      esac

      echo ""
      echo "=== $label Namespace ($ns) ==="

      # Take one snapshot per namespace so the total and the per-status
      # counts are mutually consistent; use an empty list if PM2 cannot
      # be queried.
      procs="$(ns_procs "$ns")" || {
        echo "⚠️ could not read PM2 state for $ns" >&2
        procs='[]'
      }

      echo "Total processes: $(printf '%s\n' "$procs" | jq 'length')"
      echo "Online: $(printf '%s\n' "$procs" | count_status online)"
      echo "Stopped: $(printf '%s\n' "$procs" | count_status stopped)"
      echo "Errored: $(printf '%s\n' "$procs" | count_status errored)"
      echo ""
      echo "Flyer-crawler $tag processes:"
      printf '%s\n' "$procs" |
        jq -r '.[] | select(.name | contains("flyer-crawler")) | " \(.name): \(.pm2_env.status)"'
    done

    echo ""
    echo "=== All Namespaces Summary ==="
    # Single unfiltered snapshot shared by the three listings below.
    all_procs="$(pm2 jlist)" || {
      echo "⚠️ could not read PM2 state" >&2
      all_procs='[]'
    }
    echo "Total PM2 processes (all): $(printf '%s\n' "$all_procs" | jq 'length')"
    echo ""
    echo "Stock-alert processes (separate project):"
    printf '%s\n' "$all_procs" |
      jq -r '.[] | select(.name | contains("stock-alert")) | " \(.name): \(.pm2_env.status) [ns: \(.pm2_env.namespace // "default")]"'
    echo ""
    echo "Other processes:"
    printf '%s\n' "$all_procs" |
      jq -r '.[] | select(.name | contains("flyer-crawler") | not) | select(.name | contains("stock-alert") | not) | " \(.name): \(.pm2_env.status) [ns: \(.pm2_env.namespace // "default")]"'
    echo ""

    echo "========================================="
    echo "RECOMMENDATIONS"
    echo "========================================="
    echo "1. Check for missing CWDs (marked with ❌ above)"
    echo "2. Review PM2 daemon log for ENOENT errors"
    echo "3. Verify no deployments are running rsync --delete while processes are online"
    echo "4. Use namespace-specific commands: pm2 list --namespace flyer-crawler-prod"
    echo "5. Avoid pm2 restart all - use namespace targeting instead"
|