Files
flyer-crawler.projectium.com/.gitea/workflows/pm2-diagnostics.yml
Torben Sorensen cd8ee92813
Some checks failed
Deploy to Test Environment / deploy-to-test (push) Has been cancelled
debug: add PM2 crash debugging tools
2026-02-18 09:43:40 -08:00

189 lines
8.0 KiB
YAML

# .gitea/workflows/pm2-diagnostics.yml
#
# Comprehensive PM2 diagnostics to identify crash causes and problematic projects
name: PM2 Diagnostics
on:
workflow_dispatch:
inputs:
capture_interval:
description: 'Seconds between PM2 state captures (default: 5)'
required: false
default: '5'
duration:
description: 'Total monitoring duration in seconds (default: 60)'
required: false
default: '60'
jobs:
pm2-diagnostics:
runs-on: projectium.com
steps:
- name: PM2 Current State Snapshot
run: |
echo "========================================="
echo "PM2 CURRENT STATE SNAPSHOT"
echo "========================================="
echo ""
echo "--- PM2 List (Human Readable) ---"
pm2 list
echo ""
echo "--- PM2 List (JSON) ---"
pm2 jlist > /tmp/pm2-state-initial.json
cat /tmp/pm2-state-initial.json | jq '.'
echo ""
echo "--- PM2 Daemon Info ---"
pm2 info pm2-logrotate || echo "pm2-logrotate not found"
echo ""
echo "--- PM2 Version ---"
pm2 --version
echo ""
echo "--- Node Version ---"
node --version
- name: PM2 Process Working Directories
run: |
echo "========================================="
echo "PROCESS WORKING DIRECTORIES"
echo "========================================="
pm2 jlist | jq -r '.[] | "Process: \(.name) | CWD: \(.pm2_env.pm_cwd) | Exists: \(if .pm2_env.pm_cwd then "checking..." else "N/A" end)"'
echo ""
echo "--- Checking if CWDs still exist ---"
pm2 jlist | jq -r '.[].pm2_env.pm_cwd' | while read cwd; do
if [ -d "$cwd" ]; then
echo "✅ EXISTS: $cwd"
else
echo "❌ MISSING: $cwd (THIS WILL CAUSE CRASHES!)"
fi
done
- name: PM2 Log Analysis
run: |
echo "========================================="
echo "PM2 LOG ANALYSIS"
echo "========================================="
echo ""
echo "--- PM2 Daemon Log (Last 100 Lines) ---"
tail -100 /home/gitea-runner/.pm2/pm2.log
echo ""
echo "--- Searching for ENOENT errors ---"
grep -i "ENOENT\|no such file or directory\|uv_cwd" /home/gitea-runner/.pm2/pm2.log || echo "No ENOENT errors found"
echo ""
echo "--- Searching for crash patterns ---"
grep -i "crash\|error\|exception" /home/gitea-runner/.pm2/pm2.log | tail -50 || echo "No crashes found"
- name: Identify All PM2-Managed Projects
run: |
echo "========================================="
echo "ALL PM2-MANAGED PROJECTS"
echo "========================================="
pm2 jlist | jq -r '.[] | "[\(.pm_id)] \(.name) - v\(.pm2_env.version // "N/A") - \(.pm2_env.status) - CWD: \(.pm2_env.pm_cwd)"'
echo ""
echo "--- Projects by CWD ---"
pm2 jlist | jq -r '.[].pm2_env.pm_cwd' | sort -u
echo ""
echo "--- Checking which projects might interfere ---"
for dir in /var/www/*; do
if [ -d "$dir" ]; then
echo ""
echo "Directory: $dir"
ls -la "$dir" | grep -E "ecosystem|package.json|node_modules" || echo " No PM2/Node files"
fi
done
- name: Monitor PM2 State Over Time
run: |
echo "========================================="
echo "PM2 STATE MONITORING"
echo "========================================="
echo "Monitoring PM2 for ${{ gitea.event.inputs.duration }} seconds..."
echo "Capturing state every ${{ gitea.event.inputs.capture_interval }} seconds"
echo ""
INTERVAL=${{ gitea.event.inputs.capture_interval }}
DURATION=${{ gitea.event.inputs.duration }}
COUNT=$((DURATION / INTERVAL))
for i in $(seq 1 $COUNT); do
echo "--- Capture $i at $(date) ---"
pm2 jlist | jq -r '.[] | "\(.name): \(.pm2_env.status) (restarts: \(.pm2_env.restart_time))"'
# Check for new crashes
CRASHED=$(pm2 jlist | jq '[.[] | select(.pm2_env.status == "errored" or .pm2_env.status == "stopped")] | length')
if [ "$CRASHED" -gt 0 ]; then
echo "⚠️ WARNING: $CRASHED process(es) in crashed state!"
pm2 jlist | jq -r '.[] | select(.pm2_env.status == "errored" or .pm2_env.status == "stopped") | " - \(.name): \(.pm2_env.status)"'
fi
sleep $INTERVAL
done
- name: PM2 Dump File Analysis
run: |
echo "========================================="
echo "PM2 DUMP FILE ANALYSIS"
echo "========================================="
echo "--- Dump file location ---"
ls -lh /home/gitea-runner/.pm2/dump.pm2
echo ""
echo "--- Dump file contents ---"
cat /home/gitea-runner/.pm2/dump.pm2 | jq '.'
echo ""
echo "--- Processes in dump ---"
cat /home/gitea-runner/.pm2/dump.pm2 | jq -r '.apps[] | "\(.name) at \(.pm_cwd)"'
- name: Check for Rogue Deployment Scripts
run: |
echo "========================================="
echo "DEPLOYMENT SCRIPT ANALYSIS"
echo "========================================="
echo "Checking for scripts that might delete directories..."
echo ""
for project in flyer-crawler stock-alert; do
for env in "" "-test"; do
DIR="/var/www/$project$env.projectium.com"
if [ -d "$DIR" ]; then
echo "--- Project: $project$env ---"
echo "Location: $DIR"
if [ -f "$DIR/.gitea/workflows/deploy-to-test.yml" ]; then
echo "Has deploy-to-test workflow"
grep -n "rsync.*--delete\|rm -rf" "$DIR/.gitea/workflows/deploy-to-test.yml" | head -5 || echo "No dangerous commands found"
fi
if [ -f "$DIR/.gitea/workflows/deploy-to-prod.yml" ]; then
echo "Has deploy-to-prod workflow"
grep -n "rsync.*--delete\|rm -rf" "$DIR/.gitea/workflows/deploy-to-prod.yml" | head -5 || echo "No dangerous commands found"
fi
echo ""
fi
done
done
- name: Generate Diagnostic Report
run: |
echo "========================================="
echo "DIAGNOSTIC SUMMARY"
echo "========================================="
echo ""
echo "Total PM2 processes: $(pm2 jlist | jq 'length')"
echo "Online: $(pm2 jlist | jq '[.[] | select(.pm2_env.status == "online")] | length')"
echo "Stopped: $(pm2 jlist | jq '[.[] | select(.pm2_env.status == "stopped")] | length')"
echo "Errored: $(pm2 jlist | jq '[.[] | select(.pm2_env.status == "errored")] | length')"
echo ""
echo "Flyer-crawler processes:"
pm2 jlist | jq -r '.[] | select(.name | contains("flyer-crawler")) | " \(.name): \(.pm2_env.status)"'
echo ""
echo "Stock-alert processes:"
pm2 jlist | jq -r '.[] | select(.name | contains("stock-alert")) | " \(.name): \(.pm2_env.status)"'
echo ""
echo "Other processes:"
pm2 jlist | jq -r '.[] | select(.name | contains("flyer-crawler") | not) | select(.name | contains("stock-alert") | not) | " \(.name): \(.pm2_env.status)"'
echo ""
echo "========================================="
echo "RECOMMENDATIONS"
echo "========================================="
echo "1. Check for missing CWDs (marked with ❌ above)"
echo "2. Review PM2 daemon log for ENOENT errors"
echo "3. Verify no deployments are running rsync --delete while processes are online"
echo "4. Consider separating PM2 daemons by user or using PM2 namespaces"