Some checks failed
Deploy to Test Environment / deploy-to-test (push) Has been cancelled
189 lines
8.0 KiB
YAML
189 lines
8.0 KiB
YAML
# .gitea/workflows/pm2-diagnostics.yml
#
# Manually triggered PM2 diagnostics: snapshots process state, checks working
# directories and logs, and watches for crashes so the problematic project
# can be identified.
name: PM2 Diagnostics

# Manual trigger only. Both inputs are optional; their defaults are quoted so
# they stay strings.
on:
  workflow_dispatch:
    inputs:
      capture_interval:
        description: 'Seconds between PM2 state captures (default: 5)'
        required: false
        default: '5'
      duration:
        description: 'Total monitoring duration in seconds (default: 60)'
        required: false
        default: '60'

jobs:
  pm2-diagnostics:
    # Runner label; every step below executes on the runner matching it.
    runs-on: projectium.com

    steps:
|
|
- name: PM2 Current State Snapshot
  run: |
    # Print a "--- title ---" heading preceded by one blank line.
    section() {
      echo ""
      echo "--- $1 ---"
    }

    echo "========================================="
    echo "PM2 CURRENT STATE SNAPSHOT"
    echo "========================================="

    section "PM2 List (Human Readable)"
    pm2 list

    section "PM2 List (JSON)"
    # Keep a copy of the initial state on disk, then pretty-print that copy.
    pm2 jlist > /tmp/pm2-state-initial.json
    jq '.' /tmp/pm2-state-initial.json

    section "PM2 Daemon Info"
    pm2 info pm2-logrotate || echo "pm2-logrotate not found"

    section "PM2 Version"
    pm2 --version

    section "Node Version"
    node --version
|
|
|
|
- name: PM2 Process Working Directories
  # Lists every PM2 process with its recorded working directory, then checks
  # on disk whether each of those directories still exists.
  run: |
    echo "========================================="
    echo "PROCESS WORKING DIRECTORIES"
    echo "========================================="
    pm2 jlist | jq -r '.[] | "Process: \(.name) | CWD: \(.pm2_env.pm_cwd) | Exists: \(if .pm2_env.pm_cwd then "checking..." else "N/A" end)"'
    echo ""
    echo "--- Checking if CWDs still exist ---"
    # `// empty` skips processes with no recorded cwd; without it jq prints
    # the literal string "null", which was then reported as a missing
    # directory. `IFS=` and `-r` keep each path exactly as jq printed it
    # (no backslash processing, no trimming of leading/trailing blanks).
    pm2 jlist | jq -r '.[].pm2_env.pm_cwd // empty' | while IFS= read -r cwd; do
      if [ -d "$cwd" ]; then
        echo "✅ EXISTS: $cwd"
      else
        echo "❌ MISSING: $cwd (THIS WILL CAUSE CRASHES!)"
      fi
    done
|
|
|
|
- name: PM2 Log Analysis
  # Shows the tail of the PM2 daemon log and greps it for missing-path
  # (ENOENT / uv_cwd) errors and for generic crash keywords.
  run: |
    # Use PM2_HOME when it is set; otherwise fall back to the location this
    # workflow has always read.
    PM2_LOG="${PM2_HOME:-/home/gitea-runner/.pm2}/pm2.log"

    echo "========================================="
    echo "PM2 LOG ANALYSIS"
    echo "========================================="

    # A missing log must not fail the step: later steps should still run.
    if [ ! -r "$PM2_LOG" ]; then
      echo "PM2 daemon log not readable: $PM2_LOG"
      exit 0
    fi

    echo ""
    echo "--- PM2 Daemon Log (Last 100 Lines) ---"
    tail -n 100 "$PM2_LOG"
    echo ""
    echo "--- Searching for ENOENT errors ---"
    grep -i "ENOENT\|no such file or directory\|uv_cwd" "$PM2_LOG" || echo "No ENOENT errors found"
    echo ""
    echo "--- Searching for crash patterns ---"
    # Capture the matches and test for emptiness. With `grep | tail || echo`
    # the fallback depends on tail's exit status unless pipefail is active,
    # so "No crashes found" might never be printed.
    CRASH_LINES="$(grep -i "crash\|error\|exception" "$PM2_LOG" | tail -n 50 || true)"
    if [ -n "$CRASH_LINES" ]; then
      printf '%s\n' "$CRASH_LINES"
    else
      echo "No crashes found"
    fi
|
|
|
|
- name: Identify All PM2-Managed Projects
  # Prints every PM2 process with id, version, status and cwd, the distinct
  # set of working directories, and which /var/www entries contain
  # PM2/Node-related files.
  run: |
    echo "========================================="
    echo "ALL PM2-MANAGED PROJECTS"
    echo "========================================="
    pm2 jlist | jq -r '.[] | "[\(.pm_id)] \(.name) - v\(.pm2_env.version // "N/A") - \(.pm2_env.status) - CWD: \(.pm2_env.pm_cwd)"'

    echo
    echo "--- Projects by CWD ---"
    pm2 jlist | jq -r '.[].pm2_env.pm_cwd' | sort -u

    echo
    echo "--- Checking which projects might interfere ---"
    for candidate in /var/www/*; do
      # Skips plain files, and the unexpanded pattern when nothing matches.
      [ -d "$candidate" ] || continue

      echo
      echo "Directory: $candidate"
      if ! ls -la "$candidate" | grep -E "ecosystem|package.json|node_modules"; then
        echo " No PM2/Node files"
      fi
    done
|
|
|
|
- name: Monitor PM2 State Over Time
  # Polls PM2 every capture_interval seconds for roughly duration seconds,
  # printing each process's status and restart count and flagging processes
  # that are "errored" or "stopped".
  #
  # The workflow inputs are passed through environment variables rather than
  # being pasted into the script text, so an input value can never be
  # executed as shell code.
  env:
    CAPTURE_INTERVAL: '${{ gitea.event.inputs.capture_interval }}'
    MONITOR_DURATION: '${{ gitea.event.inputs.duration }}'
  run: |
    # Fall back to the documented defaults when an input arrives empty.
    INTERVAL="${CAPTURE_INTERVAL:-5}"
    DURATION="${MONITOR_DURATION:-60}"

    # Accept plain decimal integers only. Leading zeros are rejected because
    # shell arithmetic would read them as octal.
    for value in "$INTERVAL" "$DURATION"; do
      case "$value" in
        ''|*[!0-9]*|0?*)
          echo "Invalid input '$value': expected a non-negative integer without leading zeros"
          exit 1
          ;;
      esac
    done
    # INTERVAL is the divisor below, so zero is not allowed.
    if [ "$INTERVAL" -eq 0 ]; then
      echo "capture_interval must be greater than 0"
      exit 1
    fi

    echo "========================================="
    echo "PM2 STATE MONITORING"
    echo "========================================="
    echo "Monitoring PM2 for $DURATION seconds..."
    echo "Capturing state every $INTERVAL seconds"
    echo ""

    COUNT=$((DURATION / INTERVAL))

    for i in $(seq 1 "$COUNT"); do
      echo "--- Capture $i at $(date) ---"

      # One snapshot per capture so the listing and the crash check below
      # describe the same moment.
      STATE="$(pm2 jlist || true)"
      printf '%s\n' "$STATE" | jq -r '.[] | "\(.name): \(.pm2_env.status) (restarts: \(.pm2_env.restart_time))"'

      # Check for new crashes
      CRASHED="$(printf '%s\n' "$STATE" | jq '[.[] | select(.pm2_env.status == "errored" or .pm2_env.status == "stopped")] | length')"
      if [ "${CRASHED:-0}" -gt 0 ]; then
        echo "⚠️ WARNING: $CRASHED process(es) in crashed state!"
        printf '%s\n' "$STATE" | jq -r '.[] | select(.pm2_env.status == "errored" or .pm2_env.status == "stopped") | " - \(.name): \(.pm2_env.status)"'
      fi

      sleep "$INTERVAL"
    done
|
|
|
|
- name: PM2 Dump File Analysis
  # Shows the saved PM2 process list (dump.pm2) and the name and working
  # directory of each process recorded in it.
  # NOTE(review): the dump presumably includes each process's saved
  # environment; confirm that printing it to the job log is acceptable.
  run: |
    # Use PM2_HOME when it is set; otherwise fall back to the location this
    # workflow has always read.
    DUMP_FILE="${PM2_HOME:-/home/gitea-runner/.pm2}/dump.pm2"

    echo "========================================="
    echo "PM2 DUMP FILE ANALYSIS"
    echo "========================================="
    echo "--- Dump file location ---"
    # A missing dump must not fail the step: later steps should still run.
    if [ ! -f "$DUMP_FILE" ]; then
      echo "No dump file at $DUMP_FILE"
      exit 0
    fi
    ls -lh "$DUMP_FILE"
    echo ""
    echo "--- Dump file contents ---"
    jq '.' "$DUMP_FILE"
    echo ""
    echo "--- Processes in dump ---"
    # `.apps[]` errors out when the document is a top-level JSON array,
    # which appears to be the shape `pm2 save` writes (TODO confirm).
    # Accept both the bare array and an object with an `apps` list.
    jq -r '(if type == "array" then . else (.apps // []) end)[] | "\(.name) at \(.pm_cwd)"' "$DUMP_FILE"
|
|
|
|
- name: Check for Rogue Deployment Scripts
  # For each known project (and its -test variant) deployed under /var/www,
  # greps the checked-out deploy workflows for commands that delete
  # directories (`rsync --delete`, `rm -rf`).
  run: |
    echo "========================================="
    echo "DEPLOYMENT SCRIPT ANALYSIS"
    echo "========================================="
    echo "Checking for scripts that might delete directories..."
    echo ""
    for project in flyer-crawler stock-alert; do
      for env in "" "-test"; do
        DIR="/var/www/$project$env.projectium.com"
        [ -d "$DIR" ] || continue

        echo "--- Project: $project$env ---"
        echo "Location: $DIR"
        for target in test prod; do
          WORKFLOW="$DIR/.gitea/workflows/deploy-to-$target.yml"
          [ -f "$WORKFLOW" ] || continue

          echo "Has deploy-to-$target workflow"
          # Capture the matches and test for emptiness. With
          # `grep | head || echo` the fallback depends on head's exit status
          # unless pipefail is active, so the "nothing found" message might
          # never be printed.
          HITS="$(grep -n "rsync.*--delete\|rm -rf" "$WORKFLOW" | head -n 5 || true)"
          if [ -n "$HITS" ]; then
            printf '%s\n' "$HITS"
          else
            echo "No dangerous commands found"
          fi
        done
        echo ""
      done
    done
|
|
|
|
- name: Generate Diagnostic Report
  # Prints process counts by status, the processes grouped by project name,
  # and a fixed list of follow-up recommendations.
  run: |
    # Take a single snapshot so every count and list below describes the
    # same PM2 state; separate `pm2 jlist` calls could each see a different
    # state if a process restarts in between.
    if ! PM2_STATE="$(pm2 jlist)"; then
      echo "WARNING: pm2 jlist failed; the summary below is empty"
      PM2_STATE='[]'
    fi

    # Run one jq filter against the snapshot.
    query() {
      printf '%s\n' "$PM2_STATE" | jq -r "$1"
    }

    echo "========================================="
    echo "DIAGNOSTIC SUMMARY"
    echo "========================================="
    echo ""
    echo "Total PM2 processes: $(query 'length')"
    echo "Online: $(query '[.[] | select(.pm2_env.status == "online")] | length')"
    echo "Stopped: $(query '[.[] | select(.pm2_env.status == "stopped")] | length')"
    echo "Errored: $(query '[.[] | select(.pm2_env.status == "errored")] | length')"
    echo ""
    echo "Flyer-crawler processes:"
    query '.[] | select(.name | contains("flyer-crawler")) | " \(.name): \(.pm2_env.status)"'
    echo ""
    echo "Stock-alert processes:"
    query '.[] | select(.name | contains("stock-alert")) | " \(.name): \(.pm2_env.status)"'
    echo ""
    echo "Other processes:"
    query '.[] | select(.name | contains("flyer-crawler") | not) | select(.name | contains("stock-alert") | not) | " \(.name): \(.pm2_env.status)"'
    echo ""
    echo "========================================="
    echo "RECOMMENDATIONS"
    echo "========================================="
    echo "1. Check for missing CWDs (marked with ❌ above)"
    echo "2. Review PM2 daemon log for ENOENT errors"
    echo "3. Verify no deployments are running rsync --delete while processes are online"
    echo "4. Consider separating PM2 daemons by user or using PM2 namespaces"
|