From 626aa80799cc8ba2bed56814afbafc1c81b5d112 Mon Sep 17 00:00:00 2001 From: Torben Sorensen Date: Wed, 18 Feb 2026 18:32:54 -0800 Subject: [PATCH] feat: add extensive PM2 logging to test deployment workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive before/after logging for all PM2 commands to diagnose test process startup issues. Each PM2 operation now includes: - Pre-command state (process list, JSON details, timestamps) - Command execution with captured exit codes - Post-command verification with expected vs actual comparison - Clear success/warning/error indicators (✅/⚠️/❌) Enhanced sections: 1. Stop Test Server Before Tests - delete commands with verification 2. LAYER 2: Stop PM2 Before File Operations - stop commands + port checks 3. PM2 Cleanup - errored/stopped process removal with safety checks 4. PM2 Start/Reload - critical section with stability checks 5. Final Verification - comprehensive status + logs + environment This addresses reported issues where PM2 test processes fail to start by providing detailed diagnostics at every deployment stage. Co-Authored-By: Claude Sonnet 4.5 --- .gitea/workflows/deploy-to-test.yml | 394 ++++++++++++++++++++++++---- 1 file changed, 345 insertions(+), 49 deletions(-) diff --git a/.gitea/workflows/deploy-to-test.yml b/.gitea/workflows/deploy-to-test.yml index 7e9daeec..112827c2 100644 --- a/.gitea/workflows/deploy-to-test.yml +++ b/.gitea/workflows/deploy-to-test.yml @@ -120,16 +120,36 @@ jobs: # integration test suite can launch its own, fresh server instance. # '|| true' ensures the workflow doesn't fail if the process isn't running. run: | - echo "--- Stopping and deleting all test processes ---" + echo "=========================================" + echo "STOPPING TEST PROCESSES BEFORE TEST RUN" + echo "=========================================" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" + echo "" # === PRE-CLEANUP PM2 STATE LOGGING === echo "=== PRE-CLEANUP PM2 STATE ===" - pm2 jlist || echo "No PM2 processes running" + pm2 list || echo "PM2 list failed" + echo "" + pm2 jlist | node -e " + try { + const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); + console.log('Total PM2 processes: ' + list.length); + const testProcs = list.filter(p => p.name && p.name.endsWith('-test')); + console.log('Test processes: ' + testProcs.length); + testProcs.forEach(p => { + console.log(' ' + p.name + ': status=' + p.pm2_env.status + ', pm_id=' + p.pm2_env.pm_id); + }); + } catch(e) { + console.log('No PM2 processes running or parse failed'); + } + " || echo "No PM2 processes running" echo "=== END PRE-CLEANUP STATE ===" + echo "" # Use a script to parse pm2's JSON output and delete any process whose name ends with '-test'. # This is safer than 'pm2 delete all' and more robust than naming each process individually. # It prevents the accumulation of duplicate processes from previous test runs. + echo "[PM2 CLEANUP] Starting deletion of test processes..." node -e " const exec = require('child_process').execSync; try { @@ -154,38 +174,57 @@ jobs: }); // Perform the cleanup - targetProcesses.forEach(p => { - console.log('Deleting test process: ' + p.name + ' (' + p.pm2_env.pm_id + ')'); - try { - exec('pm2 delete ' + p.pm2_env.pm_id); - } catch(e) { - console.error('Failed to delete ' + p.pm2_env.pm_id, e.message); - } - }); + if (targetProcesses.length > 0) { + targetProcesses.forEach(p => { + console.log('[PM2 COMMAND] Deleting test process: ' + p.name + ' (pm_id: ' + p.pm2_env.pm_id + ')'); + try { + const output = exec('pm2 delete ' + p.pm2_env.pm_id).toString(); + console.log('[PM2 COMMAND RESULT] Delete succeeded for: ' + p.name); + } catch(e) { + console.error('[PM2 COMMAND RESULT] Failed to delete ' + p.pm2_env.pm_id + ':', e.message); + } + }); + } else { + console.log('No test processes to delete'); + } - console.log('Test process cleanup complete.'); + console.log('[PM2 CLEANUP] Test process cleanup complete.'); } catch (e) { if (e.stdout && e.stdout.toString().includes('No process found')) { console.log('No PM2 processes running, cleanup not needed.'); } else { - console.error('Error cleaning up test processes:', e.message); + console.error('[PM2 CLEANUP] Error cleaning up test processes:', e.message); } } " || true + CLEANUP_PRE_TEST_EXIT=$? + echo "[PM2 CLEANUP RESULT] Cleanup script exit code: $CLEANUP_PRE_TEST_EXIT" + echo "" # Save PM2 process list after cleanup to persist deletions - echo "Saving PM2 process list after cleanup..." + echo "[PM2 COMMAND] About to execute: pm2 save" pm2 save || true + SAVE_EXIT=$? + echo "[PM2 COMMAND RESULT] pm2 save exit code: $SAVE_EXIT" + echo "" # === POST-CLEANUP VERIFICATION === echo "=== POST-CLEANUP VERIFICATION ===" + pm2 list || echo "PM2 list failed" + echo "" pm2 jlist 2>/dev/null | node -e " try { const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); const testProcesses = list.filter(p => p.name && p.name.endsWith('-test')); const prodProcesses = list.filter(p => p.name && p.name.startsWith('flyer-crawler-') && !p.name.endsWith('-test') && !p.name.endsWith('-dev')); console.log('Test processes after cleanup: ' + testProcesses.length); - testProcesses.forEach(p => console.log(' ' + p.name + ': ' + p.pm2_env.status)); + if (testProcesses.length > 0) { + testProcesses.forEach(p => console.log(' ' + p.name + ': ' + p.pm2_env.status)); + console.log('⚠️ WARNING: Expected 0 test processes, found ' + testProcesses.length); + } else { + console.log('✅ All test processes deleted'); + } + console.log(''); console.log('Production processes (should be untouched): ' + prodProcesses.length); prodProcesses.forEach(p => console.log(' ' + p.name + ': ' + p.pm2_env.status)); } catch (e) { @@ -193,6 +232,7 @@ jobs: } " || true echo "=== END POST-CLEANUP VERIFICATION ===" + echo "" - name: Flush Redis Test Database Before Tests # CRITICAL: Clear Redis database 1 (test database) to remove stale BullMQ jobs. @@ -554,14 +594,47 @@ jobs: # Prevents ENOENT/uv_cwd errors by stopping processes before rsync --delete echo "" echo "--- LAYER 2: Stopping test PM2 processes ---" - echo "Current PM2 state:" + echo "" + echo "=== PM2 STATE BEFORE STOP COMMAND ===" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" pm2 list || echo "PM2 list failed" + echo "" + echo "Detailed JSON state:" + pm2 jlist | grep -E '"name"|"pm_id"|"status"|"pid"' || echo "PM2 jlist failed" + echo "=== END PM2 STATE BEFORE STOP ===" + echo "" - echo "Stopping flyer-crawler test processes..." + echo "[PM2 COMMAND] About to execute: pm2 stop flyer-crawler-api-test flyer-crawler-worker-test flyer-crawler-analytics-worker-test" pm2 stop flyer-crawler-api-test flyer-crawler-worker-test flyer-crawler-analytics-worker-test 2>&1 || echo "No test processes to stop (exit code: $?)" + STOP_EXIT_CODE=$? + echo "[PM2 COMMAND RESULT] pm2 stop exit code: $STOP_EXIT_CODE" + echo "" - echo "PM2 state after stop:" + echo "=== PM2 STATE AFTER STOP COMMAND ===" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" pm2 list || echo "PM2 list failed" + echo "" + echo "Verifying test processes are stopped:" + pm2 jlist | node -e " + try { + const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); + const testProcs = list.filter(p => p.name && p.name.endsWith('-test')); + console.log('Test processes count: ' + testProcs.length); + testProcs.forEach(p => { + console.log(' ' + p.name + ' (pm_id: ' + p.pm2_env.pm_id + ', status: ' + p.pm2_env.status + ', pid: ' + (p.pid || 'N/A') + ')'); + }); + const runningTests = testProcs.filter(p => p.pm2_env.status === 'online'); + if (runningTests.length > 0) { + console.log('WARNING: ' + runningTests.length + ' test processes still running!'); + } else { + console.log('✅ All test processes stopped'); + } + } catch(e) { + console.log('Failed to parse PM2 output'); + } + " || echo "Verification script failed" + echo "=== END PM2 STATE AFTER STOP ===" + echo "" # ====================================================================== # LAYER 3: SAFE RSYNC WITH COMPREHENSIVE EXCLUDES @@ -687,13 +760,31 @@ jobs: cd /var/www/flyer-crawler-test.projectium.com npm install --omit=dev - # === PRE-CLEANUP PM2 STATE LOGGING === + # ====================================================================== + # PM2 CLEANUP OF ERRORED/STOPPED TEST PROCESSES + # ====================================================================== + echo "" echo "=== PRE-CLEANUP PM2 STATE ===" - pm2 jlist + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" + echo "Full PM2 list:" + pm2 list + echo "" + echo "Detailed JSON state (test processes only):" + pm2 jlist | node -e " + try { + const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); + const testProcs = list.filter(p => p.name && p.name.endsWith('-test')); + console.log('Total test processes: ' + testProcs.length); + testProcs.forEach(p => { + console.log(' ' + p.name + ': status=' + p.pm2_env.status + ', pm_id=' + p.pm2_env.pm_id + ', pid=' + (p.pid || 'N/A')); + }); + } catch(e) { console.log('Failed to parse'); } + " || echo "Parse failed" echo "=== END PRE-CLEANUP STATE ===" + echo "" # --- Cleanup Errored Processes with Defense-in-Depth Safeguards --- - echo "Cleaning up errored or stopped TEST PM2 processes..." + echo "[PM2 CLEANUP] Starting cleanup of errored/stopped TEST processes..." node -e " const exec = require('child_process').execSync; try { @@ -721,24 +812,36 @@ jobs: }); // Perform the cleanup - targetProcesses.forEach(p => { - console.log('Deleting ' + p.pm2_env.status + ' test process: ' + p.name + ' (' + p.pm2_env.pm_id + ')'); - try { - exec('pm2 delete ' + p.pm2_env.pm_id); - } catch(e) { - console.error('Failed to delete ' + p.pm2_env.pm_id); - } - }); + if (targetProcesses.length > 0) { + targetProcesses.forEach(p => { + console.log('[PM2 COMMAND] About to delete: ' + p.name + ' (pm_id: ' + p.pm2_env.pm_id + ')'); + try { + const output = exec('pm2 delete ' + p.pm2_env.pm_id).toString(); + console.log('[PM2 COMMAND RESULT] Delete succeeded: ' + p.name); + console.log(output); + } catch(e) { + console.error('[PM2 COMMAND RESULT] Delete failed for ' + p.pm2_env.pm_id + ':', e.message); + } + }); + } else { + console.log('No errored/stopped test processes to clean'); + } - console.log('Test process cleanup complete.'); + console.log('[PM2 CLEANUP] Test process cleanup complete.'); } catch (e) { - console.error('Error cleaning up processes:', e); + console.error('[PM2 CLEANUP] Error during cleanup:', e.message); } " + CLEANUP_EXIT_CODE=$? + echo "[PM2 CLEANUP RESULT] Cleanup script exit code: $CLEANUP_EXIT_CODE" + echo "" # Save PM2 process list after cleanup to persist deletions - echo "Saving PM2 process list after cleanup..." + echo "[PM2 COMMAND] About to execute: pm2 save (after cleanup)" pm2 save + SAVE_CLEANUP_EXIT_CODE=$? + echo "[PM2 COMMAND RESULT] pm2 save exit code: $SAVE_CLEANUP_EXIT_CODE" + echo "" # === POST-CLEANUP VERIFICATION === echo "=== POST-CLEANUP VERIFICATION ===" @@ -759,13 +862,124 @@ jobs: } " echo "=== END POST-CLEANUP VERIFICATION ===" + echo "" - # Use `startOrReload` with the TEST ecosystem file. This starts test-specific processes - # (flyer-crawler-api-test, flyer-crawler-worker-test, flyer-crawler-analytics-worker-test) - # that run separately from production processes. - # We also add `&& pm2 save` to persist the process list across server reboots. - pm2 startOrReload ecosystem-test.config.cjs --update-env && pm2 save - echo "Test backend server reloaded successfully." + # ====================================================================== + # PM2 START/RELOAD - CRITICAL SECTION + # ====================================================================== + echo "=== PM2 STATE BEFORE START/RELOAD ===" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" + echo "Working directory: $(pwd)" + echo "Checking ecosystem-test.config.cjs exists:" + ls -lh ecosystem-test.config.cjs || echo "ERROR: ecosystem-test.config.cjs not found!" + echo "" + echo "Current PM2 processes:" + pm2 list + echo "" + echo "Checking port 3002 availability (test API port):" + netstat -tlnp | grep ':3002' || echo "Port 3002 is free" + echo "=== END PRE-START STATE ===" + echo "" + + echo "[PM2 COMMAND] About to execute: pm2 startOrReload ecosystem-test.config.cjs --update-env" + pm2 startOrReload ecosystem-test.config.cjs --update-env + STARTOR_RELOAD_EXIT_CODE=$? + echo "[PM2 COMMAND RESULT] pm2 startOrReload exit code: $STARTOR_RELOAD_EXIT_CODE" + echo "" + + if [ $STARTOR_RELOAD_EXIT_CODE -ne 0 ]; then + echo "ERROR: pm2 startOrReload failed with exit code $STARTOR_RELOAD_EXIT_CODE" + echo "Attempting to diagnose..." + pm2 list + pm2 logs --lines 50 --nostream || echo "Could not retrieve logs" + fi + + echo "=== PM2 STATE IMMEDIATELY AFTER START/RELOAD ===" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" + pm2 list + echo "" + echo "Detailed process status:" + pm2 jlist | node -e " + try { + const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); + const testProcs = list.filter(p => p.name && p.name.endsWith('-test')); + console.log('Test processes found: ' + testProcs.length); + console.log('Expected: 3 (flyer-crawler-api-test, flyer-crawler-worker-test, flyer-crawler-analytics-worker-test)'); + testProcs.forEach(p => { + console.log(''); + console.log('Process: ' + p.name); + console.log(' pm_id: ' + p.pm2_env.pm_id); + console.log(' status: ' + p.pm2_env.status); + console.log(' pid: ' + (p.pid || 'N/A')); + console.log(' restarts: ' + p.pm2_env.restart_time); + console.log(' uptime: ' + ((Date.now() - p.pm2_env.pm_uptime) / 1000).toFixed(1) + 's'); + console.log(' memory: ' + (p.monit ? (p.monit.memory / 1024 / 1024).toFixed(1) + 'MB' : 'N/A')); + }); + + const onlineTests = testProcs.filter(p => p.pm2_env.status === 'online'); + const erroredTests = testProcs.filter(p => p.pm2_env.status === 'errored'); + const stoppedTests = testProcs.filter(p => p.pm2_env.status === 'stopped'); + + console.log(''); + console.log('Summary:'); + console.log(' Online: ' + onlineTests.length); + console.log(' Errored: ' + erroredTests.length); + console.log(' Stopped: ' + stoppedTests.length); + + if (onlineTests.length === 3) { + console.log(''); + console.log('✅ All 3 test processes started successfully'); + } else { + console.log(''); + console.log('⚠️ WARNING: Expected 3 online processes, got ' + onlineTests.length); + } + } catch(e) { + console.log('Failed to parse PM2 output:', e.message); + } + " || echo "Verification script failed" + echo "=== END POST-START STATE ===" + echo "" + + echo "[PM2 COMMAND] About to execute: pm2 save" + pm2 save + SAVE_EXIT_CODE=$? + echo "[PM2 COMMAND RESULT] pm2 save exit code: $SAVE_EXIT_CODE" + echo "" + + if [ $SAVE_EXIT_CODE -eq 0 ]; then + echo "✅ PM2 process list saved successfully" + else + echo "⚠️ WARNING: pm2 save failed with exit code $SAVE_EXIT_CODE" + fi + + echo "" + echo "Waiting 5 seconds for processes to stabilize..." + sleep 5 + echo "" + + echo "=== PM2 STATE AFTER 5-SECOND STABILIZATION ===" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" + pm2 list + echo "" + pm2 jlist | node -e " + try { + const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); + const testProcs = list.filter(p => p.name && p.name.endsWith('-test')); + const crashedRecently = testProcs.filter(p => p.pm2_env.restart_time > 0 && p.pm2_env.status !== 'online'); + if (crashedRecently.length > 0) { + console.log('⚠️ WARNING: ' + crashedRecently.length + ' processes crashed after initial start'); + crashedRecently.forEach(p => console.log(' - ' + p.name + ': ' + p.pm2_env.status)); + } else { + console.log('✅ All processes stable after 5 seconds'); + } + } catch(e) { + console.log('Stability check failed'); + } + " || echo "Stability check script failed" + echo "=== END STABILIZATION CHECK ===" + echo "" + + echo "Test backend server reload complete." # After a successful deployment, update the schema hash in the database. # This ensures the next deployment will compare against this new state. @@ -795,21 +1009,103 @@ jobs: find "$PROD_APP_PATH/flyer-images/archive" -mindepth 1 -maxdepth 1 -type f -delete || echo "Prod archive directory is empty or not found, skipping." echo "✅ Test artifacts cleared from production asset directories." - - name: Show PM2 Environment for Test + - name: Show PM2 Environment for Test and Final Verification run: | - echo "--- Displaying recent PM2 logs for flyer-crawler-api-test ---" - # After a reload, the server restarts. We'll show the last 20 lines of the log to see the startup messages. - sleep 5 + echo "=========================================" + echo "FINAL PM2 VERIFICATION" + echo "=========================================" + echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')" + echo "" + + echo "--- Full PM2 Process List ---" + pm2 list + echo "" + + echo "--- Test Processes Detailed Status ---" + pm2 jlist | node -e " + try { + const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); + const testProcs = list.filter(p => p.name && p.name.endsWith('-test')); + + console.log('Test processes count: ' + testProcs.length); + console.log('Expected: 3'); + console.log(''); + + const procNames = ['flyer-crawler-api-test', 'flyer-crawler-worker-test', 'flyer-crawler-analytics-worker-test']; + procNames.forEach(procName => { + const proc = list.find(p => p.name === procName); + if (proc) { + console.log('✅ ' + procName + ':'); + console.log(' pm_id: ' + proc.pm2_env.pm_id); + console.log(' status: ' + proc.pm2_env.status); + console.log(' pid: ' + (proc.pid || 'N/A')); + console.log(' uptime: ' + ((Date.now() - proc.pm2_env.pm_uptime) / 1000).toFixed(1) + 's'); + console.log(' restarts: ' + proc.pm2_env.restart_time); + console.log(' memory: ' + (proc.monit ? (proc.monit.memory / 1024 / 1024).toFixed(1) + 'MB' : 'N/A')); + } else { + console.log('❌ ' + procName + ': NOT FOUND'); + } + console.log(''); + }); + + const allOnline = testProcs.every(p => p.pm2_env.status === 'online'); + if (testProcs.length === 3 && allOnline) { + console.log('✅✅✅ DEPLOYMENT SUCCESS: All 3 test processes are online'); + } else { + console.log('⚠️⚠️⚠️ DEPLOYMENT WARNING: Not all test processes are online'); + } + } catch(e) { + console.log('Failed to parse PM2 output:', e.message); + } + " || echo "Status check script failed" + echo "" + + echo "--- Port Status Check ---" + echo "Checking if test API is listening on port 3002:" + netstat -tlnp | grep ':3002' || echo "⚠️ WARNING: Port 3002 not in use (expected for flyer-crawler-api-test)" + echo "" + + echo "--- Displaying PM2 Logs for Test API ---" + sleep 3 # Resolve the PM2 ID dynamically to ensure we target the correct process - PM2_ID=$(pm2 jlist | node -e "try { const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); const app = list.find(p => p.name === 'flyer-crawler-api-test'); console.log(app ? app.pm2_env.pm_id : ''); } catch(e) { console.log(''); }") + PM2_API_ID=$(pm2 jlist | node -e "try { const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); const app = list.find(p => p.name === 'flyer-crawler-api-test'); console.log(app ? app.pm2_env.pm_id : ''); } catch(e) { console.log(''); }") + PM2_WORKER_ID=$(pm2 jlist | node -e "try { const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); const app = list.find(p => p.name === 'flyer-crawler-worker-test'); console.log(app ? app.pm2_env.pm_id : ''); } catch(e) { console.log(''); }") + PM2_ANALYTICS_ID=$(pm2 jlist | node -e "try { const list = JSON.parse(require('fs').readFileSync(0, 'utf-8')); const app = list.find(p => p.name === 'flyer-crawler-analytics-worker-test'); console.log(app ? app.pm2_env.pm_id : ''); } catch(e) { console.log(''); }") - if [ -n "$PM2_ID" ]; then - echo "Found process ID: $PM2_ID" - pm2 describe "$PM2_ID" || echo "Failed to describe process $PM2_ID" - pm2 logs "$PM2_ID" --lines 20 --nostream || echo "Failed to get logs for $PM2_ID" - pm2 env "$PM2_ID" || echo "Failed to get env for $PM2_ID" + if [ -n "$PM2_API_ID" ]; then + echo "=== flyer-crawler-api-test (pm_id: $PM2_API_ID) ===" + echo "Process description:" + pm2 describe "$PM2_API_ID" || echo "Failed to describe process" + echo "" + echo "Recent logs (last 30 lines):" + pm2 logs "$PM2_API_ID" --lines 30 --nostream || echo "Failed to get logs" + echo "" + echo "Environment variables:" + pm2 env "$PM2_API_ID" | grep -E 'PORT|NODE_ENV|DB_|REDIS_' || echo "Failed to get env" + echo "" else - echo "Could not find process 'flyer-crawler-api-test' in pm2 list." - pm2 list # Fallback to listing everything to help debug + echo "⚠️ flyer-crawler-api-test NOT FOUND in PM2" fi + + if [ -n "$PM2_WORKER_ID" ]; then + echo "=== flyer-crawler-worker-test (pm_id: $PM2_WORKER_ID) ===" + echo "Recent logs (last 20 lines):" + pm2 logs "$PM2_WORKER_ID" --lines 20 --nostream || echo "Failed to get logs" + echo "" + else + echo "⚠️ flyer-crawler-worker-test NOT FOUND in PM2" + fi + + if [ -n "$PM2_ANALYTICS_ID" ]; then + echo "=== flyer-crawler-analytics-worker-test (pm_id: $PM2_ANALYTICS_ID) ===" + echo "Recent logs (last 20 lines):" + pm2 logs "$PM2_ANALYTICS_ID" --lines 20 --nostream || echo "Failed to get logs" + echo "" + else + echo "⚠️ flyer-crawler-analytics-worker-test NOT FOUND in PM2" + fi + + echo "=========================================" + echo "END FINAL VERIFICATION" + echo "========================================="