# flyer-crawler.projectium.com/.gitea/workflows/deploy.yml

# .gitea/workflows/deploy.yml
#
# Deploys to production, which is an Ubuntu co-lo server running nginx + PostgreSQL.
#
# Note to AI: the order of the steps in this file matters. Even minor changes to this file can have a big impact, and it is easy to break.
name: Deploy to Web Server flyer-crawler.projectium.com

on:
  push:
    branches:
      - main # Trigger only on pushes to the 'main' branch.

jobs:
  deploy:
    runs-on: projectium.com # Runner label; this job runs on the self-hosted Gitea runner.

    # Job-level environment block. It currently has no entries, so the key parses
    # as null; the steps below supply secrets through their own step-level `env:`
    # blocks or inline `${{ secrets.* }}` expressions.
    # NOTE(review): consider removing this key or writing `env: {}` so the empty
    # value is explicit rather than an implicit null.
    env:

    steps:
      # Check out the pushed commit into the runner's workspace.
      - name: Checkout Code
        uses: actions/checkout@v3

      # Debugging aid: print the ref, ref name, ref type, commit SHA, triggering
      # actor and repository taken from the `gitea` context of this run.
      - name: Show Git REF
        run: |
          echo "Gitea ref: ${{ gitea.ref }}"
          echo "Gitea ref_name: ${{ gitea.ref_name }}" # often more useful (e.g., 'main' or 'my-feature-branch')
          echo "Gitea ref_type: ${{ gitea.ref_type }}" # 'branch' or 'tag'
          echo "Gitea SHA: ${{ gitea.sha }}"
          echo "Triggering actor: ${{ gitea.actor }}"
          echo "Repository: ${{ gitea.repository }}"

      # Install Node.js 20 and enable the action's npm caching, keyed on every
      # package-lock.json matched by the glob below.
      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: '20'
          cache: 'npm' # Caching re-enabled; remove it again if it causes this step to fail.
          cache-dependency-path: '**/package-lock.json'

      # The setup-node cache only restores npm's package cache to speed up downloads;
      # it does not install anything into node_modules. Dependencies are therefore
      # always installed explicitly by this step.
      - name: Install Dependencies
        run: npm ci # Clean install that follows package-lock.json exactly; preferred over 'npm install' in CI/CD.

# -----------------------------------------------------------------------

      # --- Debugging steps ---
      # Print the working directory and list the workspace root and the src/
      # directory, so a wrong checkout location or layout is visible in the log.
      - name: Verify Project Structure
        run: |
          echo "--- Current Working Directory ---"
          pwd
          echo "--- Listing Root Directory ---"
          ls -alF
          echo "--- Listing SRC Directory ---"
          ls -alF src

      # Run the project's 'lint' npm script. Lint results are advisory only.
      - name: Lint TypeScript Code
        run: npm run lint # Run the linter to check for code quality issues.
        continue-on-error: true # A lint failure is reported but does not stop the workflow.

      - name: Stop Production Server Before Tests
        # Stops and then deletes the pm2 processes 'flyer-crawler-api' and
        # 'flyer-crawler-worker'. This frees port 3001 so that the integration
        # test suite can launch its own, fresh server instance.
        # '|| true' keeps the step from failing when a process is not running or
        # is not registered with pm2.
        # NOTE(review): production stays stopped from this point until the
        # "Install Backend Dependencies and Restart Server" step succeeds. A failure
        # in between (for example the schema check's `exit 1`) leaves it down.
        # Confirm that this downtime window is acceptable.
        run: |
          pm2 stop flyer-crawler-api || true
          pm2 stop flyer-crawler-worker || true
          pm2 delete flyer-crawler-api || true
          pm2 delete flyer-crawler-worker || true

      - name: Run All Tests and Generate Merged Coverage Report
        # Runs the unit and the integration test suites with coverage enabled, using
        # one combined set of environment variables for both suites. The coverage
        # data is merged by the "Merge Coverage and Display Summary" step, not here.
        # NOTE(review): the fail-fast check in the script does not cover DB_PORT.
        # NOTE(review): each suite ends in `|| true` and the step also sets
        # continue-on-error, so failing tests never block the deployment. Confirm
        # that this is intended.
        env:
          # --- Database connection for the test suite ---
          # Host, port, user and password come from Gitea secrets; the database
          # name is fixed to the dedicated test database.
          DB_HOST: ${{ secrets.DB_HOST }}
          DB_PORT: ${{ secrets.DB_PORT }}
          DB_USER: ${{ secrets.DB_USER }}
          DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
          DB_DATABASE: "flyer-crawler-test"
          DB_NAME: "flyer-crawler-test"

          # --- Redis connection for the test suite ---
          REDIS_URL: "redis://localhost:6379"
          REDIS_PASSWORD: ${{ secrets.REDIS_PASSWORD_TEST }}

          # --- Integration-test-specific variables ---
          FRONTEND_URL: "http://localhost:3000"
          VITE_API_BASE_URL: "http://localhost:3001/api"
          GEMINI_API_KEY: ${{ secrets.VITE_GOOGLE_GENAI_API_KEY }}

          # --- Raise the Node.js heap limit (in MB) to avoid out-of-memory errors ---
          # Running tests with coverage is memory-intensive.
          NODE_OPTIONS: "--max-old-space-size=8192"

        run: |
          # Fail-fast check to ensure secrets are configured in Gitea for testing.
          if [ -z "$DB_HOST" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ] || [ -z "$DB_DATABASE" ] || [ -z "$GEMINI_API_KEY" ] || [ -z "$REDIS_PASSWORD" ]; then
            echo "ERROR: One or more test secrets (DB_*, GEMINI_API_KEY, REDIS_PASSWORD_TEST) are not set in Gitea repository secrets."
            exit 1
          fi

          # Run unit and integration tests as separate steps.
          # The `|| true` ensures that the workflow continues even if one of the test suites fails.
          # This allows the coverage reports to be generated and merged regardless of test success.
          echo "--- Running Unit Tests ---"
          npm run test:unit -- --coverage --reporter=verbose --includeTaskLocation --testTimeout=20000 || true

          echo "--- Running Integration Tests ---"
          npm run test:integration -- --coverage --reporter=verbose --includeTaskLocation --testTimeout=20000 || true

        continue-on-error: true # The workflow proceeds even if this step fails.

      - name: Merge Coverage and Display Summary
        # Combines coverage from three sources into one text + HTML report written
        # to .coverage/:
        #   1. unit tests        (.coverage/unit/coverage-final.json)
        #   2. integration tests (.coverage/integration/coverage-final.json)
        #   3. the backend server started by the integration tests: raw V8 data in
        #      .coverage/tmp/integration-server, converted to Istanbul JSON by
        #      `c8 report` into .coverage/integration-server/coverage-final.json
        # The JSON files are copied into .coverage/nyc-source-for-report and
        # `nyc report` is pointed at that directory via --temp-dir.
        # NOTE(review): the `ls -l` verification lines return non-zero when a file is
        # missing. If the runner's shell runs with errexit, that aborts the script
        # before the "skipping" fallback on the server-coverage `cp` can run. Confirm.
        # NOTE(review): `mkdir -p` already succeeds when the directory exists, so the
        # `|| echo "... already exists."` branches only fire on a genuine mkdir failure.
        if: always() # Run this step even if an earlier step failed.
        run: |
          echo "--- Merging Coverage Reports and Displaying Text Summary ---"

          # Add logging to verify that the source coverage files exist before merging.
          echo "Checking for source coverage files..."
          ls -l .coverage/unit/coverage-final.json
          ls -l .coverage/integration/coverage-final.json

          # --- V8 Coverage Processing for Backend Server ---
          # The integration tests start the server, which generates raw V8 coverage data.
          # This step uses the `c8` tool to convert that raw data into a standard
          # Istanbul coverage report (`coverage-final.json`) that can be merged.
          echo "Processing V8 coverage data from the integration test server..."
          # Create a dedicated output directory for the server's coverage report.
          mkdir -p .coverage/integration-server || echo "Directory .coverage/integration-server already exists."
          mkdir -p .coverage/tmp/integration-server || echo "Directory .coverage/tmp/integration-server already exists."
          # Run c8: read raw files from the temp dir, and output an Istanbul JSON report.
          # We only generate the 'json' report here because it's all nyc needs for merging.
          echo "Server coverage report about to be generated..."
          npx c8 report --reporter=json --temp-directory .coverage/tmp/integration-server --reports-dir .coverage/integration-server
          echo "Server coverage report generated. Verifying existence:"
          ls -l .coverage/integration-server/coverage-final.json

          # Now we have three coverage reports:
          # nyc's `report` command can merge multiple coverage files automatically.
          # The standard way to do this is to place all `coverage-final.json` files
          # into a single directory and point `nyc report` to it.

          # Step 1: Define a directory for nyc to use as its source for merging.
          # We use a path relative to the workspace to avoid issues with the runner's CWD.
          NYC_SOURCE_DIR=".coverage/nyc-source-for-report"
          mkdir -p "$NYC_SOURCE_DIR" || echo "Directory $NYC_SOURCE_DIR already exists."
          echo "Created temporary directory for nyc reporting source: $NYC_SOURCE_DIR"

          # Step 2: Copy the individual coverage reports into the source directory.
          # We give them unique names to be safe, though it's not strictly necessary.
          cp .coverage/unit/coverage-final.json "$NYC_SOURCE_DIR/unit-coverage.json"
          cp .coverage/integration/coverage-final.json "$NYC_SOURCE_DIR/integration-coverage.json"
          # This file might not exist if integration tests fail early, so we add `|| true`
          cp .coverage/integration-server/coverage-final.json "$NYC_SOURCE_DIR/integration-server-coverage.json" || echo "Server coverage file not found, skipping."
          echo "Copied coverage files to source directory. Contents:"
          ls -l "$NYC_SOURCE_DIR"

          # Step 3: Generate the reports directly from the source directory.
          # We explicitly tell nyc where to find the source coverage files (`--temp-dir`)
          # and where to output the final reports (`--report-dir`).
          # This avoids the ENOENT error by preventing `nyc` from looking in a default
          # cache location (`.nyc_output`) which was causing the failure.
          echo "Generating reports from coverage data..."
          npx nyc report \
            --reporter=text \
            --reporter=html \
            --report-dir .coverage/ \
            --temp-dir "$NYC_SOURCE_DIR"

          echo "✅ Coverage reports generated successfully."

        continue-on-error: true # The workflow proceeds even if the coverage merge fails.

      - name: Archive Code Coverage Report
        # Uploads the whole .coverage/ directory, which contains the generated HTML
        # coverage report, as a downloadable artifact named 'code-coverage-report'.
        uses: actions/upload-artifact@v3
        with:
          name: code-coverage-report
          path: .coverage/

        continue-on-error: true # The workflow proceeds even if the artifact upload fails.

      - name: Check for Database Schema Changes
        # Compares the SHA-256 of sql/master_schema_rollup.sql (after dos2unix line-ending
        # normalization) with the hash stored in public.schema_info (row id = 1) of the
        # production database:
        #   - no stored hash, or the query failed -> print a warning and continue
        #   - hashes differ                       -> fail the job (manual migration required)
        #   - hashes match                        -> continue
        # NOTE(review): any psql failure (bad credentials, unreachable host, ...) is
        # mapped to "none" and therefore handled like a first-time deployment. Confirm
        # that continuing in that case is intended.
        # NOTE(review): DB_PORT is passed to psql but is not part of the fail-fast check.
        env:
          # Production database credentials, read from Gitea secrets.
          DB_HOST: ${{ secrets.DB_HOST }}
          DB_PORT: ${{ secrets.DB_PORT }}
          DB_USER: ${{ secrets.DB_USER }}
          DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
          DB_DATABASE: ${{ secrets.DB_DATABASE_PROD }} # Name of the production database.
        run: |
          # Fail-fast check to ensure secrets are configured in Gitea.
          if [ -z "$DB_HOST" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ] || [ -z "$DB_DATABASE" ]; then
            echo "ERROR: One or more production database secrets (DB_HOST, DB_USER, DB_PASSWORD, DB_DATABASE_PROD) are not set in Gitea repository settings."
            exit 1
          fi

          echo "--- Checking for schema changes ---"
          # Calculate the hash of the current schema file in the repository.
          # We normalize line endings to ensure the hash is consistent across different OS environments.
          CURRENT_HASH=$(cat sql/master_schema_rollup.sql | dos2unix | sha256sum | awk '{ print $1 }')
          echo "Current Git Schema Hash: $CURRENT_HASH"

          # Query the production database to get the hash of the deployed schema.
          # The `psql` command requires PGPASSWORD to be set.
          # `\t` sets tuples-only mode and `\A` unaligns output to get just the raw value.
          # The `|| echo "none"` ensures the command doesn't fail if the table or row doesn't exist yet.
          DEPLOYED_HASH=$(PGPASSWORD="$DB_PASSWORD" psql -v ON_ERROR_STOP=1 -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_DATABASE" -c "SELECT schema_hash FROM public.schema_info WHERE id = 1;" -t -A || echo "none")
          echo "Deployed DB Schema Hash: $DEPLOYED_HASH"

          # Check if the hash is "none" (command failed) OR if it's an empty string (table exists but is empty).
          if [ "$DEPLOYED_HASH" = "none" ] || [ -z "$DEPLOYED_HASH" ]; then
            echo "WARNING: No schema hash found in the production database."
            echo "This is expected for a first-time deployment. The hash will be set after a successful deployment."
            # We allow the deployment to continue, but a manual schema update is required.
            # You could choose to fail here by adding `exit 1`.
          elif [ "$CURRENT_HASH" != "$DEPLOYED_HASH" ]; then
            echo "ERROR: Database schema mismatch detected!"
            echo "The schema file in the repository has changed. A manual database migration is required."
            exit 1 # Fail the deployment pipeline.
          else
            echo "✅ Schema is up to date. No changes detected."
          fi

      # --- Frontend Deployment ---
      - name: Build React Application
        # Builds the frontend with `npm run build`, injecting these build-time values
        # as environment variables:
        #   VITE_API_KEY            - value of the VITE_GOOGLE_GENAI_API_KEY Gitea secret
        #   VITE_API_BASE_URL       - fixed to /api
        #   VITE_APP_VERSION        - "<YYYYMMDD-HHMM>:<short commit hash>"
        #   VITE_APP_COMMIT_URL     - link to the built commit on the Gitea server
        #   VITE_APP_COMMIT_MESSAGE - subject line of the built commit
        # The secret is passed through `env:` rather than expanded into the script
        # text. A value containing spaces, quotes or shell metacharacters can then
        # neither break the command line nor be interpreted as shell code.
        env:
          VITE_API_KEY: ${{ secrets.VITE_GOOGLE_GENAI_API_KEY }}
        run: |
          # Fail-fast check for the build-time secret.
          if [ -z "$VITE_API_KEY" ]; then
            echo "ERROR: The VITE_GOOGLE_GENAI_API_KEY secret is not set."
            exit 1
          fi

          GITEA_SERVER_URL="https://gitea.projectium.com" # Your Gitea instance URL
          COMMIT_MESSAGE=$(git log -1 --pretty=%s)
          # The assignments below apply only to the `npm run build` process.
          # VITE_API_KEY is already exported to it by the step-level `env:` block.
          VITE_APP_VERSION="$(date +'%Y%m%d-%H%M'):$(git rev-parse --short HEAD)" \
          VITE_APP_COMMIT_URL="$GITEA_SERVER_URL/${{ gitea.repository }}/commit/${{ gitea.sha }}" \
          VITE_APP_COMMIT_MESSAGE="$COMMIT_MESSAGE" \
          VITE_API_BASE_URL=/api npm run build

      - name: Deploy Application to Server
        # Copies the checked-out project files and the built frontend (dist/) into
        # the web root. The target path is defined once in APP_PATH and reused for
        # every command, so it cannot drift between the mkdir and rsync calls.
        run: |
          # Single source of truth for the deployment target.
          APP_PATH="/var/www/flyer-crawler.projectium.com"
          echo "Deploying application files to $APP_PATH..."

          # Ensure the destination directory and all required subdirectories exist.
          mkdir -p "$APP_PATH"
          mkdir -p "$APP_PATH/flyer-images/icons" "$APP_PATH/flyer-images/archive"

          # 1. Copy the backend source code and project files first.
          # CRITICAL: '.env', '.env.test', 'node_modules', '.git', 'dist' and 'flyer-images'
          # are excluded. With --delete, excluded paths are also protected from removal on
          # the destination, which preserves the server's .env and user content.
          rsync -avz --delete --exclude '.env' --exclude '.env.test' --exclude 'node_modules' --exclude '.git' --exclude 'dist' --exclude 'flyer-images' ./ "$APP_PATH/"

          # 2. Copy the built frontend assets into the same directory.
          # This places index.html and the assets/ folder in the webroot.
          rsync -avz --exclude '.env.local' dist/ "$APP_PATH"
          echo "Application deployment complete."

      - name: Install Backend Dependencies and Restart Server
        # Installs production dependencies in the deployed directory, starts or reloads
        # the pm2 processes from ecosystem.config.cjs, and then records the SHA-256 of
        # sql/master_schema_rollup.sql in public.schema_info (row id = 1) so that the
        # next run's schema check compares against the state deployed here.
        # The step fails if the pm2 start/reload fails or if the stored hash cannot
        # be verified after the update.
        env:
          # These credentials are required for the psql commands at the end of this step.
          DB_HOST: ${{ secrets.DB_HOST }}
          DB_PORT: ${{ secrets.DB_PORT }}
          DB_USER: ${{ secrets.DB_USER }}
          DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
          DB_DATABASE: ${{ secrets.DB_DATABASE_PROD }}
        run: |
          # Fail-fast check to ensure secrets are configured in Gitea.
          if [ -z "$DB_HOST" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ] || [ -z "$DB_DATABASE" ]; then
            echo "ERROR: One or more production database secrets (DB_HOST, DB_USER, DB_PASSWORD, DB_DATABASE_PROD) are not set in Gitea repository settings."
            exit 1
          fi

          echo "Installing production dependencies and restarting server..."
          cd /var/www/flyer-crawler.projectium.com
          npm install --omit=dev # Install only production dependencies
          # Use `startOrReload` with the ecosystem file. This is the standard, idempotent way to deploy.
          # It will START the process if it's not running, or RELOAD it if it is.
          # The two commands are deliberately on separate lines: in `a && b` a failure of
          # `a` is exempt from the shell's errexit handling, so a failed reload would
          # otherwise fall through to the success message and the schema-hash update.
          pm2 startOrReload ecosystem.config.cjs --env production
          # Persist the process list across server reboots.
          pm2 save
          echo "Backend server reloaded successfully."

          # After a successful deployment, update the schema hash in the database.
          # This ensures the next deployment will compare against this new state.
          echo "Updating schema hash in production database..."
          CURRENT_HASH=$(cat sql/master_schema_rollup.sql | dos2unix | sha256sum | awk '{ print $1 }')
          PGPASSWORD="$DB_PASSWORD" psql -v ON_ERROR_STOP=1 -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_DATABASE" -c \
          "INSERT INTO public.schema_info (id, schema_hash, deployed_at) VALUES (1, '$CURRENT_HASH', NOW())
           ON CONFLICT (id) DO UPDATE SET schema_hash = EXCLUDED.schema_hash, deployed_at = NOW();"

          # Verify the hash was updated (-t -A make psql print only the raw value).
          UPDATED_HASH=$(PGPASSWORD="$DB_PASSWORD" psql -v ON_ERROR_STOP=1 -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_DATABASE" -c "SELECT schema_hash FROM public.schema_info WHERE id = 1;" -t -A)
          if [ "$CURRENT_HASH" = "$UPDATED_HASH" ]; then
            echo "✅ Schema hash successfully updated in the database to: $UPDATED_HASH"
          else
            echo "ERROR: Failed to update schema hash in the database."
            # Surface the failure: a stale hash would make the next run's schema check misleading.
            exit 1
          fi

      - name: Show PM2 Environment for Production
        # Post-deploy diagnostics: waits 5 seconds, then prints the pm2 description of
        # flyer-crawler-api, its last 20 log lines, and the environment of the pm2
        # process with id 0. Each command falls back to an echo, so a failed lookup
        # does not fail this step.
        # NOTE(review): `pm2 env 0` addresses a process by numeric id, not by name.
        # Verify that id 0 still refers to flyer-crawler-api after the earlier
        # `pm2 delete` and restart.
        # NOTE(review): this output may contain secrets from the application's
        # environment. Confirm that exposing it in the CI log is acceptable.
        run: |
          echo "--- Displaying recent PM2 logs for flyer-crawler-api ---"
          # After a reload, the server restarts. We'll show the last 20 lines of the log
          # to see the startup messages, which include the environment variables loaded from the .env file.
          sleep 5 # Wait a few seconds for the app to start and log its output.
          pm2 describe flyer-crawler-api || echo "Could not find pm2 process."
          pm2 logs flyer-crawler-api --lines 20 --nostream || echo "Could not find pm2 process."
          pm2 env 0 || echo "Could not find pm2 process."