🔄 Advanced Shell in CI/CD

Integrate shell scripts seamlessly into continuous integration and delivery pipelines with professional-grade practices.

🧭 CI/CD Script Lifecycle

Modern CI/CD pipelines typically follow this flow:

1. Checkout — Retrieve source code
2. Setup — Configure environment
3. Build — Compile/test artifacts
4. Deploy — Release to target environments
5. Verify — Confirm deployment success
6. Report — Publish results and metrics

Each stage often involves shell scripts or commands.

🧪 Professional Script Structure

Standard Header Pattern

#!/usr/bin/env bash
# vim: set ft=sh:

# Strict mode, enabled before any other command runs:
#   -E           ERR traps are inherited by functions, command substitutions
#                and subshells (without it a trap on ERR stays silent there)
#   -e           exit on error
#   -u           treat unset variables as errors
#   -o pipefail  a pipeline fails if any stage fails
set -Eeuo pipefail

# Script metadata
# Assignment and `readonly` are split so that a failing command substitution
# is not masked by the exit status of `readonly`.
SCRIPT_NAME="$(basename -- "$0")"
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
START_TIME="$(date +%s)"
readonly SCRIPT_NAME SCRIPT_DIR START_TIME

# Color codes for output (escape sequences, expanded when printed)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Logging functions
# Every helper writes to stderr, leaving stdout free for data that callers
# capture with $(...). The color variables are expanded with %b; the message
# itself is printed with %s so backslashes in it are kept verbatim.
# Arguments: $* - message text
log_info() { printf '%b[INFO]%b %s\n' "${GREEN:-}" "${NC:-}" "$*" >&2; }
log_warn() { printf '%b[WARN]%b %s\n' "${YELLOW:-}" "${NC:-}" "$*" >&2; }
log_error() { printf '%b[ERROR]%b %s\n' "${RED:-}" "${NC:-}" "$*" >&2; }

# Prints only when DEBUG=true. Always returns 0: a suppressed debug line is
# not a failure and must not trip `set -e` or an ERR trap in the caller.
log_debug() {
    if [ "${DEBUG:-false}" = true ]; then
        printf '[DEBUG] %s\n' "$*" >&2
    fi
    return 0
}

# Error handling
# Log a failure report through log_error and terminate the script.
# Arguments:
#   $1 - line number where the failure occurred (default: "unknown")
#   $2 - exit status to terminate with (default: 1; a non-numeric value
#        also falls back to 1)
#   $3 - human-readable description (default: "Unknown error")
# Exits: always, with the status described for $2
error_exit() {
    local line_no="${1:-unknown}"
    local error_code="${2:-1}"
    local error_msg="${3:-Unknown error}"

    # `exit` rejects non-numeric arguments; normalise so the caller always
    # sees a well-defined failure status.
    if ! [[ "$error_code" =~ ^[0-9]+$ ]]; then
        error_code=1
    fi

    log_error "Error on line $line_no: $error_msg (exit code: $error_code)"
    exit "$error_code"
}

# Install the ERR trap: when it fires, error_exit is called with the current
# line number ($LINENO) and the failing command's exit status ($?). The single
# quotes defer expansion of both until the trap actually runs.
trap 'error_exit $LINENO $? "Unexpected error"' ERR

🧠 Environment Management

Configuration Loading

# Load configuration with precedence
# Sources every existing config file in the order listed below, so a value
# set by a later file replaces one set by an earlier file (./.env is last).
# Afterwards API_URL, TIMEOUT and RETRIES receive a default if they are still
# unset or empty.
# Globals:
#   SCRIPT_NAME (read) - config file names are derived from it, minus ".sh"
#   HOME (read)
#   API_URL, TIMEOUT, RETRIES (written when unset or empty)
# Note: sourced files run as shell code in the current shell.
load_config() {
    local config_file # keep the loop variable out of the global scope
    local config_files=(
        "/etc/${SCRIPT_NAME%.sh}.conf"
        "${HOME}/.${SCRIPT_NAME%.sh}"
        "./${SCRIPT_NAME%.sh}.conf"
        "./.env"
    )

    for config_file in "${config_files[@]}"; do
        if [ -f "$config_file" ]; then
            # A logger that returns non-zero must not abort config loading
            # under `set -e`.
            log_debug "Loading config from $config_file" || true
            # shellcheck source=/dev/null
            source "$config_file"
        fi
    done

    # Fill in defaults for anything the environment and the config files
    # left unset or empty
    : "${API_URL:=https://api.example.com}"
    : "${TIMEOUT:=30}"
    : "${RETRIES:=3}"
}

# Load configuration now; on return API_URL, TIMEOUT and RETRIES are set
load_config

Secret Management

# Secure secret handling
# Look up a secret by name and print its value on stdout.
# Sources are tried in order; a source that is unavailable, fails, or yields
# an empty value falls through to the next one:
#   1. the environment variable named $1
#   2. HashiCorp Vault (`vault kv get -field=value secret/$1`), if the
#      `vault` command is available
#   3. the Docker secret file /run/secrets/$1
# Arguments: $1 - secret name
# Outputs:   the secret value on stdout; failures via log_error
# Returns:   0 when a value was found, 1 otherwise
get_secret() {
    local secret_name="${1:-}"
    local secret_value=""

    if [ -z "$secret_name" ]; then
        log_error "Secret name is required"
        return 1
    fi

    # 1. Environment variable. Indirect expansion is only valid for names
    #    that are legal shell identifiers, so check that first.
    if [[ "$secret_name" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] \
        && [ -n "${!secret_name:-}" ]; then
        printf '%s\n' "${!secret_name}"
        return 0
    fi

    # 2. HashiCorp Vault
    if command -v vault >/dev/null 2>&1 \
        && secret_value=$(vault kv get -field=value "secret/$secret_name") \
        && [ -n "$secret_value" ]; then
        printf '%s\n' "$secret_value"
        return 0
    fi

    # 3. Docker secrets. Names with a "/" are skipped so the lookup cannot
    #    leave /run/secrets.
    if [[ "$secret_name" != */* ]] && [ -f "/run/secrets/$secret_name" ]; then
        if secret_value=$(cat "/run/secrets/$secret_name") \
            && [ -n "$secret_value" ]; then
            printf '%s\n' "$secret_value"
            return 0
        fi
    fi

    log_error "Secret $secret_name not found"
    return 1
}

# Usage
# The value is captured from get_secret's stdout; get_secret returns 1 when
# the secret cannot be found.
API_KEY=$(get_secret "API_KEY")

🧪 Build and Test Automation

Dependency Verification

# Verify required tools are available
# Arguments: optional list of "tool:description" entries (the description
#            may be omitted). With no arguments the built-in list below
#            (curl, jq, docker, git) is checked.
# Outputs:   one log_error line per missing tool, then a summary line
# Exits:     with status 1 if any tool is missing; returns 0 otherwise
verify_dependencies() {
    local required_tools=("$@")
    local missing_tools=()
    local tool_desc tool desc

    if [ "${#required_tools[@]}" -eq 0 ]; then
        required_tools=(
            "curl:required for API calls"
            "jq:required for JSON processing"
            "docker:required for container operations"
            "git:required for version control"
        )
    fi

    for tool_desc in "${required_tools[@]}"; do
        tool="${tool_desc%%:*}"
        # Without a colon the suffix strip would return the tool name itself
        if [[ "$tool_desc" == *:* ]]; then
            desc="${tool_desc#*:}"
        else
            desc="no description"
        fi

        if ! command -v "$tool" >/dev/null 2>&1; then
            log_error "Missing required tool: $tool ($desc)"
            missing_tools+=("$tool")
        fi
    done

    if [ "${#missing_tools[@]}" -gt 0 ]; then
        log_error "Please install missing tools: ${missing_tools[*]}"
        exit 1
    fi

    log_info "All dependencies verified"
}

# Check the required tools up front; verify_dependencies exits with status 1
# if any of them is missing.
verify_dependencies

Test Suite Execution

# Comprehensive test runner
# Runs three stages in order: unit tests (npm test with a JUnit report),
# integration tests (./scripts/integration-tests.sh) and a security scan
# (npm audit). The first two stop the run on failure; audit findings only
# fail the run when FAIL_ON_VULNERABILITIES=true.
# Globals: TEST_RESULTS_DIR (read, default /tmp/test-results),
#          FAIL_ON_VULNERABILITIES (read, default false)
# Returns: 0 when every stage passed, 1 otherwise
run_tests() {
    local results_dir="${TEST_RESULTS_DIR:-/tmp/test-results}"
    mkdir -p "$results_dir"
    local report_path="$results_dir/junit.xml"

    log_info "Running unit tests..."
    npm test -- --reporters=junit --outputFile="$report_path" || {
        log_error "Unit tests failed"
        return 1
    }

    log_info "Running integration tests..."
    ./scripts/integration-tests.sh || {
        log_error "Integration tests failed"
        return 1
    }

    log_info "Running security scans..."
    if ! npm audit; then
        log_warn "Security vulnerabilities found"
        if [ "${FAIL_ON_VULNERABILITIES:-false}" = true ]; then
            return 1
        fi
    fi

    log_info "All tests passed"
    return 0
}

🧠 Deployment Strategies

Blue-Green Deployment

# Blue-green deployment implementation
# Deploys a version to the inactive environment, health-checks it, switches
# traffic to it and finally cleans up the previously active environment.
# Every step is checked explicitly, so the old environment is never cleaned
# up unless traffic was switched - also when the caller runs this function in
# a condition, where `set -e` is ignored.
# Arguments: $1 - application name, $2 - version to deploy
# Calls:     get_active_environment, get_inactive_environment,
#            deploy_to_environment, health_check_environment,
#            rollback_deployment, switch_traffic, cleanup_old_version
#            (not defined in this section; must be provided elsewhere)
# Returns:   0 on success, 1 if any step failed
blue_green_deploy() {
    if [ $# -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
        log_error "Usage: blue_green_deploy <app_name> <new_version>"
        return 1
    fi

    local app_name="$1"
    local new_version="$2"
    local active_env inactive_env

    # Determine current active environment
    if ! active_env=$(get_active_environment "$app_name"); then
        log_error "Could not determine active environment for $app_name"
        return 1
    fi
    if ! inactive_env=$(get_inactive_environment "$app_name"); then
        log_error "Could not determine inactive environment for $app_name"
        return 1
    fi

    log_info "Deploying $new_version to $inactive_env"

    # Deploy to inactive environment
    if ! deploy_to_environment "$inactive_env" "$new_version"; then
        log_error "Deployment of $new_version to $inactive_env failed"
        return 1
    fi

    # Health check
    if ! health_check_environment "$inactive_env"; then
        log_error "Health check failed for $inactive_env"
        rollback_deployment "$inactive_env"
        return 1
    fi

    # Switch traffic
    log_info "Switching traffic to $inactive_env"
    if ! switch_traffic "$app_name" "$inactive_env"; then
        # Traffic still points at $active_env, so it must be left untouched
        log_error "Traffic switch to $inactive_env failed; $active_env left in place"
        return 1
    fi

    # Cleanup old version
    if ! cleanup_old_version "$active_env"; then
        log_error "Traffic is on $inactive_env, but cleanup of $active_env failed"
        return 1
    fi

    log_info "Deployment completed successfully"
}

Rolling Updates

# Rolling update implementation
# For each replica in turn: scale the deployment down by max_unavailable,
# wait, set the new image, scale back up, wait, then health-check.
# Any failing kubectl step or health check triggers `kubectl rollout undo`.
# Arguments:
#   $1 - deployment / container name
#   $2 - new image reference
#   $3 - replica count, positive integer (default 3)
#   $4 - replicas taken down per step, 0..replicas (default 1)
# Calls:   kubectl, health_check_replica (must be provided elsewhere)
# Returns: 0 on success, 1 on invalid arguments or after a rollback
rolling_update() {
    local service_name="${1:-}"
    local new_image="${2:-}"
    local replicas="${3:-3}"
    local max_unavailable="${4:-1}"
    local positive_re='^[1-9][0-9]*$'
    local count_re='^(0|[1-9][0-9]*)$'
    local i

    if [ -z "$service_name" ] || [ -z "$new_image" ]; then
        log_error "Usage: rolling_update <service> <image> [replicas] [max_unavailable]"
        return 1
    fi
    # Both values feed shell arithmetic and kubectl's --replicas; a
    # non-numeric count would otherwise make the loop silently do nothing.
    if ! [[ "$replicas" =~ $positive_re ]]; then
        log_error "Invalid replica count: $replicas"
        return 1
    fi
    if ! [[ "$max_unavailable" =~ $count_re ]] || ((max_unavailable > replicas)); then
        log_error "Invalid max_unavailable: $max_unavailable (replicas: $replicas)"
        return 1
    fi

    log_info "Starting rolling update for $service_name"

    # Update one replica at a time
    for ((i = 1; i <= replicas; i++)); do
        log_info "Updating replica $i/$replicas"

        # Scale down, wait, set the new image, scale back up, wait again.
        # The chain stops at the first failing step.
        if ! {
            kubectl scale deployment "$service_name" --replicas="$((replicas - max_unavailable))" \
                && kubectl rollout status deployment "$service_name" --timeout=60s \
                && kubectl set image deployment/"$service_name" "$service_name"="$new_image" \
                && kubectl scale deployment "$service_name" --replicas="$replicas" \
                && kubectl rollout status deployment "$service_name" --timeout=60s
        }; then
            log_error "kubectl step failed while updating replica $i"
            kubectl rollout undo deployment "$service_name" \
                || log_error "Rollback of $service_name failed"
            return 1
        fi

        # Health check
        if ! health_check_replica "$service_name" "$i"; then
            log_error "Health check failed for replica $i"
            kubectl rollout undo deployment "$service_name" \
                || log_error "Rollback of $service_name failed"
            return 1
        fi

        log_info "Replica $i updated successfully"
    done

    log_info "Rolling update completed"
}

🧪 Artifact Management

Build Artifact Creation

# Escape a string for use inside a JSON string literal (backslash, double
# quote, newline, carriage return and tab).
_json_escape() {
    local s="${1:-}"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Create comprehensive build artifacts
# Writes VERSION, checksums.sha256, manifest.json and build-<version>.tar.gz
# into the artifacts directory.
# Arguments: $1 - version string
# Globals:   BUILD_DIR (read, default ./build),
#            ARTIFACTS_DIR (read, default ./artifacts)
# Returns:   0 on success, 1 if the version is empty, the build directory is
#            missing, or a step fails
create_artifacts() {
    local version="${1:-}"
    local build_dir="${BUILD_DIR:-./build}"
    local artifacts_dir="${ARTIFACTS_DIR:-./artifacts}"
    local commit branch build_time path
    local artifact_list=""

    if [ -z "$version" ]; then
        log_error "Version is required"
        return 1
    fi
    if [ ! -d "$build_dir" ]; then
        log_error "Build directory not found: $build_dir"
        return 1
    fi

    mkdir -p "$artifacts_dir" || return 1

    # Create version file
    printf '%s\n' "$version" > "$artifacts_dir/VERSION"

    # Create checksums
    if ! find "$build_dir" -type f -exec sha256sum {} + > "$artifacts_dir/checksums.sha256"; then
        log_error "Failed to compute checksums for $build_dir"
        return 1
    fi

    # Outside a git checkout the fields read "unknown" instead of being empty
    commit=$(git rev-parse HEAD 2>/dev/null) || commit="unknown"
    branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || branch="unknown"
    build_time=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # Comma-separated, JSON-quoted base names of every file in the build dir
    while IFS= read -r -d '' path; do
        artifact_list+="${artifact_list:+,}\"$(_json_escape "${path##*/}")\""
    done < <(find "$build_dir" -type f -print0)

    # Create deployment manifest; every interpolated value is JSON-escaped
    cat > "$artifacts_dir/manifest.json" <<EOF
{
  "version": "$(_json_escape "$version")",
  "build_time": "$build_time",
  "commit": "$(_json_escape "$commit")",
  "branch": "$(_json_escape "$branch")",
  "artifacts": [
    $artifact_list
  ]
}
EOF

    # Package artifacts
    if ! tar -czf "$artifacts_dir/build-$version.tar.gz" -C "$build_dir" .; then
        log_error "Failed to package $build_dir"
        return 1
    fi

    log_info "Artifacts created in $artifacts_dir"
}

Artifact Publishing

# Publish artifacts to registry
# Uploads every regular file in the artifacts directory to
# <registry>/<version>/<file>, then marks the version as published.
# Arguments: $1 - version being published
# Globals:   ARTIFACT_USER, ARTIFACT_PASSWORD (read, required),
#            ARTIFACTS_DIR (read, default ./artifacts),
#            REGISTRY_URL (read, default https://artifacts.company.com)
# Returns:   0 on success; 1 on missing input, when there is nothing to
#            upload, or when an upload or the publish call fails
publish_artifacts() {
    local version="${1:-}"
    local artifacts_dir="${ARTIFACTS_DIR:-./artifacts}"
    local registry_url="${REGISTRY_URL:-https://artifacts.company.com}"
    local artifact filename credentials auth_config
    local uploaded=0

    if [ -z "$version" ]; then
        log_error "Version is required"
        return 1
    fi
    if [ -z "${ARTIFACT_USER:-}" ] || [ -z "${ARTIFACT_PASSWORD:-}" ]; then
        log_error "ARTIFACT_USER and ARTIFACT_PASSWORD must be set"
        return 1
    fi

    # Credentials are handed to curl as a config snippet on stdin (-K -)
    # instead of argv, where they would show up in the process list.
    # Backslashes and double quotes are escaped for the quoted config value.
    credentials="${ARTIFACT_USER}:${ARTIFACT_PASSWORD}"
    credentials=${credentials//\\/\\\\}
    credentials=${credentials//\"/\\\"}
    auth_config="user = \"${credentials}\""

    log_info "Publishing artifacts for version $version"

    # Upload each artifact
    for artifact in "$artifacts_dir"/*; do
        # Skips sub-directories and the unexpanded pattern of an empty dir
        [ -f "$artifact" ] || continue
        filename=${artifact##*/}

        log_info "Uploading $filename"

        if ! curl -sf -K - \
            -T "$artifact" \
            "$registry_url/$version/$filename" <<<"$auth_config"; then
            log_error "Failed to upload $filename"
            return 1
        fi
        uploaded=$((uploaded + 1))
    done

    if [ "$uploaded" -eq 0 ]; then
        log_error "No artifacts found in $artifacts_dir"
        return 1
    fi

    # Mark version as published
    if ! curl -sf -K - \
        -X POST \
        "$registry_url/api/versions/$version/publish" <<<"$auth_config"; then
        log_error "Failed to mark version $version as published"
        return 1
    fi

    log_info "Artifacts published successfully"
}

🧠 Monitoring and Observability

Health Checks

# Comprehensive health checking
# Runs four checks in order and stops at the first failure:
#   1. HTTP GET <service_url>/health
#   2. TCP connect to the database port
#   3. Redis PING
#   4. custom_health_checks (must be provided elsewhere)
# Arguments: $1 - service base URL
#            $2 - timeout in seconds for the HTTP and TCP checks (default 30)
# Globals:   DB_HOST / DB_PORT (read, default localhost / 5432),
#            REDIS_HOST / REDIS_PORT (read, default 127.0.0.1 / 6379)
# Returns:   0 if every check passed, 1 otherwise
health_check() {
    local service_url="${1:-}"
    local timeout="${2:-30}"
    local redis_reply

    if [ -z "$service_url" ]; then
        log_error "Service URL is required"
        return 1
    fi

    # HTTP health check
    if ! curl -sf --max-time "$timeout" "$service_url/health" >/dev/null; then
        log_error "HTTP health check failed"
        return 1
    fi

    # Database connectivity; -w bounds the attempt so an unreachable host
    # cannot stall the pipeline
    if ! nc -z -w "$timeout" "${DB_HOST:-localhost}" "${DB_PORT:-5432}"; then
        log_error "Database connection failed"
        return 1
    fi

    # Cache connectivity. The reply itself is checked: redis-cli may exit 0
    # while printing an error reply such as NOAUTH.
    redis_reply=$(redis-cli -h "${REDIS_HOST:-127.0.0.1}" -p "${REDIS_PORT:-6379}" ping 2>/dev/null) \
        || redis_reply=""
    if [ "$redis_reply" != "PONG" ]; then
        log_error "Redis connection failed"
        return 1
    fi

    # Custom business logic checks
    if ! custom_health_checks; then
        log_error "Custom health checks failed"
        return 1
    fi

    log_info "All health checks passed"
    return 0
}

Metrics Collection

# Collect and report metrics
# Gathers CPU, memory and root-filesystem usage plus two application
# counters and writes them as one JSON document.
# Globals: METRICS_FILE (read, default /tmp/metrics.json), DEBUG (read)
# Calls:   top, free, df, jq, get_request_count, get_error_rate
#          (the last two must be provided elsewhere)
# Returns: 0 on success, 1 if the JSON document could not be written
collect_metrics() {
    local metrics_file="${METRICS_FILE:-/tmp/metrics.json}"
    local cpu_usage memory_usage disk_usage request_count error_rate

    # System metrics
    # LC_ALL=C keeps the decimal separator a dot; field 2 of the "Cpu(s)"
    # line is taken and anything from a "%" onwards is dropped.
    cpu_usage=$(LC_ALL=C top -bn1 | awk '/Cpu\(s\)/ { sub(/%.*/, "", $2); print $2 }')

    memory_usage=$(free | awk '/Mem/ && $2 > 0 { printf "%.2f", $3 / $2 * 100.0 }')

    # -P forces one line per filesystem, so column 5 is always the
    # percentage even when the device name is long
    disk_usage=$(df -P / | awk 'NR > 1 { pct = $5 } END { sub(/%/, "", pct); print pct }')

    # Application metrics
    request_count=$(get_request_count)
    error_rate=$(get_error_rate)

    # Generate metrics JSON; a value that is not numeric becomes null
    # instead of aborting the whole document
    if ! jq -n \
        --arg cpu "$cpu_usage" \
        --arg mem "$memory_usage" \
        --arg disk "$disk_usage" \
        --arg requests "$request_count" \
        --arg errors "$error_rate" \
        '{
            timestamp: (now | todate),
            system: {
                cpu: ($cpu | tonumber? // null),
                memory: ($mem | tonumber? // null),
                disk: ($disk | tonumber? // null)
            },
            application: {
                requests: ($requests | tonumber? // null),
                error_rate: ($errors | tonumber? // null)
            }
        }' > "$metrics_file"; then
        log_error "Failed to write metrics to $metrics_file"
        return 1
    fi

    # Read the file back only when debug output is wanted, and never let
    # the logger's status become this function's status
    if [ "${DEBUG:-false}" = true ]; then
        log_debug "Metrics collected: $(cat "$metrics_file")" || true
    fi
    return 0
}

🧪 Error Handling and Rollback

Automated Rollback

# Automated rollback mechanism
# Rolls a deployment back with `kubectl rollout undo`, keeping a per-
# deployment attempt counter in a state file so repeated failures escalate.
# Arguments: $1 - deployment name (must not contain "/")
# Globals:   MAX_ROLLBACKS (read, default 3),
#            ROLLBACK_STATE_DIR (read, default /tmp) - counter file location
# Calls:     kubectl, alert_team, send_notification (provided elsewhere)
# Returns:   0 after a successful rollback, 1 on an invalid name
# Exits:     with status 1 when the limit is reached or the rollback fails
rollback_on_failure() {
    local deployment_name="${1:-}"
    local max_rollbacks="${MAX_ROLLBACKS:-3}"
    local state_dir="${ROLLBACK_STATE_DIR:-/tmp}"
    local rollback_count_file
    local current_rollbacks=0

    if [ -z "$deployment_name" ] || [[ "$deployment_name" == */* ]]; then
        log_error "Invalid deployment name: '$deployment_name'"
        return 1
    fi
    rollback_count_file="${state_dir%/}/${deployment_name}_rollbacks"

    if ! [[ "$max_rollbacks" =~ ^[0-9]+$ ]]; then
        log_warn "Ignoring non-numeric MAX_ROLLBACKS '$max_rollbacks'; using 3"
        max_rollbacks=3
    fi

    # Track rollback attempts
    if [ -f "$rollback_count_file" ]; then
        current_rollbacks=$(cat "$rollback_count_file")
        # The file may live in a shared directory. Its content is only used
        # in arithmetic after this check, because arithmetic evaluation of
        # arbitrary text can run embedded command substitutions.
        if ! [[ "$current_rollbacks" =~ ^[0-9]+$ ]]; then
            log_warn "Ignoring corrupt rollback counter in $rollback_count_file"
            current_rollbacks=0
        fi
    fi

    # 10# forces base 10 so a value like "08" is not parsed as octal
    if ((10#$current_rollbacks >= 10#$max_rollbacks)); then
        log_error "Maximum rollbacks ($max_rollbacks) exceeded"
        alert_team "Critical: Deployment $deployment_name failed after $max_rollbacks rollbacks"
        exit 1
    fi

    # Increment rollback counter
    printf '%s\n' "$((10#$current_rollbacks + 1))" > "$rollback_count_file"

    log_warn "Initiating rollback for $deployment_name"

    # Perform rollback
    if kubectl rollout undo deployment/"$deployment_name"; then
        log_info "Rollback successful"

        # Notify stakeholders
        send_notification "Deployment rolled back: $deployment_name"

        # Reset rollback counter on success
        rm -f "$rollback_count_file"
    else
        log_error "Rollback failed"
        alert_team "Critical: Rollback failed for $deployment_name"
        exit 1
    fi
}

Failure Notification

# Comprehensive failure notification
# Builds a JSON payload describing the failure and sends it to every alert
# channel. Each channel is attempted even if an earlier one fails, so one
# broken integration cannot silence the others.
# Arguments: $1 - failed component
#            $2 - error message
#            $3 - duration (default "unknown")
# Globals:   CI_BUILD_URL, CI_JOB_ID (read, default "unknown")
# Calls:     jq, git, send_slack_alert, send_email_alert,
#            send_pagerduty_alert (the senders must be provided elsewhere)
# Returns:   0 if every channel accepted the payload, 1 otherwise
notify_failure() {
    local component="${1:-}"
    local error_message="${2:-}"
    local duration="${3:-unknown}"
    local commit branch payload channel
    local failed_channels=()

    commit=$(git rev-parse HEAD 2>/dev/null) || commit="unknown"
    branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || branch="unknown"

    if ! payload=$(jq -n \
        --arg component "$component" \
        --arg message "$error_message" \
        --arg duration "$duration" \
        --arg commit "$commit" \
        --arg branch "$branch" \
        --arg build_url "${CI_BUILD_URL:-unknown}" \
        --arg job_id "${CI_JOB_ID:-unknown}" \
        '{
            component: $component,
            message: $message,
            duration: $duration,
            commit: $commit,
            branch: $branch,
            build_url: $build_url,
            job_id: $job_id,
            timestamp: (now | todate)
        }'); then
        log_error "Could not build failure payload for $component"
        return 1
    fi

    # Send to multiple channels
    for channel in send_slack_alert send_email_alert send_pagerduty_alert; do
        if ! "$channel" "$payload"; then
            failed_channels+=("$channel")
        fi
    done

    if [ "${#failed_channels[@]}" -gt 0 ]; then
        log_error "Failure notification not delivered by: ${failed_channels[*]}"
        log_error "Undelivered payload: $payload"
        return 1
    fi

    # Log for debugging
    log_error "Failure notification sent: $payload"
}

🧾 CI/CD Best Practices

Script Quality Assurance

# Pre-commit hook for script validation
# Runs ShellCheck and `bash -n` on every *.sh file below a directory and
# warns about files that are not executable. All scripts are checked; the
# result reflects whether any of them failed.
# Arguments: $1 - directory to scan (default: current directory)
# Outputs:   progress and failures on stdout
# Returns:   0 if every script passed, 1 otherwise
validate_scripts() {
    local scripts_dir="${1:-.}"
    local script
    local checked=0
    local failed=0

    # The loop reads from a process substitution instead of a pipe, so it
    # runs in the current shell and the counters survive it. NUL delimiters
    # keep file names with spaces or newlines intact.
    while IFS= read -r -d '' script; do
        checked=$((checked + 1))
        echo "Validating $script..."

        # ShellCheck validation
        if ! shellcheck "$script"; then
            echo "ShellCheck failed for $script"
            failed=$((failed + 1))
            continue
        fi

        # Syntax check
        if ! bash -n "$script"; then
            echo "Syntax check failed for $script"
            failed=$((failed + 1))
            continue
        fi

        # Executable check
        if [ ! -x "$script" ]; then
            echo "Warning: $script is not executable"
        fi
    done < <(find "$scripts_dir" -name "*.sh" -type f -print0)

    if [ "$failed" -gt 0 ]; then
        echo "$failed of $checked script(s) failed validation"
        return 1
    fi

    echo "All scripts validated successfully"
}

Performance Optimization

# Optimize CI/CD performance
# Three independent steps:
#   1. reuse node_modules when it is newer than package-lock.json,
#      otherwise run `npm ci`
#   2. run test files through GNU parallel when available, else `npm test`
#   3. run the frontend / backend build only when the last commit touched
#      matching files; if the previous commit cannot be diffed (shallow
#      clone, first commit) both builds run rather than being skipped
# Calls: npm, git, find, and parallel when installed
optimize_ci_cd() {
    local changed_files

    # Cache dependencies
    if [ -d "node_modules" ] && [ "node_modules" -nt "package-lock.json" ]; then
        echo "Using cached node_modules"
    else
        echo "Installing dependencies"
        npm ci
    fi

    # Parallel test execution
    if command -v parallel >/dev/null 2>&1; then
        find test/ -name "*test.js" | parallel npm run test-file {}
    else
        npm test
    fi

    # Incremental builds
    # The diff runs once and is reused for both checks. git's error output is
    # discarded on purpose: the fallback message below reports the condition.
    if ! changed_files=$(git diff --name-only HEAD~1 2>/dev/null); then
        echo "Cannot diff against HEAD~1 - running all builds"
        npm run build-frontend
        npm run build-backend
        return
    fi

    if grep -Eq '\.(js|ts|jsx|tsx)$' <<<"$changed_files"; then
        echo "JavaScript files changed - running frontend build"
        npm run build-frontend
    fi

    if grep -Eq '\.(py|pyi)$' <<<"$changed_files"; then
        echo "Python files changed - running backend build"
        npm run build-backend
    fi
}

🧾 Summary

Structure CI/CD scripts with professional headers
Manage configuration and secrets securely
Implement comprehensive testing and verification
Use blue-green or rolling deployment strategies
Create and publish proper build artifacts
Monitor health and collect metrics
Handle failures with automated rollback
Ensure script quality with validation
Optimize performance with caching and parallelization
Follow security best practices for CI/CD

👉 Continue to: Shell in Containers