📜 Logging and Telemetry Patterns
Effective logging and telemetry are crucial for monitoring, debugging, and understanding system behavior. This pattern establishes consistent approaches for capturing, formatting, and managing operational data.
🎯 Core Principles
Structured Logging
Use structured formats that are easily parseable by machines while remaining readable by humans.
1
2
3
4
5
6
7
8
9
10
11
12 | # ❌ Unstructured logging
echo "Starting backup process for database at $(date)"
# ✅ Structured logging
log_info() {
local message="$1"
local timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
echo "{\"timestamp\":\"$timestamp\",\"level\":\"INFO\",\"message\":\"$message\",\"pid\":$$}"
}
log_info "Starting backup process for database"
# {"timestamp":"2024-01-15T10:30:45Z","level":"INFO","message":"Starting backup process for database","pid":12345}
|
Consistent Severity Levels
Standardize log levels for predictable filtering and routing.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41 | # Standard log levels
LOG_LEVEL_TRACE=5
LOG_LEVEL_DEBUG=4
LOG_LEVEL_INFO=3
LOG_LEVEL_WARN=2
LOG_LEVEL_ERROR=1
LOG_LEVEL_FATAL=0
# Current log level (can be configured)
CURRENT_LOG_LEVEL=$LOG_LEVEL_INFO
# Level checking function
should_log() {
local level_value="$1"
[ "$level_value" -le "$CURRENT_LOG_LEVEL" ]
}
# Logging functions with level checking
log_trace() {
should_log $LOG_LEVEL_TRACE && log_message "TRACE" "$1"
}
log_debug() {
should_log $LOG_LEVEL_DEBUG && log_message "DEBUG" "$1"
}
log_info() {
should_log $LOG_LEVEL_INFO && log_message "INFO" "$1"
}
log_warn() {
should_log $LOG_LEVEL_WARN && log_message "WARN" "$1"
}
log_error() {
should_log $LOG_LEVEL_ERROR && log_message "ERROR" "$1" >&2
}
log_fatal() {
should_log $LOG_LEVEL_FATAL && log_message "FATAL" "$1" >&2
}
|
📋 Log Message Structure
Rich Context Information
Include relevant metadata for better diagnostics.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 | # Enhanced logging with context
log_with_context() {
local level="$1"
local message="$2"
local timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Build context object
local context="{"
context+="\"timestamp\":\"$timestamp\","
context+="\"level\":\"$level\","
context+="\"message\":\"$message\","
context+="\"pid\":$$,"
context+="\"user\":\"$(whoami)\","
context+="\"host\":\"$(hostname)\","
context+="\"script\":\"$(basename "$0")\""
# Add custom fields if provided
if [ -n "${LOG_CONTEXT_EXTRA:-}" ]; then
context+=",$LOG_CONTEXT_EXTRA"
fi
context+="}"
echo "$context"
}
# Usage with additional context
export LOG_CONTEXT_EXTRA="\"component\":\"database\",\"operation\":\"backup\""
log_info "Backup started"
|
🎨 Output Destinations
Multi-Stream Logging
Route different log levels to appropriate destinations.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 | # Multi-stream logging setup
setup_logging() {
local log_dir="${1:-"/var/log/myapp"}"
# Create log directory
mkdir -p "$log_dir"
# Setup log files
LOG_FILE_ALL="$log_dir/application.log"
LOG_FILE_ERROR="$log_dir/error.log"
LOG_FILE_AUDIT="$log_dir/audit.log"
# Touch files to ensure they exist
touch "$LOG_FILE_ALL" "$LOG_FILE_ERROR" "$LOG_FILE_AUDIT"
}
# Enhanced logging with routing
log_message_routed() {
local level="$1"
local message="$2"
local log_entry=$(log_with_context "$level" "$message")
# Always log to main file
echo "$log_entry" >> "$LOG_FILE_ALL"
# Route errors to error file
case "$level" in
ERROR|FATAL)
echo "$log_entry" >> "$LOG_FILE_ERROR"
;;
esac
# Route audit events to audit log
if [[ "$message" == *"AUDIT"* ]]; then
echo "$log_entry" >> "$LOG_FILE_AUDIT"
fi
# Also output to console based on level
case "$level" in
WARN|ERROR|FATAL)
echo "$log_entry" >&2
;;
INFO)
echo "$log_entry"
;;
esac
}
|
📊 Telemetry Integration
Metrics Collection
Collect quantitative data alongside logs.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 | # Simple metrics collection
declare -A METRICS_COUNTERS
declare -A METRICS_TIMERS
metric_increment() {
local metric_name="$1"
local increment="${2:-1}"
local current_value="${METRICS_COUNTERS[$metric_name]:-0}"
METRICS_COUNTERS["$metric_name"]=$((current_value + increment))
}
metric_timer_start() {
local timer_name="$1"
METRICS_TIMERS["$timer_name"]=$(date +%s%3N) # milliseconds
}
metric_timer_stop() {
local timer_name="$1"
local start_time="${METRICS_TIMERS[$timer_name]:-0}"
if [ "$start_time" -gt 0 ]; then
local end_time=$(date +%s%3N)
local duration=$((end_time - start_time))
# Log timing metric
log_info "METRIC:timer.$timer_name:${duration}ms"
# Clean up timer
unset METRICS_TIMERS["$timer_name"]
fi
}
# Usage example
metric_timer_start "database_backup"
# ... backup operation ...
metric_timer_stop "database_backup"
metric_increment "backups.completed"
|
Health Checks and Probes
Implement logging for system health monitoring.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 | # Health check logging
health_check_component() {
local component="$1"
local check_function="$2"
log_debug "HEALTH:Checking $component"
if $check_function; then
log_info "HEALTH:OK:$component"
metric_increment "health.check.$component.pass"
return 0
else
log_error "HEALTH:FAILED:$component"
metric_increment "health.check.$component.fail"
return 1
fi
}
# Component-specific health checks
check_database_connection() {
# Implementation here
nc -z database-host 5432 2>/dev/null
}
check_disk_space() {
local threshold="${1:-90}"
local usage=$(df / | awk 'NR==2 {print $5}' | sed 's/%//')
[ "$usage" -lt "$threshold" ]
}
# Usage
health_check_component "database" check_database_connection
health_check_component "disk" check_disk_space
|
🛠️ Advanced Logging Features
Log Rotation and Management
Handle log lifecycle automatically.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 | # Log rotation helper
rotate_log_if_needed() {
local log_file="$1"
local max_size="${2:-10485760}" # 10MB default
if [ -f "$log_file" ]; then
local current_size=$(stat -f%z "$log_file" 2>/dev/null || stat -c%s "$log_file" 2>/dev/null)
if [ "$current_size" -gt "$max_size" ]; then
local timestamp=$(date +%Y%m%d_%H%M%S)
local rotated_name="${log_file}.${timestamp}"
mv "$log_file" "$rotated_name"
touch "$log_file" # Create new empty log
log_info "Rotated log file: $log_file -> $rotated_name"
# Compress old logs (optional)
gzip "$rotated_name" &
fi
fi
}
# Periodic log maintenance
maintain_logs() {
rotate_log_if_needed "$LOG_FILE_ALL"
rotate_log_if_needed "$LOG_FILE_ERROR"
rotate_log_if_needed "$LOG_FILE_AUDIT"
}
|
Contextual Logging
Maintain context across function calls.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44 | # Context stack for nested operations
CONTEXT_STACK=()
push_context() {
local context="$1"
CONTEXT_STACK+=("$context")
}
pop_context() {
if [ ${#CONTEXT_STACK[@]} -gt 0 ]; then
unset CONTEXT_STACK[$((${#CONTEXT_STACK[@]}-1))]
fi
}
get_current_context() {
if [ ${#CONTEXT_STACK[@]} -gt 0 ]; then
local last_index=$((${#CONTEXT_STACK[@]}-1))
echo "${CONTEXT_STACK[$last_index]}"
fi
}
# Context-aware logging
log_with_context_stack() {
local level="$1"
local message="$2"
local context=$(get_current_context)
if [ -n "$context" ]; then
message="[$context] $message"
fi
log_message_routed "$level" "$message"
}
# Usage example
database_operation() {
push_context "DATABASE"
log_info "Starting database operation"
# ... database work ...
log_info "Database operation completed"
pop_context
}
|
🧾 Summary Best Practices
Logging Principles
✅ Use structured, parseable formats
✅ Include rich contextual information
✅ Implement consistent severity levels
✅ Route logs to appropriate destinations
✅ Handle log rotation automatically
✅ Collect metrics alongside logs
Telemetry Guidelines
✅ Instrument critical operations
✅ Track both business and technical metrics
✅ Implement health checks with logging
✅ Use timers for performance monitoring
✅ Correlate events with unique identifiers
Security Considerations
✅ Never log sensitive data (passwords, keys, PII)
✅ Sanitize log content before output
✅ Control log verbosity in production
✅ Protect log files with appropriate permissions
✅ Implement log retention policies
🧠 Complete Logging Library Template
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 | #!/bin/bash
# lib/logging.sh - Complete logging solution
# Log levels
readonly LOG_LEVEL_TRACE=5
readonly LOG_LEVEL_DEBUG=4
readonly LOG_LEVEL_INFO=3
readonly LOG_LEVEL_WARN=2
readonly LOG_LEVEL_ERROR=1
readonly LOG_LEVEL_FATAL=0
# Configuration
LOG_LEVEL=${LOG_LEVEL:-$LOG_LEVEL_INFO}
LOG_TO_CONSOLE=${LOG_TO_CONSOLE:-true}
LOG_TO_FILE=${LOG_TO_FILE:-false}
LOG_FILE=${LOG_FILE:-"/var/log/application.log"}
# Initialize logging
init_logging() {
if [ "$LOG_TO_FILE" = true ] && [ -n "$LOG_FILE" ]; then
mkdir -p "$(dirname "$LOG_FILE")"
touch "$LOG_FILE"
fi
}
# Core logging function
_log_output() {
local level="$1"
local message="$2"
local timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
local log_entry="{\"timestamp\":\"$timestamp\",\"level\":\"$level\",\"message\":\"$message\",\"pid\":$$}"
# Console output
if [ "$LOG_TO_CONSOLE" = true ]; then
case "$level" in
ERROR|FATAL) echo "$log_entry" >&2 ;;
*) echo "$log_entry" ;;
esac
fi
# File output
if [ "$LOG_TO_FILE" = true ] && [ -n "$LOG_FILE" ]; then
echo "$log_entry" >> "$LOG_FILE"
fi
}
# Level-specific functions
log_trace() { [ $LOG_LEVEL_TRACE -le $LOG_LEVEL ] && _log_output "TRACE" "$1"; }
log_debug() { [ $LOG_LEVEL_DEBUG -le $LOG_LEVEL ] && _log_output "DEBUG" "$1"; }
log_info() { [ $LOG_LEVEL_INFO -le $LOG_LEVEL ] && _log_output "INFO" "$1"; }
log_warn() { [ $LOG_LEVEL_WARN -le $LOG_LEVEL ] && _log_output "WARN" "$1"; }
log_error() { [ $LOG_LEVEL_ERROR -le $LOG_LEVEL ] && _log_output "ERROR" "$1"; }
log_fatal() { [ $LOG_LEVEL_FATAL -le $LOG_LEVEL ] && _log_output "FATAL" "$1"; }
# Initialize on load
init_logging
|
🧾 See Also