Optimize shell scripts for maximum performance through efficient file descriptor management, I/O patterns, and resource utilization.
Shell script performance is primarily affected by:
1. Process Creation Overhead - Each external command spawns a process
2. I/O Operations - Reading/writing files and pipes
3. Variable Expansions - Complex parameter expansions
4. File Descriptor Management - Opening/closing files
5. Algorithmic Efficiency - Choice of approaches
Understanding these factors enables targeted optimization.
🧪 File Descriptor Management
Efficient File Descriptor Usage
| # ❌ Inefficient - opens/closes file repeatedly
i=0
while (( i < 1000 )); do
  i=$((i + 1))
  # The >> redirection opens output.txt and closes it again on every pass.
  echo "Line $i" >> output.txt
done
# ✅ Efficient - keep file descriptor open
# Open output.txt for appending once on fd 3, write every line through that
# descriptor, then close it explicitly.
exec 3>>output.txt
i=0
while (( i < 1000 )); do
  i=$((i + 1))
  echo "Line $i" >&3
done
exec 3>&-
|
File Descriptor Reuse Patterns
| # Reuse file descriptors for multiple operations
#######################################
# Open the four descriptors used by the processing pipeline.
# Arguments (all optional; defaults are the original hard-coded names):
#   $1 - input file, opened read-only on fd 3        (default: input.txt)
#   $2 - output file, truncated and opened on fd 4   (default: output.txt)
#   $3 - log file, opened for append on fd 5         (default: log.txt)
#   $4 - scratch file, opened read/write on fd 6     (default: temp.txt)
# Returns: 0 when all descriptors are open, 1 otherwise.
#######################################
setup_fds() {
  local input="${1:-input.txt}"
  local output="${2:-output.txt}"
  local log="${3:-log.txt}"
  local temp="${4:-temp.txt}"

  # Validate the input first so a missing file is reported before the
  # output file is truncated by the `4>` redirection below.
  if [[ ! -r "$input" ]]; then
    printf 'setup_fds: cannot read input file: %s\n' "$input" >&2
    return 1
  fi

  exec 3< "$input" || return 1   # Read input
  exec 4> "$output" || return 1  # Write output
  exec 5>> "$log" || return 1    # Append to log
  exec 6<> "$temp" || return 1   # Read/write temp file
}
# Release descriptors 3-6, one redirection per line, using the close form
# that matches how each one is used (3 read, 4 and 5 write, 6 read/write).
close_fds() {
  exec 3<&-
  exec 4>&-
  exec 5>&-
  exec 6<&-
}
#######################################
# Upper-case every line read from fd 3, write it to fd 4, and append one
# timestamped "Processed line" entry per input line to fd 5.
# Expects fds 3 (read), 4 (write) and 5 (write) to be open already.
# Requires Bash 4.2+ for the printf %(...)T time format.
#######################################
process_data() {
  local line
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
    # printf instead of echo: a line such as "-e" must be written verbatim,
    # not interpreted as an echo option.
    printf '%s\n' "${line^^}" >&4
    # The printf builtin formats the current time (-1) without forking
    # `date` once per line.  The format mirrors date's default C-locale layout.
    printf '%(%a %b %e %H:%M:%S %Z %Y)T: Processed line\n' -1 >&5
  done
}
#######################################
# Run the pipeline: open descriptors, process the data, close descriptors.
# Returns: the status of setup_fds if it fails (nothing is processed),
#          otherwise the status of process_data.
#######################################
main() {
  local status

  # Do not read from or write to descriptors that failed to open.
  setup_fds || return

  process_data
  status=$?

  # Always release the descriptors, even when processing failed.
  close_fds
  return "$status"
}
# Script entry point: invoke main with no arguments.
main
|
🧠 I/O Optimization Techniques
Buffered I/O vs Line-by-Line
| # ❌ Slow - line-by-line processing
# Reads large_file.txt one line per `read` call and hands each line to
# process_line; only the current line is held in memory.
while IFS= read -r line; do
process_line "$line"
done < large_file.txt
# ✅ Fast - batch processing with mapfile
# mapfile loads every line of the file into the `lines` array in a single
# builtin call (-t strips each trailing newline), so the whole file is held
# in memory before the first process_line call runs.
# NOTE(review): the speed-up only covers the reading step; if process_line
# dominates the runtime the two forms will perform about the same - benchmark.
mapfile -t lines < large_file.txt
for line in "${lines[@]}"; do
process_line "$line"
done
|
Chunked Reading of Large Files (the shell has no true memory mapping; dd reads fixed-size chunks instead)
| # For very large files, consider external tools
#######################################
# Feed a file to process_chunk in fixed-size pieces.
# Each piece is read with its own `dd` invocation and piped to
# process_chunk on stdin.  This is plain chunked reading, not memory mapping.
# Arguments:
#   $1 - path of the file to process
#   $2 - chunk size in bytes (optional, default 1048576 = 1 MiB)
# Returns: 1 if the file is unreadable or the chunk size is invalid;
#          otherwise the status of the last dd | process_chunk pipeline
#          (0 for an empty file).
#######################################
process_large_file() {
  local file="$1"
  local chunk_size="${2:-1048576}" # 1MB chunks
  local file_size chunk_count
  local chunk_index=0

  if [[ ! -f "$file" || ! -r "$file" ]]; then
    printf 'process_large_file: cannot read file: %s\n' "$file" >&2
    return 1
  fi
  if ! [[ "$chunk_size" =~ ^[1-9][0-9]*$ ]]; then
    printf 'process_large_file: invalid chunk size: %s\n' "$chunk_size" >&2
    return 1
  fi

  # `wc -c` is portable; `stat -c%s` only exists in GNU coreutils.
  file_size=$(wc -c < "$file") || return 1
  file_size=$((file_size)) # normalise any padding some wc versions print

  # Round up so a trailing partial chunk is still processed.
  chunk_count=$(((file_size + chunk_size - 1) / chunk_size))

  while ((chunk_index < chunk_count)); do
    # skip= is counted in units of bs, so the chunk index is used directly.
    dd if="$file" bs="$chunk_size" skip="$chunk_index" count=1 2>/dev/null |
      process_chunk
    chunk_index=$((chunk_index + 1))
  done
}
|
🧪 Process Creation Optimization
Minimize External Commands
| # ❌ Expensive - many external calls
# Forks one `wc` per matching file and sums the results in the shell.
count=0
for file in *.txt; do
lines=$(wc -l < "$file")
count=$((count + lines))
done
# ✅ Efficient - a fixed number of external calls
# Three processes (wc, tail, awk) regardless of how many files match.
# With several files the last line of `wc -l` output is the "total" row;
# with a single file it is that file's own count, so field 1 works either way.
total_lines=$(wc -l *.txt | tail -1 | awk '{print $1}')
# ✅ Pure shell arithmetic - no external processes at all
# NOTE(review): this avoids every fork, but a bash `read` loop is typically
# much slower per line than `wc`; prefer it only for small inputs and
# benchmark before adopting it for large files.
total_lines=0
for file in *.txt; do
while IFS= read -r _; do
total_lines=$((total_lines + 1))
done < "$file"
done
|
Built-in vs External Commands
| # Performance comparison functions
# Built-in arithmetic
#######################################
# Sum the integers 1..N with shell arithmetic only (no external process).
# Arguments: $1 - upper bound N (optional, default 10000)
# Outputs:   the sum on stdout
#######################################
builtin_math() {
  local limit="${1:-10000}"
  local sum=0
  local i # keep the loop counter out of the caller's scope

  for ((i = 1; i <= limit; i++)); do
    sum=$((sum + i))
  done
  printf '%s\n' "$sum"
}
# External expr
#######################################
# Sum the integers 1..N by forking `expr` once per addition.
# Deliberately slow: this is the external-command baseline for comparison
# against the builtin arithmetic version.
# Arguments: $1 - upper bound N (optional, default 10000)
# Outputs:   the sum on stdout
#######################################
external_math() {
  local limit="${1:-10000}"
  local sum=0
  local i # keep the loop counter out of the caller's scope

  for ((i = 1; i <= limit; i++)); do
    sum=$(expr "$sum" + "$i")
  done
  printf '%s\n' "$sum"
}
# External bc
#######################################
# Sum the integers 1..N inside a single `bc` process.
# Arguments: $1 - upper bound N (optional, default 10000)
# Outputs:   the sum on stdout
#######################################
bc_math() {
  local limit="${1:-10000}"
  # %d forces the bound to be an integer before it is spliced into the
  # bc program text.
  printf 'sum=0; for(i=1;i<=%d;i++) sum+=i; sum\n' "$limit" | bc
}
|
🧠 Advanced Parameter Expansion Optimization
| # Efficient string manipulation patterns
# Substring extraction
#######################################
# Print the final component of a path using parameter expansion only.
# Unlike `basename`, a path ending in "/" yields an empty string, because
# everything up to and including the last slash is removed.
# Arguments: $1 - path
# Outputs:   the text after the last "/" on stdout
#######################################
extract_filename() {
  local path="$1"
  # Fast - parameter expansion
  # printf rather than echo so names such as "-n" or "-e" print verbatim.
  printf '%s\n' "${path##*/}"
  # Slow - external command
  # basename "$path"
}
# String replacement
#######################################
# Replace the last extension of a file name, or append one if it has none.
# Only the final path component is examined, so dots in directory names are
# left alone, and a leading dot (".bashrc") is not treated as an extension.
# Arguments:
#   $1 - file path
#   $2 - new extension, without the leading dot
# Outputs: the rewritten path on stdout
#######################################
replace_extension() {
  local file="$1"
  local new_ext="$2"
  local dir=""
  local base="$file"

  # Split off the directory so "dir.v1/README" is not cut at "dir".
  if [[ "$file" == */* ]]; then
    dir="${file%/*}/"
    base="${file##*/}"
  fi

  # Strip the extension only when a dot follows at least one character.
  if [[ "$base" == ?*.* ]]; then
    base="${base%.*}"
  fi

  # Fast - parameter expansion
  printf '%s\n' "${dir}${base}.${new_ext}"
  # Slow - external command
  # echo "$(basename "$file" .txt).$new_ext"
}
# Case conversion (Bash 4+)
#######################################
# Print a string converted to upper case (Bash 4+ `${var^^}` expansion).
# Arguments: $1 - string to convert
# Outputs:   the upper-cased string on stdout
#######################################
to_upper() {
  local str="$1"
  # Fast - built-in
  # printf rather than echo: "-e" upper-cases to "-E", which echo would
  # consume as an option and print nothing.
  printf '%s\n' "${str^^}"
  # Slow - external
  # echo "$str" | tr '[:lower:]' '[:upper:]'
}
|
🧪 Pipeline Optimization
Reducing Pipeline Stages
| # ❌ Multiple processes
# Five processes: cat and grep only feed awk, which can read and filter itself.
cat data.txt | grep pattern | awk '{print $1}' | sort | uniq -c
# ✅ Fewer processes
# awk reads the file and filters it, replacing cat and grep (three processes).
awk '/pattern/ {print $1}' data.txt | sort | uniq -c
# ✅ Single process (when possible)
# awk also does the counting in an associative array, so sort and uniq are
# not needed.  Same counts as above, but rows come out in awk's unspecified
# array order; append `| sort -k2` if ordered output matters.
awk '/pattern/ { seen[$1]++ } END { for (key in seen) printf "%7d %s\n", seen[key], key }' data.txt
|
Efficient Sorting Strategies
| # Optimize sorting for performance
# For large datasets, use external sort
#######################################
# Sort a large file with a bounded in-memory buffer.
# Uses GNU sort options: -S limits the buffer and --compress-program
# compresses any temporary spill files with gzip.
# Arguments:
#   $1 - input file
#   $2 - output file (may be the same path as the input)
#   $3 - memory limit passed to sort -S (optional, default 512M)
# Returns: the exit status of sort.
#######################################
sort_large_dataset() {
  local input="$1"
  local output="$2"
  local memory_limit="${3:-512M}"

  # -o lets sort read all input before it opens the output file.  A shell
  # `> "$output"` redirection truncates the file first, which destroys the
  # data when input and output are the same path.
  sort -S "$memory_limit" --compress-program=gzip -o "$output" -- "$input"
}
# Stable sort when order matters
#######################################
# Sort by field 1, then by field 2 numerically, keeping input order for
# lines whose two keys are equal.
# Arguments: $1 - file to sort (optional, default data.txt)
# Outputs:   sorted lines on stdout
#######################################
stable_sort() {
  local file="${1:-data.txt}"
  # -s makes the sort stable.  Without it sort breaks ties by comparing the
  # whole line, which reorders records whose keys match.
  sort -s -k1,1 -k2,2n -- "$file"
}
# Locale-independent sorting
#######################################
# Sort a file byte-wise, independent of the caller's locale settings.
# Arguments: $1 - file to sort (optional, default data.txt)
# Outputs:   sorted lines on stdout
#######################################
locale_free_sort() {
  local file="${1:-data.txt}"
  # LC_ALL=C applies to this sort invocation only: comparison is by byte
  # value, so the order is the same on every system.
  LC_ALL=C sort -- "$file" # Faster, consistent ordering
}
|
🧠 Memory Usage Optimization
Variable Management
| # Efficient variable usage patterns
# Reuse variables instead of creating new ones
#######################################
# Transform every element of the global `items` array and emit the result,
# reusing the same two scratch variables on each iteration.
# Globals:  items (read)
# Calls:    transform <value>      - its stdout is captured
#           output_result <result> - receives the captured text
#######################################
process_data_efficient() {
  local data=""
  local result=""
  local item # loop variable stays local so the caller's `item` is untouched

  for item in "${items[@]}"; do
    data="$item" # Reuse variable
    result=$(transform "$data")
    output_result "$result"
  done
}
# ❌ Inefficient - creates many variables
# Counter-example: copies each global (item1, item2, item3) into its own
# dedicated local instead of reusing a single scratch variable.
process_data_inefficient() {
  local data1 data2 data3
  data1="$item1"
  data2="$item2"
  data3="$item3"
  # ... many more variables
}
|
Array Optimization
| # Efficient array operations
# Pre-size arrays when possible
#######################################
# Fill a local array with "value_0" .. "value_<size-1>" by direct index
# assignment.  The array is local, so nothing is returned to the caller.
# Arguments: $1 - number of elements (optional, default 1000)
#######################################
initialize_array() {
  local size="${1:-1000}"
  # Bash arrays grow on assignment and cannot be pre-sized; `arr=()` only
  # creates an empty array.
  local -a arr=()
  local i # keep the loop counter out of the caller's scope

  for ((i = 0; i < size; i++)); do
    arr[i]="value_$i"
  done
}
# Efficient array concatenation
# Print all positional arguments followed by the two fixed items
# "new_item1" and "new_item2", separated by single spaces.
concat_arrays() {
  local -a merged=("$@")
  merged+=("new_item1" "new_item2")
  echo "${merged[@]}"
}
|
🧪 Resource Monitoring and Profiling
| # Comprehensive performance measurement functions
#######################################
# Run a command and report its wall-clock duration.
# Timestamps come from `date +%s%N` (nanoseconds; GNU date) and the
# difference is computed with shell integer arithmetic, so `bc` is not needed.
# Arguments: "$@" - the command and its arguments
# Outputs:   the command's own output, plus "Execution time: <s>s" on stderr
# Returns:   the exit status of the command
#######################################
measure_execution_time() {
  local start_ns end_ns elapsed_ns
  local exit_code=0

  start_ns=$(date +%s%N)
  # Capture the status so a failing command is still timed and its status
  # is passed back to the caller.
  "$@" || exit_code=$?
  end_ns=$(date +%s%N)

  elapsed_ns=$((end_ns - start_ns))
  printf 'Execution time: %d.%09ds\n' \
    "$((elapsed_ns / 1000000000))" "$((elapsed_ns % 1000000000))" >&2
  return "$exit_code"
}
# Usage
# Runs `process_large_dataset input.txt output.txt` under the timer.
# NOTE(review): process_large_dataset is not defined in the snippets shown
# here - substitute the command you want to time.
measure_execution_time process_large_dataset input.txt output.txt
|
Resource Usage Monitoring
| # Monitor resource usage during execution
#######################################
# Run a command while a background loop samples this shell's CPU and memory
# percentages once per second.
# The samples describe the shell process itself ($$), not any child
# processes the command starts.
# Arguments: "$@" - the command and its arguments
# Outputs:   the command's own output; one "<date>: CPU=..%, MEM=..%" line
#            per sample on stderr, so the command's stdout stays clean
# Returns:   the exit status of the command
#######################################
monitor_resources() {
  local pid=$$
  local monitor_pid
  local exit_code=0
  local cpu mem

  # Background monitoring process.  Its stdout is pointed at stderr before
  # the loop starts, so neither the samples nor its children hold on to the
  # caller's stdout.
  (
    while kill -0 "$pid" 2>/dev/null; do
      cpu=""
      mem=""
      # One ps call returns both columns; fall back to 0 if ps fails.
      read -r cpu mem < <(ps -p "$pid" -o %cpu= -o %mem= 2>/dev/null)
      printf '%s: CPU=%s%%, MEM=%s%%\n' "$(date)" "${cpu:-0}" "${mem:-0}"
      sleep 1
    done
  ) >&2 &
  monitor_pid=$!

  # Execute main function
  "$@" || exit_code=$?

  # Stop monitoring; stderr is silenced to hide the shell's "Terminated"
  # job notice for the killed monitor.
  kill "$monitor_pid" 2>/dev/null || true
  wait "$monitor_pid" 2>/dev/null || true
  return "$exit_code"
}
|
Memory Leak Detection
| # Simple memory leak detection
#######################################
# Run a command and warn if this shell's virtual memory size grew by more
# than 10000 KB while it ran.
# The measurement is `ps -o vsz=` for the shell process ($$), so it reflects
# growth inside the shell (functions, sourced code), not memory used by
# external child processes.
# Arguments: "$@" - the command and its arguments
# Outputs:   the command's own output; warnings on stderr
# Returns:   the exit status of the command
#######################################
detect_memory_leaks() {
  local baseline_mem current_mem
  local exit_code=0
  local threshold_kb=10000

  baseline_mem=$(ps -o vsz= -p $$ 2>/dev/null)
  "$@" || exit_code=$?
  current_mem=$(ps -o vsz= -p $$ 2>/dev/null)

  # ps pads its output with blanks; remove them before validating.
  baseline_mem="${baseline_mem//[[:space:]]/}"
  current_mem="${current_mem//[[:space:]]/}"

  if [[ "$baseline_mem" =~ ^[0-9]+$ && "$current_mem" =~ ^[0-9]+$ ]]; then
    if ((current_mem > baseline_mem + threshold_kb)); then
      echo "Warning: Potential memory leak detected" >&2
      echo "Baseline: ${baseline_mem}KB, Current: ${current_mem}KB" >&2
    fi
  else
    # ps missing or failing must not turn into a `[` syntax error.
    echo "Warning: could not read memory usage from ps; leak check skipped" >&2
  fi
  return "$exit_code"
}
|
🧠 Advanced I/O Patterns
Asynchronous I/O Simulation
| # Simulate async I/O with background processes
# Start a background subshell that reads lines from descriptor $1, runs
# expensive_operation on each, and writes the captured output to
# descriptor $2.  Returns immediately; the caller must `wait` for completion.
async_io_processor() {
  local input_fd="$1"
  local output_fd="$2"
  # Background processor
  (
    local record
    while IFS= read -r -u "$input_fd" record; do
      # Process line asynchronously
      echo "$(expensive_operation "$record")" >&"$output_fd"
    done
  ) &
}
# Usage
# Open the input on fd 3 and the output on fd 4 in the current shell; the
# background subshell started by async_io_processor inherits both.
exec 3< input.txt
exec 4> output.txt
async_io_processor 3 4
# Block until the background processor has finished before closing the fds.
wait
exec 3<&- 4>&-
|
Stream Processing Optimization
| # Efficient stream processing patterns
#######################################
# Read lines from stdin and pass them to process_batch in groups.
# Arguments: $1 - lines per batch (optional, default 1000)
# Calls:     process_batch <line>... - once per full batch, and once more
#            for any remaining lines
# Returns:   status of the final process_batch call, or 0 if no lines
#            were left over
#######################################
stream_processor() {
  local buffer_size="${1:-1000}"
  local -a buffer=()
  local line # keep the read variable out of the caller's scope

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    buffer+=("$line")
    # The array length is the count, so no separate counter is needed.
    if ((${#buffer[@]} >= buffer_size)); then
      # Process buffer in batch
      process_batch "${buffer[@]}"
      buffer=()
    fi
  done

  # Process remaining items
  if ((${#buffer[@]} > 0)); then
    process_batch "${buffer[@]}"
  fi
}
|
🧪 Caching and Memoization
Simple Caching Mechanism
| # Basic memoization for expensive operations
# In-memory result cache, keyed by the argument given to memoized_operation.
declare -A cache

#######################################
# Return expensive_computation's output for a key, computing it only once.
# The cache lives in the current shell.  Calling this through command
# substitution, e.g. `x=$(memoized_operation k)`, runs it in a subshell and
# the new cache entry is discarded - call it directly (redirecting output)
# when the entry must persist.
# Globals:   cache (read, written)
# Arguments: $1 - cache key, also passed to expensive_computation
# Outputs:   the cached or freshly computed value on stdout
# Returns:   0 on a cache hit, otherwise expensive_computation's status
#######################################
memoized_operation() {
  local key="$1"
  local result
  local status=0

  # `+cached` tests whether the entry exists, so an empty result is still a
  # hit.  The empty key is skipped because it is not a valid subscript.
  if [[ -n "$key" && -n "${cache[$key]+cached}" ]]; then
    printf '%s\n' "${cache[$key]}"
    return 0
  fi

  result=$(expensive_computation "$key") || status=$?

  # Cache successful results only, so a transient failure is retried.
  if ((status == 0)) && [[ -n "$key" ]]; then
    cache["$key"]="$result"
  fi
  printf '%s\n' "$result"
  return "$status"
}
# Persistent caching
#######################################
# Return compute_value's output for a key, cached in a file between runs.
# Cache files are named "cache_<key>" inside $PERSISTENT_CACHE_DIR
# (default /tmp, as before).  "%" and "/" in the key are percent-encoded so
# the key cannot address a different directory.
# NOTE: /tmp is writable by every user, so entries there can be pre-created
# by others; point PERSISTENT_CACHE_DIR at a private directory when the
# cached values must be trusted.
# Globals:   PERSISTENT_CACHE_DIR (read, optional)
# Arguments: $1 - cache key, also passed to compute_value
# Outputs:   the cached or freshly computed value on stdout
# Returns:   status of cat on a cache hit, otherwise compute_value's status
#######################################
persistent_cache_get() {
  local key="$1"
  local cache_dir="${PERSISTENT_CACHE_DIR:-/tmp}"
  local safe_key cache_file tmp_file result
  local status=0

  # Encode "%" first so the encoding stays reversible and collision-free.
  safe_key="${key//[%]/%25}"
  safe_key="${safe_key//[\/]/%2F}"
  cache_file="${cache_dir}/cache_${safe_key}"

  if [[ -f "$cache_file" ]]; then
    cat -- "$cache_file"
    return
  fi

  result=$(compute_value "$key") || status=$?

  # Store successful results only, so a failed computation is retried.
  if ((status == 0)); then
    # Write to a temp file and rename it, so a concurrent reader never sees
    # a half-written entry.
    if tmp_file=$(mktemp "${cache_file}.XXXXXX" 2>/dev/null); then
      if printf '%s\n' "$result" > "$tmp_file"; then
        mv -f -- "$tmp_file" "$cache_file"
      else
        rm -f -- "$tmp_file"
      fi
    fi
  fi

  printf '%s\n' "$result"
  return "$status"
}
|
🧠 Parallel Processing Optimization
Controlled Parallelization
| # Efficient parallel processing with resource limits
#######################################
# Run process_item on every element of the global `items` array, in
# batches of at most max_jobs concurrent background jobs.
# A whole batch must finish before the next one starts.
# Globals:   items (read)
# Arguments: $1 - maximum concurrent jobs (optional, default: `nproc`)
# Calls:     process_item <item> - in the background
#######################################
parallel_processor() {
  local max_jobs="${1:-$(nproc)}"
  local current_jobs=0
  local item # loop variable stays local so the caller's `item` is untouched

  for item in "${items[@]}"; do
    process_item "$item" &
    current_jobs=$((current_jobs + 1))
    if ((current_jobs >= max_jobs)); then
      wait # Wait for batch to complete
      current_jobs=0
    fi
  done
  wait # Wait for final batch
}
# Advanced parallel processing with job control
#######################################
# Run process_item on every element of the global `items` array, keeping up
# to max_jobs jobs running.  A new job starts as soon as any slot frees up,
# rather than waiting for a whole batch.
# Globals:   items (read)
# Arguments: $1 - maximum concurrent jobs (optional, default: `nproc`;
#                 values below 1 are treated as 1)
# Calls:     process_item <item> - in the background
# Returns:   0 if every job succeeded, otherwise the status of the most
#            recently collected failing job
#######################################
advanced_parallel_processor() {
  local max_jobs="${1:-$(nproc)}"
  local -a job_pids=()
  local item pid i reaped
  local status=0

  # With a limit below 1 the slot loop below could never make progress.
  ((max_jobs >= 1)) || max_jobs=1

  for item in "${items[@]}"; do
    # Wait if we've reached max jobs
    while ((${#job_pids[@]} >= max_jobs)); do
      reaped=0
      # Collect every job that has already exited.
      for i in "${!job_pids[@]}"; do
        if ! kill -0 "${job_pids[i]}" 2>/dev/null; then
          wait "${job_pids[i]}" || status=$?
          unset 'job_pids[i]'
          reaped=1
        fi
      done
      # Sleep only when nothing finished; a freed slot is used immediately.
      ((reaped)) || sleep 0.1
    done

    # Start new job
    process_item "$item" &
    job_pids+=("$!")
  done

  # Wait for all remaining jobs
  for pid in "${job_pids[@]}"; do
    wait "$pid" || status=$?
  done
  return "$status"
}
|
Comprehensive Benchmarking
| #!/usr/bin/env bash
# Performance benchmarking framework
#######################################
# Time a function or command over several runs and print statistics.
# Timestamps come from `date +%s%N` (nanoseconds; GNU date).  All arithmetic
# is shell integer arithmetic, so `bc` is not needed and no extra process is
# forked per comparison.
# Arguments:
#   $1 - name of the function/command to run (called with no arguments)
#   $2 - number of iterations (optional, default 10; must be >= 1)
# Outputs: per-iteration timings and average/min/max/total on stdout,
#          interleaved with whatever the benchmarked command prints
# Returns: 0 on success, 2 for an invalid iteration count, 127 if the
#          name cannot be resolved to something runnable
#######################################
benchmark_function() {
  local function_name="$1"
  local iterations="${2:-10}"
  local -r ns_per_s=1000000000
  local start_ns end_ns elapsed_ns
  local total_ns=0
  local min_ns="" max_ns="" avg_ns
  local i

  # Zero iterations would divide by zero and leave min/max undefined.
  if ! [[ "$iterations" =~ ^[1-9][0-9]*$ ]]; then
    printf 'benchmark_function: iterations must be a positive integer: %s\n' \
      "$iterations" >&2
    return 2
  fi
  if ! type -t "$function_name" >/dev/null 2>&1; then
    printf 'benchmark_function: not a function or command: %s\n' \
      "$function_name" >&2
    return 127
  fi

  echo "Benchmarking $function_name ($iterations iterations)..."

  for ((i = 1; i <= iterations; i++)); do
    start_ns=$(date +%s%N)
    "$function_name"
    end_ns=$(date +%s%N)
    elapsed_ns=$((end_ns - start_ns))

    total_ns=$((total_ns + elapsed_ns))
    # Track min/max as we go instead of in a second pass.
    if [[ -z "$min_ns" ]] || ((elapsed_ns < min_ns)); then
      min_ns=$elapsed_ns
    fi
    if [[ -z "$max_ns" ]] || ((elapsed_ns > max_ns)); then
      max_ns=$elapsed_ns
    fi

    printf 'Iteration %d: %d.%09ds\n' "$i" \
      "$((elapsed_ns / ns_per_s))" "$((elapsed_ns % ns_per_s))"
  done

  avg_ns=$((total_ns / iterations))

  echo "Results for $function_name:"
  printf ' Average: %d.%09ds\n' "$((avg_ns / ns_per_s))" "$((avg_ns % ns_per_s))"
  printf ' Minimum: %d.%09ds\n' "$((min_ns / ns_per_s))" "$((min_ns % ns_per_s))"
  printf ' Maximum: %d.%09ds\n' "$((max_ns / ns_per_s))" "$((max_ns % ns_per_s))"
  printf ' Total: %d.%09ds\n' "$((total_ns / ns_per_s))" "$((total_ns % ns_per_s))"
}
# Usage example
# benchmark_function "my_slow_function" 5
|
🧾 Summary
- Minimize Process Creation - Use built-ins over external commands
- Efficient File Descriptor Usage - Reuse FDs, keep them open
- Smart I/O Patterns - Batch operations, use appropriate buffering
- Algorithmic Efficiency - Choose optimal approaches for data size
- Memory Management - Reuse variables, monitor leaks
- Parallel Processing - Controlled concurrency with resource limits
- Caching - Memoize expensive operations
- Profiling - Measure and monitor performance continuously
Common pitfalls to avoid:
- Repeated external command calls in loops
- Opening/closing files unnecessarily
- Uncontrolled parallelization
- Inefficient string operations
- Poor algorithmic choices for data size
- Ignoring resource limits
- Lack of performance monitoring
Useful profiling and monitoring tools:
- time - Basic timing information
- strace - System call tracing
- perf - CPU profiling (Linux)
- htop/top - Real-time resource monitoring
- Custom benchmarking frameworks
- Memory usage monitoring with ps
👉 Continue to: Debugging and Tracing