Skip to content

🧬 Linux Namespaces and Cgroups

Linux namespaces and control groups (cgroups) provide powerful isolation and resource management capabilities essential for containerization and system administration.

🧭 Linux Namespaces Overview

Namespaces partition kernel resources, providing isolation between processes. The seven main namespace types are listed below:

Namespace Types

| Type | Flag | Description | Use Cases |
|------|------|-------------|-----------|
| Mount | CLONE_NEWNS | Filesystem mount points | chroot, containers |
| UTS | CLONE_NEWUTS | Hostname and domain | Container identification |
| IPC | CLONE_NEWIPC | IPC objects | Process communication |
| PID | CLONE_NEWPID | Process IDs | Process isolation |
| Network | CLONE_NEWNET | Network interfaces | Network isolation |
| User | CLONE_NEWUSER | User and group IDs | Privilege separation |
| Cgroup | CLONE_NEWCGROUP | Cgroup root directory | Cgroup isolation |

🧪 Mount Namespaces

Basic Mount Namespace Operations

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Create and enter new mount namespace
# (this starts an interactive bash; the commands in the next section are meant
# to be typed into that shell, not run by the same script)
unshare --mount --fork bash

# In new namespace, demonstrate isolation
# The directory is created on the shared filesystem; only the tmpfs mounted on
# top of it (and the file written into that tmpfs) belongs to the namespace.
mkdir /mnt/test
mount -t tmpfs tmpfs /mnt/test
echo "data" > /mnt/test/file.txt

# From host (a separate shell outside the namespace), this mount is invisible
ls /mnt/test  # Directory exists but is empty

Advanced Mount Namespace Management

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Mount namespace demonstration script
demo_mount_namespace() {
    echo "=== Mount Namespace Demo ==="

    # Create temporary directory
    local temp_dir
    temp_dir=$(mktemp -d)

    # Start process in new mount namespace
    sudo unshare --mount --fork sh -c "
        echo 'Inside new mount namespace'

        # Create private mount
        mkdir -p $temp_dir/mount
        mount -t tmpfs tmpfs $temp_dir/mount
        echo 'Private data' > $temp_dir/mount/private.txt

        echo 'Mounts inside namespace:'
        mount | grep tmpfs

        # Keep namespace alive
        read -p 'Press Enter to exit namespace...'
    "

    echo "Back in original namespace"
    echo "Mounts outside namespace:"
    mount | grep tmpfs

    # Cleanup
    rm -rf "$temp_dir"
}

# Usage
# demo_mount_namespace

Private Mount Propagation

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# Configure mount propagation
#
# Usage: setup_private_mounts [mode] [target]
#   mode    private  - prevent propagation in both directions (default)
#           slave    - receive propagation events but do not send them
#           shared   - send and receive propagation events
#   target  mount point to change, recursively (default: /)
#
# Exactly one propagation type is applied per call. Applying several in a row
# to the same mount point would leave only the last one in effect.
#
# Returns: 1 for an unknown mode, otherwise the exit status of mount.
setup_private_mounts() {
    local mode="${1:-private}"
    local target="${2:-/}"

    case "$mode" in
        private|slave|shared)
            ;;
        *)
            echo "Usage: setup_private_mounts [private|slave|shared] [target]" >&2
            return 1
            ;;
    esac

    # The r-prefixed variants apply the change to every mount below the target.
    mount "--make-r${mode}" "$target"
}

# Verify mount propagation settings
#
# Prints the TARGET and PROPAGATION columns reported by findmnt for "/" and
# then for "/mnt". The return status is that of the last findmnt call.
check_mount_propagation() {
    local mount_point

    for mount_point in / /mnt; do
        findmnt -o TARGET,PROPAGATION "$mount_point"
    done
}

🧠 PID Namespaces

Process ID Isolation

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Create PID namespace
# (this starts an interactive bash; the "inside namespace" commands below are
# meant to be typed into that shell)
unshare --pid --fork --mount-proc bash

# Inside namespace, PID 1 is the shell
echo "PID inside namespace: $$"
ps aux  # Shows only processes in this namespace

# From host (a separate shell outside the namespace), different PID visible
# NOTE: $$ expands to the PID of whichever shell runs this line, i.e. the host
# shell itself, not the namespaced bash; the ps listing below is what reveals
# the namespaced shell's PID as seen from the host.
echo "PID from host: $$"
ps aux | grep bash  # Shows actual host PID

PID Namespace Management Script

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# PID namespace manager
#
# Usage: manage_pid_namespace {create|inspect [pid]}
#   create         announce, then start a shell in a new PID namespace through
#                  "sudo unshare --pid --fork --mount-proc"
#   inspect [pid]  list /proc/<pid>/ns/, or /proc/self/ns/ when no pid is given
#
# Returns 1 (after printing the usage line) for any other action.
manage_pid_namespace() {
    local action="$1"

    case "$action" in
        inspect)
            local target="$2"
            if [ -z "$target" ]; then
                # No PID supplied: fall back to the /proc/self view.
                echo "Current process namespaces:"
                ls -la "/proc/self/ns/"
            else
                echo "Namespace info for PID $target:"
                if ! ls -la "/proc/$target/ns/" 2>/dev/null; then
                    echo "Process not found"
                fi
            fi
            ;;
        create)
            echo "Creating new PID namespace..."
            # Single quotes keep $$ unexpanded here so that it is evaluated by
            # the sh started inside the new namespace.
            sudo unshare --pid --fork --mount-proc sh -c '
                echo "New PID namespace created"
                echo "PID 1 in namespace: $$"
                ps aux
                exec bash
            '
            ;;
        *)
            echo "Usage: manage_pid_namespace {create|inspect [pid]}"
            return 1
            ;;
    esac
}

🧪 Network Namespaces

Network Isolation

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Create network namespace
sudo ip netns add testns

# List namespaces
sudo ip netns list

# Execute command in namespace
sudo ip netns exec testns ip link show

# Configure loopback interface
sudo ip netns exec testns ip link set lo up

# Create virtual ethernet pair
# (veth0 stays on the host, veth1 is moved into testns in the next step)
sudo ip link add veth0 type veth peer name veth1

# Move one end to namespace
sudo ip link set veth1 netns testns

# Configure interfaces
# Host side gets 192.168.1.1, namespace side gets 192.168.1.2 (same /24).
# NOTE(review): 192.168.1.0/24 is a common LAN range; presumably a different
# subnet is needed if the host already uses it — confirm before running.
sudo ip addr add 192.168.1.1/24 dev veth0
sudo ip link set veth0 up

sudo ip netns exec testns ip addr add 192.168.1.2/24 dev veth1
sudo ip netns exec testns ip link set veth1 up
# Repeats the earlier "lo up" step; loopback was already brought up above.
sudo ip netns exec testns ip link set lo up

# Test connectivity
# First ping goes host -> namespace, second goes namespace -> host.
ping -c 3 192.168.1.2
sudo ip netns exec testns ping -c 3 192.168.1.1

# Cleanup
# NOTE(review): deleting the namespace presumably destroys veth1 and with it
# its peer veth0, in which case the second command reports a missing device
# harmlessly — confirm.
sudo ip netns delete testns
sudo ip link delete veth0

Network Namespace Manager

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Network namespace manager
#
# Usage: network_namespace_manager <namespace> {create|delete|exec|list|setup-loopback}
#   create          add the named network namespace
#   delete          delete the named network namespace
#   exec <cmd...>   run <cmd...> inside the named namespace
#   list            list all network namespaces (the <namespace> argument is
#                   still required positionally but is not used)
#   setup-loopback  bring up the loopback interface inside the namespace
#
# Returns: 1 on a usage error or when an ip command fails; for "exec" and
#          "list" the exit status of the command that was run.
network_namespace_manager() {
    local ns_name="$1"
    local action="$2"
    local usage="Usage: network_namespace_manager <namespace> {create|delete|exec|list|setup-loopback}"

    # Every action except "list" operates on a specific namespace.
    case "$action" in
        create|delete|exec|setup-loopback)
            if [ -z "$ns_name" ]; then
                echo "Error: namespace name must not be empty" >&2
                echo "$usage" >&2
                return 1
            fi
            ;;
    esac

    case "$action" in
        create)
            # Only report success when ip actually succeeded.
            if ! sudo ip netns add "$ns_name"; then
                echo "Failed to create network namespace: $ns_name" >&2
                return 1
            fi
            echo "Created network namespace: $ns_name"
            ;;
        delete)
            if ! sudo ip netns delete "$ns_name"; then
                echo "Failed to delete network namespace: $ns_name" >&2
                return 1
            fi
            echo "Deleted network namespace: $ns_name"
            ;;
        exec)
            # Drop <namespace> and <action>; what remains is the command.
            shift 2
            if [ "$#" -eq 0 ]; then
                echo "Error: exec requires a command to run" >&2
                echo "$usage" >&2
                return 1
            fi
            sudo ip netns exec "$ns_name" "$@"
            ;;
        list)
            sudo ip netns list
            ;;
        setup-loopback)
            if ! sudo ip netns exec "$ns_name" ip link set lo up; then
                echo "Failed to activate loopback interface in $ns_name" >&2
                return 1
            fi
            echo "Loopback interface activated in $ns_name"
            ;;
        *)
            echo "$usage"
            return 1
            ;;
    esac
}

# Usage examples:
# network_namespace_manager testns create
# network_namespace_manager testns setup-loopback
# network_namespace_manager testns exec ip addr show
# network_namespace_manager testns delete

🧠 User Namespaces

User and Group ID Mapping

1
2
3
4
5
6
7
8
# Create user namespace
# (this starts an interactive bash; the cat below is meant to be typed into
# that shell)
unshare --user --fork bash

# Initially, no mappings exist
cat /proc/self/uid_map  # Shows empty mapping

# Must be configured from outside (requires root or setuid)
# This is typically done by container runtimes

User Namespace Setup Script

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# User namespace setup
#
# Starts a shell in a new user namespace in which the calling user is mapped
# to root (UID/GID 0), prints the resulting maps, then hands over to bash.
#
# The mapping is delegated to "unshare --map-root-user" instead of writing
# /proc/<pid>/uid_map by hand from a background job: with
# "sudo unshare ... &", $! is the PID of the sudo wrapper rather than of the
# process inside the namespace, so hand-written maps would target the wrong
# process and depend on a timing guess.
#
# Arguments: none
# Returns:   the exit status of unshare (i.e. of the namespaced shell).
# Note:      no sudo is used; --map-root-user maps the *effective* user, so
#            running this as root would map root to root.
setup_user_namespace() {
    # Single quotes keep $$ and $(...) unexpanded here so they are evaluated
    # by the sh running inside the new namespace, after the maps exist.
    unshare --user --map-root-user --fork sh -c '
        echo "User namespace created with PID: $$"
        echo "UID map: $(cat /proc/self/uid_map)"
        echo "GID map: $(cat /proc/self/gid_map)"
        exec bash
    '
}

🧪 Control Groups (Cgroups)

Cgroups v1 vs v2

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Check cgroups version
#
# Usage: check_cgroups_version [cgroup_root]
#   cgroup_root  directory where the cgroup hierarchy is mounted
#                (default: /sys/fs/cgroup)
#
# Prints "Cgroups v2" when <cgroup_root> is itself a unified (v2) hierarchy,
# otherwise "Cgroups v1".
check_cgroups_version() {
    local cgroup_root="${1:-/sys/fs/cgroup}"

    # cgroup.controllers sits at the root of a cgroup2 hierarchy. Looking for
    # a "unified" subdirectory or for any cgroup2 mount is not sufficient:
    # hybrid setups mount cgroup2 under <root>/unified while the controllers
    # are still managed through the v1 hierarchies.
    if [ -f "$cgroup_root/cgroup.controllers" ]; then
        echo "Cgroups v2"
    else
        echo "Cgroups v1"
    fi
}

# View cgroups hierarchy
#
# Prints three sections: the cgroup-related lines of "mount", the contents of
# /proc/cgroups when that file exists, and the output of systemd-cgls.
# The return status is that of systemd-cgls.
view_cgroups() {
    printf '%s\n' "=== Cgroups Mounts ==="
    mount | grep cgroup

    printf '\n%s\n' "=== Cgroups Controllers ==="
    [ -f "/proc/cgroups" ] && cat /proc/cgroups

    printf '\n%s\n' "=== Active Cgroups ==="
    systemd-cgls
}

Cgroups Management

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Cgroups v2 management
#
# Usage: manage_cgroups_v2 {create|delete|set-memory-limit|set-cpu-limit|add-process|show-stats} <group> [value]
#   create            create the cgroup directory (parents included)
#   delete            remove the cgroup directory
#   set-memory-limit  write <value> to the group's memory.max
#   set-cpu-limit     write <value> ("<quota> <period>") to the group's cpu.max
#   add-process       write <value> (a PID) to the group's cgroup.procs
#   show-stats        print "<file>: <contents>" for each readable regular
#                     file in the group except cgroup.procs
#
# Environment: CGROUP_ROOT  overrides the hierarchy root (default /sys/fs/cgroup)
# Returns:     1 on a usage error or when an operation fails, 0 otherwise.
#              "create" on an existing group and "delete" on a missing group
#              only print a notice and return 0.
manage_cgroups_v2() {
    local action="$1"
    local group_name="$2"
    local value="$3"
    local cgroup_root="${CGROUP_ROOT:-/sys/fs/cgroup}"
    local group_dir="$cgroup_root/$group_name"
    local usage="Usage: manage_cgroups_v2 {create|delete|set-memory-limit|set-cpu-limit|add-process|show-stats} <group> [value]"
    local iface="" label=""
    local file name content

    # Validate the action and pick the interface file for the write actions.
    case "$action" in
        create|delete|show-stats)
            ;;
        set-memory-limit)
            iface="memory.max"
            label="memory limit"
            ;;
        set-cpu-limit)
            iface="cpu.max"
            label="CPU limit"
            ;;
        add-process)
            iface="cgroup.procs"
            ;;
        *)
            echo "$usage"
            return 1
            ;;
    esac

    # An empty group name would make every path resolve to the hierarchy root.
    if [ -z "$group_name" ]; then
        echo "Error: missing cgroup name" >&2
        echo "$usage" >&2
        return 1
    fi

    case "$action" in
        create)
            if [ -d "$group_dir" ]; then
                echo "Cgroup already exists: $group_name"
            elif sudo mkdir -p "$group_dir"; then
                echo "Created cgroup: $group_name"
            else
                echo "Error: failed to create cgroup: $group_name" >&2
                return 1
            fi
            ;;
        delete)
            if [ ! -d "$group_dir" ]; then
                echo "Cgroup not found: $group_name"
            elif sudo rmdir "$group_dir"; then
                echo "Deleted cgroup: $group_name"
            else
                echo "Error: failed to delete cgroup (does it still contain processes or child groups?): $group_name" >&2
                return 1
            fi
            ;;
        set-memory-limit|set-cpu-limit|add-process)
            if [ -z "$value" ]; then
                echo "Error: $action requires a value" >&2
                echo "$usage" >&2
                return 1
            fi
            # The interface file is expected to exist already; refusing to
            # write to a missing one gives a clearer error than a failed tee.
            if [ ! -e "$group_dir/$iface" ]; then
                echo "Error: $group_dir/$iface does not exist (was the cgroup created, and is the controller enabled in the parent's cgroup.subtree_control?)" >&2
                return 1
            fi
            # tee runs under sudo so the write itself is privileged; its copy
            # of the value on stdout is discarded to keep the output clean.
            if ! printf '%s\n' "$value" | sudo tee "$group_dir/$iface" >/dev/null; then
                echo "Error: failed to write '$value' to $group_dir/$iface" >&2
                return 1
            fi
            if [ "$action" = "add-process" ]; then
                echo "Added PID $value to cgroup $group_name"
            else
                echo "Set $label for $group_name: $value"
            fi
            ;;
        show-stats)
            if [ ! -d "$group_dir" ]; then
                echo "Cgroup not found: $group_name" >&2
                return 1
            fi
            echo "=== Cgroup Stats for $group_name ==="
            for file in "$group_dir"/*; do
                name=${file##*/}
                [ -f "$file" ] && [ "$name" != "cgroup.procs" ] || continue
                # Files that cannot be read are skipped instead of printing
                # an error for each of them.
                content=$(cat "$file" 2>/dev/null) || continue
                echo "$name: $content"
            done
            ;;
    esac
}

Resource Limiting Example

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Resource limiting demonstration
#
# Creates the cgroup "demo_limited", caps it at 50M of memory and 50% of one
# CPU, starts a Python process inside it that allocates memory in 1MB chunks,
# prints the group's memory.current / memory.max ten times at two-second
# intervals, then stops the process and deletes the cgroup.
#
# Relies on manage_cgroups_v2 being defined, and on sudo and python3.
# Arguments: none
# Returns:   1 if the cgroup cannot be created, otherwise 0.
resource_limiting_demo() {
    local group_name="demo_limited"
    local procs_file="/sys/fs/cgroup/$group_name/cgroup.procs"
    local demo_pid i
    # Kept in a variable and passed as an argument so that no Python quoting
    # has to survive a second level of shell quoting.
    local alloc_code='import array
data = []
try:
    while True:
        data.append(array.array("B", [0] * 1024 * 1024))  # 1MB chunks
        print(f"Allocated {len(data)} MB")
except MemoryError:
    print("Memory limit reached")
'

    echo "=== Resource Limiting Demo ==="

    # Create cgroup
    manage_cgroups_v2 create "$group_name" || return 1

    # Set limits
    manage_cgroups_v2 set-memory-limit "$group_name" "50M"  # 50 MB limit
    manage_cgroups_v2 set-cpu-limit "$group_name" "5000 10000"  # 50% CPU limit

    # Start memory-intensive process in cgroup
    # The shell first moves itself into the cgroup and then execs Python, so
    # the allocator runs under the limits from its first instruction.
    # NOTE(review): when memory.max is exceeded the kernel presumably
    # OOM-kills the process rather than Python raising MemoryError, so the
    # "Memory limit reached" line may never be printed — confirm.
    echo "Starting memory-intensive process..."
    sudo sh -c '
        echo "$$" > "$1" || exit 1
        exec python3 -c "$2"
    ' sh "$procs_file" "$alloc_code" &

    demo_pid=$!

    # Monitor resource usage
    echo "Monitoring resource usage..."
    for i in {1..10}; do
        echo "Memory usage:"
        manage_cgroups_v2 show-stats "$group_name" | grep -E "(memory.current|memory.max)"
        sleep 2
    done

    # Cleanup
    # Reap the background job before deleting the cgroup: a cgroup that still
    # contains a process cannot be removed.
    sudo kill "$demo_pid" 2>/dev/null || true
    wait "$demo_pid" 2>/dev/null
    manage_cgroups_v2 delete "$group_name"

    echo "Demo completed"
}

🧠 Namespace and Cgroup Integration

Container-like Environment

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Create container-like isolated environment
#
# Usage: create_isolated_environment <container_name>
#
# Creates the cgroup containers/<container_name> with a 100M memory limit and
# a 20% CPU limit, then starts an interactive bash in new mount, UTS, IPC,
# network and PID namespaces that has joined that cgroup. When the shell
# exits, the cgroup is deleted again.
#
# Relies on manage_cgroups_v2 being defined, and on sudo.
# Returns: 1 when the name is missing or the cgroup cannot be created,
#          otherwise 0.
create_isolated_environment() {
    local container_name="$1"
    local cgroup_name="containers/$container_name"
    local cgroup_dir="/sys/fs/cgroup/$cgroup_name"
    local controller

    if [ -z "$container_name" ]; then
        echo "Usage: create_isolated_environment <container_name>" >&2
        return 1
    fi

    echo "Creating isolated environment: $container_name"

    # Create cgroup
    manage_cgroups_v2 create "$cgroup_name" || return 1

    # A nested cgroup only gets memory.max / cpu.max once the controllers are
    # enabled in its parent's cgroup.subtree_control. Best effort: written one
    # controller at a time so an unavailable one does not block the other.
    for controller in cpu memory; do
        echo "+$controller" | sudo tee "/sys/fs/cgroup/containers/cgroup.subtree_control" >/dev/null ||
            echo "Warning: could not enable the $controller controller for containers/" >&2
    done

    manage_cgroups_v2 set-memory-limit "$cgroup_name" "100M"
    manage_cgroups_v2 set-cpu-limit "$cgroup_name" "10000 50000"  # 20% CPU

    # Create namespaces
    # The script is single-quoted and its inputs arrive as "$1"/"$2", so
    # $(hostname) and $$ are evaluated inside the namespaces (not by the
    # calling shell) and the container name cannot inject shell syntax.
    # --mount-proc remounts /proc so process listings match the new PID
    # namespace.
    sudo unshare --mount --uts --ipc --net --pid --fork --mount-proc sh -c '
        container_name=$1
        cgroup_procs=$2

        # Set hostname
        hostname "$container_name"

        # Create private mount
        mount --make-rprivate /
        mkdir -p /tmp/container_root
        mount -t tmpfs tmpfs /tmp/container_root

        # Move to cgroup
        echo "$$" > "$cgroup_procs"

        echo "=== Container Environment ==="
        echo "Hostname: $(hostname)"
        echo "PID: $$"
        echo "Mounts:"
        mount | head -10

        echo ""
        echo "Entering container shell..."
        exec bash
    ' sh "$container_name" "$cgroup_dir/cgroup.procs"

    # Cleanup
    manage_cgroups_v2 delete "$cgroup_name"
    echo "Isolated environment destroyed"
}

Namespace Inspection Tools

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Comprehensive namespace inspector
#
# Usage: inspect_namespaces [pid]
#   pid  process to inspect (default: "self", meaning the calling shell)
#
# Prints one "<type>: <link target>" line per entry in /proc/<pid>/ns,
# followed by the process's command name and state, plus its parent PID when
# an explicit pid was given.
#
# Returns: 1 if /proc/<pid>/ns does not exist, otherwise 0.
inspect_namespaces() {
    local pid="${1:-self}"
    local proc_dir="/proc/$pid"
    local ns ns_target comm stat_line stat_tail
    local state="" ppid=""

    # /proc/self is resolved by whichever process opens it, so reading it
    # through cat would describe cat instead of this shell. Pin it to the
    # shell's own PID.
    if [ "$pid" = "self" ]; then
        proc_dir="/proc/$$"
    fi

    echo "=== Namespace Inspection for PID: $pid ==="

    if [ ! -d "$proc_dir/ns" ]; then
        echo "Process $pid not found"
        return 1
    fi

    for ns in "$proc_dir"/ns/*; do
        ns_target=$(readlink "$ns" 2>/dev/null) || ns_target="unknown"
        echo "${ns##*/}: $ns_target"
    done

    comm=$(cat "$proc_dir/comm" 2>/dev/null) || comm="unknown"

    # In /proc/<pid>/stat the command name is wrapped in parentheses and may
    # itself contain spaces, so splitting the whole line on spaces is not
    # reliable. Everything after the last ") " starts with the state and the
    # parent PID.
    if stat_line=$(cat "$proc_dir/stat" 2>/dev/null); then
        stat_tail=${stat_line##*') '}
        read -r state ppid _ <<<"$stat_tail"
    fi

    echo ""
    echo "=== Process Information ==="
    echo "PID: $pid"
    echo "Command: $comm"
    echo "Status: ${state:-unknown}"

    if [ "$pid" != "self" ]; then
        echo "Parent PID: ${ppid:-unknown}"
    fi
}

🧪 Advanced Namespace Patterns

Namespace Persistence

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Persistent namespace management
#
# Usage: persistent_namespace <name> {create|enter|destroy}
#   create   start a background process that holds a new mount namespace open
#            and record its PID in /tmp/ns_<name>.pid
#   enter    open a bash inside that mount namespace (via nsenter)
#   destroy  terminate the holder process and remove the bookkeeping files
#
# Files: /tmp/ns_<name>.pid    PID of the process living inside the namespace
#        /tmp/ns_<name>.bgpid  PID of the background sudo job started by create
#
# Returns: 1 on a usage error or when the namespace cannot be entered,
#          otherwise 0 (destroy on an unknown name only prints a notice).
#
# NOTE(review): the bookkeeping files live under predictable names in /tmp and
# their contents are later passed to "sudo kill"; the contents are checked to
# be numeric, but a directory not writable by other users would be safer.
persistent_namespace() {
    local ns_name="$1"
    local action="$2"
    local ns_file="/tmp/ns_$ns_name"
    local usage="Usage: persistent_namespace <name> {create|enter|destroy}"
    local ns_pid bg_pid pid_file pid

    case "$action" in
        create|enter|destroy)
            if [ -z "$ns_name" ]; then
                echo "$usage" >&2
                return 1
            fi
            ;;
    esac

    case "$action" in
        create)
            # Create namespace and save reference.
            # The script is single-quoted so $$ is the PID of the process
            # inside the namespace; the name and path arrive as "$1"/"$2".
            # "exec sleep" makes the recorded PID the one process that keeps
            # the namespace alive, so destroy can terminate it directly.
            sudo unshare --mount --fork sh -c '
                ns_name=$1
                ns_file=$2

                # Save namespace reference
                echo "$$" > "$ns_file.pid"

                echo "Namespace $ns_name created (PID: $$)"
                echo "Reference saved to $ns_file"

                # Keep namespace alive
                exec sleep infinity
            ' sh "$ns_name" "$ns_file" &

            bg_pid=$!
            echo "$bg_pid" > "$ns_file.bgpid"
            echo "Namespace $ns_name background process: $bg_pid"
            ;;
        enter)
            if [ ! -f "$ns_file.pid" ]; then
                echo "Namespace $ns_name not found"
                return 1
            fi

            ns_pid=$(cat "$ns_file.pid")
            case "$ns_pid" in
                ''|*[!0-9]*)
                    echo "Invalid PID recorded in $ns_file.pid" >&2
                    return 1
                    ;;
            esac
            if [ ! -e "/proc/$ns_pid/ns/mnt" ]; then
                echo "Namespace $ns_name is no longer running (PID $ns_pid)" >&2
                return 1
            fi

            sudo nsenter --mount="/proc/$ns_pid/ns/mnt" bash
            ;;
        destroy)
            if [ ! -f "$ns_file.pid" ] && [ ! -f "$ns_file.bgpid" ]; then
                echo "Namespace $ns_name not found"
            else
                # Signal the process inside the namespace first, then the
                # background wrapper in case it is still around.
                for pid_file in "$ns_file.pid" "$ns_file.bgpid"; do
                    [ -f "$pid_file" ] || continue
                    pid=$(cat "$pid_file")
                    case "$pid" in
                        ''|*[!0-9]*)
                            echo "Ignoring invalid PID in $pid_file" >&2
                            ;;
                        *)
                            sudo kill "$pid" 2>/dev/null || true
                            ;;
                    esac
                done
                # The .pid file is written by root, so it is removed via sudo.
                # The files are named explicitly: a "$ns_file"* glob would
                # also match other namespaces whose name starts with this one.
                sudo rm -f -- "$ns_file.fd" "$ns_file.pid" "$ns_file.bgpid"
                echo "Namespace $ns_name destroyed"
            fi
            ;;
        *)
            echo "$usage"
            return 1
            ;;
    esac
}

Cross-Namespace Communication

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Cross-namespace communication setup
#
# Usage: setup_cross_namespace_comm <ns1> <ns2>
#
# Creates two network namespaces joined by a veth pair: <ns1>_veth
# (10.0.1.1/24) is placed in <ns1> and <ns2>_veth (10.0.1.2/24) in <ns2>.
# Loopback is brought up in both, and connectivity is tested by pinging <ns2>
# from inside <ns1>.
#
# Returns: 1 on invalid arguments or when any ip command fails, otherwise 0
#          (a failed connectivity test is reported but does not change the
#          return status).
setup_cross_namespace_comm() {
    local ns1="$1"
    local ns2="$2"
    local if1="${ns1}_veth"
    local if2="${ns2}_veth"

    if [ -z "$ns1" ] || [ -z "$ns2" ] || [ "$ns1" = "$ns2" ]; then
        echo "Usage: setup_cross_namespace_comm <ns1> <ns2> (two different names)" >&2
        return 1
    fi
    # Linux interface names are limited to 15 characters.
    if [ "${#if1}" -gt 15 ] || [ "${#if2}" -gt 15 ]; then
        echo "Error: namespace names are too long; '<name>_veth' must not exceed 15 characters" >&2
        return 1
    fi

    # Create network namespaces
    sudo ip netns add "$ns1" || return 1
    sudo ip netns add "$ns2" || return 1

    # Create virtual ethernet pair
    sudo ip link add "$if1" type veth peer name "$if2" || return 1

    # Move interfaces to respective namespaces
    # Both ends are moved; leaving one end on the host would connect the host,
    # not <ns1>, to <ns2>.
    sudo ip link set "$if1" netns "$ns1" || return 1
    sudo ip link set "$if2" netns "$ns2" || return 1

    # Configure interfaces
    sudo ip netns exec "$ns1" ip addr add "10.0.1.1/24" dev "$if1" || return 1
    sudo ip netns exec "$ns1" ip link set "$if1" up || return 1
    sudo ip netns exec "$ns1" ip link set lo up || return 1

    sudo ip netns exec "$ns2" ip addr add "10.0.1.2/24" dev "$if2" || return 1
    sudo ip netns exec "$ns2" ip link set "$if2" up || return 1
    sudo ip netns exec "$ns2" ip link set lo up || return 1

    echo "Cross-namespace communication established"
    echo "NS1 IP: 10.0.1.1"
    echo "NS2 IP: 10.0.1.2"

    # Test connectivity
    echo "Testing connectivity..."
    if sudo ip netns exec "$ns1" ping -c 2 -W 1 10.0.1.2 >/dev/null 2>&1; then
        echo "✓ Connectivity OK"
    else
        echo "✗ Connectivity failed"
    fi
}

# Cleanup cross-namespace setup
#
# Usage: cleanup_cross_namespace_comm <ns1> <ns2>
#
# Best-effort teardown: deletes both network namespaces and the host-side
# interface <ns1>_veth if it exists. Failures are deliberately ignored so the
# function can be re-run safely; it always returns 0.
cleanup_cross_namespace_comm() {
    local ns1="$1"
    local ns2="$2"
    local ns

    # "ip netns delete" acts on a single name, so each namespace is deleted
    # with its own call.
    for ns in "$ns1" "$ns2"; do
        [ -n "$ns" ] || continue
        sudo ip netns delete "$ns" 2>/dev/null || true
    done

    # Only present when the veth end was left on the host; otherwise it has
    # already gone away together with its namespace.
    if [ -n "$ns1" ]; then
        sudo ip link delete "${ns1}_veth" 2>/dev/null || true
    fi

    echo "Cross-namespace communication cleaned up"
}

🧾 Security Considerations

Namespace Security Best Practices

  1. User Namespace Restrictions

    1
    2
    3
    # Limit user namespace creation
    # Appends the setting to /etc/sysctl.conf and reloads that file. Because
    # of the >> append, running this more than once adds duplicate lines.
    # NOTE(review): a limit of 0 presumably blocks user namespace creation for
    # every user, which would also break rootless containers and sandboxes
    # that rely on it — confirm before applying.
    echo "user.max_user_namespaces = 0" >> /etc/sysctl.conf
    sysctl -p
    

  2. Cgroup Resource Limits

    1
    2
    3
    # Prevent resource exhaustion
    # Limits are applied to a child cgroup (here: demo_limited, which must
    # already exist): memory.max and cpu.max are not available on the root
    # cgroup. memory.max is given in bytes, with K/M/G suffixes accepted;
    # cpu.max is "<quota> <period>" in microseconds (10000/50000 = 20%).
    echo "512M" > /sys/fs/cgroup/demo_limited/memory.max
    echo "10000 50000" > /sys/fs/cgroup/demo_limited/cpu.max
    

  3. Namespace Escape Prevention

    1
    2
    # Disable unprivileged user namespace creation
    # (the trailing note marks this sysctl as Ubuntu/Debian-specific)
    # NOTE(review): a direct write under /proc/sys is presumably lost on
    # reboot — confirm, and persist it through sysctl configuration if needed.
    echo 0 > /proc/sys/kernel/unprivileged_userns_clone  # Ubuntu/Debian
    

Monitoring Namespace Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
# Namespace usage monitor
#
# Prints a report made of: a timestamp, the first 20 lines of lsns, the
# kernel.unprivileged_userns_clone sysctl (or "Not applicable" when sysctl
# fails), the result of check_cgroups_version and, when that result is
# "Cgroups v2", the first 20 lines of systemd-cgls.
#
# Relies on check_cgroups_version being defined.
monitor_namespaces() {
    printf '%s\n' "=== Namespace Usage Report ==="
    printf '%s\n' "Timestamp: $(date)"
    printf '\n'

    printf '%s\n' "Active namespaces:"
    lsns | head -20

    printf '\n%s\n' "User namespace restrictions:"
    if ! sysctl kernel.unprivileged_userns_clone 2>/dev/null; then
        printf '%s\n' "Not applicable"
    fi

    printf '\n%s\n' "Cgroups version: $(check_cgroups_version)"

    # The cgroup tree is only listed on a v2 system.
    [ "$(check_cgroups_version)" = "Cgroups v2" ] || return 0
    printf '%s\n' "Active cgroups:"
    systemd-cgls | head -20
}

🧾 Summary

Linux namespaces and cgroups provide powerful isolation and resource management:

Key Benefits

  • Process Isolation - Complete separation between environments
  • Resource Control - Precise CPU, memory, and I/O limits
  • Security Enhancement - Reduced attack surface and lower risk of privilege escalation
  • Container Foundation - Core technology behind Docker and Kubernetes
  • System Stability - Prevent resource exhaustion and system crashes

Advanced Use Cases

  • Microservices Architecture - Isolated service deployment
  • Multi-tenant Systems - Secure resource sharing
  • Development Environments - Consistent testing setups
  • Security Sandboxing - Protected application execution
  • Performance Optimization - Guaranteed resource allocation

Enterprise Applications

  • Cloud Infrastructure - Scalable container orchestration
  • DevOps Pipelines - Reproducible build environments
  • Security Compliance - Regulatory requirement satisfaction
  • Cost Optimization - Efficient resource utilization
  • Disaster Recovery - Rapid environment restoration

👉 Continue to: Linux Debugging Tools