linux 内核开发之 for_each_online_cpu vs for_each_possible_cpu 详细对比

1. 基本概念区别

1.1 Online CPU vs Possible CPU

// Online CPU：当前正在运行的CPU
// Possible CPU：系统中可能存在的所有CPU（包括当前离线的）

// 理解CPU状态的层次结构：
/*
所有CPU状态层次：
├── Possible CPUs (可能的CPU)
│   ├── Present CPUs (存在的CPU) 
│   │   ├── Online CPUs (在线的CPU)
│   │   │   └── Active CPUs (活跃的CPU)
│   │   └── Offline CPUs (离线的CPU)
│   └── Not Present CPUs (不存在的CPU)
└── Impossible CPUs (不可能的CPU)
*/

// Basic example
/*
 * show_cpu_differences - log the possible, online and present CPU sets.
 *
 * Each set is reported as a header with the set size followed by the CPU
 * ids on the same log line. The ids and the closing newline are emitted
 * with KERN_CONT: a printk() without it starts a new log record, which
 * would break the list into one record per CPU.
 */
void show_cpu_differences(void) {
    int cpu;

    printk("=== CPU Status Comparison ===\n");

    printk("Possible CPUs (%d total): ", num_possible_cpus());
    for_each_possible_cpu(cpu) {
        printk(KERN_CONT "%d ", cpu);
    }
    printk(KERN_CONT "\n");

    printk("Online CPUs (%d total): ", num_online_cpus());
    for_each_online_cpu(cpu) {
        printk(KERN_CONT "%d ", cpu);
    }
    printk(KERN_CONT "\n");

    printk("Present CPUs (%d total): ", num_present_cpus());
    for_each_present_cpu(cpu) {
        printk(KERN_CONT "%d ", cpu);
    }
    printk(KERN_CONT "\n");
}

/* 典型输出示例：
 * === CPU Status Comparison ===
 * Possible CPUs (8 total): 0 1 2 3 4 5 6 7 
 * Online CPUs (6 total): 0 1 2 3 4 5 
 * Present CPUs (8 total): 0 1 2 3 4 5 6 7 
 * 
 * 说明：系统有8个可能的CPU，都物理存在，但只有6个在线
 */

1.2 CPU状态转换

// CPU state can change at run time

/* Map a state flag to the text used in the status report. */
static const char *hotplug_demo_yes_no(int flag) {
    if (flag) {
        return "yes";
    }
    return "no";
}

/*
 * demonstrate_cpu_hotplug - log the possible/present/online/active flags
 * of CPU 2, one line per flag.
 */
void demonstrate_cpu_hotplug(void) {
    const int target = 2;

    // Report the CPU's current state
    printk("CPU %d status:\n", target);
    printk("  possible: %s\n", hotplug_demo_yes_no(cpu_possible(target)));
    printk("  present:  %s\n", hotplug_demo_yes_no(cpu_present(target)));
    printk("  online:   %s\n", hotplug_demo_yes_no(cpu_online(target)));
    printk("  active:   %s\n", hotplug_demo_yes_no(cpu_active(target)));

    // Hotplug itself is driven from user space (needs suitable privileges):
    /*
    echo 0 > /sys/devices/system/cpu/cpu2/online  // take CPU 2 offline
    echo 1 > /sys/devices/system/cpu/cpu2/online  // bring CPU 2 online
    */
}

// State query helpers: each takes a CPU number and returns an int flag
static inline int cpu_possible(int cpu);   // may this CPU ever exist?
static inline int cpu_present(int cpu);    // is this CPU physically present?  
static inline int cpu_online(int cpu);     // is this CPU online?
static inline int cpu_active(int cpu);     // is this CPU active?

2. 使用场景差异

2.1 初始化场景 – 使用 for_each_possible_cpu

// ✅ Correct: initialize every possible CPU
// Per-CPU state record and per-CPU counter used by the examples that follow.
DEFINE_PER_CPU(struct cpu_data, cpu_info);
DEFINE_PER_CPU(int, cpu_counter);

/*
 * init_percpu_data - set up cpu_info and cpu_counter for every possible CPU.
 *
 * Offline CPUs are initialized too, so their data is ready if they come
 * online later. If any buffer allocation fails, every buffer allocated so
 * far is released again.
 *
 * Return: 0 on success, -ENOMEM if a buffer could not be allocated.
 */
static int __init init_percpu_data(void) {
    struct cpu_data *info;
    int err = 0;
    int cpu;

    printk("Initializing per-CPU data for all possible CPUs\n");

    // Walk all possible CPUs, including the ones that are currently offline
    for_each_possible_cpu(cpu) {
        info = per_cpu_ptr(&cpu_info, cpu);

        // Plain fields first
        info->cpu_id = cpu;
        info->state = CPU_STATE_INIT;
        info->counter = 0;

        info->buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
        if (info->buffer == NULL) {
            err = -ENOMEM;
            break;
        }

        INIT_LIST_HEAD(&info->work_list);
        spin_lock_init(&info->lock);

        // Reset this CPU's counter
        *per_cpu_ptr(&cpu_counter, cpu) = 0;

        printk("  Initialized CPU %d (online: %s)\n", 
               cpu, cpu_online(cpu) ? "yes" : "no");
    }

    if (err == 0) {
        return 0;
    }

    // Failure: drop whatever was allocated (kfree() ignores NULL)
    for_each_possible_cpu(cpu) {
        info = per_cpu_ptr(&cpu_info, cpu);
        kfree(info->buffer);
        info->buffer = NULL;
    }
    return err;
}

// ❌ WRONG: only online CPUs are initialized
/*
 * wrong_init_percpu_data - deliberately flawed counter-example.
 *
 * Only CPUs that are online right now get their cpu_id set. A CPU that is
 * offline during this call (say CPU 2) is skipped, so its data is still
 * uninitialized when it comes online later, which may lead to a crash.
 *
 * Return: always 0.
 */
static int __init wrong_init_percpu_data(void) {
    int cpu;

    for_each_online_cpu(cpu) {
        // Touches online CPUs only -- this is the mistake being illustrated
        per_cpu(cpu_info, cpu).cpu_id = cpu;
    }

    return 0;
}

/* 问题场景：
 * 1. 系统启动时CPU 2离线
 * 2. wrong_init_percpu_data只初始化CPU 0,1
 * 3. 运行时CPU 2被上线
 * 4. 访问CPU 2的cpu_info时，数据未初始化！
 */

2.2 清理场景 – 使用 for_each_possible_cpu

// ✅ Correct: clean up every possible CPU
/*
 * cleanup_percpu_data - release the buffer and any queued work items held
 * in cpu_info, for every possible CPU (offline ones included).
 */
static void __exit cleanup_percpu_data(void) {
    struct work_item *entry, *next;
    struct cpu_data *info;
    int cpu;

    printk("Cleaning up per-CPU data for all possible CPUs\n");

    for_each_possible_cpu(cpu) {
        info = per_cpu_ptr(&cpu_info, cpu);

        // kfree() accepts NULL, so no guard is needed
        kfree(info->buffer);
        info->buffer = NULL;

        // Drain leftover work items; the walk is a no-op on an empty list
        list_for_each_entry_safe(entry, next, &info->work_list, list) {
            list_del(&entry->list);
            kfree(entry);
        }

        printk("  Cleaned up CPU %d\n", cpu);
    }
}

// ❌ WRONG: only online CPUs are cleaned up
/*
 * wrong_cleanup_percpu_data - deliberately flawed counter-example.
 *
 * Buffers are freed for online CPUs only, so the buffer of any CPU that is
 * offline at this point is leaked.
 */
static void __exit wrong_cleanup_percpu_data(void) {
    int cpu;

    for_each_online_cpu(cpu) {
        // Offline CPUs never reach this line -- their memory leaks
        // (kfree() ignores a NULL buffer)
        kfree(per_cpu(cpu_info, cpu).buffer);
    }
}

2.3 统计收集场景 – 使用 for_each_online_cpu

// ✅ Correct: aggregate data from online CPUs only
// Per-CPU performance counters that the collectors below read.
DEFINE_PER_CPU(struct performance_stats, perf_stats);

// System-wide totals filled in by collect_performance_stats().
struct global_performance_stats {
    unsigned long total_operations;  // sum of per-CPU operations
    unsigned long total_errors;      // sum of per-CPU errors
    unsigned long total_time;        // sum of per-CPU runtime
    unsigned int active_cpus;        // num_online_cpus() sampled after the walk
    unsigned int reporting_cpus;     // number of CPUs visited by the walk
};

/*
 * collect_performance_stats - sum perf_stats over the online CPUs.
 * @global: output; zeroed first, then filled with the totals
 *
 * Logs one line per CPU and a final summary line.
 *
 * NOTE(review): counters accumulated by a CPU that has since gone offline
 * are left out of the totals -- confirm that is the intended semantics.
 */
void collect_performance_stats(struct global_performance_stats *global) {
    struct performance_stats *cur;
    int cpu;

    memset(global, 0, sizeof(*global));

    printk("Collecting performance statistics from online CPUs\n");

    // Offline CPUs are skipped on purpose
    for_each_online_cpu(cpu) {
        cur = &per_cpu(perf_stats, cpu);

        global->reporting_cpus++;
        global->total_time += cur->runtime;
        global->total_errors += cur->errors;
        global->total_operations += cur->operations;

        printk("  CPU %d: ops=%lu, errors=%lu, time=%lu\n",
               cpu, cur->operations, cur->errors, cur->runtime);
    }

    // Sampled separately from the walk above
    global->active_cpus = num_online_cpus();

    printk("Global stats: %lu ops, %lu errors, %u CPUs\n",
           global->total_operations, global->total_errors, global->active_cpus);
}

// ❌ WRONG: the totals include offline CPUs
/*
 * wrong_collect_stats - counter-example: sums operations over every
 * possible CPU, so stale values from offline CPUs enter the total.
 * @global: output; zeroed first, only total_operations is filled in
 */
void wrong_collect_stats(struct global_performance_stats *global) {
    int cpu;

    memset(global, 0, sizeof(*global));

    for_each_possible_cpu(cpu) {
        // May pick up old data belonging to an offline CPU
        global->total_operations += per_cpu(perf_stats, cpu).operations;
    }
}

2.4 负载均衡场景 – 使用 for_each_online_cpu

// ✅ Correct: load balancing only considers online CPUs
// Per-CPU inputs of the load estimate: queued work count and load percentage.
DEFINE_PER_CPU(unsigned int, work_queue_length);
DEFINE_PER_CPU(unsigned int, cpu_load_percentage);

/*
 * find_least_loaded_cpu - pick the online CPU with the lowest combined load.
 *
 * The score of a CPU is its load percentage plus ten times its queue
 * length. Ties go to the CPU visited first. Every candidate is logged.
 *
 * Return: the chosen CPU number, or -1 if no online CPU was visited.
 */
int find_least_loaded_cpu(void) {
    unsigned int lowest = UINT_MAX;
    int chosen = -1;
    int cpu;

    // Offline CPUs cannot take work, so only online ones are candidates
    for_each_online_cpu(cpu) {
        unsigned int pct = per_cpu(cpu_load_percentage, cpu);
        unsigned int queued = per_cpu(work_queue_length, cpu);
        unsigned int score = queued * 10 + pct;

        if (score < lowest) {
            chosen = cpu;
            lowest = score;
        }

        printk("CPU %d: load=%u%%, queue=%u, total_load=%u\n",
               cpu, pct, queued, score);
    }

    if (chosen < 0) {
        return chosen;
    }

    printk("Selected CPU %d (load=%u)\n", chosen, lowest);
    return chosen;
}

// Work dispatch helper
/*
 * schedule_work_on_best_cpu - queue @work on the least loaded online CPU.
 * @work: work item to queue
 *
 * The queue-length counter of the *target* CPU is incremented, and only
 * when the item was actually queued. Incrementing the calling CPU's counter
 * (as this_cpu_inc() would) charges the wrong CPU and skews later choices
 * made by find_least_loaded_cpu().
 *
 * NOTE(review): the increment is a plain read-modify-write on another
 * CPU's variable; nothing here serializes it against other updaters.
 *
 * Return: 0 on success (including when @work was already pending),
 * -ENODEV if no online CPU could be selected.
 */
int schedule_work_on_best_cpu(struct work_struct *work) {
    int cpu = find_least_loaded_cpu();

    if (cpu < 0) {
        printk(KERN_ERR "No online CPU available for work\n");
        return -ENODEV;
    }

    // schedule_work_on() returns false if the item was already pending;
    // in that case nothing new was queued and the counter must not move.
    if (schedule_work_on(cpu, work)) {
        per_cpu(work_queue_length, cpu)++;
    }

    return 0;
}

3. CPU热插拔场景

3.3 CPU热插拔通知处理

// React to CPU state changes
/*
 * percpu_cpu_callback - hotplug notifier that keeps cpu_info in step with
 * a CPU going up or down.
 * @nfb:    notifier block (not used)
 * @action: hotplug event being reported
 * @hcpu:   CPU number, passed as a pointer-sized integer
 *
 * CPU_UP_PREPARE makes sure the buffer exists, CPU_ONLINE starts the
 * per-CPU worker thread, CPU_DEAD stops it. The buffer is kept across
 * offline periods because the CPU may come back.
 *
 * NOTE(review): this uses the notifier-based hotplug interface; confirm it
 * is still available on the kernel version being targeted.
 *
 * Return: NOTIFY_BAD if the buffer cannot be allocated in CPU_UP_PREPARE,
 * NOTIFY_OK otherwise.
 */
static int percpu_cpu_callback(struct notifier_block *nfb,
                              unsigned long action, void *hcpu) {
    unsigned int cpu = (unsigned long)hcpu;
    struct cpu_data *data = per_cpu_ptr(&cpu_info, cpu);

    switch (action) {
    case CPU_UP_PREPARE:
        printk("CPU %d coming online - preparing\n", cpu);

        // The CPU is about to come up: make sure its buffer exists
        if (!data->buffer) {
            data->buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
            if (!data->buffer) {
                printk(KERN_ERR "Failed to allocate buffer for CPU %d\n", cpu);
                return NOTIFY_BAD;
            }
        }

        data->state = CPU_STATE_ONLINE;
        data->online_time = jiffies;
        break;

    case CPU_ONLINE:
        printk("CPU %d is now online\n", cpu);

        // Start the CPU-bound worker
        data->worker_thread = kthread_create_on_cpu(cpu_worker_thread,
                                                   data, cpu, "worker/%u");
        if (IS_ERR(data->worker_thread)) {
            // Never leave an ERR_PTR behind: CPU_DEAD only tests for
            // non-NULL and would hand the error value to kthread_stop().
            printk(KERN_ERR "Failed to create worker thread for CPU %u\n", cpu);
            data->worker_thread = NULL;
            break;
        }
        wake_up_process(data->worker_thread);
        break;

    case CPU_DEAD:
        printk("CPU %d went offline\n", cpu);

        // Stop the CPU-bound worker, if one is running
        if (data->worker_thread) {
            kthread_stop(data->worker_thread);
            data->worker_thread = NULL;
        }

        data->state = CPU_STATE_OFFLINE;
        data->offline_time = jiffies;

        // The buffer is intentionally kept: the CPU may come online again
        break;

    default:
        break;
    }

    return NOTIFY_OK;
}

// Notifier block that routes hotplug events to percpu_cpu_callback().
static struct notifier_block percpu_cpu_notifier = {
    .notifier_call = percpu_cpu_callback,
};

// Register the CPU hotplug notifier.
// Returns whatever register_cpu_notifier() returns for percpu_cpu_notifier.
// NOTE(review): confirm register_cpu_notifier() exists on the target kernel.
static int __init register_cpu_hotplug(void) {
    return register_cpu_notifier(&percpu_cpu_notifier);
}

// 处理CPU状态变化时的数据迁移
void migrate_cpu_data(int src_cpu, int dest_cpu) {
    struct cpu_data *src_data, *dest_data;

    if (!cpu_online(dest_cpu)) {
        printk(KERN_ERR "Destination CPU %d is offline\n", dest_cpu);
        return;
    }

    src_data = per_cpu_ptr(&cpu_info, src_cpu);
    dest_data = per_cpu_ptr(&cpu_info, dest_cpu);

    // 迁移工作队列
    spin_lock(&src_data->lock);
    spin_lock(&dest_data->lock);

    list_splice_tail_init(&src_data->work_list, &dest_data->work_list);
    dest_data->counter += src_data->counter;
    src_data->counter = 0;

    spin_unlock(&dest_data->lock);
    spin_unlock(&src_data->lock);

    printk("Migrated data from CPU %d to CPU %d\n", src_cpu, dest_cpu);
}

4. 实际应用对比示例

4.1 网络子系统统计

// Network statistics done the right way
// Packet, byte and error counters, one set per direction.
struct net_device_stats {
    unsigned long rx_packets;
    unsigned long tx_packets;
    unsigned long rx_bytes;
    unsigned long tx_bytes;
    unsigned long rx_errors;
    unsigned long tx_errors;
};

// One counter set per CPU.
DEFINE_PER_CPU(struct net_device_stats, net_stats);

// Initialization: uses for_each_possible_cpu
/*
 * init_net_stats - zero net_stats on every possible CPU and log each one.
 *
 * Return: always 0.
 */
static int __init init_net_stats(void) {
    int cpu;

    for_each_possible_cpu(cpu) {
        memset(per_cpu_ptr(&net_stats, cpu), 0,
               sizeof(struct net_device_stats));
        printk("Initialized network stats for CPU %d\n", cpu);
    }

    return 0;
}

// Data update: acts on the current CPU (which is necessarily online)
/*
 * update_rx_stats - account one received packet of @len bytes on the
 * CPU executing the call.
 * @dev: network device (not used by this helper)
 * @len: packet length in bytes
 */
static inline void update_rx_stats(struct net_device *dev, 
                                  unsigned int len) {
    // The running CPU's counters are updated; that CPU is online by definition
    this_cpu_add(net_stats.rx_packets, 1);
    this_cpu_add(net_stats.rx_bytes, len);
}

// Statistics collection: uses for_each_online_cpu
/*
 * get_net_device_stats - add up net_stats over the online CPUs.
 * @dev:   network device (not used by this function)
 * @total: output; zeroed first, then filled with the sums
 *
 * NOTE(review): counts recorded on a CPU that is offline at call time are
 * not part of the result -- confirm that is intended for cumulative
 * counters.
 */
void get_net_device_stats(struct net_device *dev, 
                         struct net_device_stats *total) {
    struct net_device_stats *part;
    int cpu;

    memset(total, 0, sizeof(*total));

    // Online CPUs only
    for_each_online_cpu(cpu) {
        part = &per_cpu(net_stats, cpu);

        // Receive side
        total->rx_packets += part->rx_packets;
        total->rx_bytes += part->rx_bytes;
        total->rx_errors += part->rx_errors;

        // Transmit side
        total->tx_packets += part->tx_packets;
        total->tx_bytes += part->tx_bytes;
        total->tx_errors += part->tx_errors;
    }
}

// Debug output: shows every possible CPU
/*
 * debug_net_stats - log RX/TX packet and byte counts of every possible
 * CPU, tagged with whether the CPU is currently online.
 */
void debug_net_stats(void) {
    int cpu;

    printk("=== Network Statistics Debug ===\n");

    for_each_possible_cpu(cpu) {
        struct net_device_stats *s = &per_cpu(net_stats, cpu);
        const char *status;

        if (cpu_online(cpu)) {
            status = "online";
        } else {
            status = "offline";
        }

        printk("CPU %d (%s): RX=%lu/%lu, TX=%lu/%lu\n",
               cpu, status,
               s->rx_packets, s->rx_bytes,
               s->tx_packets, s->tx_bytes);
    }
}

4.2 内存分配器per-CPU缓存

// Per-CPU object cache for a memory allocator
#define PERCPU_CACHE_SIZE 64

// One CPU's cache: a stack of object pointers plus usage counters.
struct percpu_cache {
    void *objects[PERCPU_CACHE_SIZE];  // cached objects; slots [0, count) are in use
    int count;                         // number of cached objects
    unsigned long alloc_count;         // allocations served from the cache
    unsigned long free_count;          // frees absorbed by the cache
    unsigned long miss_count;          // allocations that found the cache empty
    spinlock_t lock;                   // initialized in init_percpu_cache()
};

DEFINE_PER_CPU_ALIGNED(struct percpu_cache, object_cache);

// Initialization: uses for_each_possible_cpu
/*
 * init_percpu_cache - reset object_cache on every possible CPU and
 * pre-fill it with up to PERCPU_CACHE_SIZE / 2 objects.
 *
 * Pre-filling is best effort: a failed allocation is skipped. Each object
 * is stored at objects[count], so the occupied slots stay contiguous in
 * [0, count). Storing at the loop index instead would leave a NULL hole
 * inside the counted range after a failure and park later objects above
 * count, where nothing ever pops or frees them.
 *
 * Return: always 0.
 */
static int __init init_percpu_cache(void) {
    int cpu;

    for_each_possible_cpu(cpu) {
        struct percpu_cache *cache = per_cpu_ptr(&object_cache, cpu);

        cache->count = 0;
        cache->alloc_count = 0;
        cache->free_count = 0;
        cache->miss_count = 0;
        spin_lock_init(&cache->lock);

        // Pre-fill with a few objects
        for (int i = 0; i < PERCPU_CACHE_SIZE / 2; i++) {
            void *obj = kmalloc(OBJECT_SIZE, GFP_KERNEL);

            if (!obj) {
                continue;
            }
            cache->objects[cache->count++] = obj;
        }

        printk("Initialized cache for CPU %d with %d objects\n", 
               cpu, cache->count);
    }

    return 0;
}

// Allocation: acts on the current CPU
/*
 * alloc_from_cache - take an object from the running CPU's cache, falling
 * back to kmalloc() when the cache is empty.
 *
 * Preemption is disabled while the cache is touched. A hit bumps
 * alloc_count, a miss bumps miss_count.
 *
 * Return: the object, or whatever kmalloc(OBJECT_SIZE, GFP_KERNEL)
 * returns on a cache miss.
 */
void *alloc_from_cache(void) {
    struct percpu_cache *pcc;
    void *obj;

    preempt_disable();
    pcc = this_cpu_ptr(&object_cache);

    if (pcc->count <= 0) {
        pcc->miss_count++;
        obj = NULL;
    } else {
        pcc->count--;
        obj = pcc->objects[pcc->count];
        pcc->alloc_count++;
    }

    preempt_enable();

    // Nothing came out of the cache: use the general allocator
    return obj ? obj : kmalloc(OBJECT_SIZE, GFP_KERNEL);
}

// 释放：在当前CPU上操作
void free_to_cache(void *obj) {
    struct percpu_cache *cache;

    if (!obj) return;

    preempt_disable();
    cache = this_cpu_ptr(&object_cache);

    if (cache->count < PERCPU_CACHE_SIZE) {
        cache->objects[cache->count++] = obj;
        cache->free_count++;
        obj = NULL;  // 成功放入缓存
    }

    preempt_enable();

    // 缓存已满，直接释放
    if (obj) {
        kfree(obj);
    }
}

// Statistics collection: uses for_each_online_cpu
/*
 * show_cache_statistics - log per-CPU and total object-cache counters for
 * the online CPUs, followed by the overall hit rate.
 *
 * alloc_from_cache() increments alloc_count only on a hit and miss_count
 * only on a miss, so the number of requests is their sum and the hit rate
 * is hits / (hits + misses). Computing (allocs - misses) / allocs instead
 * under-reports the rate and wraps around to a huge value whenever misses
 * outnumber hits.
 */
void show_cache_statistics(void) {
    int cpu;
    unsigned long total_allocs = 0, total_frees = 0, total_misses = 0;
    unsigned long total_requests;
    unsigned int total_cached = 0;

    printk("=== Per-CPU Cache Statistics ===\n");

    for_each_online_cpu(cpu) {
        struct percpu_cache *cache = per_cpu_ptr(&object_cache, cpu);

        printk("CPU %d: cached=%d, allocs=%lu, frees=%lu, misses=%lu\n",
               cpu, cache->count, cache->alloc_count, 
               cache->free_count, cache->miss_count);

        total_cached += cache->count;
        total_allocs += cache->alloc_count;
        total_frees += cache->free_count;
        total_misses += cache->miss_count;
    }

    printk("Total: cached=%u, allocs=%lu, frees=%lu, misses=%lu\n",
           total_cached, total_allocs, total_frees, total_misses);

    // Hits plus misses; the rate is skipped when nothing was requested
    total_requests = total_allocs + total_misses;
    if (total_requests > 0) {
        printk("Cache hit rate: %lu%%\n", 
               total_allocs * 100 / total_requests);
    }
}

// Cleanup: uses for_each_possible_cpu
/*
 * cleanup_percpu_cache - kfree() every object still held in object_cache,
 * on every possible CPU, and log how many each CPU held.
 */
static void __exit cleanup_percpu_cache(void) {
    struct percpu_cache *pcc;
    int cpu;
    int slot;

    for_each_possible_cpu(cpu) {
        pcc = &per_cpu(object_cache, cpu);

        // Release everything in the occupied part of the array
        slot = 0;
        while (slot < pcc->count) {
            kfree(pcc->objects[slot]);
            slot++;
        }

        printk("Cleaned up cache for CPU %d (%d objects)\n", 
               cpu, pcc->count);
    }
}

5. 性能和行为差异

5.1 性能测试对比

// Benchmark: cost of walking the different CPU sets
/*
 * benchmark_cpu_iteration - time 1000 walks over the possible CPUs and
 * 1000 walks over the online CPUs, and log the average time per walk in
 * nanoseconds together with the number of CPUs seen in the last walk.
 *
 * NOTE(review): the average is a plain 64-bit division -- confirm that is
 * acceptable on every architecture this is built for.
 */
void benchmark_cpu_iteration(void) {
    const int rounds = 1000;
    ktime_t t0, t1;
    s64 elapsed_ns;
    int cpu, seen;
    int r;

    // Pass 1: for_each_possible_cpu
    t0 = ktime_get();
    for (r = 0; r < rounds; r++) {
        seen = 0;
        for_each_possible_cpu(cpu) {
            seen++;  // trivial body
        }
    }
    t1 = ktime_get();
    elapsed_ns = ktime_to_ns(ktime_sub(t1, t0));
    printk("for_each_possible_cpu: %lld ns (%d CPUs scanned)\n",
           elapsed_ns / rounds, seen);

    // Pass 2: for_each_online_cpu
    t0 = ktime_get();
    for (r = 0; r < rounds; r++) {
        seen = 0;
        for_each_online_cpu(cpu) {
            seen++;
        }
    }
    t1 = ktime_get();
    elapsed_ns = ktime_to_ns(ktime_sub(t1, t0));
    printk("for_each_online_cpu: %lld ns (%d CPUs scanned)\n",
           elapsed_ns / rounds, seen);

    printk("Performance difference: for_each_online_cpu is typically faster\n");
}

/* 典型结果：
 * for_each_possible_cpu: 1500 ns (8 CPUs scanned)
 * for_each_online_cpu: 900 ns (6 CPUs scanned)  
 * 
 * for_each_online_cpu更快，因为扫描的CPU更少
 */

5.2 正确性验证

// Check that the different walks agree with each other
/*
 * verify_cpu_iteration_correctness - count the possible and online CPUs by
 * walking them, log the counts next to num_possible_cpus() and
 * num_online_cpus(), and report whether online <= possible holds.
 */
void verify_cpu_iteration_correctness(void) {
    int n_possible = 0;
    int n_online = 0;
    int cpu;

    printk("=== CPU Iteration Correctness Verification ===\n");

    // Count the possible CPUs, logging the state of each
    for_each_possible_cpu(cpu) {
        n_possible++;
        printk("Possible CPU %d: online=%s, present=%s\n",
               cpu, cpu_online(cpu) ? "yes" : "no",
               cpu_present(cpu) ? "yes" : "no");
    }

    // Count the online CPUs; each one must also be a possible CPU
    for_each_online_cpu(cpu) {
        n_online++;
        if (cpu_possible(cpu)) {
            continue;
        }
        printk(KERN_ERR "ERROR: Online CPU %d is not possible!\n", cpu);
    }

    printk("Verification results:\n");
    printk("  Possible CPUs: %d (num_possible_cpus=%d)\n", 
           n_possible, num_possible_cpus());
    printk("  Online CPUs: %d (num_online_cpus=%d)\n",
           n_online, num_online_cpus());

    // Expected relation: online is a subset of possible
    if (n_online > n_possible) {
        printk("  ✗ ERROR: More online CPUs than possible CPUs!\n");
    } else {
        printk("  ✓ Correct: online_cpus ⊆ possible_cpus\n");
    }
}

6. 最佳实践总结

6.1 选择决策流程图

// Decision helper
enum cpu_iteration_type {
    ITER_POSSIBLE,   // for_each_possible_cpu
    ITER_ONLINE,     // for_each_online_cpu
    ITER_PRESENT     // for_each_present_cpu
};

/*
 * choose_iteration_type - map an operation description to the CPU walk
 * that suits it.
 * @operation: free-form description, searched for keywords as substrings
 *
 * The keyword table is scanned top to bottom and the first keyword found
 * in @operation decides the result, so init/cleanup keywords take
 * precedence over statistics keywords, which precede work-distribution
 * keywords, which precede hardware keywords.
 *
 * Return: the iteration type for the first matching keyword, or
 * ITER_ONLINE when no keyword matches.
 */
enum cpu_iteration_type choose_iteration_type(const char *operation) {
    static const struct {
        const char *keyword;
        enum cpu_iteration_type type;
    } rules[] = {
        // init / cleanup -> possible
        { "init",     ITER_POSSIBLE },
        { "cleanup",  ITER_POSSIBLE },
        { "alloc",    ITER_POSSIBLE },
        { "free",     ITER_POSSIBLE },
        // statistics / monitoring -> online
        { "stat",     ITER_ONLINE },
        { "monitor",  ITER_ONLINE },
        { "collect",  ITER_ONLINE },
        { "report",   ITER_ONLINE },
        // work distribution -> online
        { "schedule", ITER_ONLINE },
        { "balance",  ITER_ONLINE },
        { "assign",   ITER_ONLINE },
        // hardware related -> present
        { "hardware", ITER_PRESENT },
        { "detect",   ITER_PRESENT },
    };
    size_t i;

    for (i = 0; i < sizeof(rules) / sizeof(rules[0]); i++) {
        if (strstr(operation, rules[i].keyword) != NULL) {
            return rules[i].type;
        }
    }

    // Default: online CPUs
    return ITER_ONLINE;
}

// 使用示例
void smart_cpu_iteration(const char *operation) {
    enum cpu_iteration_type type = choose_iteration_type(operation);
    int cpu;

    switch (type) {
    case ITER_POSSIBLE:
        printk("Using for_each_possible_cpu for: %s\n", operation);
        for_each_possible_cpu(cpu) {
            handle_cpu_operation(cpu, operation);
        }
        break;

    case ITER_ONLINE:
        printk("Using for_each_online_cpu for: %s\n", operation);
        for_each_online_cpu(cpu) {
            handle_cpu_operation(cpu, operation);
        }
        break;

    case ITER_PRESENT:
        printk("Using for_each_present_cpu for: %s\n", operation);
        for_each_present_cpu(cpu) {
            handle_cpu_operation(cpu, operation);
        }
        break;
    }
}

6.2 综合使用模式

// Combined example: a complete per-CPU subsystem
// Global bookkeeping for the subsystem.
struct percpu_subsystem {
    const char *name;     // name used in log messages
    bool initialized;     // set by subsystem_init(), cleared by subsystem_exit()
    atomic_t ref_count;   // raised around each subsystem_process_work() call
};

// Per-CPU state, plus the single global descriptor (starts uninitialized, refcount 0).
DEFINE_PER_CPU(struct subsystem_data, subsys_data);
static struct percpu_subsystem subsystem = {
    .name = "example_subsystem",
    .initialized = false,
    .ref_count = ATOMIC_INIT(0)
};

// 1. Initialization phase - for_each_possible_cpu
/*
 * subsystem_init - prepare subsys_data on every possible CPU and mark the
 * subsystem initialized.
 *
 * If a buffer allocation fails, all buffers allocated so far are freed and
 * the subsystem stays uninitialized.
 *
 * Return: 0 on success, -ENOMEM if a buffer could not be allocated.
 */
static int __init subsystem_init(void) {
    struct subsystem_data *sd;
    int err = 0;
    int cpu;

    printk("Initializing %s for all possible CPUs\n", subsystem.name);

    for_each_possible_cpu(cpu) {
        sd = per_cpu_ptr(&subsys_data, cpu);

        sd->cpu_id = cpu;
        sd->state = SUBSYS_STATE_INIT;

        sd->buffer = kzalloc(DATA_BUFFER_SIZE, GFP_KERNEL);
        if (sd->buffer == NULL) {
            err = -ENOMEM;
            break;
        }

        atomic_set(&sd->usage_count, 0);
        spin_lock_init(&sd->lock);
        INIT_LIST_HEAD(&sd->pending_work);

        printk("  CPU %d: %s\n", cpu, 
               cpu_online(cpu) ? "online" : "offline");
    }

    if (err == 0) {
        subsystem.initialized = true;
        printk("%s initialization completed\n", subsystem.name);
        return 0;
    }

    // Allocation failed: release every buffer (kfree() ignores NULL)
    for_each_possible_cpu(cpu) {
        sd = per_cpu_ptr(&subsys_data, cpu);
        kfree(sd->buffer);
        sd->buffer = NULL;
    }
    return err;
}

// 2. 运行时统计 - for_each_online_cpu  
void subsystem_get_statistics(struct global_stats *stats) {
    int cpu;

    if (!subsystem.initialized) {
        memset(stats, 0, sizeof(*stats));
        return;
    }

    memset(stats, 0, sizeof(*stats));
    stats->timestamp = jiffies;

    for_each_online_cpu(cpu) {
        struct subsystem_data *data = per_cpu_ptr(&subsys_data, cpu);

        stats->total_usage += atomic_read(&data->usage_count);
        stats->total_pending += data->pending_count;

        if (data->state == SUBSYS_STATE_ERROR) {
            stats->error_cpus++;
        }

        stats->active_cpus++;
    }

    stats->total_cpus = num_possible_cpus();
    stats->online_cpus = num_online_cpus();
}

// 3. Load balancing - for_each_online_cpu
/*
 * subsystem_find_best_cpu - pick the online CPU in SUBSYS_STATE_ACTIVE
 * with the lowest usage (usage_count plus pending_count).
 *
 * Return: the CPU number, or -1 if the subsystem is not initialized or no
 * online CPU qualifies.
 */
int subsystem_find_best_cpu(void) {
    int lowest = INT_MAX;
    int chosen = -1;
    int cpu;

    if (!subsystem.initialized) {
        return -1;
    }

    for_each_online_cpu(cpu) {
        struct subsystem_data *sd = &per_cpu(subsys_data, cpu);
        int load = atomic_read(&sd->usage_count) + sd->pending_count;

        if (sd->state != SUBSYS_STATE_ACTIVE) {
            continue;
        }
        if (load >= lowest) {
            continue;
        }

        lowest = load;
        chosen = cpu;
    }

    return chosen;
}

// 4. 运行时更新 - 当前CPU
void subsystem_process_work(struct work_item *work) {
    struct subsystem_data *data;

    if (!subsystem.initialized) {
        return;
    }

    // 在当前CPU上处理工作（当前CPU肯定是在线的）
    atomic_inc(&subsystem.ref_count);

    preempt_disable();
    data = this_cpu_ptr(&subsys_data);

    atomic_inc(&data->usage_count);
    data->last_work_time = jiffies;

    // 处理工作项
    process_work_item(data, work);

    atomic_dec(&data->usage_count);
    preempt_enable();

    atomic_dec(&subsystem.ref_count);
}

// 5. Cleanup phase - for_each_possible_cpu
/*
 * subsystem_exit - drain in-flight work, then release the per-CPU
 * resources of every possible CPU.
 *
 * The subsystem is marked uninitialized *before* draining so that
 * subsystem_process_work() stops accepting new work; otherwise ref_count
 * could keep being raised while this function waits for it to reach zero.
 *
 * The wait sleeps with schedule_timeout_uninterruptible(). A bare
 * schedule_timeout() does not sleep unless the caller has first changed
 * its task state, so the loop would spin instead of yielding.
 *
 * NOTE(review): a caller that has passed the initialized check but not yet
 * raised ref_count can still slip through; closing that window needs
 * stronger synchronization than this example provides.
 */
static void __exit subsystem_exit(void) {
    int cpu;

    printk("Cleaning up %s\n", subsystem.name);

    // Stop accepting new work before waiting for the old work
    subsystem.initialized = false;

    // Wait one tick at a time until all in-flight work has finished
    while (atomic_read(&subsystem.ref_count) > 0) {
        schedule_timeout_uninterruptible(1);
    }

    // Clean up every possible CPU
    for_each_possible_cpu(cpu) {
        struct subsystem_data *data = per_cpu_ptr(&subsys_data, cpu);

        // Drop pending work
        if (!list_empty(&data->pending_work)) {
            struct work_item *item, *tmp;
            list_for_each_entry_safe(item, tmp, &data->pending_work, list) {
                list_del(&item->list);
                kfree(item);
            }
        }

        // Release the buffer (kfree() ignores NULL)
        kfree(data->buffer);
        data->buffer = NULL;

        data->state = SUBSYS_STATE_CLEANUP;
    }

    printk("%s cleanup completed\n", subsystem.name);
}

// Module entry and exit points: subsystem_init() on load, subsystem_exit() on unload.
module_init(subsystem_init);
module_exit(subsystem_exit);

总结

关键区别：

操作类型	使用接口	原因
初始化	`for_each_possible_cpu`	必须为所有可能的CPU准备数据，即使当前离线
清理	`for_each_possible_cpu`	必须清理所有CPU的资源，避免内存泄漏
统计收集	`for_each_online_cpu`	只统计在线的CPU，离线CPU数据无意义
工作分配	`for_each_online_cpu`	只能向在线CPU分配工作
负载均衡	`for_each_online_cpu`	离线CPU不参与负载均衡
调试显示	`for_each_possible_cpu`	显示完整的系统状态

选择原则：

需要完整覆盖（初始化、清理）→ for_each_possible_cpu
需要当前状态（统计、负载）→ for_each_online_cpu
性能敏感的操作 → for_each_online_cpu（扫描CPU更少）

本文版权归原作者zhaofujian所有，采用 CC BY-NC-ND 4.0 协议进行许可，转载请注明出处。

linux 内核开发之 for_each_online_cpu vs for_each_possible_cpu 详细对比

1. 基本概念区别

1.1 Online CPU vs Possible CPU

1.2 CPU状态转换

2. 使用场景差异

2.1 初始化场景 – 使用 for_each_possible_cpu

2.2 清理场景 – 使用 for_each_possible_cpu

2.3 统计收集场景 – 使用 for_each_online_cpu

2.4 负载均衡场景 – 使用 for_each_online_cpu

3. CPU热插拔场景

3.3 CPU热插拔通知处理

4. 实际应用对比示例

4.1 网络子系统统计

4.2 内存分配器per-CPU缓存

5. 性能和行为差异

5.1 性能测试对比

5.2 正确性验证

6. 最佳实践总结

6.1 选择决策流程图

6.2 综合使用模式

总结

发表评论取消回复

1. 基本概念区别

1.1 Online CPU vs Possible CPU

1.2 CPU状态转换

2. 使用场景差异

2.1 初始化场景 – 使用 for_each_possible_cpu

2.2 清理场景 – 使用 for_each_possible_cpu

2.3 统计收集场景 – 使用 for_each_online_cpu

2.4 负载均衡场景 – 使用 for_each_online_cpu

3. CPU热插拔场景

3.3 CPU热插拔通知处理

4. 实际应用对比示例

4.1 网络子系统统计

4.2 内存分配器per-CPU缓存

5. 性能和行为差异

5.1 性能测试对比

5.2 正确性验证

6. 最佳实践总结

6.1 选择决策流程图

6.2 综合使用模式

总结

发表评论 取消回复

发表评论取消回复