// SPDX-License-Identifier: GPL-2.0
/* cpumap.c: used for optimizing CPU assignment
 *
 * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <asm/cpudata.h>
#include "cpumap.h"


enum {
    CPUINFO_LVL_ROOT = 0,
    CPUINFO_LVL_NODE,
    CPUINFO_LVL_CORE,
    CPUINFO_LVL_PROC,
    CPUINFO_LVL_MAX,
};

enum {
    ROVER_NO_OP              = 0,
    /* Increment rover every time level is visited */
    ROVER_INC_ON_VISIT       = 1 << 0,
    /* Increment parent's rover every time rover wraps around */
    ROVER_INC_PARENT_ON_LOOP = 1 << 1,
};

struct cpuinfo_node {
    int id;
    int level;
    int num_cpus;    /* Number of CPUs in this hierarchy */
    int parent_index;
    int child_start; /* Array index of the first child node */
    int child_end;   /* Array index of the last child node */
    int rover;       /* Child node iterator */
};

struct cpuinfo_level {
    int start_index; /* Index of first node of a level in a cpuinfo tree */
    int end_index;   /* Index of last node of a level in a cpuinfo tree */
    int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
};

struct cpuinfo_tree {
    int total_nodes;

    /* Offsets into nodes[] for each level of the tree */
    struct cpuinfo_level level[CPUINFO_LVL_MAX];
    struct cpuinfo_node  nodes[];
};
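
/* Illustrative layout (hypothetical machine, not taken from this file):
 * on a single-node system with two cores of two instruction pipelines
 * each, nodes[] is filled breadth-first, one level after another:
 *
 *   nodes[0]     ROOT
 *   nodes[1]     NODE 0
 *   nodes[2..3]  CORE 0..1
 *   nodes[4..7]  PROC 0..3
 *
 * Each cpuinfo_level then names a contiguous [start_index, end_index]
 * slice of nodes[] for its level.
 */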


static struct cpuinfo_tree *cpuinfo_tree;

static u16 cpu_distribution_map[NR_CPUS];
static DEFINE_SPINLOCK(cpu_map_lock);


/* Niagara optimized cpuinfo tree traversal. */
static const int niagara_iterate_method[] = {
    [CPUINFO_LVL_ROOT] = ROVER_NO_OP,

    /* Strands (or virtual CPUs) within a core may not run concurrently
     * on the Niagara, as instruction pipeline(s) are shared.  Distribute
     * work to strands in different cores first for better concurrency.
     * Go to next NUMA node when all cores are used.
     */
    [CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,

    /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
     * a proc_id represents an instruction pipeline.  Distribute work to
     * strands in different proc_id groups if the core has multiple
     * instruction pipelines (e.g. the Niagara 2/2+ has two).
     */
    [CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,

    /* Pick the next strand in the proc_id group. */
    [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
};

/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
 * nodes.
 */
static const int generic_iterate_method[] = {
    [CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
    [CPUINFO_LVL_NODE] = ROVER_NO_OP,
    [CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
    [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
};

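/* Return the identifier of a CPU at the given tree level: 0 for the
 * root, the NUMA node id for NODE, or core_id/proc_id from
 * cpuinfo_sparc for CORE/PROC.  Returns -EINVAL for an unknown level.
 */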
static int cpuinfo_id(int cpu, int level)
{
    int id;

    switch (level) {
    case CPUINFO_LVL_ROOT:
        id = 0;
        break;
    case CPUINFO_LVL_NODE:
        id = cpu_to_node(cpu);
        break;
    case CPUINFO_LVL_CORE:
        id = cpu_data(cpu).core_id;
        break;
    case CPUINFO_LVL_PROC:
        id = cpu_data(cpu).proc_id;
        break;
    default:
        id = -EINVAL;
    }
    return id;
}

/*
 * Enumerate the CPU information in __cpu_data to determine the start index,
 * end index, and number of nodes for each level in the cpuinfo tree.  The
 * total number of cpuinfo nodes required to build the tree is returned.
 */
static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
{
    int prev_id[CPUINFO_LVL_MAX];
    int i, n, num_nodes;

    for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
        struct cpuinfo_level *lv = &tree_level[i];

        prev_id[i] = -1;
        lv->start_index = lv->end_index = lv->num_nodes = 0;
    }

    num_nodes = 1; /* Include the root node */

    for (i = 0; i < num_possible_cpus(); i++) {
        if (!cpu_online(i))
            continue;

        n = cpuinfo_id(i, CPUINFO_LVL_NODE);
        if (n > prev_id[CPUINFO_LVL_NODE]) {
            tree_level[CPUINFO_LVL_NODE].num_nodes++;
            prev_id[CPUINFO_LVL_NODE] = n;
            num_nodes++;
        }
        n = cpuinfo_id(i, CPUINFO_LVL_CORE);
        if (n > prev_id[CPUINFO_LVL_CORE]) {
            tree_level[CPUINFO_LVL_CORE].num_nodes++;
            prev_id[CPUINFO_LVL_CORE] = n;
            num_nodes++;
        }
        n = cpuinfo_id(i, CPUINFO_LVL_PROC);
        if (n > prev_id[CPUINFO_LVL_PROC]) {
            tree_level[CPUINFO_LVL_PROC].num_nodes++;
            prev_id[CPUINFO_LVL_PROC] = n;
            num_nodes++;
        }
    }

    tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;

    n = tree_level[CPUINFO_LVL_NODE].num_nodes;
    tree_level[CPUINFO_LVL_NODE].start_index = 1;
    tree_level[CPUINFO_LVL_NODE].end_index   = n;

    n++;
    tree_level[CPUINFO_LVL_CORE].start_index = n;
    n += tree_level[CPUINFO_LVL_CORE].num_nodes;
    tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;

    tree_level[CPUINFO_LVL_PROC].start_index = n;
    n += tree_level[CPUINFO_LVL_PROC].num_nodes;
    tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;

    return num_nodes;
}
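
/* Worked example (hypothetical topology, for illustration only): with
 * 2 NUMA nodes, 4 cores per node and 2 pipelines per core, this counts
 * 1 root + 2 node + 8 core + 16 proc = 27 nodes, placing the NODE level
 * at indices [1..2], CORE at [3..10] and PROC at [11..26].
 */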

/* Build a tree representation of the CPU hierarchy using the per CPU
 * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS] are
 * assumed to be sorted in ascending order based on node, core_id, and
 * proc_id (in order of significance).
 */
static struct cpuinfo_tree *build_cpuinfo_tree(void)
{
    struct cpuinfo_tree *new_tree;
    struct cpuinfo_node *node;
    struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
    int num_cpus[CPUINFO_LVL_MAX];
    int level_rover[CPUINFO_LVL_MAX];
    int prev_id[CPUINFO_LVL_MAX];
    int n, id, cpu, prev_cpu, last_cpu, level;

    n = enumerate_cpuinfo_nodes(tmp_level);

    new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC);
    if (!new_tree)
        return NULL;

    new_tree->total_nodes = n;
    memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));

    prev_cpu = cpu = cpumask_first(cpu_online_mask);

    /* Initialize all levels in the tree with the first CPU */
    for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
        n = new_tree->level[level].start_index;

        level_rover[level] = n;
        node = &new_tree->nodes[n];

        id = cpuinfo_id(cpu, level);
        if (unlikely(id < 0)) {
            kfree(new_tree);
            return NULL;
        }
        node->id = id;
        node->level = level;
        node->num_cpus = 1;

        node->parent_index = (level > CPUINFO_LVL_ROOT)
            ? new_tree->level[level - 1].start_index : -1;

        node->child_start = node->child_end = node->rover =
            (level == CPUINFO_LVL_PROC)
            ? cpu : new_tree->level[level + 1].start_index;

        prev_id[level] = node->id;
        num_cpus[level] = 1;
    }

    for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
        if (cpu_online(last_cpu))
            break;
    }

    while (++cpu <= last_cpu) {
        if (!cpu_online(cpu))
            continue;

        for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
             level--) {
            id = cpuinfo_id(cpu, level);
            if (unlikely(id < 0)) {
                kfree(new_tree);
                return NULL;
            }

            if ((id != prev_id[level]) || (cpu == last_cpu)) {
                prev_id[level] = id;
                node = &new_tree->nodes[level_rover[level]];
                node->num_cpus = num_cpus[level];
                num_cpus[level] = 1;

                if (cpu == last_cpu)
                    node->num_cpus++;

                /* Connect tree node to parent */
                if (level == CPUINFO_LVL_ROOT)
                    node->parent_index = -1;
                else
                    node->parent_index =
                        level_rover[level - 1];

                if (level == CPUINFO_LVL_PROC) {
                    node->child_end =
                        (cpu == last_cpu) ? cpu : prev_cpu;
                } else {
                    node->child_end =
                        level_rover[level + 1] - 1;
                }

                /* Initialize the next node in the same level */
                n = ++level_rover[level];
                if (n <= new_tree->level[level].end_index) {
                    node = &new_tree->nodes[n];
                    node->id = id;
                    node->level = level;

                    /* Connect node to child */
                    node->child_start = node->child_end =
                    node->rover =
                        (level == CPUINFO_LVL_PROC)
                        ? cpu : level_rover[level + 1];
                }
            } else
                num_cpus[level]++;
        }
        prev_cpu = cpu;
    }

    return new_tree;
}

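/* Step the rover of the node at node_index to its next child, wrapping
 * back to child_start once it passes child_end.  On a wrap, walk up
 * toward root_index and also step each parent whose level has
 * ROVER_INC_PARENT_ON_LOOP set, so a full loop at one level advances
 * the level above it.
 */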
static void increment_rover(struct cpuinfo_tree *t, int node_index,
                            int root_index, const int *rover_inc_table)
{
    struct cpuinfo_node *node = &t->nodes[node_index];
    int top_level, level;

    top_level = t->nodes[root_index].level;
    for (level = node->level; level >= top_level; level--) {
        node->rover++;
        if (node->rover <= node->child_end)
            return;

        node->rover = node->child_start;
        /* If parent's rover does not need to be adjusted, stop here. */
        if ((level == top_level) ||
            !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
            return;

        node = &t->nodes[node->parent_index];
    }
}

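/* Walk from root_index down to the PROC level, following each node's
 * rover to choose a child, and return the CPU reached at the bottom.
 * Rovers advance according to the chip-specific iterate method, so
 * successive calls hand out CPUs in the intended distribution order.
 */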
static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
{
    const int *rover_inc_table;
    int level, new_index, index = root_index;

    switch (sun4v_chip_type) {
    case SUN4V_CHIP_NIAGARA1:
    case SUN4V_CHIP_NIAGARA2:
    case SUN4V_CHIP_NIAGARA3:
    case SUN4V_CHIP_NIAGARA4:
    case SUN4V_CHIP_NIAGARA5:
    case SUN4V_CHIP_SPARC_M6:
    case SUN4V_CHIP_SPARC_M7:
    case SUN4V_CHIP_SPARC_M8:
    case SUN4V_CHIP_SPARC_SN:
    case SUN4V_CHIP_SPARC64X:
        rover_inc_table = niagara_iterate_method;
        break;
    default:
        rover_inc_table = generic_iterate_method;
    }

    for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
         level++) {
        new_index = t->nodes[index].rover;
        if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
            increment_rover(t, index, root_index, rover_inc_table);

        index = new_index;
    }
    return index;
}

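/* Rebuild the cpuinfo tree, then regenerate cpu_distribution_map[] by
 * drawing one CPU from iterate_cpu() per online CPU.  Caller must hold
 * cpu_map_lock.
 */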
static void _cpu_map_rebuild(void)
{
    int i;

    if (cpuinfo_tree) {
        kfree(cpuinfo_tree);
        cpuinfo_tree = NULL;
    }

    cpuinfo_tree = build_cpuinfo_tree();
    if (!cpuinfo_tree)
        return;

    /* Build CPU distribution map that spans all online CPUs.  No need
     * to check if the CPU is online, as that is done when the cpuinfo
     * tree is being built.
     */
    for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
        cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
}

/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
 * round robin.
 */
static int simple_map_to_cpu(unsigned int index)
{
    int i, end, cpu_rover;

    cpu_rover = 0;
    end = index % num_online_cpus();
    for (i = 0; i < num_possible_cpus(); i++) {
        if (cpu_online(cpu_rover)) {
            if (cpu_rover >= end)
                return cpu_rover;

            cpu_rover++;
        }
    }

    /* Impossible, since num_online_cpus() <= num_possible_cpus() */
    return cpumask_first(cpu_online_mask);
}

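/* Return the CPU for the given index, lazily building the cpuinfo tree
 * on first use and, with CPU hotplug, rebuilding it when the online CPU
 * count no longer matches the tree.  Falls back to simple_map_to_cpu()
 * if no tree can be built.  Caller must hold cpu_map_lock.
 */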
static int _map_to_cpu(unsigned int index)
{
    struct cpuinfo_node *root_node;

    if (unlikely(!cpuinfo_tree)) {
        _cpu_map_rebuild();
        if (!cpuinfo_tree)
            return simple_map_to_cpu(index);
    }

    root_node = &cpuinfo_tree->nodes[0];
#ifdef CONFIG_HOTPLUG_CPU
    if (unlikely(root_node->num_cpus != num_online_cpus())) {
        _cpu_map_rebuild();
        if (!cpuinfo_tree)
            return simple_map_to_cpu(index);
    }
#endif
    return cpu_distribution_map[index % root_node->num_cpus];
}

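/* Map an arbitrary index to an online CPU, taking cpu_map_lock.  With
 * CPU hotplug the lookup is retried until it returns a CPU that is
 * still online; _map_to_cpu() rebuilds the map when the online CPU
 * count has changed.
 */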
int map_to_cpu(unsigned int index)
{
    int mapped_cpu;
    unsigned long flag;

    spin_lock_irqsave(&cpu_map_lock, flag);
    mapped_cpu = _map_to_cpu(index);

#ifdef CONFIG_HOTPLUG_CPU
    while (unlikely(!cpu_online(mapped_cpu)))
        mapped_cpu = _map_to_cpu(index);
#endif
    spin_unlock_irqrestore(&cpu_map_lock, flag);
    return mapped_cpu;
}
EXPORT_SYMBOL(map_to_cpu);

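/* Public entry point: rebuild the cpuinfo tree and distribution map
 * under cpu_map_lock, e.g. after CPUs come online or go offline.
 */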
void cpu_map_rebuild(void)
{
    unsigned long flag;

    spin_lock_irqsave(&cpu_map_lock, flag);
    _cpu_map_rebuild();
    spin_unlock_irqrestore(&cpu_map_lock, flag);
}
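
/* Usage sketch (hypothetical caller, not part of this file): a driver
 * spreading per-queue work across strands could map queue indices to
 * CPUs with
 *
 *	for (q = 0; q < nr_queues; q++)
 *		queue[q].target_cpu = map_to_cpu(q);
 *
 * Successive indices are handed out in the order chosen by the active
 * iterate method; on Niagara-class chips that means across cores and
 * pipelines before reusing strands that share a pipeline.
 */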