// SPDX-License-Identifier: GPL-2.0
#include <api/fs/fs.h>
#include "cpumap.h"
#include "debug.h"
#include "event.h"
#include <assert.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
#include "asm/bug.h"

#include <linux/ctype.h>
#include <linux/zalloc.h>

static struct perf_cpu max_cpu_num;
static struct perf_cpu max_present_cpu_num;
static int max_node_num;
/**
 * The NUMA node X, as read from /sys/devices/system/node/nodeX, indexed by
 * CPU number.
 */
static int *cpunode_map;

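/*
 * Test whether bit i is set in the recorded CPU mask, handling both the
 * 32-bit and 64-bit mask encodings of perf_record_cpu_map_data.
 */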
bool perf_record_cpu_map_data__test_bit(int i,
                    const struct perf_record_cpu_map_data *data)
{
    int bit_word32 = i / 32;
    __u32 bit_mask32 = 1U << (i & 31);
    int bit_word64 = i / 64;
    __u64 bit_mask64 = ((__u64)1) << (i & 63);

    return (data->mask32_data.long_size == 4)
        ? (bit_word32 < data->mask32_data.nr) &&
        (data->mask32_data.mask[bit_word32] & bit_mask32) != 0
        : (bit_word64 < data->mask64_data.nr) &&
        (data->mask64_data.mask[bit_word64] & bit_mask64) != 0;
}

/* Read the ith mask value from data into the given 64-bit sized bitmap. */
static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data,
                            int i, unsigned long *bitmap)
{
#if __SIZEOF_LONG__ == 8
    if (data->mask32_data.long_size == 4)
        bitmap[0] = data->mask32_data.mask[i];
    else
        bitmap[0] = data->mask64_data.mask[i];
#else
    if (data->mask32_data.long_size == 4) {
        bitmap[0] = data->mask32_data.mask[i];
        bitmap[1] = 0;
    } else {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32);
        bitmap[1] = (unsigned long)data->mask64_data.mask[i];
#else
        bitmap[0] = (unsigned long)data->mask64_data.mask[i];
        bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32);
#endif
    }
#endif
}
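
/*
 * Build a perf_cpu_map from PERF_CPU_MAP__CPUS data, i.e. an explicit array
 * of u16 CPU numbers.
 */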
static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data)
{
    struct perf_cpu_map *map;

    map = perf_cpu_map__empty_new(data->cpus_data.nr);
    if (map) {
        unsigned i;

        for (i = 0; i < data->cpus_data.nr; i++) {
            /*
             * Special treatment for -1, which is not a real CPU number:
             * map[i] must be initialized with (int) -1, otherwise the
             * u16 value would become 65535.
             */
            if (data->cpus_data.cpu[i] == (u16) -1)
                map->map[i].cpu = -1;
            else
                map->map[i].cpu = (int) data->cpus_data.cpu[i];
        }
    }

    return map;
}

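/*
 * Build a perf_cpu_map from PERF_CPU_MAP__MASK data: count the set bits to
 * size the map, then record one CPU number per set bit.
 */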
static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data)
{
    DECLARE_BITMAP(local_copy, 64);
    int weight = 0, mask_nr = data->mask32_data.nr;
    struct perf_cpu_map *map;

    for (int i = 0; i < mask_nr; i++) {
        perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
        weight += bitmap_weight(local_copy, 64);
    }

    map = perf_cpu_map__empty_new(weight);
    if (!map)
        return NULL;

    for (int i = 0, j = 0; i < mask_nr; i++) {
        int cpus_per_i = (i * data->mask32_data.long_size * BITS_PER_BYTE);
        int cpu;

        perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
        for_each_set_bit(cpu, local_copy, 64)
            map->map[j++].cpu = cpu + cpus_per_i;
    }
    return map;
}

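/* Decode the CPU map payload of a PERF_RECORD_CPU_MAP event. */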
struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
{
    if (data->type == PERF_CPU_MAP__CPUS)
        return cpu_map__from_entries(data);
    else
        return cpu_map__from_mask(data);
}

size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
{
#define BUFSIZE 1024
    char buf[BUFSIZE];

    cpu_map__snprint(map, buf, sizeof(buf));
    return fprintf(fp, "%s\n", buf);
#undef BUFSIZE
}

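/*
 * Allocate a map with room for nr entries, each initialized to -1 ("not
 * set") and with a reference count of one.
 */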
struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
{
    struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr);

    if (cpus != NULL) {
        int i;

        cpus->nr = nr;
        for (i = 0; i < nr; i++)
            cpus->map[i].cpu = -1;

        refcount_set(&cpus->refcnt, 1);
    }

    return cpus;
}

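/* As perf_cpu_map__empty_new(), but for a map of aggr_cpu_id entries. */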
struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
{
    struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);

    if (cpus != NULL) {
        int i;

        cpus->nr = nr;
        for (i = 0; i < nr; i++)
            cpus->map[i] = aggr_cpu_id__empty();

        refcount_set(&cpus->refcnt, 1);
    }

    return cpus;
}

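/* Read an integer topology value, e.g. "core_id", for cpu from sysfs. */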
static int cpu__get_topology_int(int cpu, const char *name, int *value)
{
    char path[PATH_MAX];

    snprintf(path, PATH_MAX,
        "devices/system/cpu/cpu%d/topology/%s", cpu, name);

    return sysfs__read_int(path, value);
}

int cpu__get_socket_id(struct perf_cpu cpu)
{
    int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
    return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
{
    struct aggr_cpu_id id = aggr_cpu_id__empty();

    id.socket = cpu__get_socket_id(cpu);
    return id;
}

static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
{
    struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
    struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;

    if (a->node != b->node)
        return a->node - b->node;
    else if (a->socket != b->socket)
        return a->socket - b->socket;
    else if (a->die != b->die)
        return a->die - b->die;
    else if (a->core != b->core)
        return a->core - b->core;
    else
        return a->thread - b->thread;
}

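/*
 * Build an aggregation map for cpus: call get_id() for each CPU, drop
 * duplicate IDs, trim the allocation to the number of unique IDs, and sort
 * them into increasing order.
 */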
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
                       aggr_cpu_id_get_t get_id,
                       void *data)
{
    int idx;
    struct perf_cpu cpu;
    struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);

    if (!c)
        return NULL;

    /* Reset size as it may only be partially filled */
    c->nr = 0;

    perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
        bool duplicate = false;
        struct aggr_cpu_id cpu_id = get_id(cpu, data);

        for (int j = 0; j < c->nr; j++) {
            if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
                duplicate = true;
                break;
            }
        }
        if (!duplicate) {
            c->map[c->nr] = cpu_id;
            c->nr++;
        }
    }
    /* Trim. */
    if (c->nr != cpus->nr) {
        struct cpu_aggr_map *trimmed_c =
            realloc(c,
                sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);

        if (trimmed_c)
            c = trimmed_c;
    }
    /* Ensure the IDs are processed in increasing order. */
    qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);

    return c;
}

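/*
 * Example (editor's sketch, not part of the original file): building a
 * per-socket aggregation map for the online CPUs using the helpers above.
 * example_socket_map() is a hypothetical name; error handling is minimal.
 */
#if 0
static struct cpu_aggr_map *example_socket_map(void)
{
    struct perf_cpu_map *online = perf_cpu_map__new(NULL); /* online CPUs */
    struct cpu_aggr_map *sockets = NULL;

    if (online)
        sockets = cpu_aggr_map__new(online, aggr_cpu_id__socket, NULL);
    perf_cpu_map__put(online); /* put handles NULL */
    return sockets;
}
#endif
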
int cpu__get_die_id(struct perf_cpu cpu)
{
    int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);

    return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
{
    struct aggr_cpu_id id;
    int die;

    die = cpu__get_die_id(cpu);
    /* There is no die_id on legacy systems. */
    if (die == -1)
        die = 0;

    /*
     * die_id is relative to the socket, so start with the socket ID and
     * then add the die to make a unique ID.
     */
    id = aggr_cpu_id__socket(cpu, data);
    if (aggr_cpu_id__is_empty(&id))
        return id;

    id.die = die;
    return id;
}

int cpu__get_core_id(struct perf_cpu cpu)
{
    int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
    return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
{
    struct aggr_cpu_id id;
    int core = cpu__get_core_id(cpu);

    /* aggr_cpu_id__die returns a struct with socket and die set. */
    id = aggr_cpu_id__die(cpu, data);
    if (aggr_cpu_id__is_empty(&id))
        return id;

    /*
     * core_id is relative to the socket and die; to make a globally unique
     * ID, combine the result of aggr_cpu_id__die with the core ID.
     */
    id.core = core;
    return id;
}

struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
{
    struct aggr_cpu_id id;

    /* aggr_cpu_id__core returns a struct with socket, die and core set. */
    id = aggr_cpu_id__core(cpu, data);
    if (aggr_cpu_id__is_empty(&id))
        return id;

    id.cpu = cpu;
    return id;
}

struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
{
    struct aggr_cpu_id id = aggr_cpu_id__empty();

    id.node = cpu__get_node(cpu);
    return id;
}

/* Set up simple routines to easily access node numbers given a CPU number. */
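/*
 * Parse the last entry of a sysfs range list such as "0-7" or "0,2-4" and
 * store the highest ID plus one in *max.
 */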
static int get_max_num(char *path, int *max)
{
    size_t num;
    char *buf;
    int err = 0;

    if (filename__read_str(path, &buf, &num))
        return -1;

    buf[num] = '\0';

    /* Start on the right to find the highest ID. */
    while (--num) {
        if ((buf[num] == ',') || (buf[num] == '-')) {
            num++;
            break;
        }
    }
    if (sscanf(&buf[num], "%d", max) < 1) {
        err = -1;
        goto out;
    }

    /* Convert the highest ID into a count (0-based to 1-based). */
    (*max)++;

out:
    free(buf);
    return err;
}

/* Determine highest possible cpu in the system for sparse allocation */
static void set_max_cpu_num(void)
{
    const char *mnt;
    char path[PATH_MAX];
    int ret = -1;

    /* set up default */
    max_cpu_num.cpu = 4096;
    max_present_cpu_num.cpu = 4096;

    mnt = sysfs__mountpoint();
    if (!mnt)
        goto out;

    /* get the highest possible cpu number for a sparse allocation */
    ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
    if (ret >= PATH_MAX) {
        pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
        goto out;
    }

    ret = get_max_num(path, &max_cpu_num.cpu);
    if (ret)
        goto out;

    /* get the highest present cpu number for a sparse allocation */
    ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
    if (ret >= PATH_MAX) {
        pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
        goto out;
    }

    ret = get_max_num(path, &max_present_cpu_num.cpu);

out:
    if (ret)
        pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
}

/* Determine highest possible node in the system for sparse allocation */
static void set_max_node_num(void)
{
    const char *mnt;
    char path[PATH_MAX];
    int ret = -1;

    /* set up default */
    max_node_num = 8;

    mnt = sysfs__mountpoint();
    if (!mnt)
        goto out;

    /* get the highest possible node number for a sparse allocation */
    ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
    if (ret >= PATH_MAX) {
        pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
        goto out;
    }

    ret = get_max_num(path, &max_node_num);

out:
    if (ret)
        pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
}

int cpu__max_node(void)
{
    if (unlikely(!max_node_num))
        set_max_node_num();

    return max_node_num;
}

struct perf_cpu cpu__max_cpu(void)
{
    if (unlikely(!max_cpu_num.cpu))
        set_max_cpu_num();

    return max_cpu_num;
}

struct perf_cpu cpu__max_present_cpu(void)
{
    if (unlikely(!max_present_cpu_num.cpu))
        set_max_cpu_num();

    return max_present_cpu_num;
}

int cpu__get_node(struct perf_cpu cpu)
{
    if (unlikely(cpunode_map == NULL)) {
        pr_debug("cpu_map not initialized\n");
        return -1;
    }

    return cpunode_map[cpu.cpu];
}

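/* Allocate cpunode_map sized for the highest possible CPU, all entries -1. */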
static int init_cpunode_map(void)
{
    int i;

    set_max_cpu_num();
    set_max_node_num();

    cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
    if (!cpunode_map) {
        pr_err("%s: calloc failed\n", __func__);
        return -1;
    }

    for (i = 0; i < max_cpu_num.cpu; i++)
        cpunode_map[i] = -1;

    return 0;
}

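/*
 * Fill cpunode_map by walking /sys/devices/system/node/nodeX and recording
 * node X for every cpuY symlink found beneath it.
 */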
int cpu__setup_cpunode_map(void)
{
    struct dirent *dent1, *dent2;
    DIR *dir1, *dir2;
    unsigned int cpu, mem;
    char buf[PATH_MAX];
    char path[PATH_MAX];
    const char *mnt;
    int n;

    /* initialize globals */
    if (init_cpunode_map())
        return -1;

    mnt = sysfs__mountpoint();
    if (!mnt)
        return 0;

    n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
    if (n >= PATH_MAX) {
        pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
        return -1;
    }

    dir1 = opendir(path);
    if (!dir1)
        return 0;

    /* walk the tree and set up the map */
    while ((dent1 = readdir(dir1)) != NULL) {
        if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1)
            continue;

        n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
        if (n >= PATH_MAX) {
            pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
            continue;
        }

        dir2 = opendir(buf);
        if (!dir2)
            continue;
        while ((dent2 = readdir(dir2)) != NULL) {
            if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
                continue;
            cpunode_map[cpu] = mem;
        }
        closedir(dir2);
    }
    closedir(dir1);
    return 0;
}

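/*
 * Format map as a human-readable list, collapsing consecutive CPUs into
 * ranges, e.g. "0-2,4".
 */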
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
{
    int i, start = -1;
    bool first = true;
    size_t ret = 0;

#define COMMA first ? "" : ","

    for (i = 0; i < map->nr + 1; i++) {
        struct perf_cpu cpu = { .cpu = INT_MAX };
        bool last = i == map->nr;

        if (!last)
            cpu = map->map[i];

        if (start == -1) {
            start = i;
            if (last) {
                ret += snprintf(buf + ret, size - ret,
                        "%s%d", COMMA,
                        map->map[i].cpu);
            }
        } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
            int end = i - 1;

            if (start == end) {
                ret += snprintf(buf + ret, size - ret,
                        "%s%d", COMMA,
                        map->map[start].cpu);
            } else {
                ret += snprintf(buf + ret, size - ret,
                        "%s%d-%d", COMMA,
                        map->map[start].cpu, map->map[end].cpu);
            }
            first = false;
            start = i;
        }
    }

#undef COMMA

    pr_debug2("cpumask list: %s\n", buf);
    return ret;
}

static char hex_char(unsigned char val)
{
    if (val < 10)
        return val + '0';
    if (val < 16)
        return val - 10 + 'a';
    return '?';
}

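/*
 * Format map as a hex CPU mask in the kernel's cpumask format: one hex
 * digit per four CPUs, highest CPUs first, with a comma every 32 bits,
 * e.g. "ff" for CPUs 0-7.
 */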
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
{
    int i, cpu;
    char *ptr = buf;
    unsigned char *bitmap;
    struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);

    if (buf == NULL)
        return 0;

    bitmap = zalloc(last_cpu.cpu / 8 + 1);
    if (bitmap == NULL) {
        buf[0] = '\0';
        return 0;
    }

    for (i = 0; i < map->nr; i++) {
        cpu = perf_cpu_map__cpu(map, i).cpu;
        bitmap[cpu / 8] |= 1 << (cpu % 8);
    }

    for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
        unsigned char bits = bitmap[cpu / 8];

        if (cpu % 8)
            bits >>= 4;
        else
            bits &= 0xf;

        *ptr++ = hex_char(bits);
        if ((cpu % 32) == 0 && cpu > 0)
            *ptr++ = ',';
    }
    *ptr = '\0';
    free(bitmap);

    buf[size - 1] = '\0';
    return ptr - buf;
}

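/*
 * Example (editor's sketch, not part of the original file): printing the
 * online CPU map in both formats. example_print_cpu_map() is a
 * hypothetical helper; on an 8-CPU system it would print "0-7" and "ff".
 */
#if 0
static void example_print_cpu_map(void)
{
    char buf[1024];
    struct perf_cpu_map *online = perf_cpu_map__new(NULL); /* online CPUs */

    if (!online)
        return;
    cpu_map__snprint(online, buf, sizeof(buf));
    printf("list: %s\n", buf);  /* e.g. "0-7" */
    cpu_map__snprint_mask(online, buf, sizeof(buf));
    printf("mask: %s\n", buf);  /* e.g. "ff" */
    perf_cpu_map__put(online);
}
#endif
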
const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
{
    static const struct perf_cpu_map *online = NULL;

    if (!online)
        online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */

    return online;
}

bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
    return a->thread == b->thread &&
        a->node == b->node &&
        a->socket == b->socket &&
        a->die == b->die &&
        a->core == b->core &&
        a->cpu.cpu == b->cpu.cpu;
}

bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
    return a->thread == -1 &&
        a->node == -1 &&
        a->socket == -1 &&
        a->die == -1 &&
        a->core == -1 &&
        a->cpu.cpu == -1;
}

struct aggr_cpu_id aggr_cpu_id__empty(void)
{
    struct aggr_cpu_id ret = {
        .thread = -1,
        .node = -1,
        .socket = -1,
        .die = -1,
        .core = -1,
        .cpu = (struct perf_cpu){ .cpu = -1 },
    };
    return ret;
}