perf/Documentation/libperf-sampling.txt

0001 libperf-sampling(7)
0002 ===================
0003
0004 NAME
0005 ----
0006 libperf-sampling - sampling interface
0007
0008
0009 DESCRIPTION
0010 -----------
0011 The sampling interface provides an API to measure and get samples for specific perf events.
0012
0013 The following text tries to explain sampling using the `sampling.c` example.
0014
0015 It is by no means a complete guide to sampling, but it shows the basic libperf API for sampling.
0016
0017 The `sampling.c` example comes with the libperf package and can be compiled and run like:
0018
0019 [source,bash]
0020 --
0021 $ gcc -o sampling sampling.c -lperf
0022 $ sudo ./sampling
0023 cpu   0, pid      0, tid      0, ip     ffffffffad06c4e6, period                    1
0024 cpu   0, pid   4465, tid   4469, ip     ffffffffad118748, period             18322959
0025 cpu   0, pid      0, tid      0, ip     ffffffffad115722, period             33544846
0026 cpu   0, pid   4465, tid   4470, ip         7f84fe0cdad6, period             23687474
0027 cpu   0, pid      0, tid      0, ip     ffffffffad9e0349, period             34255790
0028 cpu   0, pid   4465, tid   4469, ip     ffffffffad136581, period             38664069
0029 cpu   0, pid      0, tid      0, ip     ffffffffad9e55e2, period             21922384
0030 cpu   0, pid   4465, tid   4470, ip         7f84fe0ebebf, period             17655175
0031 ...
0032 --
0033
0034 It requires root access, because it uses the hardware cycles event.
0035
0036 The `sampling.c` example profiles/samples all CPUs with hardware cycles. In a
0037 nutshell, it:
0038
0039 - creates events
0040 - adds them to the event list
0041 - opens and enables events through the event list
0042 - sleeps for 3 seconds
0043 - disables events
0044 - reads and displays recorded samples
0045 - destroys the event list
0046
0047 The first thing you need to do before using libperf is to call the init function:
0048
0049 [source,c]
0050 --
0051  12 static int libperf_print(enum libperf_print_level level,
0052  13                          const char *fmt, va_list ap)
0053  14 {
0054  15         return vfprintf(stderr, fmt, ap);
0055  16 }
0056
0057  23 int main(int argc, char **argv)
0058  24 {
0059  ...
0060  40         libperf_init(libperf_print);
0061 --
0062
0063 It will set up the library and set the function for debug output from the library.
0064
0065 The `libperf_print` callback will receive any message with its debug level,
0066 defined as:
0067
0068 [source,c]
0069 --
0070 enum libperf_print_level {
0071         LIBPERF_ERR,
0072         LIBPERF_WARN,
0073         LIBPERF_INFO,
0074         LIBPERF_DEBUG,
0075         LIBPERF_DEBUG2,
0076         LIBPERF_DEBUG3,
0077 };
0078 --
0079
0080 Once the setup is complete, we start by defining the cycles event using the `struct perf_event_attr`:
0081
0082 [source,c]
0083 --
0084  29         struct perf_event_attr attr = {
0085  30                 .type        = PERF_TYPE_HARDWARE,
0086  31                 .config      = PERF_COUNT_HW_CPU_CYCLES,
0087  32                 .disabled    = 1,
0088  33                 .freq        = 1,
0089  34                 .sample_freq = 10,
0090  35                 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
0091  36         };
0092 --
0093
0094 The next step is to prepare the CPUs map.
0095
0096 In this case we will monitor all the available CPUs:
0097
0098 [source,c]
0099 --
0100  42         cpus = perf_cpu_map__new(NULL);
0101  43         if (!cpus) {
0102  44                 fprintf(stderr, "failed to create cpus\n");
0103  45                 return -1;
0104  46         }
0105 --
0106
0107 Now we create libperf's event list, which will serve as a holder for the cycles event:
0108
0109 [source,c]
0110 --
0111  48         evlist = perf_evlist__new();
0112  49         if (!evlist) {
0113  50                 fprintf(stderr, "failed to create evlist\n");
0114  51                 goto out_cpus;
0115  52         }
0116 --
0117
0118 We create libperf's event for the cycles attribute we defined earlier and add it to the list:
0119
0120 [source,c]
0121 --
0122  54         evsel = perf_evsel__new(&attr);
0123  55         if (!evsel) {
0124  56                 fprintf(stderr, "failed to create cycles\n");
0125  57                 goto out_cpus;
0126  58         }
0127  59
0128  60         perf_evlist__add(evlist, evsel);
0129 --
0130
0131 Configure the event list with the cpus map and open the event:
0132
0133 [source,c]
0134 --
0135  62         perf_evlist__set_maps(evlist, cpus, NULL);
0136  63
0137  64         err = perf_evlist__open(evlist);
0138  65         if (err) {
0139  66                 fprintf(stderr, "failed to open evlist\n");
0140  67                 goto out_evlist;
0141  68         }
0142 --
0143
0144 Once the events list is open, we can create memory maps AKA perf ring buffers:
0145
0146 [source,c]
0147 --
0148  70         err = perf_evlist__mmap(evlist, 4);
0149  71         if (err) {
0150  72                 fprintf(stderr, "failed to mmap evlist\n");
0151  73                 goto out_evlist;
0152  74         }
0153 --
0154
0155 The event is created as disabled (note the `disabled = 1` assignment above),
0156 so we need to enable the events list explicitly.
0157
0158 From this moment the cycles event is sampling.
0159
0160 We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
0161
0162 [source,c]
0163 --
0164  76         perf_evlist__enable(evlist);
0165  77         sleep(3);
0166  78         perf_evlist__disable(evlist);
0167 --
0168
0169 The following code walks through the ring buffers and reads stored events/samples:
0170
0171 [source,c]
0172 --
0173  80         perf_evlist__for_each_mmap(evlist, map, false) {
0174  81                 if (perf_mmap__read_init(map) < 0)
0175  82                         continue;
0176  83
0177  84                 while ((event = perf_mmap__read_event(map)) != NULL) {
0178
0179                             /* process event */
0180
0181 108                         perf_mmap__consume(map);
0182 109                 }
0183 110                 perf_mmap__read_done(map);
0184 111         }
0185
0186 --
0187
0188 Each sample needs to be parsed:
0189
0190 [source,c]
0191 --
0192  85                         int cpu, pid, tid;
0193  86                         __u64 ip, period, *array;
0194  87                         union u64_swap u;
0195  88
0196  89                         array = event->sample.array;
0197  90
0198  91                         ip = *array;
0199  92                         array++;
0200  93
0201  94                         u.val64 = *array;
0202  95                         pid = u.val32[0];
0203  96                         tid = u.val32[1];
0204  97                         array++;
0205  98
0206  99                         u.val64 = *array;
0207 100                         cpu = u.val32[0];
0208 101                         array++;
0209 102
0210 103                         period = *array;
0211 104
0212 105                         fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
0213 106                                 cpu, pid, tid, ip, period);
0214 --
0215
0216 And finally cleanup.
0217
0218 We close the whole events list and remove it together with the cpus map:
0219
0220 [source,c]
0221 --
0222 113 out_evlist:
0223 114         perf_evlist__delete(evlist);
0224 115 out_cpus:
0225 116         perf_cpu_map__put(cpus);
0226 117         return err;
0227 118 }
0228 --
0229
0230 REPORTING BUGS
0231 --------------
0232 Report bugs to <linux-perf-users@vger.kernel.org>.
0233
0234 LICENSE
0235 -------
0236 libperf is Free Software licensed under the GNU LGPL 2.1
0237
0238 RESOURCES
0239 ---------
0240 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
0241
0242 SEE ALSO
0243 --------
0244 libperf(3), libperf-counting(7)