0001 perf.data format
0002
0003 Up to date as of v4.7
0004
0005 This document describes the on-disk perf.data format, generated by perf record
0006 or perf inject and consumed by the other perf tools.
0007
0008 On a high level perf.data contains the events generated by the PMUs, plus metadata.
0009
0010 All fields are in the native endianness of the machine that generated the perf.data.
0011
0012 When perf is writing to a pipe it uses a special version of the file
0013 format that does not rely on seeking to adjust data offsets. This
0014 format is described in the "Pipe-mode data" section. The pipe data version can be
0015 augmented with additional events using perf inject.
0016
0017 The file starts with a perf_header:
0018
0019 struct perf_header {
0020 char magic[8]; /* PERFILE2 */
0021 uint64_t size; /* size of the header */
0022 uint64_t attr_size; /* size of an attribute in attrs */
0023 struct perf_file_section attrs;
0024 struct perf_file_section data;
0025 struct perf_file_section event_types;
0026 uint64_t flags;
0027 uint64_t flags1[3];
0028 };
0029
0030 The magic number identifies the perf file and the version. Current perf versions
0031 use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
0032 is not described here. The magic number also identifies the endian. When the
0033 magic value is 64bit byte swapped compared to the expected value, the file is
0034 in non-native endian.
0035
0036 A perf_file_section contains a pointer to another section of the perf file.
0037 The header contains three such pointers: for attributes, data and event types.
0038
0039 struct perf_file_section {
0040 uint64_t offset; /* offset from start of file */
0041 uint64_t size; /* size of the section */
0042 };
0043
0044 Flags section:
0045
0046 For each of the optional features a perf_file_section is placed after the data
0047 section if the feature bit is set in the perf_header flags bitset. The
0048 respective perf_file_section points to the data of the additional header and
0049 defines its size.
0050
0051 Some headers consist of strings, which are defined like this:
0052
0053 struct perf_header_string {
0054 uint32_t len;
0055 char string[len]; /* zero terminated */
0056 };
0057
0058 Some headers consist of a sequence of strings, which start with a count:
0059
0060 struct perf_header_string_list {
0061 uint32_t nr;
0062 struct perf_header_string strings[nr]; /* variable length records */
0063 };
0064
0065 The bits are the flags bits in a 256 bit bitmap starting with
0066 flags. These define the valid bits:
0067
0068 HEADER_RESERVED = 0, /* always cleared */
0069 HEADER_FIRST_FEATURE = 1,
0070 HEADER_TRACING_DATA = 1,
0071
0072 Describe me.
0073
0074 HEADER_BUILD_ID = 2,
0075
0076 The header consists of a sequence of build_id_event. The size of each record
0077 is defined by header.size (see perf_event.h). Each event defines an ELF build id
0078 for an executable file name for a pid. An ELF build id is a unique identifier
0079 assigned by the linker to an executable.
0080
0081 struct build_id_event {
0082 struct perf_event_header header;
0083 pid_t pid;
0084 uint8_t build_id[24];
0085 char filename[header.size - offsetof(struct build_id_event, filename)];
0086 };
0087
0088 HEADER_HOSTNAME = 3,
0089
0090 A perf_header_string with the hostname where the data was collected
0091 (uname -n)
0092
0093 HEADER_OSRELEASE = 4,
0094
0095 A perf_header_string with the os release where the data was collected
0096 (uname -r)
0097
0098 HEADER_VERSION = 5,
0099
0100 A perf_header_string with the perf user tool version where the
0101 data was collected. This is the same as the version of the source tree
0102 the perf tool was built from.
0103
0104 HEADER_ARCH = 6,
0105
0106 A perf_header_string with the CPU architecture (uname -m)
0107
0108 HEADER_NRCPUS = 7,
0109
0110 A structure defining the number of CPUs.
0111
0112 struct nr_cpus {
0113 uint32_t nr_cpus_available; /* CPUs not yet onlined */
0114 uint32_t nr_cpus_online;
0115 };
0116
0117 HEADER_CPUDESC = 8,
0118
0119 A perf_header_string with description of the CPU. On x86 this is the model name
0120 in /proc/cpuinfo
0121
0122 HEADER_CPUID = 9,
0123
0124 A perf_header_string with the exact CPU type. On x86 this is
0125 vendor,family,model,stepping. For example: GenuineIntel,6,69,1
0126
0127 HEADER_TOTAL_MEM = 10,
0128
0129 A uint64_t with the total memory in kilobytes.
0130
0131 HEADER_CMDLINE = 11,
0132
0133 A perf_header_string_list with the perf arg-vector used to collect the data.
0134
0135 HEADER_EVENT_DESC = 12,
0136
0137 Another description of the perf_event_attrs, more detailed than header.attrs
0138 including IDs and names. See perf_event.h or the man page for a description
0139 of a struct perf_event_attr.
0140
0141 struct {
0142 uint32_t nr; /* number of events */
0143 uint32_t attr_size; /* size of each perf_event_attr */
0144 struct {
0145 struct perf_event_attr attr; /* size of attr_size */
0146 uint32_t nr_ids;
0147 struct perf_header_string event_string;
0148 uint64_t ids[nr_ids];
0149 } events[nr]; /* Variable length records */
0150 };
0151
0152 HEADER_CPU_TOPOLOGY = 13,
0153
0154 struct {
0155 /*
0156 * First revision of HEADER_CPU_TOPOLOGY
0157 *
0158 * See 'struct perf_header_string_list' definition earlier
0159 * in this file.
0160 */
0161
0162 struct perf_header_string_list cores; /* Variable length */
0163 struct perf_header_string_list threads; /* Variable length */
0164
0165 /*
0166 * Second revision of HEADER_CPU_TOPOLOGY, older tools
0167 * will not consider what comes next
0168 */
0169
0170 struct {
0171 uint32_t core_id;
0172 uint32_t socket_id;
0173 } cpus[nr]; /* Variable length records */
0174 /* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpus_available */
0175
0176 /*
0177 * Third revision of HEADER_CPU_TOPOLOGY, older tools
0178 * will not consider what comes next
0179 */
0180
0181 struct perf_header_string_list dies; /* Variable length */
0182 uint32_t die_id[nr_cpus_avail]; /* from previously processed HEADER_NRCPUS, VLA */
0183 };
0184
0185 Example:
0186 sibling sockets : 0-8
0187 sibling dies : 0-3
0188 sibling dies : 4-7
0189 sibling threads : 0-1
0190 sibling threads : 2-3
0191 sibling threads : 4-5
0192 sibling threads : 6-7
0193
0194 HEADER_NUMA_TOPOLOGY = 14,
0195
0196 A list of NUMA node descriptions
0197
0198 struct {
0199 uint32_t nr;
0200 struct {
0201 uint32_t nodenr;
0202 uint64_t mem_total;
0203 uint64_t mem_free;
0204 struct perf_header_string cpus;
0205 } nodes[nr]; /* Variable length records */
0206 };
0207
0208 HEADER_BRANCH_STACK = 15,
0209
0210 Not implemented in perf.
0211
0212 HEADER_PMU_MAPPINGS = 16,
0213
0214 A list of PMU structures, defining the different PMUs supported by perf.
0215
0216 struct {
0217 uint32_t nr;
0218 struct pmu {
0219 uint32_t pmu_type;
0220 struct perf_header_string pmu_name;
0221 } [nr]; /* Variable length records */
0222 };
0223
0224 HEADER_GROUP_DESC = 17,
0225
0226 Description of counter groups ({...} in perf syntax)
0227
0228 struct {
0229 uint32_t nr;
0230 struct {
0231 struct perf_header_string string;
0232 uint32_t leader_idx;
0233 uint32_t nr_members;
0234 } [nr]; /* Variable length records */
0235 };
0236
0237 HEADER_AUXTRACE = 18,
0238
0239 Define additional auxtrace areas in the perf.data. auxtrace is used to store
0240 undecoded hardware tracing information, such as Intel Processor Trace data.
0241
0242 /**
0243 * struct auxtrace_index_entry - indexes an AUX area tracing event within a
0244 * perf.data file.
0245 * @file_offset: offset within the perf.data file
0246 * @sz: size of the event
0247 */
0248 struct auxtrace_index_entry {
0249 u64 file_offset;
0250 u64 sz;
0251 };
0252
0253 #define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
0254
0255 /**
0256 * struct auxtrace_index - index of AUX area tracing events within a perf.data
0257 * file.
0258 * @list: linking a number of arrays of entries
0259 * @nr: number of entries
0260 * @entries: array of entries
0261 */
0262 struct auxtrace_index {
0263 struct list_head list;
0264 size_t nr;
0265 struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
0266 };
0267
0268 HEADER_STAT = 19,
0269
0270 This is merely a flag signifying that the data section contains data
0271 recorded from perf stat record.
0272
0273 HEADER_CACHE = 20,
0274
0275 Description of the cache hierarchy. Based on the Linux sysfs format
0276 in /sys/devices/system/cpu/cpu*/cache/
0277
0278 u32 version Currently always 1
0279 u32 number_of_cache_levels
0280
0281 struct {
0282 u32 level;
0283 u32 line_size;
0284 u32 sets;
0285 u32 ways;
0286 struct perf_header_string type;
0287 struct perf_header_string size;
0288 struct perf_header_string map;
0289 }[number_of_cache_levels];
0290
0291 HEADER_SAMPLE_TIME = 21,
0292
0293 Two uint64_t for the time of first sample and the time of last sample.
0294
0295 HEADER_MEM_TOPOLOGY = 22,
0296
0297 Physical memory map and its node assignments.
0298
0299 The format of data in MEM_TOPOLOGY is as follows:
0300
0301 u64 version; // Currently 1
0302 u64 block_size_bytes; // /sys/devices/system/memory/block_size_bytes
0303 u64 count; // number of nodes
0304
0305 struct memory_node {
0306 u64 node_id; // node index
0307 u64 size; // size of bitmap
0308 struct bitmap {
0309 /* size of bitmap again */
0310 u64 bitmapsize;
0311 /* bitmap of memory indexes that belongs to node */
0312 /* /sys/devices/system/node/node<NODE>/memory<INDEX> */
0313 u64 entries[(bitmapsize/64)+1];
0314 }
0315 }[count];
0316
0317 The MEM_TOPOLOGY can be displayed with following command:
0318
0319 $ perf report --header-only -I
0320 ...
0321 # memory nodes (nr 1, block size 0x8000000):
0322 # 0 [7G]: 0-23,32-69
0323
0324 HEADER_CLOCKID = 23,
0325
0326 One uint64_t for the clockid frequency, specified, for instance, via 'perf
0327 record -k' (see clock_gettime()), to enable timestamps derived metrics
0328 conversion into wall clock time on the reporting stage.
0329
0330 HEADER_DIR_FORMAT = 24,
0331
0332 The data files layout is described by HEADER_DIR_FORMAT feature. Currently it
0333 holds only version number (1):
0334
0335 uint64_t version;
0336
0337 The current version, which holds only the version value (1), means that data files:
0338
0339 - Follow the 'data.*' name format.
0340
0341 - Contain raw events data in standard perf format as read from kernel (and need
0342 to be sorted)
0343
0344 Future versions are expected to describe different data files layout according
0345 to special needs.
0346
0347 HEADER_BPF_PROG_INFO = 25,
0348
0349 struct perf_bpil, which contains detailed information about
0350 a BPF program, including type, id, tag, jited/xlated instructions, etc.
0351
0352 HEADER_BPF_BTF = 26,
0353
0354 Contains BPF Type Format (BTF). For more information about BTF, please
0355 refer to Documentation/bpf/btf.rst.
0356
0357 struct {
0358 u32 id;
0359 u32 data_size;
0360 char data[];
0361 };
0362
0363 HEADER_COMPRESSED = 27,
0364
0365 struct {
0366 u32 version;
0367 u32 type;
0368 u32 level;
0369 u32 ratio;
0370 u32 mmap_len;
0371 };
0372
0373 Indicates that the trace contains records of PERF_RECORD_COMPRESSED type
0374 that have perf_events records in compressed form.
0375
0376 HEADER_CPU_PMU_CAPS = 28,
0377
0378 A list of cpu PMU capabilities. The format of data is as below.
0379
0380 struct {
0381 u32 nr_cpu_pmu_caps;
0382 {
0383 char name[];
0384 char value[];
0385 } [nr_cpu_pmu_caps]
0386 };
0387
0388
0389 Example:
0390 cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
0391
0392 HEADER_CLOCK_DATA = 29,
0393
0394 Contains clock id and its reference time together with wall clock
0395 time taken at the 'same time', both values are in nanoseconds.
0396 The format of data is as below.
0397
0398 struct {
0399 u32 version; /* version = 1 */
0400 u32 clockid;
0401 u64 wall_clock_ns;
0402 u64 clockid_time_ns;
0403 };
0404
0405 HEADER_HYBRID_TOPOLOGY = 30,
0406
0407 Indicate the hybrid CPUs. The format of data is as below.
0408
0409 struct {
0410 u32 nr;
0411 struct {
0412 char pmu_name[];
0413 char cpus[];
0414 } [nr]; /* Variable length records */
0415 };
0416
0417 Example:
0418 hybrid cpu system:
0419 cpu_core cpu list : 0-15
0420 cpu_atom cpu list : 16-23
0421
0422 HEADER_PMU_CAPS = 31,
0423
0424 List of pmu capabilities (except cpu pmu which is already
0425 covered by HEADER_CPU_PMU_CAPS). Note that hybrid cpu pmu
0426 capabilities are also stored here.
0427
0428 struct {
0429 u32 nr_pmu;
0430 struct {
0431 u32 nr_caps;
0432 {
0433 char name[];
0434 char value[];
0435 } [nr_caps];
0436 char pmu_name[];
0437 } [nr_pmu];
0438 };
0439
0440 other bits are reserved and should be ignored for now
0441 HEADER_FEAT_BITS = 256,
0442
0443 Attributes
0444
0445 This is an array of perf_event_attrs, each attr_size bytes long, which defines
0446 each event collected. See perf_event.h or the man page for a detailed
0447 description.
0448
0449 Data
0450
0451 This section is the bulk of the file. It consists of a stream of perf_events
0452 describing events. This matches the format generated by the kernel.
0453 See perf_event.h or the manpage for a detailed description.
0454
0455 Some notes on parsing:
0456
0457 Ordering
0458
0459 The events are not necessarily in time stamp order, as they can be
0460 collected in parallel on different CPUs. If the events should be
0461 processed in time order they need to be sorted first. It is possible
0462 to only do a partial sort using the FINISHED_ROUND event header (see
0463 below). perf record guarantees that there is no reordering over a
0464 FINISHED_ROUND.
0465
0466 ID vs IDENTIFIER
0467
0468 When the event stream contains multiple events each event is identified
0469 by an ID. This can be either through the PERF_SAMPLE_ID or the
0470 PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
0471 at a fixed offset from the event header, which allows reliable
0472 parsing of the header. Relying on ID may be ambiguous.
0473 IDENTIFIER is only supported by newer Linux kernels.
0474
0475 Perf record specific events:
0476
0477 In addition to the kernel generated event types perf record adds its
0478 own event types (in addition it also synthesizes some kernel events,
0479 for example MMAP events)
0480
0481 PERF_RECORD_USER_TYPE_START = 64,
0482 PERF_RECORD_HEADER_ATTR = 64,
0483
0484 struct attr_event {
0485 struct perf_event_header header;
0486 struct perf_event_attr attr;
0487 uint64_t id[];
0488 };
0489
0490 PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
0491
0492 #define MAX_EVENT_NAME 64
0493
0494 struct perf_trace_event_type {
0495 uint64_t event_id;
0496 char name[MAX_EVENT_NAME];
0497 };
0498
0499 struct event_type_event {
0500 struct perf_event_header header;
0501 struct perf_trace_event_type event_type;
0502 };
0503
0504
0505 PERF_RECORD_HEADER_TRACING_DATA = 66,
0506
0507 Describe me
0508
0509 struct tracing_data_event {
0510 struct perf_event_header header;
0511 uint32_t size;
0512 };
0513
0514 PERF_RECORD_HEADER_BUILD_ID = 67,
0515
0516 Define an ELF build ID for a referenced executable.
0517
0518 struct build_id_event; /* See above */
0519
0520 PERF_RECORD_FINISHED_ROUND = 68,
0521
0522 No event reordering over this header. No payload.
0523
0524 PERF_RECORD_ID_INDEX = 69,
0525
0526 Map event ids to CPUs and TIDs.
0527
0528 struct id_index_entry {
0529 uint64_t id;
0530 uint64_t idx;
0531 uint64_t cpu;
0532 uint64_t tid;
0533 };
0534
0535 struct id_index_event {
0536 struct perf_event_header header;
0537 uint64_t nr;
0538 struct id_index_entry entries[nr];
0539 };
0540
0541 PERF_RECORD_AUXTRACE_INFO = 70,
0542
0543 Auxtrace type specific information. Describe me
0544
0545 struct auxtrace_info_event {
0546 struct perf_event_header header;
0547 uint32_t type;
0548 uint32_t reserved__; /* For alignment */
0549 uint64_t priv[];
0550 };
0551
0552 PERF_RECORD_AUXTRACE = 71,
0553
0554 Defines auxtrace data. Followed by the actual data. The contents of
0555 the auxtrace data are dependent on the event and the CPU. For example
0556 for Intel Processor Trace it contains Processor Trace data generated
0557 by the CPU.
0558
0559 struct auxtrace_event {
0560 struct perf_event_header header;
0561 uint64_t size;
0562 uint64_t offset;
0563 uint64_t reference;
0564 uint32_t idx;
0565 uint32_t tid;
0566 uint32_t cpu;
0567 uint32_t reserved__; /* For alignment */
0568 };
0569
0570 struct aux_event {
0571 struct perf_event_header header;
0572 uint64_t aux_offset;
0573 uint64_t aux_size;
0574 uint64_t flags;
0575 };
0576
0577 PERF_RECORD_AUXTRACE_ERROR = 72,
0578
0579 Describes an error in hardware tracing
0580
0581 enum auxtrace_error_type {
0582 PERF_AUXTRACE_ERROR_ITRACE = 1,
0583 PERF_AUXTRACE_ERROR_MAX
0584 };
0585
0586 #define MAX_AUXTRACE_ERROR_MSG 64
0587
0588 struct auxtrace_error_event {
0589 struct perf_event_header header;
0590 uint32_t type;
0591 uint32_t code;
0592 uint32_t cpu;
0593 uint32_t pid;
0594 uint32_t tid;
0595 uint32_t reserved__; /* For alignment */
0596 uint64_t ip;
0597 char msg[MAX_AUXTRACE_ERROR_MSG];
0598 };
0599
0600 PERF_RECORD_HEADER_FEATURE = 80,
0601
0602 Describes a header feature. These are records used in pipe-mode that
0603 contain information that otherwise would be in perf.data file's header.
0604
0605 PERF_RECORD_COMPRESSED = 81,
0606
0607 struct compressed_event {
0608 struct perf_event_header header;
0609 char data[];
0610 };
0611
0612 PERF_RECORD_FINISHED_INIT = 82,
0613
0614 Marks the end of records for the system, pre-existing threads in system wide
0615 sessions, etc. Those are the ones prefixed PERF_RECORD_USER_*.
0616
0617 This is used, for instance, to 'perf inject' events after init and before
0618 regular events, those emitted by the kernel, to support combining guest and
0619 host records.
0620
0621
0622 For PERF_RECORD_COMPRESSED, the header is followed by a compressed data frame that
0623 can be decompressed into an array of perf trace records. The size of the entire
0624 compressed event record including the header is limited by the max value of header.size.
0625
0626 Event types
0627
0628 Define the event attributes with their IDs.
0629
0630 An array bound by the perf_file_section size.
0631
0632 struct {
0633 struct perf_event_attr attr; /* Size defined by header.attr_size */
0634 struct perf_file_section ids;
0635 }
0636
0637 ids points to an array of uint64_t defining the ids for event attr attr.
0638
0639 Pipe-mode data
0640
0641 Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
0642 from the struct perf_header. The trimmed header is:
0643
0644 struct perf_pipe_file_header {
0645 u64 magic;
0646 u64 size;
0647 };
0648
0649 The information about attrs, data, and event_types is instead in the
0650 synthesized events PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
0651 PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
0652 that are generated by perf record in pipe-mode.
0653
0654
0655 References:
0656
0657 include/uapi/linux/perf_event.h
0658
0659 This is the canonical description of the kernel generated perf_events
0660 and the perf_event_attrs.
0661
0662 perf_events manpage
0663
0664 A manpage describing perf_event and perf_event_attr is here:
0665 http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
0666 This tends to be slightly behind the kernel include, but has better
0667 descriptions. A (typically older) version of the man page may be
0668 included with the standard Linux man pages, available with "man
0669 perf_events"
0670
0671 pmu-tools
0672
0673 https://github.com/andikleen/pmu-tools/tree/master/parser
0674
0675 A definition of the perf.data format in python "construct" format is available
0676 in pmu-tools parser. This allows reading perf.data from python and dumping it.
0677
0678 quipper
0679
0680 The quipper C++ parser is available at
0681 http://github.com/google/perf_data_converter/tree/master/src/quipper
0682