Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright IBM Corp. 2018
0004  * Auxtrace support for s390 CPU-Measurement Sampling Facility
0005  *
0006  * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
0007  *
0008  * Auxiliary traces are collected during 'perf record' using rbd000 event.
0009  * Several PERF_RECORD_XXX are generated during recording:
0010  *
0011  * PERF_RECORD_AUX:
0012  *  Records that new data landed in the AUX buffer part.
0013  * PERF_RECORD_AUXTRACE:
0014  *  Defines auxtrace data. Followed by the actual data. The contents of
0015  *  the auxtrace data is dependent on the event and the CPU.
0016  *  This record is generated by perf record command. For details
0017  *  see Documentation/perf.data-file-format.txt.
0018  * PERF_RECORD_AUXTRACE_INFO:
0019  *  Defines a table of contents for PERF_RECORD_AUXTRACE records. This
0020  *  record is generated during 'perf record' command. Each record contains
0021  *  up to 256 entries describing offset and size of the AUXTRACE data in the
0022  *  perf.data file.
0023  * PERF_RECORD_AUXTRACE_ERROR:
0024  *  Indicates an error during AUXTRACE collection such as buffer overflow.
0025  * PERF_RECORD_FINISHED_ROUND:
0026  *  Perf events are not necessarily in time stamp order, as they can be
0027  *  collected in parallel on different CPUs. If the events should be
0028  *  processed in time order they need to be sorted first.
0029  *  Perf report guarantees that there is no reordering over a
0030  *  PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
0031  *  time stamp lower than this record are processed (and displayed) before
0032  *  the succeeding perf record are processed.
0033  *
0034  * These records are evaluated during perf report command.
0035  *
0036  * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
0037  * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
0038  * below.
0039  * Auxiliary trace data is collected per CPU. To merge the data into the report
0040  * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
0041  * data is in ascending order.
0042  *
0043  * Each queue has a double linked list of auxtrace_buffers. This list contains
0044  * the offset and size of a CPU's auxtrace data. During auxtrace processing
0045  * the data portion is mmap()'ed.
0046  *
0047  * To sort the queues in chronological order, all queue access is controlled
0048  * by the auxtrace_heap. This is basically a stack, each stack element has two
0049  * entries, the queue number and a time stamp. However the stack is sorted by
0050  * the time stamps. The highest time stamp is at the bottom the lowest
0051  * (nearest) time stamp is at the top. That sort order is maintained at all
0052  * times!
0053  *
0054  * After the auxtrace infrastructure has been setup, the auxtrace queues are
0055  * filled with data (offset/size pairs) and the auxtrace_heap is populated.
0056  *
0057  * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
0058  * Each record is handled by s390_cpumsf_process_event(). The time stamp of
0059  * the perf record is compared with the time stamp located on the auxtrace_heap
0060  * top element. If that time stamp is lower than the time stamp from the
0061  * record sample, the auxtrace queues will be processed. As auxtrace queues
0062  * control many auxtrace_buffers and each buffer can be quite large, the
0063  * auxtrace buffer might be processed only partially. In this case the
0064  * position in the auxtrace_buffer of that queue is remembered and the time
0065  * stamp of the last processed entry of the auxtrace_buffer replaces the
0066  * current auxtrace_heap top.
0067  *
0068  * 3. Auxtrace_queues might run out of data and are fed by the
0069  * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
0070  *
0071  * Event Generation
0072  * Each sampling-data entry in the auxiliary trace data generates a perf sample.
0073  * This sample is filled
0074  * with data from the auxtrace such as PID/TID, instruction address, CPU state,
0075  * etc. This sample is processed with perf_session__deliver_synth_event() to
0076  * be included into the GUI.
0077  *
0078  * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
0079  * auxiliary trace entries until the time stamp of this record is reached
0080  * by the auxtrace_heap top. This is triggered by ordered_event->deliver().
0081  *
0082  *
0083  * Perf event processing.
0084  * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
0085  * This is the function call sequence:
0086  *
0087  * __cmd_report()
0088  * |
0089  * perf_session__process_events()
0090  * |
0091  * __perf_session__process_events()
0092  * |
0093  * perf_session__process_event()
0094  * |  This function splits the PERF_RECORD_XXX records.
0095  * |  - Those generated by perf record command (type number equal or higher
0096  * |    than PERF_RECORD_USER_TYPE_START) are handled by
0097  * |    perf_session__process_user_event(see below)
0098  * |  - Those generated by the kernel are handled by
0099  * |    evlist__parse_sample_timestamp()
0100  * |
0101  * evlist__parse_sample_timestamp()
0102  * |  Extract time stamp from sample data.
0103  * |
0104  * perf_session__queue_event()
0105  * |  If timestamp is positive the sample is entered into an ordered_event
0106  * |  list, sort order is the timestamp. The event processing is deferred until
0107  * |  later (see perf_session__process_user_event()).
0108  * |  Other timestamps (0 or -1) are handled immediately by
0109  * |  perf_session__deliver_event(). These are events generated at start up
0110  * |  of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
0111  * |  records. They are needed to create a list of running processes and its
0112  * |  memory mappings and layout. They are needed at the beginning to enable
0113  * |  command perf report to create process trees and memory mappings.
0114  * |
0115  * perf_session__deliver_event()
0116  * |  Delivers a PERF_RECORD_XXX entry for handling.
0117  * |
0118  * auxtrace__process_event()
0119  * |  The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
0120  * |  time stamps from the auxiliary trace buffers. This enables
0121  * |  synchronization between auxiliary trace data and the events on the
0122  * |  perf.data file.
0123  * |
0124  * machine__deliver_event()
0125  * |  Handles the PERF_RECORD_XXX event. This depends on the record type.
0126  *    It might update the process tree, update a process memory map or enter
0127  *    a sample with IP and call back chain data into GUI data pool.
0128  *
0129  *
0130  * Deferred processing determined by perf_session__process_user_event() is
0131  * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
0132  * are generated during command perf record.
0133  * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
0134  * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
0135  * built up while reading the perf.data file.
0136  * Each event is now processed by calling perf_session__deliver_event().
0137  * This enables time synchronization between the data in the perf.data file and
0138  * the data in the auxiliary trace buffers.
0139  */
0140 
0141 #include <endian.h>
0142 #include <errno.h>
0143 #include <byteswap.h>
0144 #include <inttypes.h>
0145 #include <linux/kernel.h>
0146 #include <linux/types.h>
0147 #include <linux/bitops.h>
0148 #include <linux/log2.h>
0149 #include <linux/zalloc.h>
0150 
0151 #include <sys/stat.h>
0152 #include <sys/types.h>
0153 
0154 #include "color.h"
0155 #include "evsel.h"
0156 #include "evlist.h"
0157 #include "machine.h"
0158 #include "session.h"
0159 #include "tool.h"
0160 #include "debug.h"
0161 #include "auxtrace.h"
0162 #include "s390-cpumsf.h"
0163 #include "s390-cpumsf-kernel.h"
0164 #include "s390-cpumcf-kernel.h"
0165 #include "config.h"
0166 
/* Decoder state for the s390 CPU-Measurement Sampling Facility auxtrace. */
0167 struct s390_cpumsf {
0168     struct auxtrace     auxtrace;
0169     struct auxtrace_queues  queues; /* queue_array[] is indexed by queue number */
0170     struct auxtrace_heap    heap; /* holds (queue number, time stamp) entries */
0171     struct perf_session *session; /* synthesized samples are delivered to it */
0172     struct machine      *machine;
0173     u32         auxtrace_type;
0174     u32         pmu_type;
0175     u16         machine_type; /* selects entry sizes if the trailer has none */
0176     bool            data_queued;
0177     bool            use_logfile; /* raw data is also written to aux.* files */
0178     char            *logdir; /* optional directory prefix for the aux.* files */
0179 };
0180 
/* Private per-queue decoder state, stored in auxtrace_queue->priv. */
0181 struct s390_cpumsf_queue {
0182     struct s390_cpumsf  *sf; /* back pointer to the owning decoder */
0183     unsigned int        queue_nr; /* index into sf->queues.queue_array[] */
0184     struct auxtrace_buffer  *buffer; /* buffer being decoded, NULL if none */
0185     int         cpu; /* copied from the auxtrace queue, -1 if not set */
0186     FILE            *logfile; /* "aux.smp.<queue_nr>" dump file */
0187     FILE            *logfile_ctr; /* "aux.ctr.<cpu>" dump file */
0188 };
0189 
0190 /* Check if the raw data should be dumped to file. If this is the case and
0191  * the file to dump to has not been opened for writing, do so.
0192  *
0193  * Return 0 on success and greater zero on error so processing continues.
0194  */
0195 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
0196                    struct perf_sample *sample)
0197 {
0198     struct s390_cpumsf_queue *sfq;
0199     struct auxtrace_queue *q;
0200     int rc = 0;
0201 
0202     if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
0203         return rc;
0204 
0205     q = &sf->queues.queue_array[sample->cpu];
0206     sfq = q->priv;
0207     if (!sfq)       /* Queue not yet allocated */
0208         return rc;
0209 
0210     if (!sfq->logfile_ctr) {
0211         char *name;
0212 
0213         rc = (sf->logdir)
0214             ? asprintf(&name, "%s/aux.ctr.%02x",
0215                  sf->logdir, sample->cpu)
0216             : asprintf(&name, "aux.ctr.%02x", sample->cpu);
0217         if (rc > 0)
0218             sfq->logfile_ctr = fopen(name, "w");
0219         if (sfq->logfile_ctr == NULL) {
0220             pr_err("Failed to open counter set log file %s, "
0221                    "continue...\n", name);
0222             rc = 1;
0223         }
0224         free(name);
0225     }
0226 
0227     if (sfq->logfile_ctr) {
0228         /* See comment above for -4 */
0229         size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
0230                   sfq->logfile_ctr);
0231         if (n != 1) {
0232             pr_err("Failed to write counter set data\n");
0233             rc = 1;
0234         }
0235     }
0236     return rc;
0237 }
0238 
0239 /* Display s390 CPU measurement facility basic-sampling data entry
0240  * Data written on s390 in big endian byte order and contains bit
0241  * fields across byte boundaries.
0242  */
0243 static bool s390_cpumsf_basic_show(const char *color, size_t pos,
0244                    struct hws_basic_entry *basicp)
0245 {
0246     struct hws_basic_entry *basic = basicp;
0247 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
0248     struct hws_basic_entry local;
0249     unsigned long long word = be64toh(*(unsigned long long *)basicp);
0250 
0251     memset(&local, 0, sizeof(local));
0252     local.def = be16toh(basicp->def);
0253     local.prim_asn = word & 0xffff;
0254     local.CL = word >> 30 & 0x3;
0255     local.I = word >> 32 & 0x1;
0256     local.AS = word >> 33 & 0x3;
0257     local.P = word >> 35 & 0x1;
0258     local.W = word >> 36 & 0x1;
0259     local.T = word >> 37 & 0x1;
0260     local.U = word >> 40 & 0xf;
0261     local.ia = be64toh(basicp->ia);
0262     local.gpp = be64toh(basicp->gpp);
0263     local.hpp = be64toh(basicp->hpp);
0264     basic = &local;
0265 #endif
0266     if (basic->def != 1) {
0267         pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
0268         return false;
0269     }
0270     color_fprintf(stdout, color, "    [%#08zx] Basic   Def:%04x Inst:%#04x"
0271               " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
0272               "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
0273               pos, basic->def, basic->U,
0274               basic->T ? 'T' : ' ',
0275               basic->W ? 'W' : ' ',
0276               basic->P ? 'P' : ' ',
0277               basic->I ? 'I' : ' ',
0278               basic->AS, basic->prim_asn, basic->ia, basic->CL,
0279               basic->hpp, basic->gpp);
0280     return true;
0281 }
0282 
0283 /* Display s390 CPU measurement facility diagnostic-sampling data entry.
0284  * Data written on s390 in big endian byte order and contains bit
0285  * fields across byte boundaries.
0286  */
0287 static bool s390_cpumsf_diag_show(const char *color, size_t pos,
0288                   struct hws_diag_entry *diagp)
0289 {
0290     struct hws_diag_entry *diag = diagp;
0291 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
0292     struct hws_diag_entry local;
0293     unsigned long long word = be64toh(*(unsigned long long *)diagp);
0294 
0295     local.def = be16toh(diagp->def);
0296     local.I = word >> 32 & 0x1;
0297     diag = &local;
0298 #endif
0299     if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
0300         pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
0301         return false;
0302     }
0303     color_fprintf(stdout, color, "    [%#08zx] Diag    Def:%04x %c\n",
0304               pos, diag->def, diag->I ? 'I' : ' ');
0305     return true;
0306 }
0307 
0308 /* Return TOD timestamp contained in an trailer entry */
0309 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
0310                         int idx)
0311 {
0312     /* te->t set: TOD in STCKE format, bytes 8-15
0313      * to->t not set: TOD in STCK format, bytes 0-7
0314      */
0315     unsigned long long ts;
0316 
0317     memcpy(&ts, &te->timestamp[idx], sizeof(ts));
0318     return be64toh(ts);
0319 }
0320 
0321 /* Display s390 CPU measurement facility trailer entry */
0322 static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
0323                      struct hws_trailer_entry *te)
0324 {
0325 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
0326     struct hws_trailer_entry local;
0327     const unsigned long long flags = be64toh(te->flags);
0328 
0329     memset(&local, 0, sizeof(local));
0330     local.f = flags >> 63 & 0x1;
0331     local.a = flags >> 62 & 0x1;
0332     local.t = flags >> 61 & 0x1;
0333     local.bsdes = be16toh((flags >> 16 & 0xffff));
0334     local.dsdes = be16toh((flags & 0xffff));
0335     memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
0336     local.overflow = be64toh(te->overflow);
0337     local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
0338     local.progusage2 = be64toh(te->progusage2);
0339     te = &local;
0340 #endif
0341     if (te->bsdes != sizeof(struct hws_basic_entry)) {
0342         pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
0343         return false;
0344     }
0345     color_fprintf(stdout, color, "    [%#08zx] Trailer %c%c%c bsdes:%d"
0346               " dsdes:%d Overflow:%lld Time:%#llx\n"
0347               "\t\tC:%d TOD:%#lx\n",
0348               pos,
0349               te->f ? 'F' : ' ',
0350               te->a ? 'A' : ' ',
0351               te->t ? 'T' : ' ',
0352               te->bsdes, te->dsdes, te->overflow,
0353               trailer_timestamp(te, te->clock_base),
0354               te->clock_base, te->progusage2);
0355     return true;
0356 }
0357 
0358 /* Test a sample data block. It must be 4KB or a multiple thereof in size and
0359  * 4KB page aligned. Each sample data page has a trailer entry at the
0360  * end which contains the sample entry data sizes.
0361  *
0362  * Return true if the sample data block passes the checks and set the
0363  * basic set entry size and diagnostic set entry size.
0364  *
0365  * Return false on failure.
0366  *
0367  * Note: Old hardware does not set the basic or diagnostic entry sizes
0368  * in the trailer entry. Use the type number instead.
0369  */
0370 static bool s390_cpumsf_validate(int machine_type,
0371                  unsigned char *buf, size_t len,
0372                  unsigned short *bsdes,
0373                  unsigned short *dsdes)
0374 {
0375     struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
0376     struct hws_trailer_entry *te;
0377 
0378     *dsdes = *bsdes = 0;
0379     if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
0380         return false;
0381     if (be16toh(basic->def) != 1)   /* No basic set entry, must be first */
0382         return false;
0383     /* Check for trailer entry at end of SDB */
0384     te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
0385                           - sizeof(*te));
0386     *bsdes = be16toh(te->bsdes);
0387     *dsdes = be16toh(te->dsdes);
0388     if (!te->bsdes && !te->dsdes) {
0389         /* Very old hardware, use CPUID */
0390         switch (machine_type) {
0391         case 2097:
0392         case 2098:
0393             *dsdes = 64;
0394             *bsdes = 32;
0395             break;
0396         case 2817:
0397         case 2818:
0398             *dsdes = 74;
0399             *bsdes = 32;
0400             break;
0401         case 2827:
0402         case 2828:
0403             *dsdes = 85;
0404             *bsdes = 32;
0405             break;
0406         case 2964:
0407         case 2965:
0408             *dsdes = 112;
0409             *bsdes = 32;
0410             break;
0411         default:
0412             /* Illegal trailer entry */
0413             return false;
0414         }
0415     }
0416     return true;
0417 }
0418 
0419 /* Return true if there is room for another entry */
0420 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
0421 {
0422     size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
0423 
0424     if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
0425         return false;
0426     return true;
0427 }
0428 
0429 /* Dump an auxiliary buffer. These buffers are multiple of
0430  * 4KB SDB pages.
0431  */
0432 static void s390_cpumsf_dump(struct s390_cpumsf *sf,
0433                  unsigned char *buf, size_t len)
0434 {
0435     const char *color = PERF_COLOR_BLUE;
0436     struct hws_basic_entry *basic;
0437     struct hws_diag_entry *diag;
0438     unsigned short bsdes, dsdes;
0439     size_t pos = 0;
0440 
0441     color_fprintf(stdout, color,
0442               ". ... s390 AUX data: size %zu bytes\n",
0443               len);
0444 
0445     if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
0446                   &dsdes)) {
0447         pr_err("Invalid AUX trace data block size:%zu"
0448                " (type:%d bsdes:%hd dsdes:%hd)\n",
0449                len, sf->machine_type, bsdes, dsdes);
0450         return;
0451     }
0452 
0453     /* s390 kernel always returns 4KB blocks fully occupied,
0454      * no partially filled SDBs.
0455      */
0456     while (pos < len) {
0457         /* Handle Basic entry */
0458         basic = (struct hws_basic_entry *)(buf + pos);
0459         if (s390_cpumsf_basic_show(color, pos, basic))
0460             pos += bsdes;
0461         else
0462             return;
0463 
0464         /* Handle Diagnostic entry */
0465         diag = (struct hws_diag_entry *)(buf + pos);
0466         if (s390_cpumsf_diag_show(color, pos, diag))
0467             pos += dsdes;
0468         else
0469             return;
0470 
0471         /* Check for trailer entry */
0472         if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
0473             /* Show trailer entry */
0474             struct hws_trailer_entry te;
0475 
0476             pos = (pos + S390_CPUMSF_PAGESZ)
0477                    & ~(S390_CPUMSF_PAGESZ - 1);
0478             pos -= sizeof(te);
0479             memcpy(&te, buf + pos, sizeof(te));
0480             /* Set descriptor sizes in case of old hardware
0481              * where these values are not set.
0482              */
0483             te.bsdes = bsdes;
0484             te.dsdes = dsdes;
0485             if (s390_cpumsf_trailer_show(color, pos, &te))
0486                 pos += sizeof(te);
0487             else
0488                 return;
0489         }
0490     }
0491 }
0492 
/* Print a separator line consisting of a single dot, then dump
 * the auxiliary buffer.
 */
static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
                   size_t len)
{
    printf(".\n");

    s390_cpumsf_dump(sf, buf, len);
}
0499 
0500 #define S390_LPP_PID_MASK   0xffffffff
0501 
0502 static bool s390_cpumsf_make_event(size_t pos,
0503                    struct hws_basic_entry *basic,
0504                    struct s390_cpumsf_queue *sfq)
0505 {
0506     struct perf_sample sample = {
0507                 .ip = basic->ia,
0508                 .pid = basic->hpp & S390_LPP_PID_MASK,
0509                 .tid = basic->hpp & S390_LPP_PID_MASK,
0510                 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
0511                 .cpu = sfq->cpu,
0512                 .period = 1
0513                 };
0514     union perf_event event;
0515 
0516     memset(&event, 0, sizeof(event));
0517     if (basic->CL == 1) /* Native LPAR mode */
0518         sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
0519                       : PERF_RECORD_MISC_KERNEL;
0520     else if (basic->CL == 2)    /* Guest kernel/user space */
0521         sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
0522                       : PERF_RECORD_MISC_GUEST_KERNEL;
0523     else if (basic->gpp || basic->prim_asn != 0xffff)
0524         /* Use heuristics on old hardware */
0525         sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
0526                       : PERF_RECORD_MISC_GUEST_KERNEL;
0527     else
0528         sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
0529                       : PERF_RECORD_MISC_KERNEL;
0530 
0531     event.sample.header.type = PERF_RECORD_SAMPLE;
0532     event.sample.header.misc = sample.cpumode;
0533     event.sample.header.size = sizeof(struct perf_event_header);
0534 
0535     pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
0536          __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
0537          sample.tid, sample.cpumode, sample.cpu);
0538     if (perf_session__deliver_synth_event(sfq->sf->session, &event,
0539                           &sample)) {
0540         pr_err("s390 Auxiliary Trace: failed to deliver event\n");
0541         return false;
0542     }
0543     return true;
0544 }
0545 
0546 static unsigned long long get_trailer_time(const unsigned char *buf)
0547 {
0548     struct hws_trailer_entry *te;
0549     unsigned long long aux_time, progusage2;
0550     bool clock_base;
0551 
0552     te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
0553                           - sizeof(*te));
0554 
0555 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
0556     clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
0557     progusage2 = be64toh(te->progusage[1]);
0558 #else
0559     clock_base = te->clock_base;
0560     progusage2 = te->progusage2;
0561 #endif
0562     if (!clock_base)    /* TOD_CLOCK_BASE value missing */
0563         return 0;
0564 
0565     /* Correct calculation to convert time stamp in trailer entry to
0566      * nano seconds (taken from arch/s390 function tod_to_ns()).
0567      * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
0568      */
0569     aux_time = trailer_timestamp(te, clock_base) - progusage2;
0570     aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
0571     return aux_time;
0572 }
0573 
0574 /* Process the data samples of a single queue. The first parameter is a
0575  * pointer to the queue, the second parameter is the time stamp. This
0576  * is the time stamp:
0577  * - of the event that triggered this processing.
0578  * - or the time stamp when the last processing of this queue stopped.
0579  *   In this case it stopped at a 4KB page boundary and record the
0580  *   position on where to continue processing on the next invocation
0581  *   (see buffer->use_data and buffer->use_size).
0582  *
0583  * When this function returns the second parameter is updated to
0584  * reflect the time stamp of the last processed auxiliary data entry
0585  * (taken from the trailer entry of that page). The caller uses this
0586  * returned time stamp to record the last processed entry in this
0587  * queue.
0588  *
0589  * The function returns:
0590  * 0:  Processing successful. The second parameter returns the
0591  *     time stamp from the trailer entry until which position
0592  *     processing took place. Subsequent calls resume from this
0593  *     position.
0594  * <0: An error occurred during processing. The second parameter
0595  *     returns the maximum time stamp.
0596  * >0: Done on this queue. The second parameter returns the
0597  *     maximum time stamp.
0598  */
0599 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
0600 {
0601     struct s390_cpumsf *sf = sfq->sf;
0602     unsigned char *buf = sfq->buffer->use_data;
0603     size_t len = sfq->buffer->use_size;
0604     struct hws_basic_entry *basic;
0605     unsigned short bsdes, dsdes;
0606     size_t pos = 0;
0607     int err = 1;
0608     u64 aux_ts;
0609 
0610     if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
0611                   &dsdes)) {
0612         *ts = ~0ULL;
0613         return -1;
0614     }
0615 
0616     /* Get trailer entry time stamp and check if entries in
0617      * this auxiliary page are ready for processing. If the
0618      * time stamp of the first entry is too high, whole buffer
0619      * can be skipped. In this case return time stamp.
0620      */
0621     aux_ts = get_trailer_time(buf);
0622     if (!aux_ts) {
0623         pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
0624                (s64)sfq->buffer->data_offset);
0625         aux_ts = ~0ULL;
0626         goto out;
0627     }
0628     if (aux_ts > *ts) {
0629         *ts = aux_ts;
0630         return 0;
0631     }
0632 
0633     while (pos < len) {
0634         /* Handle Basic entry */
0635         basic = (struct hws_basic_entry *)(buf + pos);
0636         if (s390_cpumsf_make_event(pos, basic, sfq))
0637             pos += bsdes;
0638         else {
0639             err = -EBADF;
0640             goto out;
0641         }
0642 
0643         pos += dsdes;   /* Skip diagnostic entry */
0644 
0645         /* Check for trailer entry */
0646         if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
0647             pos = (pos + S390_CPUMSF_PAGESZ)
0648                    & ~(S390_CPUMSF_PAGESZ - 1);
0649             /* Check existence of next page */
0650             if (pos >= len)
0651                 break;
0652             aux_ts = get_trailer_time(buf + pos);
0653             if (!aux_ts) {
0654                 aux_ts = ~0ULL;
0655                 goto out;
0656             }
0657             if (aux_ts > *ts) {
0658                 *ts = aux_ts;
0659                 sfq->buffer->use_data += pos;
0660                 sfq->buffer->use_size -= pos;
0661                 return 0;
0662             }
0663         }
0664     }
0665 out:
0666     *ts = aux_ts;
0667     sfq->buffer->use_size = 0;
0668     sfq->buffer->use_data = NULL;
0669     return err; /* Buffer completely scanned or error */
0670 }
0671 
0672 /* Run the s390 auxiliary trace decoder.
0673  * Select the queue buffer to operate on, the caller already selected
0674  * the proper queue, depending on second parameter 'ts'.
0675  * This is the time stamp until which the auxiliary entries should
0676  * be processed. This value is updated by called functions and
0677  * returned to the caller.
0678  *
0679  * Resume processing in the current buffer. If there is no buffer
0680  * get a new buffer from the queue and setup start position for
0681  * processing.
0682  * When a buffer is completely processed remove it from the queue
0683  * before returning.
0684  *
0685  * This function returns
0686  * 1: When the queue is empty. Second parameter will be set to
0687  *    maximum time stamp.
0688  * 0: Normal processing done.
0689  * <0: Error during queue buffer setup. This causes the caller
0690  *     to stop processing completely.
0691  */
0692 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
0693                    u64 *ts)
0694 {
0695 
0696     struct auxtrace_buffer *buffer;
0697     struct auxtrace_queue *queue;
0698     int err;
0699 
0700     queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
0701 
0702     /* Get buffer and last position in buffer to resume
0703      * decoding the auxiliary entries. One buffer might be large
0704      * and decoding might stop in between. This depends on the time
0705      * stamp of the trailer entry in each page of the auxiliary
0706      * data and the time stamp of the event triggering the decoding.
0707      */
0708     if (sfq->buffer == NULL) {
0709         sfq->buffer = buffer = auxtrace_buffer__next(queue,
0710                                  sfq->buffer);
0711         if (!buffer) {
0712             *ts = ~0ULL;
0713             return 1;   /* Processing done on this queue */
0714         }
0715         /* Start with a new buffer on this queue */
0716         if (buffer->data) {
0717             buffer->use_size = buffer->size;
0718             buffer->use_data = buffer->data;
0719         }
0720         if (sfq->logfile) { /* Write into log file */
0721             size_t rc = fwrite(buffer->data, buffer->size, 1,
0722                        sfq->logfile);
0723             if (rc != 1)
0724                 pr_err("Failed to write auxiliary data\n");
0725         }
0726     } else
0727         buffer = sfq->buffer;
0728 
0729     if (!buffer->data) {
0730         int fd = perf_data__fd(sfq->sf->session->data);
0731 
0732         buffer->data = auxtrace_buffer__get_data(buffer, fd);
0733         if (!buffer->data)
0734             return -ENOMEM;
0735         buffer->use_size = buffer->size;
0736         buffer->use_data = buffer->data;
0737 
0738         if (sfq->logfile) { /* Write into log file */
0739             size_t rc = fwrite(buffer->data, buffer->size, 1,
0740                        sfq->logfile);
0741             if (rc != 1)
0742                 pr_err("Failed to write auxiliary data\n");
0743         }
0744     }
0745     pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
0746           __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
0747           buffer->size, buffer->use_size);
0748     err = s390_cpumsf_samples(sfq, ts);
0749 
0750     /* If non-zero, there is either an error (err < 0) or the buffer is
0751      * completely done (err > 0). The error is unrecoverable, usually
0752      * some descriptors could not be read successfully, so continue with
0753      * the next buffer.
0754      * In both cases the parameter 'ts' has been updated.
0755      */
0756     if (err) {
0757         sfq->buffer = NULL;
0758         list_del_init(&buffer->list);
0759         auxtrace_buffer__free(buffer);
0760         if (err > 0)        /* Buffer done, no error */
0761             err = 0;
0762     }
0763     return err;
0764 }
0765 
0766 static struct s390_cpumsf_queue *
0767 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
0768 {
0769     struct s390_cpumsf_queue *sfq;
0770 
0771     sfq = zalloc(sizeof(struct s390_cpumsf_queue));
0772     if (sfq == NULL)
0773         return NULL;
0774 
0775     sfq->sf = sf;
0776     sfq->queue_nr = queue_nr;
0777     sfq->cpu = -1;
0778     if (sf->use_logfile) {
0779         char *name;
0780         int rc;
0781 
0782         rc = (sf->logdir)
0783             ? asprintf(&name, "%s/aux.smp.%02x",
0784                  sf->logdir, queue_nr)
0785             : asprintf(&name, "aux.smp.%02x", queue_nr);
0786         if (rc > 0)
0787             sfq->logfile = fopen(name, "w");
0788         if (sfq->logfile == NULL) {
0789             pr_err("Failed to open auxiliary log file %s,"
0790                    "continue...\n", name);
0791             sf->use_logfile = false;
0792         }
0793         free(name);
0794     }
0795     return sfq;
0796 }
0797 
0798 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
0799                    struct auxtrace_queue *queue,
0800                    unsigned int queue_nr, u64 ts)
0801 {
0802     struct s390_cpumsf_queue *sfq = queue->priv;
0803 
0804     if (list_empty(&queue->head))
0805         return 0;
0806 
0807     if (sfq == NULL) {
0808         sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
0809         if (!sfq)
0810             return -ENOMEM;
0811         queue->priv = sfq;
0812 
0813         if (queue->cpu != -1)
0814             sfq->cpu = queue->cpu;
0815     }
0816     return auxtrace_heap__add(&sf->heap, queue_nr, ts);
0817 }
0818 
0819 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
0820 {
0821     unsigned int i;
0822     int ret = 0;
0823 
0824     for (i = 0; i < sf->queues.nr_queues; i++) {
0825         ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
0826                           i, ts);
0827         if (ret)
0828             break;
0829     }
0830     return ret;
0831 }
0832 
0833 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
0834 {
0835     if (!sf->queues.new_data)
0836         return 0;
0837 
0838     sf->queues.new_data = false;
0839     return s390_cpumsf_setup_queues(sf, ts);
0840 }
0841 
0842 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
0843 {
0844     unsigned int queue_nr;
0845     u64 ts;
0846     int ret;
0847 
0848     while (1) {
0849         struct auxtrace_queue *queue;
0850         struct s390_cpumsf_queue *sfq;
0851 
0852         if (!sf->heap.heap_cnt)
0853             return 0;
0854 
0855         if (sf->heap.heap_array[0].ordinal >= timestamp)
0856             return 0;
0857 
0858         queue_nr = sf->heap.heap_array[0].queue_nr;
0859         queue = &sf->queues.queue_array[queue_nr];
0860         sfq = queue->priv;
0861 
0862         auxtrace_heap__pop(&sf->heap);
0863         if (sf->heap.heap_cnt) {
0864             ts = sf->heap.heap_array[0].ordinal + 1;
0865             if (ts > timestamp)
0866                 ts = timestamp;
0867         } else {
0868             ts = timestamp;
0869         }
0870 
0871         ret = s390_cpumsf_run_decoder(sfq, &ts);
0872         if (ret < 0) {
0873             auxtrace_heap__add(&sf->heap, queue_nr, ts);
0874             return ret;
0875         }
0876         if (!ret) {
0877             ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
0878             if (ret < 0)
0879                 return ret;
0880         }
0881     }
0882     return 0;
0883 }
0884 
0885 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
0886                    pid_t pid, pid_t tid, u64 ip, u64 timestamp)
0887 {
0888     char msg[MAX_AUXTRACE_ERROR_MSG];
0889     union perf_event event;
0890     int err;
0891 
0892     strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
0893     auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
0894                  code, cpu, pid, tid, ip, msg, timestamp);
0895 
0896     err = perf_session__deliver_synth_event(sf->session, &event, NULL);
0897     if (err)
0898         pr_err("s390 Auxiliary Trace: failed to deliver error event,"
0899             "error %d\n", err);
0900     return err;
0901 }
0902 
0903 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
0904 {
0905     return s390_cpumsf_synth_error(sf, 1, sample->cpu,
0906                        sample->pid, sample->tid, 0,
0907                        sample->time);
0908 }
0909 
0910 static int
0911 s390_cpumsf_process_event(struct perf_session *session,
0912               union perf_event *event,
0913               struct perf_sample *sample,
0914               struct perf_tool *tool)
0915 {
0916     struct s390_cpumsf *sf = container_of(session->auxtrace,
0917                           struct s390_cpumsf,
0918                           auxtrace);
0919     u64 timestamp = sample->time;
0920     struct evsel *ev_bc000;
0921 
0922     int err = 0;
0923 
0924     if (dump_trace)
0925         return 0;
0926 
0927     if (!tool->ordered_events) {
0928         pr_err("s390 Auxiliary Trace requires ordered events\n");
0929         return -EINVAL;
0930     }
0931 
0932     if (event->header.type == PERF_RECORD_SAMPLE &&
0933         sample->raw_size) {
0934         /* Handle event with raw data */
0935         ev_bc000 = evlist__event2evsel(session->evlist, event);
0936         if (ev_bc000 &&
0937             ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
0938             err = s390_cpumcf_dumpctr(sf, sample);
0939         return err;
0940     }
0941 
0942     if (event->header.type == PERF_RECORD_AUX &&
0943         event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
0944         return s390_cpumsf_lost(sf, sample);
0945 
0946     if (timestamp) {
0947         err = s390_cpumsf_update_queues(sf, timestamp);
0948         if (!err)
0949             err = s390_cpumsf_process_queues(sf, timestamp);
0950     }
0951     return err;
0952 }
0953 
/*
 * Couples a perf_tool instance with a pointer to a perf_session.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; presumably it serves event synthesis - confirm before relying on it.
 */
0954 struct s390_cpumsf_synth {
0955     struct perf_tool cpumsf_tool;
0956     struct perf_session *session;
0957 };
0958 
0959 static int
0960 s390_cpumsf_process_auxtrace_event(struct perf_session *session,
0961                    union perf_event *event __maybe_unused,
0962                    struct perf_tool *tool __maybe_unused)
0963 {
0964     struct s390_cpumsf *sf = container_of(session->auxtrace,
0965                           struct s390_cpumsf,
0966                           auxtrace);
0967 
0968     int fd = perf_data__fd(session->data);
0969     struct auxtrace_buffer *buffer;
0970     off_t data_offset;
0971     int err;
0972 
0973     if (sf->data_queued)
0974         return 0;
0975 
0976     if (perf_data__is_pipe(session->data)) {
0977         data_offset = 0;
0978     } else {
0979         data_offset = lseek(fd, 0, SEEK_CUR);
0980         if (data_offset == -1)
0981             return -errno;
0982     }
0983 
0984     err = auxtrace_queues__add_event(&sf->queues, session, event,
0985                      data_offset, &buffer);
0986     if (err)
0987         return err;
0988 
0989     /* Dump here after copying piped trace out of the pipe */
0990     if (dump_trace) {
0991         if (auxtrace_buffer__get_data(buffer, fd)) {
0992             s390_cpumsf_dump_event(sf, buffer->data,
0993                            buffer->size);
0994             auxtrace_buffer__put_data(buffer);
0995         }
0996     }
0997     return 0;
0998 }
0999 
1000 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
1001 {
1002 }
1003 
1004 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
1005                  struct perf_tool *tool __maybe_unused)
1006 {
1007     return 0;
1008 }
1009 
1010 static void s390_cpumsf_free_queues(struct perf_session *session)
1011 {
1012     struct s390_cpumsf *sf = container_of(session->auxtrace,
1013                           struct s390_cpumsf,
1014                           auxtrace);
1015     struct auxtrace_queues *queues = &sf->queues;
1016     unsigned int i;
1017 
1018     for (i = 0; i < queues->nr_queues; i++) {
1019         struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
1020                         queues->queue_array[i].priv;
1021 
1022         if (sfq != NULL) {
1023             if (sfq->logfile) {
1024                 fclose(sfq->logfile);
1025                 sfq->logfile = NULL;
1026             }
1027             if (sfq->logfile_ctr) {
1028                 fclose(sfq->logfile_ctr);
1029                 sfq->logfile_ctr = NULL;
1030             }
1031         }
1032         zfree(&queues->queue_array[i].priv);
1033     }
1034     auxtrace_queues__free(queues);
1035 }
1036 
1037 static void s390_cpumsf_free(struct perf_session *session)
1038 {
1039     struct s390_cpumsf *sf = container_of(session->auxtrace,
1040                           struct s390_cpumsf,
1041                           auxtrace);
1042 
1043     auxtrace_heap__free(&sf->heap);
1044     s390_cpumsf_free_queues(session);
1045     session->auxtrace = NULL;
1046     zfree(&sf->logdir);
1047     free(sf);
1048 }
1049 
1050 static bool
1051 s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
1052                   struct evsel *evsel)
1053 {
1054     return evsel->core.attr.type == PERF_TYPE_RAW &&
1055            evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
1056 }
1057 
/*
 * Extract the number following the first comma of @cpuid.
 *
 * Everything up to the first comma is skipped (at least one character is
 * required there) and the unsigned decimal number behind the comma is
 * returned. The caller stores the result as machine type.
 *
 * Return 0 if @cpuid is NULL or does not match that layout.
 */
static int s390_cpumsf_get_type(const char *cpuid)
{
    /* %u stores into an unsigned int, so do not hand it an int */
    unsigned int family = 0;

    if (cpuid == NULL)
        return 0;
    if (sscanf(cpuid, "%*[^,],%u", &family) != 1)
        return 0;
    return (int)family;
}
1065 
1066 /* Check itrace options set on perf report command.
1067  * Return true, if none are set or all options specified can be
1068  * handled on s390 (currently only option 'd' for logging.
1069  * Return false otherwise.
1070  */
1071 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
1072 {
1073     bool ison = false;
1074 
1075     if (!itops || !itops->set)
1076         return true;
1077     ison = itops->inject || itops->instructions || itops->branches ||
1078         itops->transactions || itops->ptwrites ||
1079         itops->pwr_events || itops->errors ||
1080         itops->dont_decode || itops->calls || itops->returns ||
1081         itops->callchain || itops->thread_stack ||
1082         itops->last_branch || itops->add_callchain ||
1083         itops->add_last_branch;
1084     if (!ison)
1085         return true;
1086     pr_err("Unsupported --itrace options specified\n");
1087     return false;
1088 }
1089 
1090 /* Check for AUXTRACE dump directory if it is needed.
1091  * On failure print an error message but continue.
1092  * Return 0 on wrong keyword in config file and 1 otherwise.
1093  */
1094 static int s390_cpumsf__config(const char *var, const char *value, void *cb)
1095 {
1096     struct s390_cpumsf *sf = cb;
1097     struct stat stbuf;
1098     int rc;
1099 
1100     if (strcmp(var, "auxtrace.dumpdir"))
1101         return 0;
1102     sf->logdir = strdup(value);
1103     if (sf->logdir == NULL) {
1104         pr_err("Failed to find auxtrace log directory %s,"
1105                " continue with current directory...\n", value);
1106         return 1;
1107     }
1108     rc = stat(sf->logdir, &stbuf);
1109     if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
1110         pr_err("Missing auxtrace log directory %s,"
1111                " continue with current directory...\n", value);
1112         zfree(&sf->logdir);
1113     }
1114     return 1;
1115 }
1116 
1117 int s390_cpumsf_process_auxtrace_info(union perf_event *event,
1118                       struct perf_session *session)
1119 {
1120     struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1121     struct s390_cpumsf *sf;
1122     int err;
1123 
1124     if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info))
1125         return -EINVAL;
1126 
1127     sf = zalloc(sizeof(struct s390_cpumsf));
1128     if (sf == NULL)
1129         return -ENOMEM;
1130 
1131     if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
1132         err = -EINVAL;
1133         goto err_free;
1134     }
1135     sf->use_logfile = session->itrace_synth_opts->log;
1136     if (sf->use_logfile)
1137         perf_config(s390_cpumsf__config, sf);
1138 
1139     err = auxtrace_queues__init(&sf->queues);
1140     if (err)
1141         goto err_free;
1142 
1143     sf->session = session;
1144     sf->machine = &session->machines.host; /* No kvm support */
1145     sf->auxtrace_type = auxtrace_info->type;
1146     sf->pmu_type = PERF_TYPE_RAW;
1147     sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
1148 
1149     sf->auxtrace.process_event = s390_cpumsf_process_event;
1150     sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
1151     sf->auxtrace.flush_events = s390_cpumsf_flush;
1152     sf->auxtrace.free_events = s390_cpumsf_free_events;
1153     sf->auxtrace.free = s390_cpumsf_free;
1154     sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
1155     session->auxtrace = &sf->auxtrace;
1156 
1157     if (dump_trace)
1158         return 0;
1159 
1160     err = auxtrace_queues__process_index(&sf->queues, session);
1161     if (err)
1162         goto err_free_queues;
1163 
1164     if (sf->queues.populated)
1165         sf->data_queued = true;
1166 
1167     return 0;
1168 
1169 err_free_queues:
1170     auxtrace_queues__free(&sf->queues);
1171     session->auxtrace = NULL;
1172 err_free:
1173     zfree(&sf->logdir);
1174     free(sf);
1175     return err;
1176 }