0001
0002
0003
0004
0005
0006 #include <perf/perf_dlfilter.h>
0007 #include <string.h>
0008 #include <stdio.h>
0009
/* Number of CPUs that get their own counter row; CPU numbers outside [0, MAX_CPU) are not counted per CPU. */
#define MAX_CPU 4096

/*
 * Counter classes. Each sample is accounted under exactly one of these,
 * chosen from its event name. MAX_ENTRY is the number of classes and is
 * used to size the counter arrays.
 */
enum {
	INSTR_CYC,	/* event name starts with "instructions" */
	BRNCH_CYC,	/* event name starts with "branches" */
	OTHER_CYC,	/* any other event, or no event name */
	MAX_ENTRY
};

/* Running cycle totals, indexed by CPU number and counter class. */
static __u64 cycles[MAX_CPU][MAX_ENTRY];
/* Value each running total had the last time it was printed, used to compute the delta. */
static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];
0021
/*
 * Sizing of the per-thread hash table.
 *
 * TABLESZ is a power of two so that MASK can reduce a tid to a slot index.
 * TABLEMAX caps the number of threads stored at half the table size, which
 * guarantees that an unused slot always exists.
 */
#define BITS 16
#define TABLESZ (1 << BITS)
#define TABLEMAX (TABLESZ / 2)
#define MASK (TABLESZ - 1)

/*
 * Per-thread counters, used when a sample has no usable CPU number.
 * The table has static storage, so every slot starts out zeroed (unused).
 */
static struct entry {
	__u32 used;			/* non-zero once the slot has been claimed for a tid */
	__s32 tid;			/* thread id that owns the slot */
	__u64 cycles[MAX_ENTRY];	/* running cycle totals per counter class */
	__u64 cycles_rpt[MAX_ENTRY];	/* totals as of the last print, per counter class */
} table[TABLESZ];

/* Number of slots of table[] currently claimed. */
static int tid_cnt;
0035
0036 static int event_entry(const char *event)
0037 {
0038 if (!event)
0039 return OTHER_CYC;
0040 if (!strncmp(event, "instructions", 12))
0041 return INSTR_CYC;
0042 if (!strncmp(event, "branches", 8))
0043 return BRNCH_CYC;
0044 return OTHER_CYC;
0045 }
0046
0047 static struct entry *find_entry(__s32 tid)
0048 {
0049 __u32 pos = tid & MASK;
0050 struct entry *e;
0051
0052 e = &table[pos];
0053 while (e->used) {
0054 if (e->tid == tid)
0055 return e;
0056 if (++pos == TABLESZ)
0057 pos = 0;
0058 e = &table[pos];
0059 }
0060
0061 if (tid_cnt >= TABLEMAX) {
0062 fprintf(stderr, "Too many threads\n");
0063 return NULL;
0064 }
0065
0066 tid_cnt += 1;
0067 e->used = 1;
0068 e->tid = tid;
0069 return e;
0070 }
0071
0072 static void add_entry(__s32 tid, int pos, __u64 cnt)
0073 {
0074 struct entry *e = find_entry(tid);
0075
0076 if (e)
0077 e->cycles[pos] += cnt;
0078 }
0079
0080 int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
0081 {
0082 __s32 cpu = sample->cpu;
0083 __s32 tid = sample->tid;
0084 int pos;
0085
0086 if (!sample->cyc_cnt)
0087 return 0;
0088
0089 pos = event_entry(sample->event);
0090
0091 if (cpu >= 0 && cpu < MAX_CPU)
0092 cycles[cpu][pos] += sample->cyc_cnt;
0093 else if (tid != -1)
0094 add_entry(tid, pos, sample->cyc_cnt);
0095 return 0;
0096 }
0097
0098 static void print_vals(__u64 cycles, __u64 delta)
0099 {
0100 if (delta)
0101 printf("%10llu %10llu ", (unsigned long long)cycles, (unsigned long long)delta);
0102 else
0103 printf("%10llu %10s ", (unsigned long long)cycles, "");
0104 }
0105
0106 int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
0107 {
0108 __s32 cpu = sample->cpu;
0109 __s32 tid = sample->tid;
0110 int pos;
0111
0112 pos = event_entry(sample->event);
0113
0114 if (cpu >= 0 && cpu < MAX_CPU) {
0115 print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]);
0116 cycles_rpt[cpu][pos] = cycles[cpu][pos];
0117 return 0;
0118 }
0119
0120 if (tid != -1) {
0121 struct entry *e = find_entry(tid);
0122
0123 if (e) {
0124 print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]);
0125 e->cycles_rpt[pos] = e->cycles[pos];
0126 return 0;
0127 }
0128 }
0129
0130 printf("%22s", "");
0131 return 0;
0132 }
0133
/*
 * Describe this filter.
 *
 * Stores a pointer to the detailed description in *@long_description and
 * returns the one-line summary. Both strings have static storage duration,
 * so the caller must not free or modify them.
 */
const char *filter_description(const char **long_description)
{
	static const char detailed[] =
		"Cycle counts are accumulated per CPU "
		"(or per thread if CPU is not recorded) "
		"from IPC information, and printed together "
		"with the change since the last print, "
		"at the start of each line. "
		"Separate counts are kept for branches, "
		"instructions or other events.";
	static const char summary[] =
		"Print the number of cycles at the start of each line";

	*long_description = detailed;
	return summary;
}