0001
0002
0003
0004
0005
0006 #include <unistd.h>
0007 #include <stdio.h>
0008 #include <string.h>
0009 #include <inttypes.h>
0010
0011 #include <linux/string.h>
0012 #include "../../arch/x86/include/asm/amd-ibs.h"
0013
0014 #include "debug.h"
0015 #include "session.h"
0016 #include "evlist.h"
0017 #include "sample-raw.h"
0018 #include "pmu-events/pmu-events.h"
0019
/*
 * File-scope state shared by the IBS raw-sample dumpers in this file.
 * cpu_family and cpu_model are filled in by parse_cpuid(); ibs_fetch_type
 * and ibs_op_type receive the PMU type numbers that evlist__has_amd_ibs()
 * finds for PMUs whose names start with "ibs_fetch" and "ibs_op".
 */
static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
/* Set by evlist__has_amd_ibs() when the "ibs_op" PMU has the "zen4_ibs_extensions" cap. */
static bool zen4_ibs_extensions;
0022
0023 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
0024 {
0025 const char * const ic_miss_strs[] = {
0026 " IcMiss 0",
0027 " IcMiss 1",
0028 };
0029 const char * const l1tlb_pgsz_strs[] = {
0030 " L1TlbPgSz 4KB",
0031 " L1TlbPgSz 2MB",
0032 " L1TlbPgSz 1GB",
0033 " L1TlbPgSz RESERVED"
0034 };
0035 const char * const l1tlb_pgsz_strs_erratum1347[] = {
0036 " L1TlbPgSz 4KB",
0037 " L1TlbPgSz 16KB",
0038 " L1TlbPgSz 2MB",
0039 " L1TlbPgSz 1GB"
0040 };
0041 const char *ic_miss_str = NULL;
0042 const char *l1tlb_pgsz_str = NULL;
0043 char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
0044
0045 if (cpu_family == 0x19 && cpu_model < 0x10) {
0046
0047
0048
0049
0050 if (reg.phy_addr_valid)
0051 l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
0052 } else {
0053 if (reg.phy_addr_valid)
0054 l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
0055 ic_miss_str = ic_miss_strs[reg.ic_miss];
0056 }
0057
0058 if (zen4_ibs_extensions) {
0059 snprintf(l3_miss_str, sizeof(l3_miss_str),
0060 " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
0061 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
0062 }
0063
0064 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
0065 "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
0066 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
0067 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
0068 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
0069 reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
0070 l3_miss_str);
0071 }
0072
0073 static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
0074 {
0075 printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
0076 }
0077
0078 static void pr_ibs_op_ctl(union ibs_op_ctl reg)
0079 {
0080 char l3_miss_only[sizeof(" L3MissOnly _")] = "";
0081
0082 if (zen4_ibs_extensions)
0083 snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
0084
0085 printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
0086 reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
0087 reg.op_en, reg.op_val, reg.cnt_ctl,
0088 reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
0089 }
0090
0091 static void pr_ibs_op_data(union ibs_op_data reg)
0092 {
0093 printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
0094 " RipInvalid %d BrnFuse %d Microcode %d\n",
0095 reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
0096 reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
0097 reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
0098 reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
0099 reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
0100 }
0101
0102 static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
0103 {
0104 static const char * const data_src_str[] = {
0105 "",
0106 " DataSrc 1=Local L3 or other L1/L2 in CCX",
0107 " DataSrc 2=A peer cache in a near CCX",
0108 " DataSrc 3=Data returned from DRAM",
0109 " DataSrc 4=(reserved)",
0110 " DataSrc 5=A peer cache in a far CCX",
0111 " DataSrc 6=DRAM address map with \"long latency\" bit set",
0112 " DataSrc 7=Data returned from MMIO/Config/PCI/APIC",
0113 " DataSrc 8=Extension Memory (S-Link, GenZ, etc)",
0114 " DataSrc 9=(reserved)",
0115 " DataSrc 10=(reserved)",
0116 " DataSrc 11=(reserved)",
0117 " DataSrc 12=Peer Agent Memory",
0118
0119 };
0120 int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
0121
0122 printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
0123 (data_src == 1 || data_src == 2 || data_src == 5) ?
0124 (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "",
0125 reg.rmt_node,
0126 data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "");
0127 }
0128
0129 static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
0130 {
0131 static const char * const data_src_str[] = {
0132 "",
0133 " DataSrc 1=(reserved)",
0134 " DataSrc 2=Local node cache",
0135 " DataSrc 3=DRAM",
0136 " DataSrc 4=Remote node cache",
0137 " DataSrc 5=(reserved)",
0138 " DataSrc 6=(reserved)",
0139 " DataSrc 7=Other"
0140 };
0141
0142 printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
0143 reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
0144 : "CacheHitSt 0=M-state ") : "",
0145 reg.rmt_node, data_src_str[reg.data_src_lo]);
0146 }
0147
0148 static void pr_ibs_op_data2(union ibs_op_data2 reg)
0149 {
0150 if (zen4_ibs_extensions)
0151 return pr_ibs_op_data2_extended(reg);
0152 pr_ibs_op_data2_default(reg);
0153 }
0154
0155 static void pr_ibs_op_data3(union ibs_op_data3 reg)
0156 {
0157 char l2_miss_str[sizeof(" L2Miss _")] = "";
0158 char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
0159 char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
0160
0161
0162
0163
0164
0165 if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
0166 snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
0167 snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
0168 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
0169 }
0170
0171 if (reg.op_mem_width)
0172 snprintf(op_mem_width_str, sizeof(op_mem_width_str),
0173 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
0174
0175 printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
0176 "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
0177 "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
0178 "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
0179 reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
0180 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
0181 reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
0182 reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
0183 reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
0184 op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
0185 }
0186
0187
0188
0189
0190
0191
0192 static void amd_dump_ibs_op(struct perf_sample *sample)
0193 {
0194 struct perf_ibs_data *data = sample->raw_data;
0195 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
0196 __u64 *rip = (__u64 *)op_ctl + 1;
0197 union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
0198 union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
0199
0200 pr_ibs_op_ctl(*op_ctl);
0201 if (!op_data->op_rip_invalid)
0202 printf("IbsOpRip:\t%016llx\n", *rip);
0203 pr_ibs_op_data(*op_data);
0204
0205
0206
0207 if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
0208 (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
0209 pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
0210 pr_ibs_op_data3(*op_data3);
0211 if (op_data3->dc_lin_addr_valid)
0212 printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
0213 if (op_data3->dc_phy_addr_valid)
0214 printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
0215 if (op_data->op_brn_ret && *(rip + 6))
0216 printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
0217 }
0218
0219
0220
0221
0222
0223 static void amd_dump_ibs_fetch(struct perf_sample *sample)
0224 {
0225 struct perf_ibs_data *data = sample->raw_data;
0226 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
0227 __u64 *addr = (__u64 *)fetch_ctl + 1;
0228 union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
0229
0230 pr_ibs_fetch_ctl(*fetch_ctl);
0231 printf("IbsFetchLinAd:\t%016llx\n", *addr++);
0232 if (fetch_ctl->phy_addr_valid)
0233 printf("IbsFetchPhysAd:\t%016llx\n", *addr);
0234 pr_ic_ibs_extd_ctl(*extd_ctl);
0235 }
0236
0237
0238
0239
0240 static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
0241 {
0242 struct perf_ibs_data *data = sample->raw_data;
0243 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
0244
0245 if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
0246 return true;
0247
0248 return false;
0249 }
0250
0251 static bool is_valid_ibs_op_sample(struct perf_sample *sample)
0252 {
0253 struct perf_ibs_data *data = sample->raw_data;
0254 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
0255
0256 if (op_ctl->op_en && op_ctl->op_val)
0257 return true;
0258
0259 return false;
0260 }
0261
0262
0263
0264
0265
0266 void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
0267 struct perf_sample *sample)
0268 {
0269 struct evsel *evsel;
0270
0271 if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
0272 return;
0273
0274 evsel = evlist__event2evsel(evlist, event);
0275 if (!evsel)
0276 return;
0277
0278 if (evsel->core.attr.type == ibs_fetch_type) {
0279 if (!is_valid_ibs_fetch_sample(sample)) {
0280 pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
0281 return;
0282 }
0283 amd_dump_ibs_fetch(sample);
0284 } else if (evsel->core.attr.type == ibs_op_type) {
0285 if (!is_valid_ibs_op_sample(sample)) {
0286 pr_debug("Invalid raw IBS Op MSR data encountered\n");
0287 return;
0288 }
0289 amd_dump_ibs_op(sample);
0290 }
0291 }
0292
0293 static void parse_cpuid(struct perf_env *env)
0294 {
0295 const char *cpuid;
0296 int ret;
0297
0298 cpuid = perf_env__cpuid(env);
0299
0300
0301
0302 ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
0303 if (ret != 2)
0304 pr_debug("problem parsing cpuid\n");
0305 }
0306
0307
0308
0309
0310
0311
0312 bool evlist__has_amd_ibs(struct evlist *evlist)
0313 {
0314 struct perf_env *env = evlist->env;
0315 int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
0316 const char *pmu_mapping = perf_env__pmu_mappings(env);
0317 char name[sizeof("ibs_fetch")];
0318 u32 type;
0319
0320 while (nr_pmu_mappings--) {
0321 ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
0322 if (ret == 2) {
0323 if (strstarts(name, "ibs_op"))
0324 ibs_op_type = type;
0325 else if (strstarts(name, "ibs_fetch"))
0326 ibs_fetch_type = type;
0327 }
0328 pmu_mapping += strlen(pmu_mapping) + 1 ;
0329 }
0330
0331 if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
0332 zen4_ibs_extensions = 1;
0333
0334 if (ibs_fetch_type || ibs_op_type) {
0335 if (!cpu_family)
0336 parse_cpuid(env);
0337 return true;
0338 }
0339
0340 return false;
0341 }