/*
 * Performance events (PMU) support for the AMD IOMMU performance counters.
 */
#define pr_fmt(fmt)	"perf/amd_iommu: " fmt

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>

#include "../perf_event.h"
#include "iommu.h"

/* iommu pmu conf masks (event->attr.config) */
#define GET_CSOURCE(x)     ((x)->conf & 0xFFULL)
#define GET_DEVID(x)       (((x)->conf >> 8) & 0xFFFFULL)
#define GET_DOMID(x)       (((x)->conf >> 24) & 0xFFFFULL)
#define GET_PASID(x)       (((x)->conf >> 40) & 0xFFFFFULL)

/* iommu pmu conf1 masks (event->attr.config1) */
#define GET_DEVID_MASK(x)  ((x)->conf1 & 0xFFFFULL)
#define GET_DOMID_MASK(x)  (((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x)  (((x)->conf1 >> 32) & 0xFFFFFULL)

#define IOMMU_NAME_SIZE 16

struct perf_amd_iommu {
	struct list_head list;
	struct pmu pmu;
	struct amd_iommu *iommu;
	char name[IOMMU_NAME_SIZE];
	u8 max_banks;
	u8 max_counters;
	u64 cntr_assign_mask;
	raw_spinlock_t lock;
};

static LIST_HEAD(perf_amd_iommu_list);

/*---------------------------------------------
 * sysfs format attributes
 *---------------------------------------------*/
PMU_FORMAT_ATTR(csource,    "config:0-7");
PMU_FORMAT_ATTR(devid,      "config:8-23");
PMU_FORMAT_ATTR(domid,      "config:24-39");
PMU_FORMAT_ATTR(pasid,      "config:40-59");
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");

static struct attribute *iommu_format_attrs[] = {
	&format_attr_csource.attr,
	&format_attr_devid.attr,
	&format_attr_pasid.attr,
	&format_attr_domid.attr,
	&format_attr_devid_mask.attr,
	&format_attr_pasid_mask.attr,
	&format_attr_domid_mask.attr,
	NULL,
};

static struct attribute_group amd_iommu_format_group = {
	.name = "format",
	.attrs = iommu_format_attrs,
};
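
/*
 * Illustrative only (not part of the original driver): the format
 * attributes above let the perf tool program config/config1 directly.
 * For example, counting total translations (csource=0x05) filtered to a
 * hypothetical device id 0x1f with a full device-id mask might look like:
 *
 *   perf stat -e 'amd_iommu_0/csource=0x05,devid=0x1f,devid_mask=0xffff/' -a sleep 1
 *
 * The instance name amd_iommu_0 and the devid value are assumptions for
 * the example; actual instance names are set in init_one_iommu().
 */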

/*---------------------------------------------
 * sysfs events attributes
 *---------------------------------------------*/
static struct attribute_group amd_iommu_events_group = {
	.name = "events",
};

struct amd_iommu_event_desc {
	struct device_attribute attr;
	const char *event;
};

static ssize_t _iommu_event_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct amd_iommu_event_desc *event =
		container_of(attr, struct amd_iommu_event_desc, attr);
	return sprintf(buf, "%s\n", event->event);
}

#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
{								\
	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
	.event = _event,					\
}

static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,      "csource=0x01"),
	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,     "csource=0x02"),
	AMD_IOMMU_EVENT_DESC(mem_pass_excl,         "csource=0x03"),
	AMD_IOMMU_EVENT_DESC(mem_target_abort,      "csource=0x04"),
	AMD_IOMMU_EVENT_DESC(mem_trans_total,       "csource=0x05"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
	AMD_IOMMU_EVENT_DESC(mem_dte_hit,           "csource=0x0a"),
	AMD_IOMMU_EVENT_DESC(mem_dte_mis,           "csource=0x0b"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,     "csource=0x0c"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,     "csource=0x0d"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,     "csource=0x0e"),
	AMD_IOMMU_EVENT_DESC(int_dte_hit,           "csource=0x0f"),
	AMD_IOMMU_EVENT_DESC(int_dte_mis,           "csource=0x10"),
	AMD_IOMMU_EVENT_DESC(cmd_processed,         "csource=0x11"),
	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,     "csource=0x12"),
	AMD_IOMMU_EVENT_DESC(tlb_inv,               "csource=0x13"),
	AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h,  "csource=0x14"),
	AMD_IOMMU_EVENT_DESC(vapic_int_non_guest,   "csource=0x15"),
	AMD_IOMMU_EVENT_DESC(vapic_int_guest,       "csource=0x16"),
	AMD_IOMMU_EVENT_DESC(smi_recv,              "csource=0x17"),
	AMD_IOMMU_EVENT_DESC(smi_blk,               "csource=0x18"),
	{ /* end: all zeroes */ },
};
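
/*
 * Illustrative only: each alias above is exported through the "events"
 * sysfs group and expands to its "csource=..." string, so a named event
 * such as mem_trans_total is equivalent to passing csource=0x05 by hand:
 *
 *   perf stat -e 'amd_iommu_0/mem_trans_total/' -a sleep 1
 *
 * The PMU instance name amd_iommu_0 is assumed here for the example.
 */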

/*---------------------------------------------
 * sysfs cpumask attributes
 *---------------------------------------------*/
static cpumask_t iommu_cpumask;

static ssize_t _iommu_cpumask_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);

static struct attribute *iommu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_iommu_cpumask_group = {
	.attrs = iommu_cpumask_attrs,
};

static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
{
	struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
	int max_cntrs = piommu->max_counters;
	int max_banks = piommu->max_banks;
	u32 shift, bank, cntr;
	unsigned long flags;
	int retval;

	raw_spin_lock_irqsave(&piommu->lock, flags);

	for (bank = 0; bank < max_banks; bank++) {
		for (cntr = 0; cntr < max_cntrs; cntr++) {
			shift = bank + (bank*3) + cntr;
			if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
				continue;
			} else {
				piommu->cntr_assign_mask |= BIT_ULL(shift);
				event->hw.iommu_bank = bank;
				event->hw.iommu_cntr = cntr;
				retval = 0;
				goto out;
			}
		}
	}
	retval = -ENOSPC;
out:
	raw_spin_unlock_irqrestore(&piommu->lock, flags);
	return retval;
}
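
/*
 * Worked example of the assignment-mask layout used above (descriptive
 * note, assuming at most four counters per bank): the bit index is
 * shift = bank + (bank*3) + cntr = 4*bank + cntr, so each bank owns a
 * 4-bit slot in cntr_assign_mask. For bank = 1, cntr = 2 the bit tested
 * and set is BIT_ULL(6).
 */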

static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
				      u8 bank, u8 cntr)
{
	unsigned long flags;
	int max_banks, max_cntrs;
	int shift = 0;

	max_banks = perf_iommu->max_banks;
	max_cntrs = perf_iommu->max_counters;

	if ((bank > max_banks) || (cntr > max_cntrs))
		return -EINVAL;

	shift = bank + cntr + (bank*3);

	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
	perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);

	return 0;
}

static int perf_iommu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Only handle events that belong to this PMU. */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * IOMMU counters are shared across all cores.
	 * Therefore, they do not support per-task counting
	 * and do not support sampling mode.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	/* Update the hw_perf_event struct with the iommu config data. */
	hwc->conf  = event->attr.config;
	hwc->conf1 = event->attr.config1;

	return 0;
}

static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
{
	return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
}

static void perf_iommu_enable_event(struct perf_event *ev)
{
	struct amd_iommu *iommu = perf_event_2_iommu(ev);
	struct hw_perf_event *hwc = &ev->hw;
	u8 bank = hwc->iommu_bank;
	u8 cntr = hwc->iommu_cntr;
	u64 reg = 0ULL;

	reg = GET_CSOURCE(hwc);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);

	reg = GET_DEVID_MASK(hwc);
	reg = GET_DEVID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);

	reg = GET_PASID_MASK(hwc);
	reg = GET_PASID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);

	reg = GET_DOMID_MASK(hwc);
	reg = GET_DOMID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
}
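
/*
 * Note on the match registers programmed above, as implied by the code
 * rather than quoted from the hardware spec: the match value (devid,
 * pasid or domid) goes in the low bits, the corresponding mask from
 * config1 is shifted into bits 63:32, and BIT(31) is set whenever either
 * field is non-zero, presumably acting as the match-enable bit.
 */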

static void perf_iommu_disable_event(struct perf_event *event)
{
	struct amd_iommu *iommu = perf_event_2_iommu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 reg = 0ULL;

	amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
			     IOMMU_PC_COUNTER_SRC_REG, &reg);
}

static void perf_iommu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	/*
	 * To account for power-gating, which prevents writes to
	 * the counter, the counter is enabled before the counter
	 * register is set up.
	 */
	perf_iommu_enable_event(event);

	if (flags & PERF_EF_RELOAD) {
		u64 count = 0;
		struct amd_iommu *iommu = perf_event_2_iommu(event);

		/*
		 * Since the IOMMU PMU only supports counting mode,
		 * the counter always starts at zero.
		 */
		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				     IOMMU_PC_COUNTER_REG, &count);
	}

	perf_event_update_userpage(event);
}

static void perf_iommu_read(struct perf_event *event)
{
	u64 count;
	struct hw_perf_event *hwc = &event->hw;
	struct amd_iommu *iommu = perf_event_2_iommu(event);

	if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				 IOMMU_PC_COUNTER_REG, &count))
		return;

	/* The IOMMU pc counter register is only 48 bits. */
	count &= GENMASK_ULL(47, 0);

	/*
	 * Since the counter always starts at zero,
	 * simply accumulate the count for the event.
	 */
	local64_add(count, &event->count);
}

static void perf_iommu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * To account for power-gating, in which reading the counter would
	 * return zero, read the register before disabling the counter.
	 */
	perf_iommu_read(event);
	hwc->state |= PERF_HES_UPTODATE;

	perf_iommu_disable_event(event);
	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
	hwc->state |= PERF_HES_STOPPED;
}

static int perf_iommu_add(struct perf_event *event, int flags)
{
	int retval;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* Request an iommu bank/counter. */
	retval = get_next_avail_iommu_bnk_cntr(event);
	if (retval)
		return retval;

	if (flags & PERF_EF_START)
		perf_iommu_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_iommu_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_amd_iommu *perf_iommu =
		container_of(event->pmu, struct perf_amd_iommu, pmu);

	perf_iommu_stop(event, PERF_EF_UPDATE);

	/* Clear the assigned iommu bank/counter. */
	clear_avail_iommu_bnk_cntr(perf_iommu,
				   hwc->iommu_bank, hwc->iommu_cntr);

	perf_event_update_userpage(event);
}

static __init int _init_events_attrs(void)
{
	int i = 0, j;
	struct attribute **attrs;

	while (amd_iommu_v2_event_descs[i].attr.attr.name)
		i++;

	attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	for (j = 0; j < i; j++)
		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;

	amd_iommu_events_group.attrs = attrs;
	return 0;
}

static const struct attribute_group *amd_iommu_attr_groups[] = {
	&amd_iommu_format_group,
	&amd_iommu_cpumask_group,
	&amd_iommu_events_group,
	NULL,
};
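
/*
 * With the attribute groups above, each registered PMU instance is
 * expected to appear under sysfs roughly as follows (illustrative):
 *
 *   /sys/bus/event_source/devices/amd_iommu_<idx>/format/
 *   /sys/bus/event_source/devices/amd_iommu_<idx>/events/
 *   /sys/bus/event_source/devices/amd_iommu_<idx>/cpumask
 */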

static const struct pmu iommu_pmu __initconst = {
	.event_init	= perf_iommu_event_init,
	.add		= perf_iommu_add,
	.del		= perf_iommu_del,
	.start		= perf_iommu_start,
	.stop		= perf_iommu_stop,
	.read		= perf_iommu_read,
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_iommu_attr_groups,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
};

static __init int init_one_iommu(unsigned int idx)
{
	struct perf_amd_iommu *perf_iommu;
	int ret;

	perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
	if (!perf_iommu)
		return -ENOMEM;

	raw_spin_lock_init(&perf_iommu->lock);

	perf_iommu->pmu          = iommu_pmu;
	perf_iommu->iommu        = get_amd_iommu(idx);
	perf_iommu->max_banks    = amd_iommu_pc_get_max_banks(idx);
	perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);

	if (!perf_iommu->iommu ||
	    !perf_iommu->max_banks ||
	    !perf_iommu->max_counters) {
		kfree(perf_iommu);
		return -EINVAL;
	}

	snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);

	ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
	if (!ret) {
		pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
			idx, perf_iommu->max_banks, perf_iommu->max_counters);
		list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
	} else {
		pr_warn("Error initializing IOMMU %d.\n", idx);
		kfree(perf_iommu);
	}
	return ret;
}

static __init int amd_iommu_pc_init(void)
{
	unsigned int i, cnt = 0;
	int ret;

	/* Make sure the IOMMU PC resource is available. */
	if (!amd_iommu_pc_supported())
		return -ENODEV;

	ret = _init_events_attrs();
	if (ret)
		return ret;

	/*
	 * An IOMMU PMU is specific to an IOMMU, and can function
	 * independently. So, we go through all IOMMUs and ignore the
	 * ones that fail initialization, unless all of them fail.
	 */
	for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
		ret = init_one_iommu(i);
		if (!ret)
			cnt++;
	}

	if (!cnt) {
		kfree(amd_iommu_events_group.attrs);
		return -ENODEV;
	}

	/* Init cpumask attributes to only core 0. */
	cpumask_set_cpu(0, &iommu_cpumask);
	return 0;
}

device_initcall(amd_iommu_pc_init);