// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */

#include <linux/sched/task.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include "idxd.h"
#include "perfmon.h"

static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
			    char *buf);

static cpumask_t	perfmon_dsa_cpu_mask;
static bool		cpuhp_set_up;
static enum cpuhp_state	cpuhp_slot;

/*
 * perf userspace reads this attribute to determine which cpus to open
 * counters on.  It's connected to perfmon_dsa_cpu_mask, which is
 * maintained by the cpu hotplug handlers.
 */
static DEVICE_ATTR_RO(cpumask);

static struct attribute *perfmon_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group cpumask_attr_group = {
	.attrs = perfmon_cpumask_attrs,
};

/*
 * These attributes specify the bits in the config word that the perf
 * syscall uses to pass the event ids and categories to perfmon.
 */
DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3");
DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31");

/*
 * These attributes specify the bits in the config1 word that the perf
 * syscall uses to pass filter data to perfmon.
 */
DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31");
DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39");
DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43");
DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51");
DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59");

#define PERFMON_FILTERS_START	2
#define PERFMON_FILTERS_MAX	5

static struct attribute *perfmon_format_attrs[] = {
	&format_attr_idxd_event_category.attr,
	&format_attr_idxd_event.attr,
	&format_attr_idxd_filter_wq.attr,
	&format_attr_idxd_filter_tc.attr,
	&format_attr_idxd_filter_pgsz.attr,
	&format_attr_idxd_filter_sz.attr,
	&format_attr_idxd_filter_eng.attr,
	NULL,
};

static struct attribute_group perfmon_format_attr_group = {
	.name = "format",
	.attrs = perfmon_format_attrs,
};

static const struct attribute_group *perfmon_attr_groups[] = {
	&perfmon_format_attr_group,
	&cpumask_attr_group,
	NULL,
};

static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
}

static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
{
	return &idxd_pmu->pmu == event->pmu;
}

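/*
 * Collect the leader (and, if do_grp, its active sibling events) into
 * this pmu's event_list, recording each event's position as a tentative
 * hw index.  Returns the new event count, or -EINVAL if the group would
 * exceed the number of hardware counters.
 */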
static int perfmon_collect_events(struct idxd_pmu *idxd_pmu,
				  struct perf_event *leader,
				  bool do_grp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = idxd_pmu->n_counters;
	n = idxd_pmu->n_events;

	if (n >= max_count)
		return -EINVAL;

	if (is_idxd_event(idxd_pmu, leader)) {
		idxd_pmu->event_list[n] = leader;
		idxd_pmu->event_list[n]->hw.idx = n;
		n++;
	}

	if (!do_grp)
		return n;

	for_each_sibling_event(event, leader) {
		if (!is_idxd_event(idxd_pmu, event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		idxd_pmu->event_list[n] = event;
		idxd_pmu->event_list[n]->hw.idx = n;
		n++;
	}

	return n;
}

static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu,
				    struct perf_event *event, int idx)
{
	struct idxd_device *idxd = idxd_pmu->idxd;
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx));
	hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx));
}

static int perfmon_assign_event(struct idxd_pmu *idxd_pmu,
				struct perf_event *event)
{
	int i;

	for (i = 0; i < IDXD_PMU_EVENT_MAX; i++)
		if (!test_and_set_bit(i, idxd_pmu->used_mask))
			return i;

	return -EINVAL;
}

/*
 * Check whether there are enough counters to satisfy that all the
 * events in the group, plus the event being added, can actually be
 * scheduled together.  Do the bookkeeping against a throwaway pmu
 * with the same number of counters so the real pmu state is left
 * untouched.
 */
static int perfmon_validate_group(struct idxd_pmu *pmu,
				  struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct idxd_pmu *fake_pmu;
	int i, ret = 0, n, idx;

	fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL);
	if (!fake_pmu)
		return -ENOMEM;

	fake_pmu->pmu.name = pmu->pmu.name;
	fake_pmu->n_counters = pmu->n_counters;

	n = perfmon_collect_events(fake_pmu, leader, true);
	if (n < 0) {
		ret = n;
		goto out;
	}

	fake_pmu->n_events = n;
	n = perfmon_collect_events(fake_pmu, event, false);
	if (n < 0) {
		ret = n;
		goto out;
	}

	fake_pmu->n_events = n;

	for (i = 0; i < n; i++) {
		event = fake_pmu->event_list[i];

		idx = perfmon_assign_event(fake_pmu, event);
		if (idx < 0) {
			ret = idx;
			goto out;
		}
	}
out:
	kfree(fake_pmu);

	return ret;
}

static int perfmon_pmu_event_init(struct perf_event *event)
{
	struct idxd_device *idxd;
	int ret = 0;

	idxd = event_to_idxd(event);
	event->hw.idx = -1;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* sampling not supported */
	if (event->attr.sample_period)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (event->pmu != &idxd->idxd_pmu->pmu)
		return -EINVAL;

	event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
	event->cpu = idxd->idxd_pmu->cpu;
	event->hw.config = event->attr.config;

	if (event->group_leader != event)
		/* non-group events have themselves as leader */
		ret = perfmon_validate_group(idxd->idxd_pmu, event);

	return ret;
}

static inline u64 perfmon_pmu_read_counter(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct idxd_device *idxd;
	int cntr = hwc->idx;

	idxd = event_to_idxd(event);

	return ioread64(CNTRDATA_REG(idxd, cntr));
}

static void perfmon_pmu_event_update(struct perf_event *event)
{
	struct idxd_device *idxd = event_to_idxd(event);
	u64 prev_raw_count, new_raw_count, delta, p, n;
	int shift = 64 - idxd->idxd_pmu->counter_width;
	struct hw_perf_event *hwc = &event->hw;

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = perfmon_pmu_read_counter(event);
	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
				 new_raw_count) != prev_raw_count);

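	/*
	 * Shift both counts up so that only counter_width bits remain
	 * significant; the subtraction below then yields the number of
	 * events since the last read even if the hardware counter
	 * wrapped in the meantime.
	 */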
	n = (new_raw_count << shift);
	p = (prev_raw_count << shift);

	delta = ((n - p) >> shift);

	local64_add(delta, &event->count);
}

void perfmon_counter_overflow(struct idxd_device *idxd)
{
	int i, n_counters, max_loop = OVERFLOW_SIZE;
	struct perf_event *event;
	unsigned long ovfstatus;

	n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE);

	ovfstatus = ioread32(OVFSTATUS_REG(idxd));

	/*
	 * While updating overflowed counters, other counters behind
	 * them could overflow and be missed in a given pass.
	 * Normally this could happen at most n_counters times, but in
	 * theory a tiny counter width could result in continual
	 * overflows and infinite looping.  max_loop provides a
	 * failsafe in that highly unlikely case.
	 */
	while (ovfstatus && max_loop--) {
		/* Figure out which counter(s) overflowed */
		for_each_set_bit(i, &ovfstatus, n_counters) {
			unsigned long ovfstatus_clear = 0;

			/* Update event->count for the overflowed counter */
			event = idxd->idxd_pmu->event_list[i];
			perfmon_pmu_event_update(event);

			/* Writing 1 to the OVFSTATUS bit clears it */
			set_bit(i, &ovfstatus_clear);
			iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd));
		}

		ovfstatus = ioread32(OVFSTATUS_REG(idxd));
	}

	/*
	 * Should never happen: if overflow status is still set here,
	 * the counters overflowed faster than they could be serviced
	 * within max_loop passes.
	 */
	WARN_ON_ONCE(ovfstatus);
}

static inline void perfmon_reset_config(struct idxd_device *idxd)
{
	iowrite32(CONFIG_RESET, PERFRST_REG(idxd));
	iowrite32(0, OVFSTATUS_REG(idxd));
	iowrite32(0, PERFFRZ_REG(idxd));
}

static inline void perfmon_reset_counters(struct idxd_device *idxd)
{
	iowrite32(CNTR_RESET, PERFRST_REG(idxd));
}

static inline void perfmon_reset(struct idxd_device *idxd)
{
	perfmon_reset_config(idxd);
	perfmon_reset_counters(idxd);
}

static void perfmon_pmu_event_start(struct perf_event *event, int mode)
{
	u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0;
	u64 cntr_cfg, cntrdata, event_enc, event_cat = 0;
	struct hw_perf_event *hwc = &event->hw;
	union filter_cfg flt_cfg;
	union event_cfg event_cfg;
	struct idxd_device *idxd;
	int cntr;

	idxd = event_to_idxd(event);

	event->hw.idx = hwc->idx;
	cntr = hwc->idx;

	/* Obtain event category and event value from user space */
	event_cfg.val = event->attr.config;
	flt_cfg.val = event->attr.config1;
	event_cat = event_cfg.event_cat;
	event_enc = event_cfg.event_enc;

	/* Obtain filter configuration from user space */
	flt_wq = flt_cfg.wq;
	flt_tc = flt_cfg.tc;
	flt_pg_sz = flt_cfg.pg_sz;
	flt_xfer_sz = flt_cfg.xfer_sz;
	flt_eng = flt_cfg.eng;

	if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ));
	if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC));
	if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ));
	if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ));
	if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG));

	/* Read the start value */
	cntrdata = ioread64(CNTRDATA_REG(idxd, cntr));
	local64_set(&event->hw.prev_count, cntrdata);

	/* Set counter to event/category */
	cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT;
	cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT;
	/* Set interrupt on overflow and counter enable bits */
	cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE);

	iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
}

static void perfmon_pmu_event_stop(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;
	struct idxd_device *idxd;
	int i, cntr = hwc->idx;
	u64 cntr_cfg;

	idxd = event_to_idxd(event);

	/* remove this event from event list */
	for (i = 0; i < idxd->idxd_pmu->n_events; i++) {
		if (event != idxd->idxd_pmu->event_list[i])
			continue;

		for (++i; i < idxd->idxd_pmu->n_events; i++)
			idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i];
		--idxd->idxd_pmu->n_events;
		break;
	}

	cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr));
	cntr_cfg &= ~CNTRCFG_ENABLE;
	iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));

	if (mode == PERF_EF_UPDATE)
		perfmon_pmu_event_update(event);

	event->hw.idx = -1;
	clear_bit(cntr, idxd->idxd_pmu->used_mask);
}

static void perfmon_pmu_event_del(struct perf_event *event, int mode)
{
	perfmon_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int perfmon_pmu_event_add(struct perf_event *event, int flags)
{
	struct idxd_device *idxd = event_to_idxd(event);
	struct idxd_pmu *idxd_pmu = idxd->idxd_pmu;
	struct hw_perf_event *hwc = &event->hw;
	int idx, n;

	n = perfmon_collect_events(idxd_pmu, event, false);
	if (n < 0)
		return n;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	idx = perfmon_assign_event(idxd_pmu, event);
	if (idx < 0)
		return idx;

	perfmon_assign_hw_event(idxd_pmu, event, idx);

	if (flags & PERF_EF_START)
		perfmon_pmu_event_start(event, 0);

	idxd_pmu->n_events = n;

	return 0;
}

static void enable_perfmon_pmu(struct idxd_device *idxd)
{
	iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd));
}

static void disable_perfmon_pmu(struct idxd_device *idxd)
{
	iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd));
}

static void perfmon_pmu_enable(struct pmu *pmu)
{
	struct idxd_device *idxd = pmu_to_idxd(pmu);

	enable_perfmon_pmu(idxd);
}

static void perfmon_pmu_disable(struct pmu *pmu)
{
	struct idxd_device *idxd = pmu_to_idxd(pmu);

	disable_perfmon_pmu(idxd);
}

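/*
 * Drop the sysfs format attribute for a filter the hardware doesn't
 * support by sliding the remaining entries (and the trailing NULL) down
 * over it in perfmon_format_attrs[].
 */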
static void skip_filter(int i)
{
	int j;

	for (j = i; j < PERFMON_FILTERS_MAX; j++)
		perfmon_format_attrs[PERFMON_FILTERS_START + j] =
			perfmon_format_attrs[PERFMON_FILTERS_START + j + 1];
}

static void idxd_pmu_init(struct idxd_pmu *idxd_pmu)
{
	int i;

	for (i = 0; i < PERFMON_FILTERS_MAX; i++) {
		if (!test_bit(i, &idxd_pmu->supported_filters))
			skip_filter(i);
	}

	idxd_pmu->pmu.name		= idxd_pmu->name;
	idxd_pmu->pmu.attr_groups	= perfmon_attr_groups;
	idxd_pmu->pmu.task_ctx_nr	= perf_invalid_context;
	idxd_pmu->pmu.event_init	= perfmon_pmu_event_init;
	idxd_pmu->pmu.pmu_enable	= perfmon_pmu_enable;
	idxd_pmu->pmu.pmu_disable	= perfmon_pmu_disable;
	idxd_pmu->pmu.add		= perfmon_pmu_event_add;
	idxd_pmu->pmu.del		= perfmon_pmu_event_del;
	idxd_pmu->pmu.start		= perfmon_pmu_event_start;
	idxd_pmu->pmu.stop		= perfmon_pmu_event_stop;
	idxd_pmu->pmu.read		= perfmon_pmu_event_update;
	idxd_pmu->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
	idxd_pmu->pmu.module		= THIS_MODULE;
}

void perfmon_pmu_remove(struct idxd_device *idxd)
{
	if (!idxd->idxd_pmu)
		return;

	cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
	perf_pmu_unregister(&idxd->idxd_pmu->pmu);
	kfree(idxd->idxd_pmu);
	idxd->idxd_pmu = NULL;
}

static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct idxd_pmu *idxd_pmu;

	idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);

	/* select the first online CPU as the designated reader */
	if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
		cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
		idxd_pmu->cpu = cpu;
	}

	return 0;
}

static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct idxd_pmu *idxd_pmu;
	unsigned int target;

	idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);

	if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);

	/* migrate events if there is a valid target */
	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
	else
		target = -1;

	perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);

	return 0;
}

int perfmon_pmu_init(struct idxd_device *idxd)
{
	union idxd_perfcap perfcap;
	struct idxd_pmu *idxd_pmu;
	int rc = -ENODEV;

	/*
	 * perfmon module initialization failed, nothing to do
	 */
	if (!cpuhp_set_up)
		return -ENODEV;

	/*
	 * A perfmon table offset of 0 means perfmon isn't supported on
	 * this device; there's nothing to read and nothing to register.
	 */
	if (idxd->perfmon_offset == 0)
		return -ENODEV;

	idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL);
	if (!idxd_pmu)
		return -ENOMEM;

	idxd_pmu->idxd = idxd;
	idxd->idxd_pmu = idxd_pmu;

	if (idxd->data->type == IDXD_TYPE_DSA) {
		rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id);
		if (rc < 0)
			goto free;
	} else if (idxd->data->type == IDXD_TYPE_IAX) {
		rc = sprintf(idxd_pmu->name, "iax%d", idxd->id);
		if (rc < 0)
			goto free;
	} else {
		goto free;
	}

	perfmon_reset(idxd);

	perfcap.bits = ioread64(PERFCAP_REG(idxd));

	/*
	 * If the device reports no perf counters, stop registration
	 * here; this can happen e.g. when running as a guest without
	 * pmon support.
	 */
	if (perfcap.num_perf_counter == 0)
		goto free;

	/* A counter width of 0 means it can't count */
	if (perfcap.counter_width == 0)
		goto free;

	/* Overflow interrupt and counter freeze support must be available */
	if (!perfcap.overflow_interrupt || !perfcap.counter_freeze)
		goto free;

	/* Number of event categories cannot be 0 */
	if (perfcap.num_event_category == 0)
		goto free;

	/*
	 * We don't support per-counter capabilities for now.
	 */
	if (perfcap.cap_per_counter)
		goto free;

	idxd_pmu->n_event_categories = perfcap.num_event_category;
	idxd_pmu->supported_event_categories = perfcap.global_event_category;
	idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter;

	/* check filter capability.  If 0, then filters are not supported */
	idxd_pmu->supported_filters = perfcap.filter;
	if (perfcap.filter)
		idxd_pmu->n_filters = hweight8(perfcap.filter);

	/* Store the total number of counters and the counter width */
	idxd_pmu->n_counters = perfcap.num_perf_counter;
	idxd_pmu->counter_width = perfcap.counter_width;

	idxd_pmu_init(idxd_pmu);

	rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1);
	if (rc)
		goto free;

	rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
	if (rc) {
		perf_pmu_unregister(&idxd->idxd_pmu->pmu);
		goto free;
	}
out:
	return rc;
free:
	kfree(idxd_pmu);
	idxd->idxd_pmu = NULL;

	goto out;
}

void __init perfmon_init(void)
{
	int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
					 "driver/dma/idxd/perf:online",
					 perf_event_cpu_online,
					 perf_event_cpu_offline);
	if (WARN_ON(rc < 0))
		return;

	cpuhp_slot = rc;
	cpuhp_set_up = true;
}

void __exit perfmon_exit(void)
{
	if (cpuhp_set_up)
		cpuhp_remove_multi_state(cpuhp_slot);
}