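// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT) - monitoring code.
 *
 * RMID allocation and limbo handling for cache occupancy monitoring,
 * MBM counter reads and overflow handling, and the feedback loop used
 * by the MBA software controller (mba_sc).
 */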
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "internal.h"

struct rmid_entry {
        u32 rmid;
        int busy;
        struct list_head list;
};
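/*
 * @rmid_free_lru - A least recently used list of free RMIDs.
 *      These RMIDs are guaranteed to have an occupancy less than the
 *      threshold occupancy.
 */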
static LIST_HEAD(rmid_free_lru);
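/*
 * @rmid_limbo_count - count of currently unused but (potentially)
 *      dirty RMIDs.
 *      This counts RMIDs that no one is currently using but that
 *      may still have a cache occupancy greater than the threshold.
 */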
static unsigned int rmid_limbo_count;
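/*
 * @rmid_ptrs - per-RMID state, indexed by RMID. Entries sit on the
 *      free list (via @list) while their RMID is unallocated and clean.
 */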
static struct rmid_entry *rmid_ptrs;
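/*
 * Global boolean for rdt_monitor which is true if any
 * resource monitoring is enabled.
 */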
bool rdt_mon_capable;
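/*
 * Global to indicate which monitoring events are enabled.
 */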
unsigned int rdt_mon_features;
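/*
 * This is the threshold cache occupancy (in units of
 * boot_cpu_data.x86_cache_occ_scale) at which an RMID is considered
 * available for re-allocation.
 */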
unsigned int resctrl_cqm_threshold;

#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
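/*
 * MBM correction factor table, indexed by
 * (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1.
 *
 * When the RMID in use is above the row's @rmidthreshold, MBM total and
 * local counts are multiplied by the correction factor @cf. The factor
 * is stored normalized to 2^20 (1048576) so the corrected value can be
 * computed with a shift:
 *
 *      corrected_value = (original_value * correction_factor) >> 20
 */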
static const struct mbm_correction_factor_table {
        u32 rmidthreshold;
        u64 cf;
} mbm_cf_table[] __initconst = {
        {7, CF(1.000000)},
        {15, CF(1.000000)},
        {15, CF(0.969650)},
        {31, CF(1.000000)},
        {31, CF(1.066667)},
        {31, CF(0.969650)},
        {47, CF(1.142857)},
        {63, CF(1.000000)},
        {63, CF(1.185115)},
        {63, CF(1.066553)},
        {79, CF(1.454545)},
        {95, CF(1.000000)},
        {95, CF(1.230769)},
        {95, CF(1.142857)},
        {95, CF(1.066667)},
        {127, CF(1.000000)},
        {127, CF(1.254863)},
        {127, CF(1.185255)},
        {151, CF(1.000000)},
        {127, CF(1.066667)},
        {167, CF(1.000000)},
        {159, CF(1.454334)},
        {183, CF(1.000000)},
        {127, CF(0.969744)},
        {191, CF(1.280246)},
        {191, CF(1.230921)},
        {215, CF(1.000000)},
        {191, CF(1.143118)},
};

static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
static u64 mbm_cf __read_mostly;

static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
{
        if (rmid > mbm_cf_rmidthreshold)
                val = (val * mbm_cf) >> 20;

        return val;
}

static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
        struct rmid_entry *entry;

        entry = &rmid_ptrs[rmid];
        WARN_ON(entry->rmid != rmid);

        return entry;
}

static u64 __rmid_read(u32 rmid, u32 eventid)
{
        u64 val;
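        /*
         * Select the counter by programming the event id (bits 7:0) and
         * RMID (bits 41:32) into MSR_IA32_QM_EVTSEL, then read the count
         * from MSR_IA32_QM_CTR (bits 61:0). Bit 63 (RMID_VAL_ERROR) and
         * bit 62 (RMID_VAL_UNAVAIL) flag error/unavailable data.
         */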
        wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
        rdmsrl(MSR_IA32_QM_CTR, val);

        return val;
}

static bool rmid_dirty(struct rmid_entry *entry)
{
        u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);

        return val >= resctrl_cqm_threshold;
}
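/*
 * Check the RMIDs that are marked as busy for this domain. If the
 * reported LLC occupancy is below the threshold clear the busy bit and
 * decrement the count. If the busy count gets to zero on an RMID, we
 * free the RMID.
 */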
void __check_limbo(struct rdt_domain *d, bool force_free)
{
        struct rmid_entry *entry;
        struct rdt_resource *r;
        u32 crmid = 1, nrmid;

        r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
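        /*
         * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
         * are marked as busy for occupancy < threshold. If the occupancy
         * is less than the threshold decrement the busy counter of the
         * RMID and move it to the free list when the counter reaches 0.
         */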
        for (;;) {
                nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
                if (nrmid >= r->num_rmid)
                        break;

                entry = __rmid_entry(nrmid);
                if (force_free || !rmid_dirty(entry)) {
                        clear_bit(entry->rmid, d->rmid_busy_llc);
                        if (!--entry->busy) {
                                rmid_limbo_count--;
                                list_add_tail(&entry->list, &rmid_free_lru);
                        }
                }
                crmid = nrmid + 1;
        }
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
        return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
}
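/*
 * RMID allocation is currently global: a single free list is shared by
 * all domains. Which domains an RMID was dirtied on is tracked so that
 * the limbo handling can stay per domain.
 */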
int alloc_rmid(void)
{
        struct rmid_entry *entry;

        lockdep_assert_held(&rdtgroup_mutex);

        if (list_empty(&rmid_free_lru))
                return rmid_limbo_count ? -EBUSY : -ENOSPC;

        entry = list_first_entry(&rmid_free_lru,
                                 struct rmid_entry, list);
        list_del(&entry->list);

        return entry->rmid;
}

static void add_rmid_to_limbo(struct rmid_entry *entry)
{
        struct rdt_resource *r;
        struct rdt_domain *d;
        int cpu;
        u64 val;

        r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

        entry->busy = 0;
        cpu = get_cpu();
        list_for_each_entry(d, &r->domains, list) {
                if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
                        val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
                        if (val <= resctrl_cqm_threshold)
                                continue;
                }

                /*
                 * For the first limbo RMID in the domain,
                 * set up the limbo worker.
                 */
                if (!has_busy_rmid(r, d))
                        cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
                set_bit(entry->rmid, d->rmid_busy_llc);
                entry->busy++;
        }
        put_cpu();

        if (entry->busy)
                rmid_limbo_count++;
        else
                list_add_tail(&entry->list, &rmid_free_lru);
}

void free_rmid(u32 rmid)
{
        struct rmid_entry *entry;

        if (!rmid)
                return;

        lockdep_assert_held(&rdtgroup_mutex);

        entry = __rmid_entry(rmid);

        if (is_llc_occupancy_enabled())
                add_rmid_to_limbo(entry);
        else
                list_add_tail(&entry->list, &rmid_free_lru);
}

static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
        u64 shift = 64 - width, chunks;

        chunks = (cur_msr << shift) - (prev_msr << shift);
        return chunks >> shift;
}

static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
{
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
        struct mbm_state *m;
        u64 chunks, tval;

        tval = __rmid_read(rmid, rr->evtid);
        if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
                return tval;

        switch (rr->evtid) {
        case QOS_L3_OCCUP_EVENT_ID:
                rr->val += tval;
                return 0;
        case QOS_L3_MBM_TOTAL_EVENT_ID:
                m = &rr->d->mbm_total[rmid];
                break;
        case QOS_L3_MBM_LOCAL_EVENT_ID:
                m = &rr->d->mbm_local[rmid];
                break;
        default:
                /*
                 * Code would never reach here because an invalid
                 * event id would fail the __rmid_read.
                 */
                return RMID_VAL_ERROR;
        }

        if (rr->first) {
                memset(m, 0, sizeof(struct mbm_state));
                m->prev_bw_msr = m->prev_msr = tval;
                return 0;
        }

        chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width);
        m->chunks += chunks;
        m->prev_msr = tval;

        rr->val += get_corrected_mbm_count(rmid, m->chunks);

        return 0;
}
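/*
 * Supporting function to calculate the memory bandwidth
 * and delta bandwidth in MBps.
 */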
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
        struct mbm_state *m = &rr->d->mbm_local[rmid];
        u64 tval, cur_bw, chunks;

        tval = __rmid_read(rmid, rr->evtid);
        if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
                return;

        chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width);
        cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20;

        if (m->delta_comp)
                m->delta_bw = abs(cur_bw - m->prev_bw);
        m->delta_comp = false;
        m->prev_bw = cur_bw;
        m->prev_bw_msr = tval;
}
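/*
 * This is called via IPI to read the CQM/MBM counters
 * on a domain.
 */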
void mon_event_count(void *info)
{
        struct rdtgroup *rdtgrp, *entry;
        struct rmid_read *rr = info;
        struct list_head *head;
        u64 ret_val;

        rdtgrp = rr->rgrp;

        ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);

        /*
         * For Ctrl groups read data from child monitor groups and
         * add them together. Count events which are read successfully.
         * Discard the rmid_reads reporting errors.
         */
        head = &rdtgrp->mon.crdtgrp_list;

        if (rdtgrp->type == RDTCTRL_GROUP) {
                list_for_each_entry(entry, head, mon.crdtgrp_list) {
                        if (__mon_event_count(entry->mon.rmid, rr) == 0)
                                ret_val = 0;
                }
        }

        /* Report an error if none of the rmid_reads succeeded */
        if (ret_val)
                rr->val = ret_val;
}
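/*
 * Feedback loop for MBA software controller (mba_sc).
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth throttling values (via the IA32_MBA_THRTL MSRs)
 * so that:
 *
 *      current bandwidth (cur_bw) < user specified bandwidth (user_bw)
 *
 * This uses the MBM counters to measure the bandwidth and the MBA
 * throttle MSRs to control it for a particular rdtgroup; it builds on
 * the fact that resctrl rdtgroups have both monitoring and control.
 *
 * The check runs from the MBM overflow handler, so it happens at the
 * same interval as MBM counting. The throttle value only ever moves by
 * one bandwidth-granularity step per invocation, and delta_bw (the
 * measured change since the previous adjustment) is used to avoid
 * oscillating between two adjacent control values.
 */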
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
        u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
        struct mbm_state *pmbm_data, *cmbm_data;
        struct rdt_hw_resource *hw_r_mba;
        struct rdt_hw_domain *hw_dom_mba;
        u32 cur_bw, delta_bw, user_bw;
        struct rdt_resource *r_mba;
        struct rdt_domain *dom_mba;
        struct list_head *head;
        struct rdtgroup *entry;

        if (!is_mbm_local_enabled())
                return;

        hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
        r_mba = &hw_r_mba->r_resctrl;
        closid = rgrp->closid;
        rmid = rgrp->mon.rmid;
        pmbm_data = &dom_mbm->mbm_local[rmid];

        dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
        if (!dom_mba) {
                pr_warn_once("Failure to get domain for MBA update\n");
                return;
        }
        hw_dom_mba = resctrl_to_arch_dom(dom_mba);

        cur_bw = pmbm_data->prev_bw;
        user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
        delta_bw = pmbm_data->delta_bw;

        /*
         * cur_msr_val is the throttle value currently programmed for this
         * CLOSID; user_bw above is the bandwidth the user asked for.
         */
        cur_msr_val = hw_dom_mba->ctrl_val[closid];

        /*
         * For Ctrl groups read data from child monitor groups.
         */
        head = &rgrp->mon.crdtgrp_list;
        list_for_each_entry(entry, head, mon.crdtgrp_list) {
                cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
                cur_bw += cmbm_data->prev_bw;
                delta_bw += cmbm_data->delta_bw;
        }

        /*
         * Scale up/down the bandwidth linearly for the ctrl group. The
         * bandwidth step is the bandwidth granularity specified by the
         * hardware.
         *
         * The delta_bw is used when increasing the bandwidth so that we
         * don't alternately increase and decrease the control values
         * continuously.
         *
         * For example: consider cur_bw = 90MBps, user_bw = 100MBps and a
         * bandwidth step of 20MBps (> user_bw - cur_bw); we would keep
         * switching between 90 and 110 continuously if we only checked
         * cur_bw < user_bw.
         */
        if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
                new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
        } else if (cur_msr_val < MAX_MBA_BW &&
                   (user_bw > (cur_bw + delta_bw))) {
                new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
        } else {
                return;
        }

        cur_msr = hw_r_mba->msr_base + closid;
        wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
        hw_dom_mba->ctrl_val[closid] = new_msr_val;

        /*
         * Delta values are recomputed, per domain, for each rdtgrp every
         * time the throttle MSR changes value. This is because (1) the
         * increase in bandwidth is only "approximately" linear even when
         * the hardware says it is linear, and (2) since MBA is a
         * core-specific mechanism, the delta values vary with the number
         * of cores used by the rdtgrp.
         */
        pmbm_data->delta_comp = true;
        list_for_each_entry(entry, head, mon.crdtgrp_list) {
                cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
                cmbm_data->delta_comp = true;
        }
}
static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
{
        struct rmid_read rr;

        rr.first = false;
        rr.r = r;
        rr.d = d;

        /*
         * This is protected from concurrent reads from user
         * as both the user and we hold the global mutex.
         */
        if (is_mbm_total_enabled()) {
                rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
                __mon_event_count(rmid, &rr);
        }
        if (is_mbm_local_enabled()) {
                rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
                __mon_event_count(rmid, &rr);

                /*
                 * Call the MBA software controller only for the
                 * control groups and when the user has enabled
                 * the software controller explicitly.
                 */
                if (is_mba_sc(NULL))
                        mbm_bw_count(rmid, &rr);
        }
}
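/*
 * Handler to scan the limbo list and move to the free list the RMIDs
 * whose occupancy dropped below the threshold occupancy.
 */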
void cqm_handle_limbo(struct work_struct *work)
{
        unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
        int cpu = smp_processor_id();
        struct rdt_resource *r;
        struct rdt_domain *d;

        mutex_lock(&rdtgroup_mutex);

        r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
        d = container_of(work, struct rdt_domain, cqm_limbo.work);

        __check_limbo(d, false);

        if (has_busy_rmid(r, d))
                schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);

        mutex_unlock(&rdtgroup_mutex);
}

void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
        unsigned long delay = msecs_to_jiffies(delay_ms);
        int cpu;

        cpu = cpumask_any(&dom->cpu_mask);
        dom->cqm_work_cpu = cpu;

        schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}

void mbm_handle_overflow(struct work_struct *work)
{
        unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
        struct rdtgroup *prgrp, *crgrp;
        int cpu = smp_processor_id();
        struct list_head *head;
        struct rdt_resource *r;
        struct rdt_domain *d;

        mutex_lock(&rdtgroup_mutex);

        if (!static_branch_likely(&rdt_mon_enable_key))
                goto out_unlock;

        r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
        d = container_of(work, struct rdt_domain, mbm_over.work);

        list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
                mbm_update(r, d, prgrp->mon.rmid);

                head = &prgrp->mon.crdtgrp_list;
                list_for_each_entry(crgrp, head, mon.crdtgrp_list)
                        mbm_update(r, d, crgrp->mon.rmid);

                if (is_mba_sc(NULL))
                        update_mba_bw(prgrp, d);
        }

        schedule_delayed_work_on(cpu, &d->mbm_over, delay);

out_unlock:
        mutex_unlock(&rdtgroup_mutex);
}

void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
        unsigned long delay = msecs_to_jiffies(delay_ms);
        int cpu;

        if (!static_branch_likely(&rdt_mon_enable_key))
                return;
        cpu = cpumask_any(&dom->cpu_mask);
        dom->mbm_work_cpu = cpu;
        schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}

static int dom_data_init(struct rdt_resource *r)
{
        struct rmid_entry *entry = NULL;
        int i, nr_rmids;

        nr_rmids = r->num_rmid;
        rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
        if (!rmid_ptrs)
                return -ENOMEM;

        for (i = 0; i < nr_rmids; i++) {
                entry = &rmid_ptrs[i];
                INIT_LIST_HEAD(&entry->list);

                entry->rmid = i;
                list_add_tail(&entry->list, &rmid_free_lru);
        }

        /*
         * RMID 0 is special and is always allocated. It's used for all
         * tasks that are not monitored.
         */
        entry = __rmid_entry(0);
        list_del(&entry->list);

        return 0;
}

static struct mon_evt llc_occupancy_event = {
        .name = "llc_occupancy",
        .evtid = QOS_L3_OCCUP_EVENT_ID,
};

static struct mon_evt mbm_total_event = {
        .name = "mbm_total_bytes",
        .evtid = QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
        .name = "mbm_local_bytes",
        .evtid = QOS_L3_MBM_LOCAL_EVENT_ID,
};
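/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of the RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */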
static void l3_mon_evt_init(struct rdt_resource *r)
{
        INIT_LIST_HEAD(&r->evt_list);

        if (is_llc_occupancy_enabled())
                list_add_tail(&llc_occupancy_event.list, &r->evt_list);
        if (is_mbm_total_enabled())
                list_add_tail(&mbm_total_event.list, &r->evt_list);
        if (is_mbm_local_enabled())
                list_add_tail(&mbm_local_event.list, &r->evt_list);
}

int rdt_get_mon_l3_config(struct rdt_resource *r)
{
        unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
        unsigned int cl_size = boot_cpu_data.x86_cache_size;
        int ret;

        hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
        r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
        hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;

        if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
                hw_res->mbm_width += mbm_offset;
        else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
                pr_warn("Ignoring impossible MBM counter offset\n");
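        /*
         * A reasonable upper limit on the max threshold is the number
         * of lines tagged per RMID if all RMIDs have the same number of
         * lines tagged in the LLC.
         *
         * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
         */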
        resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;

        /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
        resctrl_cqm_threshold /= hw_res->mon_scale;

        ret = dom_data_init(r);
        if (ret)
                return ret;

        l3_mon_evt_init(r);

        r->mon_capable = true;
        r->mon_enabled = true;

        return 0;
}

void __init intel_rdt_mbm_apply_quirk(void)
{
        int cf_index;

        cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
        if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
                pr_info("No MBM correction factor available\n");
                return;
        }

        mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
        mbm_cf = mbm_cf_table[cf_index].cf;
}