/*
 * Resource Director Technology (RDT) - Cache Pseudo-Locking support.
 *
 * A portion of a cache instance is pre-loaded with kernel memory while the
 * hardware prefetchers are disabled, then exposed to user space through a
 * character device so that applications can map memory that is expected to
 * remain cache resident.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
#include <linux/perf_event.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include <asm/cacheflush.h>
#include <asm/intel-family.h>
#include <asm/resctrl.h>
#include <asm/perf_event.h>

#include "../../events/perf_event.h" /* For X86_CONFIG() */
#include "internal.h"

#define CREATE_TRACE_POINTS
#include "pseudo_lock_event.h"
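
/*
 * The bits needed in MSR_MISC_FEATURE_CONTROL to disable the hardware
 * prefetchers on this platform. Determined when a pseudo-lock region is
 * set up and used by the locking and measurement threads.
 */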
static u64 prefetch_disable_bits;

static unsigned int pseudo_lock_major;
static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
static struct class *pseudo_lock_class;
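
/*
 * get_prefetch_disable_bits - prefetch disable bits of supported platforms
 *
 * Pseudo-locking relies on the hardware prefetchers being disabled while a
 * region is loaded. The bits returned here are written to
 * MSR_MISC_FEATURE_CONTROL on the CPU doing the locking; which prefetcher
 * each bit controls is model specific (see the Intel SDM for the exact
 * encoding). A return value of zero means the platform is not supported.
 */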
static u64 get_prefetch_disable_bits(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	    boot_cpu_data.x86 != 6)
		return 0;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_BROADWELL_X:
		return 0xF;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		return 0x5;
	}

	return 0;
}
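
/*
 * pseudo_lock_minor_get - Obtain available minor number
 * @minor: Pointer to where new minor number will be stored
 *
 * A bitmask is used to track available minor numbers. Here the next free
 * minor number is marked as unavailable and returned.
 *
 * Return: 0 on success, <0 on failure.
 */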
static int pseudo_lock_minor_get(unsigned int *minor)
{
	unsigned long first_bit;

	first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);

	if (first_bit == MINORBITS)
		return -ENOSPC;

	__clear_bit(first_bit, &pseudo_lock_minor_avail);
	*minor = first_bit;

	return 0;
}

static void pseudo_lock_minor_release(unsigned int minor)
{
	__set_bit(minor, &pseudo_lock_minor_avail);
}
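
/*
 * region_find_by_minor - Locate a pseudo-lock region by inode minor number
 * @minor: The minor number of the device representing pseudo-locked region
 *
 * When the character device is opened the minor number of its inode is
 * used to find the resource group backing the pseudo-locked region.
 *
 * Return: On success return pointer to resource group owning the
 *         pseudo-locked region, NULL on failure.
 */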
static struct rdtgroup *region_find_by_minor(unsigned int minor)
{
	struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
			rdtgrp_match = rdtgrp;
			break;
		}
	}
	return rdtgrp_match;
}

struct pseudo_lock_pm_req {
	struct list_head list;
	struct dev_pm_qos_request req;
};

static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req, *next;

	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
		dev_pm_qos_remove_request(&pm_req->req);
		list_del(&pm_req->list);
		kfree(pm_req);
	}
}
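
/*
 * pseudo_lock_cstates_constrain - Restrict deep C-state entry
 * @plr: pseudo-lock region requiring the constraint
 *
 * Add a PM QoS resume latency request of 30 us for every CPU associated
 * with the cache on which the pseudo-locked memory resides. This is
 * intended to keep those cores out of C-states deep enough to affect the
 * cache contents while the pseudo-locked region exists.
 *
 * Return: 0 on success, <0 on failure.
 */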
static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req;
	int cpu;
	int ret;

	for_each_cpu(cpu, &plr->d->cpu_mask) {
		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
		if (!pm_req) {
			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
			ret = -ENOMEM;
			goto out_err;
		}
		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
					     &pm_req->req,
					     DEV_PM_QOS_RESUME_LATENCY,
					     30);
		if (ret < 0) {
			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
					    cpu);
			kfree(pm_req);
			ret = -1;
			goto out_err;
		}
		list_add(&pm_req->list, &plr->pm_reqs);
	}

	return 0;

out_err:
	pseudo_lock_cstates_relax(plr);
	return ret;
}

static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
{
	plr->size = 0;
	plr->line_size = 0;
	kfree(plr->kmem);
	plr->kmem = NULL;
	plr->s = NULL;
	if (plr->d)
		plr->d->plr = NULL;
	plr->d = NULL;
	plr->cbm = 0;
	plr->debugfs_dir = NULL;
}
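
/*
 * pseudo_lock_region_init - Initialize pseudo-lock region information
 * @plr: pseudo-lock region
 *
 * On entry @plr is already associated with the resource, domain, and
 * capacity bitmask of the region to be pseudo-locked. Here the size and
 * cache line size of the region are determined from the cache information
 * of the first CPU associated with the cache.
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */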
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
	struct cpu_cacheinfo *ci;
	int ret;
	int i;

	plr->cpu = cpumask_first(&plr->d->cpu_mask);

	if (!cpu_online(plr->cpu)) {
		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
				    plr->cpu);
		ret = -ENODEV;
		goto out_region;
	}

	ci = get_cpu_cacheinfo(plr->cpu);

	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);

	for (i = 0; i < ci->num_leaves; i++) {
		if (ci->info_list[i].level == plr->s->res->cache_level) {
			plr->line_size = ci->info_list[i].coherency_line_size;
			return 0;
		}
	}

	ret = -1;
	rdt_last_cmd_puts("Unable to determine cache line size\n");
out_region:
	pseudo_lock_region_clear(plr);
	return ret;
}

static int pseudo_lock_init(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr;

	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
	if (!plr)
		return -ENOMEM;

	init_waitqueue_head(&plr->lock_thread_wq);
	INIT_LIST_HEAD(&plr->pm_reqs);
	rdtgrp->plr = plr;
	return 0;
}
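
/*
 * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
 * @plr: pseudo-lock region
 *
 * Initialize the region and allocate the kernel memory that will later be
 * loaded into the cache. The allocation is capped at KMALLOC_MAX_SIZE so
 * that a single contiguous buffer backs the region.
 *
 * Return: 0 on success, <0 on failure.
 */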
static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
{
	int ret;

	ret = pseudo_lock_region_init(plr);
	if (ret < 0)
		return ret;

	if (plr->size > KMALLOC_MAX_SIZE) {
		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
		ret = -E2BIG;
		goto out_region;
	}

	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
	if (!plr->kmem) {
		rdt_last_cmd_puts("Unable to allocate memory\n");
		ret = -ENOMEM;
		goto out_region;
	}

	ret = 0;
	goto out;
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

static void pseudo_lock_free(struct rdtgroup *rdtgrp)
{
	pseudo_lock_region_clear(rdtgrp->plr);
	kfree(rdtgrp->plr);
	rdtgrp->plr = NULL;
}
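
/*
 * pseudo_lock_fn - Load kernel memory into cache
 * @_rdtgrp: resource group to which pseudo-lock region belongs
 *
 * Core pseudo-locking flow, run from a kthread bound to a CPU associated
 * with the cache being locked:
 * - flush the caches and disable interrupts,
 * - disable the hardware prefetchers,
 * - switch this CPU to the CLOSID reserved for the locked region,
 * - read every page and then every cache line of the buffer so it is
 *   brought into the cache portion selected by that CLOSID,
 * - restore the original CLOSID/RMID and re-enable the prefetchers.
 *
 * Return: 0. Waiters on plr->lock_thread_wq are woken when done.
 */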
static int pseudo_lock_fn(void *_rdtgrp)
{
	struct rdtgroup *rdtgrp = _rdtgrp;
	struct pseudo_lock_region *plr = rdtgrp->plr;
	u32 rmid_p, closid_p;
	unsigned long i;
#ifdef CONFIG_KASAN
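	/*
	 * The registers used below for local register variables are also
	 * needed by KASAN instrumentation. When KASAN is enabled, plain
	 * local variables are used instead; this relies on the locking
	 * loop being simple enough that the compiler does not spill them
	 * to stack memory during the critical section.
	 */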
	unsigned int line_size;
	unsigned int size;
	void *mem_r;
#else
	register unsigned int line_size asm("esi");
	register unsigned int size asm("edi");
	register void *mem_r asm(_ASM_BX);
#endif
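
	/*
	 * Make sure none of the allocated memory is cached. If any of it
	 * is, the loops below would get cache hits from outside of the
	 * pseudo-locked region. wbinvd (as opposed to clflush/clflushopt)
	 * is used to increase the likelihood that the allocated cache
	 * portion will be filled with the associated memory.
	 */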
	native_wbinvd();

	local_irq_disable();
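
	/*
	 * Use __wrmsr() directly (no tracing) to avoid disturbing the
	 * cache hierarchy, and disable the hardware prefetchers so they do
	 * not pull unrelated data into the cache while the region is being
	 * loaded.
	 */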
	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
	closid_p = this_cpu_read(pqr_state.cur_closid);
	rmid_p = this_cpu_read(pqr_state.cur_rmid);
	mem_r = plr->kmem;
	size = plr->size;
	line_size = plr->line_size;

	__wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
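
	/*
	 * Read the memory twice: first a page at a time so that the page
	 * table entries and paging structure caches are populated, then a
	 * cache line at a time so that every line of the buffer is brought
	 * into the cache portion selected by the active CLOSID.
	 */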
	for (i = 0; i < size; i += PAGE_SIZE) {
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	for (i = 0; i < size; i += line_size) {
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}

	__wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);

	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
	local_irq_enable();

	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
{
	return !list_empty(&rdtgrp->mon.crdtgrp_list);
}
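
/*
 * rdtgroup_locksetup_user_restrict - Restrict user access to group
 * @rdtgrp: resource group needing access restricted
 *
 * A resource group used for cache pseudo-locking cannot have cpus or tasks
 * assigned to it. This is communicated to the user by restricting access
 * to all the files that can be used to make such changes.
 *
 * Permissions restored with rdtgroup_locksetup_user_restore().
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restriction of access an attempt will be made to restore permissions but
 * the state of the mode of these files will be uncertain when a failure
 * occurs.
 */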
static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
	if (ret)
		goto err_cpus;

	if (rdt_mon_capable) {
		ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
err_cpus:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
err_tasks:
	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
out:
	return ret;
}

static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
	if (ret)
		goto err_cpus;

	if (rdt_mon_capable) {
		ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
err_cpus:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
err_tasks:
	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
out:
	return ret;
}
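
/*
 * rdtgroup_locksetup_enter - Resource group enters locksetup mode
 * @rdtgrp: resource group requested to enter locksetup mode
 *
 * A resource group enters locksetup mode to reflect that it will be used
 * to represent a pseudo-locked region and is being set up to do so. Such a
 * group cannot be the default group, cannot be monitored, and cannot have
 * any tasks or cpus assigned; pseudo-locking is also refused while CDP is
 * enabled or on platforms without prefetch disable support. On success the
 * resource group is ready to receive the schemata describing the
 * pseudo-locked region.
 *
 * Return: 0 if the resource group was successfully moved into locksetup
 * mode, <0 on failure. On failure the error is written to the
 * last_cmd_status buffer.
 */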
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	int ret;

	if (rdtgrp == &rdtgroup_default) {
		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
		return -EINVAL;
	}

	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
		rdt_last_cmd_puts("CDP enabled\n");
		return -EINVAL;
	}

	prefetch_disable_bits = get_prefetch_disable_bits();
	if (prefetch_disable_bits == 0) {
		rdt_last_cmd_puts("Pseudo-locking not supported\n");
		return -EINVAL;
	}

	if (rdtgroup_monitor_in_progress(rdtgrp)) {
		rdt_last_cmd_puts("Monitoring in progress\n");
		return -EINVAL;
	}

	if (rdtgroup_tasks_assigned(rdtgrp)) {
		rdt_last_cmd_puts("Tasks assigned to resource group\n");
		return -EINVAL;
	}

	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
		rdt_last_cmd_puts("CPUs assigned to resource group\n");
		return -EINVAL;
	}

	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
		return -EIO;
	}

	ret = pseudo_lock_init(rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
		goto out_release;
	}

	free_rmid(rdtgrp->mon.rmid);

	ret = 0;
	goto out;

out_release:
	rdtgroup_locksetup_user_restore(rdtgrp);
out:
	return ret;
}
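
/*
 * rdtgroup_locksetup_exit - Resource group exits locksetup mode
 * @rdtgrp: resource group
 *
 * When a resource group exits locksetup mode the earlier restrictions are
 * lifted: an RMID is allocated again if the system is capable of
 * monitoring and the file permissions restricted on entry are restored.
 *
 * Return: 0 on success, <0 on failure
 */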
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	int ret;

	if (rdt_mon_capable) {
		ret = alloc_rmid();
		if (ret < 0) {
			rdt_last_cmd_puts("Out of RMIDs\n");
			return ret;
		}
		rdtgrp->mon.rmid = ret;
	}

	ret = rdtgroup_locksetup_user_restore(rdtgrp);
	if (ret) {
		free_rmid(rdtgrp->mon.rmid);
		return ret;
	}

	pseudo_lock_free(rdtgrp);
	return 0;
}
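
/*
 * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
 * @d: RDT domain
 * @cbm: CBM to test
 *
 * @d represents a cache instance and @cbm a capacity bitmask that is
 * considered for it. Determine if @cbm overlaps with any existing
 * pseudo-locked region on @d.
 *
 * Return: true if @cbm overlaps with pseudo-locked region on @d, false
 * otherwise.
 */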
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
	unsigned int cbm_len;
	unsigned long cbm_b;

	if (d->plr) {
		cbm_len = d->plr->s->res->cache.cbm_len;
		cbm_b = d->plr->cbm;
		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
			return true;
	}
	return false;
}
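
/*
 * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
 * @d: RDT domain under test
 *
 * The setup of a pseudo-locked region affects all cache instances that
 * share CPUs with it. Walk all allocation-enabled resources and test
 * whether any CPU of @d is also associated with a domain that contains a
 * pseudo-locked region.
 *
 * Return: true if a pseudo-locked region exists in the hierarchy of @d or
 *         if it is not possible to test due to memory allocation failure,
 *         false otherwise.
 */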
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
{
	cpumask_var_t cpu_with_psl;
	struct rdt_resource *r;
	struct rdt_domain *d_i;
	bool ret = false;

	if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
		return true;

	for_each_alloc_enabled_rdt_resource(r) {
		list_for_each_entry(d_i, &r->domains, list) {
			if (d_i->plr)
				cpumask_or(cpu_with_psl, cpu_with_psl,
					   &d_i->cpu_mask);
		}
	}

	if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
		ret = true;

	free_cpumask_var(cpu_with_psl);
	return ret;
}
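
/*
 * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
 * @_plr: pseudo-lock region to measure
 *
 * There is no deterministic way to test if a memory region is cached. One
 * way is to measure how long it takes to read the memory, which is done
 * here with interrupts and the hardware prefetchers disabled: the read
 * latency of every 32-byte stride is recorded via the
 * pseudo_lock_mem_latency tracepoint.
 */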
static int measure_cycles_lat_fn(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	unsigned long i;
	u64 start, end;
	void *mem_r;

	local_irq_disable();

	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
	mem_r = READ_ONCE(plr->kmem);
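
	/*
	 * Dummy execute of the time measurement to load the needed
	 * instructions into the L1 instruction cache before the
	 * measurements in the loop below start.
	 */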
	start = rdtsc_ordered();
	for (i = 0; i < plr->size; i += 32) {
		start = rdtsc_ordered();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
		end = rdtsc_ordered();
		trace_pseudo_lock_mem_latency((u32)(end - start));
	}
	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
	local_irq_enable();
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}
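
/*
 * perf_event_attr templates for the cache hit and miss counters used by the
 * residency measurements. The model-specific event/umask encodings are
 * filled in by measure_l2_residency() and measure_l3_residency() before the
 * counters are created.
 */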
static struct perf_event_attr perf_miss_attr = {
	.type		= PERF_TYPE_RAW,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 0,
	.exclude_user	= 1,
};

static struct perf_event_attr perf_hit_attr = {
	.type		= PERF_TYPE_RAW,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 0,
	.exclude_user	= 1,
};

struct residency_counts {
	u64 miss_before, hits_before;
	u64 miss_after, hits_after;
};

static int measure_residency_fn(struct perf_event_attr *miss_attr,
				struct perf_event_attr *hit_attr,
				struct pseudo_lock_region *plr,
				struct residency_counts *counts)
{
	u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
	struct perf_event *miss_event, *hit_event;
	int hit_pmcnum, miss_pmcnum;
	unsigned int line_size;
	unsigned int size;
	unsigned long i;
	void *mem_r;
	u64 tmp;

	miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
						      NULL, NULL, NULL);
	if (IS_ERR(miss_event))
		goto out;

	hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
						     NULL, NULL, NULL);
	if (IS_ERR(hit_event))
		goto out_miss;

	local_irq_disable();

	if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
		local_irq_enable();
		goto out_hit;
	}
	if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
		local_irq_enable();
		goto out_hit;
	}

	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);

	miss_pmcnum = x86_perf_rdpmc_index(miss_event);
	hit_pmcnum = x86_perf_rdpmc_index(hit_event);
	line_size = READ_ONCE(plr->line_size);
	mem_r = READ_ONCE(plr->kmem);
	size = READ_ONCE(plr->size);
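
	/*
	 * The counters are read twice on purpose: the first pair of reads
	 * loads the measurement code into the L1 instruction cache, the
	 * second pair (after a barrier) captures the values actually used
	 * as the "before" counts.
	 */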
	rdpmcl(hit_pmcnum, hits_before);
	rdpmcl(miss_pmcnum, miss_before);

	rmb();
	rdpmcl(hit_pmcnum, hits_before);
	rdpmcl(miss_pmcnum, miss_before);

	rmb();
	for (i = 0; i < size; i += line_size) {
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}

	rmb();
	rdpmcl(hit_pmcnum, hits_after);
	rdpmcl(miss_pmcnum, miss_after);

	rmb();

	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
	local_irq_enable();
out_hit:
	perf_event_release_kernel(hit_event);
out_miss:
	perf_event_release_kernel(miss_event);
out:
	counts->miss_before = miss_before;
	counts->hits_before = hits_before;
	counts->miss_after = miss_after;
	counts->hits_after = hits_after;
	return 0;
}

static int measure_l2_residency(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	struct residency_counts counts = {0};

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
						   .umask = 0x10);
		perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
						  .umask = 0x2);
		break;
	default:
		goto out;
	}

	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);

	trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
			     counts.miss_after - counts.miss_before);
out:
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

static int measure_l3_residency(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	struct residency_counts counts = {0};

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_BROADWELL_X:
		perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
						  .umask = 0x4f);
		perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
						   .umask = 0x41);
		break;
	default:
		goto out;
	}

	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);

	counts.miss_after -= counts.miss_before;
	if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
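		/*
		 * The hit event configured above for Broadwell Server
		 * appears to count last level cache references (hits and
		 * misses alike), so the misses observed during the
		 * measurement are subtracted from the hit count below to
		 * approximate the true number of hits.
		 */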
		counts.hits_after -= counts.hits_before;

		counts.hits_after -= min(counts.miss_after, counts.hits_after);
	} else {
		counts.hits_after -= counts.hits_before;
	}

	trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
out:
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}
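
/*
 * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
 * @rdtgrp: resource group to which the pseudo-locked region belongs
 * @sel: selector of which measurement to perform on a pseudo-locked region
 *
 * The measurement of latency to access a pseudo-locked region should be
 * done from a cpu that is associated with that pseudo-locked region.
 * Determine which cpu is associated with this region and start a thread on
 * that cpu to perform the measurement, then wait for that thread to
 * complete.
 *
 * Return: 0 on success, <0 on failure
 */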
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int cpu;
	int ret = -1;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out;
	}

	if (!plr->d) {
		ret = -ENODEV;
		goto out;
	}

	plr->thread_done = 0;
	cpu = cpumask_first(&plr->d->cpu_mask);
	if (!cpu_online(cpu)) {
		ret = -ENODEV;
		goto out;
	}

	plr->cpu = cpu;

	if (sel == 1)
		thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else if (sel == 2)
		thread = kthread_create_on_node(measure_l2_residency, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else if (sel == 3)
		thread = kthread_create_on_node(measure_l3_residency, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else
		goto out;

	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		goto out;
	}
	kthread_bind(thread, cpu);
	wake_up_process(thread);

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0)
		goto out;

	ret = 0;

out:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return ret;
}

static ssize_t pseudo_lock_measure_trigger(struct file *file,
					   const char __user *user_buf,
					   size_t count, loff_t *ppos)
{
	struct rdtgroup *rdtgrp = file->private_data;
	size_t buf_size;
	char buf[32];
	int ret;
	int sel;

	buf_size = min(count, (sizeof(buf) - 1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	buf[buf_size] = '\0';
	ret = kstrtoint(buf, 10, &sel);
	if (ret == 0) {
		if (sel != 1 && sel != 2 && sel != 3)
			return -EINVAL;
		ret = debugfs_file_get(file->f_path.dentry);
		if (ret)
			return ret;
		ret = pseudo_lock_measure_cycles(rdtgrp, sel);
		if (ret == 0)
			ret = count;
		debugfs_file_put(file->f_path.dentry);
	}

	return ret;
}

static const struct file_operations pseudo_measure_fops = {
	.write = pseudo_lock_measure_trigger,
	.open = simple_open,
	.llseek = default_llseek,
};
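
/*
 * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-lock region belongs
 *
 * Called when a resource group in pseudo-locksetup mode receives a valid
 * schemata that should be pseudo-locked. The kernel memory is allocated
 * and loaded into the cache by a thread bound to a CPU associated with the
 * cache, a debugfs measurement interface and a character device exposing
 * the region to user space are created, and the mode of the resource group
 * switches to pseudo-locked.
 *
 * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
 * on failure. Descriptive error will be written to last_cmd_status buffer.
 */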
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int new_minor;
	struct device *dev;
	int ret;

	ret = pseudo_lock_region_alloc(plr);
	if (ret < 0)
		return ret;

	ret = pseudo_lock_cstates_constrain(plr);
	if (ret < 0) {
		ret = -EINVAL;
		goto out_region;
	}

	plr->thread_done = 0;

	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
					cpu_to_node(plr->cpu),
					"pseudo_lock/%u", plr->cpu);
	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
		goto out_cstates;
	}

	kthread_bind(thread, plr->cpu);
	wake_up_process(thread);

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0) {
		rdt_last_cmd_puts("Locking thread interrupted\n");
		goto out_cstates;
	}

	ret = pseudo_lock_minor_get(&new_minor);
	if (ret < 0) {
		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
		goto out_cstates;
	}
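
	/*
	 * Drop rdtgroup_mutex while the debugfs and device entries are
	 * created to avoid lock ordering issues with the locks taken in
	 * the debugfs and device creation paths. Once the mutex is
	 * reacquired, check whether the group was removed in the meantime.
	 */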
	mutex_unlock(&rdtgroup_mutex);

	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
		plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
						      debugfs_resctrl);
		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
			debugfs_create_file("pseudo_lock_measure", 0200,
					    plr->debugfs_dir, rdtgrp,
					    &pseudo_measure_fops);
	}

	dev = device_create(pseudo_lock_class, NULL,
			    MKDEV(pseudo_lock_major, new_minor),
			    rdtgrp, "%s", rdtgrp->kn->name);

	mutex_lock(&rdtgroup_mutex);

	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		rdt_last_cmd_printf("Failed to create character device: %d\n",
				    ret);
		goto out_debugfs;
	}

	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out_device;
	}

	plr->minor = new_minor;

	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
	closid_free(rdtgrp->closid);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);

	ret = 0;
	goto out;

out_device:
	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
out_debugfs:
	debugfs_remove_recursive(plr->debugfs_dir);
	pseudo_lock_minor_release(new_minor);
out_cstates:
	pseudo_lock_cstates_relax(plr);
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}
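
/*
 * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
 * @rdtgrp: resource group to which the pseudo-locked region belongs
 *
 * The removal of a pseudo-locked region can be initiated when the resource
 * group is removed from user space via a "rmdir" or on unmount of the
 * resctrl filesystem. On removal the resources and memory associated with
 * the pseudo-locked region are released.
 *
 * Return: void
 */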
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/* Region was never locked, only the closid needs release. */
		closid_free(rdtgrp->closid);
		goto free;
	}

	pseudo_lock_cstates_relax(plr);
	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
	pseudo_lock_minor_release(plr->minor);

free:
	pseudo_lock_free(rdtgrp);
}

static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = region_find_by_minor(iminor(inode));
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	filp->private_data = rdtgrp;
	atomic_inc(&rdtgrp->waitcount);
	/* Perform a non-seekable open - llseek is not supported */
	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}
	filp->private_data = NULL;
	atomic_dec(&rdtgrp->waitcount);
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
{
	/* Not supported */
	return -EINVAL;
}

static const struct vm_operations_struct pseudo_mmap_ops = {
	.mremap = pseudo_lock_dev_mremap,
};
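
/*
 * pseudo_lock_dev_mmap - Map the pseudo-locked memory into user space
 *
 * Only tasks restricted to run on CPUs associated with the pseudo-locked
 * cache instance may map the region, the mapping must be shared (no
 * copy-on-write), and the requested range must fall within the
 * pseudo-locked buffer. The memory is zeroed before it is handed to user
 * space.
 */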
static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long vsize = vma->vm_end - vma->vm_start;
	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
	struct pseudo_lock_region *plr;
	struct rdtgroup *rdtgrp;
	unsigned long physical;
	unsigned long psize;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	plr = rdtgrp->plr;

	if (!plr->d) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	physical = __pa(plr->kmem) >> PAGE_SHIFT;
	psize = plr->size - off;

	if (off > plr->size) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	if (!(vma->vm_flags & VM_SHARED)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	if (vsize > psize) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	memset(plr->kmem + off, 0, vsize);

	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
			    vsize, vma->vm_page_prot)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EAGAIN;
	}
	vma->vm_ops = &pseudo_mmap_ops;
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static const struct file_operations pseudo_lock_dev_fops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read =		NULL,
	.write =	NULL,
	.open =		pseudo_lock_dev_open,
	.release =	pseudo_lock_dev_release,
	.mmap =		pseudo_lock_dev_mmap,
};

static char *pseudo_lock_devnode(struct device *dev, umode_t *mode)
{
	struct rdtgroup *rdtgrp;

	rdtgrp = dev_get_drvdata(dev);
	if (mode)
		*mode = 0600;
	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
}

int rdt_pseudo_lock_init(void)
{
	int ret;

	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
	if (ret < 0)
		return ret;

	pseudo_lock_major = ret;

	pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock");
	if (IS_ERR(pseudo_lock_class)) {
		ret = PTR_ERR(pseudo_lock_class);
		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
		return ret;
	}

	pseudo_lock_class->devnode = pseudo_lock_devnode;
	return 0;
}

void rdt_pseudo_lock_release(void)
{
	class_destroy(pseudo_lock_class);
	pseudo_lock_class = NULL;
	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
	pseudo_lock_major = 0;
}