0001
0002
0003
0004
0005
0006
0007
/* Every pr_*() message emitted by this file is prefixed with "cpum_sf: ". */
#define KMSG_COMPONENT	"cpum_sf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
0010
0011 #include <linux/kernel.h>
0012 #include <linux/kernel_stat.h>
0013 #include <linux/perf_event.h>
0014 #include <linux/percpu.h>
0015 #include <linux/pid.h>
0016 #include <linux/notifier.h>
0017 #include <linux/export.h>
0018 #include <linux/slab.h>
0019 #include <linux/mm.h>
0020 #include <linux/moduleparam.h>
0021 #include <asm/cpu_mf.h>
0022 #include <asm/irq.h>
0023 #include <asm/debug.h>
0024 #include <asm/timex.h>
0025
0026
0027
0028
0029
0030 #define CPUM_SF_MIN_SDBT 1
0031
0032
0033
0034
0035
0036 #define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
0037
0038
0039
0040
0041
0042 #define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
0043 static inline int require_table_link(const void *sdbt)
0044 {
0045 return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
0046 }
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066 static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
0067 static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
0068 static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
0069
/* Bookkeeping for one sampling buffer (a chain of SDBTs and their SDBs) */
struct sf_buffer {
	unsigned long *sdbt;	    /* Origin: first sample-data-block-table */
	/* Buffer characteristics (needed when extending or freeing) */
	unsigned long num_sdb;	    /* Number of allocated SDBs */
	unsigned long num_sdbt;	    /* Number of allocated SDBTs */
	unsigned long *tail;	    /* Last entry: link back to the origin */
};
0077
0078 struct aux_buffer {
0079 struct sf_buffer sfb;
0080 unsigned long head;
0081 unsigned long alert_mark;
0082 unsigned long empty_mark;
0083 unsigned long *sdb_index;
0084 unsigned long *sdbt_index;
0085 };
0086
0087 struct cpu_hw_sf {
0088
0089 struct hws_qsi_info_block qsi;
0090
0091 struct hws_lsctl_request_block lsctl;
0092 struct sf_buffer sfb;
0093 unsigned int flags;
0094 struct perf_event *event;
0095 struct perf_output_handle handle;
0096 };
0097 static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
0098
0099
0100 static debug_info_t *sfdbg;
0101
0102
0103
0104
0105 static int sf_disable(void)
0106 {
0107 struct hws_lsctl_request_block sreq;
0108
0109 memset(&sreq, 0, sizeof(sreq));
0110 return lsctl(&sreq);
0111 }
0112
0113
0114
0115
0116 static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
0117 {
0118 return !!cpuhw->sfb.sdbt;
0119 }
0120
0121
0122
0123
0124 static void free_sampling_buffer(struct sf_buffer *sfb)
0125 {
0126 unsigned long *sdbt, *curr;
0127
0128 if (!sfb->sdbt)
0129 return;
0130
0131 sdbt = sfb->sdbt;
0132 curr = sdbt;
0133
0134
0135 while (1) {
0136 if (!*curr || !sdbt)
0137 break;
0138
0139
0140 if (is_link_entry(curr)) {
0141 curr = get_next_sdbt(curr);
0142 if (sdbt)
0143 free_page((unsigned long) sdbt);
0144
0145
0146 if (curr == sfb->sdbt)
0147 break;
0148 else
0149 sdbt = curr;
0150 } else {
0151
0152 if (*curr) {
0153 free_page(*curr);
0154 curr++;
0155 }
0156 }
0157 }
0158
0159 debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
0160 (unsigned long)sfb->sdbt);
0161 memset(sfb, 0, sizeof(*sfb));
0162 }
0163
0164 static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
0165 {
0166 unsigned long sdb, *trailer;
0167
0168
0169 sdb = get_zeroed_page(gfp_flags);
0170 if (!sdb)
0171 return -ENOMEM;
0172 trailer = trailer_entry_ptr(sdb);
0173 *trailer = SDB_TE_ALERT_REQ_MASK;
0174
0175
0176 *sdbt = sdb;
0177
0178 return 0;
0179 }
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192 static int realloc_sampling_buffer(struct sf_buffer *sfb,
0193 unsigned long num_sdb, gfp_t gfp_flags)
0194 {
0195 int i, rc;
0196 unsigned long *new, *tail, *tail_prev = NULL;
0197
0198 if (!sfb->sdbt || !sfb->tail)
0199 return -EINVAL;
0200
0201 if (!is_link_entry(sfb->tail))
0202 return -EINVAL;
0203
0204
0205
0206
0207
0208
0209 tail = sfb->tail;
0210
0211
0212
0213
0214 if (sfb->sdbt != get_next_sdbt(tail)) {
0215 debug_sprintf_event(sfdbg, 3, "%s: "
0216 "sampling buffer is not linked: origin %#lx"
0217 " tail %#lx\n", __func__,
0218 (unsigned long)sfb->sdbt,
0219 (unsigned long)tail);
0220 return -EINVAL;
0221 }
0222
0223
0224 rc = 0;
0225 for (i = 0; i < num_sdb; i++) {
0226
0227 if (require_table_link(tail)) {
0228 new = (unsigned long *) get_zeroed_page(gfp_flags);
0229 if (!new) {
0230 rc = -ENOMEM;
0231 break;
0232 }
0233 sfb->num_sdbt++;
0234
0235 *tail = (unsigned long)(void *) new + 1;
0236 tail_prev = tail;
0237 tail = new;
0238 }
0239
0240
0241
0242
0243
0244
0245 rc = alloc_sample_data_block(tail, gfp_flags);
0246 if (rc) {
0247
0248
0249
0250
0251
0252 if (tail_prev) {
0253 sfb->num_sdbt--;
0254 free_page((unsigned long) new);
0255 tail = tail_prev;
0256 }
0257 break;
0258 }
0259 sfb->num_sdb++;
0260 tail++;
0261 tail_prev = new = NULL;
0262 }
0263
0264
0265 *tail = (unsigned long) sfb->sdbt + 1;
0266 sfb->tail = tail;
0267
0268 debug_sprintf_event(sfdbg, 4, "%s: new buffer"
0269 " settings: sdbt %lu sdb %lu\n", __func__,
0270 sfb->num_sdbt, sfb->num_sdb);
0271 return rc;
0272 }
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285 static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
0286 {
0287 int rc;
0288
0289 if (sfb->sdbt)
0290 return -EINVAL;
0291
0292
0293 sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
0294 if (!sfb->sdbt)
0295 return -ENOMEM;
0296 sfb->num_sdb = 0;
0297 sfb->num_sdbt = 1;
0298
0299
0300
0301
0302 sfb->tail = sfb->sdbt;
0303 *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
0304
0305
0306 rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
0307 if (rc) {
0308 free_sampling_buffer(sfb);
0309 debug_sprintf_event(sfdbg, 4, "%s: "
0310 "realloc_sampling_buffer failed with rc %i\n",
0311 __func__, rc);
0312 } else
0313 debug_sprintf_event(sfdbg, 4,
0314 "%s: tear %#lx dear %#lx\n", __func__,
0315 (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
0316 return rc;
0317 }
0318
0319 static void sfb_set_limits(unsigned long min, unsigned long max)
0320 {
0321 struct hws_qsi_info_block si;
0322
0323 CPUM_SF_MIN_SDB = min;
0324 CPUM_SF_MAX_SDB = max;
0325
0326 memset(&si, 0, sizeof(si));
0327 if (!qsi(&si))
0328 CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
0329 }
0330
0331 static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
0332 {
0333 return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
0334 : CPUM_SF_MAX_SDB;
0335 }
0336
0337 static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
0338 struct hw_perf_event *hwc)
0339 {
0340 if (!sfb->sdbt)
0341 return SFB_ALLOC_REG(hwc);
0342 if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
0343 return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
0344 return 0;
0345 }
0346
/* Returns 1 if sfb_pending_allocs() reports outstanding SDBs, else 0 */
static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) != 0;
}
0352
0353 static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
0354 {
0355
0356 num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
0357 if (num)
0358 SFB_ALLOC_REG(hwc) += num;
0359 }
0360
0361 static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
0362 {
0363 SFB_ALLOC_REG(hwc) = 0;
0364 sfb_account_allocs(num, hwc);
0365 }
0366
0367 static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
0368 {
0369 if (cpuhw->sfb.sdbt)
0370 free_sampling_buffer(&cpuhw->sfb);
0371 }
0372
0373 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
0374 {
0375 unsigned long n_sdb, freq;
0376 size_t sample_size;
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407 sample_size = sizeof(struct hws_basic_entry);
0408 freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
0409 n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419 sfb_init_allocs(n_sdb, hwc);
0420 if (sf_buffer_available(cpuhw))
0421 return 0;
0422
0423 debug_sprintf_event(sfdbg, 3,
0424 "%s: rate %lu f %lu sdb %lu/%lu"
0425 " sample_size %lu cpuhw %p\n", __func__,
0426 SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
0427 sample_size, cpuhw);
0428
0429 return alloc_sampling_buffer(&cpuhw->sfb,
0430 sfb_pending_allocs(&cpuhw->sfb, hwc));
0431 }
0432
0433 static unsigned long min_percent(unsigned int percent, unsigned long base,
0434 unsigned long min)
0435 {
0436 return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
0437 }
0438
/*
 * compute_sfb_extent() - number of SDBs to add for a given overflow ratio
 * @ratio:	overflow ratio as computed by sfb_account_overflows()
 * @base:	current number of SDBs
 *
 * Ratios up to 5 yield no extension.  Above that, the ratio selects a
 * (percent, cap) pair that is passed to min_percent() together with @base.
 */
static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	static const struct {
		unsigned long max_ratio;	/* inclusive upper bound */
		unsigned int percent;
		unsigned long cap;
	} steps[] = {
		{  25, 1, 1 },
		{  50, 1, 1 },
		{  75, 2, 2 },
		{ 100, 3, 3 },
		{ 250, 4, 4 },
	};
	unsigned int i;

	if (ratio <= 5)
		return 0;

	for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
		if (ratio <= steps[i].max_ratio)
			return min_percent(steps[i].percent, base,
					   steps[i].cap);
	}

	/* Everything above the last step */
	return min_percent(5, base, 8);
}
0461
0462 static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
0463 struct hw_perf_event *hwc)
0464 {
0465 unsigned long ratio, num;
0466
0467 if (!OVERFLOW_REG(hwc))
0468 return;
0469
0470
0471
0472
0473
0474
0475
0476
0477 ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
0478 sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
0479
0480
0481 num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
0482 if (num)
0483 sfb_account_allocs(num, hwc);
0484
0485 debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
0486 __func__, OVERFLOW_REG(hwc), ratio, num);
0487 OVERFLOW_REG(hwc) = 0;
0488 }
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500
0501 static void extend_sampling_buffer(struct sf_buffer *sfb,
0502 struct hw_perf_event *hwc)
0503 {
0504 unsigned long num, num_old;
0505 int rc;
0506
0507 num = sfb_pending_allocs(sfb, hwc);
0508 if (!num)
0509 return;
0510 num_old = sfb->num_sdb;
0511
0512
0513
0514
0515 sf_disable();
0516
0517
0518
0519
0520
0521
0522 rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
0523 if (rc)
0524 debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
0525 __func__, rc);
0526
0527 if (sfb_has_pending_allocs(sfb, hwc))
0528 debug_sprintf_event(sfdbg, 5, "%s: "
0529 "req %lu alloc %lu remaining %lu\n",
0530 __func__, num, sfb->num_sdb - num_old,
0531 sfb_pending_allocs(sfb, hwc));
0532 }
0533
0534
0535 static atomic_t num_events;
0536
0537 static DEFINE_MUTEX(pmc_reserve_mutex);
0538
0539 #define PMC_INIT 0
0540 #define PMC_RELEASE 1
0541 #define PMC_FAILURE 2
0542 static void setup_pmc_cpu(void *flags)
0543 {
0544 int err;
0545 struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
0546
0547 err = 0;
0548 switch (*((int *) flags)) {
0549 case PMC_INIT:
0550 memset(cpusf, 0, sizeof(*cpusf));
0551 err = qsi(&cpusf->qsi);
0552 if (err)
0553 break;
0554 cpusf->flags |= PMU_F_RESERVED;
0555 err = sf_disable();
0556 if (err)
0557 pr_err("Switching off the sampling facility failed "
0558 "with rc %i\n", err);
0559 debug_sprintf_event(sfdbg, 5,
0560 "%s: initialized: cpuhw %p\n", __func__,
0561 cpusf);
0562 break;
0563 case PMC_RELEASE:
0564 cpusf->flags &= ~PMU_F_RESERVED;
0565 err = sf_disable();
0566 if (err) {
0567 pr_err("Switching off the sampling facility failed "
0568 "with rc %i\n", err);
0569 } else
0570 deallocate_buffers(cpusf);
0571 debug_sprintf_event(sfdbg, 5,
0572 "%s: released: cpuhw %p\n", __func__,
0573 cpusf);
0574 break;
0575 }
0576 if (err)
0577 *((int *) flags) |= PMC_FAILURE;
0578 }
0579
0580 static void release_pmc_hardware(void)
0581 {
0582 int flags = PMC_RELEASE;
0583
0584 irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
0585 on_each_cpu(setup_pmc_cpu, &flags, 1);
0586 }
0587
0588 static int reserve_pmc_hardware(void)
0589 {
0590 int flags = PMC_INIT;
0591
0592 on_each_cpu(setup_pmc_cpu, &flags, 1);
0593 if (flags & PMC_FAILURE) {
0594 release_pmc_hardware();
0595 return -ENODEV;
0596 }
0597 irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
0598
0599 return 0;
0600 }
0601
0602 static void hw_perf_event_destroy(struct perf_event *event)
0603 {
0604
0605 if (!atomic_add_unless(&num_events, -1, 1)) {
0606 mutex_lock(&pmc_reserve_mutex);
0607 if (atomic_dec_return(&num_events) == 0)
0608 release_pmc_hardware();
0609 mutex_unlock(&pmc_reserve_mutex);
0610 }
0611 }
0612
0613 static void hw_init_period(struct hw_perf_event *hwc, u64 period)
0614 {
0615 hwc->sample_period = period;
0616 hwc->last_period = hwc->sample_period;
0617 local64_set(&hwc->period_left, hwc->sample_period);
0618 }
0619
0620 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
0621 unsigned long rate)
0622 {
0623 return clamp_t(unsigned long, rate,
0624 si->min_sampl_rate, si->max_sampl_rate);
0625 }
0626
0627 static u32 cpumsf_pid_type(struct perf_event *event,
0628 u32 pid, enum pid_type type)
0629 {
0630 struct task_struct *tsk;
0631
0632
0633 if (!pid)
0634 goto out;
0635
0636 tsk = find_task_by_pid_ns(pid, &init_pid_ns);
0637 pid = -1;
0638 if (tsk) {
0639
0640
0641
0642
0643 if (event->parent)
0644 event = event->parent;
0645 pid = __task_pid_nr_ns(tsk, type, event->ns);
0646
0647
0648
0649
0650 if (!pid && !pid_alive(tsk))
0651 pid = -1;
0652 }
0653 out:
0654 return pid;
0655 }
0656
0657 static void cpumsf_output_event_pid(struct perf_event *event,
0658 struct perf_sample_data *data,
0659 struct pt_regs *regs)
0660 {
0661 u32 pid;
0662 struct perf_event_header header;
0663 struct perf_output_handle handle;
0664
0665
0666
0667
0668
0669 pid = data->tid_entry.pid;
0670
0671
0672 rcu_read_lock();
0673
0674 perf_prepare_sample(&header, data, event, regs);
0675 if (perf_output_begin(&handle, data, event, header.size))
0676 goto out;
0677
0678
0679 data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
0680 data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
0681
0682 perf_output_sample(&handle, &header, data, event);
0683 perf_output_end(&handle);
0684 out:
0685 rcu_read_unlock();
0686 }
0687
0688 static unsigned long getrate(bool freq, unsigned long sample,
0689 struct hws_qsi_info_block *si)
0690 {
0691 unsigned long rate;
0692
0693 if (freq) {
0694 rate = freq_to_sample_rate(si, sample);
0695 rate = hw_limit_rate(si, rate);
0696 } else {
0697
0698
0699
0700
0701 rate = hw_limit_rate(si, sample);
0702
0703
0704
0705
0706
0707
0708
0709 if (sample_rate_to_freq(si, rate) >
0710 sysctl_perf_event_sample_rate) {
0711 debug_sprintf_event(sfdbg, 1, "%s: "
0712 "Sampling rate exceeds maximum "
0713 "perf sample rate\n", __func__);
0714 rate = 0;
0715 }
0716 }
0717 return rate;
0718 }
0719
0720
0721
0722
0723
0724
0725
0726
0727
0728
0729
0730
0731
0732
0733
0734
0735
0736
0737 static int __hw_perf_event_init_rate(struct perf_event *event,
0738 struct hws_qsi_info_block *si)
0739 {
0740 struct perf_event_attr *attr = &event->attr;
0741 struct hw_perf_event *hwc = &event->hw;
0742 unsigned long rate;
0743
0744 if (attr->freq) {
0745 if (!attr->sample_freq)
0746 return -EINVAL;
0747 rate = getrate(attr->freq, attr->sample_freq, si);
0748 attr->freq = 0;
0749 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
0750 } else {
0751 rate = getrate(attr->freq, attr->sample_period, si);
0752 if (!rate)
0753 return -EINVAL;
0754 }
0755 attr->sample_period = rate;
0756 SAMPL_RATE(hwc) = rate;
0757 hw_init_period(hwc, SAMPL_RATE(hwc));
0758 debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
0759 __func__, event->cpu, event->attr.sample_period,
0760 event->attr.freq, SAMPLE_FREQ_MODE(hwc));
0761 return 0;
0762 }
0763
0764 static int __hw_perf_event_init(struct perf_event *event)
0765 {
0766 struct cpu_hw_sf *cpuhw;
0767 struct hws_qsi_info_block si;
0768 struct perf_event_attr *attr = &event->attr;
0769 struct hw_perf_event *hwc = &event->hw;
0770 int cpu, err;
0771
0772
0773 err = 0;
0774 if (!atomic_inc_not_zero(&num_events)) {
0775 mutex_lock(&pmc_reserve_mutex);
0776 if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
0777 err = -EBUSY;
0778 else
0779 atomic_inc(&num_events);
0780 mutex_unlock(&pmc_reserve_mutex);
0781 }
0782 event->destroy = hw_perf_event_destroy;
0783
0784 if (err)
0785 goto out;
0786
0787
0788
0789
0790
0791
0792
0793
0794
0795
0796 memset(&si, 0, sizeof(si));
0797 cpuhw = NULL;
0798 if (event->cpu == -1)
0799 qsi(&si);
0800 else {
0801
0802
0803
0804 cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
0805 si = cpuhw->qsi;
0806 }
0807
0808
0809
0810
0811
0812 if (!si.as) {
0813 err = -ENOENT;
0814 goto out;
0815 }
0816
0817 if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
0818 pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
0819 err = -EBUSY;
0820 goto out;
0821 }
0822
0823
0824 SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
0825
0826
0827
0828
0829 if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
0830 if (!si.ad) {
0831 err = -EPERM;
0832 goto out;
0833 }
0834 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
0835 }
0836
0837
0838 if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
0839 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
0840
0841 err = __hw_perf_event_init_rate(event, &si);
0842 if (err)
0843 goto out;
0844
0845
0846 hwc->extra_reg.reg = REG_OVERFLOW;
0847 OVERFLOW_REG(hwc) = 0;
0848
0849
0850 if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
0851 return 0;
0852
0853
0854
0855
0856
0857
0858 if (cpuhw)
0859
0860 err = allocate_buffers(cpuhw, hwc);
0861 else {
0862
0863
0864
0865 for_each_online_cpu(cpu) {
0866 cpuhw = &per_cpu(cpu_hw_sf, cpu);
0867 err = allocate_buffers(cpuhw, hwc);
0868 if (err)
0869 break;
0870 }
0871 }
0872
0873
0874
0875
0876
0877 if (event->attr.sample_type & PERF_SAMPLE_TID)
0878 if (is_default_overflow_handler(event))
0879 event->overflow_handler = cpumsf_output_event_pid;
0880 out:
0881 return err;
0882 }
0883
0884 static bool is_callchain_event(struct perf_event *event)
0885 {
0886 u64 sample_type = event->attr.sample_type;
0887
0888 return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
0889 PERF_SAMPLE_STACK_USER);
0890 }
0891
0892 static int cpumsf_pmu_event_init(struct perf_event *event)
0893 {
0894 int err;
0895
0896
0897
0898 if (has_branch_stack(event) || is_callchain_event(event))
0899 return -EOPNOTSUPP;
0900
0901 switch (event->attr.type) {
0902 case PERF_TYPE_RAW:
0903 if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
0904 (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
0905 return -ENOENT;
0906 break;
0907 case PERF_TYPE_HARDWARE:
0908
0909
0910
0911
0912
0913 if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
0914 return -ENOENT;
0915 if (!is_sampling_event(event))
0916 return -ENOENT;
0917 break;
0918 default:
0919 return -ENOENT;
0920 }
0921
0922
0923 if (event->cpu >= 0 && !cpu_online(event->cpu))
0924 return -ENODEV;
0925
0926
0927
0928
0929 if (event->attr.exclude_hv)
0930 event->attr.exclude_hv = 0;
0931 if (event->attr.exclude_idle)
0932 event->attr.exclude_idle = 0;
0933
0934 err = __hw_perf_event_init(event);
0935 if (unlikely(err))
0936 if (event->destroy)
0937 event->destroy(event);
0938 return err;
0939 }
0940
0941 static void cpumsf_pmu_enable(struct pmu *pmu)
0942 {
0943 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
0944 struct hw_perf_event *hwc;
0945 int err;
0946
0947 if (cpuhw->flags & PMU_F_ENABLED)
0948 return;
0949
0950 if (cpuhw->flags & PMU_F_ERR_MASK)
0951 return;
0952
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964 if (cpuhw->event) {
0965 hwc = &cpuhw->event->hw;
0966 if (!(SAMPL_DIAG_MODE(hwc))) {
0967
0968
0969
0970
0971 sfb_account_overflows(cpuhw, hwc);
0972 extend_sampling_buffer(&cpuhw->sfb, hwc);
0973 }
0974
0975 cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
0976 }
0977
0978
0979 cpuhw->flags |= PMU_F_ENABLED;
0980 barrier();
0981
0982 err = lsctl(&cpuhw->lsctl);
0983 if (err) {
0984 cpuhw->flags &= ~PMU_F_ENABLED;
0985 pr_err("Loading sampling controls failed: op %i err %i\n",
0986 1, err);
0987 return;
0988 }
0989
0990
0991 lpp(&S390_lowcore.lpp);
0992
0993 debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
0994 "interval %#lx tear %#lx dear %#lx\n", __func__,
0995 cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
0996 cpuhw->lsctl.cd, cpuhw->lsctl.interval,
0997 cpuhw->lsctl.tear, cpuhw->lsctl.dear);
0998 }
0999
1000 static void cpumsf_pmu_disable(struct pmu *pmu)
1001 {
1002 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1003 struct hws_lsctl_request_block inactive;
1004 struct hws_qsi_info_block si;
1005 int err;
1006
1007 if (!(cpuhw->flags & PMU_F_ENABLED))
1008 return;
1009
1010 if (cpuhw->flags & PMU_F_ERR_MASK)
1011 return;
1012
1013
1014 inactive = cpuhw->lsctl;
1015 inactive.cs = 0;
1016 inactive.cd = 0;
1017
1018 err = lsctl(&inactive);
1019 if (err) {
1020 pr_err("Loading sampling controls failed: op %i err %i\n",
1021 2, err);
1022 return;
1023 }
1024
1025
1026 err = qsi(&si);
1027 if (!err) {
1028
1029
1030
1031
1032
1033 if (si.es) {
1034 cpuhw->lsctl.tear = si.tear;
1035 cpuhw->lsctl.dear = si.dear;
1036 }
1037 } else
1038 debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
1039 __func__, err);
1040
1041 cpuhw->flags &= ~PMU_F_ENABLED;
1042 }
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053 static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
1054 struct perf_sf_sde_regs *sde_regs)
1055 {
1056 if (event->attr.exclude_user && user_mode(regs))
1057 return 1;
1058 if (event->attr.exclude_kernel && !user_mode(regs))
1059 return 1;
1060 if (event->attr.exclude_guest && sde_regs->in_guest)
1061 return 1;
1062 if (event->attr.exclude_host && !sde_regs->in_guest)
1063 return 1;
1064 return 0;
1065 }
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078 static int perf_push_sample(struct perf_event *event,
1079 struct hws_basic_entry *basic)
1080 {
1081 int overflow;
1082 struct pt_regs regs;
1083 struct perf_sf_sde_regs *sde_regs;
1084 struct perf_sample_data data;
1085
1086
1087 perf_sample_data_init(&data, 0, event->hw.last_period);
1088
1089
1090
1091
1092
1093
1094 memset(®s, 0, sizeof(regs));
1095 regs.int_code = 0x1407;
1096 regs.int_parm = CPU_MF_INT_SF_PRA;
1097 sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long;
1098
1099 psw_bits(regs.psw).ia = basic->ia;
1100 psw_bits(regs.psw).dat = basic->T;
1101 psw_bits(regs.psw).wait = basic->W;
1102 psw_bits(regs.psw).pstate = basic->P;
1103 psw_bits(regs.psw).as = basic->AS;
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116 switch (basic->CL) {
1117 case 1:
1118 sde_regs->in_guest = 0;
1119 break;
1120 case 2:
1121 sde_regs->in_guest = 1;
1122 break;
1123 default:
1124 if (basic->gpp || basic->prim_asn != 0xffff)
1125 sde_regs->in_guest = 1;
1126 break;
1127 }
1128
1129
1130
1131
1132
1133 data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
1134
1135 overflow = 0;
1136 if (perf_exclude_event(event, ®s, sde_regs))
1137 goto out;
1138 if (perf_event_overflow(event, &data, ®s)) {
1139 overflow = 1;
1140 event->pmu->stop(event, 0);
1141 }
1142 perf_event_update_userpage(event);
1143 out:
1144 return overflow;
1145 }
1146
1147 static void perf_event_count_update(struct perf_event *event, u64 count)
1148 {
1149 local64_add(count, &event->count);
1150 }
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172 static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
1173 unsigned long long *overflow)
1174 {
1175 struct hws_trailer_entry *te;
1176 struct hws_basic_entry *sample;
1177
1178 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
1179 sample = (struct hws_basic_entry *) *sdbt;
1180 while ((unsigned long *) sample < (unsigned long *) te) {
1181
1182 if (!sample->def || sample->LS)
1183 break;
1184
1185
1186 perf_event_count_update(event, SAMPL_RATE(&event->hw));
1187
1188
1189 if (sample->def == 0x0001) {
1190
1191
1192
1193
1194 if (!*overflow) {
1195
1196 if (sample->I == 0 && sample->W == 0) {
1197
1198 *overflow = perf_push_sample(event,
1199 sample);
1200 }
1201 } else
1202
1203 *overflow += 1;
1204 } else {
1205 debug_sprintf_event(sfdbg, 4,
1206 "%s: Found unknown"
1207 " sampling data entry: te->f %i"
1208 " basic.def %#4x (%p)\n", __func__,
1209 te->f, sample->def, sample);
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220 if (!te->f)
1221 break;
1222 }
1223
1224
1225 sample->def = 0;
1226 sample++;
1227 }
1228 }
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244 static void hw_perf_event_update(struct perf_event *event, int flush_all)
1245 {
1246 struct hw_perf_event *hwc = &event->hw;
1247 struct hws_trailer_entry *te;
1248 unsigned long *sdbt;
1249 unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
1250 int done;
1251
1252
1253
1254
1255
1256 if (SAMPL_DIAG_MODE(&event->hw))
1257 return;
1258
1259 if (flush_all && SDB_FULL_BLOCKS(hwc))
1260 flush_all = 0;
1261
1262 sdbt = (unsigned long *) TEAR_REG(hwc);
1263 done = event_overflow = sampl_overflow = num_sdb = 0;
1264 while (!done) {
1265
1266 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
1267
1268
1269 if (!te->f) {
1270 done = 1;
1271 if (!flush_all)
1272 break;
1273 }
1274
1275
1276 if (te->overflow)
1277
1278
1279
1280
1281 sampl_overflow += te->overflow;
1282
1283
1284 debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
1285 "overflow %llu timestamp %#llx\n",
1286 __func__, (unsigned long)sdbt, te->overflow,
1287 (te->f) ? trailer_timestamp(te) : 0ULL);
1288
1289
1290
1291
1292
1293 hw_collect_samples(event, sdbt, &event_overflow);
1294 num_sdb++;
1295
1296
1297 do {
1298 te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
1299 te_flags |= SDB_TE_ALERT_REQ_MASK;
1300 } while (!cmpxchg_double(&te->flags, &te->overflow,
1301 te->flags, te->overflow,
1302 te_flags, 0ULL));
1303
1304
1305 sdbt++;
1306 if (is_link_entry(sdbt))
1307 sdbt = get_next_sdbt(sdbt);
1308
1309
1310 TEAR_REG(hwc) = (unsigned long) sdbt;
1311
1312
1313
1314
1315 if (flush_all && done)
1316 break;
1317 }
1318
1319
1320 if (sampl_overflow)
1321 OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
1322 sampl_overflow, 1 + num_sdb);
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332 if (event_overflow) {
1333 SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
1334 debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
1335 __func__,
1336 DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
1337 }
1338
1339 if (sampl_overflow || event_overflow)
1340 debug_sprintf_event(sfdbg, 4, "%s: "
1341 "overflows: sample %llu event %llu"
1342 " total %llu num_sdb %llu\n",
1343 __func__, sampl_overflow, event_overflow,
1344 OVERFLOW_REG(hwc), num_sdb);
1345 }
1346
/*
 * Helpers for AUX buffer positions.  All arguments are fully parenthesised
 * so that expressions can be passed safely.
 *
 * AUX_SDB_INDEX()     - reduce SDB number @i to an index into the buffer
 * AUX_SDB_NUM()       - number of SDBs in [@start, @end]; 0 if @end < @start
 * AUX_SDB_NUM_ALERT() - number of SDBs from head up to the alert mark
 * AUX_SDB_NUM_EMPTY() - number of SDBs from head up to the empty mark
 */
#define AUX_SDB_INDEX(aux, i) ((i) % (aux)->sfb.num_sdb)
#define AUX_SDB_NUM(aux, start, end) \
	((end) >= (start) ? (end) - (start) + 1 : 0)
#define AUX_SDB_NUM_ALERT(aux) \
	AUX_SDB_NUM(aux, (aux)->head, (aux)->alert_mark)
#define AUX_SDB_NUM_EMPTY(aux) \
	AUX_SDB_NUM(aux, (aux)->head, (aux)->empty_mark)
1351
1352
1353
1354
1355 static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
1356 unsigned long index)
1357 {
1358 unsigned long sdb;
1359
1360 index = AUX_SDB_INDEX(aux, index);
1361 sdb = aux->sdb_index[index];
1362 return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
1363 }
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374 static void aux_output_end(struct perf_output_handle *handle)
1375 {
1376 unsigned long i, range_scan, idx;
1377 struct aux_buffer *aux;
1378 struct hws_trailer_entry *te;
1379
1380 aux = perf_get_aux(handle);
1381 if (!aux)
1382 return;
1383
1384 range_scan = AUX_SDB_NUM_ALERT(aux);
1385 for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
1386 te = aux_sdb_trailer(aux, idx);
1387 if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
1388 break;
1389 }
1390
1391 perf_aux_output_end(handle, i << PAGE_SHIFT);
1392
1393
1394 te = aux_sdb_trailer(aux, aux->alert_mark);
1395 te->flags &= ~SDB_TE_ALERT_REQ_MASK;
1396
1397 debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
1398 __func__, i, range_scan, aux->head);
1399 }
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410 static int aux_output_begin(struct perf_output_handle *handle,
1411 struct aux_buffer *aux,
1412 struct cpu_hw_sf *cpuhw)
1413 {
1414 unsigned long range;
1415 unsigned long i, range_scan, idx;
1416 unsigned long head, base, offset;
1417 struct hws_trailer_entry *te;
1418
1419 if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
1420 return -EINVAL;
1421
1422 aux->head = handle->head >> PAGE_SHIFT;
1423 range = (handle->size + 1) >> PAGE_SHIFT;
1424 if (range <= 1)
1425 return -ENOMEM;
1426
1427
1428
1429
1430
1431 debug_sprintf_event(sfdbg, 6,
1432 "%s: range %ld head %ld alert %ld empty %ld\n",
1433 __func__, range, aux->head, aux->alert_mark,
1434 aux->empty_mark);
1435 if (range > AUX_SDB_NUM_EMPTY(aux)) {
1436 range_scan = range - AUX_SDB_NUM_EMPTY(aux);
1437 idx = aux->empty_mark + 1;
1438 for (i = 0; i < range_scan; i++, idx++) {
1439 te = aux_sdb_trailer(aux, idx);
1440 te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
1441 SDB_TE_ALERT_REQ_MASK);
1442 te->overflow = 0;
1443 }
1444
1445 aux->empty_mark = aux->head + range - 1;
1446 }
1447
1448
1449 aux->alert_mark = aux->head + range/2 - 1;
1450 te = aux_sdb_trailer(aux, aux->alert_mark);
1451 te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
1452
1453
1454 head = AUX_SDB_INDEX(aux, aux->head);
1455 base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
1456 offset = head % CPUM_SF_SDB_PER_TABLE;
1457 cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
1458 cpuhw->lsctl.dear = aux->sdb_index[head];
1459
1460 debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
1461 "index %ld tear %#lx dear %#lx\n", __func__,
1462 aux->head, aux->alert_mark, aux->empty_mark,
1463 head / CPUM_SF_SDB_PER_TABLE,
1464 cpuhw->lsctl.tear, cpuhw->lsctl.dear);
1465
1466 return 0;
1467 }
1468
1469
1470
1471
1472
1473
1474
1475 static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
1476 unsigned long long *overflow)
1477 {
1478 unsigned long long orig_overflow, orig_flags, new_flags;
1479 struct hws_trailer_entry *te;
1480
1481 te = aux_sdb_trailer(aux, alert_index);
1482 do {
1483 orig_flags = te->flags;
1484 *overflow = orig_overflow = te->overflow;
1485 if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
1486
1487
1488
1489
1490
1491 return false;
1492 }
1493 new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
1494 } while (!cmpxchg_double(&te->flags, &te->overflow,
1495 orig_flags, orig_overflow,
1496 new_flags, 0ULL));
1497 return true;
1498 }
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522 static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
1523 unsigned long long *overflow)
1524 {
1525 unsigned long long orig_overflow, orig_flags, new_flags;
1526 unsigned long i, range_scan, idx, idx_old;
1527 struct hws_trailer_entry *te;
1528
1529 debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
1530 "empty %ld\n", __func__, range, aux->head,
1531 aux->alert_mark, aux->empty_mark);
1532 if (range <= AUX_SDB_NUM_EMPTY(aux))
1533
1534
1535
1536
1537
1538 return aux_set_alert(aux, aux->alert_mark, overflow);
1539
1540 if (aux->alert_mark <= aux->empty_mark)
1541
1542
1543
1544
1545 if (!aux_set_alert(aux, aux->alert_mark, overflow))
1546 return false;
1547
1548
1549
1550
1551
1552
1553 range_scan = range - AUX_SDB_NUM_EMPTY(aux);
1554 idx_old = idx = aux->empty_mark + 1;
1555 for (i = 0; i < range_scan; i++, idx++) {
1556 te = aux_sdb_trailer(aux, idx);
1557 do {
1558 orig_flags = te->flags;
1559 orig_overflow = te->overflow;
1560 new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
1561 if (idx == aux->alert_mark)
1562 new_flags |= SDB_TE_ALERT_REQ_MASK;
1563 else
1564 new_flags &= ~SDB_TE_ALERT_REQ_MASK;
1565 } while (!cmpxchg_double(&te->flags, &te->overflow,
1566 orig_flags, orig_overflow,
1567 new_flags, 0ULL));
1568 *overflow += orig_overflow;
1569 }
1570
1571
1572 aux->empty_mark = aux->head + range - 1;
1573
1574 debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
1575 "empty %ld\n", __func__, range_scan, idx_old,
1576 idx - 1, aux->empty_mark);
1577 return true;
1578 }
1579
1580
1581
1582
1583 static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
1584 {
1585 struct aux_buffer *aux;
1586 int done = 0;
1587 unsigned long range = 0, size;
1588 unsigned long long overflow = 0;
1589 struct perf_output_handle *handle = &cpuhw->handle;
1590 unsigned long num_sdb;
1591
1592 aux = perf_get_aux(handle);
1593 if (WARN_ON_ONCE(!aux))
1594 return;
1595
1596
1597 size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
1598 debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
1599 size >> PAGE_SHIFT);
1600 perf_aux_output_end(handle, size);
1601
1602 num_sdb = aux->sfb.num_sdb;
1603 while (!done) {
1604
1605 aux = perf_aux_output_begin(handle, cpuhw->event);
1606 if (handle->size == 0) {
1607 pr_err("The AUX buffer with %lu pages for the "
1608 "diagnostic-sampling mode is full\n",
1609 num_sdb);
1610 debug_sprintf_event(sfdbg, 1,
1611 "%s: AUX buffer used up\n",
1612 __func__);
1613 break;
1614 }
1615 if (WARN_ON_ONCE(!aux))
1616 return;
1617
1618
1619 aux->head = handle->head >> PAGE_SHIFT;
1620 range = (handle->size + 1) >> PAGE_SHIFT;
1621 if (range == 1)
1622 aux->alert_mark = aux->head;
1623 else
1624 aux->alert_mark = aux->head + range/2 - 1;
1625
1626 if (aux_reset_buffer(aux, range, &overflow)) {
1627 if (!overflow) {
1628 done = 1;
1629 break;
1630 }
1631 size = range << PAGE_SHIFT;
1632 perf_aux_output_end(&cpuhw->handle, size);
1633 pr_err("Sample data caused the AUX buffer with %lu "
1634 "pages to overflow\n", aux->sfb.num_sdb);
1635 debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
1636 "overflow %lld\n", __func__,
1637 aux->head, range, overflow);
1638 } else {
1639 size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
1640 perf_aux_output_end(&cpuhw->handle, size);
1641 debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
1642 "already full, try another\n",
1643 __func__,
1644 aux->head, aux->alert_mark);
1645 }
1646 }
1647
1648 if (done)
1649 debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
1650 "empty %ld\n", __func__, aux->head,
1651 aux->alert_mark, aux->empty_mark);
1652 }
1653
1654
1655
1656
1657 static void aux_buffer_free(void *data)
1658 {
1659 struct aux_buffer *aux = data;
1660 unsigned long i, num_sdbt;
1661
1662 if (!aux)
1663 return;
1664
1665
1666 num_sdbt = aux->sfb.num_sdbt;
1667 for (i = 0; i < num_sdbt; i++)
1668 free_page(aux->sdbt_index[i]);
1669
1670 kfree(aux->sdbt_index);
1671 kfree(aux->sdb_index);
1672 kfree(aux);
1673
1674 debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
1675 }
1676
1677 static void aux_sdb_init(unsigned long sdb)
1678 {
1679 struct hws_trailer_entry *te;
1680
1681 te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
1682
1683
1684 te->clock_base = 1;
1685 te->progusage2 = tod_clock_base.tod;
1686 }
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702 static void *aux_buffer_setup(struct perf_event *event, void **pages,
1703 int nr_pages, bool snapshot)
1704 {
1705 struct sf_buffer *sfb;
1706 struct aux_buffer *aux;
1707 unsigned long *new, *tail;
1708 int i, n_sdbt;
1709
1710 if (!nr_pages || !pages)
1711 return NULL;
1712
1713 if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
1714 pr_err("AUX buffer size (%i pages) is larger than the "
1715 "maximum sampling buffer limit\n",
1716 nr_pages);
1717 return NULL;
1718 } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
1719 pr_err("AUX buffer size (%i pages) is less than the "
1720 "minimum sampling buffer limit\n",
1721 nr_pages);
1722 return NULL;
1723 }
1724
1725
1726 aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
1727 if (!aux)
1728 goto no_aux;
1729 sfb = &aux->sfb;
1730
1731
1732 n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
1733 aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
1734 if (!aux->sdbt_index)
1735 goto no_sdbt_index;
1736
1737
1738 aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
1739 if (!aux->sdb_index)
1740 goto no_sdb_index;
1741
1742
1743 sfb->num_sdbt = 0;
1744 sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
1745 if (!sfb->sdbt)
1746 goto no_sdbt;
1747 aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
1748 tail = sfb->tail = sfb->sdbt;
1749
1750
1751
1752
1753
1754 for (i = 0; i < nr_pages; i++, tail++) {
1755 if (require_table_link(tail)) {
1756 new = (unsigned long *) get_zeroed_page(GFP_KERNEL);
1757 if (!new)
1758 goto no_sdbt;
1759 aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
1760
1761 *tail = (unsigned long)(void *) new + 1;
1762 tail = new;
1763 }
1764
1765 *tail = (unsigned long)pages[i];
1766 aux->sdb_index[i] = (unsigned long)pages[i];
1767 aux_sdb_init((unsigned long)pages[i]);
1768 }
1769 sfb->num_sdb = nr_pages;
1770
1771
1772 *tail = (unsigned long) sfb->sdbt + 1;
1773 sfb->tail = tail;
1774
1775
1776
1777
1778
1779
1780 aux->empty_mark = sfb->num_sdb - 1;
1781
1782 debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
1783 sfb->num_sdbt, sfb->num_sdb);
1784
1785 return aux;
1786
1787 no_sdbt:
1788
1789 for (i = 0; i < sfb->num_sdbt; i++)
1790 free_page(aux->sdbt_index[i]);
1791 kfree(aux->sdb_index);
1792 no_sdb_index:
1793 kfree(aux->sdbt_index);
1794 no_sdbt_index:
1795 kfree(aux);
1796 no_aux:
1797 return NULL;
1798 }
1799
/*
 * cpumsf_pmu_read() - .read callback of the sampling PMU
 * @event:	perf event (not used)
 *
 * The callback has an empty body; no work is done here.
 */
static void cpumsf_pmu_read(struct perf_event *event)
{
	/* nothing to do */
}
1804
1805
1806
1807
1808
1809 static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
1810 {
1811 struct hws_qsi_info_block si;
1812 unsigned long rate;
1813 bool do_freq;
1814
1815 memset(&si, 0, sizeof(si));
1816 if (event->cpu == -1) {
1817 if (qsi(&si))
1818 return -ENODEV;
1819 } else {
1820
1821
1822
1823 struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
1824
1825 si = cpuhw->qsi;
1826 }
1827
1828 do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
1829 rate = getrate(do_freq, value, &si);
1830 if (!rate)
1831 return -EINVAL;
1832
1833 event->attr.sample_period = rate;
1834 SAMPL_RATE(&event->hw) = rate;
1835 hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
1836 debug_sprintf_event(sfdbg, 4, "%s:"
1837 " cpu %d value %#llx period %#llx freq %d\n",
1838 __func__, event->cpu, value,
1839 event->attr.sample_period, do_freq);
1840 return 0;
1841 }
1842
1843
1844
1845
1846 static void cpumsf_pmu_start(struct perf_event *event, int flags)
1847 {
1848 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1849
1850 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1851 return;
1852
1853 if (flags & PERF_EF_RELOAD)
1854 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1855
1856 perf_pmu_disable(event->pmu);
1857 event->hw.state = 0;
1858 cpuhw->lsctl.cs = 1;
1859 if (SAMPL_DIAG_MODE(&event->hw))
1860 cpuhw->lsctl.cd = 1;
1861 perf_pmu_enable(event->pmu);
1862 }
1863
1864
1865
1866
1867 static void cpumsf_pmu_stop(struct perf_event *event, int flags)
1868 {
1869 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1870
1871 if (event->hw.state & PERF_HES_STOPPED)
1872 return;
1873
1874 perf_pmu_disable(event->pmu);
1875 cpuhw->lsctl.cs = 0;
1876 cpuhw->lsctl.cd = 0;
1877 event->hw.state |= PERF_HES_STOPPED;
1878
1879 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
1880 hw_perf_event_update(event, 1);
1881 event->hw.state |= PERF_HES_UPTODATE;
1882 }
1883 perf_pmu_enable(event->pmu);
1884 }
1885
1886 static int cpumsf_pmu_add(struct perf_event *event, int flags)
1887 {
1888 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1889 struct aux_buffer *aux;
1890 int err;
1891
1892 if (cpuhw->flags & PMU_F_IN_USE)
1893 return -EAGAIN;
1894
1895 if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
1896 return -EINVAL;
1897
1898 err = 0;
1899 perf_pmu_disable(event->pmu);
1900
1901 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1902
1903
1904
1905
1906
1907
1908 cpuhw->lsctl.s = 0;
1909 cpuhw->lsctl.h = 1;
1910 cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
1911 if (!SAMPL_DIAG_MODE(&event->hw)) {
1912 cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
1913 cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
1914 TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
1915 }
1916
1917
1918
1919 if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
1920 err = -EAGAIN;
1921 goto out;
1922 }
1923 if (SAMPL_DIAG_MODE(&event->hw)) {
1924 aux = perf_aux_output_begin(&cpuhw->handle, event);
1925 if (!aux) {
1926 err = -EINVAL;
1927 goto out;
1928 }
1929 err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
1930 if (err)
1931 goto out;
1932 cpuhw->lsctl.ed = 1;
1933 }
1934 cpuhw->lsctl.es = 1;
1935
1936
1937 cpuhw->event = event;
1938 cpuhw->flags |= PMU_F_IN_USE;
1939
1940 if (flags & PERF_EF_START)
1941 cpumsf_pmu_start(event, PERF_EF_RELOAD);
1942 out:
1943 perf_event_update_userpage(event);
1944 perf_pmu_enable(event->pmu);
1945 return err;
1946 }
1947
1948 static void cpumsf_pmu_del(struct perf_event *event, int flags)
1949 {
1950 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1951
1952 perf_pmu_disable(event->pmu);
1953 cpumsf_pmu_stop(event, PERF_EF_UPDATE);
1954
1955 cpuhw->lsctl.es = 0;
1956 cpuhw->lsctl.ed = 0;
1957 cpuhw->flags &= ~PMU_F_IN_USE;
1958 cpuhw->event = NULL;
1959
1960 if (SAMPL_DIAG_MODE(&event->hw))
1961 aux_output_end(&cpuhw->handle);
1962 perf_event_update_userpage(event);
1963 perf_pmu_enable(event->pmu);
1964 }
1965
1966 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
1967 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985 enum {
1986 SF_CYCLES_BASIC_ATTR_IDX = 0,
1987 SF_CYCLES_BASIC_DIAG_ATTR_IDX,
1988 SF_CYCLES_ATTR_MAX
1989 };
1990
1991 static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
1992 [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
1993 };
1994
1995 PMU_FORMAT_ATTR(event, "config:0-63");
1996
1997 static struct attribute *cpumsf_pmu_format_attr[] = {
1998 &format_attr_event.attr,
1999 NULL,
2000 };
2001
2002 static struct attribute_group cpumsf_pmu_events_group = {
2003 .name = "events",
2004 .attrs = cpumsf_pmu_events_attr,
2005 };
2006
2007 static struct attribute_group cpumsf_pmu_format_group = {
2008 .name = "format",
2009 .attrs = cpumsf_pmu_format_attr,
2010 };
2011
2012 static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
2013 &cpumsf_pmu_events_group,
2014 &cpumsf_pmu_format_group,
2015 NULL,
2016 };
2017
2018 static struct pmu cpumf_sampling = {
2019 .pmu_enable = cpumsf_pmu_enable,
2020 .pmu_disable = cpumsf_pmu_disable,
2021
2022 .event_init = cpumsf_pmu_event_init,
2023 .add = cpumsf_pmu_add,
2024 .del = cpumsf_pmu_del,
2025
2026 .start = cpumsf_pmu_start,
2027 .stop = cpumsf_pmu_stop,
2028 .read = cpumsf_pmu_read,
2029
2030 .attr_groups = cpumsf_pmu_attr_groups,
2031
2032 .setup_aux = aux_buffer_setup,
2033 .free_aux = aux_buffer_free,
2034
2035 .check_period = cpumsf_pmu_check_period,
2036 };
2037
2038 static void cpumf_measurement_alert(struct ext_code ext_code,
2039 unsigned int alert, unsigned long unused)
2040 {
2041 struct cpu_hw_sf *cpuhw;
2042
2043 if (!(alert & CPU_MF_INT_SF_MASK))
2044 return;
2045 inc_irq_stat(IRQEXT_CMS);
2046 cpuhw = this_cpu_ptr(&cpu_hw_sf);
2047
2048
2049
2050 if (!(cpuhw->flags & PMU_F_RESERVED))
2051 return;
2052
2053
2054
2055
2056
2057 if (alert & CPU_MF_INT_SF_PRA) {
2058 if (cpuhw->flags & PMU_F_IN_USE)
2059 if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
2060 hw_collect_aux(cpuhw);
2061 else
2062 hw_perf_event_update(cpuhw->event, 0);
2063 else
2064 WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
2065 }
2066
2067
2068 if (alert != CPU_MF_INT_SF_PRA)
2069 debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
2070 alert);
2071
2072
2073 if (alert & CPU_MF_INT_SF_SACA)
2074 qsi(&cpuhw->qsi);
2075
2076
2077 if (alert & CPU_MF_INT_SF_LSDA) {
2078 pr_err("Sample data was lost\n");
2079 cpuhw->flags |= PMU_F_ERR_LSDA;
2080 sf_disable();
2081 }
2082
2083
2084 if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
2085 pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
2086 alert);
2087 cpuhw->flags |= PMU_F_ERR_IBE;
2088 sf_disable();
2089 }
2090 }
2091
2092 static int cpusf_pmu_setup(unsigned int cpu, int flags)
2093 {
2094
2095
2096
2097 if (!atomic_read(&num_events))
2098 return 0;
2099
2100 local_irq_disable();
2101 setup_pmc_cpu(&flags);
2102 local_irq_enable();
2103 return 0;
2104 }
2105
2106 static int s390_pmu_sf_online_cpu(unsigned int cpu)
2107 {
2108 return cpusf_pmu_setup(cpu, PMC_INIT);
2109 }
2110
2111 static int s390_pmu_sf_offline_cpu(unsigned int cpu)
2112 {
2113 return cpusf_pmu_setup(cpu, PMC_RELEASE);
2114 }
2115
2116 static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
2117 {
2118 if (!cpum_sf_avail())
2119 return -ENODEV;
2120 return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
2121 }
2122
2123 static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
2124 {
2125 int rc;
2126 unsigned long min, max;
2127
2128 if (!cpum_sf_avail())
2129 return -ENODEV;
2130 if (!val || !strlen(val))
2131 return -EINVAL;
2132
2133
2134 min = CPUM_SF_MIN_SDB;
2135 max = CPUM_SF_MAX_SDB;
2136 if (strchr(val, ','))
2137 rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
2138 else
2139 rc = kstrtoul(val, 10, &max);
2140
2141 if (min < 2 || min >= max || max > get_num_physpages())
2142 rc = -EINVAL;
2143 if (rc)
2144 return rc;
2145
2146 sfb_set_limits(min, max);
2147 pr_info("The sampling buffer limits have changed to: "
2148 "min %lu max %lu (diag %lu)\n",
2149 CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
2150 return 0;
2151 }
2152
2153 #define param_check_sfb_size(name, p) __param_check(name, p, void)
2154 static const struct kernel_param_ops param_ops_sfb_size = {
2155 .set = param_set_sfb_size,
2156 .get = param_get_sfb_size,
2157 };
2158
2159 #define RS_INIT_FAILURE_QSI 0x0001
2160 #define RS_INIT_FAILURE_BSDES 0x0002
2161 #define RS_INIT_FAILURE_ALRT 0x0003
2162 #define RS_INIT_FAILURE_PERF 0x0004
2163 static void __init pr_cpumsf_err(unsigned int reason)
2164 {
2165 pr_err("Sampling facility support for perf is not available: "
2166 "reason %#x\n", reason);
2167 }
2168
2169 static int __init init_cpum_sampling_pmu(void)
2170 {
2171 struct hws_qsi_info_block si;
2172 int err;
2173
2174 if (!cpum_sf_avail())
2175 return -ENODEV;
2176
2177 memset(&si, 0, sizeof(si));
2178 if (qsi(&si)) {
2179 pr_cpumsf_err(RS_INIT_FAILURE_QSI);
2180 return -ENODEV;
2181 }
2182
2183 if (!si.as && !si.ad)
2184 return -ENODEV;
2185
2186 if (si.bsdes != sizeof(struct hws_basic_entry)) {
2187 pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
2188 return -EINVAL;
2189 }
2190
2191 if (si.ad) {
2192 sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
2193
2194
2195
2196 cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
2197 CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
2198 }
2199
2200 sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
2201 if (!sfdbg) {
2202 pr_err("Registering for s390dbf failed\n");
2203 return -ENOMEM;
2204 }
2205 debug_register_view(sfdbg, &debug_sprintf_view);
2206
2207 err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
2208 cpumf_measurement_alert);
2209 if (err) {
2210 pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
2211 debug_unregister(sfdbg);
2212 goto out;
2213 }
2214
2215 err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
2216 if (err) {
2217 pr_cpumsf_err(RS_INIT_FAILURE_PERF);
2218 unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
2219 cpumf_measurement_alert);
2220 debug_unregister(sfdbg);
2221 goto out;
2222 }
2223
2224 cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
2225 s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
2226 out:
2227 return err;
2228 }
2229
2230 arch_initcall(init_cpum_sampling_pmu);
2231 core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);