/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

static cpumask_t i915_pmu_cpumask;
static unsigned int i915_pmu_target_cpu = -1;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int other_bit(const u64 config)
{
	unsigned int val;

	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
		break;
	case I915_PMU_REQUESTED_FREQUENCY:
		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
		break;
	case I915_PMU_RC6_RESIDENCY:
		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
		break;
	default:
		/*
		 * Events that do not require the sampling timer are
		 * handled directly when the counter is read, so they
		 * do not get a bit in the enable mask.
		 */
		return -1;
	}

	return I915_ENGINE_SAMPLE_COUNT + val;
}

static unsigned int config_bit(const u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return other_bit(config);
}

static u64 config_mask(u64 config)
{
	return BIT_ULL(config_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_bit(struct perf_event *event)
{
	return config_bit(event->attr.config);
}

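/*
 * Work out whether the internal sampling timer is needed, based on which
 * events are enabled and whether the GPU is currently active or parked.
 */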
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	u32 enable;

	/*
	 * Only some counters need the sampling timer:
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);

	return val;
}

static inline s64 ktime_since_raw(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}

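/*
 * Report RC6 residency read from hardware while the GT is awake; while it is
 * runtime suspended, estimate the residency as the time since suspend added
 * to the last value sampled before the device went to sleep.
 */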
static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;
	bool awake = false;
	u64 val;

	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt);
		awake = true;
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (awake) {
		pmu->sample[__I915_SAMPLE_RC6].cur = val;
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since_raw(pmu->sleep_last);
		val += pmu->sample[__I915_SAMPLE_RC6].cur;
	}

	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
	else
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}

static void init_rc6(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	intel_wakeref_t wakeref;

	with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
					pmu->sample[__I915_SAMPLE_RC6].cur;
		pmu->sleep_last = ktime_get_raw();
	}
}

static void park_rc6(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
	pmu->sleep_last = ktime_get_raw();
}

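/*
 * Arm the sampling timer if it is not already running and at least one
 * enabled event needs it.
 */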
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(i915);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->timer_enabled = pmu_needs_timer(pmu, false);

	spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang, so engine register sampling is serialised
	 * with the uncore lock on those platforms.
	 */
	return GRAPHICS_VER(i915) == 7;
}

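/*
 * Sample a single engine for the elapsed period: accumulate wait, semaphore
 * and busy times from the ring control and mode registers.
 */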
static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	bool busy;
	u32 val;

	val = ENGINE_READ_FW(engine, RING_CTL);
	if (val == 0) /* powerwell off => engine idle */
		return;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* No need to sample when busy stats are supported. */
	if (intel_engine_supports_stats(engine))
		return;

	/*
	 * While waiting on a semaphore or event, MI_MODE reports the
	 * ring as idle. However, previously using the seqno, and with
	 * execlists sampling, we account for the ring waiting as the
	 * engine being busy. Therefore, we record the sample as being
	 * busy if either waiting or !idle.
	 */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = ENGINE_READ_FW(engine, RING_MI_MODE);
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!intel_gt_pm_is_awake(gt))
		return;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		if (exclusive_mmio_access(i915)) {
			spin_lock_irqsave(&engine->uncore->lock, flags);
			engine_sample(engine, period_ns);
			spin_unlock_irqrestore(&engine->uncore->lock, flags);
		} else {
			engine_sample(engine, period_ns);
		}

		intel_engine_pm_put_async(engine);
	}
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}

static bool frequency_sampling_enabled(struct i915_pmu *pmu)
{
	return pmu->enable &
	       (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
		config_mask(I915_PMU_REQUESTED_FREQUENCY));
}

static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;

	if (!frequency_sampling_enabled(pmu))
		return;

	/* Report 0/0 (actual/requested) frequency while parked. */
	if (!intel_gt_pm_get_if_awake(gt))
		return;

	if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		/*
		 * We take a quick peek here without using forcewake
		 * so that we don't perturb the system under observation
		 * (forcewake => !rc6 => energy use is affected).
		 *
		 * If RPSTAT1 reads back as zero the GPU is clock gated
		 * and the actual frequency is taken to be the last
		 * requested (current) frequency.
		 */
		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
		if (val)
			val = intel_rps_get_cagf(rps, val);
		else
			val = rps->cur_freq;

		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(rps, val), period_ns / 1000);
	}

	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
				intel_rps_get_requested_frequency(rps),
				period_ns / 1000);
	}

	intel_gt_pm_put_async(gt);
}

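/*
 * Sampling timer callback: accumulate engine and frequency samples for the
 * elapsed period and re-arm while any sampled event remains enabled.
 */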
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_gt *gt = to_gt(i915);
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */
	engines_sample(gt, period_ns);
	frequency_sample(gt, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	drm_WARN_ON(&i915->drm, event->parent);

	drm_dev_put(&i915->drm);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (GRAPHICS_VER(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	struct intel_gt *gt = to_gt(i915);

	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		fallthrough;
	case I915_PMU_REQUESTED_FREQUENCY:
		if (GRAPHICS_VER(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!gt->rc6.supported)
			return -ENODEV;
		break;
	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	return engine_event_status(engine, engine_event_sample(event));
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&i915->drm);
		event->destroy = i915_pmu_event_destroy;
	}

	return 0;
}

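/*
 * Read the current value of an event: engine busyness either from the
 * software busy stats or the sampled counters, and the remaining events
 * from their dedicated counters.
 */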
static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			ktime_t unused;

			val = ktime_to_ns(intel_engine_get_busy_time(engine,
								     &unused));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = READ_ONCE(pmu->irq_count);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(to_gt(i915));
			break;
		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct hw_perf_event *hwc = &event->hw;
	struct i915_pmu *pmu = &i915->pmu;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}
again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;
	unsigned int bit;

	bit = event_bit(event);
	if (bit == -1)
		goto update;

	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);

	pmu->enable |= BIT_ULL(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

update:
	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	if (bit == -1)
		return;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT_ULL(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return;

	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t cpumask_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
		__event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}

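/*
 * CPU hotplug callbacks: the PMU exposes a single designated reader CPU via
 * the cpumask attribute and migrates the perf context if that CPU goes
 * offline.
 */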
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (cpumask_empty(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = i915_pmu_target_cpu;

	GEM_BUG_ON(!pmu->base.event_init);

	/*
	 * Unregistering an instance generates a CPU offline event which we must
	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			i915_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

int i915_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;

	return 0;
}

void i915_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static bool is_igp(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);

	/* IGP is 0000:00:02.0 */
	return pci_domain_nr(pdev->bus) == 0 &&
	       pdev->bus->number == 0 &&
	       PCI_SLOT(pdev->devfn) == 2 &&
	       PCI_FUNC(pdev->devfn) == 0;
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	const struct attribute_group *attr_groups[] = {
		&i915_pmu_format_attr_group,
		&pmu->events_attr_group,
		&i915_pmu_cpumask_attr_group,
		NULL
	};

	int ret = -ENOMEM;

	if (GRAPHICS_VER(i915) <= 2) {
		drm_info(&i915->drm, "PMU not supported for this GPU.");
		return;
	}

	spin_lock_init(&pmu->lock);
	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	pmu->timer.function = i915_sample;
	pmu->cpuhp.cpu = -1;
	init_rc6(pmu);

	if (!is_igp(i915)) {
		pmu->name = kasprintf(GFP_KERNEL,
				      "i915_%s",
				      dev_name(i915->drm.dev));
		if (pmu->name) {
			/* tools/perf reserves colons as special. */
			strreplace((char *)pmu->name, ':', '_');
		}
	} else {
		pmu->name = "i915";
	}
	if (!pmu->name)
		goto err;

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module = THIS_MODULE;
	pmu->base.task_ctx_nr = perf_invalid_context;
	pmu->base.event_init = i915_pmu_event_init;
	pmu->base.add = i915_pmu_event_add;
	pmu->base.del = i915_pmu_event_del;
	pmu->base.start = i915_pmu_event_start;
	pmu->base.stop = i915_pmu_event_stop;
	pmu->base.read = i915_pmu_event_read;
	pmu->base.event_idx = i915_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = i915_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	if (!is_igp(i915))
		kfree(pmu->name);
err:
	drm_notice(&i915->drm, "Failed to register PMU!\n");
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic the
	 * synchronize_rcu() below ensures all currently executing ones
	 * will have exited before we proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	if (!is_igp(i915))
		kfree(pmu->name);
	free_event_attributes(pmu);
}