0001 /*
0002  * SPDX-License-Identifier: MIT
0003  *
0004  * Copyright © 2017-2018 Intel Corporation
0005  */
0006 
0007 #include <linux/pm_runtime.h>
0008 
0009 #include "gt/intel_engine.h"
0010 #include "gt/intel_engine_pm.h"
0011 #include "gt/intel_engine_regs.h"
0012 #include "gt/intel_engine_user.h"
0013 #include "gt/intel_gt_pm.h"
0014 #include "gt/intel_gt_regs.h"
0015 #include "gt/intel_rc6.h"
0016 #include "gt/intel_rps.h"
0017 
0018 #include "i915_drv.h"
0019 #include "i915_pmu.h"
0020 #include "intel_pm.h"
0021 
0022 /* Frequency for the sampling timer for events which need it. */
0023 #define FREQUENCY 200
0024 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
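/*
 * With FREQUENCY == 200, NSEC_PER_SEC / FREQUENCY is 5,000,000 ns, so the
 * 10,000 ns floor in PERIOD is a no-op and the sampling timer fires every
 * 5 ms (200 times per second).
 */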
0025 
0026 #define ENGINE_SAMPLE_MASK \
0027     (BIT(I915_SAMPLE_BUSY) | \
0028      BIT(I915_SAMPLE_WAIT) | \
0029      BIT(I915_SAMPLE_SEMA))
0030 
0031 static cpumask_t i915_pmu_cpumask;
0032 static unsigned int i915_pmu_target_cpu = -1;
0033 
0034 static u8 engine_config_sample(u64 config)
0035 {
0036     return config & I915_PMU_SAMPLE_MASK;
0037 }
0038 
0039 static u8 engine_event_sample(struct perf_event *event)
0040 {
0041     return engine_config_sample(event->attr.config);
0042 }
0043 
0044 static u8 engine_event_class(struct perf_event *event)
0045 {
0046     return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
0047 }
0048 
0049 static u8 engine_event_instance(struct perf_event *event)
0050 {
0051     return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
0052 }
0053 
0054 static bool is_engine_config(u64 config)
0055 {
0056     return config < __I915_PMU_OTHER(0);
0057 }
0058 
0059 static unsigned int other_bit(const u64 config)
0060 {
0061     unsigned int val;
0062 
0063     switch (config) {
0064     case I915_PMU_ACTUAL_FREQUENCY:
0065         val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
0066         break;
0067     case I915_PMU_REQUESTED_FREQUENCY:
0068         val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
0069         break;
0070     case I915_PMU_RC6_RESIDENCY:
0071         val = __I915_PMU_RC6_RESIDENCY_ENABLED;
0072         break;
0073     default:
0074         /*
0075          * Events that do not require sampling, or tracking of state
0076          * transitions between enabled and disabled, can be ignored.
0077          */
0078         return -1;
0079     }
0080 
0081     return I915_ENGINE_SAMPLE_COUNT + val;
0082 }
0083 
0084 static unsigned int config_bit(const u64 config)
0085 {
0086     if (is_engine_config(config))
0087         return engine_config_sample(config);
0088     else
0089         return other_bit(config);
0090 }
0091 
0092 static u64 config_mask(u64 config)
0093 {
0094     return BIT_ULL(config_bit(config));
0095 }
0096 
0097 static bool is_engine_event(struct perf_event *event)
0098 {
0099     return is_engine_config(event->attr.config);
0100 }
0101 
0102 static unsigned int event_bit(struct perf_event *event)
0103 {
0104     return config_bit(event->attr.config);
0105 }
0106 
0107 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
0108 {
0109     struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
0110     u32 enable;
0111 
0112     /*
0113      * Only some counters need the sampling timer.
0114      *
0115      * We start with a bitmask of all currently enabled events.
0116      */
0117     enable = pmu->enable;
0118 
0119     /*
0120      * Mask out all the ones which do not need the timer, or in
0121      * other words keep all the ones that could need the timer.
0122      */
0123     enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
0124           config_mask(I915_PMU_REQUESTED_FREQUENCY) |
0125           ENGINE_SAMPLE_MASK;
0126 
0127     /*
0128      * When the GPU is idle per-engine counters do not need to be
0129      * running so clear those bits out.
0130      */
0131     if (!gpu_active)
0132         enable &= ~ENGINE_SAMPLE_MASK;
0133     /*
0134      * Also, when software busyness tracking is available, we do not
0135      * need the timer for the I915_SAMPLE_BUSY counter.
0136      */
0137     else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
0138         enable &= ~BIT(I915_SAMPLE_BUSY);
0139 
0140     /*
0141      * If some bits remain it means we need the sampling timer running.
0142      */
0143     return enable;
0144 }
0145 
0146 static u64 __get_rc6(struct intel_gt *gt)
0147 {
0148     struct drm_i915_private *i915 = gt->i915;
0149     u64 val;
0150 
0151     val = intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6);
0152 
0153     if (HAS_RC6p(i915))
0154         val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
0155 
0156     if (HAS_RC6pp(i915))
0157         val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
0158 
0159     return val;
0160 }
0161 
0162 static inline s64 ktime_since_raw(const ktime_t kt)
0163 {
0164     return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
0165 }
0166 
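/*
 * get_rc6() takes pmu->lock to serialise against park_rc6() and concurrent
 * readers, and clamps against __I915_SAMPLE_RC6_LAST_REPORTED so the value
 * reported to perf never goes backwards, even if the estimate made while
 * runtime suspended overshoots the real hardware counter.
 */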
0167 static u64 get_rc6(struct intel_gt *gt)
0168 {
0169     struct drm_i915_private *i915 = gt->i915;
0170     struct i915_pmu *pmu = &i915->pmu;
0171     unsigned long flags;
0172     bool awake = false;
0173     u64 val;
0174 
0175     if (intel_gt_pm_get_if_awake(gt)) {
0176         val = __get_rc6(gt);
0177         intel_gt_pm_put_async(gt);
0178         awake = true;
0179     }
0180 
0181     spin_lock_irqsave(&pmu->lock, flags);
0182 
0183     if (awake) {
0184         pmu->sample[__I915_SAMPLE_RC6].cur = val;
0185     } else {
0186         /*
0187          * We think we are runtime suspended.
0188          *
0189          * Report the delta from when the device was suspended to now,
0190          * on top of the last known real value, as the approximated RC6
0191          * counter value.
0192          */
0193         val = ktime_since_raw(pmu->sleep_last);
0194         val += pmu->sample[__I915_SAMPLE_RC6].cur;
0195     }
0196 
0197     if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
0198         val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
0199     else
0200         pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
0201 
0202     spin_unlock_irqrestore(&pmu->lock, flags);
0203 
0204     return val;
0205 }
0206 
0207 static void init_rc6(struct i915_pmu *pmu)
0208 {
0209     struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
0210     intel_wakeref_t wakeref;
0211 
0212     with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
0213         pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
0214         pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
0215                     pmu->sample[__I915_SAMPLE_RC6].cur;
0216         pmu->sleep_last = ktime_get_raw();
0217     }
0218 }
0219 
0220 static void park_rc6(struct drm_i915_private *i915)
0221 {
0222     struct i915_pmu *pmu = &i915->pmu;
0223 
0224     pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
0225     pmu->sleep_last = ktime_get_raw();
0226 }
0227 
0228 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
0229 {
0230     if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
0231         pmu->timer_enabled = true;
0232         pmu->timer_last = ktime_get();
0233         hrtimer_start_range_ns(&pmu->timer,
0234                        ns_to_ktime(PERIOD), 0,
0235                        HRTIMER_MODE_REL_PINNED);
0236     }
0237 }
0238 
0239 void i915_pmu_gt_parked(struct drm_i915_private *i915)
0240 {
0241     struct i915_pmu *pmu = &i915->pmu;
0242 
0243     if (!pmu->base.event_init)
0244         return;
0245 
0246     spin_lock_irq(&pmu->lock);
0247 
0248     park_rc6(i915);
0249 
0250     /*
0251      * Signal sampling timer to stop if only engine events are enabled and
0252      * GPU went idle.
0253      */
0254     pmu->timer_enabled = pmu_needs_timer(pmu, false);
0255 
0256     spin_unlock_irq(&pmu->lock);
0257 }
0258 
0259 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
0260 {
0261     struct i915_pmu *pmu = &i915->pmu;
0262 
0263     if (!pmu->base.event_init)
0264         return;
0265 
0266     spin_lock_irq(&pmu->lock);
0267 
0268     /*
0269      * Re-enable sampling timer when GPU goes active.
0270      */
0271     __i915_pmu_maybe_start_timer(pmu);
0272 
0273     spin_unlock_irq(&pmu->lock);
0274 }
0275 
0276 static void
0277 add_sample(struct i915_pmu_sample *sample, u32 val)
0278 {
0279     sample->cur += val;
0280 }
0281 
0282 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
0283 {
0284     /*
0285      * We have to avoid concurrent mmio cache line access on gen7 or
0286      * risk a machine hang. For a fun history lesson dig out the old
0287      * userspace intel_gpu_top and run it on Ivybridge or Haswell!
0288      */
0289     return GRAPHICS_VER(i915) == 7;
0290 }
0291 
0292 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
0293 {
0294     struct intel_engine_pmu *pmu = &engine->pmu;
0295     bool busy;
0296     u32 val;
0297 
0298     val = ENGINE_READ_FW(engine, RING_CTL);
0299     if (val == 0) /* powerwell off => engine idle */
0300         return;
0301 
0302     if (val & RING_WAIT)
0303         add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
0304     if (val & RING_WAIT_SEMAPHORE)
0305         add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
0306 
0307     /* No need to sample when busy stats are supported. */
0308     if (intel_engine_supports_stats(engine))
0309         return;
0310 
0311     /*
0312      * While waiting on a semaphore or event, MI_MODE reports the
0313      * ring as idle. However, previously using the seqno, and with
0314      * execlists sampling, we account for the ring waiting as the
0315      * engine being busy. Therefore, we record the sample as being
0316      * busy if either waiting or !idle.
0317      */
0318     busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
0319     if (!busy) {
0320         val = ENGINE_READ_FW(engine, RING_MI_MODE);
0321         busy = !(val & MODE_IDLE);
0322     }
0323     if (busy)
0324         add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
0325 }
0326 
0327 static void
0328 engines_sample(struct intel_gt *gt, unsigned int period_ns)
0329 {
0330     struct drm_i915_private *i915 = gt->i915;
0331     struct intel_engine_cs *engine;
0332     enum intel_engine_id id;
0333     unsigned long flags;
0334 
0335     if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
0336         return;
0337 
0338     if (!intel_gt_pm_is_awake(gt))
0339         return;
0340 
0341     for_each_engine(engine, gt, id) {
0342         if (!intel_engine_pm_get_if_awake(engine))
0343             continue;
0344 
0345         if (exclusive_mmio_access(i915)) {
0346             spin_lock_irqsave(&engine->uncore->lock, flags);
0347             engine_sample(engine, period_ns);
0348             spin_unlock_irqrestore(&engine->uncore->lock, flags);
0349         } else {
0350             engine_sample(engine, period_ns);
0351         }
0352 
0353         intel_engine_pm_put_async(engine);
0354     }
0355 }
0356 
0357 static void
0358 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
0359 {
0360     sample->cur += mul_u32_u32(val, mul);
0361 }
0362 
0363 static bool frequency_sampling_enabled(struct i915_pmu *pmu)
0364 {
0365     return pmu->enable &
0366            (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
0367         config_mask(I915_PMU_REQUESTED_FREQUENCY));
0368 }
0369 
0370 static void
0371 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
0372 {
0373     struct drm_i915_private *i915 = gt->i915;
0374     struct intel_uncore *uncore = gt->uncore;
0375     struct i915_pmu *pmu = &i915->pmu;
0376     struct intel_rps *rps = &gt->rps;
0377 
0378     if (!frequency_sampling_enabled(pmu))
0379         return;
0380 
0381     /* Report 0/0 (actual/requested) frequency while parked. */
0382     if (!intel_gt_pm_get_if_awake(gt))
0383         return;
0384 
0385     if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
0386         u32 val;
0387 
0388         /*
0389          * We take a quick peek here without using forcewake
0390          * so that we don't perturb the system under observation
0391          * (forcewake => !rc6 => increased power use). We expect
0392          * that if the read fails because it is outside of the
0393          * mmio power well, then it will return 0 -- in which
0394          * case we assume the system is running at the intended
0395          * frequency. Fortunately, the read should rarely fail!
0396          */
0397         val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
0398         if (val)
0399             val = intel_rps_get_cagf(rps, val);
0400         else
0401             val = rps->cur_freq;
0402 
0403         add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
0404                 intel_gpu_freq(rps, val), period_ns / 1000);
0405     }
0406 
0407     if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
0408         add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
0409                 intel_rps_get_requested_frequency(rps),
0410                 period_ns / 1000);
0411     }
0412 
0413     intel_gt_pm_put_async(gt);
0414 }
0415 
0416 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
0417 {
0418     struct drm_i915_private *i915 =
0419         container_of(hrtimer, struct drm_i915_private, pmu.timer);
0420     struct i915_pmu *pmu = &i915->pmu;
0421     struct intel_gt *gt = to_gt(i915);
0422     unsigned int period_ns;
0423     ktime_t now;
0424 
0425     if (!READ_ONCE(pmu->timer_enabled))
0426         return HRTIMER_NORESTART;
0427 
0428     now = ktime_get();
0429     period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
0430     pmu->timer_last = now;
0431 
0432     /*
0433      * Strictly speaking the passed-in period may not be 100% accurate for
0434      * all internal calculations, since some amount of time can be spent on
0435      * grabbing the forcewake. However, the potential error from timer
0436      * callback delay greatly dominates this, so we keep it simple.
0437      */
0438     engines_sample(gt, period_ns);
0439     frequency_sample(gt, period_ns);
0440 
0441     hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
0442 
0443     return HRTIMER_RESTART;
0444 }
0445 
0446 static void i915_pmu_event_destroy(struct perf_event *event)
0447 {
0448     struct drm_i915_private *i915 =
0449         container_of(event->pmu, typeof(*i915), pmu.base);
0450 
0451     drm_WARN_ON(&i915->drm, event->parent);
0452 
0453     drm_dev_put(&i915->drm);
0454 }
0455 
0456 static int
0457 engine_event_status(struct intel_engine_cs *engine,
0458             enum drm_i915_pmu_engine_sample sample)
0459 {
0460     switch (sample) {
0461     case I915_SAMPLE_BUSY:
0462     case I915_SAMPLE_WAIT:
0463         break;
0464     case I915_SAMPLE_SEMA:
0465         if (GRAPHICS_VER(engine->i915) < 6)
0466             return -ENODEV;
0467         break;
0468     default:
0469         return -ENOENT;
0470     }
0471 
0472     return 0;
0473 }
0474 
0475 static int
0476 config_status(struct drm_i915_private *i915, u64 config)
0477 {
0478     struct intel_gt *gt = to_gt(i915);
0479 
0480     switch (config) {
0481     case I915_PMU_ACTUAL_FREQUENCY:
0482         if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
0483             /* Requires a mutex for sampling! */
0484             return -ENODEV;
0485         fallthrough;
0486     case I915_PMU_REQUESTED_FREQUENCY:
0487         if (GRAPHICS_VER(i915) < 6)
0488             return -ENODEV;
0489         break;
0490     case I915_PMU_INTERRUPTS:
0491         break;
0492     case I915_PMU_RC6_RESIDENCY:
0493         if (!gt->rc6.supported)
0494             return -ENODEV;
0495         break;
0496     case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
0497         break;
0498     default:
0499         return -ENOENT;
0500     }
0501 
0502     return 0;
0503 }
0504 
0505 static int engine_event_init(struct perf_event *event)
0506 {
0507     struct drm_i915_private *i915 =
0508         container_of(event->pmu, typeof(*i915), pmu.base);
0509     struct intel_engine_cs *engine;
0510 
0511     engine = intel_engine_lookup_user(i915, engine_event_class(event),
0512                       engine_event_instance(event));
0513     if (!engine)
0514         return -ENODEV;
0515 
0516     return engine_event_status(engine, engine_event_sample(event));
0517 }
0518 
0519 static int i915_pmu_event_init(struct perf_event *event)
0520 {
0521     struct drm_i915_private *i915 =
0522         container_of(event->pmu, typeof(*i915), pmu.base);
0523     struct i915_pmu *pmu = &i915->pmu;
0524     int ret;
0525 
0526     if (pmu->closed)
0527         return -ENODEV;
0528 
0529     if (event->attr.type != event->pmu->type)
0530         return -ENOENT;
0531 
0532     /* unsupported modes and filters */
0533     if (event->attr.sample_period) /* no sampling */
0534         return -EINVAL;
0535 
0536     if (has_branch_stack(event))
0537         return -EOPNOTSUPP;
0538 
0539     if (event->cpu < 0)
0540         return -EINVAL;
0541 
0542     /* only allow running on one cpu at a time */
0543     if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
0544         return -EINVAL;
0545 
0546     if (is_engine_event(event))
0547         ret = engine_event_init(event);
0548     else
0549         ret = config_status(i915, event->attr.config);
0550     if (ret)
0551         return ret;
0552 
0553     if (!event->parent) {
0554         drm_dev_get(&i915->drm);
0555         event->destroy = i915_pmu_event_destroy;
0556     }
0557 
0558     return 0;
0559 }
0560 
0561 static u64 __i915_pmu_event_read(struct perf_event *event)
0562 {
0563     struct drm_i915_private *i915 =
0564         container_of(event->pmu, typeof(*i915), pmu.base);
0565     struct i915_pmu *pmu = &i915->pmu;
0566     u64 val = 0;
0567 
0568     if (is_engine_event(event)) {
0569         u8 sample = engine_event_sample(event);
0570         struct intel_engine_cs *engine;
0571 
0572         engine = intel_engine_lookup_user(i915,
0573                           engine_event_class(event),
0574                           engine_event_instance(event));
0575 
0576         if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
0577             /* Do nothing */
0578         } else if (sample == I915_SAMPLE_BUSY &&
0579                intel_engine_supports_stats(engine)) {
0580             ktime_t unused;
0581 
0582             val = ktime_to_ns(intel_engine_get_busy_time(engine,
0583                                      &unused));
0584         } else {
0585             val = engine->pmu.sample[sample].cur;
0586         }
0587     } else {
0588         switch (event->attr.config) {
0589         case I915_PMU_ACTUAL_FREQUENCY:
0590             val =
0591                div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
0592                    USEC_PER_SEC /* to MHz */);
0593             break;
0594         case I915_PMU_REQUESTED_FREQUENCY:
0595             val =
0596                div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
0597                    USEC_PER_SEC /* to MHz */);
0598             break;
0599         case I915_PMU_INTERRUPTS:
0600             val = READ_ONCE(pmu->irq_count);
0601             break;
0602         case I915_PMU_RC6_RESIDENCY:
0603             val = get_rc6(to_gt(i915));
0604             break;
0605         case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
0606             val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
0607             break;
0608         }
0609     }
0610 
0611     return val;
0612 }
0613 
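/*
 * i915_pmu_event_read() publishes only the delta since the previous read:
 * the cmpxchg loop on hwc->prev_count claims the interval for this reader,
 * so concurrent readers of the same event never add the same delta twice.
 */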
0614 static void i915_pmu_event_read(struct perf_event *event)
0615 {
0616     struct drm_i915_private *i915 =
0617         container_of(event->pmu, typeof(*i915), pmu.base);
0618     struct hw_perf_event *hwc = &event->hw;
0619     struct i915_pmu *pmu = &i915->pmu;
0620     u64 prev, new;
0621 
0622     if (pmu->closed) {
0623         event->hw.state = PERF_HES_STOPPED;
0624         return;
0625     }
0626 again:
0627     prev = local64_read(&hwc->prev_count);
0628     new = __i915_pmu_event_read(event);
0629 
0630     if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
0631         goto again;
0632 
0633     local64_add(new - prev, &event->count);
0634 }
0635 
0636 static void i915_pmu_enable(struct perf_event *event)
0637 {
0638     struct drm_i915_private *i915 =
0639         container_of(event->pmu, typeof(*i915), pmu.base);
0640     struct i915_pmu *pmu = &i915->pmu;
0641     unsigned long flags;
0642     unsigned int bit;
0643 
0644     bit = event_bit(event);
0645     if (bit == -1)
0646         goto update;
0647 
0648     spin_lock_irqsave(&pmu->lock, flags);
0649 
0650     /*
0651      * Update the bitmask of enabled events and increment
0652      * the event reference counter.
0653      */
0654     BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
0655     GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
0656     GEM_BUG_ON(pmu->enable_count[bit] == ~0);
0657 
0658     pmu->enable |= BIT_ULL(bit);
0659     pmu->enable_count[bit]++;
0660 
0661     /*
0662      * Start the sampling timer if needed and not already enabled.
0663      */
0664     __i915_pmu_maybe_start_timer(pmu);
0665 
0666     /*
0667      * For per-engine events the bitmask and reference counting
0668      * is stored per engine.
0669      */
0670     if (is_engine_event(event)) {
0671         u8 sample = engine_event_sample(event);
0672         struct intel_engine_cs *engine;
0673 
0674         engine = intel_engine_lookup_user(i915,
0675                           engine_event_class(event),
0676                           engine_event_instance(event));
0677 
0678         BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
0679                  I915_ENGINE_SAMPLE_COUNT);
0680         BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
0681                  I915_ENGINE_SAMPLE_COUNT);
0682         GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
0683         GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
0684         GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
0685 
0686         engine->pmu.enable |= BIT(sample);
0687         engine->pmu.enable_count[sample]++;
0688     }
0689 
0690     spin_unlock_irqrestore(&pmu->lock, flags);
0691 
0692 update:
0693     /*
0694      * Store the current counter value so we can report the correct delta
0695      * for all listeners, even when the event was already enabled and has
0696      * an existing non-zero value.
0697      */
0698     local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
0699 }
0700 
0701 static void i915_pmu_disable(struct perf_event *event)
0702 {
0703     struct drm_i915_private *i915 =
0704         container_of(event->pmu, typeof(*i915), pmu.base);
0705     unsigned int bit = event_bit(event);
0706     struct i915_pmu *pmu = &i915->pmu;
0707     unsigned long flags;
0708 
0709     if (bit == -1)
0710         return;
0711 
0712     spin_lock_irqsave(&pmu->lock, flags);
0713 
0714     if (is_engine_event(event)) {
0715         u8 sample = engine_event_sample(event);
0716         struct intel_engine_cs *engine;
0717 
0718         engine = intel_engine_lookup_user(i915,
0719                           engine_event_class(event),
0720                           engine_event_instance(event));
0721 
0722         GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
0723         GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
0724         GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
0725 
0726         /*
0727          * Decrement the reference count and clear the enabled
0728          * bitmask when the last listener on an event goes away.
0729          */
0730         if (--engine->pmu.enable_count[sample] == 0)
0731             engine->pmu.enable &= ~BIT(sample);
0732     }
0733 
0734     GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
0735     GEM_BUG_ON(pmu->enable_count[bit] == 0);
0736     /*
0737      * Decrement the reference count and clear the enabled
0738      * bitmask when the last listener on an event goes away.
0739      */
0740     if (--pmu->enable_count[bit] == 0) {
0741         pmu->enable &= ~BIT_ULL(bit);
0742         pmu->timer_enabled &= pmu_needs_timer(pmu, true);
0743     }
0744 
0745     spin_unlock_irqrestore(&pmu->lock, flags);
0746 }
0747 
0748 static void i915_pmu_event_start(struct perf_event *event, int flags)
0749 {
0750     struct drm_i915_private *i915 =
0751         container_of(event->pmu, typeof(*i915), pmu.base);
0752     struct i915_pmu *pmu = &i915->pmu;
0753 
0754     if (pmu->closed)
0755         return;
0756 
0757     i915_pmu_enable(event);
0758     event->hw.state = 0;
0759 }
0760 
0761 static void i915_pmu_event_stop(struct perf_event *event, int flags)
0762 {
0763     if (flags & PERF_EF_UPDATE)
0764         i915_pmu_event_read(event);
0765     i915_pmu_disable(event);
0766     event->hw.state = PERF_HES_STOPPED;
0767 }
0768 
0769 static int i915_pmu_event_add(struct perf_event *event, int flags)
0770 {
0771     struct drm_i915_private *i915 =
0772         container_of(event->pmu, typeof(*i915), pmu.base);
0773     struct i915_pmu *pmu = &i915->pmu;
0774 
0775     if (pmu->closed)
0776         return -ENODEV;
0777 
0778     if (flags & PERF_EF_START)
0779         i915_pmu_event_start(event, flags);
0780 
0781     return 0;
0782 }
0783 
0784 static void i915_pmu_event_del(struct perf_event *event, int flags)
0785 {
0786     i915_pmu_event_stop(event, PERF_EF_UPDATE);
0787 }
0788 
0789 static int i915_pmu_event_event_idx(struct perf_event *event)
0790 {
0791     return 0;
0792 }
0793 
0794 struct i915_str_attribute {
0795     struct device_attribute attr;
0796     const char *str;
0797 };
0798 
0799 static ssize_t i915_pmu_format_show(struct device *dev,
0800                     struct device_attribute *attr, char *buf)
0801 {
0802     struct i915_str_attribute *eattr;
0803 
0804     eattr = container_of(attr, struct i915_str_attribute, attr);
0805     return sprintf(buf, "%s\n", eattr->str);
0806 }
0807 
0808 #define I915_PMU_FORMAT_ATTR(_name, _config) \
0809     (&((struct i915_str_attribute[]) { \
0810         { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
0811           .str = _config, } \
0812     })[0].attr.attr)
0813 
0814 static struct attribute *i915_pmu_format_attrs[] = {
0815     I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
0816     NULL,
0817 };
0818 
0819 static const struct attribute_group i915_pmu_format_attr_group = {
0820     .name = "format",
0821     .attrs = i915_pmu_format_attrs,
0822 };
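/*
 * perf core exposes the group above as
 * /sys/bus/event_source/devices/<pmu>/format/i915_eventid, telling userspace
 * tools that the event id occupies bits 0-20 of perf_event_attr.config.
 */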
0823 
0824 struct i915_ext_attribute {
0825     struct device_attribute attr;
0826     unsigned long val;
0827 };
0828 
0829 static ssize_t i915_pmu_event_show(struct device *dev,
0830                    struct device_attribute *attr, char *buf)
0831 {
0832     struct i915_ext_attribute *eattr;
0833 
0834     eattr = container_of(attr, struct i915_ext_attribute, attr);
0835     return sprintf(buf, "config=0x%lx\n", eattr->val);
0836 }
0837 
0838 static ssize_t cpumask_show(struct device *dev,
0839                 struct device_attribute *attr, char *buf)
0840 {
0841     return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
0842 }
0843 
0844 static DEVICE_ATTR_RO(cpumask);
0845 
0846 static struct attribute *i915_cpumask_attrs[] = {
0847     &dev_attr_cpumask.attr,
0848     NULL,
0849 };
0850 
0851 static const struct attribute_group i915_pmu_cpumask_attr_group = {
0852     .attrs = i915_cpumask_attrs,
0853 };
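/*
 * Advertising a single-CPU "cpumask" lets tools such as perf open system-wide
 * events on just that CPU, which matches the i915_pmu_event_init() check that
 * events must target a CPU from i915_pmu_cpumask.
 */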
0854 
0855 #define __event(__config, __name, __unit) \
0856 { \
0857     .config = (__config), \
0858     .name = (__name), \
0859     .unit = (__unit), \
0860 }
0861 
0862 #define __engine_event(__sample, __name) \
0863 { \
0864     .sample = (__sample), \
0865     .name = (__name), \
0866 }
0867 
0868 static struct i915_ext_attribute *
0869 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
0870 {
0871     sysfs_attr_init(&attr->attr.attr);
0872     attr->attr.attr.name = name;
0873     attr->attr.attr.mode = 0444;
0874     attr->attr.show = i915_pmu_event_show;
0875     attr->val = config;
0876 
0877     return ++attr;
0878 }
0879 
0880 static struct perf_pmu_events_attr *
0881 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
0882          const char *str)
0883 {
0884     sysfs_attr_init(&attr->attr.attr);
0885     attr->attr.attr.name = name;
0886     attr->attr.attr.mode = 0444;
0887     attr->attr.show = perf_event_sysfs_show;
0888     attr->event_str = str;
0889 
0890     return ++attr;
0891 }
0892 
0893 static struct attribute **
0894 create_event_attributes(struct i915_pmu *pmu)
0895 {
0896     struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
0897     static const struct {
0898         u64 config;
0899         const char *name;
0900         const char *unit;
0901     } events[] = {
0902         __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
0903         __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
0904         __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
0905         __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
0906         __event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
0907     };
0908     static const struct {
0909         enum drm_i915_pmu_engine_sample sample;
0910         char *name;
0911     } engine_events[] = {
0912         __engine_event(I915_SAMPLE_BUSY, "busy"),
0913         __engine_event(I915_SAMPLE_SEMA, "sema"),
0914         __engine_event(I915_SAMPLE_WAIT, "wait"),
0915     };
0916     unsigned int count = 0;
0917     struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
0918     struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
0919     struct attribute **attr = NULL, **attr_iter;
0920     struct intel_engine_cs *engine;
0921     unsigned int i;
0922 
0923     /* Count how many counters we will be exposing. */
0924     for (i = 0; i < ARRAY_SIZE(events); i++) {
0925         if (!config_status(i915, events[i].config))
0926             count++;
0927     }
0928 
0929     for_each_uabi_engine(engine, i915) {
0930         for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
0931             if (!engine_event_status(engine,
0932                          engine_events[i].sample))
0933                 count++;
0934         }
0935     }
0936 
0937     /* Allocate attribute objects and table. */
0938     i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
0939     if (!i915_attr)
0940         goto err_alloc;
0941 
0942     pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
0943     if (!pmu_attr)
0944         goto err_alloc;
0945 
0946     /* Max one pointer of each attribute type plus a termination entry. */
0947     attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
0948     if (!attr)
0949         goto err_alloc;
0950 
0951     i915_iter = i915_attr;
0952     pmu_iter = pmu_attr;
0953     attr_iter = attr;
0954 
0955     /* Initialize supported non-engine counters. */
0956     for (i = 0; i < ARRAY_SIZE(events); i++) {
0957         char *str;
0958 
0959         if (config_status(i915, events[i].config))
0960             continue;
0961 
0962         str = kstrdup(events[i].name, GFP_KERNEL);
0963         if (!str)
0964             goto err;
0965 
0966         *attr_iter++ = &i915_iter->attr.attr;
0967         i915_iter = add_i915_attr(i915_iter, str, events[i].config);
0968 
0969         if (events[i].unit) {
0970             str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
0971             if (!str)
0972                 goto err;
0973 
0974             *attr_iter++ = &pmu_iter->attr.attr;
0975             pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
0976         }
0977     }
0978 
0979     /* Initialize supported engine counters. */
0980     for_each_uabi_engine(engine, i915) {
0981         for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
0982             char *str;
0983 
0984             if (engine_event_status(engine,
0985                         engine_events[i].sample))
0986                 continue;
0987 
0988             str = kasprintf(GFP_KERNEL, "%s-%s",
0989                     engine->name, engine_events[i].name);
0990             if (!str)
0991                 goto err;
0992 
0993             *attr_iter++ = &i915_iter->attr.attr;
0994             i915_iter =
0995                 add_i915_attr(i915_iter, str,
0996                           __I915_PMU_ENGINE(engine->uabi_class,
0997                                 engine->uabi_instance,
0998                                 engine_events[i].sample));
0999 
1000             str = kasprintf(GFP_KERNEL, "%s-%s.unit",
1001                     engine->name, engine_events[i].name);
1002             if (!str)
1003                 goto err;
1004 
1005             *attr_iter++ = &pmu_iter->attr.attr;
1006             pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
1007         }
1008     }
1009 
1010     pmu->i915_attr = i915_attr;
1011     pmu->pmu_attr = pmu_attr;
1012 
1013     return attr;
1014 
1015 err:;
1016     for (attr_iter = attr; *attr_iter; attr_iter++)
1017         kfree((*attr_iter)->name);
1018 
1019 err_alloc:
1020     kfree(attr);
1021     kfree(i915_attr);
1022     kfree(pmu_attr);
1023 
1024     return NULL;
1025 }
1026 
1027 static void free_event_attributes(struct i915_pmu *pmu)
1028 {
1029     struct attribute **attr_iter = pmu->events_attr_group.attrs;
1030 
1031     for (; *attr_iter; attr_iter++)
1032         kfree((*attr_iter)->name);
1033 
1034     kfree(pmu->events_attr_group.attrs);
1035     kfree(pmu->i915_attr);
1036     kfree(pmu->pmu_attr);
1037 
1038     pmu->events_attr_group.attrs = NULL;
1039     pmu->i915_attr = NULL;
1040     pmu->pmu_attr = NULL;
1041 }
1042 
1043 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1044 {
1045     struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1046 
1047     GEM_BUG_ON(!pmu->base.event_init);
1048 
1049     /* Select the first online CPU as a designated reader. */
1050     if (cpumask_empty(&i915_pmu_cpumask))
1051         cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1052 
1053     return 0;
1054 }
1055 
1056 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1057 {
1058     struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1059     unsigned int target = i915_pmu_target_cpu;
1060 
1061     GEM_BUG_ON(!pmu->base.event_init);
1062 
1063     /*
1064      * Unregistering an instance generates a CPU offline event which we must
1065      * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
1066      */
1067     if (pmu->closed)
1068         return 0;
1069 
1070     if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1071         target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1072 
1073         /* Migrate events if there is a valid target */
1074         if (target < nr_cpu_ids) {
1075             cpumask_set_cpu(target, &i915_pmu_cpumask);
1076             i915_pmu_target_cpu = target;
1077         }
1078     }
1079 
1080     if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
1081         perf_pmu_migrate_context(&pmu->base, cpu, target);
1082         pmu->cpuhp.cpu = target;
1083     }
1084 
1085     return 0;
1086 }
1087 
1088 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1089 
1090 int i915_pmu_init(void)
1091 {
1092     int ret;
1093 
1094     ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1095                       "perf/x86/intel/i915:online",
1096                       i915_pmu_cpu_online,
1097                       i915_pmu_cpu_offline);
1098     if (ret < 0)
1099         pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
1100               ret);
1101     else
1102         cpuhp_slot = ret;
1103 
1104     return 0;
1105 }
1106 
1107 void i915_pmu_exit(void)
1108 {
1109     if (cpuhp_slot != CPUHP_INVALID)
1110         cpuhp_remove_multi_state(cpuhp_slot);
1111 }
1112 
1113 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1114 {
1115     if (cpuhp_slot == CPUHP_INVALID)
1116         return -EINVAL;
1117 
1118     return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
1119 }
1120 
1121 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1122 {
1123     cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
1124 }
1125 
1126 static bool is_igp(struct drm_i915_private *i915)
1127 {
1128     struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1129 
1130     /* IGP is 0000:00:02.0 */
1131     return pci_domain_nr(pdev->bus) == 0 &&
1132            pdev->bus->number == 0 &&
1133            PCI_SLOT(pdev->devfn) == 2 &&
1134            PCI_FUNC(pdev->devfn) == 0;
1135 }
1136 
1137 void i915_pmu_register(struct drm_i915_private *i915)
1138 {
1139     struct i915_pmu *pmu = &i915->pmu;
1140     const struct attribute_group *attr_groups[] = {
1141         &i915_pmu_format_attr_group,
1142         &pmu->events_attr_group,
1143         &i915_pmu_cpumask_attr_group,
1144         NULL
1145     };
1146 
1147     int ret = -ENOMEM;
1148 
1149     if (GRAPHICS_VER(i915) <= 2) {
1150         drm_info(&i915->drm, "PMU not supported for this GPU.");
1151         return;
1152     }
1153 
1154     spin_lock_init(&pmu->lock);
1155     hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1156     pmu->timer.function = i915_sample;
1157     pmu->cpuhp.cpu = -1;
1158     init_rc6(pmu);
1159 
1160     if (!is_igp(i915)) {
1161         pmu->name = kasprintf(GFP_KERNEL,
1162                       "i915_%s",
1163                       dev_name(i915->drm.dev));
1164         if (pmu->name) {
1165             /* tools/perf reserves colons as special. */
1166             strreplace((char *)pmu->name, ':', '_');
1167         }
1168     } else {
1169         pmu->name = "i915";
1170     }
1171     if (!pmu->name)
1172         goto err;
1173 
1174     pmu->events_attr_group.name = "events";
1175     pmu->events_attr_group.attrs = create_event_attributes(pmu);
1176     if (!pmu->events_attr_group.attrs)
1177         goto err_name;
1178 
1179     pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1180                     GFP_KERNEL);
1181     if (!pmu->base.attr_groups)
1182         goto err_attr;
1183 
1184     pmu->base.module    = THIS_MODULE;
1185     pmu->base.task_ctx_nr   = perf_invalid_context;
1186     pmu->base.event_init    = i915_pmu_event_init;
1187     pmu->base.add       = i915_pmu_event_add;
1188     pmu->base.del       = i915_pmu_event_del;
1189     pmu->base.start     = i915_pmu_event_start;
1190     pmu->base.stop      = i915_pmu_event_stop;
1191     pmu->base.read      = i915_pmu_event_read;
1192     pmu->base.event_idx = i915_pmu_event_event_idx;
1193 
1194     ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1195     if (ret)
1196         goto err_groups;
1197 
1198     ret = i915_pmu_register_cpuhp_state(pmu);
1199     if (ret)
1200         goto err_unreg;
1201 
1202     return;
1203 
1204 err_unreg:
1205     perf_pmu_unregister(&pmu->base);
1206 err_groups:
1207     kfree(pmu->base.attr_groups);
1208 err_attr:
1209     pmu->base.event_init = NULL;
1210     free_event_attributes(pmu);
1211 err_name:
1212     if (!is_igp(i915))
1213         kfree(pmu->name);
1214 err:
1215     drm_notice(&i915->drm, "Failed to register PMU!\n");
1216 }
1217 
1218 void i915_pmu_unregister(struct drm_i915_private *i915)
1219 {
1220     struct i915_pmu *pmu = &i915->pmu;
1221 
1222     if (!pmu->base.event_init)
1223         return;
1224 
1225     /*
1226      * "Disconnect" the PMU callbacks - since all are atomic,
1227      * synchronize_rcu() ensures all currently executing ones will have
1228      * exited before we proceed with unregistration.
1229      */
1230     pmu->closed = true;
1231     synchronize_rcu();
1232 
1233     hrtimer_cancel(&pmu->timer);
1234 
1235     i915_pmu_unregister_cpuhp_state(pmu);
1236 
1237     perf_pmu_unregister(&pmu->base);
1238     pmu->base.event_init = NULL;
1239     kfree(pmu->base.attr_groups);
1240     if (!is_igp(i915))
1241         kfree(pmu->name);
1242     free_event_attributes(pmu);
1243 }
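
For reference, below is a minimal userspace sketch of how the counters exported
above are typically consumed through perf_event_open(2). It is illustrative
only and not part of the driver: the sysfs path, the assumption that the PMU
instance is named "i915" (an integrated GPU, per is_igp() above; a discrete
GPU would get an "i915_<pci-address>" name from the kasprintf() in
i915_pmu_register()), and the use of I915_PMU_RC6_RESIDENCY from the uapi
header <drm/i915_drm.h> are all assumptions about a typical setup.

/* Illustrative userspace sketch, not part of the driver. */
#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <drm/i915_drm.h>	/* I915_PMU_RC6_RESIDENCY */

static int open_event(struct perf_event_attr *attr, int cpu)
{
	/* pid == -1, cpu >= 0: system-wide event, as i915_pmu_event_init() requires. */
	return syscall(__NR_perf_event_open, attr, -1, cpu, -1, 0);
}

int main(void)
{
	struct perf_event_attr attr = { 0 };
	uint64_t before, after;
	unsigned int type;
	FILE *f;
	int fd;

	/* Dynamic PMU type id published by perf_pmu_register() above. */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = I915_PMU_RC6_RESIDENCY;

	fd = open_event(&attr, 0);
	if (fd < 0)
		return 1;

	if (read(fd, &before, sizeof(before)) != sizeof(before))
		return 1;
	sleep(1);
	if (read(fd, &after, sizeof(after)) != sizeof(after))
		return 1;

	printf("RC6 residency over ~1s: %" PRIu64 " ns\n", after - before);
	return 0;
}

On such a system the same counter can usually also be read with something like
"perf stat -e i915/rc6-residency/ -a sleep 1", since the event names come from
the "events" attribute group built by create_event_attributes().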