Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: MIT
0002 /*
0003  * Copyright © 2020 Intel Corporation
0004  */
0005 
0006 #include <linux/pm_qos.h>
0007 #include <linux/sort.h>
0008 
0009 #include "gem/i915_gem_internal.h"
0010 
0011 #include "intel_engine_heartbeat.h"
0012 #include "intel_engine_pm.h"
0013 #include "intel_engine_regs.h"
0014 #include "intel_gpu_commands.h"
0015 #include "intel_gt_clock_utils.h"
0016 #include "intel_gt_pm.h"
0017 #include "intel_rc6.h"
0018 #include "selftest_engine_heartbeat.h"
0019 #include "selftest_rps.h"
0020 #include "selftests/igt_flush_test.h"
0021 #include "selftests/igt_spinner.h"
0022 #include "selftests/librapl.h"
0023 
0024 /* Try to isolate the impact of cstates from determining frequency response */
0025 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
0026 
/*
 * Do-nothing work handler. The tests in this file install it as
 * rps->work.func while they run and restore the saved handler afterwards.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
    /* intentionally empty */
}
0030 
/*
 * Three-way comparison of two u64 values for sort().
 *
 * Returns -1 if *A < *B, 1 if *A > *B and 0 if they are equal.
 */
static int cmp_u64(const void *A, const void *B)
{
    const u64 lhs = *(const u64 *)A;
    const u64 rhs = *(const u64 *)B;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
0042 
/*
 * Three-way comparison of two u32 values for sort().
 *
 * Returns -1 if *A < *B, 1 if *A > *B and 0 if they are equal.
 */
static int cmp_u32(const void *A, const void *B)
{
    const u32 lhs = *(const u32 *)A;
    const u32 rhs = *(const u32 *)B;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
0054 
0055 static struct i915_vma *
0056 create_spin_counter(struct intel_engine_cs *engine,
0057             struct i915_address_space *vm,
0058             bool srm,
0059             u32 **cancel,
0060             u32 **counter)
0061 {
0062     enum {
0063         COUNT,
0064         INC,
0065         __NGPR__,
0066     };
0067 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
0068     struct drm_i915_gem_object *obj;
0069     struct i915_vma *vma;
0070     unsigned long end;
0071     u32 *base, *cs;
0072     int loop, i;
0073     int err;
0074 
0075     obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
0076     if (IS_ERR(obj))
0077         return ERR_CAST(obj);
0078 
0079     end = obj->base.size / sizeof(u32) - 1;
0080 
0081     vma = i915_vma_instance(obj, vm, NULL);
0082     if (IS_ERR(vma)) {
0083         err = PTR_ERR(vma);
0084         goto err_put;
0085     }
0086 
0087     err = i915_vma_pin(vma, 0, 0, PIN_USER);
0088     if (err)
0089         goto err_unlock;
0090 
0091     i915_vma_lock(vma);
0092 
0093     base = i915_gem_object_pin_map(obj, I915_MAP_WC);
0094     if (IS_ERR(base)) {
0095         err = PTR_ERR(base);
0096         goto err_unpin;
0097     }
0098     cs = base;
0099 
0100     *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
0101     for (i = 0; i < __NGPR__; i++) {
0102         *cs++ = i915_mmio_reg_offset(CS_GPR(i));
0103         *cs++ = 0;
0104         *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
0105         *cs++ = 0;
0106     }
0107 
0108     *cs++ = MI_LOAD_REGISTER_IMM(1);
0109     *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
0110     *cs++ = 1;
0111 
0112     loop = cs - base;
0113 
0114     /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
0115     for (i = 0; i < 1024; i++) {
0116         *cs++ = MI_MATH(4);
0117         *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
0118         *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
0119         *cs++ = MI_MATH_ADD;
0120         *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
0121 
0122         if (srm) {
0123             *cs++ = MI_STORE_REGISTER_MEM_GEN8;
0124             *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
0125             *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
0126             *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
0127         }
0128     }
0129 
0130     *cs++ = MI_BATCH_BUFFER_START_GEN8;
0131     *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
0132     *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
0133     GEM_BUG_ON(cs - base > end);
0134 
0135     i915_gem_object_flush_map(obj);
0136 
0137     *cancel = base + loop;
0138     *counter = srm ? memset32(base + end, 0, 1) : NULL;
0139     return vma;
0140 
0141 err_unpin:
0142     i915_vma_unpin(vma);
0143 err_unlock:
0144     i915_vma_unlock(vma);
0145 err_put:
0146     i915_gem_object_put(obj);
0147     return ERR_PTR(err);
0148 }
0149 
0150 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
0151 {
0152     u8 history[64], i;
0153     unsigned long end;
0154     int sleep;
0155 
0156     i = 0;
0157     memset(history, freq, sizeof(history));
0158     sleep = 20;
0159 
0160     /* The PCU does not change instantly, but drifts towards the goal? */
0161     end = jiffies + msecs_to_jiffies(timeout_ms);
0162     do {
0163         u8 act;
0164 
0165         act = read_cagf(rps);
0166         if (time_after(jiffies, end))
0167             return act;
0168 
0169         /* Target acquired */
0170         if (act == freq)
0171             return act;
0172 
0173         /* Any change within the last N samples? */
0174         if (!memchr_inv(history, act, sizeof(history)))
0175             return act;
0176 
0177         history[i] = act;
0178         i = (i + 1) % ARRAY_SIZE(history);
0179 
0180         usleep_range(sleep, 2 * sleep);
0181         sleep *= 2;
0182         if (sleep > timeout_ms * 20)
0183             sleep = timeout_ms * 20;
0184     } while (1);
0185 }
0186 
0187 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
0188 {
0189     mutex_lock(&rps->lock);
0190     GEM_BUG_ON(!intel_rps_is_active(rps));
0191     if (wait_for(!intel_rps_set(rps, freq), 50)) {
0192         mutex_unlock(&rps->lock);
0193         return 0;
0194     }
0195     GEM_BUG_ON(rps->last_freq != freq);
0196     mutex_unlock(&rps->lock);
0197 
0198     return wait_for_freq(rps, freq, 50);
0199 }
0200 
0201 static void show_pstate_limits(struct intel_rps *rps)
0202 {
0203     struct drm_i915_private *i915 = rps_to_i915(rps);
0204 
0205     if (IS_BROXTON(i915)) {
0206         pr_info("P_STATE_CAP[%x]: 0x%08x\n",
0207             i915_mmio_reg_offset(BXT_RP_STATE_CAP),
0208             intel_uncore_read(rps_to_uncore(rps),
0209                       BXT_RP_STATE_CAP));
0210     } else if (GRAPHICS_VER(i915) == 9) {
0211         pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
0212             i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
0213             intel_uncore_read(rps_to_uncore(rps),
0214                       GEN9_RP_STATE_LIMITS));
0215     }
0216 }
0217 
0218 int live_rps_clock_interval(void *arg)
0219 {
0220     struct intel_gt *gt = arg;
0221     struct intel_rps *rps = &gt->rps;
0222     void (*saved_work)(struct work_struct *wrk);
0223     struct intel_engine_cs *engine;
0224     enum intel_engine_id id;
0225     struct igt_spinner spin;
0226     int err = 0;
0227 
0228     if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
0229         return 0;
0230 
0231     if (igt_spinner_init(&spin, gt))
0232         return -ENOMEM;
0233 
0234     intel_gt_pm_wait_for_idle(gt);
0235     saved_work = rps->work.func;
0236     rps->work.func = dummy_rps_work;
0237 
0238     intel_gt_pm_get(gt);
0239     intel_rps_disable(&gt->rps);
0240 
0241     intel_gt_check_clock_frequency(gt);
0242 
0243     for_each_engine(engine, gt, id) {
0244         struct i915_request *rq;
0245         u32 cycles;
0246         u64 dt;
0247 
0248         if (!intel_engine_can_store_dword(engine))
0249             continue;
0250 
0251         st_engine_heartbeat_disable(engine);
0252 
0253         rq = igt_spinner_create_request(&spin,
0254                         engine->kernel_context,
0255                         MI_NOOP);
0256         if (IS_ERR(rq)) {
0257             st_engine_heartbeat_enable(engine);
0258             err = PTR_ERR(rq);
0259             break;
0260         }
0261 
0262         i915_request_add(rq);
0263 
0264         if (!igt_wait_for_spinner(&spin, rq)) {
0265             pr_err("%s: RPS spinner did not start\n",
0266                    engine->name);
0267             igt_spinner_end(&spin);
0268             st_engine_heartbeat_enable(engine);
0269             intel_gt_set_wedged(engine->gt);
0270             err = -EIO;
0271             break;
0272         }
0273 
0274         intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
0275 
0276         intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
0277 
0278         /* Set the evaluation interval to infinity! */
0279         intel_uncore_write_fw(gt->uncore,
0280                       GEN6_RP_UP_EI, 0xffffffff);
0281         intel_uncore_write_fw(gt->uncore,
0282                       GEN6_RP_UP_THRESHOLD, 0xffffffff);
0283 
0284         intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
0285                       GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
0286 
0287         if (wait_for(intel_uncore_read_fw(gt->uncore,
0288                           GEN6_RP_CUR_UP_EI),
0289                  10)) {
0290             /* Just skip the test; assume lack of HW support */
0291             pr_notice("%s: rps evaluation interval not ticking\n",
0292                   engine->name);
0293             err = -ENODEV;
0294         } else {
0295             ktime_t dt_[5];
0296             u32 cycles_[5];
0297             int i;
0298 
0299             for (i = 0; i < 5; i++) {
0300                 preempt_disable();
0301 
0302                 dt_[i] = ktime_get();
0303                 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
0304 
0305                 udelay(1000);
0306 
0307                 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
0308                 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
0309 
0310                 preempt_enable();
0311             }
0312 
0313             /* Use the median of both cycle/dt; close enough */
0314             sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
0315             cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
0316             sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
0317             dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
0318         }
0319 
0320         intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
0321         intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
0322 
0323         igt_spinner_end(&spin);
0324         st_engine_heartbeat_enable(engine);
0325 
0326         if (err == 0) {
0327             u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
0328             u32 expected =
0329                 intel_gt_ns_to_pm_interval(gt, dt);
0330 
0331             pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
0332                 engine->name, cycles, time, dt, expected,
0333                 gt->clock_frequency / 1000);
0334 
0335             if (10 * time < 8 * dt ||
0336                 8 * time > 10 * dt) {
0337                 pr_err("%s: rps clock time does not match walltime!\n",
0338                        engine->name);
0339                 err = -EINVAL;
0340             }
0341 
0342             if (10 * expected < 8 * cycles ||
0343                 8 * expected > 10 * cycles) {
0344                 pr_err("%s: walltime does not match rps clock ticks!\n",
0345                        engine->name);
0346                 err = -EINVAL;
0347             }
0348         }
0349 
0350         if (igt_flush_test(gt->i915))
0351             err = -EIO;
0352 
0353         break; /* once is enough */
0354     }
0355 
0356     intel_rps_enable(&gt->rps);
0357     intel_gt_pm_put(gt);
0358 
0359     igt_spinner_fini(&spin);
0360 
0361     intel_gt_pm_wait_for_idle(gt);
0362     rps->work.func = saved_work;
0363 
0364     if (err == -ENODEV) /* skipped, don't report a fail */
0365         err = 0;
0366 
0367     return err;
0368 }
0369 
0370 int live_rps_control(void *arg)
0371 {
0372     struct intel_gt *gt = arg;
0373     struct intel_rps *rps = &gt->rps;
0374     void (*saved_work)(struct work_struct *wrk);
0375     struct intel_engine_cs *engine;
0376     enum intel_engine_id id;
0377     struct igt_spinner spin;
0378     int err = 0;
0379 
0380     /*
0381      * Check that the actual frequency matches our requested frequency,
0382      * to verify our control mechanism. We have to be careful that the
0383      * PCU may throttle the GPU in which case the actual frequency used
0384      * will be lowered than requested.
0385      */
0386 
0387     if (!intel_rps_is_enabled(rps))
0388         return 0;
0389 
0390     if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
0391         return 0;
0392 
0393     if (igt_spinner_init(&spin, gt))
0394         return -ENOMEM;
0395 
0396     intel_gt_pm_wait_for_idle(gt);
0397     saved_work = rps->work.func;
0398     rps->work.func = dummy_rps_work;
0399 
0400     intel_gt_pm_get(gt);
0401     for_each_engine(engine, gt, id) {
0402         struct i915_request *rq;
0403         ktime_t min_dt, max_dt;
0404         int f, limit;
0405         int min, max;
0406 
0407         if (!intel_engine_can_store_dword(engine))
0408             continue;
0409 
0410         st_engine_heartbeat_disable(engine);
0411 
0412         rq = igt_spinner_create_request(&spin,
0413                         engine->kernel_context,
0414                         MI_NOOP);
0415         if (IS_ERR(rq)) {
0416             err = PTR_ERR(rq);
0417             break;
0418         }
0419 
0420         i915_request_add(rq);
0421 
0422         if (!igt_wait_for_spinner(&spin, rq)) {
0423             pr_err("%s: RPS spinner did not start\n",
0424                    engine->name);
0425             igt_spinner_end(&spin);
0426             st_engine_heartbeat_enable(engine);
0427             intel_gt_set_wedged(engine->gt);
0428             err = -EIO;
0429             break;
0430         }
0431 
0432         if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
0433             pr_err("%s: could not set minimum frequency [%x], only %x!\n",
0434                    engine->name, rps->min_freq, read_cagf(rps));
0435             igt_spinner_end(&spin);
0436             st_engine_heartbeat_enable(engine);
0437             show_pstate_limits(rps);
0438             err = -EINVAL;
0439             break;
0440         }
0441 
0442         for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
0443             if (rps_set_check(rps, f) < f)
0444                 break;
0445         }
0446 
0447         limit = rps_set_check(rps, f);
0448 
0449         if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
0450             pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
0451                    engine->name, rps->min_freq, read_cagf(rps));
0452             igt_spinner_end(&spin);
0453             st_engine_heartbeat_enable(engine);
0454             show_pstate_limits(rps);
0455             err = -EINVAL;
0456             break;
0457         }
0458 
0459         max_dt = ktime_get();
0460         max = rps_set_check(rps, limit);
0461         max_dt = ktime_sub(ktime_get(), max_dt);
0462 
0463         min_dt = ktime_get();
0464         min = rps_set_check(rps, rps->min_freq);
0465         min_dt = ktime_sub(ktime_get(), min_dt);
0466 
0467         igt_spinner_end(&spin);
0468         st_engine_heartbeat_enable(engine);
0469 
0470         pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
0471             engine->name,
0472             rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
0473             rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
0474             limit, intel_gpu_freq(rps, limit),
0475             min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
0476 
0477         if (limit == rps->min_freq) {
0478             pr_err("%s: GPU throttled to minimum!\n",
0479                    engine->name);
0480             show_pstate_limits(rps);
0481             err = -ENODEV;
0482             break;
0483         }
0484 
0485         if (igt_flush_test(gt->i915)) {
0486             err = -EIO;
0487             break;
0488         }
0489     }
0490     intel_gt_pm_put(gt);
0491 
0492     igt_spinner_fini(&spin);
0493 
0494     intel_gt_pm_wait_for_idle(gt);
0495     rps->work.func = saved_work;
0496 
0497     return err;
0498 }
0499 
0500 static void show_pcu_config(struct intel_rps *rps)
0501 {
0502     struct drm_i915_private *i915 = rps_to_i915(rps);
0503     unsigned int max_gpu_freq, min_gpu_freq;
0504     intel_wakeref_t wakeref;
0505     int gpu_freq;
0506 
0507     if (!HAS_LLC(i915))
0508         return;
0509 
0510     min_gpu_freq = rps->min_freq;
0511     max_gpu_freq = rps->max_freq;
0512     if (GRAPHICS_VER(i915) >= 9) {
0513         /* Convert GT frequency to 50 HZ units */
0514         min_gpu_freq /= GEN9_FREQ_SCALER;
0515         max_gpu_freq /= GEN9_FREQ_SCALER;
0516     }
0517 
0518     wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
0519 
0520     pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
0521     for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
0522         int ia_freq = gpu_freq;
0523 
0524         snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
0525                    &ia_freq, NULL);
0526 
0527         pr_info("%5d  %5d  %5d\n",
0528             gpu_freq * 50,
0529             ((ia_freq >> 0) & 0xff) * 100,
0530             ((ia_freq >> 8) & 0xff) * 100);
0531     }
0532 
0533     intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
0534 }
0535 
0536 static u64 __measure_frequency(u32 *cntr, int duration_ms)
0537 {
0538     u64 dc, dt;
0539 
0540     dt = ktime_get();
0541     dc = READ_ONCE(*cntr);
0542     usleep_range(1000 * duration_ms, 2000 * duration_ms);
0543     dc = READ_ONCE(*cntr) - dc;
0544     dt = ktime_get() - dt;
0545 
0546     return div64_u64(1000 * 1000 * dc, dt);
0547 }
0548 
0549 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
0550 {
0551     u64 x[5];
0552     int i;
0553 
0554     *freq = rps_set_check(rps, *freq);
0555     for (i = 0; i < 5; i++)
0556         x[i] = __measure_frequency(cntr, 2);
0557     *freq = (*freq + read_cagf(rps)) / 2;
0558 
0559     /* A simple triangle filter for better result stability */
0560     sort(x, 5, sizeof(*x), cmp_u64, NULL);
0561     return div_u64(x[1] + 2 * x[2] + x[3], 4);
0562 }
0563 
0564 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
0565                   int duration_ms)
0566 {
0567     u64 dc, dt;
0568 
0569     dt = ktime_get();
0570     dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
0571     usleep_range(1000 * duration_ms, 2000 * duration_ms);
0572     dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
0573     dt = ktime_get() - dt;
0574 
0575     return div64_u64(1000 * 1000 * dc, dt);
0576 }
0577 
0578 static u64 measure_cs_frequency_at(struct intel_rps *rps,
0579                    struct intel_engine_cs *engine,
0580                    int *freq)
0581 {
0582     u64 x[5];
0583     int i;
0584 
0585     *freq = rps_set_check(rps, *freq);
0586     for (i = 0; i < 5; i++)
0587         x[i] = __measure_cs_frequency(engine, 2);
0588     *freq = (*freq + read_cagf(rps)) / 2;
0589 
0590     /* A simple triangle filter for better result stability */
0591     sort(x, 5, sizeof(*x), cmp_u64, NULL);
0592     return div_u64(x[1] + 2 * x[2] + x[3], 4);
0593 }
0594 
/*
 * Check that @x lies strictly between (@f_n / @f_d) * @y and
 * (@f_d / @f_n) * @y, using only integer multiplication.
 *
 * Returns true when f_d * x > f_n * y and f_n * x < f_d * y.
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
    /* Lower bound: x must exceed y scaled by f_n / f_d. */
    if ((u64)f_d * x <= (u64)f_n * y)
        return false;

    /* Upper bound: x must stay below y scaled by f_d / f_n. */
    return (u64)f_n * x < (u64)f_d * y;
}
0599 
0600 int live_rps_frequency_cs(void *arg)
0601 {
0602     void (*saved_work)(struct work_struct *wrk);
0603     struct intel_gt *gt = arg;
0604     struct intel_rps *rps = &gt->rps;
0605     struct intel_engine_cs *engine;
0606     struct pm_qos_request qos;
0607     enum intel_engine_id id;
0608     int err = 0;
0609 
0610     /*
0611      * The premise is that the GPU does change frequency at our behest.
0612      * Let's check there is a correspondence between the requested
0613      * frequency, the actual frequency, and the observed clock rate.
0614      */
0615 
0616     if (!intel_rps_is_enabled(rps))
0617         return 0;
0618 
0619     if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
0620         return 0;
0621 
0622     if (CPU_LATENCY >= 0)
0623         cpu_latency_qos_add_request(&qos, CPU_LATENCY);
0624 
0625     intel_gt_pm_wait_for_idle(gt);
0626     saved_work = rps->work.func;
0627     rps->work.func = dummy_rps_work;
0628 
0629     for_each_engine(engine, gt, id) {
0630         struct i915_request *rq;
0631         struct i915_vma *vma;
0632         u32 *cancel, *cntr;
0633         struct {
0634             u64 count;
0635             int freq;
0636         } min, max;
0637 
0638         st_engine_heartbeat_disable(engine);
0639 
0640         vma = create_spin_counter(engine,
0641                       engine->kernel_context->vm, false,
0642                       &cancel, &cntr);
0643         if (IS_ERR(vma)) {
0644             err = PTR_ERR(vma);
0645             st_engine_heartbeat_enable(engine);
0646             break;
0647         }
0648 
0649         rq = intel_engine_create_kernel_request(engine);
0650         if (IS_ERR(rq)) {
0651             err = PTR_ERR(rq);
0652             goto err_vma;
0653         }
0654 
0655         err = i915_request_await_object(rq, vma->obj, false);
0656         if (!err)
0657             err = i915_vma_move_to_active(vma, rq, 0);
0658         if (!err)
0659             err = rq->engine->emit_bb_start(rq,
0660                             vma->node.start,
0661                             PAGE_SIZE, 0);
0662         i915_request_add(rq);
0663         if (err)
0664             goto err_vma;
0665 
0666         if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
0667                  10)) {
0668             pr_err("%s: timed loop did not start\n",
0669                    engine->name);
0670             goto err_vma;
0671         }
0672 
0673         min.freq = rps->min_freq;
0674         min.count = measure_cs_frequency_at(rps, engine, &min.freq);
0675 
0676         max.freq = rps->max_freq;
0677         max.count = measure_cs_frequency_at(rps, engine, &max.freq);
0678 
0679         pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
0680             engine->name,
0681             min.count, intel_gpu_freq(rps, min.freq),
0682             max.count, intel_gpu_freq(rps, max.freq),
0683             (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
0684                              max.freq * min.count));
0685 
0686         if (!scaled_within(max.freq * min.count,
0687                    min.freq * max.count,
0688                    2, 3)) {
0689             int f;
0690 
0691             pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
0692                    engine->name,
0693                    max.freq * min.count,
0694                    min.freq * max.count);
0695             show_pcu_config(rps);
0696 
0697             for (f = min.freq + 1; f <= rps->max_freq; f++) {
0698                 int act = f;
0699                 u64 count;
0700 
0701                 count = measure_cs_frequency_at(rps, engine, &act);
0702                 if (act < f)
0703                     break;
0704 
0705                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
0706                     engine->name,
0707                     act, intel_gpu_freq(rps, act), count,
0708                     (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
0709                                      act * min.count));
0710 
0711                 f = act; /* may skip ahead [pcu granularity] */
0712             }
0713 
0714             err = -EINTR; /* ignore error, continue on with test */
0715         }
0716 
0717 err_vma:
0718         *cancel = MI_BATCH_BUFFER_END;
0719         i915_gem_object_flush_map(vma->obj);
0720         i915_gem_object_unpin_map(vma->obj);
0721         i915_vma_unpin(vma);
0722         i915_vma_unlock(vma);
0723         i915_vma_put(vma);
0724 
0725         st_engine_heartbeat_enable(engine);
0726         if (igt_flush_test(gt->i915))
0727             err = -EIO;
0728         if (err)
0729             break;
0730     }
0731 
0732     intel_gt_pm_wait_for_idle(gt);
0733     rps->work.func = saved_work;
0734 
0735     if (CPU_LATENCY >= 0)
0736         cpu_latency_qos_remove_request(&qos);
0737 
0738     return err;
0739 }
0740 
0741 int live_rps_frequency_srm(void *arg)
0742 {
0743     void (*saved_work)(struct work_struct *wrk);
0744     struct intel_gt *gt = arg;
0745     struct intel_rps *rps = &gt->rps;
0746     struct intel_engine_cs *engine;
0747     struct pm_qos_request qos;
0748     enum intel_engine_id id;
0749     int err = 0;
0750 
0751     /*
0752      * The premise is that the GPU does change frequency at our behest.
0753      * Let's check there is a correspondence between the requested
0754      * frequency, the actual frequency, and the observed clock rate.
0755      */
0756 
0757     if (!intel_rps_is_enabled(rps))
0758         return 0;
0759 
0760     if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
0761         return 0;
0762 
0763     if (CPU_LATENCY >= 0)
0764         cpu_latency_qos_add_request(&qos, CPU_LATENCY);
0765 
0766     intel_gt_pm_wait_for_idle(gt);
0767     saved_work = rps->work.func;
0768     rps->work.func = dummy_rps_work;
0769 
0770     for_each_engine(engine, gt, id) {
0771         struct i915_request *rq;
0772         struct i915_vma *vma;
0773         u32 *cancel, *cntr;
0774         struct {
0775             u64 count;
0776             int freq;
0777         } min, max;
0778 
0779         st_engine_heartbeat_disable(engine);
0780 
0781         vma = create_spin_counter(engine,
0782                       engine->kernel_context->vm, true,
0783                       &cancel, &cntr);
0784         if (IS_ERR(vma)) {
0785             err = PTR_ERR(vma);
0786             st_engine_heartbeat_enable(engine);
0787             break;
0788         }
0789 
0790         rq = intel_engine_create_kernel_request(engine);
0791         if (IS_ERR(rq)) {
0792             err = PTR_ERR(rq);
0793             goto err_vma;
0794         }
0795 
0796         err = i915_request_await_object(rq, vma->obj, false);
0797         if (!err)
0798             err = i915_vma_move_to_active(vma, rq, 0);
0799         if (!err)
0800             err = rq->engine->emit_bb_start(rq,
0801                             vma->node.start,
0802                             PAGE_SIZE, 0);
0803         i915_request_add(rq);
0804         if (err)
0805             goto err_vma;
0806 
0807         if (wait_for(READ_ONCE(*cntr), 10)) {
0808             pr_err("%s: timed loop did not start\n",
0809                    engine->name);
0810             goto err_vma;
0811         }
0812 
0813         min.freq = rps->min_freq;
0814         min.count = measure_frequency_at(rps, cntr, &min.freq);
0815 
0816         max.freq = rps->max_freq;
0817         max.count = measure_frequency_at(rps, cntr, &max.freq);
0818 
0819         pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
0820             engine->name,
0821             min.count, intel_gpu_freq(rps, min.freq),
0822             max.count, intel_gpu_freq(rps, max.freq),
0823             (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
0824                              max.freq * min.count));
0825 
0826         if (!scaled_within(max.freq * min.count,
0827                    min.freq * max.count,
0828                    1, 2)) {
0829             int f;
0830 
0831             pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
0832                    engine->name,
0833                    max.freq * min.count,
0834                    min.freq * max.count);
0835             show_pcu_config(rps);
0836 
0837             for (f = min.freq + 1; f <= rps->max_freq; f++) {
0838                 int act = f;
0839                 u64 count;
0840 
0841                 count = measure_frequency_at(rps, cntr, &act);
0842                 if (act < f)
0843                     break;
0844 
0845                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
0846                     engine->name,
0847                     act, intel_gpu_freq(rps, act), count,
0848                     (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
0849                                      act * min.count));
0850 
0851                 f = act; /* may skip ahead [pcu granularity] */
0852             }
0853 
0854             err = -EINTR; /* ignore error, continue on with test */
0855         }
0856 
0857 err_vma:
0858         *cancel = MI_BATCH_BUFFER_END;
0859         i915_gem_object_flush_map(vma->obj);
0860         i915_gem_object_unpin_map(vma->obj);
0861         i915_vma_unpin(vma);
0862         i915_vma_unlock(vma);
0863         i915_vma_put(vma);
0864 
0865         st_engine_heartbeat_enable(engine);
0866         if (igt_flush_test(gt->i915))
0867             err = -EIO;
0868         if (err)
0869             break;
0870     }
0871 
0872     intel_gt_pm_wait_for_idle(gt);
0873     rps->work.func = saved_work;
0874 
0875     if (CPU_LATENCY >= 0)
0876         cpu_latency_qos_remove_request(&qos);
0877 
0878     return err;
0879 }
0880 
0881 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
0882 {
0883     /* Flush any previous EI */
0884     usleep_range(timeout_us, 2 * timeout_us);
0885 
0886     /* Reset the interrupt status */
0887     rps_disable_interrupts(rps);
0888     GEM_BUG_ON(rps->pm_iir);
0889     rps_enable_interrupts(rps);
0890 
0891     /* And then wait for the timeout, for real this time */
0892     usleep_range(2 * timeout_us, 3 * timeout_us);
0893 }
0894 
0895 static int __rps_up_interrupt(struct intel_rps *rps,
0896                   struct intel_engine_cs *engine,
0897                   struct igt_spinner *spin)
0898 {
0899     struct intel_uncore *uncore = engine->uncore;
0900     struct i915_request *rq;
0901     u32 timeout;
0902 
0903     if (!intel_engine_can_store_dword(engine))
0904         return 0;
0905 
0906     rps_set_check(rps, rps->min_freq);
0907 
0908     rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
0909     if (IS_ERR(rq))
0910         return PTR_ERR(rq);
0911 
0912     i915_request_get(rq);
0913     i915_request_add(rq);
0914 
0915     if (!igt_wait_for_spinner(spin, rq)) {
0916         pr_err("%s: RPS spinner did not start\n",
0917                engine->name);
0918         i915_request_put(rq);
0919         intel_gt_set_wedged(engine->gt);
0920         return -EIO;
0921     }
0922 
0923     if (!intel_rps_is_active(rps)) {
0924         pr_err("%s: RPS not enabled on starting spinner\n",
0925                engine->name);
0926         igt_spinner_end(spin);
0927         i915_request_put(rq);
0928         return -EINVAL;
0929     }
0930 
0931     if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
0932         pr_err("%s: RPS did not register UP interrupt\n",
0933                engine->name);
0934         i915_request_put(rq);
0935         return -EINVAL;
0936     }
0937 
0938     if (rps->last_freq != rps->min_freq) {
0939         pr_err("%s: RPS did not program min frequency\n",
0940                engine->name);
0941         i915_request_put(rq);
0942         return -EINVAL;
0943     }
0944 
0945     timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
0946     timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
0947     timeout = DIV_ROUND_UP(timeout, 1000);
0948 
0949     sleep_for_ei(rps, timeout);
0950     GEM_BUG_ON(i915_request_completed(rq));
0951 
0952     igt_spinner_end(spin);
0953     i915_request_put(rq);
0954 
0955     if (rps->cur_freq != rps->min_freq) {
0956         pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
0957                engine->name, intel_rps_read_actual_frequency(rps));
0958         return -EINVAL;
0959     }
0960 
0961     if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
0962         pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
0963                engine->name, rps->pm_iir,
0964                intel_uncore_read(uncore, GEN6_RP_PREV_UP),
0965                intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
0966                intel_uncore_read(uncore, GEN6_RP_UP_EI));
0967         return -EINVAL;
0968     }
0969 
0970     return 0;
0971 }
0972 
0973 static int __rps_down_interrupt(struct intel_rps *rps,
0974                 struct intel_engine_cs *engine)
0975 {
0976     struct intel_uncore *uncore = engine->uncore;
0977     u32 timeout;
0978 
0979     rps_set_check(rps, rps->max_freq);
0980 
0981     if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
0982         pr_err("%s: RPS did not register DOWN interrupt\n",
0983                engine->name);
0984         return -EINVAL;
0985     }
0986 
0987     if (rps->last_freq != rps->max_freq) {
0988         pr_err("%s: RPS did not program max frequency\n",
0989                engine->name);
0990         return -EINVAL;
0991     }
0992 
0993     timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
0994     timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
0995     timeout = DIV_ROUND_UP(timeout, 1000);
0996 
0997     sleep_for_ei(rps, timeout);
0998 
0999     if (rps->cur_freq != rps->max_freq) {
1000         pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1001                engine->name,
1002                intel_rps_read_actual_frequency(rps));
1003         return -EINVAL;
1004     }
1005 
1006     if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1007         pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1008                engine->name, rps->pm_iir,
1009                intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1010                intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1011                intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1012                intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1013                intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1014                intel_uncore_read(uncore, GEN6_RP_UP_EI));
1015         return -EINVAL;
1016     }
1017 
1018     return 0;
1019 }
1020 
1021 int live_rps_interrupt(void *arg)
1022 {
1023     struct intel_gt *gt = arg;
1024     struct intel_rps *rps = &gt->rps;
1025     void (*saved_work)(struct work_struct *wrk);
1026     struct intel_engine_cs *engine;
1027     enum intel_engine_id id;
1028     struct igt_spinner spin;
1029     u32 pm_events;
1030     int err = 0;
1031 
1032     /*
1033      * First, let's check whether or not we are receiving interrupts.
1034      */
1035 
1036     if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1037         return 0;
1038 
1039     intel_gt_pm_get(gt);
1040     pm_events = rps->pm_events;
1041     intel_gt_pm_put(gt);
1042     if (!pm_events) {
1043         pr_err("No RPS PM events registered, but RPS is enabled?\n");
1044         return -ENODEV;
1045     }
1046 
1047     if (igt_spinner_init(&spin, gt))
1048         return -ENOMEM;
1049 
1050     intel_gt_pm_wait_for_idle(gt);
1051     saved_work = rps->work.func;
1052     rps->work.func = dummy_rps_work;
1053 
1054     for_each_engine(engine, gt, id) {
1055         /* Keep the engine busy with a spinner; expect an UP! */
1056         if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1057             intel_gt_pm_wait_for_idle(engine->gt);
1058             GEM_BUG_ON(intel_rps_is_active(rps));
1059 
1060             st_engine_heartbeat_disable(engine);
1061 
1062             err = __rps_up_interrupt(rps, engine, &spin);
1063 
1064             st_engine_heartbeat_enable(engine);
1065             if (err)
1066                 goto out;
1067 
1068             intel_gt_pm_wait_for_idle(engine->gt);
1069         }
1070 
1071         /* Keep the engine awake but idle and check for DOWN */
1072         if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1073             st_engine_heartbeat_disable(engine);
1074             intel_rc6_disable(&gt->rc6);
1075 
1076             err = __rps_down_interrupt(rps, engine);
1077 
1078             intel_rc6_enable(&gt->rc6);
1079             st_engine_heartbeat_enable(engine);
1080             if (err)
1081                 goto out;
1082         }
1083     }
1084 
1085 out:
1086     if (igt_flush_test(gt->i915))
1087         err = -EIO;
1088 
1089     igt_spinner_fini(&spin);
1090 
1091     intel_gt_pm_wait_for_idle(gt);
1092     rps->work.func = saved_work;
1093 
1094     return err;
1095 }
1096 
1097 static u64 __measure_power(int duration_ms)
1098 {
1099     u64 dE, dt;
1100 
1101     dt = ktime_get();
1102     dE = librapl_energy_uJ();
1103     usleep_range(1000 * duration_ms, 2000 * duration_ms);
1104     dE = librapl_energy_uJ() - dE;
1105     dt = ktime_get() - dt;
1106 
1107     return div64_u64(1000 * 1000 * dE, dt);
1108 }
1109 
1110 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1111 {
1112     u64 x[5];
1113     int i;
1114 
1115     *freq = rps_set_check(rps, *freq);
1116     for (i = 0; i < 5; i++)
1117         x[i] = __measure_power(5);
1118     *freq = (*freq + read_cagf(rps)) / 2;
1119 
1120     /* A simple triangle filter for better result stability */
1121     sort(x, 5, sizeof(*x), cmp_u64, NULL);
1122     return div_u64(x[1] + 2 * x[2] + x[3], 4);
1123 }
1124 
1125 int live_rps_power(void *arg)
1126 {
1127     struct intel_gt *gt = arg;
1128     struct intel_rps *rps = &gt->rps;
1129     void (*saved_work)(struct work_struct *wrk);
1130     struct intel_engine_cs *engine;
1131     enum intel_engine_id id;
1132     struct igt_spinner spin;
1133     int err = 0;
1134 
1135     /*
1136      * Our fundamental assumption is that running at lower frequency
1137      * actually saves power. Let's see if our RAPL measurement support
1138      * that theory.
1139      */
1140 
1141     if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1142         return 0;
1143 
1144     if (!librapl_supported(gt->i915))
1145         return 0;
1146 
1147     if (igt_spinner_init(&spin, gt))
1148         return -ENOMEM;
1149 
1150     intel_gt_pm_wait_for_idle(gt);
1151     saved_work = rps->work.func;
1152     rps->work.func = dummy_rps_work;
1153 
1154     for_each_engine(engine, gt, id) {
1155         struct i915_request *rq;
1156         struct {
1157             u64 power;
1158             int freq;
1159         } min, max;
1160 
1161         if (!intel_engine_can_store_dword(engine))
1162             continue;
1163 
1164         st_engine_heartbeat_disable(engine);
1165 
1166         rq = igt_spinner_create_request(&spin,
1167                         engine->kernel_context,
1168                         MI_NOOP);
1169         if (IS_ERR(rq)) {
1170             st_engine_heartbeat_enable(engine);
1171             err = PTR_ERR(rq);
1172             break;
1173         }
1174 
1175         i915_request_add(rq);
1176 
1177         if (!igt_wait_for_spinner(&spin, rq)) {
1178             pr_err("%s: RPS spinner did not start\n",
1179                    engine->name);
1180             igt_spinner_end(&spin);
1181             st_engine_heartbeat_enable(engine);
1182             intel_gt_set_wedged(engine->gt);
1183             err = -EIO;
1184             break;
1185         }
1186 
1187         max.freq = rps->max_freq;
1188         max.power = measure_power_at(rps, &max.freq);
1189 
1190         min.freq = rps->min_freq;
1191         min.power = measure_power_at(rps, &min.freq);
1192 
1193         igt_spinner_end(&spin);
1194         st_engine_heartbeat_enable(engine);
1195 
1196         pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1197             engine->name,
1198             min.power, intel_gpu_freq(rps, min.freq),
1199             max.power, intel_gpu_freq(rps, max.freq));
1200 
1201         if (10 * min.freq >= 9 * max.freq) {
1202             pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1203                   min.freq, intel_gpu_freq(rps, min.freq),
1204                   max.freq, intel_gpu_freq(rps, max.freq));
1205             continue;
1206         }
1207 
1208         if (11 * min.power > 10 * max.power) {
1209             pr_err("%s: did not conserve power when setting lower frequency!\n",
1210                    engine->name);
1211             err = -EINVAL;
1212             break;
1213         }
1214 
1215         if (igt_flush_test(gt->i915)) {
1216             err = -EIO;
1217             break;
1218         }
1219     }
1220 
1221     igt_spinner_fini(&spin);
1222 
1223     intel_gt_pm_wait_for_idle(gt);
1224     rps->work.func = saved_work;
1225 
1226     return err;
1227 }
1228 
1229 int live_rps_dynamic(void *arg)
1230 {
1231     struct intel_gt *gt = arg;
1232     struct intel_rps *rps = &gt->rps;
1233     struct intel_engine_cs *engine;
1234     enum intel_engine_id id;
1235     struct igt_spinner spin;
1236     int err = 0;
1237 
1238     /*
1239      * We've looked at the bascs, and have established that we
1240      * can change the clock frequency and that the HW will generate
1241      * interrupts based on load. Now we check how we integrate those
1242      * moving parts into dynamic reclocking based on load.
1243      */
1244 
1245     if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1246         return 0;
1247 
1248     if (igt_spinner_init(&spin, gt))
1249         return -ENOMEM;
1250 
1251     if (intel_rps_has_interrupts(rps))
1252         pr_info("RPS has interrupt support\n");
1253     if (intel_rps_uses_timer(rps))
1254         pr_info("RPS has timer support\n");
1255 
1256     for_each_engine(engine, gt, id) {
1257         struct i915_request *rq;
1258         struct {
1259             ktime_t dt;
1260             u8 freq;
1261         } min, max;
1262 
1263         if (!intel_engine_can_store_dword(engine))
1264             continue;
1265 
1266         intel_gt_pm_wait_for_idle(gt);
1267         GEM_BUG_ON(intel_rps_is_active(rps));
1268         rps->cur_freq = rps->min_freq;
1269 
1270         intel_engine_pm_get(engine);
1271         intel_rc6_disable(&gt->rc6);
1272         GEM_BUG_ON(rps->last_freq != rps->min_freq);
1273 
1274         rq = igt_spinner_create_request(&spin,
1275                         engine->kernel_context,
1276                         MI_NOOP);
1277         if (IS_ERR(rq)) {
1278             err = PTR_ERR(rq);
1279             goto err;
1280         }
1281 
1282         i915_request_add(rq);
1283 
1284         max.dt = ktime_get();
1285         max.freq = wait_for_freq(rps, rps->max_freq, 500);
1286         max.dt = ktime_sub(ktime_get(), max.dt);
1287 
1288         igt_spinner_end(&spin);
1289 
1290         min.dt = ktime_get();
1291         min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1292         min.dt = ktime_sub(ktime_get(), min.dt);
1293 
1294         pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1295             engine->name,
1296             max.freq, intel_gpu_freq(rps, max.freq),
1297             ktime_to_ns(max.dt),
1298             min.freq, intel_gpu_freq(rps, min.freq),
1299             ktime_to_ns(min.dt));
1300         if (min.freq >= max.freq) {
1301             pr_err("%s: dynamic reclocking of spinner failed\n!",
1302                    engine->name);
1303             err = -EINVAL;
1304         }
1305 
1306 err:
1307         intel_rc6_enable(&gt->rc6);
1308         intel_engine_pm_put(engine);
1309 
1310         if (igt_flush_test(gt->i915))
1311             err = -EIO;
1312         if (err)
1313             break;
1314     }
1315 
1316     igt_spinner_fini(&spin);
1317 
1318     return err;
1319 }