// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

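/*
 * A no-op substitute for the RPS worker: the tests install this so that
 * background reclocking cannot race with their manual frequency control.
 */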
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

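/*
 * Build a self-looping batch that increments a CS_GPR once per pass of an
 * unrolled MI_MATH loop, i.e. a counter that ticks at a rate proportional
 * to the CS clock. Writing MI_BATCH_BUFFER_END over *cancel terminates the
 * loop; with srm, each pass also stores the running count to the last dword
 * of the buffer for the CPU to sample through *counter.
 */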
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

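/*
 * Poll the actual frequency (CAGF) until it reaches the target, stops
 * changing for a whole window of samples, or the timeout expires; returns
 * the last frequency observed.
 */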
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

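/*
 * Request a new frequency under rps->lock and then wait for the hardware
 * to settle; returns the frequency actually achieved, or 0 on failure.
 */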
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

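/*
 * Check that the GT clock frequency used for RPS agrees with walltime: keep
 * an engine busy with a spinner, pin the evaluation interval open, and
 * compare the C0 cycles reported by GEN6_RP_CUR_UP_EI against ktime.
 */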
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, not a failure */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

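/*
 * Dump the frequency table the PCU uses to pair each GPU frequency with an
 * effective CPU ("eCPU") and ring ("eRing") frequency, for post-mortem.
 */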
static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 HZ units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

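/*
 * Sample the memory-mapped loop counter over a sleep and convert the delta
 * into a rate; with dc in counts and dt in ns, the result is in KHz.
 */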
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

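/*
 * As live_rps_frequency_cs, but sample the loop counter from memory via the
 * batch's own SRM writes instead of reading the CS_GPR over MMIO.
 */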
int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

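/*
 * Saturate the engine with a spinner at the minimum frequency and check
 * that the hardware raises an UP threshold interrupt within one evaluation
 * interval.
 */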
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

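/*
 * Park at the maximum frequency with the engine awake but idle and check
 * that the hardware raises a DOWN threshold (or timeout) interrupt within
 * one evaluation interval.
 */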
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

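/*
 * Sample the RAPL energy counter over a sleep and convert the delta into
 * mean power: with dE in uJ and dt in ns, the result is in mW.
 */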
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've checked that we can control the frequency by hand;
	 * now, leaving RPS to its own devices, verify that the GPU
	 * dynamically raises its frequency while busy with a spinner
	 * and relaxes back down to the minimum once idle again.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}