// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/pm_runtime.h>
#include <linux/string_helpers.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_vgpu.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_pcode.h"
#include "intel_rc6.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V while in this stage.
 * When RC6 support is enabled, this stage is entered automatically whenever
 * the GPU is idle, and the GPU wakes up automatically as soon as a new
 * workload arrives.
 *
 * Intel GPUs provide different RC6 modes, which differ in the latency
 * required to enter and leave RC6 and in the voltage consumed by the GPU
 * while in each state.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is deep RC6, and
 * RC6pp is the deepest RC6. Hardware support for them varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to enter and leave.
 */

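/*
 * struct intel_rc6 is embedded in struct intel_gt, so container_of() is
 * enough to recover the GT and, from there, the uncore and device handles
 * used throughout this file.
 */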
static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
    return container_of(rc6, struct intel_gt, rc6);
}

static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
    return rc6_to_gt(rc)->uncore;
}

static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
    return rc6_to_gt(rc)->i915;
}

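/*
 * Raw register write helper. intel_uncore_write_fw() does not take
 * forcewake itself, so callers must already hold the required forcewake
 * domains (see intel_rc6_enable()) or target registers that need none.
 */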
static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
    intel_uncore_write_fw(uncore, reg, val);
}

static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
    struct intel_gt *gt = rc6_to_gt(rc6);
    struct intel_uncore *uncore = gt->uncore;
    struct intel_engine_cs *engine;
    enum intel_engine_id id;
    u32 pg_enable;
    int i;

    /*
     * With GuCRC, these parameters are set by GuC
     */
    if (!intel_uc_uses_guc_rc(&gt->uc)) {
        /* 2b: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
        set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
            set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        set(uncore, GEN6_RC_SLEEP, 0);

        set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
    }

    /*
     * 2c: Program Coarse Power Gating Policies.
     *
     * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
     * use instead is a more conservative estimate for the maximum time
     * it takes us to service a CS interrupt and submit a new ELSP - that
     * is the time which the GPU is idle waiting for the CPU to select the
     * next request to execute. If the idle hysteresis is less than that
     * interrupt service latency, the hardware will automatically gate
     * the power well and we will then incur the wake up cost on top of
     * the service latency. A similar guide from plane_state is that we
     * do not want the enable hysteresis to be less than the wakeup latency.
     *
     * igt/gem_exec_nop/sequential provides a rough estimate for the
     * service latency, and puts it under 10us for Icelake, similar to
     * Broadwell+. To be conservative, we want to factor in a context
     * switch on top (due to ksoftirqd).
     */
    set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
    set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);

    /* 3a: Enable RC6
     *
     * With GuCRC, we do not enable bit 31 of RC_CTL,
     * thus allowing GuC to control RC6 entry/exit fully instead.
     * We will not set the HW ENABLE and EI bits
     */
    if (!intel_guc_rc_enable(&gt->uc.guc))
        rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
    else
        rc6->ctl_enable =
            GEN6_RC_CTL_HW_ENABLE |
            GEN6_RC_CTL_RC6_ENABLE |
            GEN6_RC_CTL_EI_MODE(1);

    /* Wa_16011777198 - Render powergating must remain disabled */
    if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
        IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
        pg_enable =
            GEN9_MEDIA_PG_ENABLE |
            GEN11_MEDIA_SAMPLER_PG_ENABLE;
    else
        pg_enable =
            GEN9_RENDER_PG_ENABLE |
            GEN9_MEDIA_PG_ENABLE |
            GEN11_MEDIA_SAMPLER_PG_ENABLE;

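    /*
     * On Gen12+, additionally powergate the HCP and MFX units of each
     * video decode engine that is actually present.
     */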
    if (GRAPHICS_VER(gt->i915) >= 12) {
        for (i = 0; i < I915_MAX_VCS; i++)
            if (HAS_ENGINE(gt, _VCS(i)))
                pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
                          VDN_MFX_POWERGATE_ENABLE(i));
    }

    set(uncore, GEN9_PG_ENABLE, pg_enable);
}

static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct intel_engine_cs *engine;
    enum intel_engine_id id;

    /* 2b: Program RC6 thresholds. */
    if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) {
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
        set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
    } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
        /*
         * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
         * when CPG is enabled
         */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
    } else {
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
    }

    set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
    set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
    for_each_engine(engine, rc6_to_gt(rc6), id)
        set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

    set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

    set(uncore, GEN6_RC_SLEEP, 0);

    /*
     * 2c: Program Coarse Power Gating Policies.
     *
     * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
     * use instead is a more conservative estimate for the maximum time
     * it takes us to service a CS interrupt and submit a new ELSP - that
     * is the time which the GPU is idle waiting for the CPU to select the
     * next request to execute. If the idle hysteresis is less than that
     * interrupt service latency, the hardware will automatically gate
     * the power well and we will then incur the wake up cost on top of
     * the service latency. A similar guide from plane_state is that we
     * do not want the enable hysteresis to be less than the wakeup latency.
     *
     * igt/gem_exec_nop/sequential provides a rough estimate for the
     * service latency, and puts it around 10us for Broadwell (and other
     * big core) and around 40us for Broxton (and other low power cores).
     * [Note that for legacy ringbuffer submission, this is less than 1us!]
     * However, the wakeup latency on Broxton is closer to 100us. To be
     * conservative, we have to factor in a context switch on top (due
     * to ksoftirqd).
     */
    set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
    set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

    /* 3a: Enable RC6 */
    set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

    rc6->ctl_enable =
        GEN6_RC_CTL_HW_ENABLE |
        GEN6_RC_CTL_RC6_ENABLE |
        GEN6_RC_CTL_EI_MODE(1);

    /*
     * WaRsDisableCoarsePowerGating:skl,cnl
     *   - Render/Media PG need to be disabled with RC6.
     */
    if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
        set(uncore, GEN9_PG_ENABLE,
            GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct intel_engine_cs *engine;
    enum intel_engine_id id;

    /* 2b: Program RC6 thresholds. */
    set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
    set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
    set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
    for_each_engine(engine, rc6_to_gt(rc6), id)
        set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
    set(uncore, GEN6_RC_SLEEP, 0);
    set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

    /* 3: Enable RC6 */
    rc6->ctl_enable =
        GEN6_RC_CTL_HW_ENABLE |
        GEN7_RC_CTL_TO_MODE |
        GEN6_RC_CTL_RC6_ENABLE;
}


static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    struct intel_engine_cs *engine;
    enum intel_engine_id id;
    u32 rc6vids, rc6_mask;
    int ret;

    set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
    set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
    set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
    set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
    set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

    for_each_engine(engine, rc6_to_gt(rc6), id)
        set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

    set(uncore, GEN6_RC_SLEEP, 0);
    set(uncore, GEN6_RC1e_THRESHOLD, 1000);
    set(uncore, GEN6_RC6_THRESHOLD, 50000);
    set(uncore, GEN6_RC6p_THRESHOLD, 150000);
    set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

    /* We don't use those on Haswell */
    rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
    if (HAS_RC6p(i915))
        rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
    if (HAS_RC6pp(i915))
        rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
    rc6->ctl_enable =
        rc6_mask |
        GEN6_RC_CTL_EI_MODE(1) |
        GEN6_RC_CTL_HW_ENABLE;

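    /*
     * Sanity-check the minimum RC6 voltage reported by pcode; some SNB
     * BIOSes program a value below the 450mV floor the driver enforces.
     */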
    rc6vids = 0;
    ret = snb_pcode_read(rc6_to_gt(rc6)->uncore, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL);
    if (GRAPHICS_VER(i915) == 6 && ret) {
        drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
    } else if (GRAPHICS_VER(i915) == 6 &&
           (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
        drm_dbg(&i915->drm,
            "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
            GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
        rc6vids &= 0xffff00;
        rc6vids |= GEN6_ENCODE_RC6_VID(450);
        ret = snb_pcode_write(rc6_to_gt(rc6)->uncore, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
        if (ret)
            drm_err(&i915->drm,
                "Couldn't fix incorrect rc6 voltage\n");
    }
}

/* Check that the PCBR address is not empty. */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    resource_size_t pctx_paddr, paddr;
    resource_size_t pctx_size = 32 * SZ_1K;
    u32 pcbr;

    pcbr = intel_uncore_read(uncore, VLV_PCBR);
    if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
        drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
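        /*
         * Carve the 32KiB power context out of the very top of stolen
         * memory (DSM), 4KiB aligned.
         */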
        paddr = i915->dsm.end + 1 - pctx_size;
        GEM_BUG_ON(paddr > U32_MAX);

        pctx_paddr = (paddr & ~4095);
        intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
    }

    return 0;
}

static int vlv_rc6_init(struct intel_rc6 *rc6)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct drm_i915_gem_object *pctx;
    resource_size_t pctx_paddr;
    resource_size_t pctx_size = 24 * SZ_1K;
    u32 pcbr;

    pcbr = intel_uncore_read(uncore, VLV_PCBR);
    if (pcbr) {
        /* BIOS set it up already, grab the pre-alloc'd space */
        resource_size_t pcbr_offset;

        pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
        pctx = i915_gem_object_create_region_at(i915->mm.stolen_region,
                            pcbr_offset,
                            pctx_size,
                            0);
        if (IS_ERR(pctx))
            return PTR_ERR(pctx);

        goto out;
    }

    drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");

    /*
     * From the Gunit register HAS:
     * The Gfx driver is expected to program this register and ensure
     * proper allocation within Gfx stolen memory.  For example, this
     * register should be programmed such that the PCBR range does not
     * overlap with other ranges, such as the frame buffer, protected
     * memory, or any other relevant ranges.
     */
    pctx = i915_gem_object_create_stolen(i915, pctx_size);
    if (IS_ERR(pctx)) {
        drm_dbg(&i915->drm,
            "not enough stolen space for PCTX, disabling\n");
        return PTR_ERR(pctx);
    }

    GEM_BUG_ON(range_overflows_end_t(u64,
                     i915->dsm.start,
                     pctx->stolen->start,
                     U32_MAX));
    pctx_paddr = i915->dsm.start + pctx->stolen->start;
    intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
    rc6->pctx = pctx;
    return 0;
}

static void chv_rc6_enable(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct intel_engine_cs *engine;
    enum intel_engine_id id;

    /* 2a: Program RC6 thresholds. */
    set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
    set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
    set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

    for_each_engine(engine, rc6_to_gt(rc6), id)
        set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
    set(uncore, GEN6_RC_SLEEP, 0);

    /* TO threshold set to 500 us (0x186 * 1.28 us) */
    set(uncore, GEN6_RC6_THRESHOLD, 0x186);

    /* Allows RC6 residency counter to work */
    set(uncore, VLV_COUNTER_CONTROL,
        _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                   VLV_MEDIA_RC6_COUNT_EN |
                   VLV_RENDER_RC6_COUNT_EN));

    /* 3: Enable RC6 */
    rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}

static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct intel_engine_cs *engine;
    enum intel_engine_id id;

    set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
    set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
    set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

    for_each_engine(engine, rc6_to_gt(rc6), id)
        set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

    set(uncore, GEN6_RC6_THRESHOLD, 0x557);

    /* Allows RC6 residency counter to work */
    set(uncore, VLV_COUNTER_CONTROL,
        _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                   VLV_MEDIA_RC0_COUNT_EN |
                   VLV_RENDER_RC0_COUNT_EN |
                   VLV_MEDIA_RC6_COUNT_EN |
                   VLV_RENDER_RC6_COUNT_EN));

    rc6->ctl_enable =
        GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}

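/*
 * On Gen9 LP (Broxton) the BIOS is responsible for setting up the RC6
 * context and related state; audit the registers it should have
 * programmed and refuse to enable RC6 if anything looks unconfigured.
 */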
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    u32 rc6_ctx_base, rc_ctl, rc_sw_target;
    bool enable_rc6 = true;

    rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
    rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
    rc_sw_target &= RC_SW_TARGET_STATE_MASK;
    rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
    drm_dbg(&i915->drm, "BIOS enabled RC states: "
             "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
             str_on_off(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
             str_on_off(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
             rc_sw_target);

    if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
        drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
        enable_rc6 = false;
    }

    /*
     * The exact context size is not known for BXT, so assume a page size
     * for this check.
     */
    rc6_ctx_base =
        intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
    if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
          rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
        drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
        enable_rc6 = false;
    }

    if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT(RENDER_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
          (intel_uncore_read(uncore, PWRCTX_MAXCNT(GEN6_BSD_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
          (intel_uncore_read(uncore, PWRCTX_MAXCNT(BLT_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
          (intel_uncore_read(uncore, PWRCTX_MAXCNT(VEBOX_RING_BASE)) & IDLE_TIME_MASK) > 1)) {
        drm_dbg(&i915->drm,
            "Engine Idle wait time not set properly.\n");
        enable_rc6 = false;
    }

    if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
        !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
        !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
        drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
        enable_rc6 = false;
    }

    if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
        drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
        enable_rc6 = false;
    }

    if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
        drm_dbg(&i915->drm, "GPM control not setup properly.\n");
        enable_rc6 = false;
    }

    return enable_rc6;
}

static bool rc6_supported(struct intel_rc6 *rc6)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);

    if (!HAS_RC6(i915))
        return false;

    if (intel_vgpu_active(i915))
        return false;

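    /* The selftest mock device has no real hardware behind it. */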
    if (is_mock_gt(rc6_to_gt(rc6)))
        return false;

    if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
        drm_notice(&i915->drm,
               "RC6 and powersaving disabled by BIOS\n");
        return false;
    }

    return true;
}

static void rpm_get(struct intel_rc6 *rc6)
{
    GEM_BUG_ON(rc6->wakeref);
    pm_runtime_get_sync(rc6_to_i915(rc6)->drm.dev);
    rc6->wakeref = true;
}

static void rpm_put(struct intel_rc6 *rc6)
{
    GEM_BUG_ON(!rc6->wakeref);
    pm_runtime_put(rc6_to_i915(rc6)->drm.dev);
    rc6->wakeref = false;
}

static bool pctx_corrupted(struct intel_rc6 *rc6)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);

    if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
        return false;

    if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
        return false;

    drm_notice(&i915->drm,
           "RC6 context corruption, disabling runtime power management\n");
    return true;
}

static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    struct intel_gt *gt = rc6_to_gt(rc6);

    /* Take control of RC6 back from GuC */
    intel_guc_rc_disable(&gt->uc.guc);

    intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
    if (GRAPHICS_VER(i915) >= 9)
        set(uncore, GEN9_PG_ENABLE, 0);
    set(uncore, GEN6_RC_CONTROL, 0);
    set(uncore, GEN6_RC_STATE, 0);
    intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

void intel_rc6_init(struct intel_rc6 *rc6)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    int err;

    /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
    rpm_get(rc6);

    if (!rc6_supported(rc6))
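        /* Keep the wakeref held; it is released in intel_rc6_fini(). */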
        return;

    if (IS_CHERRYVIEW(i915))
        err = chv_rc6_init(rc6);
    else if (IS_VALLEYVIEW(i915))
        err = vlv_rc6_init(rc6);
    else
        err = 0;

    /* Sanitize rc6, ensure it is disabled before we are ready. */
    __intel_rc6_disable(rc6);

    rc6->supported = err == 0;
}

void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
    memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));

    if (rc6->enabled) { /* unbalanced suspend/resume */
        rpm_get(rc6);
        rc6->enabled = false;
    }

    if (rc6->supported)
        __intel_rc6_disable(rc6);
}

void intel_rc6_enable(struct intel_rc6 *rc6)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    struct intel_uncore *uncore = rc6_to_uncore(rc6);

    if (!rc6->supported)
        return;

    GEM_BUG_ON(rc6->enabled);

    intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

    if (IS_CHERRYVIEW(i915))
        chv_rc6_enable(rc6);
    else if (IS_VALLEYVIEW(i915))
        vlv_rc6_enable(rc6);
    else if (GRAPHICS_VER(i915) >= 11)
        gen11_rc6_enable(rc6);
    else if (GRAPHICS_VER(i915) >= 9)
        gen9_rc6_enable(rc6);
    else if (IS_BROADWELL(i915))
        gen8_rc6_enable(rc6);
    else if (GRAPHICS_VER(i915) >= 6)
        gen6_rc6_enable(rc6);

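    /*
     * Note whether RC6 itself was requested; intel_rc6_park() only
     * hand-rolls RC6 entry when this bit was set up above.
     */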
    rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
    if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
        rc6->ctl_enable = 0;

    intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

    if (unlikely(pctx_corrupted(rc6)))
        return;

    /* rc6 is ready, runtime-pm is go! */
    rpm_put(rc6);
    rc6->enabled = true;
}

void intel_rc6_unpark(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);

    if (!rc6->enabled)
        return;

    /* Restore HW timers for automatic RC6 entry while busy */
    set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}

void intel_rc6_park(struct intel_rc6 *rc6)
{
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    unsigned int target;

    if (!rc6->enabled)
        return;

    if (unlikely(pctx_corrupted(rc6))) {
        intel_rc6_disable(rc6);
        return;
    }

    if (!rc6->manual)
        return;

    /* Turn off the HW timers and go directly to rc6 */
    set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);

    if (HAS_RC6pp(rc6_to_i915(rc6)))
        target = 0x6; /* deepest rc6 */
    else if (HAS_RC6p(rc6_to_i915(rc6)))
        target = 0x5; /* deep rc6 */
    else
        target = 0x4; /* normal rc6 */
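    /* Request the chosen state directly via the SW target state field. */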
    set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}

void intel_rc6_disable(struct intel_rc6 *rc6)
{
    if (!rc6->enabled)
        return;

    rpm_get(rc6);
    rc6->enabled = false;

    __intel_rc6_disable(rc6);
}

void intel_rc6_fini(struct intel_rc6 *rc6)
{
    struct drm_i915_gem_object *pctx;

    intel_rc6_disable(rc6);

    pctx = fetch_and_zero(&rc6->pctx);
    if (pctx)
        i915_gem_object_put(pctx);

    if (rc6->wakeref)
        rpm_put(rc6);
}

static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
    u32 lower, upper, tmp;
    int loop = 2;

    /*
     * The registers accessed do not need forcewake. We borrow the
     * uncore lock to prevent concurrent access to the range-select bit.
     */
    lockdep_assert_held(&uncore->lock);

    /*
     * vlv and chv residency counters are 40 bits in width.
     * With a control bit, we can choose between upper or lower
     * 32bit window into this counter.
     *
     * Although we always use the counter in high-range mode elsewhere,
     * userspace may attempt to read the value before rc6 is initialised,
     * before we have set the default VLV_COUNTER_CONTROL value. So always
     * set the high bit to be safe.
     */
    set(uncore, VLV_COUNTER_CONTROL,
        _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
    upper = intel_uncore_read_fw(uncore, reg);
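    /* Re-read until the upper half is stable; at most two passes. */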
    do {
        tmp = upper;

        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
        lower = intel_uncore_read_fw(uncore, reg);

        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
        upper = intel_uncore_read_fw(uncore, reg);
    } while (upper != tmp && --loop);

    /*
     * Everywhere else we always use VLV_COUNTER_CONTROL with the
     * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
     * now.
     */

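    /* The upper window exposes bits [39:8] of the 40-bit counter. */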
    return lower | (u64)upper << 8;
}

u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
{
    struct drm_i915_private *i915 = rc6_to_i915(rc6);
    struct intel_uncore *uncore = rc6_to_uncore(rc6);
    u64 time_hw, prev_hw, overflow_hw;
    unsigned int fw_domains;
    unsigned long flags;
    unsigned int i;
    u32 mul, div;

    if (!rc6->supported)
        return 0;

    /*
     * Store previous hw counter values for counter wrap-around handling.
     *
     * There are only four interesting registers and they live next to each
     * other, so we can use the offset relative to the smallest one as the
     * index into driver storage.
     */
    i = (i915_mmio_reg_offset(reg) -
         i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
    if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency)))
        return 0;

    fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

    spin_lock_irqsave(&uncore->lock, flags);
    intel_uncore_forcewake_get__locked(uncore, fw_domains);

    /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
    if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
        mul = 1000000;
        div = i915->czclk_freq;
        overflow_hw = BIT_ULL(40);
        time_hw = vlv_residency_raw(uncore, reg);
    } else {
        /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
        if (IS_GEN9_LP(i915)) {
            mul = 10000;
            div = 12;
        } else {
            mul = 1280;
            div = 1;
        }

        overflow_hw = BIT_ULL(32);
        time_hw = intel_uncore_read_fw(uncore, reg);
    }

    /*
     * Counter wrap handling. This relies on a sufficient frequency of
     * queries, otherwise the counters can still wrap unnoticed.
     */
    prev_hw = rc6->prev_hw_residency[i];
    rc6->prev_hw_residency[i] = time_hw;

    /* RC6 delta from last sample. */
    if (time_hw >= prev_hw)
        time_hw -= prev_hw;
    else
        time_hw += overflow_hw - prev_hw;

    /* Add delta to RC6 extended raw driver copy. */
    time_hw += rc6->cur_residency[i];
    rc6->cur_residency[i] = time_hw;

    intel_uncore_forcewake_put__locked(uncore, fw_domains);
    spin_unlock_irqrestore(&uncore->lock, flags);

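    /* Scale accumulated ticks to ns: ticks * mul / div. */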
    return mul_u64_u32_div(time_hw, mul, div);
}

u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
    return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_rc6.c"
#endif