// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
             u8 max_subslices, u8 max_eus_per_subslice)
{
    sseu->max_slices = max_slices;
    sseu->max_subslices = max_subslices;
    sseu->max_eus_per_subslice = max_eus_per_subslice;
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
    unsigned int i, total = 0;

    if (sseu->has_xehp_dss)
        return bitmap_weight(sseu->subslice_mask.xehp,
                     XEHP_BITMAP_BITS(sseu->subslice_mask));

    for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask.hsw); i++)
        total += hweight8(sseu->subslice_mask.hsw[i]);

    return total;
}

unsigned int
intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
    WARN_ON(sseu->has_xehp_dss);
    if (WARN_ON(slice >= sseu->max_slices))
        return 0;

    return sseu->subslice_mask.hsw[slice];
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
            int subslice)
{
    if (sseu->has_xehp_dss) {
        WARN_ON(slice > 0);
        return sseu->eu_mask.xehp[subslice];
    } else {
        return sseu->eu_mask.hsw[slice][subslice];
    }
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
             u16 eu_mask)
{
    GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
    if (sseu->has_xehp_dss) {
        GEM_WARN_ON(slice > 0);
        sseu->eu_mask.xehp[subslice] = eu_mask;
    } else {
        sseu->eu_mask.hsw[slice][subslice] = eu_mask;
    }
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
    int s, ss, total = 0;

    for (s = 0; s < sseu->max_slices; s++)
        for (ss = 0; ss < sseu->max_subslices; ss++)
            if (sseu->has_xehp_dss)
                total += hweight16(sseu->eu_mask.xehp[ss]);
            else
                total += hweight16(sseu->eu_mask.hsw[s][ss]);

    return total;
}

/**
 * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing EU mask to copy
 *
 * Copies the EU mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_eumask_to_user(void __user *to,
                   const struct sseu_dev_info *sseu)
{
    u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
    int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
    int len = sseu->max_slices * sseu->max_subslices * eu_stride;
    int s, ss, i;

    for (s = 0; s < sseu->max_slices; s++) {
        for (ss = 0; ss < sseu->max_subslices; ss++) {
            int uapi_offset =
                s * sseu->max_subslices * eu_stride +
                ss * eu_stride;
            u16 mask = sseu_get_eus(sseu, s, ss);

            for (i = 0; i < eu_stride; i++)
                eu_mask[uapi_offset + i] =
                    (mask >> (BITS_PER_BYTE * i)) & 0xff;
        }
    }

    return copy_to_user(to, eu_mask, len);
}
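
/*
 * Illustrative sketch (not part of the driver; buffer name hypothetical):
 * a userspace consumer of the topology query would recover the EU mask
 * for a given (s, ss) pair from the buffer filled above with the same
 * stride arithmetic:
 *
 *	int offset = s * max_subslices * eu_stride + ss * eu_stride;
 *	u16 mask = 0;
 *
 *	for (i = 0; i < eu_stride; i++)
 *		mask |= (u16)eu_mask_buf[offset + i] << (BITS_PER_BYTE * i);
 */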

/**
 * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing subslice mask to copy
 *
 * Copies the subslice mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_ssmask_to_user(void __user *to,
                   const struct sseu_dev_info *sseu)
{
    u8 ss_mask[GEN_SS_MASK_SIZE] = {};
    int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
    int len = sseu->max_slices * ss_stride;
    int s, ss, i;

    for (s = 0; s < sseu->max_slices; s++) {
        for (ss = 0; ss < sseu->max_subslices; ss++) {
            i = s * ss_stride * BITS_PER_BYTE + ss;

            if (!intel_sseu_has_subslice(sseu, s, ss))
                continue;

            ss_mask[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
        }
    }

    return copy_to_user(to, ss_mask, len);
}
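
/*
 * Illustrative sketch (not part of the driver): the packed mask built
 * above places subslice ss of slice s at bit (s * ss_stride * 8 + ss),
 * so a userspace check mirrors the kernel-side indexing:
 *
 *	int bit = s * ss_stride * 8 + ss;
 *	bool enabled = ss_mask_buf[bit / 8] & (1 << (bit % 8));
 */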

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
                    u32 ss_en, u16 eu_en)
{
    u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
    int ss;

    sseu->slice_mask |= BIT(0);
    sseu->subslice_mask.hsw[0] = ss_en & valid_ss_mask;

    for (ss = 0; ss < sseu->max_subslices; ss++)
        if (intel_sseu_has_subslice(sseu, 0, ss))
            sseu_set_eus(sseu, 0, ss, eu_en);

    sseu->eu_per_subslice = hweight16(eu_en);
    sseu->eu_total = compute_eu_total(sseu);
}

static void xehp_compute_sseu_info(struct sseu_dev_info *sseu,
                   u16 eu_en)
{
    int ss;

    sseu->slice_mask |= BIT(0);

    bitmap_or(sseu->subslice_mask.xehp,
          sseu->compute_subslice_mask.xehp,
          sseu->geometry_subslice_mask.xehp,
          XEHP_BITMAP_BITS(sseu->subslice_mask));

    for (ss = 0; ss < sseu->max_subslices; ss++)
        if (intel_sseu_has_subslice(sseu, 0, ss))
            sseu_set_eus(sseu, 0, ss, eu_en);

    sseu->eu_per_subslice = hweight16(eu_en);
    sseu->eu_total = compute_eu_total(sseu);
}

static void
xehp_load_dss_mask(struct intel_uncore *uncore,
           intel_sseu_ss_mask_t *ssmask,
           int numregs,
           ...)
{
    va_list argp;
    u32 fuse_val[I915_MAX_SS_FUSE_REGS] = {};
    int i;

    if (WARN_ON(numregs > I915_MAX_SS_FUSE_REGS))
        numregs = I915_MAX_SS_FUSE_REGS;

    va_start(argp, numregs);
    for (i = 0; i < numregs; i++)
        fuse_val[i] = intel_uncore_read(uncore, va_arg(argp, i915_reg_t));
    va_end(argp);

    bitmap_from_arr32(ssmask->xehp, fuse_val, numregs * 32);
}
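
/*
 * Sketch of the layout produced above (fuse values hypothetical):
 * bitmap_from_arr32() packs register 0 into DSS bits 0-31, register 1
 * into bits 32-63, and so on.  For example:
 *
 *	fuse_val[0] = 0x000000ff;	-> DSS 0-7 enabled
 *	fuse_val[1] = 0x00000003;	-> DSS 32-33 enabled
 */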

static void xehp_sseu_info_init(struct intel_gt *gt)
{
    struct sseu_dev_info *sseu = &gt->info.sseu;
    struct intel_uncore *uncore = gt->uncore;
    u16 eu_en = 0;
    u8 eu_en_fuse;
    int num_compute_regs, num_geometry_regs;
    int eu;

    if (IS_PONTEVECCHIO(gt->i915)) {
        num_geometry_regs = 0;
        num_compute_regs = 2;
    } else {
        num_geometry_regs = 1;
        num_compute_regs = 1;
    }

    /*
     * The concept of slice has been removed in Xe_HP.  To be compatible
     * with prior generations, assume a single slice across the entire
     * device. Then calculate out the DSS for each workload type within
     * that software slice.
     */
    intel_sseu_set_info(sseu, 1,
                32 * max(num_geometry_regs, num_compute_regs),
                HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 8 : 16);
    sseu->has_xehp_dss = 1;

    xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
               num_geometry_regs,
               GEN12_GT_GEOMETRY_DSS_ENABLE);
    xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
               num_compute_regs,
               GEN12_GT_COMPUTE_DSS_ENABLE,
               XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);

    eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;

    if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
        eu_en = eu_en_fuse;
    else
        for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
            if (eu_en_fuse & BIT(eu))
                eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

    xehp_compute_sseu_info(sseu, eu_en);
}
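
/*
 * Worked example of the EU pair expansion above (fuse value
 * hypothetical): on a platform without HAS_ONE_EU_PER_FUSE_BIT, an
 * eu_en_fuse of 0b0101 marks EU pairs 0 and 2 as enabled, so the loop
 * produces eu_en = BIT(0) | BIT(1) | BIT(4) | BIT(5) = 0x33.
 */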

static void gen12_sseu_info_init(struct intel_gt *gt)
{
    struct sseu_dev_info *sseu = &gt->info.sseu;
    struct intel_uncore *uncore = gt->uncore;
    u32 g_dss_en;
    u16 eu_en = 0;
    u8 eu_en_fuse;
    u8 s_en;
    int eu;

    /*
     * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
     * Instead of splitting these, provide userspace with an array
     * of DSS to more closely represent the hardware resource.
     */
    intel_sseu_set_info(sseu, 1, 6, 16);

    /*
     * Although gen12 architecture supported multiple slices, TGL, RKL,
     * DG1, and ADL only had a single slice.
     */
    s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
        GEN11_GT_S_ENA_MASK;
    drm_WARN_ON(&gt->i915->drm, s_en != 0x1);

    g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);

    /* one bit per pair of EUs */
    eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
               GEN11_EU_DIS_MASK);

    for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
        if (eu_en_fuse & BIT(eu))
            eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

    gen11_compute_sseu_info(sseu, g_dss_en, eu_en);

    /* TGL only supports slice-level power gating */
    sseu->has_slice_pg = 1;
}
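
/*
 * Worked example (register value hypothetical): if GEN11_EU_DISABLE
 * reads back 0xf0 in its low byte, eu_en_fuse = ~0xf0 = 0x0f, so EU
 * pairs 0-3 are enabled and the loop above yields eu_en = 0x00ff, i.e.
 * the first 8 of the 16 EUs in each DSS.
 */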

static void gen11_sseu_info_init(struct intel_gt *gt)
{
    struct sseu_dev_info *sseu = &gt->info.sseu;
    struct intel_uncore *uncore = gt->uncore;
    u32 ss_en;
    u8 eu_en;
    u8 s_en;

    if (IS_JSL_EHL(gt->i915))
        intel_sseu_set_info(sseu, 1, 4, 8);
    else
        intel_sseu_set_info(sseu, 1, 8, 8);

    /*
     * Although gen11 architecture supported multiple slices, ICL and
     * EHL/JSL only had a single slice in practice.
     */
    s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
        GEN11_GT_S_ENA_MASK;
    drm_WARN_ON(&gt->i915->drm, s_en != 0x1);

    ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

    eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
          GEN11_EU_DIS_MASK);

    gen11_compute_sseu_info(sseu, ss_en, eu_en);

    /* ICL has no power gating restrictions. */
    sseu->has_slice_pg = 1;
    sseu->has_subslice_pg = 1;
    sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
    struct sseu_dev_info *sseu = &gt->info.sseu;
    u32 fuse;

    fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

    sseu->slice_mask = BIT(0);
    intel_sseu_set_info(sseu, 1, 2, 8);

    if (!(fuse & CHV_FGT_DISABLE_SS0)) {
        u8 disabled_mask =
            ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
             CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
            (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
              CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

        sseu->subslice_mask.hsw[0] |= BIT(0);
        sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
    }

    if (!(fuse & CHV_FGT_DISABLE_SS1)) {
        u8 disabled_mask =
            ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
             CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
            (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
              CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

        sseu->subslice_mask.hsw[0] |= BIT(1);
        sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
    }

    sseu->eu_total = compute_eu_total(sseu);

    /*
     * CHV is expected to always have a uniform distribution of EUs
     * across subslices.
     */
    sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
        sseu->eu_total /
        intel_sseu_subslice_total(sseu) :
        0;
    /*
     * CHV supports subslice power gating on devices with more than
     * one subslice, and supports EU power gating on devices with
     * more than one EU pair per subslice.
     */
    sseu->has_slice_pg = 0;
    sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
    sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}
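
/*
 * Worked example of the fuse decode above (field values hypothetical):
 * if SS0's R0 field decodes to 0x3 and its R1 field to 0x8, then
 * disabled_mask = 0x3 | (0x8 << 4) = 0x83, and sseu_set_eus() stores
 * ~0x83 & 0xff = 0x7c: EUs 2-6 enabled, EUs 0, 1 and 7 fused off.
 */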

static void gen9_sseu_info_init(struct intel_gt *gt)
{
    struct drm_i915_private *i915 = gt->i915;
    struct intel_device_info *info = mkwrite_device_info(i915);
    struct sseu_dev_info *sseu = &gt->info.sseu;
    struct intel_uncore *uncore = gt->uncore;
    u32 fuse2, eu_disable, subslice_mask;
    const u8 eu_mask = 0xff;
    int s, ss;

    fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
    sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

    /* BXT has a single slice and at most 3 subslices. */
    intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
                IS_GEN9_LP(i915) ? 3 : 4, 8);

    /*
     * The subslice disable field is global, i.e. it applies
     * to each of the enabled slices.
     */
    subslice_mask = (1 << sseu->max_subslices) - 1;
    subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
               GEN9_F2_SS_DIS_SHIFT);

    /*
     * Iterate through enabled slices and subslices to
     * count the total enabled EUs.
     */
    for (s = 0; s < sseu->max_slices; s++) {
        if (!(sseu->slice_mask & BIT(s)))
            /* skip disabled slice */
            continue;

        sseu->subslice_mask.hsw[s] = subslice_mask;

        eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
        for (ss = 0; ss < sseu->max_subslices; ss++) {
            int eu_per_ss;
            u8 eu_disabled_mask;

            if (!intel_sseu_has_subslice(sseu, s, ss))
                /* skip disabled subslice */
                continue;

            eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

            sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & eu_mask);

            eu_per_ss = sseu->max_eus_per_subslice -
                hweight8(eu_disabled_mask);

            /*
             * Record which subslice(s) have 7 EUs. We can tune
             * the hash used to spread work among subslices if
             * they are unbalanced.
             */
            if (eu_per_ss == 7)
                sseu->subslice_7eu[s] |= BIT(ss);
        }
    }

    sseu->eu_total = compute_eu_total(sseu);

    /*
     * SKL is expected to always have a uniform distribution
     * of EUs across subslices with the exception that any one
     * EU in any one subslice may be fused off for die
     * recovery. BXT is expected to be perfectly uniform in EU
     * distribution.
     */
    sseu->eu_per_subslice =
        intel_sseu_subslice_total(sseu) ?
        DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
        0;

    /*
     * SKL+ supports slice power gating on devices with more than
     * one slice, and supports EU power gating on devices with
     * more than one EU pair per subslice. BXT+ supports subslice
     * power gating on devices with more than one subslice, and
     * supports EU power gating on devices with more than one EU
     * pair per subslice.
     */
    sseu->has_slice_pg =
        !IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
    sseu->has_subslice_pg =
        IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
    sseu->has_eu_pg = sseu->eu_per_subslice > 2;

    if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)  (!(sseu->subslice_mask.hsw[0] & BIT(ss)))
        info->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3;

        sseu->min_eu_in_pool = 0;
        if (info->has_pooled_eu) {
            if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
                sseu->min_eu_in_pool = 3;
            else if (IS_SS_DISABLED(1))
                sseu->min_eu_in_pool = 6;
            else
                sseu->min_eu_in_pool = 9;
        }
#undef IS_SS_DISABLED
    }
}
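
/*
 * Worked example of the 7-EU bookkeeping above (fuse value
 * hypothetical): with max_eus_per_subslice = 8, a subslice whose
 * eu_disabled_mask is 0x10 has one EU fused off, so eu_per_ss = 7 and
 * the subslice is recorded in subslice_7eu[], letting later code tune
 * the hash that spreads work between full and 7-EU subslices.
 */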

static void bdw_sseu_info_init(struct intel_gt *gt)
{
    struct sseu_dev_info *sseu = &gt->info.sseu;
    struct intel_uncore *uncore = gt->uncore;
    int s, ss;
    u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
    u32 eu_disable0, eu_disable1, eu_disable2;

    fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
    sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
    intel_sseu_set_info(sseu, 3, 3, 8);

    /*
     * The subslice disable field is global, i.e. it applies
     * to each of the enabled slices.
     */
    subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
    subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
               GEN8_F2_SS_DIS_SHIFT);
    eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
    eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
    eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
    eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
    eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
        ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
         (32 - GEN8_EU_DIS0_S1_SHIFT));
    eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
        ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
         (32 - GEN8_EU_DIS1_S2_SHIFT));

    /*
     * Iterate through enabled slices and subslices to
     * count the total enabled EUs.
     */
    for (s = 0; s < sseu->max_slices; s++) {
        if (!(sseu->slice_mask & BIT(s)))
            /* skip disabled slice */
            continue;

        sseu->subslice_mask.hsw[s] = subslice_mask;

        for (ss = 0; ss < sseu->max_subslices; ss++) {
            u8 eu_disabled_mask;
            u32 n_disabled;

            if (!intel_sseu_has_subslice(sseu, s, ss))
                /* skip disabled subslice */
                continue;

            eu_disabled_mask =
                eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

            sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & 0xFF);

            n_disabled = hweight8(eu_disabled_mask);

            /*
             * Record which subslices have 7 EUs.
             */
            if (sseu->max_eus_per_subslice - n_disabled == 7)
                sseu->subslice_7eu[s] |= 1 << ss;
        }
    }

    sseu->eu_total = compute_eu_total(sseu);

    /*
     * BDW is expected to always have a uniform distribution of EUs across
     * subslices with the exception that any one EU in any one subslice may
     * be fused off for die recovery.
     */
    sseu->eu_per_subslice =
        intel_sseu_subslice_total(sseu) ?
        DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
        0;

    /*
     * BDW supports slice power gating on devices with more than
     * one slice.
     */
    sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
    sseu->has_subslice_pg = 0;
    sseu->has_eu_pg = 0;
}
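
/*
 * Sketch of the fuse splicing above (concrete shift/mask values are
 * assumptions derived from the 3-slice x 3-subslice x 8-EU geometry):
 * each slice owns 24 disable bits, so slice 1's bits span the top of
 * GEN8_EU_DISABLE0 and the bottom of GEN8_EU_DISABLE1 and are stitched
 * together from both reads, e.g.
 *
 *	eu_disable[1] = (eu_disable0 >> 24) | ((eu_disable1 & 0xffff) << 8);
 */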

static void hsw_sseu_info_init(struct intel_gt *gt)
{
    struct drm_i915_private *i915 = gt->i915;
    struct sseu_dev_info *sseu = &gt->info.sseu;
    u32 fuse1;
    u8 subslice_mask = 0;
    int s, ss;

    /*
     * There isn't a register to tell us how many slices/subslices
     * there are; we work off the PCI-ids here.
     */
    switch (INTEL_INFO(i915)->gt) {
    default:
        MISSING_CASE(INTEL_INFO(i915)->gt);
        fallthrough;
    case 1:
        sseu->slice_mask = BIT(0);
        subslice_mask = BIT(0);
        break;
    case 2:
        sseu->slice_mask = BIT(0);
        subslice_mask = BIT(0) | BIT(1);
        break;
    case 3:
        sseu->slice_mask = BIT(0) | BIT(1);
        subslice_mask = BIT(0) | BIT(1);
        break;
    }

    fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
    switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
    default:
        MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
        fallthrough;
    case HSW_F1_EU_DIS_10EUS:
        sseu->eu_per_subslice = 10;
        break;
    case HSW_F1_EU_DIS_8EUS:
        sseu->eu_per_subslice = 8;
        break;
    case HSW_F1_EU_DIS_6EUS:
        sseu->eu_per_subslice = 6;
        break;
    }

    intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
                hweight8(subslice_mask),
                sseu->eu_per_subslice);

    for (s = 0; s < sseu->max_slices; s++) {
        sseu->subslice_mask.hsw[s] = subslice_mask;

        for (ss = 0; ss < sseu->max_subslices; ss++) {
            sseu_set_eus(sseu, s, ss,
                     (1UL << sseu->eu_per_subslice) - 1);
        }
    }

    sseu->eu_total = compute_eu_total(sseu);

    /* No powergating for you. */
    sseu->has_slice_pg = 0;
    sseu->has_subslice_pg = 0;
    sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
    struct drm_i915_private *i915 = gt->i915;

    if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
        xehp_sseu_info_init(gt);
    else if (GRAPHICS_VER(i915) >= 12)
        gen12_sseu_info_init(gt);
    else if (GRAPHICS_VER(i915) >= 11)
        gen11_sseu_info_init(gt);
    else if (GRAPHICS_VER(i915) >= 9)
        gen9_sseu_info_init(gt);
    else if (IS_BROADWELL(i915))
        bdw_sseu_info_init(gt);
    else if (IS_CHERRYVIEW(i915))
        cherryview_sseu_info_init(gt);
    else if (IS_HASWELL(i915))
        hsw_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
             const struct intel_sseu *req_sseu)
{
    struct drm_i915_private *i915 = gt->i915;
    const struct sseu_dev_info *sseu = &gt->info.sseu;
    bool subslice_pg = sseu->has_subslice_pg;
    u8 slices, subslices;
    u32 rpcs = 0;

    /*
     * No explicit RPCS request is needed to ensure full
     * slice/subslice/EU enablement prior to Gen9.
     */
    if (GRAPHICS_VER(i915) < 9)
        return 0;

    /*
     * If i915/perf is active, we want a stable powergating configuration
     * on the system. Use the configuration pinned by i915/perf.
     */
    if (i915->perf.exclusive_stream)
        req_sseu = &i915->perf.sseu;

    slices = hweight8(req_sseu->slice_mask);
    subslices = hweight8(req_sseu->subslice_mask);

    /*
     * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
     * wide and Icelake has up to eight subslices, special programming is
     * needed in order to correctly enable all subslices.
     *
     * According to documentation software must consider the configuration
     * as 2x4x8 and hardware will translate this to 1x8x8.
     *
     * Furthermore, even though SScount is three bits, the maximum
     * documented value for it is four. From this some rules/restrictions
     * follow:
     *
     * 1.
     * If enabled subslice count is greater than four, two whole slices must
     * be enabled instead.
     *
     * 2.
     * When more than one slice is enabled, hardware ignores the subslice
     * count altogether.
     *
     * From these restrictions it follows that it is not possible to enable
     * a subslice count between four (the SScount maximum) and the maximum
     * available number on a particular SKU. Either all subslices are
     * enabled, or a count between one and four on the first slice.
     */
    if (GRAPHICS_VER(i915) == 11 &&
        slices == 1 &&
        subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) {
        GEM_BUG_ON(subslices & 1);

        subslice_pg = false;
        slices *= 2;
    }

    /*
     * Starting in Gen9, render power gating can leave
     * slice/subslice/EU in a partially enabled state. We
     * must make an explicit request through RPCS for full
     * enablement.
     */
    if (sseu->has_slice_pg) {
        u32 mask, val = slices;

        if (GRAPHICS_VER(i915) >= 11) {
            mask = GEN11_RPCS_S_CNT_MASK;
            val <<= GEN11_RPCS_S_CNT_SHIFT;
        } else {
            mask = GEN8_RPCS_S_CNT_MASK;
            val <<= GEN8_RPCS_S_CNT_SHIFT;
        }

        GEM_BUG_ON(val & ~mask);
        val &= mask;

        rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
    }

    if (subslice_pg) {
        u32 val = subslices;

        val <<= GEN8_RPCS_SS_CNT_SHIFT;

        GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
        val &= GEN8_RPCS_SS_CNT_MASK;

        rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
    }

    if (sseu->has_eu_pg) {
        u32 val;

        val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
        GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
        val &= GEN8_RPCS_EU_MIN_MASK;

        rpcs |= val;

        val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
        GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
        val &= GEN8_RPCS_EU_MAX_MASK;

        rpcs |= val;

        rpcs |= GEN8_RPCS_ENABLE;
    }

    return rpcs;
}
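
/*
 * Worked example of the ICL fixup above: for a request of 1 slice x 8
 * subslices on an 8-subslice part, subslices exceeds the SScount limit
 * of four, so the GRAPHICS_VER == 11 branch rewrites the request as
 * slices = 2 with subslice power gating disabled; hardware then
 * translates the resulting 2x4x8 programming back to the physical
 * 1x8x8 configuration, as described in the comment block above.
 */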

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
    int s;

    if (sseu->has_xehp_dss) {
        drm_printf(p, "subslice total: %u\n",
               intel_sseu_subslice_total(sseu));
        drm_printf(p, "geometry dss mask=%*pb\n",
               XEHP_BITMAP_BITS(sseu->geometry_subslice_mask),
               sseu->geometry_subslice_mask.xehp);
        drm_printf(p, "compute dss mask=%*pb\n",
               XEHP_BITMAP_BITS(sseu->compute_subslice_mask),
               sseu->compute_subslice_mask.xehp);
    } else {
        drm_printf(p, "slice total: %u, mask=%04x\n",
               hweight8(sseu->slice_mask), sseu->slice_mask);
        drm_printf(p, "subslice total: %u\n",
               intel_sseu_subslice_total(sseu));

        for (s = 0; s < sseu->max_slices; s++) {
            u8 ss_mask = sseu->subslice_mask.hsw[s];

            drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
                   s, hweight8(ss_mask), ss_mask);
        }
    }

    drm_printf(p, "EU total: %u\n", sseu->eu_total);
    drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
    drm_printf(p, "has slice power gating: %s\n",
           str_yes_no(sseu->has_slice_pg));
    drm_printf(p, "has subslice power gating: %s\n",
           str_yes_no(sseu->has_subslice_pg));
    drm_printf(p, "has EU power gating: %s\n",
           str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
                    struct drm_printer *p)
{
    int s, ss;

    for (s = 0; s < sseu->max_slices; s++) {
        u8 ss_mask = sseu->subslice_mask.hsw[s];

        drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
               s, hweight8(ss_mask), ss_mask);

        for (ss = 0; ss < sseu->max_subslices; ss++) {
            u16 enabled_eus = sseu_get_eus(sseu, s, ss);

            drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
                   ss, hweight16(enabled_eus), enabled_eus);
        }
    }
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
                     struct drm_printer *p)
{
    int dss;

    for (dss = 0; dss < sseu->max_subslices; dss++) {
        u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

        drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
               str_yes_no(test_bit(dss, sseu->geometry_subslice_mask.xehp)),
               str_yes_no(test_bit(dss, sseu->compute_subslice_mask.xehp)),
               hweight16(enabled_eus), enabled_eus);
    }
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
                   const struct sseu_dev_info *sseu,
                   struct drm_printer *p)
{
    if (sseu->max_slices == 0) {
        drm_printf(p, "Unavailable\n");
    } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
        sseu_print_xehp_topology(sseu, p);
    } else {
        sseu_print_hsw_topology(sseu, p);
    }
}

void intel_sseu_print_ss_info(const char *type,
                  const struct sseu_dev_info *sseu,
                  struct seq_file *m)
{
    int s;

    if (sseu->has_xehp_dss) {
        seq_printf(m, "  %s Geometry DSS: %u\n", type,
               bitmap_weight(sseu->geometry_subslice_mask.xehp,
                     XEHP_BITMAP_BITS(sseu->geometry_subslice_mask)));
        seq_printf(m, "  %s Compute DSS: %u\n", type,
               bitmap_weight(sseu->compute_subslice_mask.xehp,
                     XEHP_BITMAP_BITS(sseu->compute_subslice_mask)));
    } else {
        for (s = 0; s < fls(sseu->slice_mask); s++)
            seq_printf(m, "  %s Slice%i subslices: %u\n", type,
                   s, hweight8(sseu->subslice_mask.hsw[s]));
    }
}

u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask,
                      int dss_per_slice)
{
    intel_sseu_ss_mask_t per_slice_mask = {};
    unsigned long slice_mask = 0;
    int i;

    WARN_ON(DIV_ROUND_UP(XEHP_BITMAP_BITS(dss_mask), dss_per_slice) >
        8 * sizeof(slice_mask));

    bitmap_fill(per_slice_mask.xehp, dss_per_slice);
    for (i = 0; !bitmap_empty(dss_mask.xehp, XEHP_BITMAP_BITS(dss_mask)); i++) {
        if (bitmap_intersects(dss_mask.xehp, per_slice_mask.xehp, dss_per_slice))
            slice_mask |= BIT(i);

        bitmap_shift_right(dss_mask.xehp, dss_mask.xehp, dss_per_slice,
                   XEHP_BITMAP_BITS(dss_mask));
    }

    return slice_mask;
}
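
/*
 * Usage sketch (mask value hypothetical): with dss_per_slice = 4 and a
 * dss_mask of 0xf3 (DSS 0, 1 and 4-7 present), iteration 0 intersects
 * bits 0-3 and iteration 1 bits 4-7, so
 *
 *	intel_slicemask_from_xehp_dssmask(dss_mask, 4);
 *
 * returns 0x3, i.e. slices 0 and 1 both contain at least one DSS.
 */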