// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}
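
/*
 * Decoding sketch for the tables below (illustrative, derived from
 * set_offsets() above): each control byte is either NOP(x) == BIT(7) | x,
 * which skips x dwords of the context image, or an LRI header whose low
 * six bits give the register count and whose bit 6 carries the POSTED
 * flag. Register offsets then follow as a 7-bits-per-byte varint, e.g.
 * REG16(0x244) emits { 0x81, 0x11 }, which reassembles to
 * (0x81 & 0x7f) << 7 | 0x11 == 0x91 == 0x244 >> 2, so set_offsets()
 * writes mmio_base + 0x244 and leaves the following value dword for the
 * hardware to fill on context save.
 */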

static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between
	 * the physical engines for virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}
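
/*
 * Layout sketch implied by the two +2 strides above (Gen12 dword indices,
 * illustrative): each entry is a (register, value) dword pair within the
 * LRC state, so the value programmed for a slot x lives at regs[x + 1]:
 *
 *   regs[0x12]/regs[0x13]  RING_BB_PER_CTX_PTR
 *   regs[0x14]/regs[0x15]  RING_INDIRECT_CTX
 *   regs[0x16]/regs[0x17]  RING_INDIRECT_CTX_OFFSET
 */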

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (GRAPHICS_VER(engine->i915)) {
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}
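
/*
 * Worked example (illustrative): a 192-byte indirect context BB at GGTT
 * address 0x1000 is programmed as 0x1000 | (192 / CACHELINE_BYTES), i.e.
 * 0x1003: the cacheline-aligned address with the length in cachelines
 * packed into the low bits.
 */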

static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
}
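
/*
 * CTX_CONTEXT_CONTROL is a masked register: the upper 16 bits of each
 * write select which of the lower 16 bits take effect. For example
 * (illustrative), _MASKED_BIT_ENABLE(BIT(3)) == 0x00080008 while
 * _MASKED_BIT_DISABLE(BIT(3)) == 0x00080000, so unrelated bits are left
 * untouched by the write.
 */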

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}
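
/*
 * Example of the masked-write idiom above (illustrative): STOP_RING is a
 * masked bit of RING_MI_MODE, so setting only STOP_RING << 16 in the
 * value dword (regs[x + 1], following the register offset in regs[x])
 * asks the CS to clear STOP_RING when the image is restored.
 */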

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}
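
/*
 * Address arithmetic sketch (illustrative): ce->lrc_reg_state points
 * LRC_STATE_OFFSET bytes into the mapped context object (page 0 holds the
 * per-process HWSP), so rewinding by LRC_STATE_OFFSET and advancing by
 * wa_bb_page whole pages lands on the indirect-BB scratch page that
 * __lrc_alloc_state() appends after the context image.
 */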

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/* Clear the indirect wa and storage */
	if (ce->wa_bb_page)
		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers
	 * which must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

u32 lrc_indirect_bb(const struct intel_context *ce)
{
	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}

static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled); we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
		context_size += PARENT_SCRATCH_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size,
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915,
								ce->state->obj,
								false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	if (unlikely(ce->parallel.last_rq)) {
		i915_request_put(ce->parallel.last_rq);
		ce->parallel.last_rq = NULL;
	}
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

/*
 * On DG2 during context restore of a preempted context in GPGPU mode,
 * RCS restore hang is detected. This is extremely timing dependent.
 * To address this below sw wabb is implemented for DG2 A steppings.
 */
static u32 *
dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
	*cs++ = 0x21;

	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);

	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* DG2 G10/G11 A steppings: RCS context restore hang w/a */
	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
		cs = dg2_emit_rcs_hang_wabb(ce, cs);

	/* DG2: invalidate the instruction cache on context restore */
	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

	/* Invalidate the aux table when the CCS data is not flat */
	if (!HAS_FLAT_CCS(ce->engine->i915))
		cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* DG2: invalidate the instruction cache on context restore */
	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_G11(ce->engine->i915))
		if (ce->engine->class == COMPUTE_CLASS)
			cs = gen8_emit_pipe_control(cs,
						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
						    0);

	/* Invalidate the aux table when the CCS data is not flat */
	if (!HAS_FLAT_CCS(ce->engine->i915)) {
		if (ce->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
			cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
	}

	return cs;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       lrc_indirect_bb(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}
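
/*
 * Illustration (sketch): for a 4-level ppGTT context whose state object
 * sits at GGTT offset 0x10000, the descriptor above evaluates to
 *   0x10000 | (INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT)
 *           | GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE
 * i.e. the page-aligned LRCA in the high bits with the GEN8_CTX_* flags
 * packed below it.
 */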

u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt to global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}
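
/*
 * Illustration, assuming RING_CTL_SIZE(sz) encodes (sz - PAGE_SIZE) as in
 * the register headers: a 16 KiB ring gives regs[CTX_RING_CTL] ==
 * 0x3000 | RING_VALID, i.e. the usable size in whole pages above bit 12
 * plus the valid bit.
 */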

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
 * but there is a slight complication as this is applied in WA batch where the
 * values are only initialized once so we cannot take register value at the
 * beginning and reuse it further; hence we save it to memory.
 *
 * To simplify the WA, a constant value is formed by using the default value
 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch is non-preemptible. We could of course
 * use additional instructions that read the actual value of the register
 * at that time and set our bit of interest, but it makes the WA complicated.
 *
 * This WA is also required for Gen9 so extracting as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts but this field
 * helps us to have multiple batches at different offsets and select them based
 * on a criteria. At the moment this batch always starts at the beginning of the
 * page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WA applied are not known at the beginning; we use this field
 * to return the number of DWORDS written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
 * makes a complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is setup along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
		return;

	switch (GRAPHICS_VER(engine->i915)) {
	case 12:
	case 11:
		return;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		return;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize WA batch
		 * because we only expect rare glitches but nothing
		 * critical to prevent us from using GPU
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	stats->runtime.num_underflow++;
	stats->runtime.max_underflow =
		max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}

static u32 lrc_get_runtime(const struct intel_context *ce)
{
	/*
	 * We can use either ppHWSP[16] which is recorded before the context
	 * switch (and so excludes the cost of context switches) or use the
	 * value from the context image itself, which is saved/restored earlier
	 * and so includes the cost of the save.
	 */
	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}

void lrc_update_runtime(struct intel_context *ce)
{
	struct intel_context_stats *stats = &ce->stats;
	u32 old;
	s32 dt;

	old = stats->runtime.last;
	stats->runtime.last = lrc_get_runtime(ce);
	dt = stats->runtime.last - old;
	if (!dt)
		return;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, stats->runtime.last, dt);
		st_runtime_underflow(stats, dt);
		return;
	}

	ewma_runtime_add(&stats->runtime.avg, dt);
	stats->runtime.total += dt;
}
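
/*
 * Worked example (illustrative): CTX_TIMESTAMP is a free-running u32, so
 * the delta above is computed modulo 2^32. With old == 0xfffffff0 and
 * new == 0x00000010, dt == 0x20: wraparound is absorbed for deltas below
 * 2^31, while a genuinely negative dt signals a scrubbed context image
 * and is reported as an underflow instead of being accumulated.
 */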

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif