// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

/*
 * Execlists submission backend.
 *
 * Requests are executed from per-context logical rings. Up to two contexts
 * at a time are written to the ExecLists Submission Port (ELSP) and the
 * hardware reports context-switch events back through the Context Status
 * Buffer (CSB), which the submission tasklet drains to decide what to run
 * next.
 */
0109 #include <linux/interrupt.h>
0110 #include <linux/string_helpers.h>
0111
0112 #include "i915_drv.h"
0113 #include "i915_trace.h"
0114 #include "i915_vgpu.h"
0115 #include "gen8_engine_cs.h"
0116 #include "intel_breadcrumbs.h"
0117 #include "intel_context.h"
0118 #include "intel_engine_heartbeat.h"
0119 #include "intel_engine_pm.h"
0120 #include "intel_engine_regs.h"
0121 #include "intel_engine_stats.h"
0122 #include "intel_execlists_submission.h"
0123 #include "intel_gt.h"
0124 #include "intel_gt_irq.h"
0125 #include "intel_gt_pm.h"
0126 #include "intel_gt_regs.h"
0127 #include "intel_gt_requests.h"
0128 #include "intel_lrc.h"
0129 #include "intel_lrc_reg.h"
0130 #include "intel_mocs.h"
0131 #include "intel_reset.h"
0132 #include "intel_ring.h"
0133 #include "intel_workarounds.h"
0134 #include "shmem_utils.h"
0135
0136 #define RING_EXECLIST_QFULL (1 << 0x2)
0137 #define RING_EXECLIST1_VALID (1 << 0x3)
0138 #define RING_EXECLIST0_VALID (1 << 0x4)
0139 #define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
0140 #define RING_EXECLIST1_ACTIVE (1 << 0x11)
0141 #define RING_EXECLIST0_ACTIVE (1 << 0x12)
0142
0143 #define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
0144 #define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
0145 #define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
0146 #define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
0147 #define GEN8_CTX_STATUS_COMPLETE (1 << 4)
0148 #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
0149
0150 #define GEN8_CTX_STATUS_COMPLETED_MASK \
0151 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
0152
0153 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1)
0154 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF)
0155 #define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
0156 #define GEN12_IDLE_CTX_ID 0x7FF
0157 #define GEN12_CSB_CTX_VALID(csb_dw) \
0158 (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
0159
0160 #define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE BIT(1)
0161 #define XEHP_CSB_SW_CTX_ID_MASK GENMASK(31, 10)
0162 #define XEHP_IDLE_CTX_ID 0xFFFF
0163 #define XEHP_CSB_CTX_VALID(csb_dw) \
0164 (FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)
0165
0166
0167 #define EXECLISTS_REQUEST_SIZE 64
0168
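/*
 * A virtual engine presents a single uabi engine backed by a set of
 * physical siblings. Requests submitted to it are held until one of the
 * siblings is ready, at which point the request is moved onto that
 * physical engine for execution.
 */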
0169 struct virtual_engine {
0170 struct intel_engine_cs base;
0171 struct intel_context context;
0172 struct rcu_work rcu;
/*
 * Only a single request may be queued on the virtual engine at a time
 * (protected by base.sched_engine->lock). It is assigned to a physical
 * sibling at the last moment, so whichever engine becomes idle first
 * picks up the work.
 */
0183 struct i915_request *request;
/*
 * Pre-allocated rbtree nodes, one per physical sibling, used to queue
 * this virtual engine (sorted by priority) on each sibling's
 * execlists->virtual tree.
 */
0190 struct ve_node {
0191 struct rb_node rb;
0192 int prio;
0193 } nodes[I915_NUM_ENGINES];
/* And finally, which physical engines this virtual engine maps onto. */
0196 unsigned int num_siblings;
0197 struct intel_engine_cs *siblings[];
0198 };
0199
0200 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
0201 {
0202 GEM_BUG_ON(!intel_engine_is_virtual(engine));
0203 return container_of(engine, struct virtual_engine, base);
0204 }
0205
0206 static struct intel_context *
0207 execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
0208 unsigned long flags);
0209
0210 static struct i915_request *
0211 __active_request(const struct intel_timeline * const tl,
0212 struct i915_request *rq,
0213 int error)
0214 {
0215 struct i915_request *active = rq;
0216
0217 list_for_each_entry_from_reverse(rq, &tl->requests, link) {
0218 if (__i915_request_is_complete(rq))
0219 break;
0220
0221 if (error) {
0222 i915_request_set_error_once(rq, error);
0223 __i915_request_skip(rq);
0224 }
0225 active = rq;
0226 }
0227
0228 return active;
0229 }
0230
0231 static struct i915_request *
0232 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
0233 {
0234 return __active_request(tl, rq, 0);
0235 }
0236
0237 static void ring_set_paused(const struct intel_engine_cs *engine, int state)
0238 {
/*
 * We inspect HWS_PREEMPT with a semaphore inside
 * engine->emit_fini_breadcrumb. While the dword reads as non-zero the
 * ring is paused, as the semaphore busywaits until the dword is cleared
 * again.
 */
0245 engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
0246 if (state)
0247 wmb();
0248 }
0249
0250 static struct i915_priolist *to_priolist(struct rb_node *rb)
0251 {
0252 return rb_entry(rb, struct i915_priolist, node);
0253 }
0254
0255 static int rq_prio(const struct i915_request *rq)
0256 {
0257 return READ_ONCE(rq->sched.attr.priority);
0258 }
0259
0260 static int effective_prio(const struct i915_request *rq)
0261 {
0262 int prio = rq_prio(rq);
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272 if (i915_request_has_nopreempt(rq))
0273 prio = I915_PRIORITY_UNPREEMPTABLE;
0274
0275 return prio;
0276 }
0277
0278 static int queue_prio(const struct i915_sched_engine *sched_engine)
0279 {
0280 struct rb_node *rb;
0281
0282 rb = rb_first_cached(&sched_engine->queue);
0283 if (!rb)
0284 return INT_MIN;
0285
0286 return to_priolist(rb)->priority;
0287 }
0288
0289 static int virtual_prio(const struct intel_engine_execlists *el)
0290 {
0291 struct rb_node *rb = rb_first_cached(&el->virtual);
0292
0293 return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
0294 }
0295
0296 static bool need_preempt(const struct intel_engine_cs *engine,
0297 const struct i915_request *rq)
0298 {
0299 int last_prio;
0300
0301 if (!intel_engine_has_semaphores(engine))
0302 return false;
0303
/*
 * engine->sched_engine->queue_priority_hint is the highest priority
 * observed while rescheduling prior to this dequeue. If it does not
 * exceed the effective priority of the request currently on the HW,
 * there is no point forcing a preempt-to-idle cycle. The hint is only a
 * hint (it may be stale), and we deliberately never preempt at the same
 * priority level so that the running request preserves FIFO ordering
 * amongst equals.
 */
0322 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
0323 if (engine->sched_engine->queue_priority_hint <= last_prio)
0324 return false;
0325
0326
0327
0328
0329
0330 if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) &&
0331 rq_prio(list_next_entry(rq, sched.link)) > last_prio)
0332 return true;
0333
0334
0335
0336
0337
0338
0339
0340
0341
0342
0343
0344 return max(virtual_prio(&engine->execlists),
0345 queue_prio(engine->sched_engine)) > last_prio;
0346 }
0347
0348 __maybe_unused static bool
0349 assert_priority_queue(const struct i915_request *prev,
0350 const struct i915_request *next)
0351 {
0352
0353
0354
0355
0356
0357
0358
0359 if (i915_request_is_active(prev))
0360 return true;
0361
0362 return rq_prio(prev) >= rq_prio(next);
0363 }
0364
0365 static struct i915_request *
0366 __unwind_incomplete_requests(struct intel_engine_cs *engine)
0367 {
0368 struct i915_request *rq, *rn, *active = NULL;
0369 struct list_head *pl;
0370 int prio = I915_PRIORITY_INVALID;
0371
0372 lockdep_assert_held(&engine->sched_engine->lock);
0373
0374 list_for_each_entry_safe_reverse(rq, rn,
0375 &engine->sched_engine->requests,
0376 sched.link) {
0377 if (__i915_request_is_complete(rq)) {
0378 list_del_init(&rq->sched.link);
0379 continue;
0380 }
0381
0382 __i915_request_unsubmit(rq);
0383
0384 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
0385 if (rq_prio(rq) != prio) {
0386 prio = rq_prio(rq);
0387 pl = i915_sched_lookup_priolist(engine->sched_engine,
0388 prio);
0389 }
0390 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine));
0391
0392 list_move(&rq->sched.link, pl);
0393 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
0394
0395
0396 if (intel_ring_direction(rq->ring,
0397 rq->tail,
0398 rq->ring->tail + 8) > 0)
0399 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
0400
0401 active = rq;
0402 }
0403
0404 return active;
0405 }
0406
0407 struct i915_request *
0408 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
0409 {
0410 struct intel_engine_cs *engine =
0411 container_of(execlists, typeof(*engine), execlists);
0412
0413 return __unwind_incomplete_requests(engine);
0414 }
0415
0416 static void
0417 execlists_context_status_change(struct i915_request *rq, unsigned long status)
0418 {
/*
 * The context status notifier is only used by GVT-g; without
 * CONFIG_DRM_I915_GVT this function compiles away to nothing.
 */
0423 if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
0424 return;
0425
0426 atomic_notifier_call_chain(&rq->engine->context_status_notifier,
0427 status, rq);
0428 }
0429
0430 static void reset_active(struct i915_request *rq,
0431 struct intel_engine_cs *engine)
0432 {
0433 struct intel_context * const ce = rq->context;
0434 u32 head;
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451 ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n",
0452 rq->fence.context, rq->fence.seqno);
/*
 * If the request completed, restart from just after it; otherwise rewind
 * to the oldest incomplete request on the timeline, marking the
 * incomplete requests with -EIO so their payloads are skipped.
 */
0455 if (__i915_request_is_complete(rq))
0456 head = rq->tail;
0457 else
0458 head = __active_request(ce->timeline, rq, -EIO)->head;
0459 head = intel_ring_wrap(ce->ring, head);
/* Scrub the context image to prevent replaying the previous batch */
0462 lrc_init_regs(ce, engine, true);
0463
0464
0465 ce->lrc.lrca = lrc_update_regs(ce, engine, head);
0466 }
0467
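/*
 * A request that has already started and then been flagged with an error
 * cannot be trusted to complete normally, so treat it like a banned
 * context when deciding on resets and preempt timeouts.
 */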
0468 static bool bad_request(const struct i915_request *rq)
0469 {
0470 return rq->fence.error && i915_request_started(rq);
0471 }
0472
0473 static struct intel_engine_cs *
0474 __execlists_schedule_in(struct i915_request *rq)
0475 {
0476 struct intel_engine_cs * const engine = rq->engine;
0477 struct intel_context * const ce = rq->context;
0478
0479 intel_context_get(ce);
0480
0481 if (unlikely(intel_context_is_closed(ce) &&
0482 !intel_engine_has_heartbeat(engine)))
0483 intel_context_set_exiting(ce);
0484
0485 if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
0486 reset_active(rq, engine);
0487
0488 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
0489 lrc_check_regs(ce, engine, "before");
0490
0491 if (ce->tag) {
0492
0493 GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
0494 ce->lrc.ccid = ce->tag;
0495 } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
0496
0497 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
0498
0499 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
0500 clear_bit(tag - 1, &engine->context_tag);
0501 ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32);
0502
0503 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
0504
0505 } else {
0506
0507 unsigned int tag = __ffs(engine->context_tag);
0508
0509 GEM_BUG_ON(tag >= BITS_PER_LONG);
0510 __clear_bit(tag, &engine->context_tag);
0511 ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32);
0512
0513 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
0514 }
0515
0516 ce->lrc.ccid |= engine->execlists.ccid;
0517
0518 __intel_gt_pm_get(engine->gt);
0519 if (engine->fw_domain && !engine->fw_active++)
0520 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
0521 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
0522 intel_engine_context_in(engine);
0523
0524 CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid);
0525
0526 return engine;
0527 }
0528
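/*
 * Each context keeps an inflight count encoded alongside the engine
 * pointer (ce->inflight); only the first port occupied by a context
 * performs the full schedule-in, later ports just bump the count.
 */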
0529 static void execlists_schedule_in(struct i915_request *rq, int idx)
0530 {
0531 struct intel_context * const ce = rq->context;
0532 struct intel_engine_cs *old;
0533
0534 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
0535 trace_i915_request_in(rq, idx);
0536
0537 old = ce->inflight;
0538 if (!old)
0539 old = __execlists_schedule_in(rq);
0540 WRITE_ONCE(ce->inflight, ptr_inc(old));
0541
0542 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
0543 }
0544
0545 static void
0546 resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
0547 {
0548 struct intel_engine_cs *engine = rq->engine;
0549
0550 spin_lock_irq(&engine->sched_engine->lock);
0551
0552 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
0553 WRITE_ONCE(rq->engine, &ve->base);
0554 ve->base.submit_request(rq);
0555
0556 spin_unlock_irq(&engine->sched_engine->lock);
0557 }
0558
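/*
 * When a request that was bounced through a virtual engine is switched
 * out without completing, hand it back to the virtual engine so that any
 * of the other siblings may pick it up.
 */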
0559 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
0560 {
0561 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
0562 struct intel_engine_cs *engine = rq->engine;
0563
0564
0565
0566
0567
0568
0569
0570
0571 if (!list_empty(&ce->signals))
0572 intel_context_remove_breadcrumbs(ce, engine->breadcrumbs);
0573
0574
0575
0576
0577
0578
0579
0580 if (i915_request_in_priority_queue(rq) &&
0581 rq->execution_mask != engine->mask)
0582 resubmit_virtual_request(rq, ve);
0583
0584 if (READ_ONCE(ve->request))
0585 tasklet_hi_schedule(&ve->base.sched_engine->tasklet);
0586 }
0587
0588 static void __execlists_schedule_out(struct i915_request * const rq,
0589 struct intel_context * const ce)
0590 {
0591 struct intel_engine_cs * const engine = rq->engine;
0592 unsigned int ccid;
0593
0594
0595
0596
0597
0598
0599
0600 CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
0601 GEM_BUG_ON(ce->inflight != engine);
0602
0603 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
0604 lrc_check_regs(ce, engine, "after");
0605
0606
0607
0608
0609
0610 if (intel_timeline_is_last(ce->timeline, rq) &&
0611 __i915_request_is_complete(rq))
0612 intel_engine_add_retire(engine, ce->timeline);
0613
0614 ccid = ce->lrc.ccid;
0615 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
0616 ccid >>= XEHP_SW_CTX_ID_SHIFT - 32;
0617 ccid &= XEHP_MAX_CONTEXT_HW_ID;
0618 } else {
0619 ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
0620 ccid &= GEN12_MAX_CONTEXT_HW_ID;
0621 }
0622
0623 if (ccid < BITS_PER_LONG) {
0624 GEM_BUG_ON(ccid == 0);
0625 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
0626 __set_bit(ccid - 1, &engine->context_tag);
0627 }
0628 intel_engine_context_out(engine);
0629 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
0630 if (engine->fw_domain && !--engine->fw_active)
0631 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
0632 intel_gt_pm_put_async(engine->gt);
0633
0634
0635
0636
0637
0638
0639
0640
0641
0642
0643 if (ce->engine != engine)
0644 kick_siblings(rq, ce);
0645
0646 WRITE_ONCE(ce->inflight, NULL);
0647 intel_context_put(ce);
0648 }
0649
0650 static inline void execlists_schedule_out(struct i915_request *rq)
0651 {
0652 struct intel_context * const ce = rq->context;
0653
0654 trace_i915_request_out(rq);
0655
0656 GEM_BUG_ON(!ce->inflight);
0657 ce->inflight = ptr_dec(ce->inflight);
0658 if (!__intel_context_inflight_count(ce->inflight))
0659 __execlists_schedule_out(rq, ce);
0660
0661 i915_request_put(rq);
0662 }
0663
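/*
 * Map the software priority onto the three hardware priority levels
 * carried in the context descriptor (used on engines with EU priority).
 */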
0664 static u32 map_i915_prio_to_lrc_desc_prio(int prio)
0665 {
0666 if (prio > I915_PRIORITY_NORMAL)
0667 return GEN12_CTX_PRIORITY_HIGH;
0668 else if (prio < I915_PRIORITY_NORMAL)
0669 return GEN12_CTX_PRIORITY_LOW;
0670 else
0671 return GEN12_CTX_PRIORITY_NORMAL;
0672 }
0673
0674 static u64 execlists_update_context(struct i915_request *rq)
0675 {
0676 struct intel_context *ce = rq->context;
0677 u64 desc;
0678 u32 tail, prev;
0679
0680 desc = ce->lrc.desc;
0681 if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
0682 desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq));
/*
 * WaIdleLiteRestore: we must never submit a context with RING_TAIL
 * unchanged (an empty submission confuses the HW), nor let the tail
 * appear to move backwards after an unwind. A couple of NOOPs are
 * emitted after each request so a lite-restore can always advance the
 * tail; if the new tail would still not advance past the previous one,
 * force a full context restore instead.
 */
0701 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
0702 prev = rq->ring->tail;
0703 tail = intel_ring_set_tail(rq->ring, rq->tail);
0704 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
0705 desc |= CTX_DESC_FORCE_RESTORE;
0706 ce->lrc_reg_state[CTX_RING_TAIL] = tail;
0707 rq->tail = rq->wa_tail;
/*
 * Make sure the context image is complete before we submit it to HW.
 * The WC writes into the context image may not be visible to the HW
 * until after the uncached ELSP register write, so flush them first.
 */
0719 wmb();
0720
0721 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
0722 return desc;
0723 }
0724
0725 static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
0726 {
0727 if (execlists->ctrl_reg) {
0728 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
0729 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
0730 } else {
0731 writel(upper_32_bits(desc), execlists->submit_reg);
0732 writel(lower_32_bits(desc), execlists->submit_reg);
0733 }
0734 }
0735
0736 static __maybe_unused char *
0737 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
0738 {
0739 if (!rq)
0740 return "";
0741
0742 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
0743 prefix,
0744 rq->context->lrc.ccid,
0745 rq->fence.context, rq->fence.seqno,
0746 __i915_request_is_complete(rq) ? "!" :
0747 __i915_request_has_started(rq) ? "*" :
0748 "",
0749 rq_prio(rq));
0750
0751 return buf;
0752 }
0753
0754 static __maybe_unused noinline void
0755 trace_ports(const struct intel_engine_execlists *execlists,
0756 const char *msg,
0757 struct i915_request * const *ports)
0758 {
0759 const struct intel_engine_cs *engine =
0760 container_of(execlists, typeof(*engine), execlists);
0761 char __maybe_unused p0[40], p1[40];
0762
0763 if (!ports[0])
0764 return;
0765
0766 ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
0767 dump_port(p0, sizeof(p0), "", ports[0]),
0768 dump_port(p1, sizeof(p1), ", ", ports[1]));
0769 }
0770
0771 static bool
0772 reset_in_progress(const struct intel_engine_cs *engine)
0773 {
0774 return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet));
0775 }
0776
0777 static __maybe_unused noinline bool
0778 assert_pending_valid(const struct intel_engine_execlists *execlists,
0779 const char *msg)
0780 {
0781 struct intel_engine_cs *engine =
0782 container_of(execlists, typeof(*engine), execlists);
0783 struct i915_request * const *port, *rq, *prev = NULL;
0784 struct intel_context *ce = NULL;
0785 u32 ccid = -1;
0786
0787 trace_ports(execlists, msg, execlists->pending);
0788
0789
0790 if (reset_in_progress(engine))
0791 return true;
0792
0793 if (!execlists->pending[0]) {
0794 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
0795 engine->name);
0796 return false;
0797 }
0798
0799 if (execlists->pending[execlists_num_ports(execlists)]) {
0800 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
0801 engine->name, execlists_num_ports(execlists));
0802 return false;
0803 }
0804
0805 for (port = execlists->pending; (rq = *port); port++) {
0806 unsigned long flags;
0807 bool ok = true;
0808
0809 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
0810 GEM_BUG_ON(!i915_request_is_active(rq));
0811
0812 if (ce == rq->context) {
0813 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
0814 engine->name,
0815 ce->timeline->fence_context,
0816 port - execlists->pending);
0817 return false;
0818 }
0819 ce = rq->context;
0820
0821 if (ccid == ce->lrc.ccid) {
0822 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
0823 engine->name,
0824 ccid, ce->timeline->fence_context,
0825 port - execlists->pending);
0826 return false;
0827 }
0828 ccid = ce->lrc.ccid;
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839 if (prev && i915_request_has_sentinel(prev) &&
0840 !READ_ONCE(prev->fence.error)) {
0841 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
0842 engine->name,
0843 ce->timeline->fence_context,
0844 port - execlists->pending);
0845 return false;
0846 }
0847 prev = rq;
0848
0849
0850
0851
0852
0853
0854 if (rq->execution_mask != engine->mask &&
0855 port != execlists->pending) {
0856 GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n",
0857 engine->name,
0858 ce->timeline->fence_context,
0859 port - execlists->pending);
0860 return false;
0861 }
0862
0863
0864 if (!spin_trylock_irqsave(&rq->lock, flags))
0865 continue;
0866
0867 if (__i915_request_is_complete(rq))
0868 goto unlock;
0869
0870 if (i915_active_is_idle(&ce->active) &&
0871 !intel_context_is_barrier(ce)) {
0872 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
0873 engine->name,
0874 ce->timeline->fence_context,
0875 port - execlists->pending);
0876 ok = false;
0877 goto unlock;
0878 }
0879
0880 if (!i915_vma_is_pinned(ce->state)) {
0881 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
0882 engine->name,
0883 ce->timeline->fence_context,
0884 port - execlists->pending);
0885 ok = false;
0886 goto unlock;
0887 }
0888
0889 if (!i915_vma_is_pinned(ce->ring->vma)) {
0890 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
0891 engine->name,
0892 ce->timeline->fence_context,
0893 port - execlists->pending);
0894 ok = false;
0895 goto unlock;
0896 }
0897
0898 unlock:
0899 spin_unlock_irqrestore(&rq->lock, flags);
0900 if (!ok)
0901 return false;
0902 }
0903
0904 return ce;
0905 }
0906
0907 static void execlists_submit_ports(struct intel_engine_cs *engine)
0908 {
0909 struct intel_engine_execlists *execlists = &engine->execlists;
0910 unsigned int n;
0911
0912 GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
0913
0914
0915
0916
0917
0918
0919
0920
0921
0922 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
0923
0924
0925
0926
0927
0928
0929
0930 for (n = execlists_num_ports(execlists); n--; ) {
0931 struct i915_request *rq = execlists->pending[n];
0932
0933 write_desc(execlists,
0934 rq ? execlists_update_context(rq) : 0,
0935 n);
0936 }
0937
0938
0939 if (execlists->ctrl_reg)
0940 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
0941 }
0942
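/*
 * GVT-g contexts are flagged for single submission: they must occupy an
 * ELSP port on their own and are never coalesced with another context.
 */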
0943 static bool ctx_single_port_submission(const struct intel_context *ce)
0944 {
0945 return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
0946 intel_context_force_single_submission(ce));
0947 }
0948
0949 static bool can_merge_ctx(const struct intel_context *prev,
0950 const struct intel_context *next)
0951 {
0952 if (prev != next)
0953 return false;
0954
0955 if (ctx_single_port_submission(prev))
0956 return false;
0957
0958 return true;
0959 }
0960
0961 static unsigned long i915_request_flags(const struct i915_request *rq)
0962 {
0963 return READ_ONCE(rq->fence.flags);
0964 }
0965
0966 static bool can_merge_rq(const struct i915_request *prev,
0967 const struct i915_request *next)
0968 {
0969 GEM_BUG_ON(prev == next);
0970 GEM_BUG_ON(!assert_priority_queue(prev, next));
0971
0972
0973
0974
0975
0976
0977
0978
0979
0980 if (__i915_request_is_complete(next))
0981 return true;
0982
0983 if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) &
0984 (BIT(I915_FENCE_FLAG_NOPREEMPT) |
0985 BIT(I915_FENCE_FLAG_SENTINEL))))
0986 return false;
0987
0988 if (!can_merge_ctx(prev->context, next->context))
0989 return false;
0990
0991 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
0992 return true;
0993 }
0994
0995 static bool virtual_matches(const struct virtual_engine *ve,
0996 const struct i915_request *rq,
0997 const struct intel_engine_cs *engine)
0998 {
0999 const struct intel_engine_cs *inflight;
1000
1001 if (!rq)
1002 return false;
1003
1004 if (!(rq->execution_mask & engine->mask))
1005 return false;
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016 inflight = intel_context_inflight(&ve->context);
1017 if (inflight && inflight != engine)
1018 return false;
1019
1020 return true;
1021 }
1022
1023 static struct virtual_engine *
1024 first_virtual_engine(struct intel_engine_cs *engine)
1025 {
1026 struct intel_engine_execlists *el = &engine->execlists;
1027 struct rb_node *rb = rb_first_cached(&el->virtual);
1028
1029 while (rb) {
1030 struct virtual_engine *ve =
1031 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1032 struct i915_request *rq = READ_ONCE(ve->request);
1033
1034
1035 if (!rq || !virtual_matches(ve, rq, engine)) {
1036 rb_erase_cached(rb, &el->virtual);
1037 RB_CLEAR_NODE(rb);
1038 rb = rb_first_cached(&el->virtual);
1039 continue;
1040 }
1041
1042 return ve;
1043 }
1044
1045 return NULL;
1046 }
1047
1048 static void virtual_xfer_context(struct virtual_engine *ve,
1049 struct intel_engine_cs *engine)
1050 {
1051 unsigned int n;
1052
1053 if (likely(engine == ve->siblings[0]))
1054 return;
1055
1056 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1057 if (!intel_engine_has_relative_mmio(engine))
1058 lrc_update_offsets(&ve->context, engine);
1059
1060
1061
1062
1063
1064
1065
1066 for (n = 1; n < ve->num_siblings; n++) {
1067 if (ve->siblings[n] == engine) {
1068 swap(ve->siblings[n], ve->siblings[0]);
1069 break;
1070 }
1071 }
1072 }
1073
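/*
 * Move the request, and any of its ready waiters on this engine that
 * would otherwise run ahead of it, to the end of its priority list.
 * Used when the active context has exhausted its timeslice and must be
 * rotated behind its peers.
 */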
1074 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1075 {
1076 LIST_HEAD(list);
1077
1078
1079
1080
1081
1082
1083
1084
1085 do {
1086 struct i915_dependency *p;
1087
1088 GEM_BUG_ON(i915_request_is_active(rq));
1089 list_move_tail(&rq->sched.link, pl);
1090
1091 for_each_waiter(p, rq) {
1092 struct i915_request *w =
1093 container_of(p->waiter, typeof(*w), sched);
1094
1095 if (p->flags & I915_DEPENDENCY_WEAK)
1096 continue;
1097
1098
1099 if (w->engine != rq->engine)
1100 continue;
1101
1102
1103 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1104 __i915_request_has_started(w) &&
1105 !__i915_request_is_complete(rq));
1106
1107 if (!i915_request_is_ready(w))
1108 continue;
1109
1110 if (rq_prio(w) < rq_prio(rq))
1111 continue;
1112
1113 GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1114 GEM_BUG_ON(i915_request_is_active(w));
1115 list_move_tail(&w->sched.link, &list);
1116 }
1117
1118 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1119 } while (rq);
1120 }
1121
1122 static void defer_active(struct intel_engine_cs *engine)
1123 {
1124 struct i915_request *rq;
1125
1126 rq = __unwind_incomplete_requests(engine);
1127 if (!rq)
1128 return;
1129
1130 defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine,
1131 rq_prio(rq)));
1132 }
1133
1134 static bool
1135 timeslice_yield(const struct intel_engine_execlists *el,
1136 const struct i915_request *rq)
/*
 * If the active context is stuck busywaiting on a semaphore, the HW
 * raises a WAIT_SEMAPHORE interrupt and we record its ccid in el->yield
 * (see execlists_irq_handler). Treat that context's timeslice as expired
 * so another context gets a chance to run and signal the semaphore.
 */
1150 return rq->context->lrc.ccid == READ_ONCE(el->yield);
1151 }
1152
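/*
 * Timeslicing is only worthwhile if someone else could use the engine:
 * a second inflight context, a queued request, or a waiting virtual
 * engine.
 */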
1153 static bool needs_timeslice(const struct intel_engine_cs *engine,
1154 const struct i915_request *rq)
1155 {
1156 if (!intel_engine_has_timeslices(engine))
1157 return false;
1158
1159
1160 if (!rq || __i915_request_is_complete(rq))
1161 return false;
1162
1163
1164 if (READ_ONCE(engine->execlists.pending[0]))
1165 return false;
1166
1167
1168 if (!list_is_last_rcu(&rq->sched.link,
1169 &engine->sched_engine->requests)) {
1170 ENGINE_TRACE(engine, "timeslice required for second inflight context\n");
1171 return true;
1172 }
1173
1174
1175 if (!i915_sched_engine_is_empty(engine->sched_engine)) {
1176 ENGINE_TRACE(engine, "timeslice required for queue\n");
1177 return true;
1178 }
1179
1180 if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)) {
1181 ENGINE_TRACE(engine, "timeslice required for virtual\n");
1182 return true;
1183 }
1184
1185 return false;
1186 }
1187
1188 static bool
1189 timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq)
1190 {
1191 const struct intel_engine_execlists *el = &engine->execlists;
1192
1193 if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq))
1194 return false;
1195
1196 if (!needs_timeslice(engine, rq))
1197 return false;
1198
1199 return timer_expired(&el->timer) || timeslice_yield(el, rq);
1200 }
1201
1202 static unsigned long timeslice(const struct intel_engine_cs *engine)
1203 {
1204 return READ_ONCE(engine->props.timeslice_duration_ms);
1205 }
1206
1207 static void start_timeslice(struct intel_engine_cs *engine)
1208 {
1209 struct intel_engine_execlists *el = &engine->execlists;
1210 unsigned long duration;
1211
1212
1213 duration = 0;
1214 if (needs_timeslice(engine, *el->active)) {
1215
1216 if (timer_active(&el->timer)) {
1217
1218
1219
1220
1221
1222 if (!timer_pending(&el->timer))
1223 tasklet_hi_schedule(&engine->sched_engine->tasklet);
1224 return;
1225 }
1226
1227 duration = timeslice(engine);
1228 }
1229
1230 set_timer_ms(&el->timer, duration);
1231 }
1232
1233 static void record_preemption(struct intel_engine_execlists *execlists)
1234 {
1235 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1236 }
1237
1238 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
1239 const struct i915_request *rq)
1240 {
1241 if (!rq)
1242 return 0;
1243
1244
1245 engine->execlists.preempt_target = rq;
1246
1247
1248 if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
1249 return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
1250
1251 return READ_ONCE(engine->props.preempt_timeout_ms);
1252 }
1253
1254 static void set_preempt_timeout(struct intel_engine_cs *engine,
1255 const struct i915_request *rq)
1256 {
1257 if (!intel_engine_has_preempt_reset(engine))
1258 return;
1259
1260 set_timer_ms(&engine->execlists.preempt,
1261 active_preempt_timeout(engine, rq));
1262 }
1263
1264 static bool completed(const struct i915_request *rq)
1265 {
1266 if (i915_request_has_sentinel(rq))
1267 return false;
1268
1269 return __i915_request_is_complete(rq);
1270 }
1271
1272 static void execlists_dequeue(struct intel_engine_cs *engine)
1273 {
1274 struct intel_engine_execlists * const execlists = &engine->execlists;
1275 struct i915_sched_engine * const sched_engine = engine->sched_engine;
1276 struct i915_request **port = execlists->pending;
1277 struct i915_request ** const last_port = port + execlists->port_mask;
1278 struct i915_request *last, * const *active;
1279 struct virtual_engine *ve;
1280 struct rb_node *rb;
1281 bool submit = false;
/*
 * Hardware submission is through 2 ports. Conceptually each port has a
 * (RING_START, RING_HEAD, RING_TAIL) tuple; RING_START is static per
 * context, so a context only needs to be written to the ELSP once per
 * switch, and subsequent requests for the same context are coalesced by
 * merely advancing RING_TAIL (a lite-restore). Here we choose the next
 * pair of contexts to run, deciding first whether the request currently
 * on the HW must be preempted or has exhausted its timeslice.
 */
1305 spin_lock(&sched_engine->lock);
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316 active = execlists->active;
1317 while ((last = *active) && completed(last))
1318 active++;
1319
1320 if (last) {
1321 if (need_preempt(engine, last)) {
1322 ENGINE_TRACE(engine,
1323 "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1324 last->fence.context,
1325 last->fence.seqno,
1326 last->sched.attr.priority,
1327 sched_engine->queue_priority_hint);
1328 record_preemption(execlists);
1329
1330
1331
1332
1333
1334
1335 ring_set_paused(engine, 1);
1336
1337
1338
1339
1340
1341
1342
1343
1344 __unwind_incomplete_requests(engine);
1345
1346 last = NULL;
1347 } else if (timeslice_expired(engine, last)) {
1348 ENGINE_TRACE(engine,
1349 "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
1350 str_yes_no(timer_expired(&execlists->timer)),
1351 last->fence.context, last->fence.seqno,
1352 rq_prio(last),
1353 sched_engine->queue_priority_hint,
1354 str_yes_no(timeslice_yield(execlists, last)));
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372 cancel_timer(&execlists->timer);
1373 ring_set_paused(engine, 1);
1374 defer_active(engine);
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389 last = NULL;
1390 } else {
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400 if (active[1]) {
1401
1402
1403
1404
1405 spin_unlock(&sched_engine->lock);
1406 return;
1407 }
1408 }
1409 }
1410
1411
1412 while ((ve = first_virtual_engine(engine))) {
1413 struct i915_request *rq;
1414
1415 spin_lock(&ve->base.sched_engine->lock);
1416
1417 rq = ve->request;
1418 if (unlikely(!virtual_matches(ve, rq, engine)))
1419 goto unlock;
1420
1421 GEM_BUG_ON(rq->engine != &ve->base);
1422 GEM_BUG_ON(rq->context != &ve->context);
1423
1424 if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) {
1425 spin_unlock(&ve->base.sched_engine->lock);
1426 break;
1427 }
1428
1429 if (last && !can_merge_rq(last, rq)) {
1430 spin_unlock(&ve->base.sched_engine->lock);
1431 spin_unlock(&engine->sched_engine->lock);
1432 return;
1433 }
1434
1435 ENGINE_TRACE(engine,
1436 "virtual rq=%llx:%lld%s, new engine? %s\n",
1437 rq->fence.context,
1438 rq->fence.seqno,
1439 __i915_request_is_complete(rq) ? "!" :
1440 __i915_request_has_started(rq) ? "*" :
1441 "",
1442 str_yes_no(engine != ve->siblings[0]));
1443
1444 WRITE_ONCE(ve->request, NULL);
1445 WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN);
1446
1447 rb = &ve->nodes[engine->id].rb;
1448 rb_erase_cached(rb, &execlists->virtual);
1449 RB_CLEAR_NODE(rb);
1450
1451 GEM_BUG_ON(!(rq->execution_mask & engine->mask));
1452 WRITE_ONCE(rq->engine, engine);
1453
1454 if (__i915_request_submit(rq)) {
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468 virtual_xfer_context(ve, engine);
1469 GEM_BUG_ON(ve->siblings[0] != engine);
1470
1471 submit = true;
1472 last = rq;
1473 }
1474
1475 i915_request_put(rq);
1476 unlock:
1477 spin_unlock(&ve->base.sched_engine->lock);
1478
1479
1480
1481
1482
1483
1484
1485
1486 if (submit)
1487 break;
1488 }
1489
1490 while ((rb = rb_first_cached(&sched_engine->queue))) {
1491 struct i915_priolist *p = to_priolist(rb);
1492 struct i915_request *rq, *rn;
1493
1494 priolist_for_each_request_consume(rq, rn, p) {
1495 bool merge = true;
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508 if (last && !can_merge_rq(last, rq)) {
1509
1510
1511
1512
1513
1514 if (port == last_port)
1515 goto done;
1516
1517
1518
1519
1520
1521
1522 if (last->context == rq->context)
1523 goto done;
1524
1525 if (i915_request_has_sentinel(last))
1526 goto done;
1527
1528
1529
1530
1531
1532
1533
1534 if (rq->execution_mask != engine->mask)
1535 goto done;
1536
1537
1538
1539
1540
1541
1542
1543
1544 if (ctx_single_port_submission(last->context) ||
1545 ctx_single_port_submission(rq->context))
1546 goto done;
1547
1548 merge = false;
1549 }
1550
1551 if (__i915_request_submit(rq)) {
1552 if (!merge) {
1553 *port++ = i915_request_get(last);
1554 last = NULL;
1555 }
1556
1557 GEM_BUG_ON(last &&
1558 !can_merge_ctx(last->context,
1559 rq->context));
1560 GEM_BUG_ON(last &&
1561 i915_seqno_passed(last->fence.seqno,
1562 rq->fence.seqno));
1563
1564 submit = true;
1565 last = rq;
1566 }
1567 }
1568
1569 rb_erase_cached(&p->node, &sched_engine->queue);
1570 i915_priolist_free(p);
1571 }
1572 done:
1573 *port++ = i915_request_get(last);
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591 sched_engine->queue_priority_hint = queue_prio(sched_engine);
1592 i915_sched_engine_reset_on_empty(sched_engine);
1593 spin_unlock(&sched_engine->lock);
1594
1595
1596
1597
1598
1599
1600 if (submit &&
1601 memcmp(active,
1602 execlists->pending,
1603 (port - execlists->pending) * sizeof(*port))) {
1604 *port = NULL;
1605 while (port-- != execlists->pending)
1606 execlists_schedule_in(*port, port - execlists->pending);
1607
1608 WRITE_ONCE(execlists->yield, -1);
1609 set_preempt_timeout(engine, *active);
1610 execlists_submit_ports(engine);
1611 } else {
1612 ring_set_paused(engine, 0);
1613 while (port-- != execlists->pending)
1614 i915_request_put(*port);
1615 *execlists->pending = NULL;
1616 }
1617 }
1618
1619 static void execlists_dequeue_irq(struct intel_engine_cs *engine)
1620 {
1621 local_irq_disable();
1622 execlists_dequeue(engine);
1623 local_irq_enable();
1624 }
1625
1626 static void clear_ports(struct i915_request **ports, int count)
1627 {
1628 memset_p((void **)ports, NULL, count);
1629 }
1630
1631 static void
1632 copy_ports(struct i915_request **dst, struct i915_request **src, int count)
1633 {
1634
1635 while (count--)
1636 WRITE_ONCE(*dst++, *src++);
1637 }
1638
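/*
 * On reset, collect every request still held in the ELSP ports (both
 * pending and active) onto the inactive list for the caller to schedule
 * out, and clear the ports.
 */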
1639 static struct i915_request **
1640 cancel_port_requests(struct intel_engine_execlists * const execlists,
1641 struct i915_request **inactive)
1642 {
1643 struct i915_request * const *port;
1644
1645 for (port = execlists->pending; *port; port++)
1646 *inactive++ = *port;
1647 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
1648
1649
1650 for (port = xchg(&execlists->active, execlists->pending); *port; port++)
1651 *inactive++ = *port;
1652 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
1653
1654 smp_wmb();
1655 WRITE_ONCE(execlists->active, execlists->inflight);
1656
1657
1658 GEM_BUG_ON(execlists->pending[0]);
1659 cancel_timer(&execlists->timer);
1660 cancel_timer(&execlists->preempt);
1661
1662 return inactive;
1663 }
1664
/*
 * Context switch events are read from the Context Status Buffer (CSB),
 * a small circular buffer written by the GPU as it switches contexts.
 * Each event is a pair of dwords describing the context switched away
 * from and the context switched to. On Gen8-Gen11 the status bits
 * (idle->active, preempted, complete, ...) are reported directly; from
 * Gen12 onwards we instead look at the software context IDs and the
 * context-switch detail encoded in the event (see the GEN12_CSB_* and
 * XEHP_CSB_* masks above) to decide whether an event promotes the
 * pending submission to active or completes the head of the active
 * ports.
 */
1704 static inline bool
1705 __gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue,
1706 u8 switch_detail)
1707 {
1708
1709
1710
1711
1712
1713
1714
1715 if (!ctx_away_valid || new_queue) {
1716 GEM_BUG_ON(!ctx_to_valid);
1717 return true;
1718 }
1719
1720
1721
1722
1723
1724
1725 GEM_BUG_ON(switch_detail);
1726 return false;
1727 }
1728
1729 static bool xehp_csb_parse(const u64 csb)
1730 {
1731 return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)),
1732 XEHP_CSB_CTX_VALID(upper_32_bits(csb)),
1733 upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE,
1734 GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb)));
1735 }
1736
1737 static bool gen12_csb_parse(const u64 csb)
1738 {
1739 return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)),
1740 GEN12_CSB_CTX_VALID(upper_32_bits(csb)),
1741 lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE,
1742 GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
1743 }
1744
1745 static bool gen8_csb_parse(const u64 csb)
1746 {
1747 return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
1748 }
1749
1750 static noinline u64
1751 wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
1752 {
1753 u64 entry;
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764 preempt_disable();
1765 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
1766 int idx = csb - engine->execlists.csb_status;
1767 int status;
1768
1769 status = GEN8_EXECLISTS_STATUS_BUF;
1770 if (idx >= 6) {
1771 status = GEN11_EXECLISTS_STATUS_BUF2;
1772 idx -= 6;
1773 }
1774 status += sizeof(u64) * idx;
1775
1776 entry = intel_uncore_read64(engine->uncore,
1777 _MMIO(engine->mmio_base + status));
1778 }
1779 preempt_enable();
1780
1781 return entry;
1782 }
1783
1784 static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb)
1785 {
1786 u64 entry = READ_ONCE(*csb);
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799 if (unlikely(entry == -1))
1800 entry = wa_csb_read(engine, csb);
1801
1802
1803 WRITE_ONCE(*csb, -1);
1804
1805
1806 return entry;
1807 }
1808
1809 static void new_timeslice(struct intel_engine_execlists *el)
1810 {
1811
1812 cancel_timer(&el->timer);
1813 }
1814
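/*
 * Drain the Context Status Buffer: for each event either promote the
 * pending ELSP submission to active, or retire the completed head of the
 * active ports. Requests that are switched out are appended to the
 * inactive array and returned for the caller to schedule out once
 * outside the critical section.
 */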
1815 static struct i915_request **
1816 process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
1817 {
1818 struct intel_engine_execlists * const execlists = &engine->execlists;
1819 u64 * const buf = execlists->csb_status;
1820 const u8 num_entries = execlists->csb_size;
1821 struct i915_request **prev;
1822 u8 head, tail;
1823
1824
1825
1826
1827
1828
1829 GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) &&
1830 !reset_in_progress(engine));
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842 head = execlists->csb_head;
1843 tail = READ_ONCE(*execlists->csb_write);
1844 if (unlikely(head == tail))
1845 return inactive;
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863 execlists->csb_head = tail;
1864 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874 rmb();
1875
1876
1877 prev = inactive;
1878 *prev = NULL;
1879
1880 do {
1881 bool promote;
1882 u64 csb;
1883
1884 if (++head == num_entries)
1885 head = 0;
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905 csb = csb_read(engine, buf + head);
1906 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
1907 head, upper_32_bits(csb), lower_32_bits(csb));
1908
1909 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
1910 promote = xehp_csb_parse(csb);
1911 else if (GRAPHICS_VER(engine->i915) >= 12)
1912 promote = gen12_csb_parse(csb);
1913 else
1914 promote = gen8_csb_parse(csb);
1915 if (promote) {
1916 struct i915_request * const *old = execlists->active;
1917
1918 if (GEM_WARN_ON(!*execlists->pending)) {
1919 execlists->error_interrupt |= ERROR_CSB;
1920 break;
1921 }
1922
1923 ring_set_paused(engine, 0);
1924
1925
1926 WRITE_ONCE(execlists->active, execlists->pending);
1927 smp_wmb();
1928
1929
1930 trace_ports(execlists, "preempted", old);
1931 while (*old)
1932 *inactive++ = *old++;
1933
1934
1935 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
1936 copy_ports(execlists->inflight,
1937 execlists->pending,
1938 execlists_num_ports(execlists));
1939 smp_wmb();
1940 WRITE_ONCE(execlists->active, execlists->inflight);
1941
1942
1943 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
1944
1945 WRITE_ONCE(execlists->pending[0], NULL);
1946 } else {
1947 if (GEM_WARN_ON(!*execlists->active)) {
1948 execlists->error_interrupt |= ERROR_CSB;
1949 break;
1950 }
1951
1952
1953 trace_ports(execlists, "completed", execlists->active);
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965 if (GEM_SHOW_DEBUG() &&
1966 !__i915_request_is_complete(*execlists->active)) {
1967 struct i915_request *rq = *execlists->active;
1968 const u32 *regs __maybe_unused =
1969 rq->context->lrc_reg_state;
1970
1971 ENGINE_TRACE(engine,
1972 "context completed before request!\n");
1973 ENGINE_TRACE(engine,
1974 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
1975 ENGINE_READ(engine, RING_START),
1976 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
1977 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
1978 ENGINE_READ(engine, RING_CTL),
1979 ENGINE_READ(engine, RING_MI_MODE));
1980 ENGINE_TRACE(engine,
1981 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
1982 i915_ggtt_offset(rq->ring->vma),
1983 rq->head, rq->tail,
1984 rq->fence.context,
1985 lower_32_bits(rq->fence.seqno),
1986 hwsp_seqno(rq));
1987 ENGINE_TRACE(engine,
1988 "ctx:{start:%08x, head:%04x, tail:%04x}, ",
1989 regs[CTX_RING_START],
1990 regs[CTX_RING_HEAD],
1991 regs[CTX_RING_TAIL]);
1992 }
1993
1994 *inactive++ = *execlists->active++;
1995
1996 GEM_BUG_ON(execlists->active - execlists->inflight >
1997 execlists_num_ports(execlists));
1998 }
1999 } while (head != tail);
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012 drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));
2013
2014
2015
2016
2017
2018
2019 if (*prev != *execlists->active) {
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030 if (*prev)
2031 lrc_runtime_stop((*prev)->context);
2032 if (*execlists->active)
2033 lrc_runtime_start((*execlists->active)->context);
2034 new_timeslice(execlists);
2035 }
2036
2037 return inactive;
2038 }
2039
2040 static void post_process_csb(struct i915_request **port,
2041 struct i915_request **last)
2042 {
2043 while (port != last)
2044 execlists_schedule_out(*port++);
2045 }
2046
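/*
 * Take the request (and any ready waiters on the same engine) off the
 * scheduler lists and park them on the engine's hold list, e.g. while an
 * error capture of the hung context is in progress.
 */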
2047 static void __execlists_hold(struct i915_request *rq)
2048 {
2049 LIST_HEAD(list);
2050
2051 do {
2052 struct i915_dependency *p;
2053
2054 if (i915_request_is_active(rq))
2055 __i915_request_unsubmit(rq);
2056
2057 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2058 list_move_tail(&rq->sched.link,
2059 &rq->engine->sched_engine->hold);
2060 i915_request_set_hold(rq);
2061 RQ_TRACE(rq, "on hold\n");
2062
2063 for_each_waiter(p, rq) {
2064 struct i915_request *w =
2065 container_of(p->waiter, typeof(*w), sched);
2066
2067 if (p->flags & I915_DEPENDENCY_WEAK)
2068 continue;
2069
2070
2071 if (w->engine != rq->engine)
2072 continue;
2073
2074 if (!i915_request_is_ready(w))
2075 continue;
2076
2077 if (__i915_request_is_complete(w))
2078 continue;
2079
2080 if (i915_request_on_hold(w))
2081 continue;
2082
2083 list_move_tail(&w->sched.link, &list);
2084 }
2085
2086 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2087 } while (rq);
2088 }
2089
2090 static bool execlists_hold(struct intel_engine_cs *engine,
2091 struct i915_request *rq)
2092 {
2093 if (i915_request_on_hold(rq))
2094 return false;
2095
2096 spin_lock_irq(&engine->sched_engine->lock);
2097
2098 if (__i915_request_is_complete(rq)) {
2099 rq = NULL;
2100 goto unlock;
2101 }
2102
2103
2104
2105
2106
2107
2108
2109 GEM_BUG_ON(i915_request_on_hold(rq));
2110 GEM_BUG_ON(rq->engine != engine);
2111 __execlists_hold(rq);
2112 GEM_BUG_ON(list_empty(&engine->sched_engine->hold));
2113
2114 unlock:
2115 spin_unlock_irq(&engine->sched_engine->lock);
2116 return rq;
2117 }
2118
2119 static bool hold_request(const struct i915_request *rq)
2120 {
2121 struct i915_dependency *p;
2122 bool result = false;
2123
2124
2125
2126
2127
2128 rcu_read_lock();
2129 for_each_signaler(p, rq) {
2130 const struct i915_request *s =
2131 container_of(p->signaler, typeof(*s), sched);
2132
2133 if (s->engine != rq->engine)
2134 continue;
2135
2136 result = i915_request_on_hold(s);
2137 if (result)
2138 break;
2139 }
2140 rcu_read_unlock();
2141
2142 return result;
2143 }
2144
2145 static void __execlists_unhold(struct i915_request *rq)
2146 {
2147 LIST_HEAD(list);
2148
2149 do {
2150 struct i915_dependency *p;
2151
2152 RQ_TRACE(rq, "hold release\n");
2153
2154 GEM_BUG_ON(!i915_request_on_hold(rq));
2155 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2156
2157 i915_request_clear_hold(rq);
2158 list_move_tail(&rq->sched.link,
2159 i915_sched_lookup_priolist(rq->engine->sched_engine,
2160 rq_prio(rq)));
2161 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2162
2163
2164 for_each_waiter(p, rq) {
2165 struct i915_request *w =
2166 container_of(p->waiter, typeof(*w), sched);
2167
2168 if (p->flags & I915_DEPENDENCY_WEAK)
2169 continue;
2170
2171 if (w->engine != rq->engine)
2172 continue;
2173
2174 if (!i915_request_on_hold(w))
2175 continue;
2176
2177
2178 if (hold_request(w))
2179 continue;
2180
2181 list_move_tail(&w->sched.link, &list);
2182 }
2183
2184 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2185 } while (rq);
2186 }
2187
2188 static void execlists_unhold(struct intel_engine_cs *engine,
2189 struct i915_request *rq)
2190 {
2191 spin_lock_irq(&engine->sched_engine->lock);
2192
2193
2194
2195
2196
2197 __execlists_unhold(rq);
2198
2199 if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) {
2200 engine->sched_engine->queue_priority_hint = rq_prio(rq);
2201 tasklet_hi_schedule(&engine->sched_engine->tasklet);
2202 }
2203
2204 spin_unlock_irq(&engine->sched_engine->lock);
2205 }
2206
2207 struct execlists_capture {
2208 struct work_struct work;
2209 struct i915_request *rq;
2210 struct i915_gpu_coredump *error;
2211 };
2212
2213 static void execlists_capture_work(struct work_struct *work)
2214 {
2215 struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2216 const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL |
2217 __GFP_NOWARN;
2218 struct intel_engine_cs *engine = cap->rq->engine;
2219 struct intel_gt_coredump *gt = cap->error->gt;
2220 struct intel_engine_capture_vma *vma;
2221
2222
2223 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2224 if (vma) {
2225 struct i915_vma_compress *compress =
2226 i915_vma_capture_prepare(gt);
2227
2228 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2229 i915_vma_capture_finish(gt, compress);
2230 }
2231
2232 gt->simulated = gt->engine->simulated;
2233 cap->error->simulated = gt->simulated;
2234
2235
2236 i915_error_state_store(cap->error);
2237 i915_gpu_coredump_put(cap->error);
2238
2239
2240 execlists_unhold(engine, cap->rq);
2241 i915_request_put(cap->rq);
2242
2243 kfree(cap);
2244 }
2245
2246 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2247 {
2248 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2249 struct execlists_capture *cap;
2250
2251 cap = kmalloc(sizeof(*cap), gfp);
2252 if (!cap)
2253 return NULL;
2254
2255 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2256 if (!cap->error)
2257 goto err_cap;
2258
2259 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE);
2260 if (!cap->error->gt)
2261 goto err_gpu;
2262
2263 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE);
2264 if (!cap->error->gt->engine)
2265 goto err_gt;
2266
2267 cap->error->gt->engine->hung = true;
2268
2269 return cap;
2270
2271 err_gt:
2272 kfree(cap->error->gt);
2273 err_gpu:
2274 kfree(cap->error);
2275 err_cap:
2276 kfree(cap);
2277 return NULL;
2278 }
2279
2280 static struct i915_request *
2281 active_context(struct intel_engine_cs *engine, u32 ccid)
2282 {
2283 const struct intel_engine_execlists * const el = &engine->execlists;
2284 struct i915_request * const *port, *rq;
2285
2286
2287
2288
2289
2290
2291
2292 for (port = el->active; (rq = *port); port++) {
2293 if (rq->context->lrc.ccid == ccid) {
2294 ENGINE_TRACE(engine,
2295 "ccid:%x found at active:%zd\n",
2296 ccid, port - el->active);
2297 return rq;
2298 }
2299 }
2300
2301 for (port = el->pending; (rq = *port); port++) {
2302 if (rq->context->lrc.ccid == ccid) {
2303 ENGINE_TRACE(engine,
2304 "ccid:%x found at pending:%zd\n",
2305 ccid, port - el->pending);
2306 return rq;
2307 }
2308 }
2309
2310 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
2311 return NULL;
2312 }
2313
2314 static u32 active_ccid(struct intel_engine_cs *engine)
2315 {
2316 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
2317 }
2318
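/*
 * Capture the state of the currently executing (presumed hung) request:
 * the request is placed on hold so that it is not retired or resubmitted
 * while the capture worker dumps it, and released again afterwards.
 */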
2319 static void execlists_capture(struct intel_engine_cs *engine)
2320 {
2321 struct execlists_capture *cap;
2322
2323 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2324 return;
2325
2326
2327
2328
2329
2330
2331 cap = capture_regs(engine);
2332 if (!cap)
2333 return;
2334
2335 spin_lock_irq(&engine->sched_engine->lock);
2336 cap->rq = active_context(engine, active_ccid(engine));
2337 if (cap->rq) {
2338 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2339 cap->rq = i915_request_get_rcu(cap->rq);
2340 }
2341 spin_unlock_irq(&engine->sched_engine->lock);
2342 if (!cap->rq)
2343 goto err_free;
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365 if (!execlists_hold(engine, cap->rq))
2366 goto err_rq;
2367
2368 INIT_WORK(&cap->work, execlists_capture_work);
2369 schedule_work(&cap->work);
2370 return;
2371
2372 err_rq:
2373 i915_request_put(cap->rq);
2374 err_free:
2375 i915_gpu_coredump_put(cap->error);
2376 kfree(cap);
2377 }
2378
2379 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
2380 {
2381 const unsigned int bit = I915_RESET_ENGINE + engine->id;
2382 unsigned long *lock = &engine->gt->reset.flags;
2383
2384 if (!intel_has_reset_engine(engine->gt))
2385 return;
2386
2387 if (test_and_set_bit(bit, lock))
2388 return;
2389
2390 ENGINE_TRACE(engine, "reset for %s\n", msg);
2391
2392
2393 tasklet_disable_nosync(&engine->sched_engine->tasklet);
2394
2395 ring_set_paused(engine, 1);
2396 execlists_capture(engine);
2397 intel_engine_reset(engine, msg);
2398
2399 tasklet_enable(&engine->sched_engine->tasklet);
2400 clear_and_wake_up_bit(bit, lock);
2401 }
2402
2403 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2404 {
2405 const struct timer_list *t = &engine->execlists.preempt;
2406
2407 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2408 return false;
2409
2410 if (!timer_expired(t))
2411 return false;
2412
2413 return engine->execlists.pending[0];
2414 }
2415
/*
 * Check the unread Context Status Buffers and manage the submission of
 * new contexts to the ELSP accordingly.
 */
2420 static void execlists_submission_tasklet(struct tasklet_struct *t)
2421 {
2422 struct i915_sched_engine *sched_engine =
2423 from_tasklet(sched_engine, t, tasklet);
2424 struct intel_engine_cs * const engine = sched_engine->private_data;
2425 struct i915_request *post[2 * EXECLIST_MAX_PORTS];
2426 struct i915_request **inactive;
2427
2428 rcu_read_lock();
2429 inactive = process_csb(engine, post);
2430 GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
2431
2432 if (unlikely(preempt_timeout(engine))) {
2433 const struct i915_request *rq = *engine->execlists.active;
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445 cancel_timer(&engine->execlists.preempt);
2446 if (rq == engine->execlists.preempt_target)
2447 engine->execlists.error_interrupt |= ERROR_PREEMPT;
2448 else
2449 set_timer_ms(&engine->execlists.preempt,
2450 active_preempt_timeout(engine, rq));
2451 }
2452
2453 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
2454 const char *msg;
2455
2456
2457 if (engine->execlists.error_interrupt & GENMASK(15, 0))
2458 msg = "CS error";
2459 else if (engine->execlists.error_interrupt & ERROR_CSB)
2460 msg = "invalid CSB event";
2461 else if (engine->execlists.error_interrupt & ERROR_PREEMPT)
2462 msg = "preemption time out";
2463 else
2464 msg = "internal error";
2465
2466 engine->execlists.error_interrupt = 0;
2467 execlists_reset(engine, msg);
2468 }
2469
2470 if (!engine->execlists.pending[0]) {
2471 execlists_dequeue_irq(engine);
2472 start_timeslice(engine);
2473 }
2474
2475 post_process_csb(post, inactive);
2476 rcu_read_unlock();
2477 }
2478
2479 static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir)
2480 {
2481 bool tasklet = false;
2482
2483 if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
2484 u32 eir;
2485
2486
2487 eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0);
2488 ENGINE_TRACE(engine, "CS error: %x\n", eir);
2489
2490
2491 if (likely(eir)) {
2492 ENGINE_WRITE(engine, RING_EMR, ~0u);
2493 ENGINE_WRITE(engine, RING_EIR, eir);
2494 WRITE_ONCE(engine->execlists.error_interrupt, eir);
2495 tasklet = true;
2496 }
2497 }
2498
2499 if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
2500 WRITE_ONCE(engine->execlists.yield,
2501 ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
2502 ENGINE_TRACE(engine, "semaphore yield: %08x\n",
2503 engine->execlists.yield);
2504 if (del_timer(&engine->execlists.timer))
2505 tasklet = true;
2506 }
2507
2508 if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
2509 tasklet = true;
2510
2511 if (iir & GT_RENDER_USER_INTERRUPT)
2512 intel_engine_signal_breadcrumbs(engine);
2513
2514 if (tasklet)
2515 tasklet_hi_schedule(&engine->sched_engine->tasklet);
2516 }
2517
2518 static void __execlists_kick(struct intel_engine_execlists *execlists)
2519 {
2520 struct intel_engine_cs *engine =
2521 container_of(execlists, typeof(*engine), execlists);
2522
2523
2524 tasklet_hi_schedule(&engine->sched_engine->tasklet);
2525 }
2526
2527 #define execlists_kick(t, member) \
2528 __execlists_kick(container_of(t, struct intel_engine_execlists, member))
2529
2530 static void execlists_timeslice(struct timer_list *timer)
2531 {
2532 execlists_kick(timer, timer);
2533 }
2534
2535 static void execlists_preempt(struct timer_list *timer)
2536 {
2537 execlists_kick(timer, preempt);
2538 }
2539
2540 static void queue_request(struct intel_engine_cs *engine,
2541 struct i915_request *rq)
2542 {
2543 GEM_BUG_ON(!list_empty(&rq->sched.link));
2544 list_add_tail(&rq->sched.link,
2545 i915_sched_lookup_priolist(engine->sched_engine,
2546 rq_prio(rq)));
2547 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2548 }
2549
2550 static bool submit_queue(struct intel_engine_cs *engine,
2551 const struct i915_request *rq)
2552 {
2553 struct i915_sched_engine *sched_engine = engine->sched_engine;
2554
2555 if (rq_prio(rq) <= sched_engine->queue_priority_hint)
2556 return false;
2557
2558 sched_engine->queue_priority_hint = rq_prio(rq);
2559 return true;
2560 }
2561
2562 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2563 const struct i915_request *rq)
2564 {
2565 GEM_BUG_ON(i915_request_on_hold(rq));
2566 return !list_empty(&engine->sched_engine->hold) && hold_request(rq);
2567 }
2568
2569 static void execlists_submit_request(struct i915_request *request)
2570 {
2571 struct intel_engine_cs *engine = request->engine;
2572 unsigned long flags;
2573
2574
2575 spin_lock_irqsave(&engine->sched_engine->lock, flags);
2576
2577 if (unlikely(ancestor_on_hold(engine, request))) {
2578 RQ_TRACE(request, "ancestor on hold\n");
2579 list_add_tail(&request->sched.link,
2580 &engine->sched_engine->hold);
2581 i915_request_set_hold(request);
2582 } else {
2583 queue_request(engine, request);
2584
2585 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine));
2586 GEM_BUG_ON(list_empty(&request->sched.link));
2587
2588 if (submit_queue(engine, request))
2589 __execlists_kick(&engine->execlists);
2590 }
2591
2592 spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
2593 }
2594
2595 static int
2596 __execlists_context_pre_pin(struct intel_context *ce,
2597 struct intel_engine_cs *engine,
2598 struct i915_gem_ww_ctx *ww, void **vaddr)
2599 {
2600 int err;
2601
2602 err = lrc_pre_pin(ce, engine, ww, vaddr);
2603 if (err)
2604 return err;
2605
2606 if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) {
2607 lrc_init_state(ce, engine, *vaddr);
2608
2609 __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size);
2610 }
2611
2612 return 0;
2613 }
2614
2615 static int execlists_context_pre_pin(struct intel_context *ce,
2616 struct i915_gem_ww_ctx *ww,
2617 void **vaddr)
2618 {
2619 return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr);
2620 }
2621
2622 static int execlists_context_pin(struct intel_context *ce, void *vaddr)
2623 {
2624 return lrc_pin(ce, ce->engine, vaddr);
2625 }
2626
2627 static int execlists_context_alloc(struct intel_context *ce)
2628 {
2629 return lrc_alloc(ce, ce->engine);
2630 }
2631
2632 static void execlists_context_cancel_request(struct intel_context *ce,
2633 struct i915_request *rq)
2634 {
2635 struct intel_engine_cs *engine = NULL;
2636
2637 i915_request_active_engine(rq, &engine);
2638
2639 if (engine && intel_engine_pulse(engine))
2640 intel_gt_handle_error(engine->gt, engine->mask, 0,
2641 "request cancellation by %s",
2642 current->comm);
2643 }
2644
2645 static struct intel_context *
2646 execlists_create_parallel(struct intel_engine_cs **engines,
2647 unsigned int num_siblings,
2648 unsigned int width)
2649 {
2650 struct intel_context *parent = NULL, *ce, *err;
2651 int i;
2652
2653 GEM_BUG_ON(num_siblings != 1);
2654
2655 for (i = 0; i < width; ++i) {
2656 ce = intel_context_create(engines[i]);
2657 if (IS_ERR(ce)) {
2658 err = ce;
2659 goto unwind;
2660 }
2661
2662 if (i == 0)
2663 parent = ce;
2664 else
2665 intel_context_bind_parent_child(parent, ce);
2666 }
2667
2668 parent->parallel.fence_context = dma_fence_context_alloc(1);
2669
2670 intel_context_set_nopreempt(parent);
2671 for_each_child(parent, ce)
2672 intel_context_set_nopreempt(ce);
2673
2674 return parent;
2675
2676 unwind:
2677 if (parent)
2678 intel_context_put(parent);
2679 return err;
2680 }
2681
2682 static const struct intel_context_ops execlists_context_ops = {
2683 .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
2684
2685 .alloc = execlists_context_alloc,
2686
2687 .cancel_request = execlists_context_cancel_request,
2688
2689 .pre_pin = execlists_context_pre_pin,
2690 .pin = execlists_context_pin,
2691 .unpin = lrc_unpin,
2692 .post_unpin = lrc_post_unpin,
2693
2694 .enter = intel_context_enter_engine,
2695 .exit = intel_context_exit_engine,
2696
2697 .reset = lrc_reset,
2698 .destroy = lrc_destroy,
2699
2700 .create_parallel = execlists_create_parallel,
2701 .create_virtual = execlists_create_virtual,
2702 };
2703
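/*
 * emit_pdps() reloads the GEN8 3-level page-directory pointers (PDPs)
 * into the context registers via a force-posted MI_LOAD_REGISTER_IMM,
 * with arbitration disabled around the update. It is only used for VMs
 * without a 4-level page table (see execlists_request_alloc() below).
 */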
2704 static int emit_pdps(struct i915_request *rq)
2705 {
2706 const struct intel_engine_cs * const engine = rq->engine;
2707 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
2708 int err, i;
2709 u32 *cs;
2710
2711 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
2712
	/*
	 * The sequence below is deliberately ordered: arbitration is
	 * disabled, residual work is flushed and caches invalidated, and
	 * only then are the page-directory pointers rewritten before
	 * arbitration is re-enabled.
	 */

2720 cs = intel_ring_begin(rq, 2);
2721 if (IS_ERR(cs))
2722 return PTR_ERR(cs);
2723
2724 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
2725 *cs++ = MI_NOOP;
2726 intel_ring_advance(rq, cs);
2727
2728
2729 err = engine->emit_flush(rq, EMIT_FLUSH);
2730 if (err)
2731 return err;
2732
2733
2734 err = engine->emit_flush(rq, EMIT_INVALIDATE);
2735 if (err)
2736 return err;
2737
2738 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
2739 if (IS_ERR(cs))
2740 return PTR_ERR(cs);
2741
	/* Force-posted LRIs: make sure the PDP updates have landed before continuing */
2743 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
2744 for (i = GEN8_3LVL_PDPES; i--; ) {
2745 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
2746 u32 base = engine->mmio_base;
2747
2748 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
2749 *cs++ = upper_32_bits(pd_daddr);
2750 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
2751 *cs++ = lower_32_bits(pd_daddr);
2752 }
2753 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2754 intel_ring_advance(rq, cs);
2757
2758 return 0;
2759 }
2760
2761 static int execlists_request_alloc(struct i915_request *request)
2762 {
2763 int ret;
2764
2765 GEM_BUG_ON(!intel_context_is_pinned(request->context));

	/*
	 * Reserve ring space for the request's closing breadcrumb up front
	 * so that emitting it later cannot fail or stall waiting for space.
	 */
2772 request->reserved_space += EXECLISTS_REQUEST_SIZE;
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782 if (!i915_vm_is_4lvl(request->context->vm)) {
2783 ret = emit_pdps(request);
2784 if (ret)
2785 return ret;
2786 }
2787
	/* Unconditionally invalidate GPU caches and TLBs before the request payload. */
2789 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
2790 if (ret)
2791 return ret;
2792
2793 request->reserved_space -= EXECLISTS_REQUEST_SIZE;
2794 return 0;
2795 }
2796
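/*
 * Reset both the HW and our cached view of the context-status buffer:
 * the read/write pointers are set back to the last entry so the first
 * event after a reset is read from entry 0, and the local CSB copy is
 * poisoned so stale events cannot be mistaken for new ones.
 */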
2797 static void reset_csb_pointers(struct intel_engine_cs *engine)
2798 {
2799 struct intel_engine_execlists * const execlists = &engine->execlists;
2800 const unsigned int reset_value = execlists->csb_size - 1;
2801
2802 ring_set_paused(engine, 0);
2803
2804
2805
2806
2807
2808 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
2809 0xffff << 16 | reset_value << 8 | reset_value);
2810 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821 execlists->csb_head = reset_value;
2822 WRITE_ONCE(*execlists->csb_write, reset_value);
2823 wmb();
2824
2825
2826 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
2827 drm_clflush_virt_range(execlists->csb_status,
2828 execlists->csb_size *
2829 sizeof(execlists->csb_status));
2830
2831
2832 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
2833 0xffff << 16 | reset_value << 8 | reset_value);
2834 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2835
2836 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
2837 }
2838
2839 static void sanitize_hwsp(struct intel_engine_cs *engine)
2840 {
2841 struct intel_timeline *tl;
2842
2843 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
2844 intel_timeline_reset_seqno(tl);
2845 }
2846
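/*
 * execlists_sanitize() is called when the engine comes back from a
 * period in which the HW may have lost state (e.g. suspend): nothing is
 * trusted, so the CSB pointers, HWSP seqnos and pinned contexts are all
 * reset before the engine is used again.
 */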
2847 static void execlists_sanitize(struct intel_engine_cs *engine)
2848 {
2849 GEM_BUG_ON(execlists_active(&engine->execlists));
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2861 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
2862
2863 reset_csb_pointers(engine);
2864
2865
2866
2867
2868
2869
2870 sanitize_hwsp(engine);
2871
2872
2873 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
2874
2875 intel_engine_reset_pinned_contexts(engine);
2876 }
2877
2878 static void enable_error_interrupt(struct intel_engine_cs *engine)
2879 {
2880 u32 status;
2881
2882 engine->execlists.error_interrupt = 0;
2883 ENGINE_WRITE(engine, RING_EMR, ~0u);
2884 ENGINE_WRITE(engine, RING_EIR, ~0u);
2885
2886 status = ENGINE_READ(engine, RING_ESR);
2887 if (unlikely(status)) {
2888 drm_err(&engine->i915->drm,
2889 "engine '%s' resumed still in error: %08x\n",
2890 engine->name, status);
2891 __intel_gt_reset(engine->gt, engine->mask);
2892 }
2893
	/*
	 * Only the instruction-error bit (I915_ERROR_INSTRUCTION) is
	 * unmasked below: an invalid instruction caught by the command
	 * parser is treated as a fatal error. The privilege-violation
	 * source is prone to false positives, so it and the remaining
	 * error sources stay masked.
	 */
2918 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
2919 }
2920
2921 static void enable_execlists(struct intel_engine_cs *engine)
2922 {
2923 u32 mode;
2924
2925 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
2926
2927 intel_engine_set_hwsp_writemask(engine, ~0u);
2928
2929 if (GRAPHICS_VER(engine->i915) >= 11)
2930 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
2931 else
2932 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
2933 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
2934
2935 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
2936
2937 ENGINE_WRITE_FW(engine,
2938 RING_HWS_PGA,
2939 i915_ggtt_offset(engine->status_page.vma));
2940 ENGINE_POSTING_READ(engine, RING_HWS_PGA);
2941
2942 enable_error_interrupt(engine);
2943 }
2944
2945 static int execlists_resume(struct intel_engine_cs *engine)
2946 {
2947 intel_mocs_init_engine(engine);
2948 intel_breadcrumbs_reset(engine->breadcrumbs);
2949
2950 enable_execlists(engine);
2951
2952 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
2953 xehp_enable_ccs_engines(engine);
2954
2955 return 0;
2956 }
2957
2958 static void execlists_reset_prepare(struct intel_engine_cs *engine)
2959 {
2960 ENGINE_TRACE(engine, "depth<-%d\n",
2961 atomic_read(&engine->sched_engine->tasklet.count));
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972 __tasklet_disable_sync_once(&engine->sched_engine->tasklet);
2973 GEM_BUG_ON(!reset_in_progress(engine));
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987 ring_set_paused(engine, 1);
2988 intel_engine_stop_cs(engine);
2989
2990
2991
2992
2993
2994 if (IS_GRAPHICS_VER(engine->i915, 11, 12))
2995 intel_engine_wait_for_pending_mi_fw(engine);
2996
2997 engine->execlists.reset_ccid = active_ccid(engine);
2998 }
2999
3000 static struct i915_request **
3001 reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
3002 {
3003 struct intel_engine_execlists * const execlists = &engine->execlists;
3004
3005 drm_clflush_virt_range(execlists->csb_write,
3006 sizeof(execlists->csb_write[0]));
3007
3008 inactive = process_csb(engine, inactive);
3009
3010
3011 reset_csb_pointers(engine);
3012
3013 return inactive;
3014 }
3015
3016 static void
3017 execlists_reset_active(struct intel_engine_cs *engine, bool stalled)
3018 {
3019 struct intel_context *ce;
3020 struct i915_request *rq;
3021 u32 head;
3022
3023
3024
3025
3026
3027
3028 rq = active_context(engine, engine->execlists.reset_ccid);
3029 if (!rq)
3030 return;
3031
3032 ce = rq->context;
3033 GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3034
3035 if (__i915_request_is_complete(rq)) {
3036
3037 head = intel_ring_wrap(ce->ring, rq->tail);
3038 goto out_replay;
3039 }
3040
3041
3042 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3043
3044
3045 GEM_BUG_ON(i915_active_is_idle(&ce->active));
3046
3047 rq = active_request(ce->timeline, rq);
3048 head = intel_ring_wrap(ce->ring, rq->head);
3049 GEM_BUG_ON(head == ce->ring->tail);
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063 if (!__i915_request_has_started(rq))
3064 goto out_replay;
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077 __i915_request_reset(rq, stalled);
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087 out_replay:
3088 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3089 head, ce->ring->tail);
3090 lrc_reset_regs(ce, engine);
3091 ce->lrc.lrca = lrc_update_regs(ce, engine, head);
3092 }
3093
3094 static void execlists_reset_csb(struct intel_engine_cs *engine, bool stalled)
3095 {
3096 struct intel_engine_execlists * const execlists = &engine->execlists;
3097 struct i915_request *post[2 * EXECLIST_MAX_PORTS];
3098 struct i915_request **inactive;
3099
3100 rcu_read_lock();
3101 inactive = reset_csb(engine, post);
3102
3103 execlists_reset_active(engine, true);
3104
3105 inactive = cancel_port_requests(execlists, inactive);
3106 post_process_csb(post, inactive);
3107 rcu_read_unlock();
3108 }
3109
3110 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3111 {
3112 unsigned long flags;
3113
3114 ENGINE_TRACE(engine, "\n");
3115
3116
3117 execlists_reset_csb(engine, stalled);
3118
3119
3120 rcu_read_lock();
3121 spin_lock_irqsave(&engine->sched_engine->lock, flags);
3122 __unwind_incomplete_requests(engine);
3123 spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
3124 rcu_read_unlock();
3125 }
3126
3127 static void nop_submission_tasklet(struct tasklet_struct *t)
3128 {
3129 struct i915_sched_engine *sched_engine =
3130 from_tasklet(sched_engine, t, tasklet);
3131 struct intel_engine_cs * const engine = sched_engine->private_data;
3132
3133
3134 WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN);
3135 }
3136
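/*
 * execlists_reset_cancel() is the wedged path: every request, whether
 * executing, queued, on hold or pending on a virtual engine, is marked
 * with -EIO and flushed out, and the tasklet is replaced with a no-op
 * so nothing further is submitted.
 */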
3137 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3138 {
3139 struct intel_engine_execlists * const execlists = &engine->execlists;
3140 struct i915_sched_engine * const sched_engine = engine->sched_engine;
3141 struct i915_request *rq, *rn;
3142 struct rb_node *rb;
3143 unsigned long flags;
3144
3145 ENGINE_TRACE(engine, "\n");
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161 execlists_reset_csb(engine, true);
3162
3163 rcu_read_lock();
3164 spin_lock_irqsave(&engine->sched_engine->lock, flags);
3165
3166
3167 list_for_each_entry(rq, &engine->sched_engine->requests, sched.link)
3168 i915_request_put(i915_request_mark_eio(rq));
3169 intel_engine_signal_breadcrumbs(engine);
3170
3171
3172 while ((rb = rb_first_cached(&sched_engine->queue))) {
3173 struct i915_priolist *p = to_priolist(rb);
3174
3175 priolist_for_each_request_consume(rq, rn, p) {
3176 if (i915_request_mark_eio(rq)) {
3177 __i915_request_submit(rq);
3178 i915_request_put(rq);
3179 }
3180 }
3181
3182 rb_erase_cached(&p->node, &sched_engine->queue);
3183 i915_priolist_free(p);
3184 }
3185
3186
3187 list_for_each_entry(rq, &sched_engine->hold, sched.link)
3188 i915_request_put(i915_request_mark_eio(rq));
3189
3190
3191 while ((rb = rb_first_cached(&execlists->virtual))) {
3192 struct virtual_engine *ve =
3193 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3194
3195 rb_erase_cached(rb, &execlists->virtual);
3196 RB_CLEAR_NODE(rb);
3197
3198 spin_lock(&ve->base.sched_engine->lock);
3199 rq = fetch_and_zero(&ve->request);
3200 if (rq) {
3201 if (i915_request_mark_eio(rq)) {
3202 rq->engine = engine;
3203 __i915_request_submit(rq);
3204 i915_request_put(rq);
3205 }
3206 i915_request_put(rq);
3207
3208 ve->base.sched_engine->queue_priority_hint = INT_MIN;
3209 }
3210 spin_unlock(&ve->base.sched_engine->lock);
3211 }
3212
3213
3214
3215 sched_engine->queue_priority_hint = INT_MIN;
3216 sched_engine->queue = RB_ROOT_CACHED;
3217
3218 GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet));
3219 engine->sched_engine->tasklet.callback = nop_submission_tasklet;
3220
3221 spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
3222 rcu_read_unlock();
3223 }
3224
3225 static void execlists_reset_finish(struct intel_engine_cs *engine)
3226 {
3227 struct intel_engine_execlists * const execlists = &engine->execlists;
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239 GEM_BUG_ON(!reset_in_progress(engine));
3240
3241
3242 if (__tasklet_enable(&engine->sched_engine->tasklet))
3243 __execlists_kick(execlists);
3244
3245 ENGINE_TRACE(engine, "depth->%d\n",
3246 atomic_read(&engine->sched_engine->tasklet.count));
3247 }
3248
3249 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3250 {
3251 ENGINE_WRITE(engine, RING_IMR,
3252 ~(engine->irq_enable_mask | engine->irq_keep_mask));
3253 ENGINE_POSTING_READ(engine, RING_IMR);
3254 }
3255
3256 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3257 {
3258 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3259 }
3260
3261 static void execlists_park(struct intel_engine_cs *engine)
3262 {
3263 cancel_timer(&engine->execlists.timer);
3264 cancel_timer(&engine->execlists.preempt);
3265 }
3266
3267 static void add_to_engine(struct i915_request *rq)
3268 {
3269 lockdep_assert_held(&rq->engine->sched_engine->lock);
3270 list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
3271 }
3272
3273 static void remove_from_engine(struct i915_request *rq)
3274 {
3275 struct intel_engine_cs *engine, *locked;
3276
3277
3278
3279
3280
3281
3282
3283 locked = READ_ONCE(rq->engine);
3284 spin_lock_irq(&locked->sched_engine->lock);
3285 while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
3286 spin_unlock(&locked->sched_engine->lock);
3287 spin_lock(&engine->sched_engine->lock);
3288 locked = engine;
3289 }
3290 list_del_init(&rq->sched.link);
3291
3292 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3293 clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
3294
3295
3296 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3297
3298 spin_unlock_irq(&locked->sched_engine->lock);
3299
3300 i915_request_notify_execute_cb_imm(rq);
3301 }
3302
3303 static bool can_preempt(struct intel_engine_cs *engine)
3304 {
3305 if (GRAPHICS_VER(engine->i915) > 8)
3306 return true;
3307
3308
3309 return engine->class != RENDER_CLASS;
3310 }
3311
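/*
 * kick_execlists() is the scheduler's back-end kick: when a request's
 * effective priority is raised above the queue hint and it is not the
 * context already inflight, update the hint and, if it also outranks
 * the running request, schedule the tasklet to consider preemption.
 */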
3312 static void kick_execlists(const struct i915_request *rq, int prio)
3313 {
3314 struct intel_engine_cs *engine = rq->engine;
3315 struct i915_sched_engine *sched_engine = engine->sched_engine;
3316 const struct i915_request *inflight;
3317
3318
3319
3320
3321
3322 if (prio <= sched_engine->queue_priority_hint)
3323 return;
3324
3325 rcu_read_lock();
3326
3327
3328 inflight = execlists_active(&engine->execlists);
3329 if (!inflight)
3330 goto unlock;
3331
3332
3333
3334
3335
3336 if (inflight->context == rq->context)
3337 goto unlock;
3338
3339 ENGINE_TRACE(engine,
3340 "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
3341 prio,
3342 rq->fence.context, rq->fence.seqno,
3343 inflight->fence.context, inflight->fence.seqno,
3344 inflight->sched.attr.priority);
3345
3346 sched_engine->queue_priority_hint = prio;
3347
3348
3349
3350
3351
3352
3353
3354
3355 if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight)))
3356 tasklet_hi_schedule(&sched_engine->tasklet);
3357
3358 unlock:
3359 rcu_read_unlock();
3360 }
3361
3362 static void execlists_set_default_submission(struct intel_engine_cs *engine)
3363 {
3364 engine->submit_request = execlists_submit_request;
3365 engine->sched_engine->schedule = i915_schedule;
3366 engine->sched_engine->kick_backend = kick_execlists;
3367 engine->sched_engine->tasklet.callback = execlists_submission_tasklet;
3368 }
3369
3370 static void execlists_shutdown(struct intel_engine_cs *engine)
3371 {
3372
3373 del_timer_sync(&engine->execlists.timer);
3374 del_timer_sync(&engine->execlists.preempt);
3375 tasklet_kill(&engine->sched_engine->tasklet);
3376 }
3377
3378 static void execlists_release(struct intel_engine_cs *engine)
3379 {
3380 engine->sanitize = NULL;
3381
3382 execlists_shutdown(engine);
3383
3384 intel_engine_cleanup_common(engine);
3385 lrc_fini_wa_ctx(engine);
3386 }
3387
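/*
 * Engine busyness is the accumulated total plus, if a context is
 * currently active, the time elapsed since stats->start. The seqcount
 * in execlists_engine_busyness() guards against reading a torn update
 * from the interrupt-time accounting.
 */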
3388 static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine,
3389 ktime_t *now)
3390 {
3391 struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
3392 ktime_t total = stats->total;
3393
3394
3395
3396
3397
3398 *now = ktime_get();
3399 if (READ_ONCE(stats->active))
3400 total = ktime_add(total, ktime_sub(*now, stats->start));
3401
3402 return total;
3403 }
3404
3405 static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine,
3406 ktime_t *now)
3407 {
3408 struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
3409 unsigned int seq;
3410 ktime_t total;
3411
3412 do {
3413 seq = read_seqcount_begin(&stats->lock);
3414 total = __execlists_engine_busyness(engine, now);
3415 } while (read_seqcount_retry(&stats->lock, seq));
3416
3417 return total;
3418 }
3419
3420 static void
3421 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
3422 {
3423
3424
3425 engine->resume = execlists_resume;
3426
3427 engine->cops = &execlists_context_ops;
3428 engine->request_alloc = execlists_request_alloc;
3429 engine->add_active_request = add_to_engine;
3430 engine->remove_active_request = remove_from_engine;
3431
3432 engine->reset.prepare = execlists_reset_prepare;
3433 engine->reset.rewind = execlists_reset_rewind;
3434 engine->reset.cancel = execlists_reset_cancel;
3435 engine->reset.finish = execlists_reset_finish;
3436
3437 engine->park = execlists_park;
3438 engine->unpark = NULL;
3439
3440 engine->emit_flush = gen8_emit_flush_xcs;
3441 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3442 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
3443 if (GRAPHICS_VER(engine->i915) >= 12) {
3444 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
3445 engine->emit_flush = gen12_emit_flush_xcs;
3446 }
3447 engine->set_default_submission = execlists_set_default_submission;
3448
3449 if (GRAPHICS_VER(engine->i915) < 11) {
3450 engine->irq_enable = gen8_logical_ring_enable_irq;
3451 engine->irq_disable = gen8_logical_ring_disable_irq;
3452 } else {
3453
3454
3455
3456
3457
3458
3459 }
3460 intel_engine_set_irq_handler(engine, execlists_irq_handler);
3461
3462 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
3463 if (!intel_vgpu_active(engine->i915)) {
3464 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
3465 if (can_preempt(engine)) {
3466 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
3467 if (CONFIG_DRM_I915_TIMESLICE_DURATION)
3468 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
3469 }
3470 }
3471
3472 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
3473 if (intel_engine_has_preemption(engine))
3474 engine->emit_bb_start = gen125_emit_bb_start;
3475 else
3476 engine->emit_bb_start = gen125_emit_bb_start_noarb;
3477 } else {
3478 if (intel_engine_has_preemption(engine))
3479 engine->emit_bb_start = gen8_emit_bb_start;
3480 else
3481 engine->emit_bb_start = gen8_emit_bb_start_noarb;
3482 }
3483
3484 engine->busyness = execlists_engine_busyness;
3485 }
3486
3487 static void logical_ring_default_irqs(struct intel_engine_cs *engine)
3488 {
3489 unsigned int shift = 0;
3490
3491 if (GRAPHICS_VER(engine->i915) < 11) {
3492 const u8 irq_shifts[] = {
3493 [RCS0] = GEN8_RCS_IRQ_SHIFT,
3494 [BCS0] = GEN8_BCS_IRQ_SHIFT,
3495 [VCS0] = GEN8_VCS0_IRQ_SHIFT,
3496 [VCS1] = GEN8_VCS1_IRQ_SHIFT,
3497 [VECS0] = GEN8_VECS_IRQ_SHIFT,
3498 };
3499
3500 shift = irq_shifts[engine->id];
3501 }
3502
3503 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
3504 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
3505 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
3506 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
3507 }
3508
3509 static void rcs_submission_override(struct intel_engine_cs *engine)
3510 {
3511 switch (GRAPHICS_VER(engine->i915)) {
3512 case 12:
3513 engine->emit_flush = gen12_emit_flush_rcs;
3514 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
3515 break;
3516 case 11:
3517 engine->emit_flush = gen11_emit_flush_rcs;
3518 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
3519 break;
3520 default:
3521 engine->emit_flush = gen8_emit_flush_rcs;
3522 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
3523 break;
3524 }
3525 }
3526
3527 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
3528 {
3529 struct intel_engine_execlists * const execlists = &engine->execlists;
3530 struct drm_i915_private *i915 = engine->i915;
3531 struct intel_uncore *uncore = engine->uncore;
3532 u32 base = engine->mmio_base;
3533
3534 tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
3535 timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
3536 timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
3537
3538 logical_ring_default_vfuncs(engine);
3539 logical_ring_default_irqs(engine);
3540
3541 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
3542 rcs_submission_override(engine);
3543
3544 lrc_init_wa_ctx(engine);
3545
3546 if (HAS_LOGICAL_RING_ELSQ(i915)) {
3547 execlists->submit_reg = uncore->regs +
3548 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
3549 execlists->ctrl_reg = uncore->regs +
3550 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
3551
3552 engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
3553 RING_EXECLIST_CONTROL(engine->mmio_base),
3554 FW_REG_WRITE);
3555 } else {
3556 execlists->submit_reg = uncore->regs +
3557 i915_mmio_reg_offset(RING_ELSP(base));
3558 }
3559
3560 execlists->csb_status =
3561 (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
3562
3563 execlists->csb_write =
3564 &engine->status_page.addr[INTEL_HWS_CSB_WRITE_INDEX(i915)];
3565
3566 if (GRAPHICS_VER(i915) < 11)
3567 execlists->csb_size = GEN8_CSB_ENTRIES;
3568 else
3569 execlists->csb_size = GEN11_CSB_ENTRIES;
3570
3571 engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
3572 if (GRAPHICS_VER(engine->i915) >= 11 &&
3573 GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
3574 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
3575 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
3576 }
3577
3578
3579 engine->sanitize = execlists_sanitize;
3580 engine->release = execlists_release;
3581
3582 return 0;
3583 }
3584
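/*
 * A virtual engine holds at most one pending request (ve->request) and
 * offers it to every physical sibling; virtual_queue() is the list the
 * request sits on until one of the siblings claims it.
 */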
3585 static struct list_head *virtual_queue(struct virtual_engine *ve)
3586 {
3587 return &ve->base.sched_engine->default_priolist.requests;
3588 }
3589
3590 static void rcu_virtual_context_destroy(struct work_struct *wrk)
3591 {
3592 struct virtual_engine *ve =
3593 container_of(wrk, typeof(*ve), rcu.work);
3594 unsigned int n;
3595
3596 GEM_BUG_ON(ve->context.inflight);
3597
3598
3599 if (unlikely(ve->request)) {
3600 struct i915_request *old;
3601
3602 spin_lock_irq(&ve->base.sched_engine->lock);
3603
3604 old = fetch_and_zero(&ve->request);
3605 if (old) {
3606 GEM_BUG_ON(!__i915_request_is_complete(old));
3607 __i915_request_submit(old);
3608 i915_request_put(old);
3609 }
3610
3611 spin_unlock_irq(&ve->base.sched_engine->lock);
3612 }
3613
3614
3615
3616
3617
3618
3619
3620
3621 tasklet_kill(&ve->base.sched_engine->tasklet);
3622
3623
3624 for (n = 0; n < ve->num_siblings; n++) {
3625 struct intel_engine_cs *sibling = ve->siblings[n];
3626 struct rb_node *node = &ve->nodes[sibling->id].rb;
3627
3628 if (RB_EMPTY_NODE(node))
3629 continue;
3630
3631 spin_lock_irq(&sibling->sched_engine->lock);
3632
3633
3634 if (!RB_EMPTY_NODE(node))
3635 rb_erase_cached(node, &sibling->execlists.virtual);
3636
3637 spin_unlock_irq(&sibling->sched_engine->lock);
3638 }
3639 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet));
3640 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3641
3642 lrc_fini(&ve->context);
3643 intel_context_fini(&ve->context);
3644
3645 if (ve->base.breadcrumbs)
3646 intel_breadcrumbs_put(ve->base.breadcrumbs);
3647 if (ve->base.sched_engine)
3648 i915_sched_engine_put(ve->base.sched_engine);
3649 intel_engine_free_request_pool(&ve->base);
3650
3651 kfree(ve);
3652 }
3653
3654 static void virtual_context_destroy(struct kref *kref)
3655 {
3656 struct virtual_engine *ve =
3657 container_of(kref, typeof(*ve), context.ref);
3658
3659 GEM_BUG_ON(!list_empty(&ve->context.signals));
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671 INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
3672 queue_rcu_work(system_wq, &ve->rcu);
3673 }
3674
3675 static void virtual_engine_initial_hint(struct virtual_engine *ve)
3676 {
3677 int swp;
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692 swp = prandom_u32_max(ve->num_siblings);
3693 if (swp)
3694 swap(ve->siblings[swp], ve->siblings[0]);
3695 }
3696
3697 static int virtual_context_alloc(struct intel_context *ce)
3698 {
3699 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3700
3701 return lrc_alloc(ce, ve->siblings[0]);
3702 }
3703
3704 static int virtual_context_pre_pin(struct intel_context *ce,
3705 struct i915_gem_ww_ctx *ww,
3706 void **vaddr)
3707 {
3708 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3709
3710
3711 return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr);
3712 }
3713
3714 static int virtual_context_pin(struct intel_context *ce, void *vaddr)
3715 {
3716 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3717
3718 return lrc_pin(ce, ve->siblings[0], vaddr);
3719 }
3720
3721 static void virtual_context_enter(struct intel_context *ce)
3722 {
3723 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3724 unsigned int n;
3725
3726 for (n = 0; n < ve->num_siblings; n++)
3727 intel_engine_pm_get(ve->siblings[n]);
3728
3729 intel_timeline_enter(ce->timeline);
3730 }
3731
3732 static void virtual_context_exit(struct intel_context *ce)
3733 {
3734 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3735 unsigned int n;
3736
3737 intel_timeline_exit(ce->timeline);
3738
3739 for (n = 0; n < ve->num_siblings; n++)
3740 intel_engine_pm_put(ve->siblings[n]);
3741 }
3742
3743 static struct intel_engine_cs *
3744 virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
3745 {
3746 struct virtual_engine *ve = to_virtual_engine(engine);
3747
3748 if (sibling >= ve->num_siblings)
3749 return NULL;
3750
3751 return ve->siblings[sibling];
3752 }
3753
3754 static const struct intel_context_ops virtual_context_ops = {
3755 .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
3756
3757 .alloc = virtual_context_alloc,
3758
3759 .cancel_request = execlists_context_cancel_request,
3760
3761 .pre_pin = virtual_context_pre_pin,
3762 .pin = virtual_context_pin,
3763 .unpin = lrc_unpin,
3764 .post_unpin = lrc_post_unpin,
3765
3766 .enter = virtual_context_enter,
3767 .exit = virtual_context_exit,
3768
3769 .destroy = virtual_context_destroy,
3770
3771 .get_sibling = virtual_get_sibling,
3772 };
3773
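/*
 * virtual_submission_mask() returns the set of physical engines on
 * which the pending virtual request is still allowed to run. An empty
 * execution_mask means the request can no longer run anywhere, so it
 * is failed with -ENODEV and handed to the first sibling to be retired.
 */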
3774 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
3775 {
3776 struct i915_request *rq;
3777 intel_engine_mask_t mask;
3778
3779 rq = READ_ONCE(ve->request);
3780 if (!rq)
3781 return 0;
3782
3783
3784 mask = rq->execution_mask;
3785 if (unlikely(!mask)) {
3786
3787 i915_request_set_error_once(rq, -ENODEV);
3788 mask = ve->siblings[0]->mask;
3789 }
3790
3791 ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
3792 rq->fence.context, rq->fence.seqno,
3793 mask, ve->base.sched_engine->queue_priority_hint);
3794
3795 return mask;
3796 }
3797
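/*
 * The virtual tasklet walks the siblings and links this engine's
 * per-sibling ve_node into each sibling's execlists.virtual rbtree,
 * keyed on priority; a sibling's own tasklet is kicked when the node
 * becomes the leftmost entry and its priority beats that sibling's
 * queue_priority_hint.
 */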
3798 static void virtual_submission_tasklet(struct tasklet_struct *t)
3799 {
3800 struct i915_sched_engine *sched_engine =
3801 from_tasklet(sched_engine, t, tasklet);
3802 struct virtual_engine * const ve =
3803 (struct virtual_engine *)sched_engine->private_data;
3804 const int prio = READ_ONCE(sched_engine->queue_priority_hint);
3805 intel_engine_mask_t mask;
3806 unsigned int n;
3807
3808 rcu_read_lock();
3809 mask = virtual_submission_mask(ve);
3810 rcu_read_unlock();
3811 if (unlikely(!mask))
3812 return;
3813
3814 for (n = 0; n < ve->num_siblings; n++) {
3815 struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
3816 struct ve_node * const node = &ve->nodes[sibling->id];
3817 struct rb_node **parent, *rb;
3818 bool first;
3819
3820 if (!READ_ONCE(ve->request))
3821 break;
3822
3823 spin_lock_irq(&sibling->sched_engine->lock);
3824
3825 if (unlikely(!(mask & sibling->mask))) {
3826 if (!RB_EMPTY_NODE(&node->rb)) {
3827 rb_erase_cached(&node->rb,
3828 &sibling->execlists.virtual);
3829 RB_CLEAR_NODE(&node->rb);
3830 }
3831
3832 goto unlock_engine;
3833 }
3834
3835 if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
3836
3837
3838
3839
3840 first = rb_first_cached(&sibling->execlists.virtual) ==
3841 &node->rb;
3842 if (prio == node->prio || (prio > node->prio && first))
3843 goto submit_engine;
3844
3845 rb_erase_cached(&node->rb, &sibling->execlists.virtual);
3846 }
3847
3848 rb = NULL;
3849 first = true;
3850 parent = &sibling->execlists.virtual.rb_root.rb_node;
3851 while (*parent) {
3852 struct ve_node *other;
3853
3854 rb = *parent;
3855 other = rb_entry(rb, typeof(*other), rb);
3856 if (prio > other->prio) {
3857 parent = &rb->rb_left;
3858 } else {
3859 parent = &rb->rb_right;
3860 first = false;
3861 }
3862 }
3863
3864 rb_link_node(&node->rb, rb, parent);
3865 rb_insert_color_cached(&node->rb,
3866 &sibling->execlists.virtual,
3867 first);
3868
3869 submit_engine:
3870 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
3871 node->prio = prio;
3872 if (first && prio > sibling->sched_engine->queue_priority_hint)
3873 tasklet_hi_schedule(&sibling->sched_engine->tasklet);
3874
3875 unlock_engine:
3876 spin_unlock_irq(&sibling->sched_engine->lock);
3877
3878 if (intel_context_inflight(&ve->context))
3879 break;
3880 }
3881 }
3882
3883 static void virtual_submit_request(struct i915_request *rq)
3884 {
3885 struct virtual_engine *ve = to_virtual_engine(rq->engine);
3886 unsigned long flags;
3887
3888 ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
3889 rq->fence.context,
3890 rq->fence.seqno);
3891
3892 GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
3893
3894 spin_lock_irqsave(&ve->base.sched_engine->lock, flags);
3895
3896
3897 if (__i915_request_is_complete(rq)) {
3898 __i915_request_submit(rq);
3899 goto unlock;
3900 }
3901
3902 if (ve->request) {
3903 GEM_BUG_ON(!__i915_request_is_complete(ve->request));
3904 __i915_request_submit(ve->request);
3905 i915_request_put(ve->request);
3906 }
3907
3908 ve->base.sched_engine->queue_priority_hint = rq_prio(rq);
3909 ve->request = i915_request_get(rq);
3910
3911 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3912 list_move_tail(&rq->sched.link, virtual_queue(ve));
3913
3914 tasklet_hi_schedule(&ve->base.sched_engine->tasklet);
3915
3916 unlock:
3917 spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
3918 }
3919
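/*
 * execlists_create_virtual() builds a load-balancing "virtual" engine
 * over the given siblings: all siblings must share an engine class and
 * use the execlists submission tasklet, and the emission vfuncs are
 * inherited from the first sibling. As a rough usage sketch (normally
 * done by the uAPI layer, not this file):
 *
 *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
 *	struct intel_context *ce =
 *		intel_engine_create_virtual(siblings, ARRAY_SIZE(siblings), 0);
 *
 * where vcs0/vcs1 stand in for two physical engines of the same class.
 */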
3920 static struct intel_context *
3921 execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
3922 unsigned long flags)
3923 {
3924 struct virtual_engine *ve;
3925 unsigned int n;
3926 int err;
3927
3928 ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
3929 if (!ve)
3930 return ERR_PTR(-ENOMEM);
3931
3932 ve->base.i915 = siblings[0]->i915;
3933 ve->base.gt = siblings[0]->gt;
3934 ve->base.uncore = siblings[0]->uncore;
3935 ve->base.id = -1;
3936
3937 ve->base.class = OTHER_CLASS;
3938 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
3939 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
3940 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955 ve->base.saturated = ALL_ENGINES;
3956
3957 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
3958
3959 intel_engine_init_execlists(&ve->base);
3960
3961 ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
3962 if (!ve->base.sched_engine) {
3963 err = -ENOMEM;
3964 goto err_put;
3965 }
3966 ve->base.sched_engine->private_data = &ve->base;
3967
3968 ve->base.cops = &virtual_context_ops;
3969 ve->base.request_alloc = execlists_request_alloc;
3970
3971 ve->base.sched_engine->schedule = i915_schedule;
3972 ve->base.sched_engine->kick_backend = kick_execlists;
3973 ve->base.submit_request = virtual_submit_request;
3974
3975 INIT_LIST_HEAD(virtual_queue(ve));
3976 tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet);
3977
3978 intel_context_init(&ve->context, &ve->base);
3979
3980 ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
3981 if (!ve->base.breadcrumbs) {
3982 err = -ENOMEM;
3983 goto err_put;
3984 }
3985
3986 for (n = 0; n < count; n++) {
3987 struct intel_engine_cs *sibling = siblings[n];
3988
3989 GEM_BUG_ON(!is_power_of_2(sibling->mask));
3990 if (sibling->mask & ve->base.mask) {
3991 DRM_DEBUG("duplicate %s entry in load balancer\n",
3992 sibling->name);
3993 err = -EINVAL;
3994 goto err_put;
3995 }
3996
3997
3998
3999
4000
4001
4002
4003
4004 if (sibling->sched_engine->tasklet.callback !=
4005 execlists_submission_tasklet) {
4006 err = -ENODEV;
4007 goto err_put;
4008 }
4009
4010 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
4011 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
4012
4013 ve->siblings[ve->num_siblings++] = sibling;
4014 ve->base.mask |= sibling->mask;
4015 ve->base.logical_mask |= sibling->logical_mask;
4016
4017
4018
4019
4020
4021
4022
4023
4024 if (ve->base.class != OTHER_CLASS) {
4025 if (ve->base.class != sibling->class) {
4026 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
4027 sibling->class, ve->base.class);
4028 err = -EINVAL;
4029 goto err_put;
4030 }
4031 continue;
4032 }
4033
4034 ve->base.class = sibling->class;
4035 ve->base.uabi_class = sibling->uabi_class;
4036 snprintf(ve->base.name, sizeof(ve->base.name),
4037 "v%dx%d", ve->base.class, count);
4038 ve->base.context_size = sibling->context_size;
4039
4040 ve->base.add_active_request = sibling->add_active_request;
4041 ve->base.remove_active_request = sibling->remove_active_request;
4042 ve->base.emit_bb_start = sibling->emit_bb_start;
4043 ve->base.emit_flush = sibling->emit_flush;
4044 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
4045 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
4046 ve->base.emit_fini_breadcrumb_dw =
4047 sibling->emit_fini_breadcrumb_dw;
4048
4049 ve->base.flags = sibling->flags;
4050 }
4051
4052 ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
4053
4054 virtual_engine_initial_hint(ve);
4055 return &ve->context;
4056
4057 err_put:
4058 intel_context_put(&ve->context);
4059 return ERR_PTR(err);
4060 }
4061
4062 void intel_execlists_show_requests(struct intel_engine_cs *engine,
4063 struct drm_printer *m,
4064 void (*show_request)(struct drm_printer *m,
4065 const struct i915_request *rq,
4066 const char *prefix,
4067 int indent),
4068 unsigned int max)
4069 {
4070 const struct intel_engine_execlists *execlists = &engine->execlists;
4071 struct i915_sched_engine *sched_engine = engine->sched_engine;
4072 struct i915_request *rq, *last;
4073 unsigned long flags;
4074 unsigned int count;
4075 struct rb_node *rb;
4076
4077 spin_lock_irqsave(&sched_engine->lock, flags);
4078
4079 last = NULL;
4080 count = 0;
4081 list_for_each_entry(rq, &sched_engine->requests, sched.link) {
4082 if (count++ < max - 1)
4083 show_request(m, rq, "\t\t", 0);
4084 else
4085 last = rq;
4086 }
4087 if (last) {
4088 if (count > max) {
4089 drm_printf(m,
4090 "\t\t...skipping %d executing requests...\n",
4091 count - max);
4092 }
4093 show_request(m, last, "\t\t", 0);
4094 }
4095
4096 if (sched_engine->queue_priority_hint != INT_MIN)
4097 drm_printf(m, "\t\tQueue priority hint: %d\n",
4098 READ_ONCE(sched_engine->queue_priority_hint));
4099
4100 last = NULL;
4101 count = 0;
4102 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4103 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
4104
4105 priolist_for_each_request(rq, p) {
4106 if (count++ < max - 1)
4107 show_request(m, rq, "\t\t", 0);
4108 else
4109 last = rq;
4110 }
4111 }
4112 if (last) {
4113 if (count > max) {
4114 drm_printf(m,
4115 "\t\t...skipping %d queued requests...\n",
4116 count - max);
4117 }
4118 show_request(m, last, "\t\t", 0);
4119 }
4120
4121 last = NULL;
4122 count = 0;
4123 for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
4124 struct virtual_engine *ve =
4125 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4126 struct i915_request *rq = READ_ONCE(ve->request);
4127
4128 if (rq) {
4129 if (count++ < max - 1)
4130 show_request(m, rq, "\t\t", 0);
4131 else
4132 last = rq;
4133 }
4134 }
4135 if (last) {
4136 if (count > max) {
4137 drm_printf(m,
4138 "\t\t...skipping %d virtual requests...\n",
4139 count - max);
4140 }
4141 show_request(m, last, "\t\t", 0);
4142 }
4143
4144 spin_unlock_irqrestore(&sched_engine->lock, flags);
4145 }
4146
4147 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4148 #include "selftest_execlists.c"
4149 #endif