// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

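/*
 * Breadcrumbs track the requests ("signalers") whose dma-fences must be
 * signaled from the engine's user interrupt. The interrupt is only kept
 * armed while someone is listening; signal_irq_work() below walks the
 * registered signalers and performs the actual signaling.
 */
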
static bool irq_enable(struct intel_breadcrumbs *b)
{
    return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
    intel_engine_irq_disable(b->irq_engine);
}

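/*
 * Arming takes a GT wakeref and enables the user interrupt; disarming
 * (below) undoes both once the final listener has been signaled. Callers
 * are expected to hold b->irq_lock, see intel_breadcrumbs_arm_irq().
 */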
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
    /*
     * Since we are waiting on a request, the GPU should be busy
     * and should have its own rpm reference.
     */
    if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
        return;

    /*
     * The breadcrumb irq will be disarmed on the interrupt after the
     * waiters are signaled. This gives us a single interrupt window in
     * which we can add a new waiter and avoid the cost of re-enabling
     * the irq.
     */
    WRITE_ONCE(b->irq_armed, true);

    /* Requests may have completed before we could enable the interrupt. */
    if (!b->irq_enabled++ && b->irq_enable(b))
        irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
    if (!b->irq_engine)
        return;

    spin_lock(&b->irq_lock);
    if (!b->irq_armed)
        __intel_breadcrumbs_arm_irq(b);
    spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
    GEM_BUG_ON(!b->irq_enabled);
    if (!--b->irq_enabled)
        b->irq_disable(b);

    WRITE_ONCE(b->irq_armed, false);
    intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
    spin_lock(&b->irq_lock);
    if (b->irq_armed)
        __intel_breadcrumbs_disarm_irq(b);
    spin_unlock(&b->irq_lock);
}

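/*
 * Signalers are tracked as a two-level structure: b->signalers is the
 * list of contexts that have requests awaiting signaling, and each
 * context keeps its own seqno-ordered ce->signals list. Both lists are
 * RCU-managed so that signal_irq_work() can walk them without taking
 * the spinlocks.
 */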
static void add_signaling_context(struct intel_breadcrumbs *b,
                  struct intel_context *ce)
{
    lockdep_assert_held(&ce->signal_lock);

    spin_lock(&b->signalers_lock);
    list_add_rcu(&ce->signal_link, &b->signalers);
    spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
                     struct intel_context *ce)
{
    lockdep_assert_held(&ce->signal_lock);

    if (!list_empty(&ce->signals))
        return false;

    spin_lock(&b->signalers_lock);
    list_del_rcu(&ce->signal_link);
    spin_unlock(&b->signalers_lock);

    return true;
}

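/*
 * Sanity check that ce->signals stays ordered by seqno (i.e. in
 * retirement order). i915_seqno_passed(a, b) is assumed to be the usual
 * wrap-safe "a is at or after b" comparison, roughly (s32)(a - b) >= 0,
 * so for example seqnos 2, 3, 4 pass while 2, 4, 3 would be rejected.
 */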
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
    if (rq->context != ce)
        return false;

    if (!list_is_last(&rq->signal_link, &ce->signals) &&
        i915_seqno_passed(rq->fence.seqno,
                  list_next_entry(rq, signal_link)->fence.seqno))
        return false;

    if (!list_is_first(&rq->signal_link, &ce->signals) &&
        i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
                  rq->fence.seqno))
        return false;

    return true;
}

static bool
__dma_fence_signal(struct dma_fence *fence)
{
    return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
    fence->timestamp = timestamp;
    set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
    trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
               const struct list_head *list)
{
    struct dma_fence_cb *cur, *tmp;

    lockdep_assert_held(fence->lock);

    list_for_each_entry_safe(cur, tmp, list, node) {
        INIT_LIST_HEAD(&cur->node);
        cur->func(fence, cur);
    }
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
    if (b->irq_engine)
        intel_engine_add_retire(b->irq_engine, tl);
}

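/*
 * Chain a request onto a plain, singly-linked local list. The nodes are
 * llist_nodes, but once they have been claimed from the lockless
 * b->signaled_requests list (or newly taken ownership of), no atomics
 * are needed to link them together in signal_irq_work().
 */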
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
    node->next = head;
    return node;
}

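/*
 * Bottom half of the breadcrumb interrupt: first claim any requests
 * queued on b->signaled_requests, then walk the signalers under RCU and
 * collect every completed request, and finally run the dma_fence
 * callbacks for everything collected, outside of the signal locks.
 */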
static void signal_irq_work(struct irq_work *work)
{
    struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
    const ktime_t timestamp = ktime_get();
    struct llist_node *signal, *sn;
    struct intel_context *ce;

    signal = NULL;
    if (unlikely(!llist_empty(&b->signaled_requests)))
        signal = llist_del_all(&b->signaled_requests);

    /*
     * Keep the irq armed until the interrupt after all listeners are gone.
     *
     * Enabling/disabling the interrupt is rather costly, roughly a couple
     * of hundred microseconds. If we are proactive and enable/disable
     * the interrupt around every request that wants a breadcrumb, we
     * quickly drown in the extra orders of magnitude of latency imposed
     * on request submission.
     *
     * So we try to be lazy, and keep the interrupts enabled until no
     * more listeners appear within a breadcrumb interrupt interval (that
     * is until a request completes that no one cares about). The
     * observation is that listeners come in batches, and will often
     * listen to a bunch of requests in succession. Though note on icl+,
     * interrupts are always enabled due to concerns with rc6 being
     * dysfunctional with per-engine interrupt masking.
     *
     * We also try to avoid raising too many interrupts, as they may
     * be generated by userspace batches and it is unfortunately rather
     * too easy to drown the CPU under a flood of GPU interrupts. Thus
     * whenever no one appears to be listening, we turn off the interrupts.
     * Fewer interrupts should conserve power -- at the very least, fewer
     * interrupts draw less ire from other users of the system and tools
     * like powertop.
     */
    if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
        intel_breadcrumbs_disarm_irq(b);

    rcu_read_lock();
    atomic_inc(&b->signaler_active);
    list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
        struct i915_request *rq;

        list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
            bool release;

            if (!__i915_request_is_complete(rq))
                break;

            if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
                        &rq->fence.flags))
                break;

            /*
             * Queue for execution after dropping the signaling
             * spinlock as the callback chain may end up adding
             * more signalers to the same context or engine.
             */
            spin_lock(&ce->signal_lock);
            list_del_rcu(&rq->signal_link);
            release = remove_signaling_context(b, ce);
            spin_unlock(&ce->signal_lock);
            if (release) {
                if (intel_timeline_is_last(ce->timeline, rq))
                    add_retire(b, ce->timeline);
                intel_context_put(ce);
            }

            if (__dma_fence_signal(&rq->fence))
                /* We own signal_node now, xfer to local list */
                signal = slist_add(&rq->signal_node, signal);
            else
                i915_request_put(rq);
        }
    }
    atomic_dec(&b->signaler_active);
    rcu_read_unlock();

    llist_for_each_safe(signal, sn, signal) {
        struct i915_request *rq =
            llist_entry(signal, typeof(*rq), signal_node);
        struct list_head cb_list;

        if (rq->engine->sched_engine->retire_inflight_request_prio)
            rq->engine->sched_engine->retire_inflight_request_prio(rq);

        spin_lock(&rq->lock);
        list_replace(&rq->fence.cb_list, &cb_list);
        __dma_fence_signal__timestamp(&rq->fence, timestamp);
        __dma_fence_signal__notify(&rq->fence, &cb_list);
        spin_unlock(&rq->lock);

        i915_request_put(rq);
    }

    if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
        intel_breadcrumbs_arm_irq(b);
}

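/*
 * Allocate and initialise a struct intel_breadcrumbs. @irq_engine is the
 * engine whose user interrupt drives signaling; it may be NULL for
 * backends that kick signal_irq_work() by other means, in which case
 * the interrupt is never armed (see intel_breadcrumbs_arm_irq()).
 *
 * A minimal usage sketch, assuming the usual engine setup path:
 *
 *	engine->breadcrumbs = intel_breadcrumbs_create(engine);
 *	if (!engine->breadcrumbs)
 *		return -ENOMEM;
 */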
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
    struct intel_breadcrumbs *b;

    b = kzalloc(sizeof(*b), GFP_KERNEL);
    if (!b)
        return NULL;

    kref_init(&b->ref);

    spin_lock_init(&b->signalers_lock);
    INIT_LIST_HEAD(&b->signalers);
    init_llist_head(&b->signaled_requests);

    spin_lock_init(&b->irq_lock);
    init_irq_work(&b->irq_work, signal_irq_work);

    b->irq_engine = irq_engine;
    b->irq_enable = irq_enable;
    b->irq_disable = irq_disable;

    return b;
}

void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
    unsigned long flags;

    if (!b->irq_engine)
        return;

    spin_lock_irqsave(&b->irq_lock, flags);

    if (b->irq_enabled)
        b->irq_enable(b);
    else
        b->irq_disable(b);

    spin_unlock_irqrestore(&b->irq_lock, flags);
}

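/*
 * Called when the engine is parked: flush the irq_work and then keep
 * running it by hand until the remaining listeners have been signaled
 * and the interrupt disarmed (unless new activity shows up, as tracked
 * by b->active).
 */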
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
    if (!READ_ONCE(b->irq_armed))
        return;

    /* Kick the work once more to drain the signalers, and disarm the irq */
    irq_work_sync(&b->irq_work);
    while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
        local_irq_disable();
        signal_irq_work(&b->irq_work);
        local_irq_enable();
        cond_resched();
    }
}

void intel_breadcrumbs_free(struct kref *kref)
{
    struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

    irq_work_sync(&b->irq_work);
    GEM_BUG_ON(!list_empty(&b->signalers));
    GEM_BUG_ON(b->irq_armed);

    kfree(b);
}

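/*
 * Signal an already-completed request directly: mark its fence signaled,
 * take a reference, and hand it to signal_irq_work() via the lockless
 * signaled_requests list so the callbacks run from the usual path.
 */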
static void irq_signal_request(struct i915_request *rq,
                   struct intel_breadcrumbs *b)
{
    if (!__dma_fence_signal(&rq->fence))
        return;

    i915_request_get(rq);
    if (llist_add(&rq->signal_node, &b->signaled_requests))
        irq_work_queue(&b->irq_work);
}

static void insert_breadcrumb(struct i915_request *rq)
{
    struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
    struct intel_context *ce = rq->context;
    struct list_head *pos;

    if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
        return;

    /*
     * If the request is already completed, we can transfer it
     * straight onto a signaled list, and queue the irq worker for
     * its signal completion.
     */
    if (__i915_request_is_complete(rq)) {
        irq_signal_request(rq, b);
        return;
    }

    if (list_empty(&ce->signals)) {
        intel_context_get(ce);
        add_signaling_context(b, ce);
        pos = &ce->signals;
    } else {
        /*
         * We keep the seqno in retirement order, so we can break
         * inside intel_engine_signal_breadcrumbs as soon as we've
         * passed the last completed request (or seen a request that
         * hasn't even started). We could walk the timeline->requests,
         * but keeping a separate signalers_list has the advantage of
         * hopefully being much smaller than the full list and so
         * provides faster iteration and detection when there are no
         * more interrupts required for this context.
         *
         * We typically expect to add new signalers in order, so we
         * start looking for our insertion point from the tail of
         * the list.
         */
        list_for_each_prev(pos, &ce->signals) {
            struct i915_request *it =
                list_entry(pos, typeof(*it), signal_link);

            if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
                break;
        }
    }

    i915_request_get(rq);
    list_add_rcu(&rq->signal_link, pos);
    GEM_BUG_ON(!check_signal_order(ce, rq));
    GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
    set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

    /*
     * Defer enabling the interrupt to after HW submission and recheck
     * the request as it may have completed and raised the interrupt as
     * we were attaching it into the lists.
     */
    if (!b->irq_armed || __i915_request_is_complete(rq))
        irq_work_queue(&b->irq_work);
}

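/*
 * Attach the breadcrumb for a request once it has been submitted. The
 * ACTIVE bit is re-tested under ce->signal_lock, presumably so that a
 * concurrent unsubmit cannot leave a stale breadcrumb behind.
 */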
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
    struct intel_context *ce = rq->context;

    /* Serialises with i915_request_retire() using rq->lock */
    if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
        return true;

    /*
     * Peek at i915_request_submit()/i915_request_unsubmit() status.
     *
     * If the request is not yet active (and not signaled), we will
     * attach the breadcrumb later.
     */
    if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
        return true;

    spin_lock(&ce->signal_lock);
    if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
        insert_breadcrumb(rq);
    spin_unlock(&ce->signal_lock);

    return true;
}

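/*
 * Undo insert_breadcrumb(): unlink the request from its context, drop
 * the context reference if it was the last signaler, and release the
 * request reference taken when the breadcrumb was inserted. If the
 * request completed anyway, signal it rather than lose the signal.
 */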
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
    struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
    struct intel_context *ce = rq->context;
    bool release;

    spin_lock(&ce->signal_lock);
    if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
        spin_unlock(&ce->signal_lock);
        return;
    }

    list_del_rcu(&rq->signal_link);
    release = remove_signaling_context(b, ce);
    spin_unlock(&ce->signal_lock);
    if (release)
        intel_context_put(ce);

    if (__i915_request_is_complete(rq))
        irq_signal_request(rq, b);

    i915_request_put(rq);
}

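/*
 * Flush out every remaining breadcrumb for a context, e.g. when it is
 * being torn down. Each leftover signaler must already be complete and
 * is pushed through irq_signal_request(). The final busy-wait on
 * b->signaler_active ensures a concurrent signal_irq_work() has stopped
 * looking at this context before the caller proceeds.
 */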
void intel_context_remove_breadcrumbs(struct intel_context *ce,
                      struct intel_breadcrumbs *b)
{
    struct i915_request *rq, *rn;
    bool release = false;
    unsigned long flags;

    spin_lock_irqsave(&ce->signal_lock, flags);

    if (list_empty(&ce->signals))
        goto unlock;

    list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
        GEM_BUG_ON(!__i915_request_is_complete(rq));
        if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
                    &rq->fence.flags))
            continue;

        list_del_rcu(&rq->signal_link);
        irq_signal_request(rq, b);
        i915_request_put(rq);
    }
    release = remove_signaling_context(b, ce);

unlock:
    spin_unlock_irqrestore(&ce->signal_lock, flags);
    if (release)
        intel_context_put(ce);

    while (atomic_read(&b->signaler_active))
        cpu_relax();
}

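/*
 * Debug dump of the pending signalers: '!' marks a completed request,
 * '*' one that has started but not yet completed.
 */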
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
    struct intel_context *ce;
    struct i915_request *rq;

    drm_printf(p, "Signals:\n");

    rcu_read_lock();
    list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
        list_for_each_entry_rcu(rq, &ce->signals, signal_link)
            drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
                   rq->fence.context, rq->fence.seqno,
                   __i915_request_is_complete(rq) ? "!" :
                   __i915_request_has_started(rq) ? "*" :
                   "",
                   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
    }
    rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
                    struct drm_printer *p)
{
    struct intel_breadcrumbs *b;

    b = engine->breadcrumbs;
    if (!b)
        return;

    drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
    if (!list_empty(&b->signalers))
        print_signals(b, p);
}