// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

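/*
 * Allocate a page of internal memory, wrapped in a GGTT vma, to back a
 * timeline's hardware status page (HWSP) where seqno values are written.
 */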
static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
    struct drm_i915_private *i915 = gt->i915;
    struct drm_i915_gem_object *obj;
    struct i915_vma *vma;

    obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
    if (IS_ERR(obj))
        return ERR_CAST(obj);

    i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

    vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
    if (IS_ERR(vma))
        i915_gem_object_put(obj);

    return vma;
}

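/*
 * i915_active callbacks: while a timeline has active requests, keep its
 * HWSP pinned in the GGTT and hold a reference on the timeline itself.
 */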
static void __timeline_retire(struct i915_active *active)
{
    struct intel_timeline *tl =
        container_of(active, typeof(*tl), active);

    i915_vma_unpin(tl->hwsp_ggtt);
    intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
    struct intel_timeline *tl =
        container_of(active, typeof(*tl), active);

    __i915_vma_pin(tl->hwsp_ggtt);
    intel_timeline_get(tl);
    return 0;
}

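/*
 * Map the HWSP into the kernel address space and clear this timeline's
 * seqno slot so a stale value cannot be mistaken for progress.
 */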
I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
    struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
    u32 ofs = offset_in_page(timeline->hwsp_offset);
    void *vaddr;

    vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
    if (IS_ERR(vaddr))
        return PTR_ERR(vaddr);

    timeline->hwsp_map = vaddr;
    timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
    drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

    return 0;
}

static int intel_timeline_init(struct intel_timeline *timeline,
                   struct intel_gt *gt,
                   struct i915_vma *hwsp,
                   unsigned int offset)
{
    kref_init(&timeline->kref);
    atomic_set(&timeline->pin_count, 0);

    timeline->gt = gt;

    if (hwsp) {
        timeline->hwsp_offset = offset;
        timeline->hwsp_ggtt = i915_vma_get(hwsp);
    } else {
        timeline->has_initial_breadcrumb = true;
        hwsp = hwsp_alloc(gt);
        if (IS_ERR(hwsp))
            return PTR_ERR(hwsp);
        timeline->hwsp_ggtt = hwsp;
    }

    timeline->hwsp_map = NULL;
    timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

    GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

    timeline->fence_context = dma_fence_context_alloc(1);

    mutex_init(&timeline->mutex);

    INIT_ACTIVE_FENCE(&timeline->last_request);
    INIT_LIST_HEAD(&timeline->requests);

    i915_syncmap_init(&timeline->sync);
    i915_active_init(&timeline->active, __timeline_active,
             __timeline_retire, 0);

    return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
    struct intel_gt_timelines *timelines = &gt->timelines;

    spin_lock_init(&timelines->lock);
    INIT_LIST_HEAD(&timelines->active_list);
}

static void intel_timeline_fini(struct rcu_head *rcu)
{
    struct intel_timeline *timeline =
        container_of(rcu, struct intel_timeline, rcu);

    if (timeline->hwsp_map)
        i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

    i915_vma_put(timeline->hwsp_ggtt);
    i915_active_fini(&timeline->active);

    /*
     * A small race exists between intel_gt_retire_requests_timeout and
     * intel_timeline_exit which could result in the syncmap not getting
     * freed. Rather than work too hard to seal this race, simply clean up
     * the syncmap on fini.
     */
    i915_syncmap_free(&timeline->sync);

    kfree(timeline);
}

struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
            struct i915_vma *global_hwsp,
            unsigned int offset)
{
    struct intel_timeline *timeline;
    int err;

    timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
    if (!timeline)
        return ERR_PTR(-ENOMEM);

    err = intel_timeline_init(timeline, gt, global_hwsp, offset);
    if (err) {
        kfree(timeline);
        return ERR_PTR(err);
    }

    return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
                  unsigned int offset)
{
    struct i915_vma *hwsp = engine->status_page.vma;
    struct intel_timeline *tl;

    tl = __intel_timeline_create(engine->gt, hwsp, offset);
    if (IS_ERR(tl))
        return tl;

    /* Borrow a nearby lock; we only create these timelines during init */
    mutex_lock(&hwsp->vm->mutex);
    list_add_tail(&tl->engine_link, &engine->status_page.timelines);
    mutex_unlock(&hwsp->vm->mutex);

    return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
    GEM_BUG_ON(!atomic_read(&tl->pin_count));
    atomic_inc(&tl->pin_count);
}

int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
    int err;

    if (atomic_add_unless(&tl->pin_count, 1, 0))
        return 0;

    if (!tl->hwsp_map) {
        err = intel_timeline_pin_map(tl);
        if (err)
            return err;
    }

    err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
    if (err)
        return err;

    tl->hwsp_offset =
        i915_ggtt_offset(tl->hwsp_ggtt) +
        offset_in_page(tl->hwsp_offset);
    GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
         tl->fence_context, tl->hwsp_offset);

    i915_active_acquire(&tl->active);
    if (atomic_fetch_inc(&tl->pin_count)) {
        i915_active_release(&tl->active);
        __i915_vma_unpin(tl->hwsp_ggtt);
    }

    return 0;
}

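/*
 * Rewrite the timeline's HWSP slot with its current software seqno, e.g.
 * after the page contents may have been lost while idle or across
 * suspend/resume.
 */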
void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
    u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
    /* Must be pinned to be writable, and no requests in flight. */
    GEM_BUG_ON(!atomic_read(&tl->pin_count));

    memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
    WRITE_ONCE(*hwsp_seqno, tl->seqno);
    drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
    struct intel_gt_timelines *timelines = &tl->gt->timelines;

    /*
     * Pretend we are serialised by the timeline->mutex.
     *
     * While generally true, there are a few exceptions to the rule
     * for the engine->kernel_context being used to manage power
     * transitions. As the engine_park may be called from under any
     * timeline, it uses the power mutex as a global serialisation
     * lock to prevent any other request entering its timeline.
     *
     * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
     *
     * However, intel_gt_retire_request() does not know which engine
     * it is retiring along and so cannot partake in the engine-pm
     * barrier, and there we use the tl->active_count as a means to
     * pin the timeline in the active_list while the locks are dropped.
     * Ergo, as that is outside of the engine-pm barrier, we need to
     * use atomic to manipulate tl->active_count.
     */
    lockdep_assert_held(&tl->mutex);

    if (atomic_add_unless(&tl->active_count, 1, 0))
        return;

    spin_lock(&timelines->lock);
    if (!atomic_fetch_inc(&tl->active_count)) {
        /*
         * The HWSP is volatile, and may have been lost while inactive,
         * e.g. across suspend/resume. Be paranoid, and ensure that
         * the HWSP value matches our seqno so we don't proclaim
         * the next request as already complete.
         */
        intel_timeline_reset_seqno(tl);
        list_add_tail(&tl->link, &timelines->active_list);
    }
    spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
    struct intel_gt_timelines *timelines = &tl->gt->timelines;

    /* See intel_timeline_enter() */
    lockdep_assert_held(&tl->mutex);

    GEM_BUG_ON(!atomic_read(&tl->active_count));
    if (atomic_add_unless(&tl->active_count, -1, 1))
        return;

    spin_lock(&timelines->lock);
    if (atomic_dec_and_test(&tl->active_count))
        list_del(&tl->link);
    spin_unlock(&timelines->lock);

    /*
     * Since this timeline is idle, all barriers upon which we were waiting
     * must also be complete and so we can discard the last used barriers
     * without loss of information.
     */
    i915_syncmap_free(&tl->sync);
}

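/*
 * Advance the timeline's software seqno, stepping by two when an initial
 * breadcrumb is also emitted so that the returned seqno remains even.
 */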
static u32 timeline_advance(struct intel_timeline *tl)
{
    GEM_BUG_ON(!atomic_read(&tl->pin_count));
    GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

    return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

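/*
 * Slow path for seqno wraparound: move the timeline onto the next
 * TIMELINE_SEQNO_BYTES slot within its HWSP page so that any in-flight
 * hardware semaphore waits keep reading the old location.
 */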
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
               u32 *seqno)
{
    u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

    /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
    if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
        next_ofs = offset_in_page(next_ofs + BIT(5));

    tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
    tl->hwsp_seqno = tl->hwsp_map + next_ofs;
    intel_timeline_reset_seqno(tl);

    *seqno = timeline_advance(tl);
    GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
    return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
                 struct i915_request *rq,
                 u32 *seqno)
{
    *seqno = timeline_advance(tl);

    /* Replace the HWSP on wraparound for HW semaphores */
    if (unlikely(!*seqno && tl->has_initial_breadcrumb))
        return __intel_timeline_get_seqno(tl, seqno);

    return 0;
}

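/*
 * Report the GGTT address of @from's breadcrumb so that @to can emit a
 * semaphore wait upon it, and track @to on @from's timeline so the HWSP
 * stays alive while the wait is outstanding. Returns 1 if @from has
 * already completed and no wait is required.
 */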
int intel_timeline_read_hwsp(struct i915_request *from,
                 struct i915_request *to,
                 u32 *hwsp)
{
    struct intel_timeline *tl;
    int err;

    rcu_read_lock();
    tl = rcu_dereference(from->timeline);
    if (i915_request_signaled(from) ||
        !i915_active_acquire_if_busy(&tl->active))
        tl = NULL;

    if (tl) {
        /* hwsp_offset may wraparound, so use from->hwsp_seqno */
        *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
            offset_in_page(from->hwsp_seqno);
    }

    /* ensure we wait on the right request, if not, we completed */
    if (tl && __i915_request_is_complete(from)) {
        i915_active_release(&tl->active);
        tl = NULL;
    }
    rcu_read_unlock();

    if (!tl)
        return 1;

    /* Can't do semaphore waits on kernel context */
    if (!tl->has_initial_breadcrumb) {
        err = -EINVAL;
        goto out;
    }

    err = i915_active_add_request(&tl->active, to);

out:
    i915_active_release(&tl->active);
    return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
    GEM_BUG_ON(!atomic_read(&tl->pin_count));
    if (!atomic_dec_and_test(&tl->pin_count))
        return;

    i915_active_release(&tl->active);
    __i915_vma_unpin(tl->hwsp_ggtt);
}

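/*
 * Last reference dropped: defer the release to RCU, as lookups such as
 * intel_timeline_read_hwsp() may still be dereferencing the timeline
 * under rcu_read_lock().
 */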
void __intel_timeline_free(struct kref *kref)
{
    struct intel_timeline *timeline =
        container_of(kref, typeof(*timeline), kref);

    GEM_BUG_ON(atomic_read(&timeline->pin_count));
    GEM_BUG_ON(!list_empty(&timeline->requests));
    GEM_BUG_ON(timeline->retire);

    call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
    struct intel_gt_timelines *timelines = &gt->timelines;

    GEM_BUG_ON(!list_empty(&timelines->active_list));
}

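/*
 * Debug helper: walk the GT's active timelines and print a summary of each
 * (request counts, current vs last seqno, engine of the last request),
 * taking temporary references so the list lock can be dropped while printing.
 */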
void intel_gt_show_timelines(struct intel_gt *gt,
                 struct drm_printer *m,
                 void (*show_request)(struct drm_printer *m,
                          const struct i915_request *rq,
                          const char *prefix,
                          int indent))
{
    struct intel_gt_timelines *timelines = &gt->timelines;
    struct intel_timeline *tl, *tn;
    LIST_HEAD(free);

    spin_lock(&timelines->lock);
    list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
        unsigned long count, ready, inflight;
        struct i915_request *rq, *rn;
        struct dma_fence *fence;

        if (!mutex_trylock(&tl->mutex)) {
            drm_printf(m, "Timeline %llx: busy; skipping\n",
                   tl->fence_context);
            continue;
        }

        intel_timeline_get(tl);
        GEM_BUG_ON(!atomic_read(&tl->active_count));
        atomic_inc(&tl->active_count); /* pin the list element */
        spin_unlock(&timelines->lock);

        count = 0;
        ready = 0;
        inflight = 0;
        list_for_each_entry_safe(rq, rn, &tl->requests, link) {
            if (i915_request_completed(rq))
                continue;

            count++;
            if (i915_request_is_ready(rq))
                ready++;
            if (i915_request_is_active(rq))
                inflight++;
        }

        drm_printf(m, "Timeline %llx: { ", tl->fence_context);
        drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
               count, ready, inflight);
        drm_printf(m, ", seqno: { current: %d, last: %d }",
               *tl->hwsp_seqno, tl->seqno);
        fence = i915_active_fence_get(&tl->last_request);
        if (fence) {
            drm_printf(m, ", engine: %s",
                   to_request(fence)->engine->name);
            dma_fence_put(fence);
        }
        drm_printf(m, " }\n");

        if (show_request) {
            list_for_each_entry_safe(rq, rn, &tl->requests, link)
                show_request(m, rq, "", 2);
        }

        mutex_unlock(&tl->mutex);
        spin_lock(&timelines->lock);

        /* Resume list iteration after reacquiring spinlock */
        list_safe_reset_next(tl, tn, link);
        if (atomic_dec_and_test(&tl->active_count))
            list_del(&tl->link);

        /* Defer the final release to after the spinlock */
        if (refcount_dec_and_test(&tl->kref.refcount)) {
            GEM_BUG_ON(atomic_read(&tl->active_count));
            list_add(&tl->link, &free);
        }
    }
    spin_unlock(&timelines->lock);

    list_for_each_entry_safe(tl, tn, &free, link)
        __intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif