0001 /*
0002  * SPDX-License-Identifier: MIT
0003  *
0004  * Copyright © 2019 Intel Corporation
0005  */
0006 
0007 #include <linux/debugobjects.h>
0008 
0009 #include "gt/intel_context.h"
0010 #include "gt/intel_engine_heartbeat.h"
0011 #include "gt/intel_engine_pm.h"
0012 #include "gt/intel_ring.h"
0013 
0014 #include "i915_drv.h"
0015 #include "i915_active.h"
0016 
0017 /*
0018  * Active refs memory management
0019  *
0020  * To be more economical with memory, we reap all the i915_active trees as
0021  * they idle (when we know the active requests are inactive) and allocate the
0022  * nodes from a local slab cache to hopefully reduce the fragmentation.
0023  */
0024 static struct kmem_cache *slab_cache;
0025 
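     /*
      * Each active_node tracks the last fence submitted along one timeline
      * (keyed by its fence context id) on behalf of the parent i915_active.
      * Nodes live in the parent's rbtree, sorted by timeline, with the most
      * recently used node cached in ref->cache for a lock-free fast path.
      */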
0026 struct active_node {
0027     struct rb_node node;
0028     struct i915_active_fence base;
0029     struct i915_active *ref;
0030     u64 timeline;
0031 };
0032 
0033 #define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)
0034 
0035 static inline struct active_node *
0036 node_from_active(struct i915_active_fence *active)
0037 {
0038     return container_of(active, struct active_node, base);
0039 }
0040 
0041 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
0042 
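     /*
      * Idle barriers start life as "proto-nodes": until a request adopts
      * them in i915_request_add_active_barriers(), the fence slot holds
      * ERR_PTR(-EAGAIN) as a marker, the dma_fence_cb list_head doubles as
      * the llist_node queued on engine->barrier_tasks, and its ->prev
      * pointer stashes the owning engine. The helpers below convert between
      * these overloaded representations.
      */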
0043 static inline bool is_barrier(const struct i915_active_fence *active)
0044 {
0045     return IS_ERR(rcu_access_pointer(active->fence));
0046 }
0047 
0048 static inline struct llist_node *barrier_to_ll(struct active_node *node)
0049 {
0050     GEM_BUG_ON(!is_barrier(&node->base));
0051     return (struct llist_node *)&node->base.cb.node;
0052 }
0053 
0054 static inline struct intel_engine_cs *
0055 __barrier_to_engine(struct active_node *node)
0056 {
0057     return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
0058 }
0059 
0060 static inline struct intel_engine_cs *
0061 barrier_to_engine(struct active_node *node)
0062 {
0063     GEM_BUG_ON(!is_barrier(&node->base));
0064     return __barrier_to_engine(node);
0065 }
0066 
0067 static inline struct active_node *barrier_from_ll(struct llist_node *x)
0068 {
0069     return container_of((struct list_head *)x,
0070                 struct active_node, base.cb.node);
0071 }
0072 
0073 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
0074 
0075 static void *active_debug_hint(void *addr)
0076 {
0077     struct i915_active *ref = addr;
0078 
0079     return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
0080 }
0081 
0082 static const struct debug_obj_descr active_debug_desc = {
0083     .name = "i915_active",
0084     .debug_hint = active_debug_hint,
0085 };
0086 
0087 static void debug_active_init(struct i915_active *ref)
0088 {
0089     debug_object_init(ref, &active_debug_desc);
0090 }
0091 
0092 static void debug_active_activate(struct i915_active *ref)
0093 {
0094     lockdep_assert_held(&ref->tree_lock);
0095     if (!atomic_read(&ref->count)) /* before the first inc */
0096         debug_object_activate(ref, &active_debug_desc);
0097 }
0098 
0099 static void debug_active_deactivate(struct i915_active *ref)
0100 {
0101     lockdep_assert_held(&ref->tree_lock);
0102     if (!atomic_read(&ref->count)) /* after the last dec */
0103         debug_object_deactivate(ref, &active_debug_desc);
0104 }
0105 
0106 static void debug_active_fini(struct i915_active *ref)
0107 {
0108     debug_object_free(ref, &active_debug_desc);
0109 }
0110 
0111 static void debug_active_assert(struct i915_active *ref)
0112 {
0113     debug_object_assert_init(ref, &active_debug_desc);
0114 }
0115 
0116 #else
0117 
0118 static inline void debug_active_init(struct i915_active *ref) { }
0119 static inline void debug_active_activate(struct i915_active *ref) { }
0120 static inline void debug_active_deactivate(struct i915_active *ref) { }
0121 static inline void debug_active_fini(struct i915_active *ref) { }
0122 static inline void debug_active_assert(struct i915_active *ref) { }
0123 
0124 #endif
0125 
0126 static void
0127 __active_retire(struct i915_active *ref)
0128 {
0129     struct rb_root root = RB_ROOT;
0130     struct active_node *it, *n;
0131     unsigned long flags;
0132 
0133     GEM_BUG_ON(i915_active_is_idle(ref));
0134 
0135     /* return the unused nodes to our slabcache -- flushing the allocator */
0136     if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
0137         return;
0138 
0139     GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
0140     debug_active_deactivate(ref);
0141 
0142     /* Even if we have not used the cache, we may still have a barrier */
0143     if (!ref->cache)
0144         ref->cache = fetch_node(ref->tree.rb_node);
0145 
0146     /* Keep the MRU cached node for reuse */
0147     if (ref->cache) {
0148         /* Discard all other nodes in the tree */
0149         rb_erase(&ref->cache->node, &ref->tree);
0150         root = ref->tree;
0151 
0152         /* Rebuild the tree with only the cached node */
0153         rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
0154         rb_insert_color(&ref->cache->node, &ref->tree);
0155         GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
0156 
0157         /* Make the cached node available for reuse with any timeline */
0158         ref->cache->timeline = 0; /* needs cmpxchg(u64) */
0159     }
0160 
0161     spin_unlock_irqrestore(&ref->tree_lock, flags);
0162 
0163     /* After the final retire, the entire struct may be freed */
0164     if (ref->retire)
0165         ref->retire(ref);
0166 
0167     /* ... except if you wait on it, you must manage your own references! */
0168     wake_up_var(ref);
0169 
0170     /* Finally free the discarded timeline tree */
0171     rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
0172         GEM_BUG_ON(i915_active_fence_isset(&it->base));
0173         kmem_cache_free(slab_cache, it);
0174     }
0175 }
0176 
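     /*
      * Dropping a reference: atomic_add_unless(&ref->count, -1, 1) only
      * releases the count while other references remain. The final
      * reference falls through to __active_retire(), which pairs the last
      * decrement with taking ref->tree_lock (atomic_dec_and_lock_irqsave)
      * so that reaping the tree is serialised against concurrent lookups.
      */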
0177 static void
0178 active_work(struct work_struct *wrk)
0179 {
0180     struct i915_active *ref = container_of(wrk, typeof(*ref), work);
0181 
0182     GEM_BUG_ON(!atomic_read(&ref->count));
0183     if (atomic_add_unless(&ref->count, -1, 1))
0184         return;
0185 
0186     __active_retire(ref);
0187 }
0188 
0189 static void
0190 active_retire(struct i915_active *ref)
0191 {
0192     GEM_BUG_ON(!atomic_read(&ref->count));
0193     if (atomic_add_unless(&ref->count, -1, 1))
0194         return;
0195 
0196     if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
0197         queue_work(system_unbound_wq, &ref->work);
0198         return;
0199     }
0200 
0201     __active_retire(ref);
0202 }
0203 
0204 static inline struct dma_fence **
0205 __active_fence_slot(struct i915_active_fence *active)
0206 {
0207     return (struct dma_fence ** __force)&active->fence;
0208 }
0209 
0210 static inline bool
0211 active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
0212 {
0213     struct i915_active_fence *active =
0214         container_of(cb, typeof(*active), cb);
0215 
0216     return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
0217 }
0218 
0219 static void
0220 node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
0221 {
0222     if (active_fence_cb(fence, cb))
0223         active_retire(container_of(cb, struct active_node, base.cb)->ref);
0224 }
0225 
0226 static void
0227 excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
0228 {
0229     if (active_fence_cb(fence, cb))
0230         active_retire(container_of(cb, struct i915_active, excl.cb));
0231 }
0232 
0233 static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
0234 {
0235     struct active_node *it;
0236 
0237     GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */
0238 
0239     /*
0240      * We track the most recently used timeline to skip an rbtree search
0241      * for the common case; under typical loads we never need the rbtree
0242      * at all. We can reuse the last slot if it is empty, that is
0243      * after the previous activity has been retired, or if it matches the
0244      * current timeline.
0245      */
0246     it = READ_ONCE(ref->cache);
0247     if (it) {
0248         u64 cached = READ_ONCE(it->timeline);
0249 
0250         /* Once claimed, this slot will only belong to this idx */
0251         if (cached == idx)
0252             return it;
0253 
0254         /*
0255          * An unclaimed cache [.timeline=0] can only be claimed once.
0256          *
0257          * If the value is already non-zero, some other thread has
0258          * claimed the cache and we know that it does not match our
0259          * idx. If, and only if, the timeline is currently zero is it
0260          * worth competing to claim it atomically for ourselves (for
0261          * only the winner of that race will cmpxchg return the old
0262          * value of 0).
0263          */
0264         if (!cached && !cmpxchg64(&it->timeline, 0, idx))
0265             return it;
0266     }
0267 
0268     BUILD_BUG_ON(offsetof(typeof(*it), node));
0269 
0270     /* While active, the tree can only be built; not destroyed */
0271     GEM_BUG_ON(i915_active_is_idle(ref));
0272 
0273     it = fetch_node(ref->tree.rb_node);
0274     while (it) {
0275         if (it->timeline < idx) {
0276             it = fetch_node(it->node.rb_right);
0277         } else if (it->timeline > idx) {
0278             it = fetch_node(it->node.rb_left);
0279         } else {
0280             WRITE_ONCE(ref->cache, it);
0281             break;
0282         }
0283     }
0284 
0285     /* NB: If the tree rotated beneath us, we may miss our target. */
0286     return it;
0287 }
0288 
0289 static struct i915_active_fence *
0290 active_instance(struct i915_active *ref, u64 idx)
0291 {
0292     struct active_node *node;
0293     struct rb_node **p, *parent;
0294 
0295     node = __active_lookup(ref, idx);
0296     if (likely(node))
0297         return &node->base;
0298 
0299     spin_lock_irq(&ref->tree_lock);
0300     GEM_BUG_ON(i915_active_is_idle(ref));
0301 
0302     parent = NULL;
0303     p = &ref->tree.rb_node;
0304     while (*p) {
0305         parent = *p;
0306 
0307         node = rb_entry(parent, struct active_node, node);
0308         if (node->timeline == idx)
0309             goto out;
0310 
0311         if (node->timeline < idx)
0312             p = &parent->rb_right;
0313         else
0314             p = &parent->rb_left;
0315     }
0316 
0317     /*
0318      * XXX: We should preallocate this before i915_active_ref() is ever
0319      * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC.
0320      */
0321     node = kmem_cache_alloc(slab_cache, GFP_ATOMIC);
0322     if (!node)
0323         goto out;
0324 
0325     __i915_active_fence_init(&node->base, NULL, node_retire);
0326     node->ref = ref;
0327     node->timeline = idx;
0328 
0329     rb_link_node(&node->node, parent, p);
0330     rb_insert_color(&node->node, &ref->tree);
0331 
0332 out:
0333     WRITE_ONCE(ref->cache, node);
0334     spin_unlock_irq(&ref->tree_lock);
0335 
0336     return &node->base;
0337 }
0338 
0339 void __i915_active_init(struct i915_active *ref,
0340             int (*active)(struct i915_active *ref),
0341             void (*retire)(struct i915_active *ref),
0342             unsigned long flags,
0343             struct lock_class_key *mkey,
0344             struct lock_class_key *wkey)
0345 {
0346     debug_active_init(ref);
0347 
0348     ref->flags = flags;
0349     ref->active = active;
0350     ref->retire = retire;
0351 
0352     spin_lock_init(&ref->tree_lock);
0353     ref->tree = RB_ROOT;
0354     ref->cache = NULL;
0355 
0356     init_llist_head(&ref->preallocated_barriers);
0357     atomic_set(&ref->count, 0);
0358     __mutex_init(&ref->mutex, "i915_active", mkey);
0359     __i915_active_fence_init(&ref->excl, NULL, excl_retire);
0360     INIT_WORK(&ref->work, active_work);
0361 #if IS_ENABLED(CONFIG_LOCKDEP)
0362     lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
0363 #endif
0364 }
0365 
0366 static bool ____active_del_barrier(struct i915_active *ref,
0367                    struct active_node *node,
0368                    struct intel_engine_cs *engine)
0369 
0370 {
0371     struct llist_node *head = NULL, *tail = NULL;
0372     struct llist_node *pos, *next;
0373 
0374     GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
0375 
0376     /*
0377      * Rebuild the llist excluding our node. We may perform this
0378      * outside of the kernel_context timeline mutex and so someone
0379      * else may be manipulating the engine->barrier_tasks, in
0380      * which case either we or they will be upset :)
0381      *
0382      * A second __active_del_barrier() will report failure to claim
0383      * the active_node and the caller will just shrug and know not to
0384      * claim ownership of its node.
0385      *
0386      * A concurrent i915_request_add_active_barriers() will miss adding
0387      * any of the tasks, but we will try again on the next -- and since
0388      * we are actively using the barrier, we know that there will be
0389      * at least another opportunity when we idle.
0390      */
0391     llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
0392         if (node == barrier_from_ll(pos)) {
0393             node = NULL;
0394             continue;
0395         }
0396 
0397         pos->next = head;
0398         head = pos;
0399         if (!tail)
0400             tail = pos;
0401     }
0402     if (head)
0403         llist_add_batch(head, tail, &engine->barrier_tasks);
0404 
0405     return !node;
0406 }
0407 
0408 static bool
0409 __active_del_barrier(struct i915_active *ref, struct active_node *node)
0410 {
0411     return ____active_del_barrier(ref, node, barrier_to_engine(node));
0412 }
0413 
0414 static bool
0415 replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
0416 {
0417     if (!is_barrier(active)) /* proto-node used by our idle barrier? */
0418         return false;
0419 
0420     /*
0421      * This request is on the kernel_context timeline, and so
0422      * we can use it to substitute for the pending idle-barrier
0423      * request that we want to emit on the kernel_context.
0424      */
0425     __active_del_barrier(ref, node_from_active(active));
0426     return true;
0427 }
0428 
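     /*
      * Track @rq as the most recent activity on its timeline within @ref:
      * acquire the tracker so the tree cannot be reaped while we allocate,
      * find or create the per-timeline slot, replace any idle barrier
      * currently occupying that slot, then install the request's fence.
      */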
0429 int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
0430 {
0431     struct dma_fence *fence = &rq->fence;
0432     struct i915_active_fence *active;
0433     int err;
0434 
0435     /* Prevent reaping in case we malloc/wait while building the tree */
0436     err = i915_active_acquire(ref);
0437     if (err)
0438         return err;
0439 
0440     active = active_instance(ref, i915_request_timeline(rq)->fence_context);
0441     if (!active) {
0442         err = -ENOMEM;
0443         goto out;
0444     }
0445 
0446     if (replace_barrier(ref, active)) {
0447         RCU_INIT_POINTER(active->fence, NULL);
0448         atomic_dec(&ref->count);
0449     }
0450     if (!__i915_active_fence_set(active, fence))
0451         __i915_active_acquire(ref);
0452 
0453 out:
0454     i915_active_release(ref);
0455     return err;
0456 }
0457 
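     /*
      * Setting a fence in an empty slot takes a reference on the tracker;
      * for the exclusive slot (ref->excl) that reference is dropped by
      * excl_retire() once the fence signals.
      */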
0458 static struct dma_fence *
0459 __i915_active_set_fence(struct i915_active *ref,
0460             struct i915_active_fence *active,
0461             struct dma_fence *fence)
0462 {
0463     struct dma_fence *prev;
0464 
0465     if (replace_barrier(ref, active)) {
0466         RCU_INIT_POINTER(active->fence, fence);
0467         return NULL;
0468     }
0469 
0470     rcu_read_lock();
0471     prev = __i915_active_fence_set(active, fence);
0472     if (prev)
0473         prev = dma_fence_get_rcu(prev);
0474     else
0475         __i915_active_acquire(ref);
0476     rcu_read_unlock();
0477 
0478     return prev;
0479 }
0480 
0481 struct dma_fence *
0482 i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
0483 {
0484     /* We expect the caller to manage the exclusive timeline ordering */
0485     return __i915_active_set_fence(ref, &ref->excl, f);
0486 }
0487 
0488 bool i915_active_acquire_if_busy(struct i915_active *ref)
0489 {
0490     debug_active_assert(ref);
0491     return atomic_add_unless(&ref->count, 1, 0);
0492 }
0493 
0494 static void __i915_active_activate(struct i915_active *ref)
0495 {
0496     spin_lock_irq(&ref->tree_lock); /* __active_retire() */
0497     if (!atomic_fetch_inc(&ref->count))
0498         debug_active_activate(ref);
0499     spin_unlock_irq(&ref->tree_lock);
0500 }
0501 
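     /*
      * First acquisition: if the tracker is already active we only bump the
      * count. Otherwise ref->mutex serialises callers so that the optional
      * ref->active() hook runs exactly once before the tracker transitions
      * from idle to active (count 0 -> 1).
      */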
0502 int i915_active_acquire(struct i915_active *ref)
0503 {
0504     int err;
0505 
0506     if (i915_active_acquire_if_busy(ref))
0507         return 0;
0508 
0509     if (!ref->active) {
0510         __i915_active_activate(ref);
0511         return 0;
0512     }
0513 
0514     err = mutex_lock_interruptible(&ref->mutex);
0515     if (err)
0516         return err;
0517 
0518     if (likely(!i915_active_acquire_if_busy(ref))) {
0519         err = ref->active(ref);
0520         if (!err)
0521             __i915_active_activate(ref);
0522     }
0523 
0524     mutex_unlock(&ref->mutex);
0525 
0526     return err;
0527 }
0528 
0529 int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
0530 {
0531     struct i915_active_fence *active;
0532     int err;
0533 
0534     err = i915_active_acquire(ref);
0535     if (err)
0536         return err;
0537 
0538     active = active_instance(ref, idx);
0539     if (!active) {
0540         i915_active_release(ref);
0541         return -ENOMEM;
0542     }
0543 
0544     return 0; /* return with active ref */
0545 }
0546 
0547 void i915_active_release(struct i915_active *ref)
0548 {
0549     debug_active_assert(ref);
0550     active_retire(ref);
0551 }
0552 
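     /*
      * Before waiting for idle, force software signaling on every tracked
      * fence so that completion does not depend on a lazily enabled
      * callback, and kick any engine still holding unattached idle barriers.
      */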
0553 static void enable_signaling(struct i915_active_fence *active)
0554 {
0555     struct dma_fence *fence;
0556 
0557     if (unlikely(is_barrier(active)))
0558         return;
0559 
0560     fence = i915_active_fence_get(active);
0561     if (!fence)
0562         return;
0563 
0564     dma_fence_enable_sw_signaling(fence);
0565     dma_fence_put(fence);
0566 }
0567 
0568 static int flush_barrier(struct active_node *it)
0569 {
0570     struct intel_engine_cs *engine;
0571 
0572     if (likely(!is_barrier(&it->base)))
0573         return 0;
0574 
0575     engine = __barrier_to_engine(it);
0576     smp_rmb(); /* serialise with add_active_barriers */
0577     if (!is_barrier(&it->base))
0578         return 0;
0579 
0580     return intel_engine_flush_barriers(engine);
0581 }
0582 
0583 static int flush_lazy_signals(struct i915_active *ref)
0584 {
0585     struct active_node *it, *n;
0586     int err = 0;
0587 
0588     enable_signaling(&ref->excl);
0589     rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
0590         err = flush_barrier(it); /* unconnected idle barrier? */
0591         if (err)
0592             break;
0593 
0594         enable_signaling(&it->base);
0595     }
0596 
0597     return err;
0598 }
0599 
0600 int __i915_active_wait(struct i915_active *ref, int state)
0601 {
0602     might_sleep();
0603 
0604     /* Any fence added after the wait begins will not be auto-signaled */
0605     if (i915_active_acquire_if_busy(ref)) {
0606         int err;
0607 
0608         err = flush_lazy_signals(ref);
0609         i915_active_release(ref);
0610         if (err)
0611             return err;
0612 
0613         if (___wait_var_event(ref, i915_active_is_idle(ref),
0614                       state, 0, 0, schedule()))
0615             return -EINTR;
0616     }
0617 
0618     /*
0619      * After the wait is complete, the caller may free the active.
0620      * We have to flush any concurrent retirement before returning.
0621      */
0622     flush_work(&ref->work);
0623     return 0;
0624 }
0625 
0626 static int __await_active(struct i915_active_fence *active,
0627               int (*fn)(void *arg, struct dma_fence *fence),
0628               void *arg)
0629 {
0630     struct dma_fence *fence;
0631 
0632     if (is_barrier(active)) /* XXX flush the barrier? */
0633         return 0;
0634 
0635     fence = i915_active_fence_get(active);
0636     if (fence) {
0637         int err;
0638 
0639         err = fn(arg, fence);
0640         dma_fence_put(fence);
0641         if (err < 0)
0642             return err;
0643     }
0644 
0645     return 0;
0646 }
0647 
0648 struct wait_barrier {
0649     struct wait_queue_entry base;
0650     struct i915_active *ref;
0651 };
0652 
0653 static int
0654 barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
0655 {
0656     struct wait_barrier *wb = container_of(wq, typeof(*wb), base);
0657 
0658     if (i915_active_is_idle(wb->ref)) {
0659         list_del(&wq->entry);
0660         i915_sw_fence_complete(wq->private);
0661         kfree(wq);
0662     }
0663 
0664     return 0;
0665 }
0666 
0667 static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
0668 {
0669     struct wait_barrier *wb;
0670 
0671     wb = kmalloc(sizeof(*wb), GFP_KERNEL);
0672     if (unlikely(!wb))
0673         return -ENOMEM;
0674 
0675     GEM_BUG_ON(i915_active_is_idle(ref));
0676     if (!i915_sw_fence_await(fence)) {
0677         kfree(wb);
0678         return -EINVAL;
0679     }
0680 
0681     wb->base.flags = 0;
0682     wb->base.func = barrier_wake;
0683     wb->base.private = fence;
0684     wb->ref = ref;
0685 
0686     add_wait_queue(__var_waitqueue(ref), &wb->base);
0687     return 0;
0688 }
0689 
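     /*
      * Build a dependency on the tracker's current activity:
      * I915_ACTIVE_AWAIT_EXCL awaits only the exclusive fence,
      * I915_ACTIVE_AWAIT_ACTIVE awaits every per-timeline fence in the tree,
      * and I915_ACTIVE_AWAIT_BARRIER holds the waiter until the tracker is
      * completely idle via the wait_barrier wakeup installed above.
      */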
0690 static int await_active(struct i915_active *ref,
0691             unsigned int flags,
0692             int (*fn)(void *arg, struct dma_fence *fence),
0693             void *arg, struct i915_sw_fence *barrier)
0694 {
0695     int err = 0;
0696 
0697     if (!i915_active_acquire_if_busy(ref))
0698         return 0;
0699 
0700     if (flags & I915_ACTIVE_AWAIT_EXCL &&
0701         rcu_access_pointer(ref->excl.fence)) {
0702         err = __await_active(&ref->excl, fn, arg);
0703         if (err)
0704             goto out;
0705     }
0706 
0707     if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
0708         struct active_node *it, *n;
0709 
0710         rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
0711             err = __await_active(&it->base, fn, arg);
0712             if (err)
0713                 goto out;
0714         }
0715     }
0716 
0717     if (flags & I915_ACTIVE_AWAIT_BARRIER) {
0718         err = flush_lazy_signals(ref);
0719         if (err)
0720             goto out;
0721 
0722         err = __await_barrier(ref, barrier);
0723         if (err)
0724             goto out;
0725     }
0726 
0727 out:
0728     i915_active_release(ref);
0729     return err;
0730 }
0731 
0732 static int rq_await_fence(void *arg, struct dma_fence *fence)
0733 {
0734     return i915_request_await_dma_fence(arg, fence);
0735 }
0736 
0737 int i915_request_await_active(struct i915_request *rq,
0738                   struct i915_active *ref,
0739                   unsigned int flags)
0740 {
0741     return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
0742 }
0743 
0744 static int sw_await_fence(void *arg, struct dma_fence *fence)
0745 {
0746     return i915_sw_fence_await_dma_fence(arg, fence, 0,
0747                          GFP_NOWAIT | __GFP_NOWARN);
0748 }
0749 
0750 int i915_sw_fence_await_active(struct i915_sw_fence *fence,
0751                    struct i915_active *ref,
0752                    unsigned int flags)
0753 {
0754     return await_active(ref, flags, sw_await_fence, fence, fence);
0755 }
0756 
0757 void i915_active_fini(struct i915_active *ref)
0758 {
0759     debug_active_fini(ref);
0760     GEM_BUG_ON(atomic_read(&ref->count));
0761     GEM_BUG_ON(work_pending(&ref->work));
0762     mutex_destroy(&ref->mutex);
0763 
0764     if (ref->cache)
0765         kmem_cache_free(slab_cache, ref->cache);
0766 }
0767 
0768 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
0769 {
0770     return node->timeline == idx && !i915_active_fence_isset(&node->base);
0771 }
0772 
0773 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
0774 {
0775     struct rb_node *prev, *p;
0776 
0777     if (RB_EMPTY_ROOT(&ref->tree))
0778         return NULL;
0779 
0780     GEM_BUG_ON(i915_active_is_idle(ref));
0781 
0782     /*
0783      * Try to reuse any existing barrier nodes already allocated for this
0784      * i915_active; due to overlapping active phases there is likely a
0785      * node kept alive (as we reuse before parking). We prefer to reuse
0786      * completely idle barriers (less hassle in manipulating the llists),
0787      * but otherwise any will do.
0788      */
0789     if (ref->cache && is_idle_barrier(ref->cache, idx)) {
0790         p = &ref->cache->node;
0791         goto match;
0792     }
0793 
0794     prev = NULL;
0795     p = ref->tree.rb_node;
0796     while (p) {
0797         struct active_node *node =
0798             rb_entry(p, struct active_node, node);
0799 
0800         if (is_idle_barrier(node, idx))
0801             goto match;
0802 
0803         prev = p;
0804         if (node->timeline < idx)
0805             p = READ_ONCE(p->rb_right);
0806         else
0807             p = READ_ONCE(p->rb_left);
0808     }
0809 
0810     /*
0811      * No quick match, but we did find the leftmost rb_node for the
0812      * kernel_context. Walk the rb_tree in-order to see if there were
0813      * any idle-barriers on this timeline that we missed, or just use
0814      * the first pending barrier.
0815      */
0816     for (p = prev; p; p = rb_next(p)) {
0817         struct active_node *node =
0818             rb_entry(p, struct active_node, node);
0819         struct intel_engine_cs *engine;
0820 
0821         if (node->timeline > idx)
0822             break;
0823 
0824         if (node->timeline < idx)
0825             continue;
0826 
0827         if (is_idle_barrier(node, idx))
0828             goto match;
0829 
0830         /*
0831          * The list of pending barriers is protected by the
0832          * kernel_context timeline, which notably we do not hold
0833          * here. i915_request_add_active_barriers() may consume
0834          * the barrier before we claim it, so we have to check
0835          * for success.
0836          */
0837         engine = __barrier_to_engine(node);
0838         smp_rmb(); /* serialise with add_active_barriers */
0839         if (is_barrier(&node->base) &&
0840             ____active_del_barrier(ref, node, engine))
0841             goto match;
0842     }
0843 
0844     return NULL;
0845 
0846 match:
0847     spin_lock_irq(&ref->tree_lock);
0848     rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
0849     if (p == &ref->cache->node)
0850         WRITE_ONCE(ref->cache, NULL);
0851     spin_unlock_irq(&ref->tree_lock);
0852 
0853     return rb_entry(p, struct active_node, node);
0854 }
0855 
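     /*
      * Idle barriers flow through three steps: (1) preallocate one
      * proto-node per physical engine here, reusing idle nodes where
      * possible; (2) i915_active_acquire_barrier() inserts them into the
      * rbtree and queues them on engine->barrier_tasks; (3) the next
      * kernel-context request adopts them in
      * i915_request_add_active_barriers(), and the reference is released
      * when that request retires.
      */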
0856 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
0857                         struct intel_engine_cs *engine)
0858 {
0859     intel_engine_mask_t tmp, mask = engine->mask;
0860     struct llist_node *first = NULL, *last = NULL;
0861     struct intel_gt *gt = engine->gt;
0862 
0863     GEM_BUG_ON(i915_active_is_idle(ref));
0864 
0865     /* Wait until the previous preallocation is completed */
0866     while (!llist_empty(&ref->preallocated_barriers))
0867         cond_resched();
0868 
0869     /*
0870      * Preallocate a node for each physical engine supporting the target
0871      * engine (remember virtual engines have more than one sibling).
0872      * We can then use the preallocated nodes in
0873      * i915_active_acquire_barrier()
0874      */
0875     GEM_BUG_ON(!mask);
0876     for_each_engine_masked(engine, gt, mask, tmp) {
0877         u64 idx = engine->kernel_context->timeline->fence_context;
0878         struct llist_node *prev = first;
0879         struct active_node *node;
0880 
0881         rcu_read_lock();
0882         node = reuse_idle_barrier(ref, idx);
0883         rcu_read_unlock();
0884         if (!node) {
0885             node = kmem_cache_alloc(slab_cache, GFP_KERNEL);
0886             if (!node)
0887                 goto unwind;
0888 
0889             RCU_INIT_POINTER(node->base.fence, NULL);
0890             node->base.cb.func = node_retire;
0891             node->timeline = idx;
0892             node->ref = ref;
0893         }
0894 
0895         if (!i915_active_fence_isset(&node->base)) {
0896             /*
0897              * Mark this as being *our* unconnected proto-node.
0898              *
0899              * Since this node is not in any list, and we have
0900              * decoupled it from the rbtree, we can reuse the
0901              * request to indicate this is an idle-barrier node
0902              * and then we can use the rb_node and list pointers
0903              * for our tracking of the pending barrier.
0904              */
0905             RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
0906             node->base.cb.node.prev = (void *)engine;
0907             __i915_active_acquire(ref);
0908         }
0909         GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
0910 
0911         GEM_BUG_ON(barrier_to_engine(node) != engine);
0912         first = barrier_to_ll(node);
0913         first->next = prev;
0914         if (!last)
0915             last = first;
0916         intel_engine_pm_get(engine);
0917     }
0918 
0919     GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
0920     llist_add_batch(first, last, &ref->preallocated_barriers);
0921 
0922     return 0;
0923 
0924 unwind:
0925     while (first) {
0926         struct active_node *node = barrier_from_ll(first);
0927 
0928         first = first->next;
0929 
0930         atomic_dec(&ref->count);
0931         intel_engine_pm_put(barrier_to_engine(node));
0932 
0933         kmem_cache_free(slab_cache, node);
0934     }
0935     return -ENOMEM;
0936 }
0937 
0938 void i915_active_acquire_barrier(struct i915_active *ref)
0939 {
0940     struct llist_node *pos, *next;
0941     unsigned long flags;
0942 
0943     GEM_BUG_ON(i915_active_is_idle(ref));
0944 
0945     /*
0946      * Transfer the list of preallocated barriers into the
0947      * i915_active rbtree, but only as proto-nodes. They will be
0948      * populated by i915_request_add_active_barriers() to point to the
0949      * request that will eventually release them.
0950      */
0951     llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
0952         struct active_node *node = barrier_from_ll(pos);
0953         struct intel_engine_cs *engine = barrier_to_engine(node);
0954         struct rb_node **p, *parent;
0955 
0956         spin_lock_irqsave_nested(&ref->tree_lock, flags,
0957                      SINGLE_DEPTH_NESTING);
0958         parent = NULL;
0959         p = &ref->tree.rb_node;
0960         while (*p) {
0961             struct active_node *it;
0962 
0963             parent = *p;
0964 
0965             it = rb_entry(parent, struct active_node, node);
0966             if (it->timeline < node->timeline)
0967                 p = &parent->rb_right;
0968             else
0969                 p = &parent->rb_left;
0970         }
0971         rb_link_node(&node->node, parent, p);
0972         rb_insert_color(&node->node, &ref->tree);
0973         spin_unlock_irqrestore(&ref->tree_lock, flags);
0974 
0975         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
0976         llist_add(barrier_to_ll(node), &engine->barrier_tasks);
0977         intel_engine_pm_put_delay(engine, 2);
0978     }
0979 }
0980 
0981 static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
0982 {
0983     return __active_fence_slot(&barrier_from_ll(node)->base);
0984 }
0985 
0986 void i915_request_add_active_barriers(struct i915_request *rq)
0987 {
0988     struct intel_engine_cs *engine = rq->engine;
0989     struct llist_node *node, *next;
0990     unsigned long flags;
0991 
0992     GEM_BUG_ON(!intel_context_is_barrier(rq->context));
0993     GEM_BUG_ON(intel_engine_is_virtual(engine));
0994     GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
0995 
0996     node = llist_del_all(&engine->barrier_tasks);
0997     if (!node)
0998         return;
0999     /*
1000      * Attach the list of proto-fences to the in-flight request such
1001      * that the parent i915_active will be released when this request
1002      * is retired.
1003      */
1004     spin_lock_irqsave(&rq->lock, flags);
1005     llist_for_each_safe(node, next, node) {
1006         /* serialise with reuse_idle_barrier */
1007         smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
1008         list_add_tail((struct list_head *)node, &rq->fence.cb_list);
1009     }
1010     spin_unlock_irqrestore(&rq->lock, flags);
1011 }
1012 
1013 /*
1014  * __i915_active_fence_set: Update the last active fence along its timeline
1015  * @active: the active tracker
1016  * @fence: the new fence (under construction)
1017  *
1018  * Records the new @fence as the last active fence along its timeline in
1019  * this active tracker, moving the tracking callbacks from the previous
1020  * fence onto this one. Returns the previous fence (if not already completed),
1021  * which the caller must ensure is executed before the new fence. To ensure
1022  * that the order of fences within the timeline of the i915_active_fence is
1023  * understood, it should be locked by the caller.
1024  */
1025 struct dma_fence *
1026 __i915_active_fence_set(struct i915_active_fence *active,
1027             struct dma_fence *fence)
1028 {
1029     struct dma_fence *prev;
1030     unsigned long flags;
1031 
1032     if (fence == rcu_access_pointer(active->fence))
1033         return fence;
1034 
1035     GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
1036 
1037     /*
1038      * Consider that we have two threads arriving (A and B), with
1039      * C already resident as the active->fence.
1040      *
1041      * A does the xchg first, and so it sees C or NULL depending
1042      * on the timing of the interrupt handler. If it is NULL, the
1043      * previous fence must have been signaled and we know that
1044      * we are first on the timeline. If it is still present,
1045      * we acquire the lock on that fence and serialise with the interrupt
1046      * handler, in the process removing it from any future interrupt
1047      * callback. A will then wait on C before executing (if present).
1048      *
1049      * As B is second, it sees A as the previous fence and so waits for
1050      * it to complete its transition and takes over the occupancy for
1051      * itself -- remembering that it needs to wait on A before executing.
1052      *
1053      * Note the strong ordering of the timeline also provides consistent
1054      * nesting rules for the fence->lock; the inner lock is always the
1055      * older lock.
1056      */
1057     spin_lock_irqsave(fence->lock, flags);
1058     prev = xchg(__active_fence_slot(active), fence);
1059     if (prev) {
1060         GEM_BUG_ON(prev == fence);
1061         spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
1062         __list_del_entry(&active->cb.node);
1063         spin_unlock(prev->lock); /* serialise with prev->cb_list */
1064     }
1065     list_add_tail(&active->cb.node, &fence->cb_list);
1066     spin_unlock_irqrestore(fence->lock, flags);
1067 
1068     return prev;
1069 }
1070 
1071 int i915_active_fence_set(struct i915_active_fence *active,
1072               struct i915_request *rq)
1073 {
1074     struct dma_fence *fence;
1075     int err = 0;
1076 
1077     /* Must maintain timeline ordering wrt previous active requests */
1078     rcu_read_lock();
1079     fence = __i915_active_fence_set(active, &rq->fence);
1080     if (fence) /* but the previous fence may not belong to that timeline! */
1081         fence = dma_fence_get_rcu(fence);
1082     rcu_read_unlock();
1083     if (fence) {
1084         err = i915_request_await_dma_fence(rq, fence);
1085         dma_fence_put(fence);
1086     }
1087 
1088     return err;
1089 }
1090 
1091 void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
1092 {
1093     active_fence_cb(fence, cb);
1094 }
1095 
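     /*
      * i915_active_create() returns a standalone, refcounted tracker: the
      * embedded kref keeps the allocation alive, while the auto_active and
      * auto_retire callbacks take and drop an extra reference for as long
      * as the tracker is busy, so it may safely outlive its creator.
      */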
1096 struct auto_active {
1097     struct i915_active base;
1098     struct kref ref;
1099 };
1100 
1101 struct i915_active *i915_active_get(struct i915_active *ref)
1102 {
1103     struct auto_active *aa = container_of(ref, typeof(*aa), base);
1104 
1105     kref_get(&aa->ref);
1106     return &aa->base;
1107 }
1108 
1109 static void auto_release(struct kref *ref)
1110 {
1111     struct auto_active *aa = container_of(ref, typeof(*aa), ref);
1112 
1113     i915_active_fini(&aa->base);
1114     kfree(aa);
1115 }
1116 
1117 void i915_active_put(struct i915_active *ref)
1118 {
1119     struct auto_active *aa = container_of(ref, typeof(*aa), base);
1120 
1121     kref_put(&aa->ref, auto_release);
1122 }
1123 
1124 static int auto_active(struct i915_active *ref)
1125 {
1126     i915_active_get(ref);
1127     return 0;
1128 }
1129 
1130 static void auto_retire(struct i915_active *ref)
1131 {
1132     i915_active_put(ref);
1133 }
1134 
1135 struct i915_active *i915_active_create(void)
1136 {
1137     struct auto_active *aa;
1138 
1139     aa = kmalloc(sizeof(*aa), GFP_KERNEL);
1140     if (!aa)
1141         return NULL;
1142 
1143     kref_init(&aa->ref);
1144     i915_active_init(&aa->base, auto_active, auto_retire, 0);
1145 
1146     return &aa->base;
1147 }
1148 
1149 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1150 #include "selftests/i915_active.c"
1151 #endif
1152 
1153 void i915_active_module_exit(void)
1154 {
1155     kmem_cache_destroy(slab_cache);
1156 }
1157 
1158 int __init i915_active_module_init(void)
1159 {
1160     slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
1161     if (!slab_cache)
1162         return -ENOMEM;
1163 
1164     return 0;
1165 }