/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <linux/dma-fence-chain.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
    struct hlist_node   node;
    struct dma_fence    *fence;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
    hash_init(sync->fences);
}
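
/*
 * Overview of the sync object life cycle (summary of the functions in this
 * file): a sync object is initialized with amdgpu_sync_create(), filled with
 * dependencies via amdgpu_sync_fence() and/or amdgpu_sync_resv(), drained
 * with amdgpu_sync_peek_fence(), amdgpu_sync_get_fence() or
 * amdgpu_sync_wait(), and finally torn down with amdgpu_sync_free().
 */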

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
                                 struct dma_fence *f)
{
    struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

    if (s_fence) {
        struct amdgpu_ring *ring;

        ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
        return ring->adev == adev;
    }

    return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
    struct drm_sched_fence *s_fence;
    struct amdgpu_amdkfd_fence *kfd_fence;

    if (!f)
        return AMDGPU_FENCE_OWNER_UNDEFINED;

    s_fence = to_drm_sched_fence(f);
    if (s_fence)
        return s_fence->owner;

    kfd_fence = to_amdgpu_amdkfd_fence(f);
    if (kfd_fence)
        return AMDGPU_FENCE_OWNER_KFD;

    return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
                                   struct dma_fence *fence)
{
    if (*keep && dma_fence_is_later(*keep, fence))
        return;

    dma_fence_put(*keep);
    *keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
    struct amdgpu_sync_entry *e;

    hash_for_each_possible(sync->fences, e, node, f->context) {
        if (unlikely(e->fence->context != f->context))
            continue;

        amdgpu_sync_keep_later(&e->fence, f);
        return true;
    }
    return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add the fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object. Returns 0 on success or -ENOMEM when
 * allocating a new entry fails.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
    struct amdgpu_sync_entry *e;

    if (!f)
        return 0;

    if (amdgpu_sync_add_later(sync, f))
        return 0;

    e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
    if (!e)
        return -ENOMEM;

    hash_add(sync->fences, &e->node, f->context);
    e->fence = dma_fence_get(f);
    return 0;
}
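
/*
 * Note on reference counting: amdgpu_sync_fence() takes its own reference on
 * the fence (dma_fence_get() for a new entry, or by replacing the reference
 * already held for the fence's context), so the caller keeps ownership of its
 * own reference.
 */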

/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
                                   enum amdgpu_sync_mode mode,
                                   void *owner, struct dma_fence *f)
{
    void *fence_owner = amdgpu_sync_get_owner(f);

    /* Always sync to moves, no matter what */
    if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
        return true;

    /* We only want to trigger KFD eviction fences on
     * evict or move jobs. Skip KFD fences otherwise.
     */
    if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
        owner != AMDGPU_FENCE_OWNER_UNDEFINED)
        return false;

    /* Never sync to VM updates either. */
    if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
        owner != AMDGPU_FENCE_OWNER_UNDEFINED)
        return false;

    /* Ignore fences depending on the sync mode */
    switch (mode) {
    case AMDGPU_SYNC_ALWAYS:
        return true;

    case AMDGPU_SYNC_NE_OWNER:
        if (amdgpu_sync_same_dev(adev, f) &&
            fence_owner == owner)
            return false;
        break;

    case AMDGPU_SYNC_EQ_OWNER:
        if (amdgpu_sync_same_dev(adev, f) &&
            fence_owner != owner)
            return false;
        break;

    case AMDGPU_SYNC_EXPLICIT:
        return false;
    }

    WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
         "Adding eviction fence to sync obj");
    return true;
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from the reservation object to
 * @resv: reservation object with embedded fences
 * @mode: how the owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Sync to all fences in the reservation object that match the given owner and
 * sync mode. Returns 0 on success or a negative error code on failure.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
                     struct dma_resv *resv, enum amdgpu_sync_mode mode,
                     void *owner)
{
    struct dma_resv_iter cursor;
    struct dma_fence *f;
    int r;

    if (resv == NULL)
        return -EINVAL;

    /* TODO: Use DMA_RESV_USAGE_READ here */
    dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
        dma_fence_chain_for_each(f, f) {
            struct dma_fence *tmp = dma_fence_chain_contained(f);

            if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
                r = amdgpu_sync_fence(sync, f);
                dma_fence_put(f);
                if (r)
                    return r;
                break;
            }
        }
    }
    return 0;
}
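
/*
 * Illustrative call of amdgpu_sync_resv() (a sketch; "resv" and "owner" are
 * assumed to be supplied by the caller, e.g. a buffer object's reservation
 * object and the owner of the planned submission):
 *
 *     r = amdgpu_sync_resv(adev, &sync, resv, AMDGPU_SYNC_NE_OWNER, owner);
 *     if (r)
 *         return r;
 */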

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
                                         struct amdgpu_ring *ring)
{
    struct amdgpu_sync_entry *e;
    struct hlist_node *tmp;
    int i;

    hash_for_each_safe(sync->fences, i, tmp, e, node) {
        struct dma_fence *f = e->fence;
        struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

        if (dma_fence_is_signaled(f)) {
            hash_del(&e->node);
            dma_fence_put(f);
            kmem_cache_free(amdgpu_sync_slab, e);
            continue;
        }
        if (ring && s_fence) {
            /* For fences from the same ring it is sufficient
             * when they are scheduled.
             */
            if (s_fence->sched == &ring->sched) {
                if (dma_fence_is_signaled(&s_fence->scheduled))
                    continue;

                return &s_fence->scheduled;
            }
        }

        return f;
    }

    return NULL;
}


/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 *
 * Gets and removes the next unsignaled fence from the sync object. The
 * reference to the returned fence is transferred to the caller.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
    struct amdgpu_sync_entry *e;
    struct hlist_node *tmp;
    struct dma_fence *f;
    int i;

    hash_for_each_safe(sync->fences, i, tmp, e, node) {
        f = e->fence;

        hash_del(&e->node);
        kmem_cache_free(amdgpu_sync_slab, e);

        if (!dma_fence_is_signaled(f))
            return f;

        dma_fence_put(f);
    }
    return NULL;
}
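
/*
 * A consumer typically drains the sync object in a loop and drops the
 * reference it receives for each fence (sketch):
 *
 *     while ((f = amdgpu_sync_get_fence(sync))) {
 *         ... use f as a dependency ...
 *         dma_fence_put(f);
 *     }
 */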

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
    struct amdgpu_sync_entry *e;
    struct hlist_node *tmp;
    struct dma_fence *f;
    int i, r;

    hash_for_each_safe(source->fences, i, tmp, e, node) {
        f = e->fence;
        if (!dma_fence_is_signaled(f)) {
            r = amdgpu_sync_fence(clone, f);
            if (r)
                return r;
        } else {
            hash_del(&e->node);
            dma_fence_put(f);
            kmem_cache_free(amdgpu_sync_slab, e);
        }
    }

    return 0;
}

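/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: if true, do an interruptible wait
 *
 * Waits for every fence in the sync object to signal, dropping each fence and
 * freeing its entry once it has signaled. Returns 0 when all fences have
 * signaled, or the error returned by dma_fence_wait() otherwise.
 */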
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
    struct amdgpu_sync_entry *e;
    struct hlist_node *tmp;
    int i, r;

    hash_for_each_safe(sync->fences, i, tmp, e, node) {
        r = dma_fence_wait(e->fence, intr);
        if (r)
            return r;

        hash_del(&e->node);
        dma_fence_put(e->fence);
        kmem_cache_free(amdgpu_sync_slab, e);
    }

    return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Drops all fence references held by the sync object and frees their entries.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
    struct amdgpu_sync_entry *e;
    struct hlist_node *tmp;
    unsigned int i;

    hash_for_each_safe(sync->fences, i, tmp, e, node) {
        hash_del(&e->node);
        dma_fence_put(e->fence);
        kmem_cache_free(amdgpu_sync_slab, e);
    }
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
    amdgpu_sync_slab = kmem_cache_create(
        "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
        SLAB_HWCACHE_ALIGN, NULL);
    if (!amdgpu_sync_slab)
        return -ENOMEM;

    return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
    kmem_cache_destroy(amdgpu_sync_slab);
}