/*
 * Copyright 2011 Red Hat Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
/* Algorithm:
 *
 * We store the last allocated bo in "hole"; we always try to allocate
 * after the last allocated bo. The principle is that in a linear GPU ring
 * progression, what comes after the last bo is the oldest bo we allocated
 * and thus the first one that should no longer be in use by the GPU.
 *
 * If that is not the case, we skip over the bo after last to the closest
 * completed bo, if one exists. If none exists and we are not asked to
 * block, we report failure to allocate.
 *
 * If we are asked to block, we wait on the oldest fence of each ring and
 * return as soon as any of those fences completes.
 */
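
/*
 * Typical caller flow (an illustrative sketch, not part of this file; the
 * sa_manager pointer, the sizes and the fence variable are assumptions):
 *
 *     struct amdgpu_sa_bo *sa_bo;
 *     int r;
 *
 *     r = amdgpu_sa_bo_new(sa_manager, &sa_bo, 256, 256);
 *     if (r)
 *         return r;
 *     // the sub-allocation covers [soffset, eoffset) inside the manager's
 *     // bo, i.e. GPU address sa_manager->gpu_addr + sa_bo->soffset
 *     // ... submit GPU work that uses this range and produces `fence` ...
 *     amdgpu_sa_bo_free(adev, &sa_bo, fence);
 */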

#include "amdgpu.h"

static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);

int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
                  struct amdgpu_sa_manager *sa_manager,
                  unsigned size, u32 align, u32 domain)
{
    int i, r;

    init_waitqueue_head(&sa_manager->wq);
    sa_manager->bo = NULL;
    sa_manager->size = size;
    sa_manager->domain = domain;
    sa_manager->align = align;
    sa_manager->hole = &sa_manager->olist;
    INIT_LIST_HEAD(&sa_manager->olist);
    for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
        INIT_LIST_HEAD(&sa_manager->flist[i]);

    r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
                &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
    if (r) {
        dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
        return r;
    }

    memset(sa_manager->cpu_ptr, 0, sa_manager->size);
    return r;
}
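
/*
 * Example pool setup (a minimal sketch; the 256 KiB size, 256-byte alignment
 * and GTT domain are illustrative assumptions, not requirements of this API):
 *
 *     struct amdgpu_sa_manager ib_pool;
 *     int r;
 *
 *     r = amdgpu_sa_bo_manager_init(adev, &ib_pool, 256 * 1024, 256,
 *                                   AMDGPU_GEM_DOMAIN_GTT);
 *     if (r)
 *         return r;
 *     // ... sub-allocate from ib_pool via amdgpu_sa_bo_new() ...
 *     amdgpu_sa_bo_manager_fini(adev, &ib_pool);
 */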

void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
                   struct amdgpu_sa_manager *sa_manager)
{
    struct amdgpu_sa_bo *sa_bo, *tmp;

    if (sa_manager->bo == NULL) {
        dev_err(adev->dev, "no bo for sa manager\n");
        return;
    }

    if (!list_empty(&sa_manager->olist)) {
        sa_manager->hole = &sa_manager->olist;
        amdgpu_sa_bo_try_free(sa_manager);
        if (!list_empty(&sa_manager->olist)) {
            dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
        }
    }
    list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
        amdgpu_sa_bo_remove_locked(sa_bo);
    }

    amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
    sa_manager->size = 0;
}

static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{
    struct amdgpu_sa_manager *sa_manager = sa_bo->manager;

    if (sa_manager->hole == &sa_bo->olist) {
        sa_manager->hole = sa_bo->olist.prev;
    }
    list_del_init(&sa_bo->olist);
    list_del_init(&sa_bo->flist);
    dma_fence_put(sa_bo->fence);
    kfree(sa_bo);
}

static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
{
    struct amdgpu_sa_bo *sa_bo, *tmp;

    if (sa_manager->hole->next == &sa_manager->olist)
        return;

    sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
    list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
        if (sa_bo->fence == NULL ||
            !dma_fence_is_signaled(sa_bo->fence)) {
            return;
        }
        amdgpu_sa_bo_remove_locked(sa_bo);
    }
}

static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
{
    struct list_head *hole = sa_manager->hole;

    if (hole != &sa_manager->olist) {
        return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
    }
    return 0;
}

static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
{
    struct list_head *hole = sa_manager->hole;

    if (hole->next != &sa_manager->olist) {
        return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
    }
    return sa_manager->size;
}

static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
                   struct amdgpu_sa_bo *sa_bo,
                   unsigned size, unsigned align)
{
    unsigned soffset, eoffset, wasted;

    soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
    eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
    wasted = (align - (soffset % align)) % align;

    if ((eoffset - soffset) >= (size + wasted)) {
        soffset += wasted;

        sa_bo->manager = sa_manager;
        sa_bo->soffset = soffset;
        sa_bo->eoffset = soffset + size;
        list_add(&sa_bo->olist, sa_manager->hole);
        INIT_LIST_HEAD(&sa_bo->flist);
        sa_manager->hole = &sa_bo->olist;
        return true;
    }
    return false;
}
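
/*
 * Worked example for the alignment handling above (numbers are illustrative):
 * with soffset = 0x404, eoffset = 0x1000, align = 0x100 and size = 0x200,
 * wasted = (0x100 - 0x404 % 0x100) % 0x100 = 0xfc. The hole offers
 * 0x1000 - 0x404 = 0xbfc bytes, which is enough for size + wasted = 0x2fc,
 * so the new sa_bo spans [0x500, 0x700).
 */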

/**
 * amdgpu_sa_event - Check if we can stop waiting
 *
 * @sa_manager: pointer to the sa_manager
 * @size: number of bytes we want to allocate
 * @align: alignment we need to match
 *
 * Check if either there is a fence we can wait for, or there is enough
 * free memory to satisfy the allocation directly.
 */
static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
                unsigned size, unsigned align)
{
    unsigned soffset, eoffset, wasted;
    int i;

    for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
        if (!list_empty(&sa_manager->flist[i]))
            return true;

    soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
    eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
    wasted = (align - (soffset % align)) % align;

    if ((eoffset - soffset) >= (size + wasted)) {
        return true;
    }

    return false;
}

static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
                   struct dma_fence **fences,
                   unsigned *tries)
{
    struct amdgpu_sa_bo *best_bo = NULL;
    unsigned i, soffset, best, tmp;

    /* if hole points to the end of the buffer */
    if (sa_manager->hole->next == &sa_manager->olist) {
        /* try again with its beginning */
        sa_manager->hole = &sa_manager->olist;
        return true;
    }

    soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
    /* to handle wrap around we add sa_manager->size */
    best = sa_manager->size * 2;
    /* go over all fence lists and try to find the sa_bo closest to
     * the end of the current hole
     */
    for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
        struct amdgpu_sa_bo *sa_bo;

        fences[i] = NULL;

        if (list_empty(&sa_manager->flist[i]))
            continue;

        sa_bo = list_first_entry(&sa_manager->flist[i],
                     struct amdgpu_sa_bo, flist);

        if (!dma_fence_is_signaled(sa_bo->fence)) {
            fences[i] = sa_bo->fence;
            continue;
        }

        /* limit the number of tries each ring gets */
        if (tries[i] > 2) {
            continue;
        }

        tmp = sa_bo->soffset;
        if (tmp < soffset) {
            /* wrap around, pretend it's after */
            tmp += sa_manager->size;
        }
        tmp -= soffset;
        if (tmp < best) {
            /* this sa bo is the closest one */
            best = tmp;
            best_bo = sa_bo;
        }
    }

    if (best_bo) {
        uint32_t idx = best_bo->fence->context;

        idx %= AMDGPU_SA_NUM_FENCE_LISTS;
        ++tries[idx];
        sa_manager->hole = best_bo->olist.prev;

        /* we know that this one is signaled,
         * so it's safe to remove it
         */
        amdgpu_sa_bo_remove_locked(best_bo);
        return true;
    }
    return false;
}
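
/*
 * Worked example for the wrap-around handling above (illustrative numbers):
 * with sa_manager->size = 0x4000 and the hole ending at soffset = 0x3000,
 * a signaled sa_bo at offset 0x1000 is logically "after" the hole, so its
 * distance is 0x1000 + 0x4000 - 0x3000 = 0x2000; a sa_bo at offset 0x3800
 * has distance 0x800 and would be preferred as the closer one.
 */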

int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
             struct amdgpu_sa_bo **sa_bo,
             unsigned size, unsigned align)
{
    struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
    unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
    unsigned count;
    int i, r;
    signed long t;

    if (WARN_ON_ONCE(align > sa_manager->align))
        return -EINVAL;

    if (WARN_ON_ONCE(size > sa_manager->size))
        return -EINVAL;

    *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
    if (!(*sa_bo))
        return -ENOMEM;
    (*sa_bo)->manager = sa_manager;
    (*sa_bo)->fence = NULL;
    INIT_LIST_HEAD(&(*sa_bo)->olist);
    INIT_LIST_HEAD(&(*sa_bo)->flist);

    spin_lock(&sa_manager->wq.lock);
    do {
        for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
            tries[i] = 0;

        do {
            amdgpu_sa_bo_try_free(sa_manager);

            if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
                           size, align)) {
                spin_unlock(&sa_manager->wq.lock);
                return 0;
            }

            /* see if we can skip over some allocations */
        } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));

        for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
            if (fences[i])
                fences[count++] = dma_fence_get(fences[i]);

        if (count) {
            spin_unlock(&sa_manager->wq.lock);
            t = dma_fence_wait_any_timeout(fences, count, false,
                               MAX_SCHEDULE_TIMEOUT,
                               NULL);
            for (i = 0; i < count; ++i)
                dma_fence_put(fences[i]);

            r = (t > 0) ? 0 : t;
            spin_lock(&sa_manager->wq.lock);
        } else {
            /* if we have nothing to wait for, block */
            r = wait_event_interruptible_locked(
                sa_manager->wq,
                amdgpu_sa_event(sa_manager, size, align)
            );
        }

    } while (!r);

    spin_unlock(&sa_manager->wq.lock);
    kfree(*sa_bo);
    *sa_bo = NULL;
    return r;
}

void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
               struct dma_fence *fence)
{
    struct amdgpu_sa_manager *sa_manager;

    if (sa_bo == NULL || *sa_bo == NULL) {
        return;
    }

    sa_manager = (*sa_bo)->manager;
    spin_lock(&sa_manager->wq.lock);
    if (fence && !dma_fence_is_signaled(fence)) {
        uint32_t idx;

        (*sa_bo)->fence = dma_fence_get(fence);
        idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
        list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
    } else {
        amdgpu_sa_bo_remove_locked(*sa_bo);
    }
    wake_up_all_locked(&sa_manager->wq);
    spin_unlock(&sa_manager->wq.lock);
    *sa_bo = NULL;
}
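
/*
 * Note on the fence lists used above: a freed but still busy sa_bo is queued
 * on flist[fence->context % AMDGPU_SA_NUM_FENCE_LISTS], so each list stays
 * ordered by submission for its context and amdgpu_sa_bo_next_hole() only
 * needs to look at the first entry of every list.
 */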

#if defined(CONFIG_DEBUG_FS)

void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
                  struct seq_file *m)
{
    struct amdgpu_sa_bo *i;

    spin_lock(&sa_manager->wq.lock);
    list_for_each_entry(i, &sa_manager->olist, olist) {
        uint64_t soffset = i->soffset + sa_manager->gpu_addr;
        uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;

        if (&i->olist == sa_manager->hole) {
            seq_printf(m, ">");
        } else {
            seq_printf(m, " ");
        }
        seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
               soffset, eoffset, eoffset - soffset);

        if (i->fence)
            seq_printf(m, " protected by 0x%016llx on context %llu",
                   i->fence->seqno, i->fence->context);

        seq_printf(m, "\n");
    }
    spin_unlock(&sa_manager->wq.lock);
}
#endif