/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include <drm/drm_cache.h>
#include "radeon.h"
#include "radeon_trace.h"


int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it.
 */

static void radeon_update_memory_usage(struct radeon_bo *bo,
                       unsigned mem_type, int sign)
{
    struct radeon_device *rdev = bo->rdev;
    u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

    switch (mem_type) {
    case TTM_PL_TT:
        if (sign > 0)
            atomic64_add(size, &rdev->gtt_usage);
        else
            atomic64_sub(size, &rdev->gtt_usage);
        break;
    case TTM_PL_VRAM:
        if (sign > 0)
            atomic64_add(size, &rdev->vram_usage);
        else
            atomic64_sub(size, &rdev->vram_usage);
        break;
    }
}

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
    struct radeon_bo *bo;

    bo = container_of(tbo, struct radeon_bo, tbo);

    radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);

    mutex_lock(&bo->rdev->gem.mutex);
    list_del_init(&bo->list);
    mutex_unlock(&bo->rdev->gem.mutex);
    radeon_bo_clear_surface_reg(bo);
    WARN_ON(!list_empty(&bo->va));
    drm_gem_object_release(&bo->gem_base);
    kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
    if (bo->destroy == &radeon_ttm_bo_destroy)
        return true;
    return false;
}

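/**
 * radeon_ttm_placement_from_domain - set up TTM placements for a domain mask
 *
 * @rbo: buffer object to set the placements for
 * @domain: bitmask of RADEON_GEM_DOMAIN_VRAM, _GTT and _CPU
 *
 * Fills rbo->placements according to the requested domains and the BO's
 * creation flags, falling back to a system placement when no domain bit
 * is set.
 */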
void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
    u32 c = 0, i;

    rbo->placement.placement = rbo->placements;
    rbo->placement.busy_placement = rbo->placements;
    if (domain & RADEON_GEM_DOMAIN_VRAM) {
        /* Try placing BOs which don't need CPU access outside of the
         * CPU accessible part of VRAM
         */
        if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
            rbo->placements[c].fpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                             TTM_PL_FLAG_UNCACHED |
                             TTM_PL_FLAG_VRAM;
        }

        rbo->placements[c].fpfn = 0;
        rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                         TTM_PL_FLAG_UNCACHED |
                         TTM_PL_FLAG_VRAM;
    }

    if (domain & RADEON_GEM_DOMAIN_GTT) {
        if (rbo->flags & RADEON_GEM_GTT_UC) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
                TTM_PL_FLAG_TT;

        } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
               (rbo->rdev->flags & RADEON_IS_AGP)) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                TTM_PL_FLAG_UNCACHED |
                TTM_PL_FLAG_TT;
        } else {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
                             TTM_PL_FLAG_TT;
        }
    }

    if (domain & RADEON_GEM_DOMAIN_CPU) {
        if (rbo->flags & RADEON_GEM_GTT_UC) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
                TTM_PL_FLAG_SYSTEM;

        } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
            rbo->rdev->flags & RADEON_IS_AGP) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                TTM_PL_FLAG_UNCACHED |
                TTM_PL_FLAG_SYSTEM;
        } else {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
                             TTM_PL_FLAG_SYSTEM;
        }
    }
    if (!c) {
        rbo->placements[c].fpfn = 0;
        rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
                         TTM_PL_FLAG_SYSTEM;
    }

    rbo->placement.num_placement = c;
    rbo->placement.num_busy_placement = c;

    for (i = 0; i < c; ++i) {
        if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
            (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            !rbo->placements[i].fpfn)
            rbo->placements[i].lpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            rbo->placements[i].lpfn = 0;
    }
}

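/**
 * radeon_bo_create - allocate and initialize a radeon buffer object
 *
 * @rdev: radeon device the buffer belongs to
 * @size: requested size in bytes, rounded up to the page size
 * @byte_align: alignment requirement in bytes
 * @kernel: true for kernel-internal allocations (validated uninterruptibly)
 * @domain: initial placement domain(s)
 * @flags: RADEON_GEM_* creation flags
 * @sg: optional scatter/gather table for imported buffers
 * @resv: optional reservation object to attach to the new buffer
 * @bo_ptr: where the resulting buffer object is returned
 *
 * Returns 0 on success, negative error code on failure.
 */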
int radeon_bo_create(struct radeon_device *rdev,
             unsigned long size, int byte_align, bool kernel,
             u32 domain, u32 flags, struct sg_table *sg,
             struct reservation_object *resv,
             struct radeon_bo **bo_ptr)
{
    struct radeon_bo *bo;
    enum ttm_bo_type type;
    unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
    size_t acc_size;
    int r;

    size = ALIGN(size, PAGE_SIZE);

    if (kernel) {
        type = ttm_bo_type_kernel;
    } else if (sg) {
        type = ttm_bo_type_sg;
    } else {
        type = ttm_bo_type_device;
    }
    *bo_ptr = NULL;

    acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
                       sizeof(struct radeon_bo));

    bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
    if (bo == NULL)
        return -ENOMEM;
    r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
    if (unlikely(r)) {
        kfree(bo);
        return r;
    }
    bo->rdev = rdev;
    bo->surface_reg = -1;
    INIT_LIST_HEAD(&bo->list);
    INIT_LIST_HEAD(&bo->va);
    bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
                       RADEON_GEM_DOMAIN_GTT |
                       RADEON_GEM_DOMAIN_CPU);

    bo->flags = flags;
    /* PCI GART is always snooped */
    if (!(rdev->flags & RADEON_IS_PCIE))
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

    /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
     * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
     */
    if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
    /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
     * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
     */
    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
    /* Don't try to enable write-combining when it can't work, or things
     * may be slow
     * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
     */

#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
     thanks to write-combining

    if (bo->flags & RADEON_GEM_GTT_WC)
        DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
                  "better performance thanks to write-combining\n");
    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#else
    /* For architectures that don't support WC memory,
     * mask out the WC flag from the BO
     */
    if (!drm_arch_can_wc_memory())
        bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

    radeon_ttm_placement_from_domain(bo, domain);
    /* Kernel allocations are uninterruptible */
    down_read(&rdev->pm.mclk_lock);
    r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
            &bo->placement, page_align, !kernel, NULL,
            acc_size, sg, resv, &radeon_ttm_bo_destroy);
    up_read(&rdev->pm.mclk_lock);
    if (unlikely(r != 0)) {
        return r;
    }
    *bo_ptr = bo;

    trace_radeon_bo_create(bo);

    return 0;
}

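/**
 * radeon_bo_kmap - map a buffer object into kernel address space
 *
 * @bo: buffer object to map
 * @ptr: where to return the kernel virtual address, may be NULL
 *
 * Reuses an existing kernel mapping if one is already in place.
 * Returns 0 on success, negative error code on failure.
 */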
int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
    bool is_iomem;
    int r;

    if (bo->kptr) {
        if (ptr) {
            *ptr = bo->kptr;
        }
        return 0;
    }
    r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
    if (r) {
        return r;
    }
    bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
    if (ptr) {
        *ptr = bo->kptr;
    }
    radeon_bo_check_tiling(bo, 0, 0);
    return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
    if (bo->kptr == NULL)
        return;
    bo->kptr = NULL;
    radeon_bo_check_tiling(bo, 0, 0);
    ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
    if (bo == NULL)
        return NULL;

    ttm_bo_reference(&bo->tbo);
    return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
    struct ttm_buffer_object *tbo;
    struct radeon_device *rdev;

    if ((*bo) == NULL)
        return;
    rdev = (*bo)->rdev;
    tbo = &((*bo)->tbo);
    ttm_bo_unref(&tbo);
    if (tbo == NULL)
        *bo = NULL;
}

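/**
 * radeon_bo_pin_restricted - pin a buffer object inside an address range
 *
 * @bo: buffer object to pin
 * @domain: domain to pin the buffer into
 * @max_offset: highest acceptable GPU offset, or 0 for no restriction
 * @gpu_addr: optional return of the buffer's GPU address
 *
 * Bumps the pin count if the buffer is already pinned, otherwise validates
 * it into the requested domain with TTM_PL_FLAG_NO_EVICT set.
 * Returns 0 on success, negative error code on failure.
 */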
int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
                 u64 *gpu_addr)
{
    int r, i;

    if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
        return -EPERM;

    if (bo->pin_count) {
        bo->pin_count++;
        if (gpu_addr)
            *gpu_addr = radeon_bo_gpu_offset(bo);

        if (max_offset != 0) {
            u64 domain_start;

            if (domain == RADEON_GEM_DOMAIN_VRAM)
                domain_start = bo->rdev->mc.vram_start;
            else
                domain_start = bo->rdev->mc.gtt_start;
            WARN_ON_ONCE(max_offset <
                     (radeon_bo_gpu_offset(bo) - domain_start));
        }

        return 0;
    }
    radeon_ttm_placement_from_domain(bo, domain);
    for (i = 0; i < bo->placement.num_placement; i++) {
        /* force to pin into visible video ram */
        if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
            bo->placements[i].lpfn =
                bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

        bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
    }

    r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
    if (likely(r == 0)) {
        bo->pin_count = 1;
        if (gpu_addr != NULL)
            *gpu_addr = radeon_bo_gpu_offset(bo);
        if (domain == RADEON_GEM_DOMAIN_VRAM)
            bo->rdev->vram_pin_size += radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size += radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p pin failed\n", bo);
    }
    return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
    return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

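/**
 * radeon_bo_unpin - decrement the pin count of a buffer object
 *
 * @bo: buffer object to unpin
 *
 * Once the pin count drops to zero the buffer becomes evictable again and
 * the pinned-size accounting is updated.
 * Returns 0 on success, negative error code on failure.
 */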
int radeon_bo_unpin(struct radeon_bo *bo)
{
    int r, i;

    if (!bo->pin_count) {
        dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
        return 0;
    }
    bo->pin_count--;
    if (bo->pin_count)
        return 0;
    for (i = 0; i < bo->placement.num_placement; i++) {
        bo->placements[i].lpfn = 0;
        bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
    }
    r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
    if (likely(r == 0)) {
        if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
            bo->rdev->vram_pin_size -= radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size -= radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
    }
    return r;
}

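/**
 * radeon_bo_evict_vram - evict all buffer objects from VRAM
 *
 * @rdev: radeon device
 *
 * Asks TTM to evict everything currently resident in the VRAM domain.
 * Returns 0 on success, negative error code on failure.
 */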
int radeon_bo_evict_vram(struct radeon_device *rdev)
{
    /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
    if (0 && (rdev->flags & RADEON_IS_IGP)) {
        if (rdev->mc.igp_sideport_enabled == false)
            /* Useless to evict on IGP chips */
            return 0;
    }
    return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
}

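/**
 * radeon_bo_force_delete - force-free leftover GEM objects
 *
 * @rdev: radeon device
 *
 * Last-resort cleanup for buffer objects that userspace failed to release;
 * each offender is logged before its GEM reference is dropped.
 */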
void radeon_bo_force_delete(struct radeon_device *rdev)
{
    struct radeon_bo *bo, *n;

    if (list_empty(&rdev->gem.objects)) {
        return;
    }
    dev_err(rdev->dev, "Userspace still has active objects!\n");
    list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
        dev_err(rdev->dev, "%p %p %lu %lu force free\n",
            &bo->gem_base, bo, (unsigned long)bo->gem_base.size,
            *((unsigned long *)&bo->gem_base.refcount));
        mutex_lock(&bo->rdev->gem.mutex);
        list_del_init(&bo->list);
        mutex_unlock(&bo->rdev->gem.mutex);
        /* this should unref the ttm bo */
        drm_gem_object_unreference_unlocked(&bo->gem_base);
    }
}

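/**
 * radeon_bo_init - initialize buffer object handling
 *
 * @rdev: radeon device
 *
 * Reserves the VRAM aperture as write-combined and initializes TTM.
 * Returns 0 on success, negative error code on failure.
 */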
int radeon_bo_init(struct radeon_device *rdev)
{
    /* reserve PAT memory space to WC for VRAM */
    arch_io_reserve_memtype_wc(rdev->mc.aper_base,
                   rdev->mc.aper_size);

    /* Add an MTRR for the VRAM */
    if (!rdev->fastfb_working) {
        rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
                              rdev->mc.aper_size);
    }
    DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
        rdev->mc.mc_vram_size >> 20,
        (unsigned long long)rdev->mc.aper_size >> 20);
    DRM_INFO("RAM width %dbits %cDR\n",
            rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
    return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
    radeon_ttm_fini(rdev);
    arch_phys_wc_del(rdev->mc.vram_mtrr);
    arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
    u64 real_vram_size = rdev->mc.real_vram_size;
    u64 vram_usage = atomic64_read(&rdev->vram_usage);

    /* This function is based on the current VRAM usage.
     *
     * - If all of VRAM is free, allow relocating the number of bytes that
     *   is equal to 1/4 of the size of VRAM for this IB.
     *
     * - If more than one half of VRAM is occupied, only allow relocating
     *   1 MB of data for this IB.
     *
     * - From 0 to one half of used VRAM, the threshold decreases
     *   linearly.
     *         __________________
     * 1/4 of -|\               |
     * VRAM    | \              |
     *         |  \             |
     *         |   \            |
     *         |    \           |
     *         |     \          |
     *         |      \         |
     *         |       \________|1 MB
     *         |----------------|
     *    VRAM 0 %             100 %
     *         used            used
     *
     * Note: It's a threshold, not a limit. The threshold must be crossed
     * for buffer relocations to stop, so any buffer of an arbitrary size
     * can be moved as long as the threshold isn't crossed before
     * the relocation takes place. We don't want to disable buffer
     * relocations completely.
     *
     * The idea is that buffers should be placed in VRAM at creation time
     * and TTM should only do a minimum number of relocations during
     * command submission. In practice, you need to submit at least
     * a dozen IBs to move all buffers to VRAM if they are in GTT.
     *
     * Also, things can get pretty crazy under memory pressure and actual
     * VRAM usage can change a lot, so playing safe even at 50% does
     * consistently increase performance.
     */

    u64 half_vram = real_vram_size >> 1;
    u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
    u64 bytes_moved_threshold = half_free_vram >> 1;
    return max(bytes_moved_threshold, 1024*1024ull);
}

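/**
 * radeon_bo_list_validate - reserve and validate a list of buffer objects
 *
 * @rdev: radeon device
 * @ticket: ww acquire context used to reserve the buffers
 * @head: list of radeon_bo_list entries to validate
 * @ring: index of the ring the buffers will be used on
 *
 * Reserves every buffer on the list, then validates each unpinned buffer
 * into its preferred domain; once too many bytes have been moved for this
 * IB, buffers are left where they are, and on validation failure the
 * allowed domains are tried instead.
 * Returns 0 on success, negative error code on failure.
 */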
int radeon_bo_list_validate(struct radeon_device *rdev,
                struct ww_acquire_ctx *ticket,
                struct list_head *head, int ring)
{
    struct radeon_bo_list *lobj;
    struct list_head duplicates;
    int r;
    u64 bytes_moved = 0, initial_bytes_moved;
    u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

    INIT_LIST_HEAD(&duplicates);
    r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
    if (unlikely(r != 0)) {
        return r;
    }

    list_for_each_entry(lobj, head, tv.head) {
        struct radeon_bo *bo = lobj->robj;
        if (!bo->pin_count) {
            u32 domain = lobj->prefered_domains;
            u32 allowed = lobj->allowed_domains;
            u32 current_domain =
                radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

            /* Check if this buffer will be moved and don't move it
             * if we have moved too many buffers for this IB already.
             *
             * Note that this allows moving at least one buffer of
             * any size, because it doesn't take the current "bo"
             * into account. We don't want to disallow buffer moves
             * completely.
             */
            if ((allowed & current_domain) != 0 &&
                (domain & current_domain) == 0 && /* will be moved */
                bytes_moved > bytes_moved_threshold) {
                /* don't move it */
                domain = current_domain;
            }

        retry:
            radeon_ttm_placement_from_domain(bo, domain);
            if (ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_force_into_uvd_segment(bo, allowed);

            initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
            r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
            bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
                       initial_bytes_moved;

            if (unlikely(r)) {
                if (r != -ERESTARTSYS &&
                    domain != lobj->allowed_domains) {
                    domain = lobj->allowed_domains;
                    goto retry;
                }
                ttm_eu_backoff_reservation(ticket, head);
                return r;
            }
        }
        lobj->gpu_offset = radeon_bo_gpu_offset(bo);
        lobj->tiling_flags = bo->tiling_flags;
    }

    list_for_each_entry(lobj, &duplicates, tv.head) {
        lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
        lobj->tiling_flags = lobj->robj->tiling_flags;
    }

    return 0;
}

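/**
 * radeon_bo_get_surface_reg - assign a surface register to a tiled BO
 *
 * @bo: buffer object (must be reserved)
 *
 * Picks a free surface register, stealing one from an unpinned buffer if
 * they are all in use, and programs it with the buffer's tiling setup.
 * Returns 0 on success, -ENOMEM if no register can be freed.
 */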
int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;
    struct radeon_bo *old_object;
    int steal;
    int i;

    lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (!bo->tiling_flags)
        return 0;

    if (bo->surface_reg >= 0) {
        reg = &rdev->surface_regs[bo->surface_reg];
        i = bo->surface_reg;
        goto out;
    }

    steal = -1;
    for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

        reg = &rdev->surface_regs[i];
        if (!reg->bo)
            break;

        old_object = reg->bo;
        if (old_object->pin_count == 0)
            steal = i;
    }

    /* if we are all out */
    if (i == RADEON_GEM_MAX_SURFACES) {
        if (steal == -1)
            return -ENOMEM;
        /* find someone with a surface reg and nuke their BO */
        reg = &rdev->surface_regs[steal];
        old_object = reg->bo;
        /* blow away the mapping */
        DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
        ttm_bo_unmap_virtual(&old_object->tbo);
        old_object->surface_reg = -1;
        i = steal;
    }

    bo->surface_reg = i;
    reg->bo = bo;

out:
    radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
                   bo->tbo.mem.start << PAGE_SHIFT,
                   bo->tbo.num_pages << PAGE_SHIFT);
    return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;

    if (bo->surface_reg == -1)
        return;

    reg = &rdev->surface_regs[bo->surface_reg];
    radeon_clear_surface_reg(rdev, bo->surface_reg);

    reg->bo = NULL;
    bo->surface_reg = -1;
}

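/**
 * radeon_bo_set_tiling_flags - set the tiling parameters of a buffer object
 *
 * @bo: buffer object to update
 * @tiling_flags: RADEON_TILING_* flags to apply
 * @pitch: surface pitch
 *
 * Validates the Evergreen-specific bank/tile-split parameters before
 * storing the new values.
 * Returns 0 on success, negative error code on failure.
 */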
int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
                uint32_t tiling_flags, uint32_t pitch)
{
    struct radeon_device *rdev = bo->rdev;
    int r;

    if (rdev->family >= CHIP_CEDAR) {
        unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

        bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
        bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
        mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
        tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
        stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
        switch (bankw) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (bankh) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (mtaspect) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        if (tilesplit > 6) {
            return -EINVAL;
        }
        if (stilesplit > 6) {
            return -EINVAL;
        }
    }
    r = radeon_bo_reserve(bo, false);
    if (unlikely(r != 0))
        return r;
    bo->tiling_flags = tiling_flags;
    bo->pitch = pitch;
    radeon_bo_unreserve(bo);
    return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
                uint32_t *tiling_flags,
                uint32_t *pitch)
{
    lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (tiling_flags)
        *tiling_flags = bo->tiling_flags;
    if (pitch)
        *pitch = bo->pitch;
}

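/**
 * radeon_bo_check_tiling - update surface register state for a buffer
 *
 * @bo: buffer object to check
 * @has_moved: true if the buffer has just changed placement
 * @force_drop: true to unconditionally release the surface register
 *
 * Acquires or releases the buffer's surface register depending on whether
 * it is tiled and currently resides in VRAM.
 * Returns 0 on success, negative error code on failure.
 */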
int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
                bool force_drop)
{
    if (!force_drop)
        lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
        return 0;

    if (force_drop) {
        radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
        if (!has_moved)
            return 0;

        if (bo->surface_reg >= 0)
            radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if ((bo->surface_reg >= 0) && !has_moved)
        return 0;

    return radeon_bo_get_surface_reg(bo);
}

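/**
 * radeon_bo_move_notify - TTM move notification callback
 *
 * @bo: TTM buffer object being moved
 * @new_mem: new placement, or NULL when the buffer is being destroyed
 *
 * Drops the surface register if needed, invalidates VM mappings and
 * updates the per-domain memory usage statistics.
 */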
void radeon_bo_move_notify(struct ttm_buffer_object *bo,
               struct ttm_mem_reg *new_mem)
{
    struct radeon_bo *rbo;

    if (!radeon_ttm_bo_is_radeon_bo(bo))
        return;

    rbo = container_of(bo, struct radeon_bo, tbo);
    radeon_bo_check_tiling(rbo, 0, 1);
    radeon_vm_bo_invalidate(rbo->rdev, rbo);

    /* update statistics */
    if (!new_mem)
        return;

    radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
    radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

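/**
 * radeon_bo_fault_reserve_notify - handle a CPU fault on a buffer object
 *
 * @bo: TTM buffer object that faulted
 *
 * Moves a buffer that lies outside the CPU-visible part of VRAM into
 * visible VRAM, or into GTT if that fails, so the fault can be served.
 * Returns 0 on success, negative error code on failure.
 */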
int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
    struct radeon_device *rdev;
    struct radeon_bo *rbo;
    unsigned long offset, size, lpfn;
    int i, r;

    if (!radeon_ttm_bo_is_radeon_bo(bo))
        return 0;
    rbo = container_of(bo, struct radeon_bo, tbo);
    radeon_bo_check_tiling(rbo, 0, 0);
    rdev = rbo->rdev;
    if (bo->mem.mem_type != TTM_PL_VRAM)
        return 0;

    size = bo->mem.num_pages << PAGE_SHIFT;
    offset = bo->mem.start << PAGE_SHIFT;
    if ((offset + size) <= rdev->mc.visible_vram_size)
        return 0;

    /* Can't move a pinned BO to visible VRAM */
    if (rbo->pin_count > 0)
        return -EINVAL;

    /* hurrah, the memory is not visible! */
    radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
    lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
    for (i = 0; i < rbo->placement.num_placement; i++) {
        /* Force into visible VRAM */
        if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
            rbo->placements[i].lpfn = lpfn;
    }
    r = ttm_bo_validate(bo, &rbo->placement, false, false);
    if (unlikely(r == -ENOMEM)) {
        radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
        return ttm_bo_validate(bo, &rbo->placement, false, false);
    } else if (unlikely(r != 0)) {
        return r;
    }

    offset = bo->mem.start << PAGE_SHIFT;
    /* this should never happen */
    if ((offset + size) > rdev->mc.visible_vram_size)
        return -EINVAL;

    return 0;
}

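/**
 * radeon_bo_wait - wait for a buffer object to become idle
 *
 * @bo: buffer object to wait for
 * @mem_type: optional return of the buffer's current memory type
 * @no_wait: if true, only check whether the buffer is busy
 *
 * Returns 0 on success, negative error code on failure.
 */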
int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
    int r;

    r = ttm_bo_reserve(&bo->tbo, true, no_wait, NULL);
    if (unlikely(r != 0))
        return r;
    if (mem_type)
        *mem_type = bo->tbo.mem.mem_type;

    r = ttm_bo_wait(&bo->tbo, true, no_wait);
    ttm_bo_unreserve(&bo->tbo);
    return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
             bool shared)
{
    struct reservation_object *resv = bo->tbo.resv;

    if (shared)
        reservation_object_add_shared_fence(resv, &fence->base);
    else
        reservation_object_add_excl_fence(resv, &fence->base);
}