/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <linux/io.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <drm/drm_cache.h>
#include <drm/drm_prime.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_trace.h"
#include "radeon_ttm.h"

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions operating on a BO call it first.
 */

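/*
 * A typical caller therefore brackets BO access like this (sketch only,
 * error handling elided):
 *
 *    r = radeon_bo_reserve(bo, false);
 *    if (r)
 *        return r;
 *    ... read or modify BO state ...
 *    radeon_bo_unreserve(bo);
 */
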
static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
    struct radeon_bo *bo;

    bo = container_of(tbo, struct radeon_bo, tbo);

    mutex_lock(&bo->rdev->gem.mutex);
    list_del_init(&bo->list);
    mutex_unlock(&bo->rdev->gem.mutex);
    radeon_bo_clear_surface_reg(bo);
    WARN_ON_ONCE(!list_empty(&bo->va));
    if (bo->tbo.base.import_attach)
        drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
    drm_gem_object_release(&bo->tbo.base);
    kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
    if (bo->destroy == &radeon_ttm_bo_destroy)
        return true;
    return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
    u32 c = 0, i;

    rbo->placement.placement = rbo->placements;
    rbo->placement.busy_placement = rbo->placements;
    if (domain & RADEON_GEM_DOMAIN_VRAM) {
        /* Try placing BOs which don't need CPU access outside of the
         * CPU accessible part of VRAM
         */
        if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
            rbo->placements[c].fpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
            rbo->placements[c].mem_type = TTM_PL_VRAM;
            rbo->placements[c++].flags = 0;
        }

        rbo->placements[c].fpfn = 0;
        rbo->placements[c].mem_type = TTM_PL_VRAM;
        rbo->placements[c++].flags = 0;
    }

    if (domain & RADEON_GEM_DOMAIN_GTT) {
        rbo->placements[c].fpfn = 0;
        rbo->placements[c].mem_type = TTM_PL_TT;
        rbo->placements[c++].flags = 0;
    }

    if (domain & RADEON_GEM_DOMAIN_CPU) {
        rbo->placements[c].fpfn = 0;
        rbo->placements[c].mem_type = TTM_PL_SYSTEM;
        rbo->placements[c++].flags = 0;
    }
    if (!c) {
        rbo->placements[c].fpfn = 0;
        rbo->placements[c].mem_type = TTM_PL_SYSTEM;
        rbo->placements[c++].flags = 0;
    }

    rbo->placement.num_placement = c;
    rbo->placement.num_busy_placement = c;

    for (i = 0; i < c; ++i) {
        if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
            (rbo->placements[i].mem_type == TTM_PL_VRAM) &&
            !rbo->placements[i].fpfn)
            rbo->placements[i].lpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            rbo->placements[i].lpfn = 0;
    }
}

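/*
 * Example of the resulting search order (derived from the code above):
 * a BO with domain VRAM|GTT and RADEON_GEM_NO_CPU_ACCESS set, on a board
 * where only part of VRAM is CPU-visible, gets three placements that TTM
 * tries in order: VRAM above the CPU-visible window, then any VRAM, then
 * GTT. Without RADEON_GEM_NO_CPU_ACCESS the first entry is omitted, and
 * RADEON_GEM_CPU_ACCESS additionally caps the unrestricted VRAM entry at
 * the visible aperture via lpfn.
 */
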
int radeon_bo_create(struct radeon_device *rdev,
             unsigned long size, int byte_align, bool kernel,
             u32 domain, u32 flags, struct sg_table *sg,
             struct dma_resv *resv,
             struct radeon_bo **bo_ptr)
{
    struct radeon_bo *bo;
    enum ttm_bo_type type;
    unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
    int r;

    size = ALIGN(size, PAGE_SIZE);

    if (kernel) {
        type = ttm_bo_type_kernel;
    } else if (sg) {
        type = ttm_bo_type_sg;
    } else {
        type = ttm_bo_type_device;
    }
    *bo_ptr = NULL;

    bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
    if (bo == NULL)
        return -ENOMEM;
    drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size);
    bo->rdev = rdev;
    bo->surface_reg = -1;
    INIT_LIST_HEAD(&bo->list);
    INIT_LIST_HEAD(&bo->va);
    bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
                       RADEON_GEM_DOMAIN_GTT |
                       RADEON_GEM_DOMAIN_CPU);

    bo->flags = flags;
    /* PCI GART is always snooped */
    if (!(rdev->flags & RADEON_IS_PCIE))
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

    /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
     * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
     */
    if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
    /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
     * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
     */
    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
    /* Don't try to enable write-combining when it can't work, or things
     * may be slow
     * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
     */
#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
     thanks to write-combining
#endif

    if (bo->flags & RADEON_GEM_GTT_WC)
        DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
                  "better performance thanks to write-combining\n");
    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#else
    /* For architectures that don't support WC memory,
     * mask out the WC flag from the BO
     */
    if (!drm_arch_can_wc_memory())
        bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

    radeon_ttm_placement_from_domain(bo, domain);
    /* Kernel allocations are uninterruptible */
    down_read(&rdev->pm.mclk_lock);
    r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
            &bo->placement, page_align, !kernel, sg, resv,
            &radeon_ttm_bo_destroy);
    up_read(&rdev->pm.mclk_lock);
    if (unlikely(r != 0)) {
        return r;
    }
    *bo_ptr = bo;

    trace_radeon_bo_create(bo);

    return 0;
}

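/*
 * Typical in-kernel usage (a sketch modelled on existing callers such as
 * radeon_wb_init; error handling trimmed for brevity):
 *
 *    r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
 *                         RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &bo);
 *    r = radeon_bo_reserve(bo, false);
 *    r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
 *    r = radeon_bo_kmap(bo, &cpu_ptr);
 *    radeon_bo_unreserve(bo);
 */
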
int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
    bool is_iomem;
    long r;

    r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
                  false, MAX_SCHEDULE_TIMEOUT);
    if (r < 0)
        return r;

    if (bo->kptr) {
        if (ptr) {
            *ptr = bo->kptr;
        }
        return 0;
    }
    r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap);
    if (r) {
        return r;
    }
    bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
    if (ptr) {
        *ptr = bo->kptr;
    }
    radeon_bo_check_tiling(bo, 0, 0);
    return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
    if (bo->kptr == NULL)
        return;
    bo->kptr = NULL;
    radeon_bo_check_tiling(bo, 0, 0);
    ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
    if (bo == NULL)
        return NULL;

    ttm_bo_get(&bo->tbo);
    return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
    struct ttm_buffer_object *tbo;

    if ((*bo) == NULL)
        return;
    tbo = &((*bo)->tbo);
    ttm_bo_put(tbo);
    *bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
                 u64 *gpu_addr)
{
    struct ttm_operation_ctx ctx = { false, false };
    int r, i;

    if (radeon_ttm_tt_has_userptr(bo->rdev, bo->tbo.ttm))
        return -EPERM;

    if (bo->tbo.pin_count) {
        ttm_bo_pin(&bo->tbo);
        if (gpu_addr)
            *gpu_addr = radeon_bo_gpu_offset(bo);

        if (max_offset != 0) {
            u64 domain_start;

            if (domain == RADEON_GEM_DOMAIN_VRAM)
                domain_start = bo->rdev->mc.vram_start;
            else
                domain_start = bo->rdev->mc.gtt_start;
            WARN_ON_ONCE(max_offset <
                     (radeon_bo_gpu_offset(bo) - domain_start));
        }

        return 0;
    }
    if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
        /* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
        return -EINVAL;
    }

    radeon_ttm_placement_from_domain(bo, domain);
    for (i = 0; i < bo->placement.num_placement; i++) {
        /* force to pin into visible video ram */
        if ((bo->placements[i].mem_type == TTM_PL_VRAM) &&
            !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
            bo->placements[i].lpfn =
                bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
    }

    r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    if (likely(r == 0)) {
        ttm_bo_pin(&bo->tbo);
        if (gpu_addr != NULL)
            *gpu_addr = radeon_bo_gpu_offset(bo);
        if (domain == RADEON_GEM_DOMAIN_VRAM)
            bo->rdev->vram_pin_size += radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size += radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p pin failed\n", bo);
    }
    return r;
}

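/*
 * Note on the placement loop in radeon_bo_pin_restricted() above: unless
 * the BO was created with RADEON_GEM_NO_CPU_ACCESS, its VRAM placements
 * are capped at the CPU-visible aperture so the pinned buffer (e.g. a
 * scanout surface) remains CPU-mappable; otherwise lpfn simply follows
 * max_offset.
 */
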
int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
    return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

void radeon_bo_unpin(struct radeon_bo *bo)
{
    ttm_bo_unpin(&bo->tbo);
    if (!bo->tbo.pin_count) {
        if (bo->tbo.resource->mem_type == TTM_PL_VRAM)
            bo->rdev->vram_pin_size -= radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size -= radeon_bo_size(bo);
    }
}

int radeon_bo_evict_vram(struct radeon_device *rdev)
{
    struct ttm_device *bdev = &rdev->mman.bdev;
    struct ttm_resource_manager *man;

    /* late 2.6.33 fix for IGP hibernation - we need pm ops to do this correctly */
#ifndef CONFIG_HIBERNATION
    if (rdev->flags & RADEON_IS_IGP) {
        if (rdev->mc.igp_sideport_enabled == false)
            /* Useless to evict on IGP chips */
            return 0;
    }
#endif
    man = ttm_manager_type(bdev, TTM_PL_VRAM);
    if (!man)
        return 0;
    return ttm_resource_manager_evict_all(bdev, man);
}

void radeon_bo_force_delete(struct radeon_device *rdev)
{
    struct radeon_bo *bo, *n;

    if (list_empty(&rdev->gem.objects)) {
        return;
    }
    dev_err(rdev->dev, "Userspace still has active objects!\n");
    list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
        dev_err(rdev->dev, "%p %p %lu %lu force free\n",
            &bo->tbo.base, bo, (unsigned long)bo->tbo.base.size,
            *((unsigned long *)&bo->tbo.base.refcount));
        mutex_lock(&bo->rdev->gem.mutex);
        list_del_init(&bo->list);
        mutex_unlock(&bo->rdev->gem.mutex);
        /* this should unref the ttm bo */
        drm_gem_object_put(&bo->tbo.base);
    }
}

int radeon_bo_init(struct radeon_device *rdev)
{
    /* reserve PAT memory space to WC for VRAM */
    arch_io_reserve_memtype_wc(rdev->mc.aper_base,
                   rdev->mc.aper_size);

    /* Add an MTRR for the VRAM */
    if (!rdev->fastfb_working) {
        rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
                              rdev->mc.aper_size);
    }
    DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
        rdev->mc.mc_vram_size >> 20,
        (unsigned long long)rdev->mc.aper_size >> 20);
    DRM_INFO("RAM width %dbits %cDR\n",
            rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
    return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
    radeon_ttm_fini(rdev);
    arch_phys_wc_del(rdev->mc.vram_mtrr);
    arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
    u64 real_vram_size = rdev->mc.real_vram_size;
    struct ttm_resource_manager *man =
        ttm_manager_type(&rdev->mman.bdev, TTM_PL_VRAM);
    u64 vram_usage = ttm_resource_manager_usage(man);

    /* This function is based on the current VRAM usage.
     *
     * - If all of VRAM is free, allow relocating the number of bytes that
     *   is equal to 1/4 of the size of VRAM for this IB.
     *
     * - If more than one half of VRAM is occupied, only allow relocating
     *   1 MB of data for this IB.
     *
     * - From 0 to one half of used VRAM, the threshold decreases
     *   linearly.
     *         __________________
     * 1/4 of -|\               |
     * VRAM    | \              |
     *         |  \             |
     *         |   \            |
     *         |    \           |
     *         |     \          |
     *         |      \         |
     *         |       \________|1 MB
     *         |----------------|
     *    VRAM 0 %             100 %
     *         used            used
     *
     * Note: It's a threshold, not a limit. The threshold must be crossed
     * for buffer relocations to stop, so any buffer of an arbitrary size
     * can be moved as long as the threshold isn't crossed before
     * the relocation takes place. We don't want to disable buffer
     * relocations completely.
     *
     * The idea is that buffers should be placed in VRAM at creation time
     * and TTM should only do a minimum number of relocations during
     * command submission. In practice, you need to submit at least
     * a dozen IBs to move all buffers to VRAM if they are in GTT.
     *
     * Also, things can get pretty crazy under memory pressure and actual
     * VRAM usage can change a lot, so playing safe even at 50% does
     * consistently increase performance.
     */

    u64 half_vram = real_vram_size >> 1;
    u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
    u64 bytes_moved_threshold = half_free_vram >> 1;
    return max(bytes_moved_threshold, 1024*1024ull);
}

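/*
 * Worked example with illustrative numbers (not taken from any specific
 * board): with 2 GiB of real VRAM, half_vram is 1 GiB. At 512 MiB of VRAM
 * usage, half_free_vram is 512 MiB and the threshold is 256 MiB. Once
 * usage reaches 1 GiB (50%), half_free_vram clamps to 0 and the threshold
 * floors at the 1 MiB minimum.
 */
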
int radeon_bo_list_validate(struct radeon_device *rdev,
                struct ww_acquire_ctx *ticket,
                struct list_head *head, int ring)
{
    struct ttm_operation_ctx ctx = { true, false };
    struct radeon_bo_list *lobj;
    struct list_head duplicates;
    int r;
    u64 bytes_moved = 0, initial_bytes_moved;
    u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

    INIT_LIST_HEAD(&duplicates);
    r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
    if (unlikely(r != 0)) {
        return r;
    }

    list_for_each_entry(lobj, head, tv.head) {
        struct radeon_bo *bo = lobj->robj;
        if (!bo->tbo.pin_count) {
            u32 domain = lobj->preferred_domains;
            u32 allowed = lobj->allowed_domains;
            u32 current_domain =
                radeon_mem_type_to_domain(bo->tbo.resource->mem_type);

            /* Check if this buffer will be moved and don't move it
             * if we have moved too many buffers for this IB already.
             *
             * Note that this allows moving at least one buffer of
             * any size, because it doesn't take the current "bo"
             * into account. We don't want to disallow buffer moves
             * completely.
             */
            if ((allowed & current_domain) != 0 &&
                (domain & current_domain) == 0 && /* will be moved */
                bytes_moved > bytes_moved_threshold) {
                /* don't move it */
                domain = current_domain;
            }

        retry:
            radeon_ttm_placement_from_domain(bo, domain);
            if (ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_force_into_uvd_segment(bo, allowed);

            initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
            r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
            bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
                       initial_bytes_moved;

            if (unlikely(r)) {
                if (r != -ERESTARTSYS &&
                    domain != lobj->allowed_domains) {
                    domain = lobj->allowed_domains;
                    goto retry;
                }
                ttm_eu_backoff_reservation(ticket, head);
                return r;
            }
        }
        lobj->gpu_offset = radeon_bo_gpu_offset(bo);
        lobj->tiling_flags = bo->tiling_flags;
    }

    list_for_each_entry(lobj, &duplicates, tv.head) {
        lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
        lobj->tiling_flags = lobj->robj->tiling_flags;
    }

    return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;
    struct radeon_bo *old_object;
    int steal;
    int i;

    dma_resv_assert_held(bo->tbo.base.resv);

    if (!bo->tiling_flags)
        return 0;

    if (bo->surface_reg >= 0) {
        i = bo->surface_reg;
        goto out;
    }

    steal = -1;
    for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

        reg = &rdev->surface_regs[i];
        if (!reg->bo)
            break;

        old_object = reg->bo;
        if (old_object->tbo.pin_count == 0)
            steal = i;
    }

    /* if we are all out */
    if (i == RADEON_GEM_MAX_SURFACES) {
        if (steal == -1)
            return -ENOMEM;
        /* find someone with a surface reg and nuke their BO */
        reg = &rdev->surface_regs[steal];
        old_object = reg->bo;
        /* blow away the mapping */
        DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
        ttm_bo_unmap_virtual(&old_object->tbo);
        old_object->surface_reg = -1;
        i = steal;
    }

    bo->surface_reg = i;
    reg->bo = bo;

out:
    radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
                   bo->tbo.resource->start << PAGE_SHIFT,
                   bo->tbo.base.size);
    return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;

    if (bo->surface_reg == -1)
        return;

    reg = &rdev->surface_regs[bo->surface_reg];
    radeon_clear_surface_reg(rdev, bo->surface_reg);

    reg->bo = NULL;
    bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
                uint32_t tiling_flags, uint32_t pitch)
{
    struct radeon_device *rdev = bo->rdev;
    int r;

    if (rdev->family >= CHIP_CEDAR) {
        unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

        bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
        bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
        mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
        tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
        stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
        switch (bankw) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (bankh) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (mtaspect) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        if (tilesplit > 6) {
            return -EINVAL;
        }
        if (stilesplit > 6) {
            return -EINVAL;
        }
    }
    r = radeon_bo_reserve(bo, false);
    if (unlikely(r != 0))
        return r;
    bo->tiling_flags = tiling_flags;
    bo->pitch = pitch;
    radeon_bo_unreserve(bo);
    return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
                uint32_t *tiling_flags,
                uint32_t *pitch)
{
    dma_resv_assert_held(bo->tbo.base.resv);

    if (tiling_flags)
        *tiling_flags = bo->tiling_flags;
    if (pitch)
        *pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
                bool force_drop)
{
    if (!force_drop)
        dma_resv_assert_held(bo->tbo.base.resv);

    if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
        return 0;

    if (force_drop) {
        radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if (bo->tbo.resource->mem_type != TTM_PL_VRAM) {
        if (!has_moved)
            return 0;

        if (bo->surface_reg >= 0)
            radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if ((bo->surface_reg >= 0) && !has_moved)
        return 0;

    return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo)
{
    struct radeon_bo *rbo;

    if (!radeon_ttm_bo_is_radeon_bo(bo))
        return;

    rbo = container_of(bo, struct radeon_bo, tbo);
    radeon_bo_check_tiling(rbo, 0, 1);
    radeon_vm_bo_invalidate(rbo->rdev, rbo);
}

vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
    struct ttm_operation_ctx ctx = { false, false };
    struct radeon_device *rdev;
    struct radeon_bo *rbo;
    unsigned long offset, size, lpfn;
    int i, r;

    if (!radeon_ttm_bo_is_radeon_bo(bo))
        return 0;
    rbo = container_of(bo, struct radeon_bo, tbo);
    radeon_bo_check_tiling(rbo, 0, 0);
    rdev = rbo->rdev;
    if (bo->resource->mem_type != TTM_PL_VRAM)
        return 0;

    size = bo->resource->num_pages << PAGE_SHIFT;
    offset = bo->resource->start << PAGE_SHIFT;
    if ((offset + size) <= rdev->mc.visible_vram_size)
        return 0;

    /* Can't move a pinned BO to visible VRAM */
    if (rbo->tbo.pin_count > 0)
        return VM_FAULT_SIGBUS;

    /* hurrah, the memory is not visible! */
    radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
    lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
    for (i = 0; i < rbo->placement.num_placement; i++) {
        /* Force into visible VRAM */
        if ((rbo->placements[i].mem_type == TTM_PL_VRAM) &&
            (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
            rbo->placements[i].lpfn = lpfn;
    }
    r = ttm_bo_validate(bo, &rbo->placement, &ctx);
    if (unlikely(r == -ENOMEM)) {
        radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
        r = ttm_bo_validate(bo, &rbo->placement, &ctx);
    } else if (likely(!r)) {
        offset = bo->resource->start << PAGE_SHIFT;
        /* this should never happen */
        if ((offset + size) > rdev->mc.visible_vram_size)
            return VM_FAULT_SIGBUS;
    }

    if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
        return VM_FAULT_NOPAGE;
    else if (unlikely(r))
        return VM_FAULT_SIGBUS;

    ttm_bo_move_to_lru_tail_unlocked(bo);
    return 0;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
             bool shared)
{
    struct dma_resv *resv = bo->tbo.base.resv;
    int r;

    r = dma_resv_reserve_fences(resv, 1);
    if (r) {
        /* As last resort on OOM we block for the fence */
        dma_fence_wait(&fence->base, false);
        return;
    }

    dma_resv_add_fence(resv, &fence->base, shared ?
               DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
}