0001 /*
0002  * Copyright 2018 Advanced Micro Devices, Inc.
0003  * All Rights Reserved.
0004  *
0005  * Permission is hereby granted, free of charge, to any person obtaining a
0006  * copy of this software and associated documentation files (the
0007  * "Software"), to deal in the Software without restriction, including
0008  * without limitation the rights to use, copy, modify, merge, publish,
0009  * distribute, sub license, and/or sell copies of the Software, and to
0010  * permit persons to whom the Software is furnished to do so, subject to
0011  * the following conditions:
0012  *
0013  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0014  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0015  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
0016  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
0017  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
0018  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
0019  * USE OR OTHER DEALINGS IN THE SOFTWARE.
0020  *
0021  * The above copyright notice and this permission notice (including the
0022  * next paragraph) shall be included in all copies or substantial portions
0023  * of the Software.
0024  *
0025  */
0026 
0027 #include <linux/io-64-nonatomic-lo-hi.h>
0028 #ifdef CONFIG_X86
0029 #include <asm/hypervisor.h>
0030 #endif
0031 
0032 #include "amdgpu.h"
0033 #include "amdgpu_gmc.h"
0034 #include "amdgpu_ras.h"
0035 #include "amdgpu_xgmi.h"
0036 
0037 #include <drm/drm_drv.h>
0038 
0039 /**
0040  * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
0041  *
0042  * @adev: amdgpu_device pointer
0043  *
0044  * Allocate video memory for pdb0 and map it for CPU access
0045  * Returns 0 for success, error for failure.
0046  */
0047 int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
0048 {
0049     int r;
0050     struct amdgpu_bo_param bp;
0051     u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
0052     uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
0053     uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
0054 
0055     memset(&bp, 0, sizeof(bp));
0056     bp.size = PAGE_ALIGN((npdes + 1) * 8);
0057     bp.byte_align = PAGE_SIZE;
0058     bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
0059     bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
0060         AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
0061     bp.type = ttm_bo_type_kernel;
0062     bp.resv = NULL;
0063     bp.bo_ptr_size = sizeof(struct amdgpu_bo);
0064 
0065     r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
0066     if (r)
0067         return r;
0068 
0069     r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
0070     if (unlikely(r != 0))
0071         goto bo_reserve_failure;
0072 
0073     r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
0074     if (r)
0075         goto bo_pin_failure;
0076     r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
0077     if (r)
0078         goto bo_kmap_failure;
0079 
0080     amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
0081     return 0;
0082 
0083 bo_kmap_failure:
0084     amdgpu_bo_unpin(adev->gmc.pdb0_bo);
0085 bo_pin_failure:
0086     amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
0087 bo_reserve_failure:
0088     amdgpu_bo_unref(&adev->gmc.pdb0_bo);
0089     return r;
0090 }
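
/*
 * Illustrative sketch (not part of this file): a GMC init path that uses the
 * GART page table for FB translation would typically reserve PDB0 with the
 * helper above and fill it with amdgpu_gmc_init_pdb0() once the GART table
 * BO exists. The wrapper below and its place in the init order are
 * assumptions for illustration only.
 */
static int example_sysvm_setup(struct amdgpu_device *adev)
{
    int r;

    /* Reserve, pin and CPU-map the PDB0 page directory in VRAM */
    r = amdgpu_gmc_pdb0_alloc(adev);
    if (r)
        return r;

    /* Write the PTE-like VRAM entries and the PDE pointing at the GART PTB */
    amdgpu_gmc_init_pdb0(adev);
    return 0;
}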
0091 
0092 /**
0093  * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
0094  *
0095  * @bo: the BO to get the PDE for
0096  * @level: the level in the PD hierarchy
0097  * @addr: resulting addr
0098  * @flags: resulting flags
0099  *
0100  * Get the address and flags to be used for a PDE (Page Directory Entry).
0101  */
0102 void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
0103                    uint64_t *addr, uint64_t *flags)
0104 {
0105     struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
0106 
0107     switch (bo->tbo.resource->mem_type) {
0108     case TTM_PL_TT:
0109         *addr = bo->tbo.ttm->dma_address[0];
0110         break;
0111     case TTM_PL_VRAM:
0112         *addr = amdgpu_bo_gpu_offset(bo);
0113         break;
0114     default:
0115         *addr = 0;
0116         break;
0117     }
0118     *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
0119     amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
0120 }
0121 
0122 /*
0123  * amdgpu_gmc_pd_addr - return the address of the root directory
0124  */
0125 uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
0126 {
0127     struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
0128     uint64_t pd_addr;
0129 
0130     /* TODO: move that into ASIC specific code */
0131     if (adev->asic_type >= CHIP_VEGA10) {
0132         uint64_t flags = AMDGPU_PTE_VALID;
0133 
0134         amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
0135         pd_addr |= flags;
0136     } else {
0137         pd_addr = amdgpu_bo_gpu_offset(bo);
0138     }
0139     return pd_addr;
0140 }
0141 
0142 /**
0143  * amdgpu_gmc_set_pte_pde - update the page tables using CPU
0144  *
0145  * @adev: amdgpu_device pointer
0146  * @cpu_pt_addr: cpu address of the page table
0147  * @gpu_page_idx: entry in the page table to update
0148  * @addr: dst addr to write into pte/pde
0149  * @flags: access flags
0150  *
0151  * Update the page tables using CPU.
0152  */
0153 int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
0154                 uint32_t gpu_page_idx, uint64_t addr,
0155                 uint64_t flags)
0156 {
0157     void __iomem *ptr = (void *)cpu_pt_addr;
0158     uint64_t value;
0159 
0160     /*
0161      * The following is for PTE only. GART does not have PDEs.
0162      */
0163     value = addr & 0x0000FFFFFFFFF000ULL;
0164     value |= flags;
0165     writeq(value, ptr + (gpu_page_idx * 8));
0166 
0167     return 0;
0168 }
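
/*
 * Illustrative sketch (not part of this file): writing a single GART entry
 * with the helper above. The helper masks @addr to bits 47..12 and ORs in
 * the flags; real GART code derives its flags from adev->gart.gart_pte_flags,
 * so the explicit flag set and the wrapper below are assumptions for
 * illustration.
 */
static void example_write_gart_pte(struct amdgpu_device *adev,
                   void *gart_cpu_ptr, uint32_t idx,
                   dma_addr_t dma_addr)
{
    uint64_t flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE |
             AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_SNOOPED;

    amdgpu_gmc_set_pte_pde(adev, gart_cpu_ptr, idx, dma_addr, flags);
}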
0169 
0170 /**
0171  * amdgpu_gmc_agp_addr - return the address in the AGP address space
0172  *
0173  * @bo: TTM BO which needs the address, must be in GTT domain
0174  *
0175  * Tries to figure out how to access the BO through the AGP aperture. Returns
0176  * AMDGPU_BO_INVALID_OFFSET if that is not possible.
0177  */
0178 uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
0179 {
0180     struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
0181 
0182     if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
0183         return AMDGPU_BO_INVALID_OFFSET;
0184 
0185     if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
0186         return AMDGPU_BO_INVALID_OFFSET;
0187 
0188     return adev->gmc.agp_start + bo->ttm->dma_address[0];
0189 }
0190 
0191 /**
0192  * amdgpu_gmc_vram_location - try to find VRAM location
0193  *
0194  * @adev: amdgpu device structure holding all necessary information
0195  * @mc: memory controller structure holding memory information
0196  * @base: base address at which to put VRAM
0197  *
0198  * Function will try to place VRAM at base address provided
0199  * as parameter.
0200  */
0201 void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
0202                   u64 base)
0203 {
0204     uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
0205 
0206     mc->vram_start = base;
0207     mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
0208     if (limit && limit < mc->real_vram_size)
0209         mc->real_vram_size = limit;
0210 
0211     if (mc->xgmi.num_physical_nodes == 0) {
0212         mc->fb_start = mc->vram_start;
0213         mc->fb_end = mc->vram_end;
0214     }
0215     dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
0216             mc->mc_vram_size >> 20, mc->vram_start,
0217             mc->vram_end, mc->real_vram_size >> 20);
0218 }
0219 
0220 /** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
0221  *
0222  * @adev: amdgpu device structure holding all necessary information
0223  * @mc: memory controller structure holding memory information
0224  *
0225  * This function is only used when GART is used for FB translation. In
0226  * that case, we use the sysvm aperture (vmid0 page tables) for both
0227  * vram and gart (aka system memory) access.
0228  *
0229  * GPUVM (and our organization of vmid0 page tables) requires the sysvm
0230  * aperture to be placed at a location aligned to 8 times the native
0231  * page size. For example, if vm_context0_cntl.page_table_block_size
0232  * is 12, the native page size is 8G (2M * 2^12), so sysvm should start
0233  * at a 64G aligned address. For simplicity, we just put sysvm at
0234  * address 0, so vram starts at address 0 and gart is right after vram.
0235  */
0236 void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
0237 {
0238     u64 hive_vram_start = 0;
0239     u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
0240     mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
0241     mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
0242     mc->gart_start = hive_vram_end + 1;
0243     mc->gart_end = mc->gart_start + mc->gart_size - 1;
0244     mc->fb_start = hive_vram_start;
0245     mc->fb_end = hive_vram_end;
0246     dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
0247             mc->mc_vram_size >> 20, mc->vram_start,
0248             mc->vram_end, mc->real_vram_size >> 20);
0249     dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
0250             mc->gart_size >> 20, mc->gart_start, mc->gart_end);
0251 }
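
/*
 * Illustrative sketch (not part of this file): the alignment math from the
 * comment above as compile-time arithmetic, using the example block size of
 * 12 from the kerneldoc; nothing here touches hardware.
 */
static inline u64 example_sysvm_alignment(unsigned int block_size)
{
    u64 pde0_page_size = 2ULL * 1024 * 1024 << block_size; /* 2M * 2^bs */

    return 8 * pde0_page_size; /* block_size 12 -> 8 * 8G = 64G */
}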
0252 
0253 /**
0254  * amdgpu_gmc_gart_location - try to find GART location
0255  *
0256  * @adev: amdgpu device structure holding all necessary information
0257  * @mc: memory controller structure holding memory information
0258  *
0259  * Function will try to place GART before or after VRAM.
0260  * If the GART size is bigger than the space left then we adjust the
0261  * GART size. Thus this function will never fail.
0262  */
0263 void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
0264 {
0265     const uint64_t four_gb = 0x100000000ULL;
0266     u64 size_af, size_bf;
0267     /* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
0268     u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
0269 
0270     /* VCE doesn't like it when BOs cross a 4GB segment, so align
0271      * the GART base on a 4GB boundary as well.
0272      */
0273     size_bf = mc->fb_start;
0274     size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
0275 
0276     if (mc->gart_size > max(size_bf, size_af)) {
0277         dev_warn(adev->dev, "limiting GART\n");
0278         mc->gart_size = max(size_bf, size_af);
0279     }
0280 
0281     if ((size_bf >= mc->gart_size && size_bf < size_af) ||
0282         (size_af < mc->gart_size))
0283         mc->gart_start = 0;
0284     else
0285         mc->gart_start = max_mc_address - mc->gart_size + 1;
0286 
0287     mc->gart_start &= ~(four_gb - 1);
0288     mc->gart_end = mc->gart_start + mc->gart_size - 1;
0289     dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
0290             mc->gart_size >> 20, mc->gart_start, mc->gart_end);
0291 }
0292 
0293 /**
0294  * amdgpu_gmc_agp_location - try to find AGP location
0295  * @adev: amdgpu device structure holding all necessary information
0296  * @mc: memory controller structure holding memory information
0297  *
0298  * Function will try to find a place for the AGP BAR in the MC address
0299  * space.
0300  *
0301  * AGP BAR will be assigned the largest available hole in the address space.
0302  * Should be called after VRAM and GART locations are set up.
0303  */
0304 void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
0305 {
0306     const uint64_t sixteen_gb = 1ULL << 34;
0307     const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
0308     u64 size_af, size_bf;
0309 
0310     if (amdgpu_sriov_vf(adev)) {
0311         mc->agp_start = 0xffffffffffff;
0312         mc->agp_end = 0x0;
0313         mc->agp_size = 0;
0314 
0315         return;
0316     }
0317 
0318     if (mc->fb_start > mc->gart_start) {
0319         size_bf = (mc->fb_start & sixteen_gb_mask) -
0320             ALIGN(mc->gart_end + 1, sixteen_gb);
0321         size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
0322     } else {
0323         size_bf = mc->fb_start & sixteen_gb_mask;
0324         size_af = (mc->gart_start & sixteen_gb_mask) -
0325             ALIGN(mc->fb_end + 1, sixteen_gb);
0326     }
0327 
0328     if (size_bf > size_af) {
0329         mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
0330         mc->agp_size = size_bf;
0331     } else {
0332         mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
0333         mc->agp_size = size_af;
0334     }
0335 
0336     mc->agp_end = mc->agp_start + mc->agp_size - 1;
0337     dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
0338             mc->agp_size >> 20, mc->agp_start, mc->agp_end);
0339 }
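
/*
 * Illustrative worked example (hypothetical numbers): with the FB placed at
 * [0, 4G) and GART at [128G, ...), size_bf = 0 and
 * size_af = (128G & ~(16G - 1)) - ALIGN(4G, 16G) = 112G, so the AGP
 * aperture becomes [16G, 128G).
 */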
0340 
0341 /**
0342  * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
0343  *
0344  * @addr: 48 bit physical address, page aligned (36 significant bits)
0345  * @pasid: 16 bit process address space identifier
0346  */
0347 static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
0348 {
0349     return addr << 4 | pasid;
0350 }
0351 
0352 /**
0353  * amdgpu_gmc_filter_faults - filter VM faults
0354  *
0355  * @adev: amdgpu device structure
0356  * @ih: interrupt ring that the fault received from
0357  * @addr: address of the VM fault
0358  * @pasid: PASID of the process causing the fault
0359  * @timestamp: timestamp of the fault
0360  *
0361  * Returns:
0362  * True if the fault was filtered and should not be processed further.
0363  * False if the fault is a new one and needs to be handled.
0364  */
0365 bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
0366                   struct amdgpu_ih_ring *ih, uint64_t addr,
0367                   uint16_t pasid, uint64_t timestamp)
0368 {
0369     struct amdgpu_gmc *gmc = &adev->gmc;
0370     uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
0371     struct amdgpu_gmc_fault *fault;
0372     uint32_t hash;
0373 
0374     /* Stale retry fault if timestamp goes backward */
0375     if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
0376         return true;
0377 
0378     /* If we don't have space left in the ring buffer return immediately */
0379     stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
0380         AMDGPU_GMC_FAULT_TIMEOUT;
0381     if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
0382         return true;
0383 
0384     /* Try to find the fault in the hash */
0385     hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
0386     fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
0387     while (fault->timestamp >= stamp) {
0388         uint64_t tmp;
0389 
0390         if (atomic64_read(&fault->key) == key)
0391             return true;
0392 
0393         tmp = fault->timestamp;
0394         fault = &gmc->fault_ring[fault->next];
0395 
0396         /* Check if the entry was reused */
0397         if (fault->timestamp >= tmp)
0398             break;
0399     }
0400 
0401     /* Add the fault to the ring */
0402     fault = &gmc->fault_ring[gmc->last_fault];
0403     atomic64_set(&fault->key, key);
0404     fault->timestamp = timestamp;
0405 
0406     /* And update the hash */
0407     fault->next = gmc->fault_hash[hash].idx;
0408     gmc->fault_hash[hash].idx = gmc->last_fault++;
0409     return false;
0410 }
0411 
0412 /**
0413  * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
0414  *
0415  * @adev: amdgpu device structure
0416  * @addr: address of the VM fault
0417  * @pasid: PASID of the process causing the fault
0418  *
0419  * Remove the address from the fault filter so that future vm faults on
0420  * this address are passed to the retry fault handler to recover.
0421  */
0422 void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
0423                      uint16_t pasid)
0424 {
0425     struct amdgpu_gmc *gmc = &adev->gmc;
0426     uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
0427     struct amdgpu_gmc_fault *fault;
0428     uint32_t hash;
0429     uint64_t tmp;
0430 
0431     hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
0432     fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
0433     do {
0434         if (atomic64_cmpxchg(&fault->key, key, 0) == key)
0435             break;
0436 
0437         tmp = fault->timestamp;
0438         fault = &gmc->fault_ring[fault->next];
0439     } while (fault->timestamp < tmp);
0440 }
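
/*
 * Illustrative sketch (not part of this file): a typical retry-fault flow
 * around the two filter helpers above. The handler below and the way the
 * missing page gets resolved are assumptions for illustration.
 */
static void example_handle_retry_fault(struct amdgpu_device *adev,
                       struct amdgpu_ih_ring *ih,
                       uint64_t addr, uint16_t pasid,
                       uint64_t timestamp)
{
    /* Drop faults that were already seen recently */
    if (amdgpu_gmc_filter_faults(adev, ih, addr, pasid, timestamp))
        return;

    /* ... resolve the missing mapping here (ASIC/SVM specific) ... */

    /*
     * Once resolved, drop the filter entry so a genuinely new fault on
     * the same address is handled again instead of being filtered.
     */
    amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
}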
0441 
0442 int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
0443 {
0444     if (!adev->gmc.xgmi.connected_to_cpu) {
0445         adev->gmc.xgmi.ras = &xgmi_ras;
0446         amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
0447         adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm;
0448     }
0449 
0450     return 0;
0451 }
0452 
0453 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
0454 {
0455     return 0;
0456 }
0457 
0458 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
0459 {
0460 
0461 }
0462 
0463     /*
0464      * The latest engine allocation on gfx9/10 is:
0465      * Engine 2, 3: firmware
0466      * Engine 0, 1, 4~16: amdgpu ring,
0467      *                    subject to change when ring number changes
0468      * Engine 17: Gart flushes
0469      */
0470 #define GFXHUB_FREE_VM_INV_ENGS_BITMAP      0x1FFF3
0471 #define MMHUB_FREE_VM_INV_ENGS_BITMAP       0x1FFF3
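
/*
 * 0x1FFF3 is 0b1 1111 1111 1111 0011: bits 0, 1 and 4..16 set, matching the
 * engines listed above as free for amdgpu rings, while bits 2, 3 (firmware)
 * and 17 (GART flushes) stay reserved.
 */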
0472 
0473 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
0474 {
0475     struct amdgpu_ring *ring;
0476     unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
0477         {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
0478         GFXHUB_FREE_VM_INV_ENGS_BITMAP};
0479     unsigned i;
0480     unsigned vmhub, inv_eng;
0481 
0482     for (i = 0; i < adev->num_rings; ++i) {
0483         ring = adev->rings[i];
0484         vmhub = ring->funcs->vmhub;
0485 
0486         if (ring == &adev->mes.ring)
0487             continue;
0488 
0489         inv_eng = ffs(vm_inv_engs[vmhub]);
0490         if (!inv_eng) {
0491             dev_err(adev->dev, "no VM inv eng for ring %s\n",
0492                 ring->name);
0493             return -EINVAL;
0494         }
0495 
0496         ring->vm_inv_eng = inv_eng - 1;
0497         vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
0498 
0499         dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
0500              ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
0501     }
0502 
0503     return 0;
0504 }
0505 
0506 /**
0507  * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
0508  * @adev: amdgpu_device pointer
0509  *
0510  * Check and set whether the device @adev supports Trusted Memory
0511  * Zones (TMZ).
0512  */
0513 void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
0514 {
0515     switch (adev->ip_versions[GC_HWIP][0]) {
0516     /* RAVEN */
0517     case IP_VERSION(9, 2, 2):
0518     case IP_VERSION(9, 1, 0):
0519     /* RENOIR looks like RAVEN */
0520     case IP_VERSION(9, 3, 0):
0521     /* GC 10.3.7 */
0522     case IP_VERSION(10, 3, 7):
0523         if (amdgpu_tmz == 0) {
0524             adev->gmc.tmz_enabled = false;
0525             dev_info(adev->dev,
0526                  "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
0527         } else {
0528             adev->gmc.tmz_enabled = true;
0529             dev_info(adev->dev,
0530                  "Trusted Memory Zone (TMZ) feature enabled\n");
0531         }
0532         break;
0533     case IP_VERSION(10, 1, 10):
0534     case IP_VERSION(10, 1, 1):
0535     case IP_VERSION(10, 1, 2):
0536     case IP_VERSION(10, 1, 3):
0537     case IP_VERSION(10, 3, 0):
0538     case IP_VERSION(10, 3, 2):
0539     case IP_VERSION(10, 3, 4):
0540     case IP_VERSION(10, 3, 5):
0541     /* VANGOGH */
0542     case IP_VERSION(10, 3, 1):
0543     /* YELLOW_CARP */
0544     case IP_VERSION(10, 3, 3):
0545         /* Don't enable it by default yet.
0546          */
0547         if (amdgpu_tmz < 1) {
0548             adev->gmc.tmz_enabled = false;
0549             dev_info(adev->dev,
0550                  "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
0551         } else {
0552             adev->gmc.tmz_enabled = true;
0553             dev_info(adev->dev,
0554                  "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
0555         }
0556         break;
0557     default:
0558         adev->gmc.tmz_enabled = false;
0559         dev_info(adev->dev,
0560              "Trusted Memory Zone (TMZ) feature not supported\n");
0561         break;
0562     }
0563 }
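
/*
 * Usage note: amdgpu_tmz is the module parameter behind the checks above;
 * booting with amdgpu.tmz=1 force-enables TMZ and amdgpu.tmz=0 disables it,
 * while the default (assumed to be -1, "auto") leaves the per-ASIC policy
 * above in charge.
 */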
0564 
0565 /**
0566  * amdgpu_gmc_noretry_set -- set per asic noretry defaults
0567  * @adev: amdgpu_device pointer
0568  *
0569  * Set a per asic default for the no-retry parameter.
0570  *
0571  */
0572 void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
0573 {
0574     struct amdgpu_gmc *gmc = &adev->gmc;
0575 
0576     switch (adev->ip_versions[GC_HWIP][0]) {
0577     case IP_VERSION(9, 0, 1):
0578     case IP_VERSION(9, 3, 0):
0579     case IP_VERSION(9, 4, 0):
0580     case IP_VERSION(9, 4, 1):
0581     case IP_VERSION(9, 4, 2):
0582     case IP_VERSION(10, 3, 3):
0583     case IP_VERSION(10, 3, 4):
0584     case IP_VERSION(10, 3, 5):
0585     case IP_VERSION(10, 3, 6):
0586     case IP_VERSION(10, 3, 7):
0587         /*
0588          * noretry = 0 will cause kfd page fault tests to fail
0589          * for some ASICs, so set default to 1 for these ASICs.
0590          */
0591         if (amdgpu_noretry == -1)
0592             gmc->noretry = 1;
0593         else
0594             gmc->noretry = amdgpu_noretry;
0595         break;
0596     default:
0597         /* Raven currently has issues with noretry;
0598          * regardless of what we decide for other
0599          * asics, we should leave raven with
0600          * noretry = 0 until we root cause the
0601          * issues.
0602          *
0603          * default this to 0 for now, but we may want
0604          * to change this in the future for certain
0605          * GPUs as it can increase performance in
0606          * certain cases.
0607          */
0608         if (amdgpu_noretry == -1)
0609             gmc->noretry = 0;
0610         else
0611             gmc->noretry = amdgpu_noretry;
0612         break;
0613     }
0614 }
0615 
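/**
 * amdgpu_gmc_set_vm_fault_masks - set or clear the VM fault controls of a hub
 *
 * @adev: amdgpu_device pointer
 * @hub_type: which hub to program, e.g. AMDGPU_GFXHUB_0 or an MMHUB index
 * @enable: true to set the per-context VM fault bits, false to clear them
 *
 * Walks the VM_CONTEXT0..15 CNTL registers of the given hub and sets or
 * clears the vm_cntx_cntl_vm_fault bit mask in each of them.
 */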
0616 void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
0617                    bool enable)
0618 {
0619     struct amdgpu_vmhub *hub;
0620     u32 tmp, reg, i;
0621 
0622     hub = &adev->vmhub[hub_type];
0623     for (i = 0; i < 16; i++) {
0624         reg = hub->vm_context0_cntl + hub->ctx_distance * i;
0625 
0626         tmp = (hub_type == AMDGPU_GFXHUB_0) ?
0627             RREG32_SOC15_IP(GC, reg) :
0628             RREG32_SOC15_IP(MMHUB, reg);
0629 
0630         if (enable)
0631             tmp |= hub->vm_cntx_cntl_vm_fault;
0632         else
0633             tmp &= ~hub->vm_cntx_cntl_vm_fault;
0634 
0635         (hub_type == AMDGPU_GFXHUB_0) ?
0636             WREG32_SOC15_IP(GC, reg, tmp) :
0637             WREG32_SOC15_IP(MMHUB, reg, tmp);
0638     }
0639 }
0640 
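/**
 * amdgpu_gmc_get_vbios_allocations - determine VRAM stolen by the vBIOS
 *
 * @adev: amdgpu_device pointer
 *
 * Work out how much VRAM the pre-OS console / vBIOS framebuffer occupies and
 * record it in stolen_vga_size and stolen_extended_size, along with any
 * ASIC-specific firmware reserved region, so those ranges can be kept out of
 * normal allocations.
 */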
0641 void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
0642 {
0643     unsigned size;
0644 
0645     /*
0646      * Some ASICs need to reserve a region of video memory to avoid access
0647      * from the driver
0648      */
0649     adev->mman.stolen_reserved_offset = 0;
0650     adev->mman.stolen_reserved_size = 0;
0651 
0652     /*
0653      * TODO:
0654      * Currently there is a bug where some memory client outside
0655      * of the driver writes to the first 8M of VRAM on S3 resume;
0656      * this overrides GART, which by default gets placed in the first 8M,
0657      * and causes VM_FAULTS once GTT is accessed.
0658      * Keep the stolen memory reservation until this is solved.
0659      */
0660     switch (adev->asic_type) {
0661     case CHIP_VEGA10:
0662         adev->mman.keep_stolen_vga_memory = true;
0663         /*
0664          * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
0665          */
0666 #ifdef CONFIG_X86
0667         if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
0668             adev->mman.stolen_reserved_offset = 0x500000;
0669             adev->mman.stolen_reserved_size = 0x200000;
0670         }
0671 #endif
0672         break;
0673     case CHIP_RAVEN:
0674     case CHIP_RENOIR:
0675         adev->mman.keep_stolen_vga_memory = true;
0676         break;
0677     case CHIP_YELLOW_CARP:
0678         if (amdgpu_discovery == 0) {
0679             adev->mman.stolen_reserved_offset = 0x1ffb0000;
0680             adev->mman.stolen_reserved_size = 64 * PAGE_SIZE;
0681         }
0682         break;
0683     default:
0684         adev->mman.keep_stolen_vga_memory = false;
0685         break;
0686     }
0687 
0688     if (amdgpu_sriov_vf(adev) ||
0689         !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
0690         size = 0;
0691     } else {
0692         size = amdgpu_gmc_get_vbios_fb_size(adev);
0693 
0694         if (adev->mman.keep_stolen_vga_memory)
0695             size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
0696     }
0697 
0698     /* set to 0 if the pre-OS buffer uses up most of vram */
0699     if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
0700         size = 0;
0701 
0702     if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
0703         adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
0704         adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
0705     } else {
0706         adev->mman.stolen_vga_size = size;
0707         adev->mman.stolen_extended_size = 0;
0708     }
0709 }
0710 
0711 /**
0712  * amdgpu_gmc_init_pdb0 - initialize PDB0
0713  *
0714  * @adev: amdgpu_device pointer
0715  *
0716  * This function is only used when the GART page table is used
0717  * for FB address translation. In such a case, we construct
0718  * a 2-level system VM page table: PDB0->PTB, to cover both
0719  * VRAM of the hive and system memory.
0720  *
0721  * PDB0 is static, initialized once on driver initialization.
0722  * The first n entries of PDB0 are used as PTE by setting
0723  * P bit to 1, pointing to VRAM. The n+1'th entry points
0724  * to a big PTB covering system memory.
0725  *
0726  */
0727 void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
0728 {
0729     int i;
0730     uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
0731     /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
0732      */
0733     u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
0734     u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
0735     u64 vram_addr = adev->vm_manager.vram_base_offset -
0736         adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
0737     u64 vram_end = vram_addr + vram_size;
0738     u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
0739     int idx;
0740 
0741     if (!drm_dev_enter(adev_to_drm(adev), &idx))
0742         return;
0743 
0744     flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
0745     flags |= AMDGPU_PTE_WRITEABLE;
0746     flags |= AMDGPU_PTE_SNOOPED;
0747     flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
0748     flags |= AMDGPU_PDE_PTE;
0749 
0750     /* The first n PDE0 entries are used as PTE,
0751      * pointing to vram
0752      */
0753     for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
0754         amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
0755 
0756     /* The n+1'th PDE0 entry points to a huge
0757      * PTB which has more than 512 entries, each
0758      * pointing to a 4K system page
0759      */
0760     flags = AMDGPU_PTE_VALID;
0761     flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
0762     /* Requires gart_ptb_gpu_pa to be 4K aligned */
0763     amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
0764     drm_dev_exit(idx);
0765 }
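
/*
 * Illustrative worked example (hypothetical numbers): with a 4-node hive,
 * 32G per node and vmid0_page_table_block_size = 9, pde0_page_size is
 * 2M << 9 = 1G, so the loop above writes 4 * 32 = 128 PTE-like entries for
 * VRAM and entry 128 becomes the PDE pointing at the GART PTB.
 */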
0766 
0767 /**
0768  * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
0769  * address
0770  *
0771  * @adev: amdgpu_device pointer
0772  * @mc_addr: MC address of buffer
0773  */
0774 uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
0775 {
0776     return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
0777 }
0778 
0779 /**
0780  * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
0781  * GPU's view
0782  *
0783  * @adev: amdgpu_device pointer
0784  * @bo: amdgpu buffer object
0785  */
0786 uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
0787 {
0788     return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
0789 }
0790 
0791 /**
0792  * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
0793  * from CPU's view
0794  *
0795  * @adev: amdgpu_device pointer
0796  * @bo: amdgpu buffer object
0797  */
0798 uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
0799 {
0800     return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
0801 }
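
/*
 * Illustrative worked example (hypothetical numbers): for a BO sitting at MC
 * address vram_start + 0x100000, amdgpu_gmc_vram_pa() returns
 * vram_base_offset + 0x100000 (the GPU physical view) while
 * amdgpu_gmc_vram_cpu_pa() returns aper_base + 0x100000 (the CPU view
 * through the VRAM BAR).
 */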
0802 
0803 int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
0804 {
0805     struct amdgpu_bo *vram_bo = NULL;
0806     uint64_t vram_gpu = 0;
0807     void *vram_ptr = NULL;
0808 
0809     int ret, size = 0x100000;
0810     uint8_t cptr[10];
0811 
0812     ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
0813                 AMDGPU_GEM_DOMAIN_VRAM,
0814                 &vram_bo,
0815                 &vram_gpu,
0816                 &vram_ptr);
0817     if (ret)
0818         return ret;
0819 
0820     memset(vram_ptr, 0x86, size);
0821     memset(cptr, 0x86, 10);
0822 
0823     /*
0824      * Check the start, the middle, and the end of the memory to see if the
0825      * content of each byte is the pattern "0x86". If yes, we assume the
0826      * vram bo is workable.
0827      *
0828      * Note: checking every byte of the whole 1M bo would cost too many
0829      * seconds, so here we just pick three parts as a representative sample.
0830      */
0831     ret = memcmp(vram_ptr, cptr, 10);
0832     if (ret)
0833         goto release_buffer;
0834 
0835     ret = memcmp(vram_ptr + (size / 2), cptr, 10);
0836     if (ret)
0837         goto release_buffer;
0838 
0839     ret = memcmp(vram_ptr + size - 10, cptr, 10);
0840 
0841 release_buffer:
0842     /* Free the scratch BO on both the success and the error path */
0843     amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
0844             &vram_ptr);
0845 
0846     return ret;
0847 }