/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
#include "sdma1/sdma1_4_2_2_sh_mask.h"
#include "sdma2/sdma2_4_2_2_offset.h"
#include "sdma2/sdma2_4_2_2_sh_mask.h"
#include "sdma3/sdma3_4_2_2_offset.h"
#include "sdma3/sdma3_4_2_2_sh_mask.h"
#include "sdma4/sdma4_4_2_2_offset.h"
#include "sdma4/sdma4_4_2_2_sh_mask.h"
#include "sdma5/sdma5_4_2_2_offset.h"
#include "sdma5/sdma5_4_2_2_sh_mask.h"
#include "sdma6/sdma6_4_2_2_offset.h"
#include "sdma6/sdma6_4_2_2_sh_mask.h"
#include "sdma7/sdma7_4_2_2_offset.h"
#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"

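/*
 * DUMP_REG records one register's dword-aligned byte offset and current
 * value into the caller-provided (*dump)[][2] array. It relies on the
 * locals "dump" and "i" in the calling function and guards against
 * writing past HQD_N_REGS entries.
 */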
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {             \
        if (WARN_ON_ONCE(i >= HQD_N_REGS))  \
            break;              \
        (*dump)[i][0] = (addr) << 2;        \
        (*dump)[i++][1] = RREG32(addr);     \
    } while (0)

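/* The MQD buffer handed in by KFD is laid out as a v9_sdma_mqd. */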
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
    return (struct v9_sdma_mqd *)mqd;
}

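/*
 * Arcturus has eight SDMA engines, each with its own register block.
 * Return the base (dword) offset of the per-queue RLC registers for the
 * given engine and queue, so that the relative mmSDMA0_RLC0_* register
 * names below can address any engine/queue pair.
 */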
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
                unsigned int engine_id,
                unsigned int queue_id)
{
    uint32_t sdma_engine_reg_base = 0;
    uint32_t sdma_rlc_reg_offset;

    switch (engine_id) {
    default:
        dev_warn(adev->dev,
             "Invalid sdma engine id (%d), using engine id 0\n",
             engine_id);
        fallthrough;
    case 0:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
                mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
        break;
    case 1:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
                mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
        break;
    case 2:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
                mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
        break;
    case 3:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
                mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
        break;
    case 4:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
                mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
        break;
    case 5:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
                mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
        break;
    case 6:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
                mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
        break;
    case 7:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
                mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
        break;
    }

    /* Per-queue register sets are evenly spaced within an engine. */
    sdma_rlc_reg_offset = sdma_engine_reg_base
        + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

    pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
            queue_id, sdma_rlc_reg_offset);

    return sdma_rlc_reg_offset;
}

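/*
 * Load a user-mode SDMA queue from its MQD: stop the ring buffer, wait
 * for the queue context to go idle, program the doorbell and ring buffer
 * registers, then re-enable the ring buffer.
 */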
int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
                 uint32_t __user *wptr, struct mm_struct *mm)
{
    struct v9_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    unsigned long end_jiffies;
    uint32_t data;
    uint64_t data64;
    uint64_t __user *wptr64 = (uint64_t __user *)wptr;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
                        m->sdma_queue_id);

    /* Stop the ring buffer while the queue registers are reprogrammed. */
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
        m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

    /* Wait up to 2 seconds for the queue context to go idle. */
    end_jiffies = msecs_to_jiffies(2000) + jiffies;
    while (true) {
        data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
        if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("SDMA RLC not idle in %s\n", __func__);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
           m->sdmax_rlcx_doorbell_offset);

    data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
                 ENABLE, 1);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
                m->sdmax_rlcx_rb_rptr);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
                m->sdmax_rlcx_rb_rptr_hi);

    /*
     * Read the saved write pointer through the owning process's address
     * space; if that fails (e.g. the process is exiting), fall back to
     * the read pointer so no stale commands are executed.
     */
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
    if (read_user_wptr(mm, wptr64, data64)) {
        WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
               lower_32_bits(data64));
        WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
               upper_32_bits(data64));
    } else {
        WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
               m->sdmax_rlcx_rb_rptr);
        WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
               m->sdmax_rlcx_rb_rptr_hi);
    }
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
            m->sdmax_rlcx_rb_base_hi);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
            m->sdmax_rlcx_rb_rptr_addr_lo);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
            m->sdmax_rlcx_rb_rptr_addr_hi);

    /* Re-enable the ring buffer; the queue is now live. */
    data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
                 RB_ENABLE, 1);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

    return 0;
}

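/*
 * Dump (byte offset, value) pairs for the four RLC register ranges of
 * one SDMA queue into a freshly allocated array owned by the caller.
 */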
int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
                 uint32_t engine_id, uint32_t queue_id,
                 uint32_t (**dump)[2], uint32_t *n_regs)
{
    uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
            engine_id, queue_id);
    uint32_t i = 0, reg;
    /* Four register ranges are dumped below: 19 + 6 + 7 + 10 entries. */
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

    *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
    if (*dump == NULL)
        return -ENOMEM;

    for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
         reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
         reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);

    WARN_ON_ONCE(i != HQD_N_REGS);
    *n_regs = i;

    return 0;
}

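/* A queue is considered occupied while its ring buffer is enabled. */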
bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
                void *mqd)
{
    struct v9_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    uint32_t sdma_rlc_rb_cntl;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
                        m->sdma_queue_id);

    sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

    if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
        return true;

    return false;
}

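/*
 * Tear down an SDMA queue: stop its ring buffer, wait up to utimeout
 * milliseconds for the queue context to go idle, then save the read
 * pointer back into the MQD so the queue state can be restored later.
 */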
int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
                unsigned int utimeout)
{
    struct v9_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    uint32_t temp;
    unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
                        m->sdma_queue_id);

    /* Stop the ring buffer. */
    temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
    temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

    /* Wait up to utimeout milliseconds for the queue context to go idle. */
    while (true) {
        temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
        if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("SDMA RLC not idle in %s\n", __func__);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    /* Turn off the doorbell. */
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
        RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
        SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

    /* Save the read pointer for a later restore. */
    m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
    m->sdmax_rlcx_rb_rptr_hi =
        RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

    return 0;
}

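/*
 * KFD-to-KGD interface for Arcturus: SDMA queue management uses the
 * functions above; the remaining hooks are shared with GFX v9.
 */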
const struct kfd2kgd_calls arcturus_kfd2kgd = {
    .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
    .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
    .init_interrupts = kgd_gfx_v9_init_interrupts,
    .hqd_load = kgd_gfx_v9_hqd_load,
    .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
    .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
    .hqd_dump = kgd_gfx_v9_hqd_dump,
    .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
    .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
    .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
    .hqd_destroy = kgd_gfx_v9_hqd_destroy,
    .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
    .wave_control_execute = kgd_gfx_v9_wave_control_execute,
    .get_atc_vmid_pasid_mapping_info =
                kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
    .set_vm_context_page_table_base =
                kgd_gfx_v9_set_vm_context_page_table_base,
    .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
    .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
};