/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

enum hqd_dequeue_request_type {
    NO_ACTION = 0,
    DRAIN_PIPE,
    RESET_WAVES
};

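/*
 * lock_srbm - select a MEC/pipe/queue/VMID in SRBM_GFX_CNTL.
 *
 * Holds srbm_mutex so that subsequent per-queue register accesses hit the
 * selected instance; unlock_srbm() clears the selection and drops the mutex.
 */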
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
            uint32_t queue, uint32_t vmid)
{
    uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

    mutex_lock(&adev->srbm_mutex);
    WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
    WREG32(mmSRBM_GFX_CNTL, 0);
    mutex_unlock(&adev->srbm_mutex);
}

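/*
 * acquire_queue - map a flat pipe_id onto an (MEC, pipe) pair and select
 * that compute queue via lock_srbm(); release_queue() is the counterpart.
 */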
static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
                uint32_t queue_id)
{
    uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
    uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

    lock_srbm(adev, mec, pipe, queue_id, 0);
}

static void release_queue(struct amdgpu_device *adev)
{
    unlock_srbm(adev);
}

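/* Program the per-VMID shader memory configuration, aperture and base registers. */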
static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
                    uint32_t sh_mem_config,
                    uint32_t sh_mem_ape1_base,
                    uint32_t sh_mem_ape1_limit,
                    uint32_t sh_mem_bases)
{
    lock_srbm(adev, 0, 0, 0, vmid);

    WREG32(mmSH_MEM_CONFIG, sh_mem_config);
    WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
    WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
    WREG32(mmSH_MEM_BASES, sh_mem_bases);

    unlock_srbm(adev);
}

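/*
 * Write the PASID for a VMID into the ATC mapping register, wait for the
 * update to complete, and mirror the mapping for the IH block.
 */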
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
                    unsigned int vmid)
{
    /*
     * We have to assume that there is no outstanding mapping.
     * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
     * a mapping is in progress or because a mapping finished
     * and the SW cleared it.
     * So the protocol is to always wait & clear.
     */
    uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
            ATC_VMID0_PASID_MAPPING__VALID_MASK;

    WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

    while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
        cpu_relax();
    WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

    /* Mapping vmid to pasid also for IH block */
    WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

    return 0;
}

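/* Enable time-stamp and opcode-error CPC interrupts for the given compute pipe. */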
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
{
    uint32_t mec;
    uint32_t pipe;

    mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
    pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

    lock_srbm(adev, mec, pipe, 0, 0);

    WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
            CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

    unlock_srbm(adev);

    return 0;
}

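/* Compute the register offset of the SDMA RLC queue described by the MQD. */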
static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
{
    uint32_t retval;

    retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
        m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;

    pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
            m->sdma_engine_id, m->sdma_queue_id, retval);

    return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
    return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
    return (struct vi_sdma_mqd *)mqd;
}

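/*
 * Load a compute MQD into the hardware queue descriptor (HQD) registers,
 * program the doorbell and the optional user write pointer, and activate
 * the queue.
 */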
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
            uint32_t pipe_id, uint32_t queue_id,
            uint32_t __user *wptr, uint32_t wptr_shift,
            uint32_t wptr_mask, struct mm_struct *mm)
{
    struct vi_mqd *m;
    uint32_t *mqd_hqd;
    uint32_t reg, wptr_val, data;
    bool valid_wptr = false;

    m = get_mqd(mqd);

    acquire_queue(adev, pipe_id, queue_id);

    /* HIQ is set during driver init period with vmid set to 0 */
    if (m->cp_hqd_vmid == 0) {
        uint32_t value, mec, pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
            mec, pipe, queue_id);
        value = RREG32(mmRLC_CP_SCHEDULERS);
        value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
            ((mec << 5) | (pipe << 3) | queue_id | 0x80));
        WREG32(mmRLC_CP_SCHEDULERS, value);
    }

    /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
    mqd_hqd = &m->cp_mqd_base_addr_lo;

    for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
        WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

    /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
     * This is safe since EOP RPTR==WPTR for any inactive HQD
     * on ASICs that do not support context-save.
     * EOP writes/reads can start anywhere in the ring.
     */
    if (adev->asic_type != CHIP_TONGA) {
        WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
        WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
        WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
    }

    for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
        WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

    /* Copy userspace write pointer value to register.
     * Activate doorbell logic to monitor subsequent changes.
     */
    data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
    WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

    /* read_user_ptr may take the mm->mmap_lock.
     * release srbm_mutex to avoid circular dependency between
     * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
     */
    release_queue(adev);
    valid_wptr = read_user_wptr(mm, wptr, wptr_val);
    acquire_queue(adev, pipe_id, queue_id);
    if (valid_wptr)
        WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

    data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
    WREG32(mmCP_HQD_ACTIVE, data);

    release_queue(adev);

    return 0;
}

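/*
 * Dump the HQD register block for one compute queue into a freshly
 * allocated array of (offset, value) pairs returned via *dump.
 */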
static int kgd_hqd_dump(struct amdgpu_device *adev,
            uint32_t pipe_id, uint32_t queue_id,
            uint32_t (**dump)[2], uint32_t *n_regs)
{
    uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {             \
        if (WARN_ON_ONCE(i >= HQD_N_REGS))  \
            break;              \
        (*dump)[i][0] = (addr) << 2;        \
        (*dump)[i++][1] = RREG32(addr);     \
    } while (0)

    *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
    if (*dump == NULL)
        return -ENOMEM;

    acquire_queue(adev, pipe_id, queue_id);

    DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
    DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
    DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
    DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

    for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
        DUMP_REG(reg);

    release_queue(adev);

    WARN_ON_ONCE(i != HQD_N_REGS);
    *n_regs = i;

    return 0;
}

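/*
 * Load an SDMA MQD into the RLC queue registers: disable the ring buffer,
 * wait for the engine to go idle, then restore the doorbell, pointers and
 * ring base before re-enabling the ring buffer.
 */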
static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
                 uint32_t __user *wptr, struct mm_struct *mm)
{
    struct vi_sdma_mqd *m;
    unsigned long end_jiffies;
    uint32_t sdma_rlc_reg_offset;
    uint32_t data;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
        m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

    end_jiffies = msecs_to_jiffies(2000) + jiffies;
    while (true) {
        data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
        if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("SDMA RLC not idle in %s\n", __func__);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
                 ENABLE, 1);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
                m->sdmax_rlcx_rb_rptr);

    if (read_user_wptr(mm, wptr, data))
        WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
    else
        WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
               m->sdmax_rlcx_rb_rptr);

    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
                m->sdmax_rlcx_virtual_addr);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
            m->sdmax_rlcx_rb_base_hi);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
            m->sdmax_rlcx_rb_rptr_addr_lo);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
            m->sdmax_rlcx_rb_rptr_addr_hi);

    data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
                 RB_ENABLE, 1);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

    return 0;
}

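/*
 * Dump the RLC register ranges of one SDMA queue into an array of
 * (offset, value) pairs, reusing the DUMP_REG() helper defined above.
 */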
static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
                 uint32_t engine_id, uint32_t queue_id,
                 uint32_t (**dump)[2], uint32_t *n_regs)
{
    uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
        queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
    uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

    *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
    if (*dump == NULL)
        return -ENOMEM;

    for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
        DUMP_REG(sdma_offset + reg);
    for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
         reg++)
        DUMP_REG(sdma_offset + reg);
    for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
         reg++)
        DUMP_REG(sdma_offset + reg);
    for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
         reg++)
        DUMP_REG(sdma_offset + reg);
    for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
         reg++)
        DUMP_REG(sdma_offset + reg);

    WARN_ON_ONCE(i != HQD_N_REGS);
    *n_regs = i;

    return 0;
}

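/*
 * Report whether the HQD selected by pipe_id/queue_id is active and backed
 * by the given queue address.
 */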
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
                uint64_t queue_address, uint32_t pipe_id,
                uint32_t queue_id)
{
    uint32_t act;
    bool retval = false;
    uint32_t low, high;

    acquire_queue(adev, pipe_id, queue_id);
    act = RREG32(mmCP_HQD_ACTIVE);
    if (act) {
        low = lower_32_bits(queue_address >> 8);
        high = upper_32_bits(queue_address >> 8);

        if (low == RREG32(mmCP_HQD_PQ_BASE) &&
                high == RREG32(mmCP_HQD_PQ_BASE_HI))
            retval = true;
    }
    release_queue(adev);
    return retval;
}

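/* Report whether the SDMA queue described by the MQD still has its ring buffer enabled. */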
static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
    struct vi_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    uint32_t sdma_rlc_rb_cntl;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

    sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

    if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
        return true;

    return false;
}

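/*
 * Preempt or reset a compute queue: issue a dequeue request of the chosen
 * type and wait up to utimeout milliseconds for the HQD to deactivate.
 */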
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
                enum kfd_preempt_type reset_type,
                unsigned int utimeout, uint32_t pipe_id,
                uint32_t queue_id)
{
    uint32_t temp;
    enum hqd_dequeue_request_type type;
    unsigned long flags, end_jiffies;
    int retry;
    struct vi_mqd *m = get_mqd(mqd);

    if (amdgpu_in_reset(adev))
        return -EIO;

    acquire_queue(adev, pipe_id, queue_id);

    if (m->cp_hqd_vmid == 0)
        WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

    switch (reset_type) {
    case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
        type = DRAIN_PIPE;
        break;
    case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
        type = RESET_WAVES;
        break;
    default:
        type = DRAIN_PIPE;
        break;
    }

    /* Workaround: If IQ timer is active and the wait time is close to or
     * equal to 0, dequeueing is not safe. Wait until either the wait time
     * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
     * cleared before continuing. Also, ensure wait times are set to at
     * least 0x3.
     */
    local_irq_save(flags);
    preempt_disable();
    retry = 5000; /* wait for 500 usecs at maximum */
    while (true) {
        temp = RREG32(mmCP_HQD_IQ_TIMER);
        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
            pr_debug("HW is processing IQ\n");
            goto loop;
        }
        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
            if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
                    == 3) /* SEM-rearm is safe */
                break;
            /* Wait time 3 is safe for CP, but our MMIO read/write
             * time is close to 1 microsecond, so check for 10 to
             * leave more buffer room
             */
            if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
                    >= 10)
                break;
            pr_debug("IQ timer is active\n");
        } else
            break;
loop:
        if (!retry) {
            pr_err("CP HQD IQ timer status time out\n");
            break;
        }
        ndelay(100);
        --retry;
    }
    retry = 1000;
    while (true) {
        temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
        if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
            break;
        pr_debug("Dequeue request is pending\n");

        if (!retry) {
            pr_err("CP HQD dequeue request time out\n");
            break;
        }
        ndelay(100);
        --retry;
    }
    local_irq_restore(flags);
    preempt_enable();

    WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

    end_jiffies = (utimeout * HZ / 1000) + jiffies;
    while (true) {
        temp = RREG32(mmCP_HQD_ACTIVE);
        if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("cp queue preemption time out.\n");
            release_queue(adev);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    release_queue(adev);
    return 0;
}

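/*
 * Stop an SDMA queue: disable its ring buffer, wait for the engine to go
 * idle, clear the doorbell and save the final read pointer back to the MQD.
 */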
static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
                unsigned int utimeout)
{
    struct vi_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    uint32_t temp;
    unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

    temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
    temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

    while (true) {
        temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
        if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("SDMA RLC not idle in %s\n", __func__);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
    WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
        RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
        SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

    m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);

    return 0;
}

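/*
 * Read back the ATC mapping for a VMID; returns true and the PASID in
 * *p_pasid when the mapping is valid.
 */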
static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
                    uint8_t vmid, uint16_t *p_pasid)
{
    uint32_t value;

    value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
    *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

    return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

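/*
 * Issue an SQ wave-control command to the shader engines selected by
 * gfx_index_val, then restore GRBM_GFX_INDEX to broadcast mode.
 */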
static int kgd_wave_control_execute(struct amdgpu_device *adev,
                    uint32_t gfx_index_val,
                    uint32_t sq_cmd)
{
    uint32_t data = 0;

    mutex_lock(&adev->grbm_idx_mutex);

    WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
    WREG32(mmSQ_CMD, sq_cmd);

    data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
        INSTANCE_BROADCAST_WRITES, 1);
    data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
        SH_BROADCAST_WRITES, 1);
    data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
        SE_BROADCAST_WRITES, 1);

    WREG32(mmGRBM_GFX_INDEX, data);
    mutex_unlock(&adev->grbm_idx_mutex);

    return 0;
}

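/* Program the scratch backing virtual address for the given VMID. */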
static void set_scratch_backing_va(struct amdgpu_device *adev,
                    uint64_t va, uint32_t vmid)
{
    lock_srbm(adev, 0, 0, 0, vmid);
    WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
    unlock_srbm(adev);
}

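/*
 * Set the page table base address for a KFD-owned VMID; the register offset
 * assumes KFD VMIDs start at 8 and map onto VM_CONTEXT8 and up.
 */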
static void set_vm_context_page_table_base(struct amdgpu_device *adev,
        uint32_t vmid, uint64_t page_table_base)
{
    if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
        pr_err("trying to set page table base for wrong VMID\n");
        return;
    }
    WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
            lower_32_bits(page_table_base));
}

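/* KFD-to-KGD callback table exported for GFXv8 (VI family) devices. */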
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
    .program_sh_mem_settings = kgd_program_sh_mem_settings,
    .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
    .init_interrupts = kgd_init_interrupts,
    .hqd_load = kgd_hqd_load,
    .hqd_sdma_load = kgd_hqd_sdma_load,
    .hqd_dump = kgd_hqd_dump,
    .hqd_sdma_dump = kgd_hqd_sdma_dump,
    .hqd_is_occupied = kgd_hqd_is_occupied,
    .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
    .hqd_destroy = kgd_hqd_destroy,
    .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
    .wave_control_execute = kgd_wave_control_execute,
    .get_atc_vmid_pasid_mapping_info =
            get_atc_vmid_pasid_mapping_info,
    .set_scratch_backing_va = set_scratch_backing_va,
    .set_vm_context_page_table_base = set_vm_context_page_table_base,
};