/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "oss/osssys_6_0_0_offset.h"
#include "oss/osssys_6_0_0_sh_mask.h"
#include "soc15_common.h"
#include "soc15d.h"
#include "v11_structs.h"
#include "soc21.h"

enum hqd_dequeue_request_type {
    NO_ACTION = 0,
    DRAIN_PIPE,
    RESET_WAVES,
    SAVE_WAVES
};

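/*
 * Select the MEC/pipe/queue/VMID register aperture via GRBM and hold
 * srbm_mutex until the matching unlock_srbm() call restores the default
 * selection.
 */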
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
            uint32_t queue, uint32_t vmid)
{
    mutex_lock(&adev->srbm_mutex);
    soc21_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
    soc21_grbm_select(adev, 0, 0, 0, 0);
    mutex_unlock(&adev->srbm_mutex);
}

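/*
 * Translate a flat pipe_id into its MEC and pipe, then select that
 * compute queue's register aperture (paired with release_queue()).
 */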
static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
                uint32_t queue_id)
{
    uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
    uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

    lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
                   uint32_t pipe_id, uint32_t queue_id)
{
    unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
            queue_id;

    return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
    unlock_srbm(adev);
}

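/*
 * Program the per-VMID SH_MEM_CONFIG and SH_MEM_BASES registers. The
 * APE1 base/limit arguments are accepted but not written on GFX11.
 */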
static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
                    uint32_t sh_mem_config,
                    uint32_t sh_mem_ape1_base,
                    uint32_t sh_mem_ape1_limit,
                    uint32_t sh_mem_bases)
{
    lock_srbm(adev, 0, 0, 0, vmid);

    WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
    WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);

    unlock_srbm(adev);
}

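/*
 * Record the VMID -> PASID mapping in the IH block's per-VMID LUT for
 * the GFX client.
 */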
static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
                    unsigned int vmid)
{
    uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

    /* Mapping vmid to pasid also for IH block */
    pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
            vmid, pasid);
    WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);

    return 0;
}

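/*
 * Enable time-stamp and opcode-error interrupts in CPC_INT_CNTL for the
 * given compute pipe.
 */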
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
{
    uint32_t mec;
    uint32_t pipe;

    mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
    pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

    lock_srbm(adev, mec, pipe, 0, 0);

    WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
        CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
        CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

    unlock_srbm(adev);

    return 0;
}

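/*
 * Compute the register offset of a specific SDMA queue's RLC register
 * block from its engine and queue IDs.
 */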
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
                unsigned int engine_id,
                unsigned int queue_id)
{
    uint32_t sdma_engine_reg_base = 0;
    uint32_t sdma_rlc_reg_offset;

    switch (engine_id) {
    case 0:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
                regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
        break;
    case 1:
        sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
                regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
        break;
    default:
        BUG();
    }

    sdma_rlc_reg_offset = sdma_engine_reg_base
        + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);

    pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
            queue_id, sdma_rlc_reg_offset);

    return sdma_rlc_reg_offset;
}

static inline struct v11_compute_mqd *get_mqd(void *mqd)
{
    return (struct v11_compute_mqd *)mqd;
}

static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
{
    return (struct v11_sdma_mqd *)mqd;
}

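/*
 * Load a compute queue: program the HQD registers from the MQD image,
 * optionally arm a one-shot WPTR poll on the user write pointer, and
 * activate the queue.
 */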
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
            uint32_t queue_id, uint32_t __user *wptr,
            uint32_t wptr_shift, uint32_t wptr_mask,
            struct mm_struct *mm)
{
    struct v11_compute_mqd *m;
    uint32_t *mqd_hqd;
    uint32_t reg, hqd_base, data;

    m = get_mqd(mqd);

    pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
    acquire_queue(adev, pipe_id, queue_id);

    /* HIQ is set during driver init period with vmid set to 0*/
    if (m->cp_hqd_vmid == 0) {
        uint32_t value, mec, pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
            mec, pipe, queue_id);
        value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
        value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
            ((mec << 5) | (pipe << 3) | queue_id | 0x80));
        WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
    }

    /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
    mqd_hqd = &m->cp_mqd_base_addr_lo;
    hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);

    for (reg = hqd_base;
         reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
        WREG32(reg, mqd_hqd[reg - hqd_base]);


    /* Activate doorbell logic before triggering WPTR poll. */
    data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
    WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);

    if (wptr) {
        /* Don't read wptr with get_user because the user
         * context may not be accessible (if this function
         * runs in a work queue). Instead trigger a one-shot
         * polling read from memory in the CP. This assumes
         * that wptr is GPU-accessible in the queue's VMID via
         * ATC or SVM. WPTR==RPTR before starting the poll so
         * the CP starts fetching new commands from the right
         * place.
         *
         * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
         * tricky. Assume that the queue didn't overflow. The
         * number of valid bits in the 32-bit RPTR depends on
         * the queue size. The remaining bits are taken from
         * the saved 64-bit WPTR. If the WPTR wrapped, add the
         * queue size.
         */
        uint32_t queue_size =
            2 << REG_GET_FIELD(m->cp_hqd_pq_control,
                       CP_HQD_PQ_CONTROL, QUEUE_SIZE);
        uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

        if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
            guessed_wptr += queue_size;
        guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
        guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

        WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
               lower_32_bits(guessed_wptr));
        WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
               upper_32_bits(guessed_wptr));
        WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
               lower_32_bits((uint64_t)wptr));
        WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
               upper_32_bits((uint64_t)wptr));
        pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
             (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
        WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
               (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
    }

    /* Start the EOP fetcher */
    WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
           REG_SET_FIELD(m->cp_hqd_eop_rptr,
                 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

    data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
    WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);

    release_queue(adev);

    return 0;
}

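/*
 * Load the HIQ by submitting a MAP_QUEUES packet through the KIQ ring
 * instead of writing the HQD registers directly.
 */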
static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
                  uint32_t pipe_id, uint32_t queue_id,
                  uint32_t doorbell_off)
{
    struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
    struct v11_compute_mqd *m;
    uint32_t mec, pipe;
    int r;

    m = get_mqd(mqd);

    acquire_queue(adev, pipe_id, queue_id);

    mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
    pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

    pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
         mec, pipe, queue_id);

    spin_lock(&adev->gfx.kiq.ring_lock);
    r = amdgpu_ring_alloc(kiq_ring, 7);
    if (r) {
        pr_err("Failed to alloc KIQ (%d).\n", r);
        goto out_unlock;
    }

    amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
    amdgpu_ring_write(kiq_ring,
              PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
              PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
              PACKET3_MAP_QUEUES_QUEUE(queue_id) |
              PACKET3_MAP_QUEUES_PIPE(pipe) |
              PACKET3_MAP_QUEUES_ME((mec - 1)) |
              PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
              PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
              PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
              PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
    amdgpu_ring_write(kiq_ring,
            PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
    amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
    amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
    amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
    amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
    amdgpu_ring_commit(kiq_ring);

out_unlock:
    spin_unlock(&adev->gfx.kiq.ring_lock);
    release_queue(adev);

    return r;
}

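/*
 * Snapshot the HQD register range into a kmalloc'd array of
 * (offset, value) pairs; the caller is responsible for freeing it.
 */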
static int hqd_dump_v11(struct amdgpu_device *adev,
            uint32_t pipe_id, uint32_t queue_id,
            uint32_t (**dump)[2], uint32_t *n_regs)
{
    uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {             \
        if (WARN_ON_ONCE(i >= HQD_N_REGS))  \
            break;              \
        (*dump)[i][0] = (addr) << 2;        \
        (*dump)[i++][1] = RREG32(addr);     \
    } while (0)

    *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
    if (*dump == NULL)
        return -ENOMEM;

    acquire_queue(adev, pipe_id, queue_id);

    for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
         reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
        DUMP_REG(reg);

    release_queue(adev);

    WARN_ON_ONCE(i != HQD_N_REGS);
    *n_regs = i;

    return 0;
}

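/*
 * Load an SDMA queue: disable the ring buffer, wait for the engine to
 * go idle, program the RLC queue registers from the SDMA MQD, then
 * re-enable the ring buffer.
 */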
static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
                 uint32_t __user *wptr, struct mm_struct *mm)
{
    struct v11_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    unsigned long end_jiffies;
    uint32_t data;
    uint64_t data64;
    uint64_t __user *wptr64 = (uint64_t __user *)wptr;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
                        m->sdma_queue_id);

    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
        m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));

    end_jiffies = msecs_to_jiffies(2000) + jiffies;
    while (true) {
        data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
        if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("SDMA RLC not idle in %s\n", __func__);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
           m->sdmax_rlcx_doorbell_offset);

    data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
                 ENABLE, 1);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
                m->sdmax_rlcx_rb_rptr);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
                m->sdmax_rlcx_rb_rptr_hi);

    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
    if (read_user_wptr(mm, wptr64, data64)) {
        WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
               lower_32_bits(data64));
        WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
               upper_32_bits(data64));
    } else {
        WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
               m->sdmax_rlcx_rb_rptr);
        WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
               m->sdmax_rlcx_rb_rptr_hi);
    }
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);

    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
            m->sdmax_rlcx_rb_base_hi);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
            m->sdmax_rlcx_rb_rptr_addr_lo);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
            m->sdmax_rlcx_rb_rptr_addr_hi);

    data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
                 RB_ENABLE, 1);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);

    return 0;
}

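/*
 * Dump the SDMA queue register ranges into a kmalloc'd array of
 * (offset, value) pairs; the caller frees the buffer.
 */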
static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
                 uint32_t engine_id, uint32_t queue_id,
                 uint32_t (**dump)[2], uint32_t *n_regs)
{
    uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
            engine_id, queue_id);
    uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (7+11+1+12+12)

    *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
    if (*dump == NULL)
        return -ENOMEM;

    for (reg = regSDMA0_QUEUE0_RB_CNTL;
         reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
         reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
         reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
         reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);
    for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
         reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
        DUMP_REG(sdma_rlc_reg_offset + reg);

    WARN_ON_ONCE(i != HQD_N_REGS);
    *n_regs = i;

    return 0;
}

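/*
 * A compute queue is considered occupied when its HQD is active and the
 * programmed PQ base matches the queue address being checked.
 */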
static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
                uint32_t pipe_id, uint32_t queue_id)
{
    uint32_t act;
    bool retval = false;
    uint32_t low, high;

    acquire_queue(adev, pipe_id, queue_id);
    act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
    if (act) {
        low = lower_32_bits(queue_address >> 8);
        high = upper_32_bits(queue_address >> 8);

        if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
           high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
            retval = true;
    }
    release_queue(adev);
    return retval;
}

static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
{
    struct v11_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    uint32_t sdma_rlc_rb_cntl;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
                        m->sdma_queue_id);

    sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);

    if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
        return true;

    return false;
}

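/*
 * Preempt a compute queue: issue the requested dequeue type and poll
 * CP_HQD_ACTIVE until the queue goes inactive or the timeout expires.
 */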
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
                enum kfd_preempt_type reset_type,
                unsigned int utimeout, uint32_t pipe_id,
                uint32_t queue_id)
{
    enum hqd_dequeue_request_type type;
    unsigned long end_jiffies;
    uint32_t temp;
    struct v11_compute_mqd *m = get_mqd(mqd);

    acquire_queue(adev, pipe_id, queue_id);

    if (m->cp_hqd_vmid == 0)
        WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

    switch (reset_type) {
    case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
        type = DRAIN_PIPE;
        break;
    case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
        type = RESET_WAVES;
        break;
    default:
        type = DRAIN_PIPE;
        break;
    }

    WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);

    end_jiffies = (utimeout * HZ / 1000) + jiffies;
    while (true) {
        temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
        if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("cp queue pipe %d queue %d preemption failed\n",
                    pipe_id, queue_id);
            release_queue(adev);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    release_queue(adev);
    return 0;
}

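/*
 * Tear down an SDMA queue: disable the ring buffer, wait for the engine
 * to go idle, and save the final read pointer back into the MQD.
 */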
static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
                unsigned int utimeout)
{
    struct v11_sdma_mqd *m;
    uint32_t sdma_rlc_reg_offset;
    uint32_t temp;
    unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

    m = get_sdma_mqd(mqd);
    sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
                        m->sdma_queue_id);

    temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
    temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);

    while (true) {
        temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
        if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
            break;
        if (time_after(jiffies, end_jiffies)) {
            pr_err("SDMA RLC not idle in %s\n", __func__);
            return -ETIME;
        }
        usleep_range(500, 1000);
    }

    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
    WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
        RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
        SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);

    m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
    m->sdmax_rlcx_rb_rptr_hi =
        RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);

    return 0;
}

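/*
 * Issue an SQ_CMD wave-control operation under the caller-supplied
 * GRBM_GFX_INDEX selection, then restore broadcast mode.
 */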
static int wave_control_execute_v11(struct amdgpu_device *adev,
                    uint32_t gfx_index_val,
                    uint32_t sq_cmd)
{
    uint32_t data = 0;

    mutex_lock(&adev->grbm_idx_mutex);

    WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
    WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);

    data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
        INSTANCE_BROADCAST_WRITES, 1);
    data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
        SA_BROADCAST_WRITES, 1);
    data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
        SE_BROADCAST_WRITES, 1);

    WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
    mutex_unlock(&adev->grbm_idx_mutex);

    return 0;
}

static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
        uint32_t vmid, uint64_t page_table_base)
{
    if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
        pr_err("trying to set page table base for wrong VMID %u\n",
               vmid);
        return;
    }

    /* SDMA is on gfxhub as well for gfx11 adapters */
    adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

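/*
 * kfd2kgd function table used by KFD to drive GFX11 compute and SDMA
 * queues through amdgpu.
 */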
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
    .program_sh_mem_settings = program_sh_mem_settings_v11,
    .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
    .init_interrupts = init_interrupts_v11,
    .hqd_load = hqd_load_v11,
    .hiq_mqd_load = hiq_mqd_load_v11,
    .hqd_sdma_load = hqd_sdma_load_v11,
    .hqd_dump = hqd_dump_v11,
    .hqd_sdma_dump = hqd_sdma_dump_v11,
    .hqd_is_occupied = hqd_is_occupied_v11,
    .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
    .hqd_destroy = hqd_destroy_v11,
    .hqd_sdma_destroy = hqd_sdma_destroy_v11,
    .wave_control_execute = wave_control_execute_v11,
    .get_atc_vmid_pasid_mapping_info = NULL,
    .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
};