0001 /*
0002  * Copyright 2016 Advanced Micro Devices, Inc.
0003  * All Rights Reserved.
0004  *
0005  * Permission is hereby granted, free of charge, to any person obtaining a
0006  * copy of this software and associated documentation files (the
0007  * "Software"), to deal in the Software without restriction, including
0008  * without limitation the rights to use, copy, modify, merge, publish,
0009  * distribute, sub license, and/or sell copies of the Software, and to
0010  * permit persons to whom the Software is furnished to do so, subject to
0011  * the following conditions:
0012  *
0013  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0014  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0015  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
0016  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
0017  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
0018  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
0019  * USE OR OTHER DEALINGS IN THE SOFTWARE.
0020  *
0021  * The above copyright notice and this permission notice (including the
0022  * next paragraph) shall be included in all copies or substantial portions
0023  * of the Software.
0024  *
0025  */
0026 
0027 #include <linux/firmware.h>
0028 #include <drm/drm_drv.h>
0029 
0030 #include "amdgpu.h"
0031 #include "amdgpu_vce.h"
0032 #include "soc15.h"
0033 #include "soc15d.h"
0034 #include "soc15_common.h"
0035 #include "mmsch_v1_0.h"
0036 
0037 #include "vce/vce_4_0_offset.h"
0038 #include "vce/vce_4_0_default.h"
0039 #include "vce/vce_4_0_sh_mask.h"
0040 #include "mmhub/mmhub_1_0_offset.h"
0041 #include "mmhub/mmhub_1_0_sh_mask.h"
0042 
0043 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
0044 
0045 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02
0046 
0047 #define VCE_V4_0_FW_SIZE    (384 * 1024)
0048 #define VCE_V4_0_STACK_SIZE (64 * 1024)
0049 #define VCE_V4_0_DATA_SIZE  ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
0050 
0051 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
0052 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
0053 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
0054 
0055 /**
0056  * vce_v4_0_ring_get_rptr - get read pointer
0057  *
0058  * @ring: amdgpu_ring pointer
0059  *
0060  * Returns the current hardware read pointer
0061  */
0062 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
0063 {
0064     struct amdgpu_device *adev = ring->adev;
0065 
0066     if (ring->me == 0)
0067         return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
0068     else if (ring->me == 1)
0069         return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
0070     else
0071         return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
0072 }
0073 
0074 /**
0075  * vce_v4_0_ring_get_wptr - get write pointer
0076  *
0077  * @ring: amdgpu_ring pointer
0078  *
0079  * Returns the current hardware write pointer
0080  */
0081 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
0082 {
0083     struct amdgpu_device *adev = ring->adev;
0084 
0085     if (ring->use_doorbell)
0086         return *ring->wptr_cpu_addr;
0087 
0088     if (ring->me == 0)
0089         return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
0090     else if (ring->me == 1)
0091         return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
0092     else
0093         return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
0094 }
0095 
0096 /**
0097  * vce_v4_0_ring_set_wptr - set write pointer
0098  *
0099  * @ring: amdgpu_ring pointer
0100  *
0101  * Commits the write pointer to the hardware
0102  */
0103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
0104 {
0105     struct amdgpu_device *adev = ring->adev;
0106 
0107     if (ring->use_doorbell) {
0108         /* XXX check if swapping is necessary on BE */
0109         *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
0110         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
0111         return;
0112     }
0113 
0114     if (ring->me == 0)
0115         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
0116             lower_32_bits(ring->wptr));
0117     else if (ring->me == 1)
0118         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
0119             lower_32_bits(ring->wptr));
0120     else
0121         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
0122             lower_32_bits(ring->wptr));
0123 }
0124 
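/**
 * vce_v4_0_firmware_loaded - poll for firmware load completion
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the VCPU "firmware loaded" report, soft-resetting
 * the ECPU between retry rounds.
 * Returns 0 once the firmware reports loaded, or -ETIMEDOUT.
 */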
0125 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
0126 {
0127     int i, j;
0128 
0129     for (i = 0; i < 10; ++i) {
0130         for (j = 0; j < 100; ++j) {
0131             uint32_t status =
0132                 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
0133 
0134             if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
0135                 return 0;
0136             mdelay(10);
0137         }
0138 
0139         DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
0140         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
0141                 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
0142                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
0143         mdelay(10);
0144         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
0145                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
0146         mdelay(10);
0147 
0148     }
0149 
0150     return -ETIMEDOUT;
0151 }
0152 
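/**
 * vce_v4_0_mmsch_start - hand the init table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the MMSCH init descriptor
 *
 * Programs the MMSCH VF context registers with the GPU address and size
 * of the descriptor, kicks off initialization through the mailbox and
 * waits for the MMSCH response.
 * Returns 0 on success, or -EBUSY if the MMSCH does not respond in time.
 */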
0153 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
0154                 struct amdgpu_mm_table *table)
0155 {
0156     uint32_t data = 0, loop;
0157     uint64_t addr = table->gpu_addr;
0158     struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
0159     uint32_t size;
0160 
0161     size = header->header_size + header->vce_table_size + header->uvd_table_size;
0162 
0163     /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
0164     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
0165     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
0166 
0167     /* 2, update vmid of descriptor */
0168     data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
0169     data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
0170     data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
0171     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
0172 
0173     /* 3, notify mmsch about the size of this descriptor */
0174     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
0175 
0176     /* 4, set resp to zero */
0177     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
0178 
0179     WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
0180     *adev->vce.ring[0].wptr_cpu_addr = 0;
0181     adev->vce.ring[0].wptr = 0;
0182     adev->vce.ring[0].wptr_old = 0;
0183 
0184     /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
0185     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
0186 
0187     data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
0188     loop = 1000;
0189     while ((data & 0x10000002) != 0x10000002) {
0190         udelay(10);
0191         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
0192         loop--;
0193         if (!loop)
0194             break;
0195     }
0196 
0197     if (!loop) {
0198         dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
0199         return -EBUSY;
0200     }
0201 
0202     return 0;
0203 }
0204 
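/**
 * vce_v4_0_sriov_start - start VCE block under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH v1.0 init table (ring registers, memory controller
 * programming and VCPU start sequence) and hands it to the MMSCH, which
 * programs the hardware on behalf of the guest.
 */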
0205 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
0206 {
0207     struct amdgpu_ring *ring;
0208     uint32_t offset, size;
0209     uint32_t table_size = 0;
0210     struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
0211     struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
0212     struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
0213     struct mmsch_v1_0_cmd_end end = { { 0 } };
0214     uint32_t *init_table = adev->virt.mm_table.cpu_addr;
0215     struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
0216 
0217     direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
0218     direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
0219     direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
0220     end.cmd_header.command_type = MMSCH_COMMAND__END;
0221 
0222     if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
0223         header->version = MMSCH_VERSION;
0224         header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
0225 
0226         if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
0227             header->vce_table_offset = header->header_size;
0228         else
0229             header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
0230 
0231         init_table += header->vce_table_offset;
0232 
0233         ring = &adev->vce.ring[0];
0234         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
0235                         lower_32_bits(ring->gpu_addr));
0236         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
0237                         upper_32_bits(ring->gpu_addr));
0238         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
0239                         ring->ring_size / 4);
0240 
0241         /* beginning of MC_RESUME */
0242         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
0243         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
0244         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
0245         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
0246         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
0247 
0248         offset = AMDGPU_VCE_FIRMWARE_OFFSET;
0249         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0250             uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
0251             uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
0252             uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
0253 
0254             MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0255                         mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
0256             MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0257                         mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
0258                         (tmr_mc_addr >> 40) & 0xff);
0259             MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
0260         } else {
0261             MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0262                         mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
0263                         adev->vce.gpu_addr >> 8);
0264             MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0265                         mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
0266                         (adev->vce.gpu_addr >> 40) & 0xff);
0267             MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
0268                         offset & ~0x0f000000);
0269 
0270         }
0271         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0272                         mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
0273                         adev->vce.gpu_addr >> 8);
0274         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0275                         mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
0276                         (adev->vce.gpu_addr >> 40) & 0xff);
0277         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0278                         mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
0279                         adev->vce.gpu_addr >> 8);
0280         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
0281                         mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
0282                         (adev->vce.gpu_addr >> 40) & 0xff);
0283 
0284         size = VCE_V4_0_FW_SIZE;
0285         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
0286 
0287         offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
0288         size = VCE_V4_0_STACK_SIZE;
0289         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
0290                     (offset & ~0x0f000000) | (1 << 24));
0291         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
0292 
0293         offset += size;
0294         size = VCE_V4_0_DATA_SIZE;
0295         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
0296                     (offset & ~0x0f000000) | (2 << 24));
0297         MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
0298 
0299         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
0300         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
0301                            VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
0302                            VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
0303 
0304         /* end of MC_RESUME */
0305         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
0306                            VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
0307         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
0308                            ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
0309         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
0310                            ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
0311 
0312         MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
0313                           VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
0314                           VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
0315 
0316         /* clear BUSY flag */
0317         MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
0318                            ~VCE_STATUS__JOB_BUSY_MASK, 0);
0319 
0320         /* add end packet */
0321         memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
0322         table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
0323         header->vce_table_size = table_size;
0324     }
0325 
0326     return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
0327 }
0328 
0329 /**
0330  * vce_v4_0_start - start VCE block
0331  *
0332  * @adev: amdgpu_device pointer
0333  *
0334  * Setup and start the VCE block
0335  */
0336 static int vce_v4_0_start(struct amdgpu_device *adev)
0337 {
0338     struct amdgpu_ring *ring;
0339     int r;
0340 
0341     ring = &adev->vce.ring[0];
0342 
0343     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
0344     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
0345     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
0346     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
0347     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
0348 
0349     ring = &adev->vce.ring[1];
0350 
0351     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
0352     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
0353     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
0354     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
0355     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
0356 
0357     ring = &adev->vce.ring[2];
0358 
0359     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
0360     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
0361     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
0362     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
0363     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
0364 
0365     vce_v4_0_mc_resume(adev);
0366     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
0367             ~VCE_STATUS__JOB_BUSY_MASK);
0368 
0369     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
0370 
0371     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
0372             ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
0373     mdelay(100);
0374 
0375     r = vce_v4_0_firmware_loaded(adev);
0376 
0377     /* clear BUSY flag */
0378     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
0379 
0380     if (r) {
0381         DRM_ERROR("VCE not responding, giving up!!!\n");
0382         return r;
0383     }
0384 
0385     return 0;
0386 }
0387 
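/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in soft reset and clears VCE_STATUS.
 */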
0388 static int vce_v4_0_stop(struct amdgpu_device *adev)
0389 {
0390 
0391     /* Disable VCPU */
0392     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
0393 
0394     /* hold on ECPU */
0395     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
0396             VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
0397             ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
0398 
0399     /* clear VCE_STATUS */
0400     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
0401 
0402     /* Set Clock-Gating off */
0403     /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
0404         vce_v4_0_set_vce_sw_clock_gating(adev, false);
0405     */
0406 
0407     return 0;
0408 }
0409 
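/**
 * vce_v4_0_early_init - early hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Sets the number of VCE rings (one under SR-IOV, three otherwise) and
 * installs the ring and interrupt callbacks.
 */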
0410 static int vce_v4_0_early_init(void *handle)
0411 {
0412     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0413 
0414     if (amdgpu_sriov_vf(adev)) /* currently only VCE ring 0 supports SR-IOV */
0415         adev->vce.num_rings = 1;
0416     else
0417         adev->vce.num_rings = 3;
0418 
0419     vce_v4_0_set_ring_funcs(adev);
0420     vce_v4_0_set_irq_funcs(adev);
0421 
0422     return 0;
0423 }
0424 
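/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, allocates the VCE BO and, for PSP
 * firmware loading, the save buffer, initializes the rings (doorbell
 * based under SR-IOV) and allocates the MM table used by the MMSCH.
 */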
0425 static int vce_v4_0_sw_init(void *handle)
0426 {
0427     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0428     struct amdgpu_ring *ring;
0429 
0430     unsigned size;
0431     int r, i;
0432 
0433     r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
0434     if (r)
0435         return r;
0436 
0437     size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
0438     if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
0439         size += VCE_V4_0_FW_SIZE;
0440 
0441     r = amdgpu_vce_sw_init(adev, size);
0442     if (r)
0443         return r;
0444 
0445     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0446         const struct common_firmware_header *hdr;
0447         unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
0448 
0449         adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
0450         if (!adev->vce.saved_bo)
0451             return -ENOMEM;
0452 
0453         hdr = (const struct common_firmware_header *)adev->vce.fw->data;
0454         adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
0455         adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
0456         adev->firmware.fw_size +=
0457             ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
0458         DRM_INFO("PSP loading VCE firmware\n");
0459     } else {
0460         r = amdgpu_vce_resume(adev);
0461         if (r)
0462             return r;
0463     }
0464 
0465     for (i = 0; i < adev->vce.num_rings; i++) {
0466         enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
0467 
0468         ring = &adev->vce.ring[i];
0469         sprintf(ring->name, "vce%d", i);
0470         if (amdgpu_sriov_vf(adev)) {
0471             /* DOORBELL only works under SRIOV */
0472             ring->use_doorbell = true;
0473 
0474             /* currently only the first encoding ring is used under SR-IOV,
0475              * so assign an unused doorbell location to the other rings.
0476              */
0477             if (i == 0)
0478                 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
0479             else
0480                 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
0481         }
0482         r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
0483                      hw_prio, NULL);
0484         if (r)
0485             return r;
0486     }
0487 
0488 
0489     r = amdgpu_vce_entity_init(adev);
0490     if (r)
0491         return r;
0492 
0493     r = amdgpu_virt_alloc_mm_table(adev);
0494     if (r)
0495         return r;
0496 
0497     return r;
0498 }
0499 
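/**
 * vce_v4_0_sw_fini - software fini
 *
 * @handle: amdgpu_device pointer
 *
 * Frees the MM table and the PSP save buffer, then suspends and tears
 * down the common VCE state.
 */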
0500 static int vce_v4_0_sw_fini(void *handle)
0501 {
0502     int r;
0503     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0504 
0505     /* free MM table */
0506     amdgpu_virt_free_mm_table(adev);
0507 
0508     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0509         kvfree(adev->vce.saved_bo);
0510         adev->vce.saved_bo = NULL;
0511     }
0512 
0513     r = amdgpu_vce_suspend(adev);
0514     if (r)
0515         return r;
0516 
0517     return amdgpu_vce_sw_fini(adev);
0518 }
0519 
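/**
 * vce_v4_0_hw_init - start and test the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the VCE block (through the MMSCH under SR-IOV) and runs a ring
 * test on every enabled ring.
 */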
0520 static int vce_v4_0_hw_init(void *handle)
0521 {
0522     int r, i;
0523     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0524 
0525     if (amdgpu_sriov_vf(adev))
0526         r = vce_v4_0_sriov_start(adev);
0527     else
0528         r = vce_v4_0_start(adev);
0529     if (r)
0530         return r;
0531 
0532     for (i = 0; i < adev->vce.num_rings; i++) {
0533         r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
0534         if (r)
0535             return r;
0536     }
0537 
0538     DRM_INFO("VCE initialized successfully.\n");
0539 
0540     return 0;
0541 }
0542 
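/**
 * vce_v4_0_hw_fini - stop the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Cancels the idle work and stops the hardware; under SR-IOV the
 * registers are owned by the host, so nothing is touched.
 */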
0543 static int vce_v4_0_hw_fini(void *handle)
0544 {
0545     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0546 
0547     cancel_delayed_work_sync(&adev->vce.idle_work);
0548 
0549     if (!amdgpu_sriov_vf(adev)) {
0550         /* vce_v4_0_wait_for_idle(handle); */
0551         vce_v4_0_stop(adev);
0552     } else {
0553         /* full access mode, so don't touch any VCE register */
0554         DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
0555     }
0556 
0557     return 0;
0558 }
0559 
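/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Saves the VCPU BO contents when the firmware was loaded by the PSP,
 * cancels the idle work, disables VCE DPM (or gates clocks and power)
 * and shuts the hardware down.
 */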
0560 static int vce_v4_0_suspend(void *handle)
0561 {
0562     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0563     int r, idx;
0564 
0565     if (adev->vce.vcpu_bo == NULL)
0566         return 0;
0567 
0568     if (drm_dev_enter(adev_to_drm(adev), &idx)) {
0569         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0570             unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
0571             void *ptr = adev->vce.cpu_addr;
0572 
0573             memcpy_fromio(adev->vce.saved_bo, ptr, size);
0574         }
0575         drm_dev_exit(idx);
0576     }
0577 
0578     /*
0579      * Proper cleanups before halting the HW engine:
0580      *   - cancel the delayed idle work
0581      *   - enable powergating
0582      *   - enable clockgating
0583      *   - disable dpm
0584      *
0585      * TODO: to align with the VCN implementation, move the
0586      * jobs for clockgating/powergating/dpm setting to
0587      * ->set_powergating_state().
0588      */
0589     cancel_delayed_work_sync(&adev->vce.idle_work);
0590 
0591     if (adev->pm.dpm_enabled) {
0592         amdgpu_dpm_enable_vce(adev, false);
0593     } else {
0594         amdgpu_asic_set_vce_clocks(adev, 0, 0);
0595         amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
0596                                AMD_PG_STATE_GATE);
0597         amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
0598                                AMD_CG_STATE_GATE);
0599     }
0600 
0601     r = vce_v4_0_hw_fini(adev);
0602     if (r)
0603         return r;
0604 
0605     return amdgpu_vce_suspend(adev);
0606 }
0607 
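/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the VCPU BO contents saved in vce_v4_0_suspend() (PSP load)
 * or reloads the firmware image, then re-initializes the hardware.
 */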
0608 static int vce_v4_0_resume(void *handle)
0609 {
0610     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0611     int r, idx;
0612 
0613     if (adev->vce.vcpu_bo == NULL)
0614         return -EINVAL;
0615 
0616     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0617 
0618         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
0619             unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
0620             void *ptr = adev->vce.cpu_addr;
0621 
0622             memcpy_toio(ptr, adev->vce.saved_bo, size);
0623             drm_dev_exit(idx);
0624         }
0625     } else {
0626         r = amdgpu_vce_resume(adev);
0627         if (r)
0628             return r;
0629     }
0630 
0631     return vce_v4_0_hw_init(adev);
0632 }
0633 
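/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Programs clock gating, the LMI and the VCPU cache windows for the
 * firmware, stack and data segments, and enables the system interrupt.
 */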
0634 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
0635 {
0636     uint32_t offset, size;
0637     uint64_t tmr_mc_addr;
0638 
0639     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
0640     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
0641     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
0642     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
0643 
0644     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
0645     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
0646     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
0647     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
0648     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
0649 
0650     offset = AMDGPU_VCE_FIRMWARE_OFFSET;
0651 
0652     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0653         tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
0654                                         adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
0655         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
0656             (tmr_mc_addr >> 8));
0657         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
0658             (tmr_mc_addr >> 40) & 0xff);
0659         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
0660     } else {
0661         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
0662             (adev->vce.gpu_addr >> 8));
0663         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
0664             (adev->vce.gpu_addr >> 40) & 0xff);
0665         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
0666     }
0667 
0668     size = VCE_V4_0_FW_SIZE;
0669     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
0670 
0671     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
0672     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
0673     offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
0674     size = VCE_V4_0_STACK_SIZE;
0675     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
0676     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
0677 
0678     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
0679     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
0680     offset += size;
0681     size = VCE_V4_0_DATA_SIZE;
0682     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
0683     WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
0684 
0685     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
0686     WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
0687             VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
0688             ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
0689 }
0690 
0691 static int vce_v4_0_set_clockgating_state(void *handle,
0692                       enum amd_clockgating_state state)
0693 {
0694     /* needed for driver unload */
0695     return 0;
0696 }
0697 
0698 #if 0
0699 static bool vce_v4_0_is_idle(void *handle)
0700 {
0701     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0702     u32 mask = 0;
0703 
0704     mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
0705     mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
0706 
0707     return !(RREG32(mmSRBM_STATUS2) & mask);
0708 }
0709 
0710 static int vce_v4_0_wait_for_idle(void *handle)
0711 {
0712     unsigned i;
0713     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0714 
0715     for (i = 0; i < adev->usec_timeout; i++)
0716         if (vce_v4_0_is_idle(handle))
0717             return 0;
0718 
0719     return -ETIMEDOUT;
0720 }
0721 
0722 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
0723 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
0724 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
0725 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
0726                       VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
0727 
0728 static bool vce_v4_0_check_soft_reset(void *handle)
0729 {
0730     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0731     u32 srbm_soft_reset = 0;
0732 
0733     /* According to the VCE team, we should use VCE_STATUS instead of the
0734      * SRBM_STATUS.VCE_BUSY bit for busy status checking.
0735      * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
0736      * instance's registers are accessed
0737      * (0 for the 1st instance, 0x10 for the 2nd instance).
0738      *
0739      * VCE_STATUS
0740      * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
0741      * |----+----+-----------+----+----+----+----------+---------+----|
0742      * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
0743      *
0744      * The VCE team suggests using bits 3 to 6 for the busy status check.
0745      */
0746     mutex_lock(&adev->grbm_idx_mutex);
0747     WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
0748     if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
0749         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
0750         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
0751     }
0752     WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
0753     if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
0754         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
0755         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
0756     }
0757     WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
0758     mutex_unlock(&adev->grbm_idx_mutex);
0759 
0760     if (srbm_soft_reset) {
0761         adev->vce.srbm_soft_reset = srbm_soft_reset;
0762         return true;
0763     } else {
0764         adev->vce.srbm_soft_reset = 0;
0765         return false;
0766     }
0767 }
0768 
0769 static int vce_v4_0_soft_reset(void *handle)
0770 {
0771     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0772     u32 srbm_soft_reset;
0773 
0774     if (!adev->vce.srbm_soft_reset)
0775         return 0;
0776     srbm_soft_reset = adev->vce.srbm_soft_reset;
0777 
0778     if (srbm_soft_reset) {
0779         u32 tmp;
0780 
0781         tmp = RREG32(mmSRBM_SOFT_RESET);
0782         tmp |= srbm_soft_reset;
0783         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
0784         WREG32(mmSRBM_SOFT_RESET, tmp);
0785         tmp = RREG32(mmSRBM_SOFT_RESET);
0786 
0787         udelay(50);
0788 
0789         tmp &= ~srbm_soft_reset;
0790         WREG32(mmSRBM_SOFT_RESET, tmp);
0791         tmp = RREG32(mmSRBM_SOFT_RESET);
0792 
0793         /* Wait a little for things to settle down */
0794         udelay(50);
0795     }
0796 
0797     return 0;
0798 }
0799 
0800 static int vce_v4_0_pre_soft_reset(void *handle)
0801 {
0802     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0803 
0804     if (!adev->vce.srbm_soft_reset)
0805         return 0;
0806 
0807     mdelay(5);
0808 
0809     return vce_v4_0_suspend(adev);
0810 }
0811 
0812 
0813 static int vce_v4_0_post_soft_reset(void *handle)
0814 {
0815     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0816 
0817     if (!adev->vce.srbm_soft_reset)
0818         return 0;
0819 
0820     mdelay(5);
0821 
0822     return vce_v4_0_resume(adev);
0823 }
0824 
0825 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
0826 {
0827     u32 tmp, data;
0828 
0829     tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
0830     if (override)
0831         data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
0832     else
0833         data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
0834 
0835     if (tmp != data)
0836         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
0837 }
0838 
0839 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
0840                          bool gated)
0841 {
0842     u32 data;
0843 
0844     /* Set Override to disable Clock Gating */
0845     vce_v4_0_override_vce_clock_gating(adev, true);
0846 
0847     /* This function enables MGCG which is controlled by firmware.
0848        With the clocks in the gated state the core is still
0849        accessible but the firmware will throttle the clocks on the
0850        fly as necessary.
0851     */
0852     if (gated) {
0853         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
0854         data |= 0x1ff;
0855         data &= ~0xef0000;
0856         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
0857 
0858         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
0859         data |= 0x3ff000;
0860         data &= ~0xffc00000;
0861         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
0862 
0863         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
0864         data |= 0x2;
0865         data &= ~0x00010000;
0866         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
0867 
0868         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
0869         data |= 0x37f;
0870         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
0871 
0872         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
0873         data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
0874             VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
0875             VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
0876             0x8;
0877         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
0878     } else {
0879         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
0880         data &= ~0x80010;
0881         data |= 0xe70008;
0882         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
0883 
0884         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
0885         data |= 0xffc00000;
0886         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
0887 
0888         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
0889         data |= 0x10000;
0890         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
0891 
0892         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
0893         data &= ~0xffc00000;
0894         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
0895 
0896         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
0897         data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
0898               VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
0899               VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
0900               0x8);
0901         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
0902     }
0903     vce_v4_0_override_vce_clock_gating(adev, false);
0904 }
0905 
0906 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
0907 {
0908     u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
0909 
0910     if (enable)
0911         tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
0912     else
0913         tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
0914 
0915     WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
0916 }
0917 
0918 static int vce_v4_0_set_clockgating_state(void *handle,
0919                       enum amd_clockgating_state state)
0920 {
0921     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0922     bool enable = (state == AMD_CG_STATE_GATE);
0923     int i;
0924 
0925     if ((adev->asic_type == CHIP_POLARIS10) ||
0926         (adev->asic_type == CHIP_TONGA) ||
0927         (adev->asic_type == CHIP_FIJI))
0928         vce_v4_0_set_bypass_mode(adev, enable);
0929 
0930     if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
0931         return 0;
0932 
0933     mutex_lock(&adev->grbm_idx_mutex);
0934     for (i = 0; i < 2; i++) {
0935         /* Program VCE Instance 0 or 1 if not harvested */
0936         if (adev->vce.harvest_config & (1 << i))
0937             continue;
0938 
0939         WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
0940 
0941         if (enable) {
0942             /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
0943             uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
0944             data &= ~(0xf | 0xff0);
0945             data |= ((0x0 << 0) | (0x04 << 4));
0946             WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
0947 
0948             /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
0949             data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
0950             data &= ~(0xf | 0xff0);
0951             data |= ((0x0 << 0) | (0x04 << 4));
0952             WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
0953         }
0954 
0955         vce_v4_0_set_vce_sw_clock_gating(adev, enable);
0956     }
0957 
0958     WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
0959     mutex_unlock(&adev->grbm_idx_mutex);
0960 
0961     return 0;
0962 }
0963 #endif
0964 
0965 static int vce_v4_0_set_powergating_state(void *handle,
0966                       enum amd_powergating_state state)
0967 {
0968     /* This doesn't actually powergate the VCE block.
0969      * That's done in the dpm code via the SMC.  This
0970      * just re-inits the block as necessary.  The actual
0971      * gating still happens in the dpm code.  We should
0972      * revisit this when there is a cleaner line between
0973      * the smc and the hw blocks
0974      */
0975     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
0976 
0977     if (state == AMD_PG_STATE_GATE)
0978         return vce_v4_0_stop(adev);
0979     else
0980         return vce_v4_0_start(adev);
0981 }
0982 
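/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer on the ring
 *
 * @ring: amdgpu_ring pointer
 * @job: job the IB belongs to (provides the VMID)
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Writes a VCE_CMD_IB_VM packet with the VMID, IB address and length.
 */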
0983 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
0984                     struct amdgpu_ib *ib, uint32_t flags)
0985 {
0986     unsigned vmid = AMDGPU_JOB_GET_VMID(job);
0987 
0988     amdgpu_ring_write(ring, VCE_CMD_IB_VM);
0989     amdgpu_ring_write(ring, vmid);
0990     amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
0991     amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
0992     amdgpu_ring_write(ring, ib->length_dw);
0993 }
0994 
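/**
 * vce_v4_0_ring_emit_fence - emit a fence command on the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address to write the sequence number to
 * @seq: sequence number to write
 * @flags: fence flags (64-bit sequence numbers are not supported)
 *
 * Writes a VCE_CMD_FENCE packet followed by VCE_CMD_TRAP.
 */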
0995 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
0996             u64 seq, unsigned flags)
0997 {
0998     WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
0999 
1000     amdgpu_ring_write(ring, VCE_CMD_FENCE);
1001     amdgpu_ring_write(ring, addr);
1002     amdgpu_ring_write(ring, upper_32_bits(addr));
1003     amdgpu_ring_write(ring, seq);
1004     amdgpu_ring_write(ring, VCE_CMD_TRAP);
1005 }
1006 
1007 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1008 {
1009     amdgpu_ring_write(ring, VCE_CMD_END);
1010 }
1011 
1012 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1013                    uint32_t val, uint32_t mask)
1014 {
1015     amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1016     amdgpu_ring_write(ring, reg << 2);
1017     amdgpu_ring_write(ring, mask);
1018     amdgpu_ring_write(ring, val);
1019 }
1020 
1021 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1022                    unsigned int vmid, uint64_t pd_addr)
1023 {
1024     struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1025 
1026     pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1027 
1028     /* wait for reg writes */
1029     vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1030                    vmid * hub->ctx_addr_distance,
1031                    lower_32_bits(pd_addr), 0xffffffff);
1032 }
1033 
1034 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1035                    uint32_t reg, uint32_t val)
1036 {
1037     amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1038     amdgpu_ring_write(ring, reg << 2);
1039     amdgpu_ring_write(ring, val);
1040 }
1041 
1042 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1043                     struct amdgpu_irq_src *source,
1044                     unsigned type,
1045                     enum amdgpu_interrupt_state state)
1046 {
1047     uint32_t val = 0;
1048 
1049     if (!amdgpu_sriov_vf(adev)) {
1050         if (state == AMDGPU_IRQ_STATE_ENABLE)
1051             val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1052 
1053         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1054                 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1055     }
1056     return 0;
1057 }
1058 
1059 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1060                       struct amdgpu_irq_src *source,
1061                       struct amdgpu_iv_entry *entry)
1062 {
1063     DRM_DEBUG("IH: VCE\n");
1064 
1065     switch (entry->src_data[0]) {
1066     case 0:
1067     case 1:
1068     case 2:
1069         amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1070         break;
1071     default:
1072         DRM_ERROR("Unhandled interrupt: %d %d\n",
1073               entry->src_id, entry->src_data[0]);
1074         break;
1075     }
1076 
1077     return 0;
1078 }
1079 
1080 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1081     .name = "vce_v4_0",
1082     .early_init = vce_v4_0_early_init,
1083     .late_init = NULL,
1084     .sw_init = vce_v4_0_sw_init,
1085     .sw_fini = vce_v4_0_sw_fini,
1086     .hw_init = vce_v4_0_hw_init,
1087     .hw_fini = vce_v4_0_hw_fini,
1088     .suspend = vce_v4_0_suspend,
1089     .resume = vce_v4_0_resume,
1090     .is_idle = NULL /* vce_v4_0_is_idle */,
1091     .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1092     .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1093     .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1094     .soft_reset = NULL /* vce_v4_0_soft_reset */,
1095     .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1096     .set_clockgating_state = vce_v4_0_set_clockgating_state,
1097     .set_powergating_state = vce_v4_0_set_powergating_state,
1098 };
1099 
1100 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1101     .type = AMDGPU_RING_TYPE_VCE,
1102     .align_mask = 0x3f,
1103     .nop = VCE_CMD_NO_OP,
1104     .support_64bit_ptrs = false,
1105     .no_user_fence = true,
1106     .vmhub = AMDGPU_MMHUB_0,
1107     .get_rptr = vce_v4_0_ring_get_rptr,
1108     .get_wptr = vce_v4_0_ring_get_wptr,
1109     .set_wptr = vce_v4_0_ring_set_wptr,
1110     .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1111     .emit_frame_size =
1112         SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1113         SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1114         4 + /* vce_v4_0_emit_vm_flush */
1115         5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1116         1, /* vce_v4_0_ring_insert_end */
1117     .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1118     .emit_ib = vce_v4_0_ring_emit_ib,
1119     .emit_vm_flush = vce_v4_0_emit_vm_flush,
1120     .emit_fence = vce_v4_0_ring_emit_fence,
1121     .test_ring = amdgpu_vce_ring_test_ring,
1122     .test_ib = amdgpu_vce_ring_test_ib,
1123     .insert_nop = amdgpu_ring_insert_nop,
1124     .insert_end = vce_v4_0_ring_insert_end,
1125     .pad_ib = amdgpu_ring_generic_pad_ib,
1126     .begin_use = amdgpu_vce_ring_begin_use,
1127     .end_use = amdgpu_vce_ring_end_use,
1128     .emit_wreg = vce_v4_0_emit_wreg,
1129     .emit_reg_wait = vce_v4_0_emit_reg_wait,
1130     .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1131 };
1132 
1133 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1134 {
1135     int i;
1136 
1137     for (i = 0; i < adev->vce.num_rings; i++) {
1138         adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1139         adev->vce.ring[i].me = i;
1140     }
1141     DRM_INFO("VCE enabled in VM mode\n");
1142 }
1143 
1144 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1145     .set = vce_v4_0_set_interrupt_state,
1146     .process = vce_v4_0_process_interrupt,
1147 };
1148 
1149 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1150 {
1151     adev->vce.irq.num_types = 1;
1152     adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1153 }
1154 
1155 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1156 {
1157     .type = AMD_IP_BLOCK_TYPE_VCE,
1158     .major = 4,
1159     .minor = 0,
1160     .rev = 0,
1161     .funcs = &vce_v4_0_ip_funcs,
1162 };