/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "amdgpu_cs.h"
#include "cikd.h"

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT    msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE    "amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS    "amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA      "amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO    "amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI       "amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY     "amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10  "amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11  "amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12  "amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM      "amdgpu/vegam_vce.bin"

#define FIRMWARE_VEGA10     "amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12     "amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20     "amdgpu/vega20_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
                     struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                      bool direct, struct dma_fence **fence);

/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size for the new BO
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
    const char *fw_name;
    const struct common_firmware_header *hdr;
    unsigned ucode_version, version_major, version_minor, binary_id;
    int i, r;

    switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
    case CHIP_BONAIRE:
        fw_name = FIRMWARE_BONAIRE;
        break;
    case CHIP_KAVERI:
        fw_name = FIRMWARE_KAVERI;
        break;
    case CHIP_KABINI:
        fw_name = FIRMWARE_KABINI;
        break;
    case CHIP_HAWAII:
        fw_name = FIRMWARE_HAWAII;
        break;
    case CHIP_MULLINS:
        fw_name = FIRMWARE_MULLINS;
        break;
#endif
    case CHIP_TONGA:
        fw_name = FIRMWARE_TONGA;
        break;
    case CHIP_CARRIZO:
        fw_name = FIRMWARE_CARRIZO;
        break;
    case CHIP_FIJI:
        fw_name = FIRMWARE_FIJI;
        break;
    case CHIP_STONEY:
        fw_name = FIRMWARE_STONEY;
        break;
    case CHIP_POLARIS10:
        fw_name = FIRMWARE_POLARIS10;
        break;
    case CHIP_POLARIS11:
        fw_name = FIRMWARE_POLARIS11;
        break;
    case CHIP_POLARIS12:
        fw_name = FIRMWARE_POLARIS12;
        break;
    case CHIP_VEGAM:
        fw_name = FIRMWARE_VEGAM;
        break;
    case CHIP_VEGA10:
        fw_name = FIRMWARE_VEGA10;
        break;
    case CHIP_VEGA12:
        fw_name = FIRMWARE_VEGA12;
        break;
    case CHIP_VEGA20:
        fw_name = FIRMWARE_VEGA20;
        break;

    default:
        return -EINVAL;
    }

    r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
    if (r) {
        dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
            fw_name);
        return r;
    }

    r = amdgpu_ucode_validate(adev->vce.fw);
    if (r) {
        dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
            fw_name);
        release_firmware(adev->vce.fw);
        adev->vce.fw = NULL;
        return r;
    }

    hdr = (const struct common_firmware_header *)adev->vce.fw->data;

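    /*
     * The ucode_version field packs the firmware version as major
     * (bits 31:20), minor (bits 19:8) and binary id (bits 7:0).  It is
     * repacked below into the driver's own fw_version layout with the
     * major version in the top byte, which is what the ">> 24 >= 52"
     * checks in the message builders rely on.
     */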
    ucode_version = le32_to_cpu(hdr->ucode_version);
    version_major = (ucode_version >> 20) & 0xfff;
    version_minor = (ucode_version >> 8) & 0xfff;
    binary_id = ucode_version & 0xff;
    DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
        version_major, version_minor, binary_id);
    adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
                (binary_id << 8));

    r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
                    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
                    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
    if (r) {
        dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
        return r;
    }

    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
        atomic_set(&adev->vce.handles[i], 0);
        adev->vce.filp[i] = NULL;
    }

    INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
    mutex_init(&adev->vce.idle_mutex);

    return 0;
}

/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
    unsigned i;

    if (adev->vce.vcpu_bo == NULL)
        return 0;

    drm_sched_entity_destroy(&adev->vce.entity);

    amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
        (void **)&adev->vce.cpu_addr);

    for (i = 0; i < adev->vce.num_rings; i++)
        amdgpu_ring_fini(&adev->vce.ring[i]);

    release_firmware(adev->vce.fw);
    mutex_destroy(&adev->vce.idle_mutex);

    return 0;
}

/**
 * amdgpu_vce_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the entity used for handle management in the kernel driver.
 */
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring;
    struct drm_gpu_scheduler *sched;
    int r;

    ring = &adev->vce.ring[0];
    sched = &ring->sched;
    r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
                  &sched, 1, NULL);
    if (r != 0) {
        DRM_ERROR("Failed setting up VCE run queue.\n");
        return r;
    }

    return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
    int i;

    cancel_delayed_work_sync(&adev->vce.idle_work);

    if (adev->vce.vcpu_bo == NULL)
        return 0;

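    /* any non-zero handle means an encode session is still open */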
    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
        if (atomic_read(&adev->vce.handles[i]))
            break;

    if (i == AMDGPU_MAX_VCE_HANDLES)
        return 0;

    /* TODO: suspending running encoding sessions isn't supported */
    return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
    void *cpu_addr;
    const struct common_firmware_header *hdr;
    unsigned offset;
    int r, idx;

    if (adev->vce.vcpu_bo == NULL)
        return -EINVAL;

    r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
    if (r) {
        dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
        return r;
    }

    r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
    if (r) {
        amdgpu_bo_unreserve(adev->vce.vcpu_bo);
        dev_err(adev->dev, "(%d) VCE map failed\n", r);
        return r;
    }

    hdr = (const struct common_firmware_header *)adev->vce.fw->data;
    offset = le32_to_cpu(hdr->ucode_array_offset_bytes);

    if (drm_dev_enter(adev_to_drm(adev), &idx)) {
        memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
                adev->vce.fw->size - offset);
        drm_dev_exit(idx);
    }

    amdgpu_bo_kunmap(adev->vce.vcpu_bo);

    amdgpu_bo_unreserve(adev->vce.vcpu_bo);

    return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used anymore
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
    struct amdgpu_device *adev =
        container_of(work, struct amdgpu_device, vce.idle_work.work);
    unsigned i, count = 0;

    for (i = 0; i < adev->vce.num_rings; i++)
        count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);

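    /*
     * No outstanding fences on any VCE ring means the engine is idle,
     * so gate its power and clocks.  Otherwise re-arm the timer and
     * check again after another VCE_IDLE_TIMEOUT.
     */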
    if (count == 0) {
        if (adev->pm.dpm_enabled) {
            amdgpu_dpm_enable_vce(adev, false);
        } else {
            amdgpu_asic_set_vce_clocks(adev, 0, 0);
            amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                   AMD_PG_STATE_GATE);
            amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                   AMD_CG_STATE_GATE);
        }
    } else {
        schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
    }
}

/**
 * amdgpu_vce_ring_begin_use - power up VCE
 *
 * @ring: amdgpu ring
 *
 * Make sure VCE is powered up when we want to use it
 */
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    bool set_clocks;

    if (amdgpu_sriov_vf(adev))
        return;

    mutex_lock(&adev->vce.idle_mutex);
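    /*
     * If the idle work was still pending, cancelling it means VCE is
     * still powered and nothing needs to be done.  If it was not
     * pending it has already run and gated VCE, so clocks and power
     * have to be brought back up before the ring is used.
     */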
    set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
    if (set_clocks) {
        if (adev->pm.dpm_enabled) {
            amdgpu_dpm_enable_vce(adev, true);
        } else {
            amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
            amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                   AMD_CG_STATE_UNGATE);
            amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                   AMD_PG_STATE_UNGATE);
        }
    }
    mutex_unlock(&adev->vce.idle_mutex);
}

/**
 * amdgpu_vce_ring_end_use - power VCE down
 *
 * @ring: amdgpu ring
 *
 * Schedule work to power VCE down again
 */
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
    if (!amdgpu_sriov_vf(ring->adev))
        schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
    struct amdgpu_ring *ring = &adev->vce.ring[0];
    int i, r;

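    /*
     * Destroy messages are submitted through the VCE entity rather
     * than directly (direct == false), so they pass through the
     * scheduler like any other job from this client.
     */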
    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
        uint32_t handle = atomic_read(&adev->vce.handles[i]);

        if (!handle || adev->vce.filp[i] != filp)
            continue;

        r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
        if (r)
            DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

        adev->vce.filp[i] = NULL;
        atomic_set(&adev->vce.handles[i], 0);
    }
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
                     struct dma_fence **fence)
{
    const unsigned ib_size_dw = 1024;
    struct amdgpu_job *job;
    struct amdgpu_ib *ib;
    struct amdgpu_ib ib_msg;
    struct dma_fence *f = NULL;
    uint64_t addr;
    int i, r;

    r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
                     AMDGPU_IB_POOL_DIRECT, &job);
    if (r)
        return r;

    memset(&ib_msg, 0, sizeof(ib_msg));
    /* only one gpu page is needed, alloc +1 page to make addr aligned. */
    r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
              AMDGPU_IB_POOL_DIRECT,
              &ib_msg);
    if (r)
        goto err;

    ib = &job->ibs[0];
    /* let addr point to page boundary */
    addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);

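    /*
     * VCE messages are a stream of packets, each starting with a
     * length in bytes (including the two header dwords) followed by a
     * command id and its payload.
     */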
    /* stitch together a VCE create msg */
    ib->length_dw = 0;
    ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
    ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
    ib->ptr[ib->length_dw++] = handle;

    if ((ring->adev->vce.fw_version >> 24) >= 52)
        ib->ptr[ib->length_dw++] = 0x00000040; /* len */
    else
        ib->ptr[ib->length_dw++] = 0x00000030; /* len */
    ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
    ib->ptr[ib->length_dw++] = 0x00000000;
    ib->ptr[ib->length_dw++] = 0x00000042;
    ib->ptr[ib->length_dw++] = 0x0000000a;
    ib->ptr[ib->length_dw++] = 0x00000001;
    ib->ptr[ib->length_dw++] = 0x00000080;
    ib->ptr[ib->length_dw++] = 0x00000060;
    ib->ptr[ib->length_dw++] = 0x00000100;
    ib->ptr[ib->length_dw++] = 0x00000100;
    ib->ptr[ib->length_dw++] = 0x0000000c;
    ib->ptr[ib->length_dw++] = 0x00000000;
    if ((ring->adev->vce.fw_version >> 24) >= 52) {
        ib->ptr[ib->length_dw++] = 0x00000000;
        ib->ptr[ib->length_dw++] = 0x00000000;
        ib->ptr[ib->length_dw++] = 0x00000000;
        ib->ptr[ib->length_dw++] = 0x00000000;
    }

    ib->ptr[ib->length_dw++] = 0x00000014; /* len */
    ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
    ib->ptr[ib->length_dw++] = upper_32_bits(addr);
    ib->ptr[ib->length_dw++] = addr;
    ib->ptr[ib->length_dw++] = 0x00000001;

    for (i = ib->length_dw; i < ib_size_dw; ++i)
        ib->ptr[i] = 0x0;

    r = amdgpu_job_submit_direct(job, ring, &f);
    amdgpu_ib_free(ring->adev, &ib_msg, f);
    if (r)
        goto err;

    if (fence)
        *fence = dma_fence_get(f);
    dma_fence_put(f);
    return 0;

err:
    amdgpu_job_free(job);
    return r;
}

/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: direct or delayed pool
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                      bool direct, struct dma_fence **fence)
{
    const unsigned ib_size_dw = 1024;
    struct amdgpu_job *job;
    struct amdgpu_ib *ib;
    struct dma_fence *f = NULL;
    int i, r;

    r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
                     direct ? AMDGPU_IB_POOL_DIRECT :
                     AMDGPU_IB_POOL_DELAYED, &job);
    if (r)
        return r;

    ib = &job->ibs[0];

    /* stitch together a VCE destroy msg */
    ib->length_dw = 0;
    ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
    ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
    ib->ptr[ib->length_dw++] = handle;

    ib->ptr[ib->length_dw++] = 0x00000020; /* len */
    ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
    ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if none */
    ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
    ib->ptr[ib->length_dw++] = 0x00000000;
    ib->ptr[ib->length_dw++] = 0x00000000;
    ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
    ib->ptr[ib->length_dw++] = 0x00000000;

    ib->ptr[ib->length_dw++] = 0x00000008; /* len */
    ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

    for (i = ib->length_dw; i < ib_size_dw; ++i)
        ib->ptr[i] = 0x0;

    if (direct)
        r = amdgpu_job_submit_direct(job, ring, &f);
    else
        r = amdgpu_job_submit(job, &ring->adev->vce.entity,
                      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
    if (r)
        goto err;

    if (fence)
        *fence = dma_fence_get(f);
    dma_fence_put(f);
    return 0;

err:
    amdgpu_job_free(job);
    return r;
}

/**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Make sure that no BO crosses a 4GB boundary.
 */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
                  struct amdgpu_ib *ib, int lo, int hi,
                  unsigned size, int32_t index)
{
    int64_t offset = ((uint64_t)size) * ((int64_t)index);
    struct ttm_operation_ctx ctx = { false, false };
    struct amdgpu_bo_va_mapping *mapping;
    unsigned i, fpfn, lpfn;
    struct amdgpu_bo *bo;
    uint64_t addr;
    int r;

    addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
           ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
    if (index >= 0) {
        addr += offset;
        fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
        lpfn = 0x100000000ULL >> PAGE_SHIFT;
    } else {
        fpfn = 0;
        lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
    }

    r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
    if (r) {
        DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
              addr, lo, hi, size, index);
        return r;
    }

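    /*
     * Clamp the allowed placement range so the buffer cannot end up
     * crossing a 4GB boundary relative to the address programmed into
     * the command stream.
     */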
    for (i = 0; i < bo->placement.num_placement; ++i) {
        bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
        bo->placements[i].lpfn = bo->placements[i].lpfn ?
            min(bo->placements[i].lpfn, lpfn) : lpfn;
    }
    return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
                   int lo, int hi, unsigned size, uint32_t index)
{
    struct amdgpu_bo_va_mapping *mapping;
    struct amdgpu_bo *bo;
    uint64_t addr;
    int r;

    if (index == 0xffffffff)
        index = 0;

    addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
           ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
    addr += ((uint64_t)size) * ((uint64_t)index);

    r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
    if (r) {
        DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
              addr, lo, hi, size, index);
        return r;
    }

    if ((addr + (uint64_t)size) >
        (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
        DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
              addr, lo, hi);
        return -EINVAL;
    }

    addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
    addr += amdgpu_bo_gpu_offset(bo);
    addr -= ((uint64_t)size) * ((uint64_t)index);

    amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
    amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));

    return 0;
}

/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and returns the found session index or -EINVAL
 * when we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
                      uint32_t handle, uint32_t *allocated)
{
    unsigned i;

    /* validate the handle */
    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
        if (atomic_read(&p->adev->vce.handles[i]) == handle) {
            if (p->adev->vce.filp[i] != p->filp) {
                DRM_ERROR("VCE handle collision detected!\n");
                return -EINVAL;
            }
            return i;
        }
    }

    /* handle not found, try to allocate a new one */
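    /*
     * atomic_cmpxchg() claims a free slot (value 0) lock-free, so two
     * clients racing to register new handles can never grab the same
     * session index.
     */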
    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
        if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
            p->adev->vce.filp[i] = p->filp;
            p->adev->vce.img_size[i] = 0;
            *allocated |= 1 << i;
            return i;
        }
    }

    DRM_ERROR("No more free VCE handles!\n");
    return -EINVAL;
}

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @job: the job to parse
 * @ib: the IB to patch
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
                 struct amdgpu_job *job,
                 struct amdgpu_ib *ib)
{
    unsigned fb_idx = 0, bs_idx = 0;
    int session_idx = -1;
    uint32_t destroyed = 0;
    uint32_t created = 0;
    uint32_t allocated = 0;
    uint32_t tmp, handle = 0;
    uint32_t *size = &tmp;
    unsigned idx;
    int i, r = 0;

    job->vm = NULL;
    ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

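    /*
     * First pass: validate the size and placement of every buffer
     * referenced by the command stream before anything is patched.
     */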
    for (idx = 0; idx < ib->length_dw;) {
        uint32_t len = amdgpu_ib_get_value(ib, idx);
        uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);

        if ((len < 8) || (len & 3)) {
            DRM_ERROR("invalid VCE command length (%d)!\n", len);
            r = -EINVAL;
            goto out;
        }

        switch (cmd) {
        case 0x00000002: /* task info */
            fb_idx = amdgpu_ib_get_value(ib, idx + 6);
            bs_idx = amdgpu_ib_get_value(ib, idx + 7);
            break;

        case 0x03000001: /* encode */
            r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
                           0, 0);
            if (r)
                goto out;

            r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
                           0, 0);
            if (r)
                goto out;
            break;

        case 0x05000001: /* context buffer */
            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
                           0, 0);
            if (r)
                goto out;
            break;

        case 0x05000004: /* video bitstream buffer */
            tmp = amdgpu_ib_get_value(ib, idx + 4);
            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
                           tmp, bs_idx);
            if (r)
                goto out;
            break;

        case 0x05000005: /* feedback buffer */
            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
                           4096, fb_idx);
            if (r)
                goto out;
            break;

        case 0x0500000d: /* MV buffer */
            r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
                           0, 0);
            if (r)
                goto out;

            r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
                           0, 0);
            if (r)
                goto out;
            break;
        }

        idx += len / 4;
    }

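    /*
     * Second pass: track session handles and patch the real buffer
     * addresses into the command stream.
     */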
    for (idx = 0; idx < ib->length_dw;) {
        uint32_t len = amdgpu_ib_get_value(ib, idx);
        uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);

        switch (cmd) {
        case 0x00000001: /* session */
            handle = amdgpu_ib_get_value(ib, idx + 2);
            session_idx = amdgpu_vce_validate_handle(p, handle,
                                 &allocated);
            if (session_idx < 0) {
                r = session_idx;
                goto out;
            }
            size = &p->adev->vce.img_size[session_idx];
            break;

        case 0x00000002: /* task info */
            fb_idx = amdgpu_ib_get_value(ib, idx + 6);
            bs_idx = amdgpu_ib_get_value(ib, idx + 7);
            break;

        case 0x01000001: /* create */
            created |= 1 << session_idx;
            if (destroyed & (1 << session_idx)) {
                destroyed &= ~(1 << session_idx);
                allocated |= 1 << session_idx;
            } else if (!(allocated & (1 << session_idx))) {
                DRM_ERROR("Handle already in use!\n");
                r = -EINVAL;
                goto out;
            }

            *size = amdgpu_ib_get_value(ib, idx + 8) *
                amdgpu_ib_get_value(ib, idx + 10) *
                8 * 3 / 2;
            break;

        case 0x04000001: /* config extension */
        case 0x04000002: /* pic control */
        case 0x04000005: /* rate control */
        case 0x04000007: /* motion estimation */
        case 0x04000008: /* rdo */
        case 0x04000009: /* vui */
        case 0x05000002: /* auxiliary buffer */
        case 0x05000009: /* clock table */
            break;

        case 0x0500000c: /* hw config */
            switch (p->adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
            case CHIP_KAVERI:
            case CHIP_MULLINS:
#endif
            case CHIP_CARRIZO:
                break;
            default:
                r = -EINVAL;
                goto out;
            }
            break;

        case 0x03000001: /* encode */
            r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
                        *size, 0);
            if (r)
                goto out;

            r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
                        *size / 3, 0);
            if (r)
                goto out;
            break;

        case 0x02000001: /* destroy */
            destroyed |= 1 << session_idx;
            break;

        case 0x05000001: /* context buffer */
            r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
                        *size * 2, 0);
            if (r)
                goto out;
            break;

        case 0x05000004: /* video bitstream buffer */
            tmp = amdgpu_ib_get_value(ib, idx + 4);
            r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
                        tmp, bs_idx);
            if (r)
                goto out;
            break;

        case 0x05000005: /* feedback buffer */
            r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
                        4096, fb_idx);
            if (r)
                goto out;
            break;

        case 0x0500000d: /* MV buffer */
            r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
                        idx + 2, *size, 0);
            if (r)
                goto out;

            r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
                        idx + 7, *size / 12, 0);
            if (r)
                goto out;
            break;

        default:
            DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
            r = -EINVAL;
            goto out;
        }

        if (session_idx == -1) {
            DRM_ERROR("no session command at start of IB\n");
            r = -EINVAL;
            goto out;
        }

        idx += len / 4;
    }

    if (allocated & ~created) {
        DRM_ERROR("New session without create command!\n");
        r = -ENOENT;
    }

out:
    if (!r) {
        /* No error, free all destroyed handle slots */
        tmp = destroyed;
    } else {
        /* Error during parsing, free all allocated handle slots */
        tmp = allocated;
    }

    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
        if (tmp & (1 << i))
            atomic_set(&p->adev->vce.handles[i], 0);

    return r;
}

/**
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
 * @job: the job to parse
 * @ib: the IB to patch
 */
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
                struct amdgpu_job *job,
                struct amdgpu_ib *ib)
{
    int session_idx = -1;
    uint32_t destroyed = 0;
    uint32_t created = 0;
    uint32_t allocated = 0;
    uint32_t tmp, handle = 0;
    int i, r = 0, idx = 0;

    while (idx < ib->length_dw) {
        uint32_t len = amdgpu_ib_get_value(ib, idx);
        uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);

        if ((len < 8) || (len & 3)) {
            DRM_ERROR("invalid VCE command length (%d)!\n", len);
            r = -EINVAL;
            goto out;
        }

        switch (cmd) {
        case 0x00000001: /* session */
            handle = amdgpu_ib_get_value(ib, idx + 2);
            session_idx = amdgpu_vce_validate_handle(p, handle,
                                 &allocated);
            if (session_idx < 0) {
                r = session_idx;
                goto out;
            }
            break;

        case 0x01000001: /* create */
            created |= 1 << session_idx;
            if (destroyed & (1 << session_idx)) {
                destroyed &= ~(1 << session_idx);
                allocated |= 1 << session_idx;
            } else if (!(allocated & (1 << session_idx))) {
                DRM_ERROR("Handle already in use!\n");
                r = -EINVAL;
                goto out;
            }

            break;

        case 0x02000001: /* destroy */
            destroyed |= 1 << session_idx;
            break;

        default:
            break;
        }

        if (session_idx == -1) {
            DRM_ERROR("no session command at start of IB\n");
            r = -EINVAL;
            goto out;
        }

        idx += len / 4;
    }

    if (allocated & ~created) {
        DRM_ERROR("New session without create command!\n");
        r = -ENOENT;
    }

out:
    if (!r) {
        /* No error, free all destroyed handle slots */
        tmp = destroyed;
        amdgpu_ib_free(p->adev, ib, NULL);
    } else {
        /* Error during parsing, free all allocated handle slots */
        tmp = allocated;
    }

    for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
        if (tmp & (1 << i))
            atomic_set(&p->adev->vce.handles[i], 0);

    return r;
}

/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job to retrieve vmid from
 * @ib: the IB to execute
 * @flags: unused
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_job *job,
                struct amdgpu_ib *ib,
                uint32_t flags)
{
    amdgpu_ring_write(ring, VCE_CMD_IB);
    amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
                unsigned flags)
{
    WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

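    /*
     * Write the sequence number to the fence address, then trap so an
     * interrupt fires and the fence can be signalled.
     */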
    amdgpu_ring_write(ring, VCE_CMD_FENCE);
    amdgpu_ring_write(ring, addr);
    amdgpu_ring_write(ring, upper_32_bits(addr));
    amdgpu_ring_write(ring, seq);
    amdgpu_ring_write(ring, VCE_CMD_TRAP);
    amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    uint32_t rptr;
    unsigned i;
    int r, timeout = adev->usec_timeout;

    /* skip ring test for sriov */
    if (amdgpu_sriov_vf(adev))
        return 0;

    r = amdgpu_ring_alloc(ring, 16);
    if (r)
        return r;

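    /*
     * Submit a single END command; once the engine consumes it the
     * read pointer moves past our write, which is enough to prove the
     * ring is alive.
     */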
    rptr = amdgpu_ring_get_rptr(ring);

    amdgpu_ring_write(ring, VCE_CMD_END);
    amdgpu_ring_commit(ring);

    for (i = 0; i < timeout; i++) {
        if (amdgpu_ring_get_rptr(ring) != rptr)
            break;
        udelay(1);
    }

    if (i >= timeout)
        r = -ETIMEDOUT;

    return r;
}

/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
    struct dma_fence *fence = NULL;
    long r;

    /* skip vce ring1/2 ib test for now, since it's not reliable */
    if (ring != &ring->adev->vce.ring[0])
        return 0;

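    /*
     * Round trip: open a dummy session and immediately destroy it,
     * then wait on the destroy fence to prove that IB submission works
     * end to end.
     */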
    r = amdgpu_vce_get_create_msg(ring, 1, NULL);
    if (r)
        goto error;

    r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
    if (r)
        goto error;

    r = dma_fence_wait_timeout(fence, false, timeout);
    if (r == 0)
        r = -ETIMEDOUT;
    else if (r > 0)
        r = 0;

error:
    dma_fence_put(fence);
    return r;
}

enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
{
    switch (ring) {
    case 0:
        return AMDGPU_RING_PRIO_0;
    case 1:
        return AMDGPU_RING_PRIO_1;
    case 2:
        return AMDGPU_RING_PRIO_2;
    default:
        return AMDGPU_RING_PRIO_0;
    }
}