/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)

#define GFX_OFF_NO_DELAY 0

/*
 * GPU GFX IP block helper functions.
 */

int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
                int pipe, int queue)
{
    int bit = 0;

    bit += mec * adev->gfx.mec.num_pipe_per_mec
        * adev->gfx.mec.num_queue_per_pipe;
    bit += pipe * adev->gfx.mec.num_queue_per_pipe;
    bit += queue;

    return bit;
}

void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
                 int *mec, int *pipe, int *queue)
{
    *queue = bit % adev->gfx.mec.num_queue_per_pipe;
    *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
        % adev->gfx.mec.num_pipe_per_mec;
    *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
           / adev->gfx.mec.num_pipe_per_mec;
}
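
/*
 * Illustrative only (not driver code): with an assumed MEC topology of
 * num_pipe_per_mec = 4 and num_queue_per_pipe = 8, the two helpers above
 * form a round trip between a (mec, pipe, queue) triple and its flat bit
 * index:
 *
 *   int mec, pipe, queue;
 *   int bit = amdgpu_gfx_mec_queue_to_bit(adev, 1, 2, 3); // 1*4*8 + 2*8 + 3 = 51
 *   amdgpu_queue_mask_bit_to_mec_queue(adev, bit, &mec, &pipe, &queue);
 *   // mec == 1, pipe == 2, queue == 3 again
 *
 * The topology values are assumptions for the example; the real values come
 * from adev->gfx.mec.
 */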

bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
                     int mec, int pipe, int queue)
{
    return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
            adev->gfx.mec.queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
                   int me, int pipe, int queue)
{
    int bit = 0;

    bit += me * adev->gfx.me.num_pipe_per_me
        * adev->gfx.me.num_queue_per_pipe;
    bit += pipe * adev->gfx.me.num_queue_per_pipe;
    bit += queue;

    return bit;
}

void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
                int *me, int *pipe, int *queue)
{
    *queue = bit % adev->gfx.me.num_queue_per_pipe;
    *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
        % adev->gfx.me.num_pipe_per_me;
    *me = (bit / adev->gfx.me.num_queue_per_pipe)
        / adev->gfx.me.num_pipe_per_me;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
                    int me, int pipe, int queue)
{
    return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
            adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
    unsigned se, sh, cu;
    const char *p;

    memset(mask, 0, sizeof(*mask) * max_se * max_sh);

    if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
        return;

    p = amdgpu_disable_cu;
    for (;;) {
        char *next;
        int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
        if (ret < 3) {
            DRM_ERROR("amdgpu: could not parse disable_cu\n");
            return;
        }

        if (se < max_se && sh < max_sh && cu < 16) {
            DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
            mask[se * max_sh + sh] |= 1u << cu;
        } else {
            DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
                  se, sh, cu);
        }

        next = strchr(p, ',');
        if (!next)
            break;
        p = next + 1;
    }
}
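
/*
 * Example (illustrative, not driver code): passing
 *
 *   amdgpu.disable_cu=0.0.3,0.1.4
 *
 * on the kernel command line is parsed by the loop above into two entries,
 * CU 3 of SE 0 / SH 0 and CU 4 of SE 0 / SH 1, i.e.
 * mask[0 * max_sh + 0] |= 1u << 3 and mask[0 * max_sh + 1] |= 1u << 4.
 * A malformed entry aborts parsing; an out-of-range entry is reported and
 * skipped.
 */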

static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
{
    return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
}

static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
    if (amdgpu_compute_multipipe != -1) {
        DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
             amdgpu_compute_multipipe);
        return amdgpu_compute_multipipe == 1;
    }

    /* FIXME: spreading the queues across pipes causes perf regressions
     * on POLARIS11 compute workloads */
    if (adev->asic_type == CHIP_POLARIS11)
        return false;

    return adev->gfx.mec.num_mec > 1;
}

bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
                        struct amdgpu_ring *ring)
{
    int queue = ring->queue;
    int pipe = ring->pipe;

    /* Policy: use pipe1 queue0 as high priority graphics queue if we
     * have more than one gfx pipe.
     */
    if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
        adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
        int me = ring->me;
        int bit;

        bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
        if (ring == &adev->gfx.gfx_ring[bit])
            return true;
    }

    return false;
}
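
/*
 * Illustrative example (topology values assumed, not taken from this file):
 * on an ASIC with two gfx pipes and one queue per pipe, me 0 / pipe 1 /
 * queue 0 maps to bit 1 via amdgpu_gfx_me_queue_to_bit(), so the check above
 * passes only for the ring stored at gfx_ring[1]; every other graphics ring
 * returns false.
 */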

bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
                           struct amdgpu_ring *ring)
{
    /* Policy: use 1st queue as high priority compute queue if we
     * have more than one compute queue.
     */
    if (adev->gfx.num_compute_rings > 1 &&
        ring == &adev->gfx.compute_ring[0])
        return true;

    return false;
}

void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
    int i, queue, pipe;
    bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
    int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                     adev->gfx.mec.num_queue_per_pipe,
                     adev->gfx.num_compute_rings);

    if (multipipe_policy) {
        /* policy: spread the queues evenly across all pipes on MEC1 only */
        for (i = 0; i < max_queues_per_mec; i++) {
            pipe = i % adev->gfx.mec.num_pipe_per_mec;
            queue = (i / adev->gfx.mec.num_pipe_per_mec) %
                adev->gfx.mec.num_queue_per_pipe;

            set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
                    adev->gfx.mec.queue_bitmap);
        }
    } else {
        /* policy: amdgpu owns all queues in the given pipe */
        for (i = 0; i < max_queues_per_mec; ++i)
            set_bit(i, adev->gfx.mec.queue_bitmap);
    }

    dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
        bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}
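
/*
 * Illustrative example (topology values assumed): with 4 pipes per MEC,
 * 8 queues per pipe and 8 compute rings, the multipipe policy above walks
 * i = 0..7 and claims queue 0 of pipes 0-3 first, then queue 1 of pipes 0-3,
 * i.e. bits 0, 8, 16, 24, 1, 9, 17, 25 of mec.queue_bitmap. Without the
 * multipipe policy the same 8 rings would all land on pipe 0 (bits 0-7).
 */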

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
    int i, queue, pipe;
    bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
    int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
                    adev->gfx.me.num_queue_per_pipe;

    if (multipipe_policy) {
        /* policy: amdgpu owns the first queue per pipe at this stage;
         * this will be extended to multiple queues per pipe later */
        for (i = 0; i < max_queues_per_me; i++) {
            pipe = i % adev->gfx.me.num_pipe_per_me;
            queue = (i / adev->gfx.me.num_pipe_per_me) %
                adev->gfx.me.num_queue_per_pipe;

            set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
                adev->gfx.me.queue_bitmap);
        }
    } else {
        for (i = 0; i < max_queues_per_me; ++i)
            set_bit(i, adev->gfx.me.queue_bitmap);
    }

    /* update the number of active graphics rings */
    adev->gfx.num_gfx_rings =
        bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
                  struct amdgpu_ring *ring)
{
    int queue_bit;
    int mec, pipe, queue;

    queue_bit = adev->gfx.mec.num_mec
            * adev->gfx.mec.num_pipe_per_mec
            * adev->gfx.mec.num_queue_per_pipe;

    while (--queue_bit >= 0) {
        if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
            continue;

        amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

        /*
         * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
         * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
         * can only be issued on queue 0.
         */
        if ((mec == 1 && pipe > 1) || queue != 0)
            continue;

        ring->me = mec + 1;
        ring->pipe = pipe;
        ring->queue = queue;

        return 0;
    }

    dev_err(adev->dev, "Failed to find a queue for KIQ\n");
    return -EINVAL;
}
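
/*
 * Illustrative example (topology values assumed): with 2 MECs, 4 pipes per
 * MEC and 8 queues per pipe, the scan above starts at bit 63 (MEC index 1,
 * pipe 3, queue 7) and walks downwards. Bits already claimed for compute
 * rings are skipped, as are non-zero queues and pipes 2/3 of the second MEC,
 * so the KIQ typically lands on MEC index 1, pipe 1, queue 0 and is named
 * kiq_2.1.0 below (ring->me is the 1-based MEC number).
 */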

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
                 struct amdgpu_ring *ring,
                 struct amdgpu_irq_src *irq)
{
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    int r = 0;

    spin_lock_init(&kiq->ring_lock);

    ring->adev = NULL;
    ring->ring_obj = NULL;
    ring->use_doorbell = true;
    ring->doorbell_index = adev->doorbell_index.kiq;

    r = amdgpu_gfx_kiq_acquire(adev, ring);
    if (r)
        return r;

    ring->eop_gpu_addr = kiq->eop_gpu_addr;
    ring->no_scheduler = true;
    sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
    r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
                 AMDGPU_RING_PRIO_DEFAULT, NULL);
    if (r)
        dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

    return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
    amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;

    amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
            unsigned hpd_size)
{
    int r;
    u32 *hpd;
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;

    r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
                    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
                    &kiq->eop_gpu_addr, (void **)&hpd);
    if (r) {
        dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
        return r;
    }

    memset(hpd, 0, hpd_size);

    r = amdgpu_bo_reserve(kiq->eop_obj, true);
    if (unlikely(r != 0))
        dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
    amdgpu_bo_kunmap(kiq->eop_obj);
    amdgpu_bo_unreserve(kiq->eop_obj);

    return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
               unsigned mqd_size)
{
    struct amdgpu_ring *ring = NULL;
    int r, i;

    /* create MQD for KIQ */
    ring = &adev->gfx.kiq.ring;
    if (!adev->enable_mes_kiq && !ring->mqd_obj) {
        /* Originally the KIQ MQD was placed in the GTT domain, but for SRIOV
         * the VRAM domain is a must; otherwise the hypervisor's SAVE_VF step
         * fails after the driver is unloaded, because the MQD has been
         * deallocated and unbound from the GART. To avoid that divergence we
         * use the VRAM domain for the KIQ MQD on both SRIOV and bare metal.
         */
        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                        AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
                        &ring->mqd_gpu_addr, &ring->mqd_ptr);
        if (r) {
            dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
            return r;
        }

        /* prepare MQD backup */
        adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
        if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
            dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
    }

    if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
        /* create MQD for each KGQ */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
            ring = &adev->gfx.gfx_ring[i];
            if (!ring->mqd_obj) {
                r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
                                &ring->mqd_gpu_addr, &ring->mqd_ptr);
                if (r) {
                    dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                    return r;
                }

                /* prepare MQD backup */
                adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
                if (!adev->gfx.me.mqd_backup[i])
                    dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
            }
        }
    }

    /* create MQD for each KCQ */
    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
        ring = &adev->gfx.compute_ring[i];
        if (!ring->mqd_obj) {
            r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                            AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
                            &ring->mqd_gpu_addr, &ring->mqd_ptr);
            if (r) {
                dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                return r;
            }

            /* prepare MQD backup */
            adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
            if (!adev->gfx.mec.mqd_backup[i])
                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
        }
    }

    return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring = NULL;
    int i;

    if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
            ring = &adev->gfx.gfx_ring[i];
            kfree(adev->gfx.me.mqd_backup[i]);
            amdgpu_bo_free_kernel(&ring->mqd_obj,
                          &ring->mqd_gpu_addr,
                          &ring->mqd_ptr);
        }
    }

    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
        ring = &adev->gfx.compute_ring[i];
        kfree(adev->gfx.mec.mqd_backup[i]);
        amdgpu_bo_free_kernel(&ring->mqd_obj,
                      &ring->mqd_gpu_addr,
                      &ring->mqd_ptr);
    }

    ring = &adev->gfx.kiq.ring;
    kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
    amdgpu_bo_free_kernel(&ring->mqd_obj,
                  &ring->mqd_gpu_addr,
                  &ring->mqd_ptr);
}

int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    struct amdgpu_ring *kiq_ring = &kiq->ring;
    int i, r = 0;

    if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
        return -EINVAL;

    spin_lock(&adev->gfx.kiq.ring_lock);
    if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
                    adev->gfx.num_compute_rings)) {
        spin_unlock(&adev->gfx.kiq.ring_lock);
        return -ENOMEM;
    }

    for (i = 0; i < adev->gfx.num_compute_rings; i++)
        kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
                       RESET_QUEUES, 0, 0);

    if (adev->gfx.kiq.ring.sched.ready)
        r = amdgpu_ring_test_helper(kiq_ring);
    spin_unlock(&adev->gfx.kiq.ring_lock);

    return r;
}

int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
                    int queue_bit)
{
    int mec, pipe, queue;
    int set_resource_bit = 0;

    amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

    set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

    return set_resource_bit;
}
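
/*
 * Illustrative example (topology values assumed): the driver's queue_bit is
 * laid out according to adev->gfx.mec, while the bit expected by the KIQ
 * SET_RESOURCES packet appears to assume a fixed 4 pipes x 8 queues per MEC.
 * With 4 pipes per MEC and 8 queues per pipe the two layouts coincide, e.g.
 * queue_bit 41 -> mec 1, pipe 1, queue 1 -> set_resource_bit 1*32 + 1*8 + 1
 * = 41; the remapping only matters when the driver topology differs from
 * that fixed layout.
 */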

int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
    uint64_t queue_mask = 0;
    int r, i;

    if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
        return -EINVAL;

    for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
        if (!test_bit(i, adev->gfx.mec.queue_bitmap))
            continue;

        /* This situation may be hit in the future if a new HW
         * generation exposes more than 64 queues. If so, the
         * definition of queue_mask needs updating.
         */
        if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
            DRM_ERROR("Invalid KCQ enabled: %d\n", i);
            break;
        }

        queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
    }

    DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
                            kiq_ring->queue);
    spin_lock(&adev->gfx.kiq.ring_lock);
    r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
                    adev->gfx.num_compute_rings +
                    kiq->pmf->set_resources_size);
    if (r) {
        DRM_ERROR("Failed to lock KIQ (%d).\n", r);
        spin_unlock(&adev->gfx.kiq.ring_lock);
        return r;
    }

    if (adev->enable_mes)
        queue_mask = ~0ULL;

    kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
    for (i = 0; i < adev->gfx.num_compute_rings; i++)
        kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

    r = amdgpu_ring_test_helper(kiq_ring);
    spin_unlock(&adev->gfx.kiq.ring_lock);
    if (r)
        DRM_ERROR("KCQ enable failed\n");

    return r;
}

/* amdgpu_gfx_off_ctrl - Handle GFXOFF feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the GFXOFF feature, false to disable it
 *
 * 1. The GFXOFF feature is enabled by the gfx IP after gfx CG/PG is enabled.
 * 2. Other clients can send a request to disable the GFXOFF feature; that request should be honored.
 * 3. Other clients can cancel their request to disable the GFXOFF feature.
 * 4. Other clients should not request enabling the GFXOFF feature before having requested to disable it.
 */
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
    unsigned long delay = GFX_OFF_DELAY_ENABLE;

    if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
        return;

    mutex_lock(&adev->gfx.gfx_off_mutex);

    if (enable) {
        /* If the count is already 0, it means there's an imbalance bug somewhere.
         * Note that the bug may be in a different caller than the one which triggers the
         * WARN_ON_ONCE.
         */
        if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
            goto unlock;

        adev->gfx.gfx_off_req_count--;

        if (adev->gfx.gfx_off_req_count == 0 &&
            !adev->gfx.gfx_off_state) {
            /* If going to s2idle, no need to wait */
            if (adev->in_s0ix)
                delay = GFX_OFF_NO_DELAY;
            schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
                          delay);
        }
    } else {
        if (adev->gfx.gfx_off_req_count == 0) {
            cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

            if (adev->gfx.gfx_off_state &&
                !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
                adev->gfx.gfx_off_state = false;

                if (adev->gfx.funcs->init_spm_golden) {
                    dev_dbg(adev->dev,
                        "GFXOFF is disabled, re-init SPM golden settings\n");
                    amdgpu_gfx_init_spm_golden(adev);
                }
            }
        }

        adev->gfx.gfx_off_req_count++;
    }

unlock:
    mutex_unlock(&adev->gfx.gfx_off_mutex);
}
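
/*
 * Illustrative usage sketch (not a real caller): clients that need the GFX
 * block to stay powered bracket their access with a disable/enable pair,
 * which keeps gfx_off_req_count balanced:
 *
 *   amdgpu_gfx_off_ctrl(adev, false);   // request that GFX stays on
 *   ... program GFX registers ...
 *   amdgpu_gfx_off_ctrl(adev, true);    // drop the request; GFXOFF re-arms
 *                                       // after GFX_OFF_DELAY_ENABLE
 */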

int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
    int r = 0;

    mutex_lock(&adev->gfx.gfx_off_mutex);

    r = amdgpu_dpm_get_status_gfxoff(adev, value);

    mutex_unlock(&adev->gfx.gfx_off_mutex);

    return r;
}

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
    int r;

    if (amdgpu_ras_is_supported(adev, ras_block->block)) {
        if (!amdgpu_persistent_edc_harvesting_supported(adev))
            amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);

        r = amdgpu_ras_block_late_init(adev, ras_block);
        if (r)
            return r;

        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
        if (r)
            goto late_fini;
    } else {
        amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
    }

    return 0;
late_fini:
    amdgpu_ras_block_late_fini(adev, ras_block);
    return r;
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
        void *err_data,
        struct amdgpu_iv_entry *entry)
{
    /* TODO: a UE will trigger an interrupt.
     *
     * When "Full RAS" is enabled, the per-IP interrupt sources should
     * be disabled and the driver should only look for the aggregated
     * interrupt via sync flood.
     */
    if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
        if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
            adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
            adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
        amdgpu_ras_reset_gpu(adev);
    }
    return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
                  struct amdgpu_irq_src *source,
                  struct amdgpu_iv_entry *entry)
{
    struct ras_common_if *ras_if = adev->gfx.ras_if;
    struct ras_dispatch_if ih_data = {
        .entry = entry,
    };

    if (!ras_if)
        return 0;

    ih_data.head = *ras_if;

    DRM_ERROR("CP ECC ERROR IRQ\n");
    amdgpu_ras_interrupt_dispatch(adev, &ih_data);
    return 0;
}

uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
    signed long r, cnt = 0;
    unsigned long flags;
    uint32_t seq, reg_val_offs = 0, value = 0;
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    struct amdgpu_ring *ring = &kiq->ring;

    if (amdgpu_device_skip_hw_access(adev))
        return 0;

    if (adev->mes.ring.sched.ready)
        return amdgpu_mes_rreg(adev, reg);

    BUG_ON(!ring->funcs->emit_rreg);

    spin_lock_irqsave(&kiq->ring_lock, flags);
    if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
        pr_err("critical bug! too many kiq readers\n");
        goto failed_unlock;
    }
    amdgpu_ring_alloc(ring, 32);
    amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
    r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
    if (r)
        goto failed_undo;

    amdgpu_ring_commit(ring);
    spin_unlock_irqrestore(&kiq->ring_lock, flags);

    r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

    /* Don't wait any longer in the GPU reset case, because doing so may
     * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
     * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
     * never return if we keep waiting in virt_kiq_rreg, which causes
     * gpu_recover() to hang there.
     *
     * Also don't wait any longer when called from IRQ context.
     */
    if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
        goto failed_kiq_read;

    might_sleep();
    while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
        msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
        r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
    }

    if (cnt > MAX_KIQ_REG_TRY)
        goto failed_kiq_read;

    mb();
    value = adev->wb.wb[reg_val_offs];
    amdgpu_device_wb_free(adev, reg_val_offs);
    return value;

failed_undo:
    amdgpu_ring_undo(ring);
failed_unlock:
    spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
    if (reg_val_offs)
        amdgpu_device_wb_free(adev, reg_val_offs);
    dev_err(adev->dev, "failed to read reg:%x\n", reg);
    return ~0;
}

void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
    signed long r, cnt = 0;
    unsigned long flags;
    uint32_t seq;
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    struct amdgpu_ring *ring = &kiq->ring;

    BUG_ON(!ring->funcs->emit_wreg);

    if (amdgpu_device_skip_hw_access(adev))
        return;

    if (adev->mes.ring.sched.ready) {
        amdgpu_mes_wreg(adev, reg, v);
        return;
    }

    spin_lock_irqsave(&kiq->ring_lock, flags);
    amdgpu_ring_alloc(ring, 32);
    amdgpu_ring_emit_wreg(ring, reg, v);
    r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
    if (r)
        goto failed_undo;

    amdgpu_ring_commit(ring);
    spin_unlock_irqrestore(&kiq->ring_lock, flags);

    r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

    /* Don't wait any longer in the GPU reset case, because doing so may
     * block the gpu_recover() routine forever, e.g. this virt_kiq_wreg
     * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
     * never return if we keep waiting in virt_kiq_wreg, which causes
     * gpu_recover() to hang there.
     *
     * Also don't wait any longer when called from IRQ context.
     */
    if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
        goto failed_kiq_write;

    might_sleep();
    while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
        msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
        r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
    }

    if (cnt > MAX_KIQ_REG_TRY)
        goto failed_kiq_write;

    return;

failed_undo:
    amdgpu_ring_undo(ring);
    spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
    dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
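
/*
 * Illustrative only: these helpers are not normally called directly; register
 * accesses go through the driver's RREG32/WREG32 style wrappers, which fall
 * back to the KIQ path when direct MMIO access is not possible (e.g. under
 * SR-IOV). Conceptually:
 *
 *   uint32_t val = amdgpu_kiq_rreg(adev, reg_offset);
 *   amdgpu_kiq_wreg(adev, reg_offset, val | some_bit);
 *
 * where reg_offset and some_bit are placeholders, not real register fields.
 */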

int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
    if (amdgpu_num_kcq == -1) {
        return 8;
    } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
        dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
        return 8;
    }
    return amdgpu_num_kcq;
}
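
/*
 * Illustrative examples of the clamping above (amdgpu_num_kcq comes from the
 * amdgpu.num_kcq module parameter): the default of -1 and any out-of-range
 * value such as 12 or -3 both result in 8 kernel compute queues, while an
 * in-range value such as 4 is used as-is.
 */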