/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e) \
    container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
    [AMDGPU_HW_IP_GFX]  =   1,
    [AMDGPU_HW_IP_COMPUTE]  =   4,
    [AMDGPU_HW_IP_DMA]  =   2,
    [AMDGPU_HW_IP_UVD]  =   1,
    [AMDGPU_HW_IP_VCE]  =   1,
    [AMDGPU_HW_IP_UVD_ENC]  =   1,
    [AMDGPU_HW_IP_VCN_DEC]  =   1,
    [AMDGPU_HW_IP_VCN_ENC]  =   1,
    [AMDGPU_HW_IP_VCN_JPEG] =   1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
    switch (ctx_prio) {
    case AMDGPU_CTX_PRIORITY_UNSET:
    case AMDGPU_CTX_PRIORITY_VERY_LOW:
    case AMDGPU_CTX_PRIORITY_LOW:
    case AMDGPU_CTX_PRIORITY_NORMAL:
    case AMDGPU_CTX_PRIORITY_HIGH:
    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
        return true;
    default:
        return false;
    }
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
    switch (ctx_prio) {
    case AMDGPU_CTX_PRIORITY_UNSET:
        return DRM_SCHED_PRIORITY_UNSET;

    case AMDGPU_CTX_PRIORITY_VERY_LOW:
        return DRM_SCHED_PRIORITY_MIN;

    case AMDGPU_CTX_PRIORITY_LOW:
        return DRM_SCHED_PRIORITY_MIN;

    case AMDGPU_CTX_PRIORITY_NORMAL:
        return DRM_SCHED_PRIORITY_NORMAL;

    case AMDGPU_CTX_PRIORITY_HIGH:
        return DRM_SCHED_PRIORITY_HIGH;

    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
        return DRM_SCHED_PRIORITY_HIGH;

    /* This should not happen as we already sanitized the userspace-provided
     * priority; WARN if it does.
     */
    default:
        WARN(1, "Invalid context priority %d\n", ctx_prio);
        return DRM_SCHED_PRIORITY_NORMAL;
    }
}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                      int32_t priority)
{
    if (!amdgpu_ctx_priority_is_valid(priority))
        return -EINVAL;

    /* NORMAL and below are accessible by everyone */
    if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
        return 0;

    if (capable(CAP_SYS_NICE))
        return 0;

    if (drm_is_current_master(filp))
        return 0;

    return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
    switch (prio) {
    case AMDGPU_CTX_PRIORITY_HIGH:
    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
        return AMDGPU_GFX_PIPE_PRIO_HIGH;
    default:
        return AMDGPU_GFX_PIPE_PRIO_NORMAL;
    }
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
    switch (prio) {
    case AMDGPU_CTX_PRIORITY_HIGH:
        return AMDGPU_RING_PRIO_1;
    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
        return AMDGPU_RING_PRIO_2;
    default:
        return AMDGPU_RING_PRIO_0;
    }
}

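/* Map the context priority (the override if one is set, otherwise the init
 * priority) to a hardware queue priority for the given HW IP, falling back
 * to the default priority when no scheduler is configured for that level.
 */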
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
    struct amdgpu_device *adev = ctx->mgr->adev;
    unsigned int hw_prio;
    int32_t ctx_prio;

    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
            ctx->init_priority : ctx->override_priority;

    switch (hw_ip) {
    case AMDGPU_HW_IP_GFX:
    case AMDGPU_HW_IP_COMPUTE:
        hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
        break;
    case AMDGPU_HW_IP_VCE:
    case AMDGPU_HW_IP_VCN_ENC:
        hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
        break;
    default:
        hw_prio = AMDGPU_RING_PRIO_DEFAULT;
        break;
    }

    hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
    if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
        hw_prio = AMDGPU_RING_PRIO_DEFAULT;

    return hw_prio;
}

/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
    struct drm_sched_fence *s_fence;

    if (!fence)
        return ns_to_ktime(0);

    /* When the fence is not even scheduled it can't have spent time */
    s_fence = to_drm_sched_fence(fence);
    if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
        return ns_to_ktime(0);

    /* When it is still running, account for the time spent so far */
    if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
        return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

    return ktime_sub(s_fence->finished.timestamp,
             s_fence->scheduled.timestamp);
}

static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
                      struct amdgpu_ctx_entity *centity)
{
    ktime_t res = ns_to_ktime(0);
    uint32_t i;

    spin_lock(&ctx->ring_lock);
    for (i = 0; i < amdgpu_sched_jobs; i++)
        res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
    spin_unlock(&ctx->ring_lock);
    return res;
}

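/* Lazily allocate and initialize the scheduler entity for a hw_ip/ring pair.
 * Engines that retain state across dependent jobs are pinned to a single
 * scheduler instead of being load balanced. Installation into the context
 * uses cmpxchg(), so losing the race against a concurrent caller is not an
 * error.
 */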
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
                  const u32 ring)
{
    struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
    struct amdgpu_device *adev = ctx->mgr->adev;
    struct amdgpu_ctx_entity *entity;
    enum drm_sched_priority drm_prio;
    unsigned int hw_prio, num_scheds;
    int32_t ctx_prio;
    int r;

    entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
             GFP_KERNEL);
    if (!entity)
        return -ENOMEM;

    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
            ctx->init_priority : ctx->override_priority;
    entity->hw_ip = hw_ip;
    entity->sequence = 1;
    hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
    drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

    hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
    scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
    num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;

    /* disable load balance if the hw engine retains context among dependent jobs */
    if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
        hw_ip == AMDGPU_HW_IP_VCN_DEC ||
        hw_ip == AMDGPU_HW_IP_UVD_ENC ||
        hw_ip == AMDGPU_HW_IP_UVD) {
        sched = drm_sched_pick_best(scheds, num_scheds);
        scheds = &sched;
        num_scheds = 1;
    }

    r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
                  &ctx->guilty);
    if (r)
        goto error_free_entity;

    /* It's not an error if we fail to install the new entity */
    if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
        goto cleanup_entity;

    return 0;

cleanup_entity:
    drm_sched_entity_fini(&entity->entity);

error_free_entity:
    kfree(entity);

    return r;
}

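/* Tear down a context entity: sum up the time spent on all fences it still
 * tracks, drop the fence references and free the entity. Returns the
 * accumulated time so the caller can account it.
 */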
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
    ktime_t res = ns_to_ktime(0);
    int i;

    if (!entity)
        return res;

    for (i = 0; i < amdgpu_sched_jobs; ++i) {
        res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
        dma_fence_put(entity->fences[i]);
    }

    kfree(entity);
    return res;
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
                    u32 *stable_pstate)
{
    struct amdgpu_device *adev = ctx->mgr->adev;
    enum amd_dpm_forced_level current_level;

    current_level = amdgpu_dpm_get_performance_level(adev);

    switch (current_level) {
    case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
        *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
        break;
    case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
        *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
        break;
    case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
        *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
        break;
    case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
        *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
        break;
    default:
        *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
        break;
    }
    return 0;
}

static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
               struct drm_file *filp, struct amdgpu_ctx *ctx)
{
    u32 current_stable_pstate;
    int r;

    r = amdgpu_ctx_priority_permit(filp, priority);
    if (r)
        return r;

    memset(ctx, 0, sizeof(*ctx));

    kref_init(&ctx->refcount);
    ctx->mgr = mgr;
    spin_lock_init(&ctx->ring_lock);
    mutex_init(&ctx->lock);

    ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
    ctx->reset_counter_query = ctx->reset_counter;
    ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
    ctx->init_priority = priority;
    ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

    r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
    if (r)
        return r;

    ctx->stable_pstate = current_stable_pstate;

    return 0;
}

static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
                    u32 stable_pstate)
{
    struct amdgpu_device *adev = ctx->mgr->adev;
    enum amd_dpm_forced_level level;
    u32 current_stable_pstate;
    int r;

    mutex_lock(&adev->pm.stable_pstate_ctx_lock);
    if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
        r = -EBUSY;
        goto done;
    }

    r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
    if (r || (stable_pstate == current_stable_pstate))
        goto done;

    switch (stable_pstate) {
    case AMDGPU_CTX_STABLE_PSTATE_NONE:
        level = AMD_DPM_FORCED_LEVEL_AUTO;
        break;
    case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
        level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
        break;
    case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
        level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
        break;
    case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
        level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
        break;
    case AMDGPU_CTX_STABLE_PSTATE_PEAK:
        level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
        break;
    default:
        r = -EINVAL;
        goto done;
    }

    r = amdgpu_dpm_force_performance_level(adev, level);

    if (level == AMD_DPM_FORCED_LEVEL_AUTO)
        adev->pm.stable_pstate_ctx = NULL;
    else
        adev->pm.stable_pstate_ctx = ctx;
done:
    mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

    return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
    struct amdgpu_ctx_mgr *mgr = ctx->mgr;
    struct amdgpu_device *adev = mgr->adev;
    unsigned i, j, idx;

    if (!adev)
        return;

    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
        for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
            ktime_t spend;

            spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
            atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
        }
    }

    if (drm_dev_enter(&adev->ddev, &idx)) {
        amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
        drm_dev_exit(idx);
    }

    mutex_destroy(&ctx->lock);
    kfree(ctx);
}

int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
              u32 ring, struct drm_sched_entity **entity)
{
    int r;

    if (hw_ip >= AMDGPU_HW_IP_NUM) {
        DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
        return -EINVAL;
    }

    /* Right now all IPs have only one instance - multiple rings. */
    if (instance != 0) {
        DRM_DEBUG("invalid ip instance: %d\n", instance);
        return -EINVAL;
    }

    if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
        DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
        return -EINVAL;
    }

    if (ctx->entities[hw_ip][ring] == NULL) {
        r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
        if (r)
            return r;
    }

    *entity = &ctx->entities[hw_ip][ring]->entity;
    return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv,
                struct drm_file *filp,
                int32_t priority,
                uint32_t *id)
{
    struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
    struct amdgpu_ctx *ctx;
    int r;

    ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
    if (!ctx)
        return -ENOMEM;

    mutex_lock(&mgr->lock);
    r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
    if (r < 0) {
        mutex_unlock(&mgr->lock);
        kfree(ctx);
        return r;
    }

    *id = (uint32_t)r;
    r = amdgpu_ctx_init(mgr, priority, filp, ctx);
    if (r) {
        idr_remove(&mgr->ctx_handles, *id);
        *id = 0;
        kfree(ctx);
    }
    mutex_unlock(&mgr->lock);
    return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
    struct amdgpu_ctx *ctx;
    u32 i, j;

    ctx = container_of(ref, struct amdgpu_ctx, refcount);
    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
            if (!ctx->entities[i][j])
                continue;

            drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
        }
    }

    amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
    struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
    struct amdgpu_ctx *ctx;

    mutex_lock(&mgr->lock);
    ctx = idr_remove(&mgr->ctx_handles, id);
    if (ctx)
        kref_put(&ctx->refcount, amdgpu_ctx_do_release);
    mutex_unlock(&mgr->lock);
    return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv, uint32_t id,
                union drm_amdgpu_ctx_out *out)
{
    struct amdgpu_ctx *ctx;
    struct amdgpu_ctx_mgr *mgr;
    unsigned reset_counter;

    if (!fpriv)
        return -EINVAL;

    mgr = &fpriv->ctx_mgr;
    mutex_lock(&mgr->lock);
    ctx = idr_find(&mgr->ctx_handles, id);
    if (!ctx) {
        mutex_unlock(&mgr->lock);
        return -EINVAL;
    }

    /* TODO: these two are always zero */
    out->state.flags = 0x0;
    out->state.hangs = 0x0;

    /* determine if a GPU reset has occurred since the last call */
    reset_counter = atomic_read(&adev->gpu_reset_counter);
    /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
    if (ctx->reset_counter_query == reset_counter)
        out->state.reset_status = AMDGPU_CTX_NO_RESET;
    else
        out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
    ctx->reset_counter_query = reset_counter;

    mutex_unlock(&mgr->lock);
    return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
                 struct amdgpu_fpriv *fpriv, uint32_t id,
                 union drm_amdgpu_ctx_out *out)
{
    struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    struct amdgpu_ctx *ctx;
    struct amdgpu_ctx_mgr *mgr;

    if (!fpriv)
        return -EINVAL;

    mgr = &fpriv->ctx_mgr;
    mutex_lock(&mgr->lock);
    ctx = idr_find(&mgr->ctx_handles, id);
    if (!ctx) {
        mutex_unlock(&mgr->lock);
        return -EINVAL;
    }

    out->state.flags = 0x0;
    out->state.hangs = 0x0;

    if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

    if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

    if (atomic_read(&ctx->guilty))
        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

    if (adev->ras_enabled && con) {
        /* Return the cached values in O(1),
         * and schedule delayed work to cache
         * new values.
         */
        int ce_count, ue_count;

        ce_count = atomic_read(&con->ras_ce_count);
        ue_count = atomic_read(&con->ras_ue_count);

        if (ce_count != ctx->ras_counter_ce) {
            ctx->ras_counter_ce = ce_count;
            out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
        }

        if (ue_count != ctx->ras_counter_ue) {
            ctx->ras_counter_ue = ue_count;
            out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
        }

        schedule_delayed_work(&con->ras_counte_delay_work,
                      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
    }

    mutex_unlock(&mgr->lock);
    return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
                    struct amdgpu_fpriv *fpriv, uint32_t id,
                    bool set, u32 *stable_pstate)
{
    struct amdgpu_ctx *ctx;
    struct amdgpu_ctx_mgr *mgr;
    int r;

    if (!fpriv)
        return -EINVAL;

    mgr = &fpriv->ctx_mgr;
    mutex_lock(&mgr->lock);
    ctx = idr_find(&mgr->ctx_handles, id);
    if (!ctx) {
        mutex_unlock(&mgr->lock);
        return -EINVAL;
    }

    if (set)
        r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
    else
        r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

    mutex_unlock(&mgr->lock);
    return r;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
             struct drm_file *filp)
{
    int r;
    uint32_t id, stable_pstate;
    int32_t priority;

    union drm_amdgpu_ctx *args = data;
    struct amdgpu_device *adev = drm_to_adev(dev);
    struct amdgpu_fpriv *fpriv = filp->driver_priv;

    id = args->in.ctx_id;
    priority = args->in.priority;

    /* For backwards compatibility reasons, we need to accept
     * ioctls with garbage in the priority field */
    if (!amdgpu_ctx_priority_is_valid(priority))
        priority = AMDGPU_CTX_PRIORITY_NORMAL;

    switch (args->in.op) {
    case AMDGPU_CTX_OP_ALLOC_CTX:
        r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
        args->out.alloc.ctx_id = id;
        break;
    case AMDGPU_CTX_OP_FREE_CTX:
        r = amdgpu_ctx_free(fpriv, id);
        break;
    case AMDGPU_CTX_OP_QUERY_STATE:
        r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
        break;
    case AMDGPU_CTX_OP_QUERY_STATE2:
        r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
        break;
    case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
        if (args->in.flags)
            return -EINVAL;
        r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
        if (!r)
            args->out.pstate.flags = stable_pstate;
        break;
    case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
        if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
            return -EINVAL;
        stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
        if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
            return -EINVAL;
        r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
        break;
    default:
        return -EINVAL;
    }

    return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
    struct amdgpu_ctx *ctx;
    struct amdgpu_ctx_mgr *mgr;

    if (!fpriv)
        return NULL;

    mgr = &fpriv->ctx_mgr;

    mutex_lock(&mgr->lock);
    ctx = idr_find(&mgr->ctx_handles, id);
    if (ctx)
        kref_get(&ctx->refcount);
    mutex_unlock(&mgr->lock);
    return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
    if (ctx == NULL)
        return -EINVAL;

    kref_put(&ctx->refcount, amdgpu_ctx_do_release);
    return 0;
}

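/* Remember the fence in the entity's ring buffer of the last
 * amdgpu_sched_jobs fences and return the sequence number assigned to it.
 * The fence that previously occupied the slot is accounted to the per-IP
 * usage counters and released.
 */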
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
                  struct drm_sched_entity *entity,
                  struct dma_fence *fence)
{
    struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
    uint64_t seq = centity->sequence;
    struct dma_fence *other = NULL;
    unsigned idx = 0;

    idx = seq & (amdgpu_sched_jobs - 1);
    other = centity->fences[idx];
    WARN_ON(other && !dma_fence_is_signaled(other));

    dma_fence_get(fence);

    spin_lock(&ctx->ring_lock);
    centity->fences[idx] = fence;
    centity->sequence++;
    spin_unlock(&ctx->ring_lock);

    atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
             &ctx->mgr->time_spend[centity->hw_ip]);

    dma_fence_put(other);
    return seq;
}

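/* Look up a fence by sequence number. A seq of ~0ull means the most recently
 * added fence. Sequence numbers that have not been submitted yet yield
 * -EINVAL, while fences that already dropped out of the ring buffer (and are
 * therefore long signaled) yield NULL.
 */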
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
                       struct drm_sched_entity *entity,
                       uint64_t seq)
{
    struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
    struct dma_fence *fence;

    spin_lock(&ctx->ring_lock);

    if (seq == ~0ull)
        seq = centity->sequence - 1;

    if (seq >= centity->sequence) {
        spin_unlock(&ctx->ring_lock);
        return ERR_PTR(-EINVAL);
    }

    if (seq + amdgpu_sched_jobs < centity->sequence) {
        spin_unlock(&ctx->ring_lock);
        return NULL;
    }

    fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
    spin_unlock(&ctx->ring_lock);

    return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
                       struct amdgpu_ctx_entity *aentity,
                       int hw_ip,
                       int32_t priority)
{
    struct amdgpu_device *adev = ctx->mgr->adev;
    unsigned int hw_prio;
    struct drm_gpu_scheduler **scheds = NULL;
    unsigned num_scheds;

    /* set sw priority */
    drm_sched_entity_set_priority(&aentity->entity,
                      amdgpu_ctx_to_drm_sched_prio(priority));

    /* set hw priority */
    if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
        hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
        hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
        scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
        num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
        drm_sched_entity_modify_sched(&aentity->entity, scheds,
                          num_scheds);
    }
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                  int32_t priority)
{
    int32_t ctx_prio;
    unsigned i, j;

    ctx->override_priority = priority;

    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
            ctx->init_priority : ctx->override_priority;
    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
            if (!ctx->entities[i][j])
                continue;

            amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
                               i, ctx_prio);
        }
    }
}

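/* Wait for the fence occupying the ring-buffer slot that the next submission
 * on this entity will reuse, which limits each entity to at most
 * amdgpu_sched_jobs outstanding jobs.
 */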
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
                   struct drm_sched_entity *entity)
{
    struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
    struct dma_fence *other;
    unsigned idx;
    long r;

    spin_lock(&ctx->ring_lock);
    idx = centity->sequence & (amdgpu_sched_jobs - 1);
    other = dma_fence_get(centity->fences[idx]);
    spin_unlock(&ctx->ring_lock);

    if (!other)
        return 0;

    r = dma_fence_wait(other, true);
    if (r < 0 && r != -ERESTARTSYS)
        DRM_ERROR("Error (%ld) waiting for fence!\n", r);

    dma_fence_put(other);
    return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
             struct amdgpu_device *adev)
{
    unsigned int i;

    mgr->adev = adev;
    mutex_init(&mgr->lock);
    idr_init(&mgr->ctx_handles);

    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
        atomic64_set(&mgr->time_spend[i], 0);
}

long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
    struct amdgpu_ctx *ctx;
    struct idr *idp;
    uint32_t id, i, j;

    idp = &mgr->ctx_handles;

    mutex_lock(&mgr->lock);
    idr_for_each_entry(idp, ctx, id) {
        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
            for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                struct drm_sched_entity *entity;

                if (!ctx->entities[i][j])
                    continue;

                entity = &ctx->entities[i][j]->entity;
                timeout = drm_sched_entity_flush(entity, timeout);
            }
        }
    }
    mutex_unlock(&mgr->lock);
    return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
    struct amdgpu_ctx *ctx;
    struct idr *idp;
    uint32_t id, i, j;

    idp = &mgr->ctx_handles;

    idr_for_each_entry(idp, ctx, id) {
        if (kref_read(&ctx->refcount) != 1) {
            DRM_ERROR("ctx %p is still alive\n", ctx);
            continue;
        }

        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
            for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                struct drm_sched_entity *entity;

                if (!ctx->entities[i][j])
                    continue;

                entity = &ctx->entities[i][j]->entity;
                drm_sched_entity_fini(entity);
            }
        }
    }
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
    struct amdgpu_ctx *ctx;
    struct idr *idp;
    uint32_t id;

    amdgpu_ctx_mgr_entity_fini(mgr);

    idp = &mgr->ctx_handles;

    idr_for_each_entry(idp, ctx, id) {
        if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
            DRM_ERROR("ctx %p is still alive\n", ctx);
    }

    idr_destroy(&mgr->ctx_handles);
    mutex_destroy(&mgr->lock);
}

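/* Sum up the GPU time spent per HW IP: the time already accumulated for
 * destroyed entities in mgr->time_spend plus the time of the fences still
 * tracked by live entities.
 */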
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
              ktime_t usage[AMDGPU_HW_IP_NUM])
{
    struct amdgpu_ctx *ctx;
    unsigned int hw_ip, i;
    uint32_t id;

    /*
     * This is a little bit racy because a ctx or a fence can be destroyed
     * just as we try to account them. But that is OK since exactly that
     * case is explicitly allowed by the interface.
     */
    mutex_lock(&mgr->lock);
    for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
        uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

        usage[hw_ip] = ns_to_ktime(ns);
    }

    idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
        for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
            for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
                struct amdgpu_ctx_entity *centity;
                ktime_t spend;

                centity = ctx->entities[hw_ip][i];
                if (!centity)
                    continue;
                spend = amdgpu_ctx_entity_time(ctx, centity);
                usage[hw_ip] = ktime_add(usage[hw_ip], spend);
            }
        }
    }
    mutex_unlock(&mgr->lock);
}