0001 /*
0002  * Copyright 2015 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 
0024 /**
0025  * DOC: Overview
0026  *
0027  * The GPU scheduler provides entities which allow userspace to push jobs
0028  * into software queues which are then scheduled on a hardware run queue.
0029  * The software queues have a priority among them. The scheduler picks
0030  * entities from the run queue in FIFO order. The scheduler also provides
0031  * dependency handling between jobs. The driver is expected to provide
0032  * callback functions for backend operations, such as submitting a job to
0033  * the hardware run queue or returning the dependencies of a job.
0034  *
0035  * The organisation of the scheduler is as follows:
0036  *
0037  * 1. Each hw run queue has one scheduler
0038  * 2. Each scheduler has multiple run queues with different priorities
0039  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
0040  * 3. Each scheduler run queue has a queue of entities to schedule
0041  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
0042  *    the hardware.
0043  *
0044  * The jobs in an entity are always scheduled in the order in which they were pushed.
0045  */
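
/*
 * Illustrative sketch (not part of the original file): a driver typically
 * declares a &struct drm_sched_backend_ops, creates one scheduler per
 * hardware run queue with drm_sched_init() and binds entities to it with
 * drm_sched_entity_init(). Everything prefixed with "my_" below is a
 * hypothetical driver structure or callback; the hw_submission limit of 64,
 * the hang_limit of 3 and the 10 second timeout are arbitrary example
 * values. The drm_sched_* calls are the ones documented in this file and in
 * <drm/gpu_scheduler.h>.
 *
 *    static const struct drm_sched_backend_ops my_sched_ops = {
 *        .run_job      = my_run_job,
 *        .timedout_job = my_timedout_job,
 *        .free_job     = my_free_job,
 *    };
 *
 *    int my_ring_init(struct my_ring *ring, struct device *dev)
 *    {
 *        return drm_sched_init(&ring->sched, &my_sched_ops, 64, 3,
 *                              msecs_to_jiffies(10000), NULL, NULL,
 *                              ring->name, dev);
 *    }
 *
 *    int my_context_init(struct my_context *ctx, struct my_ring *ring)
 *    {
 *        struct drm_gpu_scheduler *sched = &ring->sched;
 *
 *        return drm_sched_entity_init(&ctx->entity,
 *                                     DRM_SCHED_PRIORITY_NORMAL,
 *                                     &sched, 1, NULL);
 *    }
 */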
0046 
0047 #include <linux/kthread.h>
0048 #include <linux/wait.h>
0049 #include <linux/sched.h>
0050 #include <linux/completion.h>
0051 #include <linux/dma-resv.h>
0052 #include <uapi/linux/sched/types.h>
0053 
0054 #include <drm/drm_print.h>
0055 #include <drm/drm_gem.h>
0056 #include <drm/gpu_scheduler.h>
0057 #include <drm/spsc_queue.h>
0058 
0059 #define CREATE_TRACE_POINTS
0060 #include "gpu_scheduler_trace.h"
0061 
0062 #define to_drm_sched_job(sched_job)     \
0063         container_of((sched_job), struct drm_sched_job, queue_node)
0064 
0065 /**
0066  * drm_sched_rq_init - initialize a given run queue struct
0067  *
0068  * @sched: scheduler instance to associate with this run queue
0069  * @rq: scheduler run queue
0070  *
0071  * Initializes a scheduler runqueue.
0072  */
0073 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
0074                   struct drm_sched_rq *rq)
0075 {
0076     spin_lock_init(&rq->lock);
0077     INIT_LIST_HEAD(&rq->entities);
0078     rq->current_entity = NULL;
0079     rq->sched = sched;
0080 }
0081 
0082 /**
0083  * drm_sched_rq_add_entity - add an entity
0084  *
0085  * @rq: scheduler run queue
0086  * @entity: scheduler entity
0087  *
0088  * Adds a scheduler entity to the run queue.
0089  */
0090 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
0091                  struct drm_sched_entity *entity)
0092 {
0093     if (!list_empty(&entity->list))
0094         return;
0095     spin_lock(&rq->lock);
0096     atomic_inc(rq->sched->score);
0097     list_add_tail(&entity->list, &rq->entities);
0098     spin_unlock(&rq->lock);
0099 }
0100 
0101 /**
0102  * drm_sched_rq_remove_entity - remove an entity
0103  *
0104  * @rq: scheduler run queue
0105  * @entity: scheduler entity
0106  *
0107  * Removes a scheduler entity from the run queue.
0108  */
0109 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
0110                 struct drm_sched_entity *entity)
0111 {
0112     if (list_empty(&entity->list))
0113         return;
0114     spin_lock(&rq->lock);
0115     atomic_dec(rq->sched->score);
0116     list_del_init(&entity->list);
0117     if (rq->current_entity == entity)
0118         rq->current_entity = NULL;
0119     spin_unlock(&rq->lock);
0120 }
0121 
0122 /**
0123  * drm_sched_rq_select_entity - Select an entity which could provide a job to run
0124  *
0125  * @rq: scheduler run queue to check.
0126  *
0127  * Try to find a ready entity; returns NULL if none is found.
0128  */
0129 static struct drm_sched_entity *
0130 drm_sched_rq_select_entity(struct drm_sched_rq *rq)
0131 {
0132     struct drm_sched_entity *entity;
0133 
0134     spin_lock(&rq->lock);
0135 
0136     entity = rq->current_entity;
0137     if (entity) {
0138         list_for_each_entry_continue(entity, &rq->entities, list) {
0139             if (drm_sched_entity_is_ready(entity)) {
0140                 rq->current_entity = entity;
0141                 reinit_completion(&entity->entity_idle);
0142                 spin_unlock(&rq->lock);
0143                 return entity;
0144             }
0145         }
0146     }
0147 
0148     list_for_each_entry(entity, &rq->entities, list) {
0149 
0150         if (drm_sched_entity_is_ready(entity)) {
0151             rq->current_entity = entity;
0152             reinit_completion(&entity->entity_idle);
0153             spin_unlock(&rq->lock);
0154             return entity;
0155         }
0156 
0157         if (entity == rq->current_entity)
0158             break;
0159     }
0160 
0161     spin_unlock(&rq->lock);
0162 
0163     return NULL;
0164 }
0165 
0166 /**
0167  * drm_sched_job_done - complete a job
0168  * @s_job: pointer to the job which is done
0169  *
0170  * Finish the job's fence and wake up the worker thread.
0171  */
0172 static void drm_sched_job_done(struct drm_sched_job *s_job)
0173 {
0174     struct drm_sched_fence *s_fence = s_job->s_fence;
0175     struct drm_gpu_scheduler *sched = s_fence->sched;
0176 
0177     atomic_dec(&sched->hw_rq_count);
0178     atomic_dec(sched->score);
0179 
0180     trace_drm_sched_process_job(s_fence);
0181 
0182     dma_fence_get(&s_fence->finished);
0183     drm_sched_fence_finished(s_fence);
0184     dma_fence_put(&s_fence->finished);
0185     wake_up_interruptible(&sched->wake_up_worker);
0186 }
0187 
0188 /**
0189  * drm_sched_job_done_cb - the callback for a done job
0190  * @f: fence
0191  * @cb: fence callbacks
0192  */
0193 static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
0194 {
0195     struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
0196 
0197     drm_sched_job_done(s_job);
0198 }
0199 
0200 /**
0201  * drm_sched_dependency_optimized - test if the dependency can be optimized
0202  *
0203  * @fence: the dependency fence
0204  * @entity: the entity which depends on the above fence
0205  *
0206  * Returns true if the dependency can be optimized and false otherwise
0207  */
0208 bool drm_sched_dependency_optimized(struct dma_fence *fence,
0209                     struct drm_sched_entity *entity)
0210 {
0211     struct drm_gpu_scheduler *sched = entity->rq->sched;
0212     struct drm_sched_fence *s_fence;
0213 
0214     if (!fence || dma_fence_is_signaled(fence))
0215         return false;
0216     if (fence->context == entity->fence_context)
0217         return true;
0218     s_fence = to_drm_sched_fence(fence);
0219     if (s_fence && s_fence->sched == sched)
0220         return true;
0221 
0222     return false;
0223 }
0224 EXPORT_SYMBOL(drm_sched_dependency_optimized);
0225 
0226 /**
0227  * drm_sched_start_timeout - start timeout for reset worker
0228  *
0229  * @sched: scheduler instance to start the worker for
0230  *
0231  * Start the timeout for the given scheduler.
0232  */
0233 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
0234 {
0235     if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
0236         !list_empty(&sched->pending_list))
0237         queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
0238 }
0239 
0240 /**
0241  * drm_sched_fault - immediately start timeout handler
0242  *
0243  * @sched: scheduler where the timeout handling should be started.
0244  *
0245  * Start timeout handling immediately when the driver detects a hardware fault.
0246  */
0247 void drm_sched_fault(struct drm_gpu_scheduler *sched)
0248 {
0249     mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
0250 }
0251 EXPORT_SYMBOL(drm_sched_fault);
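
/*
 * Illustrative sketch (not part of the original file): drm_sched_fault() is
 * typically called from a driver interrupt or error handler once the
 * hardware has reported a fault, so the timeout handler runs immediately
 * instead of waiting for the full timeout. my_ring is a hypothetical driver
 * structure embedding the scheduler; irqreturn_t/IRQ_HANDLED come from
 * <linux/interrupt.h>.
 *
 *    static irqreturn_t my_fault_irq(int irq, void *data)
 *    {
 *        struct my_ring *ring = data;
 *
 *        drm_sched_fault(&ring->sched);
 *        return IRQ_HANDLED;
 *    }
 */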
0252 
0253 /**
0254  * drm_sched_suspend_timeout - Suspend scheduler job timeout
0255  *
0256  * @sched: scheduler instance for which to suspend the timeout
0257  *
0258  * Suspend the delayed work timeout for the scheduler. This is done by
0259  * modifying the delayed work timeout to an arbitrary large value,
0260  * MAX_SCHEDULE_TIMEOUT in this case.
0261  *
0262  * Returns the remaining timeout.
0263  *
0264  */
0265 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
0266 {
0267     unsigned long sched_timeout, now = jiffies;
0268 
0269     sched_timeout = sched->work_tdr.timer.expires;
0270 
0271     /*
0272      * Modify the timeout to an arbitrarily large value. This also prevents
0273      * the timeout to be restarted when new submissions arrive
0274      */
0275     if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
0276             && time_after(sched_timeout, now))
0277         return sched_timeout - now;
0278     else
0279         return sched->timeout;
0280 }
0281 EXPORT_SYMBOL(drm_sched_suspend_timeout);
0282 
0283 /**
0284  * drm_sched_resume_timeout - Resume scheduler job timeout
0285  *
0286  * @sched: scheduler instance for which to resume the timeout
0287  * @remaining: remaining timeout
0288  *
0289  * Resume the delayed work timeout for the scheduler.
0290  */
0291 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
0292         unsigned long remaining)
0293 {
0294     spin_lock(&sched->job_list_lock);
0295 
0296     if (list_empty(&sched->pending_list))
0297         cancel_delayed_work(&sched->work_tdr);
0298     else
0299         mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
0300 
0301     spin_unlock(&sched->job_list_lock);
0302 }
0303 EXPORT_SYMBOL(drm_sched_resume_timeout);
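
/*
 * Illustrative sketch (not part of the original file): a driver that has to
 * pause the hardware temporarily (my_do_maintenance() below is a
 * hypothetical driver operation) can park the job timeout with
 * drm_sched_suspend_timeout() and re-arm the remaining time afterwards:
 *
 *    void my_maintenance(struct drm_gpu_scheduler *sched)
 *    {
 *        unsigned long remaining;
 *
 *        remaining = drm_sched_suspend_timeout(sched);
 *        my_do_maintenance();
 *        drm_sched_resume_timeout(sched, remaining);
 *    }
 */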
0304 
0305 static void drm_sched_job_begin(struct drm_sched_job *s_job)
0306 {
0307     struct drm_gpu_scheduler *sched = s_job->sched;
0308 
0309     spin_lock(&sched->job_list_lock);
0310     list_add_tail(&s_job->list, &sched->pending_list);
0311     drm_sched_start_timeout(sched);
0312     spin_unlock(&sched->job_list_lock);
0313 }
0314 
0315 static void drm_sched_job_timedout(struct work_struct *work)
0316 {
0317     struct drm_gpu_scheduler *sched;
0318     struct drm_sched_job *job;
0319     enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
0320 
0321     sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
0322 
0323     /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
0324     spin_lock(&sched->job_list_lock);
0325     job = list_first_entry_or_null(&sched->pending_list,
0326                        struct drm_sched_job, list);
0327 
0328     if (job) {
0329         /*
0330          * Remove the bad job so it cannot be freed by a concurrent
0331          * drm_sched_get_cleanup_job(). It will be reinserted after
0332          * sched->thread is parked, at which point it's safe.
0333          */
0334         list_del_init(&job->list);
0335         spin_unlock(&sched->job_list_lock);
0336 
0337         status = job->sched->ops->timedout_job(job);
0338 
0339         /*
0340          * The guilty job did complete and hence needs to be manually
0341          * removed. See the drm_sched_stop() documentation.
0342          */
0343         if (sched->free_guilty) {
0344             job->sched->ops->free_job(job);
0345             sched->free_guilty = false;
0346         }
0347     } else {
0348         spin_unlock(&sched->job_list_lock);
0349     }
0350 
0351     if (status != DRM_GPU_SCHED_STAT_ENODEV) {
0352         spin_lock(&sched->job_list_lock);
0353         drm_sched_start_timeout(sched);
0354         spin_unlock(&sched->job_list_lock);
0355     }
0356 }
0357 
0358 /**
0359  * drm_sched_increase_karma - Update sched_entity guilty flag
0360  *
0361  * @bad: The job guilty of time out
0362  *
0363  * Increment on every hang caused by the 'bad' job. If this exceeds the hang
0364  * limit of the scheduler then the respective sched entity is marked guilty and
0365  * jobs from it will not be scheduled any further.
0366  */
0367 void drm_sched_increase_karma(struct drm_sched_job *bad)
0368 {
0369     drm_sched_increase_karma_ext(bad, 1);
0370 }
0371 EXPORT_SYMBOL(drm_sched_increase_karma);
0372 
0373 void drm_sched_reset_karma(struct drm_sched_job *bad)
0374 {
0375     drm_sched_increase_karma_ext(bad, 0);
0376 }
0377 EXPORT_SYMBOL(drm_sched_reset_karma);
0378 
0379 /**
0380  * drm_sched_stop - stop the scheduler
0381  *
0382  * @sched: scheduler instance
0383  * @bad: job which caused the time out
0384  *
0385  * Stop the scheduler, and also remove and free all completed jobs.
0386  * Note: the bad job will not be freed as it might be used later, so it is
0387  * the caller's responsibility to release it manually if it is no longer
0388  * part of the pending list.
0389  *
0390  */
0391 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
0392 {
0393     struct drm_sched_job *s_job, *tmp;
0394 
0395     kthread_park(sched->thread);
0396 
0397     /*
0398      * Reinsert back the bad job here - now it's safe as
0399      * drm_sched_get_cleanup_job cannot race against us and release the
0400      * bad job at this point - we parked (waited for) any in progress
0401      * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
0402      * now until the scheduler thread is unparked.
0403      */
0404     if (bad && bad->sched == sched)
0405         /*
0406          * Add at the head of the queue to reflect it was the earliest
0407          * job extracted.
0408          */
0409         list_add(&bad->list, &sched->pending_list);
0410 
0411     /*
0412      * Iterate the job list from the last to the first entry and either
0413      * deactivate the jobs' HW callbacks or remove them from the pending
0414      * list if they have already signaled.
0415      * This iteration is thread safe as the sched thread is stopped.
0416      */
0417     list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
0418                      list) {
0419         if (s_job->s_fence->parent &&
0420             dma_fence_remove_callback(s_job->s_fence->parent,
0421                           &s_job->cb)) {
0422             dma_fence_put(s_job->s_fence->parent);
0423             s_job->s_fence->parent = NULL;
0424             atomic_dec(&sched->hw_rq_count);
0425         } else {
0426             /*
0427              * remove job from pending_list.
0428              * Locking here is for concurrent resume timeout
0429              */
0430             spin_lock(&sched->job_list_lock);
0431             list_del_init(&s_job->list);
0432             spin_unlock(&sched->job_list_lock);
0433 
0434             /*
0435              * Wait for job's HW fence callback to finish using s_job
0436              * before releasing it.
0437              *
0438              * The job is still alive, so the fence refcount is at least 1.
0439              */
0440             dma_fence_wait(&s_job->s_fence->finished, false);
0441 
0442             /*
0443              * We must keep bad job alive for later use during
0444              * recovery by some of the drivers but leave a hint
0445              * that the guilty job must be released.
0446              */
0447             if (bad != s_job)
0448                 sched->ops->free_job(s_job);
0449             else
0450                 sched->free_guilty = true;
0451         }
0452     }
0453 
0454     /*
0455      * Stop the pending timer in flight, as it is rearmed in drm_sched_start().
0456      * This prevents the pending timeout work in progress from firing right
0457      * after this TDR finishes, before the newly restarted jobs have had a
0458      * chance to complete.
0459      */
0460     cancel_delayed_work(&sched->work_tdr);
0461 }
0462 
0463 EXPORT_SYMBOL(drm_sched_stop);
0464 
0465 /**
0466  * drm_sched_start - recover jobs after a reset
0467  *
0468  * @sched: scheduler instance
0469  * @full_recovery: proceed with complete sched restart
0470  *
0471  */
0472 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
0473 {
0474     struct drm_sched_job *s_job, *tmp;
0475     int r;
0476 
0477     /*
0478      * Locking the list is not required here as the sched thread is parked
0479      * so no new jobs are being inserted or removed. Also concurrent
0480      * GPU recoveries can't run in parallel.
0481      */
0482     list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
0483         struct dma_fence *fence = s_job->s_fence->parent;
0484 
0485         atomic_inc(&sched->hw_rq_count);
0486 
0487         if (!full_recovery)
0488             continue;
0489 
0490         if (fence) {
0491             r = dma_fence_add_callback(fence, &s_job->cb,
0492                            drm_sched_job_done_cb);
0493             if (r == -ENOENT)
0494                 drm_sched_job_done(s_job);
0495             else if (r)
0496                 DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
0497                       r);
0498         } else
0499             drm_sched_job_done(s_job);
0500     }
0501 
0502     if (full_recovery) {
0503         spin_lock(&sched->job_list_lock);
0504         drm_sched_start_timeout(sched);
0505         spin_unlock(&sched->job_list_lock);
0506     }
0507 
0508     kthread_unpark(sched->thread);
0509 }
0510 EXPORT_SYMBOL(drm_sched_start);
0511 
0512 /**
0513  * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list
0514  *
0515  * @sched: scheduler instance
0516  *
0517  */
0518 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
0519 {
0520     drm_sched_resubmit_jobs_ext(sched, INT_MAX);
0521 }
0522 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
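
/*
 * Illustrative sketch (not part of the original file): the usual recovery
 * sequence in a driver's &struct drm_sched_backend_ops.timedout_job callback
 * combines drm_sched_stop(), drm_sched_increase_karma(),
 * drm_sched_resubmit_jobs() and drm_sched_start(). my_reset_hw() stands in
 * for the driver's actual reset path and is hypothetical.
 *
 *    static enum drm_gpu_sched_stat my_timedout_job(struct drm_sched_job *job)
 *    {
 *        struct drm_gpu_scheduler *sched = job->sched;
 *
 *        drm_sched_stop(sched, job);
 *        drm_sched_increase_karma(job);
 *        my_reset_hw(sched);
 *        drm_sched_resubmit_jobs(sched);
 *        drm_sched_start(sched, true);
 *
 *        return DRM_GPU_SCHED_STAT_NOMINAL;
 *    }
 */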
0523 
0524 /**
0525  * drm_sched_resubmit_jobs_ext - helper to relaunch a certain number of jobs from the pending list
0526  *
0527  * @sched: scheduler instance
0528  * @max: maximum number of jobs to relaunch
0529  *
0530  */
0531 void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max)
0532 {
0533     struct drm_sched_job *s_job, *tmp;
0534     uint64_t guilty_context;
0535     bool found_guilty = false;
0536     struct dma_fence *fence;
0537     int i = 0;
0538 
0539     list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
0540         struct drm_sched_fence *s_fence = s_job->s_fence;
0541 
0542         if (i >= max)
0543             break;
0544 
0545         if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
0546             found_guilty = true;
0547             guilty_context = s_job->s_fence->scheduled.context;
0548         }
0549 
0550         if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
0551             dma_fence_set_error(&s_fence->finished, -ECANCELED);
0552 
0553         fence = sched->ops->run_job(s_job);
0554         i++;
0555 
0556         if (IS_ERR_OR_NULL(fence)) {
0557             if (IS_ERR(fence))
0558                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
0559 
0560             s_job->s_fence->parent = NULL;
0561         } else {
0562 
0563             s_job->s_fence->parent = dma_fence_get(fence);
0564 
0565             /* Drop for original kref_init */
0566             dma_fence_put(fence);
0567         }
0568     }
0569 }
0570 EXPORT_SYMBOL(drm_sched_resubmit_jobs_ext);
0571 
0572 /**
0573  * drm_sched_job_init - init a scheduler job
0574  * @job: scheduler job to init
0575  * @entity: scheduler entity to use
0576  * @owner: job owner for debugging
0577  *
0578  * Refer to drm_sched_entity_push_job() documentation
0579  * for locking considerations.
0580  *
0581  * Drivers must make sure drm_sched_job_cleanup() is called if this function
0582  * returns successfully, even when @job is aborted before drm_sched_job_arm() is called.
0583  *
0584  * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
0585  * has died, which can mean that there's no valid runqueue for an @entity.
0586  * This function returns -ENOENT in this case (which probably should be -EIO as
0587  * a more meaningful return value).
0588  *
0589  * Returns 0 for success, negative error code otherwise.
0590  */
0591 int drm_sched_job_init(struct drm_sched_job *job,
0592                struct drm_sched_entity *entity,
0593                void *owner)
0594 {
0595     drm_sched_entity_select_rq(entity);
0596     if (!entity->rq)
0597         return -ENOENT;
0598 
0599     job->entity = entity;
0600     job->s_fence = drm_sched_fence_alloc(entity, owner);
0601     if (!job->s_fence)
0602         return -ENOMEM;
0603 
0604     INIT_LIST_HEAD(&job->list);
0605 
0606     xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
0607 
0608     return 0;
0609 }
0610 EXPORT_SYMBOL(drm_sched_job_init);
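
/*
 * Illustrative sketch (not part of the original file): the usual submission
 * flow is drm_sched_job_init(), dependency setup, drm_sched_job_arm() and a
 * final push to the entity, with drm_sched_job_cleanup() on the error path
 * before arming. In this version of the API drm_sched_entity_push_job() is
 * assumed to take only the armed job (check <drm/gpu_scheduler.h> of the
 * tree at hand). struct my_job, which embeds a &struct drm_sched_job as
 * "base", and my_add_dependencies() are hypothetical.
 *
 *    int my_submit(struct my_job *job, struct drm_sched_entity *entity,
 *                  void *owner)
 *    {
 *        int ret;
 *
 *        ret = drm_sched_job_init(&job->base, entity, owner);
 *        if (ret)
 *            return ret;
 *
 *        ret = my_add_dependencies(job);
 *        if (ret)
 *            goto err_cleanup;
 *
 *        drm_sched_job_arm(&job->base);
 *        drm_sched_entity_push_job(&job->base);
 *        return 0;
 *
 *    err_cleanup:
 *        drm_sched_job_cleanup(&job->base);
 *        return ret;
 *    }
 */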
0611 
0612 /**
0613  * drm_sched_job_arm - arm a scheduler job for execution
0614  * @job: scheduler job to arm
0615  *
0616  * This arms a scheduler job for execution. Specifically it initializes the
0617  * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv
0618  * or other places that need to track the completion of this job.
0619  *
0620  * Refer to drm_sched_entity_push_job() documentation for locking
0621  * considerations.
0622  *
0623  * This can only be called if drm_sched_job_init() succeeded.
0624  */
0625 void drm_sched_job_arm(struct drm_sched_job *job)
0626 {
0627     struct drm_gpu_scheduler *sched;
0628     struct drm_sched_entity *entity = job->entity;
0629 
0630     BUG_ON(!entity);
0631 
0632     sched = entity->rq->sched;
0633 
0634     job->sched = sched;
0635     job->s_priority = entity->rq - sched->sched_rq;
0636     job->id = atomic64_inc_return(&sched->job_id_count);
0637 
0638     drm_sched_fence_init(job->s_fence, job->entity);
0639 }
0640 EXPORT_SYMBOL(drm_sched_job_arm);
0641 
0642 /**
0643  * drm_sched_job_add_dependency - adds the fence as a job dependency
0644  * @job: scheduler job to add the dependencies to
0645  * @fence: the dma_fence to add to the list of dependencies.
0646  *
0647  * Note that @fence is consumed in both the success and error cases.
0648  *
0649  * Returns:
0650  * 0 on success, or an error on failing to expand the array.
0651  */
0652 int drm_sched_job_add_dependency(struct drm_sched_job *job,
0653                  struct dma_fence *fence)
0654 {
0655     struct dma_fence *entry;
0656     unsigned long index;
0657     u32 id = 0;
0658     int ret;
0659 
0660     if (!fence)
0661         return 0;
0662 
0663     /* Deduplicate if we already depend on a fence from the same context.
0664      * This lets the size of the array of deps scale with the number of
0665      * engines involved, rather than the number of BOs.
0666      */
0667     xa_for_each(&job->dependencies, index, entry) {
0668         if (entry->context != fence->context)
0669             continue;
0670 
0671         if (dma_fence_is_later(fence, entry)) {
0672             dma_fence_put(entry);
0673             xa_store(&job->dependencies, index, fence, GFP_KERNEL);
0674         } else {
0675             dma_fence_put(fence);
0676         }
0677         return 0;
0678     }
0679 
0680     ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
0681     if (ret != 0)
0682         dma_fence_put(fence);
0683 
0684     return ret;
0685 }
0686 EXPORT_SYMBOL(drm_sched_job_add_dependency);
0687 
0688 /**
0689  * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
0690  *   dependencies
0691  * @job: scheduler job to add the dependencies to
0692  * @obj: the gem object to add new dependencies from.
0693  * @write: whether the job might write the object (so we need to depend on
0694  * shared fences in the reservation object).
0695  *
0696  * This should be called after drm_gem_lock_reservations() on your array of
0697  * GEM objects used in the job but before updating the reservations with your
0698  * own fences.
0699  *
0700  * Returns:
0701  * 0 on success, or an error on failing to expand the array.
0702  */
0703 int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
0704                         struct drm_gem_object *obj,
0705                         bool write)
0706 {
0707     struct dma_resv_iter cursor;
0708     struct dma_fence *fence;
0709     int ret;
0710 
0711     dma_resv_assert_held(obj->resv);
0712 
0713     dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
0714                 fence) {
0715         /* Make sure to grab an additional ref on the added fence */
0716         dma_fence_get(fence);
0717         ret = drm_sched_job_add_dependency(job, fence);
0718         if (ret) {
0719             dma_fence_put(fence);
0720             return ret;
0721         }
0722     }
0723     return 0;
0724 }
0725 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
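
/*
 * Illustrative sketch (not part of the original file): collecting implicit
 * fences from an array of GEM objects before submission, with the
 * reservations already held via drm_gem_lock_reservations() as required
 * above. The caller is expected to install its own fences and then call
 * drm_gem_unlock_reservations(); the bos/count parameters are hypothetical
 * driver state.
 *
 *    static int my_add_implicit_deps(struct drm_sched_job *job,
 *                                    struct drm_gem_object **bos, int count,
 *                                    bool write)
 *    {
 *        int i, ret;
 *
 *        for (i = 0; i < count; i++) {
 *            ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
 *                                                          write);
 *            if (ret)
 *                return ret;
 *        }
 *
 *        return 0;
 *    }
 */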
0726 
0727 
0728 /**
0729  * drm_sched_job_cleanup - clean up scheduler job resources
0730  * @job: scheduler job to clean up
0731  *
0732  * Cleans up the resources allocated with drm_sched_job_init().
0733  *
0734  * Drivers should call this from their error unwind code if @job is aborted
0735  * before drm_sched_job_arm() is called.
0736  *
0737  * After that point of no return @job is committed to be executed by the
0738  * scheduler, and this function should be called from the
0739  * &drm_sched_backend_ops.free_job callback.
0740  */
0741 void drm_sched_job_cleanup(struct drm_sched_job *job)
0742 {
0743     struct dma_fence *fence;
0744     unsigned long index;
0745 
0746     if (kref_read(&job->s_fence->finished.refcount)) {
0747         /* drm_sched_job_arm() has been called */
0748         dma_fence_put(&job->s_fence->finished);
0749     } else {
0750         /* aborted job before committing to run it */
0751         drm_sched_fence_free(job->s_fence);
0752     }
0753 
0754     job->s_fence = NULL;
0755 
0756     xa_for_each(&job->dependencies, index, fence) {
0757         dma_fence_put(fence);
0758     }
0759     xa_destroy(&job->dependencies);
0760 
0761 }
0762 EXPORT_SYMBOL(drm_sched_job_cleanup);
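
/*
 * Illustrative sketch (not part of the original file): once a job has run,
 * drm_sched_job_cleanup() is typically called from the driver's
 * &struct drm_sched_backend_ops.free_job callback. struct my_job, embedding
 * a &struct drm_sched_job as "base", is hypothetical.
 *
 *    static void my_free_job(struct drm_sched_job *sched_job)
 *    {
 *        struct my_job *job = container_of(sched_job, struct my_job, base);
 *
 *        drm_sched_job_cleanup(sched_job);
 *        kfree(job);
 *    }
 */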
0763 
0764 /**
0765  * drm_sched_ready - is the scheduler ready
0766  *
0767  * @sched: scheduler instance
0768  *
0769  * Return true if we can push more jobs to the hw, otherwise false.
0770  */
0771 static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
0772 {
0773     return atomic_read(&sched->hw_rq_count) <
0774         sched->hw_submission_limit;
0775 }
0776 
0777 /**
0778  * drm_sched_wakeup - Wake up the scheduler when it is ready
0779  *
0780  * @sched: scheduler instance
0781  *
0782  */
0783 void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
0784 {
0785     if (drm_sched_ready(sched))
0786         wake_up_interruptible(&sched->wake_up_worker);
0787 }
0788 
0789 /**
0790  * drm_sched_select_entity - Select next entity to process
0791  *
0792  * @sched: scheduler instance
0793  *
0794  * Returns the entity to process or NULL if none are found.
0795  */
0796 static struct drm_sched_entity *
0797 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
0798 {
0799     struct drm_sched_entity *entity;
0800     int i;
0801 
0802     if (!drm_sched_ready(sched))
0803         return NULL;
0804 
0805     /* Kernel run queue has a higher priority than the normal run queue */
0806     for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
0807         entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
0808         if (entity)
0809             break;
0810     }
0811 
0812     return entity;
0813 }
0814 
0815 /**
0816  * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
0817  *
0818  * @sched: scheduler instance
0819  *
0820  * Returns the next finished job from the pending list (if there is one),
0821  * ready to be destroyed.
0822  */
0823 static struct drm_sched_job *
0824 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
0825 {
0826     struct drm_sched_job *job, *next;
0827 
0828     spin_lock(&sched->job_list_lock);
0829 
0830     job = list_first_entry_or_null(&sched->pending_list,
0831                        struct drm_sched_job, list);
0832 
0833     if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
0834         /* remove job from pending_list */
0835         list_del_init(&job->list);
0836 
0837         /* cancel this job's TO timer */
0838         cancel_delayed_work(&sched->work_tdr);
0839         /* make the scheduled timestamp more accurate */
0840         next = list_first_entry_or_null(&sched->pending_list,
0841                         typeof(*next), list);
0842 
0843         if (next) {
0844             next->s_fence->scheduled.timestamp =
0845                 job->s_fence->finished.timestamp;
0846             /* start TO timer for next job */
0847             drm_sched_start_timeout(sched);
0848         }
0849     } else {
0850         job = NULL;
0851     }
0852 
0853     spin_unlock(&sched->job_list_lock);
0854 
0855     return job;
0856 }
0857 
0858 /**
0859  * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
0860  * @sched_list: list of drm_gpu_schedulers
0861  * @num_sched_list: number of drm_gpu_schedulers in the sched_list
0862  *
0863  * Returns a pointer to the sched with the least load, or NULL if none of the
0864  * drm_gpu_schedulers are ready.
0865  */
0866 struct drm_gpu_scheduler *
0867 drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
0868              unsigned int num_sched_list)
0869 {
0870     struct drm_gpu_scheduler *sched, *picked_sched = NULL;
0871     int i;
0872     unsigned int min_score = UINT_MAX, num_score;
0873 
0874     for (i = 0; i < num_sched_list; ++i) {
0875         sched = sched_list[i];
0876 
0877         if (!sched->ready) {
0878             DRM_WARN("scheduler %s is not ready, skipping",
0879                  sched->name);
0880             continue;
0881         }
0882 
0883         num_score = atomic_read(sched->score);
0884         if (num_score < min_score) {
0885             min_score = num_score;
0886             picked_sched = sched;
0887         }
0888     }
0889 
0890     return picked_sched;
0891 }
0892 EXPORT_SYMBOL(drm_sched_pick_best);
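
/*
 * Illustrative sketch (not part of the original file): drivers usually don't
 * call drm_sched_pick_best() directly; it is consulted on an entity's
 * scheduler list when the entity was initialized with more than one
 * scheduler, so new jobs migrate to the least loaded one. The sched_list
 * array must stay valid for the entity's lifetime; ctx and my_rings are
 * hypothetical driver state.
 *
 *    ctx->sched_list[0] = &my_rings[0].sched;
 *    ctx->sched_list[1] = &my_rings[1].sched;
 *
 *    ret = drm_sched_entity_init(&ctx->entity, DRM_SCHED_PRIORITY_NORMAL,
 *                                ctx->sched_list, 2, NULL);
 */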
0893 
0894 /**
0895  * drm_sched_blocked - check if the scheduler is blocked
0896  *
0897  * @sched: scheduler instance
0898  *
0899  * Returns true if blocked, otherwise false.
0900  */
0901 static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
0902 {
0903     if (kthread_should_park()) {
0904         kthread_parkme();
0905         return true;
0906     }
0907 
0908     return false;
0909 }
0910 
0911 /**
0912  * drm_sched_main - main scheduler thread
0913  *
0914  * @param: scheduler instance
0915  *
0916  * Returns 0.
0917  */
0918 static int drm_sched_main(void *param)
0919 {
0920     struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
0921     int r;
0922 
0923     sched_set_fifo_low(current);
0924 
0925     while (!kthread_should_stop()) {
0926         struct drm_sched_entity *entity = NULL;
0927         struct drm_sched_fence *s_fence;
0928         struct drm_sched_job *sched_job;
0929         struct dma_fence *fence;
0930         struct drm_sched_job *cleanup_job = NULL;
0931 
0932         wait_event_interruptible(sched->wake_up_worker,
0933                      (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
0934                      (!drm_sched_blocked(sched) &&
0935                       (entity = drm_sched_select_entity(sched))) ||
0936                      kthread_should_stop());
0937 
0938         if (cleanup_job)
0939             sched->ops->free_job(cleanup_job);
0940 
0941         if (!entity)
0942             continue;
0943 
0944         sched_job = drm_sched_entity_pop_job(entity);
0945 
0946         if (!sched_job) {
0947             complete(&entity->entity_idle);
0948             continue;
0949         }
0950 
0951         s_fence = sched_job->s_fence;
0952 
0953         atomic_inc(&sched->hw_rq_count);
0954         drm_sched_job_begin(sched_job);
0955 
0956         trace_drm_run_job(sched_job, entity);
0957         fence = sched->ops->run_job(sched_job);
0958         complete(&entity->entity_idle);
0959         drm_sched_fence_scheduled(s_fence);
0960 
0961         if (!IS_ERR_OR_NULL(fence)) {
0962             s_fence->parent = dma_fence_get(fence);
0963             /* Drop for original kref_init of the fence */
0964             dma_fence_put(fence);
0965 
0966             r = dma_fence_add_callback(fence, &sched_job->cb,
0967                            drm_sched_job_done_cb);
0968             if (r == -ENOENT)
0969                 drm_sched_job_done(sched_job);
0970             else if (r)
0971                 DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
0972                       r);
0973         } else {
0974             if (IS_ERR(fence))
0975                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
0976 
0977             drm_sched_job_done(sched_job);
0978         }
0979 
0980         wake_up(&sched->job_scheduled);
0981     }
0982     return 0;
0983 }
0984 
0985 /**
0986  * drm_sched_init - Init a gpu scheduler instance
0987  *
0988  * @sched: scheduler instance
0989  * @ops: backend operations for this scheduler
0990  * @hw_submission: number of hw submissions that can be in flight
0991  * @hang_limit: number of times to allow a job to hang before dropping it
0992  * @timeout: timeout value in jiffies for the scheduler
0993  * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
0994  *      used
0995  * @score: optional score atomic shared with other schedulers
0996  * @name: name used for debugging
 * @dev: target &struct device, used for error logging
0997  *
0998  * Return 0 on success, otherwise error code.
0999  */
1000 int drm_sched_init(struct drm_gpu_scheduler *sched,
1001            const struct drm_sched_backend_ops *ops,
1002            unsigned hw_submission, unsigned hang_limit,
1003            long timeout, struct workqueue_struct *timeout_wq,
1004            atomic_t *score, const char *name, struct device *dev)
1005 {
1006     int i, ret;
1007     sched->ops = ops;
1008     sched->hw_submission_limit = hw_submission;
1009     sched->name = name;
1010     sched->timeout = timeout;
1011     sched->timeout_wq = timeout_wq ? : system_wq;
1012     sched->hang_limit = hang_limit;
1013     sched->score = score ? score : &sched->_score;
1014     sched->dev = dev;
1015     for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
1016         drm_sched_rq_init(sched, &sched->sched_rq[i]);
1017 
1018     init_waitqueue_head(&sched->wake_up_worker);
1019     init_waitqueue_head(&sched->job_scheduled);
1020     INIT_LIST_HEAD(&sched->pending_list);
1021     spin_lock_init(&sched->job_list_lock);
1022     atomic_set(&sched->hw_rq_count, 0);
1023     INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
1024     atomic_set(&sched->_score, 0);
1025     atomic64_set(&sched->job_id_count, 0);
1026 
1027     /* Each scheduler will run on a separate kernel thread */
1028     sched->thread = kthread_run(drm_sched_main, sched, sched->name);
1029     if (IS_ERR(sched->thread)) {
1030         ret = PTR_ERR(sched->thread);
1031         sched->thread = NULL;
1032         DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
1033         return ret;
1034     }
1035 
1036     sched->ready = true;
1037     return 0;
1038 }
1039 EXPORT_SYMBOL(drm_sched_init);
1040 
1041 /**
1042  * drm_sched_fini - Destroy a gpu scheduler
1043  *
1044  * @sched: scheduler instance
1045  *
1046  * Tears down and cleans up the scheduler.
1047  */
1048 void drm_sched_fini(struct drm_gpu_scheduler *sched)
1049 {
1050     struct drm_sched_entity *s_entity;
1051     int i;
1052 
1053     if (sched->thread)
1054         kthread_stop(sched->thread);
1055 
1056     for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
1057         struct drm_sched_rq *rq = &sched->sched_rq[i];
1058 
1059         if (!rq)
1060             continue;
1061 
1062         spin_lock(&rq->lock);
1063         list_for_each_entry(s_entity, &rq->entities, list)
1064             /*
1065              * Prevents reinsertion and marks job_queue as idle;
1066              * it will be removed from the rq in drm_sched_entity_fini()
1067              * eventually
1068              */
1069             s_entity->stopped = true;
1070         spin_unlock(&rq->lock);
1071 
1072     }
1073 
1074     /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
1075     wake_up_all(&sched->job_scheduled);
1076 
1077     /* Confirm no work left behind accessing device structures */
1078     cancel_delayed_work_sync(&sched->work_tdr);
1079 
1080     sched->ready = false;
1081 }
1082 EXPORT_SYMBOL(drm_sched_fini);
1083 
1084 /**
1085  * drm_sched_increase_karma_ext - Update sched_entity guilty flag
1086  *
1087  * @bad: The job guilty of time out
1088  * @type: 1 to increase karma, 0 to reset it
1089  *
1090  */
1091 void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
1092 {
1093     int i;
1094     struct drm_sched_entity *tmp;
1095     struct drm_sched_entity *entity;
1096     struct drm_gpu_scheduler *sched = bad->sched;
1097 
1098     /* Don't change @bad's karma if it comes from the KERNEL run queue:
1099      * a GPU hang can corrupt kernel jobs (like VM updating jobs), but
1100      * kernel jobs are always considered good.
1101      */
1102     if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
1103         if (type == 0)
1104             atomic_set(&bad->karma, 0);
1105         else if (type == 1)
1106             atomic_inc(&bad->karma);
1107 
1108         for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
1109              i++) {
1110             struct drm_sched_rq *rq = &sched->sched_rq[i];
1111 
1112             spin_lock(&rq->lock);
1113             list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
1114                 if (bad->s_fence->scheduled.context ==
1115                     entity->fence_context) {
1116                     if (entity->guilty)
1117                         atomic_set(entity->guilty, type);
1118                     break;
1119                 }
1120             }
1121             spin_unlock(&rq->lock);
1122             if (&entity->list != &rq->entities)
1123                 break;
1124         }
1125     }
1126 }
1127 EXPORT_SYMBOL(drm_sched_increase_karma_ext);