// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */

#include <linux/iosys-map.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pm_runtime.h>

#include "lima_devfreq.h"
#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_gem.h"
#include "lima_trace.h"

struct lima_fence {
	struct dma_fence base;
	struct lima_sched_pipe *pipe;
};

static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;
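
/*
 * One fence slab is shared by every lima device; the refcount lets each
 * device pair lima_sched_slab_init() with lima_sched_slab_fini().
 */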
int lima_sched_slab_init(void)
{
	if (!lima_fence_slab) {
		lima_fence_slab = kmem_cache_create(
			"lima_fence", sizeof(struct lima_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!lima_fence_slab)
			return -ENOMEM;
	}

	lima_fence_slab_refcnt++;
	return 0;
}

void lima_sched_slab_fini(void)
{
	if (!--lima_fence_slab_refcnt) {
		kmem_cache_destroy(lima_fence_slab);
		lima_fence_slab = NULL;
	}
}

static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
	return container_of(fence, struct lima_fence, base);
}

static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
	return "lima";
}

static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	return f->pipe->base.name;
}

static void lima_fence_release_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct lima_fence *fence = to_lima_fence(f);

	kmem_cache_free(lima_fence_slab, fence);
}

static void lima_fence_release(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	call_rcu(&f->base.rcu, lima_fence_release_rcu);
}

static const struct dma_fence_ops lima_fence_ops = {
	.get_driver_name = lima_fence_get_driver_name,
	.get_timeline_name = lima_fence_get_timeline_name,
	.release = lima_fence_release,
};
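
/*
 * Allocate a hardware fence for a task. This is only called from
 * run_job, which the scheduler serializes per pipe, so the unlocked
 * seqno increment is safe.
 */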
static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
	struct lima_fence *fence;

	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->pipe = pipe;
	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
		       pipe->fence_context, ++pipe->fence_seqno);

	return fence;
}

static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
	return container_of(job, struct lima_sched_task, base);
}

static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
	return container_of(sched, struct lima_sched_pipe, base);
}
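
/*
 * The task holds a reference on each BO and on the VM until
 * lima_sched_task_fini(), keeping them alive while the job is queued
 * or running.
 */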
int lima_sched_task_init(struct lima_sched_task *task,
			 struct lima_sched_context *context,
			 struct lima_bo **bos, int num_bos,
			 struct lima_vm *vm)
{
	int err, i;

	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
	if (!task->bos)
		return -ENOMEM;

	for (i = 0; i < num_bos; i++)
		drm_gem_object_get(&bos[i]->base.base);

	err = drm_sched_job_init(&task->base, &context->base, vm);
	if (err) {
		/* drop the BO references taken above so they are not leaked */
		while (--i >= 0)
			drm_gem_object_put(&bos[i]->base.base);
		kfree(task->bos);
		return err;
	}

	drm_sched_job_arm(&task->base);

	task->num_bos = num_bos;
	task->vm = lima_vm_get(vm);

	return 0;
}

void lima_sched_task_fini(struct lima_sched_task *task)
{
	int i;

	drm_sched_job_cleanup(&task->base);

	if (task->bos) {
		for (i = 0; i < task->num_bos; i++)
			drm_gem_object_put(&task->bos[i]->base.base);
		kfree(task->bos);
	}

	lima_vm_put(task->vm);
}

int lima_sched_context_init(struct lima_sched_pipe *pipe,
			    struct lima_sched_context *context,
			    atomic_t *guilty)
{
	struct drm_gpu_scheduler *sched = &pipe->base;

	return drm_sched_entity_init(&context->base, DRM_SCHED_PRIORITY_NORMAL,
				     &sched, 1, guilty);
}

void lima_sched_context_fini(struct lima_sched_pipe *pipe,
			     struct lima_sched_context *context)
{
	drm_sched_entity_fini(&context->base);
}

struct dma_fence *lima_sched_context_queue_task(struct lima_sched_task *task)
{
	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

	trace_lima_task_submit(task);
	drm_sched_entity_push_job(&task->base);
	return fence;
}
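
/*
 * lima_pm_busy()/lima_pm_idle() bracket every job: they keep the GPU
 * resumed via runtime PM while a task is on the hardware and report
 * busy/idle transitions to devfreq.
 */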
static int lima_pm_busy(struct lima_device *ldev)
{
	int ret;

	/* resume the GPU if runtime PM has suspended it */
	ret = pm_runtime_resume_and_get(ldev->dev);
	if (ret < 0)
		return ret;

	lima_devfreq_record_busy(&ldev->devfreq);
	return 0;
}

static void lima_pm_idle(struct lima_device *ldev)
{
	lima_devfreq_record_idle(&ldev->devfreq);

	/* GPU can do auto suspend */
	pm_runtime_mark_last_busy(ldev->dev);
	pm_runtime_put_autosuspend(ldev->dev);
}
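
/*
 * drm_sched_backend_ops.run_job: start one task on the hardware and
 * return the fence that the IRQ path signals on completion, or NULL
 * when the job cannot (or need not) be started.
 */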
static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_device *ldev = pipe->ldev;
	struct lima_fence *fence;
	int i, err;

	/* skip the job if it was aborted, e.g. by a GPU reset */
	if (job->s_fence->finished.error < 0)
		return NULL;

	fence = lima_fence_create(pipe);
	if (!fence)
		return NULL;

	err = lima_pm_busy(ldev);
	if (err < 0) {
		dma_fence_put(&fence->base);
		return NULL;
	}

	task->fence = &fence->base;

	/* take an extra reference for task->fence: the scheduler consumes
	 * the reference returned by run_job, while this one is signaled
	 * from the IRQ path and dropped in lima_sched_free_job()
	 */
	dma_fence_get(task->fence);

	pipe->current_task = task;

	/* L2 caches must be flushed before every task, otherwise GP/PP
	 * can hang or page fault after running for a while; all GP/PP
	 * share one L2 cache on Mali400, so this may flush more often
	 * than strictly necessary
	 */
	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	lima_vm_put(pipe->current_vm);
	pipe->current_vm = lima_vm_get(task->vm);

	if (pipe->bcast_mmu)
		lima_mmu_switch_vm(pipe->bcast_mmu, pipe->current_vm);
	else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_switch_vm(pipe->mmu[i], pipe->current_vm);
	}

	trace_lima_task_run(task);

	pipe->error = false;
	pipe->task_run(pipe, task);

	return task->fence;
}
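
/*
 * Capture the state of a timed-out task so userspace can retrieve it
 * later: a lima_dump_task header followed by frame, process name, pid
 * and buffer chunks, appended to the device's error task list.
 */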
static void lima_sched_build_error_task_list(struct lima_sched_task *task)
{
	struct lima_sched_error_task *et;
	struct lima_sched_pipe *pipe = to_lima_pipe(task->base.sched);
	struct lima_ip *ip = pipe->processor[0];
	int pipe_id = ip->id == lima_ip_gp ? lima_pipe_gp : lima_pipe_pp;
	struct lima_device *dev = ip->dev;
	struct lima_sched_context *sched_ctx =
		container_of(task->base.entity,
			     struct lima_sched_context, base);
	struct lima_ctx *ctx =
		container_of(sched_ctx, struct lima_ctx, context[pipe_id]);
	struct lima_dump_task *dt;
	struct lima_dump_chunk *chunk;
	struct lima_dump_chunk_pid *pid_chunk;
	struct lima_dump_chunk_buffer *buffer_chunk;
	u32 size, task_size, mem_size;
	int i;
	struct iosys_map map;
	int ret;

	mutex_lock(&dev->error_task_list_lock);

	if (dev->dump.num_tasks >= lima_max_error_tasks) {
		dev_info(dev->dev, "fail to save task state from %s pid %d: "
			 "error task list is full\n", ctx->pname, ctx->pid);
		goto out;
	}

	/* frame chunk */
	size = sizeof(struct lima_dump_chunk) + pipe->frame_size;
	/* process name chunk */
	size += sizeof(struct lima_dump_chunk) + sizeof(ctx->pname);
	/* pid chunk */
	size += sizeof(struct lima_dump_chunk);
	/* buffer chunks */
	for (i = 0; i < task->num_bos; i++) {
		struct lima_bo *bo = task->bos[i];

		size += sizeof(struct lima_dump_chunk);
		size += bo->heap_size ? bo->heap_size : lima_bo_size(bo);
	}

	task_size = size + sizeof(struct lima_dump_task);
	mem_size = task_size + sizeof(*et);
	et = kvmalloc(mem_size, GFP_KERNEL);
	if (!et) {
		dev_err(dev->dev, "fail to alloc task dump buffer of size %x\n",
			mem_size);
		goto out;
	}

	et->data = et + 1;
	et->size = task_size;

	dt = et->data;
	memset(dt, 0, sizeof(*dt));
	dt->id = pipe_id;
	dt->size = size;

	chunk = (struct lima_dump_chunk *)(dt + 1);
	memset(chunk, 0, sizeof(*chunk));
	chunk->id = LIMA_DUMP_CHUNK_FRAME;
	chunk->size = pipe->frame_size;
	memcpy(chunk + 1, task->frame, pipe->frame_size);
	dt->num_chunks++;

	chunk = (void *)(chunk + 1) + chunk->size;
	memset(chunk, 0, sizeof(*chunk));
	chunk->id = LIMA_DUMP_CHUNK_PROCESS_NAME;
	chunk->size = sizeof(ctx->pname);
	memcpy(chunk + 1, ctx->pname, sizeof(ctx->pname));
	dt->num_chunks++;

	pid_chunk = (void *)(chunk + 1) + chunk->size;
	memset(pid_chunk, 0, sizeof(*pid_chunk));
	pid_chunk->id = LIMA_DUMP_CHUNK_PROCESS_ID;
	pid_chunk->pid = ctx->pid;
	dt->num_chunks++;

	buffer_chunk = (void *)(pid_chunk + 1) + pid_chunk->size;
	for (i = 0; i < task->num_bos; i++) {
		struct lima_bo *bo = task->bos[i];
		void *data;

		memset(buffer_chunk, 0, sizeof(*buffer_chunk));
		buffer_chunk->id = LIMA_DUMP_CHUNK_BUFFER;
		buffer_chunk->va = lima_vm_get_va(task->vm, bo);

		if (bo->heap_size) {
			buffer_chunk->size = bo->heap_size;

			data = vmap(bo->base.pages, bo->heap_size >> PAGE_SHIFT,
				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
			if (!data) {
				kvfree(et);
				goto out;
			}

			memcpy(buffer_chunk + 1, data, buffer_chunk->size);

			vunmap(data);
		} else {
			buffer_chunk->size = lima_bo_size(bo);

			ret = drm_gem_shmem_vmap(&bo->base, &map);
			if (ret) {
				kvfree(et);
				goto out;
			}

			memcpy(buffer_chunk + 1, map.vaddr, buffer_chunk->size);

			drm_gem_shmem_vunmap(&bo->base, &map);
		}

		buffer_chunk = (void *)(buffer_chunk + 1) + buffer_chunk->size;
		dt->num_chunks++;
	}

	list_add(&et->list, &dev->error_task_list);
	dev->dump.size += et->size;
	dev->dump.num_tasks++;

	dev_info(dev->dev, "save error task state success\n");

out:
	mutex_unlock(&dev->error_task_list_lock);
}
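
/*
 * drm_sched_backend_ops.timedout_job: stop the scheduler, raise the
 * karma of the hanging task, optionally capture an error dump, reset
 * the hardware, then resubmit the remaining jobs and restart.
 */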
static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_device *ldev = pipe->ldev;

	if (!pipe->error)
		DRM_ERROR("lima job timeout\n");

	drm_sched_stop(&pipe->base, &task->base);

	drm_sched_increase_karma(&task->base);

	if (lima_max_error_tasks)
		lima_sched_build_error_task_list(task);

	pipe->task_error(pipe);

	if (pipe->bcast_mmu)
		lima_mmu_page_fault_resume(pipe->bcast_mmu);
	else {
		int i;

		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_page_fault_resume(pipe->mmu[i]);
	}

	lima_vm_put(pipe->current_vm);
	pipe->current_vm = NULL;
	pipe->current_task = NULL;

	lima_pm_idle(ldev);

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}
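
/*
 * drm_sched_backend_ops.free_job: runs once the task fence has
 * signaled; drops the fence, removes the BOs from the VM and frees
 * the task.
 */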
static void lima_sched_free_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_vm *vm = task->vm;
	struct lima_bo **bos = task->bos;
	int i;

	dma_fence_put(task->fence);

	for (i = 0; i < task->num_bos; i++)
		lima_vm_bo_del(vm, bos[i]);

	lima_sched_task_fini(task);
	kmem_cache_free(pipe->task_slab, task);
}

static const struct drm_sched_backend_ops lima_sched_ops = {
	.run_job = lima_sched_run_job,
	.timedout_job = lima_sched_timedout_job,
	.free_job = lima_sched_free_job,
};
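
/*
 * Worker for recoverable errors (e.g. a PP task that overflowed its
 * growable heap buffer): flush L2 caches and MMU TLBs, then let
 * task_recover() rerun the task; report a scheduler fault if that
 * fails.
 */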
static void lima_sched_recover_work(struct work_struct *work)
{
	struct lima_sched_pipe *pipe =
		container_of(work, struct lima_sched_pipe, recover_work);
	int i;

	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	if (pipe->bcast_mmu) {
		lima_mmu_flush_tlb(pipe->bcast_mmu);
	} else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_flush_tlb(pipe->mmu[i]);
	}

	if (pipe->task_recover(pipe))
		drm_sched_fault(&pipe->base);
}
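
/*
 * The job timeout comes from the lima_sched_timeout_ms module
 * parameter and falls back to 500 ms when that is zero or negative.
 */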
int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
	unsigned int timeout = lima_sched_timeout_ms > 0 ?
			       lima_sched_timeout_ms : 500;

	pipe->fence_context = dma_fence_context_alloc(1);
	spin_lock_init(&pipe->fence_lock);

	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);

	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
			      lima_job_hang_limit,
			      msecs_to_jiffies(timeout), NULL,
			      NULL, name, pipe->ldev->dev);
}

void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
	drm_sched_fini(&pipe->base);
}
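
/*
 * Called from interrupt context (the GP/PP IRQ handlers) once the
 * hardware has finished, or faulted on, the current task.
 */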
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
	struct lima_sched_task *task = pipe->current_task;
	struct lima_device *ldev = pipe->ldev;

	if (pipe->error) {
		if (task && task->recoverable)
			schedule_work(&pipe->recover_work);
		else
			drm_sched_fault(&pipe->base);
	} else {
		pipe->task_fini(pipe);
		dma_fence_signal(task->fence);

		lima_pm_idle(ldev);
	}
}