/*
 * GEM command-list submission, seqno tracking, and GPU hang handling for
 * the VC4 V3D engine.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_syncobj.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_put(state->bo[i]);

	kfree(state);
}

int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	if (!vc4->v3d) {
		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
		return -ENODEV;
	}

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;
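
	/* If the user's array isn't big enough, report the required
	 * count so userspace can retry with a larger buffer.
	 */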
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
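
	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */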
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user(u64_to_user_ptr(get_state->bo),
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

err_free:
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);

	return ret;
}

static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, k, unref_list_count;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	/* Get the BOs from both the binner and renderer into the hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	k = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			bo = to_vc4_bo(&exec[i]->bo[j]->base);

			/* Retain BOs just in case they were marked purgeable.
			 * This prevents the BO from being purged before
			 * someone had a chance to dump the hang state.
			 */
			WARN_ON(!refcount_read(&bo->usecnt));
			refcount_inc(&bo->usecnt);
			drm_gem_object_get(&exec[i]->bo[j]->base);
			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			/* No need to retain BOs coming from the ->unref_list
			 * because they are naturally unpurgeable.
			 */
			drm_gem_object_get(&bo->base.base);
			kernel_state->bo[k++] = &bo->base.base;
		}
	}

	WARN_ON_ONCE(k != state->bo_count);

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);
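
	/* BOs in the binner/renderer job lists were retained (usecnt
	 * incremented) above so they couldn't be purged while we captured
	 * the hang state.  Force them back to WILLNEED and drop that
	 * temporary reference; the GEM reference taken above keeps them
	 * alive until the hang state is dumped.
	 */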
	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);

		if (bo->madv == __VC4_MADV_NOTSUPP)
			continue;

		mutex_lock(&bo->madv_lock);
		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
			bo->madv = VC4_MADV_WILLNEED;
		refcount_dec(&bo->usecnt);
		mutex_unlock(&bo->madv_lock);
	}

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping and
		 * re-taking the runtime PM reference.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(&vc4->base);

	vc4_reset(&vc4->base);
}

static void
vc4_hangcheck_elapsed(struct timer_list *t)
{
	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
	struct drm_device *dev = &vc4->base;
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
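
	/* We've gone too long with no progress; reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */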
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 cache and the slice caches (texture, uniform,
	 * and instruction) so the new job sees coherent memory contents.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

static void
vc4_flush_texture_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC));
}
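
/* Sets up the registers for the next job to actually be executed by
 * the hardware.
 *
 * The job_lock should be held during this.
 */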
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Only start the perfmon if it was not already started by a
	 * previous job.
	 */
	if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
		vc4_perfmon_start(vc4, exec->perfmon);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca,
				    exec->ct0ea);
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		struct vc4_exec_info *next;

		vc4_move_job_to_render(dev, exec);
		next = vc4_first_bin_job(vc4);

		/* We can't start the next bin job if the previous job had a
		 * different perfmon instance attached to it.  The same goes
		 * if one of them had a perfmon attached to it and the other
		 * one doesn't.
		 */
		if (next && next->perfmon == exec->perfmon)
			goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return;
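
	/* A previous RCL may have written to one of our textures, and
	 * our full cache flush at bin time may have occurred before
	 * that RCL completed.  Flush the texture cache now, but not the
	 * instructions or uniforms (since we don't write those from
	 * rendering).
	 */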
	vc4_flush_texture_caches(dev);

	trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea);
	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	if (WARN_ON_ONCE(vc4->is_vc5))
		return;

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;

		dma_resv_add_fence(bo->base.base.resv, exec->fence,
				   DMA_RESV_USAGE_READ);
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;

		dma_resv_add_fence(bo->base.base.resv, exec->fence,
				   DMA_RESV_USAGE_WRITE);
	}
}

static void
vc4_unlock_bo_reservations(struct drm_device *dev,
			   struct vc4_exec_info *exec,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int i;

	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = &exec->bo[i]->base;

		dma_resv_unlock(bo->resv);
	}

	ww_acquire_fini(acquire_ctx);
}
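
/* Takes the reservation lock on all the BOs being referenced, so that
 * we can attach fences and update the reservations after pushing the job
 * to the queue.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to vc4, so we don't attach dma-buf fences to them.
 */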
static int
vc4_lock_bo_reservations(struct drm_device *dev,
			 struct vc4_exec_info *exec,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int contended_lock = -1;
	int i, ret;
	struct drm_gem_object *bo;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	if (contended_lock != -1) {
		bo = &exec->bo[contended_lock]->base;
		ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx);
		if (ret) {
			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < exec->bo_count; i++) {
		if (i == contended_lock)
			continue;

		bo = &exec->bo[i]->base;

		ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx);
		if (ret) {
			int j;

			for (j = 0; j < i; j++) {
				bo = &exec->bo[j]->base;
				dma_resv_unlock(bo->resv);
			}

			if (contended_lock != -1 && contended_lock >= i) {
				bo = &exec->bo[contended_lock]->base;

				dma_resv_unlock(bo->resv);
			}

			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the command lists to the hardware.
	 */
	for (i = 0; i < exec->bo_count; i++) {
		bo = &exec->bo[i]->base;

		ret = dma_resv_reserve_fences(bo->resv, 1);
		if (ret) {
			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
			return ret;
		}
	}

	return 0;
}
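
/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */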
static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
		 struct ww_acquire_ctx *acquire_ctx,
		 struct drm_syncobj *out_sync)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *renderjob;
	uint64_t seqno;
	unsigned long irqflags;
	struct vc4_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	fence->dev = dev;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;

	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
	fence->seqno = exec->seqno;
	exec->fence = &fence->base;

	if (out_sync)
		drm_syncobj_replace_fence(out_sync, exec->fence);

	vc4_update_bo_seqnos(exec, seqno);

	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);

	list_add_tail(&exec->head, &vc4->bin_job_list);
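
	/* If no bin job was executing and the render job (if any) has the
	 * same perfmon as our job attached to it (or if neither job uses a
	 * perfmon), kick ours off.  Otherwise, it'll get started when the
	 * previous job's flush/render done interrupt occurs.
	 */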
	renderjob = vc4_first_render_job(vc4);
	if (vc4_first_bin_job(vc4) == exec &&
	    (!renderjob || renderjob->perfmon == exec->perfmon)) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return 0;
}
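
/**
 * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @exec: V3D job being set up
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 */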
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* A job with no BOs referenced has nothing for the
		 * validator to work against, so reject it outright.
		 */
		DRM_DEBUG("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = kvmalloc_array(exec->bo_count,
				  sizeof(struct drm_gem_cma_object *),
				  GFP_KERNEL | __GFP_ZERO);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			break;
		}

		drm_gem_object_get(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

	if (ret)
		goto fail_put_bo;

	for (i = 0; i < exec->bo_count; i++) {
		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
		if (ret)
			goto fail_dec_usecnt;
	}

	kvfree(handles);
	return 0;

fail_dec_usecnt:
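	/* Decrease usecnt on acquired objects.
	 * We cannot rely on vc4_complete_exec() to release resources here,
	 * because vc4_complete_exec() has no information about which BO has
	 * had its ->usecnt incremented.
	 * To make things easier we just free everything explicitly and set
	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
	 * step.
	 */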
	for (i-- ; i >= 0; i--)
		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));

fail_put_bo:
	/* Release any reference to acquired objects. */
	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
		drm_gem_object_put(&exec->bo[i]->base);

fail:
	kvfree(handles);
	kvfree(exec->bo);
	exec->bo = NULL;
	return ret;
}

static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_DEBUG("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}
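
	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and the bo->vaddr
	 * mapping may be uncached.
	 */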
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	if (exec->found_tile_binning_mode_config_packet) {
		ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used);
		if (ret)
			goto fail;
	}
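
	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */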
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
	return ret;
}

static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	unsigned i;
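
	/* If we got force-completed because of GPU reset rather than
	 * through our IRQ handler, signal the fence now.
	 */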
	if (exec->fence) {
		dma_fence_signal(exec->fence);
		dma_fence_put(exec->fence);
	}

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++) {
			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);

			vc4_bo_dec_usecnt(bo);
			drm_gem_object_put(&exec->bo[i]->base);
		}
		kvfree(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_put(&bo->base.base);
	}

	/* Free up the allocation of any bin slots we used. */
	spin_lock_irqsave(&vc4->job_lock, irqflags);
	vc4->bin_alloc_used &= ~exec->bin_slots;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Release the reference on the binner BO if needed. */
	if (exec->bin_bo_used)
		vc4_v3d_bin_bo_put(vc4);

	/* Release the reference we had on the perf monitor. */
	vc4_perfmon_put(exec->perfmon);

	vc4_v3d_pm_put(vc4);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(&vc4->base, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return 0;
}
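
/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */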
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_wait_seqno *args = data;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_put(gem_obj);
	return ret;
}
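
/**
 * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */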
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_file *vc4file = file_priv->driver_priv;
	struct drm_vc4_submit_cl *args = data;
	struct drm_syncobj *out_sync = NULL;
	struct vc4_exec_info *exec;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence *in_fence;
	int ret = 0;

	trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size,
				  args->shader_rec_size,
				  args->bo_handle_count);

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	if (!vc4->v3d) {
		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
		return -ENODEV;
	}

	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	if (args->pad2 != 0) {
		DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}
	exec->dev = vc4;

	ret = vc4_v3d_pm_get(vc4);
	if (ret) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (args->perfmonid) {
		exec->perfmon = vc4_perfmon_find(vc4file,
						 args->perfmonid);
		if (!exec->perfmon) {
			ret = -ENOENT;
			goto fail;
		}
	}

	if (args->in_sync) {
		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
					     0, 0, &in_fence);
		if (ret)
			goto fail;

		/* When the fence (or fence array) is exclusively from our
		 * context we can skip the wait since jobs are executed in
		 * order of their submission through this ioctl and this can
		 * only have fences from a prior job.
		 */
		if (!dma_fence_match_context(in_fence,
					     vc4->dma_fence_context)) {
			ret = dma_fence_wait(in_fence, true);
			if (ret) {
				dma_fence_put(in_fence);
				goto fail;
			}
		}

		dma_fence_put(in_fence);
	}

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->out_sync) {
		out_sync = drm_syncobj_find(file_priv, args->out_sync);
		if (!out_sync) {
			ret = -EINVAL;
			goto fail;
		}

		/* We replace the fence in out_sync in vc4_queue_submit since
		 * the render job could execute immediately after that call.
		 * If it finishes before our ioctl processing resumes the
		 * render job fence could already have been freed.
		 */
	}

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);

	/* The syncobj isn't part of the exec data and we need to free our
	 * reference even if job submission failed.
	 */
	if (out_sync)
		drm_syncobj_put(out_sync);

	if (ret)
		goto fail;

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(&vc4->base, exec);

	return ret;
}

static void vc4_gem_destroy(struct drm_device *dev, void *unused);
int vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	vc4->dma_fence_context = dma_fence_context_alloc(1);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);

	INIT_LIST_HEAD(&vc4->purgeable.list);
	mutex_init(&vc4->purgeable.lock);

	return drmm_add_action_or_reset(dev, vc4_gem_destroy, NULL);
}

static void vc4_gem_destroy(struct drm_device *dev, void *unused)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the kernel BO
	 * we allocated for binning.
	 */
	if (vc4->bin_bo) {
		drm_gem_object_put(&vc4->bin_bo->base.base);
		vc4->bin_bo = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);
}

int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_gem_madvise *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;
	int ret;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	switch (args->madv) {
	case VC4_MADV_DONTNEED:
	case VC4_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -ENOENT;
	}

	bo = to_vc4_bo(gem_obj);

	/* Only BOs exposed to userspace can be purged. */
	if (bo->madv == __VC4_MADV_NOTSUPP) {
		DRM_DEBUG("madvise not supported on this BO\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	/* Not supported on imported BOs: we have no control over the
	 * lifetime of their backing storage.
	 */
	if (gem_obj->import_attach) {
		DRM_DEBUG("madvise not supported on imported BOs\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	mutex_lock(&bo->madv_lock);

	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
	    !refcount_read(&bo->usecnt)) {
		/* If the BO is about to be marked as purgeable, is not used
		 * and is not already purgeable or purged, add it to the
		 * purgeable list.
		 */
		vc4_bo_add_to_purgeable_pool(bo);
	} else if (args->madv == VC4_MADV_WILLNEED &&
		   bo->madv == VC4_MADV_DONTNEED &&
		   !refcount_read(&bo->usecnt)) {
		/* The BO is about to be marked unpurgeable again; if it is
		 * not in use and hasn't been purged, pull it back off the
		 * purgeable list.
		 */
		vc4_bo_remove_from_purgeable_pool(bo);
	}

	/* Report whether the BO's backing storage has been retained. */
	args->retained = bo->madv != __VC4_MADV_PURGED;

	/* Update the madv status only if the BO has not been purged. */
	if (bo->madv != __VC4_MADV_PURGED)
		bo->madv = args->madv;

	mutex_unlock(&bo->madv_lock);

	ret = 0;

out_put_gem:
	drm_gem_object_put(gem_obj);

	return ret;
}