// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_syncobj.h>
#include <uapi/drm/v3d_drm.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
    /* Set OVRTMUOUT, which means that the texture sampler uniform
     * configuration's tmu output type field is used, instead of
     * using the hardware default behavior based on the texture
     * type.  If you want the default behavior, you can still put
     * "2" in the indirect texture state's output_type field.
     */
    if (v3d->ver < 40)
        V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

    /* Whenever we flush the L2T cache, we always want to flush
     * the whole thing.
     */
    V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
    V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
    v3d_init_core(v3d, 0);
}

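/* Requests that the GMP stop issuing new AXI transactions and waits
 * for its outstanding reads and writes to drain, so the core can be
 * safely reset or powered down.
 */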
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
    V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);

    if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
              (V3D_GMP_STATUS_RD_COUNT_MASK |
               V3D_GMP_STATUS_WR_COUNT_MASK |
               V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
        DRM_ERROR("Failed to wait for safe GMP shutdown\n");
    }
}

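/* Requests a safe shutdown of the GCA block (only present before V3D
 * 4.1) and waits for the hardware to acknowledge it.
 */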
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
    if (v3d->ver >= 41)
        return;

    V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

    if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
              V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
             V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
        DRM_ERROR("Failed to wait for safe GCA shutdown\n");
    }
}

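/* Resets V3D by pulsing the SW_INIT line of the GR bridge when no
 * reset controller is available, handling both bridge register
 * layouts.
 */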
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
    int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

    if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
        V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
                 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
        V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

        /* GFXH-1383: The SW_INIT may cause a stray write to address 0
         * of the unit, so reset it to its power-on value here.
         */
        V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
    } else {
        WARN_ON_ONCE(V3D_GET_FIELD(version,
                       V3D_TOP_GR_BRIDGE_MAJOR) != 7);
        V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
                 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
        V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
    }
}

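/* Performs the actual hardware reset, preferring the reset controller
 * when one was provided and falling back to the GR bridge otherwise,
 * then reinitializes the invariant HW state.
 */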
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
    if (v3d->reset)
        reset_control_reset(v3d->reset);
    else
        v3d_reset_by_bridge(v3d);

    v3d_init_hw_state(v3d);
}

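/* Full GPU reset path, used to recover from a hang: quiesces the
 * hardware, resets it, and restores the MMU page table and IRQ state.
 */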
void
v3d_reset(struct v3d_dev *v3d)
{
    struct drm_device *dev = &v3d->drm;

    DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
    DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
              V3D_CORE_READ(0, V3D_ERR_STAT));
    trace_v3d_reset_begin(dev);

    /* XXX: only needed for safe powerdown, not reset. */
    if (false)
        v3d_idle_axi(v3d, 0);

    v3d_idle_gca(v3d);
    v3d_reset_v3d(v3d);

    v3d_mmu_set_page_table(v3d);
    v3d_irq_reset(v3d);

    v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

    trace_v3d_reset_end(dev);
}

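/* Flushes the L3 cache via the GCA.  This only applies to V3D 3.x; on
 * 4.1+ the function is a no-op.
 */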
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
    if (v3d->ver < 41) {
        u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

        V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
                  gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

        if (v3d->ver < 33) {
            V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
                      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
        }
    }
}

/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
    if (v3d->ver > 32)
        return;

    V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
               V3D_L2CACTL_L2CCLR |
               V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
    /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
     * need to wait for completion before dispatching the job --
     * L2T accesses will be stalled until the flush has completed.
     * However, we do need to make sure we don't try to trigger a
     * new flush while the L2_CLEAN queue is trying to
     * synchronously clean after a job.
     */
    mutex_lock(&v3d->cache_clean_lock);
    V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
               V3D_L2TCACTL_L2TFLS |
               V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
    mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion.  So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
    struct drm_device *dev = &v3d->drm;
    int core = 0;

    trace_v3d_cache_clean_begin(dev);

    V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
    if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
               V3D_L2TCACTL_TMUWCF), 100)) {
        DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
    }

    mutex_lock(&v3d->cache_clean_lock);
    V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
               V3D_L2TCACTL_L2TFLS |
               V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

    if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
               V3D_L2TCACTL_L2TFLS), 100)) {
        DRM_ERROR("Timeout waiting for L2T clean\n");
    }

    mutex_unlock(&v3d->cache_clean_lock);

    trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
    V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
               V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
               V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
               V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
               V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
    /* Invalidate the caches from the outside in.  That way if
     * another CL's concurrent use of nearby memory were to pull
     * an invalidated cacheline back in, we wouldn't leave stale
     * data in the inner cache.
     */
    v3d_flush_l3(v3d);
    v3d_invalidate_l2c(v3d, 0);
    v3d_flush_l2t(v3d, 0);
    v3d_invalidate_slices(v3d, 0);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on the render job's unref_list).  They're entirely
 * private to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
             struct ww_acquire_ctx *acquire_ctx)
{
    int i, ret;

    ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
    if (ret)
        return ret;

    for (i = 0; i < job->bo_count; i++) {
        ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
        if (ret)
            goto fail;

        ret = drm_sched_job_add_implicit_dependencies(&job->base,
                                  job->bo[i], true);
        if (ret)
            goto fail;
    }

    return 0;

fail:
    drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
    return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_job_free() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
           struct drm_file *file_priv,
           struct v3d_job *job,
           u64 bo_handles,
           u32 bo_count)
{
    u32 *handles;
    int ret = 0;
    int i;

    job->bo_count = bo_count;

    if (!job->bo_count) {
        /* See comment on bo_index for why we have to check
         * this.
         */
        DRM_DEBUG("Rendering requires BOs\n");
        return -EINVAL;
    }

    job->bo = kvmalloc_array(job->bo_count,
                 sizeof(struct drm_gem_cma_object *),
                 GFP_KERNEL | __GFP_ZERO);
    if (!job->bo) {
        DRM_DEBUG("Failed to allocate validated BO pointers\n");
        return -ENOMEM;
    }

    handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
    if (!handles) {
        ret = -ENOMEM;
        DRM_DEBUG("Failed to allocate incoming GEM handles\n");
        goto fail;
    }

    if (copy_from_user(handles,
               (void __user *)(uintptr_t)bo_handles,
               job->bo_count * sizeof(u32))) {
        ret = -EFAULT;
        DRM_DEBUG("Failed to copy in GEM handles\n");
        goto fail;
    }

    spin_lock(&file_priv->table_lock);
    for (i = 0; i < job->bo_count; i++) {
        struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
                             handles[i]);
        if (!bo) {
            DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
                  i, handles[i]);
            ret = -ENOENT;
            spin_unlock(&file_priv->table_lock);
            goto fail;
        }
        drm_gem_object_get(bo);
        job->bo[i] = bo;
    }
    spin_unlock(&file_priv->table_lock);

fail:
    kvfree(handles);
    return ret;
}

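/* Final unref of a job: drops the BO references, the fences and the
 * perfmon, then frees the job itself.
 */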
static void
v3d_job_free(struct kref *ref)
{
    struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
    int i;

    for (i = 0; i < job->bo_count; i++) {
        if (job->bo[i])
            drm_gem_object_put(job->bo[i]);
    }
    kvfree(job->bo);

    dma_fence_put(job->irq_fence);
    dma_fence_put(job->done_fence);

    if (job->perfmon)
        v3d_perfmon_put(job->perfmon);

    kfree(job);
}

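/* Like v3d_job_free(), but also releases the BOs on the render job's
 * private unref_list (tile alloc/state and overflow memory).
 */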
static void
v3d_render_job_free(struct kref *ref)
{
    struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
                          base.refcount);
    struct v3d_bo *bo, *save;

    list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
        drm_gem_object_put(&bo->base.base);
    }

    v3d_job_free(ref);
}

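/* Cleans up a partially constructed job on the submit error paths. */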
void v3d_job_cleanup(struct v3d_job *job)
{
    if (!job)
        return;

    drm_sched_job_cleanup(&job->base);
    v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
    kref_put(&job->refcount, job->free);
}

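/* Waits for the fences attached to a BO to signal, decrementing the
 * user's timeout so the ioctl can be cleanly restarted if it gets
 * interrupted.
 */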
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
          struct drm_file *file_priv)
{
    int ret;
    struct drm_v3d_wait_bo *args = data;
    ktime_t start = ktime_get();
    u64 delta_ns;
    unsigned long timeout_jiffies =
        nsecs_to_jiffies_timeout(args->timeout_ns);

    if (args->pad != 0)
        return -EINVAL;

    ret = drm_gem_dma_resv_wait(file_priv, args->handle,
                    true, timeout_jiffies);

    /* Decrement the user's timeout, in case we got interrupted
     * such that the ioctl will be restarted.
     */
    delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
    if (delta_ns < args->timeout_ns)
        args->timeout_ns -= delta_ns;
    else
        args->timeout_ns = 0;

    /* Asked to wait beyond the jiffie/scheduler precision? */
    if (ret == -ETIME && args->timeout_ns)
        ret = -EAGAIN;

    return ret;
}

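/* Adds the fence of an input syncobj (if any) as a scheduler
 * dependency of the job, so the job won't run until that fence has
 * signaled.
 */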
static int
v3d_job_add_deps(struct drm_file *file_priv, struct v3d_job *job,
         u32 in_sync, u32 point)
{
    struct dma_fence *in_fence = NULL;
    int ret;

    ret = drm_syncobj_find_fence(file_priv, in_sync, point, 0, &in_fence);
    if (ret == -EINVAL)
        return ret;

    return drm_sched_job_add_dependency(&job->base, in_fence);
}

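/* Allocates and initializes the common part of a job: sets up the
 * scheduler job, collects the wait dependencies (either the legacy
 * single in_sync or the multisync extension's in_syncs) and takes the
 * initial refcount.
 */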
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
         void **container, size_t size, void (*free)(struct kref *ref),
         u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
    struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
    struct v3d_job *job;
    bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
    int ret, i;

    *container = kcalloc(1, size, GFP_KERNEL);
    if (!*container) {
        DRM_ERROR("Cannot allocate memory for v3d job.\n");
        return -ENOMEM;
    }

    job = *container;
    job->v3d = v3d;
    job->free = free;

    ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
                 v3d_priv);
    if (ret)
        goto fail;

    if (has_multisync) {
        if (se->in_sync_count && se->wait_stage == queue) {
            struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

            for (i = 0; i < se->in_sync_count; i++) {
                struct drm_v3d_sem in;

                if (copy_from_user(&in, handle++, sizeof(in))) {
                    ret = -EFAULT;
                    DRM_DEBUG("Failed to copy wait dep handle.\n");
                    goto fail_deps;
                }
                ret = v3d_job_add_deps(file_priv, job, in.handle, 0);
                if (ret)
                    goto fail_deps;
            }
        }
    } else {
        ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
        if (ret)
            goto fail_deps;
    }

    kref_init(&job->refcount);

    return 0;

fail_deps:
    drm_sched_job_cleanup(&job->base);
fail:
    kfree(*container);
    *container = NULL;

    return ret;
}

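/* Arms the scheduler job, grabs its finished fence as the job's
 * done_fence, and queues it to the scheduler entity.  The extra
 * refcount is dropped when the scheduler completes the job.
 */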
static void
v3d_push_job(struct v3d_job *job)
{
    drm_sched_job_arm(&job->base);

    job->done_fence = dma_fence_get(&job->base.s_fence->finished);

    /* put by scheduler job completion */
    kref_get(&job->refcount);

    drm_sched_entity_push_job(&job->base);
}

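/* Attaches the job's done fence to every BO reservation, drops the
 * reservation locks, and signals the requested out syncobj(s) --
 * either the legacy single out_sync or the multisync extension's
 * out_syncs.
 */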
static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
                     struct v3d_job *job,
                     struct ww_acquire_ctx *acquire_ctx,
                     u32 out_sync,
                     struct v3d_submit_ext *se,
                     struct dma_fence *done_fence)
{
    struct drm_syncobj *sync_out;
    bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
    int i;

    for (i = 0; i < job->bo_count; i++) {
        /* XXX: Use shared fences for read-only objects. */
        dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
                   DMA_RESV_USAGE_WRITE);
    }

    drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

    /* Update the return sync object for the job (single signal
     * semaphore case).
     */
    if (!has_multisync) {
        sync_out = drm_syncobj_find(file_priv, out_sync);
        if (sync_out) {
            drm_syncobj_replace_fence(sync_out, done_fence);
            drm_syncobj_put(sync_out);
        }
        return;
    }

    /* If multiple semaphores extension is supported */
    if (se->out_sync_count) {
        for (i = 0; i < se->out_sync_count; i++) {
            drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
                          done_fence);
            drm_syncobj_put(se->out_syncs[i].syncobj);
        }
        kvfree(se->out_syncs);
    }
}

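/* Drops the syncobj references collected for the multisync out_syncs
 * on the submit error paths.
 */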
static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
    unsigned int i;

    if (!(se && se->out_sync_count))
        return;

    for (i = 0; i < se->out_sync_count; i++)
        drm_syncobj_put(se->out_syncs[i].syncobj);
    kvfree(se->out_syncs);
}

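/* Copies in the multisync out_sync handles and looks up the matching
 * syncobjs, which will be signaled once the job completes.
 */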
static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
                struct v3d_submit_ext *se,
                u32 count, u64 handles)
{
    struct drm_v3d_sem __user *post_deps;
    int i, ret;

    if (!count)
        return 0;

    se->out_syncs = (struct v3d_submit_outsync *)
            kvmalloc_array(count,
                       sizeof(struct v3d_submit_outsync),
                       GFP_KERNEL);
    if (!se->out_syncs)
        return -ENOMEM;

    post_deps = u64_to_user_ptr(handles);

    for (i = 0; i < count; i++) {
        struct drm_v3d_sem out;

        if (copy_from_user(&out, post_deps++, sizeof(out))) {
            ret = -EFAULT;
            DRM_DEBUG("Failed to copy post dep handles\n");
            goto fail;
        }

        se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
                                out.handle);
        if (!se->out_syncs[i].syncobj) {
            ret = -EINVAL;
            goto fail;
        }
    }
    se->out_sync_count = count;

    return 0;

fail:
    for (i--; i >= 0; i--)
        drm_syncobj_put(se->out_syncs[i].syncobj);
    kvfree(se->out_syncs);

    return ret;
}

/* Gets the data for multiple binary semaphore synchronization: parses
 * the syncobjs to be signaled when the job completes (out_syncs) and
 * records the wait semaphores (in_syncs).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
                  struct drm_v3d_extension __user *ext,
                  void *data)
{
    struct drm_v3d_multi_sync multisync;
    struct v3d_submit_ext *se = data;
    int ret;

    if (copy_from_user(&multisync, ext, sizeof(multisync)))
        return -EFAULT;

    if (multisync.pad)
        return -EINVAL;

    ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
                      multisync.out_syncs);
    if (ret)
        return ret;

    se->in_sync_count = multisync.in_sync_count;
    se->in_syncs = multisync.in_syncs;
    se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
    se->wait_stage = multisync.wait_stage;

    return 0;
}

/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
 * according to the extension id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
           u64 ext_handles,
           void *data)
{
    struct drm_v3d_extension __user *user_ext;
    int ret;

    user_ext = u64_to_user_ptr(ext_handles);
    while (user_ext) {
        struct drm_v3d_extension ext;

        if (copy_from_user(&ext, user_ext, sizeof(ext))) {
            DRM_DEBUG("Failed to copy submit extension\n");
            return -EFAULT;
        }

        switch (ext.id) {
        case DRM_V3D_EXT_ID_MULTI_SYNC:
            ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
            if (ret)
                return ret;
            break;
        default:
            DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
            return -EINVAL;
        }

        user_ext = u64_to_user_ptr(ext.next);
    }

    return 0;
}

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
            struct drm_file *file_priv)
{
    struct v3d_dev *v3d = to_v3d_dev(dev);
    struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
    struct drm_v3d_submit_cl *args = data;
    struct v3d_submit_ext se = {0};
    struct v3d_bin_job *bin = NULL;
    struct v3d_render_job *render = NULL;
    struct v3d_job *clean_job = NULL;
    struct v3d_job *last_job;
    struct ww_acquire_ctx acquire_ctx;
    int ret = 0;

    trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

    if (args->pad)
        return -EINVAL;

    if (args->flags &&
        args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
                DRM_V3D_SUBMIT_EXTENSION)) {
        DRM_INFO("invalid flags: %d\n", args->flags);
        return -EINVAL;
    }

    if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
        ret = v3d_get_extensions(file_priv, args->extensions, &se);
        if (ret) {
            DRM_DEBUG("Failed to get extensions.\n");
            return ret;
        }
    }

    ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
               v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
    if (ret)
        goto fail;

    render->start = args->rcl_start;
    render->end = args->rcl_end;
    INIT_LIST_HEAD(&render->unref_list);

    if (args->bcl_start != args->bcl_end) {
        ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
                   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
        if (ret)
            goto fail;

        bin->start = args->bcl_start;
        bin->end = args->bcl_end;
        bin->qma = args->qma;
        bin->qms = args->qms;
        bin->qts = args->qts;
        bin->render = render;
    }

    if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
        ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
                   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
        if (ret)
            goto fail;

        last_job = clean_job;
    } else {
        last_job = &render->base;
    }

    ret = v3d_lookup_bos(dev, file_priv, last_job,
                 args->bo_handles, args->bo_handle_count);
    if (ret)
        goto fail;

    ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
    if (ret)
        goto fail;

    if (args->perfmon_id) {
        render->base.perfmon = v3d_perfmon_find(v3d_priv,
                            args->perfmon_id);

        if (!render->base.perfmon) {
            ret = -ENOENT;
            goto fail_perfmon;
        }
    }

    mutex_lock(&v3d->sched_lock);
    if (bin) {
        bin->base.perfmon = render->base.perfmon;
        v3d_perfmon_get(bin->base.perfmon);
        v3d_push_job(&bin->base);

        ret = drm_sched_job_add_dependency(&render->base.base,
                           dma_fence_get(bin->base.done_fence));
        if (ret)
            goto fail_unreserve;
    }

    v3d_push_job(&render->base);

    if (clean_job) {
        struct dma_fence *render_fence =
            dma_fence_get(render->base.done_fence);
        ret = drm_sched_job_add_dependency(&clean_job->base,
                           render_fence);
        if (ret)
            goto fail_unreserve;
        clean_job->perfmon = render->base.perfmon;
        v3d_perfmon_get(clean_job->perfmon);
        v3d_push_job(clean_job);
    }

    mutex_unlock(&v3d->sched_lock);

    v3d_attach_fences_and_unlock_reservation(file_priv,
                         last_job,
                         &acquire_ctx,
                         args->out_sync,
                         &se,
                         last_job->done_fence);

    if (bin)
        v3d_job_put(&bin->base);
    v3d_job_put(&render->base);
    if (clean_job)
        v3d_job_put(clean_job);

    return 0;

fail_unreserve:
    mutex_unlock(&v3d->sched_lock);
fail_perfmon:
    drm_gem_unlock_reservations(last_job->bo,
                    last_job->bo_count, &acquire_ctx);
fail:
    v3d_job_cleanup((void *)bin);
    v3d_job_cleanup((void *)render);
    v3d_job_cleanup(clean_job);
    v3d_put_multisync_post_deps(&se);

    return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
             struct drm_file *file_priv)
{
    struct v3d_dev *v3d = to_v3d_dev(dev);
    struct drm_v3d_submit_tfu *args = data;
    struct v3d_submit_ext se = {0};
    struct v3d_tfu_job *job = NULL;
    struct ww_acquire_ctx acquire_ctx;
    int ret = 0;

    trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

    if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
        DRM_DEBUG("invalid flags: %d\n", args->flags);
        return -EINVAL;
    }

    if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
        ret = v3d_get_extensions(file_priv, args->extensions, &se);
        if (ret) {
            DRM_DEBUG("Failed to get extensions.\n");
            return ret;
        }
    }

    ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
               v3d_job_free, args->in_sync, &se, V3D_TFU);
    if (ret)
        goto fail;

    job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
                   sizeof(*job->base.bo), GFP_KERNEL);
    if (!job->base.bo) {
        ret = -ENOMEM;
        goto fail;
    }

    job->args = *args;

    spin_lock(&file_priv->table_lock);
    for (job->base.bo_count = 0;
         job->base.bo_count < ARRAY_SIZE(args->bo_handles);
         job->base.bo_count++) {
        struct drm_gem_object *bo;

        if (!args->bo_handles[job->base.bo_count])
            break;

        bo = idr_find(&file_priv->object_idr,
                  args->bo_handles[job->base.bo_count]);
        if (!bo) {
            DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
                  job->base.bo_count,
                  args->bo_handles[job->base.bo_count]);
            ret = -ENOENT;
            spin_unlock(&file_priv->table_lock);
            goto fail;
        }
        drm_gem_object_get(bo);
        job->base.bo[job->base.bo_count] = bo;
    }
    spin_unlock(&file_priv->table_lock);

    ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
    if (ret)
        goto fail;

    mutex_lock(&v3d->sched_lock);
    v3d_push_job(&job->base);
    mutex_unlock(&v3d->sched_lock);

    v3d_attach_fences_and_unlock_reservation(file_priv,
                         &job->base, &acquire_ctx,
                         args->out_sync,
                         &se,
                         job->base.done_fence);

    v3d_job_put(&job->base);

    return 0;

fail:
    v3d_job_cleanup((void *)job);
    v3d_put_multisync_post_deps(&se);

    return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader dispatch) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
             struct drm_file *file_priv)
{
    struct v3d_dev *v3d = to_v3d_dev(dev);
    struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
    struct drm_v3d_submit_csd *args = data;
    struct v3d_submit_ext se = {0};
    struct v3d_csd_job *job = NULL;
    struct v3d_job *clean_job = NULL;
    struct ww_acquire_ctx acquire_ctx;
    int ret;

    trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

    if (args->pad)
        return -EINVAL;

    if (!v3d_has_csd(v3d)) {
        DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
        return -EINVAL;
    }

    if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
        DRM_INFO("invalid flags: %d\n", args->flags);
        return -EINVAL;
    }

    if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
        ret = v3d_get_extensions(file_priv, args->extensions, &se);
        if (ret) {
            DRM_DEBUG("Failed to get extensions.\n");
            return ret;
        }
    }

    ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
               v3d_job_free, args->in_sync, &se, V3D_CSD);
    if (ret)
        goto fail;

    ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
               v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
    if (ret)
        goto fail;

    job->args = *args;

    ret = v3d_lookup_bos(dev, file_priv, clean_job,
                 args->bo_handles, args->bo_handle_count);
    if (ret)
        goto fail;

    ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
    if (ret)
        goto fail;

    if (args->perfmon_id) {
        job->base.perfmon = v3d_perfmon_find(v3d_priv,
                             args->perfmon_id);
        if (!job->base.perfmon) {
            ret = -ENOENT;
            goto fail_perfmon;
        }
    }

    mutex_lock(&v3d->sched_lock);
    v3d_push_job(&job->base);

    ret = drm_sched_job_add_dependency(&clean_job->base,
                       dma_fence_get(job->base.done_fence));
    if (ret)
        goto fail_unreserve;

    v3d_push_job(clean_job);
    mutex_unlock(&v3d->sched_lock);

    v3d_attach_fences_and_unlock_reservation(file_priv,
                         clean_job,
                         &acquire_ctx,
                         args->out_sync,
                         &se,
                         clean_job->done_fence);

    v3d_job_put(&job->base);
    v3d_job_put(clean_job);

    return 0;

fail_unreserve:
    mutex_unlock(&v3d->sched_lock);
fail_perfmon:
    drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
                    &acquire_ctx);
fail:
    v3d_job_cleanup((void *)job);
    v3d_job_cleanup(clean_job);
    v3d_put_multisync_post_deps(&se);

    return ret;
}

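/* One-time GEM/MMU setup at driver load: initializes the locks and the
 * address-space allocator, allocates the MMU page table, programs the
 * invariant HW state and starts the schedulers.
 */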
int
v3d_gem_init(struct drm_device *dev)
{
    struct v3d_dev *v3d = to_v3d_dev(dev);
    u32 pt_size = 4096 * 1024;
    int ret, i;

    for (i = 0; i < V3D_MAX_QUEUES; i++)
        v3d->queue[i].fence_context = dma_fence_context_alloc(1);

    spin_lock_init(&v3d->mm_lock);
    spin_lock_init(&v3d->job_lock);
    mutex_init(&v3d->bo_lock);
    mutex_init(&v3d->reset_lock);
    mutex_init(&v3d->sched_lock);
    mutex_init(&v3d->cache_clean_lock);

    /* Note: We don't allocate address 0.  Various bits of HW
     * treat 0 as special, such as the occlusion query counters
     * where 0 means "disabled".
     */
    drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

    v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
                   &v3d->pt_paddr,
                   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
    if (!v3d->pt) {
        drm_mm_takedown(&v3d->mm);
        dev_err(v3d->drm.dev,
            "Failed to allocate page tables. Please ensure you have CMA enabled.\n");
        return -ENOMEM;
    }

    v3d_init_hw_state(v3d);
    v3d_mmu_set_page_table(v3d);

    ret = v3d_sched_init(v3d);
    if (ret) {
        drm_mm_takedown(&v3d->mm);
        dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
                  v3d->pt_paddr);
        return ret;
    }

    return 0;
}

void
v3d_gem_destroy(struct drm_device *dev)
{
    struct v3d_dev *v3d = to_v3d_dev(dev);

    v3d_sched_fini(v3d);

    /* Waiting for jobs to finish would need to be done before
     * unregistering V3D.
     */
    WARN_ON(v3d->bin_job);
    WARN_ON(v3d->render_job);

    drm_mm_takedown(&v3d->mm);

    dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
              v3d->pt_paddr);
}