0001 /*
0002  * SPDX-License-Identifier: MIT
0003  *
0004  * Copyright © 2008,2010 Intel Corporation
0005  */
0006 
0007 #include <linux/dma-resv.h>
0008 #include <linux/highmem.h>
0009 #include <linux/sync_file.h>
0010 #include <linux/uaccess.h>
0011 
0012 #include <drm/drm_syncobj.h>
0013 
0014 #include "display/intel_frontbuffer.h"
0015 
0016 #include "gem/i915_gem_ioctls.h"
0017 #include "gt/intel_context.h"
0018 #include "gt/intel_gpu_commands.h"
0019 #include "gt/intel_gt.h"
0020 #include "gt/intel_gt_buffer_pool.h"
0021 #include "gt/intel_gt_pm.h"
0022 #include "gt/intel_ring.h"
0023 
0024 #include "pxp/intel_pxp.h"
0025 
0026 #include "i915_cmd_parser.h"
0027 #include "i915_drv.h"
0028 #include "i915_file_private.h"
0029 #include "i915_gem_clflush.h"
0030 #include "i915_gem_context.h"
0031 #include "i915_gem_evict.h"
0032 #include "i915_gem_ioctls.h"
0033 #include "i915_trace.h"
0034 #include "i915_user_extensions.h"
0035 
0036 struct eb_vma {
0037     struct i915_vma *vma;
0038     unsigned int flags;
0039 
0040     /** This vma's place in the execbuf reservation list */
0041     struct drm_i915_gem_exec_object2 *exec;
0042     struct list_head bind_link;
0043     struct list_head reloc_link;
0044 
0045     struct hlist_node node;
0046     u32 handle;
0047 };
0048 
0049 enum {
0050     FORCE_CPU_RELOC = 1,
0051     FORCE_GTT_RELOC,
0052     FORCE_GPU_RELOC,
0053 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
0054 };
0055 
0056 /* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
0057 #define __EXEC_OBJECT_HAS_PIN       BIT(30)
0058 #define __EXEC_OBJECT_HAS_FENCE     BIT(29)
0059 #define __EXEC_OBJECT_USERPTR_INIT  BIT(28)
0060 #define __EXEC_OBJECT_NEEDS_MAP     BIT(27)
0061 #define __EXEC_OBJECT_NEEDS_BIAS    BIT(26)
0062 #define __EXEC_OBJECT_INTERNAL_FLAGS    (~0u << 26) /* all of the above + */
0063 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
0064 
0065 #define __EXEC_HAS_RELOC    BIT(31)
0066 #define __EXEC_ENGINE_PINNED    BIT(30)
0067 #define __EXEC_USERPTR_USED BIT(29)
0068 #define __EXEC_INTERNAL_FLAGS   (~0u << 29)
0069 #define UPDATE          PIN_OFFSET_FIXED
0070 
0071 #define BATCH_OFFSET_BIAS (256*1024)
0072 
0073 #define __I915_EXEC_ILLEGAL_FLAGS \
0074     (__I915_EXEC_UNKNOWN_FLAGS | \
0075      I915_EXEC_CONSTANTS_MASK  | \
0076      I915_EXEC_RESOURCE_STREAMER)
0077 
0078 /* Catch emission of unexpected errors for CI! */
0079 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
0080 #undef EINVAL
0081 #define EINVAL ({ \
0082     DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
0083     22; \
0084 })
0085 #endif
0086 
0087 /**
0088  * DOC: User command execution
0089  *
0090  * Userspace submits commands to be executed on the GPU as an instruction
0091  * stream within a GEM object we call a batchbuffer. These instructions may
0092  * refer to other GEM objects containing auxiliary state such as kernels,
0093  * samplers, render targets and even secondary batchbuffers. Userspace does
0094  * not know where in the GPU memory these objects reside and so before the
0095  * batchbuffer is passed to the GPU for execution, those addresses in the
0096  * batchbuffer and auxiliary objects are updated. This is known as relocation,
0097  * or patching. To try and avoid having to relocate each object on the next
0098  * execution, userspace is told the location of those objects in this pass,
0099  * but this remains just a hint as the kernel may choose a new location for
0100  * any object in the future.
0101  *
0102  * At the level of talking to the hardware, submitting a batchbuffer for the
0103  * GPU to execute amounts to adding content to a buffer from which the HW
0104  * command streamer is reading.
0105  *
0106  * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
0107  *    Execlists, this command is not placed on the same buffer as the
0108  *    remaining items.
0109  *
0110  * 2. Add a command to invalidate caches to the buffer.
0111  *
0112  * 3. Add a batchbuffer start command to the buffer; the start command is
0113  *    essentially a token together with the GPU address of the batchbuffer
0114  *    to be executed.
0115  *
0116  * 4. Add a pipeline flush to the buffer.
0117  *
0118  * 5. Add a memory write command to the buffer to record when the GPU
0119  *    is done executing the batchbuffer. The memory write writes the
0120  *    global sequence number of the request, ``i915_request::global_seqno``;
0121  *    the i915 driver uses the current value in the register to determine
0122  *    if the GPU has completed the batchbuffer.
0123  *
0124  * 6. Add a user interrupt command to the buffer. This command instructs
0125  *    the GPU to issue an interrupt when the command, pipeline flush and
0126  *    memory write are completed.
0127  *
0128  * 7. Inform the hardware of the additional commands added to the buffer
0129  *    (by updating the tail pointer).
0130  *
0131  * Processing an execbuf ioctl is conceptually split up into a few phases.
0132  *
0133  * 1. Validation - Ensure all the pointers, handles and flags are valid.
0134  * 2. Reservation - Assign GPU address space for every object
0135  * 3. Relocation - Update any addresses to point to the final locations
0136  * 4. Serialisation - Order the request with respect to its dependencies
0137  * 5. Construction - Construct a request to execute the batchbuffer
0138  * 6. Submission (at some point in the future execution)
0139  *
0140  * Reserving resources for the execbuf is the most complicated phase. We
0141  * neither want to have to migrate the object in the address space, nor do
0142  * we want to have to update any relocations pointing to this object. Ideally,
0143  * we want to leave the object where it is and for all the existing relocations
0144  * to match. If the object is given a new address, or if userspace thinks the
0145  * object is elsewhere, we have to parse all the relocation entries and update
0146  * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
0147  * all the target addresses in all of its objects match the value in the
0148  * relocation entries and that they all match the presumed offsets given by the
0149  * list of execbuffer objects. Using this knowledge, we know that if we haven't
0150  * moved any buffers, all the relocation entries are valid and we can skip
0151  * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
0152  * hang.) The requirements for using I915_EXEC_NO_RELOC are:
0153  *
0154  *      The addresses written in the objects must match the corresponding
0155  *      reloc.presumed_offset which in turn must match the corresponding
0156  *      execobject.offset.
0157  *
0158  *      Any render targets written to in the batch must be flagged with
0159  *      EXEC_OBJECT_WRITE.
0160  *
0161  *      To avoid stalling, execobject.offset should match the current
0162  *      address of that object within the active context.
0163  *
0164  * The reservation is done in multiple phases. First we try to keep any
0165  * object already bound at its current location - so long as it meets the
0166  * constraints imposed by the new execbuffer. Any object left unbound after the
0167  * first pass is then fitted into any available idle space. If an object does
0168  * not fit, all objects are removed from the reservation and the process rerun
0169  * after sorting the objects into a priority order (more difficult to fit
0170  * objects are tried first). Failing that, the entire VM is cleared and we try
0171  * to fit the execbuf one last time before concluding that it simply will not
0172  * fit.
0173  *
0174  * A small complication to all of this is that we allow userspace not only to
0175  * specify an alignment and a size for the object in the address space, but
0176  * we also allow userspace to specify the exact offset. These objects are
0177  * simpler to place (the location is known a priori); all we have to do is make
0178  * sure the space is available.
0179  *
0180  * Once all the objects are in place, patching up the buried pointers to point
0181  * to the final locations is a fairly simple job of walking over the relocation
0182  * entry arrays, looking up the right address and rewriting the value into
0183  * the object. Simple! ... The relocation entries are stored in user memory
0184  * and so to access them we have to copy them into a local buffer. That copy
0185  * has to avoid taking any pagefaults as they may lead back to a GEM object
0186  * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
0187  * the relocation into multiple passes. First we try to do everything within an
0188  * atomic context (avoid the pagefaults) which requires that we never wait. If
0189  * we detect that we may wait, or if we need to fault, then we have to fall back
0190  * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
0191  * bells yet?) Dropping the mutex means that we lose all the state we have
0192  * built up so far for the execbuf and we must reset any global data. However,
0193  * we do leave the objects pinned in their final locations - which is a
0194  * potential issue for concurrent execbufs. Once we have left the mutex, we can
0195  * allocate and copy all the relocation entries into a large array at our
0196  * leisure, reacquire the mutex, reclaim all the objects and other state and
0197  * then proceed to update any incorrect addresses with the objects.
0198  *
0199  * As we process the relocation entries, we maintain a record of whether the
0200  * object is being written to. Using NO_RELOC, we expect userspace to provide
0201  * this information instead. We also check whether we can skip the relocation
0202  * by comparing the expected value inside the relocation entry with the target's
0203  * final address. If they differ, we have to map the current object and rewrite
0204  * the 4 or 8 byte pointer within.
0205  *
0206  * Serialising an execbuf is quite simple according to the rules of the GEM
0207  * ABI. Execution within each context is ordered by the order of submission.
0208  * Writes to any GEM object are in order of submission and are exclusive. Reads
0209  * from a GEM object are unordered with respect to other reads, but ordered by
0210  * writes. A write submitted after a read cannot occur before the read, and
0211  * similarly any read submitted after a write cannot occur before the write.
0212  * Writes are ordered between engines such that only one write occurs at any
0213  * time (completing any reads beforehand) - using semaphores where available
0214  * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
0215  * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
0216  * reads before starting, and any read (either using set-domain or pread) must
0217  * flush all GPU writes before starting. (Note we only employ a barrier before,
0218  * we currently rely on userspace not concurrently starting a new execution
0219  * whilst reading or writing to an object. This may be an advantage or not
0220  * depending on how much you trust userspace not to shoot themselves in the
0221  * foot.) Serialisation may just result in the request being inserted into
0222  * a DAG awaiting its turn, but the simplest approach is to wait on the CPU
0223  * until all dependencies are resolved.
0224  *
0225  * After all of that, it is just a matter of closing the request and handing it to
0226  * the hardware (well, leaving it in a queue to be executed). However, we also
0227  * offer the ability for batchbuffers to be run with elevated privileges so
0228  * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
0229  * Before any batch is given extra privileges we first must check that it
0230  * contains no nefarious instructions, we check that each instruction is from
0231  * our whitelist and all registers are also from an allowed list. We first
0232  * copy the user's batchbuffer to a shadow (so that the user doesn't have
0233  * access to it, either by the CPU or GPU as we scan it) and then parse each
0234  * instruction. If everything is ok, we set a flag telling the hardware to run
0235  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
0236  */
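/*
 * Illustrative only -- a minimal userspace-side sketch of the execbuf ABI
 * described above; this is an editorial example, not part of this file. It
 * assumes a DRM fd plus GEM handles for a batch and a render target were
 * created earlier, and uses libdrm's drmIoctl(); real userspace adds error
 * handling, domain tracking and buffer reuse on top of this.
 */
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int submit_batch(int fd, uint32_t batch_handle, uint32_t target_handle,
			uint64_t presumed_target_offset, uint32_t batch_len)
{
	/* One relocation: patch a GPU address at byte 16 of the batch. */
	struct drm_i915_gem_relocation_entry reloc = {
		.target_handle = target_handle,
		.offset = 4 * sizeof(uint32_t),
		.presumed_offset = presumed_target_offset,
		.read_domains = I915_GEM_DOMAIN_RENDER,
		.write_domain = I915_GEM_DOMAIN_RENDER,
	};
	struct drm_i915_gem_exec_object2 objects[2] = {
		{ /* render target: offset must match presumed_offset for NO_RELOC */
			.handle = target_handle,
			.offset = presumed_target_offset,
			.flags = EXEC_OBJECT_WRITE | EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
		},
		{ /* the batch goes last unless I915_EXEC_BATCH_FIRST is set */
			.handle = batch_handle,
			.relocation_count = 1,
			.relocs_ptr = (uintptr_t)&reloc,
		},
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)objects,
		.buffer_count = 2,
		.batch_len = batch_len,
		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}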
0237 
0238 struct eb_fence {
0239     struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
0240     struct dma_fence *dma_fence;
0241     u64 value;
0242     struct dma_fence_chain *chain_fence;
0243 };
0244 
0245 struct i915_execbuffer {
0246     struct drm_i915_private *i915; /** i915 backpointer */
0247     struct drm_file *file; /** per-file lookup tables and limits */
0248     struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
0249     struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
0250     struct eb_vma *vma;
0251 
0252     struct intel_gt *gt; /* gt for the execbuf */
0253     struct intel_context *context; /* logical state for the request */
0254     struct i915_gem_context *gem_context; /** caller's context */
0255 
0256     /** our requests to build */
0257     struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
0258     /** identity of the batch obj/vma */
0259     struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
0260     struct i915_vma *trampoline; /** trampoline used for chaining */
0261 
0262     /** used for excl fence in dma_resv objects when > 1 BB submitted */
0263     struct dma_fence *composite_fence;
0264 
0265     /** actual size of execobj[] as we may extend it for the cmdparser */
0266     unsigned int buffer_count;
0267 
0268     /* number of batches in execbuf IOCTL */
0269     unsigned int num_batches;
0270 
0271     /** list of vma not yet bound during reservation phase */
0272     struct list_head unbound;
0273 
0274     /** list of vma that have execobj.relocation_count */
0275     struct list_head relocs;
0276 
0277     struct i915_gem_ww_ctx ww;
0278 
0279     /**
0280      * Track the most recently used object for relocations, as we
0281      * frequently have to perform multiple relocations within the same
0282      * obj/page
0283      */
0284     struct reloc_cache {
0285         struct drm_mm_node node; /** temporary GTT binding */
0286         unsigned long vaddr; /** Current kmap address */
0287         unsigned long page; /** Currently mapped page index */
0288         unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
0289         bool use_64bit_reloc : 1;
0290         bool has_llc : 1;
0291         bool has_fence : 1;
0292         bool needs_unfenced : 1;
0293     } reloc_cache;
0294 
0295     u64 invalid_flags; /** Set of execobj.flags that are invalid */
0296 
0297     /** Length of batch within object */
0298     u64 batch_len[MAX_ENGINE_INSTANCE + 1];
0299     u32 batch_start_offset; /** Location within object of batch */
0300     u32 batch_flags; /** Flags composed for emit_bb_start() */
0301     struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
0302 
0303     /**
0304      * Indicate either the size of the hashtable used to resolve
0305      * relocation handles, or if negative that we are using a direct
0306      * index into the execobj[].
0307      */
0308     int lut_size;
0309     struct hlist_head *buckets; /** ht for relocation handles */
0310 
0311     struct eb_fence *fences;
0312     unsigned long num_fences;
0313 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
0314     struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
0315 #endif
0316 };
0317 
0318 static int eb_parse(struct i915_execbuffer *eb);
0319 static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
0320 static void eb_unpin_engine(struct i915_execbuffer *eb);
0321 static void eb_capture_release(struct i915_execbuffer *eb);
0322 
0323 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
0324 {
0325     return intel_engine_requires_cmd_parser(eb->context->engine) ||
0326         (intel_engine_using_cmd_parser(eb->context->engine) &&
0327          eb->args->batch_len);
0328 }
0329 
0330 static int eb_create(struct i915_execbuffer *eb)
0331 {
0332     if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
0333         unsigned int size = 1 + ilog2(eb->buffer_count);
0334 
0335         /*
0336          * Without a 1:1 association between relocation handles and
0337          * the execobject[] index, we instead create a hashtable.
0338          * We size it dynamically based on available memory, starting
0339          * first with a 1:1 associative hash and scaling back until
0340          * the allocation succeeds.
0341          *
0342          * Later on we use a positive lut_size to indicate we are
0343          * using this hashtable, and a negative value to indicate a
0344          * direct lookup.
0345          */
0346         do {
0347             gfp_t flags;
0348 
0349             /* While we can still reduce the allocation size, don't
0350              * raise a warning and allow the allocation to fail.
0351              * On the last pass though, we want to try as hard
0352              * as possible to perform the allocation and warn
0353              * if it fails.
0354              */
0355             flags = GFP_KERNEL;
0356             if (size > 1)
0357                 flags |= __GFP_NORETRY | __GFP_NOWARN;
0358 
0359             eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
0360                           flags);
0361             if (eb->buckets)
0362                 break;
0363         } while (--size);
0364 
0365         if (unlikely(!size))
0366             return -ENOMEM;
0367 
0368         eb->lut_size = size;
0369     } else {
0370         eb->lut_size = -eb->buffer_count;
0371     }
0372 
0373     return 0;
0374 }
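/*
 * Worked example, not part of the original source: with buffer_count == 100
 * and I915_EXEC_HANDLE_LUT unset, the loop above starts at
 * size = 1 + ilog2(100) = 7, i.e. a 128-bucket hashtable, and halves the
 * bucket count on each allocation failure; eb->lut_size ends up positive
 * (the number of hash bits). With I915_EXEC_HANDLE_LUT, userspace promises
 * that relocation target handles are indices into execobj[], so no table is
 * needed and lut_size is stored as -buffer_count; eb_get_vma() keys off the
 * sign of lut_size to pick the lookup strategy.
 */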
0375 
0376 static bool
0377 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
0378          const struct i915_vma *vma,
0379          unsigned int flags)
0380 {
0381     if (vma->node.size < entry->pad_to_size)
0382         return true;
0383 
0384     if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
0385         return true;
0386 
0387     if (flags & EXEC_OBJECT_PINNED &&
0388         vma->node.start != entry->offset)
0389         return true;
0390 
0391     if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
0392         vma->node.start < BATCH_OFFSET_BIAS)
0393         return true;
0394 
0395     if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
0396         (vma->node.start + vma->node.size + 4095) >> 32)
0397         return true;
0398 
0399     if (flags & __EXEC_OBJECT_NEEDS_MAP &&
0400         !i915_vma_is_map_and_fenceable(vma))
0401         return true;
0402 
0403     return false;
0404 }
0405 
0406 static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
0407             unsigned int exec_flags)
0408 {
0409     u64 pin_flags = 0;
0410 
0411     if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
0412         pin_flags |= PIN_GLOBAL;
0413 
0414     /*
0415      * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
0416      * limit address to the first 4GBs for unflagged objects.
0417      */
0418     if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
0419         pin_flags |= PIN_ZONE_4G;
0420 
0421     if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
0422         pin_flags |= PIN_MAPPABLE;
0423 
0424     if (exec_flags & EXEC_OBJECT_PINNED)
0425         pin_flags |= entry->offset | PIN_OFFSET_FIXED;
0426     else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
0427         pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
0428 
0429     return pin_flags;
0430 }
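/*
 * Worked example, not from the original source: an entry with
 * EXEC_OBJECT_PINNED at entry->offset 0x100000 and without
 * EXEC_OBJECT_SUPPORTS_48B_ADDRESS composes to
 * PIN_ZONE_4G | PIN_OFFSET_FIXED | 0x100000 (the fixed offset travels in the
 * same u64 as the flag bits), with PIN_GLOBAL added on top if
 * EXEC_OBJECT_NEEDS_GTT was requested.
 */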
0431 
0432 static inline int
0433 eb_pin_vma(struct i915_execbuffer *eb,
0434        const struct drm_i915_gem_exec_object2 *entry,
0435        struct eb_vma *ev)
0436 {
0437     struct i915_vma *vma = ev->vma;
0438     u64 pin_flags;
0439     int err;
0440 
0441     if (vma->node.size)
0442         pin_flags = vma->node.start;
0443     else
0444         pin_flags = entry->offset & PIN_OFFSET_MASK;
0445 
0446     pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
0447     if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
0448         pin_flags |= PIN_GLOBAL;
0449 
0450     /* Attempt to reuse the current location if available */
0451     err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
0452     if (err == -EDEADLK)
0453         return err;
0454 
0455     if (unlikely(err)) {
0456         if (entry->flags & EXEC_OBJECT_PINNED)
0457             return err;
0458 
0459         /* Failing that pick any _free_ space if suitable */
0460         err = i915_vma_pin_ww(vma, &eb->ww,
0461                          entry->pad_to_size,
0462                          entry->alignment,
0463                          eb_pin_flags(entry, ev->flags) |
0464                          PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
0465         if (unlikely(err))
0466             return err;
0467     }
0468 
0469     if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
0470         err = i915_vma_pin_fence(vma);
0471         if (unlikely(err))
0472             return err;
0473 
0474         if (vma->fence)
0475             ev->flags |= __EXEC_OBJECT_HAS_FENCE;
0476     }
0477 
0478     ev->flags |= __EXEC_OBJECT_HAS_PIN;
0479     if (eb_vma_misplaced(entry, vma, ev->flags))
0480         return -EBADSLT;
0481 
0482     return 0;
0483 }
0484 
0485 static inline void
0486 eb_unreserve_vma(struct eb_vma *ev)
0487 {
0488     if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
0489         __i915_vma_unpin_fence(ev->vma);
0490 
0491     ev->flags &= ~__EXEC_OBJECT_RESERVED;
0492 }
0493 
0494 static int
0495 eb_validate_vma(struct i915_execbuffer *eb,
0496         struct drm_i915_gem_exec_object2 *entry,
0497         struct i915_vma *vma)
0498 {
0499     /* Relocations are disallowed for all platforms after TGL-LP.  This
0500      * also covers all platforms with local memory.
0501      */
0502     if (entry->relocation_count &&
0503         GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915))
0504         return -EINVAL;
0505 
0506     if (unlikely(entry->flags & eb->invalid_flags))
0507         return -EINVAL;
0508 
0509     if (unlikely(entry->alignment &&
0510              !is_power_of_2_u64(entry->alignment)))
0511         return -EINVAL;
0512 
0513     /*
0514      * Offset can be used as input (EXEC_OBJECT_PINNED), reject
0515      * any non-page-aligned or non-canonical addresses.
0516      */
0517     if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
0518              entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
0519         return -EINVAL;
0520 
0521     /* pad_to_size was once a reserved field, so sanitize it */
0522     if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
0523         if (unlikely(offset_in_page(entry->pad_to_size)))
0524             return -EINVAL;
0525     } else {
0526         entry->pad_to_size = 0;
0527     }
0528     /*
0529      * From drm_mm perspective address space is continuous,
0530      * so from this point we're always using non-canonical
0531      * form internally.
0532      */
0533     entry->offset = gen8_noncanonical_addr(entry->offset);
0534 
0535     if (!eb->reloc_cache.has_fence) {
0536         entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
0537     } else {
0538         if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
0539              eb->reloc_cache.needs_unfenced) &&
0540             i915_gem_object_is_tiled(vma->obj))
0541             entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
0542     }
0543 
0544     return 0;
0545 }
0546 
0547 static inline bool
0548 is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
0549 {
0550     return eb->args->flags & I915_EXEC_BATCH_FIRST ?
0551         buffer_idx < eb->num_batches :
0552         buffer_idx >= eb->args->buffer_count - eb->num_batches;
0553 }
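/*
 * Worked example, not from the original source: with buffer_count == 4 and
 * num_batches == 1, the default ordering treats only index 3 (the last
 * object) as the batch; with I915_EXEC_BATCH_FIRST it is index 0 instead.
 * For a parallel submission with num_batches == 2, the batches are indices
 * 2-3 by default, or 0-1 with I915_EXEC_BATCH_FIRST.
 */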
0554 
0555 static int
0556 eb_add_vma(struct i915_execbuffer *eb,
0557        unsigned int *current_batch,
0558        unsigned int i,
0559        struct i915_vma *vma)
0560 {
0561     struct drm_i915_private *i915 = eb->i915;
0562     struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
0563     struct eb_vma *ev = &eb->vma[i];
0564 
0565     ev->vma = vma;
0566     ev->exec = entry;
0567     ev->flags = entry->flags;
0568 
0569     if (eb->lut_size > 0) {
0570         ev->handle = entry->handle;
0571         hlist_add_head(&ev->node,
0572                    &eb->buckets[hash_32(entry->handle,
0573                             eb->lut_size)]);
0574     }
0575 
0576     if (entry->relocation_count)
0577         list_add_tail(&ev->reloc_link, &eb->relocs);
0578 
0579     /*
0580      * SNA is doing fancy tricks with compressing batch buffers, which leads
0581      * to negative relocation deltas. Usually that works out ok since the
0582      * relocate address is still positive, except when the batch is placed
0583      * very low in the GTT. Ensure this doesn't happen.
0584      *
0585      * Note that actual hangs have only been observed on gen7, but for
0586      * paranoia do it everywhere.
0587      */
0588     if (is_batch_buffer(eb, i)) {
0589         if (entry->relocation_count &&
0590             !(ev->flags & EXEC_OBJECT_PINNED))
0591             ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
0592         if (eb->reloc_cache.has_fence)
0593             ev->flags |= EXEC_OBJECT_NEEDS_FENCE;
0594 
0595         eb->batches[*current_batch] = ev;
0596 
0597         if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) {
0598             drm_dbg(&i915->drm,
0599                 "Attempting to use self-modifying batch buffer\n");
0600             return -EINVAL;
0601         }
0602 
0603         if (range_overflows_t(u64,
0604                       eb->batch_start_offset,
0605                       eb->args->batch_len,
0606                       ev->vma->size)) {
0607             drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
0608             return -EINVAL;
0609         }
0610 
0611         if (eb->args->batch_len == 0)
0612             eb->batch_len[*current_batch] = ev->vma->size -
0613                 eb->batch_start_offset;
0614         else
0615             eb->batch_len[*current_batch] = eb->args->batch_len;
0616         if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */
0617             drm_dbg(&i915->drm, "Invalid batch length\n");
0618             return -EINVAL;
0619         }
0620 
0621         ++*current_batch;
0622     }
0623 
0624     return 0;
0625 }
0626 
0627 static inline int use_cpu_reloc(const struct reloc_cache *cache,
0628                 const struct drm_i915_gem_object *obj)
0629 {
0630     if (!i915_gem_object_has_struct_page(obj))
0631         return false;
0632 
0633     if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
0634         return true;
0635 
0636     if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
0637         return false;
0638 
0639     return (cache->has_llc ||
0640         obj->cache_dirty ||
0641         obj->cache_level != I915_CACHE_NONE);
0642 }
0643 
0644 static int eb_reserve_vma(struct i915_execbuffer *eb,
0645               struct eb_vma *ev,
0646               u64 pin_flags)
0647 {
0648     struct drm_i915_gem_exec_object2 *entry = ev->exec;
0649     struct i915_vma *vma = ev->vma;
0650     int err;
0651 
0652     if (drm_mm_node_allocated(&vma->node) &&
0653         eb_vma_misplaced(entry, vma, ev->flags)) {
0654         err = i915_vma_unbind(vma);
0655         if (err)
0656             return err;
0657     }
0658 
0659     err = i915_vma_pin_ww(vma, &eb->ww,
0660                entry->pad_to_size, entry->alignment,
0661                eb_pin_flags(entry, ev->flags) | pin_flags);
0662     if (err)
0663         return err;
0664 
0665     if (entry->offset != vma->node.start) {
0666         entry->offset = vma->node.start | UPDATE;
0667         eb->args->flags |= __EXEC_HAS_RELOC;
0668     }
0669 
0670     if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
0671         err = i915_vma_pin_fence(vma);
0672         if (unlikely(err))
0673             return err;
0674 
0675         if (vma->fence)
0676             ev->flags |= __EXEC_OBJECT_HAS_FENCE;
0677     }
0678 
0679     ev->flags |= __EXEC_OBJECT_HAS_PIN;
0680     GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
0681 
0682     return 0;
0683 }
0684 
0685 static bool eb_unbind(struct i915_execbuffer *eb, bool force)
0686 {
0687     const unsigned int count = eb->buffer_count;
0688     unsigned int i;
0689     struct list_head last;
0690     bool unpinned = false;
0691 
0692     /* Resort *all* the objects into priority order */
0693     INIT_LIST_HEAD(&eb->unbound);
0694     INIT_LIST_HEAD(&last);
0695 
0696     for (i = 0; i < count; i++) {
0697         struct eb_vma *ev = &eb->vma[i];
0698         unsigned int flags = ev->flags;
0699 
0700         if (!force && flags & EXEC_OBJECT_PINNED &&
0701             flags & __EXEC_OBJECT_HAS_PIN)
0702             continue;
0703 
0704         unpinned = true;
0705         eb_unreserve_vma(ev);
0706 
0707         if (flags & EXEC_OBJECT_PINNED)
0708             /* Pinned must have their slot */
0709             list_add(&ev->bind_link, &eb->unbound);
0710         else if (flags & __EXEC_OBJECT_NEEDS_MAP)
0711             /* Mappable objects require the lowest 256MiB (aperture) */
0712             list_add_tail(&ev->bind_link, &eb->unbound);
0713         else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
0714             /* Prioritise 4GiB region for restricted bo */
0715             list_add(&ev->bind_link, &last);
0716         else
0717             list_add_tail(&ev->bind_link, &last);
0718     }
0719 
0720     list_splice_tail(&last, &eb->unbound);
0721     return unpinned;
0722 }
0723 
0724 static int eb_reserve(struct i915_execbuffer *eb)
0725 {
0726     struct eb_vma *ev;
0727     unsigned int pass;
0728     int err = 0;
0729     bool unpinned;
0730 
0731     /*
0732      * Attempt to pin all of the buffers into the GTT.
0733      * This is done in 2 phases:
0734      *
0735      * 1. Unbind all objects that do not match the GTT constraints for
0736      *    the execbuffer (fenceable, mappable, alignment etc).
0737      * 2. Bind new objects.
0738      *
0739      * This avoids unnecessary unbinding of later objects in order to make
0740      * room for the earlier objects *unless* we need to defragment.
0741      *
0742      * Defragmenting is skipped if all objects are pinned at a fixed location.
0743      */
0744     for (pass = 0; pass <= 2; pass++) {
0745         int pin_flags = PIN_USER | PIN_VALIDATE;
0746 
0747         if (pass == 0)
0748             pin_flags |= PIN_NONBLOCK;
0749 
0750         if (pass >= 1)
0751             unpinned = eb_unbind(eb, pass == 2);
0752 
0753         if (pass == 2) {
0754             err = mutex_lock_interruptible(&eb->context->vm->mutex);
0755             if (!err) {
0756                 err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
0757                 mutex_unlock(&eb->context->vm->mutex);
0758             }
0759             if (err)
0760                 return err;
0761         }
0762 
0763         list_for_each_entry(ev, &eb->unbound, bind_link) {
0764             err = eb_reserve_vma(eb, ev, pin_flags);
0765             if (err)
0766                 break;
0767         }
0768 
0769         if (err != -ENOSPC)
0770             break;
0771     }
0772 
0773     return err;
0774 }
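/*
 * Editorial summary of the passes above, not part of the original source:
 * pass 0 pins with PIN_NONBLOCK so nothing is evicted or waited upon;
 * pass 1 unreserves everything except objects already pinned at a
 * user-fixed offset (eb_unbind(eb, false)) and retries; pass 2 forcibly
 * unreserves those too (eb_unbind(eb, true)), evicts the whole VM with
 * i915_gem_evict_vm() and makes a final attempt before returning -ENOSPC.
 */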
0775 
0776 static int eb_select_context(struct i915_execbuffer *eb)
0777 {
0778     struct i915_gem_context *ctx;
0779 
0780     ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
0781     if (unlikely(IS_ERR(ctx)))
0782         return PTR_ERR(ctx);
0783 
0784     eb->gem_context = ctx;
0785     if (i915_gem_context_has_full_ppgtt(ctx))
0786         eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
0787 
0788     return 0;
0789 }
0790 
0791 static int __eb_add_lut(struct i915_execbuffer *eb,
0792             u32 handle, struct i915_vma *vma)
0793 {
0794     struct i915_gem_context *ctx = eb->gem_context;
0795     struct i915_lut_handle *lut;
0796     int err;
0797 
0798     lut = i915_lut_handle_alloc();
0799     if (unlikely(!lut))
0800         return -ENOMEM;
0801 
0802     i915_vma_get(vma);
0803     if (!atomic_fetch_inc(&vma->open_count))
0804         i915_vma_reopen(vma);
0805     lut->handle = handle;
0806     lut->ctx = ctx;
0807 
0808     /* Check that the context hasn't been closed in the meantime */
0809     err = -EINTR;
0810     if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
0811         if (likely(!i915_gem_context_is_closed(ctx)))
0812             err = radix_tree_insert(&ctx->handles_vma, handle, vma);
0813         else
0814             err = -ENOENT;
0815         if (err == 0) { /* And nor has this handle */
0816             struct drm_i915_gem_object *obj = vma->obj;
0817 
0818             spin_lock(&obj->lut_lock);
0819             if (idr_find(&eb->file->object_idr, handle) == obj) {
0820                 list_add(&lut->obj_link, &obj->lut_list);
0821             } else {
0822                 radix_tree_delete(&ctx->handles_vma, handle);
0823                 err = -ENOENT;
0824             }
0825             spin_unlock(&obj->lut_lock);
0826         }
0827         mutex_unlock(&ctx->lut_mutex);
0828     }
0829     if (unlikely(err))
0830         goto err;
0831 
0832     return 0;
0833 
0834 err:
0835     i915_vma_close(vma);
0836     i915_vma_put(vma);
0837     i915_lut_handle_free(lut);
0838     return err;
0839 }
0840 
0841 static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
0842 {
0843     struct i915_address_space *vm = eb->context->vm;
0844 
0845     do {
0846         struct drm_i915_gem_object *obj;
0847         struct i915_vma *vma;
0848         int err;
0849 
0850         rcu_read_lock();
0851         vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
0852         if (likely(vma && vma->vm == vm))
0853             vma = i915_vma_tryget(vma);
0854         rcu_read_unlock();
0855         if (likely(vma))
0856             return vma;
0857 
0858         obj = i915_gem_object_lookup(eb->file, handle);
0859         if (unlikely(!obj))
0860             return ERR_PTR(-ENOENT);
0861 
0862         /*
0863          * If the user has opted-in for protected-object tracking, make
0864          * sure the object encryption can be used.
0865          * We only need to do this when the object is first used with
0866          * this context, because the context itself will be banned when
0867          * the protected objects become invalid.
0868          */
0869         if (i915_gem_context_uses_protected_content(eb->gem_context) &&
0870             i915_gem_object_is_protected(obj)) {
0871             err = intel_pxp_key_check(&vm->gt->pxp, obj, true);
0872             if (err) {
0873                 i915_gem_object_put(obj);
0874                 return ERR_PTR(err);
0875             }
0876         }
0877 
0878         vma = i915_vma_instance(obj, vm, NULL);
0879         if (IS_ERR(vma)) {
0880             i915_gem_object_put(obj);
0881             return vma;
0882         }
0883 
0884         err = __eb_add_lut(eb, handle, vma);
0885         if (likely(!err))
0886             return vma;
0887 
0888         i915_gem_object_put(obj);
0889         if (err != -EEXIST)
0890             return ERR_PTR(err);
0891     } while (1);
0892 }
0893 
0894 static int eb_lookup_vmas(struct i915_execbuffer *eb)
0895 {
0896     unsigned int i, current_batch = 0;
0897     int err = 0;
0898 
0899     INIT_LIST_HEAD(&eb->relocs);
0900 
0901     for (i = 0; i < eb->buffer_count; i++) {
0902         struct i915_vma *vma;
0903 
0904         vma = eb_lookup_vma(eb, eb->exec[i].handle);
0905         if (IS_ERR(vma)) {
0906             err = PTR_ERR(vma);
0907             goto err;
0908         }
0909 
0910         err = eb_validate_vma(eb, &eb->exec[i], vma);
0911         if (unlikely(err)) {
0912             i915_vma_put(vma);
0913             goto err;
0914         }
0915 
0916         err = eb_add_vma(eb, &current_batch, i, vma);
0917         if (err)
0918             return err;
0919 
0920         if (i915_gem_object_is_userptr(vma->obj)) {
0921             err = i915_gem_object_userptr_submit_init(vma->obj);
0922             if (err) {
0923                 if (i + 1 < eb->buffer_count) {
0924                     /*
0925                      * The execbuffer code expects the last vma entry to
0926                      * be NULL. Since we already initialized this entry,
0927                      * set the next entry's vma to NULL or we mess up the
0928                      * cleanup handling.
0929                      */
0930                     eb->vma[i + 1].vma = NULL;
0931                 }
0932 
0933                 return err;
0934             }
0935 
0936             eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
0937             eb->args->flags |= __EXEC_USERPTR_USED;
0938         }
0939     }
0940 
0941     return 0;
0942 
0943 err:
0944     eb->vma[i].vma = NULL;
0945     return err;
0946 }
0947 
0948 static int eb_lock_vmas(struct i915_execbuffer *eb)
0949 {
0950     unsigned int i;
0951     int err;
0952 
0953     for (i = 0; i < eb->buffer_count; i++) {
0954         struct eb_vma *ev = &eb->vma[i];
0955         struct i915_vma *vma = ev->vma;
0956 
0957         err = i915_gem_object_lock(vma->obj, &eb->ww);
0958         if (err)
0959             return err;
0960     }
0961 
0962     return 0;
0963 }
0964 
0965 static int eb_validate_vmas(struct i915_execbuffer *eb)
0966 {
0967     unsigned int i;
0968     int err;
0969 
0970     INIT_LIST_HEAD(&eb->unbound);
0971 
0972     err = eb_lock_vmas(eb);
0973     if (err)
0974         return err;
0975 
0976     for (i = 0; i < eb->buffer_count; i++) {
0977         struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
0978         struct eb_vma *ev = &eb->vma[i];
0979         struct i915_vma *vma = ev->vma;
0980 
0981         err = eb_pin_vma(eb, entry, ev);
0982         if (err == -EDEADLK)
0983             return err;
0984 
0985         if (!err) {
0986             if (entry->offset != vma->node.start) {
0987                 entry->offset = vma->node.start | UPDATE;
0988                 eb->args->flags |= __EXEC_HAS_RELOC;
0989             }
0990         } else {
0991             eb_unreserve_vma(ev);
0992 
0993             list_add_tail(&ev->bind_link, &eb->unbound);
0994             if (drm_mm_node_allocated(&vma->node)) {
0995                 err = i915_vma_unbind(vma);
0996                 if (err)
0997                     return err;
0998             }
0999         }
1000 
1001         /* Reserve enough slots to accommodate composite fences */
1002         err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
1003         if (err)
1004             return err;
1005 
1006         GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
1007                eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
1008     }
1009 
1010     if (!list_empty(&eb->unbound))
1011         return eb_reserve(eb);
1012 
1013     return 0;
1014 }
1015 
1016 static struct eb_vma *
1017 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
1018 {
1019     if (eb->lut_size < 0) {
1020         if (handle >= -eb->lut_size)
1021             return NULL;
1022         return &eb->vma[handle];
1023     } else {
1024         struct hlist_head *head;
1025         struct eb_vma *ev;
1026 
1027         head = &eb->buckets[hash_32(handle, eb->lut_size)];
1028         hlist_for_each_entry(ev, head, node) {
1029             if (ev->handle == handle)
1030                 return ev;
1031         }
1032         return NULL;
1033     }
1034 }
1035 
1036 static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
1037 {
1038     const unsigned int count = eb->buffer_count;
1039     unsigned int i;
1040 
1041     for (i = 0; i < count; i++) {
1042         struct eb_vma *ev = &eb->vma[i];
1043         struct i915_vma *vma = ev->vma;
1044 
1045         if (!vma)
1046             break;
1047 
1048         eb_unreserve_vma(ev);
1049 
1050         if (final)
1051             i915_vma_put(vma);
1052     }
1053 
1054     eb_capture_release(eb);
1055     eb_unpin_engine(eb);
1056 }
1057 
1058 static void eb_destroy(const struct i915_execbuffer *eb)
1059 {
1060     if (eb->lut_size > 0)
1061         kfree(eb->buckets);
1062 }
1063 
1064 static inline u64
1065 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
1066           const struct i915_vma *target)
1067 {
1068     return gen8_canonical_addr((int)reloc->delta + target->node.start);
1069 }
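/*
 * Editorial sketch, not part of the original source: gen8_canonical_addr()
 * sign-extends bit 47 into bits [63:48] so that addresses written into the
 * batch are in the canonical form the GPU expects for 48-bit address spaces.
 * Equivalent standalone arithmetic:
 */
static inline u64 example_canonical_addr(u64 address)
{
	return (u64)((s64)(address << 16) >> 16); /* replicate bit 47 upward */
}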
1070 
1071 static void reloc_cache_init(struct reloc_cache *cache,
1072                  struct drm_i915_private *i915)
1073 {
1074     cache->page = -1;
1075     cache->vaddr = 0;
1076     /* Must be a variable in the struct to allow GCC to unroll. */
1077     cache->graphics_ver = GRAPHICS_VER(i915);
1078     cache->has_llc = HAS_LLC(i915);
1079     cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
1080     cache->has_fence = cache->graphics_ver < 4;
1081     cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
1082     cache->node.flags = 0;
1083 }
1084 
1085 static inline void *unmask_page(unsigned long p)
1086 {
1087     return (void *)(uintptr_t)(p & PAGE_MASK);
1088 }
1089 
1090 static inline unsigned int unmask_flags(unsigned long p)
1091 {
1092     return p & ~PAGE_MASK;
1093 }
1094 
1095 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
1096 
1097 static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
1098 {
1099     struct drm_i915_private *i915 =
1100         container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
1101     return to_gt(i915)->ggtt;
1102 }
1103 
1104 static void reloc_cache_unmap(struct reloc_cache *cache)
1105 {
1106     void *vaddr;
1107 
1108     if (!cache->vaddr)
1109         return;
1110 
1111     vaddr = unmask_page(cache->vaddr);
1112     if (cache->vaddr & KMAP)
1113         kunmap_atomic(vaddr);
1114     else
1115         io_mapping_unmap_atomic((void __iomem *)vaddr);
1116 }
1117 
1118 static void reloc_cache_remap(struct reloc_cache *cache,
1119                   struct drm_i915_gem_object *obj)
1120 {
1121     void *vaddr;
1122 
1123     if (!cache->vaddr)
1124         return;
1125 
1126     if (cache->vaddr & KMAP) {
1127         struct page *page = i915_gem_object_get_page(obj, cache->page);
1128 
1129         vaddr = kmap_atomic(page);
1130         cache->vaddr = unmask_flags(cache->vaddr) |
1131             (unsigned long)vaddr;
1132     } else {
1133         struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1134         unsigned long offset;
1135 
1136         offset = cache->node.start;
1137         if (!drm_mm_node_allocated(&cache->node))
1138             offset += cache->page << PAGE_SHIFT;
1139 
1140         cache->vaddr = (unsigned long)
1141             io_mapping_map_atomic_wc(&ggtt->iomap, offset);
1142     }
1143 }
1144 
1145 static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
1146 {
1147     void *vaddr;
1148 
1149     if (!cache->vaddr)
1150         return;
1151 
1152     vaddr = unmask_page(cache->vaddr);
1153     if (cache->vaddr & KMAP) {
1154         struct drm_i915_gem_object *obj =
1155             (struct drm_i915_gem_object *)cache->node.mm;
1156         if (cache->vaddr & CLFLUSH_AFTER)
1157             mb();
1158 
1159         kunmap_atomic(vaddr);
1160         i915_gem_object_finish_access(obj);
1161     } else {
1162         struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1163 
1164         intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1165         io_mapping_unmap_atomic((void __iomem *)vaddr);
1166 
1167         if (drm_mm_node_allocated(&cache->node)) {
1168             ggtt->vm.clear_range(&ggtt->vm,
1169                          cache->node.start,
1170                          cache->node.size);
1171             mutex_lock(&ggtt->vm.mutex);
1172             drm_mm_remove_node(&cache->node);
1173             mutex_unlock(&ggtt->vm.mutex);
1174         } else {
1175             i915_vma_unpin((struct i915_vma *)cache->node.mm);
1176         }
1177     }
1178 
1179     cache->vaddr = 0;
1180     cache->page = -1;
1181 }
1182 
1183 static void *reloc_kmap(struct drm_i915_gem_object *obj,
1184             struct reloc_cache *cache,
1185             unsigned long pageno)
1186 {
1187     void *vaddr;
1188     struct page *page;
1189 
1190     if (cache->vaddr) {
1191         kunmap_atomic(unmask_page(cache->vaddr));
1192     } else {
1193         unsigned int flushes;
1194         int err;
1195 
1196         err = i915_gem_object_prepare_write(obj, &flushes);
1197         if (err)
1198             return ERR_PTR(err);
1199 
1200         BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1201         BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
1202 
1203         cache->vaddr = flushes | KMAP;
1204         cache->node.mm = (void *)obj;
1205         if (flushes)
1206             mb();
1207     }
1208 
1209     page = i915_gem_object_get_page(obj, pageno);
1210     if (!obj->mm.dirty)
1211         set_page_dirty(page);
1212 
1213     vaddr = kmap_atomic(page);
1214     cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1215     cache->page = pageno;
1216 
1217     return vaddr;
1218 }
1219 
1220 static void *reloc_iomap(struct i915_vma *batch,
1221              struct i915_execbuffer *eb,
1222              unsigned long page)
1223 {
1224     struct drm_i915_gem_object *obj = batch->obj;
1225     struct reloc_cache *cache = &eb->reloc_cache;
1226     struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1227     unsigned long offset;
1228     void *vaddr;
1229 
1230     if (cache->vaddr) {
1231         intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1232         io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1233     } else {
1234         struct i915_vma *vma = ERR_PTR(-ENODEV);
1235         int err;
1236 
1237         if (i915_gem_object_is_tiled(obj))
1238             return ERR_PTR(-EINVAL);
1239 
1240         if (use_cpu_reloc(cache, obj))
1241             return NULL;
1242 
1243         err = i915_gem_object_set_to_gtt_domain(obj, true);
1244         if (err)
1245             return ERR_PTR(err);
1246 
1247         /*
1248          * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
1249          * VMA from the object list because we no longer pin.
1250          *
1251          * Only attempt to pin the batch buffer to ggtt if the current batch
1252          * is not inside ggtt, or the batch buffer is not misplaced.
1253          */
1254         if (!i915_is_ggtt(batch->vm) ||
1255             !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
1256             vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
1257                               PIN_MAPPABLE |
1258                               PIN_NONBLOCK /* NOWARN */ |
1259                               PIN_NOEVICT);
1260         }
1261 
1262         if (vma == ERR_PTR(-EDEADLK))
1263             return vma;
1264 
1265         if (IS_ERR(vma)) {
1266             memset(&cache->node, 0, sizeof(cache->node));
1267             mutex_lock(&ggtt->vm.mutex);
1268             err = drm_mm_insert_node_in_range
1269                 (&ggtt->vm.mm, &cache->node,
1270                  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
1271                  0, ggtt->mappable_end,
1272                  DRM_MM_INSERT_LOW);
1273             mutex_unlock(&ggtt->vm.mutex);
1274             if (err) /* no inactive aperture space, use cpu reloc */
1275                 return NULL;
1276         } else {
1277             cache->node.start = vma->node.start;
1278             cache->node.mm = (void *)vma;
1279         }
1280     }
1281 
1282     offset = cache->node.start;
1283     if (drm_mm_node_allocated(&cache->node)) {
1284         ggtt->vm.insert_page(&ggtt->vm,
1285                      i915_gem_object_get_dma_address(obj, page),
1286                      offset, I915_CACHE_NONE, 0);
1287     } else {
1288         offset += page << PAGE_SHIFT;
1289     }
1290 
1291     vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1292                              offset);
1293     cache->page = page;
1294     cache->vaddr = (unsigned long)vaddr;
1295 
1296     return vaddr;
1297 }
1298 
1299 static void *reloc_vaddr(struct i915_vma *vma,
1300              struct i915_execbuffer *eb,
1301              unsigned long page)
1302 {
1303     struct reloc_cache *cache = &eb->reloc_cache;
1304     void *vaddr;
1305 
1306     if (cache->page == page) {
1307         vaddr = unmask_page(cache->vaddr);
1308     } else {
1309         vaddr = NULL;
1310         if ((cache->vaddr & KMAP) == 0)
1311             vaddr = reloc_iomap(vma, eb, page);
1312         if (!vaddr)
1313             vaddr = reloc_kmap(vma->obj, cache, page);
1314     }
1315 
1316     return vaddr;
1317 }
1318 
1319 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1320 {
1321     if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1322         if (flushes & CLFLUSH_BEFORE)
1323             drm_clflush_virt_range(addr, sizeof(*addr));
1324 
1325         *addr = value;
1326 
1327         /*
1328          * Writes to the same cacheline are serialised by the CPU
1329          * (including clflush). On the write path, we only require
1330          * that it hits memory in an orderly fashion and place
1331          * mb barriers at the start and end of the relocation phase
1332          * to ensure ordering of clflush wrt to the system.
1333          */
1334         if (flushes & CLFLUSH_AFTER)
1335             drm_clflush_virt_range(addr, sizeof(*addr));
1336     } else
1337         *addr = value;
1338 }
1339 
1340 static u64
1341 relocate_entry(struct i915_vma *vma,
1342            const struct drm_i915_gem_relocation_entry *reloc,
1343            struct i915_execbuffer *eb,
1344            const struct i915_vma *target)
1345 {
1346     u64 target_addr = relocation_target(reloc, target);
1347     u64 offset = reloc->offset;
1348     bool wide = eb->reloc_cache.use_64bit_reloc;
1349     void *vaddr;
1350 
1351 repeat:
1352     vaddr = reloc_vaddr(vma, eb,
1353                 offset >> PAGE_SHIFT);
1354     if (IS_ERR(vaddr))
1355         return PTR_ERR(vaddr);
1356 
1357     GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
1358     clflush_write32(vaddr + offset_in_page(offset),
1359             lower_32_bits(target_addr),
1360             eb->reloc_cache.vaddr);
1361 
1362     if (wide) {
1363         offset += sizeof(u32);
1364         target_addr >>= 32;
1365         wide = false;
1366         goto repeat;
1367     }
1368 
1369     return target->node.start | UPDATE;
1370 }
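/*
 * Worked example, not from the original source: with use_64bit_reloc set, a
 * target at canonical address 0x0001234567001000 (delta 0) is written as two
 * dwords, 0x67001000 at reloc->offset and 0x00012345 at reloc->offset + 4,
 * which is what the second trip through the 'repeat' label above does after
 * shifting target_addr right by 32 and clearing 'wide'.
 */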
1371 
1372 static u64
1373 eb_relocate_entry(struct i915_execbuffer *eb,
1374           struct eb_vma *ev,
1375           const struct drm_i915_gem_relocation_entry *reloc)
1376 {
1377     struct drm_i915_private *i915 = eb->i915;
1378     struct eb_vma *target;
1379     int err;
1380 
1381     /* we already hold a reference to all valid objects */
1382     target = eb_get_vma(eb, reloc->target_handle);
1383     if (unlikely(!target))
1384         return -ENOENT;
1385 
1386     /* Validate that the target is in a valid r/w GPU domain */
1387     if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1388         drm_dbg(&i915->drm, "reloc with multiple write domains: "
1389               "target %d offset %d "
1390               "read %08x write %08x",
1391               reloc->target_handle,
1392               (int) reloc->offset,
1393               reloc->read_domains,
1394               reloc->write_domain);
1395         return -EINVAL;
1396     }
1397     if (unlikely((reloc->write_domain | reloc->read_domains)
1398              & ~I915_GEM_GPU_DOMAINS)) {
1399         drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
1400               "target %d offset %d "
1401               "read %08x write %08x",
1402               reloc->target_handle,
1403               (int) reloc->offset,
1404               reloc->read_domains,
1405               reloc->write_domain);
1406         return -EINVAL;
1407     }
1408 
1409     if (reloc->write_domain) {
1410         target->flags |= EXEC_OBJECT_WRITE;
1411 
1412         /*
1413          * Sandybridge PPGTT errata: We need a global gtt mapping
1414          * for MI and pipe_control writes because the gpu doesn't
1415          * properly redirect them through the ppgtt for non_secure
1416          * batchbuffers.
1417          */
1418         if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1419             GRAPHICS_VER(eb->i915) == 6 &&
1420             !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
1421             struct i915_vma *vma = target->vma;
1422 
1423             reloc_cache_unmap(&eb->reloc_cache);
1424             mutex_lock(&vma->vm->mutex);
1425             err = i915_vma_bind(target->vma,
1426                         target->vma->obj->cache_level,
1427                         PIN_GLOBAL, NULL, NULL);
1428             mutex_unlock(&vma->vm->mutex);
1429             reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
1430             if (err)
1431                 return err;
1432         }
1433     }
1434 
1435     /*
1436      * If the relocation already has the right value in it, no
1437      * more work needs to be done.
1438      */
1439     if (!DBG_FORCE_RELOC &&
1440         gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
1441         return 0;
1442 
1443     /* Check that the relocation address is valid... */
1444     if (unlikely(reloc->offset >
1445              ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1446         drm_dbg(&i915->drm, "Relocation beyond object bounds: "
1447               "target %d offset %d size %d.\n",
1448               reloc->target_handle,
1449               (int)reloc->offset,
1450               (int)ev->vma->size);
1451         return -EINVAL;
1452     }
1453     if (unlikely(reloc->offset & 3)) {
1454         drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "
1455               "target %d offset %d.\n",
1456               reloc->target_handle,
1457               (int)reloc->offset);
1458         return -EINVAL;
1459     }
1460 
1461     /*
1462      * If we write into the object, we need to force the synchronisation
1463      * barrier, either with an asynchronous clflush or if we executed the
1464      * patching using the GPU (though that should be serialised by the
1465      * timeline). To be completely sure, and since doing the relocations
1466      * means we are already stalling, disable the user's opt-out of our
1467      * synchronisation.
1468      */
1469     ev->flags &= ~EXEC_OBJECT_ASYNC;
1470 
1471     /* and update the user's relocation entry */
1472     return relocate_entry(ev->vma, reloc, eb, target->vma);
1473 }
1474 
1475 static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
1476 {
1477 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1478     struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
1479     const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1480     struct drm_i915_gem_relocation_entry __user *urelocs =
1481         u64_to_user_ptr(entry->relocs_ptr);
1482     unsigned long remain = entry->relocation_count;
1483 
1484     if (unlikely(remain > N_RELOC(ULONG_MAX)))
1485         return -EINVAL;
1486 
1487     /*
1488      * We must check that the entire relocation array is safe
1489      * to read. However, if the array is not writable the user loses
1490      * the updated relocation values.
1491      */
1492     if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
1493         return -EFAULT;
1494 
1495     do {
1496         struct drm_i915_gem_relocation_entry *r = stack;
1497         unsigned int count =
1498             min_t(unsigned long, remain, ARRAY_SIZE(stack));
1499         unsigned int copied;
1500 
1501         /*
1502          * This is the fast path and we cannot handle a pagefault
1503          * whilst holding the struct mutex lest the user pass in the
1504          * relocations contained within a mmaped bo. In such a case the
1505          * page fault handler would call i915_gem_fault() and we would
1506          * try to acquire the struct mutex again. Obviously
1507          * this is bad and so lockdep complains vehemently.
1508          */
1509         pagefault_disable();
1510         copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1511         pagefault_enable();
1512         if (unlikely(copied)) {
1513             remain = -EFAULT;
1514             goto out;
1515         }
1516 
1517         remain -= count;
1518         do {
1519             u64 offset = eb_relocate_entry(eb, ev, r);
1520 
1521             if (likely(offset == 0)) {
1522             } else if ((s64)offset < 0) {
1523                 remain = (int)offset;
1524                 goto out;
1525             } else {
1526                 /*
1527                  * Note that reporting an error now
1528                  * leaves everything in an inconsistent
1529                  * state as we have *already* changed
1530                  * the relocation value inside the
1531                  * object. Since we have not updated the
1532                  * reloc.presumed_offset, nor will we
1533                  * update the execobject.offset, on the
1534                  * next call we may not rewrite the value
1535                  * inside the object, leaving it
1536                  * dangling and causing a GPU hang, unless
1537                  * userspace dynamically rebuilds the
1538                  * relocations on each execbuf rather than
1539                  * presuming a static tree.
1540                  *
1541                  * We did previously check that the relocations
1542                  * were writable (access_ok), so an error now
1543                  * would be a strange race with mprotect,
1544                  * having already demonstrated that we
1545                  * can read from this userspace address.
1546                  */
1547                 offset = gen8_canonical_addr(offset & ~UPDATE);
1548                 __put_user(offset,
1549                        &urelocs[r - stack].presumed_offset);
1550             }
1551         } while (r++, --count);
1552         urelocs += ARRAY_SIZE(stack);
1553     } while (remain);
1554 out:
1555     reloc_cache_reset(&eb->reloc_cache, eb);
1556     return remain;
1557 }
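
     /*
      * For reference only (layout from the uapi header, not duplicated in
      * this file): each entry walked above is a userspace-supplied
      *
      *     struct drm_i915_gem_relocation_entry {
      *             __u32 target_handle;    - execobject to point at
      *             __u32 delta;            - value added to the target's GPU address
      *             __u64 offset;           - byte offset to patch within this object
      *             __u64 presumed_offset;  - userspace's guess of the target address
      *             __u32 read_domains;
      *             __u32 write_domain;
      *     };
      *
      * eb_relocate_entry() compares presumed_offset against the target's
      * actual placement and only patches the object when they differ (or
      * when DBG_FORCE_RELOC is set).
      */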
1558 
1559 static int
1560 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
1561 {
1562     const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1563     struct drm_i915_gem_relocation_entry *relocs =
1564         u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1565     unsigned int i;
1566     int err;
1567 
1568     for (i = 0; i < entry->relocation_count; i++) {
1569         u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
1570 
1571         if ((s64)offset < 0) {
1572             err = (int)offset;
1573             goto err;
1574         }
1575     }
1576     err = 0;
1577 err:
1578     reloc_cache_reset(&eb->reloc_cache, eb);
1579     return err;
1580 }
1581 
1582 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1583 {
1584     const char __user *addr, *end;
1585     unsigned long size;
1586     char __maybe_unused c;
1587 
1588     size = entry->relocation_count;
1589     if (size == 0)
1590         return 0;
1591 
1592     if (size > N_RELOC(ULONG_MAX))
1593         return -EINVAL;
1594 
1595     addr = u64_to_user_ptr(entry->relocs_ptr);
1596     size *= sizeof(struct drm_i915_gem_relocation_entry);
1597     if (!access_ok(addr, size))
1598         return -EFAULT;
1599 
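         /*
          * Touch one byte in every page of the user array (plus the final
          * byte) so the whole range is faulted in and known to be readable,
          * e.g. before retrying the pagefault-disabled fast path in
          * eb_relocate_vma().
          */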
1600     end = addr + size;
1601     for (; addr < end; addr += PAGE_SIZE) {
1602         int err = __get_user(c, addr);
1603         if (err)
1604             return err;
1605     }
1606     return __get_user(c, end - 1);
1607 }
1608 
1609 static int eb_copy_relocations(const struct i915_execbuffer *eb)
1610 {
1611     struct drm_i915_gem_relocation_entry *relocs;
1612     const unsigned int count = eb->buffer_count;
1613     unsigned int i;
1614     int err;
1615 
1616     for (i = 0; i < count; i++) {
1617         const unsigned int nreloc = eb->exec[i].relocation_count;
1618         struct drm_i915_gem_relocation_entry __user *urelocs;
1619         unsigned long size;
1620         unsigned long copied;
1621 
1622         if (nreloc == 0)
1623             continue;
1624 
1625         err = check_relocations(&eb->exec[i]);
1626         if (err)
1627             goto err;
1628 
1629         urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1630         size = nreloc * sizeof(*relocs);
1631 
1632         relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1633         if (!relocs) {
1634             err = -ENOMEM;
1635             goto err;
1636         }
1637 
1638         /* copy_from_user is limited to < 4GiB */
1639         copied = 0;
1640         do {
1641             unsigned int len =
1642                 min_t(u64, BIT_ULL(31), size - copied);
1643 
1644             if (__copy_from_user((char *)relocs + copied,
1645                          (char __user *)urelocs + copied,
1646                          len))
1647                 goto end;
1648 
1649             copied += len;
1650         } while (copied < size);
1651 
1652         /*
1653          * As we do not update the known relocation offsets after
1654          * relocating (due to the complexities in lock handling),
1655          * we need to mark them as invalid now so that we force the
1656          * relocation processing next time. Just in case the target
1657          * object is evicted and then rebound into its old
1658          * presumed_offset before the next execbuffer - if that
1659          * happened we would make the mistake of assuming that the
1660          * relocations were valid.
1661          */
1662         if (!user_access_begin(urelocs, size))
1663             goto end;
1664 
1665         for (copied = 0; copied < nreloc; copied++)
1666             unsafe_put_user(-1,
1667                     &urelocs[copied].presumed_offset,
1668                     end_user);
1669         user_access_end();
1670 
1671         eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1672     }
1673 
1674     return 0;
1675 
1676 end_user:
1677     user_access_end();
1678 end:
1679     kvfree(relocs);
1680     err = -EFAULT;
1681 err:
1682     while (i--) {
1683         relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1684         if (eb->exec[i].relocation_count)
1685             kvfree(relocs);
1686     }
1687     return err;
1688 }
1689 
1690 static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1691 {
1692     const unsigned int count = eb->buffer_count;
1693     unsigned int i;
1694 
1695     for (i = 0; i < count; i++) {
1696         int err;
1697 
1698         err = check_relocations(&eb->exec[i]);
1699         if (err)
1700             return err;
1701     }
1702 
1703     return 0;
1704 }
1705 
1706 static int eb_reinit_userptr(struct i915_execbuffer *eb)
1707 {
1708     const unsigned int count = eb->buffer_count;
1709     unsigned int i;
1710     int ret;
1711 
1712     if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
1713         return 0;
1714 
1715     for (i = 0; i < count; i++) {
1716         struct eb_vma *ev = &eb->vma[i];
1717 
1718         if (!i915_gem_object_is_userptr(ev->vma->obj))
1719             continue;
1720 
1721         ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
1722         if (ret)
1723             return ret;
1724 
1725         ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
1726     }
1727 
1728     return 0;
1729 }
1730 
1731 static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
1732 {
1733     bool have_copy = false;
1734     struct eb_vma *ev;
1735     int err = 0;
1736 
1737 repeat:
1738     if (signal_pending(current)) {
1739         err = -ERESTARTSYS;
1740         goto out;
1741     }
1742 
1743     /* We may process another execbuffer during the unlock... */
1744     eb_release_vmas(eb, false);
1745     i915_gem_ww_ctx_fini(&eb->ww);
1746 
1747     /*
1748      * We take 3 passes through the slowpath.
1749      *
1750      * 1 - we try to just prefault all the user relocation entries and
1751      * then attempt to reuse the atomic pagefault disabled fast path again.
1752      *
1753      * 2 - we copy the user entries to a local buffer here outside of the
1754      * lock and allow ourselves to wait upon any rendering before
1755      * performing the relocations.
1756      *
1757      * 3 - we already have a local copy of the relocation entries, but
1758      * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1759      */
1760     if (!err) {
1761         err = eb_prefault_relocations(eb);
1762     } else if (!have_copy) {
1763         err = eb_copy_relocations(eb);
1764         have_copy = err == 0;
1765     } else {
1766         cond_resched();
1767         err = 0;
1768     }
1769 
1770     if (!err)
1771         err = eb_reinit_userptr(eb);
1772 
1773     i915_gem_ww_ctx_init(&eb->ww, true);
1774     if (err)
1775         goto out;
1776 
1777     /* reacquire the objects */
1778 repeat_validate:
1779     err = eb_pin_engine(eb, false);
1780     if (err)
1781         goto err;
1782 
1783     err = eb_validate_vmas(eb);
1784     if (err)
1785         goto err;
1786 
1787     GEM_BUG_ON(!eb->batches[0]);
1788 
1789     list_for_each_entry(ev, &eb->relocs, reloc_link) {
1790         if (!have_copy) {
1791             err = eb_relocate_vma(eb, ev);
1792             if (err)
1793                 break;
1794         } else {
1795             err = eb_relocate_vma_slow(eb, ev);
1796             if (err)
1797                 break;
1798         }
1799     }
1800 
1801     if (err == -EDEADLK)
1802         goto err;
1803 
1804     if (err && !have_copy)
1805         goto repeat;
1806 
1807     if (err)
1808         goto err;
1809 
1810     /* as last step, parse the command buffer */
1811     err = eb_parse(eb);
1812     if (err)
1813         goto err;
1814 
1815     /*
1816      * Leave the user relocations as they are; this is the painfully slow path,
1817      * and we want to avoid the complication of dropping the lock whilst
1818      * having buffers reserved in the aperture and so causing spurious
1819      * ENOSPC for random operations.
1820      */
1821 
1822 err:
1823     if (err == -EDEADLK) {
1824         eb_release_vmas(eb, false);
1825         err = i915_gem_ww_ctx_backoff(&eb->ww);
1826         if (!err)
1827             goto repeat_validate;
1828     }
1829 
1830     if (err == -EAGAIN)
1831         goto repeat;
1832 
1833 out:
1834     if (have_copy) {
1835         const unsigned int count = eb->buffer_count;
1836         unsigned int i;
1837 
1838         for (i = 0; i < count; i++) {
1839             const struct drm_i915_gem_exec_object2 *entry =
1840                 &eb->exec[i];
1841             struct drm_i915_gem_relocation_entry *relocs;
1842 
1843             if (!entry->relocation_count)
1844                 continue;
1845 
1846             relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1847             kvfree(relocs);
1848         }
1849     }
1850 
1851     return err;
1852 }
1853 
1854 static int eb_relocate_parse(struct i915_execbuffer *eb)
1855 {
1856     int err;
1857     bool throttle = true;
1858 
1859 retry:
1860     err = eb_pin_engine(eb, throttle);
1861     if (err) {
1862         if (err != -EDEADLK)
1863             return err;
1864 
1865         goto err;
1866     }
1867 
1868     /* only throttle once, even if we didn't need to throttle */
1869     throttle = false;
1870 
1871     err = eb_validate_vmas(eb);
1872     if (err == -EAGAIN)
1873         goto slow;
1874     else if (err)
1875         goto err;
1876 
1877     /* The objects are in their final locations, apply the relocations. */
1878     if (eb->args->flags & __EXEC_HAS_RELOC) {
1879         struct eb_vma *ev;
1880 
1881         list_for_each_entry(ev, &eb->relocs, reloc_link) {
1882             err = eb_relocate_vma(eb, ev);
1883             if (err)
1884                 break;
1885         }
1886 
1887         if (err == -EDEADLK)
1888             goto err;
1889         else if (err)
1890             goto slow;
1891     }
1892 
1893     if (!err)
1894         err = eb_parse(eb);
1895 
1896 err:
1897     if (err == -EDEADLK) {
1898         eb_release_vmas(eb, false);
1899         err = i915_gem_ww_ctx_backoff(&eb->ww);
1900         if (!err)
1901             goto retry;
1902     }
1903 
1904     return err;
1905 
1906 slow:
1907     err = eb_relocate_parse_slow(eb);
1908     if (err)
1909         /*
1910          * If the user expects the execobject.offset and
1911          * reloc.presumed_offset to be an exact match,
1912          * as for using NO_RELOC, then we cannot update
1913          * the execobject.offset until we have completed
1914          * relocation.
1915          */
1916         eb->args->flags &= ~__EXEC_HAS_RELOC;
1917 
1918     return err;
1919 }
1920 
1921 /*
1922  * Two helper loops define the order in which requests / batches are created
1923  * and added to the backend. Requests are created in order from the parent to
1924  * the last child. Requests are added in the reverse order, from the last child
1925  * to the parent. This is done for locking reasons as the timeline lock is acquired
1926  * during request creation and released when the request is added to the
1927  * backend. To make lockdep happy (see intel_context_timeline_lock) this must be
1928  * the ordering.
1929  */
1930 #define for_each_batch_create_order(_eb, _i) \
1931     for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i))
1932 #define for_each_batch_add_order(_eb, _i) \
1933     BUILD_BUG_ON(!typecheck(int, _i)); \
1934     for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
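
     /*
      * Illustrative usage only (the real call sites are eb_requests_create()
      * and eb_requests_add() further below):
      *
      *     for_each_batch_create_order(eb, i)
      *             eb->requests[i] = i915_request_create(eb_find_context(eb, i));
      *     ...
      *     for_each_batch_add_order(eb, i)
      *             eb_request_add(eb, eb->requests[i], err, i == 0);
      */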
1935 
1936 static struct i915_request *
1937 eb_find_first_request_added(struct i915_execbuffer *eb)
1938 {
1939     int i;
1940 
1941     for_each_batch_add_order(eb, i)
1942         if (eb->requests[i])
1943             return eb->requests[i];
1944 
1945     GEM_BUG_ON("Request not found");
1946 
1947     return NULL;
1948 }
1949 
1950 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
1951 
1952 /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
1953 static int eb_capture_stage(struct i915_execbuffer *eb)
1954 {
1955     const unsigned int count = eb->buffer_count;
1956     unsigned int i = count, j;
1957 
1958     while (i--) {
1959         struct eb_vma *ev = &eb->vma[i];
1960         struct i915_vma *vma = ev->vma;
1961         unsigned int flags = ev->flags;
1962 
1963         if (!(flags & EXEC_OBJECT_CAPTURE))
1964             continue;
1965 
1966         if (i915_gem_context_is_recoverable(eb->gem_context) &&
1967             (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0)))
1968             return -EINVAL;
1969 
1970         for_each_batch_create_order(eb, j) {
1971             struct i915_capture_list *capture;
1972 
1973             capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1974             if (!capture)
1975                 continue;
1976 
1977             capture->next = eb->capture_lists[j];
1978             capture->vma_res = i915_vma_resource_get(vma->resource);
1979             eb->capture_lists[j] = capture;
1980         }
1981     }
1982 
1983     return 0;
1984 }
1985 
1986 /* Commit once we're in the critical path */
1987 static void eb_capture_commit(struct i915_execbuffer *eb)
1988 {
1989     unsigned int j;
1990 
1991     for_each_batch_create_order(eb, j) {
1992         struct i915_request *rq = eb->requests[j];
1993 
1994         if (!rq)
1995             break;
1996 
1997         rq->capture_list = eb->capture_lists[j];
1998         eb->capture_lists[j] = NULL;
1999     }
2000 }
2001 
2002 /*
2003  * Release anything that didn't get committed due to errors.
2004  * The capture_list will otherwise be freed at request retire.
2005  */
2006 static void eb_capture_release(struct i915_execbuffer *eb)
2007 {
2008     unsigned int j;
2009 
2010     for_each_batch_create_order(eb, j) {
2011         if (eb->capture_lists[j]) {
2012             i915_request_free_capture_list(eb->capture_lists[j]);
2013             eb->capture_lists[j] = NULL;
2014         }
2015     }
2016 }
2017 
2018 static void eb_capture_list_clear(struct i915_execbuffer *eb)
2019 {
2020     memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
2021 }
2022 
2023 #else
2024 
2025 static int eb_capture_stage(struct i915_execbuffer *eb)
2026 {
2027     return 0;
2028 }
2029 
2030 static void eb_capture_commit(struct i915_execbuffer *eb)
2031 {
2032 }
2033 
2034 static void eb_capture_release(struct i915_execbuffer *eb)
2035 {
2036 }
2037 
2038 static void eb_capture_list_clear(struct i915_execbuffer *eb)
2039 {
2040 }
2041 
2042 #endif
2043 
2044 static int eb_move_to_gpu(struct i915_execbuffer *eb)
2045 {
2046     const unsigned int count = eb->buffer_count;
2047     unsigned int i = count;
2048     int err = 0, j;
2049 
2050     while (i--) {
2051         struct eb_vma *ev = &eb->vma[i];
2052         struct i915_vma *vma = ev->vma;
2053         unsigned int flags = ev->flags;
2054         struct drm_i915_gem_object *obj = vma->obj;
2055 
2056         assert_vma_held(vma);
2057 
2058         /*
2059          * If the GPU is not _reading_ through the CPU cache, we need
2060          * to make sure that any writes (both previous GPU writes from
2061          * before a change in snooping levels and normal CPU writes)
2062          * caught in that cache are flushed to main memory.
2063          *
2064          * We want to say
2065          *   obj->cache_dirty &&
2066          *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
2067          * but gcc's optimiser doesn't handle that as well and emits
2068          * two jumps instead of one. Maybe one day...
2069          *
2070          * FIXME: There is also sync flushing in set_pages(), which
2071          * serves a different purpose (some of the time at least).
2072          *
2073          * We should consider:
2074          *
2075          *   1. Rip out the async flush code.
2076          *
2077          *   2. Or make the sync flushing use the async clflush path
2078          *   using mandatory fences underneath. Currently the below
2079          *   async flush happens after we bind the object.
2080          */
2081         if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
2082             if (i915_gem_clflush_object(obj, 0))
2083                 flags &= ~EXEC_OBJECT_ASYNC;
2084         }
2085 
2086         /* We only need to await on the first request */
2087         if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
2088             err = i915_request_await_object
2089                 (eb_find_first_request_added(eb), obj,
2090                  flags & EXEC_OBJECT_WRITE);
2091         }
2092 
2093         for_each_batch_add_order(eb, j) {
2094             if (err)
2095                 break;
2096             if (!eb->requests[j])
2097                 continue;
2098 
2099             err = _i915_vma_move_to_active(vma, eb->requests[j],
2100                                j ? NULL :
2101                                eb->composite_fence ?
2102                                eb->composite_fence :
2103                                &eb->requests[j]->fence,
2104                                flags | __EXEC_OBJECT_NO_RESERVE);
2105         }
2106     }
2107 
2108 #ifdef CONFIG_MMU_NOTIFIER
2109     if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
2110         read_lock(&eb->i915->mm.notifier_lock);
2111 
2112         /*
2113          * count is always at least 1, otherwise __EXEC_USERPTR_USED
2114          * could not have been set
2115          */
2116         for (i = 0; i < count; i++) {
2117             struct eb_vma *ev = &eb->vma[i];
2118             struct drm_i915_gem_object *obj = ev->vma->obj;
2119 
2120             if (!i915_gem_object_is_userptr(obj))
2121                 continue;
2122 
2123             err = i915_gem_object_userptr_submit_done(obj);
2124             if (err)
2125                 break;
2126         }
2127 
2128         read_unlock(&eb->i915->mm.notifier_lock);
2129     }
2130 #endif
2131 
2132     if (unlikely(err))
2133         goto err_skip;
2134 
2135     /* Unconditionally flush any chipset caches (for streaming writes). */
2136     intel_gt_chipset_flush(eb->gt);
2137     eb_capture_commit(eb);
2138 
2139     return 0;
2140 
2141 err_skip:
2142     for_each_batch_create_order(eb, j) {
2143         if (!eb->requests[j])
2144             break;
2145 
2146         i915_request_set_error_once(eb->requests[j], err);
2147     }
2148     return err;
2149 }
2150 
2151 static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
2152 {
2153     if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
2154         return -EINVAL;
2155 
2156     /* Kernel clipping was a DRI1 misfeature */
2157     if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
2158                  I915_EXEC_USE_EXTENSIONS))) {
2159         if (exec->num_cliprects || exec->cliprects_ptr)
2160             return -EINVAL;
2161     }
2162 
2163     if (exec->DR4 == 0xffffffff) {
2164         DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
2165         exec->DR4 = 0;
2166     }
2167     if (exec->DR1 || exec->DR4)
2168         return -EINVAL;
2169 
2170     if ((exec->batch_start_offset | exec->batch_len) & 0x7)
2171         return -EINVAL;
2172 
2173     return 0;
2174 }
2175 
2176 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
2177 {
2178     u32 *cs;
2179     int i;
2180 
2181     if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) {
2182         drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
2183         return -EINVAL;
2184     }
2185 
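         /*
          * One MI_LOAD_REGISTER_IMM header dword, four (register, value)
          * pairs clearing GEN7_SO_WRITE_OFFSET(0..3), plus a trailing
          * MI_NOOP to pad to an even number of dwords: 4 * 2 + 2 in total,
          * matching the space reserved below.
          */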
2186     cs = intel_ring_begin(rq, 4 * 2 + 2);
2187     if (IS_ERR(cs))
2188         return PTR_ERR(cs);
2189 
2190     *cs++ = MI_LOAD_REGISTER_IMM(4);
2191     for (i = 0; i < 4; i++) {
2192         *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
2193         *cs++ = 0;
2194     }
2195     *cs++ = MI_NOOP;
2196     intel_ring_advance(rq, cs);
2197 
2198     return 0;
2199 }
2200 
2201 static struct i915_vma *
2202 shadow_batch_pin(struct i915_execbuffer *eb,
2203          struct drm_i915_gem_object *obj,
2204          struct i915_address_space *vm,
2205          unsigned int flags)
2206 {
2207     struct i915_vma *vma;
2208     int err;
2209 
2210     vma = i915_vma_instance(obj, vm, NULL);
2211     if (IS_ERR(vma))
2212         return vma;
2213 
2214     err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
2215     if (err)
2216         return ERR_PTR(err);
2217 
2218     return vma;
2219 }
2220 
2221 static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
2222 {
2223     /*
2224      * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2225      * batch" bit. Hence we need to pin secure batches into the global gtt.
2226      * hsw should have this fixed, but bdw mucks it up again. */
2227     if (eb->batch_flags & I915_DISPATCH_SECURE)
2228         return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
2229 
2230     return NULL;
2231 }
2232 
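     /*
      * When the command parser is required, the user batch is copied into a
      * shadow buffer taken from the gt buffer pool, validated with
      * intel_engine_cmd_parser(), and the shadow (plus an optional trampoline
      * when the parser operates on the GGTT) is executed instead of the
      * original batch. The shadow is appended to eb->vma[] so that it is
      * tracked and released like any other execbuffer object.
      */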
2233 static int eb_parse(struct i915_execbuffer *eb)
2234 {
2235     struct drm_i915_private *i915 = eb->i915;
2236     struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
2237     struct i915_vma *shadow, *trampoline, *batch;
2238     unsigned long len;
2239     int err;
2240 
2241     if (!eb_use_cmdparser(eb)) {
2242         batch = eb_dispatch_secure(eb, eb->batches[0]->vma);
2243         if (IS_ERR(batch))
2244             return PTR_ERR(batch);
2245 
2246         goto secure_batch;
2247     }
2248 
2249     if (intel_context_is_parallel(eb->context))
2250         return -EINVAL;
2251 
2252     len = eb->batch_len[0];
2253     if (!CMDPARSER_USES_GGTT(eb->i915)) {
2254         /*
2255          * ppGTT backed shadow buffers must be mapped RO, to prevent
2256          * post-scan tampering
2257          */
2258         if (!eb->context->vm->has_read_only) {
2259             drm_dbg(&i915->drm,
2260                 "Cannot prevent post-scan tampering without RO capable vm\n");
2261             return -EINVAL;
2262         }
2263     } else {
2264         len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
2265     }
2266     if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */
2267         return -EINVAL;
2268 
2269     if (!pool) {
2270         pool = intel_gt_get_buffer_pool(eb->gt, len,
2271                         I915_MAP_WB);
2272         if (IS_ERR(pool))
2273             return PTR_ERR(pool);
2274         eb->batch_pool = pool;
2275     }
2276 
2277     err = i915_gem_object_lock(pool->obj, &eb->ww);
2278     if (err)
2279         return err;
2280 
2281     shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
2282     if (IS_ERR(shadow))
2283         return PTR_ERR(shadow);
2284 
2285     intel_gt_buffer_pool_mark_used(pool);
2286     i915_gem_object_set_readonly(shadow->obj);
2287     shadow->private = pool;
2288 
2289     trampoline = NULL;
2290     if (CMDPARSER_USES_GGTT(eb->i915)) {
2291         trampoline = shadow;
2292 
2293         shadow = shadow_batch_pin(eb, pool->obj,
2294                       &eb->gt->ggtt->vm,
2295                       PIN_GLOBAL);
2296         if (IS_ERR(shadow))
2297             return PTR_ERR(shadow);
2298 
2299         shadow->private = pool;
2300 
2301         eb->batch_flags |= I915_DISPATCH_SECURE;
2302     }
2303 
2304     batch = eb_dispatch_secure(eb, shadow);
2305     if (IS_ERR(batch))
2306         return PTR_ERR(batch);
2307 
2308     err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
2309     if (err)
2310         return err;
2311 
2312     err = intel_engine_cmd_parser(eb->context->engine,
2313                       eb->batches[0]->vma,
2314                       eb->batch_start_offset,
2315                       eb->batch_len[0],
2316                       shadow, trampoline);
2317     if (err)
2318         return err;
2319 
2320     eb->batches[0] = &eb->vma[eb->buffer_count++];
2321     eb->batches[0]->vma = i915_vma_get(shadow);
2322     eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
2323 
2324     eb->trampoline = trampoline;
2325     eb->batch_start_offset = 0;
2326 
2327 secure_batch:
2328     if (batch) {
2329         if (intel_context_is_parallel(eb->context))
2330             return -EINVAL;
2331 
2332         eb->batches[0] = &eb->vma[eb->buffer_count++];
2333         eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
2334         eb->batches[0]->vma = i915_vma_get(batch);
2335     }
2336     return 0;
2337 }
2338 
2339 static int eb_request_submit(struct i915_execbuffer *eb,
2340                  struct i915_request *rq,
2341                  struct i915_vma *batch,
2342                  u64 batch_len)
2343 {
2344     int err;
2345 
2346     if (intel_context_nopreempt(rq->context))
2347         __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
2348 
2349     if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
2350         err = i915_reset_gen7_sol_offsets(rq);
2351         if (err)
2352             return err;
2353     }
2354 
2355     /*
2356      * After we have completed waiting for other engines (using HW
2357      * semaphores), we can signal that this request/batch is ready to run. This
2358      * allows us to determine if the batch is still waiting on the GPU
2359      * or actually running by checking the breadcrumb.
2360      */
2361     if (rq->context->engine->emit_init_breadcrumb) {
2362         err = rq->context->engine->emit_init_breadcrumb(rq);
2363         if (err)
2364             return err;
2365     }
2366 
2367     err = rq->context->engine->emit_bb_start(rq,
2368                          batch->node.start +
2369                          eb->batch_start_offset,
2370                          batch_len,
2371                          eb->batch_flags);
2372     if (err)
2373         return err;
2374 
2375     if (eb->trampoline) {
2376         GEM_BUG_ON(intel_context_is_parallel(rq->context));
2377         GEM_BUG_ON(eb->batch_start_offset);
2378         err = rq->context->engine->emit_bb_start(rq,
2379                              eb->trampoline->node.start +
2380                              batch_len, 0, 0);
2381         if (err)
2382             return err;
2383     }
2384 
2385     return 0;
2386 }
2387 
2388 static int eb_submit(struct i915_execbuffer *eb)
2389 {
2390     unsigned int i;
2391     int err;
2392 
2393     err = eb_move_to_gpu(eb);
2394 
2395     for_each_batch_create_order(eb, i) {
2396         if (!eb->requests[i])
2397             break;
2398 
2399         trace_i915_request_queue(eb->requests[i], eb->batch_flags);
2400         if (!err)
2401             err = eb_request_submit(eb, eb->requests[i],
2402                         eb->batches[i]->vma,
2403                         eb->batch_len[i]);
2404     }
2405 
2406     return err;
2407 }
2408 
2409 static int num_vcs_engines(struct drm_i915_private *i915)
2410 {
2411     return hweight_long(VDBOX_MASK(to_gt(i915)));
2412 }
2413 
2414 /*
2415  * Find one BSD ring to dispatch the corresponding BSD command.
2416  * The engine index is returned.
2417  */
2418 static unsigned int
2419 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
2420              struct drm_file *file)
2421 {
2422     struct drm_i915_file_private *file_priv = file->driver_priv;
2423 
2424     /* Check whether the file_priv has already selected one ring. */
2425     if ((int)file_priv->bsd_engine < 0)
2426         file_priv->bsd_engine =
2427             get_random_int() % num_vcs_engines(dev_priv);
2428 
2429     return file_priv->bsd_engine;
2430 }
2431 
2432 static const enum intel_engine_id user_ring_map[] = {
2433     [I915_EXEC_DEFAULT] = RCS0,
2434     [I915_EXEC_RENDER]  = RCS0,
2435     [I915_EXEC_BLT]     = BCS0,
2436     [I915_EXEC_BSD]     = VCS0,
2437     [I915_EXEC_VEBOX]   = VECS0
2438 };
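
     /*
      * Legacy ring selection: the low bits of drm_i915_gem_execbuffer2.flags
      * index this table, e.g. a flags value of I915_EXEC_BLT dispatches to
      * BCS0. I915_EXEC_BSD is special-cased in eb_select_legacy_ring() below
      * to choose between multiple video engines.
      */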
2439 
2440 static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
2441 {
2442     struct intel_ring *ring = ce->ring;
2443     struct intel_timeline *tl = ce->timeline;
2444     struct i915_request *rq;
2445 
2446     /*
2447      * Completely unscientific finger-in-the-air estimates for suitable
2448      * maximum user request size (to avoid blocking) and then backoff.
2449      */
2450     if (intel_ring_update_space(ring) >= PAGE_SIZE)
2451         return NULL;
2452 
2453     /*
2454      * Find a request that after waiting upon, there will be at least half
2455      * the ring available. The hysteresis allows us to compete for the
2456      * shared ring and should mean that we sleep less often prior to
2457      * claiming our resources, but not so long that the ring completely
2458      * drains before we can submit our next request.
2459      */
2460     list_for_each_entry(rq, &tl->requests, link) {
2461         if (rq->ring != ring)
2462             continue;
2463 
2464         if (__intel_ring_space(rq->postfix,
2465                        ring->emit, ring->size) > ring->size / 2)
2466             break;
2467     }
2468     if (&rq->link == &tl->requests)
2469         return NULL; /* weird, we will check again later for real */
2470 
2471     return i915_request_get(rq);
2472 }
2473 
2474 static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
2475                bool throttle)
2476 {
2477     struct intel_timeline *tl;
2478     struct i915_request *rq = NULL;
2479 
2480     /*
2481      * Take a local wakeref for preparing to dispatch the execbuf as
2482      * we expect to access the hardware fairly frequently in the
2483      * process, and require the engine to be kept awake between accesses.
2484      * Upon dispatch, we acquire another prolonged wakeref that we hold
2485      * until the timeline is idle, which in turn releases the wakeref
2486      * taken on the engine, and the parent device.
2487      */
2488     tl = intel_context_timeline_lock(ce);
2489     if (IS_ERR(tl))
2490         return PTR_ERR(tl);
2491 
2492     intel_context_enter(ce);
2493     if (throttle)
2494         rq = eb_throttle(eb, ce);
2495     intel_context_timeline_unlock(tl);
2496 
2497     if (rq) {
2498         bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
2499         long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;
2500 
2501         if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
2502                       timeout) < 0) {
2503             i915_request_put(rq);
2504 
2505             /*
2506              * Error path, cannot use intel_context_timeline_lock as
2507              * that is user interruptible and this clean-up step
2508              * must be done.
2509              */
2510             mutex_lock(&ce->timeline->mutex);
2511             intel_context_exit(ce);
2512             mutex_unlock(&ce->timeline->mutex);
2513 
2514             if (nonblock)
2515                 return -EWOULDBLOCK;
2516             else
2517                 return -EINTR;
2518         }
2519         i915_request_put(rq);
2520     }
2521 
2522     return 0;
2523 }
2524 
2525 static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
2526 {
2527     struct intel_context *ce = eb->context, *child;
2528     int err;
2529     int i = 0, j = 0;
2530 
2531     GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
2532 
2533     if (unlikely(intel_context_is_banned(ce)))
2534         return -EIO;
2535 
2536     /*
2537      * Pinning the contexts may generate requests in order to acquire
2538      * GGTT space, so do this first before we reserve a seqno for
2539      * ourselves.
2540      */
2541     err = intel_context_pin_ww(ce, &eb->ww);
2542     if (err)
2543         return err;
2544     for_each_child(ce, child) {
2545         err = intel_context_pin_ww(child, &eb->ww);
2546         GEM_BUG_ON(err);    /* perma-pinned should incr a counter */
2547     }
2548 
2549     for_each_child(ce, child) {
2550         err = eb_pin_timeline(eb, child, throttle);
2551         if (err)
2552             goto unwind;
2553         ++i;
2554     }
2555     err = eb_pin_timeline(eb, ce, throttle);
2556     if (err)
2557         goto unwind;
2558 
2559     eb->args->flags |= __EXEC_ENGINE_PINNED;
2560     return 0;
2561 
2562 unwind:
2563     for_each_child(ce, child) {
2564         if (j++ < i) {
2565             mutex_lock(&child->timeline->mutex);
2566             intel_context_exit(child);
2567             mutex_unlock(&child->timeline->mutex);
2568         }
2569     }
2570     for_each_child(ce, child)
2571         intel_context_unpin(child);
2572     intel_context_unpin(ce);
2573     return err;
2574 }
2575 
2576 static void eb_unpin_engine(struct i915_execbuffer *eb)
2577 {
2578     struct intel_context *ce = eb->context, *child;
2579 
2580     if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
2581         return;
2582 
2583     eb->args->flags &= ~__EXEC_ENGINE_PINNED;
2584 
2585     for_each_child(ce, child) {
2586         mutex_lock(&child->timeline->mutex);
2587         intel_context_exit(child);
2588         mutex_unlock(&child->timeline->mutex);
2589 
2590         intel_context_unpin(child);
2591     }
2592 
2593     mutex_lock(&ce->timeline->mutex);
2594     intel_context_exit(ce);
2595     mutex_unlock(&ce->timeline->mutex);
2596 
2597     intel_context_unpin(ce);
2598 }
2599 
2600 static unsigned int
2601 eb_select_legacy_ring(struct i915_execbuffer *eb)
2602 {
2603     struct drm_i915_private *i915 = eb->i915;
2604     struct drm_i915_gem_execbuffer2 *args = eb->args;
2605     unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2606 
2607     if (user_ring_id != I915_EXEC_BSD &&
2608         (args->flags & I915_EXEC_BSD_MASK)) {
2609         drm_dbg(&i915->drm,
2610             "execbuf with non bsd ring but with invalid "
2611             "bsd dispatch flags: %d\n", (int)(args->flags));
2612         return -1;
2613     }
2614 
2615     if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
2616         unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2617 
2618         if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2619             bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
2620         } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2621                bsd_idx <= I915_EXEC_BSD_RING2) {
2622             bsd_idx >>= I915_EXEC_BSD_SHIFT;
2623             bsd_idx--;
2624         } else {
2625             drm_dbg(&i915->drm,
2626                 "execbuf with unknown bsd ring: %u\n",
2627                 bsd_idx);
2628             return -1;
2629         }
2630 
2631         return _VCS(bsd_idx);
2632     }
2633 
2634     if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
2635         drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",
2636             user_ring_id);
2637         return -1;
2638     }
2639 
2640     return user_ring_map[user_ring_id];
2641 }
2642 
2643 static int
2644 eb_select_engine(struct i915_execbuffer *eb)
2645 {
2646     struct intel_context *ce, *child;
2647     unsigned int idx;
2648     int err;
2649 
2650     if (i915_gem_context_user_engines(eb->gem_context))
2651         idx = eb->args->flags & I915_EXEC_RING_MASK;
2652     else
2653         idx = eb_select_legacy_ring(eb);
2654 
2655     ce = i915_gem_context_get_engine(eb->gem_context, idx);
2656     if (IS_ERR(ce))
2657         return PTR_ERR(ce);
2658 
2659     if (intel_context_is_parallel(ce)) {
2660         if (eb->buffer_count < ce->parallel.number_children + 1) {
2661             intel_context_put(ce);
2662             return -EINVAL;
2663         }
2664         if (eb->batch_start_offset || eb->args->batch_len) {
2665             intel_context_put(ce);
2666             return -EINVAL;
2667         }
2668     }
2669     eb->num_batches = ce->parallel.number_children + 1;
2670 
2671     for_each_child(ce, child)
2672         intel_context_get(child);
2673     intel_gt_pm_get(ce->engine->gt);
2674 
2675     if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
2676         err = intel_context_alloc_state(ce);
2677         if (err)
2678             goto err;
2679     }
2680     for_each_child(ce, child) {
2681         if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) {
2682             err = intel_context_alloc_state(child);
2683             if (err)
2684                 goto err;
2685         }
2686     }
2687 
2688     /*
2689      * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2690      * EIO if the GPU is already wedged.
2691      */
2692     err = intel_gt_terminally_wedged(ce->engine->gt);
2693     if (err)
2694         goto err;
2695 
2696     if (!i915_vm_tryget(ce->vm)) {
2697         err = -ENOENT;
2698         goto err;
2699     }
2700 
2701     eb->context = ce;
2702     eb->gt = ce->engine->gt;
2703 
2704     /*
2705      * Make sure engine pool stays alive even if we call intel_context_put
2706      * during ww handling. The pool is destroyed when the last pm reference
2707      * is dropped, which breaks our -EDEADLK handling.
2708      */
2709     return err;
2710 
2711 err:
2712     intel_gt_pm_put(ce->engine->gt);
2713     for_each_child(ce, child)
2714         intel_context_put(child);
2715     intel_context_put(ce);
2716     return err;
2717 }
2718 
2719 static void
2720 eb_put_engine(struct i915_execbuffer *eb)
2721 {
2722     struct intel_context *child;
2723 
2724     i915_vm_put(eb->context->vm);
2725     intel_gt_pm_put(eb->gt);
2726     for_each_child(eb->context, child)
2727         intel_context_put(child);
2728     intel_context_put(eb->context);
2729 }
2730 
2731 static void
2732 __free_fence_array(struct eb_fence *fences, unsigned int n)
2733 {
2734     while (n--) {
2735         drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
2736         dma_fence_put(fences[n].dma_fence);
2737         dma_fence_chain_free(fences[n].chain_fence);
2738     }
2739     kvfree(fences);
2740 }
2741 
2742 static int
2743 add_timeline_fence_array(struct i915_execbuffer *eb,
2744              const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
2745 {
2746     struct drm_i915_gem_exec_fence __user *user_fences;
2747     u64 __user *user_values;
2748     struct eb_fence *f;
2749     u64 nfences;
2750     int err = 0;
2751 
2752     nfences = timeline_fences->fence_count;
2753     if (!nfences)
2754         return 0;
2755 
2756     /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2757     BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2758     if (nfences > min_t(unsigned long,
2759                 ULONG_MAX / sizeof(*user_fences),
2760                 SIZE_MAX / sizeof(*f)) - eb->num_fences)
2761         return -EINVAL;
2762 
2763     user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
2764     if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
2765         return -EFAULT;
2766 
2767     user_values = u64_to_user_ptr(timeline_fences->values_ptr);
2768     if (!access_ok(user_values, nfences * sizeof(*user_values)))
2769         return -EFAULT;
2770 
2771     f = krealloc(eb->fences,
2772              (eb->num_fences + nfences) * sizeof(*f),
2773              __GFP_NOWARN | GFP_KERNEL);
2774     if (!f)
2775         return -ENOMEM;
2776 
2777     eb->fences = f;
2778     f += eb->num_fences;
2779 
2780     BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2781              ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2782 
2783     while (nfences--) {
2784         struct drm_i915_gem_exec_fence user_fence;
2785         struct drm_syncobj *syncobj;
2786         struct dma_fence *fence = NULL;
2787         u64 point;
2788 
2789         if (__copy_from_user(&user_fence,
2790                      user_fences++,
2791                      sizeof(user_fence)))
2792             return -EFAULT;
2793 
2794         if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
2795             return -EINVAL;
2796 
2797         if (__get_user(point, user_values++))
2798             return -EFAULT;
2799 
2800         syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2801         if (!syncobj) {
2802             DRM_DEBUG("Invalid syncobj handle provided\n");
2803             return -ENOENT;
2804         }
2805 
2806         fence = drm_syncobj_fence_get(syncobj);
2807 
2808         if (!fence && user_fence.flags &&
2809             !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2810             DRM_DEBUG("Syncobj handle has no fence\n");
2811             drm_syncobj_put(syncobj);
2812             return -EINVAL;
2813         }
2814 
2815         if (fence)
2816             err = dma_fence_chain_find_seqno(&fence, point);
2817 
2818         if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2819             DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
2820             dma_fence_put(fence);
2821             drm_syncobj_put(syncobj);
2822             return err;
2823         }
2824 
2825         /*
2826          * A point might have been signaled already and
2827          * garbage collected from the timeline. In this case
2828          * just ignore the point and carry on.
2829          */
2830         if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2831             drm_syncobj_put(syncobj);
2832             continue;
2833         }
2834 
2835         /*
2836          * For timeline syncobjs we need to preallocate chains for
2837          * later signaling.
2838          */
2839         if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
2840             /*
2841              * Waiting and signaling the same point (when point !=
2842              * 0) would break the timeline.
2843              */
2844             if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
2845                 DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
2846                 dma_fence_put(fence);
2847                 drm_syncobj_put(syncobj);
2848                 return -EINVAL;
2849             }
2850 
2851             f->chain_fence = dma_fence_chain_alloc();
2852             if (!f->chain_fence) {
2853                 drm_syncobj_put(syncobj);
2854                 dma_fence_put(fence);
2855                 return -ENOMEM;
2856             }
2857         } else {
2858             f->chain_fence = NULL;
2859         }
2860 
2861         f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
2862         f->dma_fence = fence;
2863         f->value = point;
2864         f++;
2865         eb->num_fences++;
2866     }
2867 
2868     return 0;
2869 }
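
     /*
      * For reference (uapi layout, shown here only as an illustration): the
      * extension parsed above is
      *
      *     struct drm_i915_gem_execbuffer_ext_timeline_fences {
      *             struct i915_user_extension base;
      *             __u64 fence_count;      - number of (handle, value) pairs
      *             __u64 handles_ptr;      - array of struct drm_i915_gem_exec_fence
      *             __u64 values_ptr;       - array of u64 timeline points
      *     };
      *
      * A point value of 0 behaves like a binary syncobj: no dma_fence_chain
      * is allocated for signaling it.
      */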
2870 
2871 static int add_fence_array(struct i915_execbuffer *eb)
2872 {
2873     struct drm_i915_gem_execbuffer2 *args = eb->args;
2874     struct drm_i915_gem_exec_fence __user *user;
2875     unsigned long num_fences = args->num_cliprects;
2876     struct eb_fence *f;
2877 
2878     if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2879         return 0;
2880 
2881     if (!num_fences)
2882         return 0;
2883 
2884     /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2885     BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2886     if (num_fences > min_t(unsigned long,
2887                    ULONG_MAX / sizeof(*user),
2888                    SIZE_MAX / sizeof(*f) - eb->num_fences))
2889         return -EINVAL;
2890 
2891     user = u64_to_user_ptr(args->cliprects_ptr);
2892     if (!access_ok(user, num_fences * sizeof(*user)))
2893         return -EFAULT;
2894 
2895     f = krealloc(eb->fences,
2896              (eb->num_fences + num_fences) * sizeof(*f),
2897              __GFP_NOWARN | GFP_KERNEL);
2898     if (!f)
2899         return -ENOMEM;
2900 
2901     eb->fences = f;
2902     f += eb->num_fences;
2903     while (num_fences--) {
2904         struct drm_i915_gem_exec_fence user_fence;
2905         struct drm_syncobj *syncobj;
2906         struct dma_fence *fence = NULL;
2907 
2908         if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
2909             return -EFAULT;
2910 
2911         if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
2912             return -EINVAL;
2913 
2914         syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2915         if (!syncobj) {
2916             DRM_DEBUG("Invalid syncobj handle provided\n");
2917             return -ENOENT;
2918         }
2919 
2920         if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
2921             fence = drm_syncobj_fence_get(syncobj);
2922             if (!fence) {
2923                 DRM_DEBUG("Syncobj handle has no fence\n");
2924                 drm_syncobj_put(syncobj);
2925                 return -EINVAL;
2926             }
2927         }
2928 
2929         BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2930                  ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2931 
2932         f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
2933         f->dma_fence = fence;
2934         f->value = 0;
2935         f->chain_fence = NULL;
2936         f++;
2937         eb->num_fences++;
2938     }
2939 
2940     return 0;
2941 }
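
     /*
      * Each element of the legacy fence array is a
      * struct drm_i915_gem_exec_fence { __u32 handle; __u32 flags; }, where
      * flags combines I915_EXEC_FENCE_WAIT (await the syncobj's fence before
      * executing) and I915_EXEC_FENCE_SIGNAL (replace the syncobj's fence
      * with this request's fence once submitted). See the uapi header for
      * the authoritative definition.
      */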
2942 
2943 static void put_fence_array(struct eb_fence *fences, int num_fences)
2944 {
2945     if (fences)
2946         __free_fence_array(fences, num_fences);
2947 }
2948 
2949 static int
2950 await_fence_array(struct i915_execbuffer *eb,
2951           struct i915_request *rq)
2952 {
2953     unsigned int n;
2954     int err;
2955 
2956     for (n = 0; n < eb->num_fences; n++) {
2957         struct drm_syncobj *syncobj;
2958         unsigned int flags;
2959 
2960         syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
2961 
2962         if (!eb->fences[n].dma_fence)
2963             continue;
2964 
2965         err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence);
2966         if (err < 0)
2967             return err;
2968     }
2969 
2970     return 0;
2971 }
2972 
2973 static void signal_fence_array(const struct i915_execbuffer *eb,
2974                    struct dma_fence * const fence)
2975 {
2976     unsigned int n;
2977 
2978     for (n = 0; n < eb->num_fences; n++) {
2979         struct drm_syncobj *syncobj;
2980         unsigned int flags;
2981 
2982         syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
2983         if (!(flags & I915_EXEC_FENCE_SIGNAL))
2984             continue;
2985 
2986         if (eb->fences[n].chain_fence) {
2987             drm_syncobj_add_point(syncobj,
2988                           eb->fences[n].chain_fence,
2989                           fence,
2990                           eb->fences[n].value);
2991             /*
2992              * The chain's ownership is transferred to the
2993              * timeline.
2994              */
2995             eb->fences[n].chain_fence = NULL;
2996         } else {
2997             drm_syncobj_replace_fence(syncobj, fence);
2998         }
2999     }
3000 }
3001 
3002 static int
3003 parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
3004 {
3005     struct i915_execbuffer *eb = data;
3006     struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
3007 
3008     if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
3009         return -EFAULT;
3010 
3011     return add_timeline_fence_array(eb, &timeline_fences);
3012 }
3013 
3014 static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
3015 {
3016     struct i915_request *rq, *rn;
3017 
3018     list_for_each_entry_safe(rq, rn, &tl->requests, link)
3019         if (rq == end || !i915_request_retire(rq))
3020             break;
3021 }
3022 
3023 static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
3024               int err, bool last_parallel)
3025 {
3026     struct intel_timeline * const tl = i915_request_timeline(rq);
3027     struct i915_sched_attr attr = {};
3028     struct i915_request *prev;
3029 
3030     lockdep_assert_held(&tl->mutex);
3031     lockdep_unpin_lock(&tl->mutex, rq->cookie);
3032 
3033     trace_i915_request_add(rq);
3034 
3035     prev = __i915_request_commit(rq);
3036 
3037     /* Check that the context wasn't destroyed before submission */
3038     if (likely(!intel_context_is_closed(eb->context))) {
3039         attr = eb->gem_context->sched;
3040     } else {
3041         /* Serialise with context_close via the add_to_timeline */
3042         i915_request_set_error_once(rq, -ENOENT);
3043         __i915_request_skip(rq);
3044         err = -ENOENT; /* override any transient errors */
3045     }
3046 
3047     if (intel_context_is_parallel(eb->context)) {
3048         if (err) {
3049             __i915_request_skip(rq);
3050             set_bit(I915_FENCE_FLAG_SKIP_PARALLEL,
3051                 &rq->fence.flags);
3052         }
3053         if (last_parallel)
3054             set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
3055                 &rq->fence.flags);
3056     }
3057 
3058     __i915_request_queue(rq, &attr);
3059 
3060     /* Try to clean up the client's timeline after submitting the request */
3061     if (prev)
3062         retire_requests(tl, prev);
3063 
3064     mutex_unlock(&tl->mutex);
3065 
3066     return err;
3067 }
3068 
3069 static int eb_requests_add(struct i915_execbuffer *eb, int err)
3070 {
3071     int i;
3072 
3073     /*
3074      * We iterate in reverse order of creation to release timeline mutexes in
3075      * the same order.
3076      */
3077     for_each_batch_add_order(eb, i) {
3078         struct i915_request *rq = eb->requests[i];
3079 
3080         if (!rq)
3081             continue;
3082         err |= eb_request_add(eb, rq, err, i == 0);
3083     }
3084 
3085     return err;
3086 }
3087 
3088 static const i915_user_extension_fn execbuf_extensions[] = {
3089     [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
3090 };
3091 
3092 static int
3093 parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
3094               struct i915_execbuffer *eb)
3095 {
3096     if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
3097         return 0;
3098 
3099     /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
3100      * have another flag also using it at the same time.
3101      */
3102     if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
3103         return -EINVAL;
3104 
3105     if (args->num_cliprects != 0)
3106         return -EINVAL;
3107 
3108     return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
3109                     execbuf_extensions,
3110                     ARRAY_SIZE(execbuf_extensions),
3111                     eb);
3112 }
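
     /*
      * Sketch of how userspace chains an extension through cliprects_ptr
      * (illustrative only; struct and field names come from the uapi header):
      *
      *     struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
      *             .base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
      *             .base.next_extension = 0,       - end of the extension chain
      *             .fence_count = n,
      *             .handles_ptr = (uintptr_t)fences,
      *             .values_ptr = (uintptr_t)points,
      *     };
      *     execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
      *     execbuf.num_cliprects = 0;
      *     execbuf.cliprects_ptr = (uintptr_t)&ext;
      */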
3113 
3114 static void eb_requests_get(struct i915_execbuffer *eb)
3115 {
3116     unsigned int i;
3117 
3118     for_each_batch_create_order(eb, i) {
3119         if (!eb->requests[i])
3120             break;
3121 
3122         i915_request_get(eb->requests[i]);
3123     }
3124 }
3125 
3126 static void eb_requests_put(struct i915_execbuffer *eb)
3127 {
3128     unsigned int i;
3129 
3130     for_each_batch_create_order(eb, i) {
3131         if (!eb->requests[i])
3132             break;
3133 
3134         i915_request_put(eb->requests[i]);
3135     }
3136 }
3137 
3138 static struct sync_file *
3139 eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
3140 {
3141     struct sync_file *out_fence = NULL;
3142     struct dma_fence_array *fence_array;
3143     struct dma_fence **fences;
3144     unsigned int i;
3145 
3146     GEM_BUG_ON(!intel_context_is_parent(eb->context));
3147 
3148     fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL);
3149     if (!fences)
3150         return ERR_PTR(-ENOMEM);
3151 
3152     for_each_batch_create_order(eb, i) {
3153         fences[i] = &eb->requests[i]->fence;
3154         __set_bit(I915_FENCE_FLAG_COMPOSITE,
3155               &eb->requests[i]->fence.flags);
3156     }
3157 
3158     fence_array = dma_fence_array_create(eb->num_batches,
3159                          fences,
3160                          eb->context->parallel.fence_context,
3161                          eb->context->parallel.seqno++,
3162                          false);
3163     if (!fence_array) {
3164         kfree(fences);
3165         return ERR_PTR(-ENOMEM);
3166     }
3167 
3168     /* Move ownership to the dma_fence_array created above */
3169     for_each_batch_create_order(eb, i)
3170         dma_fence_get(fences[i]);
3171 
3172     if (out_fence_fd != -1) {
3173         out_fence = sync_file_create(&fence_array->base);
3174         /* sync_file now owns fence_array, drop creation ref */
3175         dma_fence_put(&fence_array->base);
3176         if (!out_fence)
3177             return ERR_PTR(-ENOMEM);
3178     }
3179 
3180     eb->composite_fence = &fence_array->base;
3181 
3182     return out_fence;
3183 }
3184 
3185 static struct sync_file *
3186 eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq,
3187           struct dma_fence *in_fence, int out_fence_fd)
3188 {
3189     struct sync_file *out_fence = NULL;
3190     int err;
3191 
3192     if (unlikely(eb->gem_context->syncobj)) {
3193         struct dma_fence *fence;
3194 
3195         fence = drm_syncobj_fence_get(eb->gem_context->syncobj);
3196         err = i915_request_await_dma_fence(rq, fence);
3197         dma_fence_put(fence);
3198         if (err)
3199             return ERR_PTR(err);
3200     }
3201 
3202     if (in_fence) {
3203         if (eb->args->flags & I915_EXEC_FENCE_SUBMIT)
3204             err = i915_request_await_execution(rq, in_fence);
3205         else
3206             err = i915_request_await_dma_fence(rq, in_fence);
3207         if (err < 0)
3208             return ERR_PTR(err);
3209     }
3210 
3211     if (eb->fences) {
3212         err = await_fence_array(eb, rq);
3213         if (err)
3214             return ERR_PTR(err);
3215     }
3216 
3217     if (intel_context_is_parallel(eb->context)) {
3218         out_fence = eb_composite_fence_create(eb, out_fence_fd);
3219         if (IS_ERR(out_fence))
3220             return ERR_PTR(-ENOMEM);
3221     } else if (out_fence_fd != -1) {
3222         out_fence = sync_file_create(&rq->fence);
3223         if (!out_fence)
3224             return ERR_PTR(-ENOMEM);
3225     }
3226 
3227     return out_fence;
3228 }
3229 
3230 static struct intel_context *
3231 eb_find_context(struct i915_execbuffer *eb, unsigned int context_number)
3232 {
3233     struct intel_context *child;
3234 
3235     if (likely(context_number == 0))
3236         return eb->context;
3237 
3238     for_each_child(eb->context, child)
3239         if (!--context_number)
3240             return child;
3241 
3242     GEM_BUG_ON("Context not found");
3243 
3244     return NULL;
3245 }
3246 
3247 static struct sync_file *
3248 eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
3249            int out_fence_fd)
3250 {
3251     struct sync_file *out_fence = NULL;
3252     unsigned int i;
3253 
3254     for_each_batch_create_order(eb, i) {
3255         /* Allocate a request for this batch buffer nice and early. */
3256         eb->requests[i] = i915_request_create(eb_find_context(eb, i));
3257         if (IS_ERR(eb->requests[i])) {
3258             out_fence = ERR_CAST(eb->requests[i]);
3259             eb->requests[i] = NULL;
3260             return out_fence;
3261         }
3262 
3263         /*
3264          * Only the first request added (committed to the backend) has to
3265          * take the in-fences into account, as all subsequent requests
3266          * will have fences inserted in between them.
3267          */
3268         if (i + 1 == eb->num_batches) {
3269             out_fence = eb_fences_add(eb, eb->requests[i],
3270                           in_fence, out_fence_fd);
3271             if (IS_ERR(out_fence))
3272                 return out_fence;
3273         }
3274 
3275         /*
3276          * Grab a reference to the batch vma's backing resource so the
3277          * batch remains trackable for the lifetime of the request
3278          * (e.g. for error capture), even after the vma is unpinned.
3279          */
3280         if (eb->batches[i]->vma)
3281             eb->requests[i]->batch_res =
3282                 i915_vma_resource_get(eb->batches[i]->vma->resource);
3283         if (eb->batch_pool) {
3284             GEM_BUG_ON(intel_context_is_parallel(eb->context));
3285             intel_gt_buffer_pool_mark_active(eb->batch_pool,
3286                              eb->requests[i]);
3287         }
3288     }
3289 
3290     return out_fence;
3291 }
3292 
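/*
 * Top level of the execbuf flow: validate flags, parse extensions and
 * fence arrays, pick up the in/out fence fds, select the context and
 * engine, look up and reserve the objects, apply relocations, then build
 * and submit the request(s). The error labels below unwind in reverse
 * order of acquisition.
 */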
3293 static int
3294 i915_gem_do_execbuffer(struct drm_device *dev,
3295                struct drm_file *file,
3296                struct drm_i915_gem_execbuffer2 *args,
3297                struct drm_i915_gem_exec_object2 *exec)
3298 {
3299     struct drm_i915_private *i915 = to_i915(dev);
3300     struct i915_execbuffer eb;
3301     struct dma_fence *in_fence = NULL;
3302     struct sync_file *out_fence = NULL;
3303     int out_fence_fd = -1;
3304     int err;
3305 
3306     BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
3307     BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
3308              ~__EXEC_OBJECT_UNKNOWN_FLAGS);
3309 
3310     eb.i915 = i915;
3311     eb.file = file;
3312     eb.args = args;
3313     if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
3314         args->flags |= __EXEC_HAS_RELOC;
3315 
3316     eb.exec = exec;
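    /*
     * The eb_vma array shares the allocation made in
     * i915_gem_execbuffer2_ioctl() (which reserves extra element-sized
     * slots for the command parser) and is laid out after the exec-object
     * entries.
     */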
3317     eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
3318     eb.vma[0].vma = NULL;
3319     eb.batch_pool = NULL;
3320 
3321     eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
3322     reloc_cache_init(&eb.reloc_cache, eb.i915);
3323 
3324     eb.buffer_count = args->buffer_count;
3325     eb.batch_start_offset = args->batch_start_offset;
3326     eb.trampoline = NULL;
3327 
3328     eb.fences = NULL;
3329     eb.num_fences = 0;
3330 
3331     eb_capture_list_clear(&eb);
3332 
3333     memset(eb.requests, 0, sizeof(struct i915_request *) *
3334            ARRAY_SIZE(eb.requests));
3335     eb.composite_fence = NULL;
3336 
3337     eb.batch_flags = 0;
3338     if (args->flags & I915_EXEC_SECURE) {
3339         if (GRAPHICS_VER(i915) >= 11)
3340             return -ENODEV;
3341 
3342         /* Return -EPERM to trigger fallback code on old binaries. */
3343         if (!HAS_SECURE_BATCHES(i915))
3344             return -EPERM;
3345 
3346         if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
3347             return -EPERM;
3348 
3349         eb.batch_flags |= I915_DISPATCH_SECURE;
3350     }
3351     if (args->flags & I915_EXEC_IS_PINNED)
3352         eb.batch_flags |= I915_DISPATCH_PINNED;
3353 
3354     err = parse_execbuf2_extensions(args, &eb);
3355     if (err)
3356         goto err_ext;
3357 
3358     err = add_fence_array(&eb);
3359     if (err)
3360         goto err_ext;
3361 
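    /*
     * FENCE_IN and FENCE_SUBMIT are mutually exclusive: both take a
     * sync_file fd from the lower half of rsvd2, but FENCE_IN waits for
     * the fence to signal before executing, whereas FENCE_SUBMIT only
     * serialises against the fenced work starting to execute (see
     * eb_fences_add()).
     */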
3362 #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
3363     if (args->flags & IN_FENCES) {
3364         if ((args->flags & IN_FENCES) == IN_FENCES) {
3365             err = -EINVAL;
                 goto err_ext; /* don't leak eb.fences from add_fence_array() */
             }
3366 
3367         in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
3368         if (!in_fence) {
3369             err = -EINVAL;
3370             goto err_ext;
3371         }
3372     }
3373 #undef IN_FENCES
3374 
3375     if (args->flags & I915_EXEC_FENCE_OUT) {
3376         out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
3377         if (out_fence_fd < 0) {
3378             err = out_fence_fd;
3379             goto err_in_fence;
3380         }
3381     }
3382 
3383     err = eb_create(&eb);
3384     if (err)
3385         goto err_out_fence;
3386 
3387     GEM_BUG_ON(!eb.lut_size);
3388 
3389     err = eb_select_context(&eb);
3390     if (unlikely(err))
3391         goto err_destroy;
3392 
3393     err = eb_select_engine(&eb);
3394     if (unlikely(err))
3395         goto err_context;
3396 
3397     err = eb_lookup_vmas(&eb);
3398     if (err) {
3399         eb_release_vmas(&eb, true);
3400         goto err_engine;
3401     }
3402 
3403     i915_gem_ww_ctx_init(&eb.ww, true);
3404 
3405     err = eb_relocate_parse(&eb);
3406     if (err) {
3407         /*
3408          * If the user expects the execobject.offset and
3409          * reloc.presumed_offset to be an exact match, as is
3410          * required when using NO_RELOC, then we cannot update
3411          * the execobject.offset until we have completed
3412          * relocation.
3413          */
3414         args->flags &= ~__EXEC_HAS_RELOC;
3415         goto err_vma;
3416     }
3417 
3418     ww_acquire_done(&eb.ww.ctx);
3419     err = eb_capture_stage(&eb);
3420     if (err)
3421         goto err_vma;
3422 
3423     out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
3424     if (IS_ERR(out_fence)) {
3425         err = PTR_ERR(out_fence);
3426         out_fence = NULL;
3427         if (eb.requests[0])
3428             goto err_request;
3429         else
3430             goto err_vma;
3431     }
3432 
3433     err = eb_submit(&eb);
3434 
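    /*
     * Even on error, any requests already created must still be added
     * (committed) so that they complete and can be retired; the error, if
     * any, is passed to eb_requests_add() for propagation.
     */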
3435 err_request:
3436     eb_requests_get(&eb);
3437     err = eb_requests_add(&eb, err);
3438 
3439     if (eb.fences)
3440         signal_fence_array(&eb, eb.composite_fence ?
3441                    eb.composite_fence :
3442                    &eb.requests[0]->fence);
3443 
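    /*
     * On success the out-fence fd is handed back in the upper 32 bits of
     * rsvd2; the lower 32 bits, which may carry the in-fence fd, are
     * preserved. On failure the unused fd is released further down.
     */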
3444     if (out_fence) {
3445         if (err == 0) {
3446             fd_install(out_fence_fd, out_fence->file);
3447             args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
3448             args->rsvd2 |= (u64)out_fence_fd << 32;
3449             out_fence_fd = -1;
3450         } else {
3451             fput(out_fence->file);
3452         }
3453     }
3454 
3455     if (unlikely(eb.gem_context->syncobj)) {
3456         drm_syncobj_replace_fence(eb.gem_context->syncobj,
3457                       eb.composite_fence ?
3458                       eb.composite_fence :
3459                       &eb.requests[0]->fence);
3460     }
3461 
3462     if (!out_fence && eb.composite_fence)
3463         dma_fence_put(eb.composite_fence);
3464 
3465     eb_requests_put(&eb);
3466 
3467 err_vma:
3468     eb_release_vmas(&eb, true);
3469     WARN_ON(err == -EDEADLK);
3470     i915_gem_ww_ctx_fini(&eb.ww);
3471 
3472     if (eb.batch_pool)
3473         intel_gt_buffer_pool_put(eb.batch_pool);
3474 err_engine:
3475     eb_put_engine(&eb);
3476 err_context:
3477     i915_gem_context_put(eb.gem_context);
3478 err_destroy:
3479     eb_destroy(&eb);
3480 err_out_fence:
3481     if (out_fence_fd != -1)
3482         put_unused_fd(out_fence_fd);
3483 err_in_fence:
3484     dma_fence_put(in_fence);
3485 err_ext:
3486     put_fence_array(eb.fences, eb.num_fences);
3487     return err;
3488 }
3489 
3490 static size_t eb_element_size(void)
3491 {
3492     return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
3493 }
3494 
3495 static bool check_buffer_count(size_t count)
3496 {
3497     const size_t sz = eb_element_size();
3498 
3499     /*
3500      * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
3501      * array size (see eb_create()). Otherwise, we can accept an array as
3502      * large as can be addressed (though use large arrays at your peril)!
3503      */
3504 
3505     return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
3506 }
3507 
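/*
 * Userspace entry point for batch submission (DRM_IOCTL_I915_GEM_EXECBUFFER2).
 *
 * A minimal, illustrative caller sketch, not part of the driver: fd, ctx_id
 * and batch_handle are placeholders, drmIoctl() comes from libdrm's
 * <xf86drm.h>, the uAPI structs from <drm/i915_drm.h>, and error handling
 * and relocation setup are omitted:
 *
 *	struct drm_i915_gem_exec_object2 obj = { .handle = batch_handle };
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)&obj,
 *		.buffer_count = 1,
 *		.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT,
 *		.rsvd1 = ctx_id,
 *	};
 *	int out_fence_fd;
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *	out_fence_fd = execbuf.rsvd2 >> 32;
 *
 * By default the last object in the buffers array is treated as the batch
 * buffer (I915_EXEC_BATCH_FIRST reverses that).
 */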
3508 int
3509 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
3510                struct drm_file *file)
3511 {
3512     struct drm_i915_private *i915 = to_i915(dev);
3513     struct drm_i915_gem_execbuffer2 *args = data;
3514     struct drm_i915_gem_exec_object2 *exec2_list;
3515     const size_t count = args->buffer_count;
3516     int err;
3517 
3518     if (!check_buffer_count(count)) {
3519         drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
3520         return -EINVAL;
3521     }
3522 
3523     err = i915_gem_check_execbuffer(args);
3524     if (err)
3525         return err;
3526 
3527     /* Allocate extra slots for use by the command parser */
3528     exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3529                     __GFP_NOWARN | GFP_KERNEL);
3530     if (exec2_list == NULL) {
3531         drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
3532             count);
3533         return -ENOMEM;
3534     }
3535     if (copy_from_user(exec2_list,
3536                u64_to_user_ptr(args->buffers_ptr),
3537                sizeof(*exec2_list) * count)) {
3538         drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count);
3539         kvfree(exec2_list);
3540         return -EFAULT;
3541     }
3542 
3543     err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
3544 
3545     /*
3546      * Now that we have begun execution of the batchbuffer, we ignore
3547      * any new error after this point. Also given that we have already
3548      * updated the associated relocations, we try to write out the current
3549      * object locations irrespective of any error.
3550      */
3551     if (args->flags & __EXEC_HAS_RELOC) {
3552         struct drm_i915_gem_exec_object2 __user *user_exec_list =
3553             u64_to_user_ptr(args->buffers_ptr);
3554         unsigned int i;
3555 
3556         /*
3557          * Copy the new buffer offsets back to the user's exec list.
3558          *
3559          * Note: count * sizeof(*user_exec_list) does not overflow,
3560          * because we checked 'count' in check_buffer_count(), and this
3561          * range was already effectively checked by the copy_from_user()
3562          * above.
3563          */
3564         if (!user_write_access_begin(user_exec_list,
3565                          count * sizeof(*user_exec_list)))
3566             goto end;
3567 
3568         for (i = 0; i < args->buffer_count; i++) {
3569             if (!(exec2_list[i].offset & UPDATE))
3570                 continue;
3571 
3572             exec2_list[i].offset =
3573                 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
3574             unsafe_put_user(exec2_list[i].offset,
3575                     &user_exec_list[i].offset,
3576                     end_user);
3577         }
3578 end_user:
3579         user_write_access_end();
3580 end:;
3581     }
3582 
3583     args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
3584     kvfree(exec2_list);
3585     return err;
3586 }