/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

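/*
 * Return true if GPU writes to @obj may land in a cacheable (LLC/L3)
 * mapping and therefore need a clflush before they become visible to
 * non-coherent readers such as the display engine. Objects on discrete
 * GPUs, or using uncached/write-through PTEs, never need this.
 */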
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
    struct drm_i915_private *i915 = to_i915(obj->base.dev);

    if (IS_DGFX(i915))
        return false;

    return !(obj->cache_level == I915_CACHE_NONE ||
         obj->cache_level == I915_CACHE_WT);
}

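/*
 * Return true if CPU writes to @obj must be followed by a clflush before
 * other agents can rely on the contents: either the mapping is not
 * coherent for CPU writes, or the object is being scanned out by the
 * display engine. Objects already marked cache_dirty (a flush is pending)
 * or living on discrete GPUs do not need an extra flush.
 */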
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
    struct drm_i915_private *i915 = to_i915(obj->base.dev);

    if (obj->cache_dirty)
        return false;

    if (IS_DGFX(i915))
        return false;

    if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
        return true;

    /* Currently in use by HW (display engine)? Keep flushed. */
    return i915_gem_object_is_framebuffer(obj);
}

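/*
 * Flush any writes still tracked in obj->write_domain before the object
 * moves to another domain: GGTT writes are flushed through the GT and the
 * frontbuffer is notified, WC writes only need a write barrier, CPU writes
 * are clflushed, and GPU (render) writes are recorded via cache_dirty so a
 * later clflush picks them up.
 */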
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
    struct i915_vma *vma;

    assert_object_held(obj);

    if (!(obj->write_domain & flush_domains))
        return;

    switch (obj->write_domain) {
    case I915_GEM_DOMAIN_GTT:
        spin_lock(&obj->vma.lock);
        for_each_ggtt_vma(vma, obj) {
            if (i915_vma_unset_ggtt_write(vma))
                intel_gt_flush_ggtt_writes(vma->vm->gt);
        }
        spin_unlock(&obj->vma.lock);

        i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
        break;

    case I915_GEM_DOMAIN_WC:
        wmb();
        break;

    case I915_GEM_DOMAIN_CPU:
        i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
        break;

    case I915_GEM_DOMAIN_RENDER:
        if (gpu_write_needs_clflush(obj))
            obj->cache_dirty = true;
        break;
    }

    obj->write_domain = 0;
}

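/*
 * Flush all pending writes and force a clflush of any dirty cachelines so
 * that the object contents are immediately visible to the display engine.
 */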
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
    /*
     * We manually flush the CPU domain so that we can override and
     * force the flush for the display, and perform it asynchronously.
     */
    flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
    if (obj->cache_dirty)
        i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
    obj->write_domain = 0;
}

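/*
 * If the object is currently used as a framebuffer, take the object lock
 * and flush its pending writes for the display engine.
 */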
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
    if (!i915_gem_object_is_framebuffer(obj))
        return;

    i915_gem_object_lock(obj, NULL);
    __i915_gem_object_flush_for_display(obj);
    i915_gem_object_unlock(obj);
}

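/*
 * Same as i915_gem_object_flush_if_display(), but for callers that already
 * hold the object lock.
 */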
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
    if (i915_gem_object_is_framebuffer(obj))
        __i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
    int ret;

    assert_object_held(obj);

    ret = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE |
                   (write ? I915_WAIT_ALL : 0),
                   MAX_SCHEDULE_TIMEOUT);
    if (ret)
        return ret;

    if (obj->write_domain == I915_GEM_DOMAIN_WC)
        return 0;

    /* Flush and acquire obj->pages so that we are coherent through
     * direct access in memory with previous cached writes through
     * shmemfs and that our cache domain tracking remains valid.
     * For example, if the obj->filp was moved to swap without us
     * being notified and releasing the pages, we would mistakenly
     * continue to assume that the obj remained out of the CPU cached
     * domain.
     */
    ret = i915_gem_object_pin_pages(obj);
    if (ret)
        return ret;

    flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

    /* Serialise direct access to this object with the barriers for
     * coherent writes from the GPU, by effectively invalidating the
     * WC domain upon first access.
     */
    if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
        mb();

    /* It should now be out of any other write domains, and we can update
     * the domain values for our changes.
     */
    GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
    obj->read_domains |= I915_GEM_DOMAIN_WC;
    if (write) {
        obj->read_domains = I915_GEM_DOMAIN_WC;
        obj->write_domain = I915_GEM_DOMAIN_WC;
        obj->mm.dirty = true;
    }

    i915_gem_object_unpin_pages(obj);
    return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
    int ret;

    assert_object_held(obj);

    ret = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE |
                   (write ? I915_WAIT_ALL : 0),
                   MAX_SCHEDULE_TIMEOUT);
    if (ret)
        return ret;

    if (obj->write_domain == I915_GEM_DOMAIN_GTT)
        return 0;

    /* Flush and acquire obj->pages so that we are coherent through
     * direct access in memory with previous cached writes through
     * shmemfs and that our cache domain tracking remains valid.
     * For example, if the obj->filp was moved to swap without us
     * being notified and releasing the pages, we would mistakenly
     * continue to assume that the obj remained out of the CPU cached
     * domain.
     */
    ret = i915_gem_object_pin_pages(obj);
    if (ret)
        return ret;

    flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

    /* Serialise direct access to this object with the barriers for
     * coherent writes from the GPU, by effectively invalidating the
     * GTT domain upon first access.
     */
    if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
        mb();

    /* It should now be out of any other write domains, and we can update
     * the domain values for our changes.
     */
    GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
    obj->read_domains |= I915_GEM_DOMAIN_GTT;
    if (write) {
        struct i915_vma *vma;

        obj->read_domains = I915_GEM_DOMAIN_GTT;
        obj->write_domain = I915_GEM_DOMAIN_GTT;
        obj->mm.dirty = true;

        spin_lock(&obj->vma.lock);
        for_each_ggtt_vma(vma, obj)
            if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                i915_vma_set_ggtt_write(vma);
        spin_unlock(&obj->vma.lock);
    }

    i915_gem_object_unpin_pages(obj);
    return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 * across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                    enum i915_cache_level cache_level)
{
    int ret;

    if (obj->cache_level == cache_level)
        return 0;

    ret = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE |
                   I915_WAIT_ALL,
                   MAX_SCHEDULE_TIMEOUT);
    if (ret)
        return ret;

    /* Always invalidate stale cachelines */
    if (obj->cache_level != cache_level) {
        i915_gem_object_set_cache_coherency(obj, cache_level);
        obj->cache_dirty = true;
    }

    /* The cache-level will be applied when each vma is rebound. */
    return i915_gem_object_unbind(obj,
                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

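/*
 * DRM_IOCTL_I915_GEM_GET_CACHING: report the caching mode of an object back
 * to userspace, mapping the internal cache level onto the uAPI
 * I915_CACHING_* values. Returns -ENODEV on discrete GPUs.
 */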
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                   struct drm_file *file)
{
    struct drm_i915_gem_caching *args = data;
    struct drm_i915_gem_object *obj;
    int err = 0;

    if (IS_DGFX(to_i915(dev)))
        return -ENODEV;

    rcu_read_lock();
    obj = i915_gem_object_lookup_rcu(file, args->handle);
    if (!obj) {
        err = -ENOENT;
        goto out;
    }

    switch (obj->cache_level) {
    case I915_CACHE_LLC:
    case I915_CACHE_L3_LLC:
        args->caching = I915_CACHING_CACHED;
        break;

    case I915_CACHE_WT:
        args->caching = I915_CACHING_DISPLAY;
        break;

    default:
        args->caching = I915_CACHING_NONE;
        break;
    }
out:
    rcu_read_unlock();
    return err;
}

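/*
 * DRM_IOCTL_I915_GEM_SET_CACHING: let userspace request a caching mode for
 * an object (none, cached or display). The request is translated into an
 * i915 cache level and applied via i915_gem_object_set_cache_level().
 * Returns -ENODEV on discrete GPUs and on platforms that can neither cache
 * nor snoop.
 */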
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                   struct drm_file *file)
{
    struct drm_i915_private *i915 = to_i915(dev);
    struct drm_i915_gem_caching *args = data;
    struct drm_i915_gem_object *obj;
    enum i915_cache_level level;
    int ret = 0;

    if (IS_DGFX(i915))
        return -ENODEV;

    switch (args->caching) {
    case I915_CACHING_NONE:
        level = I915_CACHE_NONE;
        break;
    case I915_CACHING_CACHED:
        /*
         * Due to a HW issue on BXT A stepping, GPU stores via a
         * snooped mapping may leave stale data in a corresponding CPU
         * cacheline, whereas normally such cachelines would get
         * invalidated.
         */
        if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
            return -ENODEV;

        level = I915_CACHE_LLC;
        break;
    case I915_CACHING_DISPLAY:
        level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
        break;
    default:
        return -EINVAL;
    }

    obj = i915_gem_object_lookup(file, args->handle);
    if (!obj)
        return -ENOENT;

    /*
     * The caching mode of a proxy object is handled by its generator and
     * is not allowed to be changed by userspace.
     */
    if (i915_gem_object_is_proxy(obj)) {
        /*
         * Silently allow cached for userptr; the vulkan driver
         * sets all objects to cached
         */
        if (!i915_gem_object_is_userptr(obj) ||
            args->caching != I915_CACHING_CACHED)
            ret = -ENXIO;

        goto out;
    }

    ret = i915_gem_object_lock_interruptible(obj, NULL);
    if (ret)
        goto out;

    ret = i915_gem_object_set_cache_level(obj, level);
    i915_gem_object_unlock(obj);

out:
    i915_gem_object_put(obj);
    return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                     struct i915_gem_ww_ctx *ww,
                     u32 alignment,
                     const struct i915_ggtt_view *view,
                     unsigned int flags)
{
    struct drm_i915_private *i915 = to_i915(obj->base.dev);
    struct i915_vma *vma;
    int ret;

    /* Frame buffer must be in LMEM */
    if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
        return ERR_PTR(-EINVAL);

    /*
     * The display engine is not coherent with the LLC cache on gen6.  As
     * a result, we make sure that the pinning that is about to occur is
     * done with uncached PTEs. This is the lowest common denominator for
     * all chipsets.
     *
     * However for gen6+, we could do better by using the GFDT bit instead
     * of uncaching, which would allow us to flush all the LLC-cached data
     * with that bit in the PTE to main memory with just one PIPE_CONTROL.
     */
    ret = i915_gem_object_set_cache_level(obj,
                          HAS_WT(i915) ?
                          I915_CACHE_WT : I915_CACHE_NONE);
    if (ret)
        return ERR_PTR(ret);

    /*
     * As the user may map the buffer once pinned in the display plane
     * (e.g. libkms for the bootup splash), we have to ensure that we
     * always use map_and_fenceable for all scanout buffers. However,
     * it may simply be too big to fit into mappable, in which case
     * put it anyway and hope that userspace can cope (but always first
     * try to preserve the existing ABI).
     */
    vma = ERR_PTR(-ENOSPC);
    if ((flags & PIN_MAPPABLE) == 0 &&
        (!view || view->type == I915_GGTT_VIEW_NORMAL))
        vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                          flags | PIN_MAPPABLE |
                          PIN_NONBLOCK);
    if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
        vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                          alignment, flags);
    if (IS_ERR(vma))
        return vma;

    vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
    i915_vma_mark_scanout(vma);

    i915_gem_object_flush_if_display_locked(obj);

    return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
    int ret;

    assert_object_held(obj);

    ret = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE |
                   (write ? I915_WAIT_ALL : 0),
                   MAX_SCHEDULE_TIMEOUT);
    if (ret)
        return ret;

    flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

    /* Flush the CPU cache if it's still invalid. */
    if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
        i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
        obj->read_domains |= I915_GEM_DOMAIN_CPU;
    }

    /* It should now be out of any other write domains, and we can update
     * the domain values for our changes.
     */
    GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

    /* If we're writing through the CPU, then the GPU read domains will
     * need to be invalidated at next use.
     */
    if (write)
        __start_cpu_write(obj);

    return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
              struct drm_file *file)
{
    struct drm_i915_gem_set_domain *args = data;
    struct drm_i915_gem_object *obj;
    u32 read_domains = args->read_domains;
    u32 write_domain = args->write_domain;
    int err;

    if (IS_DGFX(to_i915(dev)))
        return -ENODEV;

    /* Only handle setting domains to types used by the CPU. */
    if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
        return -EINVAL;

    /*
     * Having something in the write domain implies it's in the read
     * domain, and only that read domain.  Enforce that in the request.
     */
    if (write_domain && read_domains != write_domain)
        return -EINVAL;

    if (!read_domains)
        return 0;

    obj = i915_gem_object_lookup(file, args->handle);
    if (!obj)
        return -ENOENT;

    /*
     * Try to flush the object off the GPU without holding the lock.
     * We will repeat the flush holding the lock in the normal manner
     * to catch cases where we are gazumped.
     */
    err = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE |
                   I915_WAIT_PRIORITY |
                   (write_domain ? I915_WAIT_ALL : 0),
                   MAX_SCHEDULE_TIMEOUT);
    if (err)
        goto out;

    if (i915_gem_object_is_userptr(obj)) {
        /*
         * Try to grab userptr pages, iris uses set_domain to check
         * userptr validity
         */
        err = i915_gem_object_userptr_validate(obj);
        if (!err)
            err = i915_gem_object_wait(obj,
                           I915_WAIT_INTERRUPTIBLE |
                           I915_WAIT_PRIORITY |
                           (write_domain ? I915_WAIT_ALL : 0),
                           MAX_SCHEDULE_TIMEOUT);
        goto out;
    }

    /*
     * Proxy objects do not control access to the backing storage, ergo
     * they cannot be used as a means to manipulate the cache domain
     * tracking for that backing storage. The proxy object is always
     * considered to be outside of any cache domain.
     */
    if (i915_gem_object_is_proxy(obj)) {
        err = -ENXIO;
        goto out;
    }

    err = i915_gem_object_lock_interruptible(obj, NULL);
    if (err)
        goto out;

    /*
     * Flush and acquire obj->pages so that we are coherent through
     * direct access in memory with previous cached writes through
     * shmemfs and that our cache domain tracking remains valid.
     * For example, if the obj->filp was moved to swap without us
     * being notified and releasing the pages, we would mistakenly
     * continue to assume that the obj remained out of the CPU cached
     * domain.
     */
    err = i915_gem_object_pin_pages(obj);
    if (err)
        goto out_unlock;

    /*
     * Already in the desired write domain? Nothing for us to do!
     *
     * We apply a little bit of cunning here to catch a broader set of
     * no-ops. If obj->write_domain is set, we must be in the same
     * obj->read_domains, and only that domain. Therefore, if that
     * obj->write_domain matches the request read_domains, we are
     * already in the same read/write domain and can skip the operation,
     * without having to further check the requested write_domain.
     */
    if (READ_ONCE(obj->write_domain) == read_domains)
        goto out_unpin;

    if (read_domains & I915_GEM_DOMAIN_WC)
        err = i915_gem_object_set_to_wc_domain(obj, write_domain);
    else if (read_domains & I915_GEM_DOMAIN_GTT)
        err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
    else
        err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
    i915_gem_object_unpin_pages(obj);

out_unlock:
    i915_gem_object_unlock(obj);

    if (!err && write_domain)
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
    i915_gem_object_put(obj);
    return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                 unsigned int *needs_clflush)
{
    int ret;

    *needs_clflush = 0;
    if (!i915_gem_object_has_struct_page(obj))
        return -ENODEV;

    assert_object_held(obj);

    ret = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE,
                   MAX_SCHEDULE_TIMEOUT);
    if (ret)
        return ret;

    ret = i915_gem_object_pin_pages(obj);
    if (ret)
        return ret;

    if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
        !static_cpu_has(X86_FEATURE_CLFLUSH)) {
        ret = i915_gem_object_set_to_cpu_domain(obj, false);
        if (ret)
            goto err_unpin;
        else
            goto out;
    }

    flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

    /* If we're not in the cpu read domain, set ourselves into the gtt
     * read domain and manually flush cachelines (if required). This
     * optimizes for the case when the gpu will dirty the data
     * anyway again before the next pread happens.
     */
    if (!obj->cache_dirty &&
        !(obj->read_domains & I915_GEM_DOMAIN_CPU))
        *needs_clflush = CLFLUSH_BEFORE;

out:
    /* return with the pages pinned */
    return 0;

err_unpin:
    i915_gem_object_unpin_pages(obj);
    return ret;
}

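/*
 * Pins the specified object's pages and synchronizes the object with GPU
 * accesses in preparation for a CPU write. Sets needs_clflush to a mask of
 * CLFLUSH_BEFORE/CLFLUSH_AFTER bits telling the caller which manual cache
 * flushes are required around the write.
 */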
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                  unsigned int *needs_clflush)
{
    int ret;

    *needs_clflush = 0;
    if (!i915_gem_object_has_struct_page(obj))
        return -ENODEV;

    assert_object_held(obj);

    ret = i915_gem_object_wait(obj,
                   I915_WAIT_INTERRUPTIBLE |
                   I915_WAIT_ALL,
                   MAX_SCHEDULE_TIMEOUT);
    if (ret)
        return ret;

    ret = i915_gem_object_pin_pages(obj);
    if (ret)
        return ret;

    if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
        !static_cpu_has(X86_FEATURE_CLFLUSH)) {
        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret)
            goto err_unpin;
        else
            goto out;
    }

    flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

    /* If we're not in the cpu write domain, set ourselves into the
     * gtt write domain and manually flush cachelines (as required).
     * This optimizes for the case when the gpu will use the data
     * right away and we therefore have to clflush anyway.
     */
    if (!obj->cache_dirty) {
        *needs_clflush |= CLFLUSH_AFTER;

        /*
         * Same trick applies to invalidate partially written
         * cachelines read before writing.
         */
        if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
            *needs_clflush |= CLFLUSH_BEFORE;
    }

out:
    i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
    obj->mm.dirty = true;
    /* return with the pages pinned */
    return 0;

err_unpin:
    i915_gem_object_unpin_pages(obj);
    return ret;