0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * Copyright (C) 2013 Red Hat
0004  * Author: Rob Clark <robdclark@gmail.com>
0005  */
0006 
0007 #ifndef __MSM_GPU_H__
0008 #define __MSM_GPU_H__
0009 
0010 #include <linux/adreno-smmu-priv.h>
0011 #include <linux/clk.h>
0012 #include <linux/devfreq.h>
0013 #include <linux/interconnect.h>
0014 #include <linux/pm_opp.h>
0015 #include <linux/regulator/consumer.h>
0016 
0017 #include "msm_drv.h"
0018 #include "msm_fence.h"
0019 #include "msm_ringbuffer.h"
0020 #include "msm_gem.h"
0021 
0022 struct msm_gem_submit;
0023 struct msm_gpu_perfcntr;
0024 struct msm_gpu_state;
0025 struct msm_file_private;
0026 
0027 struct msm_gpu_config {
0028     const char *ioname;
0029     unsigned int nr_rings;
0030 };
0031 
0032 /* With the hardware I've seen to date, we can have:
0033  *  + zero, one, or two z180 2d cores
0034  *  + a3xx or a2xx 3d core, which share a common CP (the firmware
0035  *    for the CP seems to implement some different PM4 packet types
0036  *    but the basics of cmdstream submission are the same)
0037  *
0038  * Which means that the eventual complete "class" hierarchy, once
0039  * support for all past and present hw is in place, becomes:
0040  *  + msm_gpu
0041  *    + adreno_gpu
0042  *      + a3xx_gpu
0043  *      + a2xx_gpu
0044  *    + z180_gpu
0045  */
0046 struct msm_gpu_funcs {
0047     int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
0048              uint32_t param, uint64_t *value, uint32_t *len);
0049     int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
0050              uint32_t param, uint64_t value, uint32_t len);
0051     int (*hw_init)(struct msm_gpu *gpu);
0052     int (*pm_suspend)(struct msm_gpu *gpu);
0053     int (*pm_resume)(struct msm_gpu *gpu);
0054     void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
0055     void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
0056     irqreturn_t (*irq)(struct msm_gpu *gpu);
0057     struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
0058     void (*recover)(struct msm_gpu *gpu);
0059     void (*destroy)(struct msm_gpu *gpu);
0060 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
0061     /* show GPU status in debugfs: */
0062     void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
0063             struct drm_printer *p);
0064     /* for generation specific debugfs: */
0065     void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
0066 #endif
0067     /* note: gpu_busy() can assume that we have been pm_resumed */
0068     u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
0069     struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
0070     int (*gpu_state_put)(struct msm_gpu_state *state);
0071     unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
0072     /* note: gpu_set_freq() can assume that we have been pm_resumed */
0073     void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
0074                  bool suspended);
0075     struct msm_gem_address_space *(*create_address_space)
0076         (struct msm_gpu *gpu, struct platform_device *pdev);
0077     struct msm_gem_address_space *(*create_private_address_space)
0078         (struct msm_gpu *gpu);
0079     uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
0080 };
0081 
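/*
 * Illustrative sketch (not a real backend): a GPU backend fills in the
 * msm_gpu_funcs vtable above and hands it to msm_gpu_init().  The "foo_*"
 * names below are hypothetical, just to show the shape of a typical
 * implementation:
 *
 *   static const struct msm_gpu_funcs foo_gpu_funcs = {
 *       .hw_init     = foo_hw_init,
 *       .pm_suspend  = foo_pm_suspend,
 *       .pm_resume   = foo_pm_resume,
 *       .submit      = foo_submit,
 *       .flush       = foo_flush,
 *       .irq         = foo_irq,
 *       .active_ring = foo_active_ring,
 *       .recover     = foo_recover,
 *       .destroy     = foo_destroy,
 *   };
 */
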
0082 /* Additional state for iommu faults: */
0083 struct msm_gpu_fault_info {
0084     u64 ttbr0;
0085     unsigned long iova;
0086     int flags;
0087     const char *type;
0088     const char *block;
0089 };
0090 
0091 /**
0092  * struct msm_gpu_devfreq - devfreq related state
0093  */
0094 struct msm_gpu_devfreq {
0095     /** devfreq: devfreq instance */
0096     struct devfreq *devfreq;
0097 
0098     /** lock: lock for "suspended", "busy_cycles", and "time" */
0099     struct mutex lock;
0100 
0101     /**
0102      * idle_freq:
0103      *
0104      * A PM QoS constraint to limit max freq while the GPU is idle.
0105      */
0106     struct dev_pm_qos_request idle_freq;
0107 
0108     /**
0109      * boost_freq:
0110      *
0111      * A PM QoS constraint to boost min freq for a period of time
0112      * until the boost expires.
0113      */
0114     struct dev_pm_qos_request boost_freq;
0115 
0116     /**
0117      * busy_cycles: Last busy counter value, for calculating elapsed busy
0118      * cycles since last sampling period.
0119      */
0120     u64 busy_cycles;
0121 
0122     /** time: Time of last sampling period. */
0123     ktime_t time;
0124 
0125     /** idle_time: Time of last transition to idle. */
0126     ktime_t idle_time;
0127 
0128     struct devfreq_dev_status average_status;
0129 
0130     /**
0131      * idle_work:
0132      *
0133      * Used to delay clamping to idle freq on active->idle transition.
0134      */
0135     struct msm_hrtimer_work idle_work;
0136 
0137     /**
0138      * boost_work:
0139      *
0140      * Used to reset the boost_freq request after the boost period has
0141      * elapsed.
0142      */
0143     struct msm_hrtimer_work boost_work;
0144 
0145     /** suspended: tracks if we're suspended */
0146     bool suspended;
0147 };
0148 
0149 struct msm_gpu {
0150     const char *name;
0151     struct drm_device *dev;
0152     struct platform_device *pdev;
0153     const struct msm_gpu_funcs *funcs;
0154 
0155     struct adreno_smmu_priv adreno_smmu;
0156 
0157     /* performance counters (hw & sw): */
0158     spinlock_t perf_lock;
0159     bool perfcntr_active;
0160     struct {
0161         bool active;
0162         ktime_t time;
0163     } last_sample;
0164     uint32_t totaltime, activetime;    /* sw counters */
0165     uint32_t last_cntrs[5];            /* hw counters */
0166     const struct msm_gpu_perfcntr *perfcntrs;
0167     uint32_t num_perfcntrs;
0168 
0169     struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
0170     int nr_rings;
0171 
0172     /**
0173      * sysprof_active:
0174      *
0175      * The count of contexts that have enabled system profiling.
0176      */
0177     refcount_t sysprof_active;
0178 
0179     /**
0180      * cur_ctx_seqno:
0181      *
0182      * The ctx->seqno value of the last context to submit rendering,
0183      * and the one with current pgtables installed (for generations
0184      * that support per-context pgtables).  Tracked by seqno rather
0185      * than pointer value to avoid dangling pointers, and cases where
0186      * a ctx can be freed and a new one created with the same address.
0187      */
0188     int cur_ctx_seqno;
0189 
0190     /*
0191      * List of GEM active objects on this gpu.  Protected by
0192      * msm_drm_private::mm_lock
0193      */
0194     struct list_head active_list;
0195 
0196     /**
0197      * lock:
0198      *
0199      * General lock for serializing all the gpu things.
0200      *
0201      * TODO move to per-ring locking where feasible (ie. submit/retire
0202      * path, etc)
0203      */
0204     struct mutex lock;
0205 
0206     /**
0207      * active_submits:
0208      *
0209      * The number of submitted but not yet retired submits, used to
0210      * determine transitions between active and idle.
0211      *
0212      * Protected by active_lock
0213      */
0214     int active_submits;
0215 
0216     /** active_lock: protects active_submits and idle/active transitions */
0217     struct mutex active_lock;
0218 
0219     /* does gpu need hw_init? */
0220     bool needs_hw_init;
0221 
0222     /**
0223      * global_faults: number of GPU hangs not attributed to a particular
0224      * address space
0225      */
0226     int global_faults;
0227 
0228     void __iomem *mmio;
0229     int irq;
0230 
0231     struct msm_gem_address_space *aspace;
0232 
0233     /* Power Control: */
0234     struct regulator *gpu_reg, *gpu_cx;
0235     struct clk_bulk_data *grp_clks;
0236     int nr_clocks;
0237     struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
0238     uint32_t fast_rate;
0239 
0240     /* Hang and Inactivity Detection:
0241      */
0242 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
0243 
0244 #define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
0245     struct timer_list hangcheck_timer;
0246 
0247     /* Fault info for most recent iova fault: */
0248     struct msm_gpu_fault_info fault_info;
0249 
0250     /* work for handling GPU iova faults: */
0251     struct kthread_work fault_work;
0252 
0253     /* work for handling GPU recovery: */
0254     struct kthread_work recover_work;
0255 
0256     /** retire_event: notified when submits are retired: */
0257     wait_queue_head_t retire_event;
0258 
0259     /* work for handling active-list retiring: */
0260     struct kthread_work retire_work;
0261 
0262     /* worker for retire/recover: */
0263     struct kthread_worker *worker;
0264 
0265     struct drm_gem_object *memptrs_bo;
0266 
0267     struct msm_gpu_devfreq devfreq;
0268 
0269     uint32_t suspend_count;
0270 
0271     struct msm_gpu_state *crashstate;
0272 
0273     /* Enable clamping to idle freq when inactive: */
0274     bool clamp_to_idle;
0275 
0276     /* True if the hardware supports expanded apriv (a650 and newer) */
0277     bool hw_apriv;
0278 
0279     struct thermal_cooling_device *cooling;
0280 };
0281 
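/*
 * Note: this helper assumes the device's drvdata points at the embedded
 * adreno_smmu member (normally set up when the GPU is initialized), so
 * container_of() can recover the enclosing msm_gpu.
 */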
0282 static inline struct msm_gpu *dev_to_gpu(struct device *dev)
0283 {
0284     struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);
0285     return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
0286 }
0287 
0288 /* It turns out that all targets use the same ringbuffer size */
0289 #define MSM_GPU_RINGBUFFER_SZ SZ_32K
0290 #define MSM_GPU_RINGBUFFER_BLKSIZE 32
0291 
0292 #define MSM_GPU_RB_CNTL_DEFAULT \
0293         (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
0294         AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
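
/*
 * For reference: BUFSZ and BLKSZ are encoded as log2 of the size in 8-byte
 * units, so with the sizes above this evaluates to
 * AXXX_CP_RB_CNTL_BUFSZ(12) | AXXX_CP_RB_CNTL_BLKSZ(2)
 * (32K / 8 = 4096 = 2^12, and 32 / 8 = 4 = 2^2).
 */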
0295 
0296 static inline bool msm_gpu_active(struct msm_gpu *gpu)
0297 {
0298     int i;
0299 
0300     for (i = 0; i < gpu->nr_rings; i++) {
0301         struct msm_ringbuffer *ring = gpu->rb[i];
0302 
0303         if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
0304             return true;
0305     }
0306 
0307     return false;
0308 }
0309 
0310 /* Perf-Counters:
0311  * The select_reg and select_val are just there for the benefit of the child
0312  * class that actually enables the perf counter, but the msm_gpu base class
0313  * will handle sampling/displaying the counters.
0314  */
0315 
0316 struct msm_gpu_perfcntr {
0317     uint32_t select_reg;
0318     uint32_t sample_reg;
0319     uint32_t select_val;
0320     const char *name;
0321 };
0322 
0323 /*
0324  * The number of priority levels provided by drm gpu scheduler.  The
0325  * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
0326  * cases, so we don't use it (no need for kernel generated jobs).
0327  */
0328 #define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
0329 
0330 /**
0331  * struct msm_file_private - per-drm_file context
0332  *
0333  * @queuelock:    synchronizes access to submitqueues list
0334  * @submitqueues: list of &msm_gpu_submitqueue created by userspace
0335  * @queueid:      counter incremented each time a submitqueue is created,
0336  *                used to assign &msm_gpu_submitqueue.id
0337  * @aspace:       the per-process GPU address-space
0338  * @ref:          reference count
0339  * @seqno:        unique per process seqno
0340  */
0341 struct msm_file_private {
0342     rwlock_t queuelock;
0343     struct list_head submitqueues;
0344     int queueid;
0345     struct msm_gem_address_space *aspace;
0346     struct kref ref;
0347     int seqno;
0348 
0349     /**
0350      * sysprof:
0351      *
0352      * The value of MSM_PARAM_SYSPROF set by userspace.  This is
0353      * intended to be used by system profiling tools like Mesa's
0354      * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
0355      *
0356      * Setting a value of 1 will preserve performance counters across
0357      * context switches.  Setting a value of 2 will in addition
0358      * suppress suspend.  (Performance counters lose state across
0359      * power collapse, which is undesirable for profiling in some
0360      * cases.)
0361      *
0362      * The value automatically reverts to zero when the drm device
0363      * file is closed.
0364      */
0365     int sysprof;
0366 
0367     /** comm: Overridden task comm, see MSM_PARAM_COMM */
0368     char *comm;
0369 
0370     /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */
0371     char *cmdline;
0372 
0373     /**
0374      * elapsed_ns:
0375      *
0376      * The total (cumulative) elapsed time GPU was busy with rendering
0377      * from this context in ns.
0378      */
0379     uint64_t elapsed_ns;
0380 
0381     /**
0382      * cycles:
0383      *
0384      * The total (cumulative) GPU cycles elapsed attributed to this
0385      * context.
0386      */
0387     uint64_t cycles;
0388 
0389     /**
0390      * entities:
0391      *
0392      * Table of per-priority-level sched entities used by submitqueues
0393      * associated with this &drm_file.  Because some userspace apps
0394      * make assumptions about rendering from multiple gl contexts
0395      * (of the same priority) within the process happening in FIFO
0396      * order without requiring any fencing beyond MakeCurrent(), we
0397      * create at most one &drm_sched_entity per-process per-priority-
0398      * level.
0399      */
0400     struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
0401 };
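
/*
 * Note: entities[] above is conceptually a [ring_nr][sched_prio] table; the
 * submitqueue code typically indexes it as
 * (ring_nr * NR_SCHED_PRIORITIES) + sched_prio, creating entries lazily the
 * first time a queue needs a given ring/priority combination.
 */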
0402 
0403 /**
0404  * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
0405  *
0406  * @gpu:        the gpu instance
0407  * @prio:       the userspace priority level
0408  * @ring_nr:    [out] the ringbuffer the userspace priority maps to
0409  * @sched_prio: [out] the gpu scheduler priority level which the userspace
0410  *              priority maps to
0411  *
0412  * With drm/scheduler providing its own level of prioritization, our total
0413  * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
0414  * Each ring is associated with its own scheduler instance.  However, our
0415  * UABI is that lower numerical values are higher priority.  So mapping the
0416  * single userspace priority level into ring_nr and sched_prio takes some
0417  * care.  The userspace provided priority (when a submitqueue is created)
0418  * is mapped to ring nr and scheduler priority as such:
0419  *
0420  *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
0421  *   sched_prio = NR_SCHED_PRIORITIES -
0422  *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
0423  *
0424  * This allows generations without preemption (nr_rings==1) to have some
0425  * amount of prioritization, and provides more priority levels for gens
0426  * that do have preemption.
0427  */
0428 static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
0429         unsigned *ring_nr, enum drm_sched_priority *sched_prio)
0430 {
0431     unsigned rn, sp;
0432 
0433     rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);
0434 
0435     /* invert sched priority to map to higher-numeric-is-higher-
0436      * priority convention
0437      */
0438     sp = NR_SCHED_PRIORITIES - sp - 1;
0439 
0440     if (rn >= gpu->nr_rings)
0441         return -EINVAL;
0442 
0443     *ring_nr = rn;
0444     *sched_prio = sp;
0445 
0446     return 0;
0447 }
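
/*
 * Worked example (illustrative): assuming NR_SCHED_PRIORITIES evaluates to 3
 * and gpu->nr_rings is 2, the mapping above gives:
 *
 *   userspace prio 0 -> ring_nr 0, sched_prio 2 (highest)
 *   userspace prio 2 -> ring_nr 0, sched_prio 0
 *   userspace prio 3 -> ring_nr 1, sched_prio 2
 *   userspace prio 5 -> ring_nr 1, sched_prio 0
 *   userspace prio 6 -> -EINVAL (would need a third ring)
 */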
0448 
0449 /**
0450  * struct msm_gpu_submitqueue - Userspace created context.
0451  *
0452  * A submitqueue is associated with a gl context or vk queue (or equiv)
0453  * in userspace.
0454  *
0455  * @id:        userspace id for the submitqueue, unique within the drm_file
0456  * @flags:     userspace flags for the submitqueue, specified at creation
0457  *             (currently unused)
0458  * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
0459  *             by the submitqueue's priority
0460  * @faults:    the number of GPU hangs associated with this submitqueue
0461  * @last_fence: the sequence number of the last allocated fence (for error
0462  *             checking)
0463  * @ctx:       the per-drm_file context associated with the submitqueue (ie.
0464  *             which set of pgtables the jobs submitted via this submitqueue
0465  *             use)
0466  * @node:      node in the context's list of submitqueues
0467  * @fence_idr: maps fence-id to dma_fence for userspace visible fence
0468  *             seqno, protected by submitqueue lock
0469  * @lock:      submitqueue lock
0470  * @ref:       reference count
0471  * @entity:    the submit job-queue
0472  */
0473 struct msm_gpu_submitqueue {
0474     int id;
0475     u32 flags;
0476     u32 ring_nr;
0477     int faults;
0478     uint32_t last_fence;
0479     struct msm_file_private *ctx;
0480     struct list_head node;
0481     struct idr fence_idr;
0482     struct mutex lock;
0483     struct kref ref;
0484     struct drm_sched_entity *entity;
0485 };
0486 
0487 struct msm_gpu_state_bo {
0488     u64 iova;
0489     size_t size;
0490     void *data;
0491     bool encoded;
0492     char name[32];
0493 };
0494 
0495 struct msm_gpu_state {
0496     struct kref ref;
0497     struct timespec64 time;
0498 
0499     struct {
0500         u64 iova;
0501         u32 fence;
0502         u32 seqno;
0503         u32 rptr;
0504         u32 wptr;
0505         void *data;
0506         int data_size;
0507         bool encoded;
0508     } ring[MSM_GPU_MAX_RINGS];
0509 
0510     int nr_registers;
0511     u32 *registers;
0512 
0513     u32 rbbm_status;
0514 
0515     char *comm;
0516     char *cmd;
0517 
0518     struct msm_gpu_fault_info fault_info;
0519 
0520     int nr_bos;
0521     struct msm_gpu_state_bo *bos;
0522 };
0523 
0524 static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
0525 {
0526     msm_writel(data, gpu->mmio + (reg << 2));
0527 }
0528 
0529 static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
0530 {
0531     return msm_readl(gpu->mmio + (reg << 2));
0532 }
0533 
0534 static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
0535 {
0536     msm_rmw(gpu->mmio + (reg << 2), mask, or);
0537 }
0538 
0539 static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
0540 {
0541     u64 val;
0542 
0543     /*
0544      * Why not a readq here? Two reasons: 1) many of the LO registers are
0545      * not quad word aligned and 2) the GPU hardware designers have a bit
0546      * of a history of putting registers where they fit, especially in
0547      * spins. The longer a GPU family goes the higher the chance that
0548      * we'll get burned.  We could do a series of validity checks if we
0549  * wanted to, but really, is a readq() that much better? Nah.
0550      */
0551 
0552     /*
0553      * For some lo/hi registers (like perfcounters), the hi value is latched
0554      * when the lo is read, so make sure to read the lo first to trigger
0555      * that
0556      */
0557     val = (u64) msm_readl(gpu->mmio + (lo << 2));
0558     val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
0559 
0560     return val;
0561 }
0562 
0563 static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
0564 {
0565     /* Why not a writeq here? Read the screed above */
0566     msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
0567     msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
0568 }
0569 
0570 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
0571 int msm_gpu_pm_resume(struct msm_gpu *gpu);
0572 
0573 void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
0574              struct drm_printer *p);
0575 
0576 int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
0577 struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
0578         u32 id);
0579 int msm_submitqueue_create(struct drm_device *drm,
0580         struct msm_file_private *ctx,
0581         u32 prio, u32 flags, u32 *id);
0582 int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
0583         struct drm_msm_submitqueue_query *args);
0584 int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
0585 void msm_submitqueue_close(struct msm_file_private *ctx);
0586 
0587 void msm_submitqueue_destroy(struct kref *kref);
0588 
0589 int msm_file_private_set_sysprof(struct msm_file_private *ctx,
0590                  struct msm_gpu *gpu, int sysprof);
0591 void __msm_file_private_destroy(struct kref *kref);
0592 
0593 static inline void msm_file_private_put(struct msm_file_private *ctx)
0594 {
0595     kref_put(&ctx->ref, __msm_file_private_destroy);
0596 }
0597 
0598 static inline struct msm_file_private *msm_file_private_get(
0599     struct msm_file_private *ctx)
0600 {
0601     kref_get(&ctx->ref);
0602     return ctx;
0603 }
0604 
0605 void msm_devfreq_init(struct msm_gpu *gpu);
0606 void msm_devfreq_cleanup(struct msm_gpu *gpu);
0607 void msm_devfreq_resume(struct msm_gpu *gpu);
0608 void msm_devfreq_suspend(struct msm_gpu *gpu);
0609 void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
0610 void msm_devfreq_active(struct msm_gpu *gpu);
0611 void msm_devfreq_idle(struct msm_gpu *gpu);
0612 
0613 int msm_gpu_hw_init(struct msm_gpu *gpu);
0614 
0615 void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
0616 void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
0617 int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
0618         uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
0619 
0620 void msm_gpu_retire(struct msm_gpu *gpu);
0621 void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);
0622 
0623 int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
0624         struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
0625         const char *name, struct msm_gpu_config *config);
0626 
0627 struct msm_gem_address_space *
0628 msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);
0629 
0630 void msm_gpu_cleanup(struct msm_gpu *gpu);
0631 
0632 struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
0633 void __init adreno_register(void);
0634 void __exit adreno_unregister(void);
0635 
0636 static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
0637 {
0638     if (queue)
0639         kref_put(&queue->ref, msm_submitqueue_destroy);
0640 }
0641 
0642 static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
0643 {
0644     struct msm_gpu_state *state = NULL;
0645 
0646     mutex_lock(&gpu->lock);
0647 
0648     if (gpu->crashstate) {
0649         kref_get(&gpu->crashstate->ref);
0650         state = gpu->crashstate;
0651     }
0652 
0653     mutex_unlock(&gpu->lock);
0654 
0655     return state;
0656 }
0657 
0658 static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
0659 {
0660     mutex_lock(&gpu->lock);
0661 
0662     if (gpu->crashstate) {
0663         if (gpu->funcs->gpu_state_put(gpu->crashstate))
0664             gpu->crashstate = NULL;
0665     }
0666 
0667     mutex_unlock(&gpu->lock);
0668 }
0669 
0670 /*
0671  * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
0672  * support expanded privileges
0673  */
0674 #define check_apriv(gpu, flags) \
0675     (((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))
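
/*
 * Typical use (illustrative sketch, not a verbatim call site): kernel-owned
 * buffers such as ringbuffers can be allocated with something like
 *
 *   msm_gem_kernel_new(gpu->dev, size,
 *                      check_apriv(gpu, MSM_BO_WC | MSM_BO_GPU_READONLY),
 *                      gpu->aspace, &bo, &iova);
 *
 * so that on apriv-capable hardware they are mapped as privileged and are
 * not accessible to unprivileged (userspace-generated) cmdstream.
 */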
0676 
0677 
0678 #endif /* __MSM_GPU_H__ */