Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * SPDX-License-Identifier: MIT
0003  *
0004  * Copyright © 2008-2018 Intel Corporation
0005  */
0006 
0007 #ifndef _I915_GPU_ERROR_H_
0008 #define _I915_GPU_ERROR_H_
0009 
0010 #include <linux/atomic.h>
0011 #include <linux/kref.h>
0012 #include <linux/ktime.h>
0013 #include <linux/sched.h>
0014 
0015 #include <drm/drm_mm.h>
0016 
0017 #include "gt/intel_engine.h"
0018 #include "gt/intel_gt_types.h"
0019 #include "gt/uc/intel_uc_fw.h"
0020 
0021 #include "intel_device_info.h"
0022 
0023 #include "i915_gem.h"
0024 #include "i915_gem_gtt.h"
0025 #include "i915_params.h"
0026 #include "i915_scheduler.h"
0027 
/* Forward declarations; this header refers to these types only through pointers. */
0028 struct drm_i915_private;
0029 struct i915_vma_compress;
0030 struct intel_engine_capture_vma;
0031 struct intel_overlay_error_state;
0032 
/*
 * Per-VMA record of a coredump (going by the type name).
 *
 * Records point at another record of the same type through @next.
 * struct intel_engine_coredump (vma) and struct intel_uc_coredump (guc_log)
 * hold pointers to records of this type.
 *
 * NOTE(review): this header does not show what is linked on @page_list or
 * how @unused is interpreted; confirm against the capture code.
 */
0033 struct i915_vma_coredump {
0034     struct i915_vma_coredump *next; /* another record of the same type */
0035 
0036     char name[20]; /* fixed 20-byte label; termination rules not shown here */
0037 
0038     u64 gtt_offset; /* presumably the VMA's GTT offset — confirm */
0039     u64 gtt_size; /* presumably the VMA's GTT size — confirm */
0040     u32 gtt_page_sizes;
0041 
0042     int unused;
0043     struct list_head page_list;
0044 };
0045 
/*
 * Per-request record. struct intel_engine_coredump embeds an array of these
 * as execlist[EXECLIST_MAX_PORTS].
 *
 * NOTE(review): the field meanings below are inferred from their names only;
 * nothing in this header fills them in.
 */
0046 struct i915_request_coredump {
0047     unsigned long flags;
0048     pid_t pid;
0049     u32 context;
0050     u32 seqno;
0051     u32 head;
0052     u32 tail;
0053     struct i915_sched_attr sched_attr;
0054 };
0055 
/* Forward declaration; only used below as the pointer type of guc_capture_node. */
0056 struct __guc_capture_parsed_output;
0057 
/*
 * Per-engine record of a coredump.
 *
 * Records point at another record of the same type through @next, and
 * struct intel_gt_coredump holds a pointer to one in its engine member.
 * The nested struct i915_gem_context_coredump is declared inline and
 * instantiated once as @context.
 */
0058 struct intel_engine_coredump {
0059     const struct intel_engine_cs *engine; /* engine this record refers to (read-only pointer) */
0060 
0061     bool hung;
0062     bool simulated;
0063     u32 reset_count;
0064 
0065     /* position of active request inside the ring */
0066     u32 rq_head, rq_post, rq_tail;
0067 
0068     /* Register state */
0069     u32 ccid;
0070     u32 start;
0071     u32 tail;
0072     u32 head;
0073     u32 ctl;
0074     u32 mode;
0075     u32 hws;
0076     u32 ipeir;
0077     u32 ipehr;
0078     u32 esr;
0079     u32 bbstate;
0080     u32 instpm;
0081     u32 instps;
0082     u64 bbaddr;
0083     u64 acthd;
0084     u32 fault_reg;
0085     u64 faddr;
0086     u32 rc_psmi; /* sleep state */
0087     u32 nopid;
0088     u32 excc;
0089     u32 cmd_cctl;
0090     u32 cscmdop;
0091     u32 ctx_sr_ctl;
0092     u32 dma_faddr_hi;
0093     u32 dma_faddr_lo;
0094     struct intel_instdone instdone;
0095 
0096     /* GuC matched capture-lists info */
0097     struct intel_guc_state_capture *capture;
0098     struct __guc_capture_parsed_output *guc_capture_node;
0099 
0100     struct i915_gem_context_coredump {
0101         char comm[TASK_COMM_LEN]; /* sized like a task comm name */
0102 
0103         u64 total_runtime;
0104         u64 avg_runtime;
0105 
0106         pid_t pid;
0107         int active;
0108         int guilty;
0109         struct i915_sched_attr sched_attr;
0110     } context;
0111 
0112     struct i915_vma_coredump *vma; /* pointer to a VMA record; records link via their next member */
0113 
0114     struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
0115     unsigned int num_ports; /* presumably the number of valid execlist[] entries — confirm */
0116 
0117     struct {
0118         u32 gfx_mode;
0119         union { /* anonymous union: pdp[] and pp_dir_base share storage */
0120             u64 pdp[4];
0121             u32 pp_dir_base;
0122         };
0123     } vm_info;
0124 
0125     struct intel_engine_coredump *next; /* another record of the same type */
0126 };
0127 
/*
 * Per-GT record of a coredump.
 *
 * struct i915_gpu_coredump holds a pointer to one of these in its gt member,
 * and records point at another record of the same type through @next.
 * The nested struct intel_uc_coredump is declared inline; @uc is a pointer
 * to one, not an embedded instance.
 */
0128 struct intel_gt_coredump {
0129     const struct intel_gt *_gt; /* GT this record refers to (read-only pointer) */
0130     bool awake;
0131     bool simulated;
0132 
0133     struct intel_gt_info info;
0134 
0135     /* Generic register state */
0136     u32 eir;
0137     u32 pgtbl_er;
0138     u32 ier;
0139     u32 gtier[6], ngtier; /* ngtier presumably counts the valid gtier[] entries — confirm */
0140     u32 forcewake;
0141     u32 error; /* gen6+ */
0142     u32 err_int; /* gen7 */
0143     u32 fault_data0; /* gen8, gen9 */
0144     u32 fault_data1; /* gen8, gen9 */
0145     u32 done_reg;
0146     u32 gac_eco;
0147     u32 gam_ecochk;
0148     u32 gab_ctl;
0149     u32 gfx_mode;
0150     u32 gtt_cache;
0151     u32 aux_err; /* gen12 */
0152     u32 gam_done; /* gen12 */
0153 
0154     /* Display related */
0155     u32 derrmr;
0156     u32 sfc_done[I915_MAX_SFC]; /* gen12 */
0157 
0158     u32 nfence; /* presumably the number of valid fence[] entries — confirm */
0159     u64 fence[I915_MAX_NUM_FENCES];
0160 
0161     struct intel_engine_coredump *engine; /* pointer to an engine record; records link via their next member */
0162 
0163     struct intel_uc_coredump {
0164         struct intel_uc_fw guc_fw;
0165         struct intel_uc_fw huc_fw;
0166         struct i915_vma_coredump *guc_log;
0167         bool is_guc_capture;
0168     } *uc;
0169 
0170     struct intel_gt_coredump *next; /* another record of the same type */
0171 };
0172 
/*
 * Top-level coredump record.
 *
 * Reference counted through @ref. When CONFIG_DRM_I915_CAPTURE_ERROR is
 * enabled, i915_gpu_coredump_get() takes a reference with kref_get() and
 * i915_gpu_coredump_put() drops one with kref_put(), passing
 * __i915_gpu_coredump_free as the release callback.
 *
 * device_info, runtime_info, driver_caps and params are embedded by value,
 * not referenced through pointers.
 */
0173 struct i915_gpu_coredump {
0174     struct kref ref; /* reference count, see above */
0175     ktime_t time;
0176     ktime_t boottime;
0177     ktime_t uptime;
0178     unsigned long capture;
0179 
0180     struct drm_i915_private *i915;
0181 
0182     struct intel_gt_coredump *gt; /* pointer to a GT record; records link via their next member */
0183 
0184     char error_msg[128]; /* fixed 128-byte message buffer */
0185     bool simulated;
0186     bool wakelock;
0187     bool suspended;
0188     int iommu;
0189     u32 reset_count;
0190     u32 suspend_count;
0191 
0192     struct intel_device_info device_info;
0193     struct intel_runtime_info runtime_info;
0194     struct intel_driver_caps driver_caps;
0195     struct i915_params params;
0196 
0197     struct intel_overlay_error_state *overlay;
0198 
0199     struct scatterlist *sgl, *fit; /* NOTE(review): roles not visible in this header — confirm */
0200 };
0201 
/*
 * Reset and error-state bookkeeping.
 *
 * reset_count is read by i915_reset_count() and reset_engine_count[] by
 * i915_reset_engine_count(), both through atomic_read().
 *
 * NOTE(review): reset_engine_count[] has I915_NUM_ENGINES slots, while
 * i915_reset_engine_count() indexes it with engine->uabi_class. Confirm that
 * every uabi_class value is below I915_NUM_ENGINES.
 */
0202 struct i915_gpu_error {
0203     /* For reset and error_state handling. */
0204     spinlock_t lock;
0205     /* Protected by the above dev->gpu_error.lock. */
0206     struct i915_gpu_coredump *first_error;
0207 
0208     atomic_t pending_fb_pin;
0209 
0210     /** Number of times the device has been reset (global) */
0211     atomic_t reset_count;
0212 
0213     /** Number of times an engine has been reset */
0214     atomic_t reset_engine_count[I915_NUM_ENGINES];
0215 };
0216 
/*
 * Output state taken as the first argument by i915_error_printf() and
 * intel_gpu_error_print_vma().
 *
 * NOTE(review): how the scatterlist cursors and the buf/bytes/size/iter
 * fields relate to each other is not visible in this header; confirm
 * against the implementation before relying on any of them.
 */
0217 struct drm_i915_error_state_buf {
0218     struct drm_i915_private *i915;
0219     struct scatterlist *sgl, *cur, *end;
0220 
0221     char *buf;
0222     size_t bytes;
0223     size_t size;
0224     loff_t iter;
0225 
0226     int err; /* presumably a sticky error code — confirm */
0227 };
0228 
/*
 * Return the current value of @error->reset_count.
 *
 * The counter is read with atomic_read() and the result is returned as u32.
 * @error is dereferenced unconditionally, so it must not be NULL.
 */
0229 static inline u32 i915_reset_count(struct i915_gpu_error *error)
0230 {
0231     return atomic_read(&error->reset_count);
0232 }
0233 
/*
 * Return the current value of the reset_engine_count[] slot selected by
 * @engine->uabi_class, read with atomic_read().
 *
 * Both pointers are dereferenced unconditionally. No range check is done on
 * the index.
 * NOTE(review): the array is declared with I915_NUM_ENGINES entries but is
 * indexed by uabi_class here; confirm the index can never exceed the array.
 */
0234 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
0235                       const struct intel_engine_cs *engine)
0236 {
0237     return atomic_read(&error->reset_engine_count[engine->uabi_class]);
0238 }
0239 
/*
 * Coredump flag values: no flags, and bit 0 for "is GuC capture".
 * Presumably these are the values passed in the u32 dump_flags parameters
 * declared further down — confirm against the callers.
 */
0240 #define CORE_DUMP_FLAG_NONE           0x0
0241 #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
0242 
0243 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
0244 
/*
 * printf-style output into @e. __printf(2, 3) tells the compiler that @f is
 * the format string and that the variadic arguments start at position 3, so
 * calls are format-checked. Defined outside this header.
 */
0245 __printf(2, 3)
0246 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
/*
 * Going by the name, prints one VMA record into @m; @engine and @vma are
 * only read (both are const pointers). Defined outside this header.
 */
0247 void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
0248                    const struct intel_engine_cs *engine,
0249                    const struct i915_vma_coredump *vma);
/*
 * Going by the name, looks up the batch VMA record belonging to the engine
 * record @ee, which is only read. NOTE(review): the result when nothing
 * matches is not visible here — confirm. Defined outside this header.
 */
0250 struct i915_vma_coredump *
0251 intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
0252 
/*
 * Capture entry points, both taking the GT, an engine mask and u32
 * dump_flags. i915_gpu_coredump() returns a coredump pointer to the caller;
 * i915_capture_error_state() returns nothing.
 * NOTE(review): failure return values and reference ownership of the
 * returned coredump are not visible here — confirm. Defined outside this
 * header.
 */
0253 struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
0254                         intel_engine_mask_t engine_mask, u32 dump_flags);
0255 void i915_capture_error_state(struct intel_gt *gt,
0256                   intel_engine_mask_t engine_mask, u32 dump_flags);
0257 
/*
 * Allocators for the three record levels (device, GT, engine). Each takes
 * the gfp_t allocation flags to use; the GT and engine variants also take
 * u32 dump_flags. The stubs used when CONFIG_DRM_I915_CAPTURE_ERROR is
 * disabled return NULL, so callers have to cope with a NULL result.
 * Defined outside this header.
 */
0258 struct i915_gpu_coredump *
0259 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
0260 
0261 struct intel_gt_coredump *
0262 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
0263 
0264 struct intel_engine_coredump *
0265 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
0266 
/*
 * intel_engine_coredump_add_request() returns a
 * struct intel_engine_capture_vma pointer, the same type that
 * intel_engine_coredump_add_vma() takes as @capture. Presumably the result
 * of the first call is meant to be handed to the second — confirm.
 * The stub for the first function returns NULL when
 * CONFIG_DRM_I915_CAPTURE_ERROR is disabled. Defined outside this header.
 */
0267 struct intel_engine_capture_vma *
0268 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
0269                   struct i915_request *rq,
0270                   gfp_t gfp);
0271 
0272 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
0273                    struct intel_engine_capture_vma *capture,
0274                    struct i915_vma_compress *compress);
0275 
/*
 * Prepare/finish pair around a struct i915_vma_compress: prepare returns a
 * pointer of the type that finish takes as @compress. Presumably every
 * successful prepare is to be matched by one finish — confirm.
 * The prepare stub returns NULL when CONFIG_DRM_I915_CAPTURE_ERROR is
 * disabled. Defined outside this header.
 */
0276 struct i915_vma_compress *
0277 i915_vma_capture_prepare(struct intel_gt_coredump *gt);
0278 
0279 void i915_vma_capture_finish(struct intel_gt_coredump *gt,
0280                  struct i915_vma_compress *compress);
0281 
/*
 * Going by the name, stores @error as the device's error state.
 * NOTE(review): whether a reference is taken is not visible here — confirm.
 * Defined outside this header.
 */
0282 void i915_error_state_store(struct i915_gpu_coredump *error);
0283 
/*
 * Take an additional reference on @gpu with kref_get() and return @gpu, so
 * the call can be used inline in an expression.
 *
 * @gpu is dereferenced unconditionally, so it must not be NULL; this differs
 * from i915_gpu_coredump_put(), which skips NULL.
 * Only defined when CONFIG_DRM_I915_CAPTURE_ERROR is enabled; the #else
 * branch of this header provides no counterpart.
 */
0284 static inline struct i915_gpu_coredump *
0285 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
0286 {
0287     kref_get(&gpu->ref);
0288     return gpu;
0289 }
0290 
/*
 * Going by the signature, copies up to @count bytes of the rendered coredump
 * into @buf starting at @offset, and returns an ssize_t.
 * NOTE(review): the meaning of the return value (bytes copied vs. negative
 * errno) is not visible here — confirm. Defined outside this header.
 */
0291 ssize_t
0292 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
0293                  char *buf, loff_t offset, size_t count);
0294 
/*
 * kref release callback: i915_gpu_coredump_put() passes this function to
 * kref_put(). Defined outside this header.
 */
0295 void __i915_gpu_coredump_free(struct kref *kref);
/*
 * Drop one reference on @gpu. A NULL @gpu is accepted and ignored.
 *
 * The reference is dropped with kref_put(), which is given
 * __i915_gpu_coredump_free as the release callback.
 * Only NULL is filtered out; any other pointer value is dereferenced.
 */
0296 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
0297 {
0298     if (gpu)
0299         kref_put(&gpu->ref, __i915_gpu_coredump_free);
0300 }
0301 
/*
 * Accessors for the device-wide error state, all keyed by the device.
 * The stub of i915_first_error_state() used when
 * CONFIG_DRM_I915_CAPTURE_ERROR is disabled returns ERR_PTR(-ENODEV), so
 * callers must be prepared for an error pointer and not only for NULL.
 * NOTE(review): whether the returned coredump carries a reference the caller
 * must drop is not visible here — confirm. Defined outside this header.
 */
0302 struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
0303 void i915_reset_error_state(struct drm_i915_private *i915);
0304 void i915_disable_error_state(struct drm_i915_private *i915, int err);
0305 
0306 #else
0307 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: takes the same
 * printf-style arguments as the real function and does nothing with them.
 * __printf(2, 3) is kept so calls are still format-checked.
 */
0308 __printf(2, 3)
0309 static inline void
0310 i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
0311 {
0312 }
0313 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
0314 static inline void
0315 i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
0316 {
0317 }
0318 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always returns NULL. */
0319 static inline struct i915_gpu_coredump *
0320 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
0321 {
0322     return NULL;
0323 }
0324 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always returns NULL. */
0325 static inline struct intel_gt_coredump *
0326 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
0327 {
0328     return NULL;
0329 }
0330 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always returns NULL. */
0331 static inline struct intel_engine_coredump *
0332 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
0333 {
0334     return NULL;
0335 }
0336 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: ignores its
 * arguments and always returns NULL.
 */
0337 static inline struct intel_engine_capture_vma *
0338 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
0339                   struct i915_request *rq,
0340                   gfp_t gfp)
0341 {
0342     return NULL;
0343 }
0344 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
0345 static inline void
0346 intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
0347                   struct intel_engine_capture_vma *capture,
0348                   struct i915_vma_compress *compress)
0349 {
0350 }
0351 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always returns NULL. */
0352 static inline struct i915_vma_compress *
0353 i915_vma_capture_prepare(struct intel_gt_coredump *gt)
0354 {
0355     return NULL;
0356 }
0357 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
0358 static inline void
0359 i915_vma_capture_finish(struct intel_gt_coredump *gt,
0360             struct i915_vma_compress *compress)
0361 {
0362 }
0363 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
0364 static inline void
0365 i915_error_state_store(struct i915_gpu_coredump *error)
0366 {
0367 }
0368 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing,
 * whatever value @gpu has.
 */
0369 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
0370 {
0371 }
0372 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always returns
 * ERR_PTR(-ENODEV). The result is an error pointer rather than NULL, so a
 * plain NULL check on it does not detect the failure.
 */
0373 static inline struct i915_gpu_coredump *
0374 i915_first_error_state(struct drm_i915_private *i915)
0375 {
0376     return ERR_PTR(-ENODEV);
0377 }
0378 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
0379 static inline void i915_reset_error_state(struct drm_i915_private *i915)
0380 {
0381 }
0382 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing; @err is ignored. */
0383 static inline void i915_disable_error_state(struct drm_i915_private *i915,
0384                         int err)
0385 {
0386 }
0387 
0388 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
0389 
0390 #endif /* _I915_GPU_ERROR_H_ */