0001
0002
0003
0004
0005
0006
0007 #ifndef _I915_GPU_ERROR_H_
0008 #define _I915_GPU_ERROR_H_
0009
0010 #include <linux/atomic.h>
0011 #include <linux/kref.h>
0012 #include <linux/ktime.h>
0013 #include <linux/sched.h>
0014
0015 #include <drm/drm_mm.h>
0016
0017 #include "gt/intel_engine.h"
0018 #include "gt/intel_gt_types.h"
0019 #include "gt/uc/intel_uc_fw.h"
0020
0021 #include "intel_device_info.h"
0022
0023 #include "i915_gem.h"
0024 #include "i915_gem_gtt.h"
0025 #include "i915_params.h"
0026 #include "i915_scheduler.h"
0027
0028 struct drm_i915_private;
0029 struct i915_vma_compress;
0030 struct intel_engine_capture_vma;
0031 struct intel_overlay_error_state;
0032
0033 struct i915_vma_coredump {
0034 struct i915_vma_coredump *next;
0035
0036 char name[20];
0037
0038 u64 gtt_offset;
0039 u64 gtt_size;
0040 u32 gtt_page_sizes;
0041
0042 int unused;
0043 struct list_head page_list;
0044 };
0045
0046 struct i915_request_coredump {
0047 unsigned long flags;
0048 pid_t pid;
0049 u32 context;
0050 u32 seqno;
0051 u32 head;
0052 u32 tail;
0053 struct i915_sched_attr sched_attr;
0054 };
0055
0056 struct __guc_capture_parsed_output;
0057
0058 struct intel_engine_coredump {
0059 const struct intel_engine_cs *engine;
0060
0061 bool hung;
0062 bool simulated;
0063 u32 reset_count;
0064
0065
0066 u32 rq_head, rq_post, rq_tail;
0067
0068
0069 u32 ccid;
0070 u32 start;
0071 u32 tail;
0072 u32 head;
0073 u32 ctl;
0074 u32 mode;
0075 u32 hws;
0076 u32 ipeir;
0077 u32 ipehr;
0078 u32 esr;
0079 u32 bbstate;
0080 u32 instpm;
0081 u32 instps;
0082 u64 bbaddr;
0083 u64 acthd;
0084 u32 fault_reg;
0085 u64 faddr;
0086 u32 rc_psmi;
0087 u32 nopid;
0088 u32 excc;
0089 u32 cmd_cctl;
0090 u32 cscmdop;
0091 u32 ctx_sr_ctl;
0092 u32 dma_faddr_hi;
0093 u32 dma_faddr_lo;
0094 struct intel_instdone instdone;
0095
0096
0097 struct intel_guc_state_capture *capture;
0098 struct __guc_capture_parsed_output *guc_capture_node;
0099
0100 struct i915_gem_context_coredump {
0101 char comm[TASK_COMM_LEN];
0102
0103 u64 total_runtime;
0104 u64 avg_runtime;
0105
0106 pid_t pid;
0107 int active;
0108 int guilty;
0109 struct i915_sched_attr sched_attr;
0110 } context;
0111
0112 struct i915_vma_coredump *vma;
0113
0114 struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
0115 unsigned int num_ports;
0116
0117 struct {
0118 u32 gfx_mode;
0119 union {
0120 u64 pdp[4];
0121 u32 pp_dir_base;
0122 };
0123 } vm_info;
0124
0125 struct intel_engine_coredump *next;
0126 };
0127
0128 struct intel_gt_coredump {
0129 const struct intel_gt *_gt;
0130 bool awake;
0131 bool simulated;
0132
0133 struct intel_gt_info info;
0134
0135
0136 u32 eir;
0137 u32 pgtbl_er;
0138 u32 ier;
0139 u32 gtier[6], ngtier;
0140 u32 forcewake;
0141 u32 error;
0142 u32 err_int;
0143 u32 fault_data0;
0144 u32 fault_data1;
0145 u32 done_reg;
0146 u32 gac_eco;
0147 u32 gam_ecochk;
0148 u32 gab_ctl;
0149 u32 gfx_mode;
0150 u32 gtt_cache;
0151 u32 aux_err;
0152 u32 gam_done;
0153
0154
0155 u32 derrmr;
0156 u32 sfc_done[I915_MAX_SFC];
0157
0158 u32 nfence;
0159 u64 fence[I915_MAX_NUM_FENCES];
0160
0161 struct intel_engine_coredump *engine;
0162
0163 struct intel_uc_coredump {
0164 struct intel_uc_fw guc_fw;
0165 struct intel_uc_fw huc_fw;
0166 struct i915_vma_coredump *guc_log;
0167 bool is_guc_capture;
0168 } *uc;
0169
0170 struct intel_gt_coredump *next;
0171 };
0172
0173 struct i915_gpu_coredump {
0174 struct kref ref;
0175 ktime_t time;
0176 ktime_t boottime;
0177 ktime_t uptime;
0178 unsigned long capture;
0179
0180 struct drm_i915_private *i915;
0181
0182 struct intel_gt_coredump *gt;
0183
0184 char error_msg[128];
0185 bool simulated;
0186 bool wakelock;
0187 bool suspended;
0188 int iommu;
0189 u32 reset_count;
0190 u32 suspend_count;
0191
0192 struct intel_device_info device_info;
0193 struct intel_runtime_info runtime_info;
0194 struct intel_driver_caps driver_caps;
0195 struct i915_params params;
0196
0197 struct intel_overlay_error_state *overlay;
0198
0199 struct scatterlist *sgl, *fit;
0200 };
0201
0202 struct i915_gpu_error {
0203
0204 spinlock_t lock;
0205
0206 struct i915_gpu_coredump *first_error;
0207
0208 atomic_t pending_fb_pin;
0209
0210
0211 atomic_t reset_count;
0212
0213
0214 atomic_t reset_engine_count[I915_NUM_ENGINES];
0215 };
0216
0217 struct drm_i915_error_state_buf {
0218 struct drm_i915_private *i915;
0219 struct scatterlist *sgl, *cur, *end;
0220
0221 char *buf;
0222 size_t bytes;
0223 size_t size;
0224 loff_t iter;
0225
0226 int err;
0227 };
0228
0229 static inline u32 i915_reset_count(struct i915_gpu_error *error)
0230 {
0231 return atomic_read(&error->reset_count);
0232 }
0233
0234 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
0235 const struct intel_engine_cs *engine)
0236 {
0237 return atomic_read(&error->reset_engine_count[engine->uabi_class]);
0238 }
0239
/*
 * Flag bits; presumably the values accepted by the u32 dump_flags
 * parameters of the coredump entry points declared in this header.
 */
#define CORE_DUMP_FLAG_NONE		0x0
#define CORE_DUMP_FLAG_IS_GUC_CAPTURE	BIT(0)
0242
0243 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
0244
0245 __printf(2, 3)
0246 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
0247 void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
0248 const struct intel_engine_cs *engine,
0249 const struct i915_vma_coredump *vma);
0250 struct i915_vma_coredump *
0251 intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
0252
0253 struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
0254 intel_engine_mask_t engine_mask, u32 dump_flags);
0255 void i915_capture_error_state(struct intel_gt *gt,
0256 intel_engine_mask_t engine_mask, u32 dump_flags);
0257
0258 struct i915_gpu_coredump *
0259 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
0260
0261 struct intel_gt_coredump *
0262 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
0263
0264 struct intel_engine_coredump *
0265 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
0266
0267 struct intel_engine_capture_vma *
0268 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
0269 struct i915_request *rq,
0270 gfp_t gfp);
0271
0272 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
0273 struct intel_engine_capture_vma *capture,
0274 struct i915_vma_compress *compress);
0275
0276 struct i915_vma_compress *
0277 i915_vma_capture_prepare(struct intel_gt_coredump *gt);
0278
0279 void i915_vma_capture_finish(struct intel_gt_coredump *gt,
0280 struct i915_vma_compress *compress);
0281
0282 void i915_error_state_store(struct i915_gpu_coredump *error);
0283
0284 static inline struct i915_gpu_coredump *
0285 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
0286 {
0287 kref_get(&gpu->ref);
0288 return gpu;
0289 }
0290
0291 ssize_t
0292 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
0293 char *buf, loff_t offset, size_t count);
0294
0295 void __i915_gpu_coredump_free(struct kref *kref);
0296 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
0297 {
0298 if (gpu)
0299 kref_put(&gpu->ref, __i915_gpu_coredump_free);
0300 }
0301
/*
 * Access to the per-device stored error state (declarations only; defined
 * outside this header).
 */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915);

void i915_reset_error_state(struct drm_i915_private *i915);

void i915_disable_error_state(struct drm_i915_private *i915, int err);
0305
0306 #else
0307
0308 __printf(2, 3)
0309 static inline void
0310 i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
0311 {
0312 }
0313
0314 static inline void
0315 i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
0316 {
0317 }
0318
0319 static inline struct i915_gpu_coredump *
0320 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
0321 {
0322 return NULL;
0323 }
0324
0325 static inline struct intel_gt_coredump *
0326 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
0327 {
0328 return NULL;
0329 }
0330
0331 static inline struct intel_engine_coredump *
0332 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
0333 {
0334 return NULL;
0335 }
0336
0337 static inline struct intel_engine_capture_vma *
0338 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
0339 struct i915_request *rq,
0340 gfp_t gfp)
0341 {
0342 return NULL;
0343 }
0344
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: ignores all arguments. */
static inline void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
						 struct intel_engine_capture_vma *capture,
						 struct i915_vma_compress *compress)
{
}
0351
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always yields NULL. */
static inline struct i915_vma_compress *i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}
0357
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: nothing to release. */
static inline void i915_vma_capture_finish(struct intel_gt_coredump *gt,
					   struct i915_vma_compress *compress)
{
}
0363
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @error is ignored. */
static inline void i915_error_state_store(struct i915_gpu_coredump *error)
{
}
0368
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @gpu is never
 * touched, so any pointer value is accepted.
 */
static inline void
i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
0372
0373 static inline struct i915_gpu_coredump *
0374 i915_first_error_state(struct drm_i915_private *i915)
0375 {
0376 return ERR_PTR(-ENODEV);
0377 }
0378
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: nothing to reset. */
static inline void
i915_reset_error_state(struct drm_i915_private *i915)
{
}
0382
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @err is ignored. */
static inline void
i915_disable_error_state(struct drm_i915_private *i915, int err)
{
}
0387
0388 #endif
0389
0390 #endif