0001
0002
0003
0004
0005
0006 #include "i915_drv.h"
0007 #include "i915_request.h"
0008
0009 #include "intel_context.h"
0010 #include "intel_engine_heartbeat.h"
0011 #include "intel_engine_pm.h"
0012 #include "intel_engine.h"
0013 #include "intel_gt.h"
0014 #include "intel_reset.h"
0015
0016
0017
0018
0019
0020
0021
0022
0023 static bool next_heartbeat(struct intel_engine_cs *engine)
0024 {
0025 long delay;
0026
0027 delay = READ_ONCE(engine->props.heartbeat_interval_ms);
0028 if (!delay)
0029 return false;
0030
0031 delay = msecs_to_jiffies_timeout(delay);
0032 if (delay >= HZ)
0033 delay = round_jiffies_up_relative(delay);
0034 mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1);
0035
0036 return true;
0037 }
0038
0039 static struct i915_request *
0040 heartbeat_create(struct intel_context *ce, gfp_t gfp)
0041 {
0042 struct i915_request *rq;
0043
0044 intel_context_enter(ce);
0045 rq = __i915_request_create(ce, gfp);
0046 intel_context_exit(ce);
0047
0048 return rq;
0049 }
0050
0051 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
0052 {
0053 engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
0054 i915_request_add_active_barriers(rq);
0055 if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
0056 engine->heartbeat.systole = i915_request_get(rq);
0057 }
0058
0059 static void heartbeat_commit(struct i915_request *rq,
0060 const struct i915_sched_attr *attr)
0061 {
0062 idle_pulse(rq->engine, rq);
0063
0064 __i915_request_commit(rq);
0065 __i915_request_queue(rq, attr);
0066 }
0067
0068 static void show_heartbeat(const struct i915_request *rq,
0069 struct intel_engine_cs *engine)
0070 {
0071 struct drm_printer p = drm_debug_printer("heartbeat");
0072
0073 if (!rq) {
0074 intel_engine_dump(engine, &p,
0075 "%s heartbeat not ticking\n",
0076 engine->name);
0077 } else {
0078 intel_engine_dump(engine, &p,
0079 "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
0080 engine->name,
0081 rq->fence.context,
0082 rq->fence.seqno,
0083 rq->sched.attr.priority);
0084 }
0085 }
0086
0087 static void
0088 reset_engine(struct intel_engine_cs *engine, struct i915_request *rq)
0089 {
0090 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
0091 show_heartbeat(rq, engine);
0092
0093 if (intel_engine_uses_guc(engine))
0094
0095
0096
0097
0098
0099 intel_guc_find_hung_context(engine);
0100
0101 intel_gt_handle_error(engine->gt, engine->mask,
0102 I915_ERROR_CAPTURE,
0103 "stopped heartbeat on %s",
0104 engine->name);
0105 }
0106
/*
 * Delayed-work callback for engine->heartbeat.work.
 *
 * @wrk is the work_struct embedded at heartbeat.work.work inside an
 * intel_engine_cs; the engine is recovered with container_of().
 *
 * Each invocation does one of the following, and then (unless it returned
 * early because the engine was not awake) tries to re-arm itself through
 * next_heartbeat():
 *  - nothing, if the GT is wedged or the engine has seen no new activity
 *    since the last pulse (wakeref_serial == serial);
 *  - reset the engine, if its scheduler is disabled;
 *  - if a previous pulse is still outstanding and overdue: leave it alone
 *    when it has not been submitted yet, otherwise raise its priority one
 *    step, or reset the engine once no further step is available;
 *  - otherwise emit a fresh minimum-priority pulse on the kernel context.
 */
static void heartbeat(struct work_struct *wrk)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
	struct intel_engine_cs *engine =
		container_of(wrk, typeof(*engine), heartbeat.work.work);
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long serial;

	/* Flush pending submission before inspecting the previous pulse. */
	intel_engine_flush_submission(engine);

	/*
	 * Release the tracked pulse once it has completed. Note that rq keeps
	 * the stale pointer afterwards; it is only dereferenced again below
	 * when engine->heartbeat.systole is still set.
	 */
	rq = engine->heartbeat.systole;
	if (rq && i915_request_completed(rq)) {
		i915_request_put(rq);
		engine->heartbeat.systole = NULL;
	}

	/* Engine not awake: nothing to check and the worker is not re-armed. */
	if (!intel_engine_pm_get_if_awake(engine))
		return;

	if (intel_gt_is_wedged(engine->gt))
		goto out;

	/* The scheduler for this engine is disabled: go straight to reset. */
	if (i915_sched_engine_disabled(engine->sched_engine)) {
		reset_engine(engine, engine->heartbeat.systole);
		goto out;
	}

	if (engine->heartbeat.systole) {
		long delay = READ_ONCE(engine->props.heartbeat_interval_ms);

		/*
		 * The previous pulse has not completed. Do nothing unless a
		 * full heartbeat interval has elapsed since it was emitted (or
		 * since we last acted on it, see emitted_jiffies below).
		 *
		 * NOTE(review): rq was sampled from systole at the top of the
		 * function; this relies on systole not having been replaced in
		 * the meantime (idle_pulse() also writes it) -- confirm.
		 */
		if (!time_after(jiffies,
				rq->emitted_jiffies + msecs_to_jiffies(delay)))
			goto out;

		if (!i915_sw_fence_signaled(&rq->submit)) {
			/*
			 * The pulse's submit fence has not signaled yet, i.e.
			 * it has not been submitted. Deliberately take no
			 * action here; only the timestamp is refreshed below.
			 */
		} else if (engine->sched_engine->schedule &&
			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
			/*
			 * Raise the pulse's priority one step per overdue
			 * interval: to 0 first, then to
			 * I915_PRIORITY_HEARTBEAT if it is already at or
			 * above 0, then to I915_PRIORITY_BARRIER if it is
			 * already at or above I915_PRIORITY_HEARTBEAT.
			 */
			attr.priority = 0;
			if (rq->sched.attr.priority >= attr.priority)
				attr.priority = I915_PRIORITY_HEARTBEAT;
			if (rq->sched.attr.priority >= attr.priority)
				attr.priority = I915_PRIORITY_BARRIER;

			/* The schedule callback is invoked with bottom halves off. */
			local_bh_disable();
			engine->sched_engine->schedule(rq, &attr);
			local_bh_enable();
		} else {
			/*
			 * Either there is no schedule callback or the pulse is
			 * already at barrier priority: declare the heartbeat
			 * stopped.
			 */
			reset_engine(engine, rq);
		}

		/* Restart the interval measured by the time_after() check above. */
		rq->emitted_jiffies = jiffies;
		goto out;
	}

	/* No new activity on the engine since the last pulse: skip this beat. */
	serial = READ_ONCE(engine->serial);
	if (engine->wakeref_serial == serial)
		goto out;

	if (!mutex_trylock(&ce->timeline->mutex)) {
		/*
		 * Could not take the kernel context's timeline lock. Remember
		 * the serial we were blocked at; if we were already blocked at
		 * this same serial on the previous attempt, report an error.
		 */
		if (xchg(&engine->heartbeat.blocked, serial) == serial)
			intel_gt_handle_error(engine->gt, engine->mask,
					      I915_ERROR_CAPTURE,
					      "no heartbeat on %s",
					      engine->name);
		goto out;
	}

	/* Non-blocking allocation; on failure simply skip this beat. */
	rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
	if (IS_ERR(rq))
		goto unlock;

	heartbeat_commit(rq, &attr);

unlock:
	mutex_unlock(&ce->timeline->mutex);
out:
	/*
	 * If hangcheck is disabled by module parameter, or the interval is
	 * zero so the worker was not re-armed, drop the tracked pulse.
	 */
	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
	intel_engine_pm_put(engine);
}
0207
0208 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
0209 {
0210 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
0211 return;
0212
0213 next_heartbeat(engine);
0214 }
0215
0216 void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
0217 {
0218 if (cancel_delayed_work(&engine->heartbeat.work))
0219 i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
0220 }
0221
0222 void intel_gt_unpark_heartbeats(struct intel_gt *gt)
0223 {
0224 struct intel_engine_cs *engine;
0225 enum intel_engine_id id;
0226
0227 for_each_engine(engine, gt, id)
0228 if (intel_engine_pm_is_awake(engine))
0229 intel_engine_unpark_heartbeat(engine);
0230 }
0231
0232 void intel_gt_park_heartbeats(struct intel_gt *gt)
0233 {
0234 struct intel_engine_cs *engine;
0235 enum intel_engine_id id;
0236
0237 for_each_engine(engine, gt, id)
0238 intel_engine_park_heartbeat(engine);
0239 }
0240
/*
 * Initialise the heartbeat delayed work of @engine with heartbeat() as its
 * callback. The work is only initialised here, not scheduled.
 */
void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
{
	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
}
0245
0246 static int __intel_engine_pulse(struct intel_engine_cs *engine)
0247 {
0248 struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
0249 struct intel_context *ce = engine->kernel_context;
0250 struct i915_request *rq;
0251
0252 lockdep_assert_held(&ce->timeline->mutex);
0253 GEM_BUG_ON(!intel_engine_has_preemption(engine));
0254 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
0255
0256 rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
0257 if (IS_ERR(rq))
0258 return PTR_ERR(rq);
0259
0260 __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
0261
0262 heartbeat_commit(rq, &attr);
0263 GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
0264
0265 return 0;
0266 }
0267
0268 static unsigned long set_heartbeat(struct intel_engine_cs *engine,
0269 unsigned long delay)
0270 {
0271 unsigned long old;
0272
0273 old = xchg(&engine->props.heartbeat_interval_ms, delay);
0274 if (delay)
0275 intel_engine_unpark_heartbeat(engine);
0276 else
0277 intel_engine_park_heartbeat(engine);
0278
0279 return old;
0280 }
0281
0282 int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
0283 unsigned long delay)
0284 {
0285 struct intel_context *ce = engine->kernel_context;
0286 int err = 0;
0287
0288 if (!delay && !intel_engine_has_preempt_reset(engine))
0289 return -ENODEV;
0290
0291 intel_engine_pm_get(engine);
0292
0293 err = mutex_lock_interruptible(&ce->timeline->mutex);
0294 if (err)
0295 goto out_rpm;
0296
0297 if (delay != engine->props.heartbeat_interval_ms) {
0298 unsigned long saved = set_heartbeat(engine, delay);
0299
0300
0301 if (intel_engine_has_preemption(engine)) {
0302 err = __intel_engine_pulse(engine);
0303 if (err)
0304 set_heartbeat(engine, saved);
0305 }
0306 }
0307
0308 mutex_unlock(&ce->timeline->mutex);
0309
0310 out_rpm:
0311 intel_engine_pm_put(engine);
0312 return err;
0313 }
0314
0315 int intel_engine_pulse(struct intel_engine_cs *engine)
0316 {
0317 struct intel_context *ce = engine->kernel_context;
0318 int err;
0319
0320 if (!intel_engine_has_preemption(engine))
0321 return -ENODEV;
0322
0323 if (!intel_engine_pm_get_if_awake(engine))
0324 return 0;
0325
0326 err = -EINTR;
0327 if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
0328 err = __intel_engine_pulse(engine);
0329 mutex_unlock(&ce->timeline->mutex);
0330 }
0331
0332 intel_engine_flush_submission(engine);
0333 intel_engine_pm_put(engine);
0334 return err;
0335 }
0336
0337 int intel_engine_flush_barriers(struct intel_engine_cs *engine)
0338 {
0339 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
0340 struct intel_context *ce = engine->kernel_context;
0341 struct i915_request *rq;
0342 int err;
0343
0344 if (llist_empty(&engine->barrier_tasks))
0345 return 0;
0346
0347 if (!intel_engine_pm_get_if_awake(engine))
0348 return 0;
0349
0350 if (mutex_lock_interruptible(&ce->timeline->mutex)) {
0351 err = -EINTR;
0352 goto out_rpm;
0353 }
0354
0355 rq = heartbeat_create(ce, GFP_KERNEL);
0356 if (IS_ERR(rq)) {
0357 err = PTR_ERR(rq);
0358 goto out_unlock;
0359 }
0360
0361 heartbeat_commit(rq, &attr);
0362
0363 err = 0;
0364 out_unlock:
0365 mutex_unlock(&ce->timeline->mutex);
0366 out_rpm:
0367 intel_engine_pm_put(engine);
0368 return err;
0369 }
0370
0371 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
0372 #include "selftest_engine_heartbeat.c"
0373 #endif