0001
0002
0003
0004
0005
0006 #include <linux/sort.h>
0007
0008 #include "i915_drv.h"
0009
0010 #include "intel_gt_requests.h"
0011 #include "i915_selftest.h"
0012 #include "selftest_engine_heartbeat.h"
0013
0014 static void reset_heartbeat(struct intel_engine_cs *engine)
0015 {
0016 intel_engine_set_heartbeat(engine,
0017 engine->defaults.heartbeat_interval_ms);
0018 }
0019
0020 static int timeline_sync(struct intel_timeline *tl)
0021 {
0022 struct dma_fence *fence;
0023 long timeout;
0024
0025 fence = i915_active_fence_get(&tl->last_request);
0026 if (!fence)
0027 return 0;
0028
0029 timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
0030 dma_fence_put(fence);
0031 if (timeout < 0)
0032 return timeout;
0033
0034 return 0;
0035 }
0036
0037 static int engine_sync_barrier(struct intel_engine_cs *engine)
0038 {
0039 return timeline_sync(engine->kernel_context->timeline);
0040 }
0041
0042 struct pulse {
0043 struct i915_active active;
0044 struct kref kref;
0045 };
0046
0047 static int pulse_active(struct i915_active *active)
0048 {
0049 kref_get(&container_of(active, struct pulse, active)->kref);
0050 return 0;
0051 }
0052
0053 static void pulse_free(struct kref *kref)
0054 {
0055 struct pulse *p = container_of(kref, typeof(*p), kref);
0056
0057 i915_active_fini(&p->active);
0058 kfree(p);
0059 }
0060
0061 static void pulse_put(struct pulse *p)
0062 {
0063 kref_put(&p->kref, pulse_free);
0064 }
0065
0066 static void pulse_retire(struct i915_active *active)
0067 {
0068 pulse_put(container_of(active, struct pulse, active));
0069 }
0070
0071 static struct pulse *pulse_create(void)
0072 {
0073 struct pulse *p;
0074
0075 p = kmalloc(sizeof(*p), GFP_KERNEL);
0076 if (!p)
0077 return p;
0078
0079 kref_init(&p->kref);
0080 i915_active_init(&p->active, pulse_active, pulse_retire, 0);
0081
0082 return p;
0083 }
0084
0085 static void pulse_unlock_wait(struct pulse *p)
0086 {
0087 i915_active_unlock_wait(&p->active);
0088 }
0089
0090 static int __live_idle_pulse(struct intel_engine_cs *engine,
0091 int (*fn)(struct intel_engine_cs *cs))
0092 {
0093 struct pulse *p;
0094 int err;
0095
0096 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
0097
0098 p = pulse_create();
0099 if (!p)
0100 return -ENOMEM;
0101
0102 err = i915_active_acquire(&p->active);
0103 if (err)
0104 goto out;
0105
0106 err = i915_active_acquire_preallocate_barrier(&p->active, engine);
0107 if (err) {
0108 i915_active_release(&p->active);
0109 goto out;
0110 }
0111
0112 i915_active_acquire_barrier(&p->active);
0113 i915_active_release(&p->active);
0114
0115 GEM_BUG_ON(i915_active_is_idle(&p->active));
0116 GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
0117
0118 err = fn(engine);
0119 if (err)
0120 goto out;
0121
0122 GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
0123
0124 if (engine_sync_barrier(engine)) {
0125 struct drm_printer m = drm_err_printer("pulse");
0126
0127 pr_err("%s: no heartbeat pulse?\n", engine->name);
0128 intel_engine_dump(engine, &m, "%s", engine->name);
0129
0130 err = -ETIME;
0131 goto out;
0132 }
0133
0134 GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
0135
0136 pulse_unlock_wait(p);
0137
0138 if (!i915_active_is_idle(&p->active)) {
0139 struct drm_printer m = drm_err_printer("pulse");
0140
0141 pr_err("%s: heartbeat pulse did not flush idle tasks\n",
0142 engine->name);
0143 i915_active_print(&p->active, &m);
0144
0145 err = -EINVAL;
0146 goto out;
0147 }
0148
0149 out:
0150 pulse_put(p);
0151 return err;
0152 }
0153
0154 static int live_idle_flush(void *arg)
0155 {
0156 struct intel_gt *gt = arg;
0157 struct intel_engine_cs *engine;
0158 enum intel_engine_id id;
0159 int err = 0;
0160
0161
0162
0163 for_each_engine(engine, gt, id) {
0164 st_engine_heartbeat_disable(engine);
0165 err = __live_idle_pulse(engine, intel_engine_flush_barriers);
0166 st_engine_heartbeat_enable(engine);
0167 if (err)
0168 break;
0169 }
0170
0171 return err;
0172 }
0173
0174 static int live_idle_pulse(void *arg)
0175 {
0176 struct intel_gt *gt = arg;
0177 struct intel_engine_cs *engine;
0178 enum intel_engine_id id;
0179 int err = 0;
0180
0181
0182
0183 for_each_engine(engine, gt, id) {
0184 st_engine_heartbeat_disable(engine);
0185 err = __live_idle_pulse(engine, intel_engine_pulse);
0186 st_engine_heartbeat_enable(engine);
0187 if (err && err != -ENODEV)
0188 break;
0189
0190 err = 0;
0191 }
0192
0193 return err;
0194 }
0195
/*
 * Three-way comparison of two u32 values for sort().
 *
 * Returns a negative value if *_a < *_b, zero if they are equal and a
 * positive value if *_a > *_b. The values are compared explicitly rather
 * than subtracted: an unsigned difference converted to int has the wrong
 * sign whenever the operands are more than INT_MAX apart.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	if (*a < *b)
		return -1;
	if (*a > *b)
		return 1;

	return 0;
}
0202
0203 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
0204 {
0205 const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
0206 struct intel_context *ce;
0207 struct i915_request *rq;
0208 ktime_t t0, t1;
0209 u32 times[5];
0210 int err;
0211 int i;
0212
0213 ce = intel_context_create(engine);
0214 if (IS_ERR(ce))
0215 return PTR_ERR(ce);
0216
0217 intel_engine_pm_get(engine);
0218
0219 err = intel_engine_set_heartbeat(engine, 1);
0220 if (err)
0221 goto err_pm;
0222
0223 for (i = 0; i < ARRAY_SIZE(times); i++) {
0224 do {
0225
0226 intel_engine_park_heartbeat(engine);
0227 GEM_BUG_ON(engine->heartbeat.systole);
0228 engine->serial++;
0229 intel_engine_unpark_heartbeat(engine);
0230
0231 flush_delayed_work(&engine->heartbeat.work);
0232 if (!delayed_work_pending(&engine->heartbeat.work)) {
0233 pr_err("%s: heartbeat %d did not start\n",
0234 engine->name, i);
0235 err = -EINVAL;
0236 goto err_pm;
0237 }
0238
0239 rcu_read_lock();
0240 rq = READ_ONCE(engine->heartbeat.systole);
0241 if (rq)
0242 rq = i915_request_get_rcu(rq);
0243 rcu_read_unlock();
0244 } while (!rq);
0245
0246 t0 = ktime_get();
0247 while (rq == READ_ONCE(engine->heartbeat.systole))
0248 yield();
0249 t1 = ktime_get();
0250
0251 i915_request_put(rq);
0252 times[i] = ktime_us_delta(t1, t0);
0253 }
0254
0255 sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
0256
0257 pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
0258 engine->name,
0259 times[ARRAY_SIZE(times) / 2],
0260 times[0],
0261 times[ARRAY_SIZE(times) - 1]);
0262
0263
0264
0265
0266
0267
0268
0269
0270 if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
0271 pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
0272 engine->name,
0273 times[ARRAY_SIZE(times) / 2],
0274 error_threshold);
0275 err = -EINVAL;
0276 }
0277
0278 reset_heartbeat(engine);
0279 err_pm:
0280 intel_engine_pm_put(engine);
0281 intel_context_put(ce);
0282 return err;
0283 }
0284
0285 static int live_heartbeat_fast(void *arg)
0286 {
0287 struct intel_gt *gt = arg;
0288 struct intel_engine_cs *engine;
0289 enum intel_engine_id id;
0290 int err = 0;
0291
0292
0293 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
0294 return 0;
0295
0296 for_each_engine(engine, gt, id) {
0297 err = __live_heartbeat_fast(engine);
0298 if (err)
0299 break;
0300 }
0301
0302 return err;
0303 }
0304
0305 static int __live_heartbeat_off(struct intel_engine_cs *engine)
0306 {
0307 int err;
0308
0309 intel_engine_pm_get(engine);
0310
0311 engine->serial++;
0312 flush_delayed_work(&engine->heartbeat.work);
0313 if (!delayed_work_pending(&engine->heartbeat.work)) {
0314 pr_err("%s: heartbeat not running\n",
0315 engine->name);
0316 err = -EINVAL;
0317 goto err_pm;
0318 }
0319
0320 err = intel_engine_set_heartbeat(engine, 0);
0321 if (err)
0322 goto err_pm;
0323
0324 engine->serial++;
0325 flush_delayed_work(&engine->heartbeat.work);
0326 if (delayed_work_pending(&engine->heartbeat.work)) {
0327 pr_err("%s: heartbeat still running\n",
0328 engine->name);
0329 err = -EINVAL;
0330 goto err_beat;
0331 }
0332
0333 if (READ_ONCE(engine->heartbeat.systole)) {
0334 pr_err("%s: heartbeat still allocated\n",
0335 engine->name);
0336 err = -EINVAL;
0337 goto err_beat;
0338 }
0339
0340 err_beat:
0341 reset_heartbeat(engine);
0342 err_pm:
0343 intel_engine_pm_put(engine);
0344 return err;
0345 }
0346
0347 static int live_heartbeat_off(void *arg)
0348 {
0349 struct intel_gt *gt = arg;
0350 struct intel_engine_cs *engine;
0351 enum intel_engine_id id;
0352 int err = 0;
0353
0354
0355 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
0356 return 0;
0357
0358 for_each_engine(engine, gt, id) {
0359 if (!intel_engine_has_preemption(engine))
0360 continue;
0361
0362 err = __live_heartbeat_off(engine);
0363 if (err)
0364 break;
0365 }
0366
0367 return err;
0368 }
0369
0370 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
0371 {
0372 static const struct i915_subtest tests[] = {
0373 SUBTEST(live_idle_flush),
0374 SUBTEST(live_idle_pulse),
0375 SUBTEST(live_heartbeat_fast),
0376 SUBTEST(live_heartbeat_off),
0377 };
0378 int saved_hangcheck;
0379 int err;
0380
0381 if (intel_gt_is_wedged(to_gt(i915)))
0382 return 0;
0383
0384 saved_hangcheck = i915->params.enable_hangcheck;
0385 i915->params.enable_hangcheck = INT_MAX;
0386
0387 err = intel_gt_live_subtests(tests, to_gt(i915));
0388
0389 i915->params.enable_hangcheck = saved_hangcheck;
0390 return err;
0391 }
0392
0393 void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
0394 {
0395 engine->props.heartbeat_interval_ms = 0;
0396
0397 intel_engine_pm_get(engine);
0398 intel_engine_park_heartbeat(engine);
0399 }
0400
0401 void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
0402 {
0403 intel_engine_pm_put(engine);
0404
0405 engine->props.heartbeat_interval_ms =
0406 engine->defaults.heartbeat_interval_ms;
0407 }
0408
0409 void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
0410 {
0411 engine->props.heartbeat_interval_ms = 0;
0412
0413
0414
0415
0416
0417
0418
0419 if (intel_engine_pm_get_if_awake(engine)) {
0420 intel_engine_park_heartbeat(engine);
0421 intel_engine_pm_put(engine);
0422 }
0423 }
0424
0425 void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
0426 {
0427 engine->props.heartbeat_interval_ms =
0428 engine->defaults.heartbeat_interval_ms;
0429 }