Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: MIT
0002 /*
0003  * Copyright © 2018 Intel Corporation
0004  */
0005 
0006 #include <linux/sort.h>
0007 
0008 #include "i915_drv.h"
0009 
0010 #include "intel_gt_requests.h"
0011 #include "i915_selftest.h"
0012 #include "selftest_engine_heartbeat.h"
0013 
0014 static void reset_heartbeat(struct intel_engine_cs *engine)
0015 {
0016     intel_engine_set_heartbeat(engine,
0017                    engine->defaults.heartbeat_interval_ms);
0018 }
0019 
0020 static int timeline_sync(struct intel_timeline *tl)
0021 {
0022     struct dma_fence *fence;
0023     long timeout;
0024 
0025     fence = i915_active_fence_get(&tl->last_request);
0026     if (!fence)
0027         return 0;
0028 
0029     timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
0030     dma_fence_put(fence);
0031     if (timeout < 0)
0032         return timeout;
0033 
0034     return 0;
0035 }
0036 
0037 static int engine_sync_barrier(struct intel_engine_cs *engine)
0038 {
0039     return timeline_sync(engine->kernel_context->timeline);
0040 }
0041 
0042 struct pulse {
0043     struct i915_active active;
0044     struct kref kref;
0045 };
0046 
0047 static int pulse_active(struct i915_active *active)
0048 {
0049     kref_get(&container_of(active, struct pulse, active)->kref);
0050     return 0;
0051 }
0052 
0053 static void pulse_free(struct kref *kref)
0054 {
0055     struct pulse *p = container_of(kref, typeof(*p), kref);
0056 
0057     i915_active_fini(&p->active);
0058     kfree(p);
0059 }
0060 
0061 static void pulse_put(struct pulse *p)
0062 {
0063     kref_put(&p->kref, pulse_free);
0064 }
0065 
0066 static void pulse_retire(struct i915_active *active)
0067 {
0068     pulse_put(container_of(active, struct pulse, active));
0069 }
0070 
0071 static struct pulse *pulse_create(void)
0072 {
0073     struct pulse *p;
0074 
0075     p = kmalloc(sizeof(*p), GFP_KERNEL);
0076     if (!p)
0077         return p;
0078 
0079     kref_init(&p->kref);
0080     i915_active_init(&p->active, pulse_active, pulse_retire, 0);
0081 
0082     return p;
0083 }
0084 
0085 static void pulse_unlock_wait(struct pulse *p)
0086 {
0087     i915_active_unlock_wait(&p->active);
0088 }
0089 
0090 static int __live_idle_pulse(struct intel_engine_cs *engine,
0091                  int (*fn)(struct intel_engine_cs *cs))
0092 {
0093     struct pulse *p;
0094     int err;
0095 
0096     GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
0097 
0098     p = pulse_create();
0099     if (!p)
0100         return -ENOMEM;
0101 
0102     err = i915_active_acquire(&p->active);
0103     if (err)
0104         goto out;
0105 
0106     err = i915_active_acquire_preallocate_barrier(&p->active, engine);
0107     if (err) {
0108         i915_active_release(&p->active);
0109         goto out;
0110     }
0111 
0112     i915_active_acquire_barrier(&p->active);
0113     i915_active_release(&p->active);
0114 
0115     GEM_BUG_ON(i915_active_is_idle(&p->active));
0116     GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
0117 
0118     err = fn(engine);
0119     if (err)
0120         goto out;
0121 
0122     GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
0123 
0124     if (engine_sync_barrier(engine)) {
0125         struct drm_printer m = drm_err_printer("pulse");
0126 
0127         pr_err("%s: no heartbeat pulse?\n", engine->name);
0128         intel_engine_dump(engine, &m, "%s", engine->name);
0129 
0130         err = -ETIME;
0131         goto out;
0132     }
0133 
0134     GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
0135 
0136     pulse_unlock_wait(p); /* synchronize with the retirement callback */
0137 
0138     if (!i915_active_is_idle(&p->active)) {
0139         struct drm_printer m = drm_err_printer("pulse");
0140 
0141         pr_err("%s: heartbeat pulse did not flush idle tasks\n",
0142                engine->name);
0143         i915_active_print(&p->active, &m);
0144 
0145         err = -EINVAL;
0146         goto out;
0147     }
0148 
0149 out:
0150     pulse_put(p);
0151     return err;
0152 }
0153 
0154 static int live_idle_flush(void *arg)
0155 {
0156     struct intel_gt *gt = arg;
0157     struct intel_engine_cs *engine;
0158     enum intel_engine_id id;
0159     int err = 0;
0160 
0161     /* Check that we can flush the idle barriers */
0162 
0163     for_each_engine(engine, gt, id) {
0164         st_engine_heartbeat_disable(engine);
0165         err = __live_idle_pulse(engine, intel_engine_flush_barriers);
0166         st_engine_heartbeat_enable(engine);
0167         if (err)
0168             break;
0169     }
0170 
0171     return err;
0172 }
0173 
0174 static int live_idle_pulse(void *arg)
0175 {
0176     struct intel_gt *gt = arg;
0177     struct intel_engine_cs *engine;
0178     enum intel_engine_id id;
0179     int err = 0;
0180 
0181     /* Check that heartbeat pulses flush the idle barriers */
0182 
0183     for_each_engine(engine, gt, id) {
0184         st_engine_heartbeat_disable(engine);
0185         err = __live_idle_pulse(engine, intel_engine_pulse);
0186         st_engine_heartbeat_enable(engine);
0187         if (err && err != -ENODEV)
0188             break;
0189 
0190         err = 0;
0191     }
0192 
0193     return err;
0194 }
0195 
/*
 * sort() comparator ordering u32 values ascending.
 *
 * Returns a negative value, zero or a positive value when *_a is less
 * than, equal to or greater than *_b. The operands are compared rather
 * than subtracted: an unsigned difference converted to int has the
 * wrong sign once the two values are more than INT_MAX apart.
 */
static int cmp_u32(const void *_a, const void *_b)
{
    const u32 *a = _a, *b = _b;

    if (*a < *b)
        return -1;
    if (*a > *b)
        return 1;

    return 0;
}
0202 
0203 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
0204 {
0205     const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
0206     struct intel_context *ce;
0207     struct i915_request *rq;
0208     ktime_t t0, t1;
0209     u32 times[5];
0210     int err;
0211     int i;
0212 
0213     ce = intel_context_create(engine);
0214     if (IS_ERR(ce))
0215         return PTR_ERR(ce);
0216 
0217     intel_engine_pm_get(engine);
0218 
0219     err = intel_engine_set_heartbeat(engine, 1);
0220     if (err)
0221         goto err_pm;
0222 
0223     for (i = 0; i < ARRAY_SIZE(times); i++) {
0224         do {
0225             /* Manufacture a tick */
0226             intel_engine_park_heartbeat(engine);
0227             GEM_BUG_ON(engine->heartbeat.systole);
0228             engine->serial++; /*  pretend we are not idle! */
0229             intel_engine_unpark_heartbeat(engine);
0230 
0231             flush_delayed_work(&engine->heartbeat.work);
0232             if (!delayed_work_pending(&engine->heartbeat.work)) {
0233                 pr_err("%s: heartbeat %d did not start\n",
0234                        engine->name, i);
0235                 err = -EINVAL;
0236                 goto err_pm;
0237             }
0238 
0239             rcu_read_lock();
0240             rq = READ_ONCE(engine->heartbeat.systole);
0241             if (rq)
0242                 rq = i915_request_get_rcu(rq);
0243             rcu_read_unlock();
0244         } while (!rq);
0245 
0246         t0 = ktime_get();
0247         while (rq == READ_ONCE(engine->heartbeat.systole))
0248             yield(); /* work is on the local cpu! */
0249         t1 = ktime_get();
0250 
0251         i915_request_put(rq);
0252         times[i] = ktime_us_delta(t1, t0);
0253     }
0254 
0255     sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
0256 
0257     pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
0258         engine->name,
0259         times[ARRAY_SIZE(times) / 2],
0260         times[0],
0261         times[ARRAY_SIZE(times) - 1]);
0262 
0263     /*
0264      * Ideally, the upper bound on min work delay would be something like
0265      * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
0266      * are, even with system_wq_highpri, at the mercy of the CPU scheduler
0267      * and may be stuck behind some slow work for many millisecond. Such
0268      * as our very own display workers.
0269      */
0270     if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
0271         pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
0272                engine->name,
0273                times[ARRAY_SIZE(times) / 2],
0274                error_threshold);
0275         err = -EINVAL;
0276     }
0277 
0278     reset_heartbeat(engine);
0279 err_pm:
0280     intel_engine_pm_put(engine);
0281     intel_context_put(ce);
0282     return err;
0283 }
0284 
0285 static int live_heartbeat_fast(void *arg)
0286 {
0287     struct intel_gt *gt = arg;
0288     struct intel_engine_cs *engine;
0289     enum intel_engine_id id;
0290     int err = 0;
0291 
0292     /* Check that the heartbeat ticks at the desired rate. */
0293     if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
0294         return 0;
0295 
0296     for_each_engine(engine, gt, id) {
0297         err = __live_heartbeat_fast(engine);
0298         if (err)
0299             break;
0300     }
0301 
0302     return err;
0303 }
0304 
0305 static int __live_heartbeat_off(struct intel_engine_cs *engine)
0306 {
0307     int err;
0308 
0309     intel_engine_pm_get(engine);
0310 
0311     engine->serial++;
0312     flush_delayed_work(&engine->heartbeat.work);
0313     if (!delayed_work_pending(&engine->heartbeat.work)) {
0314         pr_err("%s: heartbeat not running\n",
0315                engine->name);
0316         err = -EINVAL;
0317         goto err_pm;
0318     }
0319 
0320     err = intel_engine_set_heartbeat(engine, 0);
0321     if (err)
0322         goto err_pm;
0323 
0324     engine->serial++;
0325     flush_delayed_work(&engine->heartbeat.work);
0326     if (delayed_work_pending(&engine->heartbeat.work)) {
0327         pr_err("%s: heartbeat still running\n",
0328                engine->name);
0329         err = -EINVAL;
0330         goto err_beat;
0331     }
0332 
0333     if (READ_ONCE(engine->heartbeat.systole)) {
0334         pr_err("%s: heartbeat still allocated\n",
0335                engine->name);
0336         err = -EINVAL;
0337         goto err_beat;
0338     }
0339 
0340 err_beat:
0341     reset_heartbeat(engine);
0342 err_pm:
0343     intel_engine_pm_put(engine);
0344     return err;
0345 }
0346 
0347 static int live_heartbeat_off(void *arg)
0348 {
0349     struct intel_gt *gt = arg;
0350     struct intel_engine_cs *engine;
0351     enum intel_engine_id id;
0352     int err = 0;
0353 
0354     /* Check that we can turn off heartbeat and not interrupt VIP */
0355     if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
0356         return 0;
0357 
0358     for_each_engine(engine, gt, id) {
0359         if (!intel_engine_has_preemption(engine))
0360             continue;
0361 
0362         err = __live_heartbeat_off(engine);
0363         if (err)
0364             break;
0365     }
0366 
0367     return err;
0368 }
0369 
0370 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
0371 {
0372     static const struct i915_subtest tests[] = {
0373         SUBTEST(live_idle_flush),
0374         SUBTEST(live_idle_pulse),
0375         SUBTEST(live_heartbeat_fast),
0376         SUBTEST(live_heartbeat_off),
0377     };
0378     int saved_hangcheck;
0379     int err;
0380 
0381     if (intel_gt_is_wedged(to_gt(i915)))
0382         return 0;
0383 
0384     saved_hangcheck = i915->params.enable_hangcheck;
0385     i915->params.enable_hangcheck = INT_MAX;
0386 
0387     err = intel_gt_live_subtests(tests, to_gt(i915));
0388 
0389     i915->params.enable_hangcheck = saved_hangcheck;
0390     return err;
0391 }
0392 
0393 void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
0394 {
0395     engine->props.heartbeat_interval_ms = 0;
0396 
0397     intel_engine_pm_get(engine);
0398     intel_engine_park_heartbeat(engine);
0399 }
0400 
0401 void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
0402 {
0403     intel_engine_pm_put(engine);
0404 
0405     engine->props.heartbeat_interval_ms =
0406         engine->defaults.heartbeat_interval_ms;
0407 }
0408 
0409 void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
0410 {
0411     engine->props.heartbeat_interval_ms = 0;
0412 
0413     /*
0414      * Park the heartbeat but without holding the PM lock as that
0415      * makes the engines appear not-idle. Note that if/when unpark
0416      * is called due to the PM lock being acquired later the
0417      * heartbeat still won't be enabled because of the above = 0.
0418      */
0419     if (intel_engine_pm_get_if_awake(engine)) {
0420         intel_engine_park_heartbeat(engine);
0421         intel_engine_pm_put(engine);
0422     }
0423 }
0424 
0425 void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
0426 {
0427     engine->props.heartbeat_interval_ms =
0428         engine->defaults.heartbeat_interval_ms;
0429 }