// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "i915_selftest.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "selftest_engine.h"
#include "selftest_engine_heartbeat.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"

#define COUNT 5

static int cmp_u64(const void *A, const void *B)
{
    const u64 *a = A, *b = B;

    if (*a < *b)
        return -1;
    else if (*a > *b)
        return 1;
    return 0;
}

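/*
 * Reduce the COUNT (5) samples to a single value: sort, drop the lowest and
 * highest, and return a weighted average of the middle three,
 * (a[1] + 2*a[2] + a[3]) / 4, so that a single outlying sample does not skew
 * the result.
 */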
static u64 trifilter(u64 *a)
{
    sort(a, COUNT, sizeof(*a), cmp_u64, NULL);
    return (a[1] + 2 * a[2] + a[3]) >> 2;
}

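/*
 * Helpers to build the ring payload: emit_wait() emits an MI_SEMAPHORE_WAIT
 * that polls a dword in the global GTT until the comparison @op against
 * @value succeeds, emit_store() writes an immediate dword to a GGTT address,
 * and emit_srm() copies a register into memory (MI_STORE_REGISTER_MEM).
 */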
static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value)
{
    *cs++ = MI_SEMAPHORE_WAIT |
        MI_SEMAPHORE_GLOBAL_GTT |
        MI_SEMAPHORE_POLL |
        op;
    *cs++ = value;
    *cs++ = offset;
    *cs++ = 0;

    return cs;
}

static u32 *emit_store(u32 *cs, u32 offset, u32 value)
{
    *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
    *cs++ = offset;
    *cs++ = 0;
    *cs++ = value;

    return cs;
}

static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset)
{
    *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
    *cs++ = i915_mmio_reg_offset(reg);
    *cs++ = offset;
    *cs++ = 0;

    return cs;
}

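/* Update the semaphore from the CPU and make the write visible to the GPU. */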
static void write_semaphore(u32 *x, u32 value)
{
    WRITE_ONCE(*x, value);
    wmb();
}

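/*
 * Measure how far RING_TIMESTAMP and CTX_TIMESTAMP advance over a known wall
 * clock interval. The request signals sema[2] and waits for the CPU to clear
 * it, samples both timestamps into the status page, then spins on sema[2]
 * until the CPU sets it again and samples both timestamps a second time.
 * In between, the CPU (with interrupts off) measures ~100us of wall time
 * with local_clock(), so *dt, *d_ring and *d_ctx cover roughly the same
 * interval.
 */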
static int __measure_timestamps(struct intel_context *ce,
                u64 *dt, u64 *d_ring, u64 *d_ctx)
{
    struct intel_engine_cs *engine = ce->engine;
    u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5);
    u32 offset = i915_ggtt_offset(engine->status_page.vma);
    struct i915_request *rq;
    u32 *cs;

    rq = intel_context_create_request(ce);
    if (IS_ERR(rq))
        return PTR_ERR(rq);

    cs = intel_ring_begin(rq, 28);
    if (IS_ERR(cs)) {
        i915_request_add(rq);
        return PTR_ERR(cs);
    }

    /* Signal & wait for start */
    cs = emit_store(cs, offset + 4008, 1);
    cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1);

    cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000);
    cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004);

    /* Busy wait */
    cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1);

    cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016);
    cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012);

    intel_ring_advance(rq, cs);
    i915_request_get(rq);
    i915_request_add(rq);
    intel_engine_flush_submission(engine);

    /* Wait for the request to start executing, which then waits for us */
    while (READ_ONCE(sema[2]) == 0)
        cpu_relax();

    /* Run the request for ~100us, sampling timestamps before/after */
    local_irq_disable();
    write_semaphore(&sema[2], 0);
    while (READ_ONCE(sema[1]) == 0) /* wait for the gpu to catch up */
        cpu_relax();
    *dt = local_clock();
    udelay(100);
    *dt = local_clock() - *dt;
    write_semaphore(&sema[2], 1);
    local_irq_enable();

    if (i915_request_wait(rq, 0, HZ / 2) < 0) {
        i915_request_put(rq);
        return -ETIME;
    }
    i915_request_put(rq);

    pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n",
         engine->name, sema[1], sema[3], sema[0], sema[4]);

    *d_ctx = sema[3] - sema[1];
    *d_ring = sema[4] - sema[0];
    return 0;
}

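/*
 * Sample the timestamps COUNT times and trifilter each series, then check:
 * (a) the RING_TIMESTAMP delta converted to ns agrees with the measured wall
 *     time (the two must be within a 3:4 ratio of each other), and
 * (b) CTX_TIMESTAMP and RING_TIMESTAMP advance at the same rate, comparing
 *     d_ctx * cs-clock against d_ring * ctx-clock (gen11 is assumed to use a
 *     fixed 12.5MHz, i.e. 80ns, context timestamp).
 */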
static int __live_engine_timestamps(struct intel_engine_cs *engine)
{
    u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt;
    struct intel_context *ce;
    int i, err = 0;

    ce = intel_context_create(engine);
    if (IS_ERR(ce))
        return PTR_ERR(ce);

    for (i = 0; i < COUNT; i++) {
        err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]);
        if (err)
            break;
    }
    intel_context_put(ce);
    if (err)
        return err;

    dt = trifilter(st);
    d_ring = trifilter(s_ring);
    d_ctx = trifilter(s_ctx);

    pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n",
        engine->name, dt,
        intel_gt_clock_interval_to_ns(engine->gt, d_ctx),
        intel_gt_clock_interval_to_ns(engine->gt, d_ring));

    d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring);
    if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) {
        pr_err("%s Mismatch between ring timestamp and walltime!\n",
               engine->name);
        return -EINVAL;
    }

    d_ring = trifilter(s_ring);
    d_ctx = trifilter(s_ctx);

    d_ctx *= engine->gt->clock_frequency;
    if (GRAPHICS_VER(engine->i915) == 11)
        d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? */
    else
        d_ring *= engine->gt->clock_frequency;

    if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) {
        pr_err("%s Mismatch between ring and context timestamps!\n",
               engine->name);
        return -EINVAL;
    }

    return 0;
}

static int live_engine_timestamps(void *arg)
{
    struct intel_gt *gt = arg;
    struct intel_engine_cs *engine;
    enum intel_engine_id id;

    /*
     * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share
     * the same CS clock.
     */

    if (GRAPHICS_VER(gt->i915) < 8)
        return 0;

    for_each_engine(engine, gt, id) {
        int err;

        st_engine_heartbeat_disable(engine);
        err = __live_engine_timestamps(engine);
        st_engine_heartbeat_enable(engine);
        if (err)
            return err;
    }

    return 0;
}

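/*
 * Wait for the reported busyness to move past the caller's @busyness
 * snapshot; only needed with GuC submission, where the stats are updated
 * asynchronously after the batch starts running.
 */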
static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
{
    ktime_t start, unused, dt;

    if (!intel_engine_uses_guc(engine))
        return 0;

    /*
     * In GuC mode of submission, the busyness stats may get updated after
     * the batch starts running. Poll for a change in busyness and time out
     * after 10ms.
     */
    start = ktime_get();
    while (intel_engine_get_busy_time(engine, &unused) == busyness) {
        dt = ktime_get() - start;
        if (dt > 10000000) {
            pr_err("active wait timed out %lld\n", dt);
            ENGINE_TRACE(engine, "active wait timed out %lld\n", dt);
            return -ETIME;
        }
    }

    return 0;
}

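/*
 * For each engine that exposes busy-stats: with the engine idle, the
 * busyness reported over a ~100us window must be (almost) zero; with a
 * spinner running, the busyness reported over a 10ms window must track the
 * wall clock to within ~5%.
 */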
static int live_engine_busy_stats(void *arg)
{
    struct intel_gt *gt = arg;
    struct intel_engine_cs *engine;
    enum intel_engine_id id;
    struct igt_spinner spin;
    int err = 0;

    /*
     * Check that if an engine supports busy-stats, they tell the truth.
     */

    if (igt_spinner_init(&spin, gt))
        return -ENOMEM;

    GEM_BUG_ON(intel_gt_pm_is_awake(gt));
    for_each_engine(engine, gt, id) {
        struct i915_request *rq;
        ktime_t busyness, dummy;
        ktime_t de, dt;
        ktime_t t[2];

        if (!intel_engine_supports_stats(engine))
            continue;

        if (!intel_engine_can_store_dword(engine))
            continue;

        if (intel_gt_pm_wait_for_idle(gt)) {
            err = -EBUSY;
            break;
        }

        st_engine_heartbeat_disable(engine);

        ENGINE_TRACE(engine, "measuring idle time\n");
        preempt_disable();
        de = intel_engine_get_busy_time(engine, &t[0]);
        udelay(100);
        de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
        preempt_enable();
        dt = ktime_sub(t[1], t[0]);
        if (de < 0 || de > 10) {
            pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n",
                   engine->name,
                   de, (int)div64_u64(100 * de, dt), dt);
            GEM_TRACE_DUMP();
            err = -EINVAL;
            goto end;
        }

        /* 100% busy */
        rq = igt_spinner_create_request(&spin,
                        engine->kernel_context,
                        MI_NOOP);
        if (IS_ERR(rq)) {
            err = PTR_ERR(rq);
            goto end;
        }
        i915_request_add(rq);

        busyness = intel_engine_get_busy_time(engine, &dummy);
        if (!igt_wait_for_spinner(&spin, rq)) {
            intel_gt_set_wedged(engine->gt);
            err = -ETIME;
            goto end;
        }

        err = __spin_until_busier(engine, busyness);
        if (err) {
            GEM_TRACE_DUMP();
            goto end;
        }

        ENGINE_TRACE(engine, "measuring busy time\n");
        preempt_disable();
        de = intel_engine_get_busy_time(engine, &t[0]);
        mdelay(10);
        de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
        preempt_enable();
        dt = ktime_sub(t[1], t[0]);
        if (100 * de < 95 * dt || 95 * de > 100 * dt) {
            pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n",
                   engine->name,
                   de, (int)div64_u64(100 * de, dt), dt);
            GEM_TRACE_DUMP();
            err = -EINVAL;
            goto end;
        }

end:
        st_engine_heartbeat_enable(engine);
        igt_spinner_end(&spin);
        if (igt_flush_test(gt->i915))
            err = -EIO;
        if (err)
            break;
    }

    igt_spinner_fini(&spin);
    if (igt_flush_test(gt->i915))
        err = -EIO;
    return err;
}

static int live_engine_pm(void *arg)
{
    struct intel_gt *gt = arg;
    struct intel_engine_cs *engine;
    enum intel_engine_id id;

    /*
     * Check we can call intel_engine_pm_put from any context. No
     * failures are reported directly, but if we mess up lockdep should
     * tell us.
     */
    if (intel_gt_pm_wait_for_idle(gt)) {
        pr_err("Unable to flush GT pm before test\n");
        return -EBUSY;
    }

    GEM_BUG_ON(intel_gt_pm_is_awake(gt));
    for_each_engine(engine, gt, id) {
        const typeof(*igt_atomic_phases) *p;

        for (p = igt_atomic_phases; p->name; p++) {
            /*
             * Acquisition is always synchronous, except if we
             * know that the engine is already awake, in which
             * case we should use intel_engine_pm_get_if_awake()
             * to atomically grab the wakeref.
             *
             * In practice,
             *    intel_engine_pm_get();
             *    intel_engine_pm_put();
             * occurs in one thread, while simultaneously
             *    intel_engine_pm_get_if_awake();
             *    intel_engine_pm_put();
             * occurs from atomic context in another.
             */
            GEM_BUG_ON(intel_engine_pm_is_awake(engine));
            intel_engine_pm_get(engine);

            p->critical_section_begin();
            if (!intel_engine_pm_get_if_awake(engine))
                pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
                       engine->name, p->name);
            else
                intel_engine_pm_put_async(engine);
            intel_engine_pm_put_async(engine);
            p->critical_section_end();

            intel_engine_pm_flush(engine);

            if (intel_engine_pm_is_awake(engine)) {
                pr_err("%s is still awake after flushing pm\n",
                       engine->name);
                return -EINVAL;
            }

            /* gt wakeref is async (deferred to workqueue) */
            if (intel_gt_pm_wait_for_idle(gt)) {
                pr_err("GT failed to idle\n");
                return -EINVAL;
            }
        }
    }

    return 0;
}

int live_engine_pm_selftests(struct intel_gt *gt)
{
    static const struct i915_subtest tests[] = {
        SUBTEST(live_engine_timestamps),
        SUBTEST(live_engine_busy_stats),
        SUBTEST(live_engine_pm),
    };

    return intel_gt_live_subtests(tests, gt);
}