Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * SPDX-License-Identifier: MIT
0003  *
0004  * Copyright © 2019 Intel Corporation
0005  */
0006 
0007 #include <linux/kref.h>
0008 
0009 #include "gem/i915_gem_pm.h"
0010 #include "gt/intel_gt.h"
0011 
0012 #include "i915_selftest.h"
0013 
0014 #include "igt_flush_test.h"
0015 #include "lib_sw_fence.h"
0016 
0017 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
0018 
0019 static int
0020 alloc_empty_config(struct i915_perf *perf)
0021 {
0022     struct i915_oa_config *oa_config;
0023 
0024     oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
0025     if (!oa_config)
0026         return -ENOMEM;
0027 
0028     oa_config->perf = perf;
0029     kref_init(&oa_config->ref);
0030 
0031     strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
0032 
0033     mutex_lock(&perf->metrics_lock);
0034 
0035     oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
0036     if (oa_config->id < 0)  {
0037         mutex_unlock(&perf->metrics_lock);
0038         i915_oa_config_put(oa_config);
0039         return -ENOMEM;
0040     }
0041 
0042     mutex_unlock(&perf->metrics_lock);
0043 
0044     return 0;
0045 }
0046 
0047 static void
0048 destroy_empty_config(struct i915_perf *perf)
0049 {
0050     struct i915_oa_config *oa_config = NULL, *tmp;
0051     int id;
0052 
0053     mutex_lock(&perf->metrics_lock);
0054 
0055     idr_for_each_entry(&perf->metrics_idr, tmp, id) {
0056         if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
0057             oa_config = tmp;
0058             break;
0059         }
0060     }
0061 
0062     if (oa_config)
0063         idr_remove(&perf->metrics_idr, oa_config->id);
0064 
0065     mutex_unlock(&perf->metrics_lock);
0066 
0067     if (oa_config)
0068         i915_oa_config_put(oa_config);
0069 }
0070 
0071 static struct i915_oa_config *
0072 get_empty_config(struct i915_perf *perf)
0073 {
0074     struct i915_oa_config *oa_config = NULL, *tmp;
0075     int id;
0076 
0077     mutex_lock(&perf->metrics_lock);
0078 
0079     idr_for_each_entry(&perf->metrics_idr, tmp, id) {
0080         if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
0081             oa_config = i915_oa_config_get(tmp);
0082             break;
0083         }
0084     }
0085 
0086     mutex_unlock(&perf->metrics_lock);
0087 
0088     return oa_config;
0089 }
0090 
0091 static struct i915_perf_stream *
0092 test_stream(struct i915_perf *perf)
0093 {
0094     struct drm_i915_perf_open_param param = {};
0095     struct i915_oa_config *oa_config = get_empty_config(perf);
0096     struct perf_open_properties props = {
0097         .engine = intel_engine_lookup_user(perf->i915,
0098                            I915_ENGINE_CLASS_RENDER,
0099                            0),
0100         .sample_flags = SAMPLE_OA_REPORT,
0101         .oa_format = GRAPHICS_VER(perf->i915) == 12 ?
0102         I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
0103     };
0104     struct i915_perf_stream *stream;
0105 
0106     if (!oa_config)
0107         return NULL;
0108 
0109     props.metrics_set = oa_config->id;
0110 
0111     stream = kzalloc(sizeof(*stream), GFP_KERNEL);
0112     if (!stream) {
0113         i915_oa_config_put(oa_config);
0114         return NULL;
0115     }
0116 
0117     stream->perf = perf;
0118 
0119     mutex_lock(&perf->lock);
0120     if (i915_oa_stream_init(stream, &param, &props)) {
0121         kfree(stream);
0122         stream =  NULL;
0123     }
0124     mutex_unlock(&perf->lock);
0125 
0126     i915_oa_config_put(oa_config);
0127 
0128     return stream;
0129 }
0130 
/*
 * Close a stream opened by test_stream().
 *
 * perf is loaded up front: i915_perf_destroy_locked() presumably frees
 * the stream (hence the local copy — stream->perf cannot be read after
 * the call; TODO confirm against i915_perf.c), and the perf lock must
 * outlive the destroy.
 */
static void stream_destroy(struct i915_perf_stream *stream)
{
    struct i915_perf *perf = stream->perf;

    mutex_lock(&perf->lock);
    i915_perf_destroy_locked(stream);
    mutex_unlock(&perf->lock);
}
0139 
0140 static int live_sanitycheck(void *arg)
0141 {
0142     struct drm_i915_private *i915 = arg;
0143     struct i915_perf_stream *stream;
0144 
0145     /* Quick check we can create a perf stream */
0146 
0147     stream = test_stream(&i915->perf);
0148     if (!stream)
0149         return -EINVAL;
0150 
0151     stream_destroy(stream);
0152     return 0;
0153 }
0154 
0155 static int write_timestamp(struct i915_request *rq, int slot)
0156 {
0157     u32 *cs;
0158     int len;
0159 
0160     cs = intel_ring_begin(rq, 6);
0161     if (IS_ERR(cs))
0162         return PTR_ERR(cs);
0163 
0164     len = 5;
0165     if (GRAPHICS_VER(rq->engine->i915) >= 8)
0166         len++;
0167 
0168     *cs++ = GFX_OP_PIPE_CONTROL(len);
0169     *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
0170         PIPE_CONTROL_STORE_DATA_INDEX |
0171         PIPE_CONTROL_WRITE_TIMESTAMP;
0172     *cs++ = slot * sizeof(u32);
0173     *cs++ = 0;
0174     *cs++ = 0;
0175     *cs++ = 0;
0176 
0177     intel_ring_advance(rq, cs);
0178 
0179     return 0;
0180 }
0181 
0182 static ktime_t poll_status(struct i915_request *rq, int slot)
0183 {
0184     while (!intel_read_status_page(rq->engine, slot) &&
0185            !i915_request_completed(rq))
0186         cpu_relax();
0187 
0188     return ktime_get();
0189 }
0190 
static int live_noa_delay(void *arg)
{
    struct drm_i915_private *i915 = arg;
    struct i915_perf_stream *stream;
    struct i915_request *rq;
    ktime_t t0, t1;
    u64 expected;
    u32 delay;
    int err;
    int i;

    /* Check that the GPU delays matches expectations */

    stream = test_stream(&i915->perf);
    if (!stream)
        return -ENOMEM;

    /* The delay (in ns) that the stream's noa_wait batch should insert. */
    expected = atomic64_read(&stream->perf->noa_programming_delay);

    /* The noa_wait batch is built for the render CS; skip elsewhere. */
    if (stream->engine->class != RENDER_CLASS) {
        err = -ENODEV;
        goto out;
    }

    /*
     * Clear four status-page dwords: two timestamp slots at 0x100 and
     * 0x102 — two dwords apart, so the timestamps appear to be 64b
     * (TODO confirm PIPE_CONTROL_WRITE_TIMESTAMP width).
     */
    for (i = 0; i < 4; i++)
        intel_write_status_page(stream->engine, 0x100 + i, 0);

    rq = intel_engine_create_kernel_request(stream->engine);
    if (IS_ERR(rq)) {
        err = PTR_ERR(rq);
        goto out;
    }

    if (rq->engine->emit_init_breadcrumb) {
        err = rq->engine->emit_init_breadcrumb(rq);
        if (err) {
            i915_request_add(rq);
            goto out;
        }
    }

    /* Timestamp immediately before the delay batch. */
    err = write_timestamp(rq, 0x100);
    if (err) {
        i915_request_add(rq);
        goto out;
    }

    /* Run the noa_wait spin batch; SECURE as it touches privileged regs. */
    err = rq->engine->emit_bb_start(rq,
                    i915_ggtt_offset(stream->noa_wait), 0,
                    I915_DISPATCH_SECURE);
    if (err) {
        i915_request_add(rq);
        goto out;
    }

    /* Timestamp immediately after the delay batch. */
    err = write_timestamp(rq, 0x102);
    if (err) {
        i915_request_add(rq);
        goto out;
    }

    i915_request_get(rq);
    i915_request_add(rq);

    /* Poll both slots back-to-back; preemption off so the CPU-side
     * measurement window is not inflated by scheduling. */
    preempt_disable();
    t0 = poll_status(rq, 0x100);
    t1 = poll_status(rq, 0x102);
    preempt_enable();

    pr_info("CPU delay: %lluns, expected %lluns\n",
        ktime_sub(t1, t0), expected);

    /* GPU-side delay: difference of the two GPU timestamps, in ns. */
    delay = intel_read_status_page(stream->engine, 0x102);
    delay -= intel_read_status_page(stream->engine, 0x100);
    delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
    pr_info("GPU delay: %uns, expected %lluns\n",
        delay, expected);

    /* Accept delays within [3/4, 3/2] of the programmed value. */
    if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
        pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
               delay / 1000,
               div_u64(3 * expected, 4000),
               div_u64(3 * expected, 2000));
        err = -EINVAL;
    }

    i915_request_put(rq);
out:
    stream_destroy(stream);
    return err;
}
0282 
static int live_noa_gpr(void *arg)
{
    struct drm_i915_private *i915 = arg;
    struct i915_perf_stream *stream;
    struct intel_context *ce;
    struct i915_request *rq;
    u32 *cs, *store;
    void *scratch;
    u32 gpr0;
    int err;
    int i;

    /* Check that the delay does not clobber user context state (GPR) */

    stream = test_stream(&i915->perf);
    if (!stream)
        return -ENOMEM;

    /* MMIO offset of the first CS general-purpose register (GPR0). */
    gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

    /* A fresh user context whose GPR state we will seed and verify. */
    ce = intel_context_create(stream->engine);
    if (IS_ERR(ce)) {
        err = PTR_ERR(ce);
        goto out;
    }

    /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
    scratch = __px_vaddr(ce->vm->scratch[0]);
    memset(scratch, POISON_FREE, PAGE_SIZE);

    rq = intel_context_create_request(ce);
    if (IS_ERR(rq)) {
        err = PTR_ERR(rq);
        goto out_ce;
    }
    i915_request_get(rq);

    if (rq->engine->emit_init_breadcrumb) {
        err = rq->engine->emit_init_breadcrumb(rq);
        if (err) {
            i915_request_add(rq);
            goto out_rq;
        }
    }

    /* Fill the 16 qword [32 dword] GPR with a known unlikely value */
    cs = intel_ring_begin(rq, 2 * 32 + 2);
    if (IS_ERR(cs)) {
        err = PTR_ERR(cs);
        i915_request_add(rq);
        goto out_rq;
    }

    /* One LRI with 32 (offset, value) pairs, plus a NOOP for padding. */
    *cs++ = MI_LOAD_REGISTER_IMM(32);
    for (i = 0; i < 32; i++) {
        *cs++ = gpr0 + i * sizeof(u32);
        *cs++ = STACK_MAGIC;
    }
    *cs++ = MI_NOOP;
    intel_ring_advance(rq, cs);

    /* Execute the GPU delay */
    err = rq->engine->emit_bb_start(rq,
                    i915_ggtt_offset(stream->noa_wait), 0,
                    I915_DISPATCH_SECURE);
    if (err) {
        i915_request_add(rq);
        goto out_rq;
    }

    /* Read the GPR back, using the pinned global HWSP for convenience */
    store = memset32(rq->engine->status_page.addr + 512, 0, 32);
    for (i = 0; i < 32; i++) {
        u32 cmd;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs)) {
            err = PTR_ERR(cs);
            i915_request_add(rq);
            goto out_rq;
        }

        /*
         * cmd++ bumps the SRM length field on gen8+ — presumably for
         * the wider (64b) address that follows; TODO confirm against
         * the MI_STORE_REGISTER_MEM definition.
         */
        cmd = MI_STORE_REGISTER_MEM;
        if (GRAPHICS_VER(i915) >= 8)
            cmd++;
        cmd |= MI_USE_GGTT;

        *cs++ = cmd;
        *cs++ = gpr0 + i * sizeof(u32);
        /* GGTT address of store[i] within the pinned status page. */
        *cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
            offset_in_page(store) +
            i * sizeof(u32);
        *cs++ = 0; /* upper 32b of the address (gen8+); padding before */
        intel_ring_advance(rq, cs);
    }

    i915_request_add(rq);

    if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
        pr_err("noa_wait timed out\n");
        /* Wedge the GT: the request is stuck, recovery needs a reset. */
        intel_gt_set_wedged(stream->engine->gt);
        err = -EIO;
        goto out_rq;
    }

    /* Verify that the GPR contain our expected values */
    for (i = 0; i < 32; i++) {
        if (store[i] == STACK_MAGIC)
            continue;

        pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
               i, store[i], STACK_MAGIC);
        err = -EINVAL;
    }

    /* Verify that the user's scratch page was not used for GPR storage */
    if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
        pr_err("Scratch page overwritten!\n");
        igt_hexdump(scratch, 4096);
        err = -EINVAL;
    }

out_rq:
    i915_request_put(rq);
out_ce:
    intel_context_put(ce);
out:
    stream_destroy(stream);
    return err;
}
0413 
0414 int i915_perf_live_selftests(struct drm_i915_private *i915)
0415 {
0416     static const struct i915_subtest tests[] = {
0417         SUBTEST(live_sanitycheck),
0418         SUBTEST(live_noa_delay),
0419         SUBTEST(live_noa_gpr),
0420     };
0421     struct i915_perf *perf = &i915->perf;
0422     int err;
0423 
0424     if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
0425         return 0;
0426 
0427     if (intel_gt_is_wedged(to_gt(i915)))
0428         return 0;
0429 
0430     err = alloc_empty_config(&i915->perf);
0431     if (err)
0432         return err;
0433 
0434     err = i915_subtests(tests, i915);
0435 
0436     destroy_empty_config(&i915->perf);
0437 
0438     return err;
0439 }