/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */
#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "selftests/igt_flush_test.h"
#include "selftests/lib_sw_fence.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES;
}

static int selftest_tl_pin(struct intel_timeline *tl)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww);
	if (!err)
		err = intel_timeline_pin(tl, &ww);

	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_unpin(tl);
		intel_timeline_put(tl);
	}
}

static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		err = selftest_tl_pin(tl);
		if (err) {
			intel_timeline_put(tl);
			return err;
		}

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_unpin(tl);
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = to_gt(i915);

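	/*
	 * Create a bunch of timelines and check that their HWSP seqno slots
	 * do not overlap. Free some, and try again.
	 */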
	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, str_yes_no(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

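	/*
	 * Lookups from the cache are very fast, so the prng and the loop
	 * itself dominate the per-iteration timings. Measure the overhead
	 * of the prng first, so it can be subtracted from the results below.
	 */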
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

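		/* Make sure the compiler doesn't optimise away the prng call */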
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

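	/* Benchmark (only) setting random context ids */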
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

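	/* Benchmark looking up the exact same context ids as we just set */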
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

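	/* Benchmark setting the first N (in order) contexts */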
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

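	/* Benchmark looking up the exact same context ids as we just set */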
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

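	/* Benchmark searching for a random context id and maybe changing it */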
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

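	/* Benchmark searching for a known context id and changing the seqno */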
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
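			/*
			 * Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */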
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (GRAPHICS_VER(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (GRAPHICS_VER(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_request *
checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = selftest_tl_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_unpin(tl);
		return ERR_PTR(-EINVAL);
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

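	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */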
	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

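	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */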
	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = checked_tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

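	/*
	 * Across a seqno wrap, we need to keep the old seqno slot alive for
	 * any foreign GPU references that may still be reading it.
	 */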
	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	err = selftest_tl_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

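		/* With the wrap should come a new hwsp slot */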
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt);
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int emit_read_hwsp(struct i915_request *rq,
			  u32 seqno, u32 hwsp,
			  u32 *addr)
{
	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = *addr;
	*cs++ = 0;
	*cs++ = seqno;
	*addr += 4;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = hwsp;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = *addr;
	*cs++ = 0;
	*addr += 4;

	intel_ring_advance(rq, cs);

	return 0;
}

struct hwsp_watcher {
	struct i915_vma *vma;
	struct i915_request *rq;
	u32 addr;
	u32 *map;
};

static bool cmp_lt(u32 a, u32 b)
{
	return a < b;
}

static bool cmp_gte(u32 a, u32 b)
{
	return a >= b;
}

static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(w->map)) {
		i915_gem_object_put(obj);
		return PTR_ERR(w->map);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return PTR_ERR(vma);
	}

	w->vma = vma;
	w->addr = i915_ggtt_offset(vma);
	return 0;
}

static void switch_tl_lock(struct i915_request *from, struct i915_request *to)
{
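	/* Some light mutex juggling required; think co-routines. */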
	if (from) {
		lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
		mutex_unlock(&from->context->timeline->mutex);
	}

	if (to) {
		mutex_lock(&to->context->timeline->mutex);
		to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
	}
}

static int create_watcher(struct hwsp_watcher *w,
			  struct intel_engine_cs *engine,
			  int ringsz)
{
	struct intel_context *ce;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->ring_size = ringsz;
	w->rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(w->rq))
		return PTR_ERR(w->rq);

	w->addr = i915_ggtt_offset(w->vma);

	switch_tl_lock(w->rq, NULL);

	return 0;
}

static int check_watcher(struct hwsp_watcher *w, const char *name,
			 bool (*op)(u32 hwsp, u32 seqno))
{
	struct i915_request *rq = fetch_and_zero(&w->rq);
	u32 offset, end;
	int err;

	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);

	i915_request_get(rq);
	switch_tl_lock(NULL, rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		err = -ETIME;
		goto out;
	}

	err = 0;
	offset = 0;
	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
	while (offset < end) {
		if (!op(w->map[offset + 1], w->map[offset])) {
			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
			       name, w->map[offset + 1], w->map[offset]);
			err = -EINVAL;
		}

		offset += 2;
	}

out:
	i915_request_put(rq);
	return err;
}

static void cleanup_watcher(struct hwsp_watcher *w)
{
	if (w->rq) {
		switch_tl_lock(NULL, w->rq);

		i915_request_add(w->rq);
	}

	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
}

static bool retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	mutex_lock(&tl->mutex);
	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
	mutex_unlock(&tl->mutex);

	return !i915_active_fence_isset(&tl->last_request);
}

static struct i915_request *wrap_timeline(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	struct intel_timeline *tl = ce->timeline;
	u32 seqno = rq->fence.seqno;

	while (tl->seqno >= seqno) {
		i915_request_put(rq);
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq))
			return rq;

		i915_request_get(rq);
		i915_request_add(rq);
	}

	i915_request_put(rq);
	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static int live_hwsp_read(void *arg)
{
	struct intel_gt *gt = arg;
	struct hwsp_watcher watcher[2] = {};
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;
	int i;

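	/*
	 * If we take a reference to the HWSP for reading on the GPU, that
	 * read may be arbitrarily delayed (e.g. by foreign fences) and a
	 * seqno wrap may happen in the meantime. When the read is finally
	 * submitted it should still observe a value consistent with the
	 * request's seqno, even across multiple wraps.
	 */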
	if (GRAPHICS_VER(gt->i915) < 8)
		return 0;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
		err = setup_watcher(&watcher[i], gt);
		if (err)
			goto out;
	}

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

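		/* Create a long-lived request used to read the HWSP after the wraps */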
		err = create_watcher(&watcher[1], engine, SZ_512K);
		if (err)
			goto out;

		do {
			struct i915_sw_fence *submit;
			struct i915_request *rq;
			u32 hwsp, dummy;

			submit = heap_fence_create(GFP_KERNEL);
			if (!submit) {
				err = -ENOMEM;
				goto out;
			}

			err = create_watcher(&watcher[0], engine, SZ_4K);
			if (err)
				goto out;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			ce->timeline = intel_timeline_get(tl);

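			/* The timeline is mapped during the first pin */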
			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

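			/*
			 * Start the timeline just a couple of steps short of
			 * a seqno wrap, so that only a few requests are
			 * needed to roll it over.
			 */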
			tl->seqno = -12u + 2 * (count & 3);
			__intel_timeline_get_seqno(tl, &dummy);

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&rq->submit,
							    &watcher[0].rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[0].rq);
			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[0].rq,
						     rq->fence.seqno, hwsp,
						     &watcher[0].addr);
			switch_tl_lock(watcher[0].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[1].rq);
			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[1].rq,
						     rq->fence.seqno, hwsp,
						     &watcher[1].addr);
			switch_tl_lock(watcher[1].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			rq = wrap_timeline(rq);
			intel_context_unpin(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
							    &rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_put(rq);
				goto out;
			}

			err = check_watcher(&watcher[0], "before", cmp_lt);
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			if (err) {
				i915_request_put(rq);
				goto out;
			}
			count++;

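			/* Flush the timeline before manually wrapping again */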
			if (i915_request_wait(rq,
					      I915_WAIT_INTERRUPTIBLE,
					      HZ) < 0) {
				err = -ETIME;
				i915_request_put(rq);
				goto out;
			}
			retire_requests(tl);
			i915_request_put(rq);

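			/* Single requests are limited to half a ring at most */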
			if (8 * watcher[1].rq->ring->emit >
			    3 * watcher[1].rq->ring->size)
				break;

		} while (!__igt_timeout(end_time, NULL) &&
			 count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2);

		pr_info("%s: simulated %lu wraps\n", engine->name, count);
		err = check_watcher(&watcher[1], "after", cmp_gte);
		if (err)
			goto out;
	}

out:
	for (i = 0; i < ARRAY_SIZE(watcher); i++)
		cleanup_watcher(&watcher[i]);

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

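	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */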
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = -2u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

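		/* We expected a wrap! */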
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

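	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */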
	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb)
			goto out;

		err = intel_context_pin(ce);
		if (err)
			goto out;

		tl->seqno = -4u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_unpin;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

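		/* We expected a wrap! */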
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out_unpin;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out_unpin;
			}
		}
out_unpin:
		intel_context_unpin(ce);
out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

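	/*
	 * Check that seqno recycling is kept in check as we churn through
	 * many short-lived timelines: each new timeline must see its own
	 * breadcrumb value and never a stale write from a recycled slot.
	 */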
	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_read),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}