// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

0007 #include <linux/kref.h>
0008
0009 #include "gem/i915_gem_pm.h"
0010 #include "gt/intel_gt.h"
0011
0012 #include "i915_selftest.h"
0013
0014 #include "igt_flush_test.h"
0015 #include "lib_sw_fence.h"
0016
0017 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
0018
0019 static int
0020 alloc_empty_config(struct i915_perf *perf)
0021 {
0022 struct i915_oa_config *oa_config;
0023
0024 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
0025 if (!oa_config)
0026 return -ENOMEM;
0027
0028 oa_config->perf = perf;
0029 kref_init(&oa_config->ref);
0030
0031 strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
0032
0033 mutex_lock(&perf->metrics_lock);
0034
0035 oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
0036 if (oa_config->id < 0) {
0037 mutex_unlock(&perf->metrics_lock);
0038 i915_oa_config_put(oa_config);
0039 return -ENOMEM;
0040 }
0041
0042 mutex_unlock(&perf->metrics_lock);
0043
0044 return 0;
0045 }
0046
0047 static void
0048 destroy_empty_config(struct i915_perf *perf)
0049 {
0050 struct i915_oa_config *oa_config = NULL, *tmp;
0051 int id;
0052
0053 mutex_lock(&perf->metrics_lock);
0054
0055 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
0056 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
0057 oa_config = tmp;
0058 break;
0059 }
0060 }
0061
0062 if (oa_config)
0063 idr_remove(&perf->metrics_idr, oa_config->id);
0064
0065 mutex_unlock(&perf->metrics_lock);
0066
0067 if (oa_config)
0068 i915_oa_config_put(oa_config);
0069 }
0070
0071 static struct i915_oa_config *
0072 get_empty_config(struct i915_perf *perf)
0073 {
0074 struct i915_oa_config *oa_config = NULL, *tmp;
0075 int id;
0076
0077 mutex_lock(&perf->metrics_lock);
0078
0079 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
0080 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
0081 oa_config = i915_oa_config_get(tmp);
0082 break;
0083 }
0084 }
0085
0086 mutex_unlock(&perf->metrics_lock);
0087
0088 return oa_config;
0089 }
0090
0091 static struct i915_perf_stream *
0092 test_stream(struct i915_perf *perf)
0093 {
0094 struct drm_i915_perf_open_param param = {};
0095 struct i915_oa_config *oa_config = get_empty_config(perf);
0096 struct perf_open_properties props = {
0097 .engine = intel_engine_lookup_user(perf->i915,
0098 I915_ENGINE_CLASS_RENDER,
0099 0),
0100 .sample_flags = SAMPLE_OA_REPORT,
0101 .oa_format = GRAPHICS_VER(perf->i915) == 12 ?
0102 I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
0103 };
0104 struct i915_perf_stream *stream;
0105
0106 if (!oa_config)
0107 return NULL;
0108
0109 props.metrics_set = oa_config->id;
0110
0111 stream = kzalloc(sizeof(*stream), GFP_KERNEL);
0112 if (!stream) {
0113 i915_oa_config_put(oa_config);
0114 return NULL;
0115 }
0116
0117 stream->perf = perf;
0118
0119 mutex_lock(&perf->lock);
0120 if (i915_oa_stream_init(stream, ¶m, &props)) {
0121 kfree(stream);
0122 stream = NULL;
0123 }
0124 mutex_unlock(&perf->lock);
0125
0126 i915_oa_config_put(oa_config);
0127
0128 return stream;
0129 }
0130
0131 static void stream_destroy(struct i915_perf_stream *stream)
0132 {
0133 struct i915_perf *perf = stream->perf;
0134
0135 mutex_lock(&perf->lock);
0136 i915_perf_destroy_locked(stream);
0137 mutex_unlock(&perf->lock);
0138 }
0139
0140 static int live_sanitycheck(void *arg)
0141 {
0142 struct drm_i915_private *i915 = arg;
0143 struct i915_perf_stream *stream;
0144
0145
0146
0147 stream = test_stream(&i915->perf);
0148 if (!stream)
0149 return -EINVAL;
0150
0151 stream_destroy(stream);
0152 return 0;
0153 }
0154
/*
 * Emit a PIPE_CONTROL into @rq's ring that writes the GPU timestamp into
 * @slot, a dword index into the engine's status page (selected via
 * PIPE_CONTROL_STORE_DATA_INDEX, hence the slot * sizeof(u32) address).
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 *cs;
	int len;

	/*
	 * Always reserve and emit 6 dwords; pre-gen8 the PIPE_CONTROL
	 * length field is one less, so the trailing zero dword is
	 * presumably consumed as padding/no-op there — NOTE(review):
	 * confirm against the PIPE_CONTROL layout for those gens.
	 */
	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* gen8+ PIPE_CONTROL takes one extra dword in its length field. */
	len = 5;
	if (GRAPHICS_VER(rq->engine->i915) >= 8)
		len++;

	*cs++ = GFX_OP_PIPE_CONTROL(len);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
		PIPE_CONTROL_STORE_DATA_INDEX |
		PIPE_CONTROL_WRITE_TIMESTAMP;
	*cs++ = slot * sizeof(u32); /* byte offset of the status page slot */
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}
0181
0182 static ktime_t poll_status(struct i915_request *rq, int slot)
0183 {
0184 while (!intel_read_status_page(rq->engine, slot) &&
0185 !i915_request_completed(rq))
0186 cpu_relax();
0187
0188 return ktime_get();
0189 }
0190
/*
 * Check that the delay executed by the stream's noa_wait batch matches the
 * configured noa_programming_delay: bracket the batch with GPU timestamp
 * writes, convert the delta to nanoseconds and compare against the
 * expectation with some slack (between 3/4 and 3/2 of the expected value).
 */
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	expected = atomic64_read(&stream->perf->noa_programming_delay);

	/* This test only targets the render engine's noa_wait batch. */
	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	/*
	 * Clear the status page slots used for the two timestamps.
	 * Slots 0x100 and 0x102 are two dwords apart — presumably each
	 * PIPE_CONTROL timestamp spans two dwords; TODO confirm.
	 */
	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	/* Timestamp before the delay batch... */
	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* ...run the stream's noa_wait delay batch... */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* ...and a timestamp after it. */
	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* Measure the CPU-observed gap between the two GPU writes. */
	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	/* GPU-side delta, converted from clock ticks to nanoseconds. */
	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

	/* Accept anything in [3/4 * expected, 3/2 * expected]. */
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}
0282
/*
 * Check that the noa_wait batch does not clobber the CS_GPR registers or
 * write outside the GGTT: preload all 32 GPR dwords with STACK_MAGIC,
 * poison the context's scratch page, run noa_wait, then read the GPRs
 * back via SRM and verify both the GPRs and the scratch page are intact.
 */
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the scratch page so any stray write is detectable below. */
	scratch = __px_vaddr(ce->vm->scratch[0]);
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* 32 (reg, value) pairs plus the LRI header and a trailing NOOP. */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	/* Fill all 32 GPR dwords with a known magic value. */
	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Run the noa_wait batch under test. */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPRs back into a cleared region of the status page. */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		/*
		 * gen8+ SRM takes one extra (upper address) dword, hence
		 * cmd++ — presumably bumping the instruction length field;
		 * NOTE(review): confirm against the MI_STORE_REGISTER_MEM
		 * encoding. The trailing zero dword covers that extra
		 * dword (and is harmless pre-gen8).
		 */
		cmd = MI_STORE_REGISTER_MEM;
		if (GRAPHICS_VER(i915) >= 8)
			cmd++;
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify noa_wait preserved every GPR. */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify noa_wait did not scribble on the scratch page. */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}
0413
0414 int i915_perf_live_selftests(struct drm_i915_private *i915)
0415 {
0416 static const struct i915_subtest tests[] = {
0417 SUBTEST(live_sanitycheck),
0418 SUBTEST(live_noa_delay),
0419 SUBTEST(live_noa_gpr),
0420 };
0421 struct i915_perf *perf = &i915->perf;
0422 int err;
0423
0424 if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
0425 return 0;
0426
0427 if (intel_gt_is_wedged(to_gt(i915)))
0428 return 0;
0429
0430 err = alloc_empty_config(&i915->perf);
0431 if (err)
0432 return err;
0433
0434 err = i915_subtests(tests, i915);
0435
0436 destroy_empty_config(&i915->perf);
0437
0438 return err;
0439 }