/*
 * i915 perf: performance monitoring support for the i915 driver.
 *
 * This file implements the stream interface exposed through
 * DRM_IOCTL_I915_PERF_OPEN, most notably streams of periodic OA
 * (Observation Architecture) unit reports which the hardware writes
 * into a circular buffer that userspace can read() from.
 */
#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_internal.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_execlists_submission.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_perf.h"
#include "i915_perf_oa_regs.h"

/* HW requires this to be a power of two, as well as the driver */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
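
/*
 * Worked example (illustrative): OA_TAKEN() gives the number of bytes
 * between @head and @tail modulo the buffer size, which only works
 * because OA_BUFFER_SIZE is a power of two. With head = 0xFFFF00 and
 * tail = 0x100 in a 16M (0x1000000) buffer:
 *
 *	(0x100 - 0xFFFF00) & 0xFFFFFF == 0x200
 *
 * i.e. 512 bytes are available across the wrap point.
 */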

/*
 * The OA unit updates its tail pointer ahead of the corresponding data
 * actually landing in memory, so a hardware tail pointer is only
 * trusted once it has been stable ("aging") for at least this margin
 * of time.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL
#define INVALID_TAIL_PTR	0xffffffff

/*
 * The default frequency for checking whether the OA unit has written new
 * reports to the circular OA buffer.
 */
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static u32 i915_perf_stream_paranoid = true;

/*
 * The maximum exponent the OA unit's sampling frequency can be
 * programmed with: the sampling period is derived from the timestamp
 * period as timestamp_period * 2^(exponent + 1).
 */
#define OA_EXPONENT_MAX 31

#define INVALID_CTX_ID 0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_MASK_EXTENDED  0x7f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_CLK_RATIO      (1<<5)

/*
 * The highest sampling frequency we can theoretically program the OA unit
 * with is always half the timestamp frequency. Initialized just before we
 * register the sysctl parameter.
 */
static int oa_sample_rate_hard_limit;

/*
 * Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root.
 */
static u32 i915_oa_max_sample_rate = 100000;

/*
 * XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]		    = { 0, 64 },
	[I915_OA_FORMAT_A29]		    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]	    = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor in buffer size */
	[I915_OA_FORMAT_B4_C8]		    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]	    = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]	    = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
};

#define SAMPLE_OA_REPORT      (1<<0)

/**
 * struct perf_open_properties - properties given to a perf stream open
 *
 * As read_properties_unlocked() enumerates and validates the properties
 * given to open a stream of metrics the configuration is built up in the
 * structure, which starts out zero initialized.
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 hold_preemption:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;

	struct intel_engine_cs *engine;

	bool has_sseu;
	struct intel_sseu sseu;

	u64 poll_oa_period;
};

struct i915_oa_config_bo {
	struct llist_node node;

	struct i915_oa_config *oa_config;
	struct i915_vma *vma;
};

static struct ctl_table_header *sysctl_header;

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
void i915_oa_config_release(struct kref *ref)
{
	struct i915_oa_config *oa_config =
		container_of(ref, typeof(*oa_config), ref);

	kfree(oa_config->flex_regs);
	kfree(oa_config->b_counter_regs);
	kfree(oa_config->mux_regs);

	kfree_rcu(oa_config, rcu);
}

struct i915_oa_config *
i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
{
	struct i915_oa_config *oa_config;

	rcu_read_lock();
	oa_config = idr_find(&perf->metrics_idr, metrics_set);
	if (oa_config)
		oa_config = i915_oa_config_get(oa_config);
	rcu_read_unlock();

	return oa_config;
}

static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
{
	i915_oa_config_put(oa_bo->oa_config);
	i915_vma_put(oa_bo->vma);
	kfree(oa_bo);
}

static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
	       GEN12_OAG_OATAILPTR_MASK;
}

static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}

/**
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @stream: i915 stream instance
 *
 * This is either called via fops (for blocking reads in user ctx) or the
 * poll check hrtimer (atomic ctx) to check the OA buffer tail pointer and
 * check whether there is data available for userspace to read.
 *
 * This function is central to providing a workaround for the OA unit tail
 * pointer having a race with respect to what data is visible to the CPU:
 * the tail pointer read from the hardware is only trusted once it has
 * "aged" for OA_TAIL_MARGIN_NSEC, and any reports between the aged tail
 * and the hardware tail whose first two dwords still read as zero are not
 * yet considered available.
 *
 * Returns: %true if the OA buffer contains data, else %false
 */
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	int report_size = stream->oa_buffer.format_size;
	unsigned long flags;
	bool pollin;
	u32 hw_tail;
	u64 now;

	/*
	 * We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head and
	 * tail state.
	 */
	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);

	/*
	 * The tail pointer increases in 64 byte increments, not in
	 * report_size steps, so align it down to a report boundary.
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	if (hw_tail == stream->oa_buffer.aging_tail &&
	    (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
		/*
		 * If the HW tail hasn't moved since the last check and it has
		 * been aging for long enough, declare it the new tail.
		 */
		stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
	} else {
		u32 head, tail, aged_tail;

		/*
		 * NB: The head we observe here might effectively be a little
		 * out of date. If a read() is in progress, the head could be
		 * anywhere between this head and stream->oa_buffer.tail.
		 */
		head = stream->oa_buffer.head - gtt_offset;
		aged_tail = stream->oa_buffer.tail - gtt_offset;

		hw_tail -= gtt_offset;
		tail = hw_tail;

		/*
		 * Walk the stream backward until we find a report with dword
		 * 0 & 1 not at 0. Since the circular buffer pointers progress
		 * by increments of 64 bytes and reports can be up to 256
		 * bytes long, we can't tell whether a report has fully landed
		 * in memory before the first 2 dwords of the following report
		 * have effectively landed.
		 *
		 * This is assuming that the writes of the OA unit land in
		 * memory in the order they were written.
		 */
		while (OA_TAKEN(tail, aged_tail) >= report_size) {
			u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);

			if (report32[0] != 0 || report32[1] != 0)
				break;

			tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
		}

		if (OA_TAKEN(hw_tail, tail) > report_size &&
		    __ratelimit(&stream->perf->tail_pointer_race))
			DRM_NOTE("unlanded report(s) head=0x%x "
				 "tail=0x%x hw_tail=0x%x\n",
				 head, tail, hw_tail);

		stream->oa_buffer.tail = gtt_offset + tail;
		stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
		stream->oa_buffer.aging_timestamp = now;
	}

	pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
			  stream->oa_buffer.head - gtt_offset) >= report_size;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	return pollin;
}
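
/*
 * Illustrative timeline for the tail-aging scheme above (example values,
 * not from the original source): suppose the poll hrtimer fires every 5ms
 * and OA_TAIL_MARGIN_NSEC is 100us.
 *
 *   t=0:   hw_tail = 0x1000 != aging_tail -> record aging_tail = 0x1000,
 *          aging_timestamp = t, keep the old stream->oa_buffer.tail.
 *   t=5ms: hw_tail still 0x1000 and 5ms > 100us -> promote the aged
 *          pointer: stream->oa_buffer.tail = 0x1000.
 *
 * Reports between the promoted tail and a newer hw_tail are only trusted
 * once their first two dwords read back non-zero.
 */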

/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as %DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
 * into the userspace read() buffer.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through %DRM_I915_PERF_PROP_*
 * properties when opening a stream, tracked as stream->sample_flags. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	int report_size = stream->oa_buffer.format_size;
	struct drm_i915_perf_record_header header;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (copy_to_user(buf, report, report_size))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}
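
/*
 * For reference, a minimal sketch (hypothetical userspace code, not part
 * of this driver) of how a client consumes the byte stream produced by
 * the append_* helpers above: read() returns a sequence of
 * drm_i915_perf_record_header structs, each followed by
 * header.size - sizeof(header) bytes of payload. process_oa_report() is
 * a made-up helper standing in for the client's decoding logic.
 *
 *	uint8_t buf[4096];
 *	ssize_t len = read(stream_fd, buf, sizeof(buf));
 *
 *	for (ssize_t off = 0; off < len;) {
 *		const struct drm_i915_perf_record_header *hdr =
 *			(const void *)(buf + off);
 *
 *		if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE)
 *			process_oa_report((const uint8_t *)(hdr + 1));
 *		off += hdr->size;
 *	}
 */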

/**
 * gen8_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head; this follows the Gen PRM
 * naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	tail = stream->oa_buffer.tail;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a
	 * driver bug since we validate + align the tail pointers we read from
	 * the hardware and we are in full control of the head pointer which
	 * should only be incremented by multiples of the report size.
	 */
	if (drm_WARN_ONCE(&uncore->i915->drm,
			  head > OA_BUFFER_SIZE || head % report_size ||
			  tail > OA_BUFFER_SIZE || tail % report_size,
			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
			  head, tail))
		return -EIO;

	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer size so
		 * we never expect to see a report split between the beginning
		 * and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment here would
		 * imply a driver bug that would result in an overrun.
		 */
		if (drm_WARN_ON(&uncore->i915->drm,
				(OA_BUFFER_SIZE - head) < report_size)) {
			drm_err(&uncore->i915->drm,
				"Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what triggered
		 * this specific report (mostly timer triggered or e.g. due to
		 * a context switch).
		 *
		 * This field is never expected to be zero so we can check
		 * that the report isn't invalid before copying it to
		 * userspace.
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  (GRAPHICS_VER(stream->perf->i915) == 12 ?
			   OAREPORT_REASON_MASK_EXTENDED :
			   OAREPORT_REASON_MASK));

		ctx_id = report32[2] & stream->specific_ctx_id_mask;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below.
		 *
		 * Note: we don't clear the valid_ctx_bit so userspace can
		 * understand that the ID has been squashed by the kernel.
		 */
		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
		    GRAPHICS_VER(stream->perf->i915) <= 11)
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: the OA unit reports for all contexts, so when filtering
		 * for a single context we forward a report if it belongs to
		 * that context, if the previously forwarded report did (so a
		 * final counter snapshot is seen when switching away), or if
		 * it documents a context switch.
		 */
		if (!stream->perf->exclusive_stream->ctx ||
		    stream->specific_ctx_id == ctx_id ||
		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {
			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (stream->perf->exclusive_stream->ctx &&
			    stream->specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			stream->oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * Clear out the first 2 dwords as a means to detect unlanded
		 * reports.
		 */
		report32[0] = 0;
		report32[1] = 0;
	}

	if (start_offset != *offset) {
		i915_reg_t oaheadptr;

		oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
			    GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;

		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put it back here.
		 */
		head += gtt_offset;
		intel_uncore_write(uncore, oaheadptr,
				   head & GEN12_OAG_OAHEADPTR_MASK);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}
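
/*
 * Note on the consume loop above (summary, not from the original source):
 * head and tail are kept as GGTT addresses in the stream state but
 * converted to buffer-relative offsets for the copy, and every consumed
 * report has its first two dwords zeroed. That zeroing is what lets
 * oa_buffer_check_unlocked() later distinguish landed reports from stale
 * buffer contents.
 */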

/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and
 * then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied
 * into the userspace buffer.
 *
 * NB: some data may be successfully copied to the userspace buffer even if
 * an error is returned, and this is reflected in the updated @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus;
	i915_reg_t oastatus_reg;
	int ret;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
		return -EIO;

	oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
		       GEN12_OAG_OASTATUS : GEN8_OASTATUS;

	oastatus = intel_uncore_read(uncore, oastatus_reg);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we have
	 * to assume that old reports are now being trampled over.
	 *
	 * Considering how we don't currently give userspace control over the
	 * OA buffer size and always configure a large 16MB buffer, a buffer
	 * overflow does anyway likely indicate that something has gone quite
	 * badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		drm_dbg(&stream->perf->i915->drm,
			"OA buffer overflow (exponent = %d): force restart\n",
			stream->period_exponent);

		stream->perf->ops.oa_disable(stream);
		stream->perf->ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset the OASTATUS register for us.
		 */
		oastatus = intel_uncore_read(uncore, oastatus_reg);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;

		intel_uncore_rmw(uncore, oastatus_reg,
				 GEN8_OASTATUS_COUNTER_OVERFLOW |
				 GEN8_OASTATUS_REPORT_LOST,
				 IS_GRAPHICS_VER(uncore->i915, 8, 11) ?
				 (GEN8_OASTATUS_HEAD_POINTER_WRAP |
				  GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}

/**
 * gen7_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	tail = stream->oa_buffer.tail;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a
	 * driver bug since we validate + align the tail pointers we read from
	 * the hardware and we are in full control of the head pointer which
	 * should only be incremented by multiples of the report size.
	 */
	if (drm_WARN_ONCE(&uncore->i915->drm,
			  head > OA_BUFFER_SIZE || head % report_size ||
			  tail > OA_BUFFER_SIZE || tail % report_size,
			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
			  head, tail))
		return -EIO;

	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/*
		 * All the report sizes factor neatly into the buffer size so
		 * we never expect to see a report split between the beginning
		 * and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment here would
		 * imply a driver bug that would result in an overrun.
		 */
		if (drm_WARN_ON(&uncore->i915->drm,
				(OA_BUFFER_SIZE - head) < report_size)) {
			drm_err(&uncore->i915->drm,
				"Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The report-ID field for periodic samples includes some
		 * undocumented flags related to what triggered the report and
		 * is never expected to be zero, so we can check that the
		 * report isn't invalid before copying it to userspace.
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&stream->perf->spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/*
		 * Clear out the first 2 dwords as a means to detect unlanded
		 * reports.
		 */
		report32[0] = 0;
		report32[1] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put it back here.
		 */
		head += gtt_offset;

		intel_uncore_write(uncore, GEN7_OASTATUS2,
				   (head & GEN7_OASTATUS2_HEAD_MASK) |
				   GEN7_OASTATUS2_MEM_SELECT_GGTT);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied
 * into the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus1;
	int ret;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
		return -EIO;

	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);

	/*
	 * XXX: On Haswell we don't have a safe way to clear oastatus1 bits
	 * while the OA unit is enabled (the tail pointer may be updated
	 * asynchronously), so we ignore status bits that have already been
	 * reported to userspace.
	 */
	oastatus1 &= ~stream->perf->gen7_latched_oastatus1;

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN() which
	 *   will start to report a near-empty buffer after an overflow), but
	 *   it's awkward that we can't clear the status on Haswell, so
	 *   without a reset we won't be able to catch the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports, it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports are
	 *   being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder mode
	 *   where the driver automatically maintains a safe guard band
	 *   between head/tail, avoiding this overflow condition, but we
	 *   avoid the added driver complexity for now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		drm_dbg(&stream->perf->i915->drm,
			"OA buffer overflow (exponent = %d): force restart\n",
			stream->period_exponent);

		stream->perf->ops.oa_disable(stream);
		stream->perf->ops.oa_enable(stream);

		oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		stream->perf->gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!stream->periodic)
		return -EIO;

	return wait_event_interruptible(stream->poll_wq,
					oa_buffer_check_unlocked(stream));
}

/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA
 * metrics, this starts a poll_wait with the wait queue that our hrtimer
 * callback wakes when it sees data ready to read in the circular OA buffer.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	poll_wait(file, &stream->poll_wq, wait);
}

/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied
 * into the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	return stream->perf->ops.read(stream, buf, count, offset);
}

static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
{
	struct i915_gem_engines_iter it;
	struct i915_gem_context *ctx = stream->ctx;
	struct intel_context *ce;
	struct i915_gem_ww_ctx ww;
	int err = -ENODEV;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (ce->engine != stream->engine)
			continue;

		err = 0;
		break;
	}
	i915_gem_context_unlock_engines(ctx);

	if (err)
		return ERR_PTR(err);

	i915_gem_ww_ctx_init(&ww, true);
retry:
	/*
	 * As the ID is the gtt offset of the context's vma we pin the vma to
	 * ensure the ID remains fixed.
	 */
	err = intel_context_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err)
		return ERR_PTR(err);

	stream->pinned_ctx = ce;
	return stream->pinned_ctx;
}
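
/*
 * The -EDEADLK/backoff dance above is the standard i915 ww-mutex retry
 * idiom; a sketch of the pattern for reference (some_pin_or_lock_operation
 * is a placeholder for any ww-aware pin/lock call):
 *
 *	i915_gem_ww_ctx_init(&ww, true);
 * retry:
 *	err = some_pin_or_lock_operation(..., &ww);
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);	// drop held locks
 *		if (!err)
 *			goto retry;			// retry in ticket order
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */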

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = oa_pin_context(stream);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (GRAPHICS_VER(ce->engine->i915)) {
	case 7: {
		/*
		 * On Haswell we don't do any post processing of the reports
		 * and don't need to use the mask.
		 */
		stream->specific_ctx_id = i915_ggtt_offset(ce->state);
		stream->specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
		if (intel_engine_uses_guc(ce->engine)) {
			/*
			 * When using GuC, the context descriptor we write in
			 * i915 is read by GuC and rewritten before it's
			 * actually submitted, so the ID seen in OA reports is
			 * derived from the LRCA (whose low 12 bits hold
			 * descriptor flags).
			 */
			stream->specific_ctx_id = ce->lrc.lrca >> 12;

			/*
			 * GuC uses the top bit of the context ID for its own
			 * purposes, so mask it out.
			 */
			stream->specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		} else {
			stream->specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			stream->specific_ctx_id = stream->specific_ctx_id_mask;
		}
		break;

	case 11:
	case 12:
		if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
			stream->specific_ctx_id_mask =
				((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
				(XEHP_SW_CTX_ID_SHIFT - 32);
			stream->specific_ctx_id =
				(XEHP_MAX_CONTEXT_HW_ID - 1) <<
				(XEHP_SW_CTX_ID_SHIFT - 32);
		} else {
			stream->specific_ctx_id_mask =
				((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
			/*
			 * Pick an unused context id:
			 * 0 - BITS_PER_LONG are used by other contexts
			 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
			 */
			stream->specific_ctx_id =
				(GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
		}
		break;

	default:
		MISSING_CASE(GRAPHICS_VER(ce->engine->i915));
	}

	ce->tag = stream->specific_ctx_id;

	drm_dbg(&stream->perf->i915->drm,
		"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
		stream->specific_ctx_id,
		stream->specific_ctx_id_mask);

	return 0;
}
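
/*
 * Example of the mask arithmetic above (illustrative): on gen8/9 with GuC
 * submission, specific_ctx_id is the page-aligned LRCA shifted right by 12
 * (the low 12 bits of the descriptor hold flags), and the mask
 * (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1 keeps all but the top ID bit, which
 * GuC reserves for its own use and which can therefore differ between what
 * i915 wrote and what lands in OA reports.
 */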

/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context HW ID would remain
 * valid for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = fetch_and_zero(&stream->pinned_ctx);
	if (ce) {
		ce->tag = 0; /* recomputed on next submission after parking */
		intel_context_unpin(ce);
	}

	stream->specific_ctx_id = INVALID_CTX_ID;
	stream->specific_ctx_id_mask = 0;
}

static void
free_oa_buffer(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
				   I915_VMA_RELEASE_MAP);

	stream->oa_buffer.vaddr = NULL;
}

static void
free_oa_configs(struct i915_perf_stream *stream)
{
	struct i915_oa_config_bo *oa_bo, *tmp;

	i915_oa_config_put(stream->oa_config);
	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
		free_oa_config_bo(oa_bo);
}

static void
free_noa_wait(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->noa_wait, 0);
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct i915_perf *perf = stream->perf;

	BUG_ON(stream != perf->exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 */
	WRITE_ONCE(perf->exclusive_stream, NULL);
	perf->ops.disable_metric_set(stream);

	free_oa_buffer(stream);

	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
	intel_engine_pm_put(stream->engine);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	free_oa_configs(stream);
	free_noa_wait(stream);

	if (perf->spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 perf->spurious_report_rs.missed);
	}
}

static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * Pre-DevBDW: OABUFFER must be set with counters off, before
	 * OASTATUS1, but after OASTATUS2.
	 */
	intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
			   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
	stream->oa_buffer.head = gtt_offset;

	intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);

	intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
			   gtt_offset | OABUFFER_SIZE_16M);

	/* Mark that we need updated tail pointers to read from... */
	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
	stream->oa_buffer.tail = gtt_offset;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * On Haswell we have to track which OASTATUS1 flags we've already
	 * seen since they can't be cleared while periodic sampling is
	 * enabled.
	 */
	stream->perf->gen7_latched_oastatus1 = 0;

	/*
	 * NB: although the OA buffer will initially be allocated zeroed via
	 * shmfs (and so this memset is redundant when first allocating), we
	 * may re-init the OA buffer, either when re-enabling a stream or in
	 * error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the sanity
	 * check in gen7_append_oa_reports() that looks at the report-id
	 * field to make sure it's non-zero, which relies on the assumption
	 * that new reports are being written to zeroed memory.
	 */
	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
}

static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	intel_uncore_write(uncore, GEN8_OASTATUS, 0);
	intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
	stream->oa_buffer.head = gtt_offset;

	intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
	stream->oa_buffer.tail = gtt_offset;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated zeroed via
	 * shmfs (and so this memset is redundant when first allocating), we
	 * may re-init the OA buffer, either when re-enabling a stream or in
	 * error/reset paths.
	 *
	 * Clearing the buffer keeps the invalid-report sanity checks
	 * meaningful, which assume new reports land in zeroed memory.
	 */
	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
}

static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
	intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
			   gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
	stream->oa_buffer.head = gtt_offset;

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
			   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
			   gtt_offset & GEN12_OAG_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
	stream->oa_buffer.tail = gtt_offset;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated zeroed via
	 * shmfs (and so this memset is redundant when first allocating), we
	 * may re-init the OA buffer, either when re-enabling a stream or in
	 * error/reset paths.
	 *
	 * Clearing the buffer keeps the invalid-report sanity checks
	 * meaningful, which assume new reports land in zeroed memory.
	 */
	memset(stream->oa_buffer.vaddr, 0,
	       stream->oa_buffer.vma->size);
}

static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
	struct drm_i915_private *i915 = stream->perf->i915;
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma))
		return -ENODEV;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		drm_err(&i915->drm, "Failed to allocate OA buffer\n");
		return PTR_ERR(bo);
	}

	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	stream->oa_buffer.vma = vma;

	stream->oa_buffer.vaddr =
		i915_gem_object_pin_map_unlocked(bo, I915_MAP_WB);
	if (IS_ERR(stream->oa_buffer.vaddr)) {
		ret = PTR_ERR(stream->oa_buffer.vaddr);
		goto err_unpin;
	}

	return 0;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	stream->oa_buffer.vaddr = NULL;
	stream->oa_buffer.vma = NULL;

	return ret;
}

static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
				  bool save, i915_reg_t reg, u32 offset,
				  u32 dword_count)
{
	u32 cmd;
	u32 d;

	cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
	cmd |= MI_SRM_LRM_GLOBAL_GTT;
	if (GRAPHICS_VER(stream->perf->i915) >= 8)
		cmd++;

	for (d = 0; d < dword_count; d++) {
		*cs++ = cmd;
		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
						offset) + 4 * d;
		*cs++ = 0;
	}

	return cs;
}

static int alloc_noa_wait(struct i915_perf_stream *stream)
{
	struct drm_i915_private *i915 = stream->perf->i915;
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	const u64 delay_ticks = 0xffffffffffffffff -
		intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
		atomic64_read(&stream->perf->noa_programming_delay));
	const u32 base = stream->engine->mmio_base;
#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
	u32 *batch, *ts0, *cs, *jump;
	struct i915_gem_ww_ctx ww;
	int ret, i;
	enum {
		START_TS,
		NOW_TS,
		DELTA_TS,
		JUMP_PREDICATE,
		DELTA_TARGET,
		N_CS_GPR
	};

	bo = i915_gem_object_create_internal(i915, 4096);
	if (IS_ERR(bo)) {
		drm_err(&i915->drm,
			"Failed to allocate NOA wait batchbuffer\n");
		return PTR_ERR(bo);
	}

	i915_gem_ww_ctx_init(&ww, true);
retry:
	ret = i915_gem_object_lock(bo, &ww);
	if (ret)
		goto out_ww;

	/*
	 * We pin in GGTT because we jump into this buffer and multiple OA
	 * config BOs will have a jump to this address; it needs to stay
	 * fixed during the lifetime of the i915/perf stream.
	 */
	vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out_ww;
	}

	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(batch)) {
		ret = PTR_ERR(batch);
		goto err_unpin;
	}

	/* Save registers. */
	for (i = 0; i < N_CS_GPR; i++)
		cs = save_restore_register(
			stream, cs, true /* save */, CS_GPR(i),
			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
	cs = save_restore_register(
		stream, cs, true /* save */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);

	/* First timestamp snapshot location. */
	ts0 = cs;

	/*
	 * Initial snapshot of the timestamp register to implement the wait.
	 * We work with 32b values, so clear out the top 32b bits of the
	 * register because the ALU works 64bits.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
	*cs++ = 0;
	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));

	/*
	 * This is the location we're going to jump back into until the
	 * required amount of time has passed.
	 */
	jump = cs;

	/*
	 * Take another snapshot of the timestamp register. Take care to clear
	 * up the top 32bits of CS_GPR(1) as we're using it for other things.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
	*cs++ = 0;
	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));

	/*
	 * Do a diff between the 2 timestamps and store the result back into
	 * CS_GPR(1).
	 */
	*cs++ = MI_MATH(5);
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
	*cs++ = MI_MATH_SUB;
	*cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
	*cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);

	/*
	 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
	 * timestamp has rolled over the 32bits) into the predicate register
	 * to be used for the predicated jump.
	 */
	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));

	/* Restart from the beginning if we had timestamps roll over. */
	*cs++ = (GRAPHICS_VER(i915) < 8 ?
		 MI_BATCH_BUFFER_START :
		 MI_BATCH_BUFFER_START_GEN8) |
		MI_BATCH_PREDICATE;
	*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
	*cs++ = 0;

	/*
	 * Now add the diff between the two previous timestamps to
	 * ((1 << 64) - 1) - delay_ticks.
	 *
	 * When the Carry Flag contains 1 this means the elapsed time is
	 * longer than the expected delay, and we can exit the wait loop.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(2);
	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
	*cs++ = lower_32_bits(delay_ticks);
	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
	*cs++ = upper_32_bits(delay_ticks);

	*cs++ = MI_MATH(4);
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
	*cs++ = MI_MATH_ADD;
	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);

	*cs++ = MI_ARB_CHECK;

	/*
	 * Transfer the result into the predicate register to be used for the
	 * predicated jump.
	 */
	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));

	/* Predicate the jump. */
	*cs++ = (GRAPHICS_VER(i915) < 8 ?
		 MI_BATCH_BUFFER_START :
		 MI_BATCH_BUFFER_START_GEN8) |
		MI_BATCH_PREDICATE;
	*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
	*cs++ = 0;

	/* Restore registers. */
	for (i = 0; i < N_CS_GPR; i++)
		cs = save_restore_register(
			stream, cs, false /* restore */, CS_GPR(i),
			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
	cs = save_restore_register(
		stream, cs, false /* restore */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);

	/* And return to the ring. */
	*cs++ = MI_BATCH_BUFFER_END;

	GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));

	i915_gem_object_flush_map(bo);
	__i915_gem_object_release_map(bo);

	stream->noa_wait = vma;
	goto out_ww;

err_unpin:
	i915_vma_unpin_and_release(&vma, 0);
out_ww:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	if (ret)
		i915_gem_object_put(bo);
	return ret;
}
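
/*
 * Overall shape of the batch built by alloc_noa_wait() (summary of the
 * code above): save CS_GPR/predicate state, snapshot RING_TIMESTAMP into
 * START_TS, then loop:
 *
 *	NOW_TS   <- RING_TIMESTAMP
 *	DELTA_TS <- NOW_TS - START_TS	(restart from the top on 32b
 *					 timestamp rollover, via the carry
 *					 flag and a predicated jump)
 *	if (DELTA_TS + (2^64 - 1 - delay_ticks) does not carry)
 *		jump back to the loop start
 *
 * then restore the saved registers and MI_BATCH_BUFFER_END. The
 * MI_ARB_CHECK inside the loop keeps the busy wait preemptible.
 */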

static u32 *write_cs_mi_lri(u32 *cs,
			    const struct i915_oa_reg *reg_data,
			    u32 n_regs)
{
	u32 i;

	for (i = 0; i < n_regs; i++) {
		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
			u32 n_lri = min_t(u32,
					  n_regs - i,
					  MI_LOAD_REGISTER_IMM_MAX_REGS);

			*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
		}
		*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
		*cs++ = reg_data[i].value;
	}

	return cs;
}

static int num_lri_dwords(int num_regs)
{
	int count = 0;

	if (num_regs > 0) {
		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
		count += num_regs * 2;
	}

	return count;
}
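
/*
 * Worked example for num_lri_dwords() (illustrative, assuming the usual
 * MI_LOAD_REGISTER_IMM_MAX_REGS of 126): for 300 registers we need
 * DIV_ROUND_UP(300, 126) = 3 MI_LOAD_REGISTER_IMM headers plus
 * 300 * 2 = 600 payload dwords ((offset, value) pairs), i.e. 603 dwords.
 */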

static struct i915_oa_config_bo *
alloc_oa_config_buffer(struct i915_perf_stream *stream,
		       struct i915_oa_config *oa_config)
{
	struct drm_i915_gem_object *obj;
	struct i915_oa_config_bo *oa_bo;
	struct i915_gem_ww_ctx ww;
	size_t config_length = 0;
	u32 *cs;
	int err;

	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
	if (!oa_bo)
		return ERR_PTR(-ENOMEM);

	config_length += num_lri_dwords(oa_config->mux_regs_len);
	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
	config_length += num_lri_dwords(oa_config->flex_regs_len);
	config_length += 3; /* MI_BATCH_BUFFER_START */
	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_free;
	}

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		goto out_ww;

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_ww;
	}

	cs = write_cs_mi_lri(cs,
			     oa_config->mux_regs,
			     oa_config->mux_regs_len);
	cs = write_cs_mi_lri(cs,
			     oa_config->b_counter_regs,
			     oa_config->b_counter_regs_len);
	cs = write_cs_mi_lri(cs,
			     oa_config->flex_regs,
			     oa_config->flex_regs_len);

	/* Jump into the active wait. */
	*cs++ = (GRAPHICS_VER(stream->perf->i915) < 8 ?
		 MI_BATCH_BUFFER_START :
		 MI_BATCH_BUFFER_START_GEN8);
	*cs++ = i915_ggtt_offset(stream->noa_wait);
	*cs++ = 0;

	i915_gem_object_flush_map(obj);
	__i915_gem_object_release_map(obj);

	oa_bo->vma = i915_vma_instance(obj,
				       &stream->engine->gt->ggtt->vm,
				       NULL);
	if (IS_ERR(oa_bo->vma)) {
		err = PTR_ERR(oa_bo->vma);
		goto out_ww;
	}

	oa_bo->oa_config = i915_oa_config_get(oa_config);
	llist_add(&oa_bo->node, &stream->oa_config_bos);

out_ww:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err)
		i915_gem_object_put(obj);
err_free:
	if (err) {
		kfree(oa_bo);
		return ERR_PTR(err);
	}
	return oa_bo;
}

static struct i915_vma *
get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
{
	struct i915_oa_config_bo *oa_bo;

	/*
	 * Look for the buffer in the already allocated BOs attached to the
	 * stream.
	 */
	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
		if (oa_bo->oa_config == oa_config &&
		    memcmp(oa_bo->oa_config->uuid,
			   oa_config->uuid,
			   sizeof(oa_config->uuid)) == 0)
			goto out;
	}

	oa_bo = alloc_oa_config_buffer(stream, oa_config);
	if (IS_ERR(oa_bo))
		return ERR_CAST(oa_bo);

out:
	return i915_vma_get(oa_bo->vma);
}

static int
emit_oa_config(struct i915_perf_stream *stream,
	       struct i915_oa_config *oa_config,
	       struct intel_context *ce,
	       struct i915_active *active)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	struct i915_gem_ww_ctx ww;
	int err;

	vma = get_oa_vma(stream, oa_config);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err)
		goto err;

	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto err;

	intel_engine_pm_get(ce->engine);
	rq = i915_request_create(ce);
	intel_engine_pm_put(ce->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma_unpin;
	}

	if (!IS_ERR_OR_NULL(active)) {
		/* After all individual context modifications */
		err = i915_request_await_active(rq, active,
						I915_ACTIVE_AWAIT_ACTIVE);
		if (err)
			goto err_add_request;

		err = i915_active_add_request(active, rq);
		if (err)
			goto err_add_request;
	}

	err = i915_request_await_object(rq, vma->obj, 0);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (err)
		goto err_add_request;

	err = rq->engine->emit_bb_start(rq,
					vma->node.start, 0,
					I915_DISPATCH_SECURE);
	if (err)
		goto err_add_request;

err_add_request:
	i915_request_add(rq);
err_vma_unpin:
	i915_vma_unpin(vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}

	i915_gem_ww_ctx_fini(&ww);
	i915_vma_put(vma);
	return err;
}

static struct intel_context *oa_context(struct i915_perf_stream *stream)
{
	return stream->pinned_ctx ?: stream->engine->kernel_context;
}

static int
hsw_enable_metric_set(struct i915_perf_stream *stream,
		      struct i915_active *active)
{
	struct intel_uncore *uncore = stream->uncore;

	/*
	 * PRM:
	 *
	 * OA unit is using "crclk" for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain. Control is per
	 * context and programmed in render context.
	 */
	intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
			 GEN7_DOP_CLOCK_GATE_ENABLE, 0);
	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);

	return emit_oa_config(stream,
			      stream->oa_config, oa_context(stream),
			      active);
}

static void hsw_disable_metric_set(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
			 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0);
	intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
			 0, GEN7_DOP_CLOCK_GATE_ENABLE);

	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}

static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
			      i915_reg_t reg)
{
	u32 mmio = i915_mmio_reg_offset(reg);
	int i;

	/*
	 * This arbitrary default will select the 'EU FPU0 Pipeline Active'
	 * event. In the future it's anticipated that there will be an
	 * explicit 'No Event' we can select, but not yet...
	 */
	if (!oa_config)
		return 0;

	for (i = 0; i < oa_config->flex_regs_len; i++) {
		if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
			return oa_config->flex_regs[i].value;
	}

	return 0;
}

/*
 * NB: It must always remain pointer safe to run this even if the OA unit
 * has been disabled.
 *
 * It's fine to put out-of-date values into these per-context registers
 * in the case that the OA unit has been disabled.
 */
static void
gen8_update_reg_state_unlocked(const struct intel_context *ce,
			       const struct i915_perf_stream *stream)
{
	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	static const i915_reg_t flex_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	u32 *reg_state = ce->lrc_reg_state;
	int i;

	reg_state[ctx_oactxctrl + 1] =
		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME;

	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
		reg_state[ctx_flexeu0 + i * 2 + 1] =
			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
}

struct flex {
	i915_reg_t reg;
	u32 offset;
	u32 value;
};

static int
gen8_store_flex(struct i915_request *rq,
		struct intel_context *ce,
		const struct flex *flex, unsigned int count)
{
	u32 offset;
	u32 *cs;

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET;
	do {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = offset + flex->offset * sizeof(u32);
		*cs++ = 0;
		*cs++ = flex->value;
	} while (flex++, --count);

	intel_ring_advance(rq, cs);

	return 0;
}
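
/*
 * Note (summary, not from the original source): gen8_store_flex() patches
 * the saved context image in memory (via GGTT at LRC_STATE_OFFSET), so the
 * new values take effect when the target context is next restored, whereas
 * gen8_load_flex() below emits MI_LOAD_REGISTER_IMM from the context
 * itself to update the live register state.
 */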

static int
gen8_load_flex(struct i915_request *rq,
	       struct intel_context *ce,
	       const struct flex *flex, unsigned int count)
{
	u32 *cs;

	GEM_BUG_ON(!count || count > 63);

	cs = intel_ring_begin(rq, 2 * count + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*cs++ = i915_mmio_reg_offset(flex->reg);
		*cs++ = flex->value;
	} while (flex++, --count);
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static int gen8_modify_context(struct intel_context *ce,
			       const struct flex *flex, unsigned int count)
{
	struct i915_request *rq;
	int err;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Serialise with the remote context */
	err = intel_context_prepare_remote_request(ce, rq);
	if (err == 0)
		err = gen8_store_flex(rq, ce, flex, count);

	i915_request_add(rq);
	return err;
}

static int
gen8_modify_self(struct intel_context *ce,
		 const struct flex *flex, unsigned int count,
		 struct i915_active *active)
{
	struct i915_request *rq;
	int err;

	intel_engine_pm_get(ce->engine);
	rq = i915_request_create(ce);
	intel_engine_pm_put(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	if (!IS_ERR_OR_NULL(active)) {
		err = i915_active_add_request(active, rq);
		if (err)
			goto err_add_request;
	}

	err = gen8_load_flex(rq, ce, flex, count);
	if (err)
		goto err_add_request;

err_add_request:
	i915_request_add(rq);
	return err;
}

static int gen8_configure_context(struct i915_gem_context *ctx,
				  struct flex *flex, unsigned int count)
{
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	int err = 0;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		GEM_BUG_ON(ce == ce->engine->kernel_context);

		if (ce->engine->class != RENDER_CLASS)
			continue;

		/* Otherwise OA settings will be set upon first use */
		if (!intel_context_pin_if_active(ce))
			continue;

		flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu);
		err = gen8_modify_context(ce, flex, count);

		intel_context_unpin(ce);
		if (err)
			break;
	}
	i915_gem_context_unlock_engines(ctx);

	return err;
}

static int gen12_configure_oar_context(struct i915_perf_stream *stream,
				       struct i915_active *active)
{
	int err;
	struct intel_context *ce = stream->pinned_ctx;
	u32 format = stream->oa_buffer.format;
	struct flex regs_context[] = {
		{
			GEN8_OACTXCONTROL,
			stream->perf->ctx_oactxctrl_offset + 1,
			active ? GEN8_OA_COUNTER_RESUME : 0,
		},
	};
	/*
	 * Offsets in regs_lri are not used since this configuration is only
	 * applied using LRI. Initialize the correct offsets for posterity.
	 */
#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
	struct flex regs_lri[] = {
		{
			GEN12_OAR_OACONTROL,
			GEN12_OAR_OACONTROL_OFFSET + 1,
			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
			(active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
		},
		{
			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
			CTX_CONTEXT_CONTROL,
			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
				      active ?
				      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
				      0)
		},
	};

	/* Modify the context image of pinned context with regs_context */
	err = intel_context_lock_pinned(ce);
	if (err)
		return err;

	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
	intel_context_unlock_pinned(ce);
	if (err)
		return err;

	/* Apply regs_lri using LRI with pinned context */
	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
}

/*
 * Manages updating the per-context aspects of the OA stream configuration
 * across all contexts.
 *
 * The awkward consideration here is that OACTXCONTROL controls the exponent
 * for periodic sampling which is primarily used for system wide profiling
 * where we'd like a consistent sampling period even in the face of context
 * switches.
 *
 * Our approach of updating the register state context (as opposed to say
 * using a workaround batch buffer) ensures that the hardware won't
 * automatically reload an out-of-date timer exponent even transiently
 * before a WA BB could be parsed.
 *
 * This function needs to:
 * - Ensure the currently running context's per-context OA state is updated
 * - Ensure that all existing contexts will have the correct per-context OA
 *   state if they are scheduled for use.
 * - Ensure any new contexts will be initialized with the correct
 *   per-context OA state.
 *
 * Note: it's only the RCS/Render context that has any OA state.
 * Note: the first flex register passed must always be R_PWR_CLK_STATE
 */
static int
oa_configure_all_contexts(struct i915_perf_stream *stream,
			  struct flex *regs,
			  size_t num_regs,
			  struct i915_active *active)
{
	struct drm_i915_private *i915 = stream->perf->i915;
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx, *cn;
	int err;

	lockdep_assert_held(&stream->perf->lock);

	/*
	 * The OA register config is setup through the context image. This
	 * image might be written to by the GPU on context switch (in
	 * particular on lite-restore). This means we can't safely update a
	 * context's image if this context is scheduled/submitted to run on
	 * the GPU.
	 *
	 * We could emit the OA register config through the batch buffer but
	 * this might leave a small interval of time where the OA unit is
	 * configured at an invalid sampling period.
	 *
	 * Note that since we emit all requests from a single ring, there is
	 * still an implicit global barrier here that may cause a high
	 * priority context to wait for an otherwise independent lower
	 * priority context. Contexts idle at the time of reconfiguration are
	 * not trapped behind the barrier.
	 */
	spin_lock(&i915->gem.contexts.lock);
	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
		if (!kref_get_unless_zero(&ctx->ref))
			continue;

		spin_unlock(&i915->gem.contexts.lock);

		err = gen8_configure_context(ctx, regs, num_regs);
		if (err) {
			i915_gem_context_put(ctx);
			return err;
		}

		spin_lock(&i915->gem.contexts.lock);
		list_safe_reset_next(ctx, cn, link);
		i915_gem_context_put(ctx);
	}
	spin_unlock(&i915->gem.contexts.lock);

	/*
	 * After updating all other contexts, we need to modify ourselves. If
	 * we don't modify the kernel_context, we do not get events while
	 * idle.
	 */
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce = engine->kernel_context;

		if (engine->class != RENDER_CLASS)
			continue;

		regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		err = gen8_modify_self(ce, regs, num_regs, active);
		if (err)
			return err;
	}

	return 0;
}

static int
gen12_configure_all_contexts(struct i915_perf_stream *stream,
			     const struct i915_oa_config *oa_config,
			     struct i915_active *active)
{
	struct flex regs[] = {
		{
			GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
			CTX_R_PWR_CLK_STATE,
		},
	};

	return oa_configure_all_contexts(stream,
					 regs, ARRAY_SIZE(regs),
					 active);
}

static int
lrc_configure_all_contexts(struct i915_perf_stream *stream,
			   const struct i915_oa_config *oa_config,
			   struct i915_active *active)
{
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
	struct flex regs[] = {
		{
			GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
			CTX_R_PWR_CLK_STATE,
		},
		{
			GEN8_OACTXCONTROL,
			stream->perf->ctx_oactxctrl_offset + 1,
		},
		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
	};
#undef ctx_flexeuN
	int i;

	regs[1].value =
		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME;

	for (i = 2; i < ARRAY_SIZE(regs); i++)
		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);

	return oa_configure_all_contexts(stream,
					 regs, ARRAY_SIZE(regs),
					 active);
}

static int
gen8_enable_metric_set(struct i915_perf_stream *stream,
		       struct i915_active *active)
{
	struct intel_uncore *uncore = stream->uncore;
	struct i915_oa_config *oa_config = stream->oa_config;
	int ret;

	/*
	 * We disable slice/unslice clock ratio change reports on SKL since
	 * they are too noisy. The HW generates a lot of redundant reports
	 * where the ratio hasn't really changed, causing a lot of redundant
	 * work for processes and increasing the chances we'll hit buffer
	 * overruns.
	 *
	 * Although we don't currently use the 'disable overrun' OABUFFER
	 * feature it's worth noting that clock ratio reports have to be
	 * disabled before considering that feature since the HW doesn't
	 * correctly block these reports.
	 *
	 * Currently none of the high-level metrics we have depend on knowing
	 * this ratio to normalize.
	 *
	 * Note: This register is not power context saved and restored, but
	 * that's OK considering that we disable RC6 while the OA unit is
	 * enabled.
	 */
	if (IS_GRAPHICS_VER(stream->perf->i915, 9, 11)) {
		intel_uncore_write(uncore, GEN8_OA_DEBUG,
				   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
						      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
	}

	/*
	 * Update all contexts prior writing the mux configurations as we
	 * need to make sure all slices/subslices are ON before writing to
	 * NOA registers.
	 */
	ret = lrc_configure_all_contexts(stream, oa_config, active);
	if (ret)
		return ret;

	return emit_oa_config(stream,
			      stream->oa_config, oa_context(stream),
			      active);
}

static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
{
	return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
			     (stream->sample_flags & SAMPLE_OA_REPORT) ?
			     0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}
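
/*
 * Reminder on masked registers for the helper above: for these registers
 * the upper 16 bits select which of the lower 16 bits get written.
 * _MASKED_FIELD(mask, value) expands to (mask << 16) | value, so
 * oag_report_ctx_switches() either sets or clears only the
 * DISABLE_CTX_SWITCH_REPORTS bit, leaving the other OA_DEBUG bits
 * untouched.
 */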

static int
gen12_enable_metric_set(struct i915_perf_stream *stream,
			struct i915_active *active)
{
	struct intel_uncore *uncore = stream->uncore;
	struct i915_oa_config *oa_config = stream->oa_config;
	bool periodic = stream->periodic;
	u32 period_exponent = stream->period_exponent;
	int ret;

	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
			   /* Disable clk ratio reports, like previous Gens. */
			   _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
			   /*
			    * If the user didn't require OA reports, instruct
			    * the hardware not to emit ctx switch reports.
			    */
			   oag_report_ctx_switches(stream));

	intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
			   (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
			    GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
			    : 0);

	/*
	 * Update all contexts prior writing the mux configurations as we
	 * need to make sure all slices/subslices are ON before writing to
	 * NOA registers.
	 */
	ret = gen12_configure_all_contexts(stream, oa_config, active);
	if (ret)
		return ret;

	/*
	 * For Gen12, performance counters are context saved/restored, so we
	 * only need to enable the OAR unit for the context that requested
	 * single-context filtering.
	 */
	if (stream->ctx) {
		ret = gen12_configure_oar_context(stream, active);
		if (ret)
			return ret;
	}

	return emit_oa_config(stream,
			      stream->oa_config, oa_context(stream),
			      active);
}

static void gen8_disable_metric_set(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	/* Reset all contexts' slices/subslices configurations. */
	lrc_configure_all_contexts(stream, NULL, NULL);

	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}

static void gen11_disable_metric_set(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	/* Reset all contexts' slices/subslices configurations. */
	lrc_configure_all_contexts(stream, NULL, NULL);

	/* Make sure we disable noa to save power. */
	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
}

static void gen12_disable_metric_set(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	/* Reset all contexts' slices/subslices configurations. */
	gen12_configure_all_contexts(stream, NULL, NULL);

	/* disable the context save/restore or OAR counters */
	if (stream->ctx)
		gen12_configure_oar_context(stream, NULL);

	/* Make sure we disable noa to save power. */
	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
}

static void gen7_oa_enable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	struct i915_gem_context *ctx = stream->ctx;
	u32 ctx_id = stream->specific_ctx_id;
	bool periodic = stream->periodic;
	u32 period_exponent = stream->period_exponent;
	u32 report_format = stream->oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it also
	 * ensures status flags and the buffer itself are cleared in error
	 * paths, and we have checks for invalid reports based on the
	 * assumption that certain fields are written to zeroed memory which
	 * this helps maintain.
	 */
	gen7_init_oa_buffer(stream);

	intel_uncore_write(uncore, GEN7_OACONTROL,
			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
			   (period_exponent <<
			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
			   GEN7_OACONTROL_ENABLE);
}

static void gen8_oa_enable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 report_format = stream->oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 * (See the equivalent comment in gen7_oa_enable().)
	 */
	gen8_init_oa_buffer(stream);

	/*
	 * Note: we don't rely on the hardware to perform single context
	 * filtering and instead filter on the cpu based on the context-id
	 * field of reports.
	 */
	intel_uncore_write(uncore, GEN8_OACONTROL,
			   (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
			   GEN8_OA_COUNTER_ENABLE);
}

static void gen12_oa_enable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 report_format = stream->oa_buffer.format;

	/*
	 * If we don't want OA reports from the OA buffer, then we don't even
	 * need to program the OAG unit.
	 */
	if (!(stream->sample_flags & SAMPLE_OA_REPORT))
		return;

	gen12_init_oa_buffer(stream);

	intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
			   (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
			   GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
}

/**
 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream. This also starts a hrtimer that will periodically
 * check for data in the circular OA buffer for notifying userspace (e.g.
 * during a read() or poll()).
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	stream->pollin = false;

	stream->perf->ops.oa_enable(stream);

	if (stream->sample_flags & SAMPLE_OA_REPORT)
		hrtimer_start(&stream->poll_check_timer,
			      ns_to_ktime(stream->poll_oa_period),
			      HRTIMER_MODE_REL_PINNED);
}

static void gen7_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	intel_uncore_write(uncore, GEN7_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
				    50))
		drm_err(&stream->perf->i915->drm,
			"wait for OA to be disabled timed out\n");
}

static void gen8_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	intel_uncore_write(uncore, GEN8_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
				    50))
		drm_err(&stream->perf->i915->drm,
			"wait for OA to be disabled timed out\n");
}

static void gen12_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN12_OAG_OACONTROL,
				    GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
				    50))
		drm_err(&stream->perf->i915->drm,
			"wait for OA to be disabled timed out\n");

	intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1);
	if (intel_wait_for_register(uncore,
				    GEN12_OA_TLB_INV_CR,
				    1, 0,
				    50))
		drm_err(&stream->perf->i915->drm,
			"wait for OA tlb invalidate timed out\n");
}

/**
 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * Stops the OA unit from periodically writing counter reports into the
 * circular OA buffer. This also stops the hrtimer that periodically checks
 * for data in the circular OA buffer, for notifying userspace.
 */
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	stream->perf->ops.oa_disable(stream);

	if (stream->sample_flags & SAMPLE_OA_REPORT)
		hrtimer_cancel(&stream->poll_check_timer);
}

static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};
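
/*
 * For context, a minimal sketch of how userspace reaches this code
 * (hypothetical example, not part of the driver): an OA stream is opened
 * with DRM_IOCTL_I915_PERF_OPEN and an array of u64 (property, value)
 * pairs that read_properties_unlocked() turns into the
 * perf_open_properties consumed by i915_oa_stream_init() below;
 * metrics_set_id stands in for a config id read from sysfs.
 *
 *	uint64_t props[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA,	   1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT,
 *			I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT,	   14,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = ARRAY_SIZE(props) / 2,
 *		.properties_ptr = (uintptr_t)props,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 */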

static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
{
	struct i915_active *active;
	int err;

	active = i915_active_create();
	if (!active)
		return -ENOMEM;

	err = stream->perf->ops.enable_metric_set(stream, active);
	if (err == 0)
		__i915_active_wait(active, TASK_UNINTERRUPTIBLE);

	i915_active_put(active);
	return err;
}

static void
get_default_sseu_config(struct intel_sseu *out_sseu,
			struct intel_engine_cs *engine)
{
	const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu;

	*out_sseu = intel_sseu_from_device_info(devinfo_sseu);

	if (GRAPHICS_VER(engine->i915) == 11) {
		/*
		 * We only need the subslice count, so it doesn't matter
		 * which ones we select - just turn on half of all available
		 * subslices per slice.
		 */
		out_sseu->subslice_mask =
			~(~0 << (hweight8(out_sseu->subslice_mask) / 2));
		out_sseu->slice_mask = 0x1;
	}
}
2827
2828 static int
2829 get_sseu_config(struct intel_sseu *out_sseu,
2830 struct intel_engine_cs *engine,
2831 const struct drm_i915_gem_context_param_sseu *drm_sseu)
2832 {
2833 if (drm_sseu->engine.engine_class != engine->uabi_class ||
2834 drm_sseu->engine.engine_instance != engine->uabi_instance)
2835 return -EINVAL;
2836
2837 return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
2838 }

/**
 * i915_oa_stream_init - validate combined props for OA stream and init
 * @stream: An i915 perf stream
 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
 * @props: The property state that configures stream (individually validated)
 *
 * While read_properties_unlocked() validates properties in isolation it
 * doesn't ensure that the combination necessarily makes sense.
 *
 * At this point it has been determined that userspace wants a stream of OA
 * metrics, but still we need to further validate that the combined
 * properties are currently supported and that the requested OA format is
 * valid for the given hardware before initializing the stream state.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *i915 = stream->perf->i915;
	struct i915_perf *perf = stream->perf;
	int format_size;
	int ret;

	if (!props->engine) {
		drm_dbg(&stream->perf->i915->drm,
			"OA engine not specified\n");
		return -EINVAL;
	}

	/*
	 * If the sysfs metrics/ directory wasn't registered for some
	 * reason then don't let userspace try their luck with config
	 * IDs.
	 */
	if (!perf->metrics_kobj) {
		drm_dbg(&stream->perf->i915->drm,
			"OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
	    (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
		drm_dbg(&stream->perf->i915->drm,
			"Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!perf->ops.enable_metric_set) {
		drm_dbg(&stream->perf->i915->drm,
			"OA unit not supported\n");
		return -ENODEV;
	}

	/*
	 * To avoid the complexity of having to accurately filter
	 * counter reports and marshal to the appropriate client
	 * we currently only allow exclusive access.
	 */
	if (perf->exclusive_stream) {
		drm_dbg(&stream->perf->i915->drm,
			"OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->oa_format) {
		drm_dbg(&stream->perf->i915->drm,
			"OA report format not specified\n");
		return -EINVAL;
	}

	stream->engine = props->engine;
	stream->uncore = stream->engine->gt->uncore;

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = perf->oa_formats[props->oa_format].size;

	stream->sample_flags = props->sample_flags;
	stream->sample_size += format_size;

	stream->oa_buffer.format_size = format_size;
	if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
		return -EINVAL;

	stream->hold_preemption = props->hold_preemption;

	stream->oa_buffer.format =
		perf->oa_formats[props->oa_format].format;

	stream->periodic = props->oa_periodic;
	if (stream->periodic)
		stream->period_exponent = props->oa_period_exponent;

	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret) {
			drm_dbg(&stream->perf->i915->drm,
				"Invalid context id to filter with\n");
			return ret;
		}
	}

	ret = alloc_noa_wait(stream);
	if (ret) {
		drm_dbg(&stream->perf->i915->drm,
			"Unable to allocate NOA wait batch buffer\n");
		goto err_noa_wait_alloc;
	}

	stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
	if (!stream->oa_config) {
		drm_dbg(&stream->perf->i915->drm,
			"Invalid OA config id=%i\n", props->metrics_set);
		ret = -EINVAL;
		goto err_config;
	}

	/* PRM - observability performance counters:
	 *
	 *   OACONTROL, performance counter enable, note:
	 *
	 *   "When this bit is set, in order to have coherent counts,
	 *   RC6 power state and trunk clock gating must be disabled.
	 *   This can be achieved by programming MMIO registers as
	 *   0xA094=0 and 0xA090[31]=1"
	 *
	 *   In our case we are expecting that taking pm + FORCEWAKE
	 *   references will effectively disable RC6.
	 */
	intel_engine_pm_get(stream->engine);
	intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);

	ret = alloc_oa_buffer(stream);
	if (ret)
		goto err_oa_buf_alloc;

	stream->ops = &i915_oa_stream_ops;

	perf->sseu = props->sseu;
	WRITE_ONCE(perf->exclusive_stream, stream);

	ret = i915_perf_stream_enable_sync(stream);
	if (ret) {
		drm_dbg(&stream->perf->i915->drm,
			"Unable to enable metric set\n");
		goto err_enable;
	}

	drm_dbg(&stream->perf->i915->drm,
		"opening stream oa config uuid=%s\n",
		stream->oa_config->uuid);

	hrtimer_init(&stream->poll_check_timer,
		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	stream->poll_check_timer.function = oa_poll_check_timer_cb;
	init_waitqueue_head(&stream->poll_wq);
	spin_lock_init(&stream->oa_buffer.ptr_lock);

	return 0;

err_enable:
	WRITE_ONCE(perf->exclusive_stream, NULL);
	perf->ops.disable_metric_set(stream);

	free_oa_buffer(stream);

err_oa_buf_alloc:
	free_oa_configs(stream);

	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
	intel_engine_pm_put(stream->engine);

err_config:
	free_noa_wait(stream);

err_noa_wait_alloc:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}

/**
 * i915_oa_init_reg_state - Initialize the OA/NOA related bits of a
 * context's register state
 * @ce: The context being pinned
 * @engine: The engine the context is being pinned on
 */
void i915_oa_init_reg_state(const struct intel_context *ce,
			    const struct intel_engine_cs *engine)
{
	struct i915_perf_stream *stream;

	if (engine->class != RENDER_CLASS)
		return;

	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
	stream = READ_ONCE(engine->i915->perf.exclusive_stream);
	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
		gen8_update_reg_state_unlocked(ce, stream);
}

/**
 * i915_perf_read - handles read() FOP for i915 perf stream FDs
 * @file: An i915 perf stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @ppos: (inout) file seek position (unused)
 *
 * The entry point for handling a read() on a stream file descriptor from
 * userspace. Most of the work is left to the stream's read vfunc, but to
 * save having stream implementations (of which we might have multiple
 * later) we handle blocking reads here.
 *
 * We can also consistently treat trying to read from a disabled stream as
 * an IO error so implementations can assume the stream is enabled while
 * reading.
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 */
static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct i915_perf *perf = stream->perf;
	size_t offset = 0;
	int ret;

	/* To ensure it's handled consistently we simply treat all reads of a
	 * disabled stream as an error. In particular it might otherwise lead
	 * to a deadlock for blocking file descriptors.
	 */
	if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT))
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/* There's the small chance of false positives from
		 * stream->ops->wait_unlocked.
		 *
		 * E.g. with single context filtering since we only wait until
		 * oabuffer has >= 1 report we don't immediately know whether
		 * any reports really belong to the current context
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&perf->lock);
			ret = stream->ops->read(stream, buf, count, &offset);
			mutex_unlock(&perf->lock);
		} while (!offset && !ret);
	} else {
		mutex_lock(&perf->lock);
		ret = stream->ops->read(stream, buf, count, &offset);
		mutex_unlock(&perf->lock);
	}

	/* We allow the poll checking to sometimes report false positive EPOLLIN
	 * events where we might actually report EAGAIN on read() if there's
	 * not really any data available. In this situation though we don't
	 * want to enter a busy loop between poll() reporting a EPOLLIN event
	 * and read() returning -EAGAIN. Clearing the pollin state here
	 * effectively ensures we back off until the next hrtimer callback
	 * before reporting another EPOLLIN event.
	 * The exception to this is if ops->read() returned -ENOSPC which means
	 * that more OA data is available than could fit in the user provided
	 * buffer. In this case we want the next poll() call to not block.
	 */
	if (ret != -ENOSPC)
		stream->pollin = false;

	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
	return offset ?: (ret ?: -EAGAIN);
}

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct i915_perf_stream *stream =
		container_of(hrtimer, typeof(*stream), poll_check_timer);

	if (oa_buffer_check_unlocked(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}

	hrtimer_forward_now(hrtimer,
			    ns_to_ktime(stream->poll_oa_period));

	return HRTIMER_RESTART;
}
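
/*
 * oa_poll_check_timer_cb() fires once per stream->poll_oa_period (by default
 * DEFAULT_POLL_PERIOD_NS, i.e. 200Hz): it peeks at the OA buffer without
 * taking &perf->lock, latches stream->pollin and wakes any read()/poll()
 * waiters, then re-arms itself by returning HRTIMER_RESTART after
 * hrtimer_forward_now().
 */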

/**
 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
 * @stream: An i915 perf stream
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream, this calls through
 * to the stream's poll_wait vfunc so that poll_wait() gets called with a
 * wait queue that will be woken for new stream data.
 *
 * Note: The &perf->lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 *
 * Returns: any poll events that are ready without sleeping
 */
static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
				      struct file *file,
				      poll_table *wait)
{
	__poll_t events = 0;

	stream->ops->poll_wait(stream, file, wait);

	/* Note: we don't explicitly check whether there's something to read
	 * here since this path may be very hot depending on what else
	 * userspace is polling, or on the timeout in use. We rely solely on
	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
	 * samples to read.
	 */
	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

/**
 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream, this ensures
 * poll_wait() gets called with a wait queue that will be woken for new
 * stream data.
 *
 * Note: Implementation deferred to i915_perf_poll_locked()
 *
 * Returns: any poll events that are ready without sleeping
 */
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct i915_perf *perf = stream->perf;
	__poll_t ret;

	mutex_lock(&perf->lock);
	ret = i915_perf_poll_locked(stream, file, wait);
	mutex_unlock(&perf->lock);

	return ret;
}

/**
 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
 * @stream: A disabled i915 perf stream
 *
 * Enables the associated capture of data for this stream; a stream opened
 * without I915_PERF_FLAG_DISABLED is enabled automatically at open time.
 * Enabling an already enabled stream is a no-op.
 *
 * Note: The &perf->lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 */
static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);

	if (stream->hold_preemption)
		intel_context_set_nopreempt(stream->pinned_ctx);
}

/**
 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
 * @stream: An enabled i915 perf stream
 *
 * Disables the associated capture of data for this stream; disabling an
 * already disabled stream is a no-op.
 *
 * The intention is that disabling and re-enabling a stream will ideally be
 * cheaper than destroying and re-opening a stream with the same
 * configuration, though there are no formal guarantees about what state or
 * buffered data must be retained between disabling and re-enabling a stream.
 *
 * Note: while a stream is disabled it's considered an error for userspace
 * to attempt to read from the stream (-EIO).
 */
static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->hold_preemption)
		intel_context_clear_nopreempt(stream->pinned_ctx);

	if (stream->ops->disable)
		stream->ops->disable(stream);
}

static long i915_perf_config_locked(struct i915_perf_stream *stream,
				    unsigned long metrics_set)
{
	struct i915_oa_config *config;
	long ret = stream->oa_config->id;

	config = i915_perf_get_oa_config(stream->perf, metrics_set);
	if (!config)
		return -EINVAL;

	if (config != stream->oa_config) {
		int err;

		/*
		 * If OA is bound to a specific context, emit the
		 * reconfiguration inline from that context. The update
		 * will then be ordered with respect to submission on that
		 * context.
		 *
		 * When set globally, we use a low priority kernel context,
		 * so it will effectively take effect when idle.
		 */
		err = emit_oa_config(stream, config, oa_context(stream), NULL);
		if (!err)
			config = xchg(&stream->oa_config, config);
		else
			ret = err;
	}

	i915_oa_config_put(config);

	return ret;
}
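
/*
 * Reference accounting in i915_perf_config_locked() above is subtle:
 * i915_perf_get_oa_config() takes a reference on the new config. On a
 * successful switch, xchg() hands that reference to stream->oa_config and
 * leaves 'config' pointing at the old config, so the final
 * i915_oa_config_put() drops either the old config's reference or, on
 * failure/no-op, the lookup reference.
 */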

/**
 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
 * @stream: An i915 perf stream
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Note: The &perf->lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_CONFIG:
		return i915_perf_config_locked(stream, arg);
	}

	return -EINVAL;
}

/**
 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
 * @file: An i915 perf stream file
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Implementation deferred to i915_perf_ioctl_locked().
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct i915_perf *perf = stream->perf;
	long ret;

	mutex_lock(&perf->lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&perf->lock);

	return ret;
}

/**
 * i915_perf_destroy_locked - destroy an i915 perf stream
 * @stream: An i915 perf stream
 *
 * Frees all resources associated with the given i915 perf @stream, disabling
 * any associated data capture in the process.
 *
 * Note: The &perf->lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 */
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	if (stream->ctx)
		i915_gem_context_put(stream->ctx);

	kfree(stream);
}

/**
 * i915_perf_release - handles userspace close() of a stream file
 * @inode: anonymous inode associated with file
 * @file: An i915 perf stream file
 *
 * Cleans up any stream state associated with the given file, taking
 * &perf->lock to serialize against any non-file-operation driver hooks,
 * and drops the reference the stream kept on the i915 driver.
 */
static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct i915_perf *perf = stream->perf;

	mutex_lock(&perf->lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&perf->lock);

	/* Release the reference the perf stream kept on the driver. */
	drm_dev_put(&perf->i915->drm);

	return 0;
}

static const struct file_operations fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= i915_perf_release,
	.poll		= i915_perf_poll,
	.read		= i915_perf_read,
	.unlocked_ioctl	= i915_perf_ioctl,
	/* Our ioctls have no arguments, so it's safe to use the same function
	 * to handle 32bit compatibility.
	 */
	.compat_ioctl	= i915_perf_ioctl,
};
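
/*
 * Illustrative userspace flow (not part of the driver): a stream is opened
 * by passing u64 (property, value) pairs to DRM_IOCTL_I915_PERF_OPEN on the
 * DRM fd, and OA records are then read() from the returned stream fd:
 *
 *	uint64_t props[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, config_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)props,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 */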

/**
 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
 * @perf: i915 perf instance
 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
 * @props: individually validated u64 property value pairs
 * @file: drm file
 *
 * See i915_perf_open_ioctl() for interface details.
 *
 * Implements further stream config validation and stream initialization on
 * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
 * taken to serialize with any non-file-operation driver hooks.
 *
 * Note: at this point the @props have only been validated in isolation and
 * it's still necessary to validate that the combination of properties makes
 * sense.
 *
 * In the case where userspace is interested in OA unit metrics then further
 * config validation and stream initialization details will be handled by
 * i915_oa_stream_init(). The code here should only validate config state that
 * will be relevant to all stream types / backends.
 *
 * Returns: zero on success or a negative error code.
 */
static int
i915_perf_open_ioctl_locked(struct i915_perf *perf,
			    struct drm_i915_perf_open_param *param,
			    struct perf_open_properties *props,
			    struct drm_file *file)
{
	struct i915_gem_context *specific_ctx = NULL;
	struct i915_perf_stream *stream = NULL;
	unsigned long f_flags = 0;
	bool privileged_op = true;
	int stream_fd;
	int ret;

	if (props->single_context) {
		u32 ctx_handle = props->ctx_handle;
		struct drm_i915_file_private *file_priv = file->driver_priv;

		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
		if (IS_ERR(specific_ctx)) {
			drm_dbg(&perf->i915->drm,
				"Failed to look up context with ID %u for opening perf stream\n",
				ctx_handle);
			ret = PTR_ERR(specific_ctx);
			goto err;
		}
	}

	/*
	 * On Haswell the OA unit supports clock gating off for a specific
	 * context and in this mode there's no visibility of metrics for the
	 * rest of the system, which we consider acceptable for a
	 * non-privileged client.
	 *
	 * For Gen8->11 the OA unit no longer supports clock gating off for a
	 * specific context and the kernel can't securely stop the counters
	 * from updating as system-wide / global values. Even though we can
	 * filter reports based on the included context ID we can't block
	 * clients from seeing the raw / global counter values via
	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
	 * enable the OA unit by default.
	 *
	 * For Gen12+ we gain a new OAR unit that only monitors the RCS on a
	 * per context basis. So we can relax requirements there if the user
	 * doesn't request global stream access (i.e. query based sampling
	 * using MI_REPORT_PERF_COUNT).
	 */
	if (IS_HASWELL(perf->i915) && specific_ctx)
		privileged_op = false;
	else if (GRAPHICS_VER(perf->i915) == 12 && specific_ctx &&
		 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
		privileged_op = false;

	if (props->hold_preemption) {
		if (!props->single_context) {
			drm_dbg(&perf->i915->drm,
				"preemption disable with no context\n");
			ret = -EINVAL;
			goto err;
		}
		privileged_op = true;
	}

	/*
	 * Asking for SSEU configuration is a privileged operation.
	 */
	if (props->has_sseu)
		privileged_op = true;
	else
		get_default_sseu_config(&props->sseu, props->engine);

	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
	 * we check a dev.i915.perf_stream_paranoid sysctl option
	 * to determine if it's ok to access system wide OA counters
	 * without CAP_PERFMON or CAP_SYS_ADMIN privileges.
	 */
	if (privileged_op &&
	    i915_perf_stream_paranoid && !perfmon_capable()) {
		drm_dbg(&perf->i915->drm,
			"Insufficient privileges to open i915 perf stream\n");
		ret = -EACCES;
		goto err_ctx;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto err_ctx;
	}

	stream->perf = perf;
	stream->ctx = specific_ctx;
	stream->poll_oa_period = props->poll_oa_period;

	ret = i915_oa_stream_init(stream, param, props);
	if (ret)
		goto err_alloc;

	/* we avoid simply assigning stream->sample_flags = props->sample_flags
	 * to have _stream_init check the combination of sample flags more
	 * thoroughly, but still this is the expected result at this point.
	 */
	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
		ret = -ENODEV;
		goto err_flags;
	}

	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
		f_flags |= O_NONBLOCK;

	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_flags;
	}

	if (!(param->flags & I915_PERF_FLAG_DISABLED))
		i915_perf_enable_locked(stream);

	/* Take a reference on the driver that will be kept with stream_fd
	 * until its release.
	 */
	drm_dev_get(&perf->i915->drm);

	return stream_fd;

err_flags:
	if (stream->ops->destroy)
		stream->ops->destroy(stream);
err_alloc:
	kfree(stream);
err_ctx:
	if (specific_ctx)
		i915_gem_context_put(specific_ctx);
err:
	return ret;
}

static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
	return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
					     2ULL << exponent);
}
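
/*
 * The OA sampling period doubles with each step of the exponent:
 * period = 2^(exponent + 1) timestamp ticks, converted to ns above. For
 * example, with Haswell's 12.5MHz timestamp clock (80ns per tick), exponent
 * 0 gives a 160ns period while OA_EXPONENT_MAX gives a period of roughly a
 * few minutes.
 */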

static __always_inline bool
oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format)
{
	return test_bit(format, perf->format_mask);
}

static __always_inline void
oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format)
{
	__set_bit(format, perf->format_mask);
}

/**
 * read_properties_unlocked - validate + copy userspace stream open properties
 * @perf: i915 perf instance
 * @uprops: The array of u64 key value pairs given by userspace
 * @n_props: The number of key value pairs expected in @uprops
 * @props: The stream configuration built up while validating properties
 *
 * Note this function only validates properties in isolation it doesn't
 * validate that the combination of properties makes sense or that all
 * properties necessary for a particular kind of stream have been set.
 *
 * Note that there currently aren't any ordering requirements for properties
 * so we shouldn't validate or assume anything about ordering here. This
 * doesn't rule out defining new properties with ordering requirements in
 * the future.
 */
static int read_properties_unlocked(struct i915_perf *perf,
				    u64 __user *uprops,
				    u32 n_props,
				    struct perf_open_properties *props)
{
	u64 __user *uprop = uprops;
	u32 i;
	int ret;

	memset(props, 0, sizeof(struct perf_open_properties));
	props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;

	if (!n_props) {
		drm_dbg(&perf->i915->drm,
			"No i915 perf properties given\n");
		return -EINVAL;
	}

	/* OA sampling is currently only supported on the render engine. */
	props->engine = intel_engine_lookup_user(perf->i915,
						 I915_ENGINE_CLASS_RENDER,
						 0);
	if (!props->engine) {
		drm_dbg(&perf->i915->drm,
			"No RENDER-capable engines\n");
		return -EINVAL;
	}

	/* Considering that ID = 0 is reserved and assuming that we don't
	 * (currently) expect any configurations to ever specify duplicate
	 * values for a particular property ID then the last _PROP_MAX value is
	 * one greater than the maximum number of properties we expect to get
	 * from userspace.
	 */
	if (n_props >= DRM_I915_PERF_PROP_MAX) {
		drm_dbg(&perf->i915->drm,
			"More i915 perf properties specified than exist\n");
		return -EINVAL;
	}

	for (i = 0; i < n_props; i++) {
		u64 oa_period, oa_freq_hz;
		u64 id, value;

		ret = get_user(id, uprop);
		if (ret)
			return ret;

		ret = get_user(value, uprop + 1);
		if (ret)
			return ret;

		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
			drm_dbg(&perf->i915->drm,
				"Unknown i915 perf property ID\n");
			return -EINVAL;
		}

		switch ((enum drm_i915_perf_property_id)id) {
		case DRM_I915_PERF_PROP_CTX_HANDLE:
			props->single_context = 1;
			props->ctx_handle = value;
			break;
		case DRM_I915_PERF_PROP_SAMPLE_OA:
			if (value)
				props->sample_flags |= SAMPLE_OA_REPORT;
			break;
		case DRM_I915_PERF_PROP_OA_METRICS_SET:
			if (value == 0) {
				drm_dbg(&perf->i915->drm,
					"Unknown OA metric set ID\n");
				return -EINVAL;
			}
			props->metrics_set = value;
			break;
		case DRM_I915_PERF_PROP_OA_FORMAT:
			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
				drm_dbg(&perf->i915->drm,
					"Out-of-range OA report format %llu\n",
					value);
				return -EINVAL;
			}
			if (!oa_format_valid(perf, value)) {
				drm_dbg(&perf->i915->drm,
					"Unsupported OA report format %llu\n",
					value);
				return -EINVAL;
			}
			props->oa_format = value;
			break;
		case DRM_I915_PERF_PROP_OA_EXPONENT:
			if (value > OA_EXPONENT_MAX) {
				drm_dbg(&perf->i915->drm,
					"OA timer exponent too high (> %u)\n",
					OA_EXPONENT_MAX);
				return -EINVAL;
			}

			/* Theoretically we can program the OA unit to sample
			 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
			 * for ICL, but the period is derived from the exponent
			 * as 2^(exponent + 1) clock ticks so it can't be
			 * expressed with finer granularity than that.
			 */
			BUILD_BUG_ON(sizeof(oa_period) != 8);
			oa_period = oa_exponent_to_ns(perf, value);

			/* This check is primarily to ensure that oa_period <=
			 * UINT32_MAX (before passing to do_div which only
			 * accepts a u32 denominator), but we can also skip
			 * checking anything < 1Hz which implicitly can't be
			 * limited via an integer oa_max_sample_rate.
			 */
			if (oa_period <= NSEC_PER_SEC) {
				u64 tmp = NSEC_PER_SEC;
				do_div(tmp, oa_period);
				oa_freq_hz = tmp;
			} else
				oa_freq_hz = 0;

			if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) {
				drm_dbg(&perf->i915->drm,
					"OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
					i915_oa_max_sample_rate);
				return -EACCES;
			}

			props->oa_periodic = true;
			props->oa_period_exponent = value;
			break;
		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
			props->hold_preemption = !!value;
			break;
		case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
			struct drm_i915_gem_context_param_sseu user_sseu;

			if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
				drm_dbg(&perf->i915->drm,
					"SSEU config not supported on gfx %x\n",
					GRAPHICS_VER_FULL(perf->i915));
				return -ENODEV;
			}

			if (copy_from_user(&user_sseu,
					   u64_to_user_ptr(value),
					   sizeof(user_sseu))) {
				drm_dbg(&perf->i915->drm,
					"Unable to copy global sseu parameter\n");
				return -EFAULT;
			}

			ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
			if (ret) {
				drm_dbg(&perf->i915->drm,
					"Invalid SSEU configuration\n");
				return ret;
			}
			props->has_sseu = true;
			break;
		}
		case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
			if (value < 100000 /* 100us */) {
				drm_dbg(&perf->i915->drm,
					"OA availability timer too small (%lluns < 100us)\n",
					value);
				return -EINVAL;
			}
			props->poll_oa_period = value;
			break;
		case DRM_I915_PERF_PROP_MAX:
			MISSING_CASE(id);
			return -EINVAL;
		}

		uprop += 2;
	}

	return 0;
}

/**
 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
 * @dev: drm device
 * @data: ioctl data copied from userspace (unvalidated)
 * @file: drm file
 *
 * Validates the stream open parameters given by userspace including flags
 * and an array of u64 key, value pair properties.
 *
 * Very little is assumed up front about the nature of the stream being
 * opened (for instance we don't assume it's for periodic OA unit metrics).
 * An i915-perf stream is expected to be a suitable interface for other
 * forms of buffered data written by the GPU besides periodic OA metrics.
 *
 * Most of the implementation details are handled by
 * i915_perf_open_ioctl_locked() after taking the &perf->lock
 * mutex for serializing with any non-file-operation driver hooks.
 *
 * Return: A newly opened i915 Perf stream file descriptor or negative
 * error code on failure.
 */
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct i915_perf *perf = &to_i915(dev)->perf;
	struct drm_i915_perf_open_param *param = data;
	struct perf_open_properties props;
	u32 known_open_flags;
	int ret;

	if (!perf->i915) {
		drm_dbg(&perf->i915->drm,
			"i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
			   I915_PERF_FLAG_FD_NONBLOCK |
			   I915_PERF_FLAG_DISABLED;
	if (param->flags & ~known_open_flags) {
		drm_dbg(&perf->i915->drm,
			"Unknown drm_i915_perf_open_param flag\n");
		return -EINVAL;
	}

	ret = read_properties_unlocked(perf,
				       u64_to_user_ptr(param->properties_ptr),
				       param->num_properties,
				       &props);
	if (ret)
		return ret;

	mutex_lock(&perf->lock);
	ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
	mutex_unlock(&perf->lock);

	return ret;
}

/**
 * i915_perf_register - exposes i915-perf to userspace
 * @i915: i915 device instance
 *
 * In particular OA metric sets are advertised under a sysfs metrics/
 * directory allowing userspace to enumerate valid IDs that can be
 * used to open an i915-perf stream.
 */
void i915_perf_register(struct drm_i915_private *i915)
{
	struct i915_perf *perf = &i915->perf;

	if (!perf->i915)
		return;

	/* To be sure we're synchronized with an attempted
	 * i915_perf_open_ioctl(); considering that we register after
	 * being exposed to userspace.
	 */
	mutex_lock(&perf->lock);

	perf->metrics_kobj =
		kobject_create_and_add("metrics",
				       &i915->drm.primary->kdev->kobj);

	mutex_unlock(&perf->lock);
}

/**
 * i915_perf_unregister - hide i915-perf from userspace
 * @i915: i915 device instance
 *
 * i915-perf state cleanup is split up into an 'unregister' and
 * 'deinit' phase where the interface is first hidden from
 * userspace by i915_perf_unregister() before cleaning up
 * remaining state in i915_perf_fini().
 */
void i915_perf_unregister(struct drm_i915_private *i915)
{
	struct i915_perf *perf = &i915->perf;

	if (!perf->metrics_kobj)
		return;

	kobject_put(perf->metrics_kobj);
	perf->metrics_kobj = NULL;
}

static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
{
	static const i915_reg_t flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
			return true;
	}
	return false;
}

static bool reg_in_range_table(u32 addr, const struct i915_range *table)
{
	while (table->start || table->end) {
		if (addr >= table->start && addr <= table->end)
			return true;

		table++;
	}

	return false;
}

#define REG_EQUAL(addr, mmio) \
	((addr) == i915_mmio_reg_offset(mmio))
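
/*
 * The allow-lists below are zero-terminated: reg_in_range_table() walks
 * entries until it hits the empty '{}' sentinel where both start and end
 * are zero.
 */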

static const struct i915_range gen7_oa_b_counters[] = {
	{ .start = 0x2710, .end = 0x272c },	/* OASTARTTRIG[1-8] */
	{ .start = 0x2740, .end = 0x275c },	/* OAREPORTTRIG[1-8] */
	{ .start = 0x2770, .end = 0x27ac },	/* OACEC[0-7][0-1] */
	{}
};

static const struct i915_range gen12_oa_b_counters[] = {
	{ .start = 0x2b2c, .end = 0x2b2c },	/* GEN12_OAG_OA_PESS */
	{ .start = 0xd900, .end = 0xd91c },	/* GEN12_OAG_OASTARTTRIG[1-8] */
	{ .start = 0xd920, .end = 0xd93c },	/* GEN12_OAG_OAREPORTTRIG[1-8] */
	{ .start = 0xd940, .end = 0xd97c },	/* GEN12_OAG_CEC[0-7][0-1] */
	{ .start = 0xdc00, .end = 0xdc3c },	/* GEN12_OAG_SCEC[0-7][0-1] */
	{ .start = 0xdc40, .end = 0xdc40 },	/* GEN12_OAG_SPCTR_CNF */
	{ .start = 0xdc44, .end = 0xdc44 },	/* GEN12_OAA_DBG_REG */
	{}
};

static const struct i915_range gen7_oa_mux_regs[] = {
	{ .start = 0x91b8, .end = 0x91cc },
	{ .start = 0x9800, .end = 0x9888 },
	{ .start = 0xe180, .end = 0xe180 },
	{}
};

static const struct i915_range hsw_oa_mux_regs[] = {
	{ .start = 0x09e80, .end = 0x09ea4 },
	{ .start = 0x09ec0, .end = 0x09ec0 },
	{ .start = 0x25100, .end = 0x2ff90 },
	{}
};

static const struct i915_range chv_oa_mux_regs[] = {
	{ .start = 0x182300, .end = 0x1823a4 },
	{}
};

static const struct i915_range gen8_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d2c },
	{ .start = 0x20cc, .end = 0x20cc },
	{}
};

static const struct i915_range gen11_oa_mux_regs[] = {
	{ .start = 0x91c8, .end = 0x91dc },
	{}
};

static const struct i915_range gen12_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d04 },
	{ .start = 0x0d0c, .end = 0x0d2c },
	{ .start = 0x9840, .end = 0x9840 },
	{ .start = 0x9884, .end = 0x9888 },
	{ .start = 0x20cc, .end = 0x20cc },
	{}
};

static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen7_oa_b_counters);
}

static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen7_oa_mux_regs) ||
	       reg_in_range_table(addr, gen8_oa_mux_regs);
}

static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen7_oa_mux_regs) ||
	       reg_in_range_table(addr, gen8_oa_mux_regs) ||
	       reg_in_range_table(addr, gen11_oa_mux_regs);
}

static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen7_oa_mux_regs) ||
	       reg_in_range_table(addr, hsw_oa_mux_regs);
}

static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen7_oa_mux_regs) ||
	       reg_in_range_table(addr, chv_oa_mux_regs);
}

static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen12_oa_b_counters);
}

static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
	return reg_in_range_table(addr, gen12_oa_mux_regs);
}

static u32 mask_reg_value(u32 reg, u32 val)
{
	/* HALF_SLICE_CHICKEN2 is programmed with the
	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
	 * programmed by userspace doesn't change this.
	 */
	if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
	 * indicated by its name and a bunch of selection fields used by OA
	 * configs.
	 */
	if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);

	return val;
}

static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
					 bool (*is_valid)(struct i915_perf *perf, u32 addr),
					 u32 __user *regs,
					 u32 n_regs)
{
	struct i915_oa_reg *oa_regs;
	int err;
	u32 i;

	if (!n_regs)
		return NULL;

	/* No is_valid function means we're not allowing any register to be programmed. */
	GEM_BUG_ON(!is_valid);
	if (!is_valid)
		return ERR_PTR(-EINVAL);

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		if (!is_valid(perf, addr)) {
			drm_dbg(&perf->i915->drm,
				"Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = _MMIO(addr);
		oa_regs[i].value = mask_reg_value(addr, value);

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}
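
/*
 * Like the stream open properties, each register is passed from userspace
 * as a pair of 32-bit values laid out as { addr, value, addr, value, ... },
 * which is why the loop above reads 'regs' and 'regs + 1' and then advances
 * by two entries per register.
 */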

static ssize_t show_dynamic_id(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	struct i915_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sprintf(buf, "%d\n", oa_config->id);
}

static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(perf->metrics_kobj,
				  &oa_config->sysfs_metric);
}
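
/*
 * The group created above is added to the metrics/ kobject registered by
 * i915_perf_register(), so each dynamic config is advertised to userspace
 * as a metrics/<uuid>/id file under the card's sysfs directory (typically
 * /sys/class/drm/card<N>/metrics/<uuid>/id).
 */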

/**
 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
 * @dev: drm device
 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
 *        userspace (unvalidated)
 * @file: drm file
 *
 * Validates the submitted OA register configuration and creates a new OA
 * config, making it available for use with the i915 perf open ioctl via the
 * returned metric set ID.
 *
 * Returns: A new allocated config number (ID) for this config, or a negative
 * error code.
 */
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct i915_perf *perf = &to_i915(dev)->perf;
	struct drm_i915_perf_oa_config *args = data;
	struct i915_oa_config *oa_config, *tmp;
	struct i915_oa_reg *regs;
	int err, id;

	if (!perf->i915) {
		drm_dbg(&perf->i915->drm,
			"i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (!perf->metrics_kobj) {
		drm_dbg(&perf->i915->drm,
			"OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (i915_perf_stream_paranoid && !perfmon_capable()) {
		drm_dbg(&perf->i915->drm,
			"Insufficient privileges to add i915 OA config\n");
		return -EACCES;
	}

	if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
	    (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
	    (!args->flex_regs_ptr || !args->n_flex_regs)) {
		drm_dbg(&perf->i915->drm,
			"No OA registers given\n");
		return -EINVAL;
	}

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config) {
		drm_dbg(&perf->i915->drm,
			"Failed to allocate memory for the OA config\n");
		return -ENOMEM;
	}

	oa_config->perf = perf;
	kref_init(&oa_config->ref);

	if (!uuid_is_valid(args->uuid)) {
		drm_dbg(&perf->i915->drm,
			"Invalid uuid format for OA config\n");
		err = -EINVAL;
		goto reg_err;
	}

	/* Last character in oa_config->uuid will be 0 because oa_config is
	 * kzalloc.
	 */
	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));

	oa_config->mux_regs_len = args->n_mux_regs;
	regs = alloc_oa_regs(perf,
			     perf->ops.is_valid_mux_reg,
			     u64_to_user_ptr(args->mux_regs_ptr),
			     args->n_mux_regs);

	if (IS_ERR(regs)) {
		drm_dbg(&perf->i915->drm,
			"Failed to create OA config for mux_regs\n");
		err = PTR_ERR(regs);
		goto reg_err;
	}
	oa_config->mux_regs = regs;

	oa_config->b_counter_regs_len = args->n_boolean_regs;
	regs = alloc_oa_regs(perf,
			     perf->ops.is_valid_b_counter_reg,
			     u64_to_user_ptr(args->boolean_regs_ptr),
			     args->n_boolean_regs);

	if (IS_ERR(regs)) {
		drm_dbg(&perf->i915->drm,
			"Failed to create OA config for b_counter_regs\n");
		err = PTR_ERR(regs);
		goto reg_err;
	}
	oa_config->b_counter_regs = regs;

	if (GRAPHICS_VER(perf->i915) < 8) {
		if (args->n_flex_regs != 0) {
			err = -EINVAL;
			goto reg_err;
		}
	} else {
		oa_config->flex_regs_len = args->n_flex_regs;
		regs = alloc_oa_regs(perf,
				     perf->ops.is_valid_flex_reg,
				     u64_to_user_ptr(args->flex_regs_ptr),
				     args->n_flex_regs);

		if (IS_ERR(regs)) {
			drm_dbg(&perf->i915->drm,
				"Failed to create OA config for flex_regs\n");
			err = PTR_ERR(regs);
			goto reg_err;
		}
		oa_config->flex_regs = regs;
	}

	err = mutex_lock_interruptible(&perf->metrics_lock);
	if (err)
		goto reg_err;

	/* We shouldn't have too many configs, so this iteration shouldn't be
	 * too costly.
	 */
	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			drm_dbg(&perf->i915->drm,
				"OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(perf, oa_config);
	if (err) {
		drm_dbg(&perf->i915->drm,
			"Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the kernel test config. */
	oa_config->id = idr_alloc(&perf->metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		drm_dbg(&perf->i915->drm,
			"Failed to allocate an ID for the OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&perf->metrics_lock);

	drm_dbg(&perf->i915->drm,
		"Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&perf->metrics_lock);
reg_err:
	i915_oa_config_put(oa_config);
	drm_dbg(&perf->i915->drm,
		"Failed to add new OA config\n");
	return err;
}

/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while being used; they will stop appearing in
 * sysfs and their content will be freed when the stream using the config
 * is closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct i915_perf *perf = &to_i915(dev)->perf;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!perf->i915) {
		drm_dbg(&perf->i915->drm,
			"i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !perfmon_capable()) {
		drm_dbg(&perf->i915->drm,
			"Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&perf->metrics_lock);
	if (ret)
		return ret;

	oa_config = idr_find(&perf->metrics_idr, *arg);
	if (!oa_config) {
		drm_dbg(&perf->i915->drm,
			"Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto err_unlock;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);

	idr_remove(&perf->metrics_idr, *arg);

	mutex_unlock(&perf->metrics_lock);

	drm_dbg(&perf->i915->drm,
		"Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	i915_oa_config_put(oa_config);

	return 0;

err_unlock:
	mutex_unlock(&perf->metrics_lock);
	return ret;
}

static struct ctl_table oa_table[] = {
	{
		.procname = "perf_stream_paranoid",
		.data = &i915_perf_stream_paranoid,
		.maxlen = sizeof(i915_perf_stream_paranoid),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{
		.procname = "oa_max_sample_rate",
		.data = &i915_oa_max_sample_rate,
		.maxlen = sizeof(i915_oa_max_sample_rate),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = &oa_sample_rate_hard_limit,
	},
	{}
};
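
/*
 * Registered via register_sysctl("dev/i915", oa_table) in
 * i915_perf_sysctl_register() below, these knobs are exposed as
 * /proc/sys/dev/i915/perf_stream_paranoid and
 * /proc/sys/dev/i915/oa_max_sample_rate.
 */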

static void oa_init_supported_formats(struct i915_perf *perf)
{
	struct drm_i915_private *i915 = perf->i915;
	enum intel_platform platform = INTEL_INFO(i915)->platform;

	switch (platform) {
	case INTEL_HASWELL:
		oa_format_add(perf, I915_OA_FORMAT_A13);
		oa_format_add(perf, I915_OA_FORMAT_A29);
		oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8);
		oa_format_add(perf, I915_OA_FORMAT_B4_C8);
		oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8);
		oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16);
		oa_format_add(perf, I915_OA_FORMAT_C4_B8);
		break;

	case INTEL_BROADWELL:
	case INTEL_CHERRYVIEW:
	case INTEL_SKYLAKE:
	case INTEL_BROXTON:
	case INTEL_KABYLAKE:
	case INTEL_GEMINILAKE:
	case INTEL_COFFEELAKE:
	case INTEL_COMETLAKE:
	case INTEL_ICELAKE:
	case INTEL_ELKHARTLAKE:
	case INTEL_JASPERLAKE:
	case INTEL_TIGERLAKE:
	case INTEL_ROCKETLAKE:
	case INTEL_DG1:
	case INTEL_ALDERLAKE_S:
	case INTEL_ALDERLAKE_P:
		oa_format_add(perf, I915_OA_FORMAT_A12);
		oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8);
		oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8);
		oa_format_add(perf, I915_OA_FORMAT_C4_B8);
		break;

	default:
		MISSING_CASE(platform);
	}
}

/**
 * i915_perf_init - initialize i915-perf state on module bind
 * @i915: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase with the i915_perf_register() exposing state to userspace.
 */
void i915_perf_init(struct drm_i915_private *i915)
{
	struct i915_perf *perf = &i915->perf;

	/* i915_perf is not enabled for DG2 yet */
	if (IS_DG2(i915))
		return;

	perf->oa_formats = oa_formats;
	if (IS_HASWELL(i915)) {
		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
		perf->ops.is_valid_flex_reg = NULL;
		perf->ops.enable_metric_set = hsw_enable_metric_set;
		perf->ops.disable_metric_set = hsw_disable_metric_set;
		perf->ops.oa_enable = gen7_oa_enable;
		perf->ops.oa_disable = gen7_oa_disable;
		perf->ops.read = gen7_oa_read;
		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
		/* Note: that although we could theoretically also support the
		 * legacy ringbuffer mode on BDW (and earlier iterations of
		 * this driver, before upstreaming did this) it didn't seem
		 * worth the complexity to maintain now that BDW+ enable
		 * execlist mode by default.
		 */
		perf->ops.read = gen8_oa_read;

		if (IS_GRAPHICS_VER(i915, 8, 9)) {
			perf->ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			perf->ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			perf->ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(i915)) {
				perf->ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			perf->ops.oa_enable = gen8_oa_enable;
			perf->ops.oa_disable = gen8_oa_disable;
			perf->ops.enable_metric_set = gen8_enable_metric_set;
			perf->ops.disable_metric_set = gen8_disable_metric_set;
			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

			if (GRAPHICS_VER(i915) == 8) {
				perf->ctx_oactxctrl_offset = 0x120;
				perf->ctx_flexeu0_offset = 0x2ce;

				perf->gen8_valid_ctx_bit = BIT(25);
			} else {
				perf->ctx_oactxctrl_offset = 0x128;
				perf->ctx_flexeu0_offset = 0x3de;

				perf->gen8_valid_ctx_bit = BIT(16);
			}
		} else if (GRAPHICS_VER(i915) == 11) {
			perf->ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			perf->ops.is_valid_mux_reg =
				gen11_is_valid_mux_addr;
			perf->ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			perf->ops.oa_enable = gen8_oa_enable;
			perf->ops.oa_disable = gen8_oa_disable;
			perf->ops.enable_metric_set = gen8_enable_metric_set;
			perf->ops.disable_metric_set = gen11_disable_metric_set;
			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

			perf->ctx_oactxctrl_offset = 0x124;
			perf->ctx_flexeu0_offset = 0x78e;

			perf->gen8_valid_ctx_bit = BIT(16);
		} else if (GRAPHICS_VER(i915) == 12) {
			perf->ops.is_valid_b_counter_reg =
				gen12_is_valid_b_counter_addr;
			perf->ops.is_valid_mux_reg =
				gen12_is_valid_mux_addr;
			perf->ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			perf->ops.oa_enable = gen12_oa_enable;
			perf->ops.oa_disable = gen12_oa_disable;
			perf->ops.enable_metric_set = gen12_enable_metric_set;
			perf->ops.disable_metric_set = gen12_disable_metric_set;
			perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;

			perf->ctx_flexeu0_offset = 0;
			perf->ctx_oactxctrl_offset = 0x144;
		}
	}

	if (perf->ops.enable_metric_set) {
		mutex_init(&perf->lock);

		/* Choose a representative limit */
		oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;

		mutex_init(&perf->metrics_lock);
		idr_init_base(&perf->metrics_idr, 1);

		/* We set up some ratelimit state to potentially throttle any
		 * _NOTES about spurious, invalid OA reports which we don't
		 * forward to userspace.
		 *
		 * We print a _NOTE about any throttling when closing the
		 * stream instead of waiting until driver _fini which no one
		 * would ever see.
		 *
		 * Using the same limiting factors as printk_ratelimit()
		 */
		ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
		/* Since we use a DRM_NOTE for spurious reports it would be
		 * inconsistent to let __ratelimit() automatically print a
		 * warning for throttling.
		 */
		ratelimit_set_flags(&perf->spurious_report_rs,
				    RATELIMIT_MSG_ON_RELEASE);

		ratelimit_state_init(&perf->tail_pointer_race,
				     5 * HZ, 10);
		ratelimit_set_flags(&perf->tail_pointer_race,
				    RATELIMIT_MSG_ON_RELEASE);

		atomic64_set(&perf->noa_programming_delay,
			     500 * 1000 /* 500us */);

		perf->i915 = i915;

		oa_init_supported_formats(perf);
	}
}

static int destroy_config(int id, void *p, void *data)
{
	i915_oa_config_put(p);
	return 0;
}

int i915_perf_sysctl_register(void)
{
	sysctl_header = register_sysctl("dev/i915", oa_table);
	return 0;
}

void i915_perf_sysctl_unregister(void)
{
	unregister_sysctl_table(sysctl_header);
}

/**
 * i915_perf_fini - Counter part to i915_perf_init()
 * @i915: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *i915)
{
	struct i915_perf *perf = &i915->perf;

	if (!perf->i915)
		return;

	idr_for_each(&perf->metrics_idr, destroy_config, perf);
	idr_destroy(&perf->metrics_idr);

	memset(&perf->ops, 0, sizeof(perf->ops));
	perf->i915 = NULL;
}

/**
 * i915_perf_ioctl_version - Version of the i915-perf subsystem
 *
 * This version number is used by userspace to detect available features.
 */
int i915_perf_ioctl_version(void)
{
	/*
	 * 1: Initial version
	 *   I915_PERF_IOCTL_ENABLE
	 *   I915_PERF_IOCTL_DISABLE
	 *
	 * 2: Added runtime modification of the OA config through
	 *    I915_PERF_IOCTL_CONFIG.
	 *
	 * 3: Added DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
	 *    preemption on a particular context so that performance queries
	 *    don't have to be captured across all contexts.
	 *
	 * 4: Added DRM_I915_PERF_PROP_GLOBAL_SSEU to reconfigure the HW
	 *    slice/subslice configuration for the duration of an OA stream.
	 *
	 * 5: Added DRM_I915_PERF_PROP_POLL_OA_PERIOD to control the interval
	 *    at which the driver checks the OA buffer for available reports.
	 */
	return 5;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_perf.c"
#endif