#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "../perf_event.h"
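
/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.4.2 Table 16-13
 *
 * Hardware branch filter (not available on all CPUs)
 */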
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * Following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR. But it helps the constraint perf code to understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT		63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	BIT_ULL(63)
#define LBR_FROM_FLAG_IN_TX	BIT_ULL(62)
#define LBR_FROM_FLAG_ABORT	BIT_ULL(61)

#define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))
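
/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */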
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */

	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */

};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY		 \
	(X86_BR_CALL		|\
	 X86_BR_RET		|\
	 X86_BR_SYSCALL		|\
	 X86_BR_SYSRET		|\
	 X86_BR_INT		|\
	 X86_BR_IRET		|\
	 X86_BR_JCC		|\
	 X86_BR_JMP		|\
	 X86_BR_IRQ		|\
	 X86_BR_ABORT		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_IND_JMP		|\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)
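
/*
 * Intel LBR_CTL bits
 *
 * Hardware branch filter for Arch LBR
 */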
#define ARCH_LBR_KERNEL_BIT		1  /* capture kernel branches */
#define ARCH_LBR_USER_BIT		2  /* capture user branches */
#define ARCH_LBR_CALL_STACK_BIT		3  /* enable call stack */
#define ARCH_LBR_JCC_BIT		16 /* capture conditional branches */
#define ARCH_LBR_REL_JMP_BIT		17 /* capture relative jumps */
#define ARCH_LBR_IND_JMP_BIT		18 /* capture indirect jumps */
#define ARCH_LBR_REL_CALL_BIT		19 /* capture relative calls */
#define ARCH_LBR_IND_CALL_BIT		20 /* capture indirect calls */
#define ARCH_LBR_RETURN_BIT		21 /* capture near returns */
#define ARCH_LBR_OTHER_BRANCH_BIT	22 /* capture other branches */

#define ARCH_LBR_KERNEL			(1ULL << ARCH_LBR_KERNEL_BIT)
#define ARCH_LBR_USER			(1ULL << ARCH_LBR_USER_BIT)
#define ARCH_LBR_CALL_STACK		(1ULL << ARCH_LBR_CALL_STACK_BIT)
#define ARCH_LBR_JCC			(1ULL << ARCH_LBR_JCC_BIT)
#define ARCH_LBR_REL_JMP		(1ULL << ARCH_LBR_REL_JMP_BIT)
#define ARCH_LBR_IND_JMP		(1ULL << ARCH_LBR_IND_JMP_BIT)
#define ARCH_LBR_REL_CALL		(1ULL << ARCH_LBR_REL_CALL_BIT)
#define ARCH_LBR_IND_CALL		(1ULL << ARCH_LBR_IND_CALL_BIT)
#define ARCH_LBR_RETURN			(1ULL << ARCH_LBR_RETURN_BIT)
#define ARCH_LBR_OTHER_BRANCH		(1ULL << ARCH_LBR_OTHER_BRANCH_BIT)

#define ARCH_LBR_ANY			 \
	(ARCH_LBR_JCC			|\
	 ARCH_LBR_REL_JMP		|\
	 ARCH_LBR_IND_JMP		|\
	 ARCH_LBR_REL_CALL		|\
	 ARCH_LBR_IND_CALL		|\
	 ARCH_LBR_RETURN		|\
	 ARCH_LBR_OTHER_BRANCH)

#define ARCH_LBR_CTL_MASK		0x7f000e

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return !!(config & ARCH_LBR_CALL_STACK);

	return !!(config & LBR_CALL_STACK);
}
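
/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */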
static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (is_lbr_call_stack_bit_set(lbr_select))
		debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	else
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}

void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to   + i, 0);
		if (x86_pmu.lbr_has_info)
			wrmsrl(x86_pmu.lbr_info + i, 0);
	}
}

static void intel_pmu_arch_lbr_reset(void)
{
	/* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
	wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
}

void intel_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	x86_pmu.lbr_reset();

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, 0);
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};
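
/*
 * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
 * are the TSX flags when TSX is supported, but when TSX is not supported
 * they have no consistent behavior:
 *
 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
 *     part of the sign extension.
 *
 * Therefore, if:
 *
 *   1) LBR has TSX format
 *   2) CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits
 * and any value from rdmsr() must be converted to have a 61 bits sign
 * extension, ignoring the TSX flags.
 */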
static inline bool lbr_from_signext_quirk_needed(void)
{
	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
			   boot_cpu_has(X86_FEATURE_RTM);

	return !tsx_support;
}

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);

/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Sign extend into bits 61:62 while preserving bit 63.
		 *
		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
		 * in val are always OFF and must be changed to be sign
		 * extension bits. Since bits 59:60 are guaranteed to be
		 * part of the sign extension bits, we can just copy them
		 * to 61:62.
		 */
		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
	}
	return val;
}

/*
 * If quirk is needed, ensure sign extension is 61 bits:
 */
static u64 lbr_from_signext_quirk_rd(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Quirk is on when TSX is not enabled. Therefore TSX
		 * flags must be read as OFF.
		 */
		val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
	}
	return val;
}

static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
	val = lbr_from_signext_quirk_wr(val);
	wrmsrl(x86_pmu.lbr_from + idx, val);
}

static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_to + idx, val);
}

static __always_inline void wrlbr_info(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_info + idx, val);
}

static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->from;

	rdmsrl(x86_pmu.lbr_from + idx, val);

	return lbr_from_signext_quirk_rd(val);
}

static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->to;

	rdmsrl(x86_pmu.lbr_to + idx, val);

	return val;
}

static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->info;

	rdmsrl(x86_pmu.lbr_info + idx, val);

	return val;
}

static inline void
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	wrlbr_from(idx, lbr->from);
	wrlbr_to(idx, lbr->to);
	if (need_info)
		wrlbr_info(idx, lbr->info);
}

static inline bool
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	u64 from = rdlbr_from(idx, NULL);

	/* Don't read invalid entry */
	if (!from)
		return false;

	lbr->from = from;
	lbr->to = rdlbr_to(idx, NULL);
	if (need_info)
		lbr->info = rdlbr_info(idx, NULL);

	return true;
}

void intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	bool need_info = x86_pmu.lbr_has_info;
	u64 tos = task_ctx->tos;
	unsigned lbr_idx, mask;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	for (i = 0; i < task_ctx->valid_lbrs; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
	}

	for (; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_from(lbr_idx, 0);
		wrlbr_to(lbr_idx, 0);
		if (need_info)
			wrlbr_info(lbr_idx, 0);
	}

	wrmsrl(x86_pmu.lbr_tos, tos);

	if (cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_restore(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	/* Fast reset the LBRs before restore if the call stack is not full. */
	if (!entries[x86_pmu.lbr_nr - 1].from)
		intel_pmu_arch_lbr_reset();

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!entries[i].from)
			break;
		wrlbr_all(&entries[i], i, true);
	}
}
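
/*
 * Restore the Architecture LBR state from the xsave area in the perf
 * context data for the task via the XRSTORS instruction.
 */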
static void intel_pmu_arch_lbr_xrstors(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);

	return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}

static void __intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
	    task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	/*
	 * Does not restore the LBR registers, if
	 * - No one else touched them, and
	 * - Was not cleared in Cstate
	 */
	if ((ctx == cpuc->last_task_ctx) &&
	    (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
	    !lbr_is_reset_in_cstate(ctx)) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_restore(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
}

void intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	bool need_info = x86_pmu.lbr_has_info;
	unsigned lbr_idx, mask;
	u64 tos;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
			break;
	}
	task_ctx->valid_lbrs = i;
	task_ctx->tos = tos;

	if (cpuc->lbr_select)
		rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_save(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!rdlbr_all(&entries[i], i, true))
			break;
	}

	/* LBR call stack is not full. Reset is required in restore. */
	if (i < x86_pmu.lbr_nr)
		entries[x86_pmu.lbr_nr - 1].from = 0;
}
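
/*
 * Save the Architecture LBR state to the xsave area in the perf
 * context data for the task via the XSAVES instruction.
 */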
static void intel_pmu_arch_lbr_xsaves(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static void __intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (task_context_opt(ctx)->lbr_callstack_users == 0) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_save(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_VALID;

	cpuc->last_task_ctx = ctx;
	cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}

void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
				 struct perf_event_context *next)
{
	void *prev_ctx_data, *next_ctx_data;

	swap(prev->task_ctx_data, next->task_ctx_data);

	/*
	 * Architecture specific synchronization makes sense in case
	 * both prev->task_ctx_data and next->task_ctx_data pointers
	 * are allocated.
	 */
	prev_ctx_data = next->task_ctx_data;
	next_ctx_data = prev->task_ctx_data;

	if (!prev_ctx_data || !next_ctx_data)
		return;

	swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
	     task_context_opt(next_ctx_data)->lbr_callstack_users);
}

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *task_ctx;

	if (!cpuc->lbr_users)
		return;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in)
			__intel_pmu_lbr_restore(task_ctx);
		else
			__intel_pmu_lbr_save(task_ctx);
		return;
	}

	/*
	 * Since a context switch can flip the address space and LBR entries
	 * are not tagged with an identifier, we need to wipe the LBR, even for
	 * per-cpu events. You simply cannot resolve the branches from the old
	 * address space.
	 */
	if (sched_in)
		intel_pmu_lbr_reset();
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 1;

	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
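
	/*
	 * Request pmu::sched_task() callback, which will fire inside the
	 * regular perf event scheduling, so that call will:
	 *
	 *  - restore or wipe; when LBR-callstack,
	 *  - wipe; otherwise,
	 *
	 * when this is from __perf_event_task_sched_in().
	 *
	 * However, if this is from perf_install_in_context(), no such callback
	 * will follow and we'll need to reset the LBR here if this is the
	 * first LBR event.
	 *
	 * The problem is, we cannot tell these cases apart... but we can
	 * exclude the biggest chunk of cases by looking at
	 * event->total_time_running. An event that has accrued runtime cannot
	 * have been scheduled in via perf_install_in_context() because it went
	 * through the scheduling tree just prior to that.
	 */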
	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users++;
	perf_sched_cb_inc(event->ctx->pmu);
	if (!cpuc->lbr_users++ && !event->total_time_running)
		intel_pmu_lbr_reset();
}

void release_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (kmem_cache && cpuc->lbr_xsave) {
			kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
			cpuc->lbr_xsave = NULL;
		}
	}
}

void reserve_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (!kmem_cache || cpuc->lbr_xsave)
			continue;

		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
							GFP_KERNEL | __GFP_ZERO,
							cpu_to_node(cpu));
	}
}

void intel_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) &&
	    event->ctx->task_ctx_data)
		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 0;

	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users--;
	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);
}

static inline bool vlbr_exclude_host(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
			(unsigned long *)&cpuc->intel_ctrl_guest_mask);
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host())
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host()) {
		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
			return __intel_pmu_arch_lbr_disable();

		__intel_pmu_lbr_disable();
	}
}

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	struct perf_branch_entry *br = cpuc->lbr_entries;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		perf_clear_branch_entry_bitfields(br);

		br->from	= msr_lastbranch.from;
		br->to		= msr_lastbranch.to;
		br++;
	}
	cpuc->lbr_stack.nr = i;
	cpuc->lbr_stack.hw_idx = tos;
}
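
/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */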
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false, call_stack = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	struct perf_branch_entry *br = cpuc->lbr_entries;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			call_stack = true;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		u16 cycles = 0;

		from = rdlbr_from(lbr_idx, NULL);
		to   = rdlbr_to(lbr_idx, NULL);

		/*
		 * Read LBR call stack entries
		 * until invalid entry (0s) is detected.
		 */
		if (call_stack && !from)
			break;

		if (x86_pmu.lbr_has_info) {
			if (need_info) {
				u64 info;

				info = rdlbr_info(lbr_idx, NULL);
				mis = !!(info & LBR_INFO_MISPRED);
				pred = !mis;
				cycles = (info & LBR_INFO_CYCLES);
				if (x86_pmu.lbr_has_tsx) {
					in_tx = !!(info & LBR_INFO_IN_TX);
					abort = !!(info & LBR_INFO_ABORT);
				}
			}
		} else {
			int skip = 0;

			if (x86_pmu.lbr_from_flags) {
				mis = !!(from & LBR_FROM_FLAG_MISPRED);
				pred = !mis;
				skip = 1;
			}
			if (x86_pmu.lbr_has_tsx) {
				in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
				abort = !!(from & LBR_FROM_FLAG_ABORT);
				skip = 3;
			}
			from = (u64)((((s64)from) << skip) >> skip);

			if (x86_pmu.lbr_to_cycles) {
				cycles = ((to >> 48) & LBR_INFO_CYCLES);
				to = (u64)((((s64)to) << 16) >> 16);
			}
		}

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		perf_clear_branch_entry_bitfields(br+out);
		br[out].from	 = from;
		br[out].to	 = to;
		br[out].mispred	 = mis;
		br[out].predicted = pred;
		br[out].in_tx	 = in_tx;
		br[out].abort	 = abort;
		br[out].cycles	 = cycles;
		out++;
	}
	cpuc->lbr_stack.nr = out;
	cpuc->lbr_stack.hw_idx = tos;
}

static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);

static __always_inline int get_lbr_br_type(u64 info)
{
	int type = 0;

	if (static_branch_likely(&x86_lbr_type))
		type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;

	return type;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
	bool mispred = 0;

	if (static_branch_likely(&x86_lbr_mispred))
		mispred = !!(info & LBR_INFO_MISPRED);

	return mispred;
}

static __always_inline u16 get_lbr_cycles(u64 info)
{
	u16 cycles = info & LBR_INFO_CYCLES;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
	    (!static_branch_likely(&x86_lbr_cycles) ||
	     !(info & LBR_INFO_CYC_CNT_VALID)))
		cycles = 0;

	return cycles;
}

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
				struct lbr_entry *entries)
{
	struct perf_branch_entry *e;
	struct lbr_entry *lbr;
	u64 from, to, info;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr = entries ? &entries[i] : NULL;
		e = &cpuc->lbr_entries[i];

		from = rdlbr_from(i, lbr);
		/*
		 * Read LBR entries until invalid entry (0s) is detected.
		 */
		if (!from)
			break;

		to = rdlbr_to(i, lbr);
		info = rdlbr_info(i, lbr);

		perf_clear_branch_entry_bitfields(e);

		e->from		= from;
		e->to		= to;
		e->mispred	= get_lbr_mispred(info);
		e->predicted	= !e->mispred;
		e->in_tx	= !!(info & LBR_INFO_IN_TX);
		e->abort	= !!(info & LBR_INFO_ABORT);
		e->cycles	= get_lbr_cycles(info);
		e->type		= get_lbr_br_type(info);
	}

	cpuc->lbr_stack.nr = i;
}

static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
	intel_pmu_store_lbr(cpuc, NULL);
}

static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
{
	struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;

	if (!xsave) {
		intel_pmu_store_lbr(cpuc, NULL);
		return;
	}
	xsaves(&xsave->xsave, XFEATURE_MASK_LBR);

	intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * Don't read when all LBRs users are using adaptive PEBS.
	 *
	 * This could be smarter and actually check the event,
	 * but this simple approach seems to work for now.
	 */
	if (!cpuc->lbr_users || vlbr_exclude_host() ||
	    cpuc->lbr_users == cpuc->lbr_pebs_users)
		return;

	x86_pmu.lbr_read(cpuc);

	intel_pmu_lbr_filter(cpuc);
}
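
/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */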
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}
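
/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */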
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
		reg->config = mask;

		/*
		 * The Arch LBR HW can retrieve the common branch types
		 * from the LBR_INFO. It doesn't require the high overhead
		 * SW disassemble.
		 * Enable the branch type by default for the Arch LBR.
		 */
		reg->reg |= X86_BR_TYPE_SAVE;
		return 0;
	}

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 * But the 10th bit LBR_CALL_STACK does not operate
	 * in suppress mode.
	 */
	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    x86_pmu.lbr_has_info)
		reg->config |= LBR_NO_INFO;

	return 0;
}

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}
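
/*
 * Illustrative sketch (not part of this file's build): the filter setup
 * above is reached when user space opens an event with branch sampling
 * enabled. A minimal perf_event_attr for sampling user-level calls might
 * look like:
 *
 *	struct perf_event_attr attr = {
 *		.type			= PERF_TYPE_HARDWARE,
 *		.config			= PERF_COUNT_HW_CPU_CYCLES,
 *		.sample_period		= 100000,
 *		.sample_type		= PERF_SAMPLE_BRANCH_STACK,
 *		.branch_sample_type	= PERF_SAMPLE_BRANCH_USER |
 *					  PERF_SAMPLE_BRANCH_ANY_CALL,
 *	};
 *
 * PERF_SAMPLE_BRANCH_ANY_CALL maps to X86_BR_ANY_CALL for the SW filter
 * and, via lbr_sel_map/lbr_ctl_map, to the HW filter bits where supported.
 */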
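
/*
 * return the type of control flow change at address "from"
 * instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */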
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * maybe zero if lbr did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data.  This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
#endif
	insn_init(&insn, addr, bytes_read, is64);
	if (insn_get_opcode(&insn))
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
		fallthrough;
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		if (insn_get_modrm(&insn))
			return X86_BR_ABORT;

		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4: /* near ind jmp */
		case 5: /* far ind jmp */
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transition) may
	 * occur on any instructions. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was a irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}

#define X86_BR_TYPE_MAP_MAX	16

static int branch_map[X86_BR_TYPE_MAP_MAX] = {
	PERF_BR_CALL,		/* X86_BR_CALL */
	PERF_BR_RET,		/* X86_BR_RET */
	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
	PERF_BR_UNKNOWN,	/* X86_BR_INT */
	PERF_BR_ERET,		/* X86_BR_IRET */
	PERF_BR_COND,		/* X86_BR_JCC */
	PERF_BR_UNCOND,		/* X86_BR_JMP */
	PERF_BR_IRQ,		/* X86_BR_IRQ */
	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
	PERF_BR_IND,		/* X86_BR_IND_JMP */
};

static int
common_branch_type(int type)
{
	int i;

	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */

	if (type) {
		i = __ffs(type);
		if (i < X86_BR_TYPE_MAP_MAX)
			return branch_map[i];
	}

	return PERF_BR_UNKNOWN;
}

enum {
	ARCH_LBR_BR_TYPE_JCC			= 0,
	ARCH_LBR_BR_TYPE_NEAR_IND_JMP		= 1,
	ARCH_LBR_BR_TYPE_NEAR_REL_JMP		= 2,
	ARCH_LBR_BR_TYPE_NEAR_IND_CALL		= 3,
	ARCH_LBR_BR_TYPE_NEAR_REL_CALL		= 4,
	ARCH_LBR_BR_TYPE_NEAR_RET		= 5,
	ARCH_LBR_BR_TYPE_KNOWN_MAX		= ARCH_LBR_BR_TYPE_NEAR_RET,

	ARCH_LBR_BR_TYPE_MAP_MAX		= 16,
};

static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
	[ARCH_LBR_BR_TYPE_JCC]			= X86_BR_JCC,
	[ARCH_LBR_BR_TYPE_NEAR_IND_JMP]		= X86_BR_IND_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_REL_JMP]		= X86_BR_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_IND_CALL]	= X86_BR_IND_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_REL_CALL]	= X86_BR_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_RET]		= X86_BR_RET,
};
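
/*
 * Implement the actual branch filter based on user demand.
 * The hardware may not exactly satisfy that request, thus we
 * need to inspect opcodes and filter entries in software.
 */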
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type, to_plm;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = cpuc->lbr_entries[i].type;

		/*
		 * Parse the branch type recorded in LBR_x_INFO MSR.
		 * Doesn't support OTHER_BRANCH decoding for now.
		 * OTHER_BRANCH branch type still rely on software decoding.
		 */
		if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
		    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
			to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
			type = arch_lbr_br_type_map[type] | to_plm;
		} else
			type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* Cannot get TOS for large PEBS and Arch LBR */
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
	    (cpuc->n_pebs == cpuc->n_large_pebs))
		cpuc->lbr_stack.hw_idx = -1ULL;
	else
		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();

	intel_pmu_store_lbr(cpuc, lbr);
	intel_pmu_lbr_filter(cpuc);
}
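
/*
 * Map interface branch filters onto LBR filters
 */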
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= ARCH_LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= ARCH_LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= ARCH_LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= ARCH_LBR_RETURN |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= ARCH_LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= ARCH_LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_RETURN |
						  ARCH_LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= ARCH_LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= ARCH_LBR_REL_CALL,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmp need to be filtered out
	 */
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmp need to be filtered out
	 */
}

static inline struct kmem_cache *
create_lbr_kmem_cache(size_t size, size_t align)
{
	return kmem_cache_create("x86_lbr", size, align, 0, NULL);
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr	 = 32;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
	x86_pmu.lbr_info = MSR_LBR_INFO_0;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmp need to be filtered out
	 */
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem to have an
	 * operational LBR which can freeze on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_stepping < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* slm */
void __init intel_pmu_lbr_init_slm(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/* Knights Landing does have MISPREDICT bit */
	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}

void intel_pmu_lbr_init(void)
{
	switch (x86_pmu.intel_cap.lbr_format) {
	case LBR_FORMAT_EIP_FLAGS2:
		x86_pmu.lbr_has_tsx = 1;
		x86_pmu.lbr_from_flags = 1;
		if (lbr_from_signext_quirk_needed())
			static_branch_enable(&lbr_from_quirk_key);
		break;

	case LBR_FORMAT_EIP_FLAGS:
		x86_pmu.lbr_from_flags = 1;
		break;

	case LBR_FORMAT_INFO:
		x86_pmu.lbr_has_tsx = 1;
		fallthrough;
	case LBR_FORMAT_INFO2:
		x86_pmu.lbr_has_info = 1;
		break;

	case LBR_FORMAT_TIME:
		x86_pmu.lbr_from_flags = 1;
		x86_pmu.lbr_to_cycles = 1;
		break;
	}

	if (x86_pmu.lbr_has_info) {
		/*
		 * Only used in combination with baseline pebs.
		 */
		static_branch_enable(&x86_lbr_mispred);
		static_branch_enable(&x86_lbr_cycles);
	}
}
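
/*
 * LBR state size is variable based on the max number of registers.
 * This calculates the expected state size, which should match
 * what the hardware enumerates for the size of XFEATURE_LBR.
 */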
static inline unsigned int get_lbr_state_size(void)
{
	return sizeof(struct arch_lbr_state) +
	       x86_pmu.lbr_nr * sizeof(struct lbr_entry);
}

static bool is_arch_lbr_xsave_available(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVES))
		return false;

	/*
	 * Check the LBR state with the corresponding software structure.
	 * Disable LBR XSAVES support if the size doesn't match.
	 */
	if (xfeature_size(XFEATURE_LBR) == 0)
		return false;

	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
		return false;

	return true;
}

void __init intel_pmu_arch_lbr_init(void)
{
	struct pmu *pmu = x86_get_pmu(smp_processor_id());
	union cpuid28_eax eax;
	union cpuid28_ebx ebx;
	union cpuid28_ecx ecx;
	unsigned int unused_edx;
	bool arch_lbr_xsave;
	size_t size;
	u64 lbr_nr;

	/* Arch LBR Capabilities */
	cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);

	lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
	if (!lbr_nr)
		goto clear_arch_lbr;

	/* Apply the max depth of Arch LBR */
	if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
		goto clear_arch_lbr;

	x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
	x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
	x86_pmu.lbr_lip = eax.split.lbr_lip;
	x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
	x86_pmu.lbr_filter = ebx.split.lbr_filter;
	x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
	x86_pmu.lbr_nr = lbr_nr;

	if (x86_pmu.lbr_mispred)
		static_branch_enable(&x86_lbr_mispred);
	if (x86_pmu.lbr_timed_lbr)
		static_branch_enable(&x86_lbr_cycles);
	if (x86_pmu.lbr_br_type)
		static_branch_enable(&x86_lbr_type);

	arch_lbr_xsave = is_arch_lbr_xsave_available();
	if (arch_lbr_xsave) {
		size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
		       get_lbr_state_size();
		pmu->task_ctx_cache = create_lbr_kmem_cache(size,
							    XSAVE_ALIGNMENT);
	}

	if (!pmu->task_ctx_cache) {
		arch_lbr_xsave = false;

		size = sizeof(struct x86_perf_task_context_arch_lbr) +
		       lbr_nr * sizeof(struct lbr_entry);
		pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
	}

	x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
	x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
	x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;

	/* LBR callstack requires both CPL and Branch Filtering support */
	if (!x86_pmu.lbr_cpl ||
	    !x86_pmu.lbr_filter ||
	    !x86_pmu.lbr_call_stack)
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;

	if (!x86_pmu.lbr_cpl) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
	} else if (!x86_pmu.lbr_filter) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
	}

	x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
	x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;

	if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
		x86_pmu.lbr_ctl_map = NULL;

	x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
	if (arch_lbr_xsave) {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
		pr_cont("XSAVE ");
	} else {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
	}

	pr_cont("Architectural LBR, ");

	return;

clear_arch_lbr:
	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
}
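
/**
 * x86_perf_get_lbr - get the LBR records information
 *
 * @lbr: the caller's memory to store the LBR records information
 *
 * Returns: 0 indicates the LBR info has been successfully obtained
 */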
int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
	int lbr_fmt = x86_pmu.intel_cap.lbr_format;

	lbr->nr = x86_pmu.lbr_nr;
	lbr->from = x86_pmu.lbr_from;
	lbr->to = x86_pmu.lbr_to;
	lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;

	return 0;
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

struct event_constraint vlbr_constraint =
	__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
			  FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);