0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <linux/perf_event.h>
0003 #include <linux/types.h>
0004 
0005 #include <asm/perf_event.h>
0006 #include <asm/msr.h>
0007 #include <asm/insn.h>
0008 
0009 #include "../perf_event.h"
0010 
0011 /*
0012  * Intel LBR_SELECT bits
0013  * Intel Vol3a, April 2011, Section 16.7 Table 16-10
0014  *
0015  * Hardware branch filter (not available on all CPUs)
0016  */
0017 #define LBR_KERNEL_BIT      0 /* do not capture at ring0 */
0018 #define LBR_USER_BIT        1 /* do not capture at ring > 0 */
0019 #define LBR_JCC_BIT     2 /* do not capture conditional branches */
0020 #define LBR_REL_CALL_BIT    3 /* do not capture relative calls */
0021 #define LBR_IND_CALL_BIT    4 /* do not capture indirect calls */
0022 #define LBR_RETURN_BIT      5 /* do not capture near returns */
0023 #define LBR_IND_JMP_BIT     6 /* do not capture indirect jumps */
0024 #define LBR_REL_JMP_BIT     7 /* do not capture relative jumps */
0025 #define LBR_FAR_BIT     8 /* do not capture far branches */
0026 #define LBR_CALL_STACK_BIT  9 /* enable call stack */
0027 
0028 /*
0029  * The following bit only exists in Linux; we mask it out before writing it to
0030  * the actual MSR. But it helps the perf constraint code to understand
0031  * that this is a separate configuration.
0032  */
0033 #define LBR_NO_INFO_BIT        63 /* don't read LBR_INFO. */
0034 
0035 #define LBR_KERNEL  (1 << LBR_KERNEL_BIT)
0036 #define LBR_USER    (1 << LBR_USER_BIT)
0037 #define LBR_JCC     (1 << LBR_JCC_BIT)
0038 #define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
0039 #define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
0040 #define LBR_RETURN  (1 << LBR_RETURN_BIT)
0041 #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
0042 #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
0043 #define LBR_FAR     (1 << LBR_FAR_BIT)
0044 #define LBR_CALL_STACK  (1 << LBR_CALL_STACK_BIT)
0045 #define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
0046 
0047 #define LBR_PLM (LBR_KERNEL | LBR_USER)
0048 
0049 #define LBR_SEL_MASK    0x3ff   /* valid bits in LBR_SELECT */
0050 #define LBR_NOT_SUPP    -1  /* LBR filter not supported */
0051 #define LBR_IGN     0   /* ignored */
0052 
0053 #define LBR_ANY      \
0054     (LBR_JCC    |\
0055      LBR_REL_CALL   |\
0056      LBR_IND_CALL   |\
0057      LBR_RETURN |\
0058      LBR_REL_JMP    |\
0059      LBR_IND_JMP    |\
0060      LBR_FAR)
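
/*
 * Worked example (illustrative): the bits above are suppress filters, so a
 * raw LBR_SELECT value of LBR_KERNEL alone means "record every branch type,
 * but only those executed at ring > 0", while writing LBR_PLM would suppress
 * both privilege levels and record nothing. The *_lbr_sel_map tables near
 * the end of this file build such values from PERF_SAMPLE_BRANCH_* requests.
 */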
0061 
0062 #define LBR_FROM_FLAG_MISPRED   BIT_ULL(63)
0063 #define LBR_FROM_FLAG_IN_TX BIT_ULL(62)
0064 #define LBR_FROM_FLAG_ABORT BIT_ULL(61)
0065 
0066 #define LBR_FROM_SIGNEXT_2MSB   (BIT_ULL(60) | BIT_ULL(59))
0067 
0068 /*
0069  * x86 control flow change classification
0070  * x86 control flow changes include branches, interrupts, traps, faults
0071  */
0072 enum {
0073     X86_BR_NONE     = 0,      /* unknown */
0074 
0075     X86_BR_USER     = 1 << 0, /* branch target is user */
0076     X86_BR_KERNEL       = 1 << 1, /* branch target is kernel */
0077 
0078     X86_BR_CALL     = 1 << 2, /* call */
0079     X86_BR_RET      = 1 << 3, /* return */
0080     X86_BR_SYSCALL      = 1 << 4, /* syscall */
0081     X86_BR_SYSRET       = 1 << 5, /* syscall return */
0082     X86_BR_INT      = 1 << 6, /* sw interrupt */
0083     X86_BR_IRET     = 1 << 7, /* return from interrupt */
0084     X86_BR_JCC      = 1 << 8, /* conditional */
0085     X86_BR_JMP      = 1 << 9, /* jump */
0086     X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
0087     X86_BR_IND_CALL     = 1 << 11,/* indirect calls */
0088     X86_BR_ABORT        = 1 << 12,/* transaction abort */
0089     X86_BR_IN_TX        = 1 << 13,/* in transaction */
0090     X86_BR_NO_TX        = 1 << 14,/* not in transaction */
0091     X86_BR_ZERO_CALL    = 1 << 15,/* zero length call */
0092     X86_BR_CALL_STACK   = 1 << 16,/* call stack */
0093     X86_BR_IND_JMP      = 1 << 17,/* indirect jump */
0094 
0095     X86_BR_TYPE_SAVE    = 1 << 18,/* indicate to save branch type */
0096 
0097 };
0098 
0099 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
0100 #define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
0101 
0102 #define X86_BR_ANY       \
0103     (X86_BR_CALL    |\
0104      X86_BR_RET     |\
0105      X86_BR_SYSCALL |\
0106      X86_BR_SYSRET  |\
0107      X86_BR_INT     |\
0108      X86_BR_IRET    |\
0109      X86_BR_JCC     |\
0110      X86_BR_JMP  |\
0111      X86_BR_IRQ  |\
0112      X86_BR_ABORT    |\
0113      X86_BR_IND_CALL |\
0114      X86_BR_IND_JMP  |\
0115      X86_BR_ZERO_CALL)
0116 
0117 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
0118 
0119 #define X86_BR_ANY_CALL      \
0120     (X86_BR_CALL        |\
0121      X86_BR_IND_CALL    |\
0122      X86_BR_ZERO_CALL   |\
0123      X86_BR_SYSCALL     |\
0124      X86_BR_IRQ     |\
0125      X86_BR_INT)
0126 
0127 /*
0128  * Intel LBR_CTL bits
0129  *
0130  * Hardware branch filter for Arch LBR
0131  */
0132 #define ARCH_LBR_KERNEL_BIT     1  /* capture at ring0 */
0133 #define ARCH_LBR_USER_BIT       2  /* capture at ring > 0 */
0134 #define ARCH_LBR_CALL_STACK_BIT     3  /* enable call stack */
0135 #define ARCH_LBR_JCC_BIT        16 /* capture conditional branches */
0136 #define ARCH_LBR_REL_JMP_BIT        17 /* capture relative jumps */
0137 #define ARCH_LBR_IND_JMP_BIT        18 /* capture indirect jumps */
0138 #define ARCH_LBR_REL_CALL_BIT       19 /* capture relative calls */
0139 #define ARCH_LBR_IND_CALL_BIT       20 /* capture indirect calls */
0140 #define ARCH_LBR_RETURN_BIT     21 /* capture near returns */
0141 #define ARCH_LBR_OTHER_BRANCH_BIT   22 /* capture other branches */
0142 
0143 #define ARCH_LBR_KERNEL         (1ULL << ARCH_LBR_KERNEL_BIT)
0144 #define ARCH_LBR_USER           (1ULL << ARCH_LBR_USER_BIT)
0145 #define ARCH_LBR_CALL_STACK     (1ULL << ARCH_LBR_CALL_STACK_BIT)
0146 #define ARCH_LBR_JCC            (1ULL << ARCH_LBR_JCC_BIT)
0147 #define ARCH_LBR_REL_JMP        (1ULL << ARCH_LBR_REL_JMP_BIT)
0148 #define ARCH_LBR_IND_JMP        (1ULL << ARCH_LBR_IND_JMP_BIT)
0149 #define ARCH_LBR_REL_CALL       (1ULL << ARCH_LBR_REL_CALL_BIT)
0150 #define ARCH_LBR_IND_CALL       (1ULL << ARCH_LBR_IND_CALL_BIT)
0151 #define ARCH_LBR_RETURN         (1ULL << ARCH_LBR_RETURN_BIT)
0152 #define ARCH_LBR_OTHER_BRANCH       (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
0153 
0154 #define ARCH_LBR_ANY             \
0155     (ARCH_LBR_JCC           |\
0156      ARCH_LBR_REL_JMP       |\
0157      ARCH_LBR_IND_JMP       |\
0158      ARCH_LBR_REL_CALL      |\
0159      ARCH_LBR_IND_CALL      |\
0160      ARCH_LBR_RETURN        |\
0161      ARCH_LBR_OTHER_BRANCH)
0162 
0163 #define ARCH_LBR_CTL_MASK           0x7f000e
0164 
0165 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
0166 
0167 static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
0168 {
0169     if (static_cpu_has(X86_FEATURE_ARCH_LBR))
0170         return !!(config & ARCH_LBR_CALL_STACK);
0171 
0172     return !!(config & LBR_CALL_STACK);
0173 }
0174 
0175 /*
0176  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
0177  * otherwise it becomes nearly impossible to get a reliable stack.
0178  */
0179 
0180 static void __intel_pmu_lbr_enable(bool pmi)
0181 {
0182     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0183     u64 debugctl, lbr_select = 0, orig_debugctl;
0184 
0185     /*
0186      * No need to unfreeze manually, as v4 can do that as part
0187      * of the GLOBAL_STATUS ack.
0188      */
0189     if (pmi && x86_pmu.version >= 4)
0190         return;
0191 
0192     /*
0193      * No need to reprogram LBR_SELECT in a PMI, as it
0194      * did not change.
0195      */
0196     if (cpuc->lbr_sel)
0197         lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
0198     if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
0199         wrmsrl(MSR_LBR_SELECT, lbr_select);
0200 
0201     rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
0202     orig_debugctl = debugctl;
0203 
0204     if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
0205         debugctl |= DEBUGCTLMSR_LBR;
0206     /*
0207      * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
0208      * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
0209      * may cause superfluous increase/decrease of LBR_TOS.
0210      */
0211     if (is_lbr_call_stack_bit_set(lbr_select))
0212         debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
0213     else
0214         debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
0215 
0216     if (orig_debugctl != debugctl)
0217         wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
0218 
0219     if (static_cpu_has(X86_FEATURE_ARCH_LBR))
0220         wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
0221 }
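
/*
 * Illustrative note on the resulting state: on a pre-Arch-LBR CPU with
 * call-stack mode selected in LBR_SELECT, the code above leaves DEBUGCTL
 * with DEBUGCTLMSR_LBR set and DEBUGCTLMSR_FREEZE_LBRS_ON_PMI cleared;
 * without call-stack mode both bits end up set.
 */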
0222 
0223 void intel_pmu_lbr_reset_32(void)
0224 {
0225     int i;
0226 
0227     for (i = 0; i < x86_pmu.lbr_nr; i++)
0228         wrmsrl(x86_pmu.lbr_from + i, 0);
0229 }
0230 
0231 void intel_pmu_lbr_reset_64(void)
0232 {
0233     int i;
0234 
0235     for (i = 0; i < x86_pmu.lbr_nr; i++) {
0236         wrmsrl(x86_pmu.lbr_from + i, 0);
0237         wrmsrl(x86_pmu.lbr_to   + i, 0);
0238         if (x86_pmu.lbr_has_info)
0239             wrmsrl(x86_pmu.lbr_info + i, 0);
0240     }
0241 }
0242 
0243 static void intel_pmu_arch_lbr_reset(void)
0244 {
0245     /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
0246     wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
0247 }
0248 
0249 void intel_pmu_lbr_reset(void)
0250 {
0251     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0252 
0253     if (!x86_pmu.lbr_nr)
0254         return;
0255 
0256     x86_pmu.lbr_reset();
0257 
0258     cpuc->last_task_ctx = NULL;
0259     cpuc->last_log_id = 0;
0260     if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
0261         wrmsrl(MSR_LBR_SELECT, 0);
0262 }
0263 
0264 /*
0265  * TOS = most recently recorded branch
0266  */
0267 static inline u64 intel_pmu_lbr_tos(void)
0268 {
0269     u64 tos;
0270 
0271     rdmsrl(x86_pmu.lbr_tos, tos);
0272     return tos;
0273 }
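
/*
 * Illustrative sketch of the TOS-based ring-buffer walk used below: with
 * x86_pmu.lbr_nr == 16, mask == 15 and lbr_idx = (tos - i) & mask, so a
 * TOS of 3 visits the entries newest-first as 3, 2, 1, 0, 15, 14, ...,
 * wrapping modulo the LBR stack depth.
 */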
0274 
0275 enum {
0276     LBR_NONE,
0277     LBR_VALID,
0278 };
0279 
0280 /*
0281  * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
0282  * are the TSX flags when TSX is supported, but when TSX is not supported
0283  * they have no consistent behavior:
0284  *
0285  *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
0286  *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
0287  *     part of the sign extension.
0288  *
0289  * Therefore, if:
0290  *
0291  *   1) LBR format LBR_FORMAT_EIP_FLAGS2
0292  *   2) CPU has no TSX support enabled
0293  *
0294  * ... then any value passed to wrmsr() must be sign-extended to 63 bits and any
0295  * value from rdmsr() must be converted to have a 61-bit sign extension,
0296  * ignoring the TSX flags.
0297  */
0298 static inline bool lbr_from_signext_quirk_needed(void)
0299 {
0300     bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
0301                boot_cpu_has(X86_FEATURE_RTM);
0302 
0303     return !tsx_support;
0304 }
0305 
0306 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
0307 
0308 /* If quirk is enabled, ensure sign extension is 63 bits: */
0309 inline u64 lbr_from_signext_quirk_wr(u64 val)
0310 {
0311     if (static_branch_unlikely(&lbr_from_quirk_key)) {
0312         /*
0313          * Sign extend into bits 61:62 while preserving bit 63.
0314          *
0315          * Quirk is enabled when TSX is disabled. Therefore TSX bits
0316          * in val are always OFF and must be changed to be sign
0317          * extension bits. Since bits 59:60 are guaranteed to be
0318          * part of the sign extension bits, we can just copy them
0319          * to 61:62.
0320          */
0321         val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
0322     }
0323     return val;
0324 }
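
/*
 * Worked example (illustrative, assuming a kernel "from" address): the
 * hardware records 0xffffffff81000000 with the TSX bits 61:62 forced off,
 * so the saved value is 0x1fffffff81000000 (flag bit 63 clear). Bits 60:59
 * are both set, hence (LBR_FROM_SIGNEXT_2MSB & val) << 2 == BIT(62)|BIT(61)
 * and the OR above yields 0x7fffffff81000000, i.e. the address is again
 * sign-extended through bit 62 as wrmsr() expects, with bit 63 untouched.
 */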
0325 
0326 /*
0327  * If quirk is needed, ensure sign extension is 61 bits:
0328  */
0329 static u64 lbr_from_signext_quirk_rd(u64 val)
0330 {
0331     if (static_branch_unlikely(&lbr_from_quirk_key)) {
0332         /*
0333          * Quirk is on when TSX is not enabled. Therefore TSX
0334          * flags must be read as OFF.
0335          */
0336         val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
0337     }
0338     return val;
0339 }
0340 
0341 static __always_inline void wrlbr_from(unsigned int idx, u64 val)
0342 {
0343     val = lbr_from_signext_quirk_wr(val);
0344     wrmsrl(x86_pmu.lbr_from + idx, val);
0345 }
0346 
0347 static __always_inline void wrlbr_to(unsigned int idx, u64 val)
0348 {
0349     wrmsrl(x86_pmu.lbr_to + idx, val);
0350 }
0351 
0352 static __always_inline void wrlbr_info(unsigned int idx, u64 val)
0353 {
0354     wrmsrl(x86_pmu.lbr_info + idx, val);
0355 }
0356 
0357 static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
0358 {
0359     u64 val;
0360 
0361     if (lbr)
0362         return lbr->from;
0363 
0364     rdmsrl(x86_pmu.lbr_from + idx, val);
0365 
0366     return lbr_from_signext_quirk_rd(val);
0367 }
0368 
0369 static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
0370 {
0371     u64 val;
0372 
0373     if (lbr)
0374         return lbr->to;
0375 
0376     rdmsrl(x86_pmu.lbr_to + idx, val);
0377 
0378     return val;
0379 }
0380 
0381 static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
0382 {
0383     u64 val;
0384 
0385     if (lbr)
0386         return lbr->info;
0387 
0388     rdmsrl(x86_pmu.lbr_info + idx, val);
0389 
0390     return val;
0391 }
0392 
0393 static inline void
0394 wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
0395 {
0396     wrlbr_from(idx, lbr->from);
0397     wrlbr_to(idx, lbr->to);
0398     if (need_info)
0399         wrlbr_info(idx, lbr->info);
0400 }
0401 
0402 static inline bool
0403 rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
0404 {
0405     u64 from = rdlbr_from(idx, NULL);
0406 
0407     /* Don't read invalid entry */
0408     if (!from)
0409         return false;
0410 
0411     lbr->from = from;
0412     lbr->to = rdlbr_to(idx, NULL);
0413     if (need_info)
0414         lbr->info = rdlbr_info(idx, NULL);
0415 
0416     return true;
0417 }
0418 
0419 void intel_pmu_lbr_restore(void *ctx)
0420 {
0421     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0422     struct x86_perf_task_context *task_ctx = ctx;
0423     bool need_info = x86_pmu.lbr_has_info;
0424     u64 tos = task_ctx->tos;
0425     unsigned lbr_idx, mask;
0426     int i;
0427 
0428     mask = x86_pmu.lbr_nr - 1;
0429     for (i = 0; i < task_ctx->valid_lbrs; i++) {
0430         lbr_idx = (tos - i) & mask;
0431         wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
0432     }
0433 
0434     for (; i < x86_pmu.lbr_nr; i++) {
0435         lbr_idx = (tos - i) & mask;
0436         wrlbr_from(lbr_idx, 0);
0437         wrlbr_to(lbr_idx, 0);
0438         if (need_info)
0439             wrlbr_info(lbr_idx, 0);
0440     }
0441 
0442     wrmsrl(x86_pmu.lbr_tos, tos);
0443 
0444     if (cpuc->lbr_select)
0445         wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
0446 }
0447 
0448 static void intel_pmu_arch_lbr_restore(void *ctx)
0449 {
0450     struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
0451     struct lbr_entry *entries = task_ctx->entries;
0452     int i;
0453 
0454     /* Fast reset the LBRs before restore if the call stack is not full. */
0455     if (!entries[x86_pmu.lbr_nr - 1].from)
0456         intel_pmu_arch_lbr_reset();
0457 
0458     for (i = 0; i < x86_pmu.lbr_nr; i++) {
0459         if (!entries[i].from)
0460             break;
0461         wrlbr_all(&entries[i], i, true);
0462     }
0463 }
0464 
0465 /*
0466  * Restore the Architecture LBR state from the xsave area in the perf
0467  * context data for the task via the XRSTORS instruction.
0468  */
0469 static void intel_pmu_arch_lbr_xrstors(void *ctx)
0470 {
0471     struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
0472 
0473     xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
0474 }
0475 
0476 static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
0477 {
0478     if (static_cpu_has(X86_FEATURE_ARCH_LBR))
0479         return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);
0480 
0481     return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
0482 }
0483 
0484 static void __intel_pmu_lbr_restore(void *ctx)
0485 {
0486     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0487 
0488     if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
0489         task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
0490         intel_pmu_lbr_reset();
0491         return;
0492     }
0493 
0494     /*
0495      * Do not restore the LBR registers if:
0496      * - no one else touched them, and
0497      * - they were not cleared in a C-state
0498      */
0499     if ((ctx == cpuc->last_task_ctx) &&
0500         (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
0501         !lbr_is_reset_in_cstate(ctx)) {
0502         task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
0503         return;
0504     }
0505 
0506     x86_pmu.lbr_restore(ctx);
0507 
0508     task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
0509 }
0510 
0511 void intel_pmu_lbr_save(void *ctx)
0512 {
0513     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0514     struct x86_perf_task_context *task_ctx = ctx;
0515     bool need_info = x86_pmu.lbr_has_info;
0516     unsigned lbr_idx, mask;
0517     u64 tos;
0518     int i;
0519 
0520     mask = x86_pmu.lbr_nr - 1;
0521     tos = intel_pmu_lbr_tos();
0522     for (i = 0; i < x86_pmu.lbr_nr; i++) {
0523         lbr_idx = (tos - i) & mask;
0524         if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
0525             break;
0526     }
0527     task_ctx->valid_lbrs = i;
0528     task_ctx->tos = tos;
0529 
0530     if (cpuc->lbr_select)
0531         rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
0532 }
0533 
0534 static void intel_pmu_arch_lbr_save(void *ctx)
0535 {
0536     struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
0537     struct lbr_entry *entries = task_ctx->entries;
0538     int i;
0539 
0540     for (i = 0; i < x86_pmu.lbr_nr; i++) {
0541         if (!rdlbr_all(&entries[i], i, true))
0542             break;
0543     }
0544 
0545     /* LBR call stack is not full. Reset is required in restore. */
0546     if (i < x86_pmu.lbr_nr)
0547         entries[x86_pmu.lbr_nr - 1].from = 0;
0548 }
0549 
0550 /*
0551  * Save the Architecture LBR state to the xsave area in the perf
0552  * context data for the task via the XSAVES instruction.
0553  */
0554 static void intel_pmu_arch_lbr_xsaves(void *ctx)
0555 {
0556     struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
0557 
0558     xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
0559 }
0560 
0561 static void __intel_pmu_lbr_save(void *ctx)
0562 {
0563     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0564 
0565     if (task_context_opt(ctx)->lbr_callstack_users == 0) {
0566         task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
0567         return;
0568     }
0569 
0570     x86_pmu.lbr_save(ctx);
0571 
0572     task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
0573 
0574     cpuc->last_task_ctx = ctx;
0575     cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
0576 }
0577 
0578 void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
0579                  struct perf_event_context *next)
0580 {
0581     void *prev_ctx_data, *next_ctx_data;
0582 
0583     swap(prev->task_ctx_data, next->task_ctx_data);
0584 
0585     /*
0586      * Architecture specific synchronization makes sense in
0587      * case both prev->task_ctx_data and next->task_ctx_data
0588      * pointers are allocated.
0589      */
0590 
0591     prev_ctx_data = next->task_ctx_data;
0592     next_ctx_data = prev->task_ctx_data;
0593 
0594     if (!prev_ctx_data || !next_ctx_data)
0595         return;
0596 
0597     swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
0598          task_context_opt(next_ctx_data)->lbr_callstack_users);
0599 }
0600 
0601 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
0602 {
0603     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0604     void *task_ctx;
0605 
0606     if (!cpuc->lbr_users)
0607         return;
0608 
0609     /*
0610      * If LBR callstack feature is enabled and the stack was saved when
0611      * the task was scheduled out, restore the stack. Otherwise flush
0612      * the LBR stack.
0613      */
0614     task_ctx = ctx ? ctx->task_ctx_data : NULL;
0615     if (task_ctx) {
0616         if (sched_in)
0617             __intel_pmu_lbr_restore(task_ctx);
0618         else
0619             __intel_pmu_lbr_save(task_ctx);
0620         return;
0621     }
0622 
0623     /*
0624      * Since a context switch can flip the address space and LBR entries
0625      * are not tagged with an identifier, we need to wipe the LBR, even for
0626      * per-cpu events. You simply cannot resolve the branches from the old
0627      * address space.
0628      */
0629     if (sched_in)
0630         intel_pmu_lbr_reset();
0631 }
0632 
0633 static inline bool branch_user_callstack(unsigned br_sel)
0634 {
0635     return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
0636 }
0637 
0638 void intel_pmu_lbr_add(struct perf_event *event)
0639 {
0640     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0641 
0642     if (!x86_pmu.lbr_nr)
0643         return;
0644 
0645     if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
0646         cpuc->lbr_select = 1;
0647 
0648     cpuc->br_sel = event->hw.branch_reg.reg;
0649 
0650     if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
0651         task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
0652 
0653     /*
0654      * Request pmu::sched_task() callback, which will fire inside the
0655      * regular perf event scheduling, so that call will:
0656      *
0657      *  - restore or wipe; when LBR-callstack,
0658      *  - wipe; otherwise,
0659      *
0660      * when this is from __perf_event_task_sched_in().
0661      *
0662      * However, if this is from perf_install_in_context(), no such callback
0663      * will follow and we'll need to reset the LBR here if this is the
0664      * first LBR event.
0665      *
0666      * The problem is, we cannot tell these cases apart... but we can
0667      * exclude the biggest chunk of cases by looking at
0668      * event->total_time_running. An event that has accrued runtime cannot
0669      * be 'new'. Conversely, a new event can get installed through the
0670      * context switch path for the first time.
0671      */
0672     if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
0673         cpuc->lbr_pebs_users++;
0674     perf_sched_cb_inc(event->ctx->pmu);
0675     if (!cpuc->lbr_users++ && !event->total_time_running)
0676         intel_pmu_lbr_reset();
0677 }
0678 
0679 void release_lbr_buffers(void)
0680 {
0681     struct kmem_cache *kmem_cache;
0682     struct cpu_hw_events *cpuc;
0683     int cpu;
0684 
0685     if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
0686         return;
0687 
0688     for_each_possible_cpu(cpu) {
0689         cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
0690         kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
0691         if (kmem_cache && cpuc->lbr_xsave) {
0692             kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
0693             cpuc->lbr_xsave = NULL;
0694         }
0695     }
0696 }
0697 
0698 void reserve_lbr_buffers(void)
0699 {
0700     struct kmem_cache *kmem_cache;
0701     struct cpu_hw_events *cpuc;
0702     int cpu;
0703 
0704     if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
0705         return;
0706 
0707     for_each_possible_cpu(cpu) {
0708         cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
0709         kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
0710         if (!kmem_cache || cpuc->lbr_xsave)
0711             continue;
0712 
0713         cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
0714                             GFP_KERNEL | __GFP_ZERO,
0715                             cpu_to_node(cpu));
0716     }
0717 }
0718 
0719 void intel_pmu_lbr_del(struct perf_event *event)
0720 {
0721     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0722 
0723     if (!x86_pmu.lbr_nr)
0724         return;
0725 
0726     if (branch_user_callstack(cpuc->br_sel) &&
0727         event->ctx->task_ctx_data)
0728         task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
0729 
0730     if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
0731         cpuc->lbr_select = 0;
0732 
0733     if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
0734         cpuc->lbr_pebs_users--;
0735     cpuc->lbr_users--;
0736     WARN_ON_ONCE(cpuc->lbr_users < 0);
0737     WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
0738     perf_sched_cb_dec(event->ctx->pmu);
0739 }
0740 
0741 static inline bool vlbr_exclude_host(void)
0742 {
0743     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0744 
0745     return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
0746         (unsigned long *)&cpuc->intel_ctrl_guest_mask);
0747 }
0748 
0749 void intel_pmu_lbr_enable_all(bool pmi)
0750 {
0751     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0752 
0753     if (cpuc->lbr_users && !vlbr_exclude_host())
0754         __intel_pmu_lbr_enable(pmi);
0755 }
0756 
0757 void intel_pmu_lbr_disable_all(void)
0758 {
0759     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0760 
0761     if (cpuc->lbr_users && !vlbr_exclude_host()) {
0762         if (static_cpu_has(X86_FEATURE_ARCH_LBR))
0763             return __intel_pmu_arch_lbr_disable();
0764 
0765         __intel_pmu_lbr_disable();
0766     }
0767 }
0768 
0769 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
0770 {
0771     unsigned long mask = x86_pmu.lbr_nr - 1;
0772     struct perf_branch_entry *br = cpuc->lbr_entries;
0773     u64 tos = intel_pmu_lbr_tos();
0774     int i;
0775 
0776     for (i = 0; i < x86_pmu.lbr_nr; i++) {
0777         unsigned long lbr_idx = (tos - i) & mask;
0778         union {
0779             struct {
0780                 u32 from;
0781                 u32 to;
0782             };
0783             u64     lbr;
0784         } msr_lastbranch;
0785 
0786         rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
0787 
0788         perf_clear_branch_entry_bitfields(br);
0789 
0790         br->from    = msr_lastbranch.from;
0791         br->to      = msr_lastbranch.to;
0792         br++;
0793     }
0794     cpuc->lbr_stack.nr = i;
0795     cpuc->lbr_stack.hw_idx = tos;
0796 }
0797 
0798 /*
0799  * Due to lack of segmentation in Linux the effective address (offset)
0800  * is the same as the linear address, allowing us to merge the LIP and EIP
0801  * LBR formats.
0802  */
0803 void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
0804 {
0805     bool need_info = false, call_stack = false;
0806     unsigned long mask = x86_pmu.lbr_nr - 1;
0807     struct perf_branch_entry *br = cpuc->lbr_entries;
0808     u64 tos = intel_pmu_lbr_tos();
0809     int i;
0810     int out = 0;
0811     int num = x86_pmu.lbr_nr;
0812 
0813     if (cpuc->lbr_sel) {
0814         need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
0815         if (cpuc->lbr_sel->config & LBR_CALL_STACK)
0816             call_stack = true;
0817     }
0818 
0819     for (i = 0; i < num; i++) {
0820         unsigned long lbr_idx = (tos - i) & mask;
0821         u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
0822         u16 cycles = 0;
0823 
0824         from = rdlbr_from(lbr_idx, NULL);
0825         to   = rdlbr_to(lbr_idx, NULL);
0826 
0827         /*
0828          * Read LBR call stack entries
0829          * until invalid entry (0s) is detected.
0830          */
0831         if (call_stack && !from)
0832             break;
0833 
0834         if (x86_pmu.lbr_has_info) {
0835             if (need_info) {
0836                 u64 info;
0837 
0838                 info = rdlbr_info(lbr_idx, NULL);
0839                 mis = !!(info & LBR_INFO_MISPRED);
0840                 pred = !mis;
0841                 cycles = (info & LBR_INFO_CYCLES);
0842                 if (x86_pmu.lbr_has_tsx) {
0843                     in_tx = !!(info & LBR_INFO_IN_TX);
0844                     abort = !!(info & LBR_INFO_ABORT);
0845                 }
0846             }
0847         } else {
0848             int skip = 0;
0849 
0850             if (x86_pmu.lbr_from_flags) {
0851                 mis = !!(from & LBR_FROM_FLAG_MISPRED);
0852                 pred = !mis;
0853                 skip = 1;
0854             }
0855             if (x86_pmu.lbr_has_tsx) {
0856                 in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
0857                 abort = !!(from & LBR_FROM_FLAG_ABORT);
0858                 skip = 3;
0859             }
0860             from = (u64)((((s64)from) << skip) >> skip);
0861 
0862             if (x86_pmu.lbr_to_cycles) {
0863                 cycles = ((to >> 48) & LBR_INFO_CYCLES);
0864                 to = (u64)((((s64)to) << 16) >> 16);
0865             }
0866         }
0867 
0868         /*
0869          * Some CPUs report duplicated abort records,
0870          * with the second entry not having an abort bit set.
0871          * Skip them here. This loop runs backwards,
0872          * so we need to undo the previous record.
0873          * If the abort just happened outside the window
0874          * the extra entry cannot be removed.
0875          */
0876         if (abort && x86_pmu.lbr_double_abort && out > 0)
0877             out--;
0878 
0879         perf_clear_branch_entry_bitfields(br+out);
0880         br[out].from     = from;
0881         br[out].to   = to;
0882         br[out].mispred  = mis;
0883         br[out].predicted = pred;
0884         br[out].in_tx    = in_tx;
0885         br[out].abort    = abort;
0886         br[out].cycles   = cycles;
0887         out++;
0888     }
0889     cpuc->lbr_stack.nr = out;
0890     cpuc->lbr_stack.hw_idx = tos;
0891 }
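
/*
 * Illustrative sketch of the flag stripping above: with skip == 3, a raw
 * "from" value of 0xc0007ffff7a12345 (MISPRED bit 63 and IN_TX bit 62 set
 * around a user address) becomes ((s64)from << 3) >> 3 ==
 * 0x00007ffff7a12345: the arithmetic shift re-extends the sign from bit 60,
 * which also restores the 0xffff... prefix for kernel addresses.
 */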
0892 
0893 static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
0894 static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
0895 static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
0896 
0897 static __always_inline int get_lbr_br_type(u64 info)
0898 {
0899     int type = 0;
0900 
0901     if (static_branch_likely(&x86_lbr_type))
0902         type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
0903 
0904     return type;
0905 }
0906 
0907 static __always_inline bool get_lbr_mispred(u64 info)
0908 {
0909     bool mispred = 0;
0910 
0911     if (static_branch_likely(&x86_lbr_mispred))
0912         mispred = !!(info & LBR_INFO_MISPRED);
0913 
0914     return mispred;
0915 }
0916 
0917 static __always_inline u16 get_lbr_cycles(u64 info)
0918 {
0919     u16 cycles = info & LBR_INFO_CYCLES;
0920 
0921     if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
0922         (!static_branch_likely(&x86_lbr_cycles) ||
0923          !(info & LBR_INFO_CYC_CNT_VALID)))
0924         cycles = 0;
0925 
0926     return cycles;
0927 }
0928 
0929 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
0930                 struct lbr_entry *entries)
0931 {
0932     struct perf_branch_entry *e;
0933     struct lbr_entry *lbr;
0934     u64 from, to, info;
0935     int i;
0936 
0937     for (i = 0; i < x86_pmu.lbr_nr; i++) {
0938         lbr = entries ? &entries[i] : NULL;
0939         e = &cpuc->lbr_entries[i];
0940 
0941         from = rdlbr_from(i, lbr);
0942         /*
0943          * Read LBR entries until invalid entry (0s) is detected.
0944          */
0945         if (!from)
0946             break;
0947 
0948         to = rdlbr_to(i, lbr);
0949         info = rdlbr_info(i, lbr);
0950 
0951         perf_clear_branch_entry_bitfields(e);
0952 
0953         e->from     = from;
0954         e->to       = to;
0955         e->mispred  = get_lbr_mispred(info);
0956         e->predicted    = !e->mispred;
0957         e->in_tx    = !!(info & LBR_INFO_IN_TX);
0958         e->abort    = !!(info & LBR_INFO_ABORT);
0959         e->cycles   = get_lbr_cycles(info);
0960         e->type     = get_lbr_br_type(info);
0961     }
0962 
0963     cpuc->lbr_stack.nr = i;
0964 }
0965 
0966 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
0967 {
0968     intel_pmu_store_lbr(cpuc, NULL);
0969 }
0970 
0971 static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
0972 {
0973     struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
0974 
0975     if (!xsave) {
0976         intel_pmu_store_lbr(cpuc, NULL);
0977         return;
0978     }
0979     xsaves(&xsave->xsave, XFEATURE_MASK_LBR);
0980 
0981     intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
0982 }
0983 
0984 void intel_pmu_lbr_read(void)
0985 {
0986     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0987 
0988     /*
0989      * Don't read when all LBR users are using adaptive PEBS.
0990      *
0991      * This could be smarter and actually check the event,
0992      * but this simple approach seems to work for now.
0993      */
0994     if (!cpuc->lbr_users || vlbr_exclude_host() ||
0995         cpuc->lbr_users == cpuc->lbr_pebs_users)
0996         return;
0997 
0998     x86_pmu.lbr_read(cpuc);
0999 
1000     intel_pmu_lbr_filter(cpuc);
1001 }
1002 
1003 /*
1004  * SW filter is used:
1005  * - in case there is no HW filter
1006  * - in case the HW filter has errata or limitations
1007  */
1008 static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
1009 {
1010     u64 br_type = event->attr.branch_sample_type;
1011     int mask = 0;
1012 
1013     if (br_type & PERF_SAMPLE_BRANCH_USER)
1014         mask |= X86_BR_USER;
1015 
1016     if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
1017         mask |= X86_BR_KERNEL;
1018 
1019     /* we ignore BRANCH_HV here */
1020 
1021     if (br_type & PERF_SAMPLE_BRANCH_ANY)
1022         mask |= X86_BR_ANY;
1023 
1024     if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
1025         mask |= X86_BR_ANY_CALL;
1026 
1027     if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
1028         mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
1029 
1030     if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
1031         mask |= X86_BR_IND_CALL;
1032 
1033     if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
1034         mask |= X86_BR_ABORT;
1035 
1036     if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
1037         mask |= X86_BR_IN_TX;
1038 
1039     if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
1040         mask |= X86_BR_NO_TX;
1041 
1042     if (br_type & PERF_SAMPLE_BRANCH_COND)
1043         mask |= X86_BR_JCC;
1044 
1045     if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
1046         if (!x86_pmu_has_lbr_callstack())
1047             return -EOPNOTSUPP;
1048         if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
1049             return -EINVAL;
1050         mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
1051             X86_BR_CALL_STACK;
1052     }
1053 
1054     if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
1055         mask |= X86_BR_IND_JMP;
1056 
1057     if (br_type & PERF_SAMPLE_BRANCH_CALL)
1058         mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
1059 
1060     if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
1061         mask |= X86_BR_TYPE_SAVE;
1062 
1063     /*
1064      * Stash the actual user request into reg; it may
1065      * be used by fixup code for some CPUs.
1066      */
1067     event->hw.branch_reg.reg = mask;
1068     return 0;
1069 }
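
/*
 * Usage sketch (illustrative): a request for
 * PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL ends up here as
 * mask == X86_BR_USER | X86_BR_ANY_CALL, i.e. user-level calls, indirect
 * calls, zero-length calls, syscalls, software interrupts and hardware
 * irqs. The stashed value is later consumed by intel_pmu_lbr_filter() to
 * drop LBR entries whose decoded type does not match.
 */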
1070 
1071 /*
1072  * Set up the HW LBR filter.
1073  * Used only when available; it may not be enough to disambiguate
1074  * all branches and may need the help of the SW filter.
1075  */
1076 static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
1077 {
1078     struct hw_perf_event_extra *reg;
1079     u64 br_type = event->attr.branch_sample_type;
1080     u64 mask = 0, v;
1081     int i;
1082 
1083     for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
1084         if (!(br_type & (1ULL << i)))
1085             continue;
1086 
1087         v = x86_pmu.lbr_sel_map[i];
1088         if (v == LBR_NOT_SUPP)
1089             return -EOPNOTSUPP;
1090 
1091         if (v != LBR_IGN)
1092             mask |= v;
1093     }
1094 
1095     reg = &event->hw.branch_reg;
1096     reg->idx = EXTRA_REG_LBR;
1097 
1098     if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
1099         reg->config = mask;
1100 
1101         /*
1102          * The Arch LBR HW can retrieve the common branch types
1103          * from the LBR_INFO. It doesn't require the high overhead
1104          * SW disassemble.
1105          * Enable the branch type by default for the Arch LBR.
1106          */
1107         reg->reg |= X86_BR_TYPE_SAVE;
1108         return 0;
1109     }
1110 
1111     /*
1112      * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
1113      * in suppress mode. So LBR_SELECT should be set to
1114      * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
1115      * But the 10th bit LBR_CALL_STACK does not operate
1116      * in suppress mode.
1117      */
1118     reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
1119 
1120     if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
1121         (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
1122         x86_pmu.lbr_has_info)
1123         reg->config |= LBR_NO_INFO;
1124 
1125     return 0;
1126 }
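
/*
 * Worked example (illustrative, assuming lbr_sel_mask == LBR_SEL_MASK as
 * set by the init functions below): a request for
 * PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_COND maps to
 * mask == LBR_USER | LBR_JCC == 0x006, so
 * reg->config == 0x006 ^ (0x3ff & ~LBR_CALL_STACK) == 0x1f9: the USER and
 * JCC suppress bits stay clear ("capture"), every other suppress bit is
 * set, and only user-level conditional branches are recorded.
 */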
1127 
1128 int intel_pmu_setup_lbr_filter(struct perf_event *event)
1129 {
1130     int ret = 0;
1131 
1132     /*
1133      * no LBR on this PMU
1134      */
1135     if (!x86_pmu.lbr_nr)
1136         return -EOPNOTSUPP;
1137 
1138     /*
1139      * setup SW LBR filter
1140      */
1141     ret = intel_pmu_setup_sw_lbr_filter(event);
1142     if (ret)
1143         return ret;
1144 
1145     /*
1146      * setup HW LBR filter, if any
1147      */
1148     if (x86_pmu.lbr_sel_map)
1149         ret = intel_pmu_setup_hw_lbr_filter(event);
1150 
1151     return ret;
1152 }
1153 
1154 /*
1155  * Return the type of control flow change at address "from".
1156  * The instruction is not necessarily a branch (e.g., in case of an interrupt).
1157  *
1158  * The branch type returned also includes the priv level of the
1159  * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
1160  *
1161  * If a branch type is unknown OR the instruction cannot be
1162  * decoded (e.g., text page not present), then X86_BR_NONE is
1163  * returned.
1164  */
1165 static int branch_type(unsigned long from, unsigned long to, int abort)
1166 {
1167     struct insn insn;
1168     void *addr;
1169     int bytes_read, bytes_left;
1170     int ret = X86_BR_NONE;
1171     int ext, to_plm, from_plm;
1172     u8 buf[MAX_INSN_SIZE];
1173     int is64 = 0;
1174 
1175     to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
1176     from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
1177 
1178     /*
1179      * may be zero if the LBR did not fill up after a reset by the time
1180      * we get a PMU interrupt
1181      */
1182     if (from == 0 || to == 0)
1183         return X86_BR_NONE;
1184 
1185     if (abort)
1186         return X86_BR_ABORT | to_plm;
1187 
1188     if (from_plm == X86_BR_USER) {
1189         /*
1190          * can happen if measuring at the user level only
1191          * and we interrupt in a kernel thread, e.g., idle.
1192          */
1193         if (!current->mm)
1194             return X86_BR_NONE;
1195 
1196         /* may fail if text not present */
1197         bytes_left = copy_from_user_nmi(buf, (void __user *)from,
1198                         MAX_INSN_SIZE);
1199         bytes_read = MAX_INSN_SIZE - bytes_left;
1200         if (!bytes_read)
1201             return X86_BR_NONE;
1202 
1203         addr = buf;
1204     } else {
1205         /*
1206          * The LBR logs any address in the IP, even if the IP just
1207          * faulted. This means userspace can control the from address.
1208          * Ensure we don't blindly read any address by validating it is
1209          * a known text address.
1210          */
1211         if (kernel_text_address(from)) {
1212             addr = (void *)from;
1213             /*
1214              * Assume we can get the maximum possible size
1215              * when grabbing kernel data.  This is not
1216              * _strictly_ true since we could possibly be
1217              * executing up next to a memory hole, but
1218              * it is very unlikely to be a problem.
1219              */
1220             bytes_read = MAX_INSN_SIZE;
1221         } else {
1222             return X86_BR_NONE;
1223         }
1224     }
1225 
1226     /*
1227      * decoder needs to know the ABI especially
1228      * on 64-bit systems running 32-bit apps
1229      */
1230 #ifdef CONFIG_X86_64
1231     is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
1232 #endif
1233     insn_init(&insn, addr, bytes_read, is64);
1234     if (insn_get_opcode(&insn))
1235         return X86_BR_ABORT;
1236 
1237     switch (insn.opcode.bytes[0]) {
1238     case 0xf:
1239         switch (insn.opcode.bytes[1]) {
1240         case 0x05: /* syscall */
1241         case 0x34: /* sysenter */
1242             ret = X86_BR_SYSCALL;
1243             break;
1244         case 0x07: /* sysret */
1245         case 0x35: /* sysexit */
1246             ret = X86_BR_SYSRET;
1247             break;
1248         case 0x80 ... 0x8f: /* conditional */
1249             ret = X86_BR_JCC;
1250             break;
1251         default:
1252             ret = X86_BR_NONE;
1253         }
1254         break;
1255     case 0x70 ... 0x7f: /* conditional */
1256         ret = X86_BR_JCC;
1257         break;
1258     case 0xc2: /* near ret */
1259     case 0xc3: /* near ret */
1260     case 0xca: /* far ret */
1261     case 0xcb: /* far ret */
1262         ret = X86_BR_RET;
1263         break;
1264     case 0xcf: /* iret */
1265         ret = X86_BR_IRET;
1266         break;
1267     case 0xcc ... 0xce: /* int */
1268         ret = X86_BR_INT;
1269         break;
1270     case 0xe8: /* call near rel */
1271         if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
1272             /* zero length call */
1273             ret = X86_BR_ZERO_CALL;
1274             break;
1275         }
1276         fallthrough;
1277     case 0x9a: /* call far absolute */
1278         ret = X86_BR_CALL;
1279         break;
1280     case 0xe0 ... 0xe3: /* loop jmp */
1281         ret = X86_BR_JCC;
1282         break;
1283     case 0xe9 ... 0xeb: /* jmp */
1284         ret = X86_BR_JMP;
1285         break;
1286     case 0xff: /* call near absolute, call far absolute ind */
1287         if (insn_get_modrm(&insn))
1288             return X86_BR_ABORT;
1289 
1290         ext = (insn.modrm.bytes[0] >> 3) & 0x7;
1291         switch (ext) {
1292         case 2: /* near ind call */
1293         case 3: /* far ind call */
1294             ret = X86_BR_IND_CALL;
1295             break;
1296         case 4:
1297         case 5:
1298             ret = X86_BR_IND_JMP;
1299             break;
1300         }
1301         break;
1302     default:
1303         ret = X86_BR_NONE;
1304     }
1305     /*
1306      * interrupts, traps, faults (and thus ring transitions) may
1307      * occur on any instruction. Thus, to classify them correctly,
1308      * we need to first look at the from and to priv levels. If they
1309      * are different and to is in the kernel, then it indicates
1310      * a ring transition. If the from instruction is not a ring
1311      * transition instr (syscall, sysenter, int), then it means
1312      * it was an irq, trap or fault.
1313      *
1314      * we have no way of detecting kernel to kernel faults.
1315      */
1316     if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
1317         && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
1318         ret = X86_BR_IRQ;
1319 
1320     /*
1321      * branch priv level determined by target as
1322      * is done by HW when LBR_SELECT is implemented
1323      */
1324     if (ret != X86_BR_NONE)
1325         ret |= to_plm;
1326 
1327     return ret;
1328 }
1329 
1330 #define X86_BR_TYPE_MAP_MAX 16
1331 
1332 static int branch_map[X86_BR_TYPE_MAP_MAX] = {
1333     PERF_BR_CALL,       /* X86_BR_CALL */
1334     PERF_BR_RET,        /* X86_BR_RET */
1335     PERF_BR_SYSCALL,    /* X86_BR_SYSCALL */
1336     PERF_BR_SYSRET,     /* X86_BR_SYSRET */
1337     PERF_BR_UNKNOWN,    /* X86_BR_INT */
1338     PERF_BR_ERET,       /* X86_BR_IRET */
1339     PERF_BR_COND,       /* X86_BR_JCC */
1340     PERF_BR_UNCOND,     /* X86_BR_JMP */
1341     PERF_BR_IRQ,        /* X86_BR_IRQ */
1342     PERF_BR_IND_CALL,   /* X86_BR_IND_CALL */
1343     PERF_BR_UNKNOWN,    /* X86_BR_ABORT */
1344     PERF_BR_UNKNOWN,    /* X86_BR_IN_TX */
1345     PERF_BR_UNKNOWN,    /* X86_BR_NO_TX */
1346     PERF_BR_CALL,       /* X86_BR_ZERO_CALL */
1347     PERF_BR_UNKNOWN,    /* X86_BR_CALL_STACK */
1348     PERF_BR_IND,        /* X86_BR_IND_JMP */
1349 };
1350 
1351 static int
1352 common_branch_type(int type)
1353 {
1354     int i;
1355 
1356     type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
1357 
1358     if (type) {
1359         i = __ffs(type);
1360         if (i < X86_BR_TYPE_MAP_MAX)
1361             return branch_map[i];
1362     }
1363 
1364     return PERF_BR_UNKNOWN;
1365 }
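
/*
 * Illustrative example: for type == X86_BR_JCC | X86_BR_KERNEL the shift
 * above drops the two privilege bits, __ffs() then finds bit 6 (X86_BR_JCC
 * shifted down), and branch_map[6] yields PERF_BR_COND.
 */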
1366 
1367 enum {
1368     ARCH_LBR_BR_TYPE_JCC            = 0,
1369     ARCH_LBR_BR_TYPE_NEAR_IND_JMP       = 1,
1370     ARCH_LBR_BR_TYPE_NEAR_REL_JMP       = 2,
1371     ARCH_LBR_BR_TYPE_NEAR_IND_CALL      = 3,
1372     ARCH_LBR_BR_TYPE_NEAR_REL_CALL      = 4,
1373     ARCH_LBR_BR_TYPE_NEAR_RET       = 5,
1374     ARCH_LBR_BR_TYPE_KNOWN_MAX      = ARCH_LBR_BR_TYPE_NEAR_RET,
1375 
1376     ARCH_LBR_BR_TYPE_MAP_MAX        = 16,
1377 };
1378 
1379 static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
1380     [ARCH_LBR_BR_TYPE_JCC]          = X86_BR_JCC,
1381     [ARCH_LBR_BR_TYPE_NEAR_IND_JMP]     = X86_BR_IND_JMP,
1382     [ARCH_LBR_BR_TYPE_NEAR_REL_JMP]     = X86_BR_JMP,
1383     [ARCH_LBR_BR_TYPE_NEAR_IND_CALL]    = X86_BR_IND_CALL,
1384     [ARCH_LBR_BR_TYPE_NEAR_REL_CALL]    = X86_BR_CALL,
1385     [ARCH_LBR_BR_TYPE_NEAR_RET]     = X86_BR_RET,
1386 };
1387 
1388 /*
1389  * Implement the actual branch filter based on user demand.
1390  * Hardware may not exactly satisfy that request, thus
1391  * we need to inspect opcodes. Mismatched branches are
1392  * discarded. Therefore, the number of branches returned
1393  * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
1394  */
1395 static void
1396 intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
1397 {
1398     u64 from, to;
1399     int br_sel = cpuc->br_sel;
1400     int i, j, type, to_plm;
1401     bool compress = false;
1402 
1403     /* if sampling all branches, then nothing to filter */
1404     if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
1405         ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
1406         return;
1407 
1408     for (i = 0; i < cpuc->lbr_stack.nr; i++) {
1409 
1410         from = cpuc->lbr_entries[i].from;
1411         to = cpuc->lbr_entries[i].to;
1412         type = cpuc->lbr_entries[i].type;
1413 
1414         /*
1415          * Parse the branch type recorded in LBR_x_INFO MSR.
1416          * Doesn't support OTHER_BRANCH decoding for now.
1417          * The OTHER_BRANCH branch type still relies on software decoding.
1418          */
1419         if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
1420             type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
1421             to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
1422             type = arch_lbr_br_type_map[type] | to_plm;
1423         } else
1424             type = branch_type(from, to, cpuc->lbr_entries[i].abort);
1425         if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
1426             if (cpuc->lbr_entries[i].in_tx)
1427                 type |= X86_BR_IN_TX;
1428             else
1429                 type |= X86_BR_NO_TX;
1430         }
1431 
1432         /* if type does not correspond, then discard */
1433         if (type == X86_BR_NONE || (br_sel & type) != type) {
1434             cpuc->lbr_entries[i].from = 0;
1435             compress = true;
1436         }
1437 
1438         if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
1439             cpuc->lbr_entries[i].type = common_branch_type(type);
1440     }
1441 
1442     if (!compress)
1443         return;
1444 
1445     /* remove all entries with from=0 */
1446     for (i = 0; i < cpuc->lbr_stack.nr; ) {
1447         if (!cpuc->lbr_entries[i].from) {
1448             j = i;
1449             while (++j < cpuc->lbr_stack.nr)
1450                 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
1451             cpuc->lbr_stack.nr--;
1452             if (!cpuc->lbr_entries[i].from)
1453                 continue;
1454         }
1455         i++;
1456     }
1457 }
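
/*
 * Illustrative example of the filtering above: with br_sel ==
 * X86_BR_USER | X86_BR_JCC, an entry decoded as X86_BR_CALL | X86_BR_USER
 * fails the (br_sel & type) == type test, its from field is zeroed and the
 * compress pass removes it, while X86_BR_JCC | X86_BR_USER is kept.
 */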
1458 
1459 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
1460 {
1461     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1462 
1463     /* Cannot get TOS for large PEBS and Arch LBR */
1464     if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
1465         (cpuc->n_pebs == cpuc->n_large_pebs))
1466         cpuc->lbr_stack.hw_idx = -1ULL;
1467     else
1468         cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
1469 
1470     intel_pmu_store_lbr(cpuc, lbr);
1471     intel_pmu_lbr_filter(cpuc);
1472 }
1473 
1474 /*
1475  * Map interface branch filters onto LBR filters
1476  */
1477 static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1478     [PERF_SAMPLE_BRANCH_ANY_SHIFT]      = LBR_ANY,
1479     [PERF_SAMPLE_BRANCH_USER_SHIFT]     = LBR_USER,
1480     [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]   = LBR_KERNEL,
1481     [PERF_SAMPLE_BRANCH_HV_SHIFT]       = LBR_IGN,
1482     [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
1483                         | LBR_IND_JMP | LBR_FAR,
1484     /*
1485      * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
1486      */
1487     [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
1488      LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
1489     /*
1490      * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
1491      */
1492     [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
1493     [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
1494     [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1495 };
1496 
1497 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1498     [PERF_SAMPLE_BRANCH_ANY_SHIFT]      = LBR_ANY,
1499     [PERF_SAMPLE_BRANCH_USER_SHIFT]     = LBR_USER,
1500     [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]   = LBR_KERNEL,
1501     [PERF_SAMPLE_BRANCH_HV_SHIFT]       = LBR_IGN,
1502     [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
1503     [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1504                         | LBR_FAR,
1505     [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1506     [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
1507     [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1508     [PERF_SAMPLE_BRANCH_CALL_SHIFT]     = LBR_REL_CALL,
1509 };
1510 
1511 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1512     [PERF_SAMPLE_BRANCH_ANY_SHIFT]      = LBR_ANY,
1513     [PERF_SAMPLE_BRANCH_USER_SHIFT]     = LBR_USER,
1514     [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]   = LBR_KERNEL,
1515     [PERF_SAMPLE_BRANCH_HV_SHIFT]       = LBR_IGN,
1516     [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
1517     [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
1518                         | LBR_FAR,
1519     [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
1520     [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
1521     [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
1522                         | LBR_RETURN | LBR_CALL_STACK,
1523     [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1524     [PERF_SAMPLE_BRANCH_CALL_SHIFT]     = LBR_REL_CALL,
1525 };
1526 
1527 static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1528     [PERF_SAMPLE_BRANCH_ANY_SHIFT]      = ARCH_LBR_ANY,
1529     [PERF_SAMPLE_BRANCH_USER_SHIFT]     = ARCH_LBR_USER,
1530     [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]   = ARCH_LBR_KERNEL,
1531     [PERF_SAMPLE_BRANCH_HV_SHIFT]       = LBR_IGN,
1532     [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = ARCH_LBR_RETURN |
1533                           ARCH_LBR_OTHER_BRANCH,
1534     [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = ARCH_LBR_REL_CALL |
1535                           ARCH_LBR_IND_CALL |
1536                           ARCH_LBR_OTHER_BRANCH,
1537     [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = ARCH_LBR_IND_CALL,
1538     [PERF_SAMPLE_BRANCH_COND_SHIFT]         = ARCH_LBR_JCC,
1539     [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = ARCH_LBR_REL_CALL |
1540                           ARCH_LBR_IND_CALL |
1541                           ARCH_LBR_RETURN |
1542                           ARCH_LBR_CALL_STACK,
1543     [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP,
1544     [PERF_SAMPLE_BRANCH_CALL_SHIFT]     = ARCH_LBR_REL_CALL,
1545 };
1546 
1547 /* core */
1548 void __init intel_pmu_lbr_init_core(void)
1549 {
1550     x86_pmu.lbr_nr     = 4;
1551     x86_pmu.lbr_tos    = MSR_LBR_TOS;
1552     x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1553     x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1554 
1555     /*
1556      * SW branch filter usage:
1557      * - compensate for lack of HW filter
1558      */
1559 }
1560 
1561 /* nehalem/westmere */
1562 void __init intel_pmu_lbr_init_nhm(void)
1563 {
1564     x86_pmu.lbr_nr     = 16;
1565     x86_pmu.lbr_tos    = MSR_LBR_TOS;
1566     x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1567     x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1568 
1569     x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1570     x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1571 
1572     /*
1573      * SW branch filter usage:
1574      * - workaround LBR_SEL errata (see above)
1575      * - support syscall, sysret capture.
1576      *   That requires LBR_FAR but that means far
1577      *   jmp need to be filtered out
1578      */
1579 }
1580 
1581 /* sandy bridge */
1582 void __init intel_pmu_lbr_init_snb(void)
1583 {
1584     x86_pmu.lbr_nr   = 16;
1585     x86_pmu.lbr_tos  = MSR_LBR_TOS;
1586     x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1587     x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1588 
1589     x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1590     x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1591 
1592     /*
1593      * SW branch filter usage:
1594      * - support syscall, sysret capture.
1595      *   That requires LBR_FAR but that means far
1596      *   jmp need to be filtered out
1597      */
1598 }
1599 
1600 static inline struct kmem_cache *
1601 create_lbr_kmem_cache(size_t size, size_t align)
1602 {
1603     return kmem_cache_create("x86_lbr", size, align, 0, NULL);
1604 }
1605 
1606 /* haswell */
1607 void intel_pmu_lbr_init_hsw(void)
1608 {
1609     size_t size = sizeof(struct x86_perf_task_context);
1610 
1611     x86_pmu.lbr_nr   = 16;
1612     x86_pmu.lbr_tos  = MSR_LBR_TOS;
1613     x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1614     x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1615 
1616     x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1617     x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1618 
1619     x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1620 }
1621 
1622 /* skylake */
1623 __init void intel_pmu_lbr_init_skl(void)
1624 {
1625     size_t size = sizeof(struct x86_perf_task_context);
1626 
1627     x86_pmu.lbr_nr   = 32;
1628     x86_pmu.lbr_tos  = MSR_LBR_TOS;
1629     x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1630     x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1631     x86_pmu.lbr_info = MSR_LBR_INFO_0;
1632 
1633     x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1634     x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1635 
1636     x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1637 
1638     /*
1639      * SW branch filter usage:
1640      * - support syscall, sysret capture.
1641      *   That requires LBR_FAR but that means far
1642      *   jmp need to be filtered out
1643      */
1644 }
1645 
1646 /* atom */
1647 void __init intel_pmu_lbr_init_atom(void)
1648 {
1649     /*
1650      * only models starting at stepping 10 seem
1651      * to have an operational LBR which can freeze
1652      * on a PMU interrupt
1653      */
1654     if (boot_cpu_data.x86_model == 28
1655         && boot_cpu_data.x86_stepping < 10) {
1656         pr_cont("LBR disabled due to erratum");
1657         return;
1658     }
1659 
1660     x86_pmu.lbr_nr     = 8;
1661     x86_pmu.lbr_tos    = MSR_LBR_TOS;
1662     x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1663     x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1664 
1665     /*
1666      * SW branch filter usage:
1667      * - compensate for lack of HW filter
1668      */
1669 }
1670 
1671 /* slm */
1672 void __init intel_pmu_lbr_init_slm(void)
1673 {
1674     x86_pmu.lbr_nr     = 8;
1675     x86_pmu.lbr_tos    = MSR_LBR_TOS;
1676     x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1677     x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1678 
1679     x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1680     x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1681 
1682     /*
1683      * SW branch filter usage:
1684      * - compensate for lack of HW filter
1685      */
1686     pr_cont("8-deep LBR, ");
1687 }
1688 
1689 /* Knights Landing */
1690 void intel_pmu_lbr_init_knl(void)
1691 {
1692     x86_pmu.lbr_nr     = 8;
1693     x86_pmu.lbr_tos    = MSR_LBR_TOS;
1694     x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1695     x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1696 
1697     x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1698     x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1699 
1700     /* Knights Landing does have the MISPREDICT bit, despite the LIP format */
1701     if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
1702         x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
1703 }
1704 
1705 void intel_pmu_lbr_init(void)
1706 {
1707     switch (x86_pmu.intel_cap.lbr_format) {
1708     case LBR_FORMAT_EIP_FLAGS2:
1709         x86_pmu.lbr_has_tsx = 1;
1710         x86_pmu.lbr_from_flags = 1;
1711         if (lbr_from_signext_quirk_needed())
1712             static_branch_enable(&lbr_from_quirk_key);
1713         break;
1714 
1715     case LBR_FORMAT_EIP_FLAGS:
1716         x86_pmu.lbr_from_flags = 1;
1717         break;
1718 
1719     case LBR_FORMAT_INFO:
1720         x86_pmu.lbr_has_tsx = 1;
1721         fallthrough;
1722     case LBR_FORMAT_INFO2:
1723         x86_pmu.lbr_has_info = 1;
1724         break;
1725 
1726     case LBR_FORMAT_TIME:
1727         x86_pmu.lbr_from_flags = 1;
1728         x86_pmu.lbr_to_cycles = 1;
1729         break;
1730     }
1731 
1732     if (x86_pmu.lbr_has_info) {
1733         /*
1734          * Only used in combination with baseline PEBS.
1735          */
1736         static_branch_enable(&x86_lbr_mispred);
1737         static_branch_enable(&x86_lbr_cycles);
1738     }
1739 }
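/*
 * Illustrative sketch (editorial addition): on the *_FLAGS formats selected
 * above, the branch flags ride in the top bits of the LBR_FROM value and are
 * masked off before the address is used (the real code additionally handles
 * the sign-extension quirk enabled above). A hypothetical decoder using the
 * LBR_FROM_FLAG_* bits defined earlier in this file:
 */
#if 0	/* example only, never compiled */
static void example_decode_lbr_from(u64 from, u64 *addr, bool *mispred,
				    bool *in_tx, bool *aborted)
{
	*mispred = !!(from & LBR_FROM_FLAG_MISPRED);
	*in_tx   = !!(from & LBR_FROM_FLAG_IN_TX);	/* TSX-capable formats */
	*aborted = !!(from & LBR_FROM_FLAG_ABORT);	/* TSX-capable formats */
	*addr    = from & ~(LBR_FROM_FLAG_MISPRED |
			    LBR_FROM_FLAG_IN_TX |
			    LBR_FROM_FLAG_ABORT);
}
#endif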
1740 
1741 /*
1742  * LBR state size is variable based on the max number of registers.
1743  * This calculates the expected state size, which should match
1744  * what the hardware enumerates for the size of XFEATURE_LBR.
1745  */
1746 static inline unsigned int get_lbr_state_size(void)
1747 {
1748     return sizeof(struct arch_lbr_state) +
1749            x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1750 }
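/*
 * Worked example (editorial addition, assuming struct lbr_entry holds three
 * u64 fields -- from, to and info -- i.e. 24 bytes): with x86_pmu.lbr_nr == 32
 * the entry array contributes 32 * 24 = 768 bytes on top of the fixed
 * sizeof(struct arch_lbr_state) header, and the total must match what the
 * hardware enumerates for XFEATURE_LBR, otherwise XSAVES support is declined
 * below.
 */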
1751 
1752 static bool is_arch_lbr_xsave_available(void)
1753 {
1754     if (!boot_cpu_has(X86_FEATURE_XSAVES))
1755         return false;
1756 
1757     /*
1758      * Check the LBR state with the corresponding software structure.
1759      * Disable LBR XSAVES support if the size doesn't match.
1760      */
1761     if (xfeature_size(XFEATURE_LBR) == 0)
1762         return false;
1763 
1764     if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
1765         return false;
1766 
1767     return true;
1768 }
1769 
1770 void __init intel_pmu_arch_lbr_init(void)
1771 {
1772     struct pmu *pmu = x86_get_pmu(smp_processor_id());
1773     union cpuid28_eax eax;
1774     union cpuid28_ebx ebx;
1775     union cpuid28_ecx ecx;
1776     unsigned int unused_edx;
1777     bool arch_lbr_xsave;
1778     size_t size;
1779     u64 lbr_nr;
1780 
1781     /* Arch LBR Capabilities */
1782     cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);
1783 
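    /*
     * Editorial note: each set bit n in lbr_depth_mask advertises support for
     * an LBR depth of 8 * (n + 1), so fls() of the mask times 8 yields the
     * deepest supported depth. E.g. a mask of 0x7 (depths 8, 16 and 24
     * supported) gives fls(0x7) * 8 = 24 entries.
     */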
1784     lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
1785     if (!lbr_nr)
1786         goto clear_arch_lbr;
1787 
1788     /* Apply the max depth of Arch LBR */
1789     if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
1790         goto clear_arch_lbr;
1791 
1792     x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
1793     x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
1794     x86_pmu.lbr_lip = eax.split.lbr_lip;
1795     x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
1796     x86_pmu.lbr_filter = ebx.split.lbr_filter;
1797     x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
1798     x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
1799     x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
1800     x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
1801     x86_pmu.lbr_nr = lbr_nr;
1802 
1803     if (x86_pmu.lbr_mispred)
1804         static_branch_enable(&x86_lbr_mispred);
1805     if (x86_pmu.lbr_timed_lbr)
1806         static_branch_enable(&x86_lbr_cycles);
1807     if (x86_pmu.lbr_br_type)
1808         static_branch_enable(&x86_lbr_type);
1809 
1810     arch_lbr_xsave = is_arch_lbr_xsave_available();
1811     if (arch_lbr_xsave) {
1812         size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
1813                get_lbr_state_size();
1814         pmu->task_ctx_cache = create_lbr_kmem_cache(size,
1815                                 XSAVE_ALIGNMENT);
1816     }
1817 
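    /*
     * Editorial note: if the XSAVE-backed cache was not set up (XSAVES
     * unavailable, state-size mismatch, or cache creation failure), fall back
     * to the plain arch-LBR context layout, which needs no special alignment.
     */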
1818     if (!pmu->task_ctx_cache) {
1819         arch_lbr_xsave = false;
1820 
1821         size = sizeof(struct x86_perf_task_context_arch_lbr) +
1822                lbr_nr * sizeof(struct lbr_entry);
1823         pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1824     }
1825 
1826     x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
1827     x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
1828     x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
1829 
1830     /* LBR callstack requires both CPL and Branch Filtering support */
1831     if (!x86_pmu.lbr_cpl ||
1832         !x86_pmu.lbr_filter ||
1833         !x86_pmu.lbr_call_stack)
1834         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
1835 
1836     if (!x86_pmu.lbr_cpl) {
1837         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
1838         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
1839     } else if (!x86_pmu.lbr_filter) {
1840         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
1841         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
1842         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
1843         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
1844         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
1845         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
1846         arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
1847     }
1848 
1849     x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
1850     x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;
1851 
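    /*
     * Editorial note: with neither CPL filtering nor branch-type filtering in
     * hardware, none of the control-map entries can be honoured, so drop the
     * map entirely instead of advertising filters that cannot work.
     */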
1852     if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
1853         x86_pmu.lbr_ctl_map = NULL;
1854 
1855     x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
1856     if (arch_lbr_xsave) {
1857         x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
1858         x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
1859         x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
1860         pr_cont("XSAVE ");
1861     } else {
1862         x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
1863         x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
1864         x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
1865     }
1866 
1867     pr_cont("Architectural LBR, ");
1868 
1869     return;
1870 
1871 clear_arch_lbr:
1872     clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
1873 }
1874 
1875 /**
1876  * x86_perf_get_lbr - get the LBR record information
1877  *
1878  * @lbr: the caller's memory to store the LBR record information
1879  *
1880  * Returns: 0 when the LBR information has been successfully obtained
1881  */
1882 int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
1883 {
1884     int lbr_fmt = x86_pmu.intel_cap.lbr_format;
1885 
1886     lbr->nr = x86_pmu.lbr_nr;
1887     lbr->from = x86_pmu.lbr_from;
1888     lbr->to = x86_pmu.lbr_to;
1889     lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
1890 
1891     return 0;
1892 }
1893 EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
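/*
 * Illustrative usage (editorial addition, hypothetical caller): a hypervisor
 * module could query the host LBR geometry before exposing LBR MSRs to a
 * guest, e.g.:
 */
#if 0	/* example only, never compiled */
	struct x86_pmu_lbr lbr;

	if (!x86_perf_get_lbr(&lbr) && lbr.nr)
		pr_info("host LBR: %u entries, FROM/TO MSR bases %x/%x\n",
			lbr.nr, lbr.from, lbr.to);
#endif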
1894 
1895 struct event_constraint vlbr_constraint =
1896     __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
1897               FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);