0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <linux/bitops.h>
0003 #include <linux/types.h>
0004 #include <linux/slab.h>
0005 
0006 #include <asm/cpu_entry_area.h>
0007 #include <asm/perf_event.h>
0008 #include <asm/tlbflush.h>
0009 #include <asm/insn.h>
0010 #include <asm/io.h>
0011 
0012 #include "../perf_event.h"
0013 
0014 /* Waste a full page so it can be mapped into the cpu_entry_area */
0015 DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
0016 
0017 /* The size of a BTS record in bytes: */
0018 #define BTS_RECORD_SIZE     24
0019 
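     /*
      * Upper bound on how far intel_pmu_pebs_fixup_ip() will walk back from
      * the sampled IP; also the size of the per-cpu insn_buffer it decodes
      * user text from.
      */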
0020 #define PEBS_FIXUP_SIZE     PAGE_SIZE
0021 
0022 /*
0023  * pebs_record_32 for p4 and core not supported
0024 
0025 struct pebs_record_32 {
0026     u32 flags, ip;
0027     u32 ax, bx, cx, dx;
0028     u32 si, di, bp, sp;
0029 };
0030 
0031  */
0032 
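     /*
      * Decoded views of the PEBS data-source field: the ld_* bits are used
      * for load-latency records, the st_* bits for precise-store records
      * and the st_lat_* bits for store-latency records.
      */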
0033 union intel_x86_pebs_dse {
0034     u64 val;
0035     struct {
0036         unsigned int ld_dse:4;
0037         unsigned int ld_stlb_miss:1;
0038         unsigned int ld_locked:1;
0039         unsigned int ld_data_blk:1;
0040         unsigned int ld_addr_blk:1;
0041         unsigned int ld_reserved:24;
0042     };
0043     struct {
0044         unsigned int st_l1d_hit:1;
0045         unsigned int st_reserved1:3;
0046         unsigned int st_stlb_miss:1;
0047         unsigned int st_locked:1;
0048         unsigned int st_reserved2:26;
0049     };
0050     struct {
0051         unsigned int st_lat_dse:4;
0052         unsigned int st_lat_stlb_miss:1;
0053         unsigned int st_lat_locked:1;
0054         unsigned int ld_reserved3:26;
0055     };
0056 };
0057 
0058 
0059 /*
0060  * Map PEBS Load Latency Data Source encodings to generic
0061  * memory data source information
0062  */
0063 #define P(a, b) PERF_MEM_S(a, b)
0064 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
0065 #define LEVEL(x) P(LVLNUM, x)
0066 #define REM P(REMOTE, REMOTE)
0067 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
0068 
0069 /* Version for Sandy Bridge and later */
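     /*
      * Indexed by the low four bits of the PEBS data-source (dse) field;
      * the __init helpers below patch the entries that differ between
      * generations.
      */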
0070 static u64 pebs_data_source[] = {
0071     P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
0072     OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
0073     OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
0074     OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
0075     OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
0076     OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
0077     OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
0078     OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
0079     OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
0080     OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
0081     OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
0082     OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
0083     OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
0084     OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
0085     OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
0086     OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
0087 };
0088 
0089 /* Patch up minor differences in the bits */
0090 void __init intel_pmu_pebs_data_source_nhm(void)
0091 {
0092     pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
0093     pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
0094     pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
0095 }
0096 
0097 static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
0098 {
0099     u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
0100 
0101     data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
0102     data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
0103     data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
0104     data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
0105     data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
0106 }
0107 
0108 void __init intel_pmu_pebs_data_source_skl(bool pmem)
0109 {
0110     __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
0111 }
0112 
0113 static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
0114 {
0115     data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
0116     data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
0117     data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
0118 }
0119 
0120 void __init intel_pmu_pebs_data_source_grt(void)
0121 {
0122     __intel_pmu_pebs_data_source_grt(pebs_data_source);
0123 }
0124 
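     /*
      * On ADL each hybrid PMU carries its own copy of the table: the big
      * cores use the SKL encodings, the atom cores the GRT ones.
      */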
0125 void __init intel_pmu_pebs_data_source_adl(void)
0126 {
0127     u64 *data_source;
0128 
0129     data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
0130     memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
0131     __intel_pmu_pebs_data_source_skl(false, data_source);
0132 
0133     data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
0134     memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
0135     __intel_pmu_pebs_data_source_grt(data_source);
0136 }
0137 
0138 static u64 precise_store_data(u64 status)
0139 {
0140     union intel_x86_pebs_dse dse;
0141     u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
0142 
0143     dse.val = status;
0144 
0145     /*
0146      * bit 4: TLB access
0147      * 1 = store missed 2nd level TLB
0148      *
0149      * so it either hit the walker or the OS
0150      * otherwise hit 2nd level TLB
0151      */
0152     if (dse.st_stlb_miss)
0153         val |= P(TLB, MISS);
0154     else
0155         val |= P(TLB, HIT);
0156 
0157     /*
0158      * bit 0: hit L1 data cache
0159      * if not set, then all we know is that
0160      * it missed L1D
0161      */
0162     if (dse.st_l1d_hit)
0163         val |= P(LVL, HIT);
0164     else
0165         val |= P(LVL, MISS);
0166 
0167     /*
0168      * bit 5: Locked prefix
0169      */
0170     if (dse.st_locked)
0171         val |= P(LOCK, LOCKED);
0172 
0173     return val;
0174 }
0175 
0176 static u64 precise_datala_hsw(struct perf_event *event, u64 status)
0177 {
0178     union perf_mem_data_src dse;
0179 
0180     dse.val = PERF_MEM_NA;
0181 
0182     if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
0183         dse.mem_op = PERF_MEM_OP_STORE;
0184     else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
0185         dse.mem_op = PERF_MEM_OP_LOAD;
0186 
0187     /*
0188      * L1 info only valid for following events:
0189      *
0190      * MEM_UOPS_RETIRED.STLB_MISS_STORES
0191      * MEM_UOPS_RETIRED.LOCK_STORES
0192      * MEM_UOPS_RETIRED.SPLIT_STORES
0193      * MEM_UOPS_RETIRED.ALL_STORES
0194      */
0195     if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
0196         if (status & 1)
0197             dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
0198         else
0199             dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
0200     }
0201     return dse.val;
0202 }
0203 
0204 static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
0205 {
0206     /*
0207      * TLB access
0208      * 0 = did not miss 2nd level TLB
0209      * 1 = missed 2nd level TLB
0210      */
0211     if (tlb)
0212         *val |= P(TLB, MISS) | P(TLB, L2);
0213     else
0214         *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
0215 
0216     /* locked prefix */
0217     if (lock)
0218         *val |= P(LOCK, LOCKED);
0219 }
0220 
0221 /* Retrieve the latency data for e-core of ADL */
0222 u64 adl_latency_data_small(struct perf_event *event, u64 status)
0223 {
0224     union intel_x86_pebs_dse dse;
0225     u64 val;
0226 
0227     WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
0228 
0229     dse.val = status;
0230 
0231     val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
0232 
0233     /*
0234      * For the atom core on ADL,
0235      * bit 4: lock, bit 5: TLB access.
0236      */
0237     pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
0238 
0239     if (dse.ld_data_blk)
0240         val |= P(BLK, DATA);
0241     else
0242         val |= P(BLK, NA);
0243 
0244     return val;
0245 }
0246 
0247 static u64 load_latency_data(struct perf_event *event, u64 status)
0248 {
0249     union intel_x86_pebs_dse dse;
0250     u64 val;
0251 
0252     dse.val = status;
0253 
0254     /*
0255      * use the mapping table for bits 0-3
0256      */
0257     val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
0258 
0259     /*
0260      * Nehalem models do not support TLB, Lock infos
0261      */
0262     if (x86_pmu.pebs_no_tlb) {
0263         val |= P(TLB, NA) | P(LOCK, NA);
0264         return val;
0265     }
0266 
0267     pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
0268 
0269     /*
0270      * Ice Lake and earlier models do not support block infos.
0271      */
0272     if (!x86_pmu.pebs_block) {
0273         val |= P(BLK, NA);
0274         return val;
0275     }
0276     /*
0277      * bit 6: load was blocked since its data could not be forwarded
0278      *        from a preceding store
0279      */
0280     if (dse.ld_data_blk)
0281         val |= P(BLK, DATA);
0282 
0283     /*
0284      * bit 7: load was blocked due to potential address conflict with
0285      *        a preceding store
0286      */
0287     if (dse.ld_addr_blk)
0288         val |= P(BLK, ADDR);
0289 
0290     if (!dse.ld_data_blk && !dse.ld_addr_blk)
0291         val |= P(BLK, NA);
0292 
0293     return val;
0294 }
0295 
0296 static u64 store_latency_data(struct perf_event *event, u64 status)
0297 {
0298     union intel_x86_pebs_dse dse;
0299     union perf_mem_data_src src;
0300     u64 val;
0301 
0302     dse.val = status;
0303 
0304     /*
0305      * use the mapping table for bits 0-3
0306      */
0307     val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
0308 
0309     pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
0310 
0311     val |= P(BLK, NA);
0312 
0313     /*
0314      * the pebs_data_source table is only for loads
0315      * so override the mem_op to say STORE instead
0316      */
0317     src.val = val;
0318     src.mem_op = P(OP,STORE);
0319 
0320     return src.val;
0321 }
0322 
0323 struct pebs_record_core {
0324     u64 flags, ip;
0325     u64 ax, bx, cx, dx;
0326     u64 si, di, bp, sp;
0327     u64 r8,  r9,  r10, r11;
0328     u64 r12, r13, r14, r15;
0329 };
0330 
0331 struct pebs_record_nhm {
0332     u64 flags, ip;
0333     u64 ax, bx, cx, dx;
0334     u64 si, di, bp, sp;
0335     u64 r8,  r9,  r10, r11;
0336     u64 r12, r13, r14, r15;
0337     u64 status, dla, dse, lat;
0338 };
0339 
0340 /*
0341  * Same as pebs_record_nhm, with two additional fields.
0342  */
0343 struct pebs_record_hsw {
0344     u64 flags, ip;
0345     u64 ax, bx, cx, dx;
0346     u64 si, di, bp, sp;
0347     u64 r8,  r9,  r10, r11;
0348     u64 r12, r13, r14, r15;
0349     u64 status, dla, dse, lat;
0350     u64 real_ip, tsx_tuning;
0351 };
0352 
0353 union hsw_tsx_tuning {
0354     struct {
0355         u32 cycles_last_block     : 32,
0356             hle_abort         : 1,
0357             rtm_abort         : 1,
0358             instruction_abort     : 1,
0359             non_instruction_abort : 1,
0360             retry         : 1,
0361             data_conflict     : 1,
0362             capacity_writes   : 1,
0363             capacity_reads    : 1;
0364     };
0365     u64     value;
0366 };
0367 
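     /*
      * Bits 32-39 of tsx_tuning, i.e. the abort/conflict/capacity flags
      * above; intel_get_tsx_transaction() reports them as the sample's
      * transaction flags.
      */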
0368 #define PEBS_HSW_TSX_FLAGS  0xff00000000ULL
0369 
0370 /* Same as HSW, plus TSC */
0371 
0372 struct pebs_record_skl {
0373     u64 flags, ip;
0374     u64 ax, bx, cx, dx;
0375     u64 si, di, bp, sp;
0376     u64 r8,  r9,  r10, r11;
0377     u64 r12, r13, r14, r15;
0378     u64 status, dla, dse, lat;
0379     u64 real_ip, tsx_tuning;
0380     u64 tsc;
0381 };
0382 
0383 void init_debug_store_on_cpu(int cpu)
0384 {
0385     struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
0386 
0387     if (!ds)
0388         return;
0389 
0390     wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
0391              (u32)((u64)(unsigned long)ds),
0392              (u32)((u64)(unsigned long)ds >> 32));
0393 }
0394 
0395 void fini_debug_store_on_cpu(int cpu)
0396 {
0397     if (!per_cpu(cpu_hw_events, cpu).ds)
0398         return;
0399 
0400     wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
0401 }
0402 
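     /*
      * Per-cpu scratch buffer (PEBS_FIXUP_SIZE bytes) into which
      * intel_pmu_pebs_fixup_ip() copies user text before decoding it.
      */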
0403 static DEFINE_PER_CPU(void *, insn_buffer);
0404 
0405 static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
0406 {
0407     unsigned long start = (unsigned long)cea;
0408     phys_addr_t pa;
0409     size_t msz = 0;
0410 
0411     pa = virt_to_phys(addr);
0412 
0413     preempt_disable();
0414     for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
0415         cea_set_pte(cea, pa, prot);
0416 
0417     /*
0418      * This is a cross-CPU update of the cpu_entry_area, we must shoot down
0419      * all TLB entries for it.
0420      */
0421     flush_tlb_kernel_range(start, start + size);
0422     preempt_enable();
0423 }
0424 
0425 static void ds_clear_cea(void *cea, size_t size)
0426 {
0427     unsigned long start = (unsigned long)cea;
0428     size_t msz = 0;
0429 
0430     preempt_disable();
0431     for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
0432         cea_set_pte(cea, 0, PAGE_NONE);
0433 
0434     flush_tlb_kernel_range(start, start + size);
0435     preempt_enable();
0436 }
0437 
0438 static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
0439 {
0440     unsigned int order = get_order(size);
0441     int node = cpu_to_node(cpu);
0442     struct page *page;
0443 
0444     page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
0445     return page ? page_address(page) : NULL;
0446 }
0447 
0448 static void dsfree_pages(const void *buffer, size_t size)
0449 {
0450     if (buffer)
0451         free_pages((unsigned long)buffer, get_order(size));
0452 }
0453 
0454 static int alloc_pebs_buffer(int cpu)
0455 {
0456     struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
0457     struct debug_store *ds = hwev->ds;
0458     size_t bsiz = x86_pmu.pebs_buffer_size;
0459     int max, node = cpu_to_node(cpu);
0460     void *buffer, *insn_buff, *cea;
0461 
0462     if (!x86_pmu.pebs)
0463         return 0;
0464 
0465     buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
0466     if (unlikely(!buffer))
0467         return -ENOMEM;
0468 
0469     /*
0470      * HSW+ already provides us the eventing ip; no need to allocate this
0471      * buffer then.
0472      */
0473     if (x86_pmu.intel_cap.pebs_format < 2) {
0474         insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
0475         if (!insn_buff) {
0476             dsfree_pages(buffer, bsiz);
0477             return -ENOMEM;
0478         }
0479         per_cpu(insn_buffer, cpu) = insn_buff;
0480     }
0481     hwev->ds_pebs_vaddr = buffer;
0482     /* Update the cpu entry area mapping */
0483     cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
0484     ds->pebs_buffer_base = (unsigned long) cea;
0485     ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
0486     ds->pebs_index = ds->pebs_buffer_base;
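         /* Round the usable area down to a whole number of PEBS records. */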
0487     max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
0488     ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
0489     return 0;
0490 }
0491 
0492 static void release_pebs_buffer(int cpu)
0493 {
0494     struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
0495     void *cea;
0496 
0497     if (!x86_pmu.pebs)
0498         return;
0499 
0500     kfree(per_cpu(insn_buffer, cpu));
0501     per_cpu(insn_buffer, cpu) = NULL;
0502 
0503     /* Clear the fixmap */
0504     cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
0505     ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
0506     dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
0507     hwev->ds_pebs_vaddr = NULL;
0508 }
0509 
0510 static int alloc_bts_buffer(int cpu)
0511 {
0512     struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
0513     struct debug_store *ds = hwev->ds;
0514     void *buffer, *cea;
0515     int max;
0516 
0517     if (!x86_pmu.bts)
0518         return 0;
0519 
0520     buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
0521     if (unlikely(!buffer)) {
0522         WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
0523         return -ENOMEM;
0524     }
0525     hwev->ds_bts_vaddr = buffer;
0526     /* Update the fixmap */
0527     cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
0528     ds->bts_buffer_base = (unsigned long) cea;
0529     ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
0530     ds->bts_index = ds->bts_buffer_base;
0531     max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
0532     ds->bts_absolute_maximum = ds->bts_buffer_base +
0533                     max * BTS_RECORD_SIZE;
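         /* Request the interrupt once the buffer is within 1/16th of full. */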
0534     ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
0535                     (max / 16) * BTS_RECORD_SIZE;
0536     return 0;
0537 }
0538 
0539 static void release_bts_buffer(int cpu)
0540 {
0541     struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
0542     void *cea;
0543 
0544     if (!x86_pmu.bts)
0545         return;
0546 
0547     /* Clear the fixmap */
0548     cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
0549     ds_clear_cea(cea, BTS_BUFFER_SIZE);
0550     dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
0551     hwev->ds_bts_vaddr = NULL;
0552 }
0553 
0554 static int alloc_ds_buffer(int cpu)
0555 {
0556     struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
0557 
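         /*
          * The debug_store itself lives in the cpu_entry_area (see
          * cpu_debug_store above); nothing to allocate, just clear it and
          * publish the pointer.
          */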
0558     memset(ds, 0, sizeof(*ds));
0559     per_cpu(cpu_hw_events, cpu).ds = ds;
0560     return 0;
0561 }
0562 
0563 static void release_ds_buffer(int cpu)
0564 {
0565     per_cpu(cpu_hw_events, cpu).ds = NULL;
0566 }
0567 
0568 void release_ds_buffers(void)
0569 {
0570     int cpu;
0571 
0572     if (!x86_pmu.bts && !x86_pmu.pebs)
0573         return;
0574 
0575     for_each_possible_cpu(cpu)
0576         release_ds_buffer(cpu);
0577 
0578     for_each_possible_cpu(cpu) {
0579         /*
0580          * Again, ignore errors from offline CPUs, they will no longer
0581          * observe cpu_hw_events.ds and not program the DS_AREA when
0582          * they come up.
0583          */
0584         fini_debug_store_on_cpu(cpu);
0585     }
0586 
0587     for_each_possible_cpu(cpu) {
0588         release_pebs_buffer(cpu);
0589         release_bts_buffer(cpu);
0590     }
0591 }
0592 
0593 void reserve_ds_buffers(void)
0594 {
0595     int bts_err = 0, pebs_err = 0;
0596     int cpu;
0597 
0598     x86_pmu.bts_active = 0;
0599     x86_pmu.pebs_active = 0;
0600 
0601     if (!x86_pmu.bts && !x86_pmu.pebs)
0602         return;
0603 
0604     if (!x86_pmu.bts)
0605         bts_err = 1;
0606 
0607     if (!x86_pmu.pebs)
0608         pebs_err = 1;
0609 
0610     for_each_possible_cpu(cpu) {
0611         if (alloc_ds_buffer(cpu)) {
0612             bts_err = 1;
0613             pebs_err = 1;
0614         }
0615 
0616         if (!bts_err && alloc_bts_buffer(cpu))
0617             bts_err = 1;
0618 
0619         if (!pebs_err && alloc_pebs_buffer(cpu))
0620             pebs_err = 1;
0621 
0622         if (bts_err && pebs_err)
0623             break;
0624     }
0625 
0626     if (bts_err) {
0627         for_each_possible_cpu(cpu)
0628             release_bts_buffer(cpu);
0629     }
0630 
0631     if (pebs_err) {
0632         for_each_possible_cpu(cpu)
0633             release_pebs_buffer(cpu);
0634     }
0635 
0636     if (bts_err && pebs_err) {
0637         for_each_possible_cpu(cpu)
0638             release_ds_buffer(cpu);
0639     } else {
0640         if (x86_pmu.bts && !bts_err)
0641             x86_pmu.bts_active = 1;
0642 
0643         if (x86_pmu.pebs && !pebs_err)
0644             x86_pmu.pebs_active = 1;
0645 
0646         for_each_possible_cpu(cpu) {
0647             /*
0648          * Ignore wrmsr_on_cpu() errors for offline CPUs; they will
0649          * get this call through intel_pmu_cpu_starting().
0650              */
0651             init_debug_store_on_cpu(cpu);
0652         }
0653     }
0654 }
0655 
0656 /*
0657  * BTS
0658  */
0659 
0660 struct event_constraint bts_constraint =
0661     EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
0662 
0663 void intel_pmu_enable_bts(u64 config)
0664 {
0665     unsigned long debugctlmsr;
0666 
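         /*
          * TR + BTS turn the branch trace store on, BTINT raises the
          * interrupt when the buffer threshold is reached, and the
          * BTS_OFF_* bits suppress kernel respectively user branches.
          */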
0667     debugctlmsr = get_debugctlmsr();
0668 
0669     debugctlmsr |= DEBUGCTLMSR_TR;
0670     debugctlmsr |= DEBUGCTLMSR_BTS;
0671     if (config & ARCH_PERFMON_EVENTSEL_INT)
0672         debugctlmsr |= DEBUGCTLMSR_BTINT;
0673 
0674     if (!(config & ARCH_PERFMON_EVENTSEL_OS))
0675         debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
0676 
0677     if (!(config & ARCH_PERFMON_EVENTSEL_USR))
0678         debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
0679 
0680     update_debugctlmsr(debugctlmsr);
0681 }
0682 
0683 void intel_pmu_disable_bts(void)
0684 {
0685     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0686     unsigned long debugctlmsr;
0687 
0688     if (!cpuc->ds)
0689         return;
0690 
0691     debugctlmsr = get_debugctlmsr();
0692 
0693     debugctlmsr &=
0694         ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
0695           DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
0696 
0697     update_debugctlmsr(debugctlmsr);
0698 }
0699 
0700 int intel_pmu_drain_bts_buffer(void)
0701 {
0702     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
0703     struct debug_store *ds = cpuc->ds;
0704     struct bts_record {
0705         u64 from;
0706         u64 to;
0707         u64 flags;
0708     };
0709     struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
0710     struct bts_record *at, *base, *top;
0711     struct perf_output_handle handle;
0712     struct perf_event_header header;
0713     struct perf_sample_data data;
0714     unsigned long skip = 0;
0715     struct pt_regs regs;
0716 
0717     if (!event)
0718         return 0;
0719 
0720     if (!x86_pmu.bts_active)
0721         return 0;
0722 
0723     base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
0724     top  = (struct bts_record *)(unsigned long)ds->bts_index;
0725 
0726     if (top <= base)
0727         return 0;
0728 
0729     memset(&regs, 0, sizeof(regs));
0730 
0731     ds->bts_index = ds->bts_buffer_base;
0732 
0733     perf_sample_data_init(&data, 0, event->hw.last_period);
0734 
0735     /*
0736      * BTS leaks kernel addresses in branches across the cpl boundary,
0737      * such as traps or system calls, so unless the user is asking for
0738      * kernel tracing (and right now it's not possible), we'd need to
0739      * filter them out. But first we need to count how many of those we
0740      * have in the current batch. This is an extra O(n) pass, however,
0741      * it's much faster than the other one especially considering that
0742      * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
0743      * alloc_bts_buffer()).
0744      */
0745     for (at = base; at < top; at++) {
0746         /*
0747          * Note that right now *this* BTS code only works if
0748          * attr::exclude_kernel is set, but let's keep this extra
0749          * check here in case that changes.
0750          */
0751         if (event->attr.exclude_kernel &&
0752             (kernel_ip(at->from) || kernel_ip(at->to)))
0753             skip++;
0754     }
0755 
0756     /*
0757      * Prepare a generic sample, i.e. fill in the invariant fields.
0758      * We will overwrite the from and to address before we output
0759      * the sample.
0760      */
0761     rcu_read_lock();
0762     perf_prepare_sample(&header, &data, event, &regs);
0763 
0764     if (perf_output_begin(&handle, &data, event,
0765                   header.size * (top - base - skip)))
0766         goto unlock;
0767 
0768     for (at = base; at < top; at++) {
0769         /* Filter out any records that contain kernel addresses. */
0770         if (event->attr.exclude_kernel &&
0771             (kernel_ip(at->from) || kernel_ip(at->to)))
0772             continue;
0773 
0774         data.ip     = at->from;
0775         data.addr   = at->to;
0776 
0777         perf_output_sample(&handle, &header, &data, event);
0778     }
0779 
0780     perf_output_end(&handle);
0781 
0782     /* There's new data available. */
0783     event->hw.interrupts++;
0784     event->pending_kill = POLL_IN;
0785 unlock:
0786     rcu_read_unlock();
0787     return 1;
0788 }
0789 
0790 static inline void intel_pmu_drain_pebs_buffer(void)
0791 {
0792     struct perf_sample_data data;
0793 
0794     x86_pmu.drain_pebs(NULL, &data);
0795 }
0796 
0797 /*
0798  * PEBS
0799  */
0800 struct event_constraint intel_core2_pebs_event_constraints[] = {
0801     INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
0802     INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
0803     INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
0804     INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
0805     INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
0806     /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
0807     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
0808     EVENT_CONSTRAINT_END
0809 };
0810 
0811 struct event_constraint intel_atom_pebs_event_constraints[] = {
0812     INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
0813     INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
0814     INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
0815     /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
0816     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
0817     /* Allow all events as PEBS with no flags */
0818     INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
0819     EVENT_CONSTRAINT_END
0820 };
0821 
0822 struct event_constraint intel_slm_pebs_event_constraints[] = {
0823     /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
0824     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
0825     /* Allow all events as PEBS with no flags */
0826     INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
0827     EVENT_CONSTRAINT_END
0828 };
0829 
0830 struct event_constraint intel_glm_pebs_event_constraints[] = {
0831     /* Allow all events as PEBS with no flags */
0832     INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
0833     EVENT_CONSTRAINT_END
0834 };
0835 
0836 struct event_constraint intel_grt_pebs_event_constraints[] = {
0837     /* Allow all events as PEBS with no flags */
0838     INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
0839     INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
0840     EVENT_CONSTRAINT_END
0841 };
0842 
0843 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
0844     INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
0845     INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
0846     INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
0847     INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
0848     INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
0849     INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
0850     INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
0851     INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
0852     INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
0853     INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
0854     INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
0855     /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
0856     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
0857     EVENT_CONSTRAINT_END
0858 };
0859 
0860 struct event_constraint intel_westmere_pebs_event_constraints[] = {
0861     INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
0862     INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
0863     INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
0864     INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
0865     INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
0866     INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
0867     INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
0868     INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
0869     INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
0870     INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
0871     INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
0872     /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
0873     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
0874     EVENT_CONSTRAINT_END
0875 };
0876 
0877 struct event_constraint intel_snb_pebs_event_constraints[] = {
0878     INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
0879     INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
0880     INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
0881     /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
0882     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
0883     INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
0884     INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
0885     INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
0886     INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
0887     /* Allow all events as PEBS with no flags */
0888     INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
0889     EVENT_CONSTRAINT_END
0890 };
0891 
0892 struct event_constraint intel_ivb_pebs_event_constraints[] = {
0893     INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
0894     INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
0895     INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
0896     /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
0897     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
0898     /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
0899     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
0900     INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
0901     INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
0902     INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
0903     INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
0904     /* Allow all events as PEBS with no flags */
0905     INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
0906     EVENT_CONSTRAINT_END
0907 };
0908 
0909 struct event_constraint intel_hsw_pebs_event_constraints[] = {
0910     INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
0911     INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
0912     /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
0913     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
0914     /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
0915     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
0916     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
0917     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
0918     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
0919     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
0920     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
0921     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
0922     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
0923     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
0924     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
0925     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
0926     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
0927     /* Allow all events as PEBS with no flags */
0928     INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
0929     EVENT_CONSTRAINT_END
0930 };
0931 
0932 struct event_constraint intel_bdw_pebs_event_constraints[] = {
0933     INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
0934     INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
0935     /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
0936     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
0937     /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
0938     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
0939     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
0940     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
0941     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
0942     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
0943     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
0944     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
0945     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
0946     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
0947     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
0948     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
0949     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
0950     /* Allow all events as PEBS with no flags */
0951     INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
0952     EVENT_CONSTRAINT_END
0953 };
0954 
0955 
0956 struct event_constraint intel_skl_pebs_event_constraints[] = {
0957     INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),  /* INST_RETIRED.PREC_DIST */
0958     /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
0959     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
0960     /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
0961     INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
0962     INTEL_PLD_CONSTRAINT(0x1cd, 0xf),             /* MEM_TRANS_RETIRED.* */
0963     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
0964     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
0965     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
0966     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
0967     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
0968     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
0969     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
0970     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
0971     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
0972     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
0973     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
0974     /* Allow all events as PEBS with no flags */
0975     INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
0976     EVENT_CONSTRAINT_END
0977 };
0978 
0979 struct event_constraint intel_icl_pebs_event_constraints[] = {
0980     INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),  /* old INST_RETIRED.PREC_DIST */
0981     INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),  /* INST_RETIRED.PREC_DIST */
0982     INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */
0983 
0984     INTEL_PLD_CONSTRAINT(0x1cd, 0xff),          /* MEM_TRANS_RETIRED.LOAD_LATENCY */
0985     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
0986     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
0987 
0988     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
0989 
0990     INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),        /* MEM_INST_RETIRED.* */
0991 
0992     /*
0993      * Everything else is handled by PMU_FL_PEBS_ALL, because we
0994      * need the full constraints from the main table.
0995      */
0996 
0997     EVENT_CONSTRAINT_END
0998 };
0999 
1000 struct event_constraint intel_spr_pebs_event_constraints[] = {
1001     INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
1002     INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1003 
1004     INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
1005     INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
1006     INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
1007     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
1008     INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
1009 
1010     INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1011 
1012     INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1013 
1014     /*
1015      * Everything else is handled by PMU_FL_PEBS_ALL, because we
1016      * need the full constraints from the main table.
1017      */
1018 
1019     EVENT_CONSTRAINT_END
1020 };
1021 
1022 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
1023 {
1024     struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
1025     struct event_constraint *c;
1026 
1027     if (!event->attr.precise_ip)
1028         return NULL;
1029 
1030     if (pebs_constraints) {
1031         for_each_event_constraint(c, pebs_constraints) {
1032             if (constraint_match(c, event->hw.config)) {
1033                 event->hw.flags |= c->flags;
1034                 return c;
1035             }
1036         }
1037     }
1038 
1039     /*
1040      * Extended PEBS support
1041      * Makes the PEBS code search the normal constraints.
1042      */
1043     if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1044         return NULL;
1045 
1046     return &emptyconstraint;
1047 }
1048 
1049 /*
1050  * We need the sched_task callback even for per-cpu events when we use
1051  * the large interrupt threshold, such that we can provide PID and TID
1052  * to PEBS samples.
1053  */
1054 static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1055 {
1056     if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1057         return false;
1058 
1059     return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1060 }
1061 
1062 void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
1063 {
1064     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1065 
1066     if (!sched_in && pebs_needs_sched_cb(cpuc))
1067         intel_pmu_drain_pebs_buffer();
1068 }
1069 
1070 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1071 {
1072     struct debug_store *ds = cpuc->ds;
1073     int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
1074     int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
1075     u64 threshold;
1076     int reserved;
1077 
1078     if (cpuc->n_pebs_via_pt)
1079         return;
1080 
1081     if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1082         reserved = max_pebs_events + num_counters_fixed;
1083     else
1084         reserved = max_pebs_events;
1085 
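         /*
          * All-large-PEBS: let the buffer fill up, keeping one record slot
          * per counter that may be armed; otherwise take a PMI after every
          * single record.
          */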
1086     if (cpuc->n_pebs == cpuc->n_large_pebs) {
1087         threshold = ds->pebs_absolute_maximum -
1088             reserved * cpuc->pebs_record_size;
1089     } else {
1090         threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1091     }
1092 
1093     ds->pebs_interrupt_threshold = threshold;
1094 }
1095 
1096 static void adaptive_pebs_record_size_update(void)
1097 {
1098     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1099     u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1100     int sz = sizeof(struct pebs_basic);
1101 
1102     if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1103         sz += sizeof(struct pebs_meminfo);
1104     if (pebs_data_cfg & PEBS_DATACFG_GP)
1105         sz += sizeof(struct pebs_gprs);
1106     if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1107         sz += sizeof(struct pebs_xmm);
1108     if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1109         sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1110 
1111     cpuc->pebs_record_size = sz;
1112 }
1113 
1114 #define PERF_PEBS_MEMINFO_TYPE  (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
1115                 PERF_SAMPLE_PHYS_ADDR |              \
1116                 PERF_SAMPLE_WEIGHT_TYPE |            \
1117                 PERF_SAMPLE_TRANSACTION |            \
1118                 PERF_SAMPLE_DATA_PAGE_SIZE)
1119 
1120 static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1121 {
1122     struct perf_event_attr *attr = &event->attr;
1123     u64 sample_type = attr->sample_type;
1124     u64 pebs_data_cfg = 0;
1125     bool gprs, tsx_weight;
1126 
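         /*
          * The basic group already provides the eventing IP and TSC, so an
          * IP/TIME-only sample with precise_ip > 1 needs no extra groups.
          */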
1127     if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1128         attr->precise_ip > 1)
1129         return pebs_data_cfg;
1130 
1131     if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1132         pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1133 
1134     /*
1135      * We need GPRs when:
1136      * + user requested them
1137      * + precise_ip < 2, for the non-eventing IP
1138      * + For RTM TSX weight we need GPRs for the abort code.
1139      */
1140     gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
1141            (attr->sample_regs_intr & PEBS_GP_REGS);
1142 
1143     tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1144              ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1145               x86_pmu.rtm_abort_event);
1146 
1147     if (gprs || (attr->precise_ip < 2) || tsx_weight)
1148         pebs_data_cfg |= PEBS_DATACFG_GP;
1149 
1150     if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1151         (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1152         pebs_data_cfg |= PEBS_DATACFG_XMMS;
1153 
1154     if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1155         /*
1156          * For now always log all LBRs. Could configure this
1157          * later.
1158          */
1159         pebs_data_cfg |= PEBS_DATACFG_LBRS |
1160             ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1161     }
1162 
1163     return pebs_data_cfg;
1164 }
1165 
1166 static void
1167 pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1168           struct perf_event *event, bool add)
1169 {
1170     struct pmu *pmu = event->ctx->pmu;
1171     /*
1172      * Make sure we get updated with the first PEBS
1173      * event. It will trigger also during removal, but
1174      * that does not hurt:
1175      */
1176     bool update = cpuc->n_pebs == 1;
1177 
1178     if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1179         if (!needed_cb)
1180             perf_sched_cb_inc(pmu);
1181         else
1182             perf_sched_cb_dec(pmu);
1183 
1184         update = true;
1185     }
1186 
1187     /*
1188      * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1189      * iterating all remaining PEBS events to reconstruct the config.
1190      */
1191     if (x86_pmu.intel_cap.pebs_baseline && add) {
1192         u64 pebs_data_cfg;
1193 
1194         /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
1195         if (cpuc->n_pebs == 1) {
1196             cpuc->pebs_data_cfg = 0;
1197             cpuc->pebs_record_size = sizeof(struct pebs_basic);
1198         }
1199 
1200         pebs_data_cfg = pebs_update_adaptive_cfg(event);
1201 
1202         /* Update pebs_record_size if new event requires more data. */
1203         if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
1204             cpuc->pebs_data_cfg |= pebs_data_cfg;
1205             adaptive_pebs_record_size_update();
1206             update = true;
1207         }
1208     }
1209 
1210     if (update)
1211         pebs_update_threshold(cpuc);
1212 }
1213 
1214 void intel_pmu_pebs_add(struct perf_event *event)
1215 {
1216     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1217     struct hw_perf_event *hwc = &event->hw;
1218     bool needed_cb = pebs_needs_sched_cb(cpuc);
1219 
1220     cpuc->n_pebs++;
1221     if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1222         cpuc->n_large_pebs++;
1223     if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1224         cpuc->n_pebs_via_pt++;
1225 
1226     pebs_update_state(needed_cb, cpuc, event, true);
1227 }
1228 
1229 static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1230 {
1231     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1232 
1233     if (!is_pebs_pt(event))
1234         return;
1235 
1236     if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1237         cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1238 }
1239 
1240 static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1241 {
1242     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1243     struct hw_perf_event *hwc = &event->hw;
1244     struct debug_store *ds = cpuc->ds;
1245     u64 value = ds->pebs_event_reset[hwc->idx];
1246     u32 base = MSR_RELOAD_PMC0;
1247     unsigned int idx = hwc->idx;
1248 
1249     if (!is_pebs_pt(event))
1250         return;
1251 
1252     if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1253         cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1254 
1255     cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1256 
1257     if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1258         base = MSR_RELOAD_FIXED_CTR0;
1259         idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1260         if (x86_pmu.intel_cap.pebs_format < 5)
1261             value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
1262         else
1263             value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1264     }
1265     wrmsrl(base + idx, value);
1266 }
1267 
1268 void intel_pmu_pebs_enable(struct perf_event *event)
1269 {
1270     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1271     struct hw_perf_event *hwc = &event->hw;
1272     struct debug_store *ds = cpuc->ds;
1273     unsigned int idx = hwc->idx;
1274 
1275     hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1276 
1277     cpuc->pebs_enabled |= 1ULL << hwc->idx;
1278 
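         /*
          * Before PMU version 5 the load-latency facility is enabled per
          * counter in the upper half of MSR_IA32_PEBS_ENABLE; bit 63
          * enables the precise-store facility.
          */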
1279     if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1280         cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1281     else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1282         cpuc->pebs_enabled |= 1ULL << 63;
1283 
1284     if (x86_pmu.intel_cap.pebs_baseline) {
1285         hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1286         if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1287             wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
1288             cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
1289         }
1290     }
1291 
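         /*
          * Reset values for fixed counters are stored after the GP counter
          * slots in pebs_event_reset[]; how many GP slots there are depends
          * on the PEBS format version.
          */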
1292     if (idx >= INTEL_PMC_IDX_FIXED) {
1293         if (x86_pmu.intel_cap.pebs_format < 5)
1294             idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
1295         else
1296             idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1297     }
1298 
1299     /*
1300      * Use auto-reload if possible to save an MSR write in the PMI.
1301      * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD can change the period.
1302      */
1303     if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1304         ds->pebs_event_reset[idx] =
1305             (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1306     } else {
1307         ds->pebs_event_reset[idx] = 0;
1308     }
1309 
1310     intel_pmu_pebs_via_pt_enable(event);
1311 }
1312 
1313 void intel_pmu_pebs_del(struct perf_event *event)
1314 {
1315     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1316     struct hw_perf_event *hwc = &event->hw;
1317     bool needed_cb = pebs_needs_sched_cb(cpuc);
1318 
1319     cpuc->n_pebs--;
1320     if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1321         cpuc->n_large_pebs--;
1322     if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1323         cpuc->n_pebs_via_pt--;
1324 
1325     pebs_update_state(needed_cb, cpuc, event, false);
1326 }
1327 
1328 void intel_pmu_pebs_disable(struct perf_event *event)
1329 {
1330     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1331     struct hw_perf_event *hwc = &event->hw;
1332 
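         /*
          * If every scheduled PEBS event uses the large buffer, drain it
          * now so buffered records are not lost; output routed to PT needs
          * no software drain.
          */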
1333     if (cpuc->n_pebs == cpuc->n_large_pebs &&
1334         cpuc->n_pebs != cpuc->n_pebs_via_pt)
1335         intel_pmu_drain_pebs_buffer();
1336 
1337     cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1338 
1339     if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1340         (x86_pmu.version < 5))
1341         cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1342     else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1343         cpuc->pebs_enabled &= ~(1ULL << 63);
1344 
1345     intel_pmu_pebs_via_pt_disable(event);
1346 
1347     if (cpuc->enabled)
1348         wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1349 
1350     hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1351 }
1352 
1353 void intel_pmu_pebs_enable_all(void)
1354 {
1355     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1356 
1357     if (cpuc->pebs_enabled)
1358         wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1359 }
1360 
1361 void intel_pmu_pebs_disable_all(void)
1362 {
1363     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1364 
1365     if (cpuc->pebs_enabled)
1366         __intel_pmu_pebs_disable_all();
1367 }
1368 
1369 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1370 {
1371     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1372     unsigned long from = cpuc->lbr_entries[0].from;
1373     unsigned long old_to, to = cpuc->lbr_entries[0].to;
1374     unsigned long ip = regs->ip;
1375     int is_64bit = 0;
1376     void *kaddr;
1377     int size;
1378 
1379     /*
1380      * We don't need to fixup if the PEBS assist is fault like
1381      */
1382     if (!x86_pmu.intel_cap.pebs_trap)
1383         return 1;
1384 
1385     /*
1386      * No LBR entry, no basic block, no rewinding
1387      */
1388     if (!cpuc->lbr_stack.nr || !from || !to)
1389         return 0;
1390 
1391     /*
1392      * Basic blocks should never cross user/kernel boundaries
1393      */
1394     if (kernel_ip(ip) != kernel_ip(to))
1395         return 0;
1396 
1397     /*
1398      * unsigned math, either ip is before the start (impossible) or
1399      * the basic block is larger than 1 page (sanity)
1400      */
1401     if ((ip - to) > PEBS_FIXUP_SIZE)
1402         return 0;
1403 
1404     /*
1405      * We sampled a branch insn, rewind using the LBR stack
1406      */
1407     if (ip == to) {
1408         set_linear_ip(regs, from);
1409         return 1;
1410     }
1411 
1412     size = ip - to;
1413     if (!kernel_ip(ip)) {
1414         int bytes;
1415         u8 *buf = this_cpu_read(insn_buffer);
1416 
1417         /* 'size' must fit our buffer, see above */
1418         bytes = copy_from_user_nmi(buf, (void __user *)to, size);
1419         if (bytes != 0)
1420             return 0;
1421 
1422         kaddr = buf;
1423     } else {
1424         kaddr = (void *)to;
1425     }
1426 
1427     do {
1428         struct insn insn;
1429 
1430         old_to = to;
1431 
1432 #ifdef CONFIG_X86_64
1433         is_64bit = kernel_ip(to) || any_64bit_mode(regs);
1434 #endif
1435         insn_init(&insn, kaddr, size, is_64bit);
1436 
1437         /*
1438          * Make sure there was not a problem decoding the instruction.
1439          * This is doubly important because we have an infinite loop if
1440          * insn.length=0.
1441          */
1442         if (insn_get_length(&insn))
1443             break;
1444 
1445         to += insn.length;
1446         kaddr += insn.length;
1447         size -= insn.length;
1448     } while (to < ip);
1449 
1450     if (to == ip) {
1451         set_linear_ip(regs, old_to);
1452         return 1;
1453     }
1454 
1455     /*
1456      * Even though we decoded the basic block, the instruction stream
1457      * never matched the given IP, either the TO or the IP got corrupted.
1458      */
1459     return 0;
1460 }
1461 
1462 static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
1463 {
1464     if (tsx_tuning) {
1465         union hsw_tsx_tuning tsx = { .value = tsx_tuning };
1466         return tsx.cycles_last_block;
1467     }
1468     return 0;
1469 }
1470 
1471 static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
1472 {
1473     u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
1474 
1475     /* For RTM XABORTs also log the abort code from AX */
1476     if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
1477         txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1478     return txn;
1479 }
1480 
1481 static inline u64 get_pebs_status(void *n)
1482 {
1483     if (x86_pmu.intel_cap.pebs_format < 4)
1484         return ((struct pebs_record_nhm *)n)->status;
1485     return ((struct pebs_basic *)n)->applicable_counters;
1486 }
1487 
1488 #define PERF_X86_EVENT_PEBS_HSW_PREC \
1489         (PERF_X86_EVENT_PEBS_ST_HSW | \
1490          PERF_X86_EVENT_PEBS_LD_HSW | \
1491          PERF_X86_EVENT_PEBS_NA_HSW)
1492 
1493 static u64 get_data_src(struct perf_event *event, u64 aux)
1494 {
1495     u64 val = PERF_MEM_NA;
1496     int fl = event->hw.flags;
1497     bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
1498 
1499     if (fl & PERF_X86_EVENT_PEBS_LDLAT)
1500         val = load_latency_data(event, aux);
1501     else if (fl & PERF_X86_EVENT_PEBS_STLAT)
1502         val = store_latency_data(event, aux);
1503     else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
1504         val = x86_pmu.pebs_latency_data(event, aux);
1505     else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
1506         val = precise_datala_hsw(event, aux);
1507     else if (fst)
1508         val = precise_store_data(aux);
1509     return val;
1510 }
1511 
1512 #define PERF_SAMPLE_ADDR_TYPE   (PERF_SAMPLE_ADDR |     \
1513                  PERF_SAMPLE_PHYS_ADDR |    \
1514                  PERF_SAMPLE_DATA_PAGE_SIZE)
1515 
1516 static void setup_pebs_fixed_sample_data(struct perf_event *event,
1517                    struct pt_regs *iregs, void *__pebs,
1518                    struct perf_sample_data *data,
1519                    struct pt_regs *regs)
1520 {
1521     /*
1522      * We cast to the biggest pebs_record but are careful not to
1523      * unconditionally access the 'extra' entries.
1524      */
1525     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1526     struct pebs_record_skl *pebs = __pebs;
1527     u64 sample_type;
1528     int fll;
1529 
1530     if (pebs == NULL)
1531         return;
1532 
1533     sample_type = event->attr.sample_type;
1534     fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
1535 
1536     perf_sample_data_init(data, 0, event->hw.last_period);
1537 
1538     data->period = event->hw.last_period;
1539 
1540     /*
1541      * Use latency for weight (only avail with PEBS-LL)
1542      */
1543     if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
1544         data->weight.full = pebs->lat;
1545 
1546     /*
1547      * data.data_src encodes the data source
1548      */
1549     if (sample_type & PERF_SAMPLE_DATA_SRC)
1550         data->data_src.val = get_data_src(event, pebs->dse);
1551 
1552     /*
1553      * We must however always use iregs for the unwinder to stay sane; the
1554      * record BP,SP,IP can point into thin air when the record is from a
1555      * previous PMI context or an (I)RET happened between the record and
1556      * PMI.
1557      */
1558     if (sample_type & PERF_SAMPLE_CALLCHAIN)
1559         data->callchain = perf_callchain(event, iregs);
1560 
1561     /*
1562      * We use the interrupt regs as a base because the PEBS record does not
1563      * contain a full regs set, specifically it seems to lack segment
1564      * descriptors, which get used by things like user_mode().
1565      *
1566      * In the simple case fix up only the IP for PERF_SAMPLE_IP.
1567      */
1568     *regs = *iregs;
1569 
1570     /*
1571      * Initialize regs->flags from PEBS and clear the exact bit
1572      * (which uses x86 EFLAGS reserved bit 3), i.e., do not rely
1573      * on it being zero:
1574      */
1575     regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
1576 
1577     if (sample_type & PERF_SAMPLE_REGS_INTR) {
1578         regs->ax = pebs->ax;
1579         regs->bx = pebs->bx;
1580         regs->cx = pebs->cx;
1581         regs->dx = pebs->dx;
1582         regs->si = pebs->si;
1583         regs->di = pebs->di;
1584 
1585         regs->bp = pebs->bp;
1586         regs->sp = pebs->sp;
1587 
1588 #ifndef CONFIG_X86_32
1589         regs->r8 = pebs->r8;
1590         regs->r9 = pebs->r9;
1591         regs->r10 = pebs->r10;
1592         regs->r11 = pebs->r11;
1593         regs->r12 = pebs->r12;
1594         regs->r13 = pebs->r13;
1595         regs->r14 = pebs->r14;
1596         regs->r15 = pebs->r15;
1597 #endif
1598     }
1599 
1600     if (event->attr.precise_ip > 1) {
1601         /*
1602          * Haswell and later processors have an 'eventing IP'
1603          * (real IP) which fixes the off-by-1 skid in hardware.
1604          * Use it when precise_ip >= 2:
1605          */
1606         if (x86_pmu.intel_cap.pebs_format >= 2) {
1607             set_linear_ip(regs, pebs->real_ip);
1608             regs->flags |= PERF_EFLAGS_EXACT;
1609         } else {
1610             /* Otherwise, use PEBS off-by-1 IP: */
1611             set_linear_ip(regs, pebs->ip);
1612 
1613             /*
1614              * With precise_ip >= 2, try to fix up the off-by-1 IP
1615              * using the LBR. If successful, the fixup function
1616              * corrects regs->ip and calls set_linear_ip() on regs:
1617              */
1618             if (intel_pmu_pebs_fixup_ip(regs))
1619                 regs->flags |= PERF_EFLAGS_EXACT;
1620         }
1621     } else {
1622         /*
1623          * When precise_ip == 1, return the PEBS off-by-1 IP,
1624          * no fixup attempted:
1625          */
1626         set_linear_ip(regs, pebs->ip);
1627     }
1628 
1629 
1630     if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
1631         x86_pmu.intel_cap.pebs_format >= 1)
1632         data->addr = pebs->dla;
1633 
1634     if (x86_pmu.intel_cap.pebs_format >= 2) {
1635         /* Only set the TSX weight when no memory weight. */
1636         if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
1637             data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
1638 
1639         if (sample_type & PERF_SAMPLE_TRANSACTION)
1640             data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
1641                                   pebs->ax);
1642     }
1643 
1644     /*
1645      * PEBS v3 supplies an accurate time stamp, so use it for
1646      * the sample time.
1647      *
1648      * We can only do this for the default trace clock.
1649      */
1650     if (x86_pmu.intel_cap.pebs_format >= 3 &&
1651         event->attr.use_clockid == 0)
1652         data->time = native_sched_clock_from_tsc(pebs->tsc);
1653 
1654     if (has_branch_stack(event))
1655         data->br_stack = &cpuc->lbr_stack;
1656 }
1657 
1658 static void adaptive_pebs_save_regs(struct pt_regs *regs,
1659                     struct pebs_gprs *gprs)
1660 {
1661     regs->ax = gprs->ax;
1662     regs->bx = gprs->bx;
1663     regs->cx = gprs->cx;
1664     regs->dx = gprs->dx;
1665     regs->si = gprs->si;
1666     regs->di = gprs->di;
1667     regs->bp = gprs->bp;
1668     regs->sp = gprs->sp;
1669 #ifndef CONFIG_X86_32
1670     regs->r8 = gprs->r8;
1671     regs->r9 = gprs->r9;
1672     regs->r10 = gprs->r10;
1673     regs->r11 = gprs->r11;
1674     regs->r12 = gprs->r12;
1675     regs->r13 = gprs->r13;
1676     regs->r14 = gprs->r14;
1677     regs->r15 = gprs->r15;
1678 #endif
1679 }
1680 
1681 #define PEBS_LATENCY_MASK           0xffff
1682 #define PEBS_CACHE_LATENCY_OFFSET       32
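/*
 * Layout of pebs_meminfo::latency as consumed below: on PMUs that set
 * PMU_FL_INSTR_LATENCY the low 16 bits carry the instruction latency
 * and the cache latency sits above bit 32; otherwise the (32-bit
 * valid) value is the cache/load latency itself.
 */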
1683 
1684 /*
1685  * With adaptive PEBS the layout depends on what fields are configured.
1686  */
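/*
 * The optional groups, when selected via MSR_PEBS_DATA_CFG, follow the
 * basic group in the order they are parsed below:
 *
 *   pebs_basic | pebs_meminfo | pebs_gprs | pebs_xmm | lbr_entry[n]
 *
 * basic->format_size carries the total record size in bits 63:48 and
 * the PEBS_DATACFG_* bits describing which groups are present.
 */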
1687 
1688 static void setup_pebs_adaptive_sample_data(struct perf_event *event,
1689                         struct pt_regs *iregs, void *__pebs,
1690                         struct perf_sample_data *data,
1691                         struct pt_regs *regs)
1692 {
1693     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1694     struct pebs_basic *basic = __pebs;
1695     void *next_record = basic + 1;
1696     u64 sample_type;
1697     u64 format_size;
1698     struct pebs_meminfo *meminfo = NULL;
1699     struct pebs_gprs *gprs = NULL;
1700     struct x86_perf_regs *perf_regs;
1701 
1702     if (basic == NULL)
1703         return;
1704 
1705     perf_regs = container_of(regs, struct x86_perf_regs, regs);
1706     perf_regs->xmm_regs = NULL;
1707 
1708     sample_type = event->attr.sample_type;
1709     format_size = basic->format_size;
1710     perf_sample_data_init(data, 0, event->hw.last_period);
1711     data->period = event->hw.last_period;
1712 
1713     if (event->attr.use_clockid == 0)
1714         data->time = native_sched_clock_from_tsc(basic->tsc);
1715 
1716     /*
1717      * We must however always use iregs for the unwinder to stay sane; the
1718      * record BP,SP,IP can point into thin air when the record is from a
1719      * previous PMI context or an (I)RET happened between the record and
1720      * PMI.
1721      */
1722     if (sample_type & PERF_SAMPLE_CALLCHAIN)
1723         data->callchain = perf_callchain(event, iregs);
1724 
1725     *regs = *iregs;
1726     /* The ip in basic is EventingIP */
1727     set_linear_ip(regs, basic->ip);
1728     regs->flags = PERF_EFLAGS_EXACT;
1729 
1730     /*
1731      * The MEMINFO group precedes the GP group in the record,
1732      * but PERF_SAMPLE_TRANSACTION needs gprs->ax.
1733      * Save the pointer here and process it later.
1734      */
1735     if (format_size & PEBS_DATACFG_MEMINFO) {
1736         meminfo = next_record;
1737         next_record = meminfo + 1;
1738     }
1739 
1740     if (format_size & PEBS_DATACFG_GP) {
1741         gprs = next_record;
1742         next_record = gprs + 1;
1743 
1744         if (event->attr.precise_ip < 2) {
1745             set_linear_ip(regs, gprs->ip);
1746             regs->flags &= ~PERF_EFLAGS_EXACT;
1747         }
1748 
1749         if (sample_type & PERF_SAMPLE_REGS_INTR)
1750             adaptive_pebs_save_regs(regs, gprs);
1751     }
1752 
1753     if (format_size & PEBS_DATACFG_MEMINFO) {
1754         if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1755             u64 weight = meminfo->latency;
1756 
1757             if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
1758                 data->weight.var2_w = weight & PEBS_LATENCY_MASK;
1759                 weight >>= PEBS_CACHE_LATENCY_OFFSET;
1760             }
1761 
1762             /*
1763              * Although meminfo::latency is defined as a u64,
1764              * only the lower 32 bits include the valid data
1765              * in practice on Ice Lake and earlier platforms.
1766              */
1767             if (sample_type & PERF_SAMPLE_WEIGHT) {
1768                 data->weight.full = weight ?:
1769                     intel_get_tsx_weight(meminfo->tsx_tuning);
1770             } else {
1771                 data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
1772                     intel_get_tsx_weight(meminfo->tsx_tuning);
1773             }
1774         }
1775 
1776         if (sample_type & PERF_SAMPLE_DATA_SRC)
1777             data->data_src.val = get_data_src(event, meminfo->aux);
1778 
1779         if (sample_type & PERF_SAMPLE_ADDR_TYPE)
1780             data->addr = meminfo->address;
1781 
1782         if (sample_type & PERF_SAMPLE_TRANSACTION)
1783             data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
1784                               gprs ? gprs->ax : 0);
1785     }
1786 
1787     if (format_size & PEBS_DATACFG_XMMS) {
1788         struct pebs_xmm *xmm = next_record;
1789 
1790         next_record = xmm + 1;
1791         perf_regs->xmm_regs = xmm->xmm;
1792     }
1793 
1794     if (format_size & PEBS_DATACFG_LBRS) {
1795         struct lbr_entry *lbr = next_record;
1796         int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
1797                     & 0xff) + 1;
1798         next_record = next_record + num_lbr * sizeof(struct lbr_entry);
1799 
1800         if (has_branch_stack(event)) {
1801             intel_pmu_store_pebs_lbrs(lbr);
1802             data->br_stack = &cpuc->lbr_stack;
1803         }
1804     }
1805 
1806     WARN_ONCE(next_record != __pebs + (format_size >> 48),
1807             "PEBS record size %llu, expected %llu, config %llx\n",
1808             format_size >> 48,
1809             (u64)(next_record - __pebs),
1810             basic->format_size);
1811 }
1812 
1813 static inline void *
1814 get_next_pebs_record_by_bit(void *base, void *top, int bit)
1815 {
1816     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1817     void *at;
1818     u64 pebs_status;
1819 
1820     /*
1821      * fmt0 does not have a status bitfield (does not use
1822      * the pebs_record_nhm format)
1823      */
1824     if (x86_pmu.intel_cap.pebs_format < 1)
1825         return base;
1826 
1827     if (base == NULL)
1828         return NULL;
1829 
1830     for (at = base; at < top; at += cpuc->pebs_record_size) {
1831         unsigned long status = get_pebs_status(at);
1832 
1833         if (test_bit(bit, (unsigned long *)&status)) {
1834             /* PEBS v3 has accurate status bits */
1835             if (x86_pmu.intel_cap.pebs_format >= 3)
1836                 return at;
1837 
1838             if (status == (1 << bit))
1839                 return at;
1840 
1841             /* clear non-PEBS bit and re-check */
1842             pebs_status = status & cpuc->pebs_enabled;
1843             pebs_status &= PEBS_COUNTER_MASK;
1844             if (pebs_status == (1 << bit))
1845                 return at;
1846         }
1847     }
1848     return NULL;
1849 }
1850 
1851 void intel_pmu_auto_reload_read(struct perf_event *event)
1852 {
1853     WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
1854 
1855     perf_pmu_disable(event->pmu);
1856     intel_pmu_drain_pebs_buffer();
1857     perf_pmu_enable(event->pmu);
1858 }
1859 
1860 /*
1861  * Special variant of intel_pmu_save_and_restart() for auto-reload.
1862  */
1863 static int
1864 intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
1865 {
1866     struct hw_perf_event *hwc = &event->hw;
1867     int shift = 64 - x86_pmu.cntval_bits;
1868     u64 period = hwc->sample_period;
1869     u64 prev_raw_count, new_raw_count;
1870     s64 new, old;
1871 
1872     WARN_ON(!period);
1873 
1874     /*
1875      * drain_pebs() only happens when the PMU is disabled.
1876      */
1877     WARN_ON(this_cpu_read(cpu_hw_events.enabled));
1878 
1879     prev_raw_count = local64_read(&hwc->prev_count);
1880     rdpmcl(hwc->event_base_rdpmc, new_raw_count);
1881     local64_set(&hwc->prev_count, new_raw_count);
1882 
1883     /*
1884      * Since the counter counts up from a negative value and
1885      * overflows on the sign switch, giving the interval:
1886      *
1887      *   [-period, 0]
1888      *
1889      * the difference between two consecutive reads is:
1890      *
1891      *   A) value2 - value1;
1892      *      when no overflows have happened in between,
1893      *
1894      *   B) (0 - value1) + (value2 - (-period));
1895      *      when one overflow happened in between,
1896      *
1897      *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
1898      *      when @n overflows happened in between.
1899      *
1900      * Here A) is the obvious difference, B) is the extension to the
1901      * discrete interval, where the first term covers the distance to
1902      * the top of the interval and the second term the distance from
1903      * the bottom of the next interval, and C) is the extension to
1904      * multiple intervals, where the middle term covers the intervals in between.
1905      *
1906      * An equivalent of C, by reduction, is:
1907      *
1908      *   value2 - value1 + n * period
1909      */
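    /*
     * Worked example (illustrative values, not from the original
     * source): with period = 100, one overflow in between (count == 1),
     * value1 = -30 and value2 = -80, case B) gives
     *
     *   (0 - (-30)) + ((-80) - (-100)) = 30 + 20 = 50
     *
     * which matches value2 - value1 + n * period = -50 + 100 = 50,
     * the form computed below as new - old + count * period.
     */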
1910     new = ((s64)(new_raw_count << shift) >> shift);
1911     old = ((s64)(prev_raw_count << shift) >> shift);
1912     local64_add(new - old + count * period, &event->count);
1913 
1914     local64_set(&hwc->period_left, -new);
1915 
1916     perf_event_update_userpage(event);
1917 
1918     return 0;
1919 }
1920 
1921 static __always_inline void
1922 __intel_pmu_pebs_event(struct perf_event *event,
1923                struct pt_regs *iregs,
1924                struct perf_sample_data *data,
1925                void *base, void *top,
1926                int bit, int count,
1927                void (*setup_sample)(struct perf_event *,
1928                         struct pt_regs *,
1929                         void *,
1930                         struct perf_sample_data *,
1931                         struct pt_regs *))
1932 {
1933     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1934     struct hw_perf_event *hwc = &event->hw;
1935     struct x86_perf_regs perf_regs;
1936     struct pt_regs *regs = &perf_regs.regs;
1937     void *at = get_next_pebs_record_by_bit(base, top, bit);
1938     static struct pt_regs dummy_iregs;
1939 
1940     if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1941         /*
1942          * Auto-reload is currently only enabled in fixed-period
1943          * mode, so the reload value is always hwc->sample_period.
1944          * This may need to change if auto-reload is ever enabled
1945          * in freq mode.
1946          */
1947         intel_pmu_save_and_restart_reload(event, count);
1948     } else if (!intel_pmu_save_and_restart(event))
1949         return;
1950 
1951     if (!iregs)
1952         iregs = &dummy_iregs;
1953 
1954     while (count > 1) {
1955         setup_sample(event, iregs, at, data, regs);
1956         perf_event_output(event, data, regs);
1957         at += cpuc->pebs_record_size;
1958         at = get_next_pebs_record_by_bit(at, top, bit);
1959         count--;
1960     }
1961 
1962     setup_sample(event, iregs, at, data, regs);
1963     if (iregs == &dummy_iregs) {
1964         /*
1965          * The PEBS records may be drained in the non-overflow context,
1966          * e.g., large PEBS + context switch. Perf should treat the
1967          * last record the same as the other PEBS records and not
1968          * invoke the generic overflow handler.
1969          */
1970         perf_event_output(event, data, regs);
1971     } else {
1972         /*
1973          * All but the last records are processed.
1974          * The last one is left to be able to call the overflow handler.
1975          */
1976         if (perf_event_overflow(event, data, regs))
1977             x86_pmu_stop(event, 0);
1978     }
1979 }
1980 
1981 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
1982 {
1983     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1984     struct debug_store *ds = cpuc->ds;
1985     struct perf_event *event = cpuc->events[0]; /* PMC0 only */
1986     struct pebs_record_core *at, *top;
1987     int n;
1988 
1989     if (!x86_pmu.pebs_active)
1990         return;
1991 
1992     at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
1993     top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
1994 
1995     /*
1996      * Whatever else happens, drain the thing
1997      */
1998     ds->pebs_index = ds->pebs_buffer_base;
1999 
2000     if (!test_bit(0, cpuc->active_mask))
2001         return;
2002 
2003     WARN_ON_ONCE(!event);
2004 
2005     if (!event->attr.precise_ip)
2006         return;
2007 
2008     n = top - at;
2009     if (n <= 0) {
2010         if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2011             intel_pmu_save_and_restart_reload(event, 0);
2012         return;
2013     }
2014 
2015     __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
2016                    setup_pebs_fixed_sample_data);
2017 }
2018 
2019 static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
2020 {
2021     struct perf_event *event;
2022     int bit;
2023 
2024     /*
2025      * drain_pebs() may be called twice in a short period for an
2026      * auto-reload event in pmu::read(), with no overflows having
2027      * happened in between.
2028      * intel_pmu_save_and_restart_reload() must still be called to
2029      * update event->count for this case.
2030      */
2031     for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
2032         event = cpuc->events[bit];
2033         if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2034             intel_pmu_save_and_restart_reload(event, 0);
2035     }
2036 }
2037 
2038 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
2039 {
2040     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2041     struct debug_store *ds = cpuc->ds;
2042     struct perf_event *event;
2043     void *base, *at, *top;
2044     short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2045     short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2046     int bit, i, size;
2047     u64 mask;
2048 
2049     if (!x86_pmu.pebs_active)
2050         return;
2051 
2052     base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
2053     top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
2054 
2055     ds->pebs_index = ds->pebs_buffer_base;
2056 
2057     mask = (1ULL << x86_pmu.max_pebs_events) - 1;
2058     size = x86_pmu.max_pebs_events;
2059     if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
2060         mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
2061         size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
2062     }
2063 
2064     if (unlikely(base >= top)) {
2065         intel_pmu_pebs_event_update_no_drain(cpuc, size);
2066         return;
2067     }
2068 
2069     for (at = base; at < top; at += x86_pmu.pebs_record_size) {
2070         struct pebs_record_nhm *p = at;
2071         u64 pebs_status;
2072 
2073         pebs_status = p->status & cpuc->pebs_enabled;
2074         pebs_status &= mask;
2075 
2076         /* PEBS v3 has more accurate status bits */
2077         if (x86_pmu.intel_cap.pebs_format >= 3) {
2078             for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2079                 counts[bit]++;
2080 
2081             continue;
2082         }
2083 
2084         /*
2085          * On some CPUs the PEBS status can be zero when PEBS is
2086          * racing with clearing of GLOBAL_STATUS.
2087          *
2088          * Normally we would drop that record, but in the
2089          * case when there is only a single active PEBS event
2090          * we can assume it's for that event.
2091          */
2092         if (!pebs_status && cpuc->pebs_enabled &&
2093             !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
2094             pebs_status = p->status = cpuc->pebs_enabled;
2095 
2096         bit = find_first_bit((unsigned long *)&pebs_status,
2097                     x86_pmu.max_pebs_events);
2098         if (bit >= x86_pmu.max_pebs_events)
2099             continue;
2100 
2101         /*
2102          * The PEBS hardware does not deal well with the situation
2103          * when events happen close to each other and multiple status
2104          * bits are set. But this should happen rarely.
2105          *
2106          * If these events include one PEBS and multiple non-PEBS
2107          * events, the PEBS record is not affected and will be
2108          * handled normally. (slow path)
2109          *
2110          * If these events include two or more PEBS events, their
2111          * records can be collapsed into a single one, and it is not
2112          * possible to reconstruct all of the events that caused the
2113          * PEBS record. This is called a collision.
2114          * If a collision happens, the record is dropped.
2115          */
2116         if (pebs_status != (1ULL << bit)) {
2117             for_each_set_bit(i, (unsigned long *)&pebs_status, size)
2118                 error[i]++;
2119             continue;
2120         }
2121 
2122         counts[bit]++;
2123     }
2124 
2125     for_each_set_bit(bit, (unsigned long *)&mask, size) {
2126         if ((counts[bit] == 0) && (error[bit] == 0))
2127             continue;
2128 
2129         event = cpuc->events[bit];
2130         if (WARN_ON_ONCE(!event))
2131             continue;
2132 
2133         if (WARN_ON_ONCE(!event->attr.precise_ip))
2134             continue;
2135 
2136         /* log the number of dropped samples */
2137         if (error[bit]) {
2138             perf_log_lost_samples(event, error[bit]);
2139 
2140             if (iregs && perf_event_account_interrupt(event))
2141                 x86_pmu_stop(event, 0);
2142         }
2143 
2144         if (counts[bit]) {
2145             __intel_pmu_pebs_event(event, iregs, data, base,
2146                            top, bit, counts[bit],
2147                            setup_pebs_fixed_sample_data);
2148         }
2149     }
2150 }
2151 
2152 static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2153 {
2154     short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2155     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2156     int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
2157     int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
2158     struct debug_store *ds = cpuc->ds;
2159     struct perf_event *event;
2160     void *base, *at, *top;
2161     int bit, size;
2162     u64 mask;
2163 
2164     if (!x86_pmu.pebs_active)
2165         return;
2166 
2167     base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
2168     top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
2169 
2170     ds->pebs_index = ds->pebs_buffer_base;
2171 
2172     mask = ((1ULL << max_pebs_events) - 1) |
2173            (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
2174     size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
2175 
2176     if (unlikely(base >= top)) {
2177         intel_pmu_pebs_event_update_no_drain(cpuc, size);
2178         return;
2179     }
2180 
2181     for (at = base; at < top; at += cpuc->pebs_record_size) {
2182         u64 pebs_status;
2183 
2184         pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2185         pebs_status &= mask;
2186 
2187         for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2188             counts[bit]++;
2189     }
2190 
2191     for_each_set_bit(bit, (unsigned long *)&mask, size) {
2192         if (counts[bit] == 0)
2193             continue;
2194 
2195         event = cpuc->events[bit];
2196         if (WARN_ON_ONCE(!event))
2197             continue;
2198 
2199         if (WARN_ON_ONCE(!event->attr.precise_ip))
2200             continue;
2201 
2202         __intel_pmu_pebs_event(event, iregs, data, base,
2203                        top, bit, counts[bit],
2204                        setup_pebs_adaptive_sample_data);
2205     }
2206 }
2207 
2208 /*
2209  * BTS, PEBS probe and setup
2210  */
2211 
2212 void __init intel_ds_init(void)
2213 {
2214     /*
2215      * No support for 32bit formats
2216      */
2217     if (!boot_cpu_has(X86_FEATURE_DTES64))
2218         return;
2219 
2220     x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
2221     x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
2222     x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
2223     if (x86_pmu.version <= 4)
2224         x86_pmu.pebs_no_isolation = 1;
2225 
2226     if (x86_pmu.pebs) {
2227         char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
2228         char *pebs_qual = "";
2229         int format = x86_pmu.intel_cap.pebs_format;
2230 
2231         if (format < 4)
2232             x86_pmu.intel_cap.pebs_baseline = 0;
2233 
2234         switch (format) {
2235         case 0:
2236             pr_cont("PEBS fmt0%c, ", pebs_type);
2237             x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
2238             /*
2239              * Using >PAGE_SIZE buffers makes the WRMSR to
2240              * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
2241              * mysteriously hang on Core2.
2242              *
2243              * As a workaround, we don't do this.
2244              */
2245             x86_pmu.pebs_buffer_size = PAGE_SIZE;
2246             x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
2247             break;
2248 
2249         case 1:
2250             pr_cont("PEBS fmt1%c, ", pebs_type);
2251             x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
2252             x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2253             break;
2254 
2255         case 2:
2256             pr_cont("PEBS fmt2%c, ", pebs_type);
2257             x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
2258             x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2259             break;
2260 
2261         case 3:
2262             pr_cont("PEBS fmt3%c, ", pebs_type);
2263             x86_pmu.pebs_record_size =
2264                         sizeof(struct pebs_record_skl);
2265             x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2266             x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
2267             break;
2268 
2269         case 4:
2270         case 5:
2271             x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
2272             x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
2273             if (x86_pmu.intel_cap.pebs_baseline) {
2274                 x86_pmu.large_pebs_flags |=
2275                     PERF_SAMPLE_BRANCH_STACK |
2276                     PERF_SAMPLE_TIME;
2277                 x86_pmu.flags |= PMU_FL_PEBS_ALL;
2278                 x86_pmu.pebs_capable = ~0ULL;
2279                 pebs_qual = "-baseline";
2280                 x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2281             } else {
2282                 /* Only basic record supported */
2283                 x86_pmu.large_pebs_flags &=
2284                     ~(PERF_SAMPLE_ADDR |
2285                       PERF_SAMPLE_TIME |
2286                       PERF_SAMPLE_DATA_SRC |
2287                       PERF_SAMPLE_TRANSACTION |
2288                       PERF_SAMPLE_REGS_USER |
2289                       PERF_SAMPLE_REGS_INTR);
2290             }
2291             pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
2292 
2293             if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
2294                 pr_cont("PEBS-via-PT, ");
2295                 x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
2296             }
2297 
2298             break;
2299 
2300         default:
2301             pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
2302             x86_pmu.pebs = 0;
2303         }
2304     }
2305 }
2306 
2307 void perf_restore_debug_store(void)
2308 {
2309     struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2310 
2311     if (!x86_pmu.bts && !x86_pmu.pebs)
2312         return;
2313 
2314     wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
2315 }