Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Driver for Intel Xeon Phi "Knights Corner" PMU */
0003 
0004 #include <linux/perf_event.h>
0005 #include <linux/types.h>
0006 
0007 #include <asm/hardirq.h>
0008 
0009 #include "../perf_event.h"
0010 
/*
 * Translation table from the generic perf hardware event enumeration
 * to the raw KNC event-select encodings programmed into the EVNTSEL
 * MSRs.  Indexed by PERF_COUNT_HW_* via knc_pmu_event_map().
 */
static const u64 knc_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]        = 0x002a,
  [PERF_COUNT_HW_INSTRUCTIONS]      = 0x0016,
  [PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0028,
  [PERF_COUNT_HW_CACHE_MISSES]      = 0x0029,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x0012,
  [PERF_COUNT_HW_BRANCH_MISSES]     = 0x002b,
};
0020 
/*
 * Generic-cache event translation table for KNC, copied into the core
 * x86 perf hw_cache_event_ids[] at init time (see knc_pmu_init()).
 *
 * Entry conventions follow the x86 perf core:
 *   -1  : combination is not supported by the hardware;
 *    0  : no event is available for this combination;
 *   else: raw event encoding for the EVNTSEL MSR.
 */
static const u64 __initconst knc_hw_cache_event_ids
                [PERF_COUNT_HW_CACHE_MAX]
                [PERF_COUNT_HW_CACHE_OP_MAX]
                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
    [ C(OP_READ) ] = {
        /* On Xeon Phi event "0" is a valid DATA_READ          */
        /*   (L1 Data Cache Reads) Instruction.                */
        /* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
        /* bit will always be set in x86_pmu_hw_config().      */
        [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
                        /* DATA_READ           */
        [ C(RESULT_MISS)   ] = 0x0003,  /* DATA_READ_MISS      */
    },
    [ C(OP_WRITE) ] = {
        [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE          */
        [ C(RESULT_MISS)   ] = 0x0004,  /* DATA_WRITE_MISS     */
    },
    [ C(OP_PREFETCH) ] = {
        [ C(RESULT_ACCESS) ] = 0x0011,  /* L1_DATA_PF1         */
        [ C(RESULT_MISS)   ] = 0x001c,  /* L1_DATA_PF1_MISS    */
    },
 },
 [ C(L1I ) ] = {
    [ C(OP_READ) ] = {
        [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ          */
        [ C(RESULT_MISS)   ] = 0x000e,  /* CODE_CACHE_MISS    */
    },
    [ C(OP_WRITE) ] = {
        /* writes to the instruction cache are not a meaningful event */
        [ C(RESULT_ACCESS) ] = -1,
        [ C(RESULT_MISS)   ] = -1,
    },
    [ C(OP_PREFETCH) ] = {
        [ C(RESULT_ACCESS) ] = 0x0,
        [ C(RESULT_MISS)   ] = 0x0,
    },
 },
 [ C(LL  ) ] = {
    [ C(OP_READ) ] = {
        [ C(RESULT_ACCESS) ] = 0,
        [ C(RESULT_MISS)   ] = 0x10cb,  /* L2_READ_MISS */
    },
    [ C(OP_WRITE) ] = {
        [ C(RESULT_ACCESS) ] = 0x10cc,  /* L2_WRITE_HIT */
        [ C(RESULT_MISS)   ] = 0,
    },
    [ C(OP_PREFETCH) ] = {
        [ C(RESULT_ACCESS) ] = 0x10fc,  /* L2_DATA_PF2      */
        [ C(RESULT_MISS)   ] = 0x10fe,  /* L2_DATA_PF2_MISS */
    },
 },
 [ C(DTLB) ] = {
    [ C(OP_READ) ] = {
        [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
                        /* DATA_READ */
                        /* see note on L1 OP_READ */
        [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
    },
    [ C(OP_WRITE) ] = {
        [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
        [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
    },
    [ C(OP_PREFETCH) ] = {
        [ C(RESULT_ACCESS) ] = 0x0,
        [ C(RESULT_MISS)   ] = 0x0,
    },
 },
 [ C(ITLB) ] = {
    [ C(OP_READ) ] = {
        [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
        [ C(RESULT_MISS)   ] = 0x000d,  /* CODE_PAGE_WALK */
    },
    [ C(OP_WRITE) ] = {
        [ C(RESULT_ACCESS) ] = -1,
        [ C(RESULT_MISS)   ] = -1,
    },
    [ C(OP_PREFETCH) ] = {
        [ C(RESULT_ACCESS) ] = -1,
        [ C(RESULT_MISS)   ] = -1,
    },
 },
 [ C(BPU ) ] = {
    [ C(OP_READ) ] = {
        [ C(RESULT_ACCESS) ] = 0x0012,  /* BRANCHES */
        [ C(RESULT_MISS)   ] = 0x002b,  /* BRANCHES_MISPREDICTED */
    },
    [ C(OP_WRITE) ] = {
        [ C(RESULT_ACCESS) ] = -1,
        [ C(RESULT_MISS)   ] = -1,
    },
    [ C(OP_PREFETCH) ] = {
        [ C(RESULT_ACCESS) ] = -1,
        [ C(RESULT_MISS)   ] = -1,
    },
 },
};
0118 
0119 
0120 static u64 knc_pmu_event_map(int hw_event)
0121 {
0122     return knc_perfmon_event_map[hw_event];
0123 }
0124 
/*
 * Scheduling constraints: the events below can only be counted on
 * counter 0 (counter bitmask 0x1); they are mostly L2/snoop events.
 * Consumed by x86_get_event_constraints().
 */
static struct event_constraint knc_event_constraints[] =
{
    INTEL_EVENT_CONSTRAINT(0xc3, 0x1),  /* HWP_L2HIT */
    INTEL_EVENT_CONSTRAINT(0xc4, 0x1),  /* HWP_L2MISS */
    INTEL_EVENT_CONSTRAINT(0xc8, 0x1),  /* L2_READ_HIT_E */
    INTEL_EVENT_CONSTRAINT(0xc9, 0x1),  /* L2_READ_HIT_M */
    INTEL_EVENT_CONSTRAINT(0xca, 0x1),  /* L2_READ_HIT_S */
    INTEL_EVENT_CONSTRAINT(0xcb, 0x1),  /* L2_READ_MISS */
    INTEL_EVENT_CONSTRAINT(0xcc, 0x1),  /* L2_WRITE_HIT */
    INTEL_EVENT_CONSTRAINT(0xce, 0x1),  /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
    INTEL_EVENT_CONSTRAINT(0xcf, 0x1),  /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
    INTEL_EVENT_CONSTRAINT(0xd7, 0x1),  /* L2_VICTIM_REQ_WITH_DATA */
    INTEL_EVENT_CONSTRAINT(0xe3, 0x1),  /* SNP_HITM_BUNIT */
    INTEL_EVENT_CONSTRAINT(0xe6, 0x1),  /* SNP_HIT_L2 */
    INTEL_EVENT_CONSTRAINT(0xe7, 0x1),  /* SNP_HITM_L2 */
    INTEL_EVENT_CONSTRAINT(0xf1, 0x1),  /* L2_DATA_READ_MISS_CACHE_FILL */
    INTEL_EVENT_CONSTRAINT(0xf2, 0x1),  /* L2_DATA_WRITE_MISS_CACHE_FILL */
    INTEL_EVENT_CONSTRAINT(0xf6, 0x1),  /* L2_DATA_READ_MISS_MEM_FILL */
    INTEL_EVENT_CONSTRAINT(0xf7, 0x1),  /* L2_DATA_WRITE_MISS_MEM_FILL */
    INTEL_EVENT_CONSTRAINT(0xfc, 0x1),  /* L2_DATA_PF2 */
    INTEL_EVENT_CONSTRAINT(0xfd, 0x1),  /* L2_DATA_PF2_DROP */
    INTEL_EVENT_CONSTRAINT(0xfe, 0x1),  /* L2_DATA_PF2_MISS */
    INTEL_EVENT_CONSTRAINT(0xff, 0x1),  /* L2_DATA_HIT_INFLIGHT_PF2 */
    EVENT_CONSTRAINT_END
};
0150 
/*
 * KNC uses chip-specific MSR addresses for the global perfmon control
 * registers (they differ from the architectural IA32_PERF_GLOBAL_* set).
 */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS     0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL    0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL       0x0000002f

/* Per-counter enable bits in MSR_KNC_IA32_PERF_GLOBAL_CTRL. */
#define KNC_ENABLE_COUNTER0         0x00000001
#define KNC_ENABLE_COUNTER1         0x00000002
0157 
0158 static void knc_pmu_disable_all(void)
0159 {
0160     u64 val;
0161 
0162     rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
0163     val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
0164     wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
0165 }
0166 
0167 static void knc_pmu_enable_all(int added)
0168 {
0169     u64 val;
0170 
0171     rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
0172     val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
0173     wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
0174 }
0175 
0176 static inline void
0177 knc_pmu_disable_event(struct perf_event *event)
0178 {
0179     struct hw_perf_event *hwc = &event->hw;
0180     u64 val;
0181 
0182     val = hwc->config;
0183     val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
0184 
0185     (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
0186 }
0187 
0188 static void knc_pmu_enable_event(struct perf_event *event)
0189 {
0190     struct hw_perf_event *hwc = &event->hw;
0191     u64 val;
0192 
0193     val = hwc->config;
0194     val |= ARCH_PERFMON_EVENTSEL_ENABLE;
0195 
0196     (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
0197 }
0198 
0199 static inline u64 knc_pmu_get_status(void)
0200 {
0201     u64 status;
0202 
0203     rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
0204 
0205     return status;
0206 }
0207 
/*
 * Acknowledge (clear) the given overflow bits by writing them to the
 * KNC overflow-control MSR.
 */
static inline void knc_pmu_ack_status(u64 ack)
{
    wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
}
0212 
/*
 * PMI handler: drain all pending counter overflows, pushing a sample
 * for each overflowed, still-active event.
 *
 * Returns the number of overflow bits seen (0 => the interrupt was
 * not ours).  Counters are globally disabled while we run and only
 * re-enabled on exit if the PMU was enabled when we entered.
 */
static int knc_pmu_handle_irq(struct pt_regs *regs)
{
    struct perf_sample_data data;
    struct cpu_hw_events *cpuc;
    int handled = 0;
    int bit, loops;
    u64 status;

    cpuc = this_cpu_ptr(&cpu_hw_events);

    knc_pmu_disable_all();

    status = knc_pmu_get_status();
    if (!status) {
        /* not our interrupt; restore counters and bail */
        knc_pmu_enable_all(0);
        return handled;
    }

    loops = 0;
again:
    /* ack first: new overflows arriving while we process re-set status */
    knc_pmu_ack_status(status);
    if (++loops > 100) {
        /* safety valve against a wedged PMU re-raising forever */
        WARN_ONCE(1, "perf: irq loop stuck!\n");
        perf_event_print_debug();
        goto done;
    }

    inc_irq_stat(apic_perf_irqs);

    for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
        struct perf_event *event = cpuc->events[bit];

        /* count every overflow bit, even for now-inactive counters */
        handled++;

        if (!test_bit(bit, cpuc->active_mask))
            continue;

        /* reprogram the period; skip sample if the event is done */
        if (!intel_pmu_save_and_restart(event))
            continue;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        /* throttled / over limit: stop the event */
        if (perf_event_overflow(event, &data, regs))
            x86_pmu_stop(event, 0);
    }

    /*
     * Repeat if there is more work to be done:
     */
    status = knc_pmu_get_status();
    if (status)
        goto again;

done:
    /* Only restore PMU state when it's active. See x86_pmu_disable(). */
    if (cpuc->enabled)
        knc_pmu_enable_all(0);

    return handled;
}
0273 
0274 
/*
 * sysfs "format" attributes: describe how bits of perf_event_attr::config
 * map onto the KNC event-select MSR fields.
 */
PMU_FORMAT_ATTR(event,  "config:0-7"    );
PMU_FORMAT_ATTR(umask,  "config:8-15"   );
PMU_FORMAT_ATTR(edge,   "config:18" );
PMU_FORMAT_ATTR(inv,    "config:23" );
PMU_FORMAT_ATTR(cmask,  "config:24-31"  );

static struct attribute *intel_knc_formats_attr[] = {
    &format_attr_event.attr,
    &format_attr_umask.attr,
    &format_attr_edge.attr,
    &format_attr_inv.attr,
    &format_attr_cmask.attr,
    NULL,
};
0289 
/*
 * PMU description for Knights Corner: 2 fixed-function-free general
 * counters, 40 bits wide, programmed via MSR_KNC_EVNTSEL0/PERFCTR0.
 * Copied into the global x86_pmu by knc_pmu_init().
 */
static const struct x86_pmu knc_pmu __initconst = {
    .name           = "knc",
    .handle_irq     = knc_pmu_handle_irq,
    .disable_all        = knc_pmu_disable_all,
    .enable_all     = knc_pmu_enable_all,
    .enable         = knc_pmu_enable_event,
    .disable        = knc_pmu_disable_event,
    .hw_config      = x86_pmu_hw_config,
    .schedule_events    = x86_schedule_events,
    .eventsel       = MSR_KNC_EVNTSEL0,
    .perfctr        = MSR_KNC_PERFCTR0,
    .event_map      = knc_pmu_event_map,
    .max_events             = ARRAY_SIZE(knc_perfmon_event_map),
    .apic           = 1,
    /* max_period must leave headroom below the 40-bit counter width */
    .max_period     = (1ULL << 39) - 1,
    .version        = 0,
    .num_counters       = 2,
    .cntval_bits        = 40,
    .cntval_mask        = (1ULL << 40) - 1,
    .get_event_constraints  = x86_get_event_constraints,
    .event_constraints  = knc_event_constraints,
    .format_attrs       = intel_knc_formats_attr,
};
0313 
0314 __init int knc_pmu_init(void)
0315 {
0316     x86_pmu = knc_pmu;
0317 
0318     memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
0319         sizeof(hw_cache_event_ids));
0320 
0321     return 0;
0322 }