0001
0002
0003
0004
0005
0006
0007 #include <linux/kernel.h>
0008 #include <linux/perf_event.h>
0009 #include <linux/string.h>
0010 #include <asm/reg.h>
0011 #include <asm/cputable.h>
0012
0013 #include "internal.h"
0014
0015
0016
0017
/*
 * Layout of a POWER5 event code: each field is described by a shift
 * (*_SH) and a mask (*_MSK) that is applied after shifting.
 */
#define PM_PMC_SH	20	/* PMC number field; 0 means "no fixed PMC" */
#define PM_PMC_MSK	0xf
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)	/* PMC field mask, in place */
#define PM_UNIT_SH	16	/* unit number field (PM_FPU ... PM_LSU1) */
#define PM_UNIT_MSK	0xf
#define PM_BYTE_SH	12	/* event-bus byte number field */
#define PM_BYTE_MSK	0x7
#define PM_GRS_SH	8	/* GRS selector field */
#define PM_GRS_MSK	0x7
#define PM_BUSEVENT_MSK	0x80	/* set when the event comes from the event bus */
#define PM_PMCSEL_MSK	0x7f	/* PMC selector value (low 7 bits) */
0029
0030
/* Values of the unit field (PM_UNIT_SH / PM_UNIT_MSK) of an event code */
#define PM_FPU		0x0
#define PM_ISU0		0x1
#define PM_IFU		0x2
#define PM_ISU1		0x3
#define PM_IDU		0x4
#define PM_ISU0_ALT	0x6	/* alternate encoding of ISU0; folded to PM_ISU0 on input */
#define PM_GRS		0x7
#define PM_LSU0		0x8
#define PM_LSU1		0xc
#define PM_LASTUNIT	0xc	/* highest valid unit number */
0041
0042
0043
0044
/*
 * Field positions (*_SH, bit shift from the least-significant end) and
 * field masks (*_MSK, applied after shifting) within MMCR1.
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTM1SEL_SH	60
#define MMCR1_TTM2SEL_SH	58
#define MMCR1_TTM3SEL_SH	56
#define MMCR1_TTMSEL_MSK	0x3
#define MMCR1_TD_CP_DBG0SEL_SH	54
#define MMCR1_TD_CP_DBG1SEL_SH	52
#define MMCR1_TD_CP_DBG2SEL_SH	50
#define MMCR1_TD_CP_DBG3SEL_SH	48
#define MMCR1_GRS_L2SEL_SH	46
#define MMCR1_GRS_L2SEL_MSK	0x3
#define MMCR1_GRS_L3SEL_SH	44
#define MMCR1_GRS_L3SEL_MSK	0x3
#define MMCR1_GRS_MCSEL_SH	41
#define MMCR1_GRS_MCSEL_MSK	0x7
#define MMCR1_GRS_FABSEL_SH	39
#define MMCR1_GRS_FABSEL_MSK	0x3
#define MMCR1_PMC1_ADDER_SEL_SH	35
#define MMCR1_PMC2_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
#define MMCR1_PMC1SEL_SH	25
#define MMCR1_PMC2SEL_SH	17
#define MMCR1_PMC3SEL_SH	9
#define MMCR1_PMC4SEL_SH	1
/* Shift of the PMCnSEL field for a 0-based counter index n (0..3) */
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0x7f
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122 static const int grsel_shift[8] = {
0123 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
0124 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
0125 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
0126 };
0127
0128
0129 static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
0130 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
0131 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
0132 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
0133 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
0134 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
0135 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
0136 };
0137
0138 static int power5_get_constraint(u64 event, unsigned long *maskp,
0139 unsigned long *valp, u64 event_config1 __maybe_unused)
0140 {
0141 int pmc, byte, unit, sh;
0142 int bit, fmask;
0143 unsigned long mask = 0, value = 0;
0144 int grp = -1;
0145
0146 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
0147 if (pmc) {
0148 if (pmc > 6)
0149 return -1;
0150 sh = (pmc - 1) * 2;
0151 mask |= 2 << sh;
0152 value |= 1 << sh;
0153 if (pmc <= 4)
0154 grp = (pmc - 1) >> 1;
0155 else if (event != 0x500009 && event != 0x600005)
0156 return -1;
0157 }
0158 if (event & PM_BUSEVENT_MSK) {
0159 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
0160 if (unit > PM_LASTUNIT)
0161 return -1;
0162 if (unit == PM_ISU0_ALT)
0163 unit = PM_ISU0;
0164 mask |= unit_cons[unit][0];
0165 value |= unit_cons[unit][1];
0166 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
0167 if (byte >= 4) {
0168 if (unit != PM_LSU1)
0169 return -1;
0170
0171 ++unit;
0172 byte &= 3;
0173 }
0174 if (unit == PM_GRS) {
0175 bit = event & 7;
0176 fmask = (bit == 6)? 7: 3;
0177 sh = grsel_shift[bit];
0178 mask |= (unsigned long)fmask << sh;
0179 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
0180 << sh;
0181 }
0182
0183
0184
0185
0186 if (!pmc)
0187 grp = byte & 1;
0188
0189 mask |= 0xfUL << (24 - 4 * byte);
0190 value |= (unsigned long)unit << (24 - 4 * byte);
0191 }
0192 if (grp == 0) {
0193
0194 mask |= 0x200000000ul;
0195 value |= 0x080000000ul;
0196 } else if (grp == 1) {
0197
0198 mask |= 0x40000000ul;
0199 value |= 0x10000000ul;
0200 }
0201 if (pmc < 5) {
0202
0203 mask |= 0x8000000000000ul;
0204 value |= 0x1000000000000ul;
0205 }
0206 *maskp = mask;
0207 *valp = value;
0208 return 0;
0209 }
0210
/* Maximum number of event codes in one row of event_alternatives[] */
#define MAX_ALT 3

/*
 * Groups of equivalent event codes.  Unused trailing slots are 0.
 * find_alternative() stops scanning as soon as a row's first entry is
 * greater than the event looked up, so rows must stay sorted in
 * ascending order of their first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4,  0x400002, 0 },
	{ 0x410c7,  0x441084, 0 },
	{ 0x100005, 0x600005, 0 },
	{ 0x100009, 0x200009, 0x500009 },
	{ 0x300009, 0x400009, 0 },
};
0220
0221
0222
0223
0224
0225 static int find_alternative(u64 event)
0226 {
0227 int i, j;
0228
0229 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
0230 if (event < event_alternatives[i][0])
0231 break;
0232 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
0233 if (event == event_alternatives[i][j])
0234 return i;
0235 }
0236 return -1;
0237 }
0238
/*
 * PMC selector values that have an equivalent on the "mirror" counter.
 * Row index is PMC number minus one; the entry in column j of row
 * (pmc - 1) corresponds to the entry in column j of row (4 - pmc),
 * i.e. PMC1 pairs with PMC4 and PMC2 with PMC3.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC4 */
};
0245
0246
0247
0248
0249
0250
0251 static s64 find_alternative_bdecode(u64 event)
0252 {
0253 int pmc, altpmc, pp, j;
0254
0255 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
0256 if (pmc == 0 || pmc > 4)
0257 return -1;
0258 altpmc = 5 - pmc;
0259 pp = event & PM_PMCSEL_MSK;
0260 for (j = 0; j < 4; ++j) {
0261 if (bytedecode_alternatives[pmc - 1][j] == pp) {
0262 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
0263 (altpmc << PM_PMC_SH) |
0264 bytedecode_alternatives[altpmc - 1][j];
0265 }
0266 }
0267 return -1;
0268 }
0269
0270 static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
0271 {
0272 int i, j, nalt = 1;
0273 s64 ae;
0274
0275 alt[0] = event;
0276 nalt = 1;
0277 i = find_alternative(event);
0278 if (i >= 0) {
0279 for (j = 0; j < MAX_ALT; ++j) {
0280 ae = event_alternatives[i][j];
0281 if (ae && ae != event)
0282 alt[nalt++] = ae;
0283 }
0284 } else {
0285 ae = find_alternative_bdecode(event);
0286 if (ae > 0)
0287 alt[nalt++] = ae;
0288 }
0289 return nalt;
0290 }
0291
0292
0293
0294
0295
0296
0297
/*
 * Marked-instruction lookup table, indexed by PMC selector value
 * (0x00 - 0x27).  As consumed by power5_marked_instr_event():
 *   - bit n (n = 1..4) set: the event is a marked event when counted
 *     on PMCn;
 *   - bit 7 (0x80) set: the selector picks bit 4 of an event-bus byte,
 *     which is then checked against the per-unit marked-bit masks.
 * Entries not listed are 0.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,
	[0x02] = 0x2,
	[0x03] = 0xe,
	[0x05] = 0x1c,
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,
	[0x13] = 0x10,
	[0x14] = 0x1f,
	[0x15] = 0x2,
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
0334
0335
0336
0337
0338
0339 static int power5_marked_instr_event(u64 event)
0340 {
0341 int pmc, psel;
0342 int bit, byte, unit;
0343 u32 mask;
0344
0345 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
0346 psel = event & PM_PMCSEL_MSK;
0347 if (pmc >= 5)
0348 return 0;
0349
0350 bit = -1;
0351 if (psel < sizeof(direct_event_is_marked)) {
0352 if (direct_event_is_marked[psel] & (1 << pmc))
0353 return 1;
0354 if (direct_event_is_marked[psel] & 0x80)
0355 bit = 4;
0356 else if (psel == 0x08)
0357 bit = pmc - 1;
0358 else if (psel == 0x10)
0359 bit = 4 - pmc;
0360 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
0361 bit = 4;
0362 } else if ((psel & 0x58) == 0x40)
0363 bit = psel & 7;
0364
0365 if (!(event & PM_BUSEVENT_MSK))
0366 return 0;
0367
0368 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
0369 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
0370 if (unit == PM_LSU0) {
0371
0372 mask = 0x5dff00;
0373 } else if (unit == PM_LSU1 && byte >= 4) {
0374 byte -= 4;
0375
0376 mask = 0x5f00c0aa;
0377 } else
0378 return 0;
0379
0380 return (mask >> (byte * 8 + bit)) & 1;
0381 }
0382
0383 static int power5_compute_mmcr(u64 event[], int n_ev,
0384 unsigned int hwc[], struct mmcr_regs *mmcr,
0385 struct perf_event *pevents[],
0386 u32 flags __maybe_unused)
0387 {
0388 unsigned long mmcr1 = 0;
0389 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
0390 unsigned int pmc, unit, byte, psel;
0391 unsigned int ttm, grp;
0392 int i, isbus, bit, grsel;
0393 unsigned int pmc_inuse = 0;
0394 unsigned int pmc_grp_use[2];
0395 unsigned char busbyte[4];
0396 unsigned char unituse[16];
0397 int ttmuse;
0398
0399 if (n_ev > 6)
0400 return -1;
0401
0402
0403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
0404 memset(busbyte, 0, sizeof(busbyte));
0405 memset(unituse, 0, sizeof(unituse));
0406 for (i = 0; i < n_ev; ++i) {
0407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
0408 if (pmc) {
0409 if (pmc > 6)
0410 return -1;
0411 if (pmc_inuse & (1 << (pmc - 1)))
0412 return -1;
0413 pmc_inuse |= 1 << (pmc - 1);
0414
0415 if (pmc <= 4)
0416 ++pmc_grp_use[(pmc - 1) >> 1];
0417 }
0418 if (event[i] & PM_BUSEVENT_MSK) {
0419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
0420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
0421 if (unit > PM_LASTUNIT)
0422 return -1;
0423 if (unit == PM_ISU0_ALT)
0424 unit = PM_ISU0;
0425 if (byte >= 4) {
0426 if (unit != PM_LSU1)
0427 return -1;
0428 ++unit;
0429 byte &= 3;
0430 }
0431 if (!pmc)
0432 ++pmc_grp_use[byte & 1];
0433 if (busbyte[byte] && busbyte[byte] != unit)
0434 return -1;
0435 busbyte[byte] = unit;
0436 unituse[unit] = 1;
0437 }
0438 }
0439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
0440 return -1;
0441
0442
0443
0444
0445
0446
0447
0448 if (unituse[PM_ISU0] &
0449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
0450 unituse[PM_ISU0_ALT] = 1;
0451 unituse[PM_ISU0] = 0;
0452 }
0453
0454 ttmuse = 0;
0455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
0456 if (!unituse[i])
0457 continue;
0458 if (ttmuse++)
0459 return -1;
0460 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
0461 }
0462 ttmuse = 0;
0463 for (; i <= PM_GRS; ++i) {
0464 if (!unituse[i])
0465 continue;
0466 if (ttmuse++)
0467 return -1;
0468 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
0469 }
0470 if (ttmuse > 1)
0471 return -1;
0472
0473
0474 for (byte = 0; byte < 4; ++byte) {
0475 unit = busbyte[byte];
0476 if (!unit)
0477 continue;
0478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
0479
0480 unit = PM_ISU0_ALT;
0481 } else if (unit == PM_LSU1 + 1) {
0482
0483 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
0484 }
0485 ttm = unit >> 2;
0486 mmcr1 |= (unsigned long)ttm
0487 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
0488 }
0489
0490
0491 for (i = 0; i < n_ev; ++i) {
0492 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
0493 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
0494 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
0495 psel = event[i] & PM_PMCSEL_MSK;
0496 isbus = event[i] & PM_BUSEVENT_MSK;
0497 if (!pmc) {
0498
0499 for (pmc = 0; pmc < 4; ++pmc) {
0500 if (pmc_inuse & (1 << pmc))
0501 continue;
0502 grp = (pmc >> 1) & 1;
0503 if (isbus) {
0504 if (grp == (byte & 1))
0505 break;
0506 } else if (pmc_grp_use[grp] < 2) {
0507 ++pmc_grp_use[grp];
0508 break;
0509 }
0510 }
0511 pmc_inuse |= 1 << pmc;
0512 } else if (pmc <= 4) {
0513
0514 --pmc;
0515 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
0516
0517 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
0518 } else {
0519
0520 --pmc;
0521 }
0522 if (isbus && unit == PM_GRS) {
0523 bit = psel & 7;
0524 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
0525 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
0526 }
0527 if (power5_marked_instr_event(event[i]))
0528 mmcra |= MMCRA_SAMPLE_ENABLE;
0529 if (pmc <= 3)
0530 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
0531 hwc[i] = pmc;
0532 }
0533
0534
0535 mmcr->mmcr0 = 0;
0536 if (pmc_inuse & 1)
0537 mmcr->mmcr0 = MMCR0_PMC1CE;
0538 if (pmc_inuse & 0x3e)
0539 mmcr->mmcr0 |= MMCR0_PMCjCE;
0540 mmcr->mmcr1 = mmcr1;
0541 mmcr->mmcra = mmcra;
0542 return 0;
0543 }
0544
0545 static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
0546 {
0547 if (pmc <= 3)
0548 mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
0549 }
0550
0551 static int power5_generic_events[] = {
0552 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
0553 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
0554 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090,
0555 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088,
0556 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4,
0557 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5,
0558 };
0559
0560 #define C(x) PERF_COUNT_HW_CACHE_##x
0561
0562
0563
0564
0565
0566
0567 static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
0568 [C(L1D)] = {
0569 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
0570 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
0571 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
0572 },
0573 [C(L1I)] = {
0574 [C(OP_READ)] = { 0, 0 },
0575 [C(OP_WRITE)] = { -1, -1 },
0576 [C(OP_PREFETCH)] = { 0, 0 },
0577 },
0578 [C(LL)] = {
0579 [C(OP_READ)] = { 0, 0x3c309b },
0580 [C(OP_WRITE)] = { 0, 0 },
0581 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
0582 },
0583 [C(DTLB)] = {
0584 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
0585 [C(OP_WRITE)] = { -1, -1 },
0586 [C(OP_PREFETCH)] = { -1, -1 },
0587 },
0588 [C(ITLB)] = {
0589 [C(OP_READ)] = { 0, 0x800c0 },
0590 [C(OP_WRITE)] = { -1, -1 },
0591 [C(OP_PREFETCH)] = { -1, -1 },
0592 },
0593 [C(BPU)] = {
0594 [C(OP_READ)] = { 0x230e4, 0x230e5 },
0595 [C(OP_WRITE)] = { -1, -1 },
0596 [C(OP_PREFETCH)] = { -1, -1 },
0597 },
0598 [C(NODE)] = {
0599 [C(OP_READ)] = { -1, -1 },
0600 [C(OP_WRITE)] = { -1, -1 },
0601 [C(OP_PREFETCH)] = { -1, -1 },
0602 },
0603 };
0604
0605 static struct power_pmu power5_pmu = {
0606 .name = "POWER5",
0607 .n_counter = 6,
0608 .max_alternatives = MAX_ALT,
0609 .add_fields = 0x7000090000555ul,
0610 .test_adder = 0x3000490000000ul,
0611 .compute_mmcr = power5_compute_mmcr,
0612 .get_constraint = power5_get_constraint,
0613 .get_alternatives = power5_get_alternatives,
0614 .disable_pmc = power5_disable_pmc,
0615 .n_generic = ARRAY_SIZE(power5_generic_events),
0616 .generic_events = power5_generic_events,
0617 .cache_events = &power5_cache_events,
0618 .flags = PPMU_HAS_SSLOT,
0619 };
0620
0621 int __init init_power5_pmu(void)
0622 {
0623 unsigned int pvr = mfspr(SPRN_PVR);
0624
0625 if (PVR_VER(pvr) != PVR_POWER5)
0626 return -ENODEV;
0627
0628 return register_power_pmu(&power5_pmu);
0629 }