0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Performance event support for sparc64.
0003  *
0004  * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
0005  *
0006  * This code is based almost entirely upon the x86 perf event
0007  * code, which is:
0008  *
0009  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
0010  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
0011  *  Copyright (C) 2009 Jaswinder Singh Rajput
0012  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
0013  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
0014  */
0015 
0016 #include <linux/perf_event.h>
0017 #include <linux/kprobes.h>
0018 #include <linux/ftrace.h>
0019 #include <linux/kernel.h>
0020 #include <linux/kdebug.h>
0021 #include <linux/mutex.h>
0022 
0023 #include <asm/stacktrace.h>
0024 #include <asm/cpudata.h>
0025 #include <linux/uaccess.h>
0026 #include <linux/atomic.h>
0027 #include <linux/sched/clock.h>
0028 #include <asm/nmi.h>
0029 #include <asm/pcr.h>
0030 #include <asm/cacheflush.h>
0031 
0032 #include "kernel.h"
0033 #include "kstack.h"
0034 
0035 /* Two classes of sparc64 chips currently exist, all of which have
0036  * 32-bit counters that can generate overflow interrupts on the
0037  * transition from 0xffffffff to 0.
0038  *
0039  * All chips up to and including SPARC-T3 have two performance
0040  * counters.  The two 32-bit counters are accessed in one go using a
0041  * single 64-bit register.
0042  *
0043  * On these older chips both counters are controlled using a single
0044  * control register.  The only way to stop all sampling is to clear
0045  * all of the context (user, supervisor, hypervisor) sampling enable
0046  * bits.  But these bits apply to both counters, thus the two counters
0047  * can't be enabled/disabled individually.
0048  *
0049  * Furthermore, the control register on these older chips has two
0050  * event fields, one for each of the two counters.  It's thus nearly
0051  * impossible to have one counter going while keeping the other one
0052  * stopped.  Therefore it is possible to get overflow interrupts for
0053  * counters not currently "in use" and that condition must be checked
0054  * in the overflow interrupt handler.
0055  *
0056  * So we use a hack, in that we program inactive counters with the
0057  * "sw_count0" and "sw_count1" events.  These count how many times
0058  * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
0059  * unusual way to encode a NOP and therefore will not trigger in
0060  * normal code.
0061  *
0062  * Starting with SPARC-T4 we have one control register per counter.
0063  * And the counters are stored in individual registers.  The registers
0064  * for the counters are 64-bit but only a 32-bit counter is
0065  * implemented.  The event selections on SPARC-T4 lack any
0066  * restrictions, therefore we can elide all of the complicated
0067  * conflict resolution code we have for SPARC-T3 and earlier chips.
0068  */
0069 
0070 #define MAX_HWEVENTS            4
0071 #define MAX_PCRS            4
0072 #define MAX_PERIOD          ((1UL << 32) - 1)
0073 
0074 #define PIC_UPPER_INDEX         0
0075 #define PIC_LOWER_INDEX         1
0076 #define PIC_NO_INDEX            -1
0077 
0078 struct cpu_hw_events {
0079     /* Number of events currently scheduled onto this cpu.
0080      * This tells how many entries in the arrays below
0081      * are valid.
0082      */
0083     int         n_events;
0084 
0085     /* Number of new events added since the last hw_perf_disable().
0086      * This works because the perf event layer always adds new
0087      * events inside of a perf_{disable,enable}() sequence.
0088      */
0089     int         n_added;
0090 
0091     /* Array of events currently scheduled on this cpu.  */
0092     struct perf_event   *event[MAX_HWEVENTS];
0093 
0094     /* Array of encoded longs, specifying the %pcr register
0095      * encoding and the mask of PIC counters this event can
0096      * be scheduled on.  See perf_event_encode() et al.
0097      */
0098     unsigned long       events[MAX_HWEVENTS];
0099 
0100     /* The current counter index assigned to an event.  When the
0101      * event hasn't been programmed into the cpu yet, this will
0102      * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
0103      * we ought to schedule the event.
0104      */
0105     int         current_idx[MAX_HWEVENTS];
0106 
0107     /* Software copy of %pcr register(s) on this cpu.  */
0108     u64         pcr[MAX_HWEVENTS];
0109 
0110     /* Enable/disable state.  */
0111     int         enabled;
0112 
0113     unsigned int        txn_flags;
0114 };
0115 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
0116 
0117 /* An event map describes the characteristics of a performance
0118  * counter event.  In particular it gives the encoding as well as
0119  * a mask telling which counters the event can be measured on.
0120  *
0121  * The mask is unused on SPARC-T4 and later.
0122  */
0123 struct perf_event_map {
0124     u16 encoding;
0125     u8  pic_mask;
0126 #define PIC_NONE    0x00
0127 #define PIC_UPPER   0x01
0128 #define PIC_LOWER   0x02
0129 };
0130 
0131 /* Encode a perf_event_map entry into a long.  */
0132 static unsigned long perf_event_encode(const struct perf_event_map *pmap)
0133 {
0134     return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
0135 }
0136 
0137 static u8 perf_event_get_msk(unsigned long val)
0138 {
0139     return val & 0xff;
0140 }
0141 
0142 static u64 perf_event_get_enc(unsigned long val)
0143 {
0144     return val >> 16;
0145 }
0146 
0147 #define C(x) PERF_COUNT_HW_CACHE_##x
0148 
0149 #define CACHE_OP_UNSUPPORTED    0xfffe
0150 #define CACHE_OP_NONSENSE   0xffff
0151 
0152 typedef struct perf_event_map cache_map_t
0153                 [PERF_COUNT_HW_CACHE_MAX]
0154                 [PERF_COUNT_HW_CACHE_OP_MAX]
0155                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
0156 
0157 struct sparc_pmu {
0158     const struct perf_event_map *(*event_map)(int);
0159     const cache_map_t       *cache_map;
0160     int             max_events;
0161     u32             (*read_pmc)(int);
0162     void                (*write_pmc)(int, u64);
0163     int             upper_shift;
0164     int             lower_shift;
0165     int             event_mask;
0166     int             user_bit;
0167     int             priv_bit;
0168     int             hv_bit;
0169     int             irq_bit;
0170     int             upper_nop;
0171     int             lower_nop;
0172     unsigned int            flags;
0173 #define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001
0174 #define SPARC_PMU_HAS_CONFLICTS     0x00000002
0175     int             max_hw_events;
0176     int             num_pcrs;
0177     int             num_pic_regs;
0178 };
0179 
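/* On chips up to and including SPARC-T3 the two 32-bit PICs share a
 * single 64-bit register, so accessing one counter means reading the
 * whole register and masking or merging the requested half.
 * SPARC-T4 and later provide one register per counter; see
 * sparc_vt_read_pmc() and sparc_vt_write_pmc() below.
 */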
0180 static u32 sparc_default_read_pmc(int idx)
0181 {
0182     u64 val;
0183 
0184     val = pcr_ops->read_pic(0);
0185     if (idx == PIC_UPPER_INDEX)
0186         val >>= 32;
0187 
0188     return val & 0xffffffff;
0189 }
0190 
0191 static void sparc_default_write_pmc(int idx, u64 val)
0192 {
0193     u64 shift, mask, pic;
0194 
0195     shift = 0;
0196     if (idx == PIC_UPPER_INDEX)
0197         shift = 32;
0198 
0199     mask = ((u64) 0xffffffff) << shift;
0200     val <<= shift;
0201 
0202     pic = pcr_ops->read_pic(0);
0203     pic &= ~mask;
0204     pic |= val;
0205     pcr_ops->write_pic(0, pic);
0206 }
0207 
0208 static const struct perf_event_map ultra3_perfmon_event_map[] = {
0209     [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
0210     [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
0211     [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
0212     [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
0213 };
0214 
0215 static const struct perf_event_map *ultra3_event_map(int event_id)
0216 {
0217     return &ultra3_perfmon_event_map[event_id];
0218 }
0219 
0220 static const cache_map_t ultra3_cache_map = {
0221 [C(L1D)] = {
0222     [C(OP_READ)] = {
0223         [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
0224         [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
0225     },
0226     [C(OP_WRITE)] = {
0227         [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
0228         [C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
0229     },
0230     [C(OP_PREFETCH)] = {
0231         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0232         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0233     },
0234 },
0235 [C(L1I)] = {
0236     [C(OP_READ)] = {
0237         [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
0238         [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
0239     },
0240     [ C(OP_WRITE) ] = {
0241         [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
0242         [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
0243     },
0244     [ C(OP_PREFETCH) ] = {
0245         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0246         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0247     },
0248 },
0249 [C(LL)] = {
0250     [C(OP_READ)] = {
0251         [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
0252         [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
0253     },
0254     [C(OP_WRITE)] = {
0255         [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
0256         [C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
0257     },
0258     [C(OP_PREFETCH)] = {
0259         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0260         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0261     },
0262 },
0263 [C(DTLB)] = {
0264     [C(OP_READ)] = {
0265         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0266         [C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
0267     },
0268     [ C(OP_WRITE) ] = {
0269         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0270         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0271     },
0272     [ C(OP_PREFETCH) ] = {
0273         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0274         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0275     },
0276 },
0277 [C(ITLB)] = {
0278     [C(OP_READ)] = {
0279         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0280         [C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
0281     },
0282     [ C(OP_WRITE) ] = {
0283         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0284         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0285     },
0286     [ C(OP_PREFETCH) ] = {
0287         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0288         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0289     },
0290 },
0291 [C(BPU)] = {
0292     [C(OP_READ)] = {
0293         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0294         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0295     },
0296     [ C(OP_WRITE) ] = {
0297         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0298         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0299     },
0300     [ C(OP_PREFETCH) ] = {
0301         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0302         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0303     },
0304 },
0305 [C(NODE)] = {
0306     [C(OP_READ)] = {
0307         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0308         [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
0309     },
0310     [ C(OP_WRITE) ] = {
0311         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0312         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0313     },
0314     [ C(OP_PREFETCH) ] = {
0315         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0316         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0317     },
0318 },
0319 };
0320 
0321 static const struct sparc_pmu ultra3_pmu = {
0322     .event_map  = ultra3_event_map,
0323     .cache_map  = &ultra3_cache_map,
0324     .max_events = ARRAY_SIZE(ultra3_perfmon_event_map),
0325     .read_pmc   = sparc_default_read_pmc,
0326     .write_pmc  = sparc_default_write_pmc,
0327     .upper_shift    = 11,
0328     .lower_shift    = 4,
0329     .event_mask = 0x3f,
0330     .user_bit   = PCR_UTRACE,
0331     .priv_bit   = PCR_STRACE,
0332     .upper_nop  = 0x1c,
0333     .lower_nop  = 0x14,
0334     .flags      = (SPARC_PMU_ALL_EXCLUDES_SAME |
0335                SPARC_PMU_HAS_CONFLICTS),
0336     .max_hw_events  = 2,
0337     .num_pcrs   = 1,
0338     .num_pic_regs   = 1,
0339 };
0340 
0341 /* Niagara1 is very limited.  The upper PIC is hard-locked to count
0342  * only instructions, so it is free running, which creates all kinds of
0343  * problems.  Some hardware designs make one wonder if the creator
0344  * even looked at how this stuff gets used by software.
0345  */
0346 static const struct perf_event_map niagara1_perfmon_event_map[] = {
0347     [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
0348     [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
0349     [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
0350     [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
0351 };
0352 
0353 static const struct perf_event_map *niagara1_event_map(int event_id)
0354 {
0355     return &niagara1_perfmon_event_map[event_id];
0356 }
0357 
0358 static const cache_map_t niagara1_cache_map = {
0359 [C(L1D)] = {
0360     [C(OP_READ)] = {
0361         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0362         [C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
0363     },
0364     [C(OP_WRITE)] = {
0365         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0366         [C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
0367     },
0368     [C(OP_PREFETCH)] = {
0369         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0370         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0371     },
0372 },
0373 [C(L1I)] = {
0374     [C(OP_READ)] = {
0375         [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
0376         [C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
0377     },
0378     [ C(OP_WRITE) ] = {
0379         [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
0380         [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
0381     },
0382     [ C(OP_PREFETCH) ] = {
0383         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0384         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0385     },
0386 },
0387 [C(LL)] = {
0388     [C(OP_READ)] = {
0389         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0390         [C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
0391     },
0392     [C(OP_WRITE)] = {
0393         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0394         [C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
0395     },
0396     [C(OP_PREFETCH)] = {
0397         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0398         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0399     },
0400 },
0401 [C(DTLB)] = {
0402     [C(OP_READ)] = {
0403         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0404         [C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
0405     },
0406     [ C(OP_WRITE) ] = {
0407         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0408         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0409     },
0410     [ C(OP_PREFETCH) ] = {
0411         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0412         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0413     },
0414 },
0415 [C(ITLB)] = {
0416     [C(OP_READ)] = {
0417         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0418         [C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
0419     },
0420     [ C(OP_WRITE) ] = {
0421         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0422         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0423     },
0424     [ C(OP_PREFETCH) ] = {
0425         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0426         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0427     },
0428 },
0429 [C(BPU)] = {
0430     [C(OP_READ)] = {
0431         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0432         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0433     },
0434     [ C(OP_WRITE) ] = {
0435         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0436         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0437     },
0438     [ C(OP_PREFETCH) ] = {
0439         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0440         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0441     },
0442 },
0443 [C(NODE)] = {
0444     [C(OP_READ)] = {
0445         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0446         [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
0447     },
0448     [ C(OP_WRITE) ] = {
0449         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0450         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0451     },
0452     [ C(OP_PREFETCH) ] = {
0453         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0454         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0455     },
0456 },
0457 };
0458 
0459 static const struct sparc_pmu niagara1_pmu = {
0460     .event_map  = niagara1_event_map,
0461     .cache_map  = &niagara1_cache_map,
0462     .max_events = ARRAY_SIZE(niagara1_perfmon_event_map),
0463     .read_pmc   = sparc_default_read_pmc,
0464     .write_pmc  = sparc_default_write_pmc,
0465     .upper_shift    = 0,
0466     .lower_shift    = 4,
0467     .event_mask = 0x7,
0468     .user_bit   = PCR_UTRACE,
0469     .priv_bit   = PCR_STRACE,
0470     .upper_nop  = 0x0,
0471     .lower_nop  = 0x0,
0472     .flags      = (SPARC_PMU_ALL_EXCLUDES_SAME |
0473                SPARC_PMU_HAS_CONFLICTS),
0474     .max_hw_events  = 2,
0475     .num_pcrs   = 1,
0476     .num_pic_regs   = 1,
0477 };
0478 
0479 static const struct perf_event_map niagara2_perfmon_event_map[] = {
0480     [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
0481     [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
0482     [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
0483     [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
0484     [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
0485     [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
0486 };
0487 
0488 static const struct perf_event_map *niagara2_event_map(int event_id)
0489 {
0490     return &niagara2_perfmon_event_map[event_id];
0491 }
0492 
0493 static const cache_map_t niagara2_cache_map = {
0494 [C(L1D)] = {
0495     [C(OP_READ)] = {
0496         [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
0497         [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
0498     },
0499     [C(OP_WRITE)] = {
0500         [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
0501         [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
0502     },
0503     [C(OP_PREFETCH)] = {
0504         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0505         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0506     },
0507 },
0508 [C(L1I)] = {
0509     [C(OP_READ)] = {
0510         [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
0511         [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
0512     },
0513     [ C(OP_WRITE) ] = {
0514         [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
0515         [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
0516     },
0517     [ C(OP_PREFETCH) ] = {
0518         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0519         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0520     },
0521 },
0522 [C(LL)] = {
0523     [C(OP_READ)] = {
0524         [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
0525         [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
0526     },
0527     [C(OP_WRITE)] = {
0528         [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
0529         [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
0530     },
0531     [C(OP_PREFETCH)] = {
0532         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0533         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0534     },
0535 },
0536 [C(DTLB)] = {
0537     [C(OP_READ)] = {
0538         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0539         [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
0540     },
0541     [ C(OP_WRITE) ] = {
0542         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0543         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0544     },
0545     [ C(OP_PREFETCH) ] = {
0546         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0547         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0548     },
0549 },
0550 [C(ITLB)] = {
0551     [C(OP_READ)] = {
0552         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0553         [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
0554     },
0555     [ C(OP_WRITE) ] = {
0556         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0557         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0558     },
0559     [ C(OP_PREFETCH) ] = {
0560         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0561         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0562     },
0563 },
0564 [C(BPU)] = {
0565     [C(OP_READ)] = {
0566         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0567         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0568     },
0569     [ C(OP_WRITE) ] = {
0570         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0571         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0572     },
0573     [ C(OP_PREFETCH) ] = {
0574         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0575         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0576     },
0577 },
0578 [C(NODE)] = {
0579     [C(OP_READ)] = {
0580         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0581         [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
0582     },
0583     [ C(OP_WRITE) ] = {
0584         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0585         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0586     },
0587     [ C(OP_PREFETCH) ] = {
0588         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0589         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0590     },
0591 },
0592 };
0593 
0594 static const struct sparc_pmu niagara2_pmu = {
0595     .event_map  = niagara2_event_map,
0596     .cache_map  = &niagara2_cache_map,
0597     .max_events = ARRAY_SIZE(niagara2_perfmon_event_map),
0598     .read_pmc   = sparc_default_read_pmc,
0599     .write_pmc  = sparc_default_write_pmc,
0600     .upper_shift    = 19,
0601     .lower_shift    = 6,
0602     .event_mask = 0xfff,
0603     .user_bit   = PCR_UTRACE,
0604     .priv_bit   = PCR_STRACE,
0605     .hv_bit     = PCR_N2_HTRACE,
0606     .irq_bit    = 0x30,
0607     .upper_nop  = 0x220,
0608     .lower_nop  = 0x220,
0609     .flags      = (SPARC_PMU_ALL_EXCLUDES_SAME |
0610                SPARC_PMU_HAS_CONFLICTS),
0611     .max_hw_events  = 2,
0612     .num_pcrs   = 1,
0613     .num_pic_regs   = 1,
0614 };
0615 
0616 static const struct perf_event_map niagara4_perfmon_event_map[] = {
0617     [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
0618     [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
0619     [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
0620     [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
0621     [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
0622     [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
0623 };
0624 
0625 static const struct perf_event_map *niagara4_event_map(int event_id)
0626 {
0627     return &niagara4_perfmon_event_map[event_id];
0628 }
0629 
0630 static const cache_map_t niagara4_cache_map = {
0631 [C(L1D)] = {
0632     [C(OP_READ)] = {
0633         [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
0634         [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
0635     },
0636     [C(OP_WRITE)] = {
0637         [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
0638         [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
0639     },
0640     [C(OP_PREFETCH)] = {
0641         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0642         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0643     },
0644 },
0645 [C(L1I)] = {
0646     [C(OP_READ)] = {
0647         [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
0648         [C(RESULT_MISS)] = { (11 << 6) | 0x03 },
0649     },
0650     [ C(OP_WRITE) ] = {
0651         [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
0652         [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
0653     },
0654     [ C(OP_PREFETCH) ] = {
0655         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0656         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0657     },
0658 },
0659 [C(LL)] = {
0660     [C(OP_READ)] = {
0661         [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
0662         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0663     },
0664     [C(OP_WRITE)] = {
0665         [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
0666         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0667     },
0668     [C(OP_PREFETCH)] = {
0669         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0670         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0671     },
0672 },
0673 [C(DTLB)] = {
0674     [C(OP_READ)] = {
0675         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0676         [C(RESULT_MISS)] = { (17 << 6) | 0x3f },
0677     },
0678     [ C(OP_WRITE) ] = {
0679         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0680         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0681     },
0682     [ C(OP_PREFETCH) ] = {
0683         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0684         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0685     },
0686 },
0687 [C(ITLB)] = {
0688     [C(OP_READ)] = {
0689         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0690         [C(RESULT_MISS)] = { (6 << 6) | 0x3f },
0691     },
0692     [ C(OP_WRITE) ] = {
0693         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0694         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0695     },
0696     [ C(OP_PREFETCH) ] = {
0697         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0698         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0699     },
0700 },
0701 [C(BPU)] = {
0702     [C(OP_READ)] = {
0703         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0704         [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
0705     },
0706     [ C(OP_WRITE) ] = {
0707         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0708         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0709     },
0710     [ C(OP_PREFETCH) ] = {
0711         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0712         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0713     },
0714 },
0715 [C(NODE)] = {
0716     [C(OP_READ)] = {
0717         [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
0718         [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
0719     },
0720     [ C(OP_WRITE) ] = {
0721         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0722         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0723     },
0724     [ C(OP_PREFETCH) ] = {
0725         [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
0726         [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
0727     },
0728 },
0729 };
0730 
0731 static u32 sparc_vt_read_pmc(int idx)
0732 {
0733     u64 val = pcr_ops->read_pic(idx);
0734 
0735     return val & 0xffffffff;
0736 }
0737 
0738 static void sparc_vt_write_pmc(int idx, u64 val)
0739 {
0740     u64 pcr;
0741 
0742     pcr = pcr_ops->read_pcr(idx);
0743     /* ensure ov and ntc are reset */
0744     pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
0745 
0746     pcr_ops->write_pic(idx, val & 0xffffffff);
0747 
0748     pcr_ops->write_pcr(idx, pcr);
0749 }
0750 
0751 static const struct sparc_pmu niagara4_pmu = {
0752     .event_map  = niagara4_event_map,
0753     .cache_map  = &niagara4_cache_map,
0754     .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
0755     .read_pmc   = sparc_vt_read_pmc,
0756     .write_pmc  = sparc_vt_write_pmc,
0757     .upper_shift    = 5,
0758     .lower_shift    = 5,
0759     .event_mask = 0x7ff,
0760     .user_bit   = PCR_N4_UTRACE,
0761     .priv_bit   = PCR_N4_STRACE,
0762 
0763     /* We explicitly don't support hypervisor tracing.  The T4
0764      * generates the overflow event for precise events via a trap
0765      * which will not be generated (i.e. it's completely lost) if
0766      * we happen to be in the hypervisor when the event triggers.
0767      * Essentially, the overflow event reporting is completely
0768      * unusable when you have hypervisor mode tracing enabled.
0769      */
0770     .hv_bit     = 0,
0771 
0772     .irq_bit    = PCR_N4_TOE,
0773     .upper_nop  = 0,
0774     .lower_nop  = 0,
0775     .flags      = 0,
0776     .max_hw_events  = 4,
0777     .num_pcrs   = 4,
0778     .num_pic_regs   = 4,
0779 };
0780 
0781 static const struct sparc_pmu sparc_m7_pmu = {
0782     .event_map  = niagara4_event_map,
0783     .cache_map  = &niagara4_cache_map,
0784     .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
0785     .read_pmc   = sparc_vt_read_pmc,
0786     .write_pmc  = sparc_vt_write_pmc,
0787     .upper_shift    = 5,
0788     .lower_shift    = 5,
0789     .event_mask = 0x7ff,
0790     .user_bit   = PCR_N4_UTRACE,
0791     .priv_bit   = PCR_N4_STRACE,
0792 
0793     /* We explicitly don't support hypervisor tracing. */
0794     .hv_bit     = 0,
0795 
0796     .irq_bit    = PCR_N4_TOE,
0797     .upper_nop  = 0,
0798     .lower_nop  = 0,
0799     .flags      = 0,
0800     .max_hw_events  = 4,
0801     .num_pcrs   = 4,
0802     .num_pic_regs   = 4,
0803 };
0804 static const struct sparc_pmu *sparc_pmu __read_mostly;
0805 
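/* Shift a value into the %pcr event-select field position used by the
 * given counter index.
 */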
0806 static u64 event_encoding(u64 event_id, int idx)
0807 {
0808     if (idx == PIC_UPPER_INDEX)
0809         event_id <<= sparc_pmu->upper_shift;
0810     else
0811         event_id <<= sparc_pmu->lower_shift;
0812     return event_id;
0813 }
0814 
0815 static u64 mask_for_index(int idx)
0816 {
0817     return event_encoding(sparc_pmu->event_mask, idx);
0818 }
0819 
0820 static u64 nop_for_index(int idx)
0821 {
0822     return event_encoding(idx == PIC_UPPER_INDEX ?
0823                   sparc_pmu->upper_nop :
0824                   sparc_pmu->lower_nop, idx);
0825 }
0826 
0827 static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
0828 {
0829     u64 enc, val, mask = mask_for_index(idx);
0830     int pcr_index = 0;
0831 
0832     if (sparc_pmu->num_pcrs > 1)
0833         pcr_index = idx;
0834 
0835     enc = perf_event_get_enc(cpuc->events[idx]);
0836 
0837     val = cpuc->pcr[pcr_index];
0838     val &= ~mask;
0839     val |= event_encoding(enc, idx);
0840     cpuc->pcr[pcr_index] = val;
0841 
0842     pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
0843 }
0844 
0845 static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
0846 {
0847     u64 mask = mask_for_index(idx);
0848     u64 nop = nop_for_index(idx);
0849     int pcr_index = 0;
0850     u64 val;
0851 
0852     if (sparc_pmu->num_pcrs > 1)
0853         pcr_index = idx;
0854 
0855     val = cpuc->pcr[pcr_index];
0856     val &= ~mask;
0857     val |= nop;
0858     cpuc->pcr[pcr_index] = val;
0859 
0860     pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
0861 }
0862 
0863 static u64 sparc_perf_event_update(struct perf_event *event,
0864                    struct hw_perf_event *hwc, int idx)
0865 {
0866     int shift = 64 - 32;
0867     u64 prev_raw_count, new_raw_count;
0868     s64 delta;
0869 
0870 again:
0871     prev_raw_count = local64_read(&hwc->prev_count);
0872     new_raw_count = sparc_pmu->read_pmc(idx);
0873 
0874     if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
0875                  new_raw_count) != prev_raw_count)
0876         goto again;
0877 
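    /* The hardware counters are only 32 bits wide.  Shifting both
     * samples up to the top of a 64-bit value and the signed result
     * back down makes the subtraction handle counter wrap-around.
     */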
0878     delta = (new_raw_count << shift) - (prev_raw_count << shift);
0879     delta >>= shift;
0880 
0881     local64_add(delta, &event->count);
0882     local64_sub(delta, &hwc->period_left);
0883 
0884     return new_raw_count;
0885 }
0886 
0887 static int sparc_perf_event_set_period(struct perf_event *event,
0888                        struct hw_perf_event *hwc, int idx)
0889 {
0890     s64 left = local64_read(&hwc->period_left);
0891     s64 period = hwc->sample_period;
0892     int ret = 0;
0893 
0894     /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
0895     if (unlikely(period != hwc->last_period))
0896         left = period - (hwc->last_period - left);
0897 
0898     if (unlikely(left <= -period)) {
0899         left = period;
0900         local64_set(&hwc->period_left, left);
0901         hwc->last_period = period;
0902         ret = 1;
0903     }
0904 
0905     if (unlikely(left <= 0)) {
0906         left += period;
0907         local64_set(&hwc->period_left, left);
0908         hwc->last_period = period;
0909         ret = 1;
0910     }
0911     if (left > MAX_PERIOD)
0912         left = MAX_PERIOD;
0913 
0914     local64_set(&hwc->prev_count, (u64)-left);
0915 
0916     sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);
0917 
0918     perf_event_update_userpage(event);
0919 
0920     return ret;
0921 }
0922 
0923 static void read_in_all_counters(struct cpu_hw_events *cpuc)
0924 {
0925     int i;
0926 
0927     for (i = 0; i < cpuc->n_events; i++) {
0928         struct perf_event *cp = cpuc->event[i];
0929 
0930         if (cpuc->current_idx[i] != PIC_NO_INDEX &&
0931             cpuc->current_idx[i] != cp->hw.idx) {
0932             sparc_perf_event_update(cp, &cp->hw,
0933                         cpuc->current_idx[i]);
0934             cpuc->current_idx[i] = PIC_NO_INDEX;
0935             if (cp->hw.state & PERF_HES_STOPPED)
0936                 cp->hw.state |= PERF_HES_ARCH;
0937         }
0938     }
0939 }
0940 
0941 /* On this PMU all PICs are programmed using a single PCR.  Calculate
0942  * the combined control register value.
0943  *
0944  * For such chips we require that all of the events have the same
0945  * configuration, so just fetch the settings from the first entry.
0946  */
0947 static void calculate_single_pcr(struct cpu_hw_events *cpuc)
0948 {
0949     int i;
0950 
0951     if (!cpuc->n_added)
0952         goto out;
0953 
0954     /* Assign to counters all unassigned events.  */
0955     for (i = 0; i < cpuc->n_events; i++) {
0956         struct perf_event *cp = cpuc->event[i];
0957         struct hw_perf_event *hwc = &cp->hw;
0958         int idx = hwc->idx;
0959         u64 enc;
0960 
0961         if (cpuc->current_idx[i] != PIC_NO_INDEX)
0962             continue;
0963 
0964         sparc_perf_event_set_period(cp, hwc, idx);
0965         cpuc->current_idx[i] = idx;
0966 
0967         enc = perf_event_get_enc(cpuc->events[i]);
0968         cpuc->pcr[0] &= ~mask_for_index(idx);
0969         if (hwc->state & PERF_HES_ARCH) {
0970             cpuc->pcr[0] |= nop_for_index(idx);
0971         } else {
0972             cpuc->pcr[0] |= event_encoding(enc, idx);
0973             hwc->state = 0;
0974         }
0975     }
0976 out:
0977     cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
0978 }
0979 
0980 static void sparc_pmu_start(struct perf_event *event, int flags);
0981 
0982 /* On this PMU each PIC has its own PCR control register.  */
0983 static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
0984 {
0985     int i;
0986 
0987     if (!cpuc->n_added)
0988         goto out;
0989 
0990     for (i = 0; i < cpuc->n_events; i++) {
0991         struct perf_event *cp = cpuc->event[i];
0992         struct hw_perf_event *hwc = &cp->hw;
0993         int idx = hwc->idx;
0994 
0995         if (cpuc->current_idx[i] != PIC_NO_INDEX)
0996             continue;
0997 
0998         cpuc->current_idx[i] = idx;
0999 
1000         if (cp->hw.state & PERF_HES_ARCH)
1001             continue;
1002 
1003         sparc_pmu_start(cp, PERF_EF_RELOAD);
1004     }
1005 out:
1006     for (i = 0; i < cpuc->n_events; i++) {
1007         struct perf_event *cp = cpuc->event[i];
1008         int idx = cp->hw.idx;
1009 
1010         cpuc->pcr[idx] |= cp->hw.config_base;
1011     }
1012 }
1013 
1014 /* If performance event entries have been added, move existing events
1015  * around (if necessary) and then assign new entries to counters.
1016  */
1017 static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
1018 {
1019     if (cpuc->n_added)
1020         read_in_all_counters(cpuc);
1021 
1022     if (sparc_pmu->num_pcrs == 1) {
1023         calculate_single_pcr(cpuc);
1024     } else {
1025         calculate_multiple_pcrs(cpuc);
1026     }
1027 }
1028 
1029 static void sparc_pmu_enable(struct pmu *pmu)
1030 {
1031     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1032     int i;
1033 
1034     if (cpuc->enabled)
1035         return;
1036 
1037     cpuc->enabled = 1;
1038     barrier();
1039 
1040     if (cpuc->n_events)
1041         update_pcrs_for_enable(cpuc);
1042 
1043     for (i = 0; i < sparc_pmu->num_pcrs; i++)
1044         pcr_ops->write_pcr(i, cpuc->pcr[i]);
1045 }
1046 
1047 static void sparc_pmu_disable(struct pmu *pmu)
1048 {
1049     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1050     int i;
1051 
1052     if (!cpuc->enabled)
1053         return;
1054 
1055     cpuc->enabled = 0;
1056     cpuc->n_added = 0;
1057 
1058     for (i = 0; i < sparc_pmu->num_pcrs; i++) {
1059         u64 val = cpuc->pcr[i];
1060 
1061         val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
1062              sparc_pmu->hv_bit | sparc_pmu->irq_bit);
1063         cpuc->pcr[i] = val;
1064         pcr_ops->write_pcr(i, cpuc->pcr[i]);
1065     }
1066 }
1067 
1068 static int active_event_index(struct cpu_hw_events *cpuc,
1069                   struct perf_event *event)
1070 {
1071     int i;
1072 
1073     for (i = 0; i < cpuc->n_events; i++) {
1074         if (cpuc->event[i] == event)
1075             break;
1076     }
1077     BUG_ON(i == cpuc->n_events);
1078     return cpuc->current_idx[i];
1079 }
1080 
1081 static void sparc_pmu_start(struct perf_event *event, int flags)
1082 {
1083     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1084     int idx = active_event_index(cpuc, event);
1085 
1086     if (flags & PERF_EF_RELOAD) {
1087         WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1088         sparc_perf_event_set_period(event, &event->hw, idx);
1089     }
1090 
1091     event->hw.state = 0;
1092 
1093     sparc_pmu_enable_event(cpuc, &event->hw, idx);
1094 
1095     perf_event_update_userpage(event);
1096 }
1097 
1098 static void sparc_pmu_stop(struct perf_event *event, int flags)
1099 {
1100     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1101     int idx = active_event_index(cpuc, event);
1102 
1103     if (!(event->hw.state & PERF_HES_STOPPED)) {
1104         sparc_pmu_disable_event(cpuc, &event->hw, idx);
1105         event->hw.state |= PERF_HES_STOPPED;
1106     }
1107 
1108     if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
1109         sparc_perf_event_update(event, &event->hw, idx);
1110         event->hw.state |= PERF_HES_UPTODATE;
1111     }
1112 }
1113 
1114 static void sparc_pmu_del(struct perf_event *event, int _flags)
1115 {
1116     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1117     unsigned long flags;
1118     int i;
1119 
1120     local_irq_save(flags);
1121 
1122     for (i = 0; i < cpuc->n_events; i++) {
1123         if (event == cpuc->event[i]) {
1124             /* Absorb the final count and turn off the
1125              * event.
1126              */
1127             sparc_pmu_stop(event, PERF_EF_UPDATE);
1128 
1129             /* Shift remaining entries down into
1130              * the existing slot.
1131              */
1132             while (++i < cpuc->n_events) {
1133                 cpuc->event[i - 1] = cpuc->event[i];
1134                 cpuc->events[i - 1] = cpuc->events[i];
1135                 cpuc->current_idx[i - 1] =
1136                     cpuc->current_idx[i];
1137             }
1138 
1139             perf_event_update_userpage(event);
1140 
1141             cpuc->n_events--;
1142             break;
1143         }
1144     }
1145 
1146     local_irq_restore(flags);
1147 }
1148 
1149 static void sparc_pmu_read(struct perf_event *event)
1150 {
1151     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1152     int idx = active_event_index(cpuc, event);
1153     struct hw_perf_event *hwc = &event->hw;
1154 
1155     sparc_perf_event_update(event, hwc, idx);
1156 }
1157 
1158 static atomic_t active_events = ATOMIC_INIT(0);
1159 static DEFINE_MUTEX(pmc_grab_mutex);
1160 
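/* The hardware counters are shared with the NMI watchdog.  The first
 * perf event to be created stops the watchdog on all cpus and latches
 * the current %pcr values; when the last event is destroyed the
 * watchdog is started again.
 */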
1161 static void perf_stop_nmi_watchdog(void *unused)
1162 {
1163     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1164     int i;
1165 
1166     stop_nmi_watchdog(NULL);
1167     for (i = 0; i < sparc_pmu->num_pcrs; i++)
1168         cpuc->pcr[i] = pcr_ops->read_pcr(i);
1169 }
1170 
1171 static void perf_event_grab_pmc(void)
1172 {
1173     if (atomic_inc_not_zero(&active_events))
1174         return;
1175 
1176     mutex_lock(&pmc_grab_mutex);
1177     if (atomic_read(&active_events) == 0) {
1178         if (atomic_read(&nmi_active) > 0) {
1179             on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
1180             BUG_ON(atomic_read(&nmi_active) != 0);
1181         }
1182         atomic_inc(&active_events);
1183     }
1184     mutex_unlock(&pmc_grab_mutex);
1185 }
1186 
1187 static void perf_event_release_pmc(void)
1188 {
1189     if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
1190         if (atomic_read(&nmi_active) == 0)
1191             on_each_cpu(start_nmi_watchdog, NULL, 1);
1192         mutex_unlock(&pmc_grab_mutex);
1193     }
1194 }
1195 
1196 static const struct perf_event_map *sparc_map_cache_event(u64 config)
1197 {
1198     unsigned int cache_type, cache_op, cache_result;
1199     const struct perf_event_map *pmap;
1200 
1201     if (!sparc_pmu->cache_map)
1202         return ERR_PTR(-ENOENT);
1203 
1204     cache_type = (config >>  0) & 0xff;
1205     if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
1206         return ERR_PTR(-EINVAL);
1207 
1208     cache_op = (config >>  8) & 0xff;
1209     if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
1210         return ERR_PTR(-EINVAL);
1211 
1212     cache_result = (config >> 16) & 0xff;
1213     if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
1214         return ERR_PTR(-EINVAL);
1215 
1216     pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);
1217 
1218     if (pmap->encoding == CACHE_OP_UNSUPPORTED)
1219         return ERR_PTR(-ENOENT);
1220 
1221     if (pmap->encoding == CACHE_OP_NONSENSE)
1222         return ERR_PTR(-EINVAL);
1223 
1224     return pmap;
1225 }
1226 
1227 static void hw_perf_event_destroy(struct perf_event *event)
1228 {
1229     perf_event_release_pmc();
1230 }
1231 
1232 /* Make sure all events can be scheduled into the hardware at
1233  * the same time.  This is simplified by the fact that we only
1234  * need to support 2 simultaneous HW events.
1235  *
1236  * As a side effect, the evts[]->hw.idx values will be assigned
1237  * on success.  These are pending indexes.  When the events are
1238  * actually programmed into the chip, these values will propagate
1239  * to the per-cpu cpuc->current_idx[] slots, see the code in
1240  * calculate_single_pcr() and calculate_multiple_pcrs() for details.
1241  */
1242 static int sparc_check_constraints(struct perf_event **evts,
1243                    unsigned long *events, int n_ev)
1244 {
1245     u8 msk0 = 0, msk1 = 0;
1246     int idx0 = 0;
1247 
1248     /* This case is possible when we are invoked from
1249      * hw_perf_group_sched_in().
1250      */
1251     if (!n_ev)
1252         return 0;
1253 
1254     if (n_ev > sparc_pmu->max_hw_events)
1255         return -1;
1256 
1257     if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
1258         int i;
1259 
1260         for (i = 0; i < n_ev; i++)
1261             evts[i]->hw.idx = i;
1262         return 0;
1263     }
1264 
1265     msk0 = perf_event_get_msk(events[0]);
1266     if (n_ev == 1) {
1267         if (msk0 & PIC_LOWER)
1268             idx0 = 1;
1269         goto success;
1270     }
1271     BUG_ON(n_ev != 2);
1272     msk1 = perf_event_get_msk(events[1]);
1273 
1274     /* If both events can go on any counter, OK.  */
1275     if (msk0 == (PIC_UPPER | PIC_LOWER) &&
1276         msk1 == (PIC_UPPER | PIC_LOWER))
1277         goto success;
1278 
1279     /* If one event is limited to a specific counter,
1280      * and the other can go on both, OK.
1281      */
1282     if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) &&
1283         msk1 == (PIC_UPPER | PIC_LOWER)) {
1284         if (msk0 & PIC_LOWER)
1285             idx0 = 1;
1286         goto success;
1287     }
1288 
1289     if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
1290         msk0 == (PIC_UPPER | PIC_LOWER)) {
1291         if (msk1 & PIC_UPPER)
1292             idx0 = 1;
1293         goto success;
1294     }
1295 
1296     /* If the events are fixed to different counters, OK.  */
1297     if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) ||
1298         (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) {
1299         if (msk0 & PIC_LOWER)
1300             idx0 = 1;
1301         goto success;
1302     }
1303 
1304     /* Otherwise, there is a conflict.  */
1305     return -1;
1306 
1307 success:
1308     evts[0]->hw.idx = idx0;
1309     if (n_ev == 2)
1310         evts[1]->hw.idx = idx0 ^ 1;
1311     return 0;
1312 }
1313 
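/* On PMUs where the user/supervisor/hypervisor trace enable bits are
 * shared by all counters (SPARC_PMU_ALL_EXCLUDES_SAME), every event in
 * a group must use the same exclude_user/exclude_kernel/exclude_hv
 * settings or the group cannot be scheduled.
 */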
1314 static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
1315 {
1316     int eu = 0, ek = 0, eh = 0;
1317     struct perf_event *event;
1318     int i, n, first;
1319 
1320     if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
1321         return 0;
1322 
1323     n = n_prev + n_new;
1324     if (n <= 1)
1325         return 0;
1326 
1327     first = 1;
1328     for (i = 0; i < n; i++) {
1329         event = evts[i];
1330         if (first) {
1331             eu = event->attr.exclude_user;
1332             ek = event->attr.exclude_kernel;
1333             eh = event->attr.exclude_hv;
1334             first = 0;
1335         } else if (event->attr.exclude_user != eu ||
1336                event->attr.exclude_kernel != ek ||
1337                event->attr.exclude_hv != eh) {
1338             return -EAGAIN;
1339         }
1340     }
1341 
1342     return 0;
1343 }
1344 
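/* Gather the group leader and its active hardware sibling events into
 * evts[]/events[], marking each with PIC_NO_INDEX.  Returns the number
 * of events collected, or -1 if the group does not fit into max_count
 * hardware counters.
 */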
1345 static int collect_events(struct perf_event *group, int max_count,
1346               struct perf_event *evts[], unsigned long *events,
1347               int *current_idx)
1348 {
1349     struct perf_event *event;
1350     int n = 0;
1351 
1352     if (!is_software_event(group)) {
1353         if (n >= max_count)
1354             return -1;
1355         evts[n] = group;
1356         events[n] = group->hw.event_base;
1357         current_idx[n++] = PIC_NO_INDEX;
1358     }
1359     for_each_sibling_event(event, group) {
1360         if (!is_software_event(event) &&
1361             event->state != PERF_EVENT_STATE_OFF) {
1362             if (n >= max_count)
1363                 return -1;
1364             evts[n] = event;
1365             events[n] = event->hw.event_base;
1366             current_idx[n++] = PIC_NO_INDEX;
1367         }
1368     }
1369     return n;
1370 }
1371 
1372 static int sparc_pmu_add(struct perf_event *event, int ef_flags)
1373 {
1374     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1375     int n0, ret = -EAGAIN;
1376     unsigned long flags;
1377 
1378     local_irq_save(flags);
1379 
1380     n0 = cpuc->n_events;
1381     if (n0 >= sparc_pmu->max_hw_events)
1382         goto out;
1383 
1384     cpuc->event[n0] = event;
1385     cpuc->events[n0] = event->hw.event_base;
1386     cpuc->current_idx[n0] = PIC_NO_INDEX;
1387 
1388     event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1389     if (!(ef_flags & PERF_EF_START))
1390         event->hw.state |= PERF_HES_ARCH;
1391 
1392     /*
1393      * If a group events scheduling transaction was started,
1394      * skip the schedulability test here; it will be performed
1395      * at commit time (->commit_txn) as a whole.
1396      */
1397     if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
1398         goto nocheck;
1399 
1400     if (check_excludes(cpuc->event, n0, 1))
1401         goto out;
1402     if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
1403         goto out;
1404 
1405 nocheck:
1406     cpuc->n_events++;
1407     cpuc->n_added++;
1408 
1409     ret = 0;
1410 out:
1411     local_irq_restore(flags);
1412     return ret;
1413 }
1414 
1415 static int sparc_pmu_event_init(struct perf_event *event)
1416 {
1417     struct perf_event_attr *attr = &event->attr;
1418     struct perf_event *evts[MAX_HWEVENTS];
1419     struct hw_perf_event *hwc = &event->hw;
1420     unsigned long events[MAX_HWEVENTS];
1421     int current_idx_dmy[MAX_HWEVENTS];
1422     const struct perf_event_map *pmap;
1423     int n;
1424 
1425     if (atomic_read(&nmi_active) < 0)
1426         return -ENODEV;
1427 
1428     /* does not support taken branch sampling */
1429     if (has_branch_stack(event))
1430         return -EOPNOTSUPP;
1431 
1432     switch (attr->type) {
1433     case PERF_TYPE_HARDWARE:
1434         if (attr->config >= sparc_pmu->max_events)
1435             return -EINVAL;
1436         pmap = sparc_pmu->event_map(attr->config);
1437         break;
1438 
1439     case PERF_TYPE_HW_CACHE:
1440         pmap = sparc_map_cache_event(attr->config);
1441         if (IS_ERR(pmap))
1442             return PTR_ERR(pmap);
1443         break;
1444 
1445     case PERF_TYPE_RAW:
1446         pmap = NULL;
1447         break;
1448 
1449     default:
1450         return -ENOENT;
1451 
1452     }
1453 
1454     if (pmap) {
1455         hwc->event_base = perf_event_encode(pmap);
1456     } else {
1457         /*
1458          * User gives us "(encoding << 16) | pic_mask" for
1459          * PERF_TYPE_RAW events.
1460          */
1461         hwc->event_base = attr->config;
1462     }
1463 
1464     /* We save the enable bits in the config_base.  */
1465     hwc->config_base = sparc_pmu->irq_bit;
1466     if (!attr->exclude_user)
1467         hwc->config_base |= sparc_pmu->user_bit;
1468     if (!attr->exclude_kernel)
1469         hwc->config_base |= sparc_pmu->priv_bit;
1470     if (!attr->exclude_hv)
1471         hwc->config_base |= sparc_pmu->hv_bit;
1472 
1473     n = 0;
1474     if (event->group_leader != event) {
1475         n = collect_events(event->group_leader,
1476                    sparc_pmu->max_hw_events - 1,
1477                    evts, events, current_idx_dmy);
1478         if (n < 0)
1479             return -EINVAL;
1480     }
1481     events[n] = hwc->event_base;
1482     evts[n] = event;
1483 
1484     if (check_excludes(evts, n, 1))
1485         return -EINVAL;
1486 
1487     if (sparc_check_constraints(evts, events, n + 1))
1488         return -EINVAL;
1489 
1490     hwc->idx = PIC_NO_INDEX;
1491 
1492     /* Try to do all error checking before this point, as unwinding
1493      * state after grabbing the PMC is difficult.
1494      */
1495     perf_event_grab_pmc();
1496     event->destroy = hw_perf_event_destroy;
1497 
1498     if (!hwc->sample_period) {
1499         hwc->sample_period = MAX_PERIOD;
1500         hwc->last_period = hwc->sample_period;
1501         local64_set(&hwc->period_left, hwc->sample_period);
1502     }
1503 
1504     return 0;
1505 }
1506 
1507 /*
1508  * Start a group events scheduling transaction.  Set the flag so
1509  * that pmu::enable() does not perform the schedulability test;
1510  * it will be performed at commit time instead.
1511  */
1512 static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
1513 {
1514     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
1515 
1516     WARN_ON_ONCE(cpuhw->txn_flags);     /* txn already in flight */
1517 
1518     cpuhw->txn_flags = txn_flags;
1519     if (txn_flags & ~PERF_PMU_TXN_ADD)
1520         return;
1521 
1522     perf_pmu_disable(pmu);
1523 }
1524 
1525 /*
1526  * Stop (cancel) a group events scheduling transaction.  Clear
1527  * the flag so that pmu::enable() will perform the
1528  * schedulability test.
1529  */
1530 static void sparc_pmu_cancel_txn(struct pmu *pmu)
1531 {
1532     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
1533     unsigned int txn_flags;
1534 
1535     WARN_ON_ONCE(!cpuhw->txn_flags);    /* no txn in flight */
1536 
1537     txn_flags = cpuhw->txn_flags;
1538     cpuhw->txn_flags = 0;
1539     if (txn_flags & ~PERF_PMU_TXN_ADD)
1540         return;
1541 
1542     perf_pmu_enable(pmu);
1543 }
1544 
1545 /*
1546  * Commit a group events scheduling transaction.  Perform the
1547  * group schedulability test as a whole and return 0 on
1548  * success.
1549  */
1550 static int sparc_pmu_commit_txn(struct pmu *pmu)
1551 {
1552     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1553     int n;
1554 
1555     if (!sparc_pmu)
1556         return -EINVAL;
1557 
1558     WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
1559 
1560     if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
1561         cpuc->txn_flags = 0;
1562         return 0;
1563     }
1564 
1565     n = cpuc->n_events;
1566     if (check_excludes(cpuc->event, 0, n))
1567         return -EINVAL;
1568     if (sparc_check_constraints(cpuc->event, cpuc->events, n))
1569         return -EAGAIN;
1570 
1571     cpuc->txn_flags = 0;
1572     perf_pmu_enable(pmu);
1573     return 0;
1574 }
1575 
1576 static struct pmu pmu = {
1577     .pmu_enable = sparc_pmu_enable,
1578     .pmu_disable    = sparc_pmu_disable,
1579     .event_init = sparc_pmu_event_init,
1580     .add        = sparc_pmu_add,
1581     .del        = sparc_pmu_del,
1582     .start      = sparc_pmu_start,
1583     .stop       = sparc_pmu_stop,
1584     .read       = sparc_pmu_read,
1585     .start_txn  = sparc_pmu_start_txn,
1586     .cancel_txn = sparc_pmu_cancel_txn,
1587     .commit_txn = sparc_pmu_commit_txn,
1588 };
1589 
1590 void perf_event_print_debug(void)
1591 {
1592     unsigned long flags;
1593     int cpu, i;
1594 
1595     if (!sparc_pmu)
1596         return;
1597 
1598     local_irq_save(flags);
1599 
1600     cpu = smp_processor_id();
1601 
1602     pr_info("\n");
1603     for (i = 0; i < sparc_pmu->num_pcrs; i++)
1604         pr_info("CPU#%d: PCR%d[%016llx]\n",
1605             cpu, i, pcr_ops->read_pcr(i));
1606     for (i = 0; i < sparc_pmu->num_pic_regs; i++)
1607         pr_info("CPU#%d: PIC%d[%016llx]\n",
1608             cpu, i, pcr_ops->read_pic(i));
1609 
1610     local_irq_restore(flags);
1611 }
1612 
1613 static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1614                         unsigned long cmd, void *__args)
1615 {
1616     struct die_args *args = __args;
1617     struct perf_sample_data data;
1618     struct cpu_hw_events *cpuc;
1619     struct pt_regs *regs;
1620     u64 finish_clock;
1621     u64 start_clock;
1622     int i;
1623 
1624     if (!atomic_read(&active_events))
1625         return NOTIFY_DONE;
1626 
1627     switch (cmd) {
1628     case DIE_NMI:
1629         break;
1630 
1631     default:
1632         return NOTIFY_DONE;
1633     }
1634 
1635     start_clock = sched_clock();
1636 
1637     regs = args->regs;
1638 
1639     cpuc = this_cpu_ptr(&cpu_hw_events);
1640 
1641     /* If the PMU has the TOE IRQ enable bits, we need to do a
1642      * dummy write to the %pcr to clear the overflow bits and thus
1643      * the interrupt.
1644      *
1645      * Do this before we peek at the counters to determine
1646      * overflow so we don't lose any events.
1647      */
1648     if (sparc_pmu->irq_bit &&
1649         sparc_pmu->num_pcrs == 1)
1650         pcr_ops->write_pcr(0, cpuc->pcr[0]);
1651 
1652     for (i = 0; i < cpuc->n_events; i++) {
1653         struct perf_event *event = cpuc->event[i];
1654         int idx = cpuc->current_idx[i];
1655         struct hw_perf_event *hwc;
1656         u64 val;
1657 
1658         if (sparc_pmu->irq_bit &&
1659             sparc_pmu->num_pcrs > 1)
1660             pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
1661 
1662         hwc = &event->hw;
1663         val = sparc_perf_event_update(event, hwc, idx);
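        /* The counter was programmed with the negated sampling period;
         * if bit 31 is still set it has not yet wrapped through zero,
         * i.e. this event did not overflow.
         */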
1664         if (val & (1ULL << 31))
1665             continue;
1666 
1667         perf_sample_data_init(&data, 0, hwc->last_period);
1668         if (!sparc_perf_event_set_period(event, hwc, idx))
1669             continue;
1670 
1671         if (perf_event_overflow(event, &data, regs))
1672             sparc_pmu_stop(event, 0);
1673     }
1674 
1675     finish_clock = sched_clock();
1676 
1677     perf_sample_event_took(finish_clock - start_clock);
1678 
1679     return NOTIFY_STOP;
1680 }
1681 
1682 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1683     .notifier_call      = perf_event_nmi_handler,
1684 };
1685 
1686 static bool __init supported_pmu(void)
1687 {
1688     if (!strcmp(sparc_pmu_type, "ultra3") ||
1689         !strcmp(sparc_pmu_type, "ultra3+") ||
1690         !strcmp(sparc_pmu_type, "ultra3i") ||
1691         !strcmp(sparc_pmu_type, "ultra4+")) {
1692         sparc_pmu = &ultra3_pmu;
1693         return true;
1694     }
1695     if (!strcmp(sparc_pmu_type, "niagara")) {
1696         sparc_pmu = &niagara1_pmu;
1697         return true;
1698     }
1699     if (!strcmp(sparc_pmu_type, "niagara2") ||
1700         !strcmp(sparc_pmu_type, "niagara3")) {
1701         sparc_pmu = &niagara2_pmu;
1702         return true;
1703     }
1704     if (!strcmp(sparc_pmu_type, "niagara4") ||
1705         !strcmp(sparc_pmu_type, "niagara5")) {
1706         sparc_pmu = &niagara4_pmu;
1707         return true;
1708     }
1709     if (!strcmp(sparc_pmu_type, "sparc-m7")) {
1710         sparc_pmu = &sparc_m7_pmu;
1711         return true;
1712     }
1713     return false;
1714 }
1715 
1716 static int __init init_hw_perf_events(void)
1717 {
1718     int err;
1719 
1720     pr_info("Performance events: ");
1721 
1722     err = pcr_arch_init();
1723     if (err || !supported_pmu()) {
1724         pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
1725         return 0;
1726     }
1727 
1728     pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1729 
1730     perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1731     register_die_notifier(&perf_event_nmi_notifier);
1732 
1733     return 0;
1734 }
1735 pure_initcall(init_hw_perf_events);
1736 
1737 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
1738                struct pt_regs *regs)
1739 {
1740     unsigned long ksp, fp;
1741 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1742     int graph = 0;
1743 #endif
1744 
1745     stack_trace_flush();
1746 
1747     perf_callchain_store(entry, regs->tpc);
1748 
1749     ksp = regs->u_regs[UREG_I6];
1750     fp = ksp + STACK_BIAS;
1751     do {
1752         struct sparc_stackf *sf;
1753         struct pt_regs *regs;
1754         unsigned long pc;
1755 
1756         if (!kstack_valid(current_thread_info(), fp))
1757             break;
1758 
1759         sf = (struct sparc_stackf *) fp;
1760         regs = (struct pt_regs *) (sf + 1);
1761 
1762         if (kstack_is_trap_frame(current_thread_info(), regs)) {
1763             if (user_mode(regs))
1764                 break;
1765             pc = regs->tpc;
1766             fp = regs->u_regs[UREG_I6] + STACK_BIAS;
1767         } else {
1768             pc = sf->callers_pc;
1769             fp = (unsigned long)sf->fp + STACK_BIAS;
1770         }
1771         perf_callchain_store(entry, pc);
1772 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1773         if ((pc + 8UL) == (unsigned long) &return_to_handler) {
1774             struct ftrace_ret_stack *ret_stack;
1775             ret_stack = ftrace_graph_get_ret_stack(current,
1776                                    graph);
1777             if (ret_stack) {
1778                 pc = ret_stack->ret;
1779                 perf_callchain_store(entry, pc);
1780                 graph++;
1781             }
1782         }
1783 #endif
1784     } while (entry->nr < entry->max_stack);
1785 }
1786 
1787 static inline int
1788 valid_user_frame(const void __user *fp, unsigned long size)
1789 {
1790     /* addresses should be at least 4-byte aligned */
1791     if (((unsigned long) fp) & 3)
1792         return 0;
1793 
1794     return (__range_not_ok(fp, size, TASK_SIZE) == 0);
1795 }
1796 
1797 static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
1798                    struct pt_regs *regs)
1799 {
1800     unsigned long ufp;
1801 
1802     ufp = regs->u_regs[UREG_FP] + STACK_BIAS;
1803     do {
1804         struct sparc_stackf __user *usf;
1805         struct sparc_stackf sf;
1806         unsigned long pc;
1807 
1808         usf = (struct sparc_stackf __user *)ufp;
1809         if (!valid_user_frame(usf, sizeof(sf)))
1810             break;
1811 
1812         if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
1813             break;
1814 
1815         pc = sf.callers_pc;
1816         ufp = (unsigned long)sf.fp + STACK_BIAS;
1817         perf_callchain_store(entry, pc);
1818     } while (entry->nr < entry->max_stack);
1819 }
1820 
1821 static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
1822                    struct pt_regs *regs)
1823 {
1824     unsigned long ufp;
1825 
1826     ufp = regs->u_regs[UREG_FP] & 0xffffffffUL;
1827     do {
1828         unsigned long pc;
1829 
1830         if (thread32_stack_is_64bit(ufp)) {
1831             struct sparc_stackf __user *usf;
1832             struct sparc_stackf sf;
1833 
1834             ufp += STACK_BIAS;
1835             usf = (struct sparc_stackf __user *)ufp;
1836             if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
1837                 break;
1838             pc = sf.callers_pc & 0xffffffff;
1839             ufp = ((unsigned long) sf.fp) & 0xffffffff;
1840         } else {
1841             struct sparc_stackf32 __user *usf;
1842             struct sparc_stackf32 sf;
1843             usf = (struct sparc_stackf32 __user *)ufp;
1844             if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
1845                 break;
1846             pc = sf.callers_pc;
1847             ufp = (unsigned long)sf.fp;
1848         }
1849         perf_callchain_store(entry, pc);
1850     } while (entry->nr < entry->max_stack);
1851 }
1852 
1853 void
1854 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
1855 {
1856     u64 saved_fault_address = current_thread_info()->fault_address;
1857     u8 saved_fault_code = get_thread_fault_code();
1858 
1859     perf_callchain_store(entry, regs->tpc);
1860 
1861     if (!current->mm)
1862         return;
1863 
1864     flushw_user();
1865 
1866     pagefault_disable();
1867 
1868     if (test_thread_flag(TIF_32BIT))
1869         perf_callchain_user_32(entry, regs);
1870     else
1871         perf_callchain_user_64(entry, regs);
1872 
1873     pagefault_enable();
1874 
1875     set_thread_fault_code(saved_fault_code);
1876     current_thread_info()->fault_address = saved_fault_address;
1877 }