Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *
0004  * Copyright (C) 2007 Alan Stern
0005  * Copyright (C) 2009 IBM Corporation
0006  * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
0007  *
0008  * Authors: Alan Stern <stern@rowland.harvard.edu>
0009  *          K.Prasad <prasad@linux.vnet.ibm.com>
0010  *          Frederic Weisbecker <fweisbec@gmail.com>
0011  */
0012 
0013 /*
0014  * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
0015  * using the CPU's debug registers.
0016  */
0017 
0018 #include <linux/perf_event.h>
0019 #include <linux/hw_breakpoint.h>
0020 #include <linux/irqflags.h>
0021 #include <linux/notifier.h>
0022 #include <linux/kallsyms.h>
0023 #include <linux/kprobes.h>
0024 #include <linux/percpu.h>
0025 #include <linux/kdebug.h>
0026 #include <linux/kernel.h>
0027 #include <linux/export.h>
0028 #include <linux/sched.h>
0029 #include <linux/smp.h>
0030 
0031 #include <asm/hw_breakpoint.h>
0032 #include <asm/processor.h>
0033 #include <asm/debugreg.h>
0034 #include <asm/user.h>
0035 #include <asm/desc.h>
0036 #include <asm/tlbflush.h>
0037 
/* Per cpu debug control register value (shadow of DR7 for this cpu) */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address register values (shadows of DR0..DR3) */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each cpu
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
0050 
0051 
0052 static inline unsigned long
0053 __encode_dr7(int drnum, unsigned int len, unsigned int type)
0054 {
0055     unsigned long bp_info;
0056 
0057     bp_info = (len | type) & 0xf;
0058     bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
0059     bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
0060 
0061     return bp_info;
0062 }
0063 
0064 /*
0065  * Encode the length, type, Exact, and Enable bits for a particular breakpoint
0066  * as stored in debug register 7.
0067  */
0068 unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
0069 {
0070     return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
0071 }
0072 
/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 *
 * NOTE(review): the 0x40/0x80 bits OR'ed into *len and *type appear to
 * fold in the X86_BREAKPOINT_* base flags so that the outputs match the
 * arch encodings used elsewhere in this file — confirm against the
 * X86_BREAKPOINT_LEN_*/X86_BREAKPOINT_* definitions in
 * <asm/hw_breakpoint.h>.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
    /* Shift this breakpoint's 4-bit len/type control field down to bits 3:0. */
    int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

    *len = (bp_info & 0xc) | 0x40;
    *type = (bp_info & 0x3) | 0x80;

    /* The low two bits per breakpoint in DR7 are its enable bits. */
    return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
0086 
/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint. Eventually we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 *
 * Returns 0 on success, -EBUSY if all HBP_NUM slots are occupied.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
    struct arch_hw_breakpoint *info = counter_arch_bp(bp);
    unsigned long *dr7;
    int i;

    lockdep_assert_irqs_disabled();

    /* Claim the first free per-cpu slot; slot index == debug register. */
    for (i = 0; i < HBP_NUM; i++) {
        struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

        if (!*slot) {
            *slot = bp;
            break;
        }
    }

    if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
        return -EBUSY;

    /* Program the address and mirror it for hw_breakpoint_restore(). */
    set_debugreg(info->address, i);
    __this_cpu_write(cpu_debugreg[i], info->address);

    /* Merge this breakpoint's enable/control bits into the DR7 shadow. */
    dr7 = this_cpu_ptr(&cpu_dr7);
    *dr7 |= encode_dr7(i, info->len, info->type);

    /*
     * Ensure we first write cpu_dr7 before we set the DR7 register.
     * This ensures an NMI never see cpu_dr7 0 when DR7 is not.
     */
    barrier();

    set_debugreg(*dr7, 7);
    if (info->mask)
        set_dr_addr_mask(info->mask, i);

    return 0;
}
0134 
/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
    struct arch_hw_breakpoint *info = counter_arch_bp(bp);
    unsigned long dr7;
    int i;

    lockdep_assert_irqs_disabled();

    /* Find and release the per-cpu slot this event was installed into. */
    for (i = 0; i < HBP_NUM; i++) {
        struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

        if (*slot == bp) {
            *slot = NULL;
            break;
        }
    }

    if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
        return;

    /* Clear this breakpoint's enable and control bits from the DR7 value. */
    dr7 = this_cpu_read(cpu_dr7);
    dr7 &= ~__encode_dr7(i, info->len, info->type);

    set_debugreg(dr7, 7);
    if (info->mask)
        set_dr_addr_mask(0, i);

    /*
     * Ensure the write to cpu_dr7 is after we've set the DR7 register.
     * This ensures an NMI never see cpu_dr7 0 when DR7 is not.
     */
    barrier();

    this_cpu_write(cpu_dr7, dr7);
}
0179 
0180 static int arch_bp_generic_len(int x86_len)
0181 {
0182     switch (x86_len) {
0183     case X86_BREAKPOINT_LEN_1:
0184         return HW_BREAKPOINT_LEN_1;
0185     case X86_BREAKPOINT_LEN_2:
0186         return HW_BREAKPOINT_LEN_2;
0187     case X86_BREAKPOINT_LEN_4:
0188         return HW_BREAKPOINT_LEN_4;
0189 #ifdef CONFIG_X86_64
0190     case X86_BREAKPOINT_LEN_8:
0191         return HW_BREAKPOINT_LEN_8;
0192 #endif
0193     default:
0194         return -EINVAL;
0195     }
0196 }
0197 
0198 int arch_bp_generic_fields(int x86_len, int x86_type,
0199                int *gen_len, int *gen_type)
0200 {
0201     int len;
0202 
0203     /* Type */
0204     switch (x86_type) {
0205     case X86_BREAKPOINT_EXECUTE:
0206         if (x86_len != X86_BREAKPOINT_LEN_X)
0207             return -EINVAL;
0208 
0209         *gen_type = HW_BREAKPOINT_X;
0210         *gen_len = sizeof(long);
0211         return 0;
0212     case X86_BREAKPOINT_WRITE:
0213         *gen_type = HW_BREAKPOINT_W;
0214         break;
0215     case X86_BREAKPOINT_RW:
0216         *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
0217         break;
0218     default:
0219         return -EINVAL;
0220     }
0221 
0222     /* Len */
0223     len = arch_bp_generic_len(x86_len);
0224     if (len < 0)
0225         return -EINVAL;
0226     *gen_len = len;
0227 
0228     return 0;
0229 }
0230 
0231 /*
0232  * Check for virtual address in kernel space.
0233  */
0234 int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
0235 {
0236     unsigned long va;
0237     int len;
0238 
0239     va = hw->address;
0240     len = arch_bp_generic_len(hw->len);
0241     WARN_ON_ONCE(len < 0);
0242 
0243     /*
0244      * We don't need to worry about va + len - 1 overflowing:
0245      * we already require that va is aligned to a multiple of len.
0246      */
0247     return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
0248 }
0249 
/*
 * Checks whether the inclusive range [addr, end] overlaps the
 * half-open area [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
                   unsigned long base, unsigned long size)
{
    if (end < base)
        return false;

    return addr < base + size;
}
0258 
/*
 * Checks whether the range from addr to end, inclusive, overlaps the fixed
 * mapped CPU entry area range or other ranges used for CPU entry.
 */
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
{
    int cpu;

    /* The CPU entry area is always used for CPU entry */
    if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
            CPU_ENTRY_AREA_TOTAL_SIZE))
        return true;

    /*
     * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
     * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
     */
#ifdef CONFIG_SMP
    if (within_area(addr, end, (unsigned long)__per_cpu_offset,
            sizeof(unsigned long) * nr_cpu_ids))
        return true;
#else
    if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
            sizeof(pcpu_unit_offsets)))
        return true;
#endif

    /* Check the per-cpu structures touched by the entry code. */
    for_each_possible_cpu(cpu) {
        /* The original rw GDT is being used after load_direct_gdt() */
        if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
                GDT_SIZE))
            return true;

        /*
         * cpu_tss_rw is not directly referenced by hardware, but
         * cpu_tss_rw is also used in CPU entry code.
         */
        if (within_area(addr, end,
                (unsigned long)&per_cpu(cpu_tss_rw, cpu),
                sizeof(struct tss_struct)))
            return true;

        /*
         * cpu_tlbstate.user_pcid_flush_mask is used for CPU entry.
         * A data breakpoint on it would cause an unwanted #DB.
         * Protect the full cpu_tlbstate structure to be sure.
         */
        if (within_area(addr, end,
                (unsigned long)&per_cpu(cpu_tlbstate, cpu),
                sizeof(struct tlb_state)))
            return true;

        /*
         * When in guest (X86_FEATURE_HYPERVISOR), local_db_save()
         * will read per-cpu cpu_dr7 before clear dr7 register.
         */
        if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
                sizeof(cpu_dr7)))
            return true;
    }

    return false;
}
0322 
/*
 * Translate the generic perf attributes (bp_addr/bp_len/bp_type) into
 * the arch-specific encoding in @hw, validating them along the way.
 * Returns 0 on success or a negative errno.
 */
static int arch_build_bp_info(struct perf_event *bp,
                  const struct perf_event_attr *attr,
                  struct arch_hw_breakpoint *hw)
{
    unsigned long bp_end;

    /* Reject a range whose end wraps around the address space. */
    bp_end = attr->bp_addr + attr->bp_len - 1;
    if (bp_end < attr->bp_addr)
        return -EINVAL;

    /*
     * Prevent any breakpoint of any type that overlaps the CPU
     * entry area and data.  This protects the IST stacks and also
     * reduces the chance that we ever find out what happens if
     * there's a data breakpoint on the GDT, IDT, or TSS.
     */
    if (within_cpu_entry(attr->bp_addr, bp_end))
        return -EINVAL;

    hw->address = attr->bp_addr;
    hw->mask = 0;

    /* Type */
    switch (attr->bp_type) {
    case HW_BREAKPOINT_W:
        hw->type = X86_BREAKPOINT_WRITE;
        break;
    case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
        hw->type = X86_BREAKPOINT_RW;
        break;
    case HW_BREAKPOINT_X:
        /*
         * We don't allow kernel breakpoints in places that are not
         * acceptable for kprobes.  On non-kprobes kernels, we don't
         * allow kernel breakpoints at all.
         */
        if (attr->bp_addr >= TASK_SIZE_MAX) {
            if (within_kprobe_blacklist(attr->bp_addr))
                return -EINVAL;
        }

        hw->type = X86_BREAKPOINT_EXECUTE;
        /*
         * x86 inst breakpoints need to have a specific undefined len.
         * But we still need to check userspace is not trying to setup
         * an unsupported length, to get a range breakpoint for example.
         */
        if (attr->bp_len == sizeof(long)) {
            hw->len = X86_BREAKPOINT_LEN_X;
            return 0;
        }
        /* Any other length for an execute breakpoint is invalid. */
        fallthrough;
    default:
        return -EINVAL;
    }

    /* Len */
    switch (attr->bp_len) {
    case HW_BREAKPOINT_LEN_1:
        hw->len = X86_BREAKPOINT_LEN_1;
        break;
    case HW_BREAKPOINT_LEN_2:
        hw->len = X86_BREAKPOINT_LEN_2;
        break;
    case HW_BREAKPOINT_LEN_4:
        hw->len = X86_BREAKPOINT_LEN_4;
        break;
#ifdef CONFIG_X86_64
    case HW_BREAKPOINT_LEN_8:
        hw->len = X86_BREAKPOINT_LEN_8;
        break;
#endif
    default:
        /* AMD range breakpoint: power-of-2 length, naturally aligned. */
        if (!is_power_of_2(attr->bp_len))
            return -EINVAL;
        if (attr->bp_addr & (attr->bp_len - 1))
            return -EINVAL;

        if (!boot_cpu_has(X86_FEATURE_BPEXT))
            return -EOPNOTSUPP;

        /*
         * It's impossible to use a range breakpoint to fake out
         * user vs kernel detection because bp_len - 1 can't
         * have the high bit set.  If we ever allow range instruction
         * breakpoints, then we'll have to check for kprobe-blacklisted
         * addresses anywhere in the range.
         */
        hw->mask = attr->bp_len - 1;
        hw->len = X86_BREAKPOINT_LEN_1;
    }

    return 0;
}
0418 
0419 /*
0420  * Validate the arch-specific HW Breakpoint register settings
0421  */
0422 int hw_breakpoint_arch_parse(struct perf_event *bp,
0423                  const struct perf_event_attr *attr,
0424                  struct arch_hw_breakpoint *hw)
0425 {
0426     unsigned int align;
0427     int ret;
0428 
0429 
0430     ret = arch_build_bp_info(bp, attr, hw);
0431     if (ret)
0432         return ret;
0433 
0434     switch (hw->len) {
0435     case X86_BREAKPOINT_LEN_1:
0436         align = 0;
0437         if (hw->mask)
0438             align = hw->mask;
0439         break;
0440     case X86_BREAKPOINT_LEN_2:
0441         align = 1;
0442         break;
0443     case X86_BREAKPOINT_LEN_4:
0444         align = 3;
0445         break;
0446 #ifdef CONFIG_X86_64
0447     case X86_BREAKPOINT_LEN_8:
0448         align = 7;
0449         break;
0450 #endif
0451     default:
0452         WARN_ON_ONCE(1);
0453         return -EINVAL;
0454     }
0455 
0456     /*
0457      * Check that the low-order bits of the address are appropriate
0458      * for the alignment implied by len.
0459      */
0460     if (hw->address & align)
0461         return -EINVAL;
0462 
0463     return 0;
0464 }
0465 
0466 /*
0467  * Release the user breakpoints used by ptrace
0468  */
0469 void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
0470 {
0471     int i;
0472     struct thread_struct *t = &tsk->thread;
0473 
0474     for (i = 0; i < HBP_NUM; i++) {
0475         unregister_hw_breakpoint(t->ptrace_bps[i]);
0476         t->ptrace_bps[i] = NULL;
0477     }
0478 
0479     t->virtual_dr6 = 0;
0480     t->ptrace_dr7 = 0;
0481 }
0482 
/*
 * Reprogram the hardware debug registers from the per-cpu shadow
 * copies (cpu_debugreg[] and cpu_dr7) maintained by
 * arch_install_hw_breakpoint().
 */
void hw_breakpoint_restore(void)
{
    set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
    set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
    set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
    set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
    /* DR6 is status, not configuration: reset it to its quiescent value. */
    set_debugreg(DR6_RESERVED, 6);
    set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
0493 
/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE returned if one of the following conditions is true.
 * i) When the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching
 * ii) When there are more bits than trap<n> set in DR6 register (such
 * as BD, BS or BT) indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP returned for all other cases
 *
 */
static int hw_breakpoint_handler(struct die_args *args)
{
    int i, rc = NOTIFY_STOP;
    struct perf_event *bp;
    unsigned long *dr6_p;
    unsigned long dr6;
    bool bpx;

    /* The DR6 value is pointed by args->err */
    dr6_p = (unsigned long *)ERR_PTR(args->err);
    dr6 = *dr6_p;

    /* Do an early return if no trap bits are set in DR6 */
    if ((dr6 & DR_TRAP_BITS) == 0)
        return NOTIFY_DONE;

    /* Handle all the breakpoints that were triggered */
    for (i = 0; i < HBP_NUM; ++i) {
        if (likely(!(dr6 & (DR_TRAP0 << i))))
            continue;

        /* No event installed in this slot on this cpu: nothing to do. */
        bp = this_cpu_read(bp_per_reg[i]);
        if (!bp)
            continue;

        bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;

        /*
         * TF and data breakpoints are traps and can be merged, however
         * instruction breakpoints are faults and will be raised
         * separately.
         *
         * However DR6 can indicate both TF and instruction
         * breakpoints. In that case take TF as that has precedence and
         * delay the instruction breakpoint for the next exception.
         */
        if (bpx && (dr6 & DR_STEP))
            continue;

        /*
         * Reset the 'i'th TRAP bit in dr6 to denote completion of
         * exception handling
         */
        (*dr6_p) &= ~(DR_TRAP0 << i);

        perf_bp_event(bp, args->regs);

        /*
         * Set up resume flag to avoid breakpoint recursion when
         * returning back to origin.
         */
        if (bpx)
            args->regs->flags |= X86_EFLAGS_RF;
    }

    /*
     * Further processing in do_debug() is needed for a) user-space
     * breakpoints (to generate signals) and b) when the system has
     * taken exception due to multiple causes
     */
    if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
        (dr6 & (~DR_TRAP_BITS)))
        rc = NOTIFY_DONE;

    return rc;
}
0576 
0577 /*
0578  * Handle debug exception notifications.
0579  */
0580 int hw_breakpoint_exceptions_notify(
0581         struct notifier_block *unused, unsigned long val, void *data)
0582 {
0583     if (val != DIE_DEBUG)
0584         return NOTIFY_DONE;
0585 
0586     return hw_breakpoint_handler(data);
0587 }
0588 
/*
 * PMU read callback for breakpoint events: intentionally empty on x86,
 * there is no counter state to update for a breakpoint event.
 */
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
    /* TODO */
}