// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/string.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/thread_info.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/uio.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/mte.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>

static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);

#ifdef CONFIG_KASAN_HW_TAGS
/*
 * The asynchronous and asymmetric MTE modes have the same behavior for
 * store operations. This flag is set when either of these modes is enabled.
 */
DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif

static void mte_sync_page_tags(struct page *page, pte_t old_pte,
			       bool check_swap, bool pte_is_tagged)
{
	if (check_swap && is_swap_pte(old_pte)) {
		swp_entry_t entry = pte_to_swp_entry(old_pte);

		if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
			return;
	}

	if (!pte_is_tagged)
		return;

	mte_clear_page_tags(page_address(page));
}

void mte_sync_tags(pte_t old_pte, pte_t pte)
{
	struct page *page = pte_page(pte);
	long i, nr_pages = compound_nr(page);
	bool check_swap = nr_pages == 1;
	bool pte_is_tagged = pte_tagged(pte);

	/* Early out if there's nothing to do */
	if (!check_swap && !pte_is_tagged)
		return;

	/* if PG_mte_tagged is set, tags have already been initialised */
	for (i = 0; i < nr_pages; i++, page++) {
		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
			mte_sync_page_tags(page, old_pte, check_swap,
					   pte_is_tagged);
	}

	/* ensure the tags are visible before the PTE is set */
	smp_wmb();
}

int memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = page_address(page1);
	addr2 = page_address(page2);
	ret = memcmp(addr1, addr2, PAGE_SIZE);

	if (!system_supports_mte() || ret)
		return ret;

	/*
	 * If the page content is identical but at least one of the pages is
	 * tagged, return non-zero to avoid KSM merging. If only one of the
	 * pages is tagged, set_pte_at() may zero or change the tags of the
	 * other page via mte_sync_tags().
	 */
	if (test_bit(PG_mte_tagged, &page1->flags) ||
	    test_bit(PG_mte_tagged, &page2->flags))
		return addr1 != addr2;

	return ret;
}

static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
	/* Enable MTE at EL1 in the tag check fault mode given by tcf. */
	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
			 SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
	isb();

	pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
}

#ifdef CONFIG_KASAN_HW_TAGS
void mte_enable_kernel_sync(void)
{
	/*
	 * Make sure we enter this function when no PE has set
	 * async mode previously.
	 */
	WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
		  "MTE async mode enabled system wide!");

	__mte_enable_kernel("synchronous", SCTLR_EL1_TCF_SYNC);
}

void mte_enable_kernel_async(void)
{
	__mte_enable_kernel("asynchronous", SCTLR_EL1_TCF_ASYNC);

	/*
	 * MTE async mode is set system wide by the first PE that
	 * executes this function.
	 *
	 * Note: If in future KASAN acquires a runtime switching
	 * mode in between sync and async, this strategy needs
	 * to be reviewed.
	 */
	if (!system_uses_mte_async_or_asymm_mode())
		static_branch_enable(&mte_async_or_asymm_mode);
}

void mte_enable_kernel_asymm(void)
{
	if (cpus_have_cap(ARM64_MTE_ASYMM)) {
		__mte_enable_kernel("asymmetric", SCTLR_EL1_TCF_ASYMM);

		/*
		 * MTE asymm mode behaves as async mode for store
		 * operations. The mode is set system wide by the
		 * first PE that executes this function.
		 *
		 * Note: If in future KASAN acquires a runtime switching
		 * mode in between sync and async, this strategy needs
		 * to be reviewed.
		 */
		if (!system_uses_mte_async_or_asymm_mode())
			static_branch_enable(&mte_async_or_asymm_mode);
	} else {
		/*
		 * If the CPU does not support MTE asymmetric mode the
		 * kernel falls back on synchronous mode which is the
		 * default for kasan=on.
		 */
		mte_enable_kernel_sync();
	}
}
#endif
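
/*
 * Illustrative note (not part of the original file): with
 * CONFIG_KASAN_HW_TAGS the in-kernel mode selected above is normally
 * driven by the "kasan.mode=" boot parameter described in
 * Documentation/dev-tools/kasan.rst, e.g. a command line such as
 *
 *	kasan=on kasan.mode=async
 *
 * leads the KASAN core to call mte_enable_kernel_async() on each CPU as
 * it comes up; without the parameter, sync is the documented default and
 * mte_enable_kernel_sync() is used.
 */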

#ifdef CONFIG_KASAN_HW_TAGS
void mte_check_tfsr_el1(void)
{
	u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);

	if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
		/*
		 * Note: isb() is not required after this direct write
		 * because there is no indirect read subsequent to it
		 * (per ARM DDI 0487F.c table D13-1).
		 */
		write_sysreg_s(0, SYS_TFSR_EL1);

		kasan_report_async();
	}
}
#endif

/*
 * This is where we resolve the system-preferred and process-requested MTE
 * mode configuration into the actual SCTLR_EL1 value that affects
 * userspace.
 */
static void mte_update_sctlr_user(struct task_struct *task)
{
	/*
	 * This must be called with preemption disabled and can only be called
	 * on the current or next task since the CPU must match where the thread
	 * is going to run. The caller is responsible for calling
	 * update_sctlr_el1() later in the same preemption disabled block.
	 */
	unsigned long sctlr = task->thread.sctlr_user;
	unsigned long mte_ctrl = task->thread.mte_ctrl;
	unsigned long pref, resolved_mte_tcf;

	pref = __this_cpu_read(mte_tcf_preferred);
	/*
	 * If there is no overlap between the system preferred and
	 * program requested values, go with what was requested.
	 */
	resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
	sctlr &= ~SCTLR_EL1_TCF0_MASK;
	/*
	 * Pick an actual setting. The order in which we check for
	 * set bits and map into register values determines our
	 * default order.
	 */
	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
		sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM);
	else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
		sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
		sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
	task->thread.sctlr_user = sctlr;
}
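
/*
 * Worked example for the resolution above (illustrative, not from the
 * original source).  The hypothetical helper below only mirrors the bit
 * logic of mte_update_sctlr_user():
 *
 *	static unsigned long resolve_tcf(unsigned long requested,
 *					 unsigned long preferred)
 *	{
 *		return (requested & preferred) ? preferred : requested;
 *	}
 *
 * With requested = MTE_CTRL_TCF_SYNC | MTE_CTRL_TCF_ASYNC and
 * preferred = MTE_CTRL_TCF_ASYNC, the intersection is non-empty and the
 * CPU preference wins, so SCTLR_EL1.TCF0 becomes asynchronous.  With
 * requested = MTE_CTRL_TCF_SYNC only, the intersection is empty, the
 * task's own request is kept and TCF0 becomes synchronous.
 */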

static void mte_update_gcr_excl(struct task_struct *task)
{
	/*
	 * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by
	 * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled.
	 */
	if (kasan_hw_tags_enabled())
		return;

	write_sysreg_s(
		((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
		 SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND,
		SYS_GCR_EL1);
}
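
/*
 * Worked example (illustrative, not from the original source): an include
 * mask of 0xfffe passed via PR_MTE_TAG_MASK makes set_mte_ctrl() below
 * store the complement 0x0001 in the MTE_CTRL_GCR_USER_EXCL field, so the
 * write above programs GCR_EL1.Exclude = 0x0001 with GCR_EL1.RRND set and
 * IRG never hands out tag 0 for this thread.  When KASAN hardware
 * tag-based mode is enabled, the user value is instead restored by
 * mte_set_user_gcr() on kernel exit, which is why this function returns
 * early in that case.
 */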

#ifdef CONFIG_KASAN_HW_TAGS
/* Only called from assembly, silence sparse */
void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
				 __le32 *updptr, int nr_inst);

void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
				 __le32 *updptr, int nr_inst)
{
	BUG_ON(nr_inst != 1); /* Branch -> NOP */

	if (kasan_hw_tags_enabled())
		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
}
#endif

void mte_thread_init_user(void)
{
	if (!system_supports_mte())
		return;

	/* clear any pending asynchronous tag fault */
	dsb(ish);
	write_sysreg_s(0, SYS_TFSRE0_EL1);
	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
	/* disable tag checking and reset tag generation mask */
	set_mte_ctrl(current, 0);
}

void mte_thread_switch(struct task_struct *next)
{
	if (!system_supports_mte())
		return;

	mte_update_sctlr_user(next);
	mte_update_gcr_excl(next);

	/* TCO may not have been disabled on exception entry for the current task. */
	mte_disable_tco_entry(next);

	/*
	 * Check if an async tag exception occurred at EL1.
	 *
	 * Note: On the context switch path we rely on the dsb() present
	 * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
	 * are synchronized before this point.
	 */
	isb();
	mte_check_tfsr_el1();
}

void mte_suspend_enter(void)
{
	if (!system_supports_mte())
		return;

	/*
	 * The barriers are required to guarantee that the indirect writes
	 * to TFSR_EL1 are synchronized before we report the state.
	 */
	dsb(nsh);
	isb();

	/* Report SYS_TFSR_EL1 before suspend entry */
	mte_check_tfsr_el1();
}

long set_mte_ctrl(struct task_struct *task, unsigned long arg)
{
	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
			SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;

	if (!system_supports_mte())
		return 0;

	if (arg & PR_MTE_TCF_ASYNC)
		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
	if (arg & PR_MTE_TCF_SYNC)
		mte_ctrl |= MTE_CTRL_TCF_SYNC;

	/*
	 * If the system supports it and both sync and async modes are
	 * specified then implicitly enable asymmetric mode.
	 * Userspace could see a mix of both sync and async anyway due
	 * to differing or changing defaults on CPUs.
	 */
	if (cpus_have_cap(ARM64_MTE_ASYMM) &&
	    (arg & PR_MTE_TCF_ASYNC) &&
	    (arg & PR_MTE_TCF_SYNC))
		mte_ctrl |= MTE_CTRL_TCF_ASYMM;

	task->thread.mte_ctrl = mte_ctrl;
	if (task == current) {
		preempt_disable();
		mte_update_sctlr_user(task);
		mte_update_gcr_excl(task);
		update_sctlr_el1(task->thread.sctlr_user);
		preempt_enable();
	}

	return 0;
}
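
/*
 * Illustrative userspace sketch (not part of this file): set_mte_ctrl() is
 * reached through the PR_SET_TAGGED_ADDR_CTRL prctl().  Requesting both
 * TCF modes lets the per-CPU preference resolved in mte_update_sctlr_user()
 * decide, and implicitly enables asymmetric mode on CPUs with the
 * ARM64_MTE_ASYMM capability.  A minimal sketch, assuming the PR_MTE_*
 * constants are visible from <linux/prctl.h>:
 *
 *	#include <stdio.h>
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	// Enable tagged addressing, allow both sync and async tag check
 *	// faults and include all tags except 0 in random tag generation.
 *	if (prctl(PR_SET_TAGGED_ADDR_CTRL,
 *		  PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
 *		  (0xfffe << PR_MTE_TAG_SHIFT),
 *		  0, 0, 0))
 *		perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
 */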

long get_mte_ctrl(struct task_struct *task)
{
	unsigned long ret;
	u64 mte_ctrl = task->thread.mte_ctrl;
	u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
		   SYS_GCR_EL1_EXCL_MASK;

	if (!system_supports_mte())
		return 0;

	ret = incl << PR_MTE_TAG_SHIFT;
	if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
		ret |= PR_MTE_TCF_ASYNC;
	if (mte_ctrl & MTE_CTRL_TCF_SYNC)
		ret |= PR_MTE_TCF_SYNC;

	return ret;
}
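
/*
 * Continuing the sketch above (illustrative, not part of this file): the
 * matching PR_GET_TAGGED_ADDR_CTRL prctl() ends up in get_mte_ctrl():
 *
 *	long ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
 *
 *	if (ctrl >= 0 && (ctrl & PR_MTE_TCF_SYNC))
 *		;	// synchronous tag check faults are enabled
 *
 * A task whose request resulted in asymmetric mode (both sync and async
 * requested) reads back both PR_MTE_TCF_SYNC and PR_MTE_TCF_ASYNC; the
 * asymmetric bit itself is not exposed here.
 */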

/*
 * Access MTE tags in another process' address space as given in mm. Update
 * the number of tags copied. Return 0 if any tags copied, error otherwise.
 * Inspired by __access_remote_vm().
 */
static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
				struct iovec *kiov, unsigned int gup_flags)
{
	struct vm_area_struct *vma;
	void __user *buf = kiov->iov_base;
	size_t len = kiov->iov_len;
	int ret = 0;	/* stays 0 if the requested length is zero */
	int write = gup_flags & FOLL_WRITE;

	if (!access_ok(buf, len))
		return -EFAULT;

	if (mmap_read_lock_killable(mm))
		return -EIO;

	while (len) {
		unsigned long tags, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
					    &vma, NULL);
		if (ret <= 0)
			break;

		/*
		 * Only copy tags if the page has been mapped as PROT_MTE
		 * (PG_mte_tagged set). Otherwise the tags are not valid and
		 * not accessible to user. Moreover, an mprotect(PROT_MTE)
		 * would cause the existing tags to be cleared if the page
		 * was never mapped with PROT_MTE.
		 */
		if (!(vma->vm_flags & VM_MTE)) {
			ret = -EOPNOTSUPP;
			put_page(page);
			break;
		}
		WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));

		/* limit access to the end of the page */
		offset = offset_in_page(addr);
		tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);

		maddr = page_address(page);
		if (write) {
			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
			set_page_dirty_lock(page);
		} else {
			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
		}
		put_page(page);

		/* error accessing the tracer's buffer */
		if (!tags)
			break;

		len -= tags;
		buf += tags;
		addr += tags * MTE_GRANULE_SIZE;
	}
	mmap_read_unlock(mm);

	/* return an error if no tags copied */
	kiov->iov_len = buf - kiov->iov_base;
	if (!kiov->iov_len) {
		/* check for error accessing the tracee's address space */
		if (ret <= 0)
			return -EIO;
		else
			return -EFAULT;
	}

	return 0;
}

/*
 * Copy MTE tags in another process' address space at 'addr' to/from tracer's
 * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
 */
static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
			      struct iovec *kiov, unsigned int gup_flags)
{
	struct mm_struct *mm;
	int ret;

	mm = get_task_mm(tsk);
	if (!mm)
		return -EPERM;

	if (!tsk->ptrace || (current != tsk->parent) ||
	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
	     !ptracer_capable(tsk, mm->user_ns))) {
		mmput(mm);
		return -EPERM;
	}

	ret = __access_remote_tags(mm, addr, kiov, gup_flags);
	mmput(mm);

	return ret;
}

int mte_ptrace_copy_tags(struct task_struct *child, long request,
			 unsigned long addr, unsigned long data)
{
	int ret;
	struct iovec kiov;
	struct iovec __user *uiov = (void __user *)data;
	unsigned int gup_flags = FOLL_FORCE;

	if (!system_supports_mte())
		return -EIO;

	if (get_user(kiov.iov_base, &uiov->iov_base) ||
	    get_user(kiov.iov_len, &uiov->iov_len))
		return -EFAULT;

	if (request == PTRACE_POKEMTETAGS)
		gup_flags |= FOLL_WRITE;

	/* align addr to the MTE tag granule */
	addr &= MTE_GRANULE_MASK;

	ret = access_remote_tags(child, addr, &kiov, gup_flags);
	if (!ret)
		ret = put_user(kiov.iov_len, &uiov->iov_len);

	return ret;
}
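
/*
 * Illustrative tracer-side sketch (not part of this file): the request
 * takes a struct iovec describing one tag byte per 16-byte MTE granule in
 * the tracee, and on success iov_len is updated to the number of tags
 * actually copied, which may be shorter than requested.  Assumes an
 * already-attached, stopped tracee "pid", an address "addr" in its address
 * space, and that PTRACE_PEEKMTETAGS is visible (arm64 UAPI <asm/ptrace.h>):
 *
 *	#include <stdio.h>
 *	#include <sys/ptrace.h>
 *	#include <sys/uio.h>
 *
 *	unsigned char tags[64];
 *	struct iovec iov = { .iov_base = tags, .iov_len = sizeof(tags) };
 *
 *	if (ptrace(PTRACE_PEEKMTETAGS, pid, (void *)addr, &iov) == 0)
 *		printf("copied %zu tags\n", iov.iov_len);
 */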

static ssize_t mte_tcf_preferred_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	switch (per_cpu(mte_tcf_preferred, dev->id)) {
	case MTE_CTRL_TCF_ASYNC:
		return sysfs_emit(buf, "async\n");
	case MTE_CTRL_TCF_SYNC:
		return sysfs_emit(buf, "sync\n");
	case MTE_CTRL_TCF_ASYMM:
		return sysfs_emit(buf, "asymm\n");
	default:
		return sysfs_emit(buf, "???\n");
	}
}

static ssize_t mte_tcf_preferred_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	u64 tcf;

	if (sysfs_streq(buf, "async"))
		tcf = MTE_CTRL_TCF_ASYNC;
	else if (sysfs_streq(buf, "sync"))
		tcf = MTE_CTRL_TCF_SYNC;
	else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
		tcf = MTE_CTRL_TCF_ASYMM;
	else
		return -EINVAL;

	device_lock(dev);
	per_cpu(mte_tcf_preferred, dev->id) = tcf;
	device_unlock(dev);

	return count;
}
static DEVICE_ATTR_RW(mte_tcf_preferred);

static int register_mte_tcf_preferred_sysctl(void)
{
	unsigned int cpu;

	if (!system_supports_mte())
		return 0;

	for_each_possible_cpu(cpu) {
		per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
		device_create_file(get_cpu_device(cpu),
				   &dev_attr_mte_tcf_preferred);
	}

	return 0;
}
subsys_initcall(register_mte_tcf_preferred_sysctl);
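
/*
 * Illustrative note (not part of this file): the attribute registered above
 * is expected to show up as /sys/devices/system/cpu/cpu<N>/mte_tcf_preferred
 * and accepts "sync", "async" or, when the ARM64_MTE_ASYMM capability is
 * present, "asymm".  A minimal userspace sketch, treating the exact path as
 * an assumption to verify on the running system:
 *
 *	#include <stdio.h>
 *
 *	FILE *f = fopen("/sys/devices/system/cpu/cpu0/mte_tcf_preferred", "w");
 *
 *	if (f) {
 *		fputs("sync\n", f);	// preference used for tasks scheduled later
 *		fclose(f);
 *	}
 */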

/*
 * Return 0 on success, the number of bytes not probed otherwise.
 */
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
	const char __user *end = uaddr + size;
	int err = 0;
	char val;

	__raw_get_user(val, uaddr, err);
	if (err)
		return size;

	uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
	while (uaddr < end) {
		/*
		 * A read is sufficient for MTE; the caller should have
		 * probed the PTE for write permission if required.
		 */
		__raw_get_user(val, uaddr, err);
		if (err)
			return end - uaddr;
		uaddr += MTE_GRANULE_SIZE;
	}
	(void)val;

	return 0;
}
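
/*
 * Worked example (illustrative, not from the original source): for
 * uaddr = 0x1008 and size = 40, end = 0x1030.  The unaligned byte at
 * 0x1008 is probed first, then uaddr is aligned up to the 16-byte MTE
 * granule at 0x1010 and the loop probes 0x1010 and 0x1020 (0x1030 == end,
 * so it stops).  If the read at 0x1020 faulted, the function would return
 * end - uaddr = 0x10, i.e. 16 bytes not probed.
 */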