// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDSO implementations.
 *
 * Copyright (C) 2012 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>

extern char vdso_start[], vdso_end[];
extern char vdso32_start[], vdso32_end[];

enum vdso_abi {
    VDSO_ABI_AA64,
    VDSO_ABI_AA32,
};

enum vvar_pages {
    VVAR_DATA_PAGE_OFFSET,
    VVAR_TIMENS_PAGE_OFFSET,
    VVAR_NR_PAGES,
};

struct vdso_abi_info {
    const char *name;
    const char *vdso_code_start;
    const char *vdso_code_end;
    unsigned long vdso_pages;
    /* Data Mapping */
    struct vm_special_mapping *dm;
    /* Code Mapping */
    struct vm_special_mapping *cm;
};

static struct vdso_abi_info vdso_info[] __ro_after_init = {
    [VDSO_ABI_AA64] = {
        .name = "vdso",
        .vdso_code_start = vdso_start,
        .vdso_code_end = vdso_end,
    },
#ifdef CONFIG_COMPAT_VDSO
    [VDSO_ABI_AA32] = {
        .name = "vdso32",
        .vdso_code_start = vdso32_start,
        .vdso_code_end = vdso32_end,
    },
#endif /* CONFIG_COMPAT_VDSO */
};

/*
 * The vDSO data page.
 */
static union {
    struct vdso_data    data[CS_BASES];
    u8          page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;

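/*
 * mremap() callback for the vDSO text mapping: record the new base
 * address in the mm context so later users see the relocated vDSO.
 */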
static int vdso_mremap(const struct vm_special_mapping *sm,
        struct vm_area_struct *new_vma)
{
    current->mm->context.vdso = (void *)new_vma->vm_start;

    return 0;
}

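/*
 * One-time setup for an ABI's vDSO image: sanity-check the ELF header,
 * work out how many pages the image spans, and build the page array
 * used by the code special mapping.
 */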
static int __init __vdso_init(enum vdso_abi abi)
{
    int i;
    struct page **vdso_pagelist;
    unsigned long pfn;

    if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
        pr_err("vDSO is not a valid ELF object!\n");
        return -EINVAL;
    }

    vdso_info[abi].vdso_pages = (
            vdso_info[abi].vdso_code_end -
            vdso_info[abi].vdso_code_start) >>
            PAGE_SHIFT;

    vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
                sizeof(struct page *),
                GFP_KERNEL);
    if (vdso_pagelist == NULL)
        return -ENOMEM;

    /* Grab the vDSO code pages. */
    pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);

    for (i = 0; i < vdso_info[abi].vdso_pages; i++)
        vdso_pagelist[i] = pfn_to_page(pfn + i);

    vdso_info[abi].cm->pages = vdso_pagelist;

    return 0;
}

#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
    return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
    struct mm_struct *mm = task->mm;
    struct vm_area_struct *vma;

    mmap_read_lock(mm);

    for (vma = mm->mmap; vma; vma = vma->vm_next) {
        unsigned long size = vma->vm_end - vma->vm_start;

        if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
            zap_page_range(vma, vma->vm_start, size);
#ifdef CONFIG_COMPAT_VDSO
        if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
            zap_page_range(vma, vma->vm_start, size);
#endif
    }

    mmap_read_unlock(mm);
    return 0;
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
    if (likely(vma->vm_mm == current->mm))
        return current->nsproxy->time_ns->vvar_page;

    /*
     * VM_PFNMAP | VM_IO protect the .fault() handler from being called
     * through interfaces like /proc/$pid/mem or
     * process_vm_{readv,writev}() as long as there's no .access()
     * in special_mapping_vmops.
     * For more details see check_vma_flags() and __access_remote_vm().
     */
    WARN(1, "vvar_page accessed remotely");

    return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
    return NULL;
}
#endif

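/*
 * Fault handler for the vvar special mapping. The first page always
 * holds the data the task should use: the time namespace page if the
 * task is in a non-init time namespace, the global vdso_data page
 * otherwise. Under CONFIG_TIME_NS the real vdso_data page is exposed
 * at the second offset so namespaced tasks can still reach it.
 */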
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
                 struct vm_area_struct *vma, struct vm_fault *vmf)
{
    struct page *timens_page = find_timens_vvar_page(vma);
    unsigned long pfn;

    switch (vmf->pgoff) {
    case VVAR_DATA_PAGE_OFFSET:
        if (timens_page)
            pfn = page_to_pfn(timens_page);
        else
            pfn = sym_to_pfn(vdso_data);
        break;
#ifdef CONFIG_TIME_NS
    case VVAR_TIMENS_PAGE_OFFSET:
        /*
         * If a task belongs to a time namespace then its
         * namespace-specific VVAR page is mapped at
         * VVAR_DATA_PAGE_OFFSET and the real VVAR page is mapped at
         * VVAR_TIMENS_PAGE_OFFSET instead.
         * See also the comment near timens_setup_vdso_data().
         */
        if (!timens_page)
            return VM_FAULT_SIGBUS;
        pfn = sym_to_pfn(vdso_data);
        break;
#endif /* CONFIG_TIME_NS */
    default:
        return VM_FAULT_SIGBUS;
    }

    return vmf_insert_pfn(vma, vmf->address, pfn);
}

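/*
 * Map the vvar data pages, immediately followed by the vDSO text, into
 * the given mm. The text mapping is marked as a BTI guarded page when
 * the kernel and CPU support BTI.
 */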
static int __setup_additional_pages(enum vdso_abi abi,
                    struct mm_struct *mm,
                    struct linux_binprm *bprm,
                    int uses_interp)
{
    unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
    unsigned long gp_flags = 0;
    void *ret;

    BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

    vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
    /* Be sure to map the data page */
    vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;

    vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
    if (IS_ERR_VALUE(vdso_base)) {
        ret = ERR_PTR(vdso_base);
        goto up_fail;
    }

    ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
                       VM_READ|VM_MAYREAD|VM_PFNMAP,
                       vdso_info[abi].dm);
    if (IS_ERR(ret))
        goto up_fail;

    if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
        gp_flags = VM_ARM64_BTI;

    vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
    mm->context.vdso = (void *)vdso_base;
    ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
                       VM_READ|VM_EXEC|gp_flags|
                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
                       vdso_info[abi].cm);
    if (IS_ERR(ret))
        goto up_fail;

    return 0;

up_fail:
    mm->context.vdso = NULL;
    return PTR_ERR(ret);
}

#ifdef CONFIG_COMPAT
/*
 * Create and map the vectors page for AArch32 tasks.
 */
enum aarch32_map {
    AA32_MAP_VECTORS, /* kuser helpers */
    AA32_MAP_SIGPAGE,
    AA32_MAP_VVAR,
    AA32_MAP_VDSO,
};

static struct page *aarch32_vectors_page __ro_after_init;
static struct page *aarch32_sig_page __ro_after_init;

static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
                  struct vm_area_struct *new_vma)
{
    current->mm->context.sigpage = (void *)new_vma->vm_start;

    return 0;
}

static struct vm_special_mapping aarch32_vdso_maps[] = {
    [AA32_MAP_VECTORS] = {
        .name   = "[vectors]", /* ABI */
        .pages  = &aarch32_vectors_page,
    },
    [AA32_MAP_SIGPAGE] = {
        .name   = "[sigpage]", /* ABI */
        .pages  = &aarch32_sig_page,
        .mremap = aarch32_sigpage_mremap,
    },
    [AA32_MAP_VVAR] = {
        .name = "[vvar]",
        .fault = vvar_fault,
    },
    [AA32_MAP_VDSO] = {
        .name = "[vdso]",
        .mremap = vdso_mremap,
    },
};

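/*
 * Allocate the AArch32 vectors page and copy the kuser helpers to its
 * end, so they sit at their expected offsets once the page is mapped at
 * AARCH32_VECTORS_BASE.
 */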
static int aarch32_alloc_kuser_vdso_page(void)
{
    extern char __kuser_helper_start[], __kuser_helper_end[];
    int kuser_sz = __kuser_helper_end - __kuser_helper_start;
    unsigned long vdso_page;

    if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
        return 0;

    vdso_page = get_zeroed_page(GFP_KERNEL);
    if (!vdso_page)
        return -ENOMEM;

    memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
           kuser_sz);
    aarch32_vectors_page = virt_to_page(vdso_page);
    return 0;
}

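/*
 * Build the AArch32 signal page: fill it with the poison word below and
 * place the sigreturn trampoline code at the start.
 */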
#define COMPAT_SIGPAGE_POISON_WORD  0xe7fddef1
static int aarch32_alloc_sigpage(void)
{
    extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
    int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
    __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
    void *sigpage;

    sigpage = (void *)__get_free_page(GFP_KERNEL);
    if (!sigpage)
        return -ENOMEM;

    memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
    memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
    aarch32_sig_page = virt_to_page(sigpage);
    return 0;
}

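/*
 * Boot-time allocation of the compat (AArch32) pages: wire up the vvar
 * and vDSO special mappings, then allocate the sigpage and, if enabled,
 * the kuser helpers vectors page.
 */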
static int __init __aarch32_alloc_vdso_pages(void)
{
    if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
        return 0;

    vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
    vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];

    return __vdso_init(VDSO_ABI_AA32);
}

static int __init aarch32_alloc_vdso_pages(void)
{
    int ret;

    ret = __aarch32_alloc_vdso_pages();
    if (ret)
        return ret;

    ret = aarch32_alloc_sigpage();
    if (ret)
        return ret;

    return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);

static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
{
    void *ret;

    if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
        return 0;

    /*
     * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's
     * not safe to CoW the page containing the CPU exception vectors.
     */
    ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
                       VM_READ | VM_EXEC |
                       VM_MAYREAD | VM_MAYEXEC,
                       &aarch32_vdso_maps[AA32_MAP_VECTORS]);

    return PTR_ERR_OR_ZERO(ret);
}

static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
    unsigned long addr;
    void *ret;

    addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
    if (IS_ERR_VALUE(addr)) {
        ret = ERR_PTR(addr);
        goto out;
    }

    /*
     * VM_MAYWRITE is required to allow gdb to Copy-on-Write and
     * set breakpoints.
     */
    ret = _install_special_mapping(mm, addr, PAGE_SIZE,
                       VM_READ | VM_EXEC | VM_MAYREAD |
                       VM_MAYWRITE | VM_MAYEXEC,
                       &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
    if (IS_ERR(ret))
        goto out;

    mm->context.sigpage = (void *)addr;

out:
    return PTR_ERR_OR_ZERO(ret);
}

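/*
 * Set up the compat mappings for a new AArch32 process image: kuser
 * helpers, the compat vDSO (when built in) and the sigreturn page.
 */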
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
    struct mm_struct *mm = current->mm;
    int ret;

    if (mmap_write_lock_killable(mm))
        return -EINTR;

    ret = aarch32_kuser_helpers_setup(mm);
    if (ret)
        goto out;

    if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
        ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
                           uses_interp);
        if (ret)
            goto out;
    }

    ret = aarch32_sigreturn_setup(mm);
out:
    mmap_write_unlock(mm);
    return ret;
}
#endif /* CONFIG_COMPAT */

enum aarch64_map {
    AA64_MAP_VVAR,
    AA64_MAP_VDSO,
};

static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
    [AA64_MAP_VVAR] = {
        .name   = "[vvar]",
        .fault = vvar_fault,
    },
    [AA64_MAP_VDSO] = {
        .name   = "[vdso]",
        .mremap = vdso_mremap,
    },
};

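/*
 * Register the native (AArch64) vDSO special mappings and build its
 * page list at boot.
 */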
static int __init vdso_init(void)
{
    vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
    vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];

    return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);

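/* Map the native vDSO into a new process' address space at exec time. */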
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
    struct mm_struct *mm = current->mm;
    int ret;

    if (mmap_write_lock_killable(mm))
        return -EINTR;

    ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
    mmap_write_unlock(mm);

    return ret;
}