0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021 #include <linux/kernel.h>
0022 #include <linux/errno.h>
0023 #include <linux/string.h>
0024 #include <linux/types.h>
0025 #include <linux/bug.h>
0026 #include <linux/init.h>
0027 #include <linux/spinlock.h>
0028 #include <linux/mm.h>
0029 #include <linux/uaccess.h>
0030 #include <linux/cpu.h>
0031
0032 #include <asm/cpufeature.h>
0033 #include <asm/hypervisor.h>
0034 #include <asm/vsyscall.h>
0035 #include <asm/cmdline.h>
0036 #include <asm/pti.h>
0037 #include <asm/tlbflush.h>
0038 #include <asm/desc.h>
0039 #include <asm/sections.h>
0040 #include <asm/set_memory.h>
0041
/*
 * pr_fmt() prepends a fixed subsystem tag to a format string by literal
 * concatenation. Any earlier definition is dropped first so this file's
 * tag wins.
 */
#ifdef pr_fmt
# undef pr_fmt
#endif
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
0044
0045
/*
 * If no earlier header defined __GFP_NOTRACK, define it as 0 so that
 * OR-ing it into a gfp mask changes nothing.
 */
#if !defined(__GFP_NOTRACK)
# define __GFP_NOTRACK 0
#endif
0049
0050
0051
0052
0053
/*
 * Granularity used when cloning the kernel image into the user page
 * tables: whole PMD entries on 64-bit builds, individual PTEs otherwise.
 */
#if defined(CONFIG_X86_64)
# define PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PMD
#else
# define PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PTE
#endif
0059
0060 static void __init pti_print_if_insecure(const char *reason)
0061 {
0062 if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
0063 pr_info("%s\n", reason);
0064 }
0065
0066 static void __init pti_print_if_secure(const char *reason)
0067 {
0068 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
0069 pr_info("%s\n", reason);
0070 }
0071
/*
 * How PTI was selected at boot. Set by pti_check_boottime_disable() and
 * consulted by pti_kernel_image_global_ok().
 */
static enum pti_mode {
	PTI_AUTO	= 0,	/* decide from the CPU's Meltdown bug flag */
	PTI_FORCE_OFF	= 1,	/* disabled (Xen PV, command line, mitigations off) */
	PTI_FORCE_ON	= 2	/* "pti=on" given on the command line */
} pti_mode;
0077
0078 void __init pti_check_boottime_disable(void)
0079 {
0080 char arg[5];
0081 int ret;
0082
0083
0084 pti_mode = PTI_AUTO;
0085
0086 if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
0087 pti_mode = PTI_FORCE_OFF;
0088 pti_print_if_insecure("disabled on XEN PV.");
0089 return;
0090 }
0091
0092 ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
0093 if (ret > 0) {
0094 if (ret == 3 && !strncmp(arg, "off", 3)) {
0095 pti_mode = PTI_FORCE_OFF;
0096 pti_print_if_insecure("disabled on command line.");
0097 return;
0098 }
0099 if (ret == 2 && !strncmp(arg, "on", 2)) {
0100 pti_mode = PTI_FORCE_ON;
0101 pti_print_if_secure("force enabled on command line.");
0102 goto enable;
0103 }
0104 if (ret == 4 && !strncmp(arg, "auto", 4)) {
0105 pti_mode = PTI_AUTO;
0106 goto autosel;
0107 }
0108 }
0109
0110 if (cmdline_find_option_bool(boot_command_line, "nopti") ||
0111 cpu_mitigations_off()) {
0112 pti_mode = PTI_FORCE_OFF;
0113 pti_print_if_insecure("disabled on command line.");
0114 return;
0115 }
0116
0117 autosel:
0118 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
0119 return;
0120 enable:
0121 setup_force_cpu_cap(X86_FEATURE_PTI);
0122 }
0123
0124 pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
0125 {
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135 if (!pgdp_maps_userspace(pgdp))
0136 return pgd;
0137
0138
0139
0140
0141
0142 kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155
0156
0157 if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
0158 (__supported_pte_mask & _PAGE_NX))
0159 pgd.pgd |= _PAGE_NX;
0160
0161
0162 return pgd;
0163 }
0164
0165
0166
0167
0168
0169
0170
0171 static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
0172 {
0173 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
0174 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
0175
0176 if (address < PAGE_OFFSET) {
0177 WARN_ONCE(1, "attempt to walk user address\n");
0178 return NULL;
0179 }
0180
0181 if (pgd_none(*pgd)) {
0182 unsigned long new_p4d_page = __get_free_page(gfp);
0183 if (WARN_ON_ONCE(!new_p4d_page))
0184 return NULL;
0185
0186 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
0187 }
0188 BUILD_BUG_ON(pgd_large(*pgd) != 0);
0189
0190 return p4d_offset(pgd, address);
0191 }
0192
0193
0194
0195
0196
0197
0198
0199 static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
0200 {
0201 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
0202 p4d_t *p4d;
0203 pud_t *pud;
0204
0205 p4d = pti_user_pagetable_walk_p4d(address);
0206 if (!p4d)
0207 return NULL;
0208
0209 BUILD_BUG_ON(p4d_large(*p4d) != 0);
0210 if (p4d_none(*p4d)) {
0211 unsigned long new_pud_page = __get_free_page(gfp);
0212 if (WARN_ON_ONCE(!new_pud_page))
0213 return NULL;
0214
0215 set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
0216 }
0217
0218 pud = pud_offset(p4d, address);
0219
0220 if (pud_large(*pud)) {
0221 WARN_ON(1);
0222 return NULL;
0223 }
0224 if (pud_none(*pud)) {
0225 unsigned long new_pmd_page = __get_free_page(gfp);
0226 if (WARN_ON_ONCE(!new_pmd_page))
0227 return NULL;
0228
0229 set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
0230 }
0231
0232 return pmd_offset(pud, address);
0233 }
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244 static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
0245 {
0246 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
0247 pmd_t *pmd;
0248 pte_t *pte;
0249
0250 pmd = pti_user_pagetable_walk_pmd(address);
0251 if (!pmd)
0252 return NULL;
0253
0254
0255 if (pmd_large(*pmd)) {
0256 WARN_ON(1);
0257 return NULL;
0258 }
0259
0260 if (pmd_none(*pmd)) {
0261 unsigned long new_pte_page = __get_free_page(gfp);
0262 if (!new_pte_page)
0263 return NULL;
0264
0265 set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
0266 }
0267
0268 pte = pte_offset_kernel(pmd, address);
0269 if (pte_flags(*pte) & _PAGE_USER) {
0270 WARN_ONCE(1, "attempt to walk to user pte\n");
0271 return NULL;
0272 }
0273 return pte;
0274 }
0275
0276 #ifdef CONFIG_X86_VSYSCALL_EMULATION
0277 static void __init pti_setup_vsyscall(void)
0278 {
0279 pte_t *pte, *target_pte;
0280 unsigned int level;
0281
0282 pte = lookup_address(VSYSCALL_ADDR, &level);
0283 if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
0284 return;
0285
0286 target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
0287 if (WARN_ON(!target_pte))
0288 return;
0289
0290 *target_pte = *pte;
0291 set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
0292 }
0293 #else
0294 static void __init pti_setup_vsyscall(void) { }
0295 #endif
0296
/* Granularity at which pti_clone_pgtable() copies kernel mappings. */
enum pti_clone_level {
	PTI_CLONE_PMD = 0,	/* copy whole PMD entries */
	PTI_CLONE_PTE = 1,	/* copy individual PTEs (large PMDs still copied whole) */
};
0301
0302 static void
0303 pti_clone_pgtable(unsigned long start, unsigned long end,
0304 enum pti_clone_level level)
0305 {
0306 unsigned long addr;
0307
0308
0309
0310
0311
0312 for (addr = start; addr < end;) {
0313 pte_t *pte, *target_pte;
0314 pmd_t *pmd, *target_pmd;
0315 pgd_t *pgd;
0316 p4d_t *p4d;
0317 pud_t *pud;
0318
0319
0320 if (addr < start)
0321 break;
0322
0323 pgd = pgd_offset_k(addr);
0324 if (WARN_ON(pgd_none(*pgd)))
0325 return;
0326 p4d = p4d_offset(pgd, addr);
0327 if (WARN_ON(p4d_none(*p4d)))
0328 return;
0329
0330 pud = pud_offset(p4d, addr);
0331 if (pud_none(*pud)) {
0332 WARN_ON_ONCE(addr & ~PUD_MASK);
0333 addr = round_up(addr + 1, PUD_SIZE);
0334 continue;
0335 }
0336
0337 pmd = pmd_offset(pud, addr);
0338 if (pmd_none(*pmd)) {
0339 WARN_ON_ONCE(addr & ~PMD_MASK);
0340 addr = round_up(addr + 1, PMD_SIZE);
0341 continue;
0342 }
0343
0344 if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
0345 target_pmd = pti_user_pagetable_walk_pmd(addr);
0346 if (WARN_ON(!target_pmd))
0347 return;
0348
0349
0350
0351
0352
0353
0354
0355 if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
0356 return;
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367 if (boot_cpu_has(X86_FEATURE_PGE))
0368 *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
0369
0370
0371
0372
0373
0374
0375 *target_pmd = *pmd;
0376
0377 addr += PMD_SIZE;
0378
0379 } else if (level == PTI_CLONE_PTE) {
0380
0381
0382 pte = pte_offset_kernel(pmd, addr);
0383 if (pte_none(*pte)) {
0384 addr += PAGE_SIZE;
0385 continue;
0386 }
0387
0388
0389 if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
0390 return;
0391
0392
0393 target_pte = pti_user_pagetable_walk_pte(addr);
0394 if (WARN_ON(!target_pte))
0395 return;
0396
0397
0398 if (boot_cpu_has(X86_FEATURE_PGE))
0399 *pte = pte_set_flags(*pte, _PAGE_GLOBAL);
0400
0401
0402 *target_pte = *pte;
0403
0404 addr += PAGE_SIZE;
0405
0406 } else {
0407 BUG();
0408 }
0409 }
0410 }
0411
0412 #ifdef CONFIG_X86_64
0413
0414
0415
0416
0417 static void __init pti_clone_p4d(unsigned long addr)
0418 {
0419 p4d_t *kernel_p4d, *user_p4d;
0420 pgd_t *kernel_pgd;
0421
0422 user_p4d = pti_user_pagetable_walk_p4d(addr);
0423 if (!user_p4d)
0424 return;
0425
0426 kernel_pgd = pgd_offset_k(addr);
0427 kernel_p4d = p4d_offset(kernel_pgd, addr);
0428 *user_p4d = *kernel_p4d;
0429 }
0430
0431
0432
0433
0434
0435 static void __init pti_clone_user_shared(void)
0436 {
0437 unsigned int cpu;
0438
0439 pti_clone_p4d(CPU_ENTRY_AREA_BASE);
0440
0441 for_each_possible_cpu(cpu) {
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451 unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
0452 phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
0453 pte_t *target_pte;
0454
0455 target_pte = pti_user_pagetable_walk_pte(va);
0456 if (WARN_ON(!target_pte))
0457 return;
0458
0459 *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL);
0460 }
0461 }
0462
0463 #else
0464
0465
0466
0467
0468
0469
0470
0471 static void __init pti_clone_user_shared(void)
0472 {
0473 unsigned long start, end;
0474
0475 start = CPU_ENTRY_AREA_BASE;
0476 end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
0477
0478 pti_clone_pgtable(start, end, PTI_CLONE_PMD);
0479 }
0480 #endif
0481
0482
0483
0484
0485 static void __init pti_setup_espfix64(void)
0486 {
0487 #ifdef CONFIG_X86_ESPFIX64
0488 pti_clone_p4d(ESPFIX_BASE_ADDR);
0489 #endif
0490 }
0491
0492
0493
0494
0495 static void pti_clone_entry_text(void)
0496 {
0497 pti_clone_pgtable((unsigned long) __entry_text_start,
0498 (unsigned long) __entry_text_end,
0499 PTI_CLONE_PMD);
0500 }
0501
0502
0503
0504
0505
0506
0507
0508
0509
0510
0511 static inline bool pti_kernel_image_global_ok(void)
0512 {
0513
0514
0515
0516
0517 if (cpu_feature_enabled(X86_FEATURE_PCID))
0518 return false;
0519
0520
0521
0522
0523
0524 if (pti_mode != PTI_AUTO)
0525 return false;
0526
0527
0528
0529
0530
0531
0532
0533 if (boot_cpu_has(X86_FEATURE_K8))
0534 return false;
0535
0536
0537
0538
0539
0540
0541
0542
0543 if (IS_ENABLED(CONFIG_RANDSTRUCT))
0544 return false;
0545
0546 return true;
0547 }
0548
0549
0550
0551
0552
0553 static void pti_clone_kernel_text(void)
0554 {
0555
0556
0557
0558
0559
0560 unsigned long start = PFN_ALIGN(_text);
0561 unsigned long end_clone = (unsigned long)__end_rodata_aligned;
0562 unsigned long end_global = PFN_ALIGN((unsigned long)_etext);
0563
0564 if (!pti_kernel_image_global_ok())
0565 return;
0566
0567 pr_debug("mapping partial kernel image into user address space\n");
0568
0569
0570
0571
0572
0573
0574 pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
0575
0576
0577
0578
0579
0580
0581
0582
0583 set_memory_global(start, (end_global - start) >> PAGE_SHIFT);
0584 }
0585
0586 static void pti_set_kernel_image_nonglobal(void)
0587 {
0588
0589
0590
0591
0592
0593
0594 unsigned long start = PFN_ALIGN(_text);
0595 unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
0596
0597
0598
0599
0600
0601
0602 set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
0603 }
0604
0605
0606
0607
0608 void __init pti_init(void)
0609 {
0610 if (!boot_cpu_has(X86_FEATURE_PTI))
0611 return;
0612
0613 pr_info("enabled\n");
0614
0615 #ifdef CONFIG_X86_32
0616
0617
0618
0619
0620
0621
0622 if (cpuid_ecx(0x1) & BIT(17)) {
0623
0624 printk(KERN_WARNING "\n");
0625 printk(KERN_WARNING "************************************************************\n");
0626 printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! **\n");
0627 printk(KERN_WARNING "** **\n");
0628 printk(KERN_WARNING "** You are using 32-bit PTI on a 64-bit PCID-capable CPU. **\n");
0629 printk(KERN_WARNING "** Your performance will increase dramatically if you **\n");
0630 printk(KERN_WARNING "** switch to a 64-bit kernel! **\n");
0631 printk(KERN_WARNING "** **\n");
0632 printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! **\n");
0633 printk(KERN_WARNING "************************************************************\n");
0634 }
0635 #endif
0636
0637 pti_clone_user_shared();
0638
0639
0640 pti_set_kernel_image_nonglobal();
0641
0642 pti_clone_entry_text();
0643 pti_setup_espfix64();
0644 pti_setup_vsyscall();
0645 }
0646
0647
0648
0649
0650
0651
0652
0653
0654 void pti_finalize(void)
0655 {
0656 if (!boot_cpu_has(X86_FEATURE_PTI))
0657 return;
0658
0659
0660
0661
0662 pti_clone_entry_text();
0663 pti_clone_kernel_text();
0664
0665 debug_checkwx_user();
0666 }