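/*
 * Handling of the per-mm Local Descriptor Table, as exposed to user space
 * through the modify_ldt(2) system call: allocation and installation of
 * ldt_struct, the PTI alias mapping of the LDT into the user page tables,
 * and propagation of LDT changes to all CPUs running the mm.
 */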
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>

#include <asm/ldt.h>
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/pgtable_areas.h>

#include <xen/xen.h>
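
/* Size of one LDT alias slot; this is a multiple of PAGE_SIZE. */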
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

static inline void *ldt_slot_va(int slot)
{
	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}

void load_mm_ldt(struct mm_struct *mm)
{
	struct ldt_struct *ldt;

	/* READ_ONCE() synchronizes with the smp_store_release() in install_ldt(). */
	ldt = READ_ONCE(mm->context.ldt);
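
	/*
	 * Any change to mm->context.ldt is followed by an IPI to all CPUs
	 * with the mm active, and the old LDT is not freed until that IPI
	 * has been handled everywhere.  So whichever value we read here is
	 * safe to use, and a newer value will be loaded before user code
	 * runs again.
	 */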

	if (unlikely(ldt)) {
		if (static_cpu_has(X86_FEATURE_PTI)) {
			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
				/*
				 * Whoops -- either the new LDT isn't mapped
				 * (if slot == -1) or is mapped into a bogus
				 * slot (if slot > 1).
				 */
				clear_LDT();
				return;
			}

			/*
			 * With PTI enabled, ldt->entries is not mapped in
			 * the user page tables.  Point the CPU at the alias
			 * in the per-mm LDT slot area instead.
			 */
			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
		} else {
			set_ldt(ldt->entries, ldt->nr_entries);
		}
	} else {
		clear_LDT();
	}
}

void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
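	/*
	 * Load the LDT if either the old or the new mm has one.
	 *
	 * An mm never goes from having an LDT to not having one, and two
	 * mms never share an LDT, so there is nothing to gain from checking
	 * whether the LDT actually changed.
	 *
	 * Using '|' instead of '||' generates better code here.
	 */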
	if (unlikely((unsigned long)prev->context.ldt |
		     (unsigned long)next->context.ldt))
		load_mm_ldt(next);

	DEBUG_LOCKS_WARN_ON(preemptible());
}

static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
	unsigned short sel;
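
	/*
	 * Ensure that the cached DS and ES descriptors match the updated
	 * LDT by reloading any selector that points into the LDT.
	 */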
	savesegment(ds, sel);
	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(ds, sel);

	savesegment(es, sel);
	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(es, sel);
#endif
}
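
/* context.lock is held by the task that issued the smp function call. */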
static void flush_ldt(void *__mm)
{
	struct mm_struct *mm = __mm;

	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
		return;

	load_mm_ldt(mm);

	refresh_ldt_segments();
}
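
/* The caller must call finalize_ldt_struct on the result.  LDT starts zeroed. */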
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
	struct ldt_struct *new_ldt;
	unsigned int alloc_size;

	if (num_entries > LDT_ENTRIES)
		return NULL;

	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT);
	if (!new_ldt)
		return NULL;

	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
	alloc_size = num_entries * LDT_ENTRY_SIZE;
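
	/*
	 * Xen is very picky: it requires a page-aligned LDT that has no
	 * trailing nonzero bytes in any page that contains LDT descriptors.
	 * Keep it simple: zero the whole allocation and never allocate less
	 * than PAGE_SIZE.
	 */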
	if (alloc_size > PAGE_SIZE)
		new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);

	if (!new_ldt->entries) {
		kfree(new_ldt);
		return NULL;
	}

	/* The new LDT is not aliased into the PTI slot area yet. */
	new_ldt->slot = -1;

	new_ldt->nr_entries = num_entries;
	return new_ldt;
}

#ifdef CONFIG_PAGE_TABLE_ISOLATION

static void do_sanity_check(struct mm_struct *mm,
			    bool had_kernel_mapping,
			    bool had_user_mapping)
{
	if (mm->context.ldt) {
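		/*
		 * We already had an LDT.  The top-level entry should already
		 * have been allocated and synchronized with the usermode
		 * tables.
		 */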
		WARN_ON(!had_kernel_mapping);
		if (boot_cpu_has(X86_FEATURE_PTI))
			WARN_ON(!had_user_mapping);
	} else {
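		/*
		 * This is the first time we're mapping an LDT for this
		 * process.  Sync the pgd to the usermode tables.
		 */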
		WARN_ON(had_kernel_mapping);
		if (boot_cpu_has(X86_FEATURE_PTI))
			WARN_ON(had_user_mapping);
	}
}

#ifdef CONFIG_X86_PAE

static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
{
	p4d_t *p4d;
	pud_t *pud;

	if (pgd->pgd == 0)
		return NULL;

	p4d = p4d_offset(pgd, va);
	if (p4d_none(*p4d))
		return NULL;

	pud = pud_offset(p4d, va);
	if (pud_none(*pud))
		return NULL;

	return pmd_offset(pud, va);
}

static void map_ldt_struct_to_user(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	pmd_t *k_pmd, *u_pmd;

	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);

	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
		set_pmd(u_pmd, *k_pmd);
}

static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	bool had_kernel, had_user;
	pmd_t *k_pmd, *u_pmd;

	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
	had_kernel = (k_pmd->pmd != 0);
	had_user   = (u_pmd->pmd != 0);

	do_sanity_check(mm, had_kernel, had_user);
}

#else /* !CONFIG_X86_PAE */

static void map_ldt_struct_to_user(struct mm_struct *mm)
{
	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);

	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
}

static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
	bool had_kernel = (pgd->pgd != 0);
	bool had_user   = (kernel_to_user_pgdp(pgd)->pgd != 0);

	do_sanity_check(mm, had_kernel, had_user);
}

#endif /* CONFIG_X86_PAE */
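
/*
 * If PTI is enabled, this maps the LDT into the kernelmode and usermode
 * page tables for the given mm.
 */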
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	unsigned long va;
	bool is_vmalloc;
	spinlock_t *ptl;
	int i, nr_pages;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return 0;

	/*
	 * Any given ldt_struct should have map_ldt_struct() called at most
	 * once.
	 */
	WARN_ON(ldt->slot != -1);

	/* Check that the current mappings are sane. */
	sanity_check_ldt_mapping(mm);

	is_vmalloc = is_vmalloc_addr(ldt->entries);

	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		const void *src = (char *)ldt->entries + offset;
		unsigned long pfn;
		pgprot_t pte_prot;
		pte_t pte, *ptep;

		va = (unsigned long)ldt_slot_va(slot) + offset;
		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
			page_to_pfn(virt_to_page(src));

		/*
		 * Treat the PTI LDT range as a *userspace* range.
		 * get_locked_pte() will allocate all needed pagetables
		 * and account for them in this mm.
		 */
		ptep = get_locked_pte(mm, va, &ptl);
		if (!ptep)
			return -ENOMEM;

		/*
		 * Map it RO so the easy to find address is not a primary
		 * target via some kernel interface which misses a
		 * permission check.
		 */
		pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
		/* Filter out unsupported __PAGE_KERNEL* bits: */
		pgprot_val(pte_prot) &= __supported_pte_mask;
		pte = pfn_pte(pfn, pte_prot);
		set_pte_at(mm, va, ptep, pte);
		pte_unmap_unlock(ptep, ptl);
	}

	/* Propagate the LDT mapping to the user page tables. */
	map_ldt_struct_to_user(mm);

	/* Record which alias slot the LDT is now mapped into. */
	ldt->slot = slot;
	return 0;
}

static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
	unsigned long va;
	int i, nr_pages;

	if (!ldt)
		return;

	/* The LDT alias map/unmap is only required when PTI is enabled. */
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		spinlock_t *ptl;
		pte_t *ptep;

		va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
		ptep = get_locked_pte(mm, va, &ptl);
		pte_clear(mm, va, ptep);
		pte_unmap_unlock(ptep, ptl);
	}

	va = (unsigned long)ldt_slot_va(ldt->slot);
	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
}

#else /* !CONFIG_PAGE_TABLE_ISOLATION */

static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	return 0;
}

static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */

static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	struct mmu_gather tlb;
	unsigned long start = LDT_BASE_ADDR;
	unsigned long end = LDT_END_ADDR;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;
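
	/*
	 * Although free_pgd_range() is intended for freeing user page
	 * tables, it also works out for kernel mappings on x86.  We use
	 * tlb_gather_mmu_fullmm() to avoid confusing the range-tracking
	 * logic in __tlb_adjust_range().
	 */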
	tlb_gather_mmu_fullmm(&tlb, mm);
	free_pgd_range(&tlb, start, end, start, end);
	tlb_finish_mmu(&tlb);
#endif
}
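
/* After calling this, the LDT is immutable. */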
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}

static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
	mutex_lock(&mm->context.lock);

	/* Synchronizes with the READ_ONCE() in load_mm_ldt(). */
	smp_store_release(&mm->context.ldt, ldt);

	/* Activate the LDT on all CPUs using the current mm. */
	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);

	mutex_unlock(&mm->context.lock);
}

static void free_ldt_struct(struct ldt_struct *ldt)
{
	if (likely(!ldt))
		return;

	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
		vfree_atomic(ldt->entries);
	else
		free_page((unsigned long)ldt->entries);
	kfree(ldt);
}
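
/*
 * Called on fork from arch_dup_mmap().  Just copy the current LDT state;
 * the new task is not yet running, so nothing can be installed.
 */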
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
	struct ldt_struct *new_ldt;
	int retval = 0;

	if (!old_mm)
		return 0;

	mutex_lock(&old_mm->context.lock);
	if (!old_mm->context.ldt)
		goto out_unlock;

	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
	if (!new_ldt) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
	finalize_ldt_struct(new_ldt);

	retval = map_ldt_struct(mm, new_ldt, 0);
	if (retval) {
		free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}
	mm->context.ldt = new_ldt;

out_unlock:
	mutex_unlock(&old_mm->context.lock);
	return retval;
}
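
/*
 * No need to lock the MM as we are the last user.
 *
 * 64bit: Don't touch the LDT register - we're already in the next thread.
 */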
void destroy_context_ldt(struct mm_struct *mm)
{
	free_ldt_struct(mm->context.ldt);
	mm->context.ldt = NULL;
}

void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}

static int read_ldt(void __user *ptr, unsigned long bytecount)
{
	struct mm_struct *mm = current->mm;
	unsigned long entries_size;
	int retval;

	down_read(&mm->context.ldt_usr_sem);

	if (!mm->context.ldt) {
		retval = 0;
		goto out_unlock;
	}

	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
	if (entries_size > bytecount)
		entries_size = bytecount;

	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
		retval = -EFAULT;
		goto out_unlock;
	}

	if (entries_size != bytecount) {
		/* Zero-fill the rest and pretend we read bytecount bytes. */
		if (clear_user(ptr + entries_size, bytecount - entries_size)) {
			retval = -EFAULT;
			goto out_unlock;
		}
	}
	retval = bytecount;

out_unlock:
	up_read(&mm->context.ldt_usr_sem);
	return retval;
}

static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
	/* The default LDT is empty; just zero-fill up to an arbitrary, historical size. */
#ifdef CONFIG_X86_32
	unsigned long size = 5 * sizeof(struct desc_struct);
#else
	unsigned long size = 128;
#endif
	if (bytecount > size)
		bytecount = size;
	if (clear_user(ptr, bytecount))
		return -EFAULT;
	return bytecount;
}

static bool allow_16bit_segments(void)
{
	if (!IS_ENABLED(CONFIG_X86_16BIT))
		return false;

#ifdef CONFIG_XEN_PV
	/*
	 * Xen PV does not implement ESPFIX64, which means that 16-bit
	 * segments will not work correctly.  Hence, we disallow all
	 * 16-bit segments in Xen PV guests.
	 */
	if (xen_pv_domain()) {
		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
		return false;
	}
#endif

	return true;
}

static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *new_ldt, *old_ldt;
	unsigned int old_nr_entries, new_nr_entries;
	struct user_desc ldt_info;
	struct desc_struct ldt;
	int error;

	error = -EINVAL;
	if (bytecount != sizeof(ldt_info))
		goto out;
	error = -EFAULT;
	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
		goto out;

	error = -EINVAL;
	if (ldt_info.entry_number >= LDT_ENTRIES)
		goto out;
	if (ldt_info.contents == 3) {
		if (oldmode)
			goto out;
		if (ldt_info.seg_not_present == 0)
			goto out;
	}

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
	    LDT_empty(&ldt_info)) {
		/* The user wants to clear the entry. */
		memset(&ldt, 0, sizeof(ldt));
	} else {
		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
			error = -EINVAL;
			goto out;
		}

		fill_ldt(&ldt, &ldt_info);
		if (oldmode)
			ldt.avl = 0;
	}

	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;

	old_ldt        = mm->context.ldt;
	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

	error = -ENOMEM;
	new_ldt = alloc_ldt_struct(new_nr_entries);
	if (!new_ldt)
		goto out_unlock;

	if (old_ldt)
		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

	new_ldt->entries[ldt_info.entry_number] = ldt;
	finalize_ldt_struct(new_ldt);
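
	/*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * If there is already an LDT, use the other slot so that other CPUs
	 * will continue to use the old LDT until install_ldt() switches
	 * everything to the new one.
	 */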
	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	if (error) {
		/*
		 * Mapping can only fail on the first LDT setup; once an LDT
		 * is installed, the LDT page tables already exist and stay
		 * in use by the old mapping, so only free them when there
		 * was no previous LDT.
		 */
		if (!WARN_ON_ONCE(old_ldt))
			free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}

	install_ldt(mm, new_ldt);
	unmap_ldt_struct(mm, old_ldt);
	free_ldt_struct(old_ldt);
	error = 0;

out_unlock:
	up_write(&mm->context.ldt_usr_sem);
out:
	return error;
}

SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
		unsigned long , bytecount)
{
	int ret = -ENOSYS;

	switch (func) {
	case 0:
		ret = read_ldt(ptr, bytecount);
		break;
	case 1:
		ret = write_ldt(ptr, bytecount, 1);
		break;
	case 2:
		ret = read_default_ldt(ptr, bytecount);
		break;
	case 0x11:
		ret = write_ldt(ptr, bytecount, 0);
		break;
	}
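
	/*
	 * The SYSCALL_DEFINE() macros give us an 'unsigned long' return
	 * type, but the ABI for sys_modify_ldt() expects 'int'.  This cast
	 * gives us an int-sized value in %rax for the return code.  The
	 * 'unsigned' keeps the compiler from sign-extending negative error
	 * codes into the upper half of the register.
	 */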
	return (unsigned int)ret;
}