0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #include <linux/debugfs.h>
0012 #include <linux/kasan.h>
0013 #include <linux/mm.h>
0014 #include <linux/init.h>
0015 #include <linux/sched.h>
0016 #include <linux/seq_file.h>
0017 #include <linux/highmem.h>
0018 #include <linux/pci.h>
0019 #include <linux/ptdump.h>
0020
0021 #include <asm/e820/types.h>
0022
0023
0024
0025
0026
0027
/*
 * Dump state carried across the note_page()/effective_prot() callbacks of a
 * single page-table walk. The generic walker is handed the embedded 'ptdump'
 * member; the callbacks get back to this structure with container_of().
 */
struct pg_state {
	/* Generic walker state: callbacks and address ranges to walk. */
	struct ptdump_state ptdump;
	/* Level of the run being accumulated; -1 until the first entry is seen. */
	int level;
	/* Raw protection bits of the run being accumulated. */
	pgprotval_t current_prot;
	/* Effective protection of the run being accumulated. */
	pgprotval_t effective_prot;
	/* Effective protection per page-table level, written by effective_prot(). */
	pgprotval_t prot_levels[5];
	/* Address at which the run being accumulated starts. */
	unsigned long start_address;
	/* Marker of the address-space region currently being dumped. */
	const struct addr_marker *marker;
	/* Number of runs flushed so far under the current marker. */
	unsigned long lines;
	/* Print through printk() instead of into 'seq'. */
	bool to_dmesg;
	/* Account runs that are both writable and executable. */
	bool check_wx;
	/* Pages accounted by note_wx(). */
	unsigned long wx_pages;
	/* Output seq_file; the checkwx walkers pass NULL. */
	struct seq_file *seq;
};
0042
/*
 * A named landmark in the virtual address space. note_page() prints the
 * name as a "---[ name ]---" header once the walk reaches start_address.
 */
struct addr_marker {
	/* Address at which the region begins. */
	unsigned long start_address;
	/* Header text; NULL in the END_OF_SPACE_NR sentinel entry. */
	const char *name;
	/* Maximum number of lines printed for the region; 0 means no limit. */
	unsigned long max_lines;
};
0048
0049
0050
0051 #ifdef CONFIG_X86_64
0052
/*
 * Indices into address_markers[] for 64-bit kernels. note_page() advances
 * through that array one element at a time, so the enumerator order is the
 * order in which the markers are consumed during a walk.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
	CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
#endif
#ifdef CONFIG_EFI
	EFI_END_NR,
#endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,	/* sentinel, must stay last */
};
0079
0080 static struct addr_marker address_markers[] = {
0081 [USER_SPACE_NR] = { 0, "User Space" },
0082 [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
0083 [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
0084 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
0085 [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
0086 #ifdef CONFIG_KASAN
0087
0088
0089
0090
0091 [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
0092 [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
0093 #endif
0094 #ifdef CONFIG_MODIFY_LDT_SYSCALL
0095 [LDT_NR] = { 0UL, "LDT remap" },
0096 #endif
0097 [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
0098 #ifdef CONFIG_X86_ESPFIX64
0099 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
0100 #endif
0101 #ifdef CONFIG_EFI
0102 [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
0103 #endif
0104 [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
0105 [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
0106 [MODULES_END_NR] = { MODULES_END, "End Modules" },
0107 [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
0108 [END_OF_SPACE_NR] = { -1, NULL }
0109 };
0110
/* Top-level page table walked, together with init_mm, by the checkwx helpers (64-bit). */
#define INIT_PGD ((pgd_t *) &init_top_pgt)
0112
0113 #else
0114
/*
 * Indices into address_markers[] for 32-bit kernels. note_page() advances
 * through that array one element at a time, so the enumerator order is the
 * order in which the markers are consumed during a walk.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
#ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	CPU_ENTRY_AREA_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,	/* sentinel, must stay last */
};
0130
0131 static struct addr_marker address_markers[] = {
0132 [USER_SPACE_NR] = { 0, "User Space" },
0133 [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
0134 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
0135 [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
0136 #ifdef CONFIG_HIGHMEM
0137 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
0138 #endif
0139 #ifdef CONFIG_MODIFY_LDT_SYSCALL
0140 [LDT_NR] = { 0UL, "LDT remap" },
0141 #endif
0142 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
0143 [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
0144 [END_OF_SPACE_NR] = { -1, NULL }
0145 };
0146
/* Top-level page table walked, together with init_mm, by the checkwx helpers (32-bit). */
#define INIT_PGD (swapper_pg_dir)
0148
0149 #endif
0150
0151
/*
 * Bytes of virtual address space covered by a single entry at each
 * page-table level. Each level is the level below multiplied by the number
 * of entries in one table of that lower level.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)
0157
/*
 * Start a new output line. When @to_dmesg is true the text goes to the
 * kernel log at KERN_INFO level; otherwise it is written to the seq_file @m,
 * or silently dropped when @m is NULL. @fmt must be a string literal.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, ...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##__VA_ARGS__);		\
	} else if (m) {						\
		seq_printf(m, fmt, ##__VA_ARGS__);		\
	}							\
} while (0)
0166
/*
 * Continue the current output line. When @to_dmesg is true the text goes to
 * the kernel log with KERN_CONT; otherwise it is written to the seq_file @m,
 * or silently dropped when @m is NULL. @fmt must be a string literal.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, ...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##__VA_ARGS__);		\
	} else if (m) {						\
		seq_printf(m, fmt, ##__VA_ARGS__);		\
	}							\
} while (0)
0175
0176
0177
0178
0179 static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
0180 {
0181 static const char * const level_name[] =
0182 { "pgd", "p4d", "pud", "pmd", "pte" };
0183
0184 if (!(pr & _PAGE_PRESENT)) {
0185
0186 pt_dump_cont_printf(m, dmsg, " ");
0187 } else {
0188 if (pr & _PAGE_USER)
0189 pt_dump_cont_printf(m, dmsg, "USR ");
0190 else
0191 pt_dump_cont_printf(m, dmsg, " ");
0192 if (pr & _PAGE_RW)
0193 pt_dump_cont_printf(m, dmsg, "RW ");
0194 else
0195 pt_dump_cont_printf(m, dmsg, "ro ");
0196 if (pr & _PAGE_PWT)
0197 pt_dump_cont_printf(m, dmsg, "PWT ");
0198 else
0199 pt_dump_cont_printf(m, dmsg, " ");
0200 if (pr & _PAGE_PCD)
0201 pt_dump_cont_printf(m, dmsg, "PCD ");
0202 else
0203 pt_dump_cont_printf(m, dmsg, " ");
0204
0205
0206 if (level <= 3 && pr & _PAGE_PSE)
0207 pt_dump_cont_printf(m, dmsg, "PSE ");
0208 else
0209 pt_dump_cont_printf(m, dmsg, " ");
0210 if ((level == 4 && pr & _PAGE_PAT) ||
0211 ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
0212 pt_dump_cont_printf(m, dmsg, "PAT ");
0213 else
0214 pt_dump_cont_printf(m, dmsg, " ");
0215 if (pr & _PAGE_GLOBAL)
0216 pt_dump_cont_printf(m, dmsg, "GLB ");
0217 else
0218 pt_dump_cont_printf(m, dmsg, " ");
0219 if (pr & _PAGE_NX)
0220 pt_dump_cont_printf(m, dmsg, "NX ");
0221 else
0222 pt_dump_cont_printf(m, dmsg, "x ");
0223 }
0224 pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
0225 }
0226
0227 static void note_wx(struct pg_state *st, unsigned long addr)
0228 {
0229 unsigned long npages;
0230
0231 npages = (addr - st->start_address) / PAGE_SIZE;
0232
0233 #ifdef CONFIG_PCI_BIOS
0234
0235
0236
0237
0238 if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
0239 addr <= PAGE_OFFSET + BIOS_END) {
0240 pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
0241 return;
0242 }
0243 #endif
0244
0245 st->wx_pages += npages;
0246 WARN_ONCE(__supported_pte_mask & _PAGE_NX,
0247 "x86/mm: Found insecure W+X mapping at address %pS\n",
0248 (void *)st->start_address);
0249 }
0250
0251 static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
0252 {
0253 struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
0254 pgprotval_t prot = val & PTE_FLAGS_MASK;
0255 pgprotval_t effective;
0256
0257 if (level > 0) {
0258 pgprotval_t higher_prot = st->prot_levels[level - 1];
0259
0260 effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
0261 ((higher_prot | prot) & _PAGE_NX);
0262 } else {
0263 effective = prot;
0264 }
0265
0266 st->prot_levels[level] = effective;
0267 }
0268
0269
0270
0271
0272
0273
/*
 * ptdump callback, invoked with the entry @val found at @addr and @level.
 *
 * Consecutive entries that share raw protection, effective protection and
 * level are merged into one run. The accumulated run is flushed (checked for
 * W+X if requested, then printed) when an entry arrives that differs in any
 * of those, or when @addr reaches the start of the next address marker.
 *
 * @pt_st: walker state embedded in a struct pg_state
 * @addr:  address of the entry; also the end of the run being flushed
 * @level: page-table level of the entry
 * @val:   raw entry value; 0 is treated as having no effective protection
 */
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
		      u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	pgprotval_t new_prot, new_eff;
	pgprotval_t cur, eff;
	/* Size suffixes; one step per factor of 1024 dividing the run length. */
	static const char units[] = "BKMGTPE";
	struct seq_file *m = st->seq;

	new_prot = val & PTE_FLAGS_MASK;
	if (!val)
		new_eff = 0;
	else
		new_eff = st->prot_levels[level];

	/*
	 * If there is a "break" in the series, the state accumulated so far
	 * has to be flushed. A break is a change of protection, of level, or
	 * of address-space marker.
	 */
	cur = st->current_prot;
	eff = st->effective_prot;

	if (st->level == -1) {
		/* First entry: start the first run and print the first marker. */
		st->current_prot = new_prot;
		st->effective_prot = new_eff;
		st->level = level;
		st->marker = address_markers;
		st->lines = 0;
		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
				   st->marker->name);
	} else if (new_prot != cur || new_eff != eff || level != st->level ||
		   addr >= st->marker[1].start_address) {
		const char *unit = units;
		unsigned long delta;
		/* Number of hex digits in an unsigned long. */
		int width = sizeof(unsigned long) * 2;

		/* The finished run is writable and not NX: account it. */
		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
			note_wx(st, addr);

		/*
		 * Print the finished run, unless the marker's line limit has
		 * already been reached (max_lines == 0 means no limit).
		 */
		if (!st->marker->max_lines ||
		    st->lines < st->marker->max_lines) {
			pt_dump_seq_printf(m, st->to_dmesg,
					   "0x%0*lx-0x%0*lx ",
					   width, st->start_address,
					   width, addr);

			/* Scale the length to the largest unit that divides it. */
			delta = addr - st->start_address;
			while (!(delta & 1023) && unit[1]) {
				delta >>= 10;
				unit++;
			}
			pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
					    delta, *unit);
			printk_prot(m, st->current_prot, st->level,
				    st->to_dmesg);
		}
		/* Suppressed lines are still counted so they can be reported. */
		st->lines++;

		/*
		 * The walk has reached the next marker: report how many lines
		 * were suppressed for the region just left, then move on to
		 * the next marker and print its header.
		 */
		if (addr >= st->marker[1].start_address) {
			if (st->marker->max_lines &&
			    st->lines > st->marker->max_lines) {
				unsigned long nskip =
					st->lines - st->marker->max_lines;
				pt_dump_seq_printf(m, st->to_dmesg,
						   "... %lu entr%s skipped ... \n",
						   nskip,
						   nskip == 1 ? "y" : "ies");
			}
			st->marker++;
			st->lines = 0;
			pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
					   st->marker->name);
		}

		/* Start a new run at this entry. */
		st->start_address = addr;
		st->current_prot = new_prot;
		st->effective_prot = new_eff;
		st->level = level;
	}
}
0364
0365 static void ptdump_walk_pgd_level_core(struct seq_file *m,
0366 struct mm_struct *mm, pgd_t *pgd,
0367 bool checkwx, bool dmesg)
0368 {
0369 const struct ptdump_range ptdump_ranges[] = {
0370 #ifdef CONFIG_X86_64
0371 {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
0372 {GUARD_HOLE_END_ADDR, ~0UL},
0373 #else
0374 {0, ~0UL},
0375 #endif
0376 {0, 0}
0377 };
0378
0379 struct pg_state st = {
0380 .ptdump = {
0381 .note_page = note_page,
0382 .effective_prot = effective_prot,
0383 .range = ptdump_ranges
0384 },
0385 .level = -1,
0386 .to_dmesg = dmesg,
0387 .check_wx = checkwx,
0388 .seq = m
0389 };
0390
0391 ptdump_walk_pgd(&st.ptdump, mm, pgd);
0392
0393 if (!checkwx)
0394 return;
0395 if (st.wx_pages)
0396 pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
0397 st.wx_pages);
0398 else
0399 pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
0400 }
0401
0402 void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
0403 {
0404 ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true);
0405 }
0406
0407 void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
0408 bool user)
0409 {
0410 pgd_t *pgd = mm->pgd;
0411 #ifdef CONFIG_PAGE_TABLE_ISOLATION
0412 if (user && boot_cpu_has(X86_FEATURE_PTI))
0413 pgd = kernel_to_user_pgdp(pgd);
0414 #endif
0415 ptdump_walk_pgd_level_core(m, mm, pgd, false, false);
0416 }
0417 EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
0418
/*
 * Run the W+X check over the table that kernel_to_user_pgdp() returns for
 * INIT_PGD. Nothing is dumped: no seq_file is passed and dmesg output is
 * off. Does nothing without CONFIG_PAGE_TABLE_ISOLATION, when NX is not in
 * __supported_pte_mask, or when X86_FEATURE_PTI is absent.
 */
void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pgd_t *user_pgd;

	if (!(__supported_pte_mask & _PAGE_NX))
		return;
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	pr_info("x86/mm: Checking user space page tables\n");
	user_pgd = kernel_to_user_pgdp(INIT_PGD);
	ptdump_walk_pgd_level_core(NULL, &init_mm, user_pgd, true, false);
#endif
}
0433
0434 void ptdump_walk_pgd_level_checkwx(void)
0435 {
0436 ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
0437 }
0438
0439 static int __init pt_dump_init(void)
0440 {
0441
0442
0443
0444
0445 #ifdef CONFIG_X86_64
0446 address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
0447 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
0448 address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
0449 #ifdef CONFIG_MODIFY_LDT_SYSCALL
0450 address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
0451 #endif
0452 #ifdef CONFIG_KASAN
0453 address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
0454 address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
0455 #endif
0456 #endif
0457 #ifdef CONFIG_X86_32
0458 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
0459 address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
0460 # ifdef CONFIG_HIGHMEM
0461 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
0462 # endif
0463 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
0464 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
0465 # ifdef CONFIG_MODIFY_LDT_SYSCALL
0466 address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
0467 # endif
0468 #endif
0469 return 0;
0470 }
0471 __initcall(pt_dump_init);