0001 #include <linux/gfp.h>
0002 #include <linux/initrd.h>
0003 #include <linux/ioport.h>
0004 #include <linux/swap.h>
0005 #include <linux/memblock.h>
0006 #include <linux/swapfile.h>
0007 #include <linux/swapops.h>
0008 #include <linux/kmemleak.h>
0009 #include <linux/sched/task.h>
0010
0011 #include <asm/set_memory.h>
0012 #include <asm/e820/api.h>
0013 #include <asm/init.h>
0014 #include <asm/page.h>
0015 #include <asm/page_types.h>
0016 #include <asm/sections.h>
0017 #include <asm/setup.h>
0018 #include <asm/tlbflush.h>
0019 #include <asm/tlb.h>
0020 #include <asm/proto.h>
0021 #include <asm/dma.h> /* for MAX_DMA_PFN */
0022 #include <asm/microcode.h>
0023 #include <asm/kaslr.h>
0024 #include <asm/hypervisor.h>
0025 #include <asm/cpufeature.h>
0026 #include <asm/pti.h>
0027 #include <asm/text-patching.h>
0028 #include <asm/memtype.h>

/*
 * The TLB flush tracepoints are defined (not just declared) here:
 * CREATE_TRACE_POINTS must be set once before including the trace header.
 */
#define CREATE_TRACE_POINTS
0034 #include <trace/events/tlb.h>
0035
0036 #include "mm_internal.h"
0037
/*
 * Tables translating between page_cache_mode and the cache-attribute bits
 * (PWT, PCD, PAT) in a pte.
 *
 * The default values below are the minimal, always-safe encodings: WC, WT
 * and WP all fall back to UC-.  pat_init() updates the tables to use the
 * additional cache modes once it is safe to do so.
 *
 * __cachemode2pte_tbl[] is indexed by the cache mode; __pte2cachemode_tbl[]
 * is indexed by the pte's PWT, PCD and PAT bits, packed into bits 0-2 of
 * the index by __pte2cm_idx().
 */
0052 static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
0053 [_PAGE_CACHE_MODE_WB ] = 0 | 0 ,
0054 [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD,
0055 [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD,
0056 [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD,
0057 [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD,
0058 [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD,
0059 };
0060
0061 unsigned long cachemode2protval(enum page_cache_mode pcm)
0062 {
0063 if (likely(pcm == 0))
0064 return 0;
0065 return __cachemode2pte_tbl[pcm];
0066 }
0067 EXPORT_SYMBOL(cachemode2protval);
0068
0069 static uint8_t __pte2cachemode_tbl[8] = {
0070 [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB,
0071 [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
0072 [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
0073 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC,
0074 [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
0075 [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
0076 [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
0077 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
0078 };
0079
/*
 * Check whether the write-protect cache mode is really usable: translate
 * _PAGE_CACHE_MODE_WP to protection bits and back again, and verify that
 * the round trip still yields WP.  This works without assuming a particular
 * PAT layout (e.g. the layout Xen imposes differs from the kernel default).
 */
0089 bool x86_has_pat_wp(void)
0090 {
0091 uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];
0092
0093 return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
0094 }
0095
0096 enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
0097 {
0098 unsigned long masked;
0099
0100 masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
0101 if (likely(masked == 0))
0102 return 0;
0103 return __pte2cachemode_tbl[__pte2cm_idx(masked)];
0104 }
0105
0106 static unsigned long __initdata pgt_buf_start;
0107 static unsigned long __initdata pgt_buf_end;
0108 static unsigned long __initdata pgt_buf_top;
0109
0110 static unsigned long min_pfn_mapped;
0111
0112 static bool __initdata can_use_brk_pgt = true;
0113
/*
 * Allocate 'num' zeroed pages for early page tables.
 *
 * Before bootmem/memblock is torn down, pages come from the brk-reserved
 * pgt_buf or from memblock within the already direct-mapped range, so the
 * returned pages are always accessible through the direct mapping.  After
 * boot, fall back to the page allocator.
 */
0123 __ref void *alloc_low_pages(unsigned int num)
0124 {
0125 unsigned long pfn;
0126 int i;
0127
0128 if (after_bootmem) {
0129 unsigned int order;
0130
0131 order = get_order((unsigned long)num << PAGE_SHIFT);
0132 return (void *)__get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
0133 }
0134
0135 if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
0136 unsigned long ret = 0;
0137
0138 if (min_pfn_mapped < max_pfn_mapped) {
0139 ret = memblock_phys_alloc_range(
0140 PAGE_SIZE * num, PAGE_SIZE,
0141 min_pfn_mapped << PAGE_SHIFT,
0142 max_pfn_mapped << PAGE_SHIFT);
0143 }
0144 if (!ret && can_use_brk_pgt)
0145 ret = __pa(extend_brk(PAGE_SIZE * num, PAGE_SIZE));
0146
0147 if (!ret)
0148 panic("alloc_low_pages: can not alloc memory");
0149
0150 pfn = ret >> PAGE_SHIFT;
0151 } else {
0152 pfn = pgt_buf_end;
0153 pgt_buf_end += num;
0154 }
0155
0156 for (i = 0; i < num; i++) {
0157 void *adr;
0158
0159 adr = __va((pfn + i) << PAGE_SHIFT);
0160 clear_page(adr);
0161 }
0162
0163 return __va(pfn << PAGE_SHIFT);
0164 }
0165
/*
 * Number of page-table pages reserved in brk for the early direct mapping:
 * enough to map the 0..ISA_END_ADDRESS range plus the initial PMD_SIZE
 * chunk, and twice as many again when KASLR memory randomization can leave
 * those ranges poorly aligned.
 */
0174 #ifndef CONFIG_X86_5LEVEL
0175 #define INIT_PGD_PAGE_TABLES 3
0176 #else
0177 #define INIT_PGD_PAGE_TABLES 4
0178 #endif
0179
0180 #ifndef CONFIG_RANDOMIZE_MEMORY
0181 #define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES)
0182 #else
0183 #define INIT_PGD_PAGE_COUNT (4 * INIT_PGD_PAGE_TABLES)
0184 #endif
0185
0186 #define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
0187 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
0188 void __init early_alloc_pgt_buf(void)
0189 {
0190 unsigned long tables = INIT_PGT_BUF_SIZE;
0191 phys_addr_t base;
0192
0193 base = __pa(extend_brk(tables, PAGE_SIZE));
0194
0195 pgt_buf_start = base >> PAGE_SHIFT;
0196 pgt_buf_end = pgt_buf_start;
0197 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
0198 }
0199
0200 int after_bootmem;
0201
0202 early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
0203
0204 struct map_range {
0205 unsigned long start;
0206 unsigned long end;
0207 unsigned page_size_mask;
0208 };
0209
0210 static int page_size_mask;
0211
/*
 * Set bits in CR4 and keep mmu_cr4_features (and the copy used by the
 * real-mode trampoline) in sync, so that secondary CPUs booting later
 * start with the same CR4 settings.
 */
0217 static inline void cr4_set_bits_and_update_boot(unsigned long mask)
0218 {
0219 mmu_cr4_features |= mask;
0220 if (trampoline_cr4_features)
0221 *trampoline_cr4_features = mmu_cr4_features;
0222 cr4_set_bits(mask);
0223 }
0224
0225 static void __init probe_page_size_mask(void)
0226 {
	/*
	 * For pagealloc debugging, the identity mapping uses small pages.
	 * This simplifies cpa(), which otherwise would need to split large
	 * pages into small ones in interrupt context, etc.
	 */
0232 if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
0233 page_size_mask |= 1 << PG_LEVEL_2M;
0234 else
0235 direct_gbpages = 0;
0236
0237
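	/* Enable PSE if available: */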
0238 if (boot_cpu_has(X86_FEATURE_PSE))
0239 cr4_set_bits_and_update_boot(X86_CR4_PSE);
0240
0241
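	/* Enable PGE if available: */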
0242 __supported_pte_mask &= ~_PAGE_GLOBAL;
0243 if (boot_cpu_has(X86_FEATURE_PGE)) {
0244 cr4_set_bits_and_update_boot(X86_CR4_PGE);
0245 __supported_pte_mask |= _PAGE_GLOBAL;
0246 }
0247
0248
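	/* By default everything is supported: */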
0249 __default_kernel_pte_mask = __supported_pte_mask;
0250
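	/* Except with PTI, where the kernel is mostly non-Global: */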
0251 if (cpu_feature_enabled(X86_FEATURE_PTI))
0252 __default_kernel_pte_mask &= ~_PAGE_GLOBAL;
0253
0254
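	/* Enable 1 GB linear kernel mappings if available: */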
0255 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
0256 printk(KERN_INFO "Using GB pages for direct mapping\n");
0257 page_size_mask |= 1 << PG_LEVEL_1G;
0258 } else {
0259 direct_gbpages = 0;
0260 }
0261 }
0262
0263 static void setup_pcid(void)
0264 {
0265 if (!IS_ENABLED(CONFIG_X86_64))
0266 return;
0267
0268 if (!boot_cpu_has(X86_FEATURE_PCID))
0269 return;
0270
0271 if (boot_cpu_has(X86_FEATURE_PGE)) {
		/*
		 * This can't be cr4_set_bits_and_update_boot() -- the
		 * trampoline code can't handle CR4.PCIDE and it wouldn't do
		 * any good anyway.  Despite its name, that helper doesn't
		 * keep the bit set all the way through the secondary boot
		 * asm; instead, CR4.PCIDE is set manually in
		 * start_secondary().
		 */
0283 cr4_set_bits(X86_CR4_PCIDE);
0284
		/*
		 * INVPCID's single-context modes (2/3) only work if
		 * X86_CR4_PCIDE is set *and* INVPCID is supported; only
		 * advertise the single-context flavour in that case.
		 */
0291 if (boot_cpu_has(X86_FEATURE_INVPCID))
0292 setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
0293 } else {
		/*
		 * flush_tlb_all(), as currently implemented, won't work if
		 * PCID is on but PGE is not.  That combination doesn't exist
		 * on real hardware, so don't try to support it; just avoid
		 * corrupting data on an oddly configured VM by clearing PCID.
		 */
0301 setup_clear_cpu_cap(X86_FEATURE_PCID);
0302 }
0303 }
0304
0305 #ifdef CONFIG_X86_32
0306 #define NR_RANGE_MR 3
0307 #else
0308 #define NR_RANGE_MR 5
0309 #endif
0310
0311 static int __meminit save_mr(struct map_range *mr, int nr_range,
0312 unsigned long start_pfn, unsigned long end_pfn,
0313 unsigned long page_size_mask)
0314 {
0315 if (start_pfn < end_pfn) {
0316 if (nr_range >= NR_RANGE_MR)
0317 panic("run out of range for init_memory_mapping\n");
0318 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
0319 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
0320 mr[nr_range].page_size_mask = page_size_mask;
0321 nr_range++;
0322 }
0323
0324 return nr_range;
0325 }
0326
/*
 * Adjust the page_size_mask of small ranges so they can use a bigger page
 * size when the surrounding memory is RAM as well.
 */
0331 static void __ref adjust_range_page_size_mask(struct map_range *mr,
0332 int nr_range)
0333 {
0334 int i;
0335
0336 for (i = 0; i < nr_range; i++) {
0337 if ((page_size_mask & (1<<PG_LEVEL_2M)) &&
0338 !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) {
0339 unsigned long start = round_down(mr[i].start, PMD_SIZE);
0340 unsigned long end = round_up(mr[i].end, PMD_SIZE);
0341
0342 #ifdef CONFIG_X86_32
0343 if ((end >> PAGE_SHIFT) > max_low_pfn)
0344 continue;
0345 #endif
0346
0347 if (memblock_is_region_memory(start, end - start))
0348 mr[i].page_size_mask |= 1<<PG_LEVEL_2M;
0349 }
0350 if ((page_size_mask & (1<<PG_LEVEL_1G)) &&
0351 !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) {
0352 unsigned long start = round_down(mr[i].start, PUD_SIZE);
0353 unsigned long end = round_up(mr[i].end, PUD_SIZE);
0354
0355 if (memblock_is_region_memory(start, end - start))
0356 mr[i].page_size_mask |= 1<<PG_LEVEL_1G;
0357 }
0358 }
0359 }
0360
0361 static const char *page_size_string(struct map_range *mr)
0362 {
0363 static const char str_1g[] = "1G";
0364 static const char str_2m[] = "2M";
0365 static const char str_4m[] = "4M";
0366 static const char str_4k[] = "4k";
0367
0368 if (mr->page_size_mask & (1<<PG_LEVEL_1G))
0369 return str_1g;
0370
	/*
	 * 32-bit without PAE has a 4M large page size.  PG_LEVEL_2M is
	 * misnamed there, but at least print the right size.
	 */
0375 if (IS_ENABLED(CONFIG_X86_32) &&
0376 !IS_ENABLED(CONFIG_X86_PAE) &&
0377 mr->page_size_mask & (1<<PG_LEVEL_2M))
0378 return str_4m;
0379
0380 if (mr->page_size_mask & (1<<PG_LEVEL_2M))
0381 return str_2m;
0382
0383 return str_4k;
0384 }
0385
0386 static int __meminit split_mem_range(struct map_range *mr, int nr_range,
0387 unsigned long start,
0388 unsigned long end)
0389 {
0390 unsigned long start_pfn, end_pfn, limit_pfn;
0391 unsigned long pfn;
0392 int i;
0393
0394 limit_pfn = PFN_DOWN(end);
0395
0396
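	/* Head: map with 4k pages up to the first big-page boundary. */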
0397 pfn = start_pfn = PFN_DOWN(start);
0398 #ifdef CONFIG_X86_32
	/*
	 * Don't use a large page for the first 2/4MB of memory because
	 * there are often fixed-size MTRRs in there, and overlapping MTRRs
	 * into large pages can cause slowdowns.
	 */
0405 if (pfn == 0)
0406 end_pfn = PFN_DOWN(PMD_SIZE);
0407 else
0408 end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
0409 #else
0410 end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
0411 #endif
0412 if (end_pfn > limit_pfn)
0413 end_pfn = limit_pfn;
0414 if (start_pfn < end_pfn) {
0415 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
0416 pfn = end_pfn;
0417 }
0418
0419
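	/* Big page (2M) range: */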
0420 start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
0421 #ifdef CONFIG_X86_32
0422 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
0423 #else
0424 end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
0425 if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE)))
0426 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
0427 #endif
0428
0429 if (start_pfn < end_pfn) {
0430 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
0431 page_size_mask & (1<<PG_LEVEL_2M));
0432 pfn = end_pfn;
0433 }
0434
0435 #ifdef CONFIG_X86_64
0436
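	/* Big page (1G) range: */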
0437 start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
0438 end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));
0439 if (start_pfn < end_pfn) {
0440 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
0441 page_size_mask &
0442 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
0443 pfn = end_pfn;
0444 }
0445
0446
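	/* Tail that is not 1G-aligned: map with 2M pages. */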
0447 start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
0448 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
0449 if (start_pfn < end_pfn) {
0450 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
0451 page_size_mask & (1<<PG_LEVEL_2M));
0452 pfn = end_pfn;
0453 }
0454 #endif
0455
0456
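	/* Tail that is not 2M-aligned: map with 4k pages. */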
0457 start_pfn = pfn;
0458 end_pfn = limit_pfn;
0459 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
0460
0461 if (!after_bootmem)
0462 adjust_range_page_size_mask(mr, nr_range);
0463
0464
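	/* Try to merge ranges that are adjacent and share a page size: */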
0465 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
0466 unsigned long old_start;
0467 if (mr[i].end != mr[i+1].start ||
0468 mr[i].page_size_mask != mr[i+1].page_size_mask)
0469 continue;
0470
0471 old_start = mr[i].start;
0472 memmove(&mr[i], &mr[i+1],
0473 (nr_range - 1 - i) * sizeof(struct map_range));
0474 mr[i--].start = old_start;
0475 nr_range--;
0476 }
0477
0478 for (i = 0; i < nr_range; i++)
0479 pr_debug(" [mem %#010lx-%#010lx] page %s\n",
0480 mr[i].start, mr[i].end - 1,
0481 page_size_string(&mr[i]));
0482
0483 return nr_range;
0484 }
0485
0486 struct range pfn_mapped[E820_MAX_ENTRIES];
0487 int nr_pfn_mapped;
0488
0489 static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
0490 {
0491 nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_MAX_ENTRIES,
0492 nr_pfn_mapped, start_pfn, end_pfn);
0493 nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_MAX_ENTRIES);
0494
0495 max_pfn_mapped = max(max_pfn_mapped, end_pfn);
0496
0497 if (start_pfn < (1UL<<(32-PAGE_SHIFT)))
0498 max_low_pfn_mapped = max(max_low_pfn_mapped,
0499 min(end_pfn, 1UL<<(32-PAGE_SHIFT)));
0500 }
0501
0502 bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
0503 {
0504 int i;
0505
0506 for (i = 0; i < nr_pfn_mapped; i++)
0507 if ((start_pfn >= pfn_mapped[i].start) &&
0508 (end_pfn <= pfn_mapped[i].end))
0509 return true;
0510
0511 return false;
0512 }
0513
/*
 * Set up the direct mapping of the physical memory at PAGE_OFFSET for
 * [start, end).  This runs before bootmem is initialized and gets its
 * page-table pages directly from already-mapped physical memory.
 */
0519 unsigned long __ref init_memory_mapping(unsigned long start,
0520 unsigned long end, pgprot_t prot)
0521 {
0522 struct map_range mr[NR_RANGE_MR];
0523 unsigned long ret = 0;
0524 int nr_range, i;
0525
0526 pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n",
0527 start, end - 1);
0528
0529 memset(mr, 0, sizeof(mr));
0530 nr_range = split_mem_range(mr, 0, start, end);
0531
0532 for (i = 0; i < nr_range; i++)
0533 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
0534 mr[i].page_size_mask,
0535 prot);
0536
0537 add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
0538
0539 return ret >> PAGE_SHIFT;
0540 }
0541
/*
 * We need to iterate through the memblock-recorded RAM ranges and create
 * direct mappings only for those.  We cannot simply map all pfns from
 * [0, max_low_pfn) and [4GB, max_pfn) because of possible memory holes in
 * high addresses that cannot be marked as UC by fixed/variable range MTRRs.
 * Depending on the alignment of the ranges, this may result in using
 * smaller pages (4K instead of 2M or 1G).
 *
 * init_mem_mapping() calls this with a big range; that range may have
 * holes in the middle or at the ends, and only the RAM parts within it
 * are mapped here.
 */
0555 static unsigned long __init init_range_memory_mapping(
0556 unsigned long r_start,
0557 unsigned long r_end)
0558 {
0559 unsigned long start_pfn, end_pfn;
0560 unsigned long mapped_ram_size = 0;
0561 int i;
0562
0563 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
0564 u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end);
0565 u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end);
0566 if (start >= end)
0567 continue;
0568
		/*
		 * If this range overlaps the brk page-table buffer, allocate
		 * page-table pages from memblock instead.
		 */
0573 can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
0574 min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
0575 init_memory_mapping(start, end, PAGE_KERNEL);
0576 mapped_ram_size += end - start;
0577 can_use_brk_pgt = true;
0578 }
0579
0580 return mapped_ram_size;
0581 }
0582
0583 static unsigned long __init get_new_step_size(unsigned long step_size)
0584 {
	/*
	 * The initial mapped chunk is PMD_SIZE (2M).  The step cannot jump
	 * straight to PUD_SIZE (1G): in the worst case, crossing a 1G
	 * boundary with PG_LEVEL_2M unset needs 1+1+512 page-table pages
	 * (2M + 8k) to map a 1G range with PTEs.  Hence only grow the step
	 * size by (PMD_SHIFT - PAGE_SHIFT + 1) bits at a time.
	 */
	return step_size << (PMD_SHIFT - PAGE_SHIFT + 1);
0600 }
0601
/**
 * memory_map_top_down - Map [map_start, map_end) top down
 * @map_start: start address of the target memory range
 * @map_end: end address of the target memory range
 *
 * Set up the direct mapping for [map_start, map_end) top-down: page tables
 * are allocated near the end of memory and the memory is mapped from the
 * top towards the bottom.
 */
0612 static void __init memory_map_top_down(unsigned long map_start,
0613 unsigned long map_end)
0614 {
0615 unsigned long real_end, last_start;
0616 unsigned long step_size;
0617 unsigned long addr;
0618 unsigned long mapped_ram_size = 0;
0619
	/*
	 * Systems that have many reserved areas near the top of memory,
	 * e.g. QEMU with less than 1G RAM and EFI enabled, or Xen, will
	 * require lots of 4K mappings which may exhaust pgt_buf.
	 * Start with the top-most PMD_SIZE range aligned at PMD_SIZE to
	 * ensure enough mapped memory is available for memblock allocations.
	 */
0628 addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
0629 map_end);
0630 memblock_phys_free(addr, PMD_SIZE);
0631 real_end = addr + PMD_SIZE;
0632
0633
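	/* step_size needs to stay small so pgt_buf from BRK can cover it: */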
0634 step_size = PMD_SIZE;
0635 max_pfn_mapped = 0;
0636 min_pfn_mapped = real_end >> PAGE_SHIFT;
0637 last_start = real_end;
0638
	/*
	 * We start from the top (end of memory) and go towards the bottom.
	 * Page-table pages for each step are allocated from the region
	 * mapped in the previous steps, [min_pfn_mapped, max_pfn_mapped),
	 * so they are always accessible through the direct mapping.
	 */
0645 while (last_start > map_start) {
0646 unsigned long start;
0647
0648 if (last_start > step_size) {
0649 start = round_down(last_start - 1, step_size);
0650 if (start < map_start)
0651 start = map_start;
0652 } else
0653 start = map_start;
0654 mapped_ram_size += init_range_memory_mapping(start,
0655 last_start);
0656 last_start = start;
0657 min_pfn_mapped = last_start >> PAGE_SHIFT;
0658 if (mapped_ram_size >= step_size)
0659 step_size = get_new_step_size(step_size);
0660 }
0661
0662 if (real_end < map_end)
0663 init_range_memory_mapping(real_end, map_end);
0664 }
0665
/**
 * memory_map_bottom_up - Map [map_start, map_end) bottom up
 * @map_start: start address of the target memory range
 * @map_end: end address of the target memory range
 *
 * Set up the direct mapping for [map_start, map_end) bottom-up.  Since
 * bottom-up allocation is limited to above the kernel, the page tables
 * are allocated just above the kernel and the memory is mapped from the
 * bottom towards the top.
 */
0677 static void __init memory_map_bottom_up(unsigned long map_start,
0678 unsigned long map_end)
0679 {
0680 unsigned long next, start;
0681 unsigned long mapped_ram_size = 0;
0682
0683 unsigned long step_size = PMD_SIZE;
0684
0685 start = map_start;
0686 min_pfn_mapped = start >> PAGE_SHIFT;
0687
	/*
	 * We start from the bottom (@map_start) and go to the top (@map_end).
	 * Page tables allocated by memblock come from just above the kernel,
	 * inside the already mapped region.
	 */
0694 while (start < map_end) {
0695 if (step_size && map_end - start > step_size) {
0696 next = round_up(start + 1, step_size);
0697 if (next > map_end)
0698 next = map_end;
0699 } else {
0700 next = map_end;
0701 }
0702
0703 mapped_ram_size += init_range_memory_mapping(start, next);
0704 start = next;
0705
0706 if (mapped_ram_size >= step_size)
0707 step_size = get_new_step_size(step_size);
0708 }
0709 }
0710
/*
 * The real-mode trampoline, which is required for bootstrapping CPUs,
 * occupies only a small part of the low 1MB, but it needs page tables of
 * its own that cover that region.  Without KASLR the kernel's own PGD
 * entry for the direct mapping can simply be reused; with KASLR memory
 * randomization a dedicated entry is built by init_trampoline_kaslr().
 */
0723 static void __init init_trampoline(void)
0724 {
0725 #ifdef CONFIG_X86_64
	/*
	 * The code below aliases kernel page tables in the user range of the
	 * trampoline address space, including the Global bit, so global TLB
	 * entries will be created when the trampoline page table is used.
	 */
0731 if (!kaslr_memory_enabled())
0732 trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
0733 else
0734 init_trampoline_kaslr();
0735 #endif
0736 }
0737
0738 void __init init_mem_mapping(void)
0739 {
0740 unsigned long end;
0741
0742 pti_check_boottime_disable();
0743 probe_page_size_mask();
0744 setup_pcid();
0745
0746 #ifdef CONFIG_X86_64
0747 end = max_pfn << PAGE_SHIFT;
0748 #else
0749 end = max_low_pfn << PAGE_SHIFT;
0750 #endif
0751
0752
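	/* The ISA range is always mapped regardless of memory holes: */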
0753 init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL);
0754
0755
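	/* Init the trampoline, possibly with a KASLR memory offset: */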
0756 init_trampoline();
0757
	/*
	 * If memblock allocation is in bottom-up direction, set up the
	 * direct mapping bottom-up as well, otherwise top-down.
	 */
0762 if (memblock_bottom_up()) {
0763 unsigned long kernel_end = __pa_symbol(_end);
0764
		/*
		 * Two separate calls are needed here because page tables
		 * should be allocated above the kernel: first map
		 * [kernel_end, end) so memory above the kernel is usable as
		 * soon as possible, then use page tables allocated there to
		 * map [ISA_END_ADDRESS, kernel_end).
		 */
0772 memory_map_bottom_up(kernel_end, end);
0773 memory_map_bottom_up(ISA_END_ADDRESS, kernel_end);
0774 } else {
0775 memory_map_top_down(ISA_END_ADDRESS, end);
0776 }
0777
0778 #ifdef CONFIG_X86_64
0779 if (max_pfn > max_low_pfn) {
0780
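		/* Can we preserve max_low_pfn? */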
0781 max_low_pfn = max_pfn;
0782 }
0783 #else
0784 early_ioremap_page_table_range_init();
0785 #endif
0786
0787 load_cr3(swapper_pg_dir);
0788 __flush_tlb_all();
0789
0790 x86_init.hyper.init_mem_mapping();
0791
0792 early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
0793 }
0794
/*
 * Initialize the mm_struct and the fixed virtual address used by
 * text_poke() during kernel text patching.
 */
0799 void __init poking_init(void)
0800 {
0801 spinlock_t *ptl;
0802 pte_t *ptep;
0803
0804 poking_mm = copy_init_mm();
0805 BUG_ON(!poking_mm);
0806
	/*
	 * Randomize the poking address, but make sure that the following
	 * page will be mapped by the same PMD.  We need 2 pages, so find
	 * space for 3, and adjust the address if the PMD ends after the
	 * first one.
	 */
0812 poking_addr = TASK_UNMAPPED_BASE;
0813 if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
0814 poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
0815 (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
0816
0817 if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
0818 poking_addr += PAGE_SIZE;
0819
	/*
	 * Trigger the allocation of the page tables that will be needed for
	 * poking now.  Later, poking may happen in an atomic section, where
	 * allocation could fail.
	 */
0825 ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
0826 BUG_ON(!ptep);
0827 pte_unmap_unlock(ptep, ptl);
0828 }
0829
/*
 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
 * is valid.  The argument is a physical page number.
 *
 * On x86, access has to be given to the first megabyte of RAM because that
 * area traditionally contains BIOS code and data regions used by X, dosemu,
 * and similar apps.  Since they map the entire memory range, the whole range
 * must be allowed (for mapping), but any areas that would otherwise be used
 * as exclusive MMIO resources can still be restricted.  That way /dev/mem
 * access to "occupied" MMIO regions is refused.
 */
0842 int devmem_is_allowed(unsigned long pagenr)
0843 {
0844 if (region_intersects(PFN_PHYS(pagenr), PAGE_SIZE,
0845 IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
0846 != REGION_DISJOINT) {
		/*
		 * For disallowed memory regions in the low 1MB range,
		 * request that the page be shown as all zeros.
		 */
0851 if (pagenr < 256)
0852 return 2;
0853
0854 return 0;
0855 }
0856
	/*
	 * This must follow the RAM test, since System RAM is considered a
	 * restricted resource under CONFIG_STRICT_DEVMEM.
	 */
0861 if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
0862
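		/* Low 1MB bypasses iomem restrictions. */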
0863 if (pagenr < 256)
0864 return 1;
0865
0866 return 0;
0867 }
0868
0869 return 1;
0870 }
0871
0872 void free_init_pages(const char *what, unsigned long begin, unsigned long end)
0873 {
0874 unsigned long begin_aligned, end_aligned;
0875
0876
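	/* Make sure the boundaries are page aligned: */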
0877 begin_aligned = PAGE_ALIGN(begin);
0878 end_aligned = end & PAGE_MASK;
0879
0880 if (WARN_ON(begin_aligned != begin || end_aligned != end)) {
0881 begin = begin_aligned;
0882 end = end_aligned;
0883 }
0884
0885 if (begin >= end)
0886 return;
0887
	/*
	 * If debugging page accesses, do not free this memory but mark it
	 * not-present instead - any buggy init-section access will then
	 * create a kernel page fault:
	 */
0893 if (debug_pagealloc_enabled()) {
0894 pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
0895 begin, end - 1);
0896
		/*
		 * Inform kmemleak not to scan the freed memory:
		 */
0900 kmemleak_free_part((void *)begin, end - begin);
0901 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
0902 } else {
		/*
		 * The kernel text was marked read-only and executable
		 * earlier; before freeing part of it, make it writable and
		 * non-executable again.
		 */
0908 set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
0909 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
0910
0911 free_reserved_area((void *)begin, (void *)end,
0912 POISON_FREE_INITMEM, what);
0913 }
0914 }
0915
/*
 * begin/end can be in the direct map or in the "high kernel mapping" used
 * for the kernel image only.  free_init_pages() does the right thing for
 * either kind of address.
 */
0921 void free_kernel_image_pages(const char *what, void *begin, void *end)
0922 {
0923 unsigned long begin_ul = (unsigned long)begin;
0924 unsigned long end_ul = (unsigned long)end;
0925 unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
0926
0927 free_init_pages(what, begin_ul, end_ul);
0928
	/*
	 * PTI maps some of the kernel into userspace.  For performance,
	 * this includes some kernel areas that do not contain secrets.
	 * Those areas might be adjacent to the parts of the kernel image
	 * being freed, which may contain secrets.  Remove the "high kernel
	 * image mapping" for these freed areas, ensuring they are not even
	 * potentially vulnerable to Meltdown regardless of the specific
	 * optimizations PTI is currently using.
	 *
	 * The "noalias" prevents unmapping the direct-map alias, which is
	 * needed to access the freed pages.
	 *
	 * This is only valid for 64-bit kernels.  32-bit has only one
	 * mapping, which can't be treated this way.
	 */
0944 if (IS_ENABLED(CONFIG_X86_64) && cpu_feature_enabled(X86_FEATURE_PTI))
0945 set_memory_np_noalias(begin_ul, len_pages);
0946 }
0947
0948 void __ref free_initmem(void)
0949 {
0950 e820__reallocate_tables();
0951
0952 mem_encrypt_free_decrypted_mem();
0953
0954 free_kernel_image_pages("unused kernel image (initmem)",
0955 &__init_begin, &__init_end);
0956 }
0957
0958 #ifdef CONFIG_BLK_DEV_INITRD
0959 void __init free_initrd_mem(unsigned long start, unsigned long end)
0960 {
	/*
	 * end may be unaligned and must not be rounded down, since an
	 * aligned initrd_end could confuse the decompressor.  The trailing
	 * partial page was already reserved earlier during boot, so it is
	 * safe to PAGE_ALIGN() the end here and free only whole pages.
	 */
0970 free_init_pages("initrd", start, PAGE_ALIGN(end));
0971 }
0972 #endif
0973
/*
 * Calculate how many pages in the first 16 MB of RAM (the DMA zone) are
 * present but not free in memblock, i.e. already reserved, and report that
 * number via set_dma_reserve() so the page allocator can account for it
 * when sizing ZONE_DMA.
 */
0982 void __init memblock_find_dma_reserve(void)
0983 {
0984 #ifdef CONFIG_X86_64
0985 u64 nr_pages = 0, nr_free_pages = 0;
0986 unsigned long start_pfn, end_pfn;
0987 phys_addr_t start_addr, end_addr;
0988 int i;
0989 u64 u;
0990
	/*
	 * Count all pages (free and reserved alike) in the first 16 MB of
	 * RAM:
	 */
0995 nr_pages = 0;
0996 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
0997 start_pfn = min(start_pfn, MAX_DMA_PFN);
0998 end_pfn = min(end_pfn, MAX_DMA_PFN);
0999
1000 nr_pages += end_pfn - start_pfn;
1001 }
1002
	/*
	 * Iterate over the free memory ranges to count the free pages in
	 * the first 16 MB of RAM:
	 */
1008 nr_free_pages = 0;
1009 for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
1010 start_pfn = min_t(unsigned long, PFN_UP(start_addr), MAX_DMA_PFN);
1011 end_pfn = min_t(unsigned long, PFN_DOWN(end_addr), MAX_DMA_PFN);
1012
1013 if (start_pfn < end_pfn)
1014 nr_free_pages += end_pfn - start_pfn;
1015 }
1016
1017 set_dma_reserve(nr_pages - nr_free_pages);
1018 #endif
1019 }
1020
1021 void __init zone_sizes_init(void)
1022 {
1023 unsigned long max_zone_pfns[MAX_NR_ZONES];
1024
1025 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
1026
1027 #ifdef CONFIG_ZONE_DMA
1028 max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn);
1029 #endif
1030 #ifdef CONFIG_ZONE_DMA32
1031 max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn);
1032 #endif
1033 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
1034 #ifdef CONFIG_HIGHMEM
1035 max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
1036 #endif
1037
1038 free_area_init(max_zone_pfns);
1039 }
1040
1041 __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
1042 .loaded_mm = &init_mm,
1043 .next_asid = 1,
1044 .cr4 = ~0UL,
1045 };
1046
1047 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
1048 {
1049
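	/* Entry 0 MUST be WB (hardwired to speed up translations). */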
1050 BUG_ON(!entry && cache != _PAGE_CACHE_MODE_WB);
1051
1052 __cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
1053 __pte2cachemode_tbl[entry] = cache;
1054 }
1055
1056 #ifdef CONFIG_SWAP
1057 unsigned long max_swapfile_size(void)
1058 {
1059 unsigned long pages;
1060
1061 pages = generic_max_swapfile_size();
1062
1063 if (boot_cpu_has_bug(X86_BUG_L1TF) && l1tf_mitigation != L1TF_MITIGATION_OFF) {
1064
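		/* Limit the swap file size to MAX_PA/2 for the L1TF workaround: */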
1065 unsigned long long l1tf_limit = l1tf_pfn_limit();
1066
		/*
		 * Swap offsets are also encoded with 3 bits below those used
		 * for the pfn, which makes the usable limit higher.
		 */
1070 #if CONFIG_PGTABLE_LEVELS > 2
1071 l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
1072 #endif
1073 pages = min_t(unsigned long long, l1tf_limit, pages);
1074 }
1075 return pages;
1076 }
1077 #endif