// SPDX-License-Identifier: GPL-2.0
/*
 * kaslr.c
 *
 * This contains the routines needed to generate a reasonable level of
 * entropy to choose a randomized kernel base address offset in support
 * of Kernel Address Space Layout Randomization (KASLR). Additionally
 * handles walking the physical memory maps (and tracking memory regions
 * to avoid) in order to select a physical memory location that can
 * contain the entire properly aligned running kernel image.
 */

/*
 * next_arg() expects the isspace() from linux/ctype.h, but the boot
 * stub's own ctype.h conflicts with it (both define isdigit()).
 * Defining BOOT_CTYPE_H up front keeps the boot ctype.h from being
 * pulled in, so linux/ctype.h below is used instead.
 */
#define BOOT_CTYPE_H

#include "misc.h"
#include "error.h"
#include "../string.h"
#include "efi.h"

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <generated/utsrelease.h>

#define _SETUP
#include <asm/setup.h>	/* For COMMAND_LINE_SIZE */
#undef _SETUP

extern unsigned long get_cmd_line_ptr(void);

/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;

static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	size_t i;
	unsigned long *ptr = (unsigned long *)area;

	for (i = 0; i < size / sizeof(hash); i++) {
		/* Rotate by odd number of bits and XOR. */
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}

	return hash;
}
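
/*
 * Note: the area is consumed in unsigned-long-sized words, so any trailing
 * partial word (size % sizeof(unsigned long) bytes) does not contribute to
 * the hash.
 */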

/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_boot_seed(void)
{
	unsigned long hash = 0;

	hash = rotate_xor(hash, build_str, sizeof(build_str));
	hash = rotate_xor(hash, boot_params, sizeof(*boot_params));

	return hash;
}

#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"
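
/*
 * lib/kaslr.c supplies kaslr_get_random_long(); building it with
 * KASLR_COMPRESSED_BOOT defined adapts it to this decompression stub,
 * where it uses get_boot_seed() above as its starting entropy.
 */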

/* Only supporting at most 4 unusable memmap regions with KASLR */
#define MAX_MEMMAP_REGIONS	4

static bool memmap_too_large;

/*
 * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
 * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
 */
static u64 mem_limit;

/* Number of immovable memory regions */
static int num_immovable_mem;

enum mem_avoid_index {
	MEM_AVOID_ZO_RANGE = 0,
	MEM_AVOID_INITRD,
	MEM_AVOID_CMDLINE,
	MEM_AVOID_BOOTPARAMS,
	MEM_AVOID_MEMMAP_BEGIN,
	MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
	MEM_AVOID_MAX,
};

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
	/* Item one is entirely before item two. */
	if (one->start + one->size <= two->start)
		return false;
	/* Item one is entirely after item two. */
	if (one->start >= two->start + two->size)
		return false;
	return true;
}
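
/*
 * The ranges are treated as half-open intervals: e.g. [0x1000, +0x1000)
 * and [0x2000, +0x1000) touch but do not overlap.
 */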

char *skip_spaces(const char *str)
{
	while (isspace(*str))
		++str;
	return (char *)str;
}
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"
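
/*
 * The two lib sources above are compiled straight into the decompression
 * stub so that isspace(), next_arg() and memparse() used below are
 * available without linking against the regular kernel library.
 */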

enum parse_mode {
	PARSE_MEMMAP,
	PARSE_EFI,
};

static int
parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
	char *oldp;

	if (!p)
		return -EINVAL;

	/* We don't care about this option here. */
	if (!strncmp(p, "exactmap", 8))
		return -EINVAL;

	oldp = p;
	*size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;

	switch (*p) {
	case '#':
	case '$':
	case '!':
		*start = memparse(p + 1, &p);
		return 0;
	case '@':
		if (mode == PARSE_MEMMAP) {
			/*
			 * memmap=nn@ss specifies a usable region; there is
			 * nothing to avoid, so zero the size and fall
			 * through (with *start = 0 the entry is ignored by
			 * the caller).
			 */
			*size = 0;
		} else {
			u64 flags;

			/*
			 * efi_fake_mem=nn@ss:attr - if attr includes
			 * EFI_MEMORY_SP, the region is soft-reserved and
			 * must be avoided, so keep *start/*size; otherwise
			 * zero the size so the entry is ignored.
			 */
			*start = memparse(p + 1, &p);
			if (p && *p == ':') {
				p++;
				if (kstrtoull(p, 0, &flags) < 0)
					*size = 0;
				else if (flags & EFI_MEMORY_SP)
					return 0;
			}
			*size = 0;
		}
		fallthrough;
	default:
		/*
		 * If only a size is specified (no offset), memmap=nn[KMG]
		 * behaves like mem=nn[KMG]: it limits the maximum address
		 * the system can use. Regions above the limit are avoided.
		 */
		*start = 0;
		return 0;
	}

	return -EINVAL;
}
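
/*
 * Summary of what the caller receives for typical strings (illustrative):
 *   "512M$0x1000000" -> *size = 512M, *start = 16M  (region is avoided)
 *   "nn@ss"          -> *start = 0, *size = 0       (usable, ignored)
 *   "1G" (no offset) -> *start = 0, *size = 1G      (acts as a mem= cap)
 */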

static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
	static int i;

	if (i >= MAX_MEMMAP_REGIONS)
		return;

	while (str && (i < MAX_MEMMAP_REGIONS)) {
		int rc;
		u64 start, size;
		char *k = strchr(str, ',');

		if (k)
			*k++ = 0;

		rc = parse_memmap(str, &start, &size, mode);
		if (rc < 0)
			break;
		str = k;

		if (start == 0) {
			/* Store the specified memory limit if size > 0 */
			if (size > 0 && size < mem_limit)
				mem_limit = size;

			continue;
		}

		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
		i++;
	}

	/* More than 4 memmaps, fail KASLR */
	if ((i >= MAX_MEMMAP_REGIONS) && str)
		memmap_too_large = true;
}
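
/*
 * For example, "memmap=64K$0x10000,1G@2G" records [0x10000, 0x10000 + 64K)
 * as an avoided region and skips the usable 1G@2G entry entirely.
 */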

/* Store the number of 1GB huge pages which users specified: */
static unsigned long max_gb_huge_pages;

static void parse_gb_huge_pages(char *param, char *val)
{
	static bool gbpage_sz;
	char *p;

	if (!strcmp(param, "hugepagesz")) {
		p = val;
		if (memparse(p, &p) != PUD_SIZE) {
			gbpage_sz = false;
			return;
		}

		if (gbpage_sz)
			warn("Repeatedly set hugeTLB page size of 1G!\n");
		gbpage_sz = true;
		return;
	}

	if (!strcmp(param, "hugepages") && gbpage_sz) {
		p = val;
		max_gb_huge_pages = simple_strtoull(p, &p, 0);
		return;
	}
}
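
/*
 * Example of the expected cmdline usage: with "hugepagesz=1G hugepages=4",
 * the first call latches gbpage_sz (PUD_SIZE is 1G on x86_64) and the
 * second stores max_gb_huge_pages = 4.
 */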

static void handle_mem_options(void)
{
	char *args = (char *)get_cmd_line_ptr();
	size_t len;
	char *tmp_cmdline;
	char *param, *val;
	u64 mem_size;

	if (!args)
		return;

	len = strnlen(args, COMMAND_LINE_SIZE - 1);
	tmp_cmdline = malloc(len + 1);
	if (!tmp_cmdline)
		error("Failed to allocate space for tmp_cmdline");

	memcpy(tmp_cmdline, args, len);
	tmp_cmdline[len] = 0;
	args = tmp_cmdline;

	/* Chew leading spaces */
	args = skip_spaces(args);

	while (*args) {
		args = next_arg(args, &param, &val);
		/* Stop at -- */
		if (!val && strcmp(param, "--") == 0)
			break;

		if (!strcmp(param, "memmap")) {
			mem_avoid_memmap(PARSE_MEMMAP, val);
		} else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
			/* Matches both hugepages= and hugepagesz= */
			parse_gb_huge_pages(param, val);
		} else if (!strcmp(param, "mem")) {
			char *p = val;

			if (!strcmp(p, "nopentium"))
				continue;
			mem_size = memparse(p, &p);
			if (mem_size == 0)
				break;

			if (mem_size < mem_limit)
				mem_limit = mem_size;
		} else if (!strcmp(param, "efi_fake_mem")) {
			mem_avoid_memmap(PARSE_EFI, val);
		}
	}

	free(tmp_cmdline);
}
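
/*
 * e.g. a cmdline of "mem=4G memmap=16M$0x1000000 quiet" caps mem_limit at
 * 4G, marks [16M, 32M) as avoided, and leaves unrelated options alone.
 */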

/*
 * The mem_avoid array is used to store the ranges that need to be avoided
 * when KASLR searches for an appropriate random address: anything that is
 * unsafe to overlap during in-place decompression, plus the other pieces
 * the boot loader handed us. The fixed slots are:
 *
 *   MEM_AVOID_ZO_RANGE:   the compressed kernel and the buffer it is
 *                         decompressed into, [input, output + init_size).
 *   MEM_AVOID_INITRD:     the ramdisk, as described by boot_params.
 *   MEM_AVOID_CMDLINE:    the kernel command line string.
 *   MEM_AVOID_BOOTPARAMS: the boot_params structure itself.
 *
 * The remaining MEM_AVOID_MEMMAP_* slots hold regions parsed from the
 * memmap= and efi_fake_mem= command line options above.
 */
static void mem_avoid_init(unsigned long input, unsigned long input_size,
			   unsigned long output)
{
	unsigned long init_size = boot_params->hdr.init_size;
	u64 initrd_start, initrd_size;
	unsigned long cmd_line, cmd_line_size;

	/*
	 * Avoid the region that is unsafe to overlap during
	 * decompression.
	 */
	mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
	mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;

	/* Avoid initrd. */
	initrd_start  = (u64)boot_params->ext_ramdisk_image << 32;
	initrd_start |= boot_params->hdr.ramdisk_image;
	initrd_size  = (u64)boot_params->ext_ramdisk_size << 32;
	initrd_size |= boot_params->hdr.ramdisk_size;
	mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
	mem_avoid[MEM_AVOID_INITRD].size = initrd_size;

	/* Avoid kernel command line. */
	cmd_line = get_cmd_line_ptr();
	if (cmd_line) {
		cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE - 1) + 1;
		mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
		mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
	}

	/* Avoid boot parameters. */
	mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
	mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);

	/* Mark the memmap regions we need to avoid. */
	handle_mem_options();

	/* Enumerate the immovable memory regions. */
	num_immovable_mem = count_immovable_mem_regions();
}

/*
 * Does this memory vector overlap a known avoided area? If so, record the
 * overlap region with the lowest address.
 */
static bool mem_avoid_overlap(struct mem_vector *img,
			      struct mem_vector *overlap)
{
	int i;
	struct setup_data *ptr;
	u64 earliest = img->start + img->size;
	bool is_overlapping = false;

	for (i = 0; i < MEM_AVOID_MAX; i++) {
		if (mem_overlaps(img, &mem_avoid[i]) &&
		    mem_avoid[i].start < earliest) {
			*overlap = mem_avoid[i];
			earliest = overlap->start;
			is_overlapping = true;
		}
	}

	/* Avoid all entries in the setup_data linked list. */
	ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
	while (ptr) {
		struct mem_vector avoid;

		avoid.start = (unsigned long)ptr;
		avoid.size = sizeof(*ptr) + ptr->len;

		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
			*overlap = avoid;
			earliest = overlap->start;
			is_overlapping = true;
		}

		/* Indirect setup_data payloads live elsewhere; avoid them too. */
		if (ptr->type == SETUP_INDIRECT &&
		    ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
			avoid.start = ((struct setup_indirect *)ptr->data)->addr;
			avoid.size = ((struct setup_indirect *)ptr->data)->len;

			if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
				*overlap = avoid;
				earliest = overlap->start;
				is_overlapping = true;
			}
		}

		ptr = (struct setup_data *)(unsigned long)ptr->next;
	}

	return is_overlapping;
}
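
/*
 * Reporting the lowest-starting overlap lets __process_mem_region() below
 * keep the usable slots in front of it and resume scanning just past the
 * avoided range.
 */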

struct slot_area {
	u64 addr;
	unsigned long num;
};

#define MAX_SLOT_AREA 100

static struct slot_area slot_areas[MAX_SLOT_AREA];
static unsigned int slot_area_index;
static unsigned long slot_max;

static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
	struct slot_area slot_area;

	if (slot_area_index == MAX_SLOT_AREA)
		return;

	slot_area.addr = region->start;
	slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;

	slot_areas[slot_area_index++] = slot_area;
	slot_max += slot_area.num;
}
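
/*
 * Worked example (illustrative numbers): with CONFIG_PHYSICAL_ALIGN of
 * 2 MiB, a 16 MiB region and a 10 MiB image give
 * 1 + (16M - 10M) / 2M = 4 candidate start slots. Callers guarantee
 * region->size >= image_size before calling.
 */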

/*
 * Skip as many 1GB huge pages as possible in the passed region, according
 * to the number which users specified:
 */
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
	u64 pud_start, pud_end;
	unsigned long gb_huge_pages;
	struct mem_vector tmp;

	if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
		store_slot_info(region, image_size);
		return;
	}

	/* Are there any 1GB pages in the region? */
	pud_start = ALIGN(region->start, PUD_SIZE);
	pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);

	/* No good 1GB huge pages found in region. */
	if (pud_start >= pud_end) {
		store_slot_info(region, image_size);
		return;
	}

	/* Check if the head part of the region is usable. */
	if (pud_start >= region->start + image_size) {
		tmp.start = region->start;
		tmp.size = pud_start - region->start;
		store_slot_info(&tmp, image_size);
	}

	/* Skip the good 1GB pages. */
	gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
	if (gb_huge_pages > max_gb_huge_pages) {
		pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
		max_gb_huge_pages = 0;
	} else {
		max_gb_huge_pages -= gb_huge_pages;
	}

	/* Check if the tail part of the region is usable. */
	if (region->start + region->size >= pud_end + image_size) {
		tmp.start = pud_end;
		tmp.size = region->start + region->size - pud_end;
		store_slot_info(&tmp, image_size);
	}
}
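
/*
 * Sketch of the split performed above when the requested 1GB pages fit
 * inside the region:
 *
 *   region:  [start ...|=== 1GB-aligned pages reserved ===|... end)
 *             ^ head slots                                 ^ tail slots
 *
 * Head and tail are only stored if they can still hold image_size.
 */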

static u64 slots_fetch_random(void)
{
	unsigned long slot;
	unsigned int i;

	/* Handle case of no slots stored. */
	if (slot_max == 0)
		return 0;

	slot = kaslr_get_random_long("Physical") % slot_max;

	/* Walk the slot areas to find which one the index falls in. */
	for (i = 0; i < slot_area_index; i++) {
		if (slot >= slot_areas[i].num) {
			slot -= slot_areas[i].num;
			continue;
		}
		return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
	}

	if (i == slot_area_index)
		debug_putstr("slots_fetch_random() failed!?\n");
	return 0;
}
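
/*
 * Example draw (assumed layout): with slot_areas = {A, num = 3} and
 * {B, num = 5}, slot_max = 8, a draw of 4 lands in the second area and
 * returns B + 1 * CONFIG_PHYSICAL_ALIGN.
 */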

static void __process_mem_region(struct mem_vector *entry,
				 unsigned long minimum,
				 unsigned long image_size)
{
	struct mem_vector region, overlap;
	u64 region_end;

	/* Enforce minimum and memory limit. */
	region.start = max_t(u64, entry->start, minimum);
	region_end = min(entry->start + entry->size, mem_limit);

	/* Give up if the slot area array is full. */
	while (slot_area_index < MAX_SLOT_AREA) {
		/* Potentially raise address to minimum alignment. */
		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

		/* Did we raise the address above the passed in memory entry? */
		if (region.start > region_end)
			return;

		/* Reduce size by any delta from the original address. */
		region.size = region_end - region.start;

		/* Return if the region can't contain the decompressed kernel. */
		if (region.size < image_size)
			return;

		/* If nothing overlaps, store the region and return. */
		if (!mem_avoid_overlap(&region, &overlap)) {
			process_gb_huge_pages(&region, image_size);
			return;
		}

		/* Store the beginning of the region if it holds at least image_size. */
		if (overlap.start >= region.start + image_size) {
			region.size = overlap.start - region.start;
			process_gb_huge_pages(&region, image_size);
		}

		/* Clip off the overlapping region and start over. */
		region.start = overlap.start + overlap.size;
	}
}

static bool process_mem_region(struct mem_vector *region,
			       unsigned long minimum,
			       unsigned long image_size)
{
	int i;

	/*
	 * If no immovable memory is found (or MEMORY_HOTREMOVE is
	 * disabled), use @region directly.
	 */
	if (!num_immovable_mem) {
		__process_mem_region(region, minimum, image_size);

		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
			return true;
		}
		return false;
	}

#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
	/*
	 * If immovable memory is found, filter the intersection between
	 * immovable memory and @region.
	 */
	for (i = 0; i < num_immovable_mem; i++) {
		u64 start, end, entry_end, region_end;
		struct mem_vector entry;

		if (!mem_overlaps(region, &immovable_mem[i]))
			continue;

		start = immovable_mem[i].start;
		end = start + immovable_mem[i].size;
		region_end = region->start + region->size;

		entry.start = clamp(region->start, start, end);
		entry_end = clamp(region_end, start, end);
		entry.size = entry_end - entry.start;

		__process_mem_region(&entry, minimum, image_size);

		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820/efi memmap scan when walking immovable regions (slot_areas full)!\n");
			return true;
		}
	}
#endif
	return false;
}
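
/*
 * A true return value means "stop scanning": the slot area array filled
 * up, so the e820/EFI walkers below break out of their loops early.
 */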

#ifdef CONFIG_EFI
/*
 * Returns true if we processed the EFI memmap, which we prefer over the
 * e820 table if it is available.
 */
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	struct efi_info *e = &boot_params->efi_info;
	bool efi_mirror_found = false;
	struct mem_vector region;
	efi_memory_desc_t *md;
	unsigned long pmap;
	char *signature;
	u32 nr_desc;
	int i;

	signature = (char *)&e->efi_loader_signature;
	if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
	    strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
		return false;

#ifdef CONFIG_X86_32
	/* Can't handle an EFI memmap above 4GB on x86_32. */
	if (e->efi_memmap_hi) {
		warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
		return false;
	}
	pmap = e->efi_memmap;
#else
	pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif

	/* If any mirrored (more reliable) memory exists, prefer it. */
	nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
	for (i = 0; i < nr_desc; i++) {
		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
			efi_mirror_found = true;
			break;
		}
	}

	for (i = 0; i < nr_desc; i++) {
		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);

		/*
		 * Here we are more conservative in picking free memory than
		 * the EFI spec allows:
		 *
		 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are
		 * also free memory and thus available to place the kernel
		 * image into, but in practice there's firmware where using
		 * that memory leads to crashes.
		 *
		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
		 */
		if (md->type != EFI_CONVENTIONAL_MEMORY)
			continue;

		if (efi_soft_reserve_enabled() &&
		    (md->attribute & EFI_MEMORY_SP))
			continue;

		if (efi_mirror_found &&
		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
			continue;

		region.start = md->phys_addr;
		region.size = md->num_pages << EFI_PAGE_SHIFT;
		if (process_mem_region(&region, minimum, image_size))
			break;
	}
	return true;
}
#else
static inline bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	return false;
}
#endif

static void process_e820_entries(unsigned long minimum,
				 unsigned long image_size)
{
	int i;
	struct mem_vector region;
	struct boot_e820_entry *entry;

	/* Verify potential e820 positions, appending to the slots list. */
	for (i = 0; i < boot_params->e820_entries; i++) {
		entry = &boot_params->e820_table[i];
		/* Skip non-RAM entries. */
		if (entry->type != E820_TYPE_RAM)
			continue;
		region.start = entry->addr;
		region.size = entry->size;
		if (process_mem_region(&region, minimum, image_size))
			break;
	}
}

static unsigned long find_random_phys_addr(unsigned long minimum,
					   unsigned long image_size)
{
	u64 phys_addr;

	/* Bail out early if it's impossible to succeed. */
	if (minimum + image_size > mem_limit)
		return 0;

	/* Check if we had too many memmaps. */
	if (memmap_too_large) {
		debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
		return 0;
	}

	if (!process_efi_entries(minimum, image_size))
		process_e820_entries(minimum, image_size);

	phys_addr = slots_fetch_random();

	/* Perform a final check to make sure the address is in range. */
	if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
		warn("Invalid physical address chosen!\n");
		return 0;
	}

	return (unsigned long)phys_addr;
}

static unsigned long find_random_virt_addr(unsigned long minimum,
					   unsigned long image_size)
{
	unsigned long slots, random_addr;

	/*
	 * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold image_size
	 * within the range from minimum to KERNEL_IMAGE_SIZE?
	 */
	slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;

	random_addr = kaslr_get_random_long("Virtual") % slots;

	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
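
/*
 * Worked example (illustrative numbers, not taken from this file): with
 * KERNEL_IMAGE_SIZE = 1 GiB, CONFIG_PHYSICAL_ALIGN = 2 MiB, minimum =
 * 16 MiB and a 32 MiB image, slots = 1 + (1024M - 16M - 32M) / 2M = 489.
 */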

/*
 * Since this function examines addresses much more numerically, it takes
 * the input and output pointers as 'unsigned long'.
 */
void choose_random_location(unsigned long input,
			    unsigned long input_size,
			    unsigned long *output,
			    unsigned long output_size,
			    unsigned long *virt_addr)
{
	unsigned long random_addr, min_addr;

	if (cmdline_find_option_bool("nokaslr")) {
		warn("KASLR disabled: 'nokaslr' on cmdline.");
		return;
	}

	boot_params->hdr.loadflags |= KASLR_FLAG;

	if (IS_ENABLED(CONFIG_X86_32))
		mem_limit = KERNEL_IMAGE_SIZE;
	else
		mem_limit = MAXMEM;

	/* Record the various known unsafe memory ranges. */
	mem_avoid_init(input, input_size, *output);

	/*
	 * The low end of the randomization range should be the smaller of
	 * 512M or the initial kernel image placement.
	 */
	min_addr = min(*output, 512UL << 20);
	/* Make sure the minimum is aligned. */
	min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);

	/* Walk available memory entries to find a random address. */
	random_addr = find_random_phys_addr(min_addr, output_size);
	if (!random_addr) {
		warn("Physical KASLR disabled: no suitable memory region!");
	} else {
		/* Update the new physical address location. */
		if (*output != random_addr)
			*output = random_addr;
	}

	/* Pick a random virtual address starting from LOAD_PHYSICAL_ADDR. */
	if (IS_ENABLED(CONFIG_X86_64))
		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
	*virt_addr = random_addr;
}