// SPDX-License-Identifier: GPL-2.0
/*
 * kaslr.c
 *
 * This contains the routines needed to generate a reasonable level of
 * entropy to choose a randomized kernel base address offset in support
 * of Kernel Address Space Layout Randomization (KASLR). It additionally
 * handles walking the physical memory maps (and tracking memory regions
 * to avoid) in order to select a physical memory location that can
 * contain the entire properly aligned running kernel image.
 *
 */

/*
 * next_arg() relies on isspace() from linux/ctype.h to filter out
 * space, linefeed and tab. boot/ctype.h conflicts with linux/ctype.h
 * because isdigit() is implemented in both, hence disable boot/ctype.h
 * here.
 */
#define BOOT_CTYPE_H

#include "misc.h"
#include "error.h"
#include "../string.h"
#include "efi.h"

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <generated/utsrelease.h>

#define _SETUP
#include <asm/setup.h>  /* For COMMAND_LINE_SIZE */
#undef _SETUP

extern unsigned long get_cmd_line_ptr(void);

/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
        LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;

static unsigned long rotate_xor(unsigned long hash, const void *area,
                size_t size)
{
    size_t i;
    const unsigned long *ptr = area;

    for (i = 0; i < size / sizeof(hash); i++) {
        /* Rotate by odd number of bits and XOR. */
        hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
        hash ^= ptr[i];
    }

    return hash;
}
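
/*
 * Note: the shift pair above is a right-rotate by 7 bits (equivalently
 * a left-rotate by 57 on 64-bit, 25 on 32-bit). Any trailing bytes
 * beyond a multiple of sizeof(unsigned long) are ignored, which is
 * acceptable for a boot-time starting seed.
 */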

/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_boot_seed(void)
{
    unsigned long hash = 0;

    hash = rotate_xor(hash, build_str, sizeof(build_str));
    hash = rotate_xor(hash, boot_params, sizeof(*boot_params));

    return hash;
}
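
/*
 * The seed mixes build-time variation (build_str embeds the compiler
 * and UTS_VERSION, which typically carries a build timestamp) with
 * per-boot variation (the live boot_params, e.g. firmware-filled
 * fields and the e820 table). Stronger hardware/timing-based sources
 * are mixed in by kaslr_get_random_long(), provided by the included
 * arch/x86/lib/kaslr.c below.
 */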

#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"

/* Support at most 4 unusable memmap regions with KASLR. */
#define MAX_MEMMAP_REGIONS  4

static bool memmap_too_large;

/*
 * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
 * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
 */
static u64 mem_limit;

/* Number of immovable memory regions */
static int num_immovable_mem;

enum mem_avoid_index {
    MEM_AVOID_ZO_RANGE = 0,
    MEM_AVOID_INITRD,
    MEM_AVOID_CMDLINE,
    MEM_AVOID_BOOTPARAMS,
    MEM_AVOID_MEMMAP_BEGIN,
    MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
    MEM_AVOID_MAX,
};

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
    /* Item one is entirely before item two. */
    if (one->start + one->size <= two->start)
        return false;
    /* Item one is entirely after item two. */
    if (one->start >= two->start + two->size)
        return false;
    return true;
}
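
/*
 * Ranges are treated as half-open intervals [start, start + size):
 * e.g. a vector {0x1000, 0x1000} spans [0x1000, 0x2000) and does not
 * overlap one starting at 0x2000.
 */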

char *skip_spaces(const char *str)
{
    while (isspace(*str))
        ++str;
    return (char *)str;
}
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"

enum parse_mode {
    PARSE_MEMMAP,
    PARSE_EFI,
};

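/*
 * Accepted forms (see Documentation/admin-guide/kernel-parameters.txt):
 *   memmap=nn[KMG]@ss[KMG]   - usable region (skipped here)
 *   memmap=nn[KMG]#ss[KMG]   - mark as ACPI data
 *   memmap=nn[KMG]$ss[KMG]   - mark as reserved
 *   memmap=nn[KMG]!ss[KMG]   - mark as protected (persistent memory)
 *   memmap=nn[KMG]           - memory limit, like mem=nn[KMG]
 * With PARSE_EFI, efi_fake_mem=nn[KMG]@ss[KMG]:attr is parsed instead.
 */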
static int
parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
    char *oldp;

    if (!p)
        return -EINVAL;

    /* We don't care about this option here */
    if (!strncmp(p, "exactmap", 8))
        return -EINVAL;

    oldp = p;
    *size = memparse(p, &p);
    if (p == oldp)
        return -EINVAL;

    switch (*p) {
    case '#':
    case '$':
    case '!':
        *start = memparse(p + 1, &p);
        return 0;
    case '@':
        if (mode == PARSE_MEMMAP) {
            /*
             * memmap=nn@ss specifies a usable region and
             * should be skipped.
             */
            *size = 0;
        } else {
            u64 flags;

            /*
             * For efi_fake_mem=nn@ss:attr, the attr specifies
             * flags that might imply a soft-reservation.
             */
            *start = memparse(p + 1, &p);
            if (p && *p == ':') {
                p++;
                if (kstrtoull(p, 0, &flags) < 0)
                    *size = 0;
                else if (flags & EFI_MEMORY_SP)
                    return 0;
            }
            *size = 0;
        }
        fallthrough;
    default:
        /*
         * Without an offset, only a size is specified:
         * memmap=nn[KMG] behaves like mem=nn[KMG] and limits the
         * maximum address the system can use. Regions above the
         * limit must be avoided.
         */
        *start = 0;
        return 0;
    }

    return -EINVAL;
}

static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
    static int i;

    if (i >= MAX_MEMMAP_REGIONS)
        return;

    while (str && (i < MAX_MEMMAP_REGIONS)) {
        int rc;
        u64 start, size;
        char *k = strchr(str, ',');

        if (k)
            *k++ = 0;

        rc = parse_memmap(str, &start, &size, mode);
        if (rc < 0)
            break;
        str = k;

        if (start == 0) {
            /* Store the specified memory limit if size > 0 */
            if (size > 0 && size < mem_limit)
                mem_limit = size;

            continue;
        }

        mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
        mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
        i++;
    }

    /* More than 4 memmap regions: disable KASLR. */
    if ((i >= MAX_MEMMAP_REGIONS) && str)
        memmap_too_large = true;
}
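
/*
 * Note that @i above is static: regions from repeated memmap= options
 * and from efi_fake_mem= accumulate into the same MAX_MEMMAP_REGIONS
 * slots across calls.
 */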

/* Store the number of 1GB huge pages which users specified: */
static unsigned long max_gb_huge_pages;

static void parse_gb_huge_pages(char *param, char *val)
{
    static bool gbpage_sz;
    char *p;

    if (!strcmp(param, "hugepagesz")) {
        p = val;
        if (memparse(p, &p) != PUD_SIZE) {
            gbpage_sz = false;
            return;
        }

        if (gbpage_sz)
            warn("Repeatedly set hugeTLB page size of 1G!\n");
        gbpage_sz = true;
        return;
    }

    if (!strcmp(param, "hugepages") && gbpage_sz) {
        p = val;
        max_gb_huge_pages = simple_strtoull(p, &p, 0);
        return;
    }
}
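
/*
 * Note: "hugepages=N" is only recorded while the most recently seen
 * "hugepagesz=" was 1G (tracked by the static @gbpage_sz above),
 * mirroring the kernel's positional hugepagesz=/hugepages= pairing.
 */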

static void handle_mem_options(void)
{
    char *args = (char *)get_cmd_line_ptr();
    size_t len;
    char *tmp_cmdline;
    char *param, *val;
    u64 mem_size;

    if (!args)
        return;

    len = strnlen(args, COMMAND_LINE_SIZE - 1);
    tmp_cmdline = malloc(len + 1);
    if (!tmp_cmdline)
        error("Failed to allocate space for tmp_cmdline");

    memcpy(tmp_cmdline, args, len);
    tmp_cmdline[len] = 0;
    args = tmp_cmdline;

    /* Chew leading spaces */
    args = skip_spaces(args);

    while (*args) {
        args = next_arg(args, &param, &val);
        /* Stop at -- */
        if (!val && strcmp(param, "--") == 0)
            break;

        if (!strcmp(param, "memmap")) {
            mem_avoid_memmap(PARSE_MEMMAP, val);
        } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
            parse_gb_huge_pages(param, val);
        } else if (!strcmp(param, "mem")) {
            char *p = val;

            if (!strcmp(p, "nopentium"))
                continue;
            mem_size = memparse(p, &p);
            if (mem_size == 0)
                break;

            if (mem_size < mem_limit)
                mem_limit = mem_size;
        } else if (!strcmp(param, "efi_fake_mem")) {
            mem_avoid_memmap(PARSE_EFI, val);
        }
    }

    free(tmp_cmdline);
}

/*
 * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
 * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
 *
 * The mem_avoid array is used to store the ranges that need to be avoided
 * when KASLR searches for an appropriate random address. We must avoid any
 * regions that are unsafe to overlap with during decompression, and other
 * things like the initrd, cmdline and boot_params. This comment seeks to
 * explain mem_avoid as clearly as possible since incorrect mem_avoid
 * memory ranges lead to really hard to debug boot failures.
 *
 * The initrd, cmdline, and boot_params are trivial to identify for
 * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
 * MEM_AVOID_BOOTPARAMS respectively below.
 *
 * What is less obvious is how to avoid the range of memory that is used
 * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
 * the compressed kernel (ZO) and its run space, which is used to extract
 * the uncompressed kernel (VO) and relocs.
 *
 * ZO's full run size sits against the end of the decompression buffer, so
 * we can calculate where text, data, bss, etc of ZO are positioned more
 * easily.
 *
 * For additional background, the decompression calculations can be found
 * in header.S, and the memory diagram is based on the one found in misc.c.
 *
 * The following conditions are already enforced by the image layouts and
 * associated code:
 *  - input + input_size >= output + output_size
 *  - kernel_total_size <= init_size
 *  - kernel_total_size <= output_size (see Note below)
 *  - output + init_size >= output + output_size
 *
 * (Note that kernel_total_size and output_size have no fundamental
 * relationship, but output_size is passed to choose_random_location
 * as a maximum of the two. The diagram is showing a case where
 * kernel_total_size is larger than output_size, but this case is
 * handled by bumping output_size.)
 *
 * The above conditions can be illustrated by a diagram:
 *
 * 0   output            input            input+input_size    output+init_size
 * |     |                 |                             |             |
 * |     |                 |                             |             |
 * |-----|--------|--------|--------------|-----------|--|-------------|
 *                |                       |           |
 *                |                       |           |
 * output+init_size-ZO_INIT_SIZE  output+output_size  output+kernel_total_size
 *
 * [output, output+init_size) is the entire memory range used for
 * extracting the compressed image.
 *
 * [output, output+kernel_total_size) is the range needed for the
 * uncompressed kernel (VO) and its run size (bss, brk, etc).
 *
 * [output, output+output_size) is VO plus relocs (i.e. the entire
 * uncompressed payload contained by ZO). This is the area of the buffer
 * written to during decompression.
 *
 * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
 * range of the copied ZO and decompression code. (i.e. the range
 * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
 *
 * [input, input+input_size) is the original copied compressed image (ZO)
 * (i.e. it does not include its run size). This range must be avoided
 * because it contains the data used for decompression.
 *
 * [input+input_size, output+init_size) is [_text, _end) for ZO. This
 * range includes ZO's heap and stack, and must be avoided since it
 * performs the decompression.
 *
 * Since the above two ranges need to be avoided and they are adjacent,
 * they can be merged, resulting in [input, output+init_size), which
 * becomes MEM_AVOID_ZO_RANGE below.
 */
static void mem_avoid_init(unsigned long input, unsigned long input_size,
               unsigned long output)
{
    unsigned long init_size = boot_params->hdr.init_size;
    u64 initrd_start, initrd_size;
    unsigned long cmd_line, cmd_line_size;

    /*
     * Avoid the region that is unsafe to overlap during
     * decompression.
     */
    mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
    mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;

    /* Avoid initrd. */
    initrd_start  = (u64)boot_params->ext_ramdisk_image << 32;
    initrd_start |= boot_params->hdr.ramdisk_image;
    initrd_size  = (u64)boot_params->ext_ramdisk_size << 32;
    initrd_size |= boot_params->hdr.ramdisk_size;
    mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
    mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
    /* No need to set mapping for initrd, it will be handled in VO. */

    /* Avoid kernel command line. */
    cmd_line = get_cmd_line_ptr();
    /* Calculate size of cmd_line. */
    if (cmd_line) {
        cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE - 1) + 1;
        mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
        mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
    }

    /* Avoid boot parameters. */
    mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
    mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);

    /* We don't need to set a mapping for setup_data. */

    /* Mark the memmap regions we need to avoid */
    handle_mem_options();

    /* Enumerate the immovable memory regions */
    num_immovable_mem = count_immovable_mem_regions();
}

/*
 * Does this memory vector overlap a known avoided area? If so, record the
 * overlap region with the lowest address.
 */
static bool mem_avoid_overlap(struct mem_vector *img,
                  struct mem_vector *overlap)
{
    int i;
    struct setup_data *ptr;
    u64 earliest = img->start + img->size;
    bool is_overlapping = false;

    for (i = 0; i < MEM_AVOID_MAX; i++) {
        if (mem_overlaps(img, &mem_avoid[i]) &&
            mem_avoid[i].start < earliest) {
            *overlap = mem_avoid[i];
            earliest = overlap->start;
            is_overlapping = true;
        }
    }

    /* Avoid all entries in the setup_data linked list. */
    ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
    while (ptr) {
        struct mem_vector avoid;

        avoid.start = (unsigned long)ptr;
        avoid.size = sizeof(*ptr) + ptr->len;

        if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
            *overlap = avoid;
            earliest = overlap->start;
            is_overlapping = true;
        }

        if (ptr->type == SETUP_INDIRECT &&
            ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
            avoid.start = ((struct setup_indirect *)ptr->data)->addr;
            avoid.size = ((struct setup_indirect *)ptr->data)->len;

            if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
                *overlap = avoid;
                earliest = overlap->start;
                is_overlapping = true;
            }
        }

        ptr = (struct setup_data *)(unsigned long)ptr->next;
    }

    return is_overlapping;
}

struct slot_area {
    u64 addr;
    unsigned long num;
};

#define MAX_SLOT_AREA 100

static struct slot_area slot_areas[MAX_SLOT_AREA];
static unsigned int slot_area_index;
static unsigned long slot_max;

static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
    struct slot_area slot_area;

    if (slot_area_index == MAX_SLOT_AREA)
        return;

    slot_area.addr = region->start;
    slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;

    slot_areas[slot_area_index++] = slot_area;
    slot_max += slot_area.num;
}
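
/*
 * Example: a 6M region with a 2M image and CONFIG_PHYSICAL_ALIGN of 2M
 * yields 1 + (6M - 2M) / 2M = 3 candidate slots (offsets 0M, 2M, 4M).
 * Callers guarantee region->size >= image_size, so the subtraction
 * cannot underflow; regions beyond the first MAX_SLOT_AREA areas are
 * silently dropped.
 */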

/*
 * Skip as many 1GB huge pages as possible in the passed region,
 * according to the number the user specified:
 */
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
    u64 pud_start, pud_end;
    unsigned long gb_huge_pages;
    struct mem_vector tmp;

    if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
        store_slot_info(region, image_size);
        return;
    }

    /* Are there any 1GB pages in the region? */
    pud_start = ALIGN(region->start, PUD_SIZE);
    pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);

    /* No good 1GB huge pages found: */
    if (pud_start >= pud_end) {
        store_slot_info(region, image_size);
        return;
    }

    /* Check if the head part of the region is usable. */
    if (pud_start >= region->start + image_size) {
        tmp.start = region->start;
        tmp.size = pud_start - region->start;
        store_slot_info(&tmp, image_size);
    }

    /* Skip the good 1GB pages. */
    gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
    if (gb_huge_pages > max_gb_huge_pages) {
        pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
        max_gb_huge_pages = 0;
    } else {
        max_gb_huge_pages -= gb_huge_pages;
    }

    /* Check if the tail part of the region is usable. */
    if (region->start + region->size >= pud_end + image_size) {
        tmp.start = pud_end;
        tmp.size = region->start + region->size - pud_end;
        store_slot_info(&tmp, image_size);
    }
}
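
/*
 * Example with max_gb_huge_pages = 1: for a region [900M, 3G),
 * pud_start is 1G and pud_end is 3G. Since two 1GB pages fit but only
 * one was requested, pud_end is pulled back to 2G. The head [900M, 1G)
 * and the tail [2G, 3G) are stored as slot areas (assuming each can
 * hold image_size), leaving [1G, 2G) free for the huge page.
 */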

static u64 slots_fetch_random(void)
{
    unsigned long slot;
    unsigned int i;

    /* Handle case of no slots stored. */
    if (slot_max == 0)
        return 0;

    slot = kaslr_get_random_long("Physical") % slot_max;

    for (i = 0; i < slot_area_index; i++) {
        if (slot >= slot_areas[i].num) {
            slot -= slot_areas[i].num;
            continue;
        }
        return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
    }

    if (i == slot_area_index)
        debug_putstr("slots_fetch_random() failed!?\n");
    return 0;
}
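
/*
 * Example: with slot areas of num = {3, 5} (slot_max = 8), a draw of
 * slot = 6 skips the first area (6 - 3 = 3) and resolves to
 * slot_areas[1].addr + 3 * CONFIG_PHYSICAL_ALIGN.
 */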

static void __process_mem_region(struct mem_vector *entry,
                 unsigned long minimum,
                 unsigned long image_size)
{
    struct mem_vector region, overlap;
    u64 region_end;

    /* Enforce minimum and memory limit. */
    region.start = max_t(u64, entry->start, minimum);
    region_end = min(entry->start + entry->size, mem_limit);

    /* Give up if slot area array is full. */
    while (slot_area_index < MAX_SLOT_AREA) {
        /* Potentially raise address to meet alignment needs. */
        region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

        /* Did we raise the address above the passed in memory entry? */
        if (region.start > region_end)
            return;

        /* Reduce size by any delta from the original address. */
        region.size = region_end - region.start;

        /* Return if region can't contain decompressed kernel */
        if (region.size < image_size)
            return;

        /* If nothing overlaps, store the region and return. */
        if (!mem_avoid_overlap(&region, &overlap)) {
            process_gb_huge_pages(&region, image_size);
            return;
        }

        /* Store beginning of region if it holds at least image_size. */
        if (overlap.start >= region.start + image_size) {
            region.size = overlap.start - region.start;
            process_gb_huge_pages(&region, image_size);
        }

        /* Clip off the overlapping region and start over. */
        region.start = overlap.start + overlap.size;
    }
}

static bool process_mem_region(struct mem_vector *region,
                   unsigned long minimum,
                   unsigned long image_size)
{
    int i;
    /*
     * If no immovable memory is found, or MEMORY_HOTREMOVE is disabled,
     * use @region directly.
     */
    if (!num_immovable_mem) {
        __process_mem_region(region, minimum, image_size);

        if (slot_area_index == MAX_SLOT_AREA) {
            debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
            return true;
        }
        return false;
    }

#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
    /*
     * If immovable memory is found, filter the intersection between
     * immovable memory and @region.
     */
    for (i = 0; i < num_immovable_mem; i++) {
        u64 start, end, entry_end, region_end;
        struct mem_vector entry;

        if (!mem_overlaps(region, &immovable_mem[i]))
            continue;

        start = immovable_mem[i].start;
        end = start + immovable_mem[i].size;
        region_end = region->start + region->size;

        entry.start = clamp(region->start, start, end);
        entry_end = clamp(region_end, start, end);
        entry.size = entry_end - entry.start;

        __process_mem_region(&entry, minimum, image_size);

        if (slot_area_index == MAX_SLOT_AREA) {
            debug_putstr("Aborted e820/efi memmap scan when walking immovable regions (slot_areas full)!\n");
            return true;
        }
    }
#endif
    return false;
}
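
/*
 * The clamp() pair above computes the intersection of @region with an
 * immovable range: e.g. for a region [2G, 6G) and an immovable range
 * [4G, 8G), the resulting entry is [4G, 6G).
 */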

#ifdef CONFIG_EFI
/*
 * Returns true if we processed the EFI memmap, which we prefer over the E820
 * table if it is available.
 */
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
    struct efi_info *e = &boot_params->efi_info;
    bool efi_mirror_found = false;
    struct mem_vector region;
    efi_memory_desc_t *md;
    unsigned long pmap;
    char *signature;
    u32 nr_desc;
    int i;

    signature = (char *)&e->efi_loader_signature;
    if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
        strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
        return false;

#ifdef CONFIG_X86_32
    /* Can't handle data above 4GB at this time */
    if (e->efi_memmap_hi) {
        warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
        return false;
    }
    pmap = e->efi_memmap;
#else
    pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif

    nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
    for (i = 0; i < nr_desc; i++) {
        md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
        if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
            efi_mirror_found = true;
            break;
        }
    }

    for (i = 0; i < nr_desc; i++) {
        md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);

        /*
         * Here we are more conservative in picking free memory than
         * the EFI spec allows:
         *
         * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
         * free memory and thus available to place the kernel image into,
         * but in practice there's firmware where using that memory leads
         * to crashes.
         *
         * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
         */
        if (md->type != EFI_CONVENTIONAL_MEMORY)
            continue;

        if (efi_soft_reserve_enabled() &&
            (md->attribute & EFI_MEMORY_SP))
            continue;

        if (efi_mirror_found &&
            !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
            continue;

        region.start = md->phys_addr;
        region.size = md->num_pages << EFI_PAGE_SHIFT;
        if (process_mem_region(&region, minimum, image_size))
            break;
    }
    return true;
}
#else
static inline bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
    return false;
}
#endif

static void process_e820_entries(unsigned long minimum,
                 unsigned long image_size)
{
    int i;
    struct mem_vector region;
    struct boot_e820_entry *entry;

    /* Verify potential e820 positions, appending to slots list. */
    for (i = 0; i < boot_params->e820_entries; i++) {
        entry = &boot_params->e820_table[i];
        /* Skip non-RAM entries. */
        if (entry->type != E820_TYPE_RAM)
            continue;
        region.start = entry->addr;
        region.size = entry->size;
        if (process_mem_region(&region, minimum, image_size))
            break;
    }
}

static unsigned long find_random_phys_addr(unsigned long minimum,
                       unsigned long image_size)
{
    u64 phys_addr;

    /* Bail out early if it's impossible to succeed. */
    if (minimum + image_size > mem_limit)
        return 0;

    /* Check if we had too many memmaps. */
    if (memmap_too_large) {
        debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
        return 0;
    }

    if (!process_efi_entries(minimum, image_size))
        process_e820_entries(minimum, image_size);

    phys_addr = slots_fetch_random();

    /* Perform a final check to make sure the address is in range. */
    if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
        warn("Invalid physical address chosen!\n");
        return 0;
    }

    return (unsigned long)phys_addr;
}

static unsigned long find_random_virt_addr(unsigned long minimum,
                       unsigned long image_size)
{
    unsigned long slots, random_addr;

    /*
     * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold image_size
     * within the range [minimum, KERNEL_IMAGE_SIZE)?
     */
    slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;

    random_addr = kaslr_get_random_long("Virtual") % slots;

    return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
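
/*
 * Example: with minimum = 16M, image_size = 48M, KERNEL_IMAGE_SIZE = 1G
 * and CONFIG_PHYSICAL_ALIGN = 2M, there are 1 + (1024M - 16M - 48M) / 2M
 * = 481 slots, so the returned address lies in [16M, 976M] at 2M
 * granularity.
 */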

/*
 * Since this function examines addresses much more numerically,
 * it takes the input and output pointers as 'unsigned long'.
 */
void choose_random_location(unsigned long input,
                unsigned long input_size,
                unsigned long *output,
                unsigned long output_size,
                unsigned long *virt_addr)
{
    unsigned long random_addr, min_addr;

    if (cmdline_find_option_bool("nokaslr")) {
        warn("KASLR disabled: 'nokaslr' on cmdline.");
        return;
    }

    boot_params->hdr.loadflags |= KASLR_FLAG;

    if (IS_ENABLED(CONFIG_X86_32))
        mem_limit = KERNEL_IMAGE_SIZE;
    else
        mem_limit = MAXMEM;

    /* Record the various known unsafe memory ranges. */
    mem_avoid_init(input, input_size, *output);

    /*
     * The low end of the randomization range should be the
     * smaller of 512M or the initial kernel image
     * location:
     */
    min_addr = min(*output, 512UL << 20);
    /* Make sure minimum is aligned. */
    min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);

    /* Walk available memory entries to find a random address. */
    random_addr = find_random_phys_addr(min_addr, output_size);
    if (!random_addr) {
        warn("Physical KASLR disabled: no suitable memory region!");
    } else {
        /* Update the new physical address location. */
        if (*output != random_addr)
            *output = random_addr;
    }

    /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
    if (IS_ENABLED(CONFIG_X86_64))
        random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
    *virt_addr = random_addr;
}