// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
#include <asm/kasan.h>
#include <asm/kexec.h>
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"

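/*
 * Variables tagged __bootdata live in the .boot.data section and are
 * handed over to the decompressed kernel; __bootdata_preserved variables
 * live in .boot.preserved.data and stay valid for the lifetime of the
 * kernel. Both sections are copied into the decompressed image by
 * copy_bootdata() below.
 */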
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
struct page *__bootdata_preserved(vmemmap);
unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size);
int __bootdata(is_full_image) = 1;
struct initrd_data __bootdata(initrd_data);

u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

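/* Print an error message via the SCLP early console and halt the system. */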
void error(char *x)
{
    sclp_early_printk("\n\n");
    sclp_early_printk(x);
    sclp_early_printk("\n\n -- System halted");

    disabled_wait();
}

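/*
 * Set up the load-program-parameter: facility 40 is the
 * load-program-parameter facility, and the value loaded via lpp()
 * tags CPU measurement facility sampling data for this CPU.
 */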
static void setup_lpp(void)
{
    S390_lowcore.current_pid = 0;
    S390_lowcore.lpp = LPP_MAGIC;
    if (test_facility(40))
        lpp(&S390_lowcore.lpp);
}

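/*
 * Without a decompressor there is no scratch buffer to skip: the first
 * address that is safe to use starts right behind the loaded image,
 * including its BSS.
 */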
#ifdef CONFIG_KERNEL_UNCOMPRESSED
unsigned long mem_safe_offset(void)
{
    return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
}
#endif

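/*
 * Move the initrd upwards to addr if it would otherwise be overwritten
 * by data the boot code is about to place below that address.
 */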
static void rescue_initrd(unsigned long addr)
{
    if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
        return;
    if (!initrd_data.start || !initrd_data.size)
        return;
    if (addr <= initrd_data.start)
        return;
    memmove((void *)addr, (void *)initrd_data.start, initrd_data.size);
    initrd_data.start = addr;
}

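/*
 * Copy the decompressor's .boot.data and .boot.preserved.data sections
 * into the decompressed kernel image, after checking that the section
 * sizes of both images match.
 */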
static void copy_bootdata(void)
{
    if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
        error(".boot.data section size mismatch");
    memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
    if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
        error(".boot.preserved.data section size mismatch");
    memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}

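/*
 * Apply the relocations from the kernel's .rela.dyn section, shifting
 * every relocated location and symbol value by the KASLR offset.
 */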
static void handle_relocs(unsigned long offset)
{
    Elf64_Rela *rela_start, *rela_end, *rela;
    int r_type, r_sym, rc;
    Elf64_Addr loc, val;
    Elf64_Sym *dynsym;

    rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
    rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
    dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
    for (rela = rela_start; rela < rela_end; rela++) {
        loc = rela->r_offset + offset;
        val = rela->r_addend;
        r_sym = ELF64_R_SYM(rela->r_info);
        if (r_sym) {
            if (dynsym[r_sym].st_shndx != SHN_UNDEF)
                val += dynsym[r_sym].st_value + offset;
        } else {
            /*
             * 0 == undefined symbol table index (STN_UNDEF),
             * used for R_390_RELATIVE, only add KASLR offset
             */
            val += offset;
        }
        r_type = ELF64_R_TYPE(rela->r_info);
        rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
        if (rc)
            error("Unknown relocation type");
    }
}

/*
 * Merge information from several sources into a single ident_map_size value.
 * "ident_map_size" represents the upper limit of physical memory we may ever
 * reach. It might cover not only online memory but also standby (offline)
 * memory. "ident_map_size" could be lower than the standby or even the online
 * memory actually present, due to limiting factors. We should never go above
 * this limit. It is the size of our identity mapping.
 *
 * Consider the following factors:
 * 1. max_physmem_end - end of physical memory online or standby.
 *    Always <= end of the last online memory block (get_mem_detect_end()).
 * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
 *    kernel is able to support.
 * 3. "mem=" kernel command line option which limits physical memory usage.
 * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed
 *    as a crash kernel.
 * 5. "hsa" size which is a memory limit when the kernel is executed during
 *    zfcp/nvme dump.
 */
static void setup_ident_map_size(unsigned long max_physmem_end)
{
    unsigned long hsa_size;

    ident_map_size = max_physmem_end;
    if (memory_limit)
        ident_map_size = min(ident_map_size, memory_limit);
    ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS);

#ifdef CONFIG_CRASH_DUMP
    if (oldmem_data.start) {
        kaslr_enabled = 0;
        ident_map_size = min(ident_map_size, oldmem_data.size);
    } else if (ipl_block_valid && is_ipl_block_dump()) {
        kaslr_enabled = 0;
        if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
            ident_map_size = min(ident_map_size, hsa_size);
    }
#endif
}

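/*
 * Choose between a 3- and a 4-level kernel address space and carve the
 * virtual space into the modules, vmalloc, vmemmap and identity mapping
 * areas, from top to bottom.
 */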
static void setup_kernel_memory_layout(void)
{
    unsigned long vmemmap_start;
    unsigned long rte_size;
    unsigned long pages;
    unsigned long vmax;

    pages = ident_map_size / PAGE_SIZE;
    /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
    vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);

    /* choose kernel address space layout: 4 or 3 levels. */
    vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
    if (IS_ENABLED(CONFIG_KASAN) ||
        vmalloc_size > _REGION2_SIZE ||
        vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
            _REGION2_SIZE) {
        vmax = _REGION1_SIZE;
        rte_size = _REGION2_SIZE;
    } else {
        vmax = _REGION2_SIZE;
        rte_size = _REGION3_SIZE;
    }
    /*
     * Force the modules and vmalloc areas below the ultravisor secure
     * storage limit, so that any vmalloc allocation we do can be used
     * to back secure guest storage.
     */
    vmax = adjust_to_uv_max(vmax);
#ifdef CONFIG_KASAN
    /* force vmalloc and modules below the KASAN shadow */
    vmax = min(vmax, KASAN_SHADOW_START);
#endif
    MODULES_END = vmax;
    MODULES_VADDR = MODULES_END - MODULES_LEN;
    VMALLOC_END = MODULES_VADDR;

    /* allow the vmalloc area to occupy up to about half of the remaining virtual space */
    vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
    VMALLOC_START = VMALLOC_END - vmalloc_size;

    /*
     * Split the remaining virtual space between the 1:1 mapping and the
     * vmemmap array: every mapped page costs PAGE_SIZE of identity
     * mapping plus sizeof(struct page) of vmemmap, hence the divisor.
     */
    pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
    pages = SECTION_ALIGN_UP(pages);
    /* keep vmemmap_start aligned to a top level region table entry */
    vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
    /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
    vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
    /* make sure the identity map doesn't overlap with vmemmap */
    ident_map_size = min(ident_map_size, vmemmap_start);
    vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
    /* make sure vmemmap doesn't overlap with the vmalloc area */
    VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
    vmemmap = (struct page *)vmemmap_start;
}

/*
 * Clear the BSS section of the decompressed Linux kernel, NOT the
 * decompressor's own BSS.
 */
static void clear_bss_section(void)
{
    memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
}

/*
 * Set the vmalloc area size to at least one eighth of the (potential)
 * physical memory size, unless the size has been set via kernel command
 * line parameter.
 */
static void setup_vmalloc_size(void)
{
    unsigned long size;

    if (vmalloc_size_set)
        return;
    size = round_up(ident_map_size / 8, _SEGMENT_SIZE);
    vmalloc_size = max(size, vmalloc_size);
}

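/* Rebase all vmlinux address information by the KASLR offset. */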
static void offset_vmlinux_info(unsigned long offset)
{
    vmlinux.default_lma += offset;
    *(unsigned long *)(&vmlinux.entry) += offset;
    vmlinux.bootdata_off += offset;
    vmlinux.bootdata_preserved_off += offset;
    vmlinux.rela_dyn_start += offset;
    vmlinux.rela_dyn_end += offset;
    vmlinux.dynsym_start += offset;
}

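/*
 * Reserve a page-aligned block for the amode31 (31-bit addressable)
 * section and return the first address behind the reservation.
 */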
static unsigned long reserve_amode31(unsigned long safe_addr)
{
    __amode31_base = PAGE_ALIGN(safe_addr);
    /* account for the alignment gap: reserve from the aligned base */
    return __amode31_base + vmlinux.amode31_size;
}

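/*
 * Main entry point of the decompressor: detect memory, set up the
 * kernel memory layout, optionally apply KASLR, place the decompressed
 * kernel image and jump to its entry point.
 */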
void startup_kernel(void)
{
    unsigned long random_lma;
    unsigned long safe_addr;
    void *img;

    initrd_data.start = parmarea.initrd_start;
    initrd_data.size = parmarea.initrd_size;
    oldmem_data.start = parmarea.oldmem_base;
    oldmem_data.size = parmarea.oldmem_size;

    setup_lpp();
    store_ipl_parmblock();
    safe_addr = mem_safe_offset();
    safe_addr = reserve_amode31(safe_addr);
    safe_addr = read_ipl_report(safe_addr);
    uv_query_info();
    rescue_initrd(safe_addr);
    sclp_early_read_info();
    setup_boot_command_line();
    parse_boot_command_line();
    sanitize_prot_virt_host();
    setup_ident_map_size(detect_memory());
    setup_vmalloc_size();
    setup_kernel_memory_layout();

    if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
        random_lma = get_random_base(safe_addr);
        if (random_lma) {
            __kaslr_offset = random_lma - vmlinux.default_lma;
            img = (void *)vmlinux.default_lma;
            offset_vmlinux_info(__kaslr_offset);
        }
    }

    if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
        img = decompress_kernel();
        memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
    } else if (__kaslr_offset)
        memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);

    clear_bss_section();
    copy_bootdata();
    if (IS_ENABLED(CONFIG_RELOCATABLE))
        handle_relocs(__kaslr_offset);

    if (__kaslr_offset) {
        /*
         * Save the KASLR offset for early dumps, before vmcore_info is set.
         * Mark it as odd to distinguish it from a real vmcore_info pointer.
         */
        S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
        /* Clear the non-relocated kernel image */
        if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
            memset(img, 0, vmlinux.image_size);
    }
    vmlinux.entry();
}