// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "kexec_internal.h"

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                 unsigned long nr_segments,
                 struct kexec_segment *segments,
                 unsigned long flags)
{
    int ret;
    struct kimage *image;
    bool kexec_on_panic = flags & KEXEC_ON_CRASH;

    if (kexec_on_panic) {
        /* Verify we have a valid entry point */
        if ((entry < phys_to_boot_phys(crashk_res.start)) ||
            (entry > phys_to_boot_phys(crashk_res.end)))
            return -EADDRNOTAVAIL;
    }

    /* Allocate and initialize a controlling structure */
    image = do_kimage_alloc_init();
    if (!image)
        return -ENOMEM;

    image->start = entry;
    image->nr_segments = nr_segments;
    memcpy(image->segment, segments, nr_segments * sizeof(*segments));

    if (kexec_on_panic) {
        /* Enable special crash kernel control page alloc policy. */
        image->control_page = crashk_res.start;
        image->type = KEXEC_TYPE_CRASH;
    }

    ret = sanity_check_segment_list(image);
    if (ret)
        goto out_free_image;
    /*
     * Find a location for the control code buffer, and add it to the
     * vector of segments so that its pages will also be counted as
     * destination pages.
     */
    ret = -ENOMEM;
    image->control_code_page = kimage_alloc_control_pages(image,
                       get_order(KEXEC_CONTROL_PAGE_SIZE));
    if (!image->control_code_page) {
        pr_err("Could not allocate control_code_buffer\n");
        goto out_free_image;
    }

    if (!kexec_on_panic) {
        image->swap_page = kimage_alloc_control_pages(image, 0);
        if (!image->swap_page) {
            pr_err("Could not allocate swap buffer\n");
            goto out_free_control_pages;
        }
    }

    *rimage = image;
    return 0;
out_free_control_pages:
    kimage_free_page_list(&image->control_pages);
out_free_image:
    kfree(image);
    return ret;
}

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
        struct kexec_segment *segments, unsigned long flags)
{
    struct kimage **dest_image, *image;
    unsigned long i;
    int ret;

    /*
     * Because we write directly to the reserved memory region when loading
     * crash kernels we need a mutex here to prevent multiple crash kernels
     * from attempting to load simultaneously, and to prevent a crash kernel
     * from loading on top of an in-use crash kernel.
     *
     * KISS: always take the mutex.  A trylock is used so a concurrent
     * caller fails fast with -EBUSY instead of sleeping.
     */
    if (!mutex_trylock(&kexec_mutex))
        return -EBUSY;

    if (flags & KEXEC_ON_CRASH) {
        dest_image = &kexec_crash_image;
        if (kexec_crash_image)
            arch_kexec_unprotect_crashkres();
    } else {
        dest_image = &kexec_image;
    }

    if (nr_segments == 0) {
        /* Uninstall image */
        kimage_free(xchg(dest_image, NULL));
        ret = 0;
        goto out_unlock;
    }
    if (flags & KEXEC_ON_CRASH) {
        /*
         * Loading another kernel to switch to if this one
         * crashes.  Free any current crash dump kernel before
         * we corrupt it.
         */
        kimage_free(xchg(&kexec_crash_image, NULL));
    }

    ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
    if (ret)
        goto out_unlock;

    if (flags & KEXEC_PRESERVE_CONTEXT)
        image->preserve_context = 1;

    ret = machine_kexec_prepare(image);
    if (ret)
        goto out;

    /*
     * Some architectures (like s390) may touch the crash memory before
     * machine_kexec_prepare(), so we must copy the vmcoreinfo data after it.
     */
    ret = kimage_crash_copy_vmcoreinfo(image);
    if (ret)
        goto out;

    for (i = 0; i < nr_segments; i++) {
        ret = kimage_load_segment(image, &image->segment[i]);
        if (ret)
            goto out;
    }

    kimage_terminate(image);

    ret = machine_kexec_post_load(image);
    if (ret)
        goto out;

    /* Install the new kernel and uninstall the old */
    image = xchg(dest_image, image);

out:
    if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
        arch_kexec_protect_crashkres();

    kimage_free(image);
out_unlock:
    mutex_unlock(&kexec_mutex);
    return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number and
 *   then copies the image to its final destination, jumping into
 *   the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need that
 * to happen you must do it yourself.
 */
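
/*
 * Illustrative sketch (not part of this file): how userspace reaches
 * this syscall.  glibc provides no wrapper, so callers go through
 * syscall(2).  The simplest case is unloading: nr_segments == 0 makes
 * do_kexec_load() above free any installed image.  Assumes a kernel
 * built with CONFIG_KEXEC and a caller holding CAP_SYS_BOOT.
 *
 *  #include <stddef.h>
 *  #include <unistd.h>
 *  #include <sys/syscall.h>
 *  #include <linux/kexec.h>
 *
 *  // Unload the currently loaded (non-crash) kexec image, if any.
 *  static long kexec_unload(void)
 *  {
 *      return syscall(SYS_kexec_load, 0UL, 0UL, NULL,
 *                     KEXEC_ARCH_DEFAULT);
 *  }
 */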

static inline int kexec_load_check(unsigned long nr_segments,
                   unsigned long flags)
{
    int result;

    /* We only trust the superuser with rebooting the system. */
    if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
        return -EPERM;

    /* Permit LSMs and IMA to fail the kexec */
    result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false);
    if (result < 0)
        return result;

    /*
     * kexec can be used to circumvent module loading restrictions, so
     * prevent loading in that case
     */
    result = security_locked_down(LOCKDOWN_KEXEC);
    if (result)
        return result;

    /*
     * Verify we have a legal set of flags: after masking off the
     * architecture bits, every remaining bit must be a known flag in
     * KEXEC_FLAGS.  This leaves us room for future extensions.
     */
    if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
        return -EINVAL;

    /* Put an artificial cap on the number
     * of segments passed to kexec_load.
     */
    if (nr_segments > KEXEC_SEGMENT_MAX)
        return -EINVAL;

    return 0;
}
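
/*
 * Worked example of the flag-legality check above, a sketch using the
 * uapi constants from <linux/kexec.h>: for flags = KEXEC_ON_CRASH |
 * KEXEC_ARCH_X86_64, masking off KEXEC_ARCH_MASK leaves only
 * KEXEC_ON_CRASH, which is within KEXEC_FLAGS, so the check passes.
 * An undefined bit (say 0x80) would survive ~KEXEC_ARCH_MASK without
 * being in KEXEC_FLAGS, and the load is rejected with -EINVAL.
 */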

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
        struct kexec_segment __user *, segments, unsigned long, flags)
{
    struct kexec_segment *ksegments;
    unsigned long result;

    result = kexec_load_check(nr_segments, flags);
    if (result)
        return result;

    /* Verify we are on the appropriate architecture */
    if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
        ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
        return -EINVAL;

    ksegments = memdup_user(segments, nr_segments * sizeof(ksegments[0]));
    if (IS_ERR(ksegments))
        return PTR_ERR(ksegments);

    result = do_kexec_load(entry, nr_segments, ksegments, flags);
    kfree(ksegments);

    return result;
}
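
/*
 * Illustrative sketch (not part of this file): a minimal single-segment
 * load through the syscall defined above.  The destination address,
 * sizes, and entry point are placeholders; a real caller (e.g.
 * kexec-tools) derives them from the kernel image being loaded.  Uses
 * the userspace struct kexec_segment from <linux/kexec.h>.
 *
 *  #include <unistd.h>
 *  #include <sys/syscall.h>
 *  #include <linux/kexec.h>
 *
 *  static long load_one_segment(const void *buf, size_t bufsz,
 *                               unsigned long dest, size_t memsz,
 *                               unsigned long entry)
 *  {
 *      struct kexec_segment seg = {
 *          .buf   = buf,                  // source in our address space
 *          .bufsz = bufsz,
 *          .mem   = (const void *)dest,   // physical destination
 *          .memsz = memsz,                // >= bufsz, page aligned
 *      };
 *      return syscall(SYS_kexec_load, entry, 1UL, &seg,
 *                     KEXEC_ARCH_DEFAULT);
 *  }
 */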

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
               compat_ulong_t, nr_segments,
               struct compat_kexec_segment __user *, segments,
               compat_ulong_t, flags)
{
    struct compat_kexec_segment in;
    struct kexec_segment *ksegments;
    unsigned long i, result;

    result = kexec_load_check(nr_segments, flags);
    if (result)
        return result;

    /* Don't allow clients that don't understand the native
     * architecture to do anything.
     */
    if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
        return -EINVAL;

    ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]),
            GFP_KERNEL);
    if (!ksegments)
        return -ENOMEM;

    for (i = 0; i < nr_segments; i++) {
        result = copy_from_user(&in, &segments[i], sizeof(in));
        if (result)
            goto fail;

        ksegments[i].buf   = compat_ptr(in.buf);
        ksegments[i].bufsz = in.bufsz;
        ksegments[i].mem   = in.mem;
        ksegments[i].memsz = in.memsz;
    }

    result = do_kexec_load(entry, nr_segments, ksegments, flags);

fail:
    kfree(ksegments);
    return result;
}
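
/*
 * Note on the conversion loop above: a 32-bit caller supplies
 * compat_kexec_segment entries whose buf field is a 32-bit
 * compat_uptr_t, so each field is widened into a native struct
 * kexec_segment (compat_ptr() rebuilds a proper user pointer)
 * before do_kexec_load() ever sees it.
 */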
#endif