Back to home page

LXR

 
 

    


0001 /*
0002  * kexec.c - kexec_load system call
0003  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
0004  *
0005  * This source code is licensed under the GNU General Public License,
0006  * Version 2.  See the file COPYING for more details.
0007  */
0008 
0009 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0010 
0011 #include <linux/capability.h>
0012 #include <linux/mm.h>
0013 #include <linux/file.h>
0014 #include <linux/kexec.h>
0015 #include <linux/mutex.h>
0016 #include <linux/list.h>
0017 #include <linux/syscalls.h>
0018 #include <linux/vmalloc.h>
0019 #include <linux/slab.h>
0020 
0021 #include "kexec_internal.h"
0022 
0023 static int copy_user_segment_list(struct kimage *image,
0024                   unsigned long nr_segments,
0025                   struct kexec_segment __user *segments)
0026 {
0027     int ret;
0028     size_t segment_bytes;
0029 
0030     /* Read in the segments */
0031     image->nr_segments = nr_segments;
0032     segment_bytes = nr_segments * sizeof(*segments);
0033     ret = copy_from_user(image->segment, segments, segment_bytes);
0034     if (ret)
0035         ret = -EFAULT;
0036 
0037     return ret;
0038 }
0039 
0040 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
0041                  unsigned long nr_segments,
0042                  struct kexec_segment __user *segments,
0043                  unsigned long flags)
0044 {
0045     int ret;
0046     struct kimage *image;
0047     bool kexec_on_panic = flags & KEXEC_ON_CRASH;
0048 
0049     if (kexec_on_panic) {
0050         /* Verify we have a valid entry point */
0051         if ((entry < phys_to_boot_phys(crashk_res.start)) ||
0052             (entry > phys_to_boot_phys(crashk_res.end)))
0053             return -EADDRNOTAVAIL;
0054     }
0055 
0056     /* Allocate and initialize a controlling structure */
0057     image = do_kimage_alloc_init();
0058     if (!image)
0059         return -ENOMEM;
0060 
0061     image->start = entry;
0062 
0063     ret = copy_user_segment_list(image, nr_segments, segments);
0064     if (ret)
0065         goto out_free_image;
0066 
0067     if (kexec_on_panic) {
0068         /* Enable special crash kernel control page alloc policy. */
0069         image->control_page = crashk_res.start;
0070         image->type = KEXEC_TYPE_CRASH;
0071     }
0072 
0073     ret = sanity_check_segment_list(image);
0074     if (ret)
0075         goto out_free_image;
0076 
0077     /*
0078      * Find a location for the control code buffer, and add it
0079      * the vector of segments so that it's pages will also be
0080      * counted as destination pages.
0081      */
0082     ret = -ENOMEM;
0083     image->control_code_page = kimage_alloc_control_pages(image,
0084                        get_order(KEXEC_CONTROL_PAGE_SIZE));
0085     if (!image->control_code_page) {
0086         pr_err("Could not allocate control_code_buffer\n");
0087         goto out_free_image;
0088     }
0089 
0090     if (!kexec_on_panic) {
0091         image->swap_page = kimage_alloc_control_pages(image, 0);
0092         if (!image->swap_page) {
0093             pr_err("Could not allocate swap buffer\n");
0094             goto out_free_control_pages;
0095         }
0096     }
0097 
0098     *rimage = image;
0099     return 0;
0100 out_free_control_pages:
0101     kimage_free_page_list(&image->control_pages);
0102 out_free_image:
0103     kfree(image);
0104     return ret;
0105 }
0106 
0107 static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
0108         struct kexec_segment __user *segments, unsigned long flags)
0109 {
0110     struct kimage **dest_image, *image;
0111     unsigned long i;
0112     int ret;
0113 
0114     if (flags & KEXEC_ON_CRASH) {
0115         dest_image = &kexec_crash_image;
0116         if (kexec_crash_image)
0117             arch_kexec_unprotect_crashkres();
0118     } else {
0119         dest_image = &kexec_image;
0120     }
0121 
0122     if (nr_segments == 0) {
0123         /* Uninstall image */
0124         kimage_free(xchg(dest_image, NULL));
0125         return 0;
0126     }
0127     if (flags & KEXEC_ON_CRASH) {
0128         /*
0129          * Loading another kernel to switch to if this one
0130          * crashes.  Free any current crash dump kernel before
0131          * we corrupt it.
0132          */
0133         kimage_free(xchg(&kexec_crash_image, NULL));
0134     }
0135 
0136     ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
0137     if (ret)
0138         return ret;
0139 
0140     if (flags & KEXEC_PRESERVE_CONTEXT)
0141         image->preserve_context = 1;
0142 
0143     ret = machine_kexec_prepare(image);
0144     if (ret)
0145         goto out;
0146 
0147     for (i = 0; i < nr_segments; i++) {
0148         ret = kimage_load_segment(image, &image->segment[i]);
0149         if (ret)
0150             goto out;
0151     }
0152 
0153     kimage_terminate(image);
0154 
0155     /* Install the new kernel and uninstall the old */
0156     image = xchg(dest_image, image);
0157 
0158 out:
0159     if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
0160         arch_kexec_protect_crashkres();
0161 
0162     kimage_free(image);
0163     return ret;
0164 }
0165 
0166 /*
0167  * Exec Kernel system call: for obvious reasons only root may call it.
0168  *
0169  * This call breaks up into three pieces.
0170  * - A generic part which loads the new kernel from the current
0171  *   address space, and very carefully places the data in the
0172  *   allocated pages.
0173  *
0174  * - A generic part that interacts with the kernel and tells all of
0175  *   the devices to shut down.  Preventing on-going dmas, and placing
0176  *   the devices in a consistent state so a later kernel can
0177  *   reinitialize them.
0178  *
0179  * - A machine specific part that includes the syscall number
0180  *   and then copies the image to it's final destination.  And
0181  *   jumps into the image at entry.
0182  *
0183  * kexec does not sync, or unmount filesystems so if you need
0184  * that to happen you need to do that yourself.
0185  */
0186 
0187 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
0188         struct kexec_segment __user *, segments, unsigned long, flags)
0189 {
0190     int result;
0191 
0192     /* We only trust the superuser with rebooting the system. */
0193     if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
0194         return -EPERM;
0195 
0196     /*
0197      * Verify we have a legal set of flags
0198      * This leaves us room for future extensions.
0199      */
0200     if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
0201         return -EINVAL;
0202 
0203     /* Verify we are on the appropriate architecture */
0204     if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
0205         ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
0206         return -EINVAL;
0207 
0208     /* Put an artificial cap on the number
0209      * of segments passed to kexec_load.
0210      */
0211     if (nr_segments > KEXEC_SEGMENT_MAX)
0212         return -EINVAL;
0213 
0214     /* Because we write directly to the reserved memory
0215      * region when loading crash kernels we need a mutex here to
0216      * prevent multiple crash  kernels from attempting to load
0217      * simultaneously, and to prevent a crash kernel from loading
0218      * over the top of a in use crash kernel.
0219      *
0220      * KISS: always take the mutex.
0221      */
0222     if (!mutex_trylock(&kexec_mutex))
0223         return -EBUSY;
0224 
0225     result = do_kexec_load(entry, nr_segments, segments, flags);
0226 
0227     mutex_unlock(&kexec_mutex);
0228 
0229     return result;
0230 }
0231 
0232 #ifdef CONFIG_COMPAT
0233 COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
0234                compat_ulong_t, nr_segments,
0235                struct compat_kexec_segment __user *, segments,
0236                compat_ulong_t, flags)
0237 {
0238     struct compat_kexec_segment in;
0239     struct kexec_segment out, __user *ksegments;
0240     unsigned long i, result;
0241 
0242     /* Don't allow clients that don't understand the native
0243      * architecture to do anything.
0244      */
0245     if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
0246         return -EINVAL;
0247 
0248     if (nr_segments > KEXEC_SEGMENT_MAX)
0249         return -EINVAL;
0250 
0251     ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
0252     for (i = 0; i < nr_segments; i++) {
0253         result = copy_from_user(&in, &segments[i], sizeof(in));
0254         if (result)
0255             return -EFAULT;
0256 
0257         out.buf   = compat_ptr(in.buf);
0258         out.bufsz = in.bufsz;
0259         out.mem   = in.mem;
0260         out.memsz = in.memsz;
0261 
0262         result = copy_to_user(&ksegments[i], &out, sizeof(out));
0263         if (result)
0264             return -EFAULT;
0265     }
0266 
0267     return sys_kexec_load(entry, nr_segments, ksegments, flags);
0268 }
0269 #endif