Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * ppc64 code to implement the kexec_file_load syscall
0004  *
0005  * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
0006  * Copyright (C) 2004  IBM Corp.
0007  * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
0008  * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
0009  * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
0010  * Copyright (C) 2020  IBM Corporation
0011  *
0012  * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
0013  * Heavily modified for the kernel by
0014  * Hari Bathini, IBM Corporation.
0015  */
0016 
0017 #include <linux/kexec.h>
0018 #include <linux/of_fdt.h>
0019 #include <linux/libfdt.h>
0020 #include <linux/of_device.h>
0021 #include <linux/memblock.h>
0022 #include <linux/slab.h>
0023 #include <linux/vmalloc.h>
0024 #include <asm/setup.h>
0025 #include <asm/drmem.h>
0026 #include <asm/firmware.h>
0027 #include <asm/kexec_ranges.h>
0028 #include <asm/crashdump-ppc64.h>
0029 
/*
 * Book-keeping state used while building the value of the
 * linux,usable-memory / linux,drconf-usable-memory FDT properties
 * for the kdump kernel. The buffer holds (base, size) u64 pairs.
 */
struct umem_info {
	u64 *buf;		/* data buffer for usable-memory property */
	u32 size;		/* size allocated for the data buffer */
	u32 max_entries;	/* maximum no. of entries */
	u32 idx;		/* index of current entry */

	/* usable memory ranges to look up */
	unsigned int nr_ranges;
	const struct crash_mem_range *ranges;
};
0040 
/* ELF is the only image format supported by kexec_file_load on ppc64 */
const struct kexec_file_ops * const kexec_file_loaders[] = {
	&kexec_elf64_ops,
	NULL
};
0045 
0046 /**
0047  * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
0048  *                             regions like opal/rtas, tce-table, initrd,
0049  *                             kernel, htab which should be avoided while
0050  *                             setting up kexec load segments.
0051  * @mem_ranges:                Range list to add the memory ranges to.
0052  *
0053  * Returns 0 on success, negative errno on error.
0054  */
0055 static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
0056 {
0057     int ret;
0058 
0059     ret = add_tce_mem_ranges(mem_ranges);
0060     if (ret)
0061         goto out;
0062 
0063     ret = add_initrd_mem_range(mem_ranges);
0064     if (ret)
0065         goto out;
0066 
0067     ret = add_htab_mem_range(mem_ranges);
0068     if (ret)
0069         goto out;
0070 
0071     ret = add_kernel_mem_range(mem_ranges);
0072     if (ret)
0073         goto out;
0074 
0075     ret = add_rtas_mem_range(mem_ranges);
0076     if (ret)
0077         goto out;
0078 
0079     ret = add_opal_mem_range(mem_ranges);
0080     if (ret)
0081         goto out;
0082 
0083     ret = add_reserved_mem_ranges(mem_ranges);
0084     if (ret)
0085         goto out;
0086 
0087     /* exclude memory ranges should be sorted for easy lookup */
0088     sort_memory_ranges(*mem_ranges, true);
0089 out:
0090     if (ret)
0091         pr_err("Failed to setup exclude memory ranges\n");
0092     return ret;
0093 }
0094 
0095 /**
0096  * get_usable_memory_ranges - Get usable memory ranges. This list includes
0097  *                            regions like crashkernel, opal/rtas & tce-table,
0098  *                            that kdump kernel could use.
0099  * @mem_ranges:               Range list to add the memory ranges to.
0100  *
0101  * Returns 0 on success, negative errno on error.
0102  */
0103 static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
0104 {
0105     int ret;
0106 
0107     /*
0108      * Early boot failure observed on guests when low memory (first memory
0109      * block?) is not added to usable memory. So, add [0, crashk_res.end]
0110      * instead of [crashk_res.start, crashk_res.end] to workaround it.
0111      * Also, crashed kernel's memory must be added to reserve map to
0112      * avoid kdump kernel from using it.
0113      */
0114     ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
0115     if (ret)
0116         goto out;
0117 
0118     ret = add_rtas_mem_range(mem_ranges);
0119     if (ret)
0120         goto out;
0121 
0122     ret = add_opal_mem_range(mem_ranges);
0123     if (ret)
0124         goto out;
0125 
0126     ret = add_tce_mem_ranges(mem_ranges);
0127 out:
0128     if (ret)
0129         pr_err("Failed to setup usable memory ranges\n");
0130     return ret;
0131 }
0132 
0133 /**
0134  * get_crash_memory_ranges - Get crash memory ranges. This list includes
0135  *                           first/crashing kernel's memory regions that
0136  *                           would be exported via an elfcore.
0137  * @mem_ranges:              Range list to add the memory ranges to.
0138  *
0139  * Returns 0 on success, negative errno on error.
0140  */
0141 static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
0142 {
0143     phys_addr_t base, end;
0144     struct crash_mem *tmem;
0145     u64 i;
0146     int ret;
0147 
0148     for_each_mem_range(i, &base, &end) {
0149         u64 size = end - base;
0150 
0151         /* Skip backup memory region, which needs a separate entry */
0152         if (base == BACKUP_SRC_START) {
0153             if (size > BACKUP_SRC_SIZE) {
0154                 base = BACKUP_SRC_END + 1;
0155                 size -= BACKUP_SRC_SIZE;
0156             } else
0157                 continue;
0158         }
0159 
0160         ret = add_mem_range(mem_ranges, base, size);
0161         if (ret)
0162             goto out;
0163 
0164         /* Try merging adjacent ranges before reallocation attempt */
0165         if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
0166             sort_memory_ranges(*mem_ranges, true);
0167     }
0168 
0169     /* Reallocate memory ranges if there is no space to split ranges */
0170     tmem = *mem_ranges;
0171     if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
0172         tmem = realloc_mem_ranges(mem_ranges);
0173         if (!tmem)
0174             goto out;
0175     }
0176 
0177     /* Exclude crashkernel region */
0178     ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
0179     if (ret)
0180         goto out;
0181 
0182     /*
0183      * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
0184      *        regions are exported to save their context at the time of
0185      *        crash, they should actually be backed up just like the
0186      *        first 64K bytes of memory.
0187      */
0188     ret = add_rtas_mem_range(mem_ranges);
0189     if (ret)
0190         goto out;
0191 
0192     ret = add_opal_mem_range(mem_ranges);
0193     if (ret)
0194         goto out;
0195 
0196     /* create a separate program header for the backup region */
0197     ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
0198     if (ret)
0199         goto out;
0200 
0201     sort_memory_ranges(*mem_ranges, false);
0202 out:
0203     if (ret)
0204         pr_err("Failed to setup crash memory ranges\n");
0205     return ret;
0206 }
0207 
/**
 * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
 *                              memory regions that should be added to the
 *                              memory reserve map to ensure the region is
 *                              protected from any mischief.
 * @mem_ranges:                 Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	ret = add_rtas_mem_range(mem_ranges);
	if (!ret)
		ret = add_tce_mem_ranges(mem_ranges);
	if (!ret)
		ret = add_reserved_mem_ranges(mem_ranges);

	if (ret)
		pr_err("Failed to setup reserved memory ranges\n");
	return ret;
}
0235 
0236 /**
0237  * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
0238  *                              in the memory regions between buf_min & buf_max
0239  *                              for the buffer. If found, sets kbuf->mem.
0240  * @kbuf:                       Buffer contents and memory parameters.
0241  * @buf_min:                    Minimum address for the buffer.
0242  * @buf_max:                    Maximum address for the buffer.
0243  *
0244  * Returns 0 on success, negative errno on error.
0245  */
0246 static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
0247                       u64 buf_min, u64 buf_max)
0248 {
0249     int ret = -EADDRNOTAVAIL;
0250     phys_addr_t start, end;
0251     u64 i;
0252 
0253     for_each_mem_range_rev(i, &start, &end) {
0254         /*
0255          * memblock uses [start, end) convention while it is
0256          * [start, end] here. Fix the off-by-one to have the
0257          * same convention.
0258          */
0259         end -= 1;
0260 
0261         if (start > buf_max)
0262             continue;
0263 
0264         /* Memory hole not found */
0265         if (end < buf_min)
0266             break;
0267 
0268         /* Adjust memory region based on the given range */
0269         if (start < buf_min)
0270             start = buf_min;
0271         if (end > buf_max)
0272             end = buf_max;
0273 
0274         start = ALIGN(start, kbuf->buf_align);
0275         if (start < end && (end - start + 1) >= kbuf->memsz) {
0276             /* Suitable memory range found. Set kbuf->mem */
0277             kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
0278                            kbuf->buf_align);
0279             ret = 0;
0280             break;
0281         }
0282     }
0283 
0284     return ret;
0285 }
0286 
/**
 * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
 *                                  suitable buffer with top down approach.
 * @kbuf:                           Buffer contents and memory parameters.
 * @buf_min:                        Minimum address for the buffer.
 * @buf_max:                        Maximum address for the buffer.
 * @emem:                           Exclude memory ranges.
 *
 * Walks the gaps between exclude ranges from high to low addresses,
 * trying each gap with __locate_mem_hole_top_down().
 * NOTE(review): assumes @emem is sorted in ascending address order —
 * callers sort it via sort_memory_ranges(); confirm for new callers.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
				      u64 buf_min, u64 buf_max,
				      const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	/* [tmin, tmax] is the candidate window, shrunk as we walk down */
	tmax = buf_max;
	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		/* Exclude range lies entirely above the current window */
		if (start > tmax)
			continue;

		/* Try the gap between this exclude range and tmax */
		if (end < tmax) {
			tmin = (end < buf_min ? buf_min : end + 1);
			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		/* Continue searching below this exclude range */
		tmax = start - 1;

		if (tmax < buf_min) {
			ret = err;
			break;
		}
		ret = 0;
	}

	/* Finally, try the window below all exclude ranges */
	if (!ret) {
		tmin = buf_min;
		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
	}
	return ret;
}
0334 
0335 /**
0336  * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
0337  *                               in the memory regions between buf_min & buf_max
0338  *                               for the buffer. If found, sets kbuf->mem.
0339  * @kbuf:                        Buffer contents and memory parameters.
0340  * @buf_min:                     Minimum address for the buffer.
0341  * @buf_max:                     Maximum address for the buffer.
0342  *
0343  * Returns 0 on success, negative errno on error.
0344  */
0345 static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
0346                        u64 buf_min, u64 buf_max)
0347 {
0348     int ret = -EADDRNOTAVAIL;
0349     phys_addr_t start, end;
0350     u64 i;
0351 
0352     for_each_mem_range(i, &start, &end) {
0353         /*
0354          * memblock uses [start, end) convention while it is
0355          * [start, end] here. Fix the off-by-one to have the
0356          * same convention.
0357          */
0358         end -= 1;
0359 
0360         if (end < buf_min)
0361             continue;
0362 
0363         /* Memory hole not found */
0364         if (start > buf_max)
0365             break;
0366 
0367         /* Adjust memory region based on the given range */
0368         if (start < buf_min)
0369             start = buf_min;
0370         if (end > buf_max)
0371             end = buf_max;
0372 
0373         start = ALIGN(start, kbuf->buf_align);
0374         if (start < end && (end - start + 1) >= kbuf->memsz) {
0375             /* Suitable memory range found. Set kbuf->mem */
0376             kbuf->mem = start;
0377             ret = 0;
0378             break;
0379         }
0380     }
0381 
0382     return ret;
0383 }
0384 
/**
 * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
 *                                   suitable buffer with bottom up approach.
 * @kbuf:                            Buffer contents and memory parameters.
 * @buf_min:                         Minimum address for the buffer.
 * @buf_max:                         Maximum address for the buffer.
 * @emem:                            Exclude memory ranges.
 *
 * Walks the gaps between exclude ranges from low to high addresses,
 * trying each gap with __locate_mem_hole_bottom_up().
 * NOTE(review): assumes @emem is sorted in ascending address order —
 * callers sort it via sort_memory_ranges(); confirm for new callers.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
				       u64 buf_min, u64 buf_max,
				       const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	/* [tmin, tmax] is the candidate window, moved up as we walk */
	tmin = buf_min;
	for (i = 0; i < emem->nr_ranges; i++) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		/* Exclude range lies entirely below the current window */
		if (end < tmin)
			continue;

		/* Try the gap between tmin and this exclude range */
		if (start > tmin) {
			tmax = (start > buf_max ? buf_max : start - 1);
			ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		/* Continue searching above this exclude range */
		tmin = end + 1;

		if (tmin > buf_max) {
			ret = err;
			break;
		}
		ret = 0;
	}

	/* Finally, try the window above all exclude ranges */
	if (!ret) {
		tmax = buf_max;
		ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
	}
	return ret;
}
0432 
0433 /**
0434  * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
0435  * @um_info:                  Usable memory buffer and ranges info.
0436  * @cnt:                      No. of entries to accommodate.
0437  *
0438  * Frees up the old buffer if memory reallocation fails.
0439  *
0440  * Returns buffer on success, NULL on error.
0441  */
0442 static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
0443 {
0444     u32 new_size;
0445     u64 *tbuf;
0446 
0447     if ((um_info->idx + cnt) <= um_info->max_entries)
0448         return um_info->buf;
0449 
0450     new_size = um_info->size + MEM_RANGE_CHUNK_SZ;
0451     tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL);
0452     if (tbuf) {
0453         um_info->buf = tbuf;
0454         um_info->size = new_size;
0455         um_info->max_entries = (um_info->size / sizeof(u64));
0456     }
0457 
0458     return tbuf;
0459 }
0460 
0461 /**
0462  * add_usable_mem - Add the usable memory ranges within the given memory range
0463  *                  to the buffer
0464  * @um_info:        Usable memory buffer and ranges info.
0465  * @base:           Base address of memory range to look for.
0466  * @end:            End address of memory range to look for.
0467  *
0468  * Returns 0 on success, negative errno on error.
0469  */
0470 static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
0471 {
0472     u64 loc_base, loc_end;
0473     bool add;
0474     int i;
0475 
0476     for (i = 0; i < um_info->nr_ranges; i++) {
0477         add = false;
0478         loc_base = um_info->ranges[i].start;
0479         loc_end = um_info->ranges[i].end;
0480         if (loc_base >= base && loc_end <= end)
0481             add = true;
0482         else if (base < loc_end && end > loc_base) {
0483             if (loc_base < base)
0484                 loc_base = base;
0485             if (loc_end > end)
0486                 loc_end = end;
0487             add = true;
0488         }
0489 
0490         if (add) {
0491             if (!check_realloc_usable_mem(um_info, 2))
0492                 return -ENOMEM;
0493 
0494             um_info->buf[um_info->idx++] = cpu_to_be64(loc_base);
0495             um_info->buf[um_info->idx++] =
0496                     cpu_to_be64(loc_end - loc_base + 1);
0497         }
0498     }
0499 
0500     return 0;
0501 }
0502 
/**
 * kdump_setup_usable_lmb - This is a callback function that gets called by
 *                          walk_drmem_lmbs for every LMB to set its
 *                          usable memory ranges.
 * @lmb:                    LMB info.
 * @usm:                    linux,drconf-usable-memory property value.
 * @data:                   Pointer to usable memory buffer and ranges info.
 *
 * Returns 0 on success, negative errno on error.
 */
static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
				  void *data)
{
	struct umem_info *um_info;
	int tmp_idx, ret;
	u64 base, end;

	/*
	 * kdump load isn't supported on kernels already booted with
	 * linux,drconf-usable-memory property.
	 */
	if (*usm) {
		pr_err("linux,drconf-usable-memory property already exists!");
		return -EINVAL;
	}

	um_info = data;
	/* Remember where this LMB's range-count slot goes; filled in below */
	tmp_idx = um_info->idx;
	if (!check_realloc_usable_mem(um_info, 1))
		return -ENOMEM;

	/* Reserve one entry for the count, then append (base, size) pairs */
	um_info->idx++;
	base = lmb->base_addr;
	end = base + drmem_lmb_size() - 1;
	ret = add_usable_mem(um_info, base, end);
	if (!ret) {
		/*
		 * Update the no. of ranges added. Two entries (base & size)
		 * for every range added.
		 */
		um_info->buf[tmp_idx] =
				cpu_to_be64((um_info->idx - tmp_idx - 1) / 2);
	}

	return ret;
}
0549 
0550 #define NODE_PATH_LEN       256
0551 /**
0552  * add_usable_mem_property - Add usable memory property for the given
0553  *                           memory node.
0554  * @fdt:                     Flattened device tree for the kdump kernel.
0555  * @dn:                      Memory node.
0556  * @um_info:                 Usable memory buffer and ranges info.
0557  *
0558  * Returns 0 on success, negative errno on error.
0559  */
0560 static int add_usable_mem_property(void *fdt, struct device_node *dn,
0561                    struct umem_info *um_info)
0562 {
0563     int n_mem_addr_cells, n_mem_size_cells, node;
0564     char path[NODE_PATH_LEN];
0565     int i, len, ranges, ret;
0566     const __be32 *prop;
0567     u64 base, end;
0568 
0569     of_node_get(dn);
0570 
0571     if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
0572         pr_err("Buffer (%d) too small for memory node: %pOF\n",
0573                NODE_PATH_LEN, dn);
0574         return -EOVERFLOW;
0575     }
0576     pr_debug("Memory node path: %s\n", path);
0577 
0578     /* Now that we know the path, find its offset in kdump kernel's fdt */
0579     node = fdt_path_offset(fdt, path);
0580     if (node < 0) {
0581         pr_err("Malformed device tree: error reading %s\n", path);
0582         ret = -EINVAL;
0583         goto out;
0584     }
0585 
0586     /* Get the address & size cells */
0587     n_mem_addr_cells = of_n_addr_cells(dn);
0588     n_mem_size_cells = of_n_size_cells(dn);
0589     pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells,
0590          n_mem_size_cells);
0591 
0592     um_info->idx  = 0;
0593     if (!check_realloc_usable_mem(um_info, 2)) {
0594         ret = -ENOMEM;
0595         goto out;
0596     }
0597 
0598     prop = of_get_property(dn, "reg", &len);
0599     if (!prop || len <= 0) {
0600         ret = 0;
0601         goto out;
0602     }
0603 
0604     /*
0605      * "reg" property represents sequence of (addr,size) tuples
0606      * each representing a memory range.
0607      */
0608     ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
0609 
0610     for (i = 0; i < ranges; i++) {
0611         base = of_read_number(prop, n_mem_addr_cells);
0612         prop += n_mem_addr_cells;
0613         end = base + of_read_number(prop, n_mem_size_cells) - 1;
0614         prop += n_mem_size_cells;
0615 
0616         ret = add_usable_mem(um_info, base, end);
0617         if (ret)
0618             goto out;
0619     }
0620 
0621     /*
0622      * No kdump kernel usable memory found in this memory node.
0623      * Write (0,0) tuple in linux,usable-memory property for
0624      * this region to be ignored.
0625      */
0626     if (um_info->idx == 0) {
0627         um_info->buf[0] = 0;
0628         um_info->buf[1] = 0;
0629         um_info->idx = 2;
0630     }
0631 
0632     ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
0633               (um_info->idx * sizeof(u64)));
0634 
0635 out:
0636     of_node_put(dn);
0637     return ret;
0638 }
0639 
0640 
/**
 * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
 *                         and linux,drconf-usable-memory DT properties as
 *                         appropriate to restrict its memory usage.
 * @fdt:                   Flattened device tree for the kdump kernel.
 * @usable_mem:            Usable memory ranges for kdump kernel.
 *
 * Returns 0 on success, negative errno on error.
 */
static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
{
	struct umem_info um_info;
	struct device_node *dn;
	int node, ret = 0;

	if (!usable_mem) {
		pr_err("Usable memory ranges for kdump kernel not found\n");
		return -ENOENT;
	}

	/* Locate the drconf memory node in the kdump kernel's fdt, if any */
	node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
	if (node == -FDT_ERR_NOTFOUND)
		pr_debug("No dynamic reconfiguration memory found\n");
	else if (node < 0) {
		pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
		return -EINVAL;
	}

	/* Start with an empty buffer; helpers grow it on demand */
	um_info.buf  = NULL;
	um_info.size = 0;
	um_info.max_entries = 0;
	um_info.idx  = 0;
	/* Memory ranges to look up */
	um_info.ranges = &(usable_mem->ranges[0]);
	um_info.nr_ranges = usable_mem->nr_ranges;

	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (dn) {
		/* Build the per-LMB usable ranges via the walk callback */
		ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
		of_node_put(dn);

		if (ret) {
			pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
			goto out;
		}

		ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
				  um_info.buf, (um_info.idx * sizeof(u64)));
		if (ret) {
			pr_err("Failed to update fdt with linux,drconf-usable-memory property");
			goto out;
		}
	}

	/*
	 * Walk through each memory node and set linux,usable-memory property
	 * for the corresponding node in kdump kernel's fdt.
	 */
	for_each_node_by_type(dn, "memory") {
		ret = add_usable_mem_property(fdt, dn, &um_info);
		if (ret) {
			pr_err("Failed to set linux,usable-memory property for %s node",
			       dn->full_name);
			/* Drop the iterator's reference before bailing out */
			of_node_put(dn);
			goto out;
		}
	}

out:
	kfree(um_info.buf);
	return ret;
}
0713 
0714 /**
0715  * load_backup_segment - Locate a memory hole to place the backup region.
0716  * @image:               Kexec image.
0717  * @kbuf:                Buffer contents and memory parameters.
0718  *
0719  * Returns 0 on success, negative errno on error.
0720  */
0721 static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
0722 {
0723     void *buf;
0724     int ret;
0725 
0726     /*
0727      * Setup a source buffer for backup segment.
0728      *
0729      * A source buffer has no meaning for backup region as data will
0730      * be copied from backup source, after crash, in the purgatory.
0731      * But as load segment code doesn't recognize such segments,
0732      * setup a dummy source buffer to keep it happy for now.
0733      */
0734     buf = vzalloc(BACKUP_SRC_SIZE);
0735     if (!buf)
0736         return -ENOMEM;
0737 
0738     kbuf->buffer = buf;
0739     kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
0740     kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
0741     kbuf->top_down = false;
0742 
0743     ret = kexec_add_buffer(kbuf);
0744     if (ret) {
0745         vfree(buf);
0746         return ret;
0747     }
0748 
0749     image->arch.backup_buf = buf;
0750     image->arch.backup_start = kbuf->mem;
0751     return 0;
0752 }
0753 
0754 /**
0755  * update_backup_region_phdr - Update backup region's offset for the core to
0756  *                             export the region appropriately.
0757  * @image:                     Kexec image.
0758  * @ehdr:                      ELF core header.
0759  *
0760  * Assumes an exclusive program header is setup for the backup region
0761  * in the ELF headers
0762  *
0763  * Returns nothing.
0764  */
0765 static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
0766 {
0767     Elf64_Phdr *phdr;
0768     unsigned int i;
0769 
0770     phdr = (Elf64_Phdr *)(ehdr + 1);
0771     for (i = 0; i < ehdr->e_phnum; i++) {
0772         if (phdr->p_paddr == BACKUP_SRC_START) {
0773             phdr->p_offset = image->arch.backup_start;
0774             pr_debug("Backup region offset updated to 0x%lx\n",
0775                  image->arch.backup_start);
0776             return;
0777         }
0778     }
0779 }
0780 
0781 /**
0782  * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
0783  *                           segment needed to load kdump kernel.
0784  * @image:                   Kexec image.
0785  * @kbuf:                    Buffer contents and memory parameters.
0786  *
0787  * Returns 0 on success, negative errno on error.
0788  */
0789 static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
0790 {
0791     struct crash_mem *cmem = NULL;
0792     unsigned long headers_sz;
0793     void *headers = NULL;
0794     int ret;
0795 
0796     ret = get_crash_memory_ranges(&cmem);
0797     if (ret)
0798         goto out;
0799 
0800     /* Setup elfcorehdr segment */
0801     ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz);
0802     if (ret) {
0803         pr_err("Failed to prepare elf headers for the core\n");
0804         goto out;
0805     }
0806 
0807     /* Fix the offset for backup region in the ELF header */
0808     update_backup_region_phdr(image, headers);
0809 
0810     kbuf->buffer = headers;
0811     kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
0812     kbuf->bufsz = kbuf->memsz = headers_sz;
0813     kbuf->top_down = false;
0814 
0815     ret = kexec_add_buffer(kbuf);
0816     if (ret) {
0817         vfree(headers);
0818         goto out;
0819     }
0820 
0821     image->elf_load_addr = kbuf->mem;
0822     image->elf_headers_sz = headers_sz;
0823     image->elf_headers = headers;
0824 out:
0825     kfree(cmem);
0826     return ret;
0827 }
0828 
/**
 * load_crashdump_segments_ppc64 - Initialize the additional segements needed
 *                                 to load kdump kernel.
 * @image:                         Kexec image.
 * @kbuf:                          Buffer contents and memory parameters.
 *
 * Loads the backup segment first, then the elfcorehdr segment; each
 * helper records its placement in @image.
 *
 * Returns 0 on success, negative errno on error.
 */
int load_crashdump_segments_ppc64(struct kimage *image,
				  struct kexec_buf *kbuf)
{
	int ret;

	/* Load backup segment - first 64K bytes of the crashing kernel */
	ret = load_backup_segment(image, kbuf);
	if (ret) {
		pr_err("Failed to load backup segment\n");
		return ret;
	}
	pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem);

	/* Load elfcorehdr segment - to export crashing kernel's vmcore */
	ret = load_elfcorehdr_segment(image, kbuf);
	if (ret) {
		pr_err("Failed to load elfcorehdr segment\n");
		return ret;
	}
	pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
		 image->elf_load_addr, kbuf->bufsz, kbuf->memsz);

	return 0;
}
0861 
0862 /**
0863  * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
0864  *                         variables and call setup_purgatory() to initialize
0865  *                         common global variable.
0866  * @image:                 kexec image.
0867  * @slave_code:            Slave code for the purgatory.
0868  * @fdt:                   Flattened device tree for the next kernel.
0869  * @kernel_load_addr:      Address where the kernel is loaded.
0870  * @fdt_load_addr:         Address where the flattened device tree is loaded.
0871  *
0872  * Returns 0 on success, negative errno on error.
0873  */
0874 int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
0875               const void *fdt, unsigned long kernel_load_addr,
0876               unsigned long fdt_load_addr)
0877 {
0878     struct device_node *dn = NULL;
0879     int ret;
0880 
0881     ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
0882                   fdt_load_addr);
0883     if (ret)
0884         goto out;
0885 
0886     if (image->type == KEXEC_TYPE_CRASH) {
0887         u32 my_run_at_load = 1;
0888 
0889         /*
0890          * Tell relocatable kernel to run at load address
0891          * via the word meant for that at 0x5c.
0892          */
0893         ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
0894                              &my_run_at_load,
0895                              sizeof(my_run_at_load),
0896                              false);
0897         if (ret)
0898             goto out;
0899     }
0900 
0901     /* Tell purgatory where to look for backup region */
0902     ret = kexec_purgatory_get_set_symbol(image, "backup_start",
0903                          &image->arch.backup_start,
0904                          sizeof(image->arch.backup_start),
0905                          false);
0906     if (ret)
0907         goto out;
0908 
0909     /* Setup OPAL base & entry values */
0910     dn = of_find_node_by_path("/ibm,opal");
0911     if (dn) {
0912         u64 val;
0913 
0914         of_property_read_u64(dn, "opal-base-address", &val);
0915         ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
0916                              sizeof(val), false);
0917         if (ret)
0918             goto out;
0919 
0920         of_property_read_u64(dn, "opal-entry-address", &val);
0921         ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
0922                              sizeof(val), false);
0923     }
0924 out:
0925     if (ret)
0926         pr_err("Failed to setup purgatory symbols");
0927     of_node_put(dn);
0928     return ret;
0929 }
0930 
0931 /**
0932  * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
0933  *                              setup FDT for kexec/kdump kernel.
0934  * @image:                      kexec image being loaded.
0935  *
0936  * Returns the estimated extra size needed for kexec/kdump kernel FDT.
0937  */
0938 unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
0939 {
0940     u64 usm_entries;
0941 
0942     if (image->type != KEXEC_TYPE_CRASH)
0943         return 0;
0944 
0945     /*
0946      * For kdump kernel, account for linux,usable-memory and
0947      * linux,drconf-usable-memory properties. Get an approximate on the
0948      * number of usable memory entries and use for FDT size estimation.
0949      */
0950     usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
0951                (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
0952     return (unsigned int)(usm_entries * sizeof(u64));
0953 }
0954 
0955 /**
0956  * add_node_props - Reads node properties from device node structure and add
0957  *                  them to fdt.
0958  * @fdt:            Flattened device tree of the kernel
0959  * @node_offset:    offset of the node to add a property at
0960  * @dn:             device node pointer
0961  *
0962  * Returns 0 on success, negative errno on error.
0963  */
0964 static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
0965 {
0966     int ret = 0;
0967     struct property *pp;
0968 
0969     if (!dn)
0970         return -EINVAL;
0971 
0972     for_each_property_of_node(dn, pp) {
0973         ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
0974         if (ret < 0) {
0975             pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
0976             return ret;
0977         }
0978     }
0979     return ret;
0980 }
0981 
/**
 * update_cpus_node - Update cpus node of flattened device tree using of_root
 *                    device node.
 * @fdt:              Flattened device tree of the kernel.
 *
 * Deletes any existing /cpus node in @fdt and rebuilds it from the live
 * device tree, so CPUs hotplugged since boot are visible to the next kernel.
 *
 * Returns 0 on success, negative errno on error.
 */
static int update_cpus_node(void *fdt)
{
	struct device_node *cpus_node, *dn;
	int cpus_offset, cpus_subnode_offset, ret = 0;

	cpus_offset = fdt_path_offset(fdt, "/cpus");
	/* A missing /cpus node is fine (recreated below); other errors are not. */
	if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
		pr_err("Malformed device tree: error reading /cpus node: %s\n",
		       fdt_strerror(cpus_offset));
		return cpus_offset;
	}

	/* Drop the stale node so it can be rebuilt from the live tree. */
	if (cpus_offset > 0) {
		ret = fdt_del_node(fdt, cpus_offset);
		if (ret < 0) {
			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
			return -EINVAL;
		}
	}

	/* Add cpus node to fdt */
	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
	if (cpus_offset < 0) {
		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
		return -EINVAL;
	}

	/* Add cpus node properties (add_node_props rejects a NULL node). */
	cpus_node = of_find_node_by_path("/cpus");
	ret = add_node_props(fdt, cpus_offset, cpus_node);
	of_node_put(cpus_node);
	if (ret < 0)
		return ret;

	/* Loop through all subnodes of cpus and add them to fdt */
	for_each_node_by_type(dn, "cpu") {
		cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
		if (cpus_subnode_offset < 0) {
			pr_err("Unable to add %s subnode: %s\n", dn->full_name,
			       fdt_strerror(cpus_subnode_offset));
			ret = cpus_subnode_offset;
			goto out;
		}

		ret = add_node_props(fdt, cpus_subnode_offset, dn);
		if (ret < 0)
			goto out;
	}
out:
	/* dn is NULL on normal loop exit; of_node_put(NULL) is a no-op. */
	of_node_put(dn);
	return ret;
}
1041 
1042 static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
1043              const char *propname)
1044 {
1045     const void *prop, *fdtprop;
1046     int len = 0, fdtlen = 0;
1047 
1048     prop = of_get_property(dn, propname, &len);
1049     fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);
1050 
1051     if (fdtprop && !prop)
1052         return fdt_delprop(fdt, node_offset, propname);
1053     else if (prop)
1054         return fdt_setprop(fdt, node_offset, propname, prop, len);
1055     else
1056         return -FDT_ERR_NOTFOUND;
1057 }
1058 
1059 static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
1060 {
1061     struct device_node *dn;
1062     int pci_offset, root_offset, ret = 0;
1063 
1064     if (!firmware_has_feature(FW_FEATURE_LPAR))
1065         return 0;
1066 
1067     root_offset = fdt_path_offset(fdt, "/");
1068     for_each_node_with_property(dn, dmapropname) {
1069         pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
1070         if (pci_offset < 0)
1071             continue;
1072 
1073         ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
1074         if (ret < 0)
1075             break;
1076         ret = copy_property(fdt, pci_offset, dn, dmapropname);
1077         if (ret < 0)
1078             break;
1079     }
1080 
1081     return ret;
1082 }
1083 
/**
 * setup_new_fdt_ppc64 - Update the flattened device-tree of the kernel
 *                       being loaded.
 * @image:               kexec image being loaded.
 * @fdt:                 Flattened device tree for the next kernel.
 * @initrd_load_addr:    Address where the next initrd will be loaded.
 * @initrd_len:          Size of the next initrd, or 0 if there will be none.
 * @cmdline:             Command line for the next kernel, or NULL if there will
 *                       be none.
 *
 * NOTE(review): @initrd_load_addr, @initrd_len and @cmdline are unused in
 * this body — presumably consumed by generic FDT setup; confirm at caller.
 *
 * Returns 0 on success, negative errno on error.
 */
int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
			unsigned long initrd_load_addr,
			unsigned long initrd_len, const char *cmdline)
{
	struct crash_mem *umem = NULL, *rmem = NULL;
	int i, nr_ranges, ret;

	/*
	 * Restrict memory usage for kdump kernel by setting up
	 * usable memory ranges and memory reserve map.
	 */
	if (image->type == KEXEC_TYPE_CRASH) {
		ret = get_usable_memory_ranges(&umem);
		if (ret)
			goto out;

		ret = update_usable_mem_fdt(fdt, umem);
		if (ret) {
			pr_err("Error setting up usable-memory property for kdump kernel\n");
			goto out;
		}

		/*
		 * Ensure we don't touch crashed kernel's memory except the
		 * first 64K of RAM, which will be backed up.
		 */
		ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
				      crashk_res.start - BACKUP_SRC_SIZE);
		if (ret) {
			pr_err("Error reserving crash memory: %s\n",
			       fdt_strerror(ret));
			goto out;
		}

		/* Ensure backup region is not used by kdump/capture kernel */
		ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
				      BACKUP_SRC_SIZE);
		if (ret) {
			pr_err("Error reserving memory for backup: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}

	/* Update cpus nodes information to account hotplug CPUs. */
	ret =  update_cpus_node(fdt);
	if (ret < 0)
		goto out;

	/*
	 * Refresh DDW DMA window properties, which may have changed since
	 * boot, from the live device tree.
	 */
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
	ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
	if (ret < 0)
		goto out;

	ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
	if (ret < 0)
		goto out;
#undef DMA64_PROPNAME
#undef DIRECT64_PROPNAME

	/* Update memory reserve map */
	ret = get_reserved_memory_ranges(&rmem);
	if (ret)
		goto out;

	nr_ranges = rmem ? rmem->nr_ranges : 0;
	for (i = 0; i < nr_ranges; i++) {
		u64 base, size;

		base = rmem->ranges[i].start;
		/* Range ends are inclusive; +1 converts to a byte count. */
		size = rmem->ranges[i].end - base + 1;
		ret = fdt_add_mem_rsv(fdt, base, size);
		if (ret) {
			pr_err("Error updating memory reserve map: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}

out:
	/* kfree(NULL) is a no-op on early-failure paths. */
	kfree(rmem);
	kfree(umem);
	return ret;
}
1181 
/**
 * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
 *                              tce-table, reserved-ranges & such (exclude
 *                              memory ranges) as they can't be used for kexec
 *                              segment buffer. Sets kbuf->mem when a suitable
 *                              memory hole is found.
 * @kbuf:                       Buffer contents and memory parameters.
 *
 * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
	struct crash_mem **emem;
	u64 buf_min, buf_max;
	int ret;

	/* Look up the exclude ranges list while locating the memory hole */
	emem = &(kbuf->image->arch.exclude_ranges);
	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
		/* Nothing to avoid: the generic walker suffices. */
		pr_warn("No exclude range list. Using the default locate mem hole method\n");
		return kexec_locate_mem_hole(kbuf);
	}

	buf_min = kbuf->buf_min;
	buf_max = kbuf->buf_max;
	/* Segments for kdump kernel should be within crashkernel region */
	if (kbuf->image->type == KEXEC_TYPE_CRASH) {
		buf_min = (buf_min < crashk_res.start ?
			   crashk_res.start : buf_min);
		buf_max = (buf_max > crashk_res.end ?
			   crashk_res.end : buf_max);
	}

	/* Clamping above may have emptied the window entirely. */
	if (buf_min > buf_max) {
		pr_err("Invalid buffer min and/or max values\n");
		return -EINVAL;
	}

	if (kbuf->top_down)
		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
						     *emem);
	else
		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
						      *emem);

	/* Add the buffer allocated to the exclude list for the next lookup */
	if (!ret) {
		/*
		 * NOTE(review): add_mem_range()'s return value is ignored;
		 * if it fails, the new buffer stays out of the exclude list
		 * for later lookups — confirm this best-effort is intended.
		 */
		add_mem_range(emem, kbuf->mem, kbuf->memsz);
		sort_memory_ranges(*emem, true);
	} else {
		pr_err("Failed to locate memory buffer of size %lu\n",
		       kbuf->memsz);
	}
	return ret;
}
1239 
1240 /**
1241  * arch_kexec_kernel_image_probe - Does additional handling needed to setup
1242  *                                 kexec segments.
1243  * @image:                         kexec image being loaded.
1244  * @buf:                           Buffer pointing to elf data.
1245  * @buf_len:                       Length of the buffer.
1246  *
1247  * Returns 0 on success, negative errno on error.
1248  */
1249 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
1250                   unsigned long buf_len)
1251 {
1252     int ret;
1253 
1254     /* Get exclude memory ranges needed for setting up kexec segments */
1255     ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
1256     if (ret) {
1257         pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
1258         return ret;
1259     }
1260 
1261     return kexec_image_probe_default(image, buf, buf_len);
1262 }
1263 
1264 /**
1265  * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
1266  *                                      while loading the image.
1267  * @image:                              kexec image being loaded.
1268  *
1269  * Returns 0 on success, negative errno on error.
1270  */
1271 int arch_kimage_file_post_load_cleanup(struct kimage *image)
1272 {
1273     kfree(image->arch.exclude_ranges);
1274     image->arch.exclude_ranges = NULL;
1275 
1276     vfree(image->arch.backup_buf);
1277     image->arch.backup_buf = NULL;
1278 
1279     vfree(image->elf_headers);
1280     image->elf_headers = NULL;
1281     image->elf_headers_sz = 0;
1282 
1283     kvfree(image->arch.fdt);
1284     image->arch.fdt = NULL;
1285 
1286     return kexec_image_post_load_cleanup_default(image);
1287 }