Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
0004  * dump with assistance from firmware. This approach does not use kexec,
0005  * instead firmware assists in booting the kdump kernel while preserving
0006  * memory contents. The most of the code implementation has been adapted
0007  * from phyp assisted dump implementation written by Linas Vepstas and
0008  * Manish Ahuja
0009  *
0010  * Copyright 2011 IBM Corporation
0011  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
0012  */
0013 
0014 #undef DEBUG
0015 #define pr_fmt(fmt) "fadump: " fmt
0016 
0017 #include <linux/string.h>
0018 #include <linux/memblock.h>
0019 #include <linux/delay.h>
0020 #include <linux/seq_file.h>
0021 #include <linux/crash_dump.h>
0022 #include <linux/kobject.h>
0023 #include <linux/sysfs.h>
0024 #include <linux/slab.h>
0025 #include <linux/cma.h>
0026 #include <linux/hugetlb.h>
0027 #include <linux/debugfs.h>
0028 #include <linux/of.h>
0029 #include <linux/of_fdt.h>
0030 
0031 #include <asm/page.h>
0032 #include <asm/fadump.h>
0033 #include <asm/fadump-internal.h>
0034 #include <asm/setup.h>
0035 #include <asm/interrupt.h>
0036 
/*
 * The CPU who acquired the lock to trigger the fadump crash should
 * wait for other CPUs to enter.
 *
 * The timeout is in milliseconds.
 */
#define CRASH_TIMEOUT		500

/* Global fadump configuration/state, populated by the platform DT scan. */
static struct fw_dump fw_dump;

static void __init fadump_reserve_crash_area(u64 base);

#ifndef CONFIG_PRESERVE_FA_DUMP

/* sysfs kobject under which fadump attributes hang. */
static struct kobject *fadump_kobj;

/* Number of secondary CPUs that have entered crash_fadump(). */
static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);

/* Crash memory ranges: range array allocated on demand (is_static = false). */
static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };

#define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
#define RESERVED_RNGS_CNT	(RESERVED_RNGS_SZ / \
				 sizeof(struct fadump_memory_range))
static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
/* F/W reserved-ranges: backed by the static 'rngs' array (is_static = true). */
static struct fadump_mrange_info
reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };

static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
0066 
0067 #ifdef CONFIG_CMA
0068 static struct cma *fadump_cma;
0069 
0070 /*
0071  * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
0072  *
0073  * This function initializes CMA area from fadump reserved memory.
0074  * The total size of fadump reserved memory covers for boot memory size
0075  * + cpu data size + hpte size and metadata.
0076  * Initialize only the area equivalent to boot memory size for CMA use.
0077  * The remaining portion of fadump reserved memory will be not given
0078  * to CMA and pages for those will stay reserved. boot memory size is
0079  * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
0080  * But for some reason even if it fails we still have the memory reservation
0081  * with us and we can still continue doing fadump.
0082  */
0083 static int __init fadump_cma_init(void)
0084 {
0085     unsigned long long base, size;
0086     int rc;
0087 
0088     if (!fw_dump.fadump_enabled)
0089         return 0;
0090 
0091     /*
0092      * Do not use CMA if user has provided fadump=nocma kernel parameter.
0093      * Return 1 to continue with fadump old behaviour.
0094      */
0095     if (fw_dump.nocma)
0096         return 1;
0097 
0098     base = fw_dump.reserve_dump_area_start;
0099     size = fw_dump.boot_memory_size;
0100 
0101     if (!size)
0102         return 0;
0103 
0104     rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
0105     if (rc) {
0106         pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
0107         /*
0108          * Though the CMA init has failed we still have memory
0109          * reservation with us. The reserved memory will be
0110          * blocked from production system usage.  Hence return 1,
0111          * so that we can continue with fadump.
0112          */
0113         return 1;
0114     }
0115 
0116     /*
0117      *  If CMA activation fails, keep the pages reserved, instead of
0118      *  exposing them to buddy allocator. Same as 'fadump=nocma' case.
0119      */
0120     cma_reserve_pages_on_error(fadump_cma);
0121 
0122     /*
0123      * So we now have successfully initialized cma area for fadump.
0124      */
0125     pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
0126         "bytes of memory reserved for firmware-assisted dump\n",
0127         cma_get_size(fadump_cma),
0128         (unsigned long)cma_get_base(fadump_cma) >> 20,
0129         fw_dump.reserve_dump_area_size);
0130     return 1;
0131 }
#else
/* CONFIG_CMA disabled: no carve-out; reserved memory simply stays reserved. */
static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
0135 
0136 /* Scan the Firmware Assisted dump configuration details. */
0137 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
0138                       int depth, void *data)
0139 {
0140     if (depth == 0) {
0141         early_init_dt_scan_reserved_ranges(node);
0142         return 0;
0143     }
0144 
0145     if (depth != 1)
0146         return 0;
0147 
0148     if (strcmp(uname, "rtas") == 0) {
0149         rtas_fadump_dt_scan(&fw_dump, node);
0150         return 1;
0151     }
0152 
0153     if (strcmp(uname, "ibm,opal") == 0) {
0154         opal_fadump_dt_scan(&fw_dump, node);
0155         return 1;
0156     }
0157 
0158     return 0;
0159 }
0160 
0161 /*
0162  * If fadump is registered, check if the memory provided
0163  * falls within boot memory area and reserved memory area.
0164  */
0165 int is_fadump_memory_area(u64 addr, unsigned long size)
0166 {
0167     u64 d_start, d_end;
0168 
0169     if (!fw_dump.dump_registered)
0170         return 0;
0171 
0172     if (!size)
0173         return 0;
0174 
0175     d_start = fw_dump.reserve_dump_area_start;
0176     d_end = d_start + fw_dump.reserve_dump_area_size;
0177     if (((addr + size) > d_start) && (addr <= d_end))
0178         return 1;
0179 
0180     return (addr <= fw_dump.boot_mem_top);
0181 }
0182 
0183 int should_fadump_crash(void)
0184 {
0185     if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
0186         return 0;
0187     return 1;
0188 }
0189 
/* Nonzero when the platform reported an active dump (dump_active flag). */
int is_fadump_active(void)
{
	return fw_dump.dump_active;
}
0194 
/*
 * Returns true, if there are no holes in memory area between d_start to d_end,
 * false otherwise.
 */
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
	phys_addr_t reg_start, reg_end;
	bool ret = false;
	u64 i, start, end;

	/*
	 * Walk memblock memory ranges (ascending addresses), advancing
	 * d_start through each range that covers it. A gap before the next
	 * range, or exhausting all ranges before d_end, means a hole.
	 */
	for_each_mem_range(i, &reg_start, &reg_end) {
		start = max_t(u64, d_start, reg_start);
		end = min_t(u64, d_end, reg_end);
		if (d_start < end) {
			/* Memory hole from d_start to start */
			if (start > d_start)
				break;

			if (end == d_end) {
				/* Reached d_end without finding a hole. */
				ret = true;
				break;
			}

			d_start = end + 1;
		}
	}

	return ret;
}
0224 
0225 /*
0226  * Returns true, if there are no holes in boot memory area,
0227  * false otherwise.
0228  */
0229 bool is_fadump_boot_mem_contiguous(void)
0230 {
0231     unsigned long d_start, d_end;
0232     bool ret = false;
0233     int i;
0234 
0235     for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
0236         d_start = fw_dump.boot_mem_addr[i];
0237         d_end   = d_start + fw_dump.boot_mem_sz[i];
0238 
0239         ret = is_fadump_mem_area_contiguous(d_start, d_end);
0240         if (!ret)
0241             break;
0242     }
0243 
0244     return ret;
0245 }
0246 
0247 /*
0248  * Returns true, if there are no holes in reserved memory area,
0249  * false otherwise.
0250  */
0251 bool is_fadump_reserved_mem_contiguous(void)
0252 {
0253     u64 d_start, d_end;
0254 
0255     d_start = fw_dump.reserve_dump_area_start;
0256     d_end   = d_start + fw_dump.reserve_dump_area_size;
0257     return is_fadump_mem_area_contiguous(d_start, d_end);
0258 }
0259 
/* Print firmware assisted dump configurations for debugging purpose. */
static void __init fadump_show_config(void)
{
	int i;

	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
			(fw_dump.fadump_supported ? "present" : "no support"));

	if (!fw_dump.fadump_supported)
		return;

	pr_debug("Fadump enabled    : %s\n",
				(fw_dump.fadump_enabled ? "yes" : "no"));
	pr_debug("Dump Active       : %s\n",
				(fw_dump.dump_active ? "yes" : "no"));
	pr_debug("Dump section sizes:\n");
	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
	pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
	pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
	pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
	/* One line per boot memory region registered for the dump. */
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		pr_debug("[%03d] base = %llx, size = %llx\n", i,
			 fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
	}
}
0286 
/**
 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 *
 * Function to find the largest memory size we need to reserve during early
 * boot process. This will be the size of the memory that is required for a
 * kernel to boot successfully.
 *
 * This function has been taken from phyp-assisted dump feature implementation.
 *
 * returns larger of 256MB or 5% rounded down to multiples of 256MB.
 *
 * TODO: Come up with better approach to find out more accurate memory size
 * that is required for a kernel to boot successfully.
 *
 */
static __init u64 fadump_calculate_reserve_size(void)
{
	u64 base, size, bootmem_min;
	int ret;

	if (fw_dump.reserve_bootvar)
		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");

	/*
	 * Check if the size is specified through crashkernel= cmdline
	 * option. If yes, then use that but ignore base as fadump reserves
	 * memory at a predefined offset.
	 */
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base);
	if (ret == 0 && size > 0) {
		unsigned long max_size;

		/* crashkernel= takes precedence over fadump_reserve_mem=. */
		if (fw_dump.reserve_bootvar)
			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");

		fw_dump.reserve_bootvar = (unsigned long)size;

		/*
		 * Adjust if the boot memory size specified is above
		 * the upper limit.
		 */
		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
		if (fw_dump.reserve_bootvar > max_size) {
			fw_dump.reserve_bootvar = max_size;
			pr_info("Adjusted boot memory size to %luMB\n",
				(fw_dump.reserve_bootvar >> 20));
		}

		return fw_dump.reserve_bootvar;
	} else if (fw_dump.reserve_bootvar) {
		/*
		 * 'fadump_reserve_mem=' is being used to reserve memory
		 * for firmware-assisted dump.
		 */
		return fw_dump.reserve_bootvar;
	}

	/* divide by 20 to get 5% of value */
	size = memblock_phys_mem_size() / 20;

	/* round it down in multiples of 256MB (mask off the low 28 bits) */
	size = size & ~0x0FFFFFFFUL;

	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
	if (memory_limit && size > memory_limit)
		size = memory_limit;

	/* Never go below the platform's minimum boot memory requirement. */
	bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
	return (size > bootmem_min ? size : bootmem_min);
}
0358 
/*
 * Calculate the total memory size required to be reserved for
 * firmware-assisted dump registration.
 */
static unsigned long __init get_fadump_area_size(void)
{
	unsigned long size = 0;

	size += fw_dump.cpu_state_data_size;
	size += fw_dump.hpte_region_size;
	/*
	 * Account for pagesize alignment of boot memory area destination address.
	 * This facilitates mmap reading of first kernel's memory.
	 */
	size = PAGE_ALIGN(size);
	size += fw_dump.boot_memory_size;
	size += sizeof(struct fadump_crash_info_header);
	size += sizeof(struct elfhdr); /* ELF core header.*/
	size += sizeof(struct elf_phdr); /* place holder for cpu notes */
	/* Program headers for crash memory regions. */
	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);

	size = PAGE_ALIGN(size);

	/* This is to hold kernel metadata on platforms that support it */
	size += (fw_dump.ops->fadump_get_metadata_size ?
		 fw_dump.ops->fadump_get_metadata_size() : 0);
	return size;
}
0388 
0389 static int __init add_boot_mem_region(unsigned long rstart,
0390                       unsigned long rsize)
0391 {
0392     int i = fw_dump.boot_mem_regs_cnt++;
0393 
0394     if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
0395         fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
0396         return 0;
0397     }
0398 
0399     pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
0400          i, rstart, (rstart + rsize));
0401     fw_dump.boot_mem_addr[i] = rstart;
0402     fw_dump.boot_mem_sz[i] = rsize;
0403     return 1;
0404 }
0405 
0406 /*
0407  * Firmware usually has a hard limit on the data it can copy per region.
0408  * Honour that by splitting a memory range into multiple regions.
0409  */
0410 static int __init add_boot_mem_regions(unsigned long mstart,
0411                        unsigned long msize)
0412 {
0413     unsigned long rstart, rsize, max_size;
0414     int ret = 1;
0415 
0416     rstart = mstart;
0417     max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
0418     while (msize) {
0419         if (msize > max_size)
0420             rsize = max_size;
0421         else
0422             rsize = msize;
0423 
0424         ret = add_boot_mem_region(rstart, rsize);
0425         if (!ret)
0426             break;
0427 
0428         msize -= rsize;
0429         rstart += rsize;
0430     }
0431 
0432     return ret;
0433 }
0434 
/*
 * Carve the first fw_dump.boot_memory_size bytes of RAM into boot memory
 * regions (split per firmware's copy limit) and compute fw_dump.boot_mem_top
 * including any inter-range holes seen on the way.
 * Returns 1 on success, 0 if the region array overflowed.
 */
static int __init fadump_get_boot_mem_regions(void)
{
	unsigned long size, cur_size, hole_size, last_end;
	unsigned long mem_size = fw_dump.boot_memory_size;
	phys_addr_t reg_start, reg_end;
	int ret = 1;
	u64 i;

	fw_dump.boot_mem_regs_cnt = 0;

	last_end = 0;
	hole_size = 0;
	cur_size = 0;
	for_each_mem_range(i, &reg_start, &reg_end) {
		size = reg_end - reg_start;
		/* Accumulate the gap between this range and the previous one. */
		hole_size += (reg_start - last_end);

		if ((cur_size + size) >= mem_size) {
			/* Final (possibly partial) chunk of boot memory. */
			size = (mem_size - cur_size);
			ret = add_boot_mem_regions(reg_start, size);
			break;
		}

		mem_size -= size;
		cur_size += size;
		ret = add_boot_mem_regions(reg_start, size);
		if (!ret)
			break;

		last_end = reg_end;
	}
	fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);

	return ret;
}
0470 
0471 /*
0472  * Returns true, if the given range overlaps with reserved memory ranges
0473  * starting at idx. Also, updates idx to index of overlapping memory range
0474  * with the given memory range.
0475  * False, otherwise.
0476  */
0477 static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
0478 {
0479     bool ret = false;
0480     int i;
0481 
0482     for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
0483         u64 rbase = reserved_mrange_info.mem_ranges[i].base;
0484         u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
0485 
0486         if (end <= rbase)
0487             break;
0488 
0489         if ((end > rbase) &&  (base < rend)) {
0490             *idx = i;
0491             ret = true;
0492             break;
0493         }
0494     }
0495 
0496     return ret;
0497 }
0498 
/*
 * Locate a suitable memory area to reserve memory for FADump. While at it,
 * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
 */
static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
{
	struct fadump_memory_range *mrngs;
	phys_addr_t mstart, mend;
	int idx = 0;
	u64 i, ret = 0;

	mrngs = reserved_mrange_info.mem_ranges;
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&mstart, &mend, NULL) {
		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
			 i, mstart, mend, base);

		/* Never search below the requested starting address. */
		if (mstart > base)
			base = PAGE_ALIGN(mstart);

		/* Slide past overlapping f/w ranges while 'size' bytes still fit. */
		while ((mend > base) && ((mend - base) >= size)) {
			if (!overlaps_reserved_ranges(base, base+size, &idx)) {
				ret = base;
				goto out;
			}

			base = mrngs[idx].base + mrngs[idx].size;
			base = PAGE_ALIGN(base);
		}
	}

out:
	return ret;
}
0533 
/*
 * Reserve memory for firmware-assisted dump, or account for an already
 * active dump left by the previous boot. Returns 1 on success, 0 otherwise
 * (fadump is disabled on the error path).
 */
int __init fadump_reserve_mem(void)
{
	u64 base, size, mem_boundary, bootmem_min;
	int ret = 1;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
		goto error_out;
	}

	/*
	 * Initialize boot memory size
	 * If dump is active then we have already calculated the size during
	 * first kernel.
	 */
	if (!fw_dump.dump_active) {
		fw_dump.boot_memory_size =
			PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
		/* Align now so fadump_cma_init() can hand this slice to CMA. */
		if (!fw_dump.nocma) {
			fw_dump.boot_memory_size =
				ALIGN(fw_dump.boot_memory_size,
				      CMA_MIN_ALIGNMENT_BYTES);
		}
#endif

		bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
		if (fw_dump.boot_memory_size < bootmem_min) {
			pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
			       fw_dump.boot_memory_size, bootmem_min);
			goto error_out;
		}

		if (!fadump_get_boot_mem_regions()) {
			pr_err("Too many holes in boot memory area to enable fadump\n");
			goto error_out;
		}
	}

	/*
	 * Calculate the memory boundary.
	 * If memory_limit is less than actual memory boundary then reserve
	 * the memory for fadump beyond the memory_limit and adjust the
	 * memory_limit accordingly, so that the running kernel can run with
	 * specified memory_limit.
	 */
	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
		size = get_fadump_area_size();
		if ((memory_limit + size) < memblock_end_of_DRAM())
			memory_limit += size;
		else
			memory_limit = memblock_end_of_DRAM();
		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
				" dump, now %#016llx\n", memory_limit);
	}
	if (memory_limit)
		mem_boundary = memory_limit;
	else
		mem_boundary = memblock_end_of_DRAM();

	base = fw_dump.boot_mem_top;
	size = get_fadump_area_size();
	fw_dump.reserve_dump_area_size = size;
	if (fw_dump.dump_active) {
		pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * FADump capture kernel doesn't care much about hugepages.
		 * In fact, handling hugepages in capture kernel is asking for
		 * trouble. So, disable HugeTLB support when fadump is active.
		 */
		hugetlb_disabled = true;
#endif
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot memory size so that we don't touch it until
		 * dump is written to disk by userspace tool. This memory
		 * can be released for general use by invalidating fadump.
		 */
		fadump_reserve_crash_area(base);

		pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
		pr_debug("Reserve dump area start address: 0x%lx\n",
			 fw_dump.reserve_dump_area_start);
	} else {
		/*
		 * Reserve memory at an offset closer to bottom of the RAM to
		 * minimize the impact of memory hot-remove operation.
		 */
		base = fadump_locate_reserve_mem(base, size);

		if (!base || (base + size > mem_boundary)) {
			pr_err("Failed to find memory chunk for reservation!\n");
			goto error_out;
		}
		fw_dump.reserve_dump_area_start = base;

		/*
		 * Calculate the kernel metadata address and register it with
		 * f/w if the platform supports.
		 */
		if (fw_dump.ops->fadump_setup_metadata &&
		    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
			goto error_out;

		if (memblock_reserve(base, size)) {
			pr_err("Failed to reserve memory!\n");
			goto error_out;
		}

		pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
			(size >> 20), base, (memblock_phys_mem_size() >> 20));

		ret = fadump_cma_init();
	}

	return ret;
error_out:
	fw_dump.fadump_enabled = 0;
	return 0;
}
0659 
0660 /* Look for fadump= cmdline option. */
0661 static int __init early_fadump_param(char *p)
0662 {
0663     if (!p)
0664         return 1;
0665 
0666     if (strncmp(p, "on", 2) == 0)
0667         fw_dump.fadump_enabled = 1;
0668     else if (strncmp(p, "off", 3) == 0)
0669         fw_dump.fadump_enabled = 0;
0670     else if (strncmp(p, "nocma", 5) == 0) {
0671         fw_dump.fadump_enabled = 1;
0672         fw_dump.nocma = 1;
0673     }
0674 
0675     return 0;
0676 }
0677 early_param("fadump", early_fadump_param);
0678 
0679 /*
0680  * Look for fadump_reserve_mem= cmdline option
0681  * TODO: Remove references to 'fadump_reserve_mem=' parameter,
0682  *       the sooner 'crashkernel=' parameter is accustomed to.
0683  */
0684 static int __init early_fadump_reserve_mem(char *p)
0685 {
0686     if (p)
0687         fw_dump.reserve_bootvar = memparse(p, &p);
0688     return 0;
0689 }
0690 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
0691 
/*
 * Crash-time entry point: the first CPU here saves register and vmcoreinfo
 * state and asks the platform to trigger the dump; later CPUs just spin
 * while fadump remains registered.
 */
void crash_fadump(struct pt_regs *regs, const char *str)
{
	unsigned int msecs;
	struct fadump_crash_info_header *fdh = NULL;
	int old_cpu, this_cpu;
	/* Do not include first CPU */
	unsigned int ncpus = num_online_cpus() - 1;

	if (!should_fadump_crash())
		return;

	/*
	 * old_cpu == -1 means this is the first CPU which has come here,
	 * go ahead and trigger fadump.
	 *
	 * old_cpu != -1 means some other CPU has already on it's way
	 * to trigger fadump, just keep looping here.
	 */
	this_cpu = smp_processor_id();
	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);

	if (old_cpu != -1) {
		atomic_inc(&cpus_in_fadump);

		/*
		 * We can't loop here indefinitely. Wait as long as fadump
		 * is in force. If we race with fadump un-registration this
		 * loop will break and then we go down to normal panic path
		 * and reboot. If fadump is in force the first crashing
		 * cpu will definitely trigger fadump.
		 */
		while (fw_dump.dump_registered)
			cpu_relax();
		return;
	}

	fdh = __va(fw_dump.fadumphdr_addr);
	fdh->crashing_cpu = crashing_cpu;
	crash_save_vmcoreinfo();

	/* Save register state: from the caller if supplied, else capture now. */
	if (regs)
		fdh->regs = *regs;
	else
		ppc_save_regs(&fdh->regs);

	fdh->cpu_mask = *cpu_online_mask;

	/*
	 * If we came in via system reset, wait a while for the secondary
	 * CPUs to enter.
	 */
	if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
		msecs = CRASH_TIMEOUT;
		while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
			mdelay(1);
	}

	fw_dump.ops->fadump_trigger(fdh, str);
}
0751 
/*
 * Append an NT_PRSTATUS ELF note carrying @regs to the notes buffer at
 * @buf; returns the advanced buffer position.
 */
u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
	struct elf_prstatus prstatus;

	memset(&prstatus, 0, sizeof(prstatus));
	/*
	 * FIXME: How do i get PID? Do I really need it?
	 * prstatus.pr_pid = ????
	 */
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	return buf;
}
0766 
0767 void __init fadump_update_elfcore_header(char *bufp)
0768 {
0769     struct elf_phdr *phdr;
0770 
0771     bufp += sizeof(struct elfhdr);
0772 
0773     /* First note is a place holder for cpu notes info. */
0774     phdr = (struct elf_phdr *)bufp;
0775 
0776     if (phdr->p_type == PT_NOTE) {
0777         phdr->p_paddr   = __pa(fw_dump.cpu_notes_buf_vaddr);
0778         phdr->p_offset  = phdr->p_paddr;
0779         phdr->p_filesz  = fw_dump.cpu_notes_buf_size;
0780         phdr->p_memsz = fw_dump.cpu_notes_buf_size;
0781     }
0782     return;
0783 }
0784 
0785 static void *__init fadump_alloc_buffer(unsigned long size)
0786 {
0787     unsigned long count, i;
0788     struct page *page;
0789     void *vaddr;
0790 
0791     vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
0792     if (!vaddr)
0793         return NULL;
0794 
0795     count = PAGE_ALIGN(size) / PAGE_SIZE;
0796     page = virt_to_page(vaddr);
0797     for (i = 0; i < count; i++)
0798         mark_page_reserved(page + i);
0799     return vaddr;
0800 }
0801 
/* Undo fadump_alloc_buffer(): unreserve and free the page range. */
static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
0806 
/*
 * Allocate a page-aligned, reserved buffer big enough for one crash note
 * per CPU. Returns 0 on success, -ENOMEM on allocation failure.
 */
s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
	/* Allocate buffer to hold cpu crash notes. */
	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr =
		(unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
	if (!fw_dump.cpu_notes_buf_vaddr) {
		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
		       fw_dump.cpu_notes_buf_size);
		return -ENOMEM;
	}

	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
		 fw_dump.cpu_notes_buf_size,
		 fw_dump.cpu_notes_buf_vaddr);
	return 0;
}
0825 
0826 void fadump_free_cpu_notes_buf(void)
0827 {
0828     if (!fw_dump.cpu_notes_buf_vaddr)
0829         return;
0830 
0831     fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
0832                fw_dump.cpu_notes_buf_size);
0833     fw_dump.cpu_notes_buf_vaddr = 0;
0834     fw_dump.cpu_notes_buf_size = 0;
0835 }
0836 
/*
 * Release a mrange_info's dynamically allocated range array and reset its
 * bookkeeping. Static (preallocated) arrays are only emptied, not freed.
 */
static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	if (mrange_info->is_static) {
		mrange_info->mem_range_cnt = 0;
		return;
	}

	kfree(mrange_info->mem_ranges);
	/* Zero everything after the name field, including the array pointer. */
	memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
	       (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}
0848 
0849 /*
0850  * Allocate or reallocate mem_ranges array in incremental units
0851  * of PAGE_SIZE.
0852  */
0853 static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
0854 {
0855     struct fadump_memory_range *new_array;
0856     u64 new_size;
0857 
0858     new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
0859     pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
0860          new_size, mrange_info->name);
0861 
0862     new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
0863     if (new_array == NULL) {
0864         pr_err("Insufficient memory for setting up %s memory ranges\n",
0865                mrange_info->name);
0866         fadump_free_mem_ranges(mrange_info);
0867         return -ENOMEM;
0868     }
0869 
0870     mrange_info->mem_ranges = new_array;
0871     mrange_info->mem_ranges_sz = new_size;
0872     mrange_info->max_mem_ranges = (new_size /
0873                        sizeof(struct fadump_memory_range));
0874     return 0;
0875 }
/*
 * Add the memory range [base, end) to @mrange_info, folding it into the
 * previous range when adjacent (except inside the boot memory area, which
 * needs separate PT_LOAD segments). Dynamic arrays grow on demand.
 * Returns 0 on success, -ENOSPC or -ENOMEM on failure.
 */
static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
				       u64 base, u64 end)
{
	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
	bool is_adjacent = false;
	u64 start, size;

	/* Empty range: nothing to record. */
	if (base == end)
		return 0;

	/*
	 * Fold adjacent memory ranges to bring down the memory ranges/
	 * PT_LOAD segments count.
	 */
	if (mrange_info->mem_range_cnt) {
		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;

		/*
		 * Boot memory area needs separate PT_LOAD segment(s) as it
		 * is moved to a different location at the time of crash.
		 * So, fold only if the region is not boot memory area.
		 */
		if ((start + size) == base && start >= fw_dump.boot_mem_top)
			is_adjacent = true;
	}
	if (!is_adjacent) {
		/* resize the array on reaching the limit */
		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
			int ret;

			if (mrange_info->is_static) {
				pr_err("Reached array size limit for %s memory ranges\n",
				       mrange_info->name);
				return -ENOSPC;
			}

			ret = fadump_alloc_mem_ranges(mrange_info);
			if (ret)
				return ret;

			/* Update to the new resized array */
			mem_ranges = mrange_info->mem_ranges;
		}

		start = base;
		mem_ranges[mrange_info->mem_range_cnt].base = start;
		mrange_info->mem_range_cnt++;
	}

	/* Set (or extend, when folded) the size of the last range to 'end'. */
	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
		 start, end - 1, (end - start));
	return 0;
}
0932 
0933 static int fadump_exclude_reserved_area(u64 start, u64 end)
0934 {
0935     u64 ra_start, ra_end;
0936     int ret = 0;
0937 
0938     ra_start = fw_dump.reserve_dump_area_start;
0939     ra_end = ra_start + fw_dump.reserve_dump_area_size;
0940 
0941     if ((ra_start < end) && (ra_end > start)) {
0942         if ((start < ra_start) && (end > ra_end)) {
0943             ret = fadump_add_mem_range(&crash_mrange_info,
0944                            start, ra_start);
0945             if (ret)
0946                 return ret;
0947 
0948             ret = fadump_add_mem_range(&crash_mrange_info,
0949                            ra_end, end);
0950         } else if (start < ra_start) {
0951             ret = fadump_add_mem_range(&crash_mrange_info,
0952                            start, ra_start);
0953         } else if (ra_end < end) {
0954             ret = fadump_add_mem_range(&crash_mrange_info,
0955                            ra_end, end);
0956         }
0957     } else
0958         ret = fadump_add_mem_range(&crash_mrange_info, start, end);
0959 
0960     return ret;
0961 }
0962 
0963 static int fadump_init_elfcore_header(char *bufp)
0964 {
0965     struct elfhdr *elf;
0966 
0967     elf = (struct elfhdr *) bufp;
0968     bufp += sizeof(struct elfhdr);
0969     memcpy(elf->e_ident, ELFMAG, SELFMAG);
0970     elf->e_ident[EI_CLASS] = ELF_CLASS;
0971     elf->e_ident[EI_DATA] = ELF_DATA;
0972     elf->e_ident[EI_VERSION] = EV_CURRENT;
0973     elf->e_ident[EI_OSABI] = ELF_OSABI;
0974     memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
0975     elf->e_type = ET_CORE;
0976     elf->e_machine = ELF_ARCH;
0977     elf->e_version = EV_CURRENT;
0978     elf->e_entry = 0;
0979     elf->e_phoff = sizeof(struct elfhdr);
0980     elf->e_shoff = 0;
0981 
0982     if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
0983         elf->e_flags = 2;
0984     else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
0985         elf->e_flags = 1;
0986     else
0987         elf->e_flags = 0;
0988 
0989     elf->e_ehsize = sizeof(struct elfhdr);
0990     elf->e_phentsize = sizeof(struct elf_phdr);
0991     elf->e_phnum = 0;
0992     elf->e_shentsize = 0;
0993     elf->e_shnum = 0;
0994     elf->e_shstrndx = 0;
0995 
0996     return 0;
0997 }
0998 
/*
 * Traverse through memblock structure and setup crash memory ranges. These
 * ranges will be used to create PT_LOAD program headers in the elfcore
 * header.
 */
static int fadump_setup_crash_memory_ranges(void)
{
    u64 i, start, end;
    int ret;

    pr_debug("Setup crash memory ranges.\n");
    /* Rebuild the range list from scratch on every (re)registration. */
    crash_mrange_info.mem_range_cnt = 0;

    /*
     * Boot memory region(s) registered with firmware are moved to
     * different location at the time of crash. Create separate program
     * header(s) for this memory chunk(s) with the correct offset.
     */
    for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
        start = fw_dump.boot_mem_addr[i];
        end = start + fw_dump.boot_mem_sz[i];
        ret = fadump_add_mem_range(&crash_mrange_info, start, end);
        if (ret)
            return ret;
    }

    for_each_mem_range(i, &start, &end) {
        /*
         * skip the memory chunk that is already added
         * (0 through boot_memory_top).
         */
        if (start < fw_dump.boot_mem_top) {
            if (end > fw_dump.boot_mem_top)
                start = fw_dump.boot_mem_top;
            else
                continue;
        }

        /* add this range excluding the reserved dump area. */
        ret = fadump_exclude_reserved_area(start, end);
        if (ret)
            return ret;
    }

    return 0;
}
1044 
/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents. Addresses outside the boot
 * memory regions are returned unchanged.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
    unsigned long raddr, rstart, rend, rlast, hole_size;
    int i;

    hole_size = 0;
    rlast = 0;
    raddr = paddr;
    for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
        rstart = fw_dump.boot_mem_addr[i];
        rend = rstart + fw_dump.boot_mem_sz[i];
        /*
         * Boot memory regions are saved back-to-back at the dump
         * destination, so subtract the holes between regions when
         * translating.
         */
        hole_size += (rstart - rlast);

        if (paddr >= rstart && paddr < rend) {
            raddr += fw_dump.boot_mem_dest_addr - hole_size;
            break;
        }

        rlast = rend;
    }

    pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
    return raddr;
}
1074 
/*
 * Populate the ELF core header at @bufp: two PT_NOTE program headers
 * (CPU notes placeholder and vmcoreinfo) followed by one PT_LOAD header
 * per crash memory range, with boot memory ranges remapped to the dump
 * destination area. Always returns 0.
 */
static int fadump_create_elfcore_headers(char *bufp)
{
    unsigned long long raddr, offset;
    struct elf_phdr *phdr;
    struct elfhdr *elf;
    int i, j;

    fadump_init_elfcore_header(bufp);
    elf = (struct elfhdr *)bufp;
    bufp += sizeof(struct elfhdr);

    /*
     * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
     * will be populated during second kernel boot after crash. Hence
     * this PT_NOTE will always be the first elf note.
     *
     * NOTE: Any new ELF note addition should be placed after this note.
     */
    phdr = (struct elf_phdr *)bufp;
    bufp += sizeof(struct elf_phdr);
    phdr->p_type = PT_NOTE;
    phdr->p_flags = 0;
    phdr->p_vaddr = 0;
    phdr->p_align = 0;

    /* Zero offset/size for now: filled in after the crash. */
    phdr->p_offset = 0;
    phdr->p_paddr = 0;
    phdr->p_filesz = 0;
    phdr->p_memsz = 0;

    (elf->e_phnum)++;

    /* setup ELF PT_NOTE for vmcoreinfo */
    phdr = (struct elf_phdr *)bufp;
    bufp += sizeof(struct elf_phdr);
    phdr->p_type    = PT_NOTE;
    phdr->p_flags   = 0;
    phdr->p_vaddr   = 0;
    phdr->p_align   = 0;

    /* vmcoreinfo may live in boot memory - use the relocated address. */
    phdr->p_paddr   = fadump_relocate(paddr_vmcoreinfo_note());
    phdr->p_offset  = phdr->p_paddr;
    phdr->p_memsz   = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;

    /* Increment number of program headers. */
    (elf->e_phnum)++;

    /* setup PT_LOAD sections. */
    j = 0;
    offset = 0;
    raddr = fw_dump.boot_mem_addr[0];
    for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
        u64 mbase, msize;

        mbase = crash_mrange_info.mem_ranges[i].base;
        msize = crash_mrange_info.mem_ranges[i].size;
        if (!msize)
            continue;

        phdr = (struct elf_phdr *)bufp;
        bufp += sizeof(struct elf_phdr);
        phdr->p_type    = PT_LOAD;
        phdr->p_flags   = PF_R|PF_W|PF_X;
        phdr->p_offset  = mbase;

        if (mbase == raddr) {
            /*
             * The entire real memory region will be moved by
             * firmware to the specified destination_address.
             * Hence set the correct offset.
             */
            phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
            if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
                offset += fw_dump.boot_mem_sz[j];
                raddr = fw_dump.boot_mem_addr[++j];
            }
        }

        phdr->p_paddr = mbase;
        phdr->p_vaddr = (unsigned long)__va(mbase);
        phdr->p_filesz = msize;
        phdr->p_memsz = msize;
        phdr->p_align = 0;

        /* Increment number of program headers. */
        (elf->e_phnum)++;
    }
    return 0;
}
1164 
/*
 * Initialize the FADump crash info header at physical address @addr and
 * return the physical address immediately after it, where the ELF core
 * header will be placed. Returns 0 when @addr is 0.
 */
static unsigned long init_fadump_header(unsigned long addr)
{
    struct fadump_crash_info_header *fdh;

    if (!addr)
        return 0;

    fdh = __va(addr);
    addr += sizeof(struct fadump_crash_info_header);

    memset(fdh, 0, sizeof(struct fadump_crash_info_header));
    fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
    /* The ELF core header is laid out right after this header. */
    fdh->elfcorehdr_addr = addr;
    /* We will set the crashing cpu id in crash_fadump() during crash. */
    fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
    /*
     * When LPAR is terminated by PHYP, ensure all possible CPUs'
     * register data is processed while exporting the vmcore.
     */
    fdh->cpu_mask = *cpu_possible_mask;

    return addr;
}
1188 
1189 static int register_fadump(void)
1190 {
1191     unsigned long addr;
1192     void *vaddr;
1193     int ret;
1194 
1195     /*
1196      * If no memory is reserved then we can not register for firmware-
1197      * assisted dump.
1198      */
1199     if (!fw_dump.reserve_dump_area_size)
1200         return -ENODEV;
1201 
1202     ret = fadump_setup_crash_memory_ranges();
1203     if (ret)
1204         return ret;
1205 
1206     addr = fw_dump.fadumphdr_addr;
1207 
1208     /* Initialize fadump crash info header. */
1209     addr = init_fadump_header(addr);
1210     vaddr = __va(addr);
1211 
1212     pr_debug("Creating ELF core headers at %#016lx\n", addr);
1213     fadump_create_elfcore_headers(vaddr);
1214 
1215     /* register the future kernel dump with firmware. */
1216     pr_debug("Registering for firmware-assisted kernel dump...\n");
1217     return fw_dump.ops->fadump_register(&fw_dump);
1218 }
1219 
1220 void fadump_cleanup(void)
1221 {
1222     if (!fw_dump.fadump_supported)
1223         return;
1224 
1225     /* Invalidate the registration only if dump is active. */
1226     if (fw_dump.dump_active) {
1227         pr_debug("Invalidating firmware-assisted dump registration\n");
1228         fw_dump.ops->fadump_invalidate(&fw_dump);
1229     } else if (fw_dump.dump_registered) {
1230         /* Un-register Firmware-assisted dump if it was registered. */
1231         fw_dump.ops->fadump_unregister(&fw_dump);
1232         fadump_free_mem_ranges(&crash_mrange_info);
1233     }
1234 
1235     if (fw_dump.ops->fadump_cleanup)
1236         fw_dump.ops->fadump_cleanup(&fw_dump);
1237 }
1238 
1239 static void fadump_free_reserved_memory(unsigned long start_pfn,
1240                     unsigned long end_pfn)
1241 {
1242     unsigned long pfn;
1243     unsigned long time_limit = jiffies + HZ;
1244 
1245     pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
1246         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
1247 
1248     for (pfn = start_pfn; pfn < end_pfn; pfn++) {
1249         free_reserved_page(pfn_to_page(pfn));
1250 
1251         if (time_after(jiffies, time_limit)) {
1252             cond_resched();
1253             time_limit = jiffies + HZ;
1254         }
1255     }
1256 }
1257 
/*
 * Skip memory holes and free memory that was actually reserved.
 * @start and @end are physical addresses; the loop below works on page
 * frame numbers and frees only the intersection with real memory.
 */
static void fadump_release_reserved_area(u64 start, u64 end)
{
    unsigned long reg_spfn, reg_epfn;
    u64 tstart, tend, spfn, epfn;
    int i;

    spfn = PHYS_PFN(start);
    epfn = PHYS_PFN(end);

    for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
        /* Clamp the region to the [spfn, epfn) window. */
        tstart = max_t(u64, spfn, reg_spfn);
        tend   = min_t(u64, epfn, reg_epfn);

        if (tstart < tend) {
            fadump_free_reserved_memory(tstart, tend);

            /* Whole window covered - done. */
            if (tend == epfn)
                break;

            spfn = tend;
        }
    }
}
1284 
1285 /*
1286  * Sort the mem ranges in-place and merge adjacent ranges
1287  * to minimize the memory ranges count.
1288  */
1289 static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
1290 {
1291     struct fadump_memory_range *mem_ranges;
1292     u64 base, size;
1293     int i, j, idx;
1294 
1295     if (!reserved_mrange_info.mem_range_cnt)
1296         return;
1297 
1298     /* Sort the memory ranges */
1299     mem_ranges = mrange_info->mem_ranges;
1300     for (i = 0; i < mrange_info->mem_range_cnt; i++) {
1301         idx = i;
1302         for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
1303             if (mem_ranges[idx].base > mem_ranges[j].base)
1304                 idx = j;
1305         }
1306         if (idx != i)
1307             swap(mem_ranges[idx], mem_ranges[i]);
1308     }
1309 
1310     /* Merge adjacent reserved ranges */
1311     idx = 0;
1312     for (i = 1; i < mrange_info->mem_range_cnt; i++) {
1313         base = mem_ranges[i-1].base;
1314         size = mem_ranges[i-1].size;
1315         if (mem_ranges[i].base == (base + size))
1316             mem_ranges[idx].size += mem_ranges[i].size;
1317         else {
1318             idx++;
1319             if (i == idx)
1320                 continue;
1321 
1322             mem_ranges[idx] = mem_ranges[i];
1323         }
1324     }
1325     mrange_info->mem_range_cnt = idx + 1;
1326 }
1327 
/*
 * Scan reserved-ranges to consider them while reserving/releasing
 * memory for FADump. Parses the flat-DT "reserved-ranges" property of
 * @node into reserved_mrange_info, then sorts and merges the result.
 */
static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
    const __be32 *prop;
    int len, ret = -1;
    unsigned long i;

    /* reserved-ranges already scanned */
    if (reserved_mrange_info.mem_range_cnt != 0)
        return;

    prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
    if (!prop)
        return;

    /*
     * Each reserved range is an (address,size) pair, 2 cells each,
     * totalling 4 cells per range.
     */
    for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
        u64 base, size;

        base = of_read_number(prop + (i * 4) + 0, 2);
        size = of_read_number(prop + (i * 4) + 2, 2);

        if (size) {
            ret = fadump_add_mem_range(&reserved_mrange_info,
                           base, base + size);
            if (ret < 0) {
                /* Static array is full: keep what fits. */
                pr_warn("some reserved ranges are ignored!\n");
                break;
            }
        }
    }

    /* Compact reserved ranges */
    sort_and_merge_mem_ranges(&reserved_mrange_info);
}
1369 
/*
 * Release the memory that was reserved during early boot to preserve the
 * crash'ed kernel's memory contents except reserved dump area (permanent
 * reservation) and reserved ranges used by F/W. The released memory will
 * be available for general use.
 */
static void fadump_release_memory(u64 begin, u64 end)
{
    u64 ra_start, ra_end, tstart;
    int i, ret;

    ra_start = fw_dump.reserve_dump_area_start;
    ra_end = ra_start + fw_dump.reserve_dump_area_size;

    /*
     * If reserved ranges array limit is hit, overwrite the last reserved
     * memory range with reserved dump area to ensure it is excluded from
     * the memory being released (reused for next FADump registration).
     */
    if (reserved_mrange_info.mem_range_cnt ==
        reserved_mrange_info.max_mem_ranges)
        reserved_mrange_info.mem_range_cnt--;

    ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
    if (ret != 0)
        return;

    /* Get the reserved ranges list in order first. */
    sort_and_merge_mem_ranges(&reserved_mrange_info);

    /* Exclude reserved ranges and release remaining memory */
    tstart = begin;
    for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
        ra_start = reserved_mrange_info.mem_ranges[i].base;
        ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;

        /* Reserved range lies entirely below the release cursor. */
        if (tstart >= ra_end)
            continue;

        /* Release the gap below this reserved range, then skip past it. */
        if (tstart < ra_start)
            fadump_release_reserved_area(tstart, ra_start);
        tstart = ra_end;
    }

    /* Release whatever remains above the last reserved range. */
    if (tstart < end)
        fadump_release_reserved_area(tstart, end);
}
1417 
/*
 * Invalidate the active dump with firmware, release the preserved memory
 * back to the kernel and re-initialize FADump state so it can be
 * registered again. No-op when no dump is active.
 */
static void fadump_invalidate_release_mem(void)
{
    mutex_lock(&fadump_mutex);
    if (!fw_dump.dump_active) {
        mutex_unlock(&fadump_mutex);
        return;
    }

    fadump_cleanup();
    mutex_unlock(&fadump_mutex);

    /* Free everything above boot memory that preserved the old crash. */
    fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
    fadump_free_cpu_notes_buf();

    /*
     * Setup kernel metadata and initialize the kernel dump
     * memory structure for FADump re-registration.
     */
    if (fw_dump.ops->fadump_setup_metadata &&
        (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
        pr_warn("Failed to setup kernel metadata!\n");
    fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
1441 
1442 static ssize_t release_mem_store(struct kobject *kobj,
1443                  struct kobj_attribute *attr,
1444                  const char *buf, size_t count)
1445 {
1446     int input = -1;
1447 
1448     if (!fw_dump.dump_active)
1449         return -EPERM;
1450 
1451     if (kstrtoint(buf, 0, &input))
1452         return -EINVAL;
1453 
1454     if (input == 1) {
1455         /*
1456          * Take away the '/proc/vmcore'. We are releasing the dump
1457          * memory, hence it will not be valid anymore.
1458          */
1459 #ifdef CONFIG_PROC_VMCORE
1460         vmcore_cleanup();
1461 #endif
1462         fadump_invalidate_release_mem();
1463 
1464     } else
1465         return -EINVAL;
1466     return count;
1467 }
1468 
/* Release the reserved memory and disable the FADump */
static void __init unregister_fadump(void)
{
    fadump_cleanup();
    /*
     * NOTE(review): fadump_release_memory() takes (begin, end) physical
     * addresses, but the reservation *size* is passed as the second
     * argument here - confirm this is intentional.
     */
    fadump_release_memory(fw_dump.reserve_dump_area_start,
                  fw_dump.reserve_dump_area_size);
    fw_dump.fadump_enabled = 0;
    kobject_put(fadump_kobj);
}
1478 
1479 static ssize_t enabled_show(struct kobject *kobj,
1480                 struct kobj_attribute *attr,
1481                 char *buf)
1482 {
1483     return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
1484 }
1485 
1486 static ssize_t mem_reserved_show(struct kobject *kobj,
1487                  struct kobj_attribute *attr,
1488                  char *buf)
1489 {
1490     return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
1491 }
1492 
1493 static ssize_t registered_show(struct kobject *kobj,
1494                    struct kobj_attribute *attr,
1495                    char *buf)
1496 {
1497     return sprintf(buf, "%d\n", fw_dump.dump_registered);
1498 }
1499 
1500 static ssize_t registered_store(struct kobject *kobj,
1501                 struct kobj_attribute *attr,
1502                 const char *buf, size_t count)
1503 {
1504     int ret = 0;
1505     int input = -1;
1506 
1507     if (!fw_dump.fadump_enabled || fw_dump.dump_active)
1508         return -EPERM;
1509 
1510     if (kstrtoint(buf, 0, &input))
1511         return -EINVAL;
1512 
1513     mutex_lock(&fadump_mutex);
1514 
1515     switch (input) {
1516     case 0:
1517         if (fw_dump.dump_registered == 0) {
1518             goto unlock_out;
1519         }
1520 
1521         /* Un-register Firmware-assisted dump */
1522         pr_debug("Un-register firmware-assisted dump\n");
1523         fw_dump.ops->fadump_unregister(&fw_dump);
1524         break;
1525     case 1:
1526         if (fw_dump.dump_registered == 1) {
1527             /* Un-register Firmware-assisted dump */
1528             fw_dump.ops->fadump_unregister(&fw_dump);
1529         }
1530         /* Register Firmware-assisted dump */
1531         ret = register_fadump();
1532         break;
1533     default:
1534         ret = -EINVAL;
1535         break;
1536     }
1537 
1538 unlock_out:
1539     mutex_unlock(&fadump_mutex);
1540     return ret < 0 ? ret : count;
1541 }
1542 
1543 static int fadump_region_show(struct seq_file *m, void *private)
1544 {
1545     if (!fw_dump.fadump_enabled)
1546         return 0;
1547 
1548     mutex_lock(&fadump_mutex);
1549     fw_dump.ops->fadump_region_show(&fw_dump, m);
1550     mutex_unlock(&fadump_mutex);
1551     return 0;
1552 }
1553 
/* Sysfs attributes exposed under /sys/kernel/fadump/. */
static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
static struct kobj_attribute register_attr = __ATTR_RW(registered);
static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);

/* Default group; release_mem is added separately when a dump is active. */
static struct attribute *fadump_attrs[] = {
    &enable_attr.attr,
    &register_attr.attr,
    &mem_reserved_attr.attr,
    NULL,
};

ATTRIBUTE_GROUPS(fadump);

/* Generates fadump_region_fops for the debugfs file below. */
DEFINE_SHOW_ATTRIBUTE(fadump_region);
1569 
/*
 * Create the FADump sysfs interface (/sys/kernel/fadump/), the debugfs
 * fadump_region file, and backward-compatibility symlinks under
 * /sys/kernel/. On group-creation failure FADump is fully unregistered.
 */
static void __init fadump_init_files(void)
{
    int rc = 0;

    fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
    if (!fadump_kobj) {
        pr_err("failed to create fadump kobject\n");
        return;
    }

    debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
                &fadump_region_fops);

    /* release_mem only makes sense when a previous dump is active. */
    if (fw_dump.dump_active) {
        rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
        if (rc)
            pr_err("unable to create release_mem sysfs file (%d)\n",
                   rc);
    }

    rc = sysfs_create_groups(fadump_kobj, fadump_groups);
    if (rc) {
        pr_err("sysfs group creation failed (%d), unregistering FADump",
               rc);
        unregister_fadump();
        return;
    }

    /*
     * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to
     * create symlink at old location to maintain backward compatibility.
     *
     *      - fadump_enabled -> fadump/enabled
     *      - fadump_registered -> fadump/registered
     *      - fadump_release_mem -> fadump/release_mem
     */
    rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
                          "enabled", "fadump_enabled");
    if (rc) {
        pr_err("unable to create fadump_enabled symlink (%d)", rc);
        return;
    }

    rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
                          "registered",
                          "fadump_registered");
    if (rc) {
        pr_err("unable to create fadump_registered symlink (%d)", rc);
        /* Keep the links consistent: drop the one already created. */
        sysfs_remove_link(kernel_kobj, "fadump_enabled");
        return;
    }

    if (fw_dump.dump_active) {
        rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
                              fadump_kobj,
                              "release_mem",
                              "fadump_release_mem");
        if (rc)
            pr_err("unable to create fadump_release_mem symlink (%d)",
                   rc);
    }
    return;
}
1633 
/*
 * Prepare for firmware-assisted dump: create the sysfs/debugfs files,
 * then either process an existing dump or register a fresh one.
 */
int __init setup_fadump(void)
{
    if (!fw_dump.fadump_supported)
        return 0;

    fadump_init_files();
    fadump_show_config();

    if (!fw_dump.fadump_enabled)
        return 1;

    /*
     * If dump data is available then see if it is valid and prepare for
     * saving it to the disk.
     */
    if (fw_dump.dump_active) {
        /*
         * if dump process fails then invalidate the registration
         * and release memory before proceeding for re-registration.
         */
        if (fw_dump.ops->fadump_process(&fw_dump) < 0)
            fadump_invalidate_release_mem();
    }
    /* Initialize the kernel dump memory structure and register with f/w */
    else if (fw_dump.reserve_dump_area_size) {
        fw_dump.ops->fadump_init_mem_struct(&fw_dump);
        register_fadump();
    }

    /*
     * In case of panic, fadump is triggered via ppc_panic_event()
     * panic notifier. Setting crash_kexec_post_notifiers to 'true'
     * lets panic() function take crash friendly path before panic
     * notifiers are invoked.
     */
    crash_kexec_post_notifiers = true;

    return 1;
}
/*
 * Use subsys_initcall_sync() here because of a dependency on
 * crash_save_vmcoreinfo_init(), which must run first so that vmcoreinfo
 * is initialized before we register with f/w.
 */
subsys_initcall_sync(setup_fadump);
1682 #else /* !CONFIG_PRESERVE_FA_DUMP */
1683 
1684 /* Scan the Firmware Assisted dump configuration details. */
1685 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
1686                       int depth, void *data)
1687 {
1688     if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
1689         return 0;
1690 
1691     opal_fadump_dt_scan(&fw_dump, node);
1692     return 1;
1693 }
1694 
1695 /*
1696  * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
1697  * preserve crash data. The subsequent memory preserving kernel boot
1698  * is likely to process this crash data.
1699  */
1700 int __init fadump_reserve_mem(void)
1701 {
1702     if (fw_dump.dump_active) {
1703         /*
1704          * If last boot has crashed then reserve all the memory
1705          * above boot memory to preserve crash data.
1706          */
1707         pr_info("Preserving crash data for processing in next boot.\n");
1708         fadump_reserve_crash_area(fw_dump.boot_mem_top);
1709     } else
1710         pr_debug("FADump-aware kernel..\n");
1711 
1712     return 1;
1713 }
1714 #endif /* CONFIG_PRESERVE_FA_DUMP */
1715 
1716 /* Preserve everything above the base address */
1717 static void __init fadump_reserve_crash_area(u64 base)
1718 {
1719     u64 i, mstart, mend, msize;
1720 
1721     for_each_mem_range(i, &mstart, &mend) {
1722         msize  = mend - mstart;
1723 
1724         if ((mstart + msize) < base)
1725             continue;
1726 
1727         if (mstart < base) {
1728             msize -= (base - mstart);
1729             mstart = base;
1730         }
1731 
1732         pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
1733             (msize >> 20), mstart);
1734         memblock_reserve(mstart, msize);
1735     }
1736 }
1737 
1738 unsigned long __init arch_reserved_kernel_pages(void)
1739 {
1740     return memblock_reserved_size() / PAGE_SIZE;
1741 }