/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002       Patrick Mochel

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>

#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/memtype.h>

#include "mtrr.h"

/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000

u32 num_var_ranges;
static bool __mtrr_enabled;

static bool mtrr_enabled(void)
{
    return __mtrr_enabled;
}

unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);

u64 size_or_mask, size_and_mask;
static bool mtrr_aps_delayed_init;

static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;

const struct mtrr_ops *mtrr_if;

static void set_mtrr(unsigned int reg, unsigned long base,
             unsigned long size, mtrr_type type);

void __init set_mtrr_ops(const struct mtrr_ops *ops)
{
    if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
        mtrr_ops[ops->vendor] = ops;
}
/*  Returns non-zero if we have the write-combining memory type  */
static int have_wrcomb(void)
{
    struct pci_dev *dev;

    dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
    if (dev != NULL) {
        /*
         * ServerWorks LE chipsets < rev 6 have problems with
         * write-combining. Don't allow it and leave room for other
         * chipsets to be tagged
         */
        if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
            dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
            dev->revision <= 5) {
            pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
            pci_dev_put(dev);
            return 0;
        }
        /*
         * Intel 450NX errata #23: non-ascending cacheline evictions to
         * write-combining memory may result in data corruption.
         */
        if (dev->vendor == PCI_VENDOR_ID_INTEL &&
            dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
            pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
            pci_dev_put(dev);
            return 0;
        }
        pci_dev_put(dev);
    }
    return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}

/*  This function sets num_var_ranges, the number of variable MTRRs  */
static void __init set_num_var_ranges(void)
{
    unsigned long config = 0, dummy;

    if (use_intel())
        rdmsr(MSR_MTRRcap, config, dummy);
    else if (is_cpu(AMD) || is_cpu(HYGON))
        config = 2;
    else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
        config = 8;

    num_var_ranges = config & 0xff;
}

static void __init init_table(void)
{
    int i, max;

    max = num_var_ranges;
    for (i = 0; i < max; i++)
        mtrr_usage_table[i] = 1;
}

struct set_mtrr_data {
    unsigned long   smp_base;
    unsigned long   smp_size;
    unsigned int    smp_reg;
    mtrr_type   smp_type;
};

/**
 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
 * by all the CPUs.
 * @info: pointer to mtrr configuration data
 *
 * Returns zero, as required by the stop_machine() callback signature.
 */
static int mtrr_rendezvous_handler(void *info)
{
    struct set_mtrr_data *data = info;

    /*
     * We use this same function to initialize the mtrrs during boot,
     * resume, runtime cpu online and on an explicit request to set a
     * specific MTRR.
     *
     * During boot or suspend, the state of the boot cpu's mtrrs has been
     * saved, and we want to replicate that across all the cpus that come
     * online (either at the end of boot or resume or during a runtime cpu
     * online). If we're doing that, @reg is set to something special and on
     * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
     * started the boot/resume sequence, this might be a duplicate
     * set_all()).
     */
    if (data->smp_reg != ~0U) {
        mtrr_if->set(data->smp_reg, data->smp_base,
                 data->smp_size, data->smp_type);
    } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
        mtrr_if->set_all();
    }
    return 0;
}

static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
    return type1 == MTRR_TYPE_UNCACHABLE ||
           type2 == MTRR_TYPE_UNCACHABLE ||
           (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
           (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}
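
/*
 * An illustrative sketch, added for clarity and not part of the original
 * source: the rule above means UNCACHABLE is compatible with anything,
 * and WRTHROUGH/WRBACK are compatible with each other, but e.g. WRCOMB
 * and WRBACK are not:
 *
 *    types_compatible(MTRR_TYPE_UNCACHABLE, MTRR_TYPE_WRBACK);  // true
 *    types_compatible(MTRR_TYPE_WRTHROUGH, MTRR_TYPE_WRBACK);   // true
 *    types_compatible(MTRR_TYPE_WRCOMB, MTRR_TYPE_WRBACK);      // false
 */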

/**
 * set_mtrr - update mtrrs on all processors
 * @reg:    mtrr in question
 * @base:   mtrr base
 * @size:   mtrr size
 * @type:   mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Queue work to do the following on all processors:
 * 2. Disable interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, stop_machine() will ensure that
 * the rendezvous handler is started on each CPU. And in lockstep they
 * do the state transition of disabling interrupts, updating MTRR's
 * (the CPU vendors may each do it differently, so we call mtrr_if->set()
 * callback and let them take care of it.) and enabling interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
{
    struct set_mtrr_data data = { .smp_reg = reg,
                      .smp_base = base,
                      .smp_size = size,
                      .smp_type = type
                    };

    stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
}

static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
                unsigned long size, mtrr_type type)
{
    struct set_mtrr_data data = { .smp_reg = reg,
                      .smp_base = base,
                      .smp_size = size,
                      .smp_type = type
                    };

    stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
}

static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
                      unsigned long size, mtrr_type type)
{
    struct set_mtrr_data data = { .smp_reg = reg,
                      .smp_base = base,
                      .smp_size = size,
                      .smp_type = type
                    };

    stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
                       cpu_callout_mask);
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request that
 * an MTRR be added. The details and hardware specifics of each
 * processor's implementation are hidden from the caller, but
 * nevertheless the caller should expect to need to provide a power of
 * two size on an equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it, a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add_page(unsigned long base, unsigned long size,
          unsigned int type, bool increment)
{
    unsigned long lbase, lsize;
    int i, replace, error;
    mtrr_type ltype;

    if (!mtrr_enabled())
        return -ENXIO;

    error = mtrr_if->validate_add_page(base, size, type);
    if (error)
        return error;

    if (type >= MTRR_NUM_TYPES) {
        pr_warn("type: %u invalid\n", type);
        return -EINVAL;
    }

    /* If the type is WC, check that this processor supports it */
    if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
        pr_warn("your processor doesn't support write-combining\n");
        return -ENOSYS;
    }

    if (!size) {
        pr_warn("zero sized request\n");
        return -EINVAL;
    }

    if ((base | (base + size - 1)) >>
        (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
        pr_warn("base or size exceeds the MTRR width\n");
        return -EINVAL;
    }

    error = -EINVAL;
    replace = -1;

    /* No CPU hotplug when we change MTRR entries */
    cpus_read_lock();

    /* Search for existing MTRR  */
    mutex_lock(&mtrr_mutex);
    for (i = 0; i < num_var_ranges; ++i) {
        mtrr_if->get(i, &lbase, &lsize, &ltype);
        if (!lsize || base > lbase + lsize - 1 ||
            base + size - 1 < lbase)
            continue;
        /*
         * At this point we know there is some kind of
         * overlap/enclosure
         */
        if (base < lbase || base + size - 1 > lbase + lsize - 1) {
            if (base <= lbase &&
                base + size - 1 >= lbase + lsize - 1) {
                /*  New region encloses an existing region  */
                if (type == ltype) {
                    replace = replace == -1 ? i : -2;
                    continue;
                } else if (types_compatible(type, ltype))
                    continue;
            }
            pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n",
                base, size, lbase, lsize);
            goto out;
        }
        /* New region is enclosed by an existing region */
        if (ltype != type) {
            if (types_compatible(type, ltype))
                continue;
            pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
                base, size, mtrr_attrib_to_str(ltype),
                mtrr_attrib_to_str(type));
            goto out;
        }
        if (increment)
            ++mtrr_usage_table[i];
        error = i;
        goto out;
    }
    /* Search for an empty MTRR */
    i = mtrr_if->get_free_region(base, size, replace);
    if (i >= 0) {
        set_mtrr_cpuslocked(i, base, size, type);
        if (likely(replace < 0)) {
            mtrr_usage_table[i] = 1;
        } else {
            mtrr_usage_table[i] = mtrr_usage_table[replace];
            if (increment)
                mtrr_usage_table[i]++;
            if (unlikely(replace != i)) {
                set_mtrr_cpuslocked(replace, 0, 0, 0);
                mtrr_usage_table[replace] = 0;
            }
        }
    } else {
        pr_info("no more MTRRs available\n");
    }
    error = i;
 out:
    mutex_unlock(&mtrr_mutex);
    cpus_read_unlock();
    return error;
}
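
/*
 * A usage sketch, added for illustration and not part of the original
 * source. Callers work in 4 kB page units, so a driver with a byte-granular
 * region shifts by PAGE_SHIFT first; the base and size below are made up:
 *
 *    unsigned long base = 0xd0000000UL;    // hypothetical MMIO base
 *    unsigned long size = 0x400000UL;      // hypothetical 4 MiB region
 *    int reg = mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT,
 *                            MTRR_TYPE_WRCOMB, true);
 *    if (reg < 0)
 *        pr_warn("mtrr_add_page failed: %d\n", reg);
 */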

static int mtrr_check(unsigned long base, unsigned long size)
{
    if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
        pr_warn("size and base must be multiples of 4 kiB\n");
        pr_debug("size: 0x%lx  base: 0x%lx\n", size, base);
        dump_stack();
        return -1;
    }
    return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request that
 * an MTRR be added. The details and hardware specifics of each
 * processor's implementation are hidden from the caller, but
 * nevertheless the caller should expect to need to provide a power of
 * two size on an equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it, a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
         bool increment)
{
    if (!mtrr_enabled())
        return -ENODEV;
    if (mtrr_check(base, size))
        return -EINVAL;
    return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
                 increment);
}
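
/*
 * A hedged usage sketch, not part of the original source: a hypothetical
 * framebuffer driver might request write-combining for its aperture.
 * Base and size must be 4 kiB multiples (enforced by mtrr_check() above),
 * and the size should be a power of two on an equivalent boundary;
 * fb_base and fb_size are assumed names:
 *
 *    int reg = mtrr_add(fb_base, fb_size, MTRR_TYPE_WRCOMB, true);
 *    if (reg < 0)
 *        pr_warn("no WC MTRR; writes may be slow\n");
 *    // keep 'reg' as an opaque cookie for a later mtrr_del()
 */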

/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned; on failure a negative error
 * code is returned.
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
    int i, max;
    mtrr_type ltype;
    unsigned long lbase, lsize;
    int error = -EINVAL;

    if (!mtrr_enabled())
        return -ENODEV;

    max = num_var_ranges;
    /* No CPU hotplug when we change MTRR entries */
    cpus_read_lock();
    mutex_lock(&mtrr_mutex);
    if (reg < 0) {
        /*  Search for existing MTRR  */
        for (i = 0; i < max; ++i) {
            mtrr_if->get(i, &lbase, &lsize, &ltype);
            if (lbase == base && lsize == size) {
                reg = i;
                break;
            }
        }
        if (reg < 0) {
            pr_debug("no MTRR for %lx000,%lx000 found\n",
                 base, size);
            goto out;
        }
    }
    if (reg >= max) {
        pr_warn("register: %d too big\n", reg);
        goto out;
    }
    mtrr_if->get(reg, &lbase, &lsize, &ltype);
    if (lsize < 1) {
        pr_warn("MTRR %d not used\n", reg);
        goto out;
    }
    if (mtrr_usage_table[reg] < 1) {
        pr_warn("reg: %d has count=0\n", reg);
        goto out;
    }
    if (--mtrr_usage_table[reg] < 1)
        set_mtrr_cpuslocked(reg, 0, 0, 0);
    error = reg;
 out:
    mutex_unlock(&mtrr_mutex);
    cpus_read_unlock();
    return error;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned; on failure a negative error
 * code is returned.
 */
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
    if (!mtrr_enabled())
        return -ENODEV;
    if (mtrr_check(base, size))
        return -EINVAL;
    return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
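
/*
 * An illustrative teardown sketch (not part of the original source),
 * matching the mtrr_add() sketch above. Passing the cookie makes base and
 * size ignored, which is how drivers should call it:
 *
 *    mtrr_del(reg, 0, 0);
 *
 * Alternatively, if the cookie was lost, the region can be looked up by
 * its (hypothetical) base and size:
 *
 *    mtrr_del(-1, fb_base, fb_size);
 */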

/**
 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 * @base: Physical base address
 * @size: Size of region
 *
 * If PAT is available, this does nothing.  If PAT is unavailable, it
 * attempts to add a WC MTRR covering size bytes starting at base and
 * logs an error if this fails.
 *
 * The caller should provide a power of two size on an equivalent
 * power of two boundary.
 *
 * Drivers must store the return value to pass to arch_phys_wc_del,
 * but drivers should not try to interpret that return value.
 */
int arch_phys_wc_add(unsigned long base, unsigned long size)
{
    int ret;

    if (pat_enabled() || !mtrr_enabled())
        return 0;  /* Success!  (We don't need to do anything.) */

    ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
    if (ret < 0) {
        pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.\n",
            (void *)base, (void *)(base + size - 1));
        return ret;
    }
    return ret + MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL(arch_phys_wc_add);

/*
 * arch_phys_wc_del - undoes arch_phys_wc_add
 * @handle: Return value from arch_phys_wc_add
 *
 * This cleans up after arch_phys_wc_add.
 *
 * The API guarantees that arch_phys_wc_del(error code) and
 * arch_phys_wc_del(0) do nothing.
 */
void arch_phys_wc_del(int handle)
{
    if (handle >= 1) {
        WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
        mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
    }
}
EXPORT_SYMBOL(arch_phys_wc_del);
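
/*
 * A hedged sketch (not part of the original source) of the PAT-aware
 * pattern these two helpers enable. The handle is stored but never
 * interpreted, and arch_phys_wc_del() is safe to call with 0 or an
 * error code; bar_start and bar_len are assumed names for a device
 * aperture:
 *
 *    int wc_cookie = arch_phys_wc_add(bar_start, bar_len);
 *    ...
 *    arch_phys_wc_del(wc_cookie);
 */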

/*
 * arch_phys_wc_index - translates arch_phys_wc_add's return value
 * @handle: Return value from arch_phys_wc_add
 *
 * This will turn the return value from arch_phys_wc_add into an mtrr
 * index suitable for debugging.
 *
 * Note: There is no legitimate use for this function, except possibly
 * in a printk line.  Alas, there is an illegitimate use in some ancient
 * drm ioctls.
 */
int arch_phys_wc_index(int handle)
{
    if (handle < MTRR_TO_PHYS_WC_OFFSET)
        return -1;
    else
        return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);
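
/*
 * Illustrative only (not part of the original source): the index is
 * meant for diagnostics, e.g.
 *
 *    pr_debug("WC handle %d maps to MTRR %d\n",
 *             handle, arch_phys_wc_index(handle));
 *
 * where any handle below MTRR_TO_PHYS_WC_OFFSET yields -1.
 */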

/*
 * HACK ALERT!
 * These should be called implicitly, but we can't yet until all the initcall
 * stuff is done...
 */
static void __init init_ifs(void)
{
#ifndef CONFIG_X86_64
    amd_init_mtrr();
    cyrix_init_mtrr();
    centaur_init_mtrr();
#endif
}

/*
 * The suspend/resume methods are only for CPUs without MTRRs; CPUs using
 * the generic MTRR driver don't require this.
 */
struct mtrr_value {
    mtrr_type   ltype;
    unsigned long   lbase;
    unsigned long   lsize;
};

static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];

static int mtrr_save(void)
{
    int i;

    for (i = 0; i < num_var_ranges; i++) {
        mtrr_if->get(i, &mtrr_value[i].lbase,
                &mtrr_value[i].lsize,
                &mtrr_value[i].ltype);
    }
    return 0;
}

static void mtrr_restore(void)
{
    int i;

    for (i = 0; i < num_var_ranges; i++) {
        if (mtrr_value[i].lsize) {
            set_mtrr(i, mtrr_value[i].lbase,
                    mtrr_value[i].lsize,
                    mtrr_value[i].ltype);
        }
    }
}

static struct syscore_ops mtrr_syscore_ops = {
    .suspend    = mtrr_save,
    .resume     = mtrr_restore,
};

int __initdata changed_by_mtrr_cleanup;

#define SIZE_OR_MASK_BITS(n)  (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
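
/*
 * A worked example, added for clarity (not part of the original source):
 * with PAGE_SHIFT == 12, SIZE_OR_MASK_BITS(36) expands to
 * ~((1ULL << 24) - 1) == 0xffffffffff000000ULL, i.e. it sets every bit
 * above a 36-bit physical address width expressed in page-frame units.
 */
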
/**
 * mtrr_bp_init - initialize mtrrs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 */
void __init mtrr_bp_init(void)
{
    u32 phys_addr;

    init_ifs();

    phys_addr = 32;

    if (boot_cpu_has(X86_FEATURE_MTRR)) {
        mtrr_if = &generic_mtrr_ops;
        size_or_mask = SIZE_OR_MASK_BITS(36);
        size_and_mask = 0x00f00000;
        phys_addr = 36;

        /*
         * This is an AMD-specific MSR, but we assume (hope?) that
         * Intel will implement it too when they extend the address
         * bus of the Xeon.
         */
        if (cpuid_eax(0x80000000) >= 0x80000008) {
            phys_addr = cpuid_eax(0x80000008) & 0xff;
            /* CPUID workaround for Intel 0F33/0F34 CPU */
            if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
                boot_cpu_data.x86 == 0xF &&
                boot_cpu_data.x86_model == 0x3 &&
                (boot_cpu_data.x86_stepping == 0x3 ||
                 boot_cpu_data.x86_stepping == 0x4))
                phys_addr = 36;

            size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
            size_and_mask = ~size_or_mask & 0xfffff00000ULL;
        } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
               boot_cpu_data.x86 == 6) {
            /*
             * VIA C* family have Intel style MTRRs,
             * but don't support PAE
             */
            size_or_mask = SIZE_OR_MASK_BITS(32);
            size_and_mask = 0;
            phys_addr = 32;
        }
    } else {
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
            if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
                /* Pre-Athlon (K6) AMD CPU MTRRs */
                mtrr_if = mtrr_ops[X86_VENDOR_AMD];
                size_or_mask = SIZE_OR_MASK_BITS(32);
                size_and_mask = 0;
            }
            break;
        case X86_VENDOR_CENTAUR:
            if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
                mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
                size_or_mask = SIZE_OR_MASK_BITS(32);
                size_and_mask = 0;
            }
            break;
        case X86_VENDOR_CYRIX:
            if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
                mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
                size_or_mask = SIZE_OR_MASK_BITS(32);
                size_and_mask = 0;
            }
            break;
        default:
            break;
        }
    }

    if (mtrr_if) {
        __mtrr_enabled = true;
        set_num_var_ranges();
        init_table();
        if (use_intel()) {
            /* BIOS may override */
            __mtrr_enabled = get_mtrr_state();

            if (mtrr_enabled())
                mtrr_bp_pat_init();

            if (mtrr_cleanup(phys_addr)) {
                changed_by_mtrr_cleanup = 1;
                mtrr_if->set_all();
            }
        }
    }

    if (!mtrr_enabled()) {
        pr_info("Disabled\n");

        /*
         * PAT initialization relies on MTRR's rendezvous handler.
         * Skip PAT init until the handler can initialize both
         * features independently.
         */
        pat_disable("MTRRs disabled, skipping PAT initialization too.");
    }
}

void mtrr_ap_init(void)
{
    if (!mtrr_enabled())
        return;

    if (!use_intel() || mtrr_aps_delayed_init)
        return;

    /*
     * Ideally we should hold mtrr_mutex here to avoid MTRR entries
     * being changed, but this routine is called at CPU boot time,
     * and holding the lock would break it.
     *
     * This routine is called in two cases:
     *
     *   1. very early in software resume, when there absolutely are
     *      no MTRR entry changes;
     *
     *   2. CPU hotadd time, when we let mtrr_add/del_page hold the
     *      cpuhotplug lock to prevent MTRR entry changes
     */
    set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
}

/**
 * mtrr_save_state - Save current fixed-range MTRR state of the first
 *  cpu in cpu_online_mask.
 */
void mtrr_save_state(void)
{
    int first_cpu;

    if (!mtrr_enabled())
        return;

    first_cpu = cpumask_first(cpu_online_mask);
    smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}

void set_mtrr_aps_delayed_init(void)
{
    if (!mtrr_enabled())
        return;
    if (!use_intel())
        return;

    mtrr_aps_delayed_init = true;
}

/*
 * Delayed MTRR initialization for all APs
 */
void mtrr_aps_init(void)
{
    if (!use_intel() || !mtrr_enabled())
        return;

    /*
     * Check if someone has requested the delay of AP MTRR initialization,
     * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
     * then we are done.
     */
    if (!mtrr_aps_delayed_init)
        return;

    set_mtrr(~0U, 0, 0, 0);
    mtrr_aps_delayed_init = false;
}

void mtrr_bp_restore(void)
{
    if (!use_intel() || !mtrr_enabled())
        return;

    mtrr_if->set_all();
}

static int __init mtrr_init_finalize(void)
{
    if (!mtrr_enabled())
        return 0;

    if (use_intel()) {
        if (!changed_by_mtrr_cleanup)
            mtrr_state_warn();
        return 0;
    }

    /*
     * The CPU has no generic (Intel-style) MTRRs and seems not to
     * support SMP. Such CPUs have vendor-specific drivers, so we use
     * a tricky method to support suspend/resume for them.
     *
     * TBD: is there any system with such a CPU that supports
     * suspend/resume? If not, we should remove the code.
     */
    register_syscore_ops(&mtrr_syscore_ops);

    return 0;
}
subsys_initcall(mtrr_init_finalize);