// SPDX-License-Identifier: GPL-2.0
/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#include "cpu.h"

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

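/* Number of cache leaves found by CPUID enumeration on this system */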
static unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user.  This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 *  disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache which indices we disable therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;

	/*  check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = topology_die_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

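/*
 * Fill @this_leaf with the raw CPUID registers for cache leaf @index:
 * leaf 0x8000001d (or the emulation above) on AMD/Hygon, leaf 4 otherwise,
 * then compute the total cache size from the reported geometry.
 */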
static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		cpuid_count(0x8000001d, index, &eax.full,
			    &ebx.full, &ecx.full, &edx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

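/*
 * Count the cache leaves by walking the deterministic cache parameters
 * leaf (CPUID 4, or 0x8000001d on AMD/Hygon) until a CTYPE_NULL entry
 * is returned.
 */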
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int		eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

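/* Derive the per-CPU last-level cache ID on AMD processors. */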
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
}

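/*
 * Determine num_cache_leaves on AMD: enumerate via CPUID when TOPOEXT is
 * available, otherwise infer it from whether leaf 0x80000006 reports an
 * L3 cache.
 */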
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	num_cache_leaves = find_num_cache_leaves(c);
}

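/*
 * Detect L1/L2/L3 (and trace) cache sizes: prefer the deterministic cache
 * parameters leaf (CPUID 4) and fall back to the legacy descriptor leaf
 * (CPUID 2) where needed, then record the shared-cache IDs.
 */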
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
		per_cpu(cpu_l2c_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turns means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}

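/*
 * AMD/Hygon-specific shared_cpu_map setup: L3 uses the pre-computed
 * cpu_llc_shared_mask; other levels use the sharing information from
 * CPUID 0x8000001d when TOPOEXT is available. Returns 1 when handled here.
 */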
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

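/*
 * Build shared_cpu_map for one cache leaf: CPUs whose APIC IDs agree above
 * the sharing-order bits share this cache.
 */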
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue;/* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

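/* Fill a generic struct cacheinfo leaf from the raw CPUID(4) registers. */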
static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

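/* Report the number of cache levels and leaves to the generic cacheinfo code. */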
int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}

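/*
 * Populate the cacheinfo leaves for @cpu from CPUID and set up the shared
 * CPU map for each leaf.
 */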
int populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}