Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Hardware Feedback Interface Driver
0004  *
0005  * Copyright (c) 2021, Intel Corporation.
0006  *
0007  * Authors: Aubrey Li <aubrey.li@linux.intel.com>
0008  *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
0009  *
0010  *
0011  * The Hardware Feedback Interface provides performance and energy efficiency
0012  * capability information for each CPU in the system. Depending on the processor
0013  * model, hardware may periodically update these capabilities as a result of
0014  * changes in the operating conditions (e.g., power limits or thermal
0015  * constraints). On other processor models, there is a single HFI update
0016  * at boot.
0017  *
0018  * This file provides functionality to process HFI updates and relay these
0019  * updates to userspace.
0020  */
0021 
0022 #define pr_fmt(fmt)  "intel-hfi: " fmt
0023 
0024 #include <linux/bitops.h>
0025 #include <linux/cpufeature.h>
0026 #include <linux/cpumask.h>
0027 #include <linux/gfp.h>
0028 #include <linux/io.h>
0029 #include <linux/kernel.h>
0030 #include <linux/math.h>
0031 #include <linux/mutex.h>
0032 #include <linux/percpu-defs.h>
0033 #include <linux/printk.h>
0034 #include <linux/processor.h>
0035 #include <linux/slab.h>
0036 #include <linux/spinlock.h>
0037 #include <linux/string.h>
0038 #include <linux/topology.h>
0039 #include <linux/workqueue.h>
0040 
0041 #include <asm/msr.h>
0042 
0043 #include "../thermal_core.h"
0044 #include "intel_hfi.h"
0045 
/*
 * Bits of the package thermal status value that are kept when
 * intel_hfi_process_event() writes the register back to acknowledge an HFI
 * update. All other bits are written as zero.
 */
#define THERM_STATUS_CLEAR_PKG_MASK \
	(BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26))

/*
 * Hardware Feedback Interface MSR configuration bits:
 * - valid bit, OR-ed into the table address written to MSR_IA32_HW_FEEDBACK_PTR
 * - enable bit, set in MSR_IA32_HW_FEEDBACK_CONFIG
 */
#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)

/* CPUID leaf whose EDX register enumerates the HFI (see union cpuid6_edx). */
#define CPUID_HFI_LEAF 6
0056 
0057 union hfi_capabilities {
0058     struct {
0059         u8  performance:1;
0060         u8  energy_efficiency:1;
0061         u8  __reserved:6;
0062     } split;
0063     u8 bits;
0064 };
0065 
0066 union cpuid6_edx {
0067     struct {
0068         union hfi_capabilities  capabilities;
0069         u32         table_pages:4;
0070         u32         __reserved:4;
0071         s32         index:16;
0072     } split;
0073     u32 full;
0074 };
0075 
0076 /**
0077  * struct hfi_cpu_data - HFI capabilities per CPU
0078  * @perf_cap:       Performance capability
0079  * @ee_cap:     Energy efficiency capability
0080  *
0081  * Capabilities of a logical processor in the HFI table. These capabilities are
0082  * unitless.
0083  */
0084 struct hfi_cpu_data {
0085     u8  perf_cap;
0086     u8  ee_cap;
0087 } __packed;
0088 
0089 /**
0090  * struct hfi_hdr - Header of the HFI table
0091  * @perf_updated:   Hardware updated performance capabilities
0092  * @ee_updated:     Hardware updated energy efficiency capabilities
0093  *
0094  * Properties of the data in an HFI table.
0095  */
0096 struct hfi_hdr {
0097     u8  perf_updated;
0098     u8  ee_updated;
0099 } __packed;
0100 
0101 /**
0102  * struct hfi_instance - Representation of an HFI instance (i.e., a table)
0103  * @local_table:    Base of the local copy of the HFI table
0104  * @timestamp:      Timestamp of the last update of the local table.
0105  *          Located at the base of the local table.
0106  * @hdr:        Base address of the header of the local table
0107  * @data:       Base address of the data of the local table
0108  * @cpus:       CPUs represented in this HFI table instance
0109  * @hw_table:       Pointer to the HFI table of this instance
0110  * @update_work:    Delayed work to process HFI updates
0111  * @table_lock:     Lock to protect acceses to the table of this instance
0112  * @event_lock:     Lock to process HFI interrupts
0113  *
0114  * A set of parameters to parse and navigate a specific HFI table.
0115  */
0116 struct hfi_instance {
0117     union {
0118         void            *local_table;
0119         u64         *timestamp;
0120     };
0121     void            *hdr;
0122     void            *data;
0123     cpumask_var_t       cpus;
0124     void            *hw_table;
0125     struct delayed_work update_work;
0126     raw_spinlock_t      table_lock;
0127     raw_spinlock_t      event_lock;
0128 };
0129 
/**
 * struct hfi_features - HFI parameters shared by all instances
 * @nr_table_pages:	Size of the HFI table, in 4KB pages
 * @cpu_stride:		Distance in bytes between the capability data of two
 *			consecutive logical processors in the table (i.e., the
 *			row stride)
 * @hdr_size:		Size of the table header, in bytes
 *
 * Filled in once by hfi_parse_features() and valid for every HFI instance.
 */
struct hfi_features {
	unsigned int	nr_table_pages;
	unsigned int	cpu_stride;
	unsigned int	hdr_size;
};
0144 
0145 /**
0146  * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
0147  * @index:      Row of this CPU in its HFI table
0148  * @hfi_instance:   Attributes of the HFI table to which this CPU belongs
0149  *
0150  * Parameters to link a logical processor to an HFI table and a row within it.
0151  */
0152 struct hfi_cpu_info {
0153     s16         index;
0154     struct hfi_instance *hfi_instance;
0155 };
0156 
0157 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
0158 
0159 static int max_hfi_instances;
0160 static struct hfi_instance *hfi_instances;
0161 
0162 static struct hfi_features hfi_features;
0163 static DEFINE_MUTEX(hfi_instance_lock);
0164 
0165 static struct workqueue_struct *hfi_updates_wq;
0166 #define HFI_UPDATE_INTERVAL     HZ
0167 #define HFI_MAX_THERM_NOTIFY_COUNT  16
0168 
0169 static void get_hfi_caps(struct hfi_instance *hfi_instance,
0170              struct thermal_genl_cpu_caps *cpu_caps)
0171 {
0172     int cpu, i = 0;
0173 
0174     raw_spin_lock_irq(&hfi_instance->table_lock);
0175     for_each_cpu(cpu, hfi_instance->cpus) {
0176         struct hfi_cpu_data *caps;
0177         s16 index;
0178 
0179         index = per_cpu(hfi_cpu_info, cpu).index;
0180         caps = hfi_instance->data + index * hfi_features.cpu_stride;
0181         cpu_caps[i].cpu = cpu;
0182 
0183         /*
0184          * Scale performance and energy efficiency to
0185          * the [0, 1023] interval that thermal netlink uses.
0186          */
0187         cpu_caps[i].performance = caps->perf_cap << 2;
0188         cpu_caps[i].efficiency = caps->ee_cap << 2;
0189 
0190         ++i;
0191     }
0192     raw_spin_unlock_irq(&hfi_instance->table_lock);
0193 }
0194 
0195 /*
0196  * Call update_capabilities() when there are changes in the HFI table.
0197  */
0198 static void update_capabilities(struct hfi_instance *hfi_instance)
0199 {
0200     struct thermal_genl_cpu_caps *cpu_caps;
0201     int i = 0, cpu_count;
0202 
0203     /* CPUs may come online/offline while processing an HFI update. */
0204     mutex_lock(&hfi_instance_lock);
0205 
0206     cpu_count = cpumask_weight(hfi_instance->cpus);
0207 
0208     /* No CPUs to report in this hfi_instance. */
0209     if (!cpu_count)
0210         goto out;
0211 
0212     cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
0213     if (!cpu_caps)
0214         goto out;
0215 
0216     get_hfi_caps(hfi_instance, cpu_caps);
0217 
0218     if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
0219         goto last_cmd;
0220 
0221     /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
0222     for (i = 0;
0223          (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
0224          i += HFI_MAX_THERM_NOTIFY_COUNT)
0225         thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
0226                           &cpu_caps[i]);
0227 
0228     cpu_count = cpu_count - i;
0229 
0230 last_cmd:
0231     /* Process the remaining capabilities if any. */
0232     if (cpu_count)
0233         thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
0234 
0235     kfree(cpu_caps);
0236 out:
0237     mutex_unlock(&hfi_instance_lock);
0238 }
0239 
0240 static void hfi_update_work_fn(struct work_struct *work)
0241 {
0242     struct hfi_instance *hfi_instance;
0243 
0244     hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
0245                     update_work);
0246 
0247     update_capabilities(hfi_instance);
0248 }
0249 
0250 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
0251 {
0252     struct hfi_instance *hfi_instance;
0253     int cpu = smp_processor_id();
0254     struct hfi_cpu_info *info;
0255     u64 new_timestamp;
0256 
0257     if (!pkg_therm_status_msr_val)
0258         return;
0259 
0260     info = &per_cpu(hfi_cpu_info, cpu);
0261     if (!info)
0262         return;
0263 
0264     /*
0265      * A CPU is linked to its HFI instance before the thermal vector in the
0266      * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
0267      * when receiving an HFI event.
0268      */
0269     hfi_instance = info->hfi_instance;
0270     if (unlikely(!hfi_instance)) {
0271         pr_debug("Received event on CPU %d but instance was null", cpu);
0272         return;
0273     }
0274 
0275     /*
0276      * On most systems, all CPUs in the package receive a package-level
0277      * thermal interrupt when there is an HFI update. It is sufficient to
0278      * let a single CPU to acknowledge the update and queue work to
0279      * process it. The remaining CPUs can resume their work.
0280      */
0281     if (!raw_spin_trylock(&hfi_instance->event_lock))
0282         return;
0283 
0284     /* Skip duplicated updates. */
0285     new_timestamp = *(u64 *)hfi_instance->hw_table;
0286     if (*hfi_instance->timestamp == new_timestamp) {
0287         raw_spin_unlock(&hfi_instance->event_lock);
0288         return;
0289     }
0290 
0291     raw_spin_lock(&hfi_instance->table_lock);
0292 
0293     /*
0294      * Copy the updated table into our local copy. This includes the new
0295      * timestamp.
0296      */
0297     memcpy(hfi_instance->local_table, hfi_instance->hw_table,
0298            hfi_features.nr_table_pages << PAGE_SHIFT);
0299 
0300     raw_spin_unlock(&hfi_instance->table_lock);
0301     raw_spin_unlock(&hfi_instance->event_lock);
0302 
0303     /*
0304      * Let hardware know that we are done reading the HFI table and it is
0305      * free to update it again.
0306      */
0307     pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
0308                     ~PACKAGE_THERM_STATUS_HFI_UPDATED;
0309     wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
0310 
0311     queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
0312                HFI_UPDATE_INTERVAL);
0313 }
0314 
0315 static void init_hfi_cpu_index(struct hfi_cpu_info *info)
0316 {
0317     union cpuid6_edx edx;
0318 
0319     /* Do not re-read @cpu's index if it has already been initialized. */
0320     if (info->index > -1)
0321         return;
0322 
0323     edx.full = cpuid_edx(CPUID_HFI_LEAF);
0324     info->index = edx.split.index;
0325 }
0326 
0327 /*
0328  * The format of the HFI table depends on the number of capabilities that the
0329  * hardware supports. Keep a data structure to navigate the table.
0330  */
0331 static void init_hfi_instance(struct hfi_instance *hfi_instance)
0332 {
0333     /* The HFI header is below the time-stamp. */
0334     hfi_instance->hdr = hfi_instance->local_table +
0335                 sizeof(*hfi_instance->timestamp);
0336 
0337     /* The HFI data starts below the header. */
0338     hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
0339 }
0340 
0341 /**
0342  * intel_hfi_online() - Enable HFI on @cpu
0343  * @cpu:    CPU in which the HFI will be enabled
0344  *
0345  * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
0346  * level. The first CPU in the die/package to come online does the full HFI
0347  * initialization. Subsequent CPUs will just link themselves to the HFI
0348  * instance of their die/package.
0349  *
0350  * This function is called before enabling the thermal vector in the local APIC
0351  * in order to ensure that @cpu has an associated HFI instance when it receives
0352  * an HFI event.
0353  */
0354 void intel_hfi_online(unsigned int cpu)
0355 {
0356     struct hfi_instance *hfi_instance;
0357     struct hfi_cpu_info *info;
0358     phys_addr_t hw_table_pa;
0359     u64 msr_val;
0360     u16 die_id;
0361 
0362     /* Nothing to do if hfi_instances are missing. */
0363     if (!hfi_instances)
0364         return;
0365 
0366     /*
0367      * Link @cpu to the HFI instance of its package/die. It does not
0368      * matter whether the instance has been initialized.
0369      */
0370     info = &per_cpu(hfi_cpu_info, cpu);
0371     die_id = topology_logical_die_id(cpu);
0372     hfi_instance = info->hfi_instance;
0373     if (!hfi_instance) {
0374         if (die_id < 0 || die_id >= max_hfi_instances)
0375             return;
0376 
0377         hfi_instance = &hfi_instances[die_id];
0378         info->hfi_instance = hfi_instance;
0379     }
0380 
0381     init_hfi_cpu_index(info);
0382 
0383     /*
0384      * Now check if the HFI instance of the package/die of @cpu has been
0385      * initialized (by checking its header). In such case, all we have to
0386      * do is to add @cpu to this instance's cpumask.
0387      */
0388     mutex_lock(&hfi_instance_lock);
0389     if (hfi_instance->hdr) {
0390         cpumask_set_cpu(cpu, hfi_instance->cpus);
0391         goto unlock;
0392     }
0393 
0394     /*
0395      * Hardware is programmed with the physical address of the first page
0396      * frame of the table. Hence, the allocated memory must be page-aligned.
0397      */
0398     hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
0399                            GFP_KERNEL | __GFP_ZERO);
0400     if (!hfi_instance->hw_table)
0401         goto unlock;
0402 
0403     hw_table_pa = virt_to_phys(hfi_instance->hw_table);
0404 
0405     /*
0406      * Allocate memory to keep a local copy of the table that
0407      * hardware generates.
0408      */
0409     hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
0410                         GFP_KERNEL);
0411     if (!hfi_instance->local_table)
0412         goto free_hw_table;
0413 
0414     /*
0415      * Program the address of the feedback table of this die/package. On
0416      * some processors, hardware remembers the old address of the HFI table
0417      * even after having been reprogrammed and re-enabled. Thus, do not free
0418      * the pages allocated for the table or reprogram the hardware with a
0419      * new base address. Namely, program the hardware only once.
0420      */
0421     msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
0422     wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
0423 
0424     init_hfi_instance(hfi_instance);
0425 
0426     INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
0427     raw_spin_lock_init(&hfi_instance->table_lock);
0428     raw_spin_lock_init(&hfi_instance->event_lock);
0429 
0430     cpumask_set_cpu(cpu, hfi_instance->cpus);
0431 
0432     /*
0433      * Enable the hardware feedback interface and never disable it. See
0434      * comment on programming the address of the table.
0435      */
0436     rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
0437     msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
0438     wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
0439 
0440 unlock:
0441     mutex_unlock(&hfi_instance_lock);
0442     return;
0443 
0444 free_hw_table:
0445     free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
0446     goto unlock;
0447 }
0448 
0449 /**
0450  * intel_hfi_offline() - Disable HFI on @cpu
0451  * @cpu:    CPU in which the HFI will be disabled
0452  *
0453  * Remove @cpu from those covered by its HFI instance.
0454  *
0455  * On some processors, hardware remembers previous programming settings even
0456  * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
0457  * die/package of @cpu are offline. See note in intel_hfi_online().
0458  */
0459 void intel_hfi_offline(unsigned int cpu)
0460 {
0461     struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
0462     struct hfi_instance *hfi_instance;
0463 
0464     /*
0465      * Check if @cpu as an associated, initialized (i.e., with a non-NULL
0466      * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
0467      * is present.
0468      */
0469     hfi_instance = info->hfi_instance;
0470     if (!hfi_instance)
0471         return;
0472 
0473     if (!hfi_instance->hdr)
0474         return;
0475 
0476     mutex_lock(&hfi_instance_lock);
0477     cpumask_clear_cpu(cpu, hfi_instance->cpus);
0478     mutex_unlock(&hfi_instance_lock);
0479 }
0480 
0481 static __init int hfi_parse_features(void)
0482 {
0483     unsigned int nr_capabilities;
0484     union cpuid6_edx edx;
0485 
0486     if (!boot_cpu_has(X86_FEATURE_HFI))
0487         return -ENODEV;
0488 
0489     /*
0490      * If we are here we know that CPUID_HFI_LEAF exists. Parse the
0491      * supported capabilities and the size of the HFI table.
0492      */
0493     edx.full = cpuid_edx(CPUID_HFI_LEAF);
0494 
0495     if (!edx.split.capabilities.split.performance) {
0496         pr_debug("Performance reporting not supported! Not using HFI\n");
0497         return -ENODEV;
0498     }
0499 
0500     /*
0501      * The number of supported capabilities determines the number of
0502      * columns in the HFI table. Exclude the reserved bits.
0503      */
0504     edx.split.capabilities.split.__reserved = 0;
0505     nr_capabilities = hweight8(edx.split.capabilities.bits);
0506 
0507     /* The number of 4KB pages required by the table */
0508     hfi_features.nr_table_pages = edx.split.table_pages + 1;
0509 
0510     /*
0511      * The header contains change indications for each supported feature.
0512      * The size of the table header is rounded up to be a multiple of 8
0513      * bytes.
0514      */
0515     hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
0516 
0517     /*
0518      * Data of each logical processor is also rounded up to be a multiple
0519      * of 8 bytes.
0520      */
0521     hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
0522 
0523     return 0;
0524 }
0525 
0526 void __init intel_hfi_init(void)
0527 {
0528     struct hfi_instance *hfi_instance;
0529     int i, j;
0530 
0531     if (hfi_parse_features())
0532         return;
0533 
0534     /* There is one HFI instance per die/package. */
0535     max_hfi_instances = topology_max_packages() *
0536                 topology_max_die_per_package();
0537 
0538     /*
0539      * This allocation may fail. CPU hotplug callbacks must check
0540      * for a null pointer.
0541      */
0542     hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
0543                 GFP_KERNEL);
0544     if (!hfi_instances)
0545         return;
0546 
0547     for (i = 0; i < max_hfi_instances; i++) {
0548         hfi_instance = &hfi_instances[i];
0549         if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
0550             goto err_nomem;
0551     }
0552 
0553     hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
0554     if (!hfi_updates_wq)
0555         goto err_nomem;
0556 
0557     return;
0558 
0559 err_nomem:
0560     for (j = 0; j < i; ++j) {
0561         hfi_instance = &hfi_instances[j];
0562         free_cpumask_var(hfi_instance->cpus);
0563     }
0564 
0565     kfree(hfi_instances);
0566     hfi_instances = NULL;
0567 }