Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Thermal throttle event support code (such as syslog messaging and rate
0004  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
0005  *
0006  * This allows consistent reporting of CPU thermal throttle events.
0007  *
0008  * Maintains a counter in /sys that keeps track of the number of thermal
0009  * events, such that the user knows how bad the thermal problem might be
0010  * (since the logging to syslog is rate limited).
0011  *
0012  * Author: Dmitriy Zavin (dmitriyz@google.com)
0013  *
0014  * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
0015  *          Inspired by Ross Biro's and Al Borchers' counter code.
0016  */
0017 #include <linux/interrupt.h>
0018 #include <linux/notifier.h>
0019 #include <linux/jiffies.h>
0020 #include <linux/kernel.h>
0021 #include <linux/percpu.h>
0022 #include <linux/export.h>
0023 #include <linux/types.h>
0024 #include <linux/init.h>
0025 #include <linux/smp.h>
0026 #include <linux/cpu.h>
0027 
0028 #include <asm/processor.h>
0029 #include <asm/thermal.h>
0030 #include <asm/traps.h>
0031 #include <asm/apic.h>
0032 #include <asm/irq.h>
0033 #include <asm/msr.h>
0034 
0035 #include "intel_hfi.h"
0036 #include "thermal_interrupt.h"
0037 
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

/* Event types passed to therm_throt_process() */
#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1
0043 
0044 /**
0045  * struct _thermal_state - Represent the current thermal event state
0046  * @next_check:         Stores the next timestamp, when it is allowed
0047  *              to log the next warning message.
0048  * @last_interrupt_time:    Stores the timestamp for the last threshold
0049  *              high event.
0050  * @therm_work:         Delayed workqueue structure
0051  * @count:          Stores the current running count for thermal
0052  *              or power threshold interrupts.
0053  * @last_count:         Stores the previous running count for thermal
0054  *              or power threshold interrupts.
0055  * @max_time_ms:        This shows the maximum amount of time CPU was
0056  *              in throttled state for a single thermal
0057  *              threshold high to low state.
0058  * @total_time_ms:      This is a cumulative time during which CPU was
0059  *              in the throttled state.
0060  * @rate_control_active:    Set when a throttling message is logged.
0061  *              This is used for the purpose of rate-control.
0062  * @new_event:          Stores the last high/low status of the
0063  *              THERM_STATUS_PROCHOT or
0064  *              THERM_STATUS_POWER_LIMIT.
0065  * @level:          Stores whether this _thermal_state instance is
0066  *              for a CORE level or for PACKAGE level.
0067  * @sample_index:       Index for storing the next sample in the buffer
0068  *              temp_samples[].
0069  * @sample_count:       Total number of samples collected in the buffer
0070  *              temp_samples[].
0071  * @average:            The last moving average of temperature samples
0072  * @baseline_temp:      Temperature at which thermal threshold high
0073  *              interrupt was generated.
0074  * @temp_samples:       Storage for temperature samples to calculate
0075  *              moving average.
0076  *
0077  * This structure is used to represent data related to thermal state for a CPU.
0078  * There is a separate storage for core and package level for each CPU.
0079  */
struct _thermal_state {
	u64			next_check;		/* jiffies64: earliest time the next warning may be logged */
	u64			last_interrupt_time;	/* jiffies64 of the last threshold-high event (0 = none pending) */
	struct delayed_work	therm_work;		/* self-rearming poll work while throttled */
	unsigned long		count;			/* running thermal/power interrupt count */
	unsigned long		last_count;		/* count value at the last sample-window reset */
	unsigned long		max_time_ms;		/* longest single high-to-low throttle episode */
	unsigned long		total_time_ms;		/* cumulative time spent throttled */
	bool			rate_control_active;	/* a throttling message was logged (rate control) */
	bool			new_event;		/* last high/low status of the event bit */
	u8			level;			/* CORE_LEVEL or PACKAGE_LEVEL */
	u8			sample_index;		/* next slot to fill in temp_samples[] */
	u8			sample_count;		/* samples collected so far in temp_samples[] */
	u8			average;		/* last moving average of temperature samples */
	u8			baseline_temp;		/* readout captured when threshold-high fired */
	u8			temp_samples[3];	/* circular buffer for the moving average */
};
0097 
/*
 * Per-CPU bundle of one _thermal_state instance per (level, event type)
 * combination tracked by this file.
 */
struct thermal_state {
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
	struct _thermal_state pkg_thresh0;
	struct _thermal_state pkg_thresh1;
};
0108 
/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle core package threshold_interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/*
 * Callback support of rate control: return true if the callback
 * implements its own rate control (skips thresh_event_valid()).
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


/* Per-CPU thermal event state */
static DEFINE_PER_CPU(struct thermal_state, thermal_state);

/* Set to 1 by intel_init_thermal(); gates the init-time device setup */
static atomic_t therm_throt_en	= ATOMIC_INIT(0);

/* Boot CPU's initial thermal LVT value, saved in therm_lvt_init() */
static u32 lvtthmr_init __read_mostly;
0128 
#ifdef CONFIG_SYSFS
/*
 * Generate a read-only DEVICE_ATTR named _name, backed by the matching
 * therm_throt_device_show_##_name() function generated below.
 */
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
		       therm_throt_device_show_##_name,	\
			   NULL)				\

/*
 * Generate a sysfs show() function printing the given counter field
 * (event.name) from the CPU's thermal_state.  Preemption is disabled
 * across the cpu_online() check to keep CPU hotplug from racing it;
 * an offline CPU reports an empty file (ret = 0).
 */
#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,			\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret;							\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu)) {						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	} else								\
		ret = 0;						\
	preempt_enable();						\
									\
	return ret;							\
}

/* One show() function + attribute per exported counter */
define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

define_therm_throt_device_show_func(core_throttle, max_time_ms);
define_therm_throt_device_one_ro(core_throttle_max_time_ms);

define_therm_throt_device_show_func(package_throttle, max_time_ms);
define_therm_throt_device_one_ro(package_throttle_max_time_ms);

define_therm_throt_device_show_func(core_throttle, total_time_ms);
define_therm_throt_device_one_ro(core_throttle_total_time_ms);

define_therm_throt_device_show_func(package_throttle, total_time_ms);
define_therm_throt_device_one_ro(package_throttle_total_time_ms);

/*
 * Only the core throttle attributes are unconditionally present; the
 * power-limit and package attributes are added feature-dependently in
 * thermal_throttle_add_dev().
 */
static struct attribute *thermal_throttle_attrs[] = {
	&dev_attr_core_throttle_count.attr,
	&dev_attr_core_throttle_max_time_ms.attr,
	&dev_attr_core_throttle_total_time_ms.attr,
	NULL
};

static const struct attribute_group thermal_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
#endif /* CONFIG_SYSFS */
0192 
/* Which per-CPU _thermal_state instance an event applies to */
#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

/* Poll period for throttle_active_work() while a condition is active */
#define THERM_THROT_POLL_INTERVAL	HZ
#define THERM_STATUS_PROCHOT_LOG	BIT(1)

/* Sticky log bits handled by clear_therm_status_log() per level */
#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
0201 
0202 static void clear_therm_status_log(int level)
0203 {
0204     int msr;
0205     u64 mask, msr_val;
0206 
0207     if (level == CORE_LEVEL) {
0208         msr  = MSR_IA32_THERM_STATUS;
0209         mask = THERM_STATUS_CLEAR_CORE_MASK;
0210     } else {
0211         msr  = MSR_IA32_PACKAGE_THERM_STATUS;
0212         mask = THERM_STATUS_CLEAR_PKG_MASK;
0213     }
0214 
0215     rdmsrl(msr, msr_val);
0216     msr_val &= mask;
0217     wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
0218 }
0219 
0220 static void get_therm_status(int level, bool *proc_hot, u8 *temp)
0221 {
0222     int msr;
0223     u64 msr_val;
0224 
0225     if (level == CORE_LEVEL)
0226         msr = MSR_IA32_THERM_STATUS;
0227     else
0228         msr = MSR_IA32_PACKAGE_THERM_STATUS;
0229 
0230     rdmsrl(msr, msr_val);
0231     if (msr_val & THERM_STATUS_PROCHOT_LOG)
0232         *proc_hot = true;
0233     else
0234         *proc_hot = false;
0235 
0236     *temp = (msr_val >> 16) & 0x7F;
0237 }
0238 
/*
 * throttle_active_work() - poll thermal status while a condition is active
 * @work:	embedded delayed work; used to recover the _thermal_state
 *
 * Re-armed every THERM_THROT_POLL_INTERVAL while the throttle condition
 * persists.  Maintains a moving average over temp_samples[] and logs a
 * rate-limited warning when the average indicates the CPU got hotter
 * (the readout is an offset from the max, so smaller means hotter).
 * Stops re-arming itself once PROCHOT is no longer logged and the
 * readout rose back above the baseline captured at interrupt time.
 */
static void __maybe_unused throttle_active_work(struct work_struct *work)
{
	struct _thermal_state *state = container_of(to_delayed_work(work),
						struct _thermal_state, therm_work);
	unsigned int i, avg, this_cpu = smp_processor_id();
	u64 now = get_jiffies_64();
	bool hot;
	u8 temp;

	get_therm_status(state->level, &hot, &temp);
	/* temperature value is offset from the max so lesser means hotter */
	if (!hot && temp > state->baseline_temp) {
		if (state->rate_control_active)
			pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
				this_cpu,
				state->level == CORE_LEVEL ? "Core" : "Package",
				state->count);

		state->rate_control_active = false;
		/* Condition is over: do not re-arm the poll */
		return;
	}

	/* Rate control: once logged, stay quiet for CHECK_INTERVAL */
	if (time_before64(now, state->next_check) &&
		      state->rate_control_active)
		goto re_arm;

	state->next_check = now + CHECK_INTERVAL;

	if (state->count != state->last_count) {
		/* There was one new thermal interrupt */
		state->last_count = state->count;
		state->average = 0;
		state->sample_count = 0;
		state->sample_index = 0;
	}

	/* Collect into the circular buffer; average only once it is full */
	state->temp_samples[state->sample_index] = temp;
	state->sample_count++;
	state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
	if (state->sample_count < ARRAY_SIZE(state->temp_samples))
		goto re_arm;

	avg = 0;
	for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
		avg += state->temp_samples[i];

	avg /= ARRAY_SIZE(state->temp_samples);

	/* Average offset shrank vs. the previous window: CPU is hotter */
	if (state->average > avg) {
		pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
			this_cpu,
			state->level == CORE_LEVEL ? "Core" : "Package",
			state->count);
		state->rate_control_active = true;
	}

	state->average = avg;

re_arm:
	clear_therm_status_log(state->level);
	schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
}
0301 
/**
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event:	whether the condition is currently asserted; the thermal
 *		interrupt normally fires both when the event begins and
 *		once it has ended.
 * @event:	THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT
 * @level:	CORE_LEVEL or PACKAGE_LEVEL
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 */
static void therm_throt_process(bool new_event, int event, int level)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
	u64 now;
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

	now = get_jiffies_64();
	/* Pick the state instance matching (level, event); ignore unknowns */
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			return;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return;
	} else
		return;

	old_event = state->new_event;
	state->new_event = new_event;

	if (new_event)
		state->count++;

	/* Only throttling events drive the polling/timing machinery below */
	if (event != THERMAL_THROTTLING_EVENT)
		return;

	if (new_event && !state->last_interrupt_time) {
		bool hot;
		u8 temp;

		get_therm_status(state->level, &hot, &temp);
		/*
		 * Ignore short temperature spike as the system is not close
		 * to PROCHOT. 10C offset is large enough to ignore. It is
		 * already dropped from the high threshold temperature.
		 */
		if (temp > 10)
			return;

		/* Throttling begins: capture baseline and start polling */
		state->baseline_temp = temp;
		state->last_interrupt_time = now;
		schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
	} else if (old_event && state->last_interrupt_time) {
		unsigned long throttle_time;

		/* Throttling ends: account episode duration in ms */
		throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
		if (throttle_time > state->max_time_ms)
			state->max_time_ms = throttle_time;
		state->total_time_ms += throttle_time;
		state->last_interrupt_time = 0;
	}
}
0374 
0375 static int thresh_event_valid(int level, int event)
0376 {
0377     struct _thermal_state *state;
0378     unsigned int this_cpu = smp_processor_id();
0379     struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
0380     u64 now = get_jiffies_64();
0381 
0382     if (level == PACKAGE_LEVEL)
0383         state = (event == 0) ? &pstate->pkg_thresh0 :
0384                         &pstate->pkg_thresh1;
0385     else
0386         state = (event == 0) ? &pstate->core_thresh0 :
0387                         &pstate->core_thresh1;
0388 
0389     if (time_before64(now, state->next_check))
0390         return 0;
0391 
0392     state->next_check = now + CHECK_INTERVAL;
0393 
0394     return 1;
0395 }
0396 
/* "int_pln_enable" boot parameter: opt in to power limit notifications */
static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
	int_pln_enable = true;

	/* 1 tells the boot code the parameter was consumed */
	return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);
0405 
0406 #ifdef CONFIG_SYSFS
0407 /* Add/Remove thermal_throttle interface for CPU device: */
0408 static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
0409 {
0410     int err;
0411     struct cpuinfo_x86 *c = &cpu_data(cpu);
0412 
0413     err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
0414     if (err)
0415         return err;
0416 
0417     if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
0418         err = sysfs_add_file_to_group(&dev->kobj,
0419                           &dev_attr_core_power_limit_count.attr,
0420                           thermal_attr_group.name);
0421         if (err)
0422             goto del_group;
0423     }
0424 
0425     if (cpu_has(c, X86_FEATURE_PTS)) {
0426         err = sysfs_add_file_to_group(&dev->kobj,
0427                           &dev_attr_package_throttle_count.attr,
0428                           thermal_attr_group.name);
0429         if (err)
0430             goto del_group;
0431 
0432         err = sysfs_add_file_to_group(&dev->kobj,
0433                           &dev_attr_package_throttle_max_time_ms.attr,
0434                           thermal_attr_group.name);
0435         if (err)
0436             goto del_group;
0437 
0438         err = sysfs_add_file_to_group(&dev->kobj,
0439                           &dev_attr_package_throttle_total_time_ms.attr,
0440                           thermal_attr_group.name);
0441         if (err)
0442             goto del_group;
0443 
0444         if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
0445             err = sysfs_add_file_to_group(&dev->kobj,
0446                     &dev_attr_package_power_limit_count.attr,
0447                     thermal_attr_group.name);
0448             if (err)
0449                 goto del_group;
0450         }
0451     }
0452 
0453     return 0;
0454 
0455 del_group:
0456     sysfs_remove_group(&dev->kobj, &thermal_attr_group);
0457 
0458     return err;
0459 }
0460 
/* Tear down the sysfs group created by thermal_throttle_add_dev(). */
static void thermal_throttle_remove_dev(struct device *dev)
{
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}
0465 
0466 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
/*
 * CPU online callback: initialize this CPU's polling work, bring up HFI,
 * unmask the thermal LVT entry and create the sysfs interface.
 */
static int thermal_throttle_online(unsigned int cpu)
{
	struct thermal_state *state = &per_cpu(thermal_state, cpu);
	struct device *dev = get_cpu_device(cpu);
	u32 l;

	/* Tag each instance so shared helpers can tell core from package */
	state->package_throttle.level = PACKAGE_LEVEL;
	state->core_throttle.level = CORE_LEVEL;

	INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
	INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);

	/*
	 * The first CPU coming online will enable the HFI. Usually this causes
	 * hardware to issue an HFI thermal interrupt. Such interrupt will reach
	 * the CPU once we enable the thermal vector in the local APIC.
	 */
	intel_hfi_online(cpu);

	/* Unmask the thermal vector after the above workqueues are initialized. */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	return thermal_throttle_add_dev(dev, cpu);
}
0492 
/*
 * CPU offline callback: mirror of thermal_throttle_online() — mask the
 * vector, tear down HFI, drain polling work, then remove sysfs files.
 */
static int thermal_throttle_offline(unsigned int cpu)
{
	struct thermal_state *state = &per_cpu(thermal_state, cpu);
	struct device *dev = get_cpu_device(cpu);
	u32 l;

	/* Mask the thermal vector before draining evtl. pending work */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);

	intel_hfi_offline(cpu);

	/* Drain any polling work still scheduled for this CPU */
	cancel_delayed_work_sync(&state->package_throttle.therm_work);
	cancel_delayed_work_sync(&state->core_throttle.therm_work);

	/* Reset rate-control state so a later onlining starts clean */
	state->package_throttle.rate_control_active = false;
	state->core_throttle.rate_control_active = false;

	thermal_throttle_remove_dev(dev);
	return 0;
}
0514 
0515 static __init int thermal_throttle_init_device(void)
0516 {
0517     int ret;
0518 
0519     if (!atomic_read(&therm_throt_en))
0520         return 0;
0521 
0522     intel_hfi_init();
0523 
0524     ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
0525                 thermal_throttle_online,
0526                 thermal_throttle_offline);
0527     return ret < 0 ? ret : 0;
0528 }
0529 device_initcall(thermal_throttle_init_device);
0530 
0531 #endif /* CONFIG_SYSFS */
0532 
0533 static void notify_package_thresholds(__u64 msr_val)
0534 {
0535     bool notify_thres_0 = false;
0536     bool notify_thres_1 = false;
0537 
0538     if (!platform_thermal_package_notify)
0539         return;
0540 
0541     /* lower threshold check */
0542     if (msr_val & THERM_LOG_THRESHOLD0)
0543         notify_thres_0 = true;
0544     /* higher threshold check */
0545     if (msr_val & THERM_LOG_THRESHOLD1)
0546         notify_thres_1 = true;
0547 
0548     if (!notify_thres_0 && !notify_thres_1)
0549         return;
0550 
0551     if (platform_thermal_package_rate_control &&
0552         platform_thermal_package_rate_control()) {
0553         /* Rate control is implemented in callback */
0554         platform_thermal_package_notify(msr_val);
0555         return;
0556     }
0557 
0558     /* lower threshold reached */
0559     if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
0560         platform_thermal_package_notify(msr_val);
0561     /* higher threshold reached */
0562     if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
0563         platform_thermal_package_notify(msr_val);
0564 }
0565 
0566 static void notify_thresholds(__u64 msr_val)
0567 {
0568     /* check whether the interrupt handler is defined;
0569      * otherwise simply return
0570      */
0571     if (!platform_thermal_notify)
0572         return;
0573 
0574     /* lower threshold reached */
0575     if ((msr_val & THERM_LOG_THRESHOLD0) &&
0576             thresh_event_valid(CORE_LEVEL, 0))
0577         platform_thermal_notify(msr_val);
0578     /* higher threshold reached */
0579     if ((msr_val & THERM_LOG_THRESHOLD1) &&
0580             thresh_event_valid(CORE_LEVEL, 1))
0581         platform_thermal_notify(msr_val);
0582 }
0583 
/*
 * Default HWP interrupt handler: just clear MSR_HWP_STATUS.  Being
 * __weak, a stronger definition elsewhere in the kernel may override it.
 */
void __weak notify_hwp_interrupt(void)
{
	wrmsrl_safe(MSR_HWP_STATUS, 0);
}
0588 
0589 /* Thermal transition interrupt handler */
/*
 * Thermal transition interrupt handler: dispatch core-level and (when
 * PTS is present) package-level status bits to the threshold notifiers,
 * the throttle/power-limit bookkeeping, and the HFI event handler.
 */
void intel_thermal_interrupt(void)
{
	__u64 msr_val;

	if (static_cpu_has(X86_FEATURE_HWP))
		notify_hwp_interrupt();

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

	/* Check for violation of core thermal thresholds*/
	notify_thresholds(msr_val);

	therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
			    THERMAL_THROTTLING_EVENT,
			    CORE_LEVEL);

	/* Core power limit events only when PLN is supported and enabled */
	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
				    POWER_LIMIT_EVENT,
				    CORE_LEVEL);

	if (this_cpu_has(X86_FEATURE_PTS)) {
		/* msr_val is reused for the package status from here on */
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		/* check violations of package thermal thresholds */
		notify_package_thresholds(msr_val);
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
				    THERMAL_THROTTLING_EVENT,
				    PACKAGE_LEVEL);
		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
			therm_throt_process(msr_val &
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					PACKAGE_LEVEL);

		if (this_cpu_has(X86_FEATURE_HFI))
			intel_hfi_process_event(msr_val &
						PACKAGE_THERM_STATUS_HFI_UPDATED);
	}
}
0629 
0630 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
0631 static int intel_thermal_supported(struct cpuinfo_x86 *c)
0632 {
0633     if (!boot_cpu_has(X86_FEATURE_APIC))
0634         return 0;
0635     if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
0636         return 0;
0637     return 1;
0638 }
0639 
0640 bool x86_thermal_enabled(void)
0641 {
0642     return atomic_read(&therm_throt_en);
0643 }
0644 
0645 void __init therm_lvt_init(void)
0646 {
0647     /*
0648      * This function is only called on boot CPU. Save the init thermal
0649      * LVT value on BSP and use that value to restore APs' thermal LVT
0650      * entry BIOS programmed later
0651      */
0652     if (intel_thermal_supported(&boot_cpu_data))
0653         lvtthmr_init = apic_read(APIC_LVTTHMR);
0654 }
0655 
/*
 * Enable thermal monitoring on this CPU: program the thermal LVT entry,
 * enable the high/low (and optionally PLN/HFI) interrupt sources in the
 * thermal interrupt MSRs, and turn on TM1.  Bails out early when the
 * feature is unsupported or already owned by SMM firmware.
 */
void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	if (!intel_thermal_supported(c))
		return;

	/*
	 * First check if its enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	h = lvtthmr_init;
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
	 * If BIOS takes over the thermal interrupt and sets its interrupt
	 * delivery mode to SMI (not fixed), it restores the value that the
	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
	 * is always setting the same value for all threads/cores.
	 */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);


	/* TM1 enabled with SMI delivery means firmware owns thermal events */
	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		if (system_state == SYSTEM_BOOTING)
			pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* early Pentium M models use different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	/* Enable high/low interrupts; PLN only when the boot param asked for it */
	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			(l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	/* Same dance for the package-level interrupt sources */
	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				(l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE))
				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);

		if (cpu_has(c, X86_FEATURE_HFI)) {
			rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | PACKAGE_THERM_INT_HFI_ENABLE, h);
		}
	}

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
		      tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}