
0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (c) 2009-2010 Intel Corporation
0004  *
0005  * Authors:
0006  *  Jesse Barnes <jbarnes@virtuousgeek.org>
0007  */
0008 
0009 /*
0010  * Some Intel Ibex Peak based platforms support so-called "intelligent
0011  * power sharing", which allows the CPU and GPU to cooperate to maximize
0012  * performance within a given TDP (thermal design point).  This driver
0013  * performs the coordination between the CPU and GPU, monitors thermal and
0014  * power statistics in the platform, and initializes power monitoring
0015  * hardware.  It also provides a few tunables to control behavior.  Its
0016  * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
0017  * by tracking power and thermal budget; secondarily it can boost turbo
0018  * performance by allocating more power or thermal budget to the CPU or GPU
0019  * based on available headroom and activity.
0020  *
0021  * The basic algorithm is driven by a 5s moving average of temperature.  If
0022  * thermal headroom is available, the CPU and/or GPU power clamps may be
0023  * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
0024  * we scale back the clamp.  Aside from trigger events (when we're critically
0025  * close or over our TDP) we don't adjust the clamps more than once every
0026  * five seconds.
0027  *
0028  * The thermal device (device 31, function 6) has a set of registers that
0029  * are updated by the ME firmware.  The ME should also take the clamp values
0030  * written to those registers and write them to the CPU, but we currently
0031  * bypass that functionality and write the CPU MSR directly.
0032  *
0033  * UNSUPPORTED:
0034  *   - dual MCP configs
0035  *
0036  * TODO:
0037  *   - handle CPU hotplug
0038  *   - provide turbo enable/disable api
0039  *
0040  * Related documents:
0041  *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
0042  *   - CDI 401376 - Ibex Peak EDS
0043  *   - ref 26037, 26641 - IPS BIOS spec
0044  *   - ref 26489 - Nehalem BIOS writer's guide
0045  *   - ref 26921 - Ibex Peak BIOS Specification
0046  */
0047 
0048 #include <linux/debugfs.h>
0049 #include <linux/delay.h>
0050 #include <linux/interrupt.h>
0051 #include <linux/kernel.h>
0052 #include <linux/kthread.h>
0053 #include <linux/module.h>
0054 #include <linux/pci.h>
0055 #include <linux/sched.h>
0056 #include <linux/sched/loadavg.h>
0057 #include <linux/seq_file.h>
0058 #include <linux/string.h>
0059 #include <linux/tick.h>
0060 #include <linux/timer.h>
0061 #include <linux/dmi.h>
0062 #include <drm/i915_drm.h>
0063 #include <asm/msr.h>
0064 #include <asm/processor.h>
0065 #include "intel_ips.h"
0066 
0067 #include <linux/io-64-nonatomic-lo-hi.h>
0068 
0069 #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
0070 
0071 /*
0072  * Package level MSRs for monitor/control
0073  */
0074 #define PLATFORM_INFO   0xce
0075 #define   PLATFORM_TDP      (1<<29)
0076 #define   PLATFORM_RATIO    (1<<28)
0077 
0078 #define IA32_MISC_ENABLE    0x1a0
0079 #define   IA32_MISC_TURBO_EN    (1ULL<<38)
0080 
0081 #define TURBO_POWER_CURRENT_LIMIT   0x1ac
0082 #define   TURBO_TDC_OVR_EN  (1UL<<31)
0083 #define   TURBO_TDC_MASK    (0x000000007fff0000UL)
0084 #define   TURBO_TDC_SHIFT   (16)
0085 #define   TURBO_TDP_OVR_EN  (1UL<<15)
0086 #define   TURBO_TDP_MASK    (0x0000000000003fffUL)
0087 
0088 /*
0089  * Core/thread MSRs for monitoring
0090  */
0091 #define IA32_PERF_CTL       0x199
0092 #define   IA32_PERF_TURBO_DIS   (1ULL<<32)
0093 
0094 /*
0095  * Thermal PCI device regs
0096  */
0097 #define THM_CFG_TBAR    0x10
0098 #define THM_CFG_TBAR_HI 0x14
0099 
0100 #define THM_TSIU    0x00
0101 #define THM_TSE     0x01
0102 #define   TSE_EN    0xb8
0103 #define THM_TSS     0x02
0104 #define THM_TSTR    0x03
0105 #define THM_TSTTP   0x04
0106 #define THM_TSCO    0x08
0107 #define THM_TSES    0x0c
0108 #define THM_TSGPEN  0x0d
0109 #define   TSGPEN_HOT_LOHI   (1<<1)
0110 #define   TSGPEN_CRIT_LOHI  (1<<2)
0111 #define THM_TSPC    0x0e
0112 #define THM_PPEC    0x10
0113 #define THM_CTA     0x12
0114 #define THM_PTA     0x14
0115 #define   PTA_SLOPE_MASK    (0xff00)
0116 #define   PTA_SLOPE_SHIFT   8
0117 #define   PTA_OFFSET_MASK   (0x00ff)
0118 #define THM_MGTA    0x16
0119 #define   MGTA_SLOPE_MASK   (0xff00)
0120 #define   MGTA_SLOPE_SHIFT  8
0121 #define   MGTA_OFFSET_MASK  (0x00ff)
0122 #define THM_TRC     0x1a
0123 #define   TRC_CORE2_EN  (1<<15)
0124 #define   TRC_THM_EN    (1<<12)
0125 #define   TRC_C6_WAR    (1<<8)
0126 #define   TRC_CORE1_EN  (1<<7)
0127 #define   TRC_CORE_PWR  (1<<6)
0128 #define   TRC_PCH_EN    (1<<5)
0129 #define   TRC_MCH_EN    (1<<4)
0130 #define   TRC_DIMM4 (1<<3)
0131 #define   TRC_DIMM3 (1<<2)
0132 #define   TRC_DIMM2 (1<<1)
0133 #define   TRC_DIMM1 (1<<0)
0134 #define THM_TES     0x20
0135 #define THM_TEN     0x21
0136 #define   TEN_UPDATE_EN 1
0137 #define THM_PSC     0x24
0138 #define   PSC_NTG   (1<<0) /* No GFX turbo support */
0139 #define   PSC_NTPC  (1<<1) /* No CPU turbo support */
0140 #define   PSC_PP_DEF    (0<<2) /* Perf policy up to driver */
0141 #define   PSP_PP_PC (1<<2) /* BIOS prefers CPU perf */
0142 #define   PSP_PP_BAL    (2<<2) /* BIOS wants balanced perf */
0143 #define   PSP_PP_GFX    (3<<2) /* BIOS prefers GFX perf */
0144 #define   PSP_PBRT  (1<<4) /* BIOS run time support */
0145 #define THM_CTV1    0x30
0146 #define   CTV_TEMP_ERROR (1<<15)
0147 #define   CTV_TEMP_MASK 0x3f
0148 #define   CTV_
0149 #define THM_CTV2    0x32
0150 #define THM_CEC     0x34 /* undocumented power accumulator in joules */
0151 #define THM_AE      0x3f
0152 #define THM_HTS     0x50 /* 32 bits */
0153 #define   HTS_PCPL_MASK (0x7fe00000)
0154 #define   HTS_PCPL_SHIFT 21
0155 #define   HTS_GPL_MASK  (0x001ff000)
0156 #define   HTS_GPL_SHIFT 12
0157 #define   HTS_PP_MASK   (0x00000c00)
0158 #define   HTS_PP_SHIFT  10
0159 #define   HTS_PP_DEF    0
0160 #define   HTS_PP_PROC   1
0161 #define   HTS_PP_BAL    2
0162 #define   HTS_PP_GFX    3
0163 #define   HTS_PCTD_DIS  (1<<9)
0164 #define   HTS_GTD_DIS   (1<<8)
0165 #define   HTS_PTL_MASK  (0x000000fe)
0166 #define   HTS_PTL_SHIFT 1
0167 #define   HTS_NVV   (1<<0)
0168 #define THM_HTSHI   0x54 /* 16 bits */
0169 #define   HTS2_PPL_MASK     (0x03ff)
0170 #define   HTS2_PRST_MASK    (0x3c00)
0171 #define   HTS2_PRST_SHIFT   10
0172 #define   HTS2_PRST_UNLOADED    0
0173 #define   HTS2_PRST_RUNNING 1
0174 #define   HTS2_PRST_TDISOP  2 /* turbo disabled due to power */
0175 #define   HTS2_PRST_TDISHT  3 /* turbo disabled due to high temp */
0176 #define   HTS2_PRST_TDISUSR 4 /* user disabled turbo */
0177 #define   HTS2_PRST_TDISPLAT    5 /* platform disabled turbo */
0178 #define   HTS2_PRST_TDISPM  6 /* power management disabled turbo */
0179 #define   HTS2_PRST_TDISERR 7 /* some kind of error disabled turbo */
0180 #define THM_PTL     0x56
0181 #define THM_MGTV    0x58
0182 #define   TV_MASK   0x000000000000ff00
0183 #define   TV_SHIFT  8
0184 #define THM_PTV     0x60
0185 #define   PTV_MASK  0x00ff
0186 #define THM_MMGPC   0x64
0187 #define THM_MPPC    0x66
0188 #define THM_MPCPC   0x68
0189 #define THM_TSPIEN  0x82
0190 #define   TSPIEN_AUX_LOHI   (1<<0)
0191 #define   TSPIEN_HOT_LOHI   (1<<1)
0192 #define   TSPIEN_CRIT_LOHI  (1<<2)
0193 #define   TSPIEN_AUX2_LOHI  (1<<3)
0194 #define THM_TSLOCK  0x83
0195 #define THM_ATR     0x84
0196 #define THM_TOF     0x87
0197 #define THM_STS     0x98
0198 #define   STS_PCPL_MASK     (0x7fe00000)
0199 #define   STS_PCPL_SHIFT    21
0200 #define   STS_GPL_MASK      (0x001ff000)
0201 #define   STS_GPL_SHIFT     12
0202 #define   STS_PP_MASK       (0x00000c00)
0203 #define   STS_PP_SHIFT      10
0204 #define   STS_PP_DEF        0
0205 #define   STS_PP_PROC       1
0206 #define   STS_PP_BAL        2
0207 #define   STS_PP_GFX        3
0208 #define   STS_PCTD_DIS      (1<<9)
0209 #define   STS_GTD_DIS       (1<<8)
0210 #define   STS_PTL_MASK      (0x000000fe)
0211 #define   STS_PTL_SHIFT     1
0212 #define   STS_NVV       (1<<0)
0213 #define THM_SEC     0x9c
0214 #define   SEC_ACK   (1<<0)
0215 #define THM_TC3     0xa4
0216 #define THM_TC1     0xa8
0217 #define   STS_PPL_MASK      (0x0003ff00)
0218 #define   STS_PPL_SHIFT     16
0219 #define THM_TC2     0xac
0220 #define THM_DTV     0xb0
0221 #define THM_ITV     0xd8
0222 #define   ITV_ME_SEQNO_MASK 0x00ff0000 /* ME should update every ~200ms */
0223 #define   ITV_ME_SEQNO_SHIFT (16)
0224 #define   ITV_MCH_TEMP_MASK 0x0000ff00
0225 #define   ITV_MCH_TEMP_SHIFT (8)
0226 #define   ITV_PCH_TEMP_MASK 0x000000ff
0227 
0228 #define thm_readb(off) readb(ips->regmap + (off))
0229 #define thm_readw(off) readw(ips->regmap + (off))
0230 #define thm_readl(off) readl(ips->regmap + (off))
0231 #define thm_readq(off) readq(ips->regmap + (off))
0232 
0233 #define thm_writeb(off, val) writeb((val), ips->regmap + (off))
0234 #define thm_writew(off, val) writew((val), ips->regmap + (off))
0235 #define thm_writel(off, val) writel((val), ips->regmap + (off))
0236 
/* Wake interval for the adjust thread (ms); kept at 5s due to ME interaction */
static const int IPS_ADJUST_PERIOD = 5000; /* ms */
/* Set when i915 loads after IPS has probed — presumably checked by a later
 * registration path; TODO confirm against the rest of the file */
static bool late_i915_load = false;

/* For initial average collection */
static const int IPS_SAMPLE_PERIOD = 200; /* ms */
static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
0244 
/* Per-SKU limits (MCP = multi-chip package, i.e. CPU + GPU together) */
struct ips_mcp_limits {
	int mcp_power_limit; /* mW units */
	int core_power_limit; /* mW */
	int mch_power_limit; /* mW */
	int core_temp_limit; /* degrees C */
	int mch_temp_limit; /* degrees C */
};
0253 
/* Max temps are -10 degrees C to avoid PROCHOT# */

/* sv/lv/ulv presumably standard/low/ultra-low voltage SKUs — confirm */
static struct ips_mcp_limits ips_sv_limits = {
	.mcp_power_limit = 35000,
	.core_power_limit = 29000,
	.mch_power_limit = 20000,
	.core_temp_limit = 95,
	.mch_temp_limit = 90
};

static struct ips_mcp_limits ips_lv_limits = {
	.mcp_power_limit = 25000,
	.core_power_limit = 21000,
	.mch_power_limit = 13000,
	.core_temp_limit = 95,
	.mch_temp_limit = 90
};

static struct ips_mcp_limits ips_ulv_limits = {
	.mcp_power_limit = 18000,
	.core_power_limit = 14000,
	.mch_power_limit = 11000,
	.core_temp_limit = 95,
	.mch_temp_limit = 90
};
0279 
/* Main driver state: device handles, worker threads, running averages,
 * and the turbo limits/flags shared between the monitor and adjust threads. */
struct ips_driver {
	struct device *dev;
	void __iomem *regmap;	/* mapped thermal device registers (THM_*) */
	int irq;

	struct task_struct *monitor;	/* ips_monitor() thread */
	struct task_struct *adjust;	/* ips_adjust() thread */
	struct dentry *debug_root;
	struct timer_list timer;	/* wakes the monitor thread */

	/* Average CPU core temps (all averages in .01 degrees C for precision) */
	u16 ctv1_avg_temp;
	u16 ctv2_avg_temp;
	/* GMCH average */
	u16 mch_avg_temp;
	/* Average for the CPU (both cores?) */
	u16 mcp_avg_temp;
	/* Average power consumption (in mW) */
	u32 cpu_avg_power;
	u32 mch_avg_power;

	/* Offset values */
	u16 cta_val;
	u16 pta_val;
	u16 mgta_val;

	/* Maximums & prefs, protected by turbo status lock */
	spinlock_t turbo_status_lock;
	u16 mcp_temp_limit;
	u16 mcp_power_limit;
	u16 core_power_limit;
	u16 mch_power_limit;
	bool cpu_turbo_enabled;		/* policy: may CPU turbo be used? */
	bool __cpu_turbo_on;		/* actual MSR state we last wrote */
	bool gpu_turbo_enabled;
	bool __gpu_turbo_on;
	bool gpu_preferred;
	bool poll_turbo_status;		/* BIOS runtime support: re-read regs */
	bool second_cpu;
	bool turbo_toggle_allowed;
	struct ips_mcp_limits *limits;	/* per-SKU table (sv/lv/ulv) */

	/* Optional MCH interfaces for if i915 is in use */
	unsigned long (*read_mch_val)(void);
	bool (*gpu_raise)(void);
	bool (*gpu_lower)(void);
	bool (*gpu_busy)(void);
	bool (*gpu_turbo_disable)(void);

	/* For restoration at unload */
	u64 orig_turbo_limit;
	u64 orig_turbo_ratios;
};
0333 
0334 static bool
0335 ips_gpu_turbo_enabled(struct ips_driver *ips);
0336 
0337 /**
0338  * ips_cpu_busy - is CPU busy?
0339  * @ips: IPS driver struct
0340  *
0341  * Check CPU for load to see whether we should increase its thermal budget.
0342  *
0343  * RETURNS:
0344  * True if the CPU could use more power, false otherwise.
0345  */
0346 static bool ips_cpu_busy(struct ips_driver *ips)
0347 {
0348     if ((avenrun[0] >> FSHIFT) > 1)
0349         return true;
0350 
0351     return false;
0352 }
0353 
0354 /**
0355  * ips_cpu_raise - raise CPU power clamp
0356  * @ips: IPS driver struct
0357  *
0358  * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
0359  * this platform.
0360  *
0361  * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
0362  * long as we haven't hit the TDP limit for the SKU).
0363  */
static void ips_cpu_raise(struct ips_driver *ips)
{
	u64 turbo_override;
	u16 cur_tdp_limit, new_tdp_limit;

	/* Policy says no CPU turbo: leave the clamp alone */
	if (!ips->cpu_turbo_enabled)
		return;

	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);

	/* TDP field appears to be in 1/8 W units (8 == 1 W below) */
	cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
	new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */

	/* Clamp to SKU TDP limit */
	/* (*10)/8 scales the MSR units to core_power_limit's units —
	 * TODO confirm exact units against the Auburndale EDS */
	if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
		new_tdp_limit = cur_tdp_limit;

	/* Mirror the new clamp into the thermal device for the ME */
	thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);

	/* Enable the override bits first, then write the new limit */
	turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);

	turbo_override &= ~TURBO_TDP_MASK;
	turbo_override |= new_tdp_limit;

	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
}
0391 
0392 /**
0393  * ips_cpu_lower - lower CPU power clamp
0394  * @ips: IPS driver struct
0395  *
0396  * Lower CPU power clamp b %IPS_CPU_STEP if possible.
0397  *
0398  * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
0399  * as low as the platform limits will allow (though we could go lower there
0400  * wouldn't be much point).
0401  */
static void ips_cpu_lower(struct ips_driver *ips)
{
	u64 turbo_override;
	u16 cur_limit, new_limit;

	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);

	cur_limit = turbo_override & TURBO_TDP_MASK;
	new_limit = cur_limit - 8; /* 1W decrease */

	/* Don't go below the platform's original (boot-time) TDP limit */
	if (new_limit  < (ips->orig_turbo_limit & TURBO_TDP_MASK))
		new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;

	/* Mirror the new clamp into the thermal device for the ME */
	thm_writew(THM_MPCPC, (new_limit * 10) / 8);

	/* Enable the override bits first, then write the new limit */
	turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);

	turbo_override &= ~TURBO_TDP_MASK;
	turbo_override |= new_limit;

	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
}
0426 
0427 /**
0428  * do_enable_cpu_turbo - internal turbo enable function
0429  * @data: unused
0430  *
0431  * Internal function for actually updating MSRs.  When we enable/disable
0432  * turbo, we need to do it on each CPU; this function is the one called
0433  * by on_each_cpu() when needed.
0434  */
0435 static void do_enable_cpu_turbo(void *data)
0436 {
0437     u64 perf_ctl;
0438 
0439     rdmsrl(IA32_PERF_CTL, perf_ctl);
0440     if (perf_ctl & IA32_PERF_TURBO_DIS) {
0441         perf_ctl &= ~IA32_PERF_TURBO_DIS;
0442         wrmsrl(IA32_PERF_CTL, perf_ctl);
0443     }
0444 }
0445 
0446 /**
0447  * ips_enable_cpu_turbo - enable turbo mode on all CPUs
0448  * @ips: IPS driver struct
0449  *
0450  * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
0451  * all logical threads.
0452  */
0453 static void ips_enable_cpu_turbo(struct ips_driver *ips)
0454 {
0455     /* Already on, no need to mess with MSRs */
0456     if (ips->__cpu_turbo_on)
0457         return;
0458 
0459     if (ips->turbo_toggle_allowed)
0460         on_each_cpu(do_enable_cpu_turbo, ips, 1);
0461 
0462     ips->__cpu_turbo_on = true;
0463 }
0464 
0465 /**
0466  * do_disable_cpu_turbo - internal turbo disable function
0467  * @data: unused
0468  *
0469  * Internal function for actually updating MSRs.  When we enable/disable
0470  * turbo, we need to do it on each CPU; this function is the one called
0471  * by on_each_cpu() when needed.
0472  */
0473 static void do_disable_cpu_turbo(void *data)
0474 {
0475     u64 perf_ctl;
0476 
0477     rdmsrl(IA32_PERF_CTL, perf_ctl);
0478     if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
0479         perf_ctl |= IA32_PERF_TURBO_DIS;
0480         wrmsrl(IA32_PERF_CTL, perf_ctl);
0481     }
0482 }
0483 
0484 /**
0485  * ips_disable_cpu_turbo - disable turbo mode on all CPUs
0486  * @ips: IPS driver struct
0487  *
0488  * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
0489  * all logical threads.
0490  */
0491 static void ips_disable_cpu_turbo(struct ips_driver *ips)
0492 {
0493     /* Already off, leave it */
0494     if (!ips->__cpu_turbo_on)
0495         return;
0496 
0497     if (ips->turbo_toggle_allowed)
0498         on_each_cpu(do_disable_cpu_turbo, ips, 1);
0499 
0500     ips->__cpu_turbo_on = false;
0501 }
0502 
0503 /**
0504  * ips_gpu_busy - is GPU busy?
0505  * @ips: IPS driver struct
0506  *
0507  * Check GPU for load to see whether we should increase its thermal budget.
0508  * We need to call into the i915 driver in this case.
0509  *
0510  * RETURNS:
0511  * True if the GPU could use more power, false otherwise.
0512  */
0513 static bool ips_gpu_busy(struct ips_driver *ips)
0514 {
0515     if (!ips_gpu_turbo_enabled(ips))
0516         return false;
0517 
0518     return ips->gpu_busy();
0519 }
0520 
0521 /**
0522  * ips_gpu_raise - raise GPU power clamp
0523  * @ips: IPS driver struct
0524  *
0525  * Raise the GPU frequency/power if possible.  We need to call into the
0526  * i915 driver in this case.
0527  */
0528 static void ips_gpu_raise(struct ips_driver *ips)
0529 {
0530     if (!ips_gpu_turbo_enabled(ips))
0531         return;
0532 
0533     if (!ips->gpu_raise())
0534         ips->gpu_turbo_enabled = false;
0535 
0536     return;
0537 }
0538 
0539 /**
0540  * ips_gpu_lower - lower GPU power clamp
0541  * @ips: IPS driver struct
0542  *
0543  * Lower GPU frequency/power if possible.  Need to call i915.
0544  */
0545 static void ips_gpu_lower(struct ips_driver *ips)
0546 {
0547     if (!ips_gpu_turbo_enabled(ips))
0548         return;
0549 
0550     if (!ips->gpu_lower())
0551         ips->gpu_turbo_enabled = false;
0552 
0553     return;
0554 }
0555 
/**
 * ips_enable_gpu_turbo - notify the gfx driver turbo is available
 * @ips: IPS driver struct
 *
 * Record that GPU turbo may be used.  No i915 call is made here; only
 * our internal state flag is updated.
 */
static void ips_enable_gpu_turbo(struct ips_driver *ips)
{
	if (ips->__gpu_turbo_on)
		return;
	ips->__gpu_turbo_on = true;
}
0569 
0570 /**
0571  * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
0572  * @ips: IPS driver struct
0573  *
0574  * Request that the graphics driver disable turbo mode.
0575  */
0576 static void ips_disable_gpu_turbo(struct ips_driver *ips)
0577 {
0578     /* Avoid calling i915 if turbo is already disabled */
0579     if (!ips->__gpu_turbo_on)
0580         return;
0581 
0582     if (!ips->gpu_turbo_disable())
0583         dev_err(ips->dev, "failed to disable graphics turbo\n");
0584     else
0585         ips->__gpu_turbo_on = false;
0586 }
0587 
0588 /**
0589  * mcp_exceeded - check whether we're outside our thermal & power limits
0590  * @ips: IPS driver struct
0591  *
0592  * Check whether the MCP is over its thermal or power budget.
0593  */
0594 static bool mcp_exceeded(struct ips_driver *ips)
0595 {
0596     unsigned long flags;
0597     bool ret = false;
0598     u32 temp_limit;
0599     u32 avg_power;
0600 
0601     spin_lock_irqsave(&ips->turbo_status_lock, flags);
0602 
0603     temp_limit = ips->mcp_temp_limit * 100;
0604     if (ips->mcp_avg_temp > temp_limit)
0605         ret = true;
0606 
0607     avg_power = ips->cpu_avg_power + ips->mch_avg_power;
0608     if (avg_power > ips->mcp_power_limit)
0609         ret = true;
0610 
0611     spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
0612 
0613     return ret;
0614 }
0615 
0616 /**
0617  * cpu_exceeded - check whether a CPU core is outside its limits
0618  * @ips: IPS driver struct
0619  * @cpu: CPU number to check
0620  *
0621  * Check a given CPU's average temp or power is over its limit.
0622  */
static bool cpu_exceeded(struct ips_driver *ips, int cpu)
{
	unsigned long flags;
	int avg;
	bool ret = false;

	spin_lock_irqsave(&ips->turbo_status_lock, flags);
	/* Averages are in .01 degrees C, limits in whole degrees */
	avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
	if (avg > (ips->limits->core_temp_limit * 100))
		ret = true;
	/*
	 * NOTE(review): cpu_avg_power is in mW while core_power_limit comes
	 * from THM_MPCPC; the *100 scaling here looks unit-suspect — confirm
	 * against the EDS register definitions before changing.
	 */
	if (ips->cpu_avg_power > ips->core_power_limit * 100)
		ret = true;
	spin_unlock_irqrestore(&ips->turbo_status_lock, flags);

	if (ret)
		dev_info(ips->dev, "CPU power or thermal limit exceeded\n");

	return ret;
}
0642 
0643 /**
0644  * mch_exceeded - check whether the GPU is over budget
0645  * @ips: IPS driver struct
0646  *
0647  * Check the MCH temp & power against their maximums.
0648  */
0649 static bool mch_exceeded(struct ips_driver *ips)
0650 {
0651     unsigned long flags;
0652     bool ret = false;
0653 
0654     spin_lock_irqsave(&ips->turbo_status_lock, flags);
0655     if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
0656         ret = true;
0657     if (ips->mch_avg_power > ips->mch_power_limit)
0658         ret = true;
0659     spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
0660 
0661     return ret;
0662 }
0663 
0664 /**
0665  * verify_limits - verify BIOS provided limits
0666  * @ips: IPS structure
0667  *
0668  * BIOS can optionally provide non-default limits for power and temp.  Check
0669  * them here and use the defaults if the BIOS values are not provided or
0670  * are otherwise unusable.
0671  */
0672 static void verify_limits(struct ips_driver *ips)
0673 {
0674     if (ips->mcp_power_limit < ips->limits->mcp_power_limit ||
0675         ips->mcp_power_limit > 35000)
0676         ips->mcp_power_limit = ips->limits->mcp_power_limit;
0677 
0678     if (ips->mcp_temp_limit < ips->limits->core_temp_limit ||
0679         ips->mcp_temp_limit < ips->limits->mch_temp_limit ||
0680         ips->mcp_temp_limit > 150)
0681         ips->mcp_temp_limit = min(ips->limits->core_temp_limit,
0682                       ips->limits->mch_temp_limit);
0683 }
0684 
0685 /**
0686  * update_turbo_limits - get various limits & settings from regs
0687  * @ips: IPS driver struct
0688  *
0689  * Update the IPS power & temp limits, along with turbo enable flags,
0690  * based on latest register contents.
0691  *
0692  * Used at init time and for runtime BIOS support, which requires polling
0693  * the regs for updates (as a result of AC->DC transition for example).
0694  *
0695  * LOCKING:
0696  * Caller must hold turbo_status_lock (outside of init)
0697  */
static void update_turbo_limits(struct ips_driver *ips)
{
	u32 hts = thm_readl(THM_HTS);

	ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
	/*
	 * Disable turbo for now, until we can figure out why the power
	 * figures are wrong.  (Deliberate override of the value read above;
	 * do not "fix" without resolving the power reporting issue.)
	 */
	ips->cpu_turbo_enabled = false;

	/* Only honor GPU turbo when i915 has hooked up its busy callback */
	if (ips->gpu_busy)
		ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);

	/* Refresh power/temp limits from the thermal device registers */
	ips->core_power_limit = thm_readw(THM_MPCPC);
	ips->mch_power_limit = thm_readw(THM_MMGPC);
	ips->mcp_temp_limit = thm_readw(THM_PTL);
	ips->mcp_power_limit = thm_readw(THM_MPPC);

	verify_limits(ips);
	/* Ignore BIOS CPU vs GPU pref */
}
0720 
0721 /**
0722  * ips_adjust - adjust power clamp based on thermal state
0723  * @data: ips driver structure
0724  *
0725  * Wake up every 5s or so and check whether we should adjust the power clamp.
0726  * Check CPU and GPU load to determine which needs adjustment.  There are
0727  * several things to consider here:
0728  *   - do we need to adjust up or down?
0729  *   - is CPU busy?
0730  *   - is GPU busy?
0731  *   - is CPU in turbo?
0732  *   - is GPU in turbo?
0733  *   - is CPU or GPU preferred? (CPU is default)
0734  *
0735  * So, given the above, we do the following:
0736  *   - up (TDP available)
0737  *     - CPU not busy, GPU not busy - nothing
0738  *     - CPU busy, GPU not busy - adjust CPU up
0739  *     - CPU not busy, GPU busy - adjust GPU up
0740  *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
0741  *       non-preferred unit if necessary
0742  *   - down (at TDP limit)
0743  *     - adjust both CPU and GPU down if possible
0744  *
0745         cpu+ gpu+   cpu+gpu-    cpu-gpu+    cpu-gpu-
0746 cpu < gpu < cpu+gpu+    cpu+        gpu+        nothing
0747 cpu < gpu >=    cpu+gpu-(mcp<)  cpu+gpu-(mcp<)  gpu-        gpu-
0748 cpu >= gpu <    cpu-gpu+(mcp<)  cpu-        cpu-gpu+(mcp<)  cpu-
0749 cpu >= gpu >=   cpu-gpu-    cpu-gpu-    cpu-gpu-    cpu-gpu-
0750  *
0751  */
static int ips_adjust(void *data)
{
	struct ips_driver *ips = data;
	unsigned long flags;

	dev_dbg(ips->dev, "starting ips-adjust thread\n");

	/*
	 * Adjust CPU and GPU clamps every 5s if needed.  Doing it more
	 * often isn't recommended due to ME interaction.
	 */
	do {
		bool cpu_busy = ips_cpu_busy(ips);
		bool gpu_busy = ips_gpu_busy(ips);

		/* Re-read limits when BIOS runtime support requires polling */
		spin_lock_irqsave(&ips->turbo_status_lock, flags);
		if (ips->poll_turbo_status)
			update_turbo_limits(ips);
		spin_unlock_irqrestore(&ips->turbo_status_lock, flags);

		/* Update turbo status if necessary */
		if (ips->cpu_turbo_enabled)
			ips_enable_cpu_turbo(ips);
		else
			ips_disable_cpu_turbo(ips);

		if (ips->gpu_turbo_enabled)
			ips_enable_gpu_turbo(ips);
		else
			ips_disable_gpu_turbo(ips);

		/* We're outside our comfort zone, crank them down */
		if (mcp_exceeded(ips)) {
			ips_cpu_lower(ips);
			ips_gpu_lower(ips);
			goto sleep;
		}

		/* Headroom available: raise whichever unit is busy,
		 * otherwise drift its clamp back down */
		if (!cpu_exceeded(ips, 0) && cpu_busy)
			ips_cpu_raise(ips);
		else
			ips_cpu_lower(ips);

		if (!mch_exceeded(ips) && gpu_busy)
			ips_gpu_raise(ips);
		else
			ips_gpu_lower(ips);

sleep:
		schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
	} while (!kthread_should_stop());

	dev_dbg(ips->dev, "ips-adjust thread stopped\n");

	return 0;
}
0808 
0809 /*
0810  * Helpers for reading out temp/power values and calculating their
0811  * averages for the decision making and monitoring functions.
0812  */
0813 
0814 static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
0815 {
0816     u64 total = 0;
0817     int i;
0818     u16 avg;
0819 
0820     for (i = 0; i < IPS_SAMPLE_COUNT; i++)
0821         total += (u64)(array[i] * 100);
0822 
0823     do_div(total, IPS_SAMPLE_COUNT);
0824 
0825     avg = (u16)total;
0826 
0827     return avg;
0828 }
0829 
/* Read the GMCH temp from THM_MGTV and calibrate with THM_MGTA slope/offset.
 * The computed value is deliberately discarded — see the return below. */
static u16 read_mgtv(struct ips_driver *ips)
{
	u16 __maybe_unused ret;
	u64 slope, offset;
	u64 val;

	/* Raw temp value lives in bits 15:8 of THM_MGTV */
	val = thm_readq(THM_MGTV);
	val = (val & TV_MASK) >> TV_SHIFT;

	slope = offset = thm_readw(THM_MGTA);
	slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
	offset = offset & MGTA_OFFSET_MASK;

	ret = ((val * slope + 0x40) >> 7) + offset;

	return 0; /* MCH temp reporting buggy */
}
0847 
0848 static u16 read_ptv(struct ips_driver *ips)
0849 {
0850     u16 val;
0851 
0852     val = thm_readw(THM_PTV) & PTV_MASK;
0853 
0854     return val;
0855 }
0856 
0857 static u16 read_ctv(struct ips_driver *ips, int cpu)
0858 {
0859     int reg = cpu ? THM_CTV2 : THM_CTV1;
0860     u16 val;
0861 
0862     val = thm_readw(reg);
0863     if (!(val & CTV_TEMP_ERROR))
0864         val = (val) >> 6; /* discard fractional component */
0865     else
0866         val = 0;
0867 
0868     return val;
0869 }
0870 
/* Sample the CPU energy accumulator and convert the delta to mW.
 * @last holds the previous accumulator reading and is updated here. */
static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
{
	u32 val;
	u32 ret;

	/*
	 * CEC is in joules/65535.  Take difference over time to
	 * get watts.
	 */
	val = thm_readl(THM_CEC);

	/* period is in ms and we want mW */
	ret = (((val - *last) * 1000) / period);
	ret = (ret * 1000) / 65535;
	*last = val;

	/*
	 * NOTE(review): the computed mW value is discarded and 0 returned,
	 * consistent with the "power figures are wrong" workaround in
	 * update_turbo_limits() — confirm before changing to return ret.
	 */
	return 0;
}
0889 
0890 static const u16 temp_decay_factor = 2;
0891 static u16 update_average_temp(u16 avg, u16 val)
0892 {
0893     u16 ret;
0894 
0895     /* Multiply by 100 for extra precision */
0896     ret = (val * 100 / temp_decay_factor) +
0897         (((temp_decay_factor - 1) * avg) / temp_decay_factor);
0898     return ret;
0899 }
0900 
0901 static const u16 power_decay_factor = 2;
0902 static u16 update_average_power(u32 avg, u32 val)
0903 {
0904     u32 ret;
0905 
0906     ret = (val / power_decay_factor) +
0907         (((power_decay_factor - 1) * avg) / power_decay_factor);
0908 
0909     return ret;
0910 }
0911 
0912 static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
0913 {
0914     u64 total = 0;
0915     u32 avg;
0916     int i;
0917 
0918     for (i = 0; i < IPS_SAMPLE_COUNT; i++)
0919         total += array[i];
0920 
0921     do_div(total, IPS_SAMPLE_COUNT);
0922     avg = (u32)total;
0923 
0924     return avg;
0925 }
0926 
/* Timer callback: kick the monitor thread for its next sample pass */
static void monitor_timeout(struct timer_list *t)
{
	struct ips_driver *ips = from_timer(ips, t, timer);
	wake_up_process(ips->monitor);
}
0932 
/**
 * ips_monitor - temp/power monitoring thread
 * @data: ips driver structure
 *
 * This is the main function for the IPS driver.  It monitors power and
 * temperature in the MCP and adjusts CPU and GPU power clamps accordingly.
 *
 * We keep a 5s moving average of power consumption and temperature.  Using
 * that data, along with CPU vs GPU preference, we adjust the power clamps
 * up or down.
 *
 * Returns 0 when kthread_should_stop() tells the thread to exit.
 */
static int ips_monitor(void *data)
{
    struct ips_driver *ips = data;
    unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
    int i;
    u32 *cpu_samples, *mchp_samples, old_cpu_power;
    u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples;
    u8 cur_seqno, last_seqno;

    /* Temporary buffers used only for the initial averaging pass below */
    mcp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
    ctv1_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
    ctv2_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
    mch_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
    cpu_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL);
    mchp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL);
    if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
            !cpu_samples || !mchp_samples) {
        dev_err(ips->dev,
            "failed to allocate sample array, ips disabled\n");
        /* kfree(NULL) is a no-op, so freeing all is safe on partial failure */
        kfree(mcp_samples);
        kfree(ctv1_samples);
        kfree(ctv2_samples);
        kfree(mch_samples);
        kfree(cpu_samples);
        kfree(mchp_samples);
        return -ENOMEM;
    }

    /* Baseline the ME sequence number so we can detect a hung ME later */
    last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
        ITV_ME_SEQNO_SHIFT;
    seqno_timestamp = get_jiffies_64();

    /* Seed the energy counter for get_cpu_power() delta calculations */
    old_cpu_power = thm_readl(THM_CEC);
    schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));

    /* Collect an initial average */
    for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
        u32 mchp, cpu_power;
        u16 val;

        mcp_samples[i] = read_ptv(ips);

        val = read_ctv(ips, 0);
        ctv1_samples[i] = val;

        val = read_ctv(ips, 1);
        ctv2_samples[i] = val;

        val = read_mgtv(ips);
        mch_samples[i] = val;

        cpu_power = get_cpu_power(ips, &old_cpu_power,
                      IPS_SAMPLE_PERIOD);
        cpu_samples[i] = cpu_power;

        /* GPU power is only available once the i915 hook is wired up */
        if (ips->read_mch_val) {
            mchp = ips->read_mch_val();
            mchp_samples[i] = mchp;
        }

        schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
        if (kthread_should_stop())
            break;
    }

    /* Publish the initial averages, then drop the sample buffers */
    ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
    ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
    ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
    ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
    ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
    ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
    kfree(mcp_samples);
    kfree(ctv1_samples);
    kfree(ctv2_samples);
    kfree(mch_samples);
    kfree(cpu_samples);
    kfree(mchp_samples);

    /* Start the adjustment thread now that we have data */
    wake_up_process(ips->adjust);

    /*
     * Ok, now we have an initial avg.  From here on out, we track the
     * running avg using a decaying average calculation.  This allows
     * us to reduce the sample frequency if the CPU and GPU are idle.
     */
    old_cpu_power = thm_readl(THM_CEC);
    schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
    last_sample_period = IPS_SAMPLE_PERIOD;

    timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE);
    do {
        u32 cpu_val, mch_val;
        u16 val;

        /* MCP itself */
        val = read_ptv(ips);
        ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);

        /* Processor 0 */
        val = read_ctv(ips, 0);
        ips->ctv1_avg_temp =
            update_average_temp(ips->ctv1_avg_temp, val);
        /* Power */
        cpu_val = get_cpu_power(ips, &old_cpu_power,
                    last_sample_period);
        ips->cpu_avg_power =
            update_average_power(ips->cpu_avg_power, cpu_val);

        if (ips->second_cpu) {
            /* Processor 1 */
            val = read_ctv(ips, 1);
            ips->ctv2_avg_temp =
                update_average_temp(ips->ctv2_avg_temp, val);
        }

        /* MCH */
        val = read_mgtv(ips);
        ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
        /* Power */
        if (ips->read_mch_val) {
            mch_val = ips->read_mch_val();
            ips->mch_avg_power =
                update_average_power(ips->mch_avg_power,
                             mch_val);
        }

        /*
         * Make sure ME is updating thermal regs.
         * Note:
         * If it's been more than a second since the last update,
         * the ME is probably hung.
         */
        cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
            ITV_ME_SEQNO_SHIFT;
        if (cur_seqno == last_seqno &&
            time_after(jiffies, seqno_timestamp + HZ)) {
            dev_warn(ips->dev,
                 "ME failed to update for more than 1s, likely hung\n");
        } else {
            seqno_timestamp = get_jiffies_64();
            last_seqno = cur_seqno;
        }

        last_msecs = jiffies_to_msecs(jiffies);
        expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);

        /* Sleep until the deferrable timer (or a stop request) wakes us */
        __set_current_state(TASK_INTERRUPTIBLE);
        mod_timer(&ips->timer, expire);
        schedule();

        /* Calculate actual sample period for power averaging */
        last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
        if (!last_sample_period)
            last_sample_period = 1;
    } while (!kthread_should_stop());

    del_timer_sync(&ips->timer);

    dev_dbg(ips->dev, "ips-monitor thread stopped\n");

    return 0;
}
1107 
#if 0
/*
 * Debug helpers for dumping the thermal register block; compiled out.
 * Wrapped in do { } while (0) so they behave as single statements.
 */
#define THM_DUMPW(reg) \
    do { \
    u16 val = thm_readw(reg); \
    dev_dbg(ips->dev, #reg ": 0x%04x\n", val); \
    } while (0)
#define THM_DUMPL(reg) \
    do { \
    u32 val = thm_readl(reg); \
    dev_dbg(ips->dev, #reg ": 0x%08x\n", val); \
    } while (0)
#define THM_DUMPQ(reg) \
    do { \
    u64 val = thm_readq(reg); \
    dev_dbg(ips->dev, #reg ": 0x%016llx\n", val); \
    } while (0)

/* Dump the interesting thermal status/limit registers for debugging */
static void dump_thermal_info(struct ips_driver *ips)
{
    u16 ptl;

    ptl = thm_readw(THM_PTL);
    dev_dbg(ips->dev, "Processor temp limit: %d\n", ptl);

    THM_DUMPW(THM_CTA);
    THM_DUMPW(THM_TRC);
    THM_DUMPW(THM_CTV1);
    THM_DUMPL(THM_STS);
    THM_DUMPW(THM_PTV);
    THM_DUMPQ(THM_MGTV);
}
#endif
1140 
/**
 * ips_irq_handler - handle temperature triggers and other IPS events
 * @irq: irq number
 * @arg: ips driver structure (registered as the shared-IRQ cookie)
 *
 * Handle temperature limit trigger events, generally by lowering the clamps.
 * If we're at a critical limit, we clamp back to the lowest possible value
 * to prevent emergency shutdown.
 *
 * Returns IRQ_NONE if neither event status register shows activity (the
 * line is shared), IRQ_HANDLED otherwise.
 */
static irqreturn_t ips_irq_handler(int irq, void *arg)
{
    struct ips_driver *ips = arg;
    u8 tses = thm_readb(THM_TSES);
    u8 tes = thm_readb(THM_TES);

    /* Shared interrupt line: bail if this device raised nothing */
    if (!tses && !tes)
        return IRQ_NONE;

    dev_info(ips->dev, "TSES: 0x%02x\n", tses);
    dev_info(ips->dev, "TES: 0x%02x\n", tes);

    /* STS update from EC? */
    if (tes & 1) {
        u32 sts, tc1;

        sts = thm_readl(THM_STS);
        tc1 = thm_readl(THM_TC1);

        /* New values valid?  Then refresh our cached limits under the lock */
        if (sts & STS_NVV) {
            spin_lock(&ips->turbo_status_lock);
            ips->core_power_limit = (sts & STS_PCPL_MASK) >>
                STS_PCPL_SHIFT;
            ips->mch_power_limit = (sts & STS_GPL_MASK) >>
                STS_GPL_SHIFT;
            /* ignore EC CPU vs GPU pref */
            ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
            /*
             * Disable turbo for now, until we can figure
             * out why the power figures are wrong
             */
            ips->cpu_turbo_enabled = false;
            if (ips->gpu_busy)
                ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
            ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
                STS_PTL_SHIFT;
            ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
                STS_PPL_SHIFT;
            verify_limits(ips);
            spin_unlock(&ips->turbo_status_lock);

            /* Ack the status update so the ME can post the next one */
            thm_writeb(THM_SEC, SEC_ACK);
        }
        /* Write-1-to-clear the event bits we just serviced */
        thm_writeb(THM_TES, tes);
    }

    /* Thermal trip */
    if (tses) {
        dev_warn(ips->dev, "thermal trip occurred, tses: 0x%04x\n",
             tses);
        thm_writeb(THM_TSES, tses);
    }

    return IRQ_HANDLED;
}
1205 
1206 #ifndef CONFIG_DEBUG_FS
1207 static void ips_debugfs_init(struct ips_driver *ips) { return; }
1208 static void ips_debugfs_cleanup(struct ips_driver *ips) { return; }
1209 #else
1210 
1211 /* Expose current state and limits in debugfs if possible */
1212 
1213 static int cpu_temp_show(struct seq_file *m, void *data)
1214 {
1215     struct ips_driver *ips = m->private;
1216 
1217     seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
1218            ips->ctv1_avg_temp % 100);
1219 
1220     return 0;
1221 }
1222 DEFINE_SHOW_ATTRIBUTE(cpu_temp);
1223 
1224 static int cpu_power_show(struct seq_file *m, void *data)
1225 {
1226     struct ips_driver *ips = m->private;
1227 
1228     seq_printf(m, "%dmW\n", ips->cpu_avg_power);
1229 
1230     return 0;
1231 }
1232 DEFINE_SHOW_ATTRIBUTE(cpu_power);
1233 
1234 static int cpu_clamp_show(struct seq_file *m, void *data)
1235 {
1236     u64 turbo_override;
1237     int tdp, tdc;
1238 
1239     rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1240 
1241     tdp = (int)(turbo_override & TURBO_TDP_MASK);
1242     tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1243 
1244     /* Convert to .1W/A units */
1245     tdp = tdp * 10 / 8;
1246     tdc = tdc * 10 / 8;
1247 
1248     /* Watts Amperes */
1249     seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1250            tdc / 10, tdc % 10);
1251 
1252     return 0;
1253 }
1254 DEFINE_SHOW_ATTRIBUTE(cpu_clamp);
1255 
1256 static int mch_temp_show(struct seq_file *m, void *data)
1257 {
1258     struct ips_driver *ips = m->private;
1259 
1260     seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
1261            ips->mch_avg_temp % 100);
1262 
1263     return 0;
1264 }
1265 DEFINE_SHOW_ATTRIBUTE(mch_temp);
1266 
1267 static int mch_power_show(struct seq_file *m, void *data)
1268 {
1269     struct ips_driver *ips = m->private;
1270 
1271     seq_printf(m, "%dmW\n", ips->mch_avg_power);
1272 
1273     return 0;
1274 }
1275 DEFINE_SHOW_ATTRIBUTE(mch_power);
1276 
/* Tear down the whole "ips" debugfs directory, including all files in it */
static void ips_debugfs_cleanup(struct ips_driver *ips)
{
    debugfs_remove_recursive(ips->debug_root);
}
1281 
1282 static void ips_debugfs_init(struct ips_driver *ips)
1283 {
1284     ips->debug_root = debugfs_create_dir("ips", NULL);
1285 
1286     debugfs_create_file("cpu_temp", 0444, ips->debug_root, ips, &cpu_temp_fops);
1287     debugfs_create_file("cpu_power", 0444, ips->debug_root, ips, &cpu_power_fops);
1288     debugfs_create_file("cpu_clamp", 0444, ips->debug_root, ips, &cpu_clamp_fops);
1289     debugfs_create_file("mch_temp", 0444, ips->debug_root, ips, &mch_temp_fops);
1290     debugfs_create_file("mch_power", 0444, ips->debug_root, ips, &mch_power_fops);
1291 }
1292 #endif /* CONFIG_DEBUG_FS */
1293 
1294 /**
1295  * ips_detect_cpu - detect whether CPU supports IPS
1296  *
1297  * Walk our list and see if we're on a supported CPU.  If we find one,
1298  * return the limits for it.
1299  */
1300 static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1301 {
1302     u64 turbo_power, misc_en;
1303     struct ips_mcp_limits *limits = NULL;
1304     u16 tdp;
1305 
1306     if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1307         dev_info(ips->dev, "Non-IPS CPU detected.\n");
1308         return NULL;
1309     }
1310 
1311     rdmsrl(IA32_MISC_ENABLE, misc_en);
1312     /*
1313      * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1314      * turbo manually or we'll get an illegal MSR access, even though
1315      * turbo will still be available.
1316      */
1317     if (misc_en & IA32_MISC_TURBO_EN)
1318         ips->turbo_toggle_allowed = true;
1319     else
1320         ips->turbo_toggle_allowed = false;
1321 
1322     if (strstr(boot_cpu_data.x86_model_id, "CPU       M"))
1323         limits = &ips_sv_limits;
1324     else if (strstr(boot_cpu_data.x86_model_id, "CPU       L"))
1325         limits = &ips_lv_limits;
1326     else if (strstr(boot_cpu_data.x86_model_id, "CPU       U"))
1327         limits = &ips_ulv_limits;
1328     else {
1329         dev_info(ips->dev, "No CPUID match found.\n");
1330         return NULL;
1331     }
1332 
1333     rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1334     tdp = turbo_power & TURBO_TDP_MASK;
1335 
1336     /* Sanity check TDP against CPU */
1337     if (limits->core_power_limit != (tdp / 8) * 1000) {
1338         dev_info(ips->dev,
1339              "CPU TDP doesn't match expected value (found %d, expected %d)\n",
1340              tdp / 8, limits->core_power_limit / 1000);
1341         limits->core_power_limit = (tdp / 8) * 1000;
1342     }
1343 
1344     return limits;
1345 }
1346 
/**
 * ips_get_i915_syms - try to get GPU control methods from i915 driver
 * @ips: IPS driver
 *
 * The i915 driver exports several interfaces to allow the IPS driver to
 * monitor and control graphics turbo mode.  If we can find them, we can
 * enable graphics turbo, otherwise we must disable it to avoid exceeding
 * thermal and power limits in the MCP.
 *
 * Returns true if all five symbols were acquired; on failure the goto
 * chain drops every reference taken so far and returns false, leaving
 * no symbol references held.
 */
static bool ips_get_i915_syms(struct ips_driver *ips)
{
    ips->read_mch_val = symbol_get(i915_read_mch_val);
    if (!ips->read_mch_val)
        goto out_err;
    ips->gpu_raise = symbol_get(i915_gpu_raise);
    if (!ips->gpu_raise)
        goto out_put_mch;
    ips->gpu_lower = symbol_get(i915_gpu_lower);
    if (!ips->gpu_lower)
        goto out_put_raise;
    ips->gpu_busy = symbol_get(i915_gpu_busy);
    if (!ips->gpu_busy)
        goto out_put_lower;
    ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
    if (!ips->gpu_turbo_disable)
        goto out_put_busy;

    return true;

    /* Unwind in reverse acquisition order */
out_put_busy:
    symbol_put(i915_gpu_busy);
out_put_lower:
    symbol_put(i915_gpu_lower);
out_put_raise:
    symbol_put(i915_gpu_raise);
out_put_mch:
    symbol_put(i915_read_mch_val);
out_err:
    return false;
}
1387 
1388 static bool
1389 ips_gpu_turbo_enabled(struct ips_driver *ips)
1390 {
1391     if (!ips->gpu_busy && late_i915_load) {
1392         if (ips_get_i915_syms(ips)) {
1393             dev_info(ips->dev,
1394                  "i915 driver attached, reenabling gpu turbo\n");
1395             ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
1396         }
1397     }
1398 
1399     return ips->gpu_turbo_enabled;
1400 }
1401 
/**
 * ips_link_to_i915_driver - notification hook called when i915 loads
 *
 * We can't cleanly get at the various ips_driver structs from
 * this caller (the i915 driver), so just set a flag saying
 * that it's time to try getting the symbols again.
 */
void
ips_link_to_i915_driver(void)
{
    late_i915_load = true;
}
EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
1412 
/* PCI match table: the Ibex Peak thermal sensor device (dev 31, fn 6) */
static const struct pci_device_id ips_id_table[] = {
    { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
    { 0, }
};

MODULE_DEVICE_TABLE(pci, ips_id_table);
1419 
/* DMI callback: log the blacklisted system; returning 1 flags a match */
static int ips_blacklist_callback(const struct dmi_system_id *id)
{
    pr_info("Blacklisted intel_ips for %s\n", id->ident);
    return 1;
}
1425 
/* Systems where IPS is known to misbehave; probe bails out on a DMI match */
static const struct dmi_system_id ips_blacklist[] = {
    {
        .callback = ips_blacklist_callback,
        .ident = "HP ProBook",
        .matches = {
            DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
            DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
        },
    },
    { } /* terminating entry */
};
1437 
/*
 * ips_probe - set up the IPS driver for a matching thermal sensor device.
 *
 * Validates CPU and thermal-device support, maps the register BAR, hooks
 * the (optional) i915 symbols, requests the IRQ, and spawns the adjust
 * and monitor kthreads.  PCI resources use devm/pcim managed variants;
 * IRQ and kthreads are torn down manually on the error paths.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
    u64 platform_info;
    struct ips_driver *ips;
    u32 hts;
    int ret = 0;
    u16 htshi, trc, trc_required_mask;
    u8 tse;

    /* Bail early on systems where IPS is known to be broken */
    if (dmi_check_system(ips_blacklist))
        return -ENODEV;

    ips = devm_kzalloc(&dev->dev, sizeof(*ips), GFP_KERNEL);
    if (!ips)
        return -ENOMEM;

    spin_lock_init(&ips->turbo_status_lock);
    ips->dev = &dev->dev;

    ips->limits = ips_detect_cpu(ips);
    if (!ips->limits) {
        dev_info(&dev->dev, "IPS not supported on this CPU\n");
        return -ENXIO;
    }

    ret = pcim_enable_device(dev);
    if (ret) {
        dev_err(&dev->dev, "can't enable PCI device, aborting\n");
        return ret;
    }

    /* Map BAR 0, which holds the thermal register block */
    ret = pcim_iomap_regions(dev, 1 << 0, pci_name(dev));
    if (ret) {
        dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
        return ret;
    }
    ips->regmap = pcim_iomap_table(dev)[0];

    pci_set_drvdata(dev, ips);

    /* The BIOS/ME must have enabled the thermal sensor for us to use it */
    tse = thm_readb(THM_TSE);
    if (tse != TSE_EN) {
        dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
        return -ENXIO;
    }

    /* Core 1, core power and MCH reporting are mandatory; core 2 optional */
    trc = thm_readw(THM_TRC);
    trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
    if ((trc & trc_required_mask) != trc_required_mask) {
        dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
        return -ENXIO;
    }

    if (trc & TRC_CORE2_EN)
        ips->second_cpu = true;

    update_turbo_limits(ips);
    dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
        ips->mcp_power_limit / 10);
    dev_dbg(&dev->dev, "max core power clamp: %dW\n",
        ips->core_power_limit / 10);
    /* BIOS may update limits at runtime */
    if (thm_readl(THM_PSC) & PSP_PBRT)
        ips->poll_turbo_status = true;

    /* GPU turbo requires the i915 hooks; may be retried later if i915 loads */
    if (!ips_get_i915_syms(ips)) {
        dev_info(&dev->dev, "failed to get i915 symbols, graphics turbo disabled until i915 loads\n");
        ips->gpu_turbo_enabled = false;
    } else {
        dev_dbg(&dev->dev, "graphics turbo enabled\n");
        ips->gpu_turbo_enabled = true;
    }

    /*
     * Check PLATFORM_INFO MSR to make sure this chip is
     * turbo capable.
     */
    rdmsrl(PLATFORM_INFO, platform_info);
    if (!(platform_info & PLATFORM_TDP)) {
        dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
        return -ENODEV;
    }

    /*
     * IRQ handler for ME interaction
     * Note: don't use MSI here as the PCH has bugs.
     */
    ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
    if (ret < 0)
        return ret;

    ips->irq = pci_irq_vector(dev, 0);

    ret = request_irq(ips->irq, ips_irq_handler, IRQF_SHARED, "ips", ips);
    if (ret) {
        dev_err(&dev->dev, "request irq failed, aborting\n");
        return ret;
    }

    /* Enable aux, hot & critical interrupts */
    thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
           TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
    thm_writeb(THM_TEN, TEN_UPDATE_EN);

    /* Collect adjustment values */
    ips->cta_val = thm_readw(THM_CTA);
    ips->pta_val = thm_readw(THM_PTA);
    ips->mgta_val = thm_readw(THM_MGTA);

    /* Save turbo limits & ratios */
    rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);

    ips_disable_cpu_turbo(ips);
    ips->cpu_turbo_enabled = false;

    /* Create thermal adjust thread */
    ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
    if (IS_ERR(ips->adjust)) {
        dev_err(&dev->dev,
            "failed to create thermal adjust thread, aborting\n");
        ret = -ENOMEM;
        goto error_free_irq;

    }

    /*
     * Set up the work queue and monitor thread. The monitor thread
     * will wake up ips_adjust thread.
     */
    ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
    if (IS_ERR(ips->monitor)) {
        dev_err(&dev->dev,
            "failed to create thermal monitor thread, aborting\n");
        ret = -ENOMEM;
        goto error_thread_cleanup;
    }

    /* Advertise our limits/state to the ME via the host turbo status regs */
    hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
        (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
    htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;

    thm_writew(THM_HTSHI, htshi);
    thm_writel(THM_HTS, hts);

    ips_debugfs_init(ips);

    dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
         ips->mcp_temp_limit);
    return ret;

error_thread_cleanup:
    kthread_stop(ips->adjust);
error_free_irq:
    free_irq(ips->irq, ips);
    pci_free_irq_vectors(dev);
    return ret;
}
1595 
/*
 * ips_remove - undo ips_probe: debugfs, i915 symbol refs, turbo MSR state,
 * IRQ and kthreads.  devm/pcim-managed resources are released by the core.
 */
static void ips_remove(struct pci_dev *dev)
{
    struct ips_driver *ips = pci_get_drvdata(dev);
    u64 turbo_override;

    ips_debugfs_cleanup(ips);

    /* Release i915 driver */
    if (ips->read_mch_val)
        symbol_put(i915_read_mch_val);
    if (ips->gpu_raise)
        symbol_put(i915_gpu_raise);
    if (ips->gpu_lower)
        symbol_put(i915_gpu_lower);
    if (ips->gpu_busy)
        symbol_put(i915_gpu_busy);
    if (ips->gpu_turbo_disable)
        symbol_put(i915_gpu_turbo_disable);

    /* Clear our overrides, then restore the limits saved at probe time */
    rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
    turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
    wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
    wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);

    /* Free the IRQ before stopping the threads it can wake */
    free_irq(ips->irq, ips);
    pci_free_irq_vectors(dev);
    if (ips->adjust)
        kthread_stop(ips->adjust);
    if (ips->monitor)
        kthread_stop(ips->monitor);
    dev_dbg(&dev->dev, "IPS driver removed\n");
}
1628 
/* PCI driver glue; module_pci_driver() generates the init/exit boilerplate */
static struct pci_driver ips_pci_driver = {
    .name = "intel ips",
    .id_table = ips_id_table,
    .probe = ips_probe,
    .remove = ips_remove,
};

module_pci_driver(ips_pci_driver);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
MODULE_DESCRIPTION("Intelligent Power Sharing Driver");