Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h/17h
0004  *      processor hardware monitoring
0005  *
0006  * Copyright (c) 2009 Clemens Ladisch <clemens@ladisch.de>
0007  * Copyright (c) 2020 Guenter Roeck <linux@roeck-us.net>
0008  *
0009  * Implementation notes:
0010  * - CCD register address information as well as the calculation to
0011  *   convert raw register values is from https://github.com/ocerman/zenpower.
0012  *   The information is not confirmed from chip datasheets, but experiments
0013  *   suggest that it provides reasonable temperature values.
0014  */
0015 
0016 #include <linux/bitops.h>
0017 #include <linux/err.h>
0018 #include <linux/hwmon.h>
0019 #include <linux/init.h>
0020 #include <linux/module.h>
0021 #include <linux/pci.h>
0022 #include <linux/pci_ids.h>
0023 #include <asm/amd_nb.h>
0024 #include <asm/processor.h>
0025 
0026 MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor");
0027 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
0028 MODULE_LICENSE("GPL");
0029 
0030 static bool force;
0031 module_param(force, bool, 0444);
0032 MODULE_PARM_DESC(force, "force loading on processors with erratum 319");
0033 
0034 /* Provide lock for writing to NB_SMU_IND_ADDR */
0035 static DEFINE_MUTEX(nb_smu_ind_mutex);
0036 
/* Fallback for trees whose pci_ids.h does not provide this device ID yet. */
#ifndef PCI_DEVICE_ID_AMD_15H_M70H_NB_F3
#define PCI_DEVICE_ID_AMD_15H_M70H_NB_F3	0x15b3
#endif

/* Package type field of CPUID function 0x80000001, register EBX. */
#define CPUID_PKGTYPE_MASK		GENMASK(31, 28)
#define CPUID_PKGTYPE_F			0x00000000
#define CPUID_PKGTYPE_AM2R2_AM3		0x10000000

/* PCI function 2: DRAM controller. */
#define REG_DCT0_CONFIG_HIGH		0x094
#define  DDR3_MODE			BIT(8)

/* PCI function 3: miscellaneous control. */
#define REG_HARDWARE_THERMAL_CONTROL	0x64
#define  HTC_ENABLE			BIT(0)

#define REG_REPORTED_TEMPERATURE	0xa4

#define REG_NORTHBRIDGE_CAPABILITIES	0xe8
#define  NB_CAP_HTC			BIT(10)

/*
 * On F15h M60h and M70h the two registers above are not in function 3
 * config space any more; they are reached indirectly at
 *   D0F0xBC_xD820_0C64 [Hardware Temperature Control]
 *   D0F0xBC_xD820_0CA4 [Reported Temperature Control]
 */
#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET	0xd8200c64
#define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET	0xd8200ca4

/* Shared by the Zen CPU families (Family 17h, 18h and 19h). */
#define ZEN_REPORTED_TEMP_CTRL_BASE	0x00059800

/* SMN address of CCD temperature register @x, @offset bytes past the base. */
#define ZEN_CCD_TEMP(offset, x) \
	(ZEN_REPORTED_TEMP_CTRL_BASE + (offset) + ((x) * 4))
#define ZEN_CCD_TEMP_VALID		BIT(11)		/* reading is valid */
#define ZEN_CCD_TEMP_MASK		GENMASK(10, 0)	/* raw reading */

/* Layout of the reported temperature register as used by get_raw_temp(). */
#define ZEN_CUR_TEMP_SHIFT		21
#define ZEN_CUR_TEMP_RANGE_SEL_MASK	BIT(19)
0078 
0079 struct k10temp_data {
0080     struct pci_dev *pdev;
0081     void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
0082     void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
0083     int temp_offset;
0084     u32 temp_adjust_mask;
0085     u32 show_temp;
0086     bool is_zen;
0087     u32 ccd_offset;
0088 };
0089 
/*
 * Bit positions in k10temp_data.show_temp; they double as hwmon channel
 * numbers: 0 = Tctl, 1 = Tdie, 2 + n = CCD n (zero based).
 */
#define TCTL_BIT		0
#define TDIE_BIT		1
#define TCCD_BIT(x)		((x) + 2)

/* Non-zero when @channel is enabled in @d->show_temp. */
#define HAVE_TEMP(d, channel)	((d)->show_temp & BIT(channel))
#define HAVE_TDIE(d)		HAVE_TEMP(d, TDIE_BIT)
0096 
0097 struct tctl_offset {
0098     u8 model;
0099     char const *id;
0100     int offset;
0101 };
0102 
0103 static const struct tctl_offset tctl_offset_table[] = {
0104     { 0x17, "AMD Ryzen 5 1600X", 20000 },
0105     { 0x17, "AMD Ryzen 7 1700X", 20000 },
0106     { 0x17, "AMD Ryzen 7 1800X", 20000 },
0107     { 0x17, "AMD Ryzen 7 2700X", 10000 },
0108     { 0x17, "AMD Ryzen Threadripper 19", 27000 }, /* 19{00,20,50}X */
0109     { 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */
0110 };
0111 
0112 static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
0113 {
0114     pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
0115 }
0116 
0117 static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
0118 {
0119     pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
0120 }
0121 
0122 static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
0123                   unsigned int base, int offset, u32 *val)
0124 {
0125     mutex_lock(&nb_smu_ind_mutex);
0126     pci_bus_write_config_dword(pdev->bus, devfn,
0127                    base, offset);
0128     pci_bus_read_config_dword(pdev->bus, devfn,
0129                   base + 4, val);
0130     mutex_unlock(&nb_smu_ind_mutex);
0131 }
0132 
0133 static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
0134 {
0135     amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
0136               F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
0137 }
0138 
0139 static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
0140 {
0141     amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
0142               F15H_M60H_REPORTED_TEMP_CTRL_OFFSET, regval);
0143 }
0144 
0145 static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval)
0146 {
0147     amd_smn_read(amd_pci_dev_to_node_id(pdev),
0148              ZEN_REPORTED_TEMP_CTRL_BASE, regval);
0149 }
0150 
0151 static long get_raw_temp(struct k10temp_data *data)
0152 {
0153     u32 regval;
0154     long temp;
0155 
0156     data->read_tempreg(data->pdev, &regval);
0157     temp = (regval >> ZEN_CUR_TEMP_SHIFT) * 125;
0158     if (regval & data->temp_adjust_mask)
0159         temp -= 49000;
0160     return temp;
0161 }
0162 
/* Label reported for each hwmon temperature channel, indexed by channel. */
static const char *k10temp_temp_label[] = {
	[0]  = "Tctl",
	[1]  = "Tdie",
	[2]  = "Tccd1",
	[3]  = "Tccd2",
	[4]  = "Tccd3",
	[5]  = "Tccd4",
	[6]  = "Tccd5",
	[7]  = "Tccd6",
	[8]  = "Tccd7",
	[9]  = "Tccd8",
	[10] = "Tccd9",
	[11] = "Tccd10",
	[12] = "Tccd11",
	[13] = "Tccd12",
};
0179 
0180 static int k10temp_read_labels(struct device *dev,
0181                    enum hwmon_sensor_types type,
0182                    u32 attr, int channel, const char **str)
0183 {
0184     switch (type) {
0185     case hwmon_temp:
0186         *str = k10temp_temp_label[channel];
0187         break;
0188     default:
0189         return -EOPNOTSUPP;
0190     }
0191     return 0;
0192 }
0193 
0194 static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
0195                  long *val)
0196 {
0197     struct k10temp_data *data = dev_get_drvdata(dev);
0198     u32 regval;
0199 
0200     switch (attr) {
0201     case hwmon_temp_input:
0202         switch (channel) {
0203         case 0:     /* Tctl */
0204             *val = get_raw_temp(data);
0205             if (*val < 0)
0206                 *val = 0;
0207             break;
0208         case 1:     /* Tdie */
0209             *val = get_raw_temp(data) - data->temp_offset;
0210             if (*val < 0)
0211                 *val = 0;
0212             break;
0213         case 2 ... 13:      /* Tccd{1-12} */
0214             amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
0215                      ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
0216                           &regval);
0217             *val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000;
0218             break;
0219         default:
0220             return -EOPNOTSUPP;
0221         }
0222         break;
0223     case hwmon_temp_max:
0224         *val = 70 * 1000;
0225         break;
0226     case hwmon_temp_crit:
0227         data->read_htcreg(data->pdev, &regval);
0228         *val = ((regval >> 16) & 0x7f) * 500 + 52000;
0229         break;
0230     case hwmon_temp_crit_hyst:
0231         data->read_htcreg(data->pdev, &regval);
0232         *val = (((regval >> 16) & 0x7f)
0233             - ((regval >> 24) & 0xf)) * 500 + 52000;
0234         break;
0235     default:
0236         return -EOPNOTSUPP;
0237     }
0238     return 0;
0239 }
0240 
0241 static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
0242             u32 attr, int channel, long *val)
0243 {
0244     switch (type) {
0245     case hwmon_temp:
0246         return k10temp_read_temp(dev, attr, channel, val);
0247     default:
0248         return -EOPNOTSUPP;
0249     }
0250 }
0251 
0252 static umode_t k10temp_is_visible(const void *_data,
0253                   enum hwmon_sensor_types type,
0254                   u32 attr, int channel)
0255 {
0256     const struct k10temp_data *data = _data;
0257     struct pci_dev *pdev = data->pdev;
0258     u32 reg;
0259 
0260     switch (type) {
0261     case hwmon_temp:
0262         switch (attr) {
0263         case hwmon_temp_input:
0264             if (!HAVE_TEMP(data, channel))
0265                 return 0;
0266             break;
0267         case hwmon_temp_max:
0268             if (channel || data->is_zen)
0269                 return 0;
0270             break;
0271         case hwmon_temp_crit:
0272         case hwmon_temp_crit_hyst:
0273             if (channel || !data->read_htcreg)
0274                 return 0;
0275 
0276             pci_read_config_dword(pdev,
0277                           REG_NORTHBRIDGE_CAPABILITIES,
0278                           &reg);
0279             if (!(reg & NB_CAP_HTC))
0280                 return 0;
0281 
0282             data->read_htcreg(data->pdev, &reg);
0283             if (!(reg & HTC_ENABLE))
0284                 return 0;
0285             break;
0286         case hwmon_temp_label:
0287             /* Show temperature labels only on Zen CPUs */
0288             if (!data->is_zen || !HAVE_TEMP(data, channel))
0289                 return 0;
0290             break;
0291         default:
0292             return 0;
0293         }
0294         break;
0295     default:
0296         return 0;
0297     }
0298     return 0444;
0299 }
0300 
0301 static bool has_erratum_319(struct pci_dev *pdev)
0302 {
0303     u32 pkg_type, reg_dram_cfg;
0304 
0305     if (boot_cpu_data.x86 != 0x10)
0306         return false;
0307 
0308     /*
0309      * Erratum 319: The thermal sensor of Socket F/AM2+ processors
0310      *              may be unreliable.
0311      */
0312     pkg_type = cpuid_ebx(0x80000001) & CPUID_PKGTYPE_MASK;
0313     if (pkg_type == CPUID_PKGTYPE_F)
0314         return true;
0315     if (pkg_type != CPUID_PKGTYPE_AM2R2_AM3)
0316         return false;
0317 
0318     /* DDR3 memory implies socket AM3, which is good */
0319     pci_bus_read_config_dword(pdev->bus,
0320                   PCI_DEVFN(PCI_SLOT(pdev->devfn), 2),
0321                   REG_DCT0_CONFIG_HIGH, &reg_dram_cfg);
0322     if (reg_dram_cfg & DDR3_MODE)
0323         return false;
0324 
0325     /*
0326      * Unfortunately it is possible to run a socket AM3 CPU with DDR2
0327      * memory. We blacklist all the cores which do exist in socket AM2+
0328      * format. It still isn't perfect, as RB-C2 cores exist in both AM2+
0329      * and AM3 formats, but that's the best we can do.
0330      */
0331     return boot_cpu_data.x86_model < 4 ||
0332            (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
0333 }
0334 
0335 static const struct hwmon_channel_info *k10temp_info[] = {
0336     HWMON_CHANNEL_INFO(temp,
0337                HWMON_T_INPUT | HWMON_T_MAX |
0338                HWMON_T_CRIT | HWMON_T_CRIT_HYST |
0339                HWMON_T_LABEL,
0340                HWMON_T_INPUT | HWMON_T_LABEL,
0341                HWMON_T_INPUT | HWMON_T_LABEL,
0342                HWMON_T_INPUT | HWMON_T_LABEL,
0343                HWMON_T_INPUT | HWMON_T_LABEL,
0344                HWMON_T_INPUT | HWMON_T_LABEL,
0345                HWMON_T_INPUT | HWMON_T_LABEL,
0346                HWMON_T_INPUT | HWMON_T_LABEL,
0347                HWMON_T_INPUT | HWMON_T_LABEL,
0348                HWMON_T_INPUT | HWMON_T_LABEL,
0349                HWMON_T_INPUT | HWMON_T_LABEL,
0350                HWMON_T_INPUT | HWMON_T_LABEL,
0351                HWMON_T_INPUT | HWMON_T_LABEL,
0352                HWMON_T_INPUT | HWMON_T_LABEL),
0353     NULL
0354 };
0355 
0356 static const struct hwmon_ops k10temp_hwmon_ops = {
0357     .is_visible = k10temp_is_visible,
0358     .read = k10temp_read,
0359     .read_string = k10temp_read_labels,
0360 };
0361 
0362 static const struct hwmon_chip_info k10temp_chip_info = {
0363     .ops = &k10temp_hwmon_ops,
0364     .info = k10temp_info,
0365 };
0366 
0367 static void k10temp_get_ccd_support(struct pci_dev *pdev,
0368                     struct k10temp_data *data, int limit)
0369 {
0370     u32 regval;
0371     int i;
0372 
0373     for (i = 0; i < limit; i++) {
0374         amd_smn_read(amd_pci_dev_to_node_id(pdev),
0375                  ZEN_CCD_TEMP(data->ccd_offset, i), &regval);
0376         if (regval & ZEN_CCD_TEMP_VALID)
0377             data->show_temp |= BIT(TCCD_BIT(i));
0378     }
0379 }
0380 
0381 static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
0382 {
0383     int unreliable = has_erratum_319(pdev);
0384     struct device *dev = &pdev->dev;
0385     struct k10temp_data *data;
0386     struct device *hwmon_dev;
0387     int i;
0388 
0389     if (unreliable) {
0390         if (!force) {
0391             dev_err(dev,
0392                 "unreliable CPU thermal sensor; monitoring disabled\n");
0393             return -ENODEV;
0394         }
0395         dev_warn(dev,
0396              "unreliable CPU thermal sensor; check erratum 319\n");
0397     }
0398 
0399     data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
0400     if (!data)
0401         return -ENOMEM;
0402 
0403     data->pdev = pdev;
0404     data->show_temp |= BIT(TCTL_BIT);   /* Always show Tctl */
0405 
0406     if (boot_cpu_data.x86 == 0x15 &&
0407         ((boot_cpu_data.x86_model & 0xf0) == 0x60 ||
0408          (boot_cpu_data.x86_model & 0xf0) == 0x70)) {
0409         data->read_htcreg = read_htcreg_nb_f15;
0410         data->read_tempreg = read_tempreg_nb_f15;
0411     } else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
0412         data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
0413         data->read_tempreg = read_tempreg_nb_zen;
0414         data->is_zen = true;
0415 
0416         switch (boot_cpu_data.x86_model) {
0417         case 0x1:   /* Zen */
0418         case 0x8:   /* Zen+ */
0419         case 0x11:  /* Zen APU */
0420         case 0x18:  /* Zen+ APU */
0421             data->ccd_offset = 0x154;
0422             k10temp_get_ccd_support(pdev, data, 4);
0423             break;
0424         case 0x31:  /* Zen2 Threadripper */
0425         case 0x60:  /* Renoir */
0426         case 0x68:  /* Lucienne */
0427         case 0x71:  /* Zen2 */
0428             data->ccd_offset = 0x154;
0429             k10temp_get_ccd_support(pdev, data, 8);
0430             break;
0431         case 0xa0 ... 0xaf:
0432             data->ccd_offset = 0x300;
0433             k10temp_get_ccd_support(pdev, data, 8);
0434             break;
0435         }
0436     } else if (boot_cpu_data.x86 == 0x19) {
0437         data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
0438         data->read_tempreg = read_tempreg_nb_zen;
0439         data->is_zen = true;
0440 
0441         switch (boot_cpu_data.x86_model) {
0442         case 0x0 ... 0x1:   /* Zen3 SP3/TR */
0443         case 0x21:      /* Zen3 Ryzen Desktop */
0444         case 0x50 ... 0x5f: /* Green Sardine */
0445             data->ccd_offset = 0x154;
0446             k10temp_get_ccd_support(pdev, data, 8);
0447             break;
0448         case 0x40 ... 0x4f: /* Yellow Carp */
0449             data->ccd_offset = 0x300;
0450             k10temp_get_ccd_support(pdev, data, 8);
0451             break;
0452         case 0x60 ... 0x6f:
0453         case 0x70 ... 0x7f:
0454             data->ccd_offset = 0x308;
0455             k10temp_get_ccd_support(pdev, data, 8);
0456             break;
0457         case 0x10 ... 0x1f:
0458         case 0xa0 ... 0xaf:
0459             data->ccd_offset = 0x300;
0460             k10temp_get_ccd_support(pdev, data, 12);
0461             break;
0462         }
0463     } else {
0464         data->read_htcreg = read_htcreg_pci;
0465         data->read_tempreg = read_tempreg_pci;
0466     }
0467 
0468     for (i = 0; i < ARRAY_SIZE(tctl_offset_table); i++) {
0469         const struct tctl_offset *entry = &tctl_offset_table[i];
0470 
0471         if (boot_cpu_data.x86 == entry->model &&
0472             strstr(boot_cpu_data.x86_model_id, entry->id)) {
0473             data->show_temp |= BIT(TDIE_BIT);   /* show Tdie */
0474             data->temp_offset = entry->offset;
0475             break;
0476         }
0477     }
0478 
0479     hwmon_dev = devm_hwmon_device_register_with_info(dev, "k10temp", data,
0480                              &k10temp_chip_info,
0481                              NULL);
0482     return PTR_ERR_OR_ZERO(hwmon_dev);
0483 }
0484 
0485 static const struct pci_device_id k10temp_id_table[] = {
0486     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
0487     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
0488     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
0489     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
0490     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
0491     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
0492     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
0493     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M70H_NB_F3) },
0494     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
0495     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
0496     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
0497     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
0498     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
0499     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
0500     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
0501     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) },
0502     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
0503     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) },
0504     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
0505     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
0506     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
0507     { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
0508     { PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
0509     {}
0510 };
0511 MODULE_DEVICE_TABLE(pci, k10temp_id_table);
0512 
0513 static struct pci_driver k10temp_driver = {
0514     .name = "k10temp",
0515     .id_table = k10temp_id_table,
0516     .probe = k10temp_probe,
0517 };
0518 
0519 module_pci_driver(k10temp_driver);