Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * turbostat -- show CPU frequency and C-state residency
0004  * on modern Intel and AMD processors.
0005  *
0006  * Copyright (c) 2022 Intel Corporation.
0007  * Len Brown <len.brown@intel.com>
0008  */
0009 
0010 #define _GNU_SOURCE
0011 #include MSRHEADER
0012 #include INTEL_FAMILY_HEADER
0013 #include <stdarg.h>
0014 #include <stdio.h>
0015 #include <err.h>
0016 #include <unistd.h>
0017 #include <sys/types.h>
0018 #include <sys/wait.h>
0019 #include <sys/stat.h>
0020 #include <sys/select.h>
0021 #include <sys/resource.h>
0022 #include <fcntl.h>
0023 #include <signal.h>
0024 #include <sys/time.h>
0025 #include <stdlib.h>
0026 #include <getopt.h>
0027 #include <dirent.h>
0028 #include <string.h>
0029 #include <ctype.h>
0030 #include <sched.h>
0031 #include <time.h>
0032 #include <cpuid.h>
0033 #include <sys/capability.h>
0034 #include <errno.h>
0035 #include <math.h>
0036 #include <linux/perf_event.h>
0037 #include <asm/unistd.h>
0038 #include <stdbool.h>
0039 
/* Evaluate x and discard the result by casting it to void. */
#define UNUSED(x) ((void)(x))
0041 
0042 /*
0043  * This list matches the column headers, except
0044  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
0045  * 2. Core and CPU are moved to the end, we can't have strings that contain them
0046  *    matching on them for --show and --hide.
0047  */
0048 
/*
 * Buffer size used by sscanf() for added column names.
 * Names are usually truncated to 7 characters, but raw 64-bit counters
 * are printed in 18-character columns.
 */
#define NAME_BYTES 20
#define PATH_BYTES 128

enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };

/*
 * Description of one counter column.  Counters are chained through
 * 'next' into singly linked lists.
 */
struct msr_counter {
    unsigned int msr_num;
    char name[NAME_BYTES];      /* column header text */
    char path[PATH_BYTES];
    unsigned int width;         /* counter width in bits (32 or 64 in the visible users) */
    enum counter_type type;
    enum counter_format format;
    struct msr_counter *next;
    unsigned int flags;         /* bit-wise OR of the flag bits below */
/*
 * Flag bits for 'flags'.  Every flag needs its own bit: SYSFS_PERCPU
 * used to share bit 1 with FLAGS_SHOW, which made the two
 * indistinguishable once stored in 'flags'.
 */
#define FLAGS_HIDE  (1 << 0)
#define FLAGS_SHOW  (1 << 1)
#define SYSFS_PERCPU    (1 << 2)
};
0073 
0074 struct msr_counter bic[] = {
0075     { 0x0, "usec", "", 0, 0, 0, NULL, 0 },
0076     { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
0077     { 0x0, "Package", "", 0, 0, 0, NULL, 0 },
0078     { 0x0, "Node", "", 0, 0, 0, NULL, 0 },
0079     { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
0080     { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
0081     { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
0082     { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
0083     { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
0084     { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
0085     { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
0086     { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
0087     { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
0088     { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
0089     { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
0090     { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
0091     { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
0092     { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
0093     { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
0094     { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
0095     { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
0096     { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
0097     { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
0098     { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
0099     { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
0100     { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
0101     { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
0102     { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
0103     { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
0104     { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
0105     { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
0106     { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
0107     { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
0108     { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
0109     { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
0110     { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
0111     { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
0112     { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
0113     { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
0114     { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
0115     { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
0116     { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
0117     { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
0118     { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
0119     { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
0120     { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
0121     { 0x0, "Core", "", 0, 0, 0, NULL, 0 },
0122     { 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
0123     { 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
0124     { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
0125     { 0x0, "Die", "", 0, 0, 0, NULL, 0 },
0126     { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
0127     { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
0128     { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
0129     { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
0130 };
0131 
/* Number of entries in the built-in counter table bic[]. */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in counter.  Bit N corresponds to bic[N]
 * (bic_lookup() sets 1ULL << index for a matching name).
 */
#define BIC_USEC    (1ULL << 0)
#define BIC_TOD     (1ULL << 1)
#define BIC_Package (1ULL << 2)
#define BIC_Node    (1ULL << 3)
#define BIC_Avg_MHz (1ULL << 4)
#define BIC_Busy    (1ULL << 5)
#define BIC_Bzy_MHz (1ULL << 6)
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ     (1ULL << 8)
#define BIC_SMI     (1ULL << 9)
#define BIC_sysfs   (1ULL << 10)
#define BIC_CPU_c1  (1ULL << 11)
#define BIC_CPU_c3  (1ULL << 12)
#define BIC_CPU_c6  (1ULL << 13)
#define BIC_CPU_c7  (1ULL << 14)
#define BIC_ThreadC (1ULL << 15)
#define BIC_CoreTmp (1ULL << 16)
#define BIC_CoreCnt (1ULL << 17)
#define BIC_PkgTmp  (1ULL << 18)
#define BIC_GFX_rc6 (1ULL << 19)
#define BIC_GFXMHz  (1ULL << 20)
#define BIC_Pkgpc2  (1ULL << 21)
#define BIC_Pkgpc3  (1ULL << 22)
#define BIC_Pkgpc6  (1ULL << 23)
#define BIC_Pkgpc7  (1ULL << 24)
#define BIC_Pkgpc8  (1ULL << 25)
#define BIC_Pkgpc9  (1ULL << 26)
#define BIC_Pkgpc10 (1ULL << 27)
#define BIC_CPU_LPI (1ULL << 28)
#define BIC_SYS_LPI (1ULL << 29)
#define BIC_PkgWatt (1ULL << 30)
#define BIC_CorWatt (1ULL << 31)
#define BIC_GFXWatt (1ULL << 32)
#define BIC_PkgCnt  (1ULL << 33)
#define BIC_RAMWatt (1ULL << 34)
#define BIC_PKG__   (1ULL << 35)
#define BIC_RAM__   (1ULL << 36)
#define BIC_Pkg_J   (1ULL << 37)
#define BIC_Cor_J   (1ULL << 38)
#define BIC_GFX_J   (1ULL << 39)
#define BIC_RAM_J   (1ULL << 40)
#define BIC_Mod_c6  (1ULL << 41)
#define BIC_Totl_c0 (1ULL << 42)
#define BIC_Any_c0  (1ULL << 43)
#define BIC_GFX_c0  (1ULL << 44)
#define BIC_CPUGFX  (1ULL << 45)
#define BIC_Core    (1ULL << 46)
#define BIC_CPU     (1ULL << 47)
#define BIC_APIC    (1ULL << 48)
#define BIC_X2APIC  (1ULL << 49)
#define BIC_Die     (1ULL << 50)
#define BIC_GFXACTMHz   (1ULL << 51)
#define BIC_IPC     (1ULL << 52)
#define BIC_CORE_THROT_CNT  (1ULL << 53)
#define BIC_UNCORE_MHZ      (1ULL << 54)

/* Column groups selectable by keyword (see bic_lookup()). */
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)

#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/* Columns the user wants: everything except BIC_DISABLED_BY_DEFAULT. */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
/* Columns marked present; more are added/removed through the macros below. */
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * Accessors for bic_enabled / bic_present.
 * Every argument is parenthesized so that a compound mask such as
 * (BIC_A | BIC_B) is applied as a whole; without the parentheses
 * "~A | B" and "x & A | B" bind differently from what the caller meant.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define DO_BIC_READ(COUNTER_NAME) (bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & (COUNTER_BIT))
0206 
/* Global program state.  Objects without an initializer start out zero. */

char *proc_stat = "/proc/stat";
FILE *outf;

/* Per-CPU file descriptor caches, indexed by cpu number; 0 means "not opened yet". */
int *fd_percpu;
int *fd_instr_count_percpu;

/* Measurement interval, 5 seconds unless changed. */
struct timeval interval_tv = { .tv_sec = 5, .tv_usec = 0 };
struct timespec interval_ts = { .tv_sec = 5, .tv_nsec = 0 };

/* Save original CPU model */
unsigned int model_orig;

unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int is_hybrid;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;   /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level;
unsigned int max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;

/* Output text buffer and the current write position inside it. */
char *output_buffer;
char *outp;

unsigned int do_rapl;       /* bit-wise OR of RAPL_* bits */
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
int tcc_offset_bits;
double rapl_power_units;
double rapl_time_units;
double rapl_dram_energy_units;
double rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int dis_cstate_prewake;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;

double discover_bclk(unsigned int family, unsigned int model);

/*
 * has_hwp:    IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
 *             IA32_HWP_REQUEST, IA32_HWP_STATUS
 */
unsigned int has_hwp;
unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
0285 
/*
 * RAPL capability bits, tested against do_rapl.
 * The comment before each bit names the MSR(s) it stands for.
 */

/* 0x610 MSR_PKG_POWER_LIMIT, 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG        (1 << 0)
/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_PERF_STATUS    (1 << 1)
/* 0x614 MSR_PKG_POWER_INFO */
#define RAPL_PKG_POWER_INFO (1 << 2)

/* 0x618 MSR_DRAM_POWER_LIMIT, 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM       (1 << 3)
/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_PERF_STATUS   (1 << 4)
/* 0x61c MSR_DRAM_POWER_INFO */
#define RAPL_DRAM_POWER_INFO    (1 << 5)

/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORES_POWER_LIMIT  (1 << 6)
/* 0x63a MSR_PP0_POLICY */
#define RAPL_CORE_POLICY    (1 << 7)

/* 0x640 MSR_PP1_POWER_LIMIT, 0x641 MSR_PP1_ENERGY_STATUS, 0x642 MSR_PP1_POLICY */
#define RAPL_GFX        (1 << 8)

/* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_CORES_ENERGY_STATUS    (1 << 9)
/* Indicates cores energy collection is per-core, not per-package. */
#define RAPL_PER_CORE_ENERGY    (1 << 10)
/*
 * 0xc0010299 MSR_RAPL_PWR_UNIT
 * 0xc001029a MSR_CORE_ENERGY_STAT
 * 0xc001029b MSR_PKG_ENERGY_STAT
 */
#define RAPL_AMD_F17H       (1 << 11)

#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define TJMAX_DEFAULT   100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT   0xc0010299
#define MSR_CORE_ENERGY_STAT    0xc001029a
#define MSR_PKG_ENERGY_STAT 0xc001029b

/* Larger of a and b.  Each argument may be evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
0330 
int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS  1024    /* need to use before probe... */

/* CPU sets and, for each, the size value passed to the CPU_*_S() macros. */
cpu_set_t *cpu_present_set;
cpu_set_t *cpu_affinity_set;
cpu_set_t *cpu_subset;
size_t cpu_present_setsize;
size_t cpu_affinity_setsize;
size_t cpu_subset_size;

/* Sizes of the counter[] arrays in core_data/pkg_data and thread_data. */
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
0340 
0341 struct thread_data {
0342     struct timeval tv_begin;
0343     struct timeval tv_end;
0344     struct timeval tv_delta;
0345     unsigned long long tsc;
0346     unsigned long long aperf;
0347     unsigned long long mperf;
0348     unsigned long long c1;
0349     unsigned long long instr_count;
0350     unsigned long long irq_count;
0351     unsigned int smi_count;
0352     unsigned int cpu_id;
0353     unsigned int apic_id;
0354     unsigned int x2apic_id;
0355     unsigned int flags;
0356     bool is_atom;
0357 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2
0358 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
0359     unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
0360 } *thread_even, *thread_odd;
0361 
0362 struct core_data {
0363     unsigned long long c3;
0364     unsigned long long c6;
0365     unsigned long long c7;
0366     unsigned long long mc6_us;  /* duplicate as per-core for now, even though per module */
0367     unsigned int core_temp_c;
0368     unsigned int core_energy;   /* MSR_CORE_ENERGY_STAT */
0369     unsigned int core_id;
0370     unsigned long long core_throt_cnt;
0371     unsigned long long counter[MAX_ADDED_COUNTERS];
0372 } *core_even, *core_odd;
0373 
0374 struct pkg_data {
0375     unsigned long long pc2;
0376     unsigned long long pc3;
0377     unsigned long long pc6;
0378     unsigned long long pc7;
0379     unsigned long long pc8;
0380     unsigned long long pc9;
0381     unsigned long long pc10;
0382     unsigned long long cpu_lpi;
0383     unsigned long long sys_lpi;
0384     unsigned long long pkg_wtd_core_c0;
0385     unsigned long long pkg_any_core_c0;
0386     unsigned long long pkg_any_gfxe_c0;
0387     unsigned long long pkg_both_core_gfxe_c0;
0388     long long gfx_rc6_ms;
0389     unsigned int gfx_mhz;
0390     unsigned int gfx_act_mhz;
0391     unsigned int package_id;
0392     unsigned long long energy_pkg;  /* MSR_PKG_ENERGY_STATUS */
0393     unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
0394     unsigned long long energy_cores;    /* MSR_PP0_ENERGY_STATUS */
0395     unsigned long long energy_gfx;  /* MSR_PP1_ENERGY_STATUS */
0396     unsigned long long rapl_pkg_perf_status;    /* MSR_PKG_PERF_STATUS */
0397     unsigned long long rapl_dram_perf_status;   /* MSR_DRAM_PERF_STATUS */
0398     unsigned int pkg_temp_c;
0399     unsigned int uncore_mhz;
0400     unsigned long long counter[MAX_ADDED_COUNTERS];
0401 } *package_even, *package_odd;
0402 
/* Argument triples (thread, core, package base) for the odd and even counter sets. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of the thread slot for (pkg_no, node_no, core_no, thread_no) in
 * the flat array starting at thread_base.  The array is laid out
 * package-major, then node, then core, then thread, using the sizes in topo.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
    ((thread_base) +                              \
     ((pkg_no) *                                  \
      topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
     ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
     ((core_no) * topo.threads_per_core) +                    \
     (thread_no))

/* Address of the core slot for (pkg_no, node_no, core_no) in the flat array at core_base. */
#define GET_CORE(core_base, core_no, node_no, pkg_no)           \
    ((core_base) +                          \
     ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +   \
     ((node_no) * topo.cores_per_node) +                \
     (core_no))

/*
 * Address of the package slot pkg_no in the array at pkg_base.
 * Both arguments are parenthesized, like in GET_THREAD/GET_CORE, so that
 * compound expressions are applied as a whole.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
0421 
/*
 * An "accumulated sum" MSR is a monotonically increasing MSR whose value
 * is accumulated periodically, regardless of the register's bit width.
 * One IDX_* slot exists per such MSR; IDX_COUNT is the number of slots.
 */
enum {
    IDX_PKG_ENERGY = 0,
    IDX_DRAM_ENERGY = 1,
    IDX_PP0_ENERGY = 2,
    IDX_PP1_ENERGY = 3,
    IDX_PKG_PERF = 4,
    IDX_DRAM_PERF = 5,
    IDX_COUNT = 6,
};

int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

/*
 * Accumulation state for every IDX_* slot:
 *   get_msr_sum() = sum + (get_msr() - last)
 */
struct msr_sum_array {
    struct {
        /* The accumulated MSR value, updated by the timer */
        unsigned long long sum;
        /* The MSR value recorded by the last timer run */
        unsigned long long last;
    } entries[IDX_COUNT];
};

/* The per-cpu MSR sum array. */
struct msr_sum_array *per_cpu_msr_sum;
0451 
0452 off_t idx_to_offset(int idx)
0453 {
0454     off_t offset;
0455 
0456     switch (idx) {
0457     case IDX_PKG_ENERGY:
0458         if (do_rapl & RAPL_AMD_F17H)
0459             offset = MSR_PKG_ENERGY_STAT;
0460         else
0461             offset = MSR_PKG_ENERGY_STATUS;
0462         break;
0463     case IDX_DRAM_ENERGY:
0464         offset = MSR_DRAM_ENERGY_STATUS;
0465         break;
0466     case IDX_PP0_ENERGY:
0467         offset = MSR_PP0_ENERGY_STATUS;
0468         break;
0469     case IDX_PP1_ENERGY:
0470         offset = MSR_PP1_ENERGY_STATUS;
0471         break;
0472     case IDX_PKG_PERF:
0473         offset = MSR_PKG_PERF_STATUS;
0474         break;
0475     case IDX_DRAM_PERF:
0476         offset = MSR_DRAM_PERF_STATUS;
0477         break;
0478     default:
0479         offset = -1;
0480     }
0481     return offset;
0482 }
0483 
0484 int offset_to_idx(off_t offset)
0485 {
0486     int idx;
0487 
0488     switch (offset) {
0489     case MSR_PKG_ENERGY_STATUS:
0490     case MSR_PKG_ENERGY_STAT:
0491         idx = IDX_PKG_ENERGY;
0492         break;
0493     case MSR_DRAM_ENERGY_STATUS:
0494         idx = IDX_DRAM_ENERGY;
0495         break;
0496     case MSR_PP0_ENERGY_STATUS:
0497         idx = IDX_PP0_ENERGY;
0498         break;
0499     case MSR_PP1_ENERGY_STATUS:
0500         idx = IDX_PP1_ENERGY;
0501         break;
0502     case MSR_PKG_PERF_STATUS:
0503         idx = IDX_PKG_PERF;
0504         break;
0505     case MSR_DRAM_PERF_STATUS:
0506         idx = IDX_DRAM_PERF;
0507         break;
0508     default:
0509         idx = -1;
0510     }
0511     return idx;
0512 }
0513 
0514 int idx_valid(int idx)
0515 {
0516     switch (idx) {
0517     case IDX_PKG_ENERGY:
0518         return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
0519     case IDX_DRAM_ENERGY:
0520         return do_rapl & RAPL_DRAM;
0521     case IDX_PP0_ENERGY:
0522         return do_rapl & RAPL_CORES_ENERGY_STATUS;
0523     case IDX_PP1_ENERGY:
0524         return do_rapl & RAPL_GFX;
0525     case IDX_PKG_PERF:
0526         return do_rapl & RAPL_PKG_PERF_STATUS;
0527     case IDX_DRAM_PERF:
0528         return do_rapl & RAPL_DRAM_PERF_STATUS;
0529     default:
0530         return 0;
0531     }
0532 }
0533 
/*
 * Added counters: a count and the head of a linked list of msr_counter
 * entries for each of the three scopes.
 */
struct sys_counters {
    unsigned int added_thread_counters;
    unsigned int added_core_counters;
    unsigned int added_package_counters;
    struct msr_counter *tp;     /* list walked via ->next when printing thread columns */
    struct msr_counter *cp;
    struct msr_counter *pp;
};

struct sys_counters sys;
0542 
0543 struct system_summary {
0544     struct thread_data threads;
0545     struct core_data cores;
0546     struct pkg_data packages;
0547 } average;
0548 
/* Topology identifiers recorded for one CPU. */
struct cpu_topology {
    int physical_package_id;
    int die_id;
    int logical_cpu_id;
    int physical_node_id;
    int logical_node_id;    /* 0-based count within the package */
    int physical_core_id;
    int thread_id;
    cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
};

struct cpu_topology *cpus;
0559 
/*
 * System-wide topology sizes.  nodes_per_pkg, cores_per_node and
 * threads_per_core are the strides used by GET_THREAD()/GET_CORE().
 */
struct topo_params {
    int num_packages;
    int num_die;
    int num_cpus;
    int num_cores;
    int max_cpu_num;
    int max_node_num;
    int nodes_per_pkg;
    int cores_per_node;
    int threads_per_core;
};

struct topo_params topo;
0571 
struct timeval tv_even;
struct timeval tv_odd;
struct timeval tv_delta;

int *irq_column_2_cpu;      /* /proc/interrupts column numbers */
int *irqs_per_cpu;          /* indexed by cpu_num */

void setup_all_buffers(void);

/* System low-power-idle residency file in use, plus the two known locations. */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
0582 
0583 int cpu_is_not_present(int cpu)
0584 {
0585     return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
0586 }
0587 
0588 /*
0589  * run func(thread, core, package) in topology order
0590  * skip non-present cpus
0591  */
0592 
0593 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
0594          struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
0595 {
0596     int retval, pkg_no, core_no, thread_no, node_no;
0597 
0598     for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
0599         for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
0600             for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
0601                 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
0602                     struct thread_data *t;
0603                     struct core_data *c;
0604                     struct pkg_data *p;
0605 
0606                     t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
0607 
0608                     if (cpu_is_not_present(t->cpu_id))
0609                         continue;
0610 
0611                     c = GET_CORE(core_base, core_no, node_no, pkg_no);
0612                     p = GET_PKG(pkg_base, pkg_no);
0613 
0614                     retval = func(t, c, p);
0615                     if (retval)
0616                         return retval;
0617                 }
0618             }
0619         }
0620     }
0621     return 0;
0622 }
0623 
0624 int cpu_migrate(int cpu)
0625 {
0626     CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
0627     CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
0628     if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
0629         return -1;
0630     else
0631         return 0;
0632 }
0633 
0634 int get_msr_fd(int cpu)
0635 {
0636     char pathname[32];
0637     int fd;
0638 
0639     fd = fd_percpu[cpu];
0640 
0641     if (fd)
0642         return fd;
0643 
0644     sprintf(pathname, "/dev/cpu/%d/msr", cpu);
0645     fd = open(pathname, O_RDONLY);
0646     if (fd < 0)
0647         err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
0648 
0649     fd_percpu[cpu] = fd;
0650 
0651     return fd;
0652 }
0653 
0654 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
0655 {
0656     return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
0657 }
0658 
0659 static int perf_instr_count_open(int cpu_num)
0660 {
0661     struct perf_event_attr pea;
0662     int fd;
0663 
0664     memset(&pea, 0, sizeof(struct perf_event_attr));
0665     pea.type = PERF_TYPE_HARDWARE;
0666     pea.size = sizeof(struct perf_event_attr);
0667     pea.config = PERF_COUNT_HW_INSTRUCTIONS;
0668 
0669     /* counter for cpu_num, including user + kernel and all processes */
0670     fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
0671     if (fd == -1) {
0672         warn("cpu%d: perf instruction counter", cpu_num);
0673         BIC_NOT_PRESENT(BIC_IPC);
0674     }
0675 
0676     return fd;
0677 }
0678 
0679 int get_instr_count_fd(int cpu)
0680 {
0681     if (fd_instr_count_percpu[cpu])
0682         return fd_instr_count_percpu[cpu];
0683 
0684     fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
0685 
0686     return fd_instr_count_percpu[cpu];
0687 }
0688 
/*
 * Read the 64-bit MSR at 'offset' on 'cpu' into *msr.
 * Returns 0 on success.  A short or failed read is fatal: the program
 * exits through err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
    int fd = get_msr_fd(cpu);
    ssize_t nread;

    nread = pread(fd, msr, sizeof(*msr), offset);
    if (nread != sizeof(*msr))
        err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

    return 0;
}
0700 
/*
 * Column names that bic_lookup() did not recognize, kept for later.
 * Each table holds MAX_DEFERRED entries; the *_index variables count
 * the entries in use.
 */
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
int deferred_add_index;
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST };

enum show_hide_mode global_show_hide_mode = HIDE_LIST;
0712 
0713 void help(void)
0714 {
0715     fprintf(outf,
0716         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
0717         "\n"
0718         "Turbostat forks the specified COMMAND and prints statistics\n"
0719         "when COMMAND completes.\n"
0720         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
0721         "to print statistics, until interrupted.\n"
0722         "  -a, --add    add a counter\n"
0723         "         eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
0724         "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
0725         "         {core | package | j,k,l..m,n-p }\n"
0726         "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
0727         "  -D, --Dump   displays the raw counter values\n"
0728         "  -e, --enable [all | column]\n"
0729         "       shows all or the specified disabled column\n"
0730         "  -H, --hide [column|column,column,...]\n"
0731         "       hide the specified column(s)\n"
0732         "  -i, --interval sec.subsec\n"
0733         "       Override default 5-second measurement interval\n"
0734         "  -J, --Joules displays energy in Joules instead of Watts\n"
0735         "  -l, --list   list column headers only\n"
0736         "  -n, --num_iterations num\n"
0737         "       number of the measurement iterations\n"
0738         "  -N, --header_iterations num\n"
0739         "       print header every num iterations\n"
0740         "  -o, --out file\n"
0741         "       create or truncate \"file\" for all output\n"
0742         "  -q, --quiet  skip decoding system configuration header\n"
0743         "  -s, --show [column|column,column,...]\n"
0744         "       show only the specified column(s)\n"
0745         "  -S, --Summary\n"
0746         "       limits output to 1-line system summary per interval\n"
0747         "  -T, --TCC temperature\n"
0748         "       sets the Thermal Control Circuit temperature in\n"
0749         "         degrees Celsius\n"
0750         "  -h, --help   print this help message\n"
0751         "  -v, --version    print version information\n" "\n" "For more help, run \"man turbostat\"\n");
0752 }
0753 
0754 /*
0755  * bic_lookup
0756  * for all the strings in comma separate name_list,
0757  * set the approprate bit in return value.
0758  */
0759 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
0760 {
0761     unsigned int i;
0762     unsigned long long retval = 0;
0763 
0764     while (name_list) {
0765         char *comma;
0766 
0767         comma = strchr(name_list, ',');
0768 
0769         if (comma)
0770             *comma = '\0';
0771 
0772         for (i = 0; i < MAX_BIC; ++i) {
0773             if (!strcmp(name_list, bic[i].name)) {
0774                 retval |= (1ULL << i);
0775                 break;
0776             }
0777             if (!strcmp(name_list, "all")) {
0778                 retval |= ~0;
0779                 break;
0780             } else if (!strcmp(name_list, "topology")) {
0781                 retval |= BIC_TOPOLOGY;
0782                 break;
0783             } else if (!strcmp(name_list, "power")) {
0784                 retval |= BIC_THERMAL_PWR;
0785                 break;
0786             } else if (!strcmp(name_list, "idle")) {
0787                 retval |= BIC_IDLE;
0788                 break;
0789             } else if (!strcmp(name_list, "frequency")) {
0790                 retval |= BIC_FREQUENCY;
0791                 break;
0792             } else if (!strcmp(name_list, "other")) {
0793                 retval |= BIC_OTHER;
0794                 break;
0795             }
0796 
0797         }
0798         if (i == MAX_BIC) {
0799             if (mode == SHOW_LIST) {
0800                 deferred_add_names[deferred_add_index++] = name_list;
0801                 if (deferred_add_index >= MAX_DEFERRED) {
0802                     fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
0803                         MAX_DEFERRED, name_list);
0804                     help();
0805                     exit(1);
0806                 }
0807             } else {
0808                 deferred_skip_names[deferred_skip_index++] = name_list;
0809                 if (debug)
0810                     fprintf(stderr, "deferred \"%s\"\n", name_list);
0811                 if (deferred_skip_index >= MAX_DEFERRED) {
0812                     fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
0813                         MAX_DEFERRED, name_list);
0814                     help();
0815                     exit(1);
0816                 }
0817             }
0818         }
0819 
0820         name_list = comma;
0821         if (name_list)
0822             name_list++;
0823 
0824     }
0825     return retval;
0826 }
0827 
0828 void print_header(char *delim)
0829 {
0830     struct msr_counter *mp;
0831     int printed = 0;
0832 
0833     if (DO_BIC(BIC_USEC))
0834         outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
0835     if (DO_BIC(BIC_TOD))
0836         outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
0837     if (DO_BIC(BIC_Package))
0838         outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
0839     if (DO_BIC(BIC_Die))
0840         outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
0841     if (DO_BIC(BIC_Node))
0842         outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
0843     if (DO_BIC(BIC_Core))
0844         outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
0845     if (DO_BIC(BIC_CPU))
0846         outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
0847     if (DO_BIC(BIC_APIC))
0848         outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
0849     if (DO_BIC(BIC_X2APIC))
0850         outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
0851     if (DO_BIC(BIC_Avg_MHz))
0852         outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
0853     if (DO_BIC(BIC_Busy))
0854         outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
0855     if (DO_BIC(BIC_Bzy_MHz))
0856         outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
0857     if (DO_BIC(BIC_TSC_MHz))
0858         outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
0859 
0860     if (DO_BIC(BIC_IPC))
0861         outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
0862 
0863     if (DO_BIC(BIC_IRQ)) {
0864         if (sums_need_wide_columns)
0865             outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
0866         else
0867             outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
0868     }
0869 
0870     if (DO_BIC(BIC_SMI))
0871         outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
0872 
0873     for (mp = sys.tp; mp; mp = mp->next) {
0874 
0875         if (mp->format == FORMAT_RAW) {
0876             if (mp->width == 64)
0877                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
0878             else
0879                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
0880         } else {
0881             if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
0882                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
0883             else
0884                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
0885         }
0886     }
0887 
0888     if (DO_BIC(BIC_CPU_c1))
0889         outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
0890     if (DO_BIC(BIC_CPU_c3))
0891         outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
0892     if (DO_BIC(BIC_CPU_c6))
0893         outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
0894     if (DO_BIC(BIC_CPU_c7))
0895         outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
0896 
0897     if (DO_BIC(BIC_Mod_c6))
0898         outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
0899 
0900     if (DO_BIC(BIC_CoreTmp))
0901         outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
0902 
0903     if (DO_BIC(BIC_CORE_THROT_CNT))
0904         outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
0905 
0906     if (do_rapl && !rapl_joules) {
0907         if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
0908             outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
0909     } else if (do_rapl && rapl_joules) {
0910         if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
0911             outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
0912     }
0913 
0914     for (mp = sys.cp; mp; mp = mp->next) {
0915         if (mp->format == FORMAT_RAW) {
0916             if (mp->width == 64)
0917                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
0918             else
0919                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
0920         } else {
0921             if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
0922                 outp += sprintf(outp, "%s%8s", delim, mp->name);
0923             else
0924                 outp += sprintf(outp, "%s%s", delim, mp->name);
0925         }
0926     }
0927 
0928     if (DO_BIC(BIC_PkgTmp))
0929         outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
0930 
0931     if (DO_BIC(BIC_GFX_rc6))
0932         outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
0933 
0934     if (DO_BIC(BIC_GFXMHz))
0935         outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
0936 
0937     if (DO_BIC(BIC_GFXACTMHz))
0938         outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
0939 
0940     if (DO_BIC(BIC_Totl_c0))
0941         outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
0942     if (DO_BIC(BIC_Any_c0))
0943         outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
0944     if (DO_BIC(BIC_GFX_c0))
0945         outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
0946     if (DO_BIC(BIC_CPUGFX))
0947         outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
0948 
0949     if (DO_BIC(BIC_Pkgpc2))
0950         outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
0951     if (DO_BIC(BIC_Pkgpc3))
0952         outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
0953     if (DO_BIC(BIC_Pkgpc6))
0954         outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
0955     if (DO_BIC(BIC_Pkgpc7))
0956         outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
0957     if (DO_BIC(BIC_Pkgpc8))
0958         outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
0959     if (DO_BIC(BIC_Pkgpc9))
0960         outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
0961     if (DO_BIC(BIC_Pkgpc10))
0962         outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
0963     if (DO_BIC(BIC_CPU_LPI))
0964         outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
0965     if (DO_BIC(BIC_SYS_LPI))
0966         outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
0967 
0968     if (do_rapl && !rapl_joules) {
0969         if (DO_BIC(BIC_PkgWatt))
0970             outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
0971         if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
0972             outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
0973         if (DO_BIC(BIC_GFXWatt))
0974             outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
0975         if (DO_BIC(BIC_RAMWatt))
0976             outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
0977         if (DO_BIC(BIC_PKG__))
0978             outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
0979         if (DO_BIC(BIC_RAM__))
0980             outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
0981     } else if (do_rapl && rapl_joules) {
0982         if (DO_BIC(BIC_Pkg_J))
0983             outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
0984         if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
0985             outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
0986         if (DO_BIC(BIC_GFX_J))
0987             outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
0988         if (DO_BIC(BIC_RAM_J))
0989             outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
0990         if (DO_BIC(BIC_PKG__))
0991             outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
0992         if (DO_BIC(BIC_RAM__))
0993             outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
0994     }
0995     if (DO_BIC(BIC_UNCORE_MHZ))
0996         outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
0997 
0998     for (mp = sys.pp; mp; mp = mp->next) {
0999         if (mp->format == FORMAT_RAW) {
1000             if (mp->width == 64)
1001                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
1002             else
1003                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
1004         } else {
1005             if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1006                 outp += sprintf(outp, "%s%8s", delim, mp->name);
1007             else
1008                 outp += sprintf(outp, "%s%s", delim, mp->name);
1009         }
1010     }
1011 
1012     outp += sprintf(outp, "\n");
1013 }
1014 
1015 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1016 {
1017     int i;
1018     struct msr_counter *mp;
1019 
1020     outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
1021 
1022     if (t) {
1023         outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
1024         outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
1025         outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
1026         outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
1027         outp += sprintf(outp, "c1: %016llX\n", t->c1);
1028 
1029         if (DO_BIC(BIC_IPC))
1030             outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
1031 
1032         if (DO_BIC(BIC_IRQ))
1033             outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
1034         if (DO_BIC(BIC_SMI))
1035             outp += sprintf(outp, "SMI: %d\n", t->smi_count);
1036 
1037         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1038             outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
1039         }
1040     }
1041 
1042     if (c) {
1043         outp += sprintf(outp, "core: %d\n", c->core_id);
1044         outp += sprintf(outp, "c3: %016llX\n", c->c3);
1045         outp += sprintf(outp, "c6: %016llX\n", c->c6);
1046         outp += sprintf(outp, "c7: %016llX\n", c->c7);
1047         outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
1048         outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
1049         outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
1050 
1051         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1052             outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
1053         }
1054         outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
1055     }
1056 
1057     if (p) {
1058         outp += sprintf(outp, "package: %d\n", p->package_id);
1059 
1060         outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
1061         outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
1062         outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
1063         outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
1064 
1065         outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
1066         if (DO_BIC(BIC_Pkgpc3))
1067             outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
1068         if (DO_BIC(BIC_Pkgpc6))
1069             outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
1070         if (DO_BIC(BIC_Pkgpc7))
1071             outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
1072         outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
1073         outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
1074         outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
1075         outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
1076         outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
1077         outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
1078         outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
1079         outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
1080         outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
1081         outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
1082         outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
1083         outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
1084 
1085         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1086             outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
1087         }
1088     }
1089 
1090     outp += sprintf(outp, "\n");
1091 
1092     return 0;
1093 }
1094 
1095 /*
1096  * column formatting convention & formats
1097  */
1098 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1099 {
1100     double interval_float, tsc;
1101     char *fmt8;
1102     int i;
1103     struct msr_counter *mp;
1104     char *delim = "\t";
1105     int printed = 0;
1106 
1107     /* if showing only 1st thread in core and this isn't one, bail out */
1108     if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1109         return 0;
1110 
1111     /* if showing only 1st thread in pkg and this isn't one, bail out */
1112     if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1113         return 0;
1114 
1115     /*if not summary line and --cpu is used */
1116     if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
1117         return 0;
1118 
1119     if (DO_BIC(BIC_USEC)) {
1120         /* on each row, print how many usec each timestamp took to gather */
1121         struct timeval tv;
1122 
1123         timersub(&t->tv_end, &t->tv_begin, &tv);
1124         outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
1125     }
1126 
1127     /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
1128     if (DO_BIC(BIC_TOD))
1129         outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
1130 
1131     interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
1132 
1133     tsc = t->tsc * tsc_tweak;
1134 
1135     /* topo columns, print blanks on 1st (average) line */
1136     if (t == &average.threads) {
1137         if (DO_BIC(BIC_Package))
1138             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1139         if (DO_BIC(BIC_Die))
1140             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1141         if (DO_BIC(BIC_Node))
1142             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1143         if (DO_BIC(BIC_Core))
1144             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1145         if (DO_BIC(BIC_CPU))
1146             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1147         if (DO_BIC(BIC_APIC))
1148             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1149         if (DO_BIC(BIC_X2APIC))
1150             outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1151     } else {
1152         if (DO_BIC(BIC_Package)) {
1153             if (p)
1154                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
1155             else
1156                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1157         }
1158         if (DO_BIC(BIC_Die)) {
1159             if (c)
1160                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
1161             else
1162                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1163         }
1164         if (DO_BIC(BIC_Node)) {
1165             if (t)
1166                 outp += sprintf(outp, "%s%d",
1167                         (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
1168             else
1169                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1170         }
1171         if (DO_BIC(BIC_Core)) {
1172             if (c)
1173                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
1174             else
1175                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1176         }
1177         if (DO_BIC(BIC_CPU))
1178             outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
1179         if (DO_BIC(BIC_APIC))
1180             outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
1181         if (DO_BIC(BIC_X2APIC))
1182             outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
1183     }
1184 
1185     if (DO_BIC(BIC_Avg_MHz))
1186         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
1187 
1188     if (DO_BIC(BIC_Busy))
1189         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
1190 
1191     if (DO_BIC(BIC_Bzy_MHz)) {
1192         if (has_base_hz)
1193             outp +=
1194                 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
1195         else
1196             outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1197                     tsc / units * t->aperf / t->mperf / interval_float);
1198     }
1199 
1200     if (DO_BIC(BIC_TSC_MHz))
1201         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
1202 
1203     if (DO_BIC(BIC_IPC))
1204         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
1205 
1206     /* IRQ */
1207     if (DO_BIC(BIC_IRQ)) {
1208         if (sums_need_wide_columns)
1209             outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
1210         else
1211             outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1212     }
1213 
1214     /* SMI */
1215     if (DO_BIC(BIC_SMI))
1216         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1217 
1218     /* Added counters */
1219     for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1220         if (mp->format == FORMAT_RAW) {
1221             if (mp->width == 32)
1222                 outp +=
1223                     sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
1224             else
1225                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1226         } else if (mp->format == FORMAT_DELTA) {
1227             if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1228                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1229             else
1230                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1231         } else if (mp->format == FORMAT_PERCENT) {
1232             if (mp->type == COUNTER_USEC)
1233                 outp +=
1234                     sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1235                         t->counter[i] / interval_float / 10000);
1236             else
1237                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
1238         }
1239     }
1240 
1241     /* C1 */
1242     if (DO_BIC(BIC_CPU_c1))
1243         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
1244 
1245     /* print per-core data only for 1st thread in core */
1246     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1247         goto done;
1248 
1249     if (DO_BIC(BIC_CPU_c3))
1250         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
1251     if (DO_BIC(BIC_CPU_c6))
1252         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
1253     if (DO_BIC(BIC_CPU_c7))
1254         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
1255 
1256     /* Mod%c6 */
1257     if (DO_BIC(BIC_Mod_c6))
1258         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1259 
1260     if (DO_BIC(BIC_CoreTmp))
1261         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1262 
1263     /* Core throttle count */
1264     if (DO_BIC(BIC_CORE_THROT_CNT))
1265         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
1266 
1267     for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1268         if (mp->format == FORMAT_RAW) {
1269             if (mp->width == 32)
1270                 outp +=
1271                     sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
1272             else
1273                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1274         } else if (mp->format == FORMAT_DELTA) {
1275             if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1276                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1277             else
1278                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1279         } else if (mp->format == FORMAT_PERCENT) {
1280             outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
1281         }
1282     }
1283 
1284     fmt8 = "%s%.2f";
1285 
1286     if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1287         outp +=
1288             sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1289     if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1290         outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1291 
1292     /* print per-package data only for 1st core in package */
1293     if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1294         goto done;
1295 
1296     /* PkgTmp */
1297     if (DO_BIC(BIC_PkgTmp))
1298         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1299 
1300     /* GFXrc6 */
1301     if (DO_BIC(BIC_GFX_rc6)) {
1302         if (p->gfx_rc6_ms == -1) {  /* detect GFX counter reset */
1303             outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1304         } else {
1305             outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1306                     p->gfx_rc6_ms / 10.0 / interval_float);
1307         }
1308     }
1309 
1310     /* GFXMHz */
1311     if (DO_BIC(BIC_GFXMHz))
1312         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1313 
1314     /* GFXACTMHz */
1315     if (DO_BIC(BIC_GFXACTMHz))
1316         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
1317 
1318     /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1319     if (DO_BIC(BIC_Totl_c0))
1320         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
1321     if (DO_BIC(BIC_Any_c0))
1322         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
1323     if (DO_BIC(BIC_GFX_c0))
1324         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
1325     if (DO_BIC(BIC_CPUGFX))
1326         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
1327 
1328     if (DO_BIC(BIC_Pkgpc2))
1329         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
1330     if (DO_BIC(BIC_Pkgpc3))
1331         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
1332     if (DO_BIC(BIC_Pkgpc6))
1333         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
1334     if (DO_BIC(BIC_Pkgpc7))
1335         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
1336     if (DO_BIC(BIC_Pkgpc8))
1337         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
1338     if (DO_BIC(BIC_Pkgpc9))
1339         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
1340     if (DO_BIC(BIC_Pkgpc10))
1341         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
1342 
1343     if (DO_BIC(BIC_CPU_LPI))
1344         outp +=
1345             sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1346     if (DO_BIC(BIC_SYS_LPI))
1347         outp +=
1348             sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1349 
1350     if (DO_BIC(BIC_PkgWatt))
1351         outp +=
1352             sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1353 
1354     if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1355         outp +=
1356             sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1357     if (DO_BIC(BIC_GFXWatt))
1358         outp +=
1359             sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1360     if (DO_BIC(BIC_RAMWatt))
1361         outp +=
1362             sprintf(outp, fmt8, (printed++ ? delim : ""),
1363                 p->energy_dram * rapl_dram_energy_units / interval_float);
1364     if (DO_BIC(BIC_Pkg_J))
1365         outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1366     if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1367         outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1368     if (DO_BIC(BIC_GFX_J))
1369         outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1370     if (DO_BIC(BIC_RAM_J))
1371         outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1372     if (DO_BIC(BIC_PKG__))
1373         outp +=
1374             sprintf(outp, fmt8, (printed++ ? delim : ""),
1375                 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1376     if (DO_BIC(BIC_RAM__))
1377         outp +=
1378             sprintf(outp, fmt8, (printed++ ? delim : ""),
1379                 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1380     /* UncMHz */
1381     if (DO_BIC(BIC_UNCORE_MHZ))
1382         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
1383 
1384     for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1385         if (mp->format == FORMAT_RAW) {
1386             if (mp->width == 32)
1387                 outp +=
1388                     sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
1389             else
1390                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1391         } else if (mp->format == FORMAT_DELTA) {
1392             if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1393                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1394             else
1395                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1396         } else if (mp->format == FORMAT_PERCENT) {
1397             outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
1398         }
1399     }
1400 
1401 done:
1402     if (*(outp - 1) != '\n')
1403         outp += sprintf(outp, "\n");
1404 
1405     return 0;
1406 }
1407 
1408 void flush_output_stdout(void)
1409 {
1410     FILE *filep;
1411 
1412     if (outf == stderr)
1413         filep = stdout;
1414     else
1415         filep = outf;
1416 
1417     fputs(output_buffer, filep);
1418     fflush(filep);
1419 
1420     outp = output_buffer;
1421 }
1422 
1423 void flush_output_stderr(void)
1424 {
1425     fputs(output_buffer, outf);
1426     fflush(outf);
1427     outp = output_buffer;
1428 }
1429 
1430 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1431 {
1432     static int count;
1433 
1434     if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
1435         print_header("\t");
1436 
1437     format_counters(&average.threads, &average.cores, &average.packages);
1438 
1439     count++;
1440 
1441     if (summary_only)
1442         return;
1443 
1444     for_all_cpus(format_counters, t, c, p);
1445 }
1446 
/*
 * DELTA_WRAP32(new, old) - store (new - old) modulo 2^32 into old.
 *
 * Both values are shifted into the upper 32 bits of an unsigned long long
 * before subtracting, so a counter that wrapped past 2^32 between the two
 * samples still yields the correct positive difference.
 *
 * The arguments are parenthesized and the body is wrapped in
 * do { } while (0), so the macro behaves as a single statement; callers
 * supply the terminating semicolon.  "old" must be an lvalue and is
 * evaluated twice.
 */
#define DELTA_WRAP32(new, old)                                      \
    do {                                                            \
        (old) = ((((unsigned long long)(new) << 32) -               \
                  ((unsigned long long)(old) << 32)) >> 32);        \
    } while (0)
1449 
1450 int delta_package(struct pkg_data *new, struct pkg_data *old)
1451 {
1452     int i;
1453     struct msr_counter *mp;
1454 
1455     if (DO_BIC(BIC_Totl_c0))
1456         old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1457     if (DO_BIC(BIC_Any_c0))
1458         old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1459     if (DO_BIC(BIC_GFX_c0))
1460         old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1461     if (DO_BIC(BIC_CPUGFX))
1462         old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1463 
1464     old->pc2 = new->pc2 - old->pc2;
1465     if (DO_BIC(BIC_Pkgpc3))
1466         old->pc3 = new->pc3 - old->pc3;
1467     if (DO_BIC(BIC_Pkgpc6))
1468         old->pc6 = new->pc6 - old->pc6;
1469     if (DO_BIC(BIC_Pkgpc7))
1470         old->pc7 = new->pc7 - old->pc7;
1471     old->pc8 = new->pc8 - old->pc8;
1472     old->pc9 = new->pc9 - old->pc9;
1473     old->pc10 = new->pc10 - old->pc10;
1474     old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1475     old->sys_lpi = new->sys_lpi - old->sys_lpi;
1476     old->pkg_temp_c = new->pkg_temp_c;
1477 
1478     /* flag an error when rc6 counter resets/wraps */
1479     if (old->gfx_rc6_ms > new->gfx_rc6_ms)
1480         old->gfx_rc6_ms = -1;
1481     else
1482         old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1483 
1484     old->uncore_mhz = new->uncore_mhz;
1485     old->gfx_mhz = new->gfx_mhz;
1486     old->gfx_act_mhz = new->gfx_act_mhz;
1487 
1488     old->energy_pkg = new->energy_pkg - old->energy_pkg;
1489     old->energy_cores = new->energy_cores - old->energy_cores;
1490     old->energy_gfx = new->energy_gfx - old->energy_gfx;
1491     old->energy_dram = new->energy_dram - old->energy_dram;
1492     old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
1493     old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
1494 
1495     for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1496         if (mp->format == FORMAT_RAW)
1497             old->counter[i] = new->counter[i];
1498         else
1499             old->counter[i] = new->counter[i] - old->counter[i];
1500     }
1501 
1502     return 0;
1503 }
1504 
1505 void delta_core(struct core_data *new, struct core_data *old)
1506 {
1507     int i;
1508     struct msr_counter *mp;
1509 
1510     old->c3 = new->c3 - old->c3;
1511     old->c6 = new->c6 - old->c6;
1512     old->c7 = new->c7 - old->c7;
1513     old->core_temp_c = new->core_temp_c;
1514     old->core_throt_cnt = new->core_throt_cnt;
1515     old->mc6_us = new->mc6_us - old->mc6_us;
1516 
1517     DELTA_WRAP32(new->core_energy, old->core_energy);
1518 
1519     for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1520         if (mp->format == FORMAT_RAW)
1521             old->counter[i] = new->counter[i];
1522         else
1523             old->counter[i] = new->counter[i] - old->counter[i];
1524     }
1525 }
1526 
1527 int soft_c1_residency_display(int bic)
1528 {
1529     if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1530         return 0;
1531 
1532     return DO_BIC_READ(bic);
1533 }
1534 
1535 /*
1536  * old = new - old
1537  */
1538 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
1539 {
1540     int i;
1541     struct msr_counter *mp;
1542 
1543     /* we run cpuid just the 1st time, copy the results */
1544     if (DO_BIC(BIC_APIC))
1545         new->apic_id = old->apic_id;
1546     if (DO_BIC(BIC_X2APIC))
1547         new->x2apic_id = old->x2apic_id;
1548 
1549     /*
1550      * the timestamps from start of measurement interval are in "old"
1551      * the timestamp from end of measurement interval are in "new"
1552      * over-write old w/ new so we can print end of interval values
1553      */
1554 
1555     timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1556     old->tv_begin = new->tv_begin;
1557     old->tv_end = new->tv_end;
1558 
1559     old->tsc = new->tsc - old->tsc;
1560 
1561     /* check for TSC < 1 Mcycles over interval */
1562     if (old->tsc < (1000 * 1000))
1563         errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1564              "You can disable all c-states by booting with \"idle=poll\"\n"
1565              "or just the deep ones with \"processor.max_cstate=1\"");
1566 
1567     old->c1 = new->c1 - old->c1;
1568 
1569     if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
1570         if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1571             old->aperf = new->aperf - old->aperf;
1572             old->mperf = new->mperf - old->mperf;
1573         } else {
1574             return -1;
1575         }
1576     }
1577 
1578     if (use_c1_residency_msr) {
1579         /*
1580          * Some models have a dedicated C1 residency MSR,
1581          * which should be more accurate than the derivation below.
1582          */
1583     } else {
1584         /*
1585          * As counter collection is not atomic,
1586          * it is possible for mperf's non-halted cycles + idle states
1587          * to exceed TSC's all cycles: show c1 = 0% in that case.
1588          */
1589         if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1590             old->c1 = 0;
1591         else {
1592             /* normal case, derive c1 */
1593             old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1594                 - core_delta->c6 - core_delta->c7;
1595         }
1596     }
1597 
1598     if (old->mperf == 0) {
1599         if (debug > 1)
1600             fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1601         old->mperf = 1; /* divide by 0 protection */
1602     }
1603 
1604     if (DO_BIC(BIC_IPC))
1605         old->instr_count = new->instr_count - old->instr_count;
1606 
1607     if (DO_BIC(BIC_IRQ))
1608         old->irq_count = new->irq_count - old->irq_count;
1609 
1610     if (DO_BIC(BIC_SMI))
1611         old->smi_count = new->smi_count - old->smi_count;
1612 
1613     for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1614         if (mp->format == FORMAT_RAW)
1615             old->counter[i] = new->counter[i];
1616         else
1617             old->counter[i] = new->counter[i] - old->counter[i];
1618     }
1619     return 0;
1620 }
1621 
1622 int delta_cpu(struct thread_data *t, struct core_data *c,
1623           struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
1624 {
1625     int retval = 0;
1626 
1627     /* calculate core delta only for 1st thread in core */
1628     if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1629         delta_core(c, c2);
1630 
1631     /* always calculate thread delta */
1632     retval = delta_thread(t, t2, c2);   /* c2 is core delta */
1633     if (retval)
1634         return retval;
1635 
1636     /* calculate package delta only for 1st core in package */
1637     if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1638         retval = delta_package(p, p2);
1639 
1640     return retval;
1641 }
1642 
1643 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1644 {
1645     int i;
1646     struct msr_counter *mp;
1647 
1648     t->tv_begin.tv_sec = 0;
1649     t->tv_begin.tv_usec = 0;
1650     t->tv_end.tv_sec = 0;
1651     t->tv_end.tv_usec = 0;
1652     t->tv_delta.tv_sec = 0;
1653     t->tv_delta.tv_usec = 0;
1654 
1655     t->tsc = 0;
1656     t->aperf = 0;
1657     t->mperf = 0;
1658     t->c1 = 0;
1659 
1660     t->instr_count = 0;
1661 
1662     t->irq_count = 0;
1663     t->smi_count = 0;
1664 
1665     /* tells format_counters to dump all fields from this set */
1666     t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1667 
1668     c->c3 = 0;
1669     c->c6 = 0;
1670     c->c7 = 0;
1671     c->mc6_us = 0;
1672     c->core_temp_c = 0;
1673     c->core_energy = 0;
1674     c->core_throt_cnt = 0;
1675 
1676     p->pkg_wtd_core_c0 = 0;
1677     p->pkg_any_core_c0 = 0;
1678     p->pkg_any_gfxe_c0 = 0;
1679     p->pkg_both_core_gfxe_c0 = 0;
1680 
1681     p->pc2 = 0;
1682     if (DO_BIC(BIC_Pkgpc3))
1683         p->pc3 = 0;
1684     if (DO_BIC(BIC_Pkgpc6))
1685         p->pc6 = 0;
1686     if (DO_BIC(BIC_Pkgpc7))
1687         p->pc7 = 0;
1688     p->pc8 = 0;
1689     p->pc9 = 0;
1690     p->pc10 = 0;
1691     p->cpu_lpi = 0;
1692     p->sys_lpi = 0;
1693 
1694     p->energy_pkg = 0;
1695     p->energy_dram = 0;
1696     p->energy_cores = 0;
1697     p->energy_gfx = 0;
1698     p->rapl_pkg_perf_status = 0;
1699     p->rapl_dram_perf_status = 0;
1700     p->pkg_temp_c = 0;
1701 
1702     p->gfx_rc6_ms = 0;
1703     p->uncore_mhz = 0;
1704     p->gfx_mhz = 0;
1705     p->gfx_act_mhz = 0;
1706     for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1707         t->counter[i] = 0;
1708 
1709     for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1710         c->counter[i] = 0;
1711 
1712     for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1713         p->counter[i] = 0;
1714 }
1715 
1716 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1717 {
1718     int i;
1719     struct msr_counter *mp;
1720 
1721     /* copy un-changing apic_id's */
1722     if (DO_BIC(BIC_APIC))
1723         average.threads.apic_id = t->apic_id;
1724     if (DO_BIC(BIC_X2APIC))
1725         average.threads.x2apic_id = t->x2apic_id;
1726 
1727     /* remember first tv_begin */
1728     if (average.threads.tv_begin.tv_sec == 0)
1729         average.threads.tv_begin = t->tv_begin;
1730 
1731     /* remember last tv_end */
1732     average.threads.tv_end = t->tv_end;
1733 
1734     average.threads.tsc += t->tsc;
1735     average.threads.aperf += t->aperf;
1736     average.threads.mperf += t->mperf;
1737     average.threads.c1 += t->c1;
1738 
1739     average.threads.instr_count += t->instr_count;
1740 
1741     average.threads.irq_count += t->irq_count;
1742     average.threads.smi_count += t->smi_count;
1743 
1744     for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1745         if (mp->format == FORMAT_RAW)
1746             continue;
1747         average.threads.counter[i] += t->counter[i];
1748     }
1749 
1750     /* sum per-core values only for 1st thread in core */
1751     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1752         return 0;
1753 
1754     average.cores.c3 += c->c3;
1755     average.cores.c6 += c->c6;
1756     average.cores.c7 += c->c7;
1757     average.cores.mc6_us += c->mc6_us;
1758 
1759     average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1760     average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
1761 
1762     average.cores.core_energy += c->core_energy;
1763 
1764     for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1765         if (mp->format == FORMAT_RAW)
1766             continue;
1767         average.cores.counter[i] += c->counter[i];
1768     }
1769 
1770     /* sum per-pkg values only for 1st core in pkg */
1771     if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1772         return 0;
1773 
1774     if (DO_BIC(BIC_Totl_c0))
1775         average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1776     if (DO_BIC(BIC_Any_c0))
1777         average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1778     if (DO_BIC(BIC_GFX_c0))
1779         average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1780     if (DO_BIC(BIC_CPUGFX))
1781         average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1782 
1783     average.packages.pc2 += p->pc2;
1784     if (DO_BIC(BIC_Pkgpc3))
1785         average.packages.pc3 += p->pc3;
1786     if (DO_BIC(BIC_Pkgpc6))
1787         average.packages.pc6 += p->pc6;
1788     if (DO_BIC(BIC_Pkgpc7))
1789         average.packages.pc7 += p->pc7;
1790     average.packages.pc8 += p->pc8;
1791     average.packages.pc9 += p->pc9;
1792     average.packages.pc10 += p->pc10;
1793 
1794     average.packages.cpu_lpi = p->cpu_lpi;
1795     average.packages.sys_lpi = p->sys_lpi;
1796 
1797     average.packages.energy_pkg += p->energy_pkg;
1798     average.packages.energy_dram += p->energy_dram;
1799     average.packages.energy_cores += p->energy_cores;
1800     average.packages.energy_gfx += p->energy_gfx;
1801 
1802     average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1803     average.packages.uncore_mhz = p->uncore_mhz;
1804     average.packages.gfx_mhz = p->gfx_mhz;
1805     average.packages.gfx_act_mhz = p->gfx_act_mhz;
1806 
1807     average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1808 
1809     average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1810     average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1811 
1812     for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1813         if (mp->format == FORMAT_RAW)
1814             continue;
1815         average.packages.counter[i] += p->counter[i];
1816     }
1817     return 0;
1818 }
1819 
1820 /*
1821  * sum the counters for all cpus in the system
1822  * compute the weighted average
1823  */
1824 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1825 {
1826     int i;
1827     struct msr_counter *mp;
1828 
1829     clear_counters(&average.threads, &average.cores, &average.packages);
1830 
1831     for_all_cpus(sum_counters, t, c, p);
1832 
1833     /* Use the global time delta for the average. */
1834     average.threads.tv_delta = tv_delta;
1835 
1836     average.threads.tsc /= topo.num_cpus;
1837     average.threads.aperf /= topo.num_cpus;
1838     average.threads.mperf /= topo.num_cpus;
1839     average.threads.instr_count /= topo.num_cpus;
1840     average.threads.c1 /= topo.num_cpus;
1841 
1842     if (average.threads.irq_count > 9999999)
1843         sums_need_wide_columns = 1;
1844 
1845     average.cores.c3 /= topo.num_cores;
1846     average.cores.c6 /= topo.num_cores;
1847     average.cores.c7 /= topo.num_cores;
1848     average.cores.mc6_us /= topo.num_cores;
1849 
1850     if (DO_BIC(BIC_Totl_c0))
1851         average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1852     if (DO_BIC(BIC_Any_c0))
1853         average.packages.pkg_any_core_c0 /= topo.num_packages;
1854     if (DO_BIC(BIC_GFX_c0))
1855         average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1856     if (DO_BIC(BIC_CPUGFX))
1857         average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1858 
1859     average.packages.pc2 /= topo.num_packages;
1860     if (DO_BIC(BIC_Pkgpc3))
1861         average.packages.pc3 /= topo.num_packages;
1862     if (DO_BIC(BIC_Pkgpc6))
1863         average.packages.pc6 /= topo.num_packages;
1864     if (DO_BIC(BIC_Pkgpc7))
1865         average.packages.pc7 /= topo.num_packages;
1866 
1867     average.packages.pc8 /= topo.num_packages;
1868     average.packages.pc9 /= topo.num_packages;
1869     average.packages.pc10 /= topo.num_packages;
1870 
1871     for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1872         if (mp->format == FORMAT_RAW)
1873             continue;
1874         if (mp->type == COUNTER_ITEMS) {
1875             if (average.threads.counter[i] > 9999999)
1876                 sums_need_wide_columns = 1;
1877             continue;
1878         }
1879         average.threads.counter[i] /= topo.num_cpus;
1880     }
1881     for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1882         if (mp->format == FORMAT_RAW)
1883             continue;
1884         if (mp->type == COUNTER_ITEMS) {
1885             if (average.cores.counter[i] > 9999999)
1886                 sums_need_wide_columns = 1;
1887         }
1888         average.cores.counter[i] /= topo.num_cores;
1889     }
1890     for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1891         if (mp->format == FORMAT_RAW)
1892             continue;
1893         if (mp->type == COUNTER_ITEMS) {
1894             if (average.packages.counter[i] > 9999999)
1895                 sums_need_wide_columns = 1;
1896         }
1897         average.packages.counter[i] /= topo.num_packages;
1898     }
1899 }
1900 
/*
 * Execute the RDTSC instruction and return the EDX:EAX pair it produces
 * as one 64-bit value (EDX in the upper 32 bits).
 */
static unsigned long long rdtsc(void)
{
    unsigned int lo, hi;

    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));

    return ((unsigned long long)hi << 32) | lo;
}
1909 
/*
 * fopen() wrapper that never returns NULL: when @path cannot be opened
 * with @mode the process exits with status 1 through err().
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
    FILE *stream;

    stream = fopen(path, mode);
    if (stream == NULL)
        err(1, "%s: open failed", path);

    return stream;
}
1921 
/*
 * snapshot_sysfs_counter()
 *
 * Open @path, parse one unsigned decimal integer from it and return it.
 *
 * Exits the process (status 1) when the file cannot be opened or when
 * no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
    FILE *fp;
    int retval;
    unsigned long long counter;

    fp = fopen_or_die(path, "r");

    /*
     * %llu matches the unsigned long long destination; the signed %lld
     * conversion is a type mismatch for this argument.
     */
    retval = fscanf(fp, "%llu", &counter);
    if (retval != 1)
        err(1, "snapshot_sysfs_counter(%s)", path);

    fclose(fp);

    return counter;
}
1943 
1944 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1945 {
1946     if (mp->msr_num != 0) {
1947         if (get_msr(cpu, mp->msr_num, counterp))
1948             return -1;
1949     } else {
1950         char path[128 + PATH_BYTES];
1951 
1952         if (mp->flags & SYSFS_PERCPU) {
1953             sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);
1954 
1955             *counterp = snapshot_sysfs_counter(path);
1956         } else {
1957             *counterp = snapshot_sysfs_counter(mp->path);
1958         }
1959     }
1960 
1961     return 0;
1962 }
1963 
/*
 * get_uncore_mhz() - current uncore frequency of @package / @die in MHz
 *
 * Reads current_freq_khz from the intel_uncore_frequency sysfs directory
 * and divides it by 1000.
 *
 * The directory name uses two-digit, zero-padded ids ("package_NN_die_NN").
 * A literal "0" followed by %d only produces that for ids 0-9 and yields
 * e.g. "package_010" for package 10, so %02d is used instead.
 *
 * Exits through snapshot_sysfs_counter() when the file cannot be read.
 */
unsigned long long get_uncore_mhz(int package, int die)
{
    char path[128];

    snprintf(path, sizeof(path),
         "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz",
         package, die);

    return (snapshot_sysfs_counter(path) / 1000);
}
1973 
1974 int get_epb(int cpu)
1975 {
1976     char path[128 + PATH_BYTES];
1977     unsigned long long msr;
1978     int ret, epb = -1;
1979     FILE *fp;
1980 
1981     sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
1982 
1983     fp = fopen(path, "r");
1984     if (!fp)
1985         goto msr_fallback;
1986 
1987     ret = fscanf(fp, "%d", &epb);
1988     if (ret != 1)
1989         err(1, "%s(%s)", __func__, path);
1990 
1991     fclose(fp);
1992 
1993     return epb;
1994 
1995 msr_fallback:
1996     get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
1997 
1998     return msr & 0xf;
1999 }
2000 
2001 void get_apic_id(struct thread_data *t)
2002 {
2003     unsigned int eax, ebx, ecx, edx;
2004 
2005     if (DO_BIC(BIC_APIC)) {
2006         eax = ebx = ecx = edx = 0;
2007         __cpuid(1, eax, ebx, ecx, edx);
2008 
2009         t->apic_id = (ebx >> 24) & 0xff;
2010     }
2011 
2012     if (!DO_BIC(BIC_X2APIC))
2013         return;
2014 
2015     if (authentic_amd || hygon_genuine) {
2016         unsigned int topology_extensions;
2017 
2018         if (max_extended_level < 0x8000001e)
2019             return;
2020 
2021         eax = ebx = ecx = edx = 0;
2022         __cpuid(0x80000001, eax, ebx, ecx, edx);
2023         topology_extensions = ecx & (1 << 22);
2024 
2025         if (topology_extensions == 0)
2026             return;
2027 
2028         eax = ebx = ecx = edx = 0;
2029         __cpuid(0x8000001e, eax, ebx, ecx, edx);
2030 
2031         t->x2apic_id = eax;
2032         return;
2033     }
2034 
2035     if (!genuine_intel)
2036         return;
2037 
2038     if (max_level < 0xb)
2039         return;
2040 
2041     ecx = 0;
2042     __cpuid(0xb, eax, ebx, ecx, edx);
2043     t->x2apic_id = edx;
2044 
2045     if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
2046         fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
2047 }
2048 
/*
 * get_core_throt_cnt() - read the core throttle count of @cpu from sysfs
 *
 * @cpu: CPU number used to build the sysfs path
 * @cnt: receives the count; left untouched on failure
 *
 * Returns 0 on success, -1 when the file cannot be opened or does not
 * start with an unsigned integer.
 */
int get_core_throt_cnt(int cpu, unsigned long long *cnt)
{
    /*
     * The longest possible path is well under 128 bytes: it contains no
     * caller-supplied component, only the CPU number.
     */
    char path[128];
    unsigned long long tmp;
    FILE *fp;
    int ret;

    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
    fp = fopen(path, "r");
    if (!fp)
        return -1;

    /* %llu matches the unsigned long long destination (was %lld) */
    ret = fscanf(fp, "%llu", &tmp);
    fclose(fp);
    if (ret != 1)
        return -1;
    *cnt = tmp;

    return 0;
}
2068 
2069 /*
2070  * get_counters(...)
2071  * migrate to cpu
2072  * acquire and record local counters for that cpu
2073  */
2074 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2075 {
2076     int cpu = t->cpu_id;
2077     unsigned long long msr;
2078     int aperf_mperf_retry_count = 0;
2079     struct msr_counter *mp;
2080     int i;
2081 
2082     if (cpu_migrate(cpu)) {
2083         fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
2084         return -1;
2085     }
2086 
2087     gettimeofday(&t->tv_begin, (struct timezone *)NULL);
2088 
2089     if (first_counter_read)
2090         get_apic_id(t);
2091 retry:
2092     t->tsc = rdtsc();   /* we are running on local CPU of interest */
2093 
2094     if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
2095         unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
2096 
2097         /*
2098          * The TSC, APERF and MPERF must be read together for
2099          * APERF/MPERF and MPERF/TSC to give accurate results.
2100          *
2101          * Unfortunately, APERF and MPERF are read by
2102          * individual system call, so delays may occur
2103          * between them.  If the time to read them
2104          * varies by a large amount, we re-read them.
2105          */
2106 
2107         /*
2108          * This initial dummy APERF read has been seen to
2109          * reduce jitter in the subsequent reads.
2110          */
2111 
2112         if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2113             return -3;
2114 
2115         t->tsc = rdtsc();   /* re-read close to APERF */
2116 
2117         tsc_before = t->tsc;
2118 
2119         if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2120             return -3;
2121 
2122         tsc_between = rdtsc();
2123 
2124         if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
2125             return -4;
2126 
2127         tsc_after = rdtsc();
2128 
2129         aperf_time = tsc_between - tsc_before;
2130         mperf_time = tsc_after - tsc_between;
2131 
2132         /*
2133          * If the system call latency to read APERF and MPERF
2134          * differ by more than 2x, then try again.
2135          */
2136         if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
2137             aperf_mperf_retry_count++;
2138             if (aperf_mperf_retry_count < 5)
2139                 goto retry;
2140             else
2141                 warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
2142         }
2143         aperf_mperf_retry_count = 0;
2144 
2145         t->aperf = t->aperf * aperf_mperf_multiplier;
2146         t->mperf = t->mperf * aperf_mperf_multiplier;
2147     }
2148 
2149     if (DO_BIC(BIC_IPC))
2150         if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
2151             return -4;
2152 
2153     if (DO_BIC(BIC_IRQ))
2154         t->irq_count = irqs_per_cpu[cpu];
2155     if (DO_BIC(BIC_SMI)) {
2156         if (get_msr(cpu, MSR_SMI_COUNT, &msr))
2157             return -5;
2158         t->smi_count = msr & 0xFFFFFFFF;
2159     }
2160     if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
2161         if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
2162             return -6;
2163     }
2164 
2165     for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2166         if (get_mp(cpu, mp, &t->counter[i]))
2167             return -10;
2168     }
2169 
2170     /* collect core counters only for 1st thread in core */
2171     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2172         goto done;
2173 
2174     if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
2175         if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
2176             return -6;
2177     }
2178 
2179     if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
2180         if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
2181             return -7;
2182     } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
2183         if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
2184             return -7;
2185     }
2186 
2187     if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
2188         if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
2189             return -8;
2190         else if (t->is_atom) {
2191             /*
2192              * For Atom CPUs that has core cstate deeper than c6,
2193              * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
2194              * Minus CC7 (and deeper cstates) residency to get
2195              * accturate cc6 residency.
2196              */
2197             c->c6 -= c->c7;
2198         }
2199     }
2200 
2201     if (DO_BIC(BIC_Mod_c6))
2202         if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
2203             return -8;
2204 
2205     if (DO_BIC(BIC_CoreTmp)) {
2206         if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2207             return -9;
2208         c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
2209     }
2210 
2211     if (DO_BIC(BIC_CORE_THROT_CNT))
2212         get_core_throt_cnt(cpu, &c->core_throt_cnt);
2213 
2214     if (do_rapl & RAPL_AMD_F17H) {
2215         if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
2216             return -14;
2217         c->core_energy = msr & 0xFFFFFFFF;
2218     }
2219 
2220     for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2221         if (get_mp(cpu, mp, &c->counter[i]))
2222             return -10;
2223     }
2224 
2225     /* collect package counters only for 1st core in package */
2226     if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2227         goto done;
2228 
2229     if (DO_BIC(BIC_Totl_c0)) {
2230         if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
2231             return -10;
2232     }
2233     if (DO_BIC(BIC_Any_c0)) {
2234         if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
2235             return -11;
2236     }
2237     if (DO_BIC(BIC_GFX_c0)) {
2238         if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
2239             return -12;
2240     }
2241     if (DO_BIC(BIC_CPUGFX)) {
2242         if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
2243             return -13;
2244     }
2245     if (DO_BIC(BIC_Pkgpc3))
2246         if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
2247             return -9;
2248     if (DO_BIC(BIC_Pkgpc6)) {
2249         if (do_slm_cstates) {
2250             if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
2251                 return -10;
2252         } else {
2253             if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
2254                 return -10;
2255         }
2256     }
2257 
2258     if (DO_BIC(BIC_Pkgpc2))
2259         if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
2260             return -11;
2261     if (DO_BIC(BIC_Pkgpc7))
2262         if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
2263             return -12;
2264     if (DO_BIC(BIC_Pkgpc8))
2265         if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
2266             return -13;
2267     if (DO_BIC(BIC_Pkgpc9))
2268         if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
2269             return -13;
2270     if (DO_BIC(BIC_Pkgpc10))
2271         if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
2272             return -13;
2273 
2274     if (DO_BIC(BIC_CPU_LPI))
2275         p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
2276     if (DO_BIC(BIC_SYS_LPI))
2277         p->sys_lpi = cpuidle_cur_sys_lpi_us;
2278 
2279     if (do_rapl & RAPL_PKG) {
2280         if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
2281             return -13;
2282         p->energy_pkg = msr;
2283     }
2284     if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
2285         if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
2286             return -14;
2287         p->energy_cores = msr;
2288     }
2289     if (do_rapl & RAPL_DRAM) {
2290         if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
2291             return -15;
2292         p->energy_dram = msr;
2293     }
2294     if (do_rapl & RAPL_GFX) {
2295         if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
2296             return -16;
2297         p->energy_gfx = msr;
2298     }
2299     if (do_rapl & RAPL_PKG_PERF_STATUS) {
2300         if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
2301             return -16;
2302         p->rapl_pkg_perf_status = msr;
2303     }
2304     if (do_rapl & RAPL_DRAM_PERF_STATUS) {
2305         if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
2306             return -16;
2307         p->rapl_dram_perf_status = msr;
2308     }
2309     if (do_rapl & RAPL_AMD_F17H) {
2310         if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
2311             return -13;
2312         p->energy_pkg = msr;
2313     }
2314     if (DO_BIC(BIC_PkgTmp)) {
2315         if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2316             return -17;
2317         p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
2318     }
2319 
2320     if (DO_BIC(BIC_GFX_rc6))
2321         p->gfx_rc6_ms = gfx_cur_rc6_ms;
2322 
2323     /* n.b. assume die0 uncore frequency applies to whole package */
2324     if (DO_BIC(BIC_UNCORE_MHZ))
2325         p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
2326 
2327     if (DO_BIC(BIC_GFXMHz))
2328         p->gfx_mhz = gfx_cur_mhz;
2329 
2330     if (DO_BIC(BIC_GFXACTMHz))
2331         p->gfx_act_mhz = gfx_act_mhz;
2332 
2333     for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2334         if (get_mp(cpu, mp, &p->counter[i]))
2335             return -10;
2336     }
2337 done:
2338     gettimeofday(&t->tv_end, (struct timezone *)NULL);
2339 
2340     return 0;
2341 }
2342 
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0        /* Unknown */
#define PCLRSV 1        /* Reserved */
#define PCL__0 2        /* PC0 */
#define PCL__1 3        /* PC1 */
#define PCL__2 4        /* PC2 */
#define PCL__3 5        /* PC3 */
#define PCL__4 6        /* PC4 */
#define PCL__6 7        /* PC6 */
#define PCL_6N 8        /* PC6 No Retention */
#define PCL_6R 9        /* PC6 Retention */
#define PCL__7 10       /* PC7 */
#define PCL_7S 11       /* PC7 Shrink */
#define PCL__8 12       /* PC8 */
#define PCL__9 13       /* PC9 */
#define PCL_10 14       /* PC10 */
#define PCLUNL 15       /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of every PCL* value.  Each entry is tied to its index
 * with a designated initializer so that a name cannot drift away from
 * the constant it describes: PCLUKN must read "unknown" and PCLRSV
 * "reserved".
 */
char *pkg_cstate_limit_strings[] = {
    [PCLUKN] = "unknown",
    [PCLRSV] = "reserved",
    [PCL__0] = "pc0",
    [PCL__1] = "pc1",
    [PCL__2] = "pc2",
    [PCL__3] = "pc3",
    [PCL__4] = "pc4",
    [PCL__6] = "pc6",
    [PCL_6N] = "pc6n",
    [PCL_6R] = "pc6r",
    [PCL__7] = "pc7",
    [PCL_7S] = "pc7s",
    [PCL__8] = "pc8",
    [PCL__9] = "pc9",
    [PCL_10] = "pc10",
    [PCLUNL] = "unlimited",
};
2370 
2371 int nhm_pkg_cstate_limits[16] =
2372     { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2373     PCLRSV, PCLRSV
2374 };
2375 
2376 int snb_pkg_cstate_limits[16] =
2377     { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2378     PCLRSV, PCLRSV
2379 };
2380 
2381 int hsw_pkg_cstate_limits[16] =
2382     { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2383     PCLRSV, PCLRSV
2384 };
2385 
2386 int slv_pkg_cstate_limits[16] =
2387     { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2388     PCL__6, PCL__7
2389 };
2390 
2391 int amt_pkg_cstate_limits[16] =
2392     { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2393     PCLRSV, PCLRSV
2394 };
2395 
2396 int phi_pkg_cstate_limits[16] =
2397     { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2398     PCLRSV, PCLRSV
2399 };
2400 
2401 int glm_pkg_cstate_limits[16] =
2402     { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2403     PCLRSV, PCLRSV
2404 };
2405 
2406 int skx_pkg_cstate_limits[16] =
2407     { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2408     PCLRSV, PCLRSV
2409 };
2410 
2411 int icx_pkg_cstate_limits[16] =
2412     { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2413     PCLRSV, PCLRSV
2414 };
2415 
2416 static void calculate_tsc_tweak()
2417 {
2418     tsc_tweak = base_hz / tsc_hz;
2419 }
2420 
2421 void prewake_cstate_probe(unsigned int family, unsigned int model);
2422 
2423 static void dump_nhm_platform_info(void)
2424 {
2425     unsigned long long msr;
2426     unsigned int ratio;
2427 
2428     get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2429 
2430     fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2431 
2432     ratio = (msr >> 40) & 0xFF;
2433     fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
2434 
2435     ratio = (msr >> 8) & 0xFF;
2436     fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2437 
2438     get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2439     fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2440         base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2441 
2442     /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
2443     if (dis_cstate_prewake)
2444         fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
2445 
2446     return;
2447 }
2448 
2449 static void dump_hsw_turbo_ratio_limits(void)
2450 {
2451     unsigned long long msr;
2452     unsigned int ratio;
2453 
2454     get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2455 
2456     fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2457 
2458     ratio = (msr >> 8) & 0xFF;
2459     if (ratio)
2460         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
2461 
2462     ratio = (msr >> 0) & 0xFF;
2463     if (ratio)
2464         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
2465     return;
2466 }
2467 
2468 static void dump_ivt_turbo_ratio_limits(void)
2469 {
2470     unsigned long long msr;
2471     unsigned int ratio;
2472 
2473     get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2474 
2475     fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2476 
2477     ratio = (msr >> 56) & 0xFF;
2478     if (ratio)
2479         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
2480 
2481     ratio = (msr >> 48) & 0xFF;
2482     if (ratio)
2483         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
2484 
2485     ratio = (msr >> 40) & 0xFF;
2486     if (ratio)
2487         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
2488 
2489     ratio = (msr >> 32) & 0xFF;
2490     if (ratio)
2491         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
2492 
2493     ratio = (msr >> 24) & 0xFF;
2494     if (ratio)
2495         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
2496 
2497     ratio = (msr >> 16) & 0xFF;
2498     if (ratio)
2499         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
2500 
2501     ratio = (msr >> 8) & 0xFF;
2502     if (ratio)
2503         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
2504 
2505     ratio = (msr >> 0) & 0xFF;
2506     if (ratio)
2507         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
2508     return;
2509 }
2510 
2511 int has_turbo_ratio_group_limits(int family, int model)
2512 {
2513 
2514     if (!genuine_intel)
2515         return 0;
2516 
2517     if (family != 6)
2518         return 0;
2519 
2520     switch (model) {
2521     case INTEL_FAM6_ATOM_GOLDMONT:
2522     case INTEL_FAM6_SKYLAKE_X:
2523     case INTEL_FAM6_ICELAKE_X:
2524     case INTEL_FAM6_SAPPHIRERAPIDS_X:
2525     case INTEL_FAM6_ATOM_GOLDMONT_D:
2526     case INTEL_FAM6_ATOM_TREMONT_D:
2527         return 1;
2528     default:
2529         return 0;
2530     }
2531 }
2532 
2533 static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
2534 {
2535     unsigned long long msr, core_counts;
2536     int shift;
2537 
2538     get_msr(base_cpu, trl_msr_offset, &msr);
2539     fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
2540         base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY" : "", msr);
2541 
2542     if (has_turbo_ratio_group_limits(family, model)) {
2543         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2544         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2545     } else {
2546         core_counts = 0x0807060504030201;
2547     }
2548 
2549     for (shift = 56; shift >= 0; shift -= 8) {
2550         unsigned int ratio, group_size;
2551 
2552         ratio = (msr >> shift) & 0xFF;
2553         group_size = (core_counts >> shift) & 0xFF;
2554         if (ratio)
2555             fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2556                 ratio, bclk, ratio * bclk, group_size);
2557     }
2558 
2559     return;
2560 }
2561 
2562 static void dump_atom_turbo_ratio_limits(void)
2563 {
2564     unsigned long long msr;
2565     unsigned int ratio;
2566 
2567     get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2568     fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2569 
2570     ratio = (msr >> 0) & 0x3F;
2571     if (ratio)
2572         fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
2573 
2574     ratio = (msr >> 8) & 0x3F;
2575     if (ratio)
2576         fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
2577 
2578     ratio = (msr >> 16) & 0x3F;
2579     if (ratio)
2580         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2581 
2582     get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2583     fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2584 
2585     ratio = (msr >> 24) & 0x3F;
2586     if (ratio)
2587         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
2588 
2589     ratio = (msr >> 16) & 0x3F;
2590     if (ratio)
2591         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
2592 
2593     ratio = (msr >> 8) & 0x3F;
2594     if (ratio)
2595         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
2596 
2597     ratio = (msr >> 0) & 0x3F;
2598     if (ratio)
2599         fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
2600 }
2601 
2602 static void dump_knl_turbo_ratio_limits(void)
2603 {
2604     const unsigned int buckets_no = 7;
2605 
2606     unsigned long long msr;
2607     int delta_cores, delta_ratio;
2608     int i, b_nr;
2609     unsigned int cores[buckets_no];
2610     unsigned int ratio[buckets_no];
2611 
2612     get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2613 
2614     fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2615 
2616     /*
2617      * Turbo encoding in KNL is as follows:
2618      * [0] -- Reserved
2619      * [7:1] -- Base value of number of active cores of bucket 1.
2620      * [15:8] -- Base value of freq ratio of bucket 1.
2621      * [20:16] -- +ve delta of number of active cores of bucket 2.
2622      * i.e. active cores of bucket 2 =
2623      * active cores of bucket 1 + delta
2624      * [23:21] -- Negative delta of freq ratio of bucket 2.
2625      * i.e. freq ratio of bucket 2 =
2626      * freq ratio of bucket 1 - delta
2627      * [28:24]-- +ve delta of number of active cores of bucket 3.
2628      * [31:29]-- -ve delta of freq ratio of bucket 3.
2629      * [36:32]-- +ve delta of number of active cores of bucket 4.
2630      * [39:37]-- -ve delta of freq ratio of bucket 4.
2631      * [44:40]-- +ve delta of number of active cores of bucket 5.
2632      * [47:45]-- -ve delta of freq ratio of bucket 5.
2633      * [52:48]-- +ve delta of number of active cores of bucket 6.
2634      * [55:53]-- -ve delta of freq ratio of bucket 6.
2635      * [60:56]-- +ve delta of number of active cores of bucket 7.
2636      * [63:61]-- -ve delta of freq ratio of bucket 7.
2637      */
2638 
2639     b_nr = 0;
2640     cores[b_nr] = (msr & 0xFF) >> 1;
2641     ratio[b_nr] = (msr >> 8) & 0xFF;
2642 
2643     for (i = 16; i < 64; i += 8) {
2644         delta_cores = (msr >> i) & 0x1F;
2645         delta_ratio = (msr >> (i + 5)) & 0x7;
2646 
2647         cores[b_nr + 1] = cores[b_nr] + delta_cores;
2648         ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2649         b_nr++;
2650     }
2651 
2652     for (i = buckets_no - 1; i >= 0; i--)
2653         if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2654             fprintf(outf,
2655                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2656                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2657 }
2658 
2659 static void dump_nhm_cst_cfg(void)
2660 {
2661     unsigned long long msr;
2662 
2663     get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2664 
2665     fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2666 
2667     fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2668         (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2669         (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2670         (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2671         (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2672         (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
2673 
2674 #define AUTOMATIC_CSTATE_CONVERSION     (1UL << 16)
2675     if (has_automatic_cstate_conversion) {
2676         fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2677     }
2678 
2679     fprintf(outf, ")\n");
2680 
2681     return;
2682 }
2683 
2684 static void dump_config_tdp(void)
2685 {
2686     unsigned long long msr;
2687 
2688     get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2689     fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2690     fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2691 
2692     get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2693     fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2694     if (msr) {
2695         fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2696         fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2697         fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2698         fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2699     }
2700     fprintf(outf, ")\n");
2701 
2702     get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2703     fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2704     if (msr) {
2705         fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2706         fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2707         fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2708         fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2709     }
2710     fprintf(outf, ")\n");
2711 
2712     get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2713     fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2714     if ((msr) & 0x3)
2715         fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2716     fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2717     fprintf(outf, ")\n");
2718 
2719     get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2720     fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2721     fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2722     fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2723     fprintf(outf, ")\n");
2724 }
2725 
2726 unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2727 
2728 void print_irtl(void)
2729 {
2730     unsigned long long msr;
2731 
2732     get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2733     fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2734     fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2735         (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2736 
2737     get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2738     fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2739     fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2740         (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2741 
2742     get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2743     fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2744     fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2745         (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2746 
2747     if (!do_irtl_hsw)
2748         return;
2749 
2750     get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2751     fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2752     fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2753         (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2754 
2755     get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2756     fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2757     fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2758         (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2759 
2760     get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2761     fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2762     fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2763         (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2764 
2765 }
2766 
2767 void free_fd_percpu(void)
2768 {
2769     int i;
2770 
2771     for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2772         if (fd_percpu[i] != 0)
2773             close(fd_percpu[i]);
2774     }
2775 
2776     free(fd_percpu);
2777 }
2778 
2779 void free_all_buffers(void)
2780 {
2781     int i;
2782 
2783     CPU_FREE(cpu_present_set);
2784     cpu_present_set = NULL;
2785     cpu_present_setsize = 0;
2786 
2787     CPU_FREE(cpu_affinity_set);
2788     cpu_affinity_set = NULL;
2789     cpu_affinity_setsize = 0;
2790 
2791     free(thread_even);
2792     free(core_even);
2793     free(package_even);
2794 
2795     thread_even = NULL;
2796     core_even = NULL;
2797     package_even = NULL;
2798 
2799     free(thread_odd);
2800     free(core_odd);
2801     free(package_odd);
2802 
2803     thread_odd = NULL;
2804     core_odd = NULL;
2805     package_odd = NULL;
2806 
2807     free(output_buffer);
2808     output_buffer = NULL;
2809     outp = NULL;
2810 
2811     free_fd_percpu();
2812 
2813     free(irq_column_2_cpu);
2814     free(irqs_per_cpu);
2815 
2816     for (i = 0; i <= topo.max_cpu_num; ++i) {
2817         if (cpus[i].put_ids)
2818             CPU_FREE(cpus[i].put_ids);
2819     }
2820     free(cpus);
2821 }
2822 
/*
 * Read one decimal int from the file whose path is built printf-style from
 * fmt and the following arguments.
 *
 * Returns the parsed value, or 0 when the file cannot be opened (which a
 * caller cannot tell apart from a file containing 0).
 * Exits via err() when the file opens but does not start with a number.
 */
int parse_int_file(const char *fmt, ...)
{
    char filename[PATH_MAX];
    FILE *stream;
    va_list ap;
    int parsed;

    va_start(ap, fmt);
    vsnprintf(filename, sizeof(filename), fmt, ap);
    va_end(ap);

    stream = fopen(filename, "r");
    if (stream == NULL)
        return 0;

    if (fscanf(stream, "%d", &parsed) != 1)
        err(1, "%s: failed to parse number from file", filename);

    fclose(stream);

    return parsed;
}
2846 
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if cpu equals the first integer in its
 * topology/core_siblings_list sysfs file, else 0
 */
int cpu_is_first_core_in_package(int cpu)
{
    int first_sibling;

    first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

    return first_sibling == cpu;
}
2855 
/* Return the int stored in cpu's topology/physical_package_id sysfs file (0 if it cannot be opened). */
int get_physical_package_id(int cpu)
{
    int package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

    return package_id;
}
2860 
/* Return the int stored in cpu's topology/die_id sysfs file (0 if it cannot be opened). */
int get_die_id(int cpu)
{
    int die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

    return die_id;
}
2865 
/* Return the int stored in cpu's topology/core_id sysfs file (0 if it cannot be opened). */
int get_core_id(int cpu)
{
    int core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

    return core_id;
}
2870 
2871 void set_node_data(void)
2872 {
2873     int pkg, node, lnode, cpu, cpux;
2874     int cpu_count;
2875 
2876     /* initialize logical_node_id */
2877     for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2878         cpus[cpu].logical_node_id = -1;
2879 
2880     cpu_count = 0;
2881     for (pkg = 0; pkg < topo.num_packages; pkg++) {
2882         lnode = 0;
2883         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2884             if (cpus[cpu].physical_package_id != pkg)
2885                 continue;
2886             /* find a cpu with an unset logical_node_id */
2887             if (cpus[cpu].logical_node_id != -1)
2888                 continue;
2889             cpus[cpu].logical_node_id = lnode;
2890             node = cpus[cpu].physical_node_id;
2891             cpu_count++;
2892             /*
2893              * find all matching cpus on this pkg and set
2894              * the logical_node_id
2895              */
2896             for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2897                 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
2898                     cpus[cpux].logical_node_id = lnode;
2899                     cpu_count++;
2900                 }
2901             }
2902             lnode++;
2903             if (lnode > topo.nodes_per_pkg)
2904                 topo.nodes_per_pkg = lnode;
2905         }
2906         if (cpu_count >= topo.max_cpu_num)
2907             break;
2908     }
2909 }
2910 
2911 int get_physical_node_id(struct cpu_topology *thiscpu)
2912 {
2913     char path[80];
2914     FILE *filep;
2915     int i;
2916     int cpu = thiscpu->logical_cpu_id;
2917 
2918     for (i = 0; i <= topo.max_cpu_num; i++) {
2919         sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
2920         filep = fopen(path, "r");
2921         if (!filep)
2922             continue;
2923         fclose(filep);
2924         return i;
2925     }
2926     return -1;
2927 }
2928 
2929 int get_thread_siblings(struct cpu_topology *thiscpu)
2930 {
2931     char path[80], character;
2932     FILE *filep;
2933     unsigned long map;
2934     int so, shift, sib_core;
2935     int cpu = thiscpu->logical_cpu_id;
2936     int offset = topo.max_cpu_num + 1;
2937     size_t size;
2938     int thread_id = 0;
2939 
2940     thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2941     if (thiscpu->thread_id < 0)
2942         thiscpu->thread_id = thread_id++;
2943     if (!thiscpu->put_ids)
2944         return -1;
2945 
2946     size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2947     CPU_ZERO_S(size, thiscpu->put_ids);
2948 
2949     sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2950     filep = fopen(path, "r");
2951 
2952     if (!filep) {
2953         warnx("%s: open failed", path);
2954         return -1;
2955     }
2956     do {
2957         offset -= BITMASK_SIZE;
2958         if (fscanf(filep, "%lx%c", &map, &character) != 2)
2959             err(1, "%s: failed to parse file", path);
2960         for (shift = 0; shift < BITMASK_SIZE; shift++) {
2961             if ((map >> shift) & 0x1) {
2962                 so = shift + offset;
2963                 sib_core = get_core_id(so);
2964                 if (sib_core == thiscpu->physical_core_id) {
2965                     CPU_SET_S(so, size, thiscpu->put_ids);
2966                     if ((so != cpu) && (cpus[so].thread_id < 0))
2967                         cpus[so].thread_id = thread_id++;
2968                 }
2969             }
2970         }
2971     } while (character == ',');
2972     fclose(filep);
2973 
2974     return CPU_COUNT_S(size, thiscpu->put_ids);
2975 }
2976 
2977 /*
2978  * run func(thread, core, package) in topology order
2979  * skip non-present cpus
2980  */
2981 
2982 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
2983                    struct pkg_data *, struct thread_data *, struct core_data *,
2984                    struct pkg_data *), struct thread_data *thread_base,
2985            struct core_data *core_base, struct pkg_data *pkg_base,
2986            struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
2987 {
2988     int retval, pkg_no, node_no, core_no, thread_no;
2989 
2990     for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2991         for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2992             for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
2993                 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
2994                     struct thread_data *t, *t2;
2995                     struct core_data *c, *c2;
2996                     struct pkg_data *p, *p2;
2997 
2998                     t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
2999 
3000                     if (cpu_is_not_present(t->cpu_id))
3001                         continue;
3002 
3003                     t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
3004 
3005                     c = GET_CORE(core_base, core_no, node_no, pkg_no);
3006                     c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
3007 
3008                     p = GET_PKG(pkg_base, pkg_no);
3009                     p2 = GET_PKG(pkg_base2, pkg_no);
3010 
3011                     retval = func(t, c, p, t2, c2, p2);
3012                     if (retval)
3013                         return retval;
3014                 }
3015             }
3016         }
3017     }
3018     return 0;
3019 }
3020 
3021 /*
3022  * run func(cpu) on every cpu in /proc/stat
3023  * return max_cpu number
3024  */
3025 int for_all_proc_cpus(int (func) (int))
3026 {
3027     FILE *fp;
3028     int cpu_num;
3029     int retval;
3030 
3031     fp = fopen_or_die(proc_stat, "r");
3032 
3033     retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
3034     if (retval != 0)
3035         err(1, "%s: failed to parse format", proc_stat);
3036 
3037     while (1) {
3038         retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
3039         if (retval != 1)
3040             break;
3041 
3042         retval = func(cpu_num);
3043         if (retval) {
3044             fclose(fp);
3045             return (retval);
3046         }
3047     }
3048     fclose(fp);
3049     return 0;
3050 }
3051 
3052 void re_initialize(void)
3053 {
3054     free_all_buffers();
3055     setup_all_buffers();
3056     fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
3057 }
3058 
3059 void set_max_cpu_num(void)
3060 {
3061     FILE *filep;
3062     int base_cpu;
3063     unsigned long dummy;
3064     char pathname[64];
3065 
3066     base_cpu = sched_getcpu();
3067     if (base_cpu < 0)
3068         err(1, "cannot find calling cpu ID");
3069     sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
3070 
3071     filep = fopen_or_die(pathname, "r");
3072     topo.max_cpu_num = 0;
3073     while (fscanf(filep, "%lx,", &dummy) == 1)
3074         topo.max_cpu_num += BITMASK_SIZE;
3075     fclose(filep);
3076     topo.max_cpu_num--; /* 0 based */
3077 }
3078 
3079 /*
3080  * count_cpus()
3081  * remember the last one seen, it will be the max
3082  */
3083 int count_cpus(int cpu)
3084 {
3085     UNUSED(cpu);
3086 
3087     topo.num_cpus++;
3088     return 0;
3089 }
3090 
3091 int mark_cpu_present(int cpu)
3092 {
3093     CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
3094     return 0;
3095 }
3096 
3097 int init_thread_id(int cpu)
3098 {
3099     cpus[cpu].thread_id = -1;
3100     return 0;
3101 }
3102 
3103 /*
3104  * snapshot_proc_interrupts()
3105  *
3106  * read and record summary of /proc/interrupts
3107  *
3108  * return 1 if config change requires a restart, else return 0
3109  */
3110 int snapshot_proc_interrupts(void)
3111 {
3112     static FILE *fp;
3113     int column, retval;
3114 
3115     if (fp == NULL)
3116         fp = fopen_or_die("/proc/interrupts", "r");
3117     else
3118         rewind(fp);
3119 
3120     /* read 1st line of /proc/interrupts to get cpu* name for each column */
3121     for (column = 0; column < topo.num_cpus; ++column) {
3122         int cpu_number;
3123 
3124         retval = fscanf(fp, " CPU%d", &cpu_number);
3125         if (retval != 1)
3126             break;
3127 
3128         if (cpu_number > topo.max_cpu_num) {
3129             warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
3130             return 1;
3131         }
3132 
3133         irq_column_2_cpu[column] = cpu_number;
3134         irqs_per_cpu[cpu_number] = 0;
3135     }
3136 
3137     /* read /proc/interrupt count lines and sum up irqs per cpu */
3138     while (1) {
3139         int column;
3140         char buf[64];
3141 
3142         retval = fscanf(fp, " %s:", buf);   /* flush irq# "N:" */
3143         if (retval != 1)
3144             break;
3145 
3146         /* read the count per cpu */
3147         for (column = 0; column < topo.num_cpus; ++column) {
3148 
3149             int cpu_number, irq_count;
3150 
3151             retval = fscanf(fp, " %d", &irq_count);
3152             if (retval != 1)
3153                 break;
3154 
3155             cpu_number = irq_column_2_cpu[column];
3156             irqs_per_cpu[cpu_number] += irq_count;
3157 
3158         }
3159 
3160         while (getc(fp) != '\n') ;  /* flush interrupt description */
3161 
3162     }
3163     return 0;
3164 }
3165 
3166 /*
3167  * snapshot_gfx_rc6_ms()
3168  *
3169  * record snapshot of
3170  * /sys/class/drm/card0/power/rc6_residency_ms
3171  *
3172  * return 1 if config change requires a restart, else return 0
3173  */
3174 int snapshot_gfx_rc6_ms(void)
3175 {
3176     FILE *fp;
3177     int retval;
3178 
3179     fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
3180 
3181     retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
3182     if (retval != 1)
3183         err(1, "GFX rc6");
3184 
3185     fclose(fp);
3186 
3187     return 0;
3188 }
3189 
3190 /*
3191  * snapshot_gfx_mhz()
3192  *
3193  * record snapshot of
3194  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
3195  *
3196  * return 1 if config change requires a restart, else return 0
3197  */
3198 int snapshot_gfx_mhz(void)
3199 {
3200     static FILE *fp;
3201     int retval;
3202 
3203     if (fp == NULL)
3204         fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
3205     else {
3206         rewind(fp);
3207         fflush(fp);
3208     }
3209 
3210     retval = fscanf(fp, "%d", &gfx_cur_mhz);
3211     if (retval != 1)
3212         err(1, "GFX MHz");
3213 
3214     return 0;
3215 }
3216 
3217 /*
3218  * snapshot_gfx_cur_mhz()
3219  *
3220  * record snapshot of
3221  * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
3222  *
3223  * return 1 if config change requires a restart, else return 0
3224  */
3225 int snapshot_gfx_act_mhz(void)
3226 {
3227     static FILE *fp;
3228     int retval;
3229 
3230     if (fp == NULL)
3231         fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
3232     else {
3233         rewind(fp);
3234         fflush(fp);
3235     }
3236 
3237     retval = fscanf(fp, "%d", &gfx_act_mhz);
3238     if (retval != 1)
3239         err(1, "GFX ACT MHz");
3240 
3241     return 0;
3242 }
3243 
3244 /*
3245  * snapshot_cpu_lpi()
3246  *
3247  * record snapshot of
3248  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
3249  */
3250 int snapshot_cpu_lpi_us(void)
3251 {
3252     FILE *fp;
3253     int retval;
3254 
3255     fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
3256 
3257     retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
3258     if (retval != 1) {
3259         fprintf(stderr, "Disabling Low Power Idle CPU output\n");
3260         BIC_NOT_PRESENT(BIC_CPU_LPI);
3261         fclose(fp);
3262         return -1;
3263     }
3264 
3265     fclose(fp);
3266 
3267     return 0;
3268 }
3269 
3270 /*
3271  * snapshot_sys_lpi()
3272  *
3273  * record snapshot of sys_lpi_file
3274  */
3275 int snapshot_sys_lpi_us(void)
3276 {
3277     FILE *fp;
3278     int retval;
3279 
3280     fp = fopen_or_die(sys_lpi_file, "r");
3281 
3282     retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
3283     if (retval != 1) {
3284         fprintf(stderr, "Disabling Low Power Idle System output\n");
3285         BIC_NOT_PRESENT(BIC_SYS_LPI);
3286         fclose(fp);
3287         return -1;
3288     }
3289     fclose(fp);
3290 
3291     return 0;
3292 }
3293 
3294 /*
3295  * snapshot /proc and /sys files
3296  *
3297  * return 1 if configuration restart needed, else return 0
3298  */
3299 int snapshot_proc_sysfs_files(void)
3300 {
3301     if (DO_BIC(BIC_IRQ))
3302         if (snapshot_proc_interrupts())
3303             return 1;
3304 
3305     if (DO_BIC(BIC_GFX_rc6))
3306         snapshot_gfx_rc6_ms();
3307 
3308     if (DO_BIC(BIC_GFXMHz))
3309         snapshot_gfx_mhz();
3310 
3311     if (DO_BIC(BIC_GFXACTMHz))
3312         snapshot_gfx_act_mhz();
3313 
3314     if (DO_BIC(BIC_CPU_LPI))
3315         snapshot_cpu_lpi_us();
3316 
3317     if (DO_BIC(BIC_SYS_LPI))
3318         snapshot_sys_lpi_us();
3319 
3320     return 0;
3321 }
3322 
3323 int exit_requested;
3324 
3325 static void signal_handler(int signal)
3326 {
3327     switch (signal) {
3328     case SIGINT:
3329         exit_requested = 1;
3330         if (debug)
3331             fprintf(stderr, " SIGINT\n");
3332         break;
3333     case SIGUSR1:
3334         if (debug > 1)
3335             fprintf(stderr, "SIGUSR1\n");
3336         break;
3337     }
3338 }
3339 
3340 void setup_signal_handler(void)
3341 {
3342     struct sigaction sa;
3343 
3344     memset(&sa, 0, sizeof(sa));
3345 
3346     sa.sa_handler = &signal_handler;
3347 
3348     if (sigaction(SIGINT, &sa, NULL) < 0)
3349         err(1, "sigaction SIGINT");
3350     if (sigaction(SIGUSR1, &sa, NULL) < 0)
3351         err(1, "sigaction SIGUSR1");
3352 }
3353 
3354 void do_sleep(void)
3355 {
3356     struct timeval tout;
3357     struct timespec rest;
3358     fd_set readfds;
3359     int retval;
3360 
3361     FD_ZERO(&readfds);
3362     FD_SET(0, &readfds);
3363 
3364     if (ignore_stdin) {
3365         nanosleep(&interval_ts, NULL);
3366         return;
3367     }
3368 
3369     tout = interval_tv;
3370     retval = select(1, &readfds, NULL, NULL, &tout);
3371 
3372     if (retval == 1) {
3373         switch (getc(stdin)) {
3374         case 'q':
3375             exit_requested = 1;
3376             break;
3377         case EOF:
3378             /*
3379              * 'stdin' is a pipe closed on the other end. There
3380              * won't be any further input.
3381              */
3382             ignore_stdin = 1;
3383             /* Sleep the rest of the time */
3384             rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3385             rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3386             nanosleep(&rest, NULL);
3387         }
3388     }
3389 }
3390 
3391 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
3392 {
3393     int ret, idx;
3394     unsigned long long msr_cur, msr_last;
3395 
3396     if (!per_cpu_msr_sum)
3397         return 1;
3398 
3399     idx = offset_to_idx(offset);
3400     if (idx < 0)
3401         return idx;
3402     /* get_msr_sum() = sum + (get_msr() - last) */
3403     ret = get_msr(cpu, offset, &msr_cur);
3404     if (ret)
3405         return ret;
3406     msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
3407     DELTA_WRAP32(msr_cur, msr_last);
3408     *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
3409 
3410     return 0;
3411 }
3412 
3413 timer_t timerid;
3414 
3415 /* Timer callback, update the sum of MSRs periodically. */
3416 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3417 {
3418     int i, ret;
3419     int cpu = t->cpu_id;
3420 
3421     UNUSED(c);
3422     UNUSED(p);
3423 
3424     for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
3425         unsigned long long msr_cur, msr_last;
3426         off_t offset;
3427 
3428         if (!idx_valid(i))
3429             continue;
3430         offset = idx_to_offset(i);
3431         if (offset < 0)
3432             continue;
3433         ret = get_msr(cpu, offset, &msr_cur);
3434         if (ret) {
3435             fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
3436             continue;
3437         }
3438 
3439         msr_last = per_cpu_msr_sum[cpu].entries[i].last;
3440         per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
3441 
3442         DELTA_WRAP32(msr_cur, msr_last);
3443         per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
3444     }
3445     return 0;
3446 }
3447 
3448 static void msr_record_handler(union sigval v)
3449 {
3450     UNUSED(v);
3451 
3452     for_all_cpus(update_msr_sum, EVEN_COUNTERS);
3453 }
3454 
3455 void msr_sum_record(void)
3456 {
3457     struct itimerspec its;
3458     struct sigevent sev;
3459 
3460     per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
3461     if (!per_cpu_msr_sum) {
3462         fprintf(outf, "Can not allocate memory for long time MSR.\n");
3463         return;
3464     }
3465     /*
3466      * Signal handler might be restricted, so use thread notifier instead.
3467      */
3468     memset(&sev, 0, sizeof(struct sigevent));
3469     sev.sigev_notify = SIGEV_THREAD;
3470     sev.sigev_notify_function = msr_record_handler;
3471 
3472     sev.sigev_value.sival_ptr = &timerid;
3473     if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
3474         fprintf(outf, "Can not create timer.\n");
3475         goto release_msr;
3476     }
3477 
3478     its.it_value.tv_sec = 0;
3479     its.it_value.tv_nsec = 1;
3480     /*
3481      * A wraparound time has been calculated early.
3482      * Some sources state that the peak power for a
3483      * microprocessor is usually 1.5 times the TDP rating,
3484      * use 2 * TDP for safety.
3485      */
3486     its.it_interval.tv_sec = rapl_joule_counter_range / 2;
3487     its.it_interval.tv_nsec = 0;
3488 
3489     if (timer_settime(timerid, 0, &its, NULL) == -1) {
3490         fprintf(outf, "Can not set timer.\n");
3491         goto release_timer;
3492     }
3493     return;
3494 
3495 release_timer:
3496     timer_delete(timerid);
3497 release_msr:
3498     free(per_cpu_msr_sum);
3499 }
3500 
/*
 * set_my_sched_priority(priority)
 * set the nice value of the calling process and return the previous one
 * exits via err() if the value cannot be read, set, or read back as set
 *
 * if non-root, do this:
 * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat
 */
int set_my_sched_priority(int priority)
{
    int previous, readback, rc;

    /* -1 is a legal nice value, so errno is what tells an error apart */
    errno = 0;
    previous = getpriority(PRIO_PROCESS, 0);
    if ((previous == -1) && errno)
        err(errno, "getpriority");

    rc = setpriority(PRIO_PROCESS, 0, priority);
    if (rc != 0)
        err(rc, "setpriority(%d)", priority);

    /* verify that the new value actually took effect */
    errno = 0;
    readback = getpriority(PRIO_PROCESS, 0);
    if (readback != priority)
        err(readback, "getpriority(%d) != setpriority(%d)", readback, priority);

    return previous;
}
3529 
3530 void turbostat_loop()
3531 {
3532     int retval;
3533     int restarted = 0;
3534     unsigned int done_iters = 0;
3535 
3536     setup_signal_handler();
3537 
3538     /*
3539      * elevate own priority for interval mode
3540      */
3541     set_my_sched_priority(-20);
3542 
3543 restart:
3544     restarted++;
3545 
3546     snapshot_proc_sysfs_files();
3547     retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3548     first_counter_read = 0;
3549     if (retval < -1) {
3550         exit(retval);
3551     } else if (retval == -1) {
3552         if (restarted > 10) {
3553             exit(retval);
3554         }
3555         re_initialize();
3556         goto restart;
3557     }
3558     restarted = 0;
3559     done_iters = 0;
3560     gettimeofday(&tv_even, (struct timezone *)NULL);
3561 
3562     while (1) {
3563         if (for_all_proc_cpus(cpu_is_not_present)) {
3564             re_initialize();
3565             goto restart;
3566         }
3567         do_sleep();
3568         if (snapshot_proc_sysfs_files())
3569             goto restart;
3570         retval = for_all_cpus(get_counters, ODD_COUNTERS);
3571         if (retval < -1) {
3572             exit(retval);
3573         } else if (retval == -1) {
3574             re_initialize();
3575             goto restart;
3576         }
3577         gettimeofday(&tv_odd, (struct timezone *)NULL);
3578         timersub(&tv_odd, &tv_even, &tv_delta);
3579         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3580             re_initialize();
3581             goto restart;
3582         }
3583         compute_average(EVEN_COUNTERS);
3584         format_all_counters(EVEN_COUNTERS);
3585         flush_output_stdout();
3586         if (exit_requested)
3587             break;
3588         if (num_iterations && ++done_iters >= num_iterations)
3589             break;
3590         do_sleep();
3591         if (snapshot_proc_sysfs_files())
3592             goto restart;
3593         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3594         if (retval < -1) {
3595             exit(retval);
3596         } else if (retval == -1) {
3597             re_initialize();
3598             goto restart;
3599         }
3600         gettimeofday(&tv_even, (struct timezone *)NULL);
3601         timersub(&tv_even, &tv_odd, &tv_delta);
3602         if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3603             re_initialize();
3604             goto restart;
3605         }
3606         compute_average(ODD_COUNTERS);
3607         format_all_counters(ODD_COUNTERS);
3608         flush_output_stdout();
3609         if (exit_requested)
3610             break;
3611         if (num_iterations && ++done_iters >= num_iterations)
3612             break;
3613     }
3614 }
3615 
3616 void check_dev_msr()
3617 {
3618     struct stat sb;
3619     char pathname[32];
3620 
3621     sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3622     if (stat(pathname, &sb))
3623         if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3624             err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3625 }
3626 
3627 /*
3628  * check for CAP_SYS_RAWIO
3629  * return 0 on success
3630  * return 1 on fail
3631  */
3632 int check_for_cap_sys_rawio(void)
3633 {
3634     cap_t caps;
3635     cap_flag_value_t cap_flag_value;
3636 
3637     caps = cap_get_proc();
3638     if (caps == NULL)
3639         err(-6, "cap_get_proc\n");
3640 
3641     if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3642         err(-6, "cap_get\n");
3643 
3644     if (cap_flag_value != CAP_SET) {
3645         warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3646         return 1;
3647     }
3648 
3649     if (cap_free(caps) == -1)
3650         err(-6, "cap_free\n");
3651 
3652     return 0;
3653 }
3654 
3655 void check_permissions(void)
3656 {
3657     int do_exit = 0;
3658     char pathname[32];
3659 
3660     /* check for CAP_SYS_RAWIO */
3661     do_exit += check_for_cap_sys_rawio();
3662 
3663     /* test file permissions */
3664     sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3665     if (euidaccess(pathname, R_OK)) {
3666         do_exit++;
3667         warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3668     }
3669 
3670     /* if all else fails, thell them to be root */
3671     if (do_exit)
3672         if (getuid() != 0)
3673             warnx("... or simply run as root");
3674 
3675     if (do_exit)
3676         exit(-6);
3677 }
3678 
3679 /*
3680  * NHM adds support for additional MSRs:
3681  *
3682  * MSR_SMI_COUNT                   0x00000034
3683  *
3684  * MSR_PLATFORM_INFO               0x000000ce
3685  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3686  *
3687  * MSR_MISC_PWR_MGMT               0x000001aa
3688  *
3689  * MSR_PKG_C3_RESIDENCY            0x000003f8
3690  * MSR_PKG_C6_RESIDENCY            0x000003f9
3691  * MSR_CORE_C3_RESIDENCY           0x000003fc
3692  * MSR_CORE_C6_RESIDENCY           0x000003fd
3693  *
3694  * Side effect:
3695  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3696  * sets has_misc_feature_control
3697  */
3698 int probe_nhm_msrs(unsigned int family, unsigned int model)
3699 {
3700     unsigned long long msr;
3701     unsigned int base_ratio;
3702     int *pkg_cstate_limits;
3703 
3704     if (!genuine_intel)
3705         return 0;
3706 
3707     if (family != 6)
3708         return 0;
3709 
3710     bclk = discover_bclk(family, model);
3711 
3712     switch (model) {
3713     case INTEL_FAM6_NEHALEM:    /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3714     case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
3715         pkg_cstate_limits = nhm_pkg_cstate_limits;
3716         break;
3717     case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3718     case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3719     case INTEL_FAM6_IVYBRIDGE:  /* IVB */
3720     case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3721         pkg_cstate_limits = snb_pkg_cstate_limits;
3722         has_misc_feature_control = 1;
3723         break;
3724     case INTEL_FAM6_HASWELL:    /* HSW */
3725     case INTEL_FAM6_HASWELL_G:  /* HSW */
3726     case INTEL_FAM6_HASWELL_X:  /* HSX */
3727     case INTEL_FAM6_HASWELL_L:  /* HSW */
3728     case INTEL_FAM6_BROADWELL:  /* BDW */
3729     case INTEL_FAM6_BROADWELL_G:    /* BDW */
3730     case INTEL_FAM6_BROADWELL_X:    /* BDX */
3731     case INTEL_FAM6_SKYLAKE_L:  /* SKL */
3732     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3733         pkg_cstate_limits = hsw_pkg_cstate_limits;
3734         has_misc_feature_control = 1;
3735         break;
3736     case INTEL_FAM6_SKYLAKE_X:  /* SKX */
3737     case INTEL_FAM6_SAPPHIRERAPIDS_X:   /* SPR */
3738         pkg_cstate_limits = skx_pkg_cstate_limits;
3739         has_misc_feature_control = 1;
3740         break;
3741     case INTEL_FAM6_ICELAKE_X:  /* ICX */
3742         pkg_cstate_limits = icx_pkg_cstate_limits;
3743         has_misc_feature_control = 1;
3744         break;
3745     case INTEL_FAM6_ATOM_SILVERMONT:    /* BYT */
3746         no_MSR_MISC_PWR_MGMT = 1;
3747         /* FALLTHRU */
3748     case INTEL_FAM6_ATOM_SILVERMONT_D:  /* AVN */
3749         pkg_cstate_limits = slv_pkg_cstate_limits;
3750         break;
3751     case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3752         pkg_cstate_limits = amt_pkg_cstate_limits;
3753         no_MSR_MISC_PWR_MGMT = 1;
3754         break;
3755     case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3756         pkg_cstate_limits = phi_pkg_cstate_limits;
3757         break;
3758     case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3759     case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3760     case INTEL_FAM6_ATOM_GOLDMONT_D:    /* DNV */
3761     case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
3762     case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
3763         pkg_cstate_limits = glm_pkg_cstate_limits;
3764         break;
3765     default:
3766         return 0;
3767     }
3768     get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3769     pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3770 
3771     get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3772     base_ratio = (msr >> 8) & 0xFF;
3773 
3774     base_hz = base_ratio * bclk * 1000000;
3775     has_base_hz = 1;
3776     return 1;
3777 }
3778 
3779 /*
3780  * SLV client has support for unique MSRs:
3781  *
3782  * MSR_CC6_DEMOTION_POLICY_CONFIG
3783  * MSR_MC6_DEMOTION_POLICY_CONFIG
3784  */
3785 
3786 int has_slv_msrs(unsigned int family, unsigned int model)
3787 {
3788     if (!genuine_intel)
3789         return 0;
3790 
3791     if (family != 6)
3792         return 0;
3793 
3794     switch (model) {
3795     case INTEL_FAM6_ATOM_SILVERMONT:
3796     case INTEL_FAM6_ATOM_SILVERMONT_MID:
3797     case INTEL_FAM6_ATOM_AIRMONT_MID:
3798         return 1;
3799     }
3800     return 0;
3801 }
3802 
3803 int is_dnv(unsigned int family, unsigned int model)
3804 {
3805 
3806     if (!genuine_intel)
3807         return 0;
3808 
3809     if (family != 6)
3810         return 0;
3811 
3812     switch (model) {
3813     case INTEL_FAM6_ATOM_GOLDMONT_D:
3814         return 1;
3815     }
3816     return 0;
3817 }
3818 
3819 int is_bdx(unsigned int family, unsigned int model)
3820 {
3821 
3822     if (!genuine_intel)
3823         return 0;
3824 
3825     if (family != 6)
3826         return 0;
3827 
3828     switch (model) {
3829     case INTEL_FAM6_BROADWELL_X:
3830         return 1;
3831     }
3832     return 0;
3833 }
3834 
3835 int is_skx(unsigned int family, unsigned int model)
3836 {
3837 
3838     if (!genuine_intel)
3839         return 0;
3840 
3841     if (family != 6)
3842         return 0;
3843 
3844     switch (model) {
3845     case INTEL_FAM6_SKYLAKE_X:
3846         return 1;
3847     }
3848     return 0;
3849 }
3850 
3851 int is_icx(unsigned int family, unsigned int model)
3852 {
3853 
3854     if (!genuine_intel)
3855         return 0;
3856 
3857     if (family != 6)
3858         return 0;
3859 
3860     switch (model) {
3861     case INTEL_FAM6_ICELAKE_X:
3862         return 1;
3863     }
3864     return 0;
3865 }
3866 
3867 int is_spr(unsigned int family, unsigned int model)
3868 {
3869 
3870     if (!genuine_intel)
3871         return 0;
3872 
3873     if (family != 6)
3874         return 0;
3875 
3876     switch (model) {
3877     case INTEL_FAM6_SAPPHIRERAPIDS_X:
3878         return 1;
3879     }
3880     return 0;
3881 }
3882 
3883 int is_ehl(unsigned int family, unsigned int model)
3884 {
3885     if (!genuine_intel)
3886         return 0;
3887 
3888     if (family != 6)
3889         return 0;
3890 
3891     switch (model) {
3892     case INTEL_FAM6_ATOM_TREMONT:
3893         return 1;
3894     }
3895     return 0;
3896 }
3897 
3898 int is_jvl(unsigned int family, unsigned int model)
3899 {
3900     if (!genuine_intel)
3901         return 0;
3902 
3903     if (family != 6)
3904         return 0;
3905 
3906     switch (model) {
3907     case INTEL_FAM6_ATOM_TREMONT_D:
3908         return 1;
3909     }
3910     return 0;
3911 }
3912 
3913 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3914 {
3915     if (has_slv_msrs(family, model))
3916         return 0;
3917 
3918     if (family != 6)
3919         return 0;
3920 
3921     switch (model) {
3922         /* Nehalem compatible, but do not include turbo-ratio limit support */
3923     case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
3924     case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3925         return 0;
3926     default:
3927         return 1;
3928     }
3929 }
3930 
/* Returns 1 exactly when has_slv_msrs() reports true for this CPU, else 0. */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
    return has_slv_msrs(family, model) ? 1 : 0;
}
3938 
3939 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3940 {
3941     if (!genuine_intel)
3942         return 0;
3943 
3944     if (family != 6)
3945         return 0;
3946 
3947     switch (model) {
3948     case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3949     case INTEL_FAM6_HASWELL_X:  /* HSW Xeon */
3950         return 1;
3951     default:
3952         return 0;
3953     }
3954 }
3955 
3956 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3957 {
3958     if (!genuine_intel)
3959         return 0;
3960 
3961     if (family != 6)
3962         return 0;
3963 
3964     switch (model) {
3965     case INTEL_FAM6_HASWELL_X:  /* HSW Xeon */
3966         return 1;
3967     default:
3968         return 0;
3969     }
3970 }
3971 
3972 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3973 {
3974     if (!genuine_intel)
3975         return 0;
3976 
3977     if (family != 6)
3978         return 0;
3979 
3980     switch (model) {
3981     case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3982         return 1;
3983     default:
3984         return 0;
3985     }
3986 }
3987 
3988 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3989 {
3990     if (!genuine_intel)
3991         return 0;
3992 
3993     if (family != 6)
3994         return 0;
3995 
3996     switch (model) {
3997     case INTEL_FAM6_ATOM_GOLDMONT:
3998     case INTEL_FAM6_SKYLAKE_X:
3999     case INTEL_FAM6_ICELAKE_X:
4000     case INTEL_FAM6_SAPPHIRERAPIDS_X:
4001         return 1;
4002     default:
4003         return 0;
4004     }
4005 }
4006 
4007 int has_config_tdp(unsigned int family, unsigned int model)
4008 {
4009     if (!genuine_intel)
4010         return 0;
4011 
4012     if (family != 6)
4013         return 0;
4014 
4015     switch (model) {
4016     case INTEL_FAM6_IVYBRIDGE:  /* IVB */
4017     case INTEL_FAM6_HASWELL:    /* HSW */
4018     case INTEL_FAM6_HASWELL_X:  /* HSX */
4019     case INTEL_FAM6_HASWELL_L:  /* HSW */
4020     case INTEL_FAM6_HASWELL_G:  /* HSW */
4021     case INTEL_FAM6_BROADWELL:  /* BDW */
4022     case INTEL_FAM6_BROADWELL_G:    /* BDW */
4023     case INTEL_FAM6_BROADWELL_X:    /* BDX */
4024     case INTEL_FAM6_SKYLAKE_L:  /* SKL */
4025     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4026     case INTEL_FAM6_SKYLAKE_X:  /* SKX */
4027     case INTEL_FAM6_ICELAKE_X:  /* ICX */
4028     case INTEL_FAM6_SAPPHIRERAPIDS_X:   /* SPR */
4029     case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
4030         return 1;
4031     default:
4032         return 0;
4033     }
4034 }
4035 
4036 /*
4037  * tcc_offset_bits:
4038  * 0: Tcc Offset not supported (Default)
4039  * 6: Bit 29:24 of MSR_PLATFORM_INFO
4040  * 4: Bit 27:24 of MSR_PLATFORM_INFO
4041  */
4042 void check_tcc_offset(int model)
4043 {
4044     unsigned long long msr;
4045 
4046     if (!genuine_intel)
4047         return;
4048 
4049     switch (model) {
4050     case INTEL_FAM6_SKYLAKE_L:
4051     case INTEL_FAM6_SKYLAKE:
4052     case INTEL_FAM6_KABYLAKE_L:
4053     case INTEL_FAM6_KABYLAKE:
4054     case INTEL_FAM6_ICELAKE_L:
4055     case INTEL_FAM6_ICELAKE:
4056     case INTEL_FAM6_TIGERLAKE_L:
4057     case INTEL_FAM6_TIGERLAKE:
4058     case INTEL_FAM6_COMETLAKE:
4059         if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
4060             msr = (msr >> 30) & 1;
4061             if (msr)
4062                 tcc_offset_bits = 6;
4063         }
4064         return;
4065     default:
4066         return;
4067     }
4068 }
4069 
/*
 * Strip every '_' character from the NUL-terminated string s, in place.
 * The remaining characters keep their order and the result is
 * NUL-terminated.
 */
static void remove_underbar(char *s)
{
    size_t rd;
    size_t wr = 0;

    for (rd = 0; s[rd] != '\0'; rd++) {
        if (s[rd] == '_')
            continue;
        s[wr++] = s[rd];
    }

    s[wr] = '\0';
}
4082 
4083 static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
4084 {
4085     if (!do_nhm_platform_info)
4086         return;
4087 
4088     dump_nhm_platform_info();
4089 
4090     if (has_hsw_turbo_ratio_limit(family, model))
4091         dump_hsw_turbo_ratio_limits();
4092 
4093     if (has_ivt_turbo_ratio_limit(family, model))
4094         dump_ivt_turbo_ratio_limits();
4095 
4096     if (has_turbo_ratio_limit(family, model)) {
4097         dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
4098 
4099         if (is_hybrid)
4100             dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
4101     }
4102 
4103     if (has_atom_turbo_ratio_limit(family, model))
4104         dump_atom_turbo_ratio_limits();
4105 
4106     if (has_knl_turbo_ratio_limit(family, model))
4107         dump_knl_turbo_ratio_limits();
4108 
4109     if (has_config_tdp(family, model))
4110         dump_config_tdp();
4111 
4112     dump_nhm_cst_cfg();
4113 }
4114 
4115 static int read_sysfs_int(char *path)
4116 {
4117     FILE *input;
4118     int retval = -1;
4119 
4120     input = fopen(path, "r");
4121     if (input == NULL) {
4122         if (debug)
4123             fprintf(outf, "NSFOD %s\n", path);
4124         return (-1);
4125     }
4126     if (fscanf(input, "%d", &retval) != 1)
4127         err(1, "%s: failed to read int from file", path);
4128     fclose(input);
4129 
4130     return (retval);
4131 }
4132 
4133 static void dump_sysfs_file(char *path)
4134 {
4135     FILE *input;
4136     char cpuidle_buf[64];
4137 
4138     input = fopen(path, "r");
4139     if (input == NULL) {
4140         if (debug)
4141             fprintf(outf, "NSFOD %s\n", path);
4142         return;
4143     }
4144     if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
4145         err(1, "%s: failed to read file", path);
4146     fclose(input);
4147 
4148     fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
4149 }
4150 
4151 static void intel_uncore_frequency_probe(void)
4152 {
4153     int i, j;
4154     char path[128];
4155 
4156     if (!genuine_intel)
4157         return;
4158 
4159     if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
4160         return;
4161 
4162     if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
4163         BIC_PRESENT(BIC_UNCORE_MHZ);
4164 
4165     if (quiet)
4166         return;
4167 
4168     for (i = 0; i < topo.num_packages; ++i) {
4169         for (j = 0; j < topo.num_die; ++j) {
4170             int k, l;
4171 
4172             sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
4173                 i, j);
4174             k = read_sysfs_int(path);
4175             sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
4176                 i, j);
4177             l = read_sysfs_int(path);
4178             fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
4179 
4180             sprintf(path,
4181                 "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
4182                 i, j);
4183             k = read_sysfs_int(path);
4184             sprintf(path,
4185                 "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
4186                 i, j);
4187             l = read_sysfs_int(path);
4188             fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
4189         }
4190     }
4191 }
4192 
4193 static void dump_sysfs_cstate_config(void)
4194 {
4195     char path[64];
4196     char name_buf[16];
4197     char desc[64];
4198     FILE *input;
4199     int state;
4200     char *sp;
4201 
4202     if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
4203         fprintf(outf, "cpuidle not loaded\n");
4204         return;
4205     }
4206 
4207     dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
4208     dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
4209     dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
4210 
4211     for (state = 0; state < 10; ++state) {
4212 
4213         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
4214         input = fopen(path, "r");
4215         if (input == NULL)
4216             continue;
4217         if (!fgets(name_buf, sizeof(name_buf), input))
4218             err(1, "%s: failed to read file", path);
4219 
4220         /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4221         sp = strchr(name_buf, '-');
4222         if (!sp)
4223             sp = strchrnul(name_buf, '\n');
4224         *sp = '\0';
4225         fclose(input);
4226 
4227         remove_underbar(name_buf);
4228 
4229         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
4230         input = fopen(path, "r");
4231         if (input == NULL)
4232             continue;
4233         if (!fgets(desc, sizeof(desc), input))
4234             err(1, "%s: failed to read file", path);
4235 
4236         fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
4237         fclose(input);
4238     }
4239 }
4240 
4241 static void dump_sysfs_pstate_config(void)
4242 {
4243     char path[64];
4244     char driver_buf[64];
4245     char governor_buf[64];
4246     FILE *input;
4247     int turbo;
4248 
4249     sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
4250     input = fopen(path, "r");
4251     if (input == NULL) {
4252         fprintf(outf, "NSFOD %s\n", path);
4253         return;
4254     }
4255     if (!fgets(driver_buf, sizeof(driver_buf), input))
4256         err(1, "%s: failed to read file", path);
4257     fclose(input);
4258 
4259     sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
4260     input = fopen(path, "r");
4261     if (input == NULL) {
4262         fprintf(outf, "NSFOD %s\n", path);
4263         return;
4264     }
4265     if (!fgets(governor_buf, sizeof(governor_buf), input))
4266         err(1, "%s: failed to read file", path);
4267     fclose(input);
4268 
4269     fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
4270     fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
4271 
4272     sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
4273     input = fopen(path, "r");
4274     if (input != NULL) {
4275         if (fscanf(input, "%d", &turbo) != 1)
4276             err(1, "%s: failed to parse number from file", path);
4277         fprintf(outf, "cpufreq boost: %d\n", turbo);
4278         fclose(input);
4279     }
4280 
4281     sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
4282     input = fopen(path, "r");
4283     if (input != NULL) {
4284         if (fscanf(input, "%d", &turbo) != 1)
4285             err(1, "%s: failed to parse number from file", path);
4286         fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
4287         fclose(input);
4288     }
4289 }
4290 
4291 /*
4292  * print_epb()
4293  * Decode the ENERGY_PERF_BIAS MSR
4294  */
4295 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4296 {
4297     char *epb_string;
4298     int cpu, epb;
4299 
4300     UNUSED(c);
4301     UNUSED(p);
4302 
4303     if (!has_epb)
4304         return 0;
4305 
4306     cpu = t->cpu_id;
4307 
4308     /* EPB is per-package */
4309     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4310         return 0;
4311 
4312     if (cpu_migrate(cpu)) {
4313         fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
4314         return -1;
4315     }
4316 
4317     epb = get_epb(cpu);
4318     if (epb < 0)
4319         return 0;
4320 
4321     switch (epb) {
4322     case ENERGY_PERF_BIAS_PERFORMANCE:
4323         epb_string = "performance";
4324         break;
4325     case ENERGY_PERF_BIAS_NORMAL:
4326         epb_string = "balanced";
4327         break;
4328     case ENERGY_PERF_BIAS_POWERSAVE:
4329         epb_string = "powersave";
4330         break;
4331     default:
4332         epb_string = "custom";
4333         break;
4334     }
4335     fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
4336 
4337     return 0;
4338 }
4339 
4340 /*
4341  * print_hwp()
4342  * Decode the MSR_HWP_CAPABILITIES
4343  */
4344 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4345 {
4346     unsigned long long msr;
4347     int cpu;
4348 
4349     UNUSED(c);
4350     UNUSED(p);
4351 
4352     if (!has_hwp)
4353         return 0;
4354 
4355     cpu = t->cpu_id;
4356 
4357     /* MSR_HWP_CAPABILITIES is per-package */
4358     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4359         return 0;
4360 
4361     if (cpu_migrate(cpu)) {
4362         fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
4363         return -1;
4364     }
4365 
4366     if (get_msr(cpu, MSR_PM_ENABLE, &msr))
4367         return 0;
4368 
4369     fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
4370 
4371     /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
4372     if ((msr & (1 << 0)) == 0)
4373         return 0;
4374 
4375     if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
4376         return 0;
4377 
4378     fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
4379         "(high %d guar %d eff %d low %d)\n",
4380         cpu, msr,
4381         (unsigned int)HWP_HIGHEST_PERF(msr),
4382         (unsigned int)HWP_GUARANTEED_PERF(msr),
4383         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
4384 
4385     if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
4386         return 0;
4387 
4388     fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
4389         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
4390         cpu, msr,
4391         (unsigned int)(((msr) >> 0) & 0xff),
4392         (unsigned int)(((msr) >> 8) & 0xff),
4393         (unsigned int)(((msr) >> 16) & 0xff),
4394         (unsigned int)(((msr) >> 24) & 0xff),
4395         (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
4396 
4397     if (has_hwp_pkg) {
4398         if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
4399             return 0;
4400 
4401         fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
4402             "(min %d max %d des %d epp 0x%x window 0x%x)\n",
4403             cpu, msr,
4404             (unsigned int)(((msr) >> 0) & 0xff),
4405             (unsigned int)(((msr) >> 8) & 0xff),
4406             (unsigned int)(((msr) >> 16) & 0xff),
4407             (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
4408     }
4409     if (has_hwp_notify) {
4410         if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
4411             return 0;
4412 
4413         fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
4414             "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
4415             cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
4416     }
4417     if (get_msr(cpu, MSR_HWP_STATUS, &msr))
4418         return 0;
4419 
4420     fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
4421         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
4422         cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
4423 
4424     return 0;
4425 }
4426 
4427 /*
4428  * print_perf_limit()
4429  */
4430 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4431 {
4432     unsigned long long msr;
4433     int cpu;
4434 
4435     UNUSED(c);
4436     UNUSED(p);
4437 
4438     cpu = t->cpu_id;
4439 
4440     /* per-package */
4441     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4442         return 0;
4443 
4444     if (cpu_migrate(cpu)) {
4445         fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
4446         return -1;
4447     }
4448 
4449     if (do_core_perf_limit_reasons) {
4450         get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
4451         fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4452         fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
4453             (msr & 1 << 15) ? "bit15, " : "",
4454             (msr & 1 << 14) ? "bit14, " : "",
4455             (msr & 1 << 13) ? "Transitions, " : "",
4456             (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
4457             (msr & 1 << 11) ? "PkgPwrL2, " : "",
4458             (msr & 1 << 10) ? "PkgPwrL1, " : "",
4459             (msr & 1 << 9) ? "CorePwr, " : "",
4460             (msr & 1 << 8) ? "Amps, " : "",
4461             (msr & 1 << 6) ? "VR-Therm, " : "",
4462             (msr & 1 << 5) ? "Auto-HWP, " : "",
4463             (msr & 1 << 4) ? "Graphics, " : "",
4464             (msr & 1 << 2) ? "bit2, " : "",
4465             (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
4466         fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
4467             (msr & 1 << 31) ? "bit31, " : "",
4468             (msr & 1 << 30) ? "bit30, " : "",
4469             (msr & 1 << 29) ? "Transitions, " : "",
4470             (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
4471             (msr & 1 << 27) ? "PkgPwrL2, " : "",
4472             (msr & 1 << 26) ? "PkgPwrL1, " : "",
4473             (msr & 1 << 25) ? "CorePwr, " : "",
4474             (msr & 1 << 24) ? "Amps, " : "",
4475             (msr & 1 << 22) ? "VR-Therm, " : "",
4476             (msr & 1 << 21) ? "Auto-HWP, " : "",
4477             (msr & 1 << 20) ? "Graphics, " : "",
4478             (msr & 1 << 18) ? "bit18, " : "",
4479             (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
4480 
4481     }
4482     if (do_gfx_perf_limit_reasons) {
4483         get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
4484         fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4485         fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
4486             (msr & 1 << 0) ? "PROCHOT, " : "",
4487             (msr & 1 << 1) ? "ThermStatus, " : "",
4488             (msr & 1 << 4) ? "Graphics, " : "",
4489             (msr & 1 << 6) ? "VR-Therm, " : "",
4490             (msr & 1 << 8) ? "Amps, " : "",
4491             (msr & 1 << 9) ? "GFXPwr, " : "",
4492             (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4493         fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
4494             (msr & 1 << 16) ? "PROCHOT, " : "",
4495             (msr & 1 << 17) ? "ThermStatus, " : "",
4496             (msr & 1 << 20) ? "Graphics, " : "",
4497             (msr & 1 << 22) ? "VR-Therm, " : "",
4498             (msr & 1 << 24) ? "Amps, " : "",
4499             (msr & 1 << 25) ? "GFXPwr, " : "",
4500             (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4501     }
4502     if (do_ring_perf_limit_reasons) {
4503         get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
4504         fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4505         fprintf(outf, " (Active: %s%s%s%s%s%s)",
4506             (msr & 1 << 0) ? "PROCHOT, " : "",
4507             (msr & 1 << 1) ? "ThermStatus, " : "",
4508             (msr & 1 << 6) ? "VR-Therm, " : "",
4509             (msr & 1 << 8) ? "Amps, " : "",
4510             (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4511         fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
4512             (msr & 1 << 16) ? "PROCHOT, " : "",
4513             (msr & 1 << 17) ? "ThermStatus, " : "",
4514             (msr & 1 << 22) ? "VR-Therm, " : "",
4515             (msr & 1 << 24) ? "Amps, " : "",
4516             (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4517     }
4518     return 0;
4519 }
4520 
4521 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
4522 #define RAPL_TIME_GRANULARITY   0x3F    /* 6 bit time granularity */
4523 
4524 double get_tdp_intel(unsigned int model)
4525 {
4526     unsigned long long msr;
4527 
4528     if (do_rapl & RAPL_PKG_POWER_INFO)
4529         if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
4530             return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
4531 
4532     switch (model) {
4533     case INTEL_FAM6_ATOM_SILVERMONT:
4534     case INTEL_FAM6_ATOM_SILVERMONT_D:
4535         return 30.0;
4536     default:
4537         return 135.0;
4538     }
4539 }
4540 
/*
 * get_tdp_amd()
 *
 * Return a fixed TDP figure in Watts; the family argument is ignored.
 */
double get_tdp_amd(unsigned int family)
{
    /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
    const double max_stock_tdp = 280.0;

    UNUSED(family);

    return max_stock_tdp;
}
4548 
4549 /*
4550  * rapl_dram_energy_units_probe()
4551  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
4552  */
4553 static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
4554 {
4555     /* only called for genuine_intel, family 6 */
4556 
4557     switch (model) {
4558     case INTEL_FAM6_HASWELL_X:  /* HSX */
4559     case INTEL_FAM6_BROADWELL_X:    /* BDX */
4560     case INTEL_FAM6_SKYLAKE_X:  /* SKX */
4561     case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4562     case INTEL_FAM6_ICELAKE_X:  /* ICX */
4563     case INTEL_FAM6_SAPPHIRERAPIDS_X:   /* SPR */
4564         return (rapl_dram_energy_units = 15.3 / 1000000);
4565     default:
4566         return (rapl_energy_units);
4567     }
4568 }
4569 
4570 void rapl_probe_intel(unsigned int family, unsigned int model)
4571 {
4572     unsigned long long msr;
4573     unsigned int time_unit;
4574     double tdp;
4575 
4576     if (family != 6)
4577         return;
4578 
4579     switch (model) {
4580     case INTEL_FAM6_SANDYBRIDGE:
4581     case INTEL_FAM6_IVYBRIDGE:
4582     case INTEL_FAM6_HASWELL:    /* HSW */
4583     case INTEL_FAM6_HASWELL_L:  /* HSW */
4584     case INTEL_FAM6_HASWELL_G:  /* HSW */
4585     case INTEL_FAM6_BROADWELL:  /* BDW */
4586     case INTEL_FAM6_BROADWELL_G:    /* BDW */
4587         do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
4588         if (rapl_joules) {
4589             BIC_PRESENT(BIC_Pkg_J);
4590             BIC_PRESENT(BIC_Cor_J);
4591             BIC_PRESENT(BIC_GFX_J);
4592         } else {
4593             BIC_PRESENT(BIC_PkgWatt);
4594             BIC_PRESENT(BIC_CorWatt);
4595             BIC_PRESENT(BIC_GFXWatt);
4596         }
4597         break;
4598     case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4599     case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4600         do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
4601         if (rapl_joules)
4602             BIC_PRESENT(BIC_Pkg_J);
4603         else
4604             BIC_PRESENT(BIC_PkgWatt);
4605         break;
4606     case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4607         do_rapl =
4608             RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4609             | RAPL_GFX | RAPL_PKG_POWER_INFO;
4610         if (rapl_joules) {
4611             BIC_PRESENT(BIC_Pkg_J);
4612             BIC_PRESENT(BIC_Cor_J);
4613             BIC_PRESENT(BIC_RAM_J);
4614             BIC_PRESENT(BIC_GFX_J);
4615         } else {
4616             BIC_PRESENT(BIC_PkgWatt);
4617             BIC_PRESENT(BIC_CorWatt);
4618             BIC_PRESENT(BIC_RAMWatt);
4619             BIC_PRESENT(BIC_GFXWatt);
4620         }
4621         break;
4622     case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
4623         do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
4624         BIC_PRESENT(BIC_PKG__);
4625         if (rapl_joules)
4626             BIC_PRESENT(BIC_Pkg_J);
4627         else
4628             BIC_PRESENT(BIC_PkgWatt);
4629         break;
4630     case INTEL_FAM6_SKYLAKE_L:  /* SKL */
4631     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4632         do_rapl =
4633             RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4634             | RAPL_GFX | RAPL_PKG_POWER_INFO;
4635         BIC_PRESENT(BIC_PKG__);
4636         BIC_PRESENT(BIC_RAM__);
4637         if (rapl_joules) {
4638             BIC_PRESENT(BIC_Pkg_J);
4639             BIC_PRESENT(BIC_Cor_J);
4640             BIC_PRESENT(BIC_RAM_J);
4641             BIC_PRESENT(BIC_GFX_J);
4642         } else {
4643             BIC_PRESENT(BIC_PkgWatt);
4644             BIC_PRESENT(BIC_CorWatt);
4645             BIC_PRESENT(BIC_RAMWatt);
4646             BIC_PRESENT(BIC_GFXWatt);
4647         }
4648         break;
4649     case INTEL_FAM6_HASWELL_X:  /* HSX */
4650     case INTEL_FAM6_BROADWELL_X:    /* BDX */
4651     case INTEL_FAM6_SKYLAKE_X:  /* SKX */
4652     case INTEL_FAM6_ICELAKE_X:  /* ICX */
4653     case INTEL_FAM6_SAPPHIRERAPIDS_X:   /* SPR */
4654     case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4655         do_rapl =
4656             RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4657             RAPL_PKG_POWER_INFO;
4658         BIC_PRESENT(BIC_PKG__);
4659         BIC_PRESENT(BIC_RAM__);
4660         if (rapl_joules) {
4661             BIC_PRESENT(BIC_Pkg_J);
4662             BIC_PRESENT(BIC_RAM_J);
4663         } else {
4664             BIC_PRESENT(BIC_PkgWatt);
4665             BIC_PRESENT(BIC_RAMWatt);
4666         }
4667         break;
4668     case INTEL_FAM6_SANDYBRIDGE_X:
4669     case INTEL_FAM6_IVYBRIDGE_X:
4670         do_rapl =
4671             RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
4672             RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
4673         BIC_PRESENT(BIC_PKG__);
4674         BIC_PRESENT(BIC_RAM__);
4675         if (rapl_joules) {
4676             BIC_PRESENT(BIC_Pkg_J);
4677             BIC_PRESENT(BIC_Cor_J);
4678             BIC_PRESENT(BIC_RAM_J);
4679         } else {
4680             BIC_PRESENT(BIC_PkgWatt);
4681             BIC_PRESENT(BIC_CorWatt);
4682             BIC_PRESENT(BIC_RAMWatt);
4683         }
4684         break;
4685     case INTEL_FAM6_ATOM_SILVERMONT:    /* BYT */
4686     case INTEL_FAM6_ATOM_SILVERMONT_D:  /* AVN */
4687         do_rapl = RAPL_PKG | RAPL_CORES;
4688         if (rapl_joules) {
4689             BIC_PRESENT(BIC_Pkg_J);
4690             BIC_PRESENT(BIC_Cor_J);
4691         } else {
4692             BIC_PRESENT(BIC_PkgWatt);
4693             BIC_PRESENT(BIC_CorWatt);
4694         }
4695         break;
4696     case INTEL_FAM6_ATOM_GOLDMONT_D:    /* DNV */
4697         do_rapl =
4698             RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4699             RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4700         BIC_PRESENT(BIC_PKG__);
4701         BIC_PRESENT(BIC_RAM__);
4702         if (rapl_joules) {
4703             BIC_PRESENT(BIC_Pkg_J);
4704             BIC_PRESENT(BIC_Cor_J);
4705             BIC_PRESENT(BIC_RAM_J);
4706         } else {
4707             BIC_PRESENT(BIC_PkgWatt);
4708             BIC_PRESENT(BIC_CorWatt);
4709             BIC_PRESENT(BIC_RAMWatt);
4710         }
4711         break;
4712     default:
4713         return;
4714     }
4715 
4716     /* units on package 0, verify later other packages match */
4717     if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4718         return;
4719 
4720     rapl_power_units = 1.0 / (1 << (msr & 0xF));
4721     if (model == INTEL_FAM6_ATOM_SILVERMONT)
4722         rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4723     else
4724         rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4725 
4726     rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4727 
4728     time_unit = msr >> 16 & 0xF;
4729     if (time_unit == 0)
4730         time_unit = 0xA;
4731 
4732     rapl_time_units = 1.0 / (1 << (time_unit));
4733 
4734     tdp = get_tdp_intel(model);
4735 
4736     rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4737     if (!quiet)
4738         fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4739 }
4740 
4741 void rapl_probe_amd(unsigned int family, unsigned int model)
4742 {
4743     unsigned long long msr;
4744     unsigned int eax, ebx, ecx, edx;
4745     unsigned int has_rapl = 0;
4746     double tdp;
4747 
4748     UNUSED(model);
4749 
4750     if (max_extended_level >= 0x80000007) {
4751         __cpuid(0x80000007, eax, ebx, ecx, edx);
4752         /* RAPL (Fam 17h+) */
4753         has_rapl = edx & (1 << 14);
4754     }
4755 
4756     if (!has_rapl || family < 0x17)
4757         return;
4758 
4759     do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4760     if (rapl_joules) {
4761         BIC_PRESENT(BIC_Pkg_J);
4762         BIC_PRESENT(BIC_Cor_J);
4763     } else {
4764         BIC_PRESENT(BIC_PkgWatt);
4765         BIC_PRESENT(BIC_CorWatt);
4766     }
4767 
4768     if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4769         return;
4770 
4771     rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4772     rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4773     rapl_power_units = ldexp(1.0, -(msr & 0xf));
4774 
4775     tdp = get_tdp_amd(family);
4776 
4777     rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4778     if (!quiet)
4779         fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4780 }
4781 
4782 /*
4783  * rapl_probe()
4784  *
4785  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4786  */
4787 void rapl_probe(unsigned int family, unsigned int model)
4788 {
4789     if (genuine_intel)
4790         rapl_probe_intel(family, model);
4791     if (authentic_amd || hygon_genuine)
4792         rapl_probe_amd(family, model);
4793 }
4794 
4795 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4796 {
4797     if (!genuine_intel)
4798         return;
4799 
4800     if (family != 6)
4801         return;
4802 
4803     switch (model) {
4804     case INTEL_FAM6_HASWELL:    /* HSW */
4805     case INTEL_FAM6_HASWELL_L:  /* HSW */
4806     case INTEL_FAM6_HASWELL_G:  /* HSW */
4807         do_gfx_perf_limit_reasons = 1;
4808         /* FALLTHRU */
4809     case INTEL_FAM6_HASWELL_X:  /* HSX */
4810         do_core_perf_limit_reasons = 1;
4811         do_ring_perf_limit_reasons = 1;
4812     default:
4813         return;
4814     }
4815 }
4816 
4817 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4818 {
4819     if (family != 6)
4820         return;
4821 
4822     switch (model) {
4823     case INTEL_FAM6_BROADWELL_X:
4824     case INTEL_FAM6_SKYLAKE_X:
4825         has_automatic_cstate_conversion = 1;
4826     }
4827 }
4828 
4829 void prewake_cstate_probe(unsigned int family, unsigned int model)
4830 {
4831     if (is_icx(family, model) || is_spr(family, model))
4832         dis_cstate_prewake = 1;
4833 }
4834 
4835 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4836 {
4837     unsigned long long msr;
4838     unsigned int dts, dts2;
4839     int cpu;
4840 
4841     UNUSED(c);
4842     UNUSED(p);
4843 
4844     if (!(do_dts || do_ptm))
4845         return 0;
4846 
4847     cpu = t->cpu_id;
4848 
4849     /* DTS is per-core, no need to print for each thread */
4850     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4851         return 0;
4852 
4853     if (cpu_migrate(cpu)) {
4854         fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
4855         return -1;
4856     }
4857 
4858     if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4859         if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4860             return 0;
4861 
4862         dts = (msr >> 16) & 0x7F;
4863         fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
4864 
4865         if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4866             return 0;
4867 
4868         dts = (msr >> 16) & 0x7F;
4869         dts2 = (msr >> 8) & 0x7F;
4870         fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4871             cpu, msr, tj_max - dts, tj_max - dts2);
4872     }
4873 
4874     if (do_dts && debug) {
4875         unsigned int resolution;
4876 
4877         if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4878             return 0;
4879 
4880         dts = (msr >> 16) & 0x7F;
4881         resolution = (msr >> 27) & 0xF;
4882         fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4883             cpu, msr, tj_max - dts, resolution);
4884 
4885         if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4886             return 0;
4887 
4888         dts = (msr >> 16) & 0x7F;
4889         dts2 = (msr >> 8) & 0x7F;
4890         fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4891             cpu, msr, tj_max - dts, tj_max - dts2);
4892     }
4893 
4894     return 0;
4895 }
4896 
4897 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4898 {
4899     fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
4900         cpu, label,
4901         ((msr >> 15) & 1) ? "EN" : "DIS",
4902         ((msr >> 0) & 0x7FFF) * rapl_power_units,
4903         (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4904         (((msr >> 16) & 1) ? "EN" : "DIS"));
4905 
4906     return;
4907 }
4908 
4909 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4910 {
4911     unsigned long long msr;
4912     const char *msr_name;
4913     int cpu;
4914 
4915     UNUSED(c);
4916     UNUSED(p);
4917 
4918     if (!do_rapl)
4919         return 0;
4920 
4921     /* RAPL counters are per package, so print only for 1st thread/package */
4922     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4923         return 0;
4924 
4925     cpu = t->cpu_id;
4926     if (cpu_migrate(cpu)) {
4927         fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
4928         return -1;
4929     }
4930 
4931     if (do_rapl & RAPL_AMD_F17H) {
4932         msr_name = "MSR_RAPL_PWR_UNIT";
4933         if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4934             return -1;
4935     } else {
4936         msr_name = "MSR_RAPL_POWER_UNIT";
4937         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4938             return -1;
4939     }
4940 
4941     fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4942         rapl_power_units, rapl_energy_units, rapl_time_units);
4943 
4944     if (do_rapl & RAPL_PKG_POWER_INFO) {
4945 
4946         if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4947             return -5;
4948 
4949         fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4950             cpu, msr,
4951             ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4952             ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4953             ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4954             ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4955 
4956     }
4957     if (do_rapl & RAPL_PKG) {
4958 
4959         if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4960             return -9;
4961 
4962         fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4963             cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4964 
4965         print_power_limit_msr(cpu, msr, "PKG Limit #1");
4966         fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
4967             cpu,
4968             ((msr >> 47) & 1) ? "EN" : "DIS",
4969             ((msr >> 32) & 0x7FFF) * rapl_power_units,
4970             (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4971             ((msr >> 48) & 1) ? "EN" : "DIS");
4972 
4973         if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
4974             return -9;
4975 
4976         fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
4977         fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
4978             cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
4979     }
4980 
4981     if (do_rapl & RAPL_DRAM_POWER_INFO) {
4982         if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4983             return -6;
4984 
4985         fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4986             cpu, msr,
4987             ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4988             ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4989             ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4990             ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4991     }
4992     if (do_rapl & RAPL_DRAM) {
4993         if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4994             return -9;
4995         fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4996             cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4997 
4998         print_power_limit_msr(cpu, msr, "DRAM Limit");
4999     }
5000     if (do_rapl & RAPL_CORE_POLICY) {
5001         if (get_msr(cpu, MSR_PP0_POLICY, &msr))
5002             return -7;
5003 
5004         fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
5005     }
5006     if (do_rapl & RAPL_CORES_POWER_LIMIT) {
5007         if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
5008             return -9;
5009         fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
5010             cpu, msr, (msr >> 31) & 1 ? "" : "UN");
5011         print_power_limit_msr(cpu, msr, "Cores Limit");
5012     }
5013     if (do_rapl & RAPL_GFX) {
5014         if (get_msr(cpu, MSR_PP1_POLICY, &msr))
5015             return -8;
5016 
5017         fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
5018 
5019         if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
5020             return -9;
5021         fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
5022             cpu, msr, (msr >> 31) & 1 ? "" : "UN");
5023         print_power_limit_msr(cpu, msr, "GFX Limit");
5024     }
5025     return 0;
5026 }
5027 
5028 /*
5029  * SNB adds support for additional MSRs:
5030  *
5031  * MSR_PKG_C7_RESIDENCY            0x000003fa
5032  * MSR_CORE_C7_RESIDENCY           0x000003fe
5033  * MSR_PKG_C2_RESIDENCY            0x0000060d
5034  */
5035 
5036 int has_snb_msrs(unsigned int family, unsigned int model)
5037 {
5038     if (!genuine_intel)
5039         return 0;
5040 
5041     if (family != 6)
5042         return 0;
5043 
5044     switch (model) {
5045     case INTEL_FAM6_SANDYBRIDGE:
5046     case INTEL_FAM6_SANDYBRIDGE_X:
5047     case INTEL_FAM6_IVYBRIDGE:  /* IVB */
5048     case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
5049     case INTEL_FAM6_HASWELL:    /* HSW */
5050     case INTEL_FAM6_HASWELL_X:  /* HSW */
5051     case INTEL_FAM6_HASWELL_L:  /* HSW */
5052     case INTEL_FAM6_HASWELL_G:  /* HSW */
5053     case INTEL_FAM6_BROADWELL:  /* BDW */
5054     case INTEL_FAM6_BROADWELL_G:    /* BDW */
5055     case INTEL_FAM6_BROADWELL_X:    /* BDX */
5056     case INTEL_FAM6_SKYLAKE_L:  /* SKL */
5057     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5058     case INTEL_FAM6_SKYLAKE_X:  /* SKX */
5059     case INTEL_FAM6_ICELAKE_X:  /* ICX */
5060     case INTEL_FAM6_SAPPHIRERAPIDS_X:   /* SPR */
5061     case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5062     case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5063     case INTEL_FAM6_ATOM_GOLDMONT_D:    /* DNV */
5064     case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
5065     case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
5066         return 1;
5067     }
5068     return 0;
5069 }
5070 
5071 /*
5072  * HSW ULT added support for C8/C9/C10 MSRs:
5073  *
5074  * MSR_PKG_C8_RESIDENCY     0x00000630
5075  * MSR_PKG_C9_RESIDENCY     0x00000631
5076  * MSR_PKG_C10_RESIDENCY    0x00000632
5077  *
5078  * MSR_PKGC8_IRTL       0x00000633
5079  * MSR_PKGC9_IRTL       0x00000634
5080  * MSR_PKGC10_IRTL      0x00000635
5081  *
5082  */
5083 int has_c8910_msrs(unsigned int family, unsigned int model)
5084 {
5085     if (!genuine_intel)
5086         return 0;
5087 
5088     if (family != 6)
5089         return 0;
5090 
5091     switch (model) {
5092     case INTEL_FAM6_HASWELL_L:  /* HSW */
5093     case INTEL_FAM6_BROADWELL:  /* BDW */
5094     case INTEL_FAM6_SKYLAKE_L:  /* SKL */
5095     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5096     case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5097     case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5098     case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
5099         return 1;
5100     }
5101     return 0;
5102 }
5103 
5104 /*
5105  * SKL adds support for additional MSRS:
5106  *
5107  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
5108  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
5109  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
5110  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
5111  */
5112 int has_skl_msrs(unsigned int family, unsigned int model)
5113 {
5114     if (!genuine_intel)
5115         return 0;
5116 
5117     if (family != 6)
5118         return 0;
5119 
5120     switch (model) {
5121     case INTEL_FAM6_SKYLAKE_L:  /* SKL */
5122     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5123         return 1;
5124     }
5125     return 0;
5126 }
5127 
5128 int is_slm(unsigned int family, unsigned int model)
5129 {
5130     if (!genuine_intel)
5131         return 0;
5132 
5133     if (family != 6)
5134         return 0;
5135 
5136     switch (model) {
5137     case INTEL_FAM6_ATOM_SILVERMONT:    /* BYT */
5138     case INTEL_FAM6_ATOM_SILVERMONT_D:  /* AVN */
5139         return 1;
5140     }
5141     return 0;
5142 }
5143 
5144 int is_knl(unsigned int family, unsigned int model)
5145 {
5146     if (!genuine_intel)
5147         return 0;
5148 
5149     if (family != 6)
5150         return 0;
5151 
5152     switch (model) {
5153     case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
5154         return 1;
5155     }
5156     return 0;
5157 }
5158 
5159 int is_cnl(unsigned int family, unsigned int model)
5160 {
5161     if (!genuine_intel)
5162         return 0;
5163 
5164     if (family != 6)
5165         return 0;
5166 
5167     switch (model) {
5168     case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5169         return 1;
5170     }
5171 
5172     return 0;
5173 }
5174 
/*
 * get_aperf_mperf_multiplier()
 *
 * Return 1024 when is_knl() matches, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
    return is_knl(family, model) ? 1024 : 1;
}
5181 
5182 #define SLM_BCLK_FREQS 5
5183 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
5184 
5185 double slm_bclk(void)
5186 {
5187     unsigned long long msr = 3;
5188     unsigned int i;
5189     double freq;
5190 
5191     if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
5192         fprintf(outf, "SLM BCLK: unknown\n");
5193 
5194     i = msr & 0xf;
5195     if (i >= SLM_BCLK_FREQS) {
5196         fprintf(outf, "SLM BCLK[%d] invalid\n", i);
5197         i = 3;
5198     }
5199     freq = slm_freq_table[i];
5200 
5201     if (!quiet)
5202         fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
5203 
5204     return freq;
5205 }
5206 
/*
 * discover_bclk()
 *
 * Return 100.00 when has_snb_msrs() or is_knl() matches, the value from
 * slm_bclk() when is_slm() matches, and 133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
    if (has_snb_msrs(family, model))
        return 100.00;

    if (is_knl(family, model))
        return 100.00;

    if (is_slm(family, model))
        return slm_bclk();

    return 133.33;
}
5216 
5217 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5218 {
5219     unsigned int eax, ebx, ecx, edx;
5220 
5221     UNUSED(c);
5222     UNUSED(p);
5223 
5224     if (!genuine_intel)
5225         return 0;
5226 
5227     if (cpu_migrate(t->cpu_id)) {
5228         fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
5229         return -1;
5230     }
5231 
5232     if (max_level < 0x1a)
5233         return 0;
5234 
5235     __cpuid(0x1a, eax, ebx, ecx, edx);
5236     eax = (eax >> 24) & 0xFF;
5237     if (eax == 0x20)
5238         t->is_atom = true;
5239     return 0;
5240 }
5241 
5242 /*
5243  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
5244  * the Thermal Control Circuit (TCC) activates.
5245  * This is usually equal to tjMax.
5246  *
5247  * Older processors do not have this MSR, so there we guess,
5248  * but also allow cmdline over-ride with -T.
5249  *
5250  * Several MSR temperature values are in units of degrees-C
5251  * below this value, including the Digital Thermal Sensor (DTS),
5252  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
5253  */
5254 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5255 {
5256     unsigned long long msr;
5257     unsigned int tcc_default, tcc_offset;
5258     int cpu;
5259 
5260     UNUSED(c);
5261     UNUSED(p);
5262 
5263     /* tj_max is used only for dts or ptm */
5264     if (!(do_dts || do_ptm))
5265         return 0;
5266 
5267     /* this is a per-package concept */
5268     if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
5269         return 0;
5270 
5271     cpu = t->cpu_id;
5272     if (cpu_migrate(cpu)) {
5273         fprintf(outf, "Could not migrate to CPU %d\n", cpu);
5274         return -1;
5275     }
5276 
5277     if (tj_max_override != 0) {
5278         tj_max = tj_max_override;
5279         fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
5280         return 0;
5281     }
5282 
5283     /* Temperature Target MSR is Nehalem and newer only */
5284     if (!do_nhm_platform_info)
5285         goto guess;
5286 
5287     if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
5288         goto guess;
5289 
5290     tcc_default = (msr >> 16) & 0xFF;
5291 
5292     if (!quiet) {
5293         switch (tcc_offset_bits) {
5294         case 4:
5295             tcc_offset = (msr >> 24) & 0xF;
5296             fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5297                 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
5298             break;
5299         case 6:
5300             tcc_offset = (msr >> 24) & 0x3F;
5301             fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5302                 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
5303             break;
5304         default:
5305             fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
5306             break;
5307         }
5308     }
5309 
5310     if (!tcc_default)
5311         goto guess;
5312 
5313     tj_max = tcc_default;
5314 
5315     return 0;
5316 
5317 guess:
5318     tj_max = TJMAX_DEFAULT;
5319     fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
5320 
5321     return 0;
5322 }
5323 
5324 void decode_feature_control_msr(void)
5325 {
5326     unsigned long long msr;
5327 
5328     if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
5329         fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
5330             base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
5331 }
5332 
5333 void decode_misc_enable_msr(void)
5334 {
5335     unsigned long long msr;
5336 
5337     if (!genuine_intel)
5338         return;
5339 
5340     if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
5341         fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
5342             base_cpu, msr,
5343             msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
5344             msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
5345             msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
5346             msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
5347             msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
5348 }
5349 
5350 void decode_misc_feature_control(void)
5351 {
5352     unsigned long long msr;
5353 
5354     if (!has_misc_feature_control)
5355         return;
5356 
5357     if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
5358         fprintf(outf,
5359             "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
5360             base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
5361             msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
5362 }
5363 
5364 /*
5365  * Decode MSR_MISC_PWR_MGMT
5366  *
5367  * Decode the bits according to the Nehalem documentation
5368  * bit[0] seems to continue to have same meaning going forward
5369  * bit[1] less so...
5370  */
5371 void decode_misc_pwr_mgmt_msr(void)
5372 {
5373     unsigned long long msr;
5374 
5375     if (!do_nhm_platform_info)
5376         return;
5377 
5378     if (no_MSR_MISC_PWR_MGMT)
5379         return;
5380 
5381     if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
5382         fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
5383             base_cpu, msr,
5384             msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
5385 }
5386 
5387 /*
5388  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
5389  *
5390  * This MSRs are present on Silvermont processors,
5391  * Intel Atom processor E3000 series (Baytrail), and friends.
5392  */
5393 void decode_c6_demotion_policy_msr(void)
5394 {
5395     unsigned long long msr;
5396 
5397     if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
5398         fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
5399             base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5400 
5401     if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
5402         fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
5403             base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5404 }
5405 
5406 /*
5407  * When models are the same, for the purpose of turbostat, reuse
5408  */
5409 unsigned int intel_model_duplicates(unsigned int model)
5410 {
5411 
5412     switch (model) {
5413     case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
5414     case INTEL_FAM6_NEHALEM:    /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
5415     case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
5416     case INTEL_FAM6_WESTMERE:   /* Westmere Client - Clarkdale, Arrandale */
5417     case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
5418         return INTEL_FAM6_NEHALEM;
5419 
5420     case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
5421     case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
5422         return INTEL_FAM6_NEHALEM_EX;
5423 
5424     case INTEL_FAM6_XEON_PHI_KNM:
5425         return INTEL_FAM6_XEON_PHI_KNL;
5426 
5427     case INTEL_FAM6_BROADWELL_X:
5428     case INTEL_FAM6_BROADWELL_D:    /* BDX-DE */
5429         return INTEL_FAM6_BROADWELL_X;
5430 
5431     case INTEL_FAM6_SKYLAKE_L:
5432     case INTEL_FAM6_SKYLAKE:
5433     case INTEL_FAM6_KABYLAKE_L:
5434     case INTEL_FAM6_KABYLAKE:
5435     case INTEL_FAM6_COMETLAKE_L:
5436     case INTEL_FAM6_COMETLAKE:
5437         return INTEL_FAM6_SKYLAKE_L;
5438 
5439     case INTEL_FAM6_ICELAKE_L:
5440     case INTEL_FAM6_ICELAKE_NNPI:
5441     case INTEL_FAM6_TIGERLAKE_L:
5442     case INTEL_FAM6_TIGERLAKE:
5443     case INTEL_FAM6_ROCKETLAKE:
5444     case INTEL_FAM6_LAKEFIELD:
5445     case INTEL_FAM6_ALDERLAKE:
5446     case INTEL_FAM6_ALDERLAKE_L:
5447     case INTEL_FAM6_ALDERLAKE_N:
5448     case INTEL_FAM6_RAPTORLAKE:
5449     case INTEL_FAM6_RAPTORLAKE_P:
5450         return INTEL_FAM6_CANNONLAKE_L;
5451 
5452     case INTEL_FAM6_ATOM_TREMONT_L:
5453         return INTEL_FAM6_ATOM_TREMONT;
5454 
5455     case INTEL_FAM6_ICELAKE_D:
5456         return INTEL_FAM6_ICELAKE_X;
5457     }
5458     return model;
5459 }
5460 
5461 void print_dev_latency(void)
5462 {
5463     char *path = "/dev/cpu_dma_latency";
5464     int fd;
5465     int value;
5466     int retval;
5467 
5468     fd = open(path, O_RDONLY);
5469     if (fd < 0) {
5470         warn("fopen %s\n", path);
5471         return;
5472     }
5473 
5474     retval = read(fd, (void *)&value, sizeof(int));
5475     if (retval != sizeof(int)) {
5476         warn("read %s\n", path);
5477         close(fd);
5478         return;
5479     }
5480     fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
5481 
5482     close(fd);
5483 }
5484 
5485 /*
5486  * Linux-perf manages the HW instructions-retired counter
5487  * by enabling when requested, and hiding rollover
5488  */
5489 void linux_perf_init(void)
5490 {
5491     if (!BIC_IS_ENABLED(BIC_IPC))
5492         return;
5493 
5494     if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
5495         return;
5496 
5497     fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5498     if (fd_instr_count_percpu == NULL)
5499         err(-1, "calloc fd_instr_count_percpu");
5500 
5501     BIC_PRESENT(BIC_IPC);
5502 }
5503 
5504 void process_cpuid()
5505 {
5506     unsigned int eax, ebx, ecx, edx;
5507     unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
5508     unsigned int has_turbo;
5509     unsigned long long ucode_patch = 0;
5510 
5511     eax = ebx = ecx = edx = 0;
5512 
5513     __cpuid(0, max_level, ebx, ecx, edx);
5514 
5515     if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
5516         genuine_intel = 1;
5517     else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
5518         authentic_amd = 1;
5519     else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
5520         hygon_genuine = 1;
5521 
5522     if (!quiet)
5523         fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
5524             (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
5525 
5526     __cpuid(1, fms, ebx, ecx, edx);
5527     family = (fms >> 8) & 0xf;
5528     model = (fms >> 4) & 0xf;
5529     stepping = fms & 0xf;
5530     if (family == 0xf)
5531         family += (fms >> 20) & 0xff;
5532     if (family >= 6)
5533         model += ((fms >> 16) & 0xf) << 4;
5534     ecx_flags = ecx;
5535     edx_flags = edx;
5536 
5537     if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
5538         warnx("get_msr(UCODE)\n");
5539 
5540     /*
5541      * check max extended function levels of CPUID.
5542      * This is needed to check for invariant TSC.
5543      * This check is valid for both Intel and AMD.
5544      */
5545     ebx = ecx = edx = 0;
5546     __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
5547 
5548     if (!quiet) {
5549         fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
5550             family, model, stepping, family, model, stepping,
5551             (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
5552         fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
5553         fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
5554             ecx_flags & (1 << 0) ? "SSE3" : "-",
5555             ecx_flags & (1 << 3) ? "MONITOR" : "-",
5556             ecx_flags & (1 << 6) ? "SMX" : "-",
5557             ecx_flags & (1 << 7) ? "EIST" : "-",
5558             ecx_flags & (1 << 8) ? "TM2" : "-",
5559             edx_flags & (1 << 4) ? "TSC" : "-",
5560             edx_flags & (1 << 5) ? "MSR" : "-",
5561             edx_flags & (1 << 22) ? "ACPI-TM" : "-",
5562             edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
5563     }
5564     if (genuine_intel) {
5565         model_orig = model;
5566         model = intel_model_duplicates(model);
5567     }
5568 
5569     if (!(edx_flags & (1 << 5)))
5570         errx(1, "CPUID: no MSR");
5571 
5572     if (max_extended_level >= 0x80000007) {
5573 
5574         /*
5575          * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
5576          * this check is valid for both Intel and AMD
5577          */
5578         __cpuid(0x80000007, eax, ebx, ecx, edx);
5579         has_invariant_tsc = edx & (1 << 8);
5580     }
5581 
5582     /*
5583      * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
5584      * this check is valid for both Intel and AMD
5585      */
5586 
5587     __cpuid(0x6, eax, ebx, ecx, edx);
5588     has_aperf = ecx & (1 << 0);
5589     if (has_aperf) {
5590         BIC_PRESENT(BIC_Avg_MHz);
5591         BIC_PRESENT(BIC_Busy);
5592         BIC_PRESENT(BIC_Bzy_MHz);
5593     }
5594     do_dts = eax & (1 << 0);
5595     if (do_dts)
5596         BIC_PRESENT(BIC_CoreTmp);
5597     has_turbo = eax & (1 << 1);
5598     do_ptm = eax & (1 << 6);
5599     if (do_ptm)
5600         BIC_PRESENT(BIC_PkgTmp);
5601     has_hwp = eax & (1 << 7);
5602     has_hwp_notify = eax & (1 << 8);
5603     has_hwp_activity_window = eax & (1 << 9);
5604     has_hwp_epp = eax & (1 << 10);
5605     has_hwp_pkg = eax & (1 << 11);
5606     has_epb = ecx & (1 << 3);
5607 
5608     if (!quiet)
5609         fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
5610             "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
5611             has_aperf ? "" : "No-",
5612             has_turbo ? "" : "No-",
5613             do_dts ? "" : "No-",
5614             do_ptm ? "" : "No-",
5615             has_hwp ? "" : "No-",
5616             has_hwp_notify ? "" : "No-",
5617             has_hwp_activity_window ? "" : "No-",
5618             has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
5619 
5620     if (!quiet)
5621         decode_misc_enable_msr();
5622 
5623     if (max_level >= 0x7 && !quiet) {
5624         int has_sgx;
5625 
5626         ecx = 0;
5627 
5628         __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
5629 
5630         has_sgx = ebx & (1 << 2);
5631 
5632         is_hybrid = edx & (1 << 15);
5633 
5634         fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");
5635 
5636         if (has_sgx)
5637             decode_feature_control_msr();
5638     }
5639 
5640     if (max_level >= 0x15) {
5641         unsigned int eax_crystal;
5642         unsigned int ebx_tsc;
5643 
5644         /*
5645          * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
5646          */
5647         eax_crystal = ebx_tsc = crystal_hz = edx = 0;
5648         __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
5649 
5650         if (ebx_tsc != 0) {
5651 
5652             if (!quiet && (ebx != 0))
5653                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
5654                     eax_crystal, ebx_tsc, crystal_hz);
5655 
5656             if (crystal_hz == 0)
5657                 switch (model) {
5658                 case INTEL_FAM6_SKYLAKE_L:  /* SKL */
5659                     crystal_hz = 24000000;  /* 24.0 MHz */
5660                     break;
5661                 case INTEL_FAM6_ATOM_GOLDMONT_D:    /* DNV */
5662                     crystal_hz = 25000000;  /* 25.0 MHz */
5663                     break;
5664                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5665                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5666                     crystal_hz = 19200000;  /* 19.2 MHz */
5667                     break;
5668                 default:
5669                     crystal_hz = 0;
5670                 }
5671 
5672             if (crystal_hz) {
5673                 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
5674                 if (!quiet)
5675                     fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
5676                         tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
5677             }
5678         }
5679     }
5680     if (max_level >= 0x16) {
5681         unsigned int base_mhz, max_mhz, bus_mhz, edx;
5682 
5683         /*
5684          * CPUID 16H Base MHz, Max MHz, Bus MHz
5685          */
5686         base_mhz = max_mhz = bus_mhz = edx = 0;
5687 
5688         __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
5689         if (!quiet)
5690             fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
5691                 base_mhz, max_mhz, bus_mhz);
5692     }
5693 
5694     if (has_aperf)
5695         aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
5696 
5697     BIC_PRESENT(BIC_IRQ);
5698     BIC_PRESENT(BIC_TSC_MHz);
5699 
5700     if (probe_nhm_msrs(family, model)) {
5701         do_nhm_platform_info = 1;
5702         BIC_PRESENT(BIC_CPU_c1);
5703         BIC_PRESENT(BIC_CPU_c3);
5704         BIC_PRESENT(BIC_CPU_c6);
5705         BIC_PRESENT(BIC_SMI);
5706     }
5707     do_snb_cstates = has_snb_msrs(family, model);
5708 
5709     if (do_snb_cstates)
5710         BIC_PRESENT(BIC_CPU_c7);
5711 
5712     do_irtl_snb = has_snb_msrs(family, model);
5713     if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
5714         BIC_PRESENT(BIC_Pkgpc2);
5715     if (pkg_cstate_limit >= PCL__3)
5716         BIC_PRESENT(BIC_Pkgpc3);
5717     if (pkg_cstate_limit >= PCL__6)
5718         BIC_PRESENT(BIC_Pkgpc6);
5719     if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
5720         BIC_PRESENT(BIC_Pkgpc7);
5721     if (has_slv_msrs(family, model)) {
5722         BIC_NOT_PRESENT(BIC_Pkgpc2);
5723         BIC_NOT_PRESENT(BIC_Pkgpc3);
5724         BIC_PRESENT(BIC_Pkgpc6);
5725         BIC_NOT_PRESENT(BIC_Pkgpc7);
5726         BIC_PRESENT(BIC_Mod_c6);
5727         use_c1_residency_msr = 1;
5728     }
5729     if (is_jvl(family, model)) {
5730         BIC_NOT_PRESENT(BIC_CPU_c3);
5731         BIC_NOT_PRESENT(BIC_CPU_c7);
5732         BIC_NOT_PRESENT(BIC_Pkgpc2);
5733         BIC_NOT_PRESENT(BIC_Pkgpc3);
5734         BIC_NOT_PRESENT(BIC_Pkgpc6);
5735         BIC_NOT_PRESENT(BIC_Pkgpc7);
5736     }
5737     if (is_dnv(family, model)) {
5738         BIC_PRESENT(BIC_CPU_c1);
5739         BIC_NOT_PRESENT(BIC_CPU_c3);
5740         BIC_NOT_PRESENT(BIC_Pkgpc3);
5741         BIC_NOT_PRESENT(BIC_CPU_c7);
5742         BIC_NOT_PRESENT(BIC_Pkgpc7);
5743         use_c1_residency_msr = 1;
5744     }
5745     if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
5746         BIC_NOT_PRESENT(BIC_CPU_c3);
5747         BIC_NOT_PRESENT(BIC_Pkgpc3);
5748         BIC_NOT_PRESENT(BIC_CPU_c7);
5749         BIC_NOT_PRESENT(BIC_Pkgpc7);
5750     }
5751     if (is_bdx(family, model)) {
5752         BIC_NOT_PRESENT(BIC_CPU_c7);
5753         BIC_NOT_PRESENT(BIC_Pkgpc7);
5754     }
5755     if (has_c8910_msrs(family, model)) {
5756         if (pkg_cstate_limit >= PCL__8)
5757             BIC_PRESENT(BIC_Pkgpc8);
5758         if (pkg_cstate_limit >= PCL__9)
5759             BIC_PRESENT(BIC_Pkgpc9);
5760         if (pkg_cstate_limit >= PCL_10)
5761             BIC_PRESENT(BIC_Pkgpc10);
5762     }
5763     do_irtl_hsw = has_c8910_msrs(family, model);
5764     if (has_skl_msrs(family, model)) {
5765         BIC_PRESENT(BIC_Totl_c0);
5766         BIC_PRESENT(BIC_Any_c0);
5767         BIC_PRESENT(BIC_GFX_c0);
5768         BIC_PRESENT(BIC_CPUGFX);
5769     }
5770     do_slm_cstates = is_slm(family, model);
5771     do_knl_cstates = is_knl(family, model);
5772 
5773     if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
5774         BIC_NOT_PRESENT(BIC_CPU_c3);
5775 
5776     if (!quiet)
5777         decode_misc_pwr_mgmt_msr();
5778 
5779     if (!quiet && has_slv_msrs(family, model))
5780         decode_c6_demotion_policy_msr();
5781 
5782     rapl_probe(family, model);
5783     perf_limit_reasons_probe(family, model);
5784     automatic_cstate_conversion_probe(family, model);
5785 
5786     check_tcc_offset(model_orig);
5787 
5788     if (!quiet)
5789         dump_cstate_pstate_config_info(family, model);
5790     intel_uncore_frequency_probe();
5791 
5792     if (!quiet)
5793         print_dev_latency();
5794     if (!quiet)
5795         dump_sysfs_cstate_config();
5796     if (!quiet)
5797         dump_sysfs_pstate_config();
5798 
5799     if (has_skl_msrs(family, model) || is_ehl(family, model))
5800         calculate_tsc_tweak();
5801 
5802     if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
5803         BIC_PRESENT(BIC_GFX_rc6);
5804 
5805     if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
5806         BIC_PRESENT(BIC_GFXMHz);
5807 
5808     if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
5809         BIC_PRESENT(BIC_GFXACTMHz);
5810 
5811     if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
5812         BIC_PRESENT(BIC_CPU_LPI);
5813     else
5814         BIC_NOT_PRESENT(BIC_CPU_LPI);
5815 
5816     if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
5817         BIC_PRESENT(BIC_CORE_THROT_CNT);
5818     else
5819         BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
5820 
5821     if (!access(sys_lpi_file_sysfs, R_OK)) {
5822         sys_lpi_file = sys_lpi_file_sysfs;
5823         BIC_PRESENT(BIC_SYS_LPI);
5824     } else if (!access(sys_lpi_file_debugfs, R_OK)) {
5825         sys_lpi_file = sys_lpi_file_debugfs;
5826         BIC_PRESENT(BIC_SYS_LPI);
5827     } else {
5828         sys_lpi_file_sysfs = NULL;
5829         BIC_NOT_PRESENT(BIC_SYS_LPI);
5830     }
5831 
5832     if (!quiet)
5833         decode_misc_feature_control();
5834 
5835     return;
5836 }
5837 
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of the entry name is examined.
 * Returns 1 when it is a decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
    /*
     * isdigit() is only defined for EOF and values representable as
     * unsigned char; plain char may be negative, so convert first.
     */
    return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
5849 
5850 void topology_probe()
5851 {
5852     int i;
5853     int max_core_id = 0;
5854     int max_package_id = 0;
5855     int max_die_id = 0;
5856     int max_siblings = 0;
5857 
5858     /* Initialize num_cpus, max_cpu_num */
5859     set_max_cpu_num();
5860     topo.num_cpus = 0;
5861     for_all_proc_cpus(count_cpus);
5862     if (!summary_only && topo.num_cpus > 1)
5863         BIC_PRESENT(BIC_CPU);
5864 
5865     if (debug > 1)
5866         fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
5867 
5868     cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
5869     if (cpus == NULL)
5870         err(1, "calloc cpus");
5871 
5872     /*
5873      * Allocate and initialize cpu_present_set
5874      */
5875     cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
5876     if (cpu_present_set == NULL)
5877         err(3, "CPU_ALLOC");
5878     cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5879     CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
5880     for_all_proc_cpus(mark_cpu_present);
5881 
5882     /*
5883      * Validate that all cpus in cpu_subset are also in cpu_present_set
5884      */
5885     for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
5886         if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
5887             if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
5888                 err(1, "cpu%d not present", i);
5889     }
5890 
5891     /*
5892      * Allocate and initialize cpu_affinity_set
5893      */
5894     cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
5895     if (cpu_affinity_set == NULL)
5896         err(3, "CPU_ALLOC");
5897     cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5898     CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
5899 
5900     for_all_proc_cpus(init_thread_id);
5901 
5902     /*
5903      * For online cpus
5904      * find max_core_id, max_package_id
5905      */
5906     for (i = 0; i <= topo.max_cpu_num; ++i) {
5907         int siblings;
5908 
5909         if (cpu_is_not_present(i)) {
5910             if (debug > 1)
5911                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
5912             continue;
5913         }
5914 
5915         cpus[i].logical_cpu_id = i;
5916 
5917         /* get package information */
5918         cpus[i].physical_package_id = get_physical_package_id(i);
5919         if (cpus[i].physical_package_id > max_package_id)
5920             max_package_id = cpus[i].physical_package_id;
5921 
5922         /* get die information */
5923         cpus[i].die_id = get_die_id(i);
5924         if (cpus[i].die_id > max_die_id)
5925             max_die_id = cpus[i].die_id;
5926 
5927         /* get numa node information */
5928         cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
5929         if (cpus[i].physical_node_id > topo.max_node_num)
5930             topo.max_node_num = cpus[i].physical_node_id;
5931 
5932         /* get core information */
5933         cpus[i].physical_core_id = get_core_id(i);
5934         if (cpus[i].physical_core_id > max_core_id)
5935             max_core_id = cpus[i].physical_core_id;
5936 
5937         /* get thread information */
5938         siblings = get_thread_siblings(&cpus[i]);
5939         if (siblings > max_siblings)
5940             max_siblings = siblings;
5941         if (cpus[i].thread_id == 0)
5942             topo.num_cores++;
5943     }
5944 
5945     topo.cores_per_node = max_core_id + 1;
5946     if (debug > 1)
5947         fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
5948     if (!summary_only && topo.cores_per_node > 1)
5949         BIC_PRESENT(BIC_Core);
5950 
5951     topo.num_die = max_die_id + 1;
5952     if (debug > 1)
5953         fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die);
5954     if (!summary_only && topo.num_die > 1)
5955         BIC_PRESENT(BIC_Die);
5956 
5957     topo.num_packages = max_package_id + 1;
5958     if (debug > 1)
5959         fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
5960     if (!summary_only && topo.num_packages > 1)
5961         BIC_PRESENT(BIC_Package);
5962 
5963     set_node_data();
5964     if (debug > 1)
5965         fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
5966     if (!summary_only && topo.nodes_per_pkg > 1)
5967         BIC_PRESENT(BIC_Node);
5968 
5969     topo.threads_per_core = max_siblings;
5970     if (debug > 1)
5971         fprintf(outf, "max_siblings %d\n", max_siblings);
5972 
5973     if (debug < 1)
5974         return;
5975 
5976     for (i = 0; i <= topo.max_cpu_num; ++i) {
5977         if (cpu_is_not_present(i))
5978             continue;
5979         fprintf(outf,
5980             "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
5981             i, cpus[i].physical_package_id, cpus[i].die_id,
5982             cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
5983     }
5984 
5985 }
5986 
5987 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
5988 {
5989     int i;
5990     int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
5991     int num_threads = topo.threads_per_core * num_cores;
5992 
5993     *t = calloc(num_threads, sizeof(struct thread_data));
5994     if (*t == NULL)
5995         goto error;
5996 
5997     for (i = 0; i < num_threads; i++)
5998         (*t)[i].cpu_id = -1;
5999 
6000     *c = calloc(num_cores, sizeof(struct core_data));
6001     if (*c == NULL)
6002         goto error;
6003 
6004     for (i = 0; i < num_cores; i++)
6005         (*c)[i].core_id = -1;
6006 
6007     *p = calloc(topo.num_packages, sizeof(struct pkg_data));
6008     if (*p == NULL)
6009         goto error;
6010 
6011     for (i = 0; i < topo.num_packages; i++)
6012         (*p)[i].package_id = i;
6013 
6014     return;
6015 error:
6016     err(1, "calloc counters");
6017 }
6018 
6019 /*
6020  * init_counter()
6021  *
6022  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
6023  */
6024 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
6025 {
6026     int pkg_id = cpus[cpu_id].physical_package_id;
6027     int node_id = cpus[cpu_id].logical_node_id;
6028     int core_id = cpus[cpu_id].physical_core_id;
6029     int thread_id = cpus[cpu_id].thread_id;
6030     struct thread_data *t;
6031     struct core_data *c;
6032     struct pkg_data *p;
6033 
6034     /* Workaround for systems where physical_node_id==-1
6035      * and logical_node_id==(-1 - topo.num_cpus)
6036      */
6037     if (node_id < 0)
6038         node_id = 0;
6039 
6040     t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
6041     c = GET_CORE(core_base, core_id, node_id, pkg_id);
6042     p = GET_PKG(pkg_base, pkg_id);
6043 
6044     t->cpu_id = cpu_id;
6045     if (thread_id == 0) {
6046         t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
6047         if (cpu_is_first_core_in_package(cpu_id))
6048             t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
6049     }
6050 
6051     c->core_id = core_id;
6052     p->package_id = pkg_id;
6053 }
6054 
6055 int initialize_counters(int cpu_id)
6056 {
6057     init_counter(EVEN_COUNTERS, cpu_id);
6058     init_counter(ODD_COUNTERS, cpu_id);
6059     return 0;
6060 }
6061 
6062 void allocate_output_buffer()
6063 {
6064     output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
6065     outp = output_buffer;
6066     if (outp == NULL)
6067         err(-1, "calloc output buffer");
6068 }
6069 
6070 void allocate_fd_percpu(void)
6071 {
6072     fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
6073     if (fd_percpu == NULL)
6074         err(-1, "calloc fd_percpu");
6075 }
6076 
6077 void allocate_irq_buffers(void)
6078 {
6079     irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
6080     if (irq_column_2_cpu == NULL)
6081         err(-1, "calloc %d", topo.num_cpus);
6082 
6083     irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
6084     if (irqs_per_cpu == NULL)
6085         err(-1, "calloc %d", topo.max_cpu_num + 1);
6086 }
6087 
6088 void setup_all_buffers(void)
6089 {
6090     topology_probe();
6091     allocate_irq_buffers();
6092     allocate_fd_percpu();
6093     allocate_counters(&thread_even, &core_even, &package_even);
6094     allocate_counters(&thread_odd, &core_odd, &package_odd);
6095     allocate_output_buffer();
6096     for_all_proc_cpus(initialize_counters);
6097 }
6098 
6099 void set_base_cpu(void)
6100 {
6101     base_cpu = sched_getcpu();
6102     if (base_cpu < 0)
6103         err(-ENODEV, "No valid cpus found");
6104 
6105     if (debug > 1)
6106         fprintf(outf, "base_cpu = %d\n", base_cpu);
6107 }
6108 
6109 void turbostat_init()
6110 {
6111     setup_all_buffers();
6112     set_base_cpu();
6113     check_dev_msr();
6114     check_permissions();
6115     process_cpuid();
6116     linux_perf_init();
6117 
6118     if (!quiet)
6119         for_all_cpus(print_hwp, ODD_COUNTERS);
6120 
6121     if (!quiet)
6122         for_all_cpus(print_epb, ODD_COUNTERS);
6123 
6124     if (!quiet)
6125         for_all_cpus(print_perf_limit, ODD_COUNTERS);
6126 
6127     if (!quiet)
6128         for_all_cpus(print_rapl, ODD_COUNTERS);
6129 
6130     for_all_cpus(set_temperature_target, ODD_COUNTERS);
6131 
6132     for_all_cpus(get_cpu_type, ODD_COUNTERS);
6133     for_all_cpus(get_cpu_type, EVEN_COUNTERS);
6134 
6135     if (!quiet)
6136         for_all_cpus(print_thermal, ODD_COUNTERS);
6137 
6138     if (!quiet && do_irtl_snb)
6139         print_irtl();
6140 
6141     if (DO_BIC(BIC_IPC))
6142         (void)get_instr_count_fd(base_cpu);
6143 }
6144 
6145 int fork_it(char **argv)
6146 {
6147     pid_t child_pid;
6148     int status;
6149 
6150     snapshot_proc_sysfs_files();
6151     status = for_all_cpus(get_counters, EVEN_COUNTERS);
6152     first_counter_read = 0;
6153     if (status)
6154         exit(status);
6155     /* clear affinity side-effect of get_counters() */
6156     sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
6157     gettimeofday(&tv_even, (struct timezone *)NULL);
6158 
6159     child_pid = fork();
6160     if (!child_pid) {
6161         /* child */
6162         execvp(argv[0], argv);
6163         err(errno, "exec %s", argv[0]);
6164     } else {
6165 
6166         /* parent */
6167         if (child_pid == -1)
6168             err(1, "fork");
6169 
6170         signal(SIGINT, SIG_IGN);
6171         signal(SIGQUIT, SIG_IGN);
6172         if (waitpid(child_pid, &status, 0) == -1)
6173             err(status, "waitpid");
6174 
6175         if (WIFEXITED(status))
6176             status = WEXITSTATUS(status);
6177     }
6178     /*
6179      * n.b. fork_it() does not check for errors from for_all_cpus()
6180      * because re-starting is problematic when forking
6181      */
6182     snapshot_proc_sysfs_files();
6183     for_all_cpus(get_counters, ODD_COUNTERS);
6184     gettimeofday(&tv_odd, (struct timezone *)NULL);
6185     timersub(&tv_odd, &tv_even, &tv_delta);
6186     if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
6187         fprintf(outf, "%s: Counter reset detected\n", progname);
6188     else {
6189         compute_average(EVEN_COUNTERS);
6190         format_all_counters(EVEN_COUNTERS);
6191     }
6192 
6193     fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
6194 
6195     flush_output_stderr();
6196 
6197     return status;
6198 }
6199 
6200 int get_and_dump_counters(void)
6201 {
6202     int status;
6203 
6204     snapshot_proc_sysfs_files();
6205     status = for_all_cpus(get_counters, ODD_COUNTERS);
6206     if (status)
6207         return status;
6208 
6209     status = for_all_cpus(dump_counters, ODD_COUNTERS);
6210     if (status)
6211         return status;
6212 
6213     flush_output_stdout();
6214 
6215     return status;
6216 }
6217 
6218 void print_version()
6219 {
6220     fprintf(outf, "turbostat version 2022.07.28 - Len Brown <lenb@kernel.org>\n");
6221 }
6222 
6223 #define COMMAND_LINE_SIZE 2048
6224 
6225 void print_bootcmd(void)
6226 {
6227     char bootcmd[COMMAND_LINE_SIZE];
6228     FILE *fp;
6229     int ret;
6230 
6231     memset(bootcmd, 0, COMMAND_LINE_SIZE);
6232     fp = fopen("/proc/cmdline", "r");
6233     if (!fp)
6234         return;
6235 
6236     ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
6237     if (ret) {
6238         bootcmd[ret] = '\0';
6239         /* the last character is already '\n' */
6240         fprintf(outf, "Kernel command line: %s", bootcmd);
6241     }
6242 
6243     fclose(fp);
6244 }
6245 
6246 int add_counter(unsigned int msr_num, char *path, char *name,
6247         unsigned int width, enum counter_scope scope,
6248         enum counter_type type, enum counter_format format, int flags)
6249 {
6250     struct msr_counter *msrp;
6251 
6252     msrp = calloc(1, sizeof(struct msr_counter));
6253     if (msrp == NULL) {
6254         perror("calloc");
6255         exit(1);
6256     }
6257 
6258     msrp->msr_num = msr_num;
6259     strncpy(msrp->name, name, NAME_BYTES - 1);
6260     if (path)
6261         strncpy(msrp->path, path, PATH_BYTES - 1);
6262     msrp->width = width;
6263     msrp->type = type;
6264     msrp->format = format;
6265     msrp->flags = flags;
6266 
6267     switch (scope) {
6268 
6269     case SCOPE_CPU:
6270         msrp->next = sys.tp;
6271         sys.tp = msrp;
6272         sys.added_thread_counters++;
6273         if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
6274             fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS);
6275             exit(-1);
6276         }
6277         break;
6278 
6279     case SCOPE_CORE:
6280         msrp->next = sys.cp;
6281         sys.cp = msrp;
6282         sys.added_core_counters++;
6283         if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
6284             fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS);
6285             exit(-1);
6286         }
6287         break;
6288 
6289     case SCOPE_PACKAGE:
6290         msrp->next = sys.pp;
6291         sys.pp = msrp;
6292         sys.added_package_counters++;
6293         if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
6294             fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS);
6295             exit(-1);
6296         }
6297         break;
6298     }
6299 
6300     return 0;
6301 }
6302 
6303 void parse_add_command(char *add_command)
6304 {
6305     int msr_num = 0;
6306     char *path = NULL;
6307     char name_buffer[NAME_BYTES] = "";
6308     int width = 64;
6309     int fail = 0;
6310     enum counter_scope scope = SCOPE_CPU;
6311     enum counter_type type = COUNTER_CYCLES;
6312     enum counter_format format = FORMAT_DELTA;
6313 
6314     while (add_command) {
6315 
6316         if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
6317             goto next;
6318 
6319         if (sscanf(add_command, "msr%d", &msr_num) == 1)
6320             goto next;
6321 
6322         if (*add_command == '/') {
6323             path = add_command;
6324             goto next;
6325         }
6326 
6327         if (sscanf(add_command, "u%d", &width) == 1) {
6328             if ((width == 32) || (width == 64))
6329                 goto next;
6330             width = 64;
6331         }
6332         if (!strncmp(add_command, "cpu", strlen("cpu"))) {
6333             scope = SCOPE_CPU;
6334             goto next;
6335         }
6336         if (!strncmp(add_command, "core", strlen("core"))) {
6337             scope = SCOPE_CORE;
6338             goto next;
6339         }
6340         if (!strncmp(add_command, "package", strlen("package"))) {
6341             scope = SCOPE_PACKAGE;
6342             goto next;
6343         }
6344         if (!strncmp(add_command, "cycles", strlen("cycles"))) {
6345             type = COUNTER_CYCLES;
6346             goto next;
6347         }
6348         if (!strncmp(add_command, "seconds", strlen("seconds"))) {
6349             type = COUNTER_SECONDS;
6350             goto next;
6351         }
6352         if (!strncmp(add_command, "usec", strlen("usec"))) {
6353             type = COUNTER_USEC;
6354             goto next;
6355         }
6356         if (!strncmp(add_command, "raw", strlen("raw"))) {
6357             format = FORMAT_RAW;
6358             goto next;
6359         }
6360         if (!strncmp(add_command, "delta", strlen("delta"))) {
6361             format = FORMAT_DELTA;
6362             goto next;
6363         }
6364         if (!strncmp(add_command, "percent", strlen("percent"))) {
6365             format = FORMAT_PERCENT;
6366             goto next;
6367         }
6368 
6369         if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {    /* 18 < NAME_BYTES */
6370             char *eos;
6371 
6372             eos = strchr(name_buffer, ',');
6373             if (eos)
6374                 *eos = '\0';
6375             goto next;
6376         }
6377 
6378 next:
6379         add_command = strchr(add_command, ',');
6380         if (add_command) {
6381             *add_command = '\0';
6382             add_command++;
6383         }
6384 
6385     }
6386     if ((msr_num == 0) && (path == NULL)) {
6387         fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
6388         fail++;
6389     }
6390 
6391     /* generate default column header */
6392     if (*name_buffer == '\0') {
6393         if (width == 32)
6394             sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6395         else
6396             sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6397     }
6398 
6399     if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
6400         fail++;
6401 
6402     if (fail) {
6403         help();
6404         exit(1);
6405     }
6406 }
6407 
6408 int is_deferred_add(char *name)
6409 {
6410     int i;
6411 
6412     for (i = 0; i < deferred_add_index; ++i)
6413         if (!strcmp(name, deferred_add_names[i]))
6414             return 1;
6415     return 0;
6416 }
6417 
6418 int is_deferred_skip(char *name)
6419 {
6420     int i;
6421 
6422     for (i = 0; i < deferred_skip_index; ++i)
6423         if (!strcmp(name, deferred_skip_names[i]))
6424             return 1;
6425     return 0;
6426 }
6427 
6428 void probe_sysfs(void)
6429 {
6430     char path[64];
6431     char name_buf[16];
6432     FILE *input;
6433     int state;
6434     char *sp;
6435 
6436     for (state = 10; state >= 0; --state) {
6437 
6438         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6439         input = fopen(path, "r");
6440         if (input == NULL)
6441             continue;
6442         if (!fgets(name_buf, sizeof(name_buf), input))
6443             err(1, "%s: failed to read file", path);
6444 
6445         /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6446         sp = strchr(name_buf, '-');
6447         if (!sp)
6448             sp = strchrnul(name_buf, '\n');
6449         *sp = '%';
6450         *(sp + 1) = '\0';
6451 
6452         remove_underbar(name_buf);
6453 
6454         fclose(input);
6455 
6456         sprintf(path, "cpuidle/state%d/time", state);
6457 
6458         if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
6459             continue;
6460 
6461         if (is_deferred_skip(name_buf))
6462             continue;
6463 
6464         add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU);
6465     }
6466 
6467     for (state = 10; state >= 0; --state) {
6468 
6469         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6470         input = fopen(path, "r");
6471         if (input == NULL)
6472             continue;
6473         if (!fgets(name_buf, sizeof(name_buf), input))
6474             err(1, "%s: failed to read file", path);
6475         /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6476         sp = strchr(name_buf, '-');
6477         if (!sp)
6478             sp = strchrnul(name_buf, '\n');
6479         *sp = '\0';
6480         fclose(input);
6481 
6482         remove_underbar(name_buf);
6483 
6484         sprintf(path, "cpuidle/state%d/usage", state);
6485 
6486         if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
6487             continue;
6488 
6489         if (is_deferred_skip(name_buf))
6490             continue;
6491 
6492         add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU);
6493     }
6494 
6495 }
6496 
6497 /*
6498  * parse cpuset with following syntax
6499  * 1,2,4..6,8-10 and set bits in cpu_subset
6500  */
6501 void parse_cpu_command(char *optarg)
6502 {
6503     unsigned int start, end;
6504     char *next;
6505 
6506     if (!strcmp(optarg, "core")) {
6507         if (cpu_subset)
6508             goto error;
6509         show_core_only++;
6510         return;
6511     }
6512     if (!strcmp(optarg, "package")) {
6513         if (cpu_subset)
6514             goto error;
6515         show_pkg_only++;
6516         return;
6517     }
6518     if (show_core_only || show_pkg_only)
6519         goto error;
6520 
6521     cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
6522     if (cpu_subset == NULL)
6523         err(3, "CPU_ALLOC");
6524     cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
6525 
6526     CPU_ZERO_S(cpu_subset_size, cpu_subset);
6527 
6528     next = optarg;
6529 
6530     while (next && *next) {
6531 
6532         if (*next == '-')   /* no negative cpu numbers */
6533             goto error;
6534 
6535         start = strtoul(next, &next, 10);
6536 
6537         if (start >= CPU_SUBSET_MAXCPUS)
6538             goto error;
6539         CPU_SET_S(start, cpu_subset_size, cpu_subset);
6540 
6541         if (*next == '\0')
6542             break;
6543 
6544         if (*next == ',') {
6545             next += 1;
6546             continue;
6547         }
6548 
6549         if (*next == '-') {
6550             next += 1;  /* start range */
6551         } else if (*next == '.') {
6552             next += 1;
6553             if (*next == '.')
6554                 next += 1;  /* start range */
6555             else
6556                 goto error;
6557         }
6558 
6559         end = strtoul(next, &next, 10);
6560         if (end <= start)
6561             goto error;
6562 
6563         while (++start <= end) {
6564             if (start >= CPU_SUBSET_MAXCPUS)
6565                 goto error;
6566             CPU_SET_S(start, cpu_subset_size, cpu_subset);
6567         }
6568 
6569         if (*next == ',')
6570             next += 1;
6571         else if (*next != '\0')
6572             goto error;
6573     }
6574 
6575     return;
6576 
6577 error:
6578     fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
6579     help();
6580     exit(-1);
6581 }
6582 
6583 void cmdline(int argc, char **argv)
6584 {
6585     int opt;
6586     int option_index = 0;
6587     static struct option long_options[] = {
6588         { "add", required_argument, 0, 'a' },
6589         { "cpu", required_argument, 0, 'c' },
6590         { "Dump", no_argument, 0, 'D' },
6591         { "debug", no_argument, 0, 'd' },   /* internal, not documented */
6592         { "enable", required_argument, 0, 'e' },
6593         { "interval", required_argument, 0, 'i' },
6594         { "IPC", no_argument, 0, 'I' },
6595         { "num_iterations", required_argument, 0, 'n' },
6596         { "header_iterations", required_argument, 0, 'N' },
6597         { "help", no_argument, 0, 'h' },
6598         { "hide", required_argument, 0, 'H' },  // meh, -h taken by --help
6599         { "Joules", no_argument, 0, 'J' },
6600         { "list", no_argument, 0, 'l' },
6601         { "out", required_argument, 0, 'o' },
6602         { "quiet", no_argument, 0, 'q' },
6603         { "show", required_argument, 0, 's' },
6604         { "Summary", no_argument, 0, 'S' },
6605         { "TCC", required_argument, 0, 'T' },
6606         { "version", no_argument, 0, 'v' },
6607         { 0, 0, 0, 0 }
6608     };
6609 
6610     progname = argv[0];
6611 
6612     while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
6613         switch (opt) {
6614         case 'a':
6615             parse_add_command(optarg);
6616             break;
6617         case 'c':
6618             parse_cpu_command(optarg);
6619             break;
6620         case 'D':
6621             dump_only++;
6622             break;
6623         case 'e':
6624             /* --enable specified counter */
6625             bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
6626             break;
6627         case 'd':
6628             debug++;
6629             ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6630             break;
6631         case 'H':
6632             /*
6633              * --hide: do not show those specified
6634              *  multiple invocations simply clear more bits in enabled mask
6635              */
6636             bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
6637             break;
6638         case 'h':
6639         default:
6640             help();
6641             exit(1);
6642         case 'i':
6643             {
6644                 double interval = strtod(optarg, NULL);
6645 
6646                 if (interval < 0.001) {
6647                     fprintf(outf, "interval %f seconds is too small\n", interval);
6648                     exit(2);
6649                 }
6650 
6651                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
6652                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
6653                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
6654             }
6655             break;
6656         case 'J':
6657             rapl_joules++;
6658             break;
6659         case 'l':
6660             ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6661             list_header_only++;
6662             quiet++;
6663             break;
6664         case 'o':
6665             outf = fopen_or_die(optarg, "w");
6666             break;
6667         case 'q':
6668             quiet = 1;
6669             break;
6670         case 'n':
6671             num_iterations = strtod(optarg, NULL);
6672 
6673             if (num_iterations <= 0) {
6674                 fprintf(outf, "iterations %d should be positive number\n", num_iterations);
6675                 exit(2);
6676             }
6677             break;
6678         case 'N':
6679             header_iterations = strtod(optarg, NULL);
6680 
6681             if (header_iterations <= 0) {
6682                 fprintf(outf, "iterations %d should be positive number\n", header_iterations);
6683                 exit(2);
6684             }
6685             break;
6686         case 's':
6687             /*
6688              * --show: show only those specified
6689              *  The 1st invocation will clear and replace the enabled mask
6690              *  subsequent invocations can add to it.
6691              */
6692             if (shown == 0)
6693                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
6694             else
6695                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
6696             shown = 1;
6697             break;
6698         case 'S':
6699             summary_only++;
6700             break;
6701         case 'T':
6702             tj_max_override = atoi(optarg);
6703             break;
6704         case 'v':
6705             print_version();
6706             exit(0);
6707             break;
6708         }
6709     }
6710 }
6711 
6712 int main(int argc, char **argv)
6713 {
6714     outf = stderr;
6715     cmdline(argc, argv);
6716 
6717     if (!quiet) {
6718         print_version();
6719         print_bootcmd();
6720     }
6721 
6722     probe_sysfs();
6723 
6724     turbostat_init();
6725 
6726     msr_sum_record();
6727 
6728     /* dump counters and exit */
6729     if (dump_only)
6730         return get_and_dump_counters();
6731 
6732     /* list header and exit */
6733     if (list_header_only) {
6734         print_header(",");
6735         flush_output_stdout();
6736         return 0;
6737     }
6738 
6739     /*
6740      * if any params left, it must be a command to fork
6741      */
6742     if (argc - optind)
6743         return fork_it(argv + optind);
6744     else
6745         turbostat_loop();
6746 
6747     return 0;
6748 }