
0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * intel_powerclamp.c - package c-state idle injection
0004  *
0005  * Copyright (c) 2012, Intel Corporation.
0006  *
0007  * Authors:
0008  *     Arjan van de Ven <arjan@linux.intel.com>
0009  *     Jacob Pan <jacob.jun.pan@linux.intel.com>
0010  *
0011  *  TODO:
0012  *           1. better handle wakeup from external interrupts, currently a fixed
0013  *              compensation is added to clamping duration when excessive amount
0014  *              of wakeups are observed during idle time. the reason is that in
0015  *              case of external interrupts without need for ack, clamping down
0016  *              cpu in non-irq context does not reduce irq. for majority of the
0017  *              cases, clamping down cpu does help reduce irq as well, we should
0018  *              be able to differentiate the two cases and give a quantitative
0019  *              solution for the irqs that we can control. perhaps based on
0020  *              get_cpu_iowait_time_us()
0021  *
0022  *       2. synchronization with other hw blocks
0023  */
0024 
0025 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0026 
0027 #include <linux/module.h>
0028 #include <linux/kernel.h>
0029 #include <linux/delay.h>
0030 #include <linux/kthread.h>
0031 #include <linux/cpu.h>
0032 #include <linux/thermal.h>
0033 #include <linux/slab.h>
0034 #include <linux/tick.h>
0035 #include <linux/debugfs.h>
0036 #include <linux/seq_file.h>
0037 #include <linux/sched/rt.h>
0038 #include <uapi/linux/sched/types.h>
0039 
0040 #include <asm/nmi.h>
0041 #include <asm/msr.h>
0042 #include <asm/mwait.h>
0043 #include <asm/cpu_device_id.h>
0044 #include <asm/hardirq.h>
0045 
#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level where runtime calibration results are
 * valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration, driver adjust sleep time to meet target
 * idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static unsigned int target_mwait;	/* deepest MWAIT hint found at probe */
static struct dentry *debug_dir;	/* debugfs dir for calibration data */

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;	/* measured pkg C-state residency, % */
static bool should_skip;		/* true while above target + guard */
static bool reduce_irq;			/* excessive idle wakeups observed */
static atomic_t idle_wakeup_counter;	/* wakeups seen during injected idle */
static unsigned int control_cpu; /* The cpu assigned to collect stat and update
				  * control parameters. default to BSP but BSP
				  * can be offlined.
				  */
static bool clamping;			/* global on/off for idle injection */
0072 
/*
 * Per-CPU state for the two kthread work items that implement idle
 * injection: a balancing work that computes timing for the next cycle
 * and a delayed work that performs the actual forced idle.
 */
struct powerclamp_worker_data {
	struct kthread_worker *worker;		/* per-CPU kthread worker */
	struct kthread_work balancing_work;	/* computes next cycle timing */
	struct kthread_delayed_work idle_injection_work; /* does play_idle() */
	unsigned int cpu;			/* CPU this worker is bound to */
	unsigned int count;			/* completed balancing cycles */
	unsigned int guard;			/* slack above target ratio */
	unsigned int window_size_now;		/* window snapshot for this round */
	unsigned int target_ratio;		/* target snapshot for this round */
	unsigned int duration_jiffies;		/* idle duration snapshot */
	bool clamping;				/* per-worker requeue gate */
};
0085 
static struct powerclamp_worker_data __percpu *worker_data;
static struct thermal_cooling_device *cooling_dev; /* thermal-core handle */
static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
					   * clamping kthread worker
					   */

static unsigned int duration;		/* idle injection length, msec */
static unsigned int pkg_cstate_ratio_cur; /* last polled pkg C-state ratio */
static unsigned int window_size;	/* sliding window, in clamping cycles */
0095 
0096 static int duration_set(const char *arg, const struct kernel_param *kp)
0097 {
0098     int ret = 0;
0099     unsigned long new_duration;
0100 
0101     ret = kstrtoul(arg, 10, &new_duration);
0102     if (ret)
0103         goto exit;
0104     if (new_duration > 25 || new_duration < 6) {
0105         pr_err("Out of recommended range %lu, between 6-25ms\n",
0106             new_duration);
0107         ret = -EINVAL;
0108     }
0109 
0110     duration = clamp(new_duration, 6ul, 25ul);
0111     smp_mb();
0112 
0113 exit:
0114 
0115     return ret;
0116 }
0117 
/* custom setter validates range; getter reuses the stock int formatter */
static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	/*
	 * NOTE(review): "duration" is unsigned int but is printed with
	 * param_get_int; values fit well below INT_MAX so this is benign,
	 * but param_get_uint would be the exact match.
	 */
	.get = param_get_int,
};


module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
0126 
/*
 * Runtime calibration record, one per possible target ratio.  Compensation
 * bridges the gap between the injected idle ratio and the package C-state
 * residency the hardware actually achieves.
 */
struct powerclamp_calibration_data {
	unsigned long confidence;  /* used for calibration, basically a counter
				    * gets incremented each time a clamping
				    * period is completed without extra wakeups
				    * once that counter is reached given level,
				    * compensation is deemed usable.
				    */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensate excessive wakeup from idle
				     * mostly from external interrupts.
				     */
};

/* indexed by target ratio, 0..MAX_TARGET_RATIO-1 */
static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
0143 
0144 static int window_size_set(const char *arg, const struct kernel_param *kp)
0145 {
0146     int ret = 0;
0147     unsigned long new_window_size;
0148 
0149     ret = kstrtoul(arg, 10, &new_window_size);
0150     if (ret)
0151         goto exit_win;
0152     if (new_window_size > 10 || new_window_size < 2) {
0153         pr_err("Out of recommended window size %lu, between 2-10\n",
0154             new_window_size);
0155         ret = -EINVAL;
0156     }
0157 
0158     window_size = clamp(new_window_size, 2ul, 10ul);
0159     smp_mb();
0160 
0161 exit_win:
0162 
0163     return ret;
0164 }
0165 
/* custom setter validates range; getter reuses the stock int formatter */
static const struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. larger\n"
	"\twindow size results in slower response time but more smooth\n"
	"\tclamping results. default to 2.");
0176 
0177 static void find_target_mwait(void)
0178 {
0179     unsigned int eax, ebx, ecx, edx;
0180     unsigned int highest_cstate = 0;
0181     unsigned int highest_subcstate = 0;
0182     int i;
0183 
0184     if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
0185         return;
0186 
0187     cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
0188 
0189     if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
0190         !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
0191         return;
0192 
0193     edx >>= MWAIT_SUBSTATE_SIZE;
0194     for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
0195         if (edx & MWAIT_SUBSTATE_MASK) {
0196             highest_cstate = i;
0197             highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
0198         }
0199     }
0200     target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
0201         (highest_subcstate - 1);
0202 
0203 }
0204 
/* one entry per package C-state residency counter MSR */
struct pkg_cstate_info {
	bool skip;	/* set once the MSR proved unreadable; never retried */
	int msr_index;	/* MSR_PKG_Cx_RESIDENCY register number */
	int cstate_id;	/* package C-state number (2, 3, 6, ...) */
};

#define PKG_CSTATE_INIT(id) {				\
		.msr_index = MSR_PKG_C##id##_RESIDENCY, \
		.cstate_id = id				\
			}

/* table of every known package C-state counter; zero msr_index terminates */
static struct pkg_cstate_info pkg_cstates[] = {
	PKG_CSTATE_INIT(2),
	PKG_CSTATE_INIT(3),
	PKG_CSTATE_INIT(6),
	PKG_CSTATE_INIT(7),
	PKG_CSTATE_INIT(8),
	PKG_CSTATE_INIT(9),
	PKG_CSTATE_INIT(10),
	{NULL},
};
0226 
0227 static bool has_pkg_state_counter(void)
0228 {
0229     u64 val;
0230     struct pkg_cstate_info *info = pkg_cstates;
0231 
0232     /* check if any one of the counter msrs exists */
0233     while (info->msr_index) {
0234         if (!rdmsrl_safe(info->msr_index, &val))
0235             return true;
0236         info++;
0237     }
0238 
0239     return false;
0240 }
0241 
0242 static u64 pkg_state_counter(void)
0243 {
0244     u64 val;
0245     u64 count = 0;
0246     struct pkg_cstate_info *info = pkg_cstates;
0247 
0248     while (info->msr_index) {
0249         if (!info->skip) {
0250             if (!rdmsrl_safe(info->msr_index, &val))
0251                 count += val;
0252             else
0253                 info->skip = true;
0254         }
0255         info++;
0256     }
0257 
0258     return count;
0259 }
0260 
/*
 * Translate a requested idle ratio into an additive compensation using the
 * runtime calibration table.  A calibration entry is only trusted when its
 * neighbours also reached CONFIDENCE_OK; the two edge ratios average with
 * their two inner neighbours instead.
 *
 * NOTE(review): for ratio == 0 the final else-if reads cal_data[ratio - 1],
 * i.e. one element before the array.  Clamping appears to stop before the
 * target reaches 0 (see powerclamp_set_cur_state()), but confirm no caller
 * can pass 0 here.
 */
static unsigned int get_compensation(int ratio)
{
	unsigned int comp = 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* REVISIT: simple penalty of double idle injection */
	if (reduce_irq)
		comp = ratio;
	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}
0297 
0298 static void adjust_compensation(int target_ratio, unsigned int win)
0299 {
0300     int delta;
0301     struct powerclamp_calibration_data *d = &cal_data[target_ratio];
0302 
0303     /*
0304      * adjust compensations if confidence level has not been reached or
0305      * there are too many wakeups during the last idle injection period, we
0306      * cannot trust the data for compensation.
0307      */
0308     if (d->confidence >= CONFIDENCE_OK ||
0309         atomic_read(&idle_wakeup_counter) >
0310         win * num_online_cpus())
0311         return;
0312 
0313     delta = set_target_ratio - current_ratio;
0314     /* filter out bad data */
0315     if (delta >= 0 && delta <= (1+target_ratio/10)) {
0316         if (d->steady_comp)
0317             d->steady_comp =
0318                 roundup(delta+d->steady_comp, 2)/2;
0319         else
0320             d->steady_comp = delta;
0321         d->confidence++;
0322     }
0323 }
0324 
/*
 * Evaluate the window that just finished: compute the achieved package
 * C-state ratio from the MSR/TSC deltas, feed the calibration logic, and
 * decide whether excessive interrupts call for the double-injection
 * penalty.  Only the elected control CPU runs this.
 *
 * Returns true when the achieved ratio already exceeds target + guard,
 * i.e. the next injection cycle should be skipped.
 */
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	/* baselines from the previous invocation; zero on first call */
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;	/* no baseline yet, assume minimal */
	else if (tsc_now-tsc_last) {
		val64 = 100*(msr_now-msr_last);
		do_div(val64, (tsc_now-tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);
	/*
	 * too many external interrupts, set flag such
	 * that we can take measure later.
	 */
	reduce_irq = atomic_read(&idle_wakeup_counter) >=
		2 * win * num_online_cpus();

	atomic_set(&idle_wakeup_counter, 0);
	/* if we are above target+guard, skip */
	return set_target_ratio + guard <= current_ratio;
}
0361 
/*
 * Balancing work: snapshot the user-visible control knobs, compute the
 * compensated injection interval for this CPU, and queue the delayed
 * idle-injection work aligned to that interval.
 */
static void clamp_balancing_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;
	int sleeptime;
	unsigned long target_jiffies;
	unsigned int compensated_ratio;
	int interval; /* jiffies to sleep for each attempt */

	w_data = container_of(work, struct powerclamp_worker_data,
			      balancing_work);

	/*
	 * make sure user selected ratio does not take effect until
	 * the next round. adjust target_ratio if user has changed
	 * target such that we can converge quickly.
	 */
	w_data->target_ratio = READ_ONCE(set_target_ratio);
	w_data->guard = 1 + w_data->target_ratio / 20;
	w_data->window_size_now = window_size;
	w_data->duration_jiffies = msecs_to_jiffies(duration);
	w_data->count++;

	/*
	 * systems may have different ability to enter package level
	 * c-states, thus we need to compensate the injected idle ratio
	 * to achieve the actual target reported by the HW.
	 */
	compensated_ratio = w_data->target_ratio +
		get_compensation(w_data->target_ratio);
	/*
	 * compensated_ratio is unsigned, so "<= 0" only matches 0; the
	 * floor of 1 guards the division below against divide-by-zero.
	 */
	if (compensated_ratio <= 0)
		compensated_ratio = 1;
	interval = w_data->duration_jiffies * 100 / compensated_ratio;

	/* align idle time */
	target_jiffies = roundup(jiffies, interval);
	sleeptime = target_jiffies - jiffies;
	if (sleeptime <= 0)
		sleeptime = 1;

	/* requeue only while both the global and per-worker gates are open */
	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_delayed_work(w_data->worker,
					   &w_data->idle_injection_work,
					   sleeptime);
}
0406 
/*
 * Idle-injection work: on the control CPU, re-evaluate the feedback loop
 * once per window; then force this CPU idle for the configured duration
 * (unless the last evaluation said we are already above target) and queue
 * the balancing work for the next cycle.
 */
static void clamp_idle_injection_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;

	w_data = container_of(work, struct powerclamp_worker_data,
			      idle_injection_work.work);

	/*
	 * only elected controlling cpu can collect stats and update
	 * control parameters.
	 */
	if (w_data->cpu == control_cpu &&
	    !(w_data->count % w_data->window_size_now)) {
		should_skip =
			powerclamp_adjust_controls(w_data->target_ratio,
						   w_data->guard,
						   w_data->window_size_now);
		/* publish should_skip before other CPUs read it */
		smp_mb();
	}

	if (should_skip)
		goto balance;

	play_idle(jiffies_to_usecs(w_data->duration_jiffies));

balance:
	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_work(w_data->worker, &w_data->balancing_work);
}
0436 
0437 /*
0438  * 1 HZ polling while clamping is active, useful for userspace
0439  * to monitor actual idle ratio.
0440  */
0441 static void poll_pkg_cstate(struct work_struct *dummy);
0442 static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
0443 static void poll_pkg_cstate(struct work_struct *dummy)
0444 {
0445     static u64 msr_last;
0446     static u64 tsc_last;
0447 
0448     u64 msr_now;
0449     u64 tsc_now;
0450     u64 val64;
0451 
0452     msr_now = pkg_state_counter();
0453     tsc_now = rdtsc();
0454 
0455     /* calculate pkg cstate vs tsc ratio */
0456     if (!msr_last || !tsc_last)
0457         pkg_cstate_ratio_cur = 1;
0458     else {
0459         if (tsc_now - tsc_last) {
0460             val64 = 100 * (msr_now - msr_last);
0461             do_div(val64, (tsc_now - tsc_last));
0462             pkg_cstate_ratio_cur = val64;
0463         }
0464     }
0465 
0466     /* update record */
0467     msr_last = msr_now;
0468     tsc_last = tsc_now;
0469 
0470     if (true == clamping)
0471         schedule_delayed_work(&poll_pkg_cstate_work, HZ);
0472 }
0473 
/*
 * Create and kick off the idle-injection kthread worker for @cpu.  The
 * worker runs at SCHED_FIFO so injected idle cannot be preempted by
 * normal tasks.  On worker-creation failure the CPU is silently left
 * unclamped (the bit in cpu_clamping_mask stays clear).
 */
static void start_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
	struct kthread_worker *worker;

	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
	if (IS_ERR(worker))
		return;

	/* populate the per-CPU state before the first work item can run */
	w_data->worker = worker;
	w_data->count = 0;
	w_data->cpu = cpu;
	w_data->clamping = true;
	set_bit(cpu, cpu_clamping_mask);
	sched_set_fifo(worker->task);
	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
	kthread_init_delayed_work(&w_data->idle_injection_work,
				  clamp_idle_injection_func);
	kthread_queue_work(w_data->worker, &w_data->balancing_work);
}
0494 
/*
 * Tear down the idle-injection worker for @cpu: close the requeue gate,
 * cancel both work items, and destroy the kthread worker.  Safe to call
 * when no worker was ever started (w_data->worker is NULL).
 */
static void stop_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);

	if (!w_data->worker)
		return;

	w_data->clamping = false;
	/*
	 * Make sure that all works that get queued after this point see
	 * the clamping disabled. The counter part is not needed because
	 * there is an implicit memory barrier when the queued work
	 * is processed.
	 */
	smp_wmb();
	kthread_cancel_work_sync(&w_data->balancing_work);
	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
	/*
	 * The balancing work still might be queued here because
	 * the handling of the "clamping" variable, cancel, and queue
	 * operations are not synchronized via a lock. But it is not
	 * a big deal. The balancing work is fast and destroy kthread
	 * will wait for it.
	 */
	clear_bit(w_data->cpu, cpu_clamping_mask);
	kthread_destroy_worker(w_data->worker);

	w_data->worker = NULL;
}
0524 
0525 static int start_power_clamp(void)
0526 {
0527     unsigned long cpu;
0528 
0529     set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
0530     /* prevent cpu hotplug */
0531     cpus_read_lock();
0532 
0533     /* prefer BSP */
0534     control_cpu = 0;
0535     if (!cpu_online(control_cpu))
0536         control_cpu = smp_processor_id();
0537 
0538     clamping = true;
0539     schedule_delayed_work(&poll_pkg_cstate_work, 0);
0540 
0541     /* start one kthread worker per online cpu */
0542     for_each_online_cpu(cpu) {
0543         start_power_clamp_worker(cpu);
0544     }
0545     cpus_read_unlock();
0546 
0547     return 0;
0548 }
0549 
0550 static void end_power_clamp(void)
0551 {
0552     int i;
0553 
0554     /*
0555      * Block requeuing in all the kthread workers. They will flush and
0556      * stop faster.
0557      */
0558     clamping = false;
0559     for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
0560         pr_debug("clamping worker for cpu %d alive, destroy\n", i);
0561         stop_power_clamp_worker(i);
0562     }
0563 }
0564 
0565 static int powerclamp_cpu_online(unsigned int cpu)
0566 {
0567     if (clamping == false)
0568         return 0;
0569     start_power_clamp_worker(cpu);
0570     /* prefer BSP as controlling CPU */
0571     if (cpu == 0) {
0572         control_cpu = 0;
0573         smp_mb();
0574     }
0575     return 0;
0576 }
0577 
0578 static int powerclamp_cpu_predown(unsigned int cpu)
0579 {
0580     if (clamping == false)
0581         return 0;
0582 
0583     stop_power_clamp_worker(cpu);
0584     if (cpu != control_cpu)
0585         return 0;
0586 
0587     control_cpu = cpumask_first(cpu_online_mask);
0588     if (control_cpu == cpu)
0589         control_cpu = cpumask_next(cpu, cpu_online_mask);
0590     smp_mb();
0591     return 0;
0592 }
0593 
/* thermal cooling op: the maximum state equals the maximum idle ratio */
static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}
0601 
0602 static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
0603                  unsigned long *state)
0604 {
0605     if (true == clamping)
0606         *state = pkg_cstate_ratio_cur;
0607     else
0608         /* to save power, do not poll idle ratio while not clamping */
0609         *state = -1; /* indicates invalid state */
0610 
0611     return 0;
0612 }
0613 
0614 static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
0615                  unsigned long new_target_ratio)
0616 {
0617     int ret = 0;
0618 
0619     new_target_ratio = clamp(new_target_ratio, 0UL,
0620                 (unsigned long) (MAX_TARGET_RATIO-1));
0621     if (set_target_ratio == 0 && new_target_ratio > 0) {
0622         pr_info("Start idle injection to reduce power\n");
0623         set_target_ratio = new_target_ratio;
0624         ret = start_power_clamp();
0625         goto exit_set;
0626     } else  if (set_target_ratio > 0 && new_target_ratio == 0) {
0627         pr_info("Stop forced idle injection\n");
0628         end_power_clamp();
0629         set_target_ratio = 0;
0630     } else  /* adjust currently running */ {
0631         set_target_ratio = new_target_ratio;
0632         /* make new set_target_ratio visible to other cpus */
0633         smp_mb();
0634     }
0635 
0636 exit_set:
0637     return ret;
0638 }
0639 
0640 /* bind to generic thermal layer as cooling device*/
/* bind to the generic thermal layer as a cooling device */
static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};
0646 
/* any Intel CPU advertising MWAIT qualifies; deeper checks happen at probe */
static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
0652 
0653 static int __init powerclamp_probe(void)
0654 {
0655 
0656     if (!x86_match_cpu(intel_powerclamp_ids)) {
0657         pr_err("CPU does not support MWAIT\n");
0658         return -ENODEV;
0659     }
0660 
0661     /* The goal for idle time alignment is to achieve package cstate. */
0662     if (!has_pkg_state_counter()) {
0663         pr_info("No package C-state available\n");
0664         return -ENODEV;
0665     }
0666 
0667     /* find the deepest mwait value */
0668     find_target_mwait();
0669 
0670     return 0;
0671 }
0672 
0673 static int powerclamp_debug_show(struct seq_file *m, void *unused)
0674 {
0675     int i = 0;
0676 
0677     seq_printf(m, "controlling cpu: %d\n", control_cpu);
0678     seq_printf(m, "pct confidence steady dynamic (compensation)\n");
0679     for (i = 0; i < MAX_TARGET_RATIO; i++) {
0680         seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
0681             i,
0682             cal_data[i].confidence,
0683             cal_data[i].steady_comp,
0684             cal_data[i].dynamic_comp);
0685     }
0686 
0687     return 0;
0688 }
0689 
0690 DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);
0691 
0692 static inline void powerclamp_create_debug_files(void)
0693 {
0694     debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
0695 
0696     debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
0697                 &powerclamp_debug_fops);
0698 }
0699 
0700 static enum cpuhp_state hp_state;
0701 
0702 static int __init powerclamp_init(void)
0703 {
0704     int retval;
0705 
0706     cpu_clamping_mask = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL);
0707     if (!cpu_clamping_mask)
0708         return -ENOMEM;
0709 
0710     /* probe cpu features and ids here */
0711     retval = powerclamp_probe();
0712     if (retval)
0713         goto exit_free;
0714 
0715     /* set default limit, maybe adjusted during runtime based on feedback */
0716     window_size = 2;
0717     retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
0718                        "thermal/intel_powerclamp:online",
0719                        powerclamp_cpu_online,
0720                        powerclamp_cpu_predown);
0721     if (retval < 0)
0722         goto exit_free;
0723 
0724     hp_state = retval;
0725 
0726     worker_data = alloc_percpu(struct powerclamp_worker_data);
0727     if (!worker_data) {
0728         retval = -ENOMEM;
0729         goto exit_unregister;
0730     }
0731 
0732     cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
0733                         &powerclamp_cooling_ops);
0734     if (IS_ERR(cooling_dev)) {
0735         retval = -ENODEV;
0736         goto exit_free_thread;
0737     }
0738 
0739     if (!duration)
0740         duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
0741 
0742     powerclamp_create_debug_files();
0743 
0744     return 0;
0745 
0746 exit_free_thread:
0747     free_percpu(worker_data);
0748 exit_unregister:
0749     cpuhp_remove_state_nocalls(hp_state);
0750 exit_free:
0751     bitmap_free(cpu_clamping_mask);
0752     return retval;
0753 }
0754 module_init(powerclamp_init);
0755 
/*
 * Module exit: stop all injection workers, then unwind init in reverse.
 *
 * NOTE(review): worker_data is freed before the cooling device is
 * unregistered; a set_cur_state() call racing with unload could start
 * clamping against freed per-cpu data.  Confirm whether the thermal core
 * guarantees no op callbacks run during module_exit, or unregister the
 * cooling device first.
 */
static void __exit powerclamp_exit(void)
{
	end_power_clamp();
	cpuhp_remove_state_nocalls(hp_state);
	free_percpu(worker_data);
	thermal_cooling_device_unregister(cooling_dev);
	bitmap_free(cpu_clamping_mask);

	/* end_power_clamp() cleared "clamping", so the poll will not requeue */
	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);
0768 
0769 MODULE_LICENSE("GPL");
0770 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
0771 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
0772 MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");