Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
0004  *
0005  * (C) Copyright 2016 Intel Corporation
0006  * Author: Tim Chen <tim.c.chen@linux.intel.com>
0007  *
0008  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
0009  * the maximum turbo frequencies of some cores in a CPU package may be
0010  * higher than for the other cores in the same package.  In that case,
0011  * better performance can be achieved by making the scheduler prefer
0012  * to run tasks on the CPUs with higher max turbo frequencies.
0013  *
0014  * This file provides functions and data structures for enabling the
0015  * scheduler to favor scheduling on cores that can be boosted to a higher
0016  * frequency under ITMT.
0017  */
0018 
0019 #include <linux/sched.h>
0020 #include <linux/cpumask.h>
0021 #include <linux/cpuset.h>
0022 #include <linux/mutex.h>
0023 #include <linux/sysctl.h>
0024 #include <linux/nodemask.h>
0025 
0026 static DEFINE_MUTEX(itmt_update_mutex);
0027 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
0028 
0029 /* Boolean to track if system has ITMT capabilities */
0030 static bool __read_mostly sched_itmt_capable;
0031 
0032 /*
0033  * Boolean to control whether we want to move processes to cpu capable
0034  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
0035  * Technology 3.0.
0036  *
0037  * It can be set via /proc/sys/kernel/sched_itmt_enabled
0038  */
0039 unsigned int __read_mostly sysctl_sched_itmt_enabled;
0040 
0041 static int sched_itmt_update_handler(struct ctl_table *table, int write,
0042                      void *buffer, size_t *lenp, loff_t *ppos)
0043 {
0044     unsigned int old_sysctl;
0045     int ret;
0046 
0047     mutex_lock(&itmt_update_mutex);
0048 
0049     if (!sched_itmt_capable) {
0050         mutex_unlock(&itmt_update_mutex);
0051         return -EINVAL;
0052     }
0053 
0054     old_sysctl = sysctl_sched_itmt_enabled;
0055     ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
0056 
0057     if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
0058         x86_topology_update = true;
0059         rebuild_sched_domains();
0060     }
0061 
0062     mutex_unlock(&itmt_update_mutex);
0063 
0064     return ret;
0065 }
0066 
0067 static struct ctl_table itmt_kern_table[] = {
0068     {
0069         .procname   = "sched_itmt_enabled",
0070         .data       = &sysctl_sched_itmt_enabled,
0071         .maxlen     = sizeof(unsigned int),
0072         .mode       = 0644,
0073         .proc_handler   = sched_itmt_update_handler,
0074         .extra1     = SYSCTL_ZERO,
0075         .extra2     = SYSCTL_ONE,
0076     },
0077     {}
0078 };
0079 
0080 static struct ctl_table itmt_root_table[] = {
0081     {
0082         .procname   = "kernel",
0083         .mode       = 0555,
0084         .child      = itmt_kern_table,
0085     },
0086     {}
0087 };
0088 
0089 static struct ctl_table_header *itmt_sysctl_header;
0090 
0091 /**
0092  * sched_set_itmt_support() - Indicate platform supports ITMT
0093  *
0094  * This function is used by the OS to indicate to scheduler that the platform
0095  * is capable of supporting the ITMT feature.
0096  *
0097  * The current scheme has the pstate driver detects if the system
0098  * is ITMT capable and call sched_set_itmt_support.
0099  *
0100  * This must be done only after sched_set_itmt_core_prio
0101  * has been called to set the cpus' priorities.
0102  * It must not be called with cpu hot plug lock
0103  * held as we need to acquire the lock to rebuild sched domains
0104  * later.
0105  *
0106  * Return: 0 on success
0107  */
0108 int sched_set_itmt_support(void)
0109 {
0110     mutex_lock(&itmt_update_mutex);
0111 
0112     if (sched_itmt_capable) {
0113         mutex_unlock(&itmt_update_mutex);
0114         return 0;
0115     }
0116 
0117     itmt_sysctl_header = register_sysctl_table(itmt_root_table);
0118     if (!itmt_sysctl_header) {
0119         mutex_unlock(&itmt_update_mutex);
0120         return -ENOMEM;
0121     }
0122 
0123     sched_itmt_capable = true;
0124 
0125     sysctl_sched_itmt_enabled = 1;
0126 
0127     x86_topology_update = true;
0128     rebuild_sched_domains();
0129 
0130     mutex_unlock(&itmt_update_mutex);
0131 
0132     return 0;
0133 }
0134 
0135 /**
0136  * sched_clear_itmt_support() - Revoke platform's support of ITMT
0137  *
0138  * This function is used by the OS to indicate that it has
0139  * revoked the platform's support of ITMT feature.
0140  *
0141  * It must not be called with cpu hot plug lock
0142  * held as we need to acquire the lock to rebuild sched domains
0143  * later.
0144  */
0145 void sched_clear_itmt_support(void)
0146 {
0147     mutex_lock(&itmt_update_mutex);
0148 
0149     if (!sched_itmt_capable) {
0150         mutex_unlock(&itmt_update_mutex);
0151         return;
0152     }
0153     sched_itmt_capable = false;
0154 
0155     if (itmt_sysctl_header) {
0156         unregister_sysctl_table(itmt_sysctl_header);
0157         itmt_sysctl_header = NULL;
0158     }
0159 
0160     if (sysctl_sched_itmt_enabled) {
0161         /* disable sched_itmt if we are no longer ITMT capable */
0162         sysctl_sched_itmt_enabled = 0;
0163         x86_topology_update = true;
0164         rebuild_sched_domains();
0165     }
0166 
0167     mutex_unlock(&itmt_update_mutex);
0168 }
0169 
0170 int arch_asym_cpu_priority(int cpu)
0171 {
0172     return per_cpu(sched_core_priority, cpu);
0173 }
0174 
0175 /**
0176  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
0177  * @prio:   Priority of cpu core
0178  * @core_cpu:   The cpu number associated with the core
0179  *
0180  * The pstate driver will find out the max boost frequency
0181  * and call this function to set a priority proportional
0182  * to the max boost frequency. CPU with higher boost
0183  * frequency will receive higher priority.
0184  *
0185  * No need to rebuild sched domain after updating
0186  * the CPU priorities. The sched domains have no
0187  * dependency on CPU priorities.
0188  */
0189 void sched_set_itmt_core_prio(int prio, int core_cpu)
0190 {
0191     int cpu, i = 1;
0192 
0193     for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
0194         int smt_prio;
0195 
0196         /*
0197          * Ensure that the siblings are moved to the end
0198          * of the priority chain and only used when
0199          * all other high priority cpus are out of capacity.
0200          */
0201         smt_prio = prio * smp_num_siblings / (i * i);
0202         per_cpu(sched_core_priority, cpu) = smt_prio;
0203         i++;
0204     }
0205 }