Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * devfreq_cooling: Thermal cooling device implementation for devices using
0004  *                  devfreq
0005  *
0006  * Copyright (C) 2014-2015 ARM Limited
0007  *
0008  * TODO:
0009  *    - If OPPs are added or removed after devfreq cooling has
0010  *      registered, the devfreq cooling won't react to it.
0011  */
0012 
0013 #include <linux/devfreq.h>
0014 #include <linux/devfreq_cooling.h>
0015 #include <linux/energy_model.h>
0016 #include <linux/export.h>
0017 #include <linux/slab.h>
0018 #include <linux/pm_opp.h>
0019 #include <linux/pm_qos.h>
0020 #include <linux/thermal.h>
0021 #include <linux/units.h>
0022 
0023 #include <trace/events/thermal.h>
0024 
/*
 * Fixed-point factor applied to 'res_util': the ratio between the Energy
 * Model power and the driver-reported power is multiplied by this value
 * before the integer division and divided out again when the ratio is used,
 * so that the fractional part is not lost.
 */
#define SCALE_ERROR_MITIGATION	100
0026 
/**
 * struct devfreq_cooling_device - Devfreq cooling device
 * @cdev:	Pointer to associated thermal cooling device.
 * @cooling_ops: devfreq callbacks to thermal cooling device ops
 * @devfreq:	Pointer to associated devfreq device.
 * @cooling_state:	Current cooling state.
 * @freq_table:	Pointer to a table with the frequencies sorted in descending
 *		order.  You can index the table by cooling device state
 * @max_state:	It is the last index, that is, one less than the number of the
 *		OPPs
 * @power_ops:	Pointer to devfreq_cooling_power, a more precise model.
 * @res_util:	Resource utilization scaling factor for the power.
 *		It is multiplied by 100 to minimize the error. It is used
 *		for estimation of the power budget instead of using
 *		'utilization' (which is 'busy_time' / 'total_time').
 *		The 'res_util' range is from 100 to power * 100 for the
 *		corresponding 'state'.
 * @capped_state:	index to cooling state within the dynamic power budget
 * @req_max_freq:	PM QoS request for limiting the maximum frequency
 *			of the devfreq device.
 * @em_pd:		Energy Model for the associated Devfreq device
 */
struct devfreq_cooling_device {
	struct thermal_cooling_device *cdev;
	struct thermal_cooling_device_ops cooling_ops;
	struct devfreq *devfreq;
	unsigned long cooling_state;
	u32 *freq_table;
	size_t max_state;
	struct devfreq_cooling_power *power_ops;
	u32 res_util;
	int capped_state;
	struct dev_pm_qos_request req_max_freq;
	struct em_perf_domain *em_pd;
};
0063 
0064 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
0065                      unsigned long *state)
0066 {
0067     struct devfreq_cooling_device *dfc = cdev->devdata;
0068 
0069     *state = dfc->max_state;
0070 
0071     return 0;
0072 }
0073 
0074 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
0075                      unsigned long *state)
0076 {
0077     struct devfreq_cooling_device *dfc = cdev->devdata;
0078 
0079     *state = dfc->cooling_state;
0080 
0081     return 0;
0082 }
0083 
0084 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
0085                      unsigned long state)
0086 {
0087     struct devfreq_cooling_device *dfc = cdev->devdata;
0088     struct devfreq *df = dfc->devfreq;
0089     struct device *dev = df->dev.parent;
0090     unsigned long freq;
0091     int perf_idx;
0092 
0093     if (state == dfc->cooling_state)
0094         return 0;
0095 
0096     dev_dbg(dev, "Setting cooling state %lu\n", state);
0097 
0098     if (state > dfc->max_state)
0099         return -EINVAL;
0100 
0101     if (dfc->em_pd) {
0102         perf_idx = dfc->max_state - state;
0103         freq = dfc->em_pd->table[perf_idx].frequency * 1000;
0104     } else {
0105         freq = dfc->freq_table[state];
0106     }
0107 
0108     dev_pm_qos_update_request(&dfc->req_max_freq,
0109                   DIV_ROUND_UP(freq, HZ_PER_KHZ));
0110 
0111     dfc->cooling_state = state;
0112 
0113     return 0;
0114 }
0115 
0116 /**
0117  * get_perf_idx() - get the performance index corresponding to a frequency
0118  * @em_pd:  Pointer to device's Energy Model
0119  * @freq:   frequency in kHz
0120  *
0121  * Return: the performance index associated with the @freq, or
0122  * -EINVAL if it wasn't found.
0123  */
0124 static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq)
0125 {
0126     int i;
0127 
0128     for (i = 0; i < em_pd->nr_perf_states; i++) {
0129         if (em_pd->table[i].frequency == freq)
0130             return i;
0131     }
0132 
0133     return -EINVAL;
0134 }
0135 
0136 static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
0137 {
0138     struct device *dev = df->dev.parent;
0139     unsigned long voltage;
0140     struct dev_pm_opp *opp;
0141 
0142     opp = dev_pm_opp_find_freq_exact(dev, freq, true);
0143     if (PTR_ERR(opp) == -ERANGE)
0144         opp = dev_pm_opp_find_freq_exact(dev, freq, false);
0145 
0146     if (IS_ERR(opp)) {
0147         dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
0148                     freq, PTR_ERR(opp));
0149         return 0;
0150     }
0151 
0152     voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
0153     dev_pm_opp_put(opp);
0154 
0155     if (voltage == 0) {
0156         dev_err_ratelimited(dev,
0157                     "Failed to get voltage for frequency %lu\n",
0158                     freq);
0159     }
0160 
0161     return voltage;
0162 }
0163 
0164 static void _normalize_load(struct devfreq_dev_status *status)
0165 {
0166     if (status->total_time > 0xfffff) {
0167         status->total_time >>= 10;
0168         status->busy_time >>= 10;
0169     }
0170 
0171     status->busy_time <<= 10;
0172     status->busy_time /= status->total_time ? : 1;
0173 
0174     status->busy_time = status->busy_time ? : 1;
0175     status->total_time = 1024;
0176 }
0177 
0178 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
0179                            u32 *power)
0180 {
0181     struct devfreq_cooling_device *dfc = cdev->devdata;
0182     struct devfreq *df = dfc->devfreq;
0183     struct devfreq_dev_status status;
0184     unsigned long state;
0185     unsigned long freq;
0186     unsigned long voltage;
0187     int res, perf_idx;
0188 
0189     mutex_lock(&df->lock);
0190     status = df->last_status;
0191     mutex_unlock(&df->lock);
0192 
0193     freq = status.current_frequency;
0194 
0195     if (dfc->power_ops && dfc->power_ops->get_real_power) {
0196         voltage = get_voltage(df, freq);
0197         if (voltage == 0) {
0198             res = -EINVAL;
0199             goto fail;
0200         }
0201 
0202         res = dfc->power_ops->get_real_power(df, power, freq, voltage);
0203         if (!res) {
0204             state = dfc->capped_state;
0205 
0206             /* Convert EM power into milli-Watts first */
0207             dfc->res_util = dfc->em_pd->table[state].power;
0208             dfc->res_util /= MICROWATT_PER_MILLIWATT;
0209 
0210             dfc->res_util *= SCALE_ERROR_MITIGATION;
0211 
0212             if (*power > 1)
0213                 dfc->res_util /= *power;
0214         } else {
0215             goto fail;
0216         }
0217     } else {
0218         /* Energy Model frequencies are in kHz */
0219         perf_idx = get_perf_idx(dfc->em_pd, freq / 1000);
0220         if (perf_idx < 0) {
0221             res = -EAGAIN;
0222             goto fail;
0223         }
0224 
0225         _normalize_load(&status);
0226 
0227         /* Convert EM power into milli-Watts first */
0228         *power = dfc->em_pd->table[perf_idx].power;
0229         *power /= MICROWATT_PER_MILLIWATT;
0230         /* Scale power for utilization */
0231         *power *= status.busy_time;
0232         *power >>= 10;
0233     }
0234 
0235     trace_thermal_power_devfreq_get_power(cdev, &status, freq, *power);
0236 
0237     return 0;
0238 fail:
0239     /* It is safe to set max in this case */
0240     dfc->res_util = SCALE_ERROR_MITIGATION;
0241     return res;
0242 }
0243 
0244 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
0245                        unsigned long state, u32 *power)
0246 {
0247     struct devfreq_cooling_device *dfc = cdev->devdata;
0248     int perf_idx;
0249 
0250     if (state > dfc->max_state)
0251         return -EINVAL;
0252 
0253     perf_idx = dfc->max_state - state;
0254     *power = dfc->em_pd->table[perf_idx].power;
0255     *power /= MICROWATT_PER_MILLIWATT;
0256 
0257     return 0;
0258 }
0259 
0260 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
0261                        u32 power, unsigned long *state)
0262 {
0263     struct devfreq_cooling_device *dfc = cdev->devdata;
0264     struct devfreq *df = dfc->devfreq;
0265     struct devfreq_dev_status status;
0266     unsigned long freq, em_power_mw;
0267     s32 est_power;
0268     int i;
0269 
0270     mutex_lock(&df->lock);
0271     status = df->last_status;
0272     mutex_unlock(&df->lock);
0273 
0274     freq = status.current_frequency;
0275 
0276     if (dfc->power_ops && dfc->power_ops->get_real_power) {
0277         /* Scale for resource utilization */
0278         est_power = power * dfc->res_util;
0279         est_power /= SCALE_ERROR_MITIGATION;
0280     } else {
0281         /* Scale dynamic power for utilization */
0282         _normalize_load(&status);
0283         est_power = power << 10;
0284         est_power /= status.busy_time;
0285     }
0286 
0287     /*
0288      * Find the first cooling state that is within the power
0289      * budget. The EM power table is sorted ascending.
0290      */
0291     for (i = dfc->max_state; i > 0; i--) {
0292         /* Convert EM power to milli-Watts to make safe comparison */
0293         em_power_mw = dfc->em_pd->table[i].power;
0294         em_power_mw /= MICROWATT_PER_MILLIWATT;
0295         if (est_power >= em_power_mw)
0296             break;
0297     }
0298 
0299     *state = dfc->max_state - i;
0300     dfc->capped_state = *state;
0301 
0302     trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
0303     return 0;
0304 }
0305 
0306 /**
0307  * devfreq_cooling_gen_tables() - Generate frequency table.
0308  * @dfc:    Pointer to devfreq cooling device.
0309  * @num_opps:   Number of OPPs
0310  *
0311  * Generate frequency table which holds the frequencies in descending
0312  * order. That way its indexed by cooling device state. This is for
0313  * compatibility with drivers which do not register Energy Model.
0314  *
0315  * Return: 0 on success, negative error code on failure.
0316  */
0317 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc,
0318                       int num_opps)
0319 {
0320     struct devfreq *df = dfc->devfreq;
0321     struct device *dev = df->dev.parent;
0322     unsigned long freq;
0323     int i;
0324 
0325     dfc->freq_table = kcalloc(num_opps, sizeof(*dfc->freq_table),
0326                  GFP_KERNEL);
0327     if (!dfc->freq_table)
0328         return -ENOMEM;
0329 
0330     for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
0331         struct dev_pm_opp *opp;
0332 
0333         opp = dev_pm_opp_find_freq_floor(dev, &freq);
0334         if (IS_ERR(opp)) {
0335             kfree(dfc->freq_table);
0336             return PTR_ERR(opp);
0337         }
0338 
0339         dev_pm_opp_put(opp);
0340         dfc->freq_table[i] = freq;
0341     }
0342 
0343     return 0;
0344 }
0345 
0346 /**
0347  * of_devfreq_cooling_register_power() - Register devfreq cooling device,
0348  *                                      with OF and power information.
0349  * @np: Pointer to OF device_node.
0350  * @df: Pointer to devfreq device.
0351  * @dfc_power:  Pointer to devfreq_cooling_power.
0352  *
0353  * Register a devfreq cooling device.  The available OPPs must be
0354  * registered on the device.
0355  *
0356  * If @dfc_power is provided, the cooling device is registered with the
0357  * power extensions.  For the power extensions to work correctly,
0358  * devfreq should use the simple_ondemand governor, other governors
0359  * are not currently supported.
0360  */
0361 struct thermal_cooling_device *
0362 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
0363                   struct devfreq_cooling_power *dfc_power)
0364 {
0365     struct thermal_cooling_device *cdev;
0366     struct device *dev = df->dev.parent;
0367     struct devfreq_cooling_device *dfc;
0368     struct em_perf_domain *em;
0369     struct thermal_cooling_device_ops *ops;
0370     char *name;
0371     int err, num_opps;
0372 
0373 
0374     dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
0375     if (!dfc)
0376         return ERR_PTR(-ENOMEM);
0377 
0378     dfc->devfreq = df;
0379 
0380     ops = &dfc->cooling_ops;
0381     ops->get_max_state = devfreq_cooling_get_max_state;
0382     ops->get_cur_state = devfreq_cooling_get_cur_state;
0383     ops->set_cur_state = devfreq_cooling_set_cur_state;
0384 
0385     em = em_pd_get(dev);
0386     if (em && !em_is_artificial(em)) {
0387         dfc->em_pd = em;
0388         ops->get_requested_power =
0389             devfreq_cooling_get_requested_power;
0390         ops->state2power = devfreq_cooling_state2power;
0391         ops->power2state = devfreq_cooling_power2state;
0392 
0393         dfc->power_ops = dfc_power;
0394 
0395         num_opps = em_pd_nr_perf_states(dfc->em_pd);
0396     } else {
0397         /* Backward compatibility for drivers which do not use IPA */
0398         dev_dbg(dev, "missing proper EM for cooling device\n");
0399 
0400         num_opps = dev_pm_opp_get_opp_count(dev);
0401 
0402         err = devfreq_cooling_gen_tables(dfc, num_opps);
0403         if (err)
0404             goto free_dfc;
0405     }
0406 
0407     if (num_opps <= 0) {
0408         err = -EINVAL;
0409         goto free_dfc;
0410     }
0411 
0412     /* max_state is an index, not a counter */
0413     dfc->max_state = num_opps - 1;
0414 
0415     err = dev_pm_qos_add_request(dev, &dfc->req_max_freq,
0416                      DEV_PM_QOS_MAX_FREQUENCY,
0417                      PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
0418     if (err < 0)
0419         goto free_table;
0420 
0421     err = -ENOMEM;
0422     name = kasprintf(GFP_KERNEL, "devfreq-%s", dev_name(dev));
0423     if (!name)
0424         goto remove_qos_req;
0425 
0426     cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
0427     kfree(name);
0428 
0429     if (IS_ERR(cdev)) {
0430         err = PTR_ERR(cdev);
0431         dev_err(dev,
0432             "Failed to register devfreq cooling device (%d)\n",
0433             err);
0434         goto remove_qos_req;
0435     }
0436 
0437     dfc->cdev = cdev;
0438 
0439     return cdev;
0440 
0441 remove_qos_req:
0442     dev_pm_qos_remove_request(&dfc->req_max_freq);
0443 free_table:
0444     kfree(dfc->freq_table);
0445 free_dfc:
0446     kfree(dfc);
0447 
0448     return ERR_PTR(err);
0449 }
0450 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
0451 
0452 /**
0453  * of_devfreq_cooling_register() - Register devfreq cooling device,
0454  *                                with OF information.
0455  * @np: Pointer to OF device_node.
0456  * @df: Pointer to devfreq device.
0457  */
0458 struct thermal_cooling_device *
0459 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
0460 {
0461     return of_devfreq_cooling_register_power(np, df, NULL);
0462 }
0463 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
0464 
0465 /**
0466  * devfreq_cooling_register() - Register devfreq cooling device.
0467  * @df: Pointer to devfreq device.
0468  */
0469 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
0470 {
0471     return of_devfreq_cooling_register(NULL, df);
0472 }
0473 EXPORT_SYMBOL_GPL(devfreq_cooling_register);
0474 
0475 /**
0476  * devfreq_cooling_em_register() - Register devfreq cooling device with
0477  *      power information and automatically register Energy Model (EM)
0478  * @df:     Pointer to devfreq device.
0479  * @dfc_power:  Pointer to devfreq_cooling_power.
0480  *
0481  * Register a devfreq cooling device and automatically register EM. The
0482  * available OPPs must be registered for the device.
0483  *
0484  * If @dfc_power is provided, the cooling device is registered with the
0485  * power extensions. It is using the simple Energy Model which requires
0486  * "dynamic-power-coefficient" a devicetree property. To not break drivers
0487  * which miss that DT property, the function won't bail out when the EM
0488  * registration failed. The cooling device will be registered if everything
0489  * else is OK.
0490  */
0491 struct thermal_cooling_device *
0492 devfreq_cooling_em_register(struct devfreq *df,
0493                 struct devfreq_cooling_power *dfc_power)
0494 {
0495     struct thermal_cooling_device *cdev;
0496     struct device *dev;
0497     int ret;
0498 
0499     if (IS_ERR_OR_NULL(df))
0500         return ERR_PTR(-EINVAL);
0501 
0502     dev = df->dev.parent;
0503 
0504     ret = dev_pm_opp_of_register_em(dev, NULL);
0505     if (ret)
0506         dev_dbg(dev, "Unable to register EM for devfreq cooling device (%d)\n",
0507             ret);
0508 
0509     cdev = of_devfreq_cooling_register_power(dev->of_node, df, dfc_power);
0510 
0511     if (IS_ERR_OR_NULL(cdev))
0512         em_dev_unregister_perf_domain(dev);
0513 
0514     return cdev;
0515 }
0516 EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
0517 
0518 /**
0519  * devfreq_cooling_unregister() - Unregister devfreq cooling device.
0520  * @cdev: Pointer to devfreq cooling device to unregister.
0521  *
0522  * Unregisters devfreq cooling device and related Energy Model if it was
0523  * present.
0524  */
0525 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
0526 {
0527     struct devfreq_cooling_device *dfc;
0528     struct device *dev;
0529 
0530     if (IS_ERR_OR_NULL(cdev))
0531         return;
0532 
0533     dfc = cdev->devdata;
0534     dev = dfc->devfreq->dev.parent;
0535 
0536     thermal_cooling_device_unregister(dfc->cdev);
0537     dev_pm_qos_remove_request(&dfc->req_max_freq);
0538 
0539     em_dev_unregister_perf_domain(dev);
0540 
0541     kfree(dfc->freq_table);
0542     kfree(dfc);
0543 }
0544 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);