// SPDX-License-Identifier: GPL-2.0

#include "blk-rq-qos.h"

/*
 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
 * false if 'v' + 1 would be bigger than 'below'.
 */
static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
    unsigned int cur = atomic_read(v);

    do {
        if (cur >= below)
            return false;
    } while (!atomic_try_cmpxchg(v, &cur, cur + 1));

    return true;
}

bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
{
    return atomic_inc_below(&rq_wait->inflight, limit);
}
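
/*
 * Illustrative usage (hypothetical caller, not taken from this file): a
 * policy that caps inflight requests at 'limit' would typically do
 *
 *     if (rq_wait_inc_below(rqw, limit))
 *         return;
 *
 * and otherwise sleep via rq_qos_wait() until a completion drops
 * rqw->inflight. Note that atomic_try_cmpxchg() updates 'cur' on failure,
 * so atomic_inc_below() always re-checks the limit against the latest
 * value before retrying.
 */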

void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
    do {
        if (rqos->ops->cleanup)
            rqos->ops->cleanup(rqos, bio);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
    do {
        if (rqos->ops->done)
            rqos->ops->done(rqos, rq);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
    do {
        if (rqos->ops->issue)
            rqos->ops->issue(rqos, rq);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
    do {
        if (rqos->ops->requeue)
            rqos->ops->requeue(rqos, rq);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
    do {
        if (rqos->ops->throttle)
            rqos->ops->throttle(rqos, bio);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
    do {
        if (rqos->ops->track)
            rqos->ops->track(rqos, rq, bio);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
    do {
        if (rqos->ops->merge)
            rqos->ops->merge(rqos, rq, bio);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
    do {
        if (rqos->ops->done_bio)
            rqos->ops->done_bio(rqos, bio);
        rqos = rqos->next;
    } while (rqos);
}

void __rq_qos_queue_depth_changed(struct rq_qos *rqos)
{
    do {
        if (rqos->ops->queue_depth_changed)
            rqos->ops->queue_depth_changed(rqos);
        rqos = rqos->next;
    } while (rqos);
}
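
/*
 * Each of the __rq_qos_*() dispatchers above walks the queue's singly
 * linked rq_qos chain (anchored at q->rq_qos, linked through ->next) and
 * invokes the corresponding callback on every policy that implements it.
 */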

/*
 * Return true, if we can't increase the depth further by scaling
 */
bool rq_depth_calc_max_depth(struct rq_depth *rqd)
{
    unsigned int depth;
    bool ret = false;

    /*
     * For QD=1 devices, this is a special case. It's important for those
     * to have one request ready when one completes, so force a depth of
     * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
     * since the device can't have more than that in flight. If we're
     * scaling down, then keep a setting of 1/1/1.
     */
    if (rqd->queue_depth == 1) {
        if (rqd->scale_step > 0)
            rqd->max_depth = 1;
        else {
            rqd->max_depth = 2;
            ret = true;
        }
    } else {
        /*
         * scale_step == 0 is our default state. If we have suffered
         * latency spikes, step will be > 0, and we shrink the
         * allowed write depths. If step is < 0, we're only doing
         * writes, and we allow a temporarily higher depth to
         * increase performance.
         */
        depth = min_t(unsigned int, rqd->default_depth,
                  rqd->queue_depth);
        if (rqd->scale_step > 0)
            depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
        else if (rqd->scale_step < 0) {
            unsigned int maxd = 3 * rqd->queue_depth / 4;

            depth = 1 + ((depth - 1) << -rqd->scale_step);
            if (depth > maxd) {
                depth = maxd;
                ret = true;
            }
        }

        rqd->max_depth = depth;
    }

    return ret;
}
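
/*
 * Worked example with hypothetical numbers: for default_depth = 64 and
 * queue_depth = 64, scale_step = 2 gives depth = 1 + ((64 - 1) >> 2) = 16.
 * With scale_step = -1 the same starting point gives
 * 1 + ((64 - 1) << 1) = 127, which is clamped to maxd = 3 * 64 / 4 = 48,
 * and the function returns true to signal that scaling up any further
 * is pointless.
 */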

/* Returns true on success and false if scaling up wasn't possible */
bool rq_depth_scale_up(struct rq_depth *rqd)
{
    /*
     * Hit max in previous round, stop here
     */
    if (rqd->scaled_max)
        return false;

    rqd->scale_step--;

    rqd->scaled_max = rq_depth_calc_max_depth(rqd);
    return true;
}

/*
 * Scale the queue depth down. If 'hard_throttle' is set, do it quicker,
 * since we had a latency violation. Returns true on success and false if
 * scaling down wasn't possible.
 */
bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
    /*
     * Stop scaling down when we've hit the limit. This also prevents
     * ->scale_step from going to crazy values, if the device can't
     * keep up.
     */
    if (rqd->max_depth == 1)
        return false;

    if (rqd->scale_step < 0 && hard_throttle)
        rqd->scale_step = 0;
    else
        rqd->scale_step++;

    rqd->scaled_max = false;
    rq_depth_calc_max_depth(rqd);
    return true;
}
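
/*
 * Illustrative sequence (hypothetical): starting from scale_step == 0,
 * each rq_depth_scale_up() call decrements scale_step (allowing a deeper
 * queue) until rq_depth_calc_max_depth() reports that the ceiling was hit
 * and scaled_max blocks further increases. On a latency violation,
 * rq_depth_scale_down(rqd, true) snaps a negative scale_step straight
 * back to 0 instead of stepping it up one notch at a time.
 */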

struct rq_qos_wait_data {
    struct wait_queue_entry wq;
    struct task_struct *task;
    struct rq_wait *rqw;
    acquire_inflight_cb_t *cb;
    void *private_data;
    bool got_token;
};

static int rq_qos_wake_function(struct wait_queue_entry *curr,
                unsigned int mode, int wake_flags, void *key)
{
    struct rq_qos_wait_data *data = container_of(curr,
                             struct rq_qos_wait_data,
                             wq);

    /*
     * If we fail to get a budget, return -1 to interrupt the wake up loop
     * in __wake_up_common.
     */
    if (!data->cb(data->rqw, data->private_data))
        return -1;

    data->got_token = true;
    smp_wmb();
    list_del_init(&curr->entry);
    wake_up_process(data->task);
    return 1;
}
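
/*
 * Ordering note: the smp_wmb() above ensures got_token is set before the
 * waiter is taken off the list and woken; it pairs with the smp_rmb() in
 * rq_qos_wait() before data.got_token is re-read on the racing path.
 */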

/**
 * rq_qos_wait - throttle on a rqw if we need to
 * @rqw: rqw to throttle on
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: inc the rqw->inflight counter if we can
 * @cleanup_cb: the callback to cleanup in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
 * waking up based on the resources available.  The acquire_inflight_cb should
 * inc the rqw->inflight if we have the ability to do so, or return false if not
 * and then we will sleep until the room becomes available.
 *
 * cleanup_cb is for the case where we race with a waker and need to clean up
 * the inflight count accordingly.
 */
void rq_qos_wait(struct rq_wait *rqw, void *private_data,
         acquire_inflight_cb_t *acquire_inflight_cb,
         cleanup_cb_t *cleanup_cb)
{
    struct rq_qos_wait_data data = {
        .wq = {
            .func   = rq_qos_wake_function,
            .entry  = LIST_HEAD_INIT(data.wq.entry),
        },
        .task = current,
        .rqw = rqw,
        .cb = acquire_inflight_cb,
        .private_data = private_data,
    };
    bool has_sleeper;

    has_sleeper = wq_has_sleeper(&rqw->wait);
    if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
        return;

    has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
                         TASK_UNINTERRUPTIBLE);
    do {
        /* The memory barrier in set_current_state() saves us here. */
        if (data.got_token)
            break;
        if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
            finish_wait(&rqw->wait, &data.wq);

            /*
             * We raced with rq_qos_wake_function() getting a token,
             * which means we now have two. Put our local token
             * and wake anyone else potentially waiting for one.
             */
            smp_rmb();
            if (data.got_token)
                cleanup_cb(rqw, private_data);
            break;
        }
        io_schedule();
        has_sleeper = true;
        set_current_state(TASK_UNINTERRUPTIBLE);
    } while (1);
    finish_wait(&rqw->wait, &data.wq);
}
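
/*
 * Sketch of a caller (hypothetical names; assumes the callback typedefs
 * from blk-rq-qos.h take (struct rq_wait *rqw, void *private_data)):
 *
 *     static bool my_inflight_cb(struct rq_wait *rqw, void *private_data)
 *     {
 *         return rq_wait_inc_below(rqw, my_limit(private_data));
 *     }
 *
 *     static void my_cleanup_cb(struct rq_wait *rqw, void *private_data)
 *     {
 *         atomic_dec(&rqw->inflight);
 *         wake_up(&rqw->wait);
 *     }
 *
 *     ... and in the policy's ->throttle() hook ...
 *
 *     rq_qos_wait(rqw, my_data, my_inflight_cb, my_cleanup_cb);
 *
 * my_limit() and my_data are placeholders for whatever state the policy
 * keeps; the cleanup callback gives back the extra slot taken by the
 * racing waker.
 */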

void rq_qos_exit(struct request_queue *q)
{
    while (q->rq_qos) {
        struct rq_qos *rqos = q->rq_qos;
        q->rq_qos = rqos->next;
        rqos->ops->exit(rqos);
    }
}
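
/*
 * rq_qos_exit() simply pops each policy off the head of q->rq_qos in turn
 * and gives it a chance to tear itself down via ->exit().
 */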