/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

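/*
 * Return true if the normal tag space still has unused tags. A NULL @tags
 * (no tag map at all) is reported as having free tags.
 */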
bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
    if (!tags)
        return true;

    return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
    if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
        !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
        atomic_inc(&hctx->tags->active_queues);

    return true;
}

/*
 * Wake up all waiters potentially sleeping on tags
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
    sbitmap_queue_wake_all(&tags->bitmap_tags);
    if (include_reserve)
        sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
    struct blk_mq_tags *tags = hctx->tags;

    if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
        return;

    atomic_dec(&tags->active_queues);

    blk_mq_tag_wakeup_all(tags, false);
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
                  struct sbitmap_queue *bt)
{
    unsigned int depth, users;

    if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
        return true;
    if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
        return true;

    /*
     * Don't try dividing an ant
     */
    if (bt->sb.depth == 1)
        return true;

    users = atomic_read(&hctx->tags->active_queues);
    if (!users)
        return true;

    /*
     * Allow at least some tags
     */
    depth = max((bt->sb.depth + users - 1) / users, 4U);
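    /*
     * Illustrative example: a shared depth of 128 with 3 active queues
     * gives each queue up to (128 + 2) / 3 = 43 tags; the max() keeps a
     * floor of 4 tags per queue even when many queues are active.
     */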
    return atomic_read(&hctx->nr_active) < depth;
}

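/*
 * Attempt a non-blocking tag allocation from @bt. Returns the bit number on
 * success, or -1 if @hctx is over its fair share or no tag is free.
 */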
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
{
    if (!hctx_may_queue(hctx, bt))
        return -1;
    return __sbitmap_queue_get(bt);
}

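/*
 * Allocate a tag, sleeping if necessary (unless BLK_MQ_REQ_NOWAIT is set).
 * After sleeping the submitter may have been migrated to another CPU, so the
 * software and hardware context are re-mapped before retrying.
 */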
static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
          struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
{
    struct sbq_wait_state *ws;
    DEFINE_WAIT(wait);
    int tag;

    tag = __bt_get(hctx, bt);
    if (tag != -1)
        return tag;

    if (data->flags & BLK_MQ_REQ_NOWAIT)
        return -1;

    ws = bt_wait_ptr(bt, hctx);
    do {
        prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);

        tag = __bt_get(hctx, bt);
        if (tag != -1)
            break;

        /*
         * We're out of tags on this hardware queue, kick any
         * pending IO submits before going to sleep waiting for
         * some to complete. Note that hctx can be NULL here for
         * reserved tag allocation.
         */
        if (hctx)
            blk_mq_run_hw_queue(hctx, false);

        /*
         * Retry tag allocation after running the hardware queue,
         * as running the queue may also have found completions.
         */
        tag = __bt_get(hctx, bt);
        if (tag != -1)
            break;

        blk_mq_put_ctx(data->ctx);

        io_schedule();

        data->ctx = blk_mq_get_ctx(data->q);
        data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
        if (data->flags & BLK_MQ_REQ_RESERVED) {
            bt = &data->hctx->tags->breserved_tags;
        } else {
            hctx = data->hctx;
            bt = &hctx->tags->bitmap_tags;
        }
        finish_wait(&ws->wait, &wait);
        ws = bt_wait_ptr(bt, hctx);
    } while (1);

    finish_wait(&ws->wait, &wait);
    return tag;
}

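/*
 * Get a normal (non-reserved) tag. The returned value is offset by
 * nr_reserved_tags so that reserved and normal tags share a single
 * namespace in tags->rqs[].
 */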
static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
    int tag;

    tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
             data->hctx->tags);
    if (tag >= 0)
        return tag + data->hctx->tags->nr_reserved_tags;

    return BLK_MQ_TAG_FAIL;
}

static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
{
    int tag;

    if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
        WARN_ON_ONCE(1);
        return BLK_MQ_TAG_FAIL;
    }

    tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
             data->hctx->tags);
    if (tag < 0)
        return BLK_MQ_TAG_FAIL;

    return tag;
}

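/*
 * Main entry point for tag allocation, called from the request allocation
 * path with a populated blk_mq_alloc_data. Reserved requests draw from the
 * small reserved pool, everything else from the normal bitmap.
 */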
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
    if (data->flags & BLK_MQ_REQ_RESERVED)
        return __blk_mq_get_reserved_tag(data);
    return __blk_mq_get_tag(data);
}

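/*
 * Release a tag. Values at or above nr_reserved_tags belong to the normal
 * bitmap (after subtracting the offset added in __blk_mq_get_tag()); the
 * rest go back to the reserved bitmap.
 */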
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
            unsigned int tag)
{
    struct blk_mq_tags *tags = hctx->tags;

    if (tag >= tags->nr_reserved_tags) {
        const int real_tag = tag - tags->nr_reserved_tags;

        BUG_ON(real_tag >= tags->nr_tags);
        sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
    } else {
        BUG_ON(tag >= tags->nr_reserved_tags);
        sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
    }
}

struct bt_iter_data {
    struct blk_mq_hw_ctx *hctx;
    busy_iter_fn *fn;
    void *data;
    bool reserved;
};

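/*
 * Per-bit callback for bt_for_each(): translate the bit number back into a
 * request (adding the reserved offset for the normal bitmap) and invoke the
 * caller's function for requests that belong to this hctx's queue.
 */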
static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
    struct bt_iter_data *iter_data = data;
    struct blk_mq_hw_ctx *hctx = iter_data->hctx;
    struct blk_mq_tags *tags = hctx->tags;
    bool reserved = iter_data->reserved;
    struct request *rq;

    if (!reserved)
        bitnr += tags->nr_reserved_tags;
    rq = tags->rqs[bitnr];

    if (rq->q == hctx->queue)
        iter_data->fn(hctx, rq, iter_data->data, reserved);
    return true;
}

static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
            busy_iter_fn *fn, void *data, bool reserved)
{
    struct bt_iter_data iter_data = {
        .hctx = hctx,
        .fn = fn,
        .data = data,
        .reserved = reserved,
    };

    sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}

struct bt_tags_iter_data {
    struct blk_mq_tags *tags;
    busy_tag_iter_fn *fn;
    void *data;
    bool reserved;
};

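/*
 * Per-bit callback for bt_tags_for_each(): like bt_iter(), but operates on a
 * whole tag set rather than a single hardware context, so no queue filtering
 * is applied before calling back.
 */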
static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
    struct bt_tags_iter_data *iter_data = data;
    struct blk_mq_tags *tags = iter_data->tags;
    bool reserved = iter_data->reserved;
    struct request *rq;

    if (!reserved)
        bitnr += tags->nr_reserved_tags;
    rq = tags->rqs[bitnr];

    iter_data->fn(rq, iter_data->data, reserved);
    return true;
}

static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
                 busy_tag_iter_fn *fn, void *data, bool reserved)
{
    struct bt_tags_iter_data iter_data = {
        .tags = tags,
        .fn = fn,
        .data = data,
        .reserved = reserved,
    };

    if (tags->rqs)
        sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
        busy_tag_iter_fn *fn, void *priv)
{
    if (tags->nr_reserved_tags)
        bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
    bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}

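/*
 * Iterate over all in-flight (tag-holding) requests in every hardware queue
 * of a tag set. Drivers typically use this from error handling paths, for
 * example to cancel outstanding requests during a controller reset
 * (illustrative use, not mandated by this file).
 */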
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
        busy_tag_iter_fn *fn, void *priv)
{
    int i;

    for (i = 0; i < tagset->nr_hw_queues; i++) {
        if (tagset->tags && tagset->tags[i])
            blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
    }
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

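/*
 * Walk every allocated request in the tag set and let the driver
 * re-initialize it through the ->reinit_request() callback, stopping at the
 * first error. A no-op if the driver does not implement the callback.
 */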
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
{
    int i, j, ret = 0;

    if (!set->ops->reinit_request)
        goto out;

    for (i = 0; i < set->nr_hw_queues; i++) {
        struct blk_mq_tags *tags = set->tags[i];

        for (j = 0; j < tags->nr_tags; j++) {
            if (!tags->rqs[j])
                continue;

            ret = set->ops->reinit_request(set->driver_data,
                        tags->rqs[j]);
            if (ret)
                goto out;
        }
    }

out:
    return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);

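/*
 * Like blk_mq_tagset_busy_iter(), but restricted to the hardware queues of a
 * single request_queue; the callback also receives the hctx the request is
 * queued on.
 */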
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
        void *priv)
{
    struct blk_mq_hw_ctx *hctx;
    int i;

    queue_for_each_hw_ctx(q, hctx, i) {
        struct blk_mq_tags *tags = hctx->tags;

        /*
         * If no software queues are currently mapped to this
         * hardware queue, there's nothing to check
         */
        if (!blk_mq_hw_queue_mapped(hctx))
            continue;

        if (tags->nr_reserved_tags)
            bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
        bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
    }
}

static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
{
    return bt->sb.depth - sbitmap_weight(&bt->sb);
}

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
            bool round_robin, int node)
{
    return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
                       node);
}

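/*
 * Set up the normal and reserved bitmaps for @tags. On failure the partially
 * initialized bitmaps and the tags structure itself are freed, so the caller
 * must not touch @tags after a NULL return.
 */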
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
                           int node, int alloc_policy)
{
    unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
    bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

    if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
        goto free_tags;
    if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
             node))
        goto free_bitmap_tags;

    return tags;
free_bitmap_tags:
    sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
    kfree(tags);
    return NULL;
}

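/*
 * Allocate and initialize a blk_mq_tags structure. An illustrative (not
 * prescriptive) call for a 64-deep queue with one reserved tag might look
 * like:
 *
 *     tags = blk_mq_init_tags(64, 1, NUMA_NO_NODE, BLK_TAG_ALLOC_FIFO);
 *
 * In practice the blk-mq core calls this on behalf of drivers when a tag
 * set is registered.
 */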
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
                     unsigned int reserved_tags,
                     int node, int alloc_policy)
{
    struct blk_mq_tags *tags;

    if (total_tags > BLK_MQ_TAG_MAX) {
        pr_err("blk-mq: tag depth too large\n");
        return NULL;
    }

    tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
    if (!tags)
        return NULL;

    tags->nr_tags = total_tags;
    tags->nr_reserved_tags = reserved_tags;

    return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
    sbitmap_queue_free(&tags->bitmap_tags);
    sbitmap_queue_free(&tags->breserved_tags);
    kfree(tags);
}

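/*
 * Resize the normal tag bitmap to @tdepth (which includes the reserved tags
 * on entry). Only the regular bitmap is resized; the reserved pool is fixed
 * at allocation time. Waiters are woken afterwards so they re-check tag
 * availability against the new depth.
 */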
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
{
    tdepth -= tags->nr_reserved_tags;
    if (tdepth > tags->nr_tags)
        return -EINVAL;

    /*
     * We don't need to (and cannot) update reserved tags here; they
     * remain static and should never need resizing.
     */
    sbitmap_queue_resize(&tags->bitmap_tags, tdepth);

    blk_mq_tag_wakeup_all(tags, false);
    return 0;
}

/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per hardware queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
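 *
 * The combined value can be split back out with blk_mq_unique_tag_to_hwq()
 * and blk_mq_unique_tag_to_tag().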
 */
u32 blk_mq_unique_tag(struct request *rq)
{
    struct request_queue *q = rq->q;
    struct blk_mq_hw_ctx *hctx;
    int hwq = 0;

    if (q->mq_ops) {
        hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
        hwq = hctx->queue_num;
    }

    return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
        (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);

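/*
 * Format tag map statistics (depth, reserved count, free counts, active
 * queues) for the hardware queue's sysfs "tags" attribute.
 */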
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
    char *orig_page = page;
    unsigned int free, res;

    if (!tags)
        return 0;

    page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
            "bits_per_word=%u\n",
            tags->nr_tags, tags->nr_reserved_tags,
            1U << tags->bitmap_tags.sb.shift);

    free = bt_unused_tags(&tags->bitmap_tags);
    res = bt_unused_tags(&tags->breserved_tags);

    page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
    page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));

    return page - orig_page;
}