// SPDX-License-Identifier: GPL-2.0
/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/list_sort.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

/*
 * Mark a hardware queue as needing a restart. The flag is cleared and the
 * queue re-run by __blk_mq_sched_restart().
 */
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
    if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
        return;

    set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);

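/*
 * Clear BLK_MQ_S_SCHED_RESTART and kick the hardware queue so that pending
 * requests get another chance to dispatch.
 */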
void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
    clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

    /*
     * Order clearing SCHED_RESTART against the list_empty_careful()
     * check of hctx->dispatch in blk_mq_run_hw_queue(). Its pair is the
     * barrier in blk_mq_dispatch_rq_list(). Without it, the dispatch side
     * might not see SCHED_RESTART while a request newly added to
     * hctx->dispatch is missed by the check in blk_mq_run_hw_queue().
     */
    smp_mb();

    blk_mq_run_hw_queue(hctx, true);
}

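/*
 * list_sort() comparator: order requests by the hardware queue they map to,
 * so that requests sharing an hctx end up adjacent in the list.
 */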
static int sched_rq_cmp(void *priv, const struct list_head *a,
            const struct list_head *b)
{
    struct request *rqa = container_of(a, struct request, queuelist);
    struct request *rqb = container_of(b, struct request, queuelist);

    return rqa->mq_hctx > rqb->mq_hctx;
}

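/*
 * Dispatch the leading run of requests that map to the same hctx as one
 * batch; requests belonging to a different hctx are left on @rq_list for a
 * later call. Returns the result of blk_mq_dispatch_rq_list() for the batch.
 */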
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
{
    struct blk_mq_hw_ctx *hctx =
        list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
    struct request *rq;
    LIST_HEAD(hctx_list);
    unsigned int count = 0;

    list_for_each_entry(rq, rq_list, queuelist) {
        if (rq->mq_hctx != hctx) {
            list_cut_before(&hctx_list, rq_list, &rq->queuelist);
            goto dispatch;
        }
        count++;
    }
    list_splice_tail_init(rq_list, &hctx_list);

dispatch:
    return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}

#define BLK_MQ_BUDGET_DELAY 3       /* ms units */

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue if .get_budget() returns BLK_STS_NO_RESOURCE.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again.  This is necessary to avoid starving flushes.
 */
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
    struct request_queue *q = hctx->queue;
    struct elevator_queue *e = q->elevator;
    bool multi_hctxs = false, run_queue = false;
    bool dispatched = false, busy = false;
    unsigned int max_dispatch;
    LIST_HEAD(rq_list);
    int count = 0;

    if (hctx->dispatch_busy)
        max_dispatch = 1;
    else
        max_dispatch = hctx->queue->nr_requests;

    do {
        struct request *rq;
        int budget_token;

        if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
            break;

        if (!list_empty_careful(&hctx->dispatch)) {
            busy = true;
            break;
        }

        budget_token = blk_mq_get_dispatch_budget(q);
        if (budget_token < 0)
            break;

        rq = e->type->ops.dispatch_request(hctx);
        if (!rq) {
            blk_mq_put_dispatch_budget(q, budget_token);
            /*
             * We're releasing the budget without dispatching. While we
             * held it, other hctxs on the same queue may have been
             * blocked, and since we didn't dispatch there's no guarantee
             * anyone else will kick the queue.  Kick it ourselves.
             */
            run_queue = true;
            break;
        }

        blk_mq_set_rq_budget_token(rq, budget_token);

        /*
         * Now this rq owns the budget which has to be released
         * if this rq won't be queued to driver via .queue_rq()
         * in blk_mq_dispatch_rq_list().
         */
        list_add_tail(&rq->queuelist, &rq_list);
        count++;
        if (rq->mq_hctx != hctx)
            multi_hctxs = true;

        /*
         * If we cannot get a tag for the request, stop dequeueing
         * requests from the IO scheduler. We are unlikely to be able
         * to submit them anyway and it creates a false impression for
         * scheduling heuristics that the device can take more IO.
         */
        if (!blk_mq_get_driver_tag(rq))
            break;
    } while (count < max_dispatch);

    if (!count) {
        if (run_queue)
            blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
    } else if (multi_hctxs) {
        /*
         * Some schedulers, such as bfq and deadline, may dequeue
         * requests that belong to different hctxs.
         *
         * Sort the requests in the list by hctx, then dispatch the
         * requests from one hctx as a single batch at a time.
         */
        list_sort(NULL, &rq_list, sched_rq_cmp);
        do {
            dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
        } while (!list_empty(&rq_list));
    } else {
        dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
    }

    if (busy)
        return -EAGAIN;
    return !!dispatched;
}

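/*
 * Keep calling __blk_mq_do_dispatch_sched() for as long as it dispatches
 * something, but bound the time spent here: if we need to reschedule or
 * have been running for about a second, punt to an async queue run.
 */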
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
    unsigned long end = jiffies + HZ;
    int ret;

    do {
        ret = __blk_mq_do_dispatch_sched(hctx);
        if (ret != 1)
            break;
        if (need_resched() || time_is_before_jiffies(end)) {
            blk_mq_delay_run_hw_queue(hctx, 0);
            break;
        }
    } while (1);

    return ret;
}

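/*
 * Return the software queue that follows @ctx in this hctx's ctx array,
 * wrapping around at the end; used for round-robin dispatch.
 */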
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
                      struct blk_mq_ctx *ctx)
{
    unsigned short idx = ctx->index_hw[hctx->type];

    if (++idx == hctx->nr_ctx)
        idx = 0;

    return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue if .get_budget() returns BLK_STS_NO_RESOURCE.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again.  This is necessary to avoid starving flushes.
 */
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
    struct request_queue *q = hctx->queue;
    LIST_HEAD(rq_list);
    struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
    int ret = 0;
    struct request *rq;

    do {
        int budget_token;

        if (!list_empty_careful(&hctx->dispatch)) {
            ret = -EAGAIN;
            break;
        }

        if (!sbitmap_any_bit_set(&hctx->ctx_map))
            break;

        budget_token = blk_mq_get_dispatch_budget(q);
        if (budget_token < 0)
            break;

        rq = blk_mq_dequeue_from_ctx(hctx, ctx);
        if (!rq) {
            blk_mq_put_dispatch_budget(q, budget_token);
            /*
             * We're releasing the budget without dispatching. While we
             * held it, other hctxs on the same queue may have been
             * blocked, and since we didn't dispatch there's no guarantee
             * anyone else will kick the queue.  Kick it ourselves.
             */
            blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
            break;
        }

        blk_mq_set_rq_budget_token(rq, budget_token);

        /*
         * Now this rq owns the budget which has to be released
         * if this rq won't be queued to driver via .queue_rq()
         * in blk_mq_dispatch_rq_list().
         */
        list_add(&rq->queuelist, &rq_list);

        /* round robin for fair dispatch */
        ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

    } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));

    WRITE_ONCE(hctx->dispatch_from, ctx);
    return ret;
}

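/*
 * Dispatch requests for one hardware queue: requests left over on
 * hctx->dispatch are handled first, then further requests are pulled from
 * the I/O scheduler or the software queues. Returns -EAGAIN if
 * hctx->dispatch was found non-empty and the queue has to be run again.
 */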
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
    struct request_queue *q = hctx->queue;
    const bool has_sched = q->elevator;
    int ret = 0;
    LIST_HEAD(rq_list);

    /*
     * If we have previous entries on our dispatch list, grab them first for
     * more fair dispatch.
     */
    if (!list_empty_careful(&hctx->dispatch)) {
        spin_lock(&hctx->lock);
        if (!list_empty(&hctx->dispatch))
            list_splice_init(&hctx->dispatch, &rq_list);
        spin_unlock(&hctx->lock);
    }

    /*
     * Only ask the scheduler for requests if we didn't have residual
     * requests from the dispatch list. This is to avoid the case where
     * we only ever dispatch a fraction of the requests available because
     * of low device queue depth. Once we pull requests out of the IO
     * scheduler, we can no longer merge or sort them. So it's best to
     * leave them there for as long as we can. Mark the hw queue as
     * needing a restart in that case.
     *
     * We want to dispatch from the scheduler if there was nothing
     * on the dispatch list or we were able to dispatch from the
     * dispatch list.
     */
    if (!list_empty(&rq_list)) {
        blk_mq_sched_mark_restart_hctx(hctx);
        if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
            if (has_sched)
                ret = blk_mq_do_dispatch_sched(hctx);
            else
                ret = blk_mq_do_dispatch_ctx(hctx);
        }
    } else if (has_sched) {
        ret = blk_mq_do_dispatch_sched(hctx);
    } else if (hctx->dispatch_busy) {
        /* dequeue requests one by one from the sw queue if the queue is busy */
        ret = blk_mq_do_dispatch_ctx(hctx);
    } else {
        blk_mq_flush_busy_ctxs(hctx, &rq_list);
        blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
    }

    return ret;
}

void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
    struct request_queue *q = hctx->queue;

    /* RCU or SRCU read lock is needed before checking quiesced flag */
    if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
        return;

    hctx->run++;

    /*
     * A return of -EAGAIN is an indication that hctx->dispatch is not
     * empty and we must run again in order to avoid starving flushes.
     */
    if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
        if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
            blk_mq_run_hw_queue(hctx, true);
    }
}

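/*
 * Try to merge @bio into an already queued request, either through the
 * elevator's ->bio_merge() hook or, without an elevator, against the
 * requests in the software queue. Returns true if the bio was merged.
 */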
bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
        unsigned int nr_segs)
{
    struct elevator_queue *e = q->elevator;
    struct blk_mq_ctx *ctx;
    struct blk_mq_hw_ctx *hctx;
    bool ret = false;
    enum hctx_type type;

    if (e && e->type->ops.bio_merge) {
        ret = e->type->ops.bio_merge(q, bio, nr_segs);
        goto out_put;
    }

    ctx = blk_mq_get_ctx(q);
    hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
    type = hctx->type;
    if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
        list_empty_careful(&ctx->rq_lists[type]))
        goto out_put;

    /* default per sw-queue merge */
    spin_lock(&ctx->lock);
    /*
     * Reverse check our software queue for entries that we could
     * potentially merge with. Currently includes a hand-wavy stop
     * count of 8, to not spend too much time checking for merges.
     */
    if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs))
        ret = true;

    spin_unlock(&ctx->lock);
out_put:
    return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
                   struct list_head *free)
{
    return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
                       struct request *rq)
{
    /*
     * Dispatch flush and passthrough requests directly.
     *
     * A passthrough request has to be added to hctx->dispatch directly:
     * the device may be in a state in which it can't handle FS requests,
     * so BLK_STS_RESOURCE keeps being returned and those FS requests pile
     * up on hctx->dispatch, while a passthrough request may be exactly
     * what is needed to get the device out of that state. If the
     * passthrough request were added to the scheduler queue, it would
     * never get a chance to be dispatched, because requests on
     * hctx->dispatch are always given priority.
     */
    if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
        return true;

    return false;
}

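/*
 * Insert a request for dispatch: flush and passthrough requests bypass the
 * scheduler and go straight to hctx->dispatch, everything else goes through
 * the elevator's ->insert_requests() hook or the software queue.
 */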
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
                 bool run_queue, bool async)
{
    struct request_queue *q = rq->q;
    struct elevator_queue *e = q->elevator;
    struct blk_mq_ctx *ctx = rq->mq_ctx;
    struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

    WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));

    if (blk_mq_sched_bypass_insert(hctx, rq)) {
        /*
         * Firstly, a normal IO request is inserted to the scheduler queue
         * or sw queue, while a flush request is added to the dispatch
         * queue (hctx->dispatch) directly. Since there is at most one
         * in-flight flush request per hw queue, it doesn't matter whether
         * the flush request is added to the tail or the front of the
         * dispatch queue.
         *
         * Secondly, in case of NCQ the flush request is a non-NCQ
         * command, and queueing it fails while any normal IO request
         * (NCQ command) is in flight. Adding the flush rq to the front of
         * hctx->dispatch tends to add a little extra latency to it
         * (because of S_SCHED_RESTART) compared with adding it to the
         * tail, which increases the chance of flush merging, so fewer
         * flush requests are issued to the controller. About 10% of the
         * run time is observed to be saved in blktests block/004 on a
         * disk attached via AHCI/NCQ when the flush rq is added to the
         * front of hctx->dispatch.
         *
         * Simply queue the flush rq to the front of hctx->dispatch so
         * that flush-intensive workloads can benefit on NCQ hardware.
         */
        at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
        blk_mq_request_bypass_insert(rq, at_head, false);
        goto run;
    }

    if (e) {
        LIST_HEAD(list);

        list_add(&rq->queuelist, &list);
        e->type->ops.insert_requests(hctx, &list, at_head);
    } else {
        spin_lock(&ctx->lock);
        __blk_mq_insert_request(hctx, rq, at_head);
        spin_unlock(&ctx->lock);
    }

run:
    if (run_queue)
        blk_mq_run_hw_queue(hctx, async);
}

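/*
 * Insert a list of requests from a flushed plug: hand them to the elevator
 * if one is attached, otherwise try direct issue if the hw queue isn't busy
 * or fall back to the software queues, then run the hardware queue.
 */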
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
                  struct blk_mq_ctx *ctx,
                  struct list_head *list, bool run_queue_async)
{
    struct elevator_queue *e;
    struct request_queue *q = hctx->queue;

    /*
     * blk_mq_sched_insert_requests() is called from flush plug
     * context only, and holds one usage counter reference to prevent
     * the queue from being released.
     */
    percpu_ref_get(&q->q_usage_counter);

    e = hctx->queue->elevator;
    if (e) {
        e->type->ops.insert_requests(hctx, list, false);
    } else {
        /*
         * With the 'none' scheduler, try to issue requests directly if
         * the hw queue isn't busy; this may save us an extra enqueue and
         * dequeue to the sw queue.
         */
        if (!hctx->dispatch_busy && !run_queue_async) {
            blk_mq_run_dispatch_ops(hctx->queue,
                blk_mq_try_issue_list_directly(hctx, list));
            if (list_empty(list))
                goto out;
        }
        blk_mq_insert_requests(hctx, ctx, list);
    }

    blk_mq_run_hw_queue(hctx, run_queue_async);
 out:
    percpu_ref_put(&q->q_usage_counter);
}

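/*
 * Set up scheduler tags for one hardware queue, either by pointing at the
 * queue-wide shared tags or by allocating a dedicated map and requests.
 */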
static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
                      struct blk_mq_hw_ctx *hctx,
                      unsigned int hctx_idx)
{
    if (blk_mq_is_shared_tags(q->tag_set->flags)) {
        hctx->sched_tags = q->sched_shared_tags;
        return 0;
    }

    hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
                            q->nr_requests);

    if (!hctx->sched_tags)
        return -ENOMEM;
    return 0;
}

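/* Free the queue-wide shared scheduler tags. */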
static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
{
    blk_mq_free_rq_map(queue->sched_shared_tags);
    queue->sched_shared_tags = NULL;
}

/* called in queue's release handler, tagset has gone away */
static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
{
    struct blk_mq_hw_ctx *hctx;
    unsigned long i;

    queue_for_each_hw_ctx(q, hctx, i) {
        if (hctx->sched_tags) {
            if (!blk_mq_is_shared_tags(flags))
                blk_mq_free_rq_map(hctx->sched_tags);
            hctx->sched_tags = NULL;
        }
    }

    if (blk_mq_is_shared_tags(flags))
        blk_mq_exit_sched_shared_tags(q);
}

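/*
 * Allocate the queue-wide shared scheduler tags, sized at MAX_SCHED_RQ up
 * front so that later nr_requests updates never need a reallocation.
 */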
static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
{
    struct blk_mq_tag_set *set = queue->tag_set;

    /*
     * Set the initial depth to the maximum so that we don't need to
     * reallocate when nr_requests is updated.
     */
    queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
                        BLK_MQ_NO_HCTX_IDX,
                        MAX_SCHED_RQ);
    if (!queue->sched_shared_tags)
        return -ENOMEM;

    blk_mq_tag_update_sched_shared_tags(queue);

    return 0;
}

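/*
 * Attach an I/O scheduler to the queue: pick a default nr_requests,
 * allocate the scheduler tags, then call the elevator's init_sched() and
 * init_hctx() hooks, registering the debugfs attributes along the way.
 */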
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
    unsigned int flags = q->tag_set->flags;
    struct blk_mq_hw_ctx *hctx;
    struct elevator_queue *eq;
    unsigned long i;
    int ret;

    if (!e) {
        blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
        q->elevator = NULL;
        q->nr_requests = q->tag_set->queue_depth;
        return 0;
    }

    /*
     * Default to twice the smaller of the hardware queue depth and 128
     * (BLKDEV_DEFAULT_RQ), since we don't split into sync/async like the
     * old code did. Note that this is a per-hw-queue depth.
     */
    q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
                   BLKDEV_DEFAULT_RQ);

    if (blk_mq_is_shared_tags(flags)) {
        ret = blk_mq_init_sched_shared_tags(q);
        if (ret)
            return ret;
    }

    queue_for_each_hw_ctx(q, hctx, i) {
        ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
        if (ret)
            goto err_free_map_and_rqs;
    }

    ret = e->ops.init_sched(q, e);
    if (ret)
        goto err_free_map_and_rqs;

    mutex_lock(&q->debugfs_mutex);
    blk_mq_debugfs_register_sched(q);
    mutex_unlock(&q->debugfs_mutex);

    queue_for_each_hw_ctx(q, hctx, i) {
        if (e->ops.init_hctx) {
            ret = e->ops.init_hctx(hctx, i);
            if (ret) {
                eq = q->elevator;
                blk_mq_sched_free_rqs(q);
                blk_mq_exit_sched(q, eq);
                kobject_put(&eq->kobj);
                return ret;
            }
        }
        mutex_lock(&q->debugfs_mutex);
        blk_mq_debugfs_register_sched_hctx(q, hctx);
        mutex_unlock(&q->debugfs_mutex);
    }

    return 0;

err_free_map_and_rqs:
    blk_mq_sched_free_rqs(q);
    blk_mq_sched_tags_teardown(q, flags);

    q->elevator = NULL;
    return ret;
}

/*
 * Called from either blk_cleanup_queue() or elevator_switch(); the tag_set
 * is required for freeing the requests.
 */
void blk_mq_sched_free_rqs(struct request_queue *q)
{
    struct blk_mq_hw_ctx *hctx;
    unsigned long i;

    if (blk_mq_is_shared_tags(q->tag_set->flags)) {
        blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
                BLK_MQ_NO_HCTX_IDX);
    } else {
        queue_for_each_hw_ctx(q, hctx, i) {
            if (hctx->sched_tags)
                blk_mq_free_rqs(q->tag_set,
                        hctx->sched_tags, i);
        }
    }
}

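/*
 * Detach the I/O scheduler from the queue: tear down the per-hctx scheduler
 * data and debugfs entries, call the elevator's exit_sched() hook and free
 * the scheduler tags.
 */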
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
    struct blk_mq_hw_ctx *hctx;
    unsigned long i;
    unsigned int flags = 0;

    queue_for_each_hw_ctx(q, hctx, i) {
        mutex_lock(&q->debugfs_mutex);
        blk_mq_debugfs_unregister_sched_hctx(hctx);
        mutex_unlock(&q->debugfs_mutex);

        if (e->type->ops.exit_hctx && hctx->sched_data) {
            e->type->ops.exit_hctx(hctx, i);
            hctx->sched_data = NULL;
        }
        flags = hctx->flags;
    }

    mutex_lock(&q->debugfs_mutex);
    blk_mq_debugfs_unregister_sched(q);
    mutex_unlock(&q->debugfs_mutex);

    if (e->type->ops.exit_sched)
        e->type->ops.exit_sched(e);
    blk_mq_sched_tags_teardown(q, flags);
    q->elevator = NULL;
}