Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 #ifndef INT_BLK_MQ_H
0003 #define INT_BLK_MQ_H
0004 
0005 #include "blk-stat.h"
0006 #include "blk-mq-tag.h"
0007 
0008 struct blk_mq_tag_set;
0009 
/*
 * Per-request-queue container for the per-CPU software queue contexts.
 * The embedded kobject anchors these contexts in sysfs.
 */
struct blk_mq_ctxs {
	struct kobject kobj;
	struct blk_mq_ctx __percpu	*queue_ctx;	/* per-CPU array of software queues */
};
0014 
/**
 * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
 */
struct blk_mq_ctx {
	struct {
		spinlock_t		lock;	/* protects rq_lists below */
		/* pending requests, one list per hctx type */
		struct list_head	rq_lists[HCTX_MAX_TYPES];
	} ____cacheline_aligned_in_smp;

	unsigned int		cpu;	/* CPU this software queue belongs to */
	/* per-type index of this ctx within its hctx — set up outside this file */
	unsigned short		index_hw[HCTX_MAX_TYPES];
	struct blk_mq_hw_ctx	*hctxs[HCTX_MAX_TYPES];	/* mapped hardware queue per type */

	struct request_queue	*queue;	/* owning request queue */
	struct blk_mq_ctxs	*ctxs;	/* container holding all per-CPU ctxs */
	struct kobject		kobj;	/* sysfs object for this software queue */
} ____cacheline_aligned_in_smp;
0032 
/* Core submission/dispatch entry points — definitions live elsewhere. */
void blk_mq_submit_bio(struct bio *bio);
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
		unsigned int flags);
void blk_mq_exit_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
			     unsigned int);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
				bool kick_requeue_list);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
					struct blk_mq_ctx *start);
void blk_mq_put_rq_ref(struct request *rq);
0047 
/*
 * Internal helpers for allocating/freeing the request map
 * (tags plus their pre-allocated requests, per hardware queue index).
 */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
		     unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
				unsigned int hctx_idx, unsigned int depth);
void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
			     struct blk_mq_tags *tags,
			     unsigned int hctx_idx);
/*
 * Internal helpers for request insertion into sw queues
 */
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
				bool at_head);
void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
				  bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
				struct list_head *list);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
				    struct list_head *list);
0070 
/*
 * CPU -> queue mappings: resolve a hardware queue index to its NUMA node.
 */
extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int);
0075 
0076 /*
0077  * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
0078  * @q: request queue
0079  * @type: the hctx type index
0080  * @cpu: CPU
0081  */
0082 static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
0083                               enum hctx_type type,
0084                               unsigned int cpu)
0085 {
0086     return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
0087 }
0088 
0089 static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf)
0090 {
0091     enum hctx_type type = HCTX_TYPE_DEFAULT;
0092 
0093     /*
0094      * The caller ensure that if REQ_POLLED, poll must be enabled.
0095      */
0096     if (opf & REQ_POLLED)
0097         type = HCTX_TYPE_POLL;
0098     else if ((opf & REQ_OP_MASK) == REQ_OP_READ)
0099         type = HCTX_TYPE_READ;
0100     return type;
0101 }
0102 
0103 /*
0104  * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
0105  * @q: request queue
0106  * @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED).
0107  * @ctx: software queue cpu ctx
0108  */
0109 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
0110                              blk_opf_t opf,
0111                              struct blk_mq_ctx *ctx)
0112 {
0113     return ctx->hctxs[blk_mq_get_hctx_type(opf)];
0114 }
0115 
/*
 * sysfs helpers
 */
extern void blk_mq_sysfs_init(struct request_queue *q);
extern void blk_mq_sysfs_deinit(struct request_queue *q);
int blk_mq_sysfs_register(struct gendisk *disk);
void blk_mq_sysfs_unregister(struct gendisk *disk);
int blk_mq_sysfs_register_hctxs(struct request_queue *q);
void blk_mq_sysfs_unregister_hctxs(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);

/* Plug handling and teardown helpers. */
void blk_mq_free_plug_rqs(struct blk_plug *plug);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_cancel_work_sync(struct request_queue *q);

void blk_mq_release(struct request_queue *q);
0132 
/* Return the software queue context of @q for the given @cpu. */
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
					   unsigned int cpu)
{
	return per_cpu_ptr(q->queue_ctx, cpu);
}
0138 
/*
 * This assumes per-cpu software queueing queues. They could be per-node
 * as well, for instance. For now this is hardcoded as-is. Note that we don't
 * care about preemption, since we know the ctx's are persistent. This does
 * mean that we can't rely on ctx always matching the currently running CPU.
 * (Hence raw_smp_processor_id() rather than smp_processor_id().)
 */
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	return __blk_mq_get_ctx(q, raw_smp_processor_id());
}
0149 
/* Bundled parameters for request/tag allocation. */
struct blk_mq_alloc_data {
	/* input parameter */
	struct request_queue *q;
	blk_mq_req_flags_t flags;	/* BLK_MQ_REQ_* allocation flags */
	unsigned int shallow_depth;	/* cap on tag depth, if non-zero */
	blk_opf_t cmd_flags;		/* op and flags for the request */
	req_flags_t rq_flags;		/* RQF_* flags (e.g. RQF_ELV) */

	/* allocate multiple requests/tags in one go */
	unsigned int nr_tags;
	struct request **cached_rq;

	/* input & output parameter */
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
};
0166 
0167 static inline bool blk_mq_is_shared_tags(unsigned int flags)
0168 {
0169     return flags & BLK_MQ_F_TAG_HCTX_SHARED;
0170 }
0171 
0172 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
0173 {
0174     if (!(data->rq_flags & RQF_ELV))
0175         return data->hctx->tags;
0176     return data->hctx->sched_tags;
0177 }
0178 
/* True if the hardware queue has been stopped (BLK_MQ_S_STOPPED set). */
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
	return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
0183 
0184 static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
0185 {
0186     return hctx->nr_ctx && hctx->tags;
0187 }
0188 
/* In-flight accounting for a partition — definitions live elsewhere. */
unsigned int blk_mq_in_flight(struct request_queue *q,
		struct block_device *part);
void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
		unsigned int inflight[2]);
0193 
/* Return a previously acquired dispatch budget to the driver, if it uses one. */
static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
					  int budget_token)
{
	if (q->mq_ops->put_budget)
		q->mq_ops->put_budget(q, budget_token);
}
0200 
0201 static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
0202 {
0203     if (q->mq_ops->get_budget)
0204         return q->mq_ops->get_budget(q);
0205     return 0;
0206 }
0207 
0208 static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
0209 {
0210     if (token < 0)
0211         return;
0212 
0213     if (rq->q->mq_ops->set_rq_budget_token)
0214         rq->q->mq_ops->set_rq_budget_token(rq, token);
0215 }
0216 
0217 static inline int blk_mq_get_rq_budget_token(struct request *rq)
0218 {
0219     if (rq->q->mq_ops->get_rq_budget_token)
0220         return rq->q->mq_ops->get_rq_budget_token(rq);
0221     return -1;
0222 }
0223 
0224 static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
0225 {
0226     if (blk_mq_is_shared_tags(hctx->flags))
0227         atomic_inc(&hctx->queue->nr_active_requests_shared_tags);
0228     else
0229         atomic_inc(&hctx->nr_active);
0230 }
0231 
0232 static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
0233         int val)
0234 {
0235     if (blk_mq_is_shared_tags(hctx->flags))
0236         atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags);
0237     else
0238         atomic_sub(val, &hctx->nr_active);
0239 }
0240 
/* Drop exactly one request from the active-request accounting. */
static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
{
	__blk_mq_sub_active_requests(hctx, 1);
}
0245 
0246 static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
0247 {
0248     if (blk_mq_is_shared_tags(hctx->flags))
0249         return atomic_read(&hctx->queue->nr_active_requests_shared_tags);
0250     return atomic_read(&hctx->nr_active);
0251 }
/*
 * Release @rq's driver tag back to the hctx's tag map and clear it on the
 * request.  If the request was counted as in flight (RQF_MQ_INFLIGHT),
 * also drop it from the active-request accounting.
 */
static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
	rq->tag = BLK_MQ_NO_TAG;

	if (rq->rq_flags & RQF_MQ_INFLIGHT) {
		rq->rq_flags &= ~RQF_MQ_INFLIGHT;
		__blk_mq_dec_active_requests(hctx);
	}
}
0263 
0264 static inline void blk_mq_put_driver_tag(struct request *rq)
0265 {
0266     if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG)
0267         return;
0268 
0269     __blk_mq_put_driver_tag(rq->mq_hctx, rq);
0270 }
0271 
0272 bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq);
0273 
/*
 * Ensure @rq holds a driver tag.  Fast path: the request already has a tag
 * and the tag set is not shared across queues, so just (re)publish the
 * request in the tags->rqs[] lookup table.  Otherwise fall back to
 * __blk_mq_get_driver_tag() (slow path, defined elsewhere).
 */
static inline bool blk_mq_get_driver_tag(struct request *rq)
{
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	if (rq->tag != BLK_MQ_NO_TAG &&
	    !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
		hctx->tags->rqs[rq->tag] = rq;
		return true;
	}

	return __blk_mq_get_driver_tag(hctx, rq);
}
0286 
0287 static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
0288 {
0289     int cpu;
0290 
0291     for_each_possible_cpu(cpu)
0292         qmap->mq_map[cpu] = 0;
0293 }
0294 
0295 /*
0296  * blk_mq_plug() - Get caller context plug
0297  * @bio : the bio being submitted by the caller context
0298  *
0299  * Plugging, by design, may delay the insertion of BIOs into the elevator in
0300  * order to increase BIO merging opportunities. This however can cause BIO
0301  * insertion order to change from the order in which submit_bio() is being
0302  * executed in the case of multiple contexts concurrently issuing BIOs to a
0303  * device, even if these context are synchronized to tightly control BIO issuing
0304  * order. While this is not a problem with regular block devices, this ordering
0305  * change can cause write BIO failures with zoned block devices as these
0306  * require sequential write patterns to zones. Prevent this from happening by
0307  * ignoring the plug state of a BIO issuing context if it is for a zoned block
0308  * device and the BIO to plug is a write operation.
0309  *
0310  * Return current->plug if the bio can be plugged and NULL otherwise
0311  */
0312 static inline struct blk_plug *blk_mq_plug( struct bio *bio)
0313 {
0314     /* Zoned block device write operation case: do not plug the BIO */
0315     if (bdev_is_zoned(bio->bi_bdev) && op_is_write(bio_op(bio)))
0316         return NULL;
0317 
0318     /*
0319      * For regular block devices or read operations, use the context plug
0320      * which may be NULL if blk_start_plug() was not executed.
0321      */
0322     return current->plug;
0323 }
0324 
/* Free all requests on the list */
static inline void blk_mq_free_requests(struct list_head *list)
{
	while (!list_empty(list)) {
		struct request *rq = list_entry_rq(list->next);

		/* Detach from the list before freeing the request. */
		list_del_init(&rq->queuelist);
		blk_mq_free_request(rq);
	}
}
0335 
/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
			      struct sbitmap_queue *bt)
{
	unsigned int depth, users;

	/* Non-shared tag sets are never throttled. */
	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
		return true;

	/*
	 * Don't try dividing an ant: with a single tag there is
	 * nothing to partition fairly.
	 */
	if (bt->sb.depth == 1)
		return true;

	/* Only throttle once sharing has actually been marked active. */
	if (blk_mq_is_shared_tags(hctx->flags)) {
		struct request_queue *q = hctx->queue;

		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
			return true;
	} else {
		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			return true;
	}

	users = atomic_read(&hctx->tags->active_queues);

	if (!users)
		return true;

	/*
	 * Allow at least some tags: each user gets roughly
	 * depth/users tags (rounded up), but never fewer than 4.
	 */
	depth = max((bt->sb.depth + users - 1) / users, 4U);
	return __blk_mq_active_requests(hctx) < depth;
}
0375 
/*
 * Run the code block in @dispatch_ops with rcu/srcu read lock held.
 * Queues with an SRCU domain may sleep during dispatch (hence the
 * might_sleep_if() annotation); all others are protected by plain RCU.
 */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \
do {								\
	if (!blk_queue_has_srcu(q)) {				\
		rcu_read_lock();				\
		(dispatch_ops);					\
		rcu_read_unlock();				\
	} else {						\
		int srcu_idx;					\
								\
		might_sleep_if(check_sleep);			\
		srcu_idx = srcu_read_lock((q)->srcu);		\
		(dispatch_ops);					\
		srcu_read_unlock((q)->srcu, srcu_idx);		\
	}							\
} while (0)
0392 
/*
 * Convenience wrapper that always allows sleeping when the queue uses SRCU.
 * Note: no trailing line continuation — the previous form ended with a
 * backslash that silently extended the macro onto the following line.
 */
#define blk_mq_run_dispatch_ops(q, dispatch_ops)		\
	__blk_mq_run_dispatch_ops(q, true, dispatch_ops)
0395 
0396 #endif