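/*
 * Functions to sequence PREFLUSH and FUA writes.
 *
 * REQ_PREFLUSH/REQ_FUA requests are decomposed into sequences of up to
 * three optional steps - PREFLUSH, DATA and POSTFLUSH - according to the
 * policy computed by blk_flush_policy():
 *
 * - If the queue has no writeback cache (QUEUE_FLAG_WC unset), PREFLUSH
 *   and FUA make no difference: requests without data are completed
 *   immediately and requests with data are executed as normal writes.
 *
 * - If the queue has a writeback cache and supports FUA, REQ_PREFLUSH
 *   becomes a PREFLUSH step while REQ_FUA is passed through with the
 *   DATA step.
 *
 * - If the queue has a writeback cache but no FUA, REQ_PREFLUSH becomes
 *   a PREFLUSH step and REQ_FUA becomes a POSTFLUSH step.
 *
 * Flush execution is double buffered: requests that need a PRE- or
 * POSTFLUSH are queued on fq->flush_queue[fq->flush_pending_idx].  When a
 * REQ_OP_FLUSH is issued the pending index is toggled, and completion of
 * that flush advances every queued request to its next step.  This allows
 * arbitrary merging of PREFLUSH/FUA requests.  blk_kick_flush() only
 * issues the flush when no other flush is in progress and, unless a
 * request has been pending longer than FLUSH_PENDING_TIMEOUT, no DATA
 * step is in flight.
 */
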
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/blk-mq.h>
#include <linux/part_stat.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"

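/* PREFLUSH/FUA sequences */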
enum {
	REQ_FSEQ_PREFLUSH	= (1 << 0),
	REQ_FSEQ_DATA		= (1 << 1),
	REQ_FSEQ_POSTFLUSH	= (1 << 2),
	REQ_FSEQ_DONE		= (1 << 3),

	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
				  REQ_FSEQ_POSTFLUSH,

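	/*
	 * If a flush has been pending longer than the following timeout,
	 * it is issued even if flush_data requests are still in flight.
	 */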
	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
};

static void blk_kick_flush(struct request_queue *q,
			   struct blk_flush_queue *fq, blk_opf_t flags);

static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
{
	return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
}

static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
{
	unsigned int policy = 0;

	if (blk_rq_sectors(rq))
		policy |= REQ_FSEQ_DATA;

	if (fflags & (1UL << QUEUE_FLAG_WC)) {
		if (rq->cmd_flags & REQ_PREFLUSH)
			policy |= REQ_FSEQ_PREFLUSH;
		if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
		    (rq->cmd_flags & REQ_FUA))
			policy |= REQ_FSEQ_POSTFLUSH;
	}
	return policy;
}

static unsigned int blk_flush_cur_seq(struct request *rq)
{
	return 1 << ffz(rq->flush.seq);
}

static void blk_flush_restore_request(struct request *rq)
{
	/*
	 * After flush data completion, @rq->bio is %NULL but we need to
	 * complete the bio again.  @rq->biotail is guaranteed to equal the
	 * original @rq->bio.  Restore it.
	 */
	rq->bio = rq->biotail;

	/* make @rq a normal request */
	rq->rq_flags &= ~RQF_FLUSH_SEQ;
	rq->end_io = rq->flush.saved_end_io;
}

static void blk_flush_queue_rq(struct request *rq, bool add_front)
{
	blk_mq_add_to_requeue_list(rq, add_front, true);
}

static void blk_account_io_flush(struct request *rq)
{
	struct block_device *part = rq->q->disk->part0;

	part_stat_lock();
	part_stat_inc(part, ios[STAT_FLUSH]);
	part_stat_add(part, nsecs[STAT_FLUSH],
		      ktime_get_ns() - rq->start_time_ns);
	part_stat_unlock();
}

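/**
 * blk_flush_complete_seq - complete flush sequence
 * @rq: PREFLUSH/FUA request being sequenced
 * @fq: flush queue
 * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
 * @error: whether an error occurred
 *
 * @rq just completed @seq part of its flush sequence, record the
 * completion and trigger the next step.
 *
 * CONTEXT:
 * spin_lock_irq(fq->mq_flush_lock)
 */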
static void blk_flush_complete_seq(struct request *rq,
				   struct blk_flush_queue *fq,
				   unsigned int seq, blk_status_t error)
{
	struct request_queue *q = rq->q;
	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
	blk_opf_t cmd_flags;

	BUG_ON(rq->flush.seq & seq);
	rq->flush.seq |= seq;
	cmd_flags = rq->cmd_flags;

	if (likely(!error))
		seq = blk_flush_cur_seq(rq);
	else
		seq = REQ_FSEQ_DONE;

	switch (seq) {
	case REQ_FSEQ_PREFLUSH:
	case REQ_FSEQ_POSTFLUSH:
		/* queue for flush */
		if (list_empty(pending))
			fq->flush_pending_since = jiffies;
		list_move_tail(&rq->flush.list, pending);
		break;

	case REQ_FSEQ_DATA:
		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
		blk_flush_queue_rq(rq, true);
		break;

	case REQ_FSEQ_DONE:
		/*
		 * @rq was previously adjusted by blk_insert_flush() for
		 * flush sequencing and may already have gone through the
		 * flush data request completion path.  Restore @rq for
		 * normal completion and end it.
		 */
		BUG_ON(!list_empty(&rq->queuelist));
		list_del_init(&rq->flush.list);
		blk_flush_restore_request(rq);
		blk_mq_end_request(rq, error);
		break;

	default:
		BUG();
	}

	blk_kick_flush(q, fq, cmd_flags);
}

static void flush_end_io(struct request *flush_rq, blk_status_t error)
{
	struct request_queue *q = flush_rq->q;
	struct list_head *running;
	struct request *rq, *n;
	unsigned long flags = 0;
	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);

	/* release the tag's ownership to the req cloned from */
	spin_lock_irqsave(&fq->mq_flush_lock, flags);

	if (!req_ref_put_and_test(flush_rq)) {
		fq->rq_status = error;
		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
		return;
	}

	blk_account_io_flush(flush_rq);
	/*
	 * The flush request has to be marked as IDLE when it is really
	 * ended because its .end_io() is called from the timeout code
	 * path too, for avoiding use-after-free.
	 */
	WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
	if (fq->rq_status != BLK_STS_OK) {
		error = fq->rq_status;
		fq->rq_status = BLK_STS_OK;
	}

	if (!q->elevator) {
		flush_rq->tag = BLK_MQ_NO_TAG;
	} else {
		blk_mq_put_driver_tag(flush_rq);
		flush_rq->internal_tag = BLK_MQ_NO_TAG;
	}

	running = &fq->flush_queue[fq->flush_running_idx];
	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);

	/* account completion of the flush request */
	fq->flush_running_idx ^= 1;

	/* and push the waiting requests to the next stage */
	list_for_each_entry_safe(rq, n, running, flush.list) {
		unsigned int seq = blk_flush_cur_seq(rq);

		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
		blk_flush_complete_seq(rq, fq, seq, error);
	}

	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}

bool is_flush_rq(struct request *rq)
{
	return rq->end_io == flush_end_io;
}

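/**
 * blk_kick_flush - consider issuing flush request
 * @q: request_queue being kicked
 * @fq: flush queue
 * @flags: cmd_flags of the original request
 *
 * Flush related states of @q have changed, consider issuing a flush
 * request.  Please read the comment at the top of this file for more info.
 *
 * CONTEXT:
 * spin_lock_irq(fq->mq_flush_lock)
 */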
static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
			   blk_opf_t flags)
{
	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
	struct request *first_rq =
		list_first_entry(pending, struct request, flush.list);
	struct request *flush_rq = fq->flush_rq;

	/* only one flush may be in progress at any given time */
	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
		return;

	/*
	 * Hold off while flush data requests are in flight so that their
	 * POSTFLUSHes can be merged, unless a request has been pending
	 * longer than FLUSH_PENDING_TIMEOUT.
	 */
	if (!list_empty(&fq->flush_data_in_flight) &&
	    time_before(jiffies,
			fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
		return;

	/*
	 * Issue flush and toggle pending_idx.  This makes pending_idx
	 * different from running_idx, which means a flush is in flight.
	 */
	fq->flush_pending_idx ^= 1;

	blk_rq_init(q, flush_rq);

	/*
	 * In case of the none scheduler, borrow the tag from the first
	 * request since they can't be in flight at the same time.  In case
	 * of an IO scheduler, the flush rq borrows the scheduler tag just
	 * for put/get driver tag bookkeeping.
	 */
	flush_rq->mq_ctx = first_rq->mq_ctx;
	flush_rq->mq_hctx = first_rq->mq_hctx;

	if (!q->elevator) {
		flush_rq->tag = first_rq->tag;

		/*
		 * We borrow the data request's driver tag, so mark this
		 * flush request as INFLIGHT to avoid double accounting of
		 * that driver tag.
		 */
		flush_rq->rq_flags |= RQF_MQ_INFLIGHT;
	} else
		flush_rq->internal_tag = first_rq->internal_tag;

	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
	flush_rq->end_io = flush_end_io;

	/*
	 * Order WRITE ->end_io and WRITE rq->ref; its pair is the barrier
	 * implied in refcount_inc_not_zero() called from
	 * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref
	 * and READ flush_rq->end_io.
	 */
	smp_wmb();
	req_ref_set(flush_rq, 1);

	blk_flush_queue_rq(flush_rq, false);
}

static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	unsigned long flags;
	struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);

	if (q->elevator) {
		WARN_ON(rq->tag < 0);
		blk_mq_put_driver_tag(rq);
	}

	/*
	 * @rq just finished the DATA step of its flush sequence; record
	 * the completion and advance the sequence under the flush lock.
	 */
	spin_lock_irqsave(&fq->mq_flush_lock, flags);
	blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);

	blk_mq_sched_restart(hctx);
}

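/**
 * blk_insert_flush - insert a new PREFLUSH/FUA request
 * @rq: request to insert
 *
 * Decompose @rq into its flush sequence according to the queue's cache
 * features and start the sequence, or complete/dispatch @rq directly when
 * no flush steps are required.
 */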
void blk_insert_flush(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long fflags = q->queue_flags;	/* may change, cache */
	unsigned int policy = blk_flush_policy(fflags, rq);
	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);

	/*
	 * @policy now records what operations need to be done.  Adjust
	 * REQ_PREFLUSH and FUA for the driver.
	 */
	rq->cmd_flags &= ~REQ_PREFLUSH;
	if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
		rq->cmd_flags &= ~REQ_FUA;

	/*
	 * REQ_PREFLUSH|REQ_FUA implies REQ_SYNC, so if we clear any of
	 * those flags, we have to set REQ_SYNC to avoid skewing the
	 * request accounting.
	 */
	rq->cmd_flags |= REQ_SYNC;

	/*
	 * An empty flush handed down from a stacking driver may translate
	 * into nothing if the underlying device does not advertise a
	 * write-back cache.  In this case, simply complete the request.
	 */
	if (!policy) {
		blk_mq_end_request(rq, 0);
		return;
	}

	BUG_ON(rq->bio != rq->biotail); /* assumes zero or single bio rq */

	/*
	 * If there's data but a flush is not necessary, the request can
	 * be processed directly without going through the flush machinery.
	 * Queue it for normal execution.
	 */
	if ((policy & REQ_FSEQ_DATA) &&
	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
		blk_mq_request_bypass_insert(rq, false, true);
		return;
	}

	/*
	 * @rq should go through the flush machinery.  Mark it part of a
	 * flush sequence and submit it for further processing.
	 */
	memset(&rq->flush, 0, sizeof(rq->flush));
	INIT_LIST_HEAD(&rq->flush.list);
	rq->rq_flags |= RQF_FLUSH_SEQ;
	rq->flush.saved_end_io = rq->end_io; /* Save it for later */

	rq->end_io = mq_flush_data_end_io;

	spin_lock_irq(&fq->mq_flush_lock);
	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
	spin_unlock_irq(&fq->mq_flush_lock);
}

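/**
 * blkdev_issue_flush - queue a flush
 * @bdev: blockdev to issue the flush for
 *
 * Description:
 *    Issue a flush for the block device in question.
 */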
int blkdev_issue_flush(struct block_device *bdev)
{
	struct bio bio;

	bio_init(&bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH);
	return submit_bio_wait(&bio);
}
EXPORT_SYMBOL(blkdev_issue_flush);

struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
					      gfp_t flags)
{
	struct blk_flush_queue *fq;
	int rq_sz = sizeof(struct request);

	fq = kzalloc_node(sizeof(*fq), flags, node);
	if (!fq)
		goto fail;

	spin_lock_init(&fq->mq_flush_lock);

	rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
	fq->flush_rq = kzalloc_node(rq_sz, flags, node);
	if (!fq->flush_rq)
		goto fail_rq;

	INIT_LIST_HEAD(&fq->flush_queue[0]);
	INIT_LIST_HEAD(&fq->flush_queue[1]);
	INIT_LIST_HEAD(&fq->flush_data_in_flight);

	return fq;

 fail_rq:
	kfree(fq);
 fail:
	return NULL;
}

void blk_free_flush_queue(struct blk_flush_queue *fq)
{
	/* bio based request queues don't have a flush queue */
	if (!fq)
		return;

	kfree(fq->flush_rq);
	kfree(fq);
}

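/*
 * Allow a driver to set its own lock class for fq->mq_flush_lock in order
 * to avoid false-positive lockdep reports: flush_end_io() may be called
 * recursively through stacked drivers (e.g. nvme-loop), so the flush locks
 * of the upper and lower queues must not share a single lock class.
 */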
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
				   struct lock_class_key *key)
{
	lockdep_set_class(&hctx->fq->mq_flush_lock, key);
}
EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class);