0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Functions related to segment and merge handling
0004  */
0005 #include <linux/kernel.h>
0006 #include <linux/module.h>
0007 #include <linux/bio.h>
0008 #include <linux/blkdev.h>
0009 #include <linux/blk-integrity.h>
0010 #include <linux/scatterlist.h>
0011 #include <linux/part_stat.h>
0012 #include <linux/blk-cgroup.h>
0013 
0014 #include <trace/events/block.h>
0015 
0016 #include "blk.h"
0017 #include "blk-mq-sched.h"
0018 #include "blk-rq-qos.h"
0019 #include "blk-throttle.h"
0020 
0021 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
0022 {
0023     *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
0024 }
0025 
0026 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
0027 {
0028     struct bvec_iter iter = bio->bi_iter;
0029     int idx;
0030 
0031     bio_get_first_bvec(bio, bv);
0032     if (bv->bv_len == bio->bi_iter.bi_size)
0033         return;     /* this bio only has a single bvec */
0034 
0035     bio_advance_iter(bio, &iter, iter.bi_size);
0036 
0037     if (!iter.bi_bvec_done)
0038         idx = iter.bi_idx - 1;
0039     else    /* in the middle of bvec */
0040         idx = iter.bi_idx;
0041 
0042     *bv = bio->bi_io_vec[idx];
0043 
0044     /*
0045      * iter.bi_bvec_done records the actual length of the last bvec
0046      * if this bio ends in the middle of one io vector.
0047      */
0048     if (iter.bi_bvec_done)
0049         bv->bv_len = iter.bi_bvec_done;
0050 }
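
/*
 * Worked example (assumed values, not from the original code): if a bio
 * covers only the first 512 bytes of its final 4 KiB bvec, then after
 * advancing the iterator to the end iter.bi_bvec_done == 512, so bv_len
 * is trimmed to 512 and *bv describes just the part the bio actually uses.
 */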
0051 
0052 static inline bool bio_will_gap(struct request_queue *q,
0053         struct request *prev_rq, struct bio *prev, struct bio *next)
0054 {
0055     struct bio_vec pb, nb;
0056 
0057     if (!bio_has_data(prev) || !queue_virt_boundary(q))
0058         return false;
0059 
0060     /*
0061      * Don't merge if the 1st bio starts with non-zero offset, otherwise it
0062      * is quite difficult to respect the sg gap limit.  We work hard to
0063      * merge the large numbers of small bios generated e.g. by mkfs.
0064      */
0065     if (prev_rq)
0066         bio_get_first_bvec(prev_rq->bio, &pb);
0067     else
0068         bio_get_first_bvec(prev, &pb);
0069     if (pb.bv_offset & queue_virt_boundary(q))
0070         return true;
0071 
0072     /*
0073      * We don't need to worry about the case where the merged segment
0074      * ends at an unaligned virt boundary:
0075      *
0076      * - if 'pb' ends aligned, the merged segment ends aligned
0077      * - if 'pb' ends unaligned, the next bio must include
0078      *   a single bvec 'nb', otherwise 'nb' can't
0079      *   merge with 'pb'
0080      */
0081     bio_get_last_bvec(prev, &pb);
0082     bio_get_first_bvec(next, &nb);
0083     if (biovec_phys_mergeable(q, &pb, &nb))
0084         return false;
0085     return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
0086 }
0087 
0088 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
0089 {
0090     return bio_will_gap(req->q, req, req->biotail, bio);
0091 }
0092 
0093 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
0094 {
0095     return bio_will_gap(req->q, NULL, bio, req->bio);
0096 }
0097 
0098 /*
0099  * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
0100  * is defined as 'unsigned int'; in addition it has to be aligned to the
0101  * logical block size, which is the minimum unit accepted by the hardware.
0102  */
0103 static unsigned int bio_allowed_max_sectors(struct queue_limits *lim)
0104 {
0105     return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
0106 }
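
/*
 * Worked example (assumed values, not from the original code): with a
 * 4096-byte logical block size, round_down(UINT_MAX, 4096) == 4294963200
 * bytes, which is 8388600 sectors after shifting by SECTOR_SHIFT (9).
 */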
0107 
0108 static struct bio *bio_split_discard(struct bio *bio, struct queue_limits *lim,
0109         unsigned *nsegs, struct bio_set *bs)
0110 {
0111     unsigned int max_discard_sectors, granularity;
0112     sector_t tmp;
0113     unsigned split_sectors;
0114 
0115     *nsegs = 1;
0116 
0117     /* Zero-sector (unknown) and one-sector granularities are the same.  */
0118     granularity = max(lim->discard_granularity >> 9, 1U);
0119 
0120     max_discard_sectors =
0121         min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
0122     max_discard_sectors -= max_discard_sectors % granularity;
0123 
0124     if (unlikely(!max_discard_sectors)) {
0125         /* XXX: warn */
0126         return NULL;
0127     }
0128 
0129     if (bio_sectors(bio) <= max_discard_sectors)
0130         return NULL;
0131 
0132     split_sectors = max_discard_sectors;
0133 
0134     /*
0135      * If the next starting sector would be misaligned, stop the discard at
0136      * the previous aligned sector.
0137      */
0138     tmp = bio->bi_iter.bi_sector + split_sectors -
0139         ((lim->discard_alignment >> 9) % granularity);
0140     tmp = sector_div(tmp, granularity);
0141 
0142     if (split_sectors > tmp)
0143         split_sectors -= tmp;
0144 
0145     return bio_split(bio, split_sectors, GFP_NOIO, bs);
0146 }
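
/*
 * Worked example (assumed values, not from the original code): with a
 * discard_granularity of 1 MiB (2048 sectors), discard_alignment 0,
 * max_discard_sectors 4096 and a bio starting at sector 3000,
 * tmp = (3000 + 4096) % 2048 = 952, so split_sectors becomes
 * 4096 - 952 = 3144 and the remainder starts at sector 6144, which is
 * granularity aligned.
 */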
0147 
0148 static struct bio *bio_split_write_zeroes(struct bio *bio,
0149         struct queue_limits *lim, unsigned *nsegs, struct bio_set *bs)
0150 {
0151     *nsegs = 0;
0152     if (!lim->max_write_zeroes_sectors)
0153         return NULL;
0154     if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
0155         return NULL;
0156     return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
0157 }
0158 
0159 /*
0160  * Return the maximum number of sectors from the start of a bio that may be
0161  * submitted as a single request to a block device. If enough sectors remain,
0162  * align the end to the physical block size. Otherwise align the end to the
0163  * logical block size. This approach minimizes the number of non-aligned
0164  * requests that are submitted to a block device if the start of a bio is not
0165  * aligned to a physical block boundary.
0166  */
0167 static inline unsigned get_max_io_size(struct bio *bio,
0168         struct queue_limits *lim)
0169 {
0170     unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
0171     unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
0172     unsigned max_sectors = lim->max_sectors, start, end;
0173 
0174     if (lim->chunk_sectors) {
0175         max_sectors = min(max_sectors,
0176             blk_chunk_sectors_left(bio->bi_iter.bi_sector,
0177                            lim->chunk_sectors));
0178     }
0179 
0180     start = bio->bi_iter.bi_sector & (pbs - 1);
0181     end = (start + max_sectors) & ~(pbs - 1);
0182     if (end > start)
0183         return end - start;
0184     return max_sectors & ~(lbs - 1);
0185 }
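
/*
 * Worked example (assumed values, not from the original code): with a
 * 4 KiB physical block size (pbs = 8), lbs = 1, max_sectors = 256 and no
 * chunk_sectors limit, a bio starting at sector 3 gives start = 3 and
 * end = (3 + 256) & ~7 = 256, so 253 sectors are allowed and the I/O
 * ends on a physical block boundary.
 */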
0186 
0187 static inline unsigned get_max_segment_size(struct queue_limits *lim,
0188         struct page *start_page, unsigned long offset)
0189 {
0190     unsigned long mask = lim->seg_boundary_mask;
0191 
0192     offset = mask & (page_to_phys(start_page) + offset);
0193 
0194     /*
0195      * Overflow may be triggered if the page's physical address is zero
0196      * on a 32-bit arch; use the queue's max segment size when that happens.
0197      */
0198     return min_not_zero(mask - offset + 1,
0199             (unsigned long)lim->max_segment_size);
0200 }
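
/*
 * Worked example (assumed values, not from the original code): with
 * seg_boundary_mask = 0xffff (a 64 KiB boundary) and data starting
 * 0x1000 bytes into a boundary window, mask - offset + 1 = 0xf000, so at
 * most 60 KiB fits in this segment before crossing the boundary, further
 * capped by max_segment_size via min_not_zero().
 */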
0201 
0202 /**
0203  * bvec_split_segs - verify whether or not a bvec should be split in the middle
0204  * @lim:      [in] queue limits to split based on
0205  * @bv:       [in] bvec to examine
0206  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
0207  *            by the number of segments from @bv that may be appended to that
0208  *            bio without exceeding @max_segs
0209  * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
0210  *            by the number of bytes from @bv that may be appended to that
0211  *            bio without exceeding @max_bytes
0212  * @max_segs: [in] upper bound for *@nsegs
0213  * @max_bytes: [in] upper bound for *@bytes
0214  *
0215  * When splitting a bio, it can happen that a bvec is encountered that is too
0216  * big to fit in a single segment and hence that it has to be split in the
0217  * middle. This function verifies whether or not that should happen. The value
0218  * %true is returned if and only if appending the entire @bv to a bio with
0219  * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
0220  * the block driver.
0221  */
0222 static bool bvec_split_segs(struct queue_limits *lim, const struct bio_vec *bv,
0223         unsigned *nsegs, unsigned *bytes, unsigned max_segs,
0224         unsigned max_bytes)
0225 {
0226     unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
0227     unsigned len = min(bv->bv_len, max_len);
0228     unsigned total_len = 0;
0229     unsigned seg_size = 0;
0230 
0231     while (len && *nsegs < max_segs) {
0232         seg_size = get_max_segment_size(lim, bv->bv_page,
0233                         bv->bv_offset + total_len);
0234         seg_size = min(seg_size, len);
0235 
0236         (*nsegs)++;
0237         total_len += seg_size;
0238         len -= seg_size;
0239 
0240         if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
0241             break;
0242     }
0243 
0244     *bytes += total_len;
0245 
0246     /* tell the caller to split the bvec if it is too big to fit */
0247     return len > 0 || bv->bv_len > max_len;
0248 }
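
/*
 * Worked example (assumed values, not from the original code): a 16 KiB
 * bvec with max_segment_size = 8 KiB and no virt boundary is accounted
 * as two 8 KiB segments; if @max_segs allows only one more segment, the
 * loop stops with len still non-zero and the function returns true so
 * the caller splits the bio.
 */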
0249 
0250 /**
0251  * bio_split_rw - split a bio in two bios
0252  * @bio:  [in] bio to be split
0253  * @lim:  [in] queue limits to split based on
0254  * @segs: [out] number of segments in the bio with the first half of the sectors
0255  * @bs:   [in] bio set to allocate the clone from
0256  * @max_bytes: [in] maximum number of bytes per bio
0257  *
0258  * Clone @bio, update the bi_iter of the clone to represent the first sectors
0259  * of @bio and update @bio->bi_iter to represent the remaining sectors. The
0260  * following is guaranteed for the cloned bio:
0261  * - That it has at most @max_bytes worth of data
0262  * - That it has at most @lim->max_segments segments.
0263  *
0264  * Except for discard requests the cloned bio will point at the bi_io_vec of
0265  * the original bio. It is the responsibility of the caller to ensure that the
0266  * original bio is not freed before the cloned bio. The caller is also
0267  * responsible for ensuring that @bs is only destroyed after processing of the
0268  * split bio has finished.
0269  */
0270 static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim,
0271         unsigned *segs, struct bio_set *bs, unsigned max_bytes)
0272 {
0273     struct bio_vec bv, bvprv, *bvprvp = NULL;
0274     struct bvec_iter iter;
0275     unsigned nsegs = 0, bytes = 0;
0276 
0277     bio_for_each_bvec(bv, bio, iter) {
0278         /*
0279          * If the queue doesn't support SG gaps and adding this
0280          * offset would create a gap, disallow it.
0281          */
0282         if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
0283             goto split;
0284 
0285         if (nsegs < lim->max_segments &&
0286             bytes + bv.bv_len <= max_bytes &&
0287             bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
0288             nsegs++;
0289             bytes += bv.bv_len;
0290         } else {
0291             if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
0292                     lim->max_segments, max_bytes))
0293                 goto split;
0294         }
0295 
0296         bvprv = bv;
0297         bvprvp = &bvprv;
0298     }
0299 
0300     *segs = nsegs;
0301     return NULL;
0302 split:
0303     *segs = nsegs;
0304 
0305     /*
0306      * Individual bvecs might not be logical block aligned. Round down the
0307      * split size so that each bio is properly block size aligned, even if
0308      * we do not use the full hardware limits.
0309      */
0310     bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
0311 
0312     /*
0313      * Bio splitting may cause subtle trouble such as hangs when doing sync
0314      * iopoll in the direct IO path. Given that the performance gain of iopoll
0315      * for big IO can be trivial, disable iopoll when a split is needed.
0316      */
0317     bio_clear_polled(bio);
0318     return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
0319 }
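
/*
 * Worked example (assumed values, not from the original code): if the
 * accumulated split size is 4608 bytes on a queue with a 4096-byte
 * logical block size, ALIGN_DOWN() trims it to 4096 bytes, i.e. the
 * split bio carries 8 sectors and stays logical block aligned.
 */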
0320 
0321 /**
0322  * __bio_split_to_limits - split a bio to fit the queue limits
0323  * @bio:     bio to be split
0324  * @lim:     queue limits to split based on
0325  * @nr_segs: returns the number of segments in the returned bio
0326  *
0327  * Check if @bio needs splitting based on the queue limits, and if so split off
0328  * a bio fitting the limits from the beginning of @bio and return it.  @bio is
0329  * shortened to the remainder and re-submitted.
0330  *
0331  * The split bio is allocated from @bio->bi_bdev->bd_disk->bio_split, which
0332  * is provided by the block layer.
0333  */
0334 struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
0335                unsigned int *nr_segs)
0336 {
0337     struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
0338     struct bio *split;
0339 
0340     switch (bio_op(bio)) {
0341     case REQ_OP_DISCARD:
0342     case REQ_OP_SECURE_ERASE:
0343         split = bio_split_discard(bio, lim, nr_segs, bs);
0344         break;
0345     case REQ_OP_WRITE_ZEROES:
0346         split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
0347         break;
0348     default:
0349         split = bio_split_rw(bio, lim, nr_segs, bs,
0350                 get_max_io_size(bio, lim) << SECTOR_SHIFT);
0351         break;
0352     }
0353 
0354     if (split) {
0355         /* there is no chance to merge the split bio */
0356         split->bi_opf |= REQ_NOMERGE;
0357 
0358         blkcg_bio_issue_init(split);
0359         bio_chain(split, bio);
0360         trace_block_split(split, bio->bi_iter.bi_sector);
0361         submit_bio_noacct(bio);
0362         return split;
0363     }
0364     return bio;
0365 }
0366 
0367 /**
0368  * bio_split_to_limits - split a bio to fit the queue limits
0369  * @bio:     bio to be split
0370  *
0371  * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
0372  * if so split off a bio fitting the limits from the beginning of @bio and
0373  * return it.  @bio is shortened to the remainder and re-submitted.
0374  *
0375  * The split bio is allocated from @bio->bi_bdev->bd_disk->bio_split, which
0376  * is provided by the block layer.
0377  */
0378 struct bio *bio_split_to_limits(struct bio *bio)
0379 {
0380     struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
0381     unsigned int nr_segs;
0382 
0383     if (bio_may_exceed_limits(bio, lim))
0384         return __bio_split_to_limits(bio, lim, &nr_segs);
0385     return bio;
0386 }
0387 EXPORT_SYMBOL(bio_split_to_limits);
0388 
0389 unsigned int blk_recalc_rq_segments(struct request *rq)
0390 {
0391     unsigned int nr_phys_segs = 0;
0392     unsigned int bytes = 0;
0393     struct req_iterator iter;
0394     struct bio_vec bv;
0395 
0396     if (!rq->bio)
0397         return 0;
0398 
0399     switch (bio_op(rq->bio)) {
0400     case REQ_OP_DISCARD:
0401     case REQ_OP_SECURE_ERASE:
0402         if (queue_max_discard_segments(rq->q) > 1) {
0403             struct bio *bio = rq->bio;
0404 
0405             for_each_bio(bio)
0406                 nr_phys_segs++;
0407             return nr_phys_segs;
0408         }
0409         return 1;
0410     case REQ_OP_WRITE_ZEROES:
0411         return 0;
0412     default:
0413         break;
0414     }
0415 
0416     rq_for_each_bvec(bv, rq, iter)
0417         bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
0418                 UINT_MAX, UINT_MAX);
0419     return nr_phys_segs;
0420 }
0421 
0422 static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
0423         struct scatterlist *sglist)
0424 {
0425     if (!*sg)
0426         return sglist;
0427 
0428     /*
0429      * If the driver previously mapped a shorter list, we could see a
0430      * termination bit prematurely unless it fully inits the sg table
0431      * on each mapping. We KNOW that there must be more entries here
0432      * or the driver would be buggy, so force clear the termination bit
0433      * to avoid doing a full sg_init_table() in drivers for each command.
0434      */
0435     sg_unmark_end(*sg);
0436     return sg_next(*sg);
0437 }
0438 
0439 static unsigned blk_bvec_map_sg(struct request_queue *q,
0440         struct bio_vec *bvec, struct scatterlist *sglist,
0441         struct scatterlist **sg)
0442 {
0443     unsigned nbytes = bvec->bv_len;
0444     unsigned nsegs = 0, total = 0;
0445 
0446     while (nbytes > 0) {
0447         unsigned offset = bvec->bv_offset + total;
0448         unsigned len = min(get_max_segment_size(&q->limits,
0449                    bvec->bv_page, offset), nbytes);
0450         struct page *page = bvec->bv_page;
0451 
0452         /*
0453          * Unfortunately a fair number of drivers barf on scatterlists
0454          * that have an offset larger than PAGE_SIZE, despite other
0455          * subsystems dealing with that invariant just fine.  For now
0456          * stick to the legacy format where we never present those from
0457          * the block layer, but the code below should be removed once
0458          * these offenders (mostly MMC/SD drivers) are fixed.
0459          */
0460         page += (offset >> PAGE_SHIFT);
0461         offset &= ~PAGE_MASK;
0462 
0463         *sg = blk_next_sg(sg, sglist);
0464         sg_set_page(*sg, page, len, offset);
0465 
0466         total += len;
0467         nbytes -= len;
0468         nsegs++;
0469     }
0470 
0471     return nsegs;
0472 }
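
/*
 * Worked example (assumed values, not from the original code): a 12 KiB
 * bvec at page offset 0 with max_segment_size = 8 KiB is mapped as an
 * 8 KiB sg entry followed by a 4 KiB entry, and the offset stored in
 * each entry is always below PAGE_SIZE as required by the legacy format
 * described above.
 */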
0473 
0474 static inline int __blk_bvec_map_sg(struct bio_vec bv,
0475         struct scatterlist *sglist, struct scatterlist **sg)
0476 {
0477     *sg = blk_next_sg(sg, sglist);
0478     sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
0479     return 1;
0480 }
0481 
0482 /* only try to merge bvecs into one sg if they are from two bios */
0483 static inline bool
0484 __blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
0485                struct bio_vec *bvprv, struct scatterlist **sg)
0486 {
0487 
0488     int nbytes = bvec->bv_len;
0489 
0490     if (!*sg)
0491         return false;
0492 
0493     if ((*sg)->length + nbytes > queue_max_segment_size(q))
0494         return false;
0495 
0496     if (!biovec_phys_mergeable(q, bvprv, bvec))
0497         return false;
0498 
0499     (*sg)->length += nbytes;
0500 
0501     return true;
0502 }
0503 
0504 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
0505                  struct scatterlist *sglist,
0506                  struct scatterlist **sg)
0507 {
0508     struct bio_vec bvec, bvprv = { NULL };
0509     struct bvec_iter iter;
0510     int nsegs = 0;
0511     bool new_bio = false;
0512 
0513     for_each_bio(bio) {
0514         bio_for_each_bvec(bvec, bio, iter) {
0515             /*
0516              * Only try to merge bvecs from two different bios, since
0517              * bio-internal merging was already done when pages were
0518              * added to the bio.
0519              */
0520             if (new_bio &&
0521                 __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
0522                 goto next_bvec;
0523 
0524             if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
0525                 nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
0526             else
0527                 nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
0528  next_bvec:
0529             new_bio = false;
0530         }
0531         if (likely(bio->bi_iter.bi_size)) {
0532             bvprv = bvec;
0533             new_bio = true;
0534         }
0535     }
0536 
0537     return nsegs;
0538 }
0539 
0540 /*
0541  * Map a request to a scatterlist, return the number of sg entries set up.
0542  * The caller must make sure sg can hold rq->nr_phys_segments entries.
0543  */
0544 int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
0545         struct scatterlist *sglist, struct scatterlist **last_sg)
0546 {
0547     int nsegs = 0;
0548 
0549     if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
0550         nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
0551     else if (rq->bio)
0552         nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
0553 
0554     if (*last_sg)
0555         sg_mark_end(*last_sg);
0556 
0557     /*
0558      * Something must have gone wrong if the computed number of
0559      * segments is bigger than the number of the request's physical segments.
0560      */
0561     WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
0562 
0563     return nsegs;
0564 }
0565 EXPORT_SYMBOL(__blk_rq_map_sg);
0566 
0567 static inline unsigned int blk_rq_get_max_segments(struct request *rq)
0568 {
0569     if (req_op(rq) == REQ_OP_DISCARD)
0570         return queue_max_discard_segments(rq->q);
0571     return queue_max_segments(rq->q);
0572 }
0573 
0574 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
0575                           sector_t offset)
0576 {
0577     struct request_queue *q = rq->q;
0578     unsigned int max_sectors;
0579 
0580     if (blk_rq_is_passthrough(rq))
0581         return q->limits.max_hw_sectors;
0582 
0583     max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
0584     if (!q->limits.chunk_sectors ||
0585         req_op(rq) == REQ_OP_DISCARD ||
0586         req_op(rq) == REQ_OP_SECURE_ERASE)
0587         return max_sectors;
0588     return min(max_sectors,
0589            blk_chunk_sectors_left(offset, q->limits.chunk_sectors));
0590 }
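
/*
 * Worked example (assumed values, not from the original code): on a
 * queue with chunk_sectors = 256, a non-discard request starting at
 * sector 200 is limited to min(max_sectors, 56) so it does not cross
 * the chunk boundary at sector 256.
 */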
0591 
0592 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
0593         unsigned int nr_phys_segs)
0594 {
0595     if (!blk_cgroup_mergeable(req, bio))
0596         goto no_merge;
0597 
0598     if (blk_integrity_merge_bio(req->q, req, bio) == false)
0599         goto no_merge;
0600 
0601     /* discard request merge won't add new segment */
0602     if (req_op(req) == REQ_OP_DISCARD)
0603         return 1;
0604 
0605     if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
0606         goto no_merge;
0607 
0608     /*
0609      * This will form the start of a new hw segment.  Bump the
0610      * segment counter.
0611      */
0612     req->nr_phys_segments += nr_phys_segs;
0613     return 1;
0614 
0615 no_merge:
0616     req_set_nomerge(req->q, req);
0617     return 0;
0618 }
0619 
0620 int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
0621 {
0622     if (req_gap_back_merge(req, bio))
0623         return 0;
0624     if (blk_integrity_rq(req) &&
0625         integrity_req_gap_back_merge(req, bio))
0626         return 0;
0627     if (!bio_crypt_ctx_back_mergeable(req, bio))
0628         return 0;
0629     if (blk_rq_sectors(req) + bio_sectors(bio) >
0630         blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
0631         req_set_nomerge(req->q, req);
0632         return 0;
0633     }
0634 
0635     return ll_new_hw_segment(req, bio, nr_segs);
0636 }
0637 
0638 static int ll_front_merge_fn(struct request *req, struct bio *bio,
0639         unsigned int nr_segs)
0640 {
0641     if (req_gap_front_merge(req, bio))
0642         return 0;
0643     if (blk_integrity_rq(req) &&
0644         integrity_req_gap_front_merge(req, bio))
0645         return 0;
0646     if (!bio_crypt_ctx_front_mergeable(req, bio))
0647         return 0;
0648     if (blk_rq_sectors(req) + bio_sectors(bio) >
0649         blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
0650         req_set_nomerge(req->q, req);
0651         return 0;
0652     }
0653 
0654     return ll_new_hw_segment(req, bio, nr_segs);
0655 }
0656 
0657 static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
0658         struct request *next)
0659 {
0660     unsigned short segments = blk_rq_nr_discard_segments(req);
0661 
0662     if (segments >= queue_max_discard_segments(q))
0663         goto no_merge;
0664     if (blk_rq_sectors(req) + bio_sectors(next->bio) >
0665         blk_rq_get_max_sectors(req, blk_rq_pos(req)))
0666         goto no_merge;
0667 
0668     req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
0669     return true;
0670 no_merge:
0671     req_set_nomerge(q, req);
0672     return false;
0673 }
0674 
0675 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
0676                 struct request *next)
0677 {
0678     int total_phys_segments;
0679 
0680     if (req_gap_back_merge(req, next->bio))
0681         return 0;
0682 
0683     /*
0684      * Will it become too large?
0685      */
0686     if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
0687         blk_rq_get_max_sectors(req, blk_rq_pos(req)))
0688         return 0;
0689 
0690     total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
0691     if (total_phys_segments > blk_rq_get_max_segments(req))
0692         return 0;
0693 
0694     if (!blk_cgroup_mergeable(req, next->bio))
0695         return 0;
0696 
0697     if (blk_integrity_merge_rq(q, req, next) == false)
0698         return 0;
0699 
0700     if (!bio_crypt_ctx_merge_rq(req, next))
0701         return 0;
0702 
0703     /* Merge is OK... */
0704     req->nr_phys_segments = total_phys_segments;
0705     return 1;
0706 }
0707 
0708 /**
0709  * blk_rq_set_mixed_merge - mark a request as mixed merge
0710  * @rq: request to mark as mixed merge
0711  *
0712  * Description:
0713  *     @rq is about to be mixed merged.  Make sure the attributes
0714  *     which can be mixed are set in each bio and mark @rq as mixed
0715  *     merged.
0716  */
0717 void blk_rq_set_mixed_merge(struct request *rq)
0718 {
0719     blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
0720     struct bio *bio;
0721 
0722     if (rq->rq_flags & RQF_MIXED_MERGE)
0723         return;
0724 
0725     /*
0726      * @rq will no longer represent mixable attributes for all the
0727      * contained bios.  It will just track those of the first one.
0728      * Distribute the attributes to each bio.
0729      */
0730     for (bio = rq->bio; bio; bio = bio->bi_next) {
0731         WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
0732                  (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
0733         bio->bi_opf |= ff;
0734     }
0735     rq->rq_flags |= RQF_MIXED_MERGE;
0736 }
0737 
0738 static void blk_account_io_merge_request(struct request *req)
0739 {
0740     if (blk_do_io_stat(req)) {
0741         part_stat_lock();
0742         part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
0743         part_stat_unlock();
0744     }
0745 }
0746 
0747 static enum elv_merge blk_try_req_merge(struct request *req,
0748                     struct request *next)
0749 {
0750     if (blk_discard_mergable(req))
0751         return ELEVATOR_DISCARD_MERGE;
0752     else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
0753         return ELEVATOR_BACK_MERGE;
0754 
0755     return ELEVATOR_NO_MERGE;
0756 }
0757 
0758 /*
0759  * For non-mq, this has to be called with the request spinlock acquired.
0760  * For mq with scheduling, the appropriate queue wide lock should be held.
0761  */
0762 static struct request *attempt_merge(struct request_queue *q,
0763                      struct request *req, struct request *next)
0764 {
0765     if (!rq_mergeable(req) || !rq_mergeable(next))
0766         return NULL;
0767 
0768     if (req_op(req) != req_op(next))
0769         return NULL;
0770 
0771     if (rq_data_dir(req) != rq_data_dir(next))
0772         return NULL;
0773 
0774     if (req->ioprio != next->ioprio)
0775         return NULL;
0776 
0777     /*
0778      * If we are allowed to merge, then append the bio list
0779      * from next to rq and release next. merge_requests_fn
0780      * will have updated the segment counts; update the sector
0781      * count here. Handle DISCARDs separately, as they
0782      * have separate settings.
0783      */
0784 
0785     switch (blk_try_req_merge(req, next)) {
0786     case ELEVATOR_DISCARD_MERGE:
0787         if (!req_attempt_discard_merge(q, req, next))
0788             return NULL;
0789         break;
0790     case ELEVATOR_BACK_MERGE:
0791         if (!ll_merge_requests_fn(q, req, next))
0792             return NULL;
0793         break;
0794     default:
0795         return NULL;
0796     }
0797 
0798     /*
0799      * If failfast settings disagree or any of the two is already
0800      * a mixed merge, mark both as mixed before proceeding.  This
0801      * makes sure that all involved bios have mixable attributes
0802      * set properly.
0803      */
0804     if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
0805         (req->cmd_flags & REQ_FAILFAST_MASK) !=
0806         (next->cmd_flags & REQ_FAILFAST_MASK)) {
0807         blk_rq_set_mixed_merge(req);
0808         blk_rq_set_mixed_merge(next);
0809     }
0810 
0811     /*
0812      * At this point we have either done a back merge or front merge. We
0813      * need the smaller start_time_ns of the merged requests to be the
0814      * current request for accounting purposes.
0815      */
0816     if (next->start_time_ns < req->start_time_ns)
0817         req->start_time_ns = next->start_time_ns;
0818 
0819     req->biotail->bi_next = next->bio;
0820     req->biotail = next->biotail;
0821 
0822     req->__data_len += blk_rq_bytes(next);
0823 
0824     if (!blk_discard_mergable(req))
0825         elv_merge_requests(q, req, next);
0826 
0827     /*
0828      * 'next' is going away, so update stats accordingly
0829      */
0830     blk_account_io_merge_request(next);
0831 
0832     trace_block_rq_merge(next);
0833 
0834     /*
0835      * Ownership of the bios has passed from next to req; return 'next' for
0836      * the caller to free.
0837      */
0838     next->bio = NULL;
0839     return next;
0840 }
0841 
0842 static struct request *attempt_back_merge(struct request_queue *q,
0843         struct request *rq)
0844 {
0845     struct request *next = elv_latter_request(q, rq);
0846 
0847     if (next)
0848         return attempt_merge(q, rq, next);
0849 
0850     return NULL;
0851 }
0852 
0853 static struct request *attempt_front_merge(struct request_queue *q,
0854         struct request *rq)
0855 {
0856     struct request *prev = elv_former_request(q, rq);
0857 
0858     if (prev)
0859         return attempt_merge(q, prev, rq);
0860 
0861     return NULL;
0862 }
0863 
0864 /*
0865  * Try to merge 'next' into 'rq'. Return true if the merge happened, false
0866  * otherwise. The caller is responsible for freeing 'next' if the merge
0867  * happened.
0868  */
0869 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
0870                struct request *next)
0871 {
0872     return attempt_merge(q, rq, next);
0873 }
0874 
0875 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
0876 {
0877     if (!rq_mergeable(rq) || !bio_mergeable(bio))
0878         return false;
0879 
0880     if (req_op(rq) != bio_op(bio))
0881         return false;
0882 
0883     /* different data direction or already started, don't merge */
0884     if (bio_data_dir(bio) != rq_data_dir(rq))
0885         return false;
0886 
0887     /* don't merge across cgroup boundaries */
0888     if (!blk_cgroup_mergeable(rq, bio))
0889         return false;
0890 
0891     /* only merge integrity protected bio into ditto rq */
0892     if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
0893         return false;
0894 
0895     /* Only merge if the crypt contexts are compatible */
0896     if (!bio_crypt_rq_ctx_compatible(rq, bio))
0897         return false;
0898 
0899     if (rq->ioprio != bio_prio(bio))
0900         return false;
0901 
0902     return true;
0903 }
0904 
0905 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
0906 {
0907     if (blk_discard_mergable(rq))
0908         return ELEVATOR_DISCARD_MERGE;
0909     else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
0910         return ELEVATOR_BACK_MERGE;
0911     else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
0912         return ELEVATOR_FRONT_MERGE;
0913     return ELEVATOR_NO_MERGE;
0914 }
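
/*
 * Worked example (assumed values, not from the original code): for a
 * request covering sectors 100-107 (blk_rq_pos() == 100,
 * blk_rq_sectors() == 8), a bio starting at sector 108 is a back merge
 * candidate, while an 8-sector bio starting at sector 92 is a front
 * merge candidate because 100 - 8 == 92.
 */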
0915 
0916 static void blk_account_io_merge_bio(struct request *req)
0917 {
0918     if (!blk_do_io_stat(req))
0919         return;
0920 
0921     part_stat_lock();
0922     part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
0923     part_stat_unlock();
0924 }
0925 
0926 enum bio_merge_status {
0927     BIO_MERGE_OK,
0928     BIO_MERGE_NONE,
0929     BIO_MERGE_FAILED,
0930 };
0931 
0932 static enum bio_merge_status bio_attempt_back_merge(struct request *req,
0933         struct bio *bio, unsigned int nr_segs)
0934 {
0935     const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK;
0936 
0937     if (!ll_back_merge_fn(req, bio, nr_segs))
0938         return BIO_MERGE_FAILED;
0939 
0940     trace_block_bio_backmerge(bio);
0941     rq_qos_merge(req->q, req, bio);
0942 
0943     if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
0944         blk_rq_set_mixed_merge(req);
0945 
0946     req->biotail->bi_next = bio;
0947     req->biotail = bio;
0948     req->__data_len += bio->bi_iter.bi_size;
0949 
0950     bio_crypt_free_ctx(bio);
0951 
0952     blk_account_io_merge_bio(req);
0953     return BIO_MERGE_OK;
0954 }
0955 
0956 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
0957         struct bio *bio, unsigned int nr_segs)
0958 {
0959     const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK;
0960 
0961     if (!ll_front_merge_fn(req, bio, nr_segs))
0962         return BIO_MERGE_FAILED;
0963 
0964     trace_block_bio_frontmerge(bio);
0965     rq_qos_merge(req->q, req, bio);
0966 
0967     if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
0968         blk_rq_set_mixed_merge(req);
0969 
0970     bio->bi_next = req->bio;
0971     req->bio = bio;
0972 
0973     req->__sector = bio->bi_iter.bi_sector;
0974     req->__data_len += bio->bi_iter.bi_size;
0975 
0976     bio_crypt_do_front_merge(req, bio);
0977 
0978     blk_account_io_merge_bio(req);
0979     return BIO_MERGE_OK;
0980 }
0981 
0982 static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
0983         struct request *req, struct bio *bio)
0984 {
0985     unsigned short segments = blk_rq_nr_discard_segments(req);
0986 
0987     if (segments >= queue_max_discard_segments(q))
0988         goto no_merge;
0989     if (blk_rq_sectors(req) + bio_sectors(bio) >
0990         blk_rq_get_max_sectors(req, blk_rq_pos(req)))
0991         goto no_merge;
0992 
0993     rq_qos_merge(q, req, bio);
0994 
0995     req->biotail->bi_next = bio;
0996     req->biotail = bio;
0997     req->__data_len += bio->bi_iter.bi_size;
0998     req->nr_phys_segments = segments + 1;
0999 
1000     blk_account_io_merge_bio(req);
1001     return BIO_MERGE_OK;
1002 no_merge:
1003     req_set_nomerge(q, req);
1004     return BIO_MERGE_FAILED;
1005 }
1006 
1007 static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
1008                            struct request *rq,
1009                            struct bio *bio,
1010                            unsigned int nr_segs,
1011                            bool sched_allow_merge)
1012 {
1013     if (!blk_rq_merge_ok(rq, bio))
1014         return BIO_MERGE_NONE;
1015 
1016     switch (blk_try_merge(rq, bio)) {
1017     case ELEVATOR_BACK_MERGE:
1018         if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1019             return bio_attempt_back_merge(rq, bio, nr_segs);
1020         break;
1021     case ELEVATOR_FRONT_MERGE:
1022         if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1023             return bio_attempt_front_merge(rq, bio, nr_segs);
1024         break;
1025     case ELEVATOR_DISCARD_MERGE:
1026         return bio_attempt_discard_merge(q, rq, bio);
1027     default:
1028         return BIO_MERGE_NONE;
1029     }
1030 
1031     return BIO_MERGE_FAILED;
1032 }
1033 
1034 /**
1035  * blk_attempt_plug_merge - try to merge with %current's plugged list
1036  * @q: request_queue new bio is being queued at
1037  * @bio: new bio being queued
1038  * @nr_segs: number of segments in @bio
1040  *
1041  * Determine whether @bio being queued on @q can be merged with the previous
1042  * request on %current's plugged list.  Returns %true if merge was successful,
1043  * otherwise %false.
1044  *
1045  * Plugging coalesces IOs from the same issuer for the same purpose without
1046  * going through @q->queue_lock.  As such it's more of an issuing mechanism
1047  * than scheduling, and the request, while it may have elvpriv data, is not
1048  * added to the elevator at this point.  In addition, we don't have
1049  * reliable access to the elevator outside the queue lock.  Only check basic
1050  * merging parameters without querying the elevator.
1051  *
1052  * Caller must ensure !blk_queue_nomerges(q) beforehand.
1053  */
1054 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1055         unsigned int nr_segs)
1056 {
1057     struct blk_plug *plug;
1058     struct request *rq;
1059 
1060     plug = blk_mq_plug(bio);
1061     if (!plug || rq_list_empty(plug->mq_list))
1062         return false;
1063 
1064     rq_list_for_each(&plug->mq_list, rq) {
1065         if (rq->q == q) {
1066             if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1067                 BIO_MERGE_OK)
1068                 return true;
1069             break;
1070         }
1071 
1072         /*
1073          * Only keep iterating plug list for merges if we have multiple
1074          * queues
1075          */
1076         if (!plug->multiple_queues)
1077             break;
1078     }
1079     return false;
1080 }
1081 
1082 /*
1083  * Iterate list of requests and see if we can merge this bio with any
1084  * of them.
1085  */
1086 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1087             struct bio *bio, unsigned int nr_segs)
1088 {
1089     struct request *rq;
1090     int checked = 8;
1091 
1092     list_for_each_entry_reverse(rq, list, queuelist) {
1093         if (!checked--)
1094             break;
1095 
1096         switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1097         case BIO_MERGE_NONE:
1098             continue;
1099         case BIO_MERGE_OK:
1100             return true;
1101         case BIO_MERGE_FAILED:
1102             return false;
1103         }
1104 
1105     }
1106 
1107     return false;
1108 }
1109 EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1110 
1111 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1112         unsigned int nr_segs, struct request **merged_request)
1113 {
1114     struct request *rq;
1115 
1116     switch (elv_merge(q, &rq, bio)) {
1117     case ELEVATOR_BACK_MERGE:
1118         if (!blk_mq_sched_allow_merge(q, rq, bio))
1119             return false;
1120         if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1121             return false;
1122         *merged_request = attempt_back_merge(q, rq);
1123         if (!*merged_request)
1124             elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1125         return true;
1126     case ELEVATOR_FRONT_MERGE:
1127         if (!blk_mq_sched_allow_merge(q, rq, bio))
1128             return false;
1129         if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1130             return false;
1131         *merged_request = attempt_front_merge(q, rq);
1132         if (!*merged_request)
1133             elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1134         return true;
1135     case ELEVATOR_DISCARD_MERGE:
1136         return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1137     default:
1138         return false;
1139     }
1140 }
1141 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);