// SPDX-License-Identifier: GPL-2.0-only
/*
 * SCSI Zoned Block commands
 *
 * Copyright (C) 2014-2015 SUSE Linux GmbH
 * Written by: Hannes Reinecke <hare@suse.de>
 * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>

#include <asm/unaligned.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

#include "sd.h"

/**
 * sd_zbc_get_zone_wp_offset - Get zone write pointer offset.
 * @zone: Zone for which to return the write pointer offset.
 *
 * Return: offset of the write pointer from the start of the zone.
 */
static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
{
    if (zone->type == ZBC_ZONE_TYPE_CONV)
        return 0;

    switch (zone->cond) {
    case BLK_ZONE_COND_IMP_OPEN:
    case BLK_ZONE_COND_EXP_OPEN:
    case BLK_ZONE_COND_CLOSED:
        return zone->wp - zone->start;
    case BLK_ZONE_COND_FULL:
        return zone->len;
    case BLK_ZONE_COND_EMPTY:
    case BLK_ZONE_COND_OFFLINE:
    case BLK_ZONE_COND_READONLY:
    default:
        /*
         * Offline and read-only zones do not have a valid
         * write pointer. Use 0, as for an empty zone.
         */
        return 0;
    }
}

/* Whether or not a SCSI zone descriptor describes a gap zone. */
static bool sd_zbc_is_gap_zone(const u8 buf[64])
{
    return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
}
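
/*
 * Layout of the 64-byte SCSI zone descriptor, as consumed by
 * sd_zbc_parse_report() below (all multi-byte fields are big endian):
 * byte 0, bits 3:0 = zone type; byte 1, bits 7:4 = zone condition,
 * bit 1 = non-sequential write resources active, bit 0 = reset recommended;
 * bytes 8..15 = zone length/capacity in logical blocks; bytes 16..23 =
 * zone start LBA; bytes 24..31 = write pointer LBA.
 */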

/**
 * sd_zbc_parse_report - Parse a SCSI zone descriptor
 * @sdkp: SCSI disk pointer.
 * @buf: SCSI zone descriptor.
 * @idx: Index of the zone relative to the first zone reported by the current
 *  sd_zbc_report_zones() call.
 * @cb: Callback function pointer.
 * @data: Second argument passed to @cb.
 *
 * Return: Value returned by @cb.
 *
 * Convert a SCSI zone descriptor into struct blk_zone format. Additionally,
 * call @cb(blk_zone, @data).
 */
static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
                   unsigned int idx, report_zones_cb cb, void *data)
{
    struct scsi_device *sdp = sdkp->device;
    struct blk_zone zone = { 0 };
    sector_t start_lba, gran;
    int ret;

    if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
        return -EINVAL;

    zone.type = buf[0] & 0x0f;
    zone.cond = (buf[1] >> 4) & 0xf;
    if (buf[1] & 0x01)
        zone.reset = 1;
    if (buf[1] & 0x02)
        zone.non_seq = 1;

    start_lba = get_unaligned_be64(&buf[16]);
    zone.start = logical_to_sectors(sdp, start_lba);
    zone.capacity = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
    zone.len = zone.capacity;
    if (sdkp->zone_starting_lba_gran) {
        gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
        if (zone.len > gran) {
            sd_printk(KERN_ERR, sdkp,
                  "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n",
                  start_lba,
                  sectors_to_logical(sdp, zone.capacity),
                  sectors_to_logical(sdp, zone.len),
                  sectors_to_logical(sdp, gran));
            return -EINVAL;
        }
        /*
         * Use the starting LBA granularity instead of the zone length
         * obtained from the REPORT ZONES command.
         */
        zone.len = gran;
    }
    if (zone.cond == ZBC_ZONE_COND_FULL)
        zone.wp = zone.start + zone.len;
    else
        zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));

    ret = cb(&zone, idx, data);
    if (ret)
        return ret;

    if (sdkp->rev_wp_offset)
        sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);

    return 0;
}

/**
 * sd_zbc_do_report_zones - Issue a REPORT ZONES SCSI command.
 * @sdkp: The target disk
 * @buf: vmalloc-ed buffer to use for the reply
 * @buflen: the buffer size
 * @lba: Start LBA of the report
 * @partial: Do partial report
 *
 * For internal use during device validation.
 * Using partial=true can significantly speed up execution of a report zones
 * command because the disk does not have to count all possible matching
 * zones and will only report the count of zones fitting in the command reply
 * buffer.
 */
static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
                  unsigned int buflen, sector_t lba,
                  bool partial)
{
    struct scsi_device *sdp = sdkp->device;
    const int timeout = sdp->request_queue->rq_timeout;
    struct scsi_sense_hdr sshdr;
    unsigned char cmd[16];
    unsigned int rep_len;
    int result;

    memset(cmd, 0, 16);
    cmd[0] = ZBC_IN;
    cmd[1] = ZI_REPORT_ZONES;
    put_unaligned_be64(lba, &cmd[2]);
    put_unaligned_be32(buflen, &cmd[10]);
    if (partial)
        cmd[14] = ZBC_REPORT_ZONE_PARTIAL;

    result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
                  buf, buflen, &sshdr,
                  timeout, SD_MAX_RETRIES, NULL);
    if (result) {
        sd_printk(KERN_ERR, sdkp,
              "REPORT ZONES start lba %llu failed\n", lba);
        sd_print_result(sdkp, "REPORT ZONES", result);
        if (result > 0 && scsi_sense_valid(&sshdr))
            sd_print_sense_hdr(sdkp, &sshdr);
        return -EIO;
    }

    rep_len = get_unaligned_be32(&buf[0]);
    if (rep_len < 64) {
        sd_printk(KERN_ERR, sdkp,
              "REPORT ZONES report invalid length %u\n",
              rep_len);
        return -EIO;
    }

    return 0;
}
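
/*
 * For reference, the 16-byte REPORT ZONES CDB built above: byte 0 = ZBC_IN
 * opcode, byte 1 = REPORT ZONES service action, bytes 2..9 = starting LBA
 * (big endian), bytes 10..13 = allocation length, byte 14 = reporting
 * options (partial bit). The reply starts with a 64-byte header whose
 * bytes 0..3 hold the zone list length in bytes and whose bytes 8..15 hold
 * the device maximum LBA (used by sd_zbc_check_capacity()); the 64-byte
 * zone descriptors follow from offset 64.
 */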

/**
 * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
 * @sdkp: The target disk
 * @nr_zones: Maximum number of zones to report
 * @buflen: Size of the buffer allocated
 *
 * Try to allocate a reply buffer for the number of requested zones.
 * The size of the buffer allocated may be smaller than requested to
 * satisfy the device constraints (max_hw_sectors, max_segments, etc).
 *
 * Return the address of the allocated buffer and update @buflen with
 * the size of the allocated buffer.
 */
static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
                    unsigned int nr_zones, size_t *buflen)
{
    struct request_queue *q = sdkp->disk->queue;
    size_t bufsize;
    void *buf;

    /*
     * Report zone buffer size should be at most 64B times the number of
     * zones requested plus the 64B reply header, but should be aligned
     * to SECTOR_SIZE for ATA devices.
     * Make sure that this size does not exceed the hardware capabilities.
     * Furthermore, since the report zone command cannot be split, make
     * sure that the allocated buffer can always be mapped by limiting the
     * number of pages allocated to the HBA max segments limit.
     */
    nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
    bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
    bufsize = min_t(size_t, bufsize,
            queue_max_hw_sectors(q) << SECTOR_SHIFT);
    bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

    while (bufsize >= SECTOR_SIZE) {
        buf = __vmalloc(bufsize,
                GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
        if (buf) {
            *buflen = bufsize;
            return buf;
        }
        bufsize = rounddown(bufsize >> 1, SECTOR_SIZE);
    }

    return NULL;
}
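
/*
 * Illustrative sizing example (the numbers are hypothetical): asking for
 * 127 zones needs (127 + 1) * 64 = 8192 bytes, already a multiple of
 * SECTOR_SIZE, so bufsize starts at 8 KiB before being clamped to the
 * queue max_hw_sectors and max_segments limits. On allocation failure the
 * size is halved (rounded down to a sector multiple) and retried until it
 * would drop below one sector.
 */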

/**
 * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
 * @sdkp: The target disk
 */
static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
{
    return logical_to_sectors(sdkp->device, sdkp->zone_info.zone_blocks);
}

/**
 * sd_zbc_report_zones - SCSI .report_zones() callback.
 * @disk: Disk to report zones for.
 * @sector: Start sector.
 * @nr_zones: Maximum number of zones to report.
 * @cb: Callback function called to report zone information.
 * @data: Second argument passed to @cb.
 *
 * Called by the block layer to iterate over zone information. See also the
 * disk->fops->report_zones() calls in block/blk-zoned.c.
 */
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
            unsigned int nr_zones, report_zones_cb cb, void *data)
{
    struct scsi_disk *sdkp = scsi_disk(disk);
    sector_t lba = sectors_to_logical(sdkp->device, sector);
    unsigned int nr, i;
    unsigned char *buf;
    u64 zone_length, start_lba;
    size_t offset, buflen = 0;
    int zone_idx = 0;
    int ret;

    if (!sd_is_zoned(sdkp))
        /* Not a zoned device */
        return -EOPNOTSUPP;

    if (!sdkp->capacity)
        /* Device gone or invalid */
        return -ENODEV;

    buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
    if (!buf)
        return -ENOMEM;

    while (zone_idx < nr_zones && lba < sdkp->capacity) {
        ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, true);
        if (ret)
            goto out;

        offset = 0;
        nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
        if (!nr)
            break;

        for (i = 0; i < nr && zone_idx < nr_zones; i++) {
            offset += 64;
            start_lba = get_unaligned_be64(&buf[offset + 16]);
            zone_length = get_unaligned_be64(&buf[offset + 8]);
            if ((zone_idx == 0 &&
                (lba < start_lba ||
                 lba >= start_lba + zone_length)) ||
                (zone_idx > 0 && start_lba != lba) ||
                start_lba + zone_length < start_lba) {
                sd_printk(KERN_ERR, sdkp,
                      "Zone %d at LBA %llu is invalid: %llu + %llu\n",
                      zone_idx, lba, start_lba, zone_length);
                ret = -EINVAL;
                goto out;
            }
            lba = start_lba + zone_length;
            if (sd_zbc_is_gap_zone(&buf[offset])) {
                if (sdkp->zone_starting_lba_gran)
                    continue;
                sd_printk(KERN_ERR, sdkp,
                      "Gap zone without constant LBA offsets\n");
                ret = -EINVAL;
                goto out;
            }

            ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
                          cb, data);
            if (ret)
                goto out;

            zone_idx++;
        }
    }

    ret = zone_idx;
out:
    kvfree(buf);
    return ret;
}

static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
{
    struct request *rq = scsi_cmd_to_rq(cmd);
    struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
    sector_t sector = blk_rq_pos(rq);

    if (!sd_is_zoned(sdkp))
        /* Not a zoned device */
        return BLK_STS_IOERR;

    if (sdkp->device->changed)
        return BLK_STS_IOERR;

    if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
        /* Unaligned request */
        return BLK_STS_IOERR;

    return BLK_STS_OK;
}

#define SD_ZBC_INVALID_WP_OFST  (~0u)
#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
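
/*
 * The two values above are sentinels stored in the zones_wp_offset[] cache:
 * SD_ZBC_INVALID_WP_OFST marks a zone whose write pointer offset must be
 * re-read from the device, and SD_ZBC_UPDATING_WP_OFST marks a zone for
 * which that re-read has been scheduled but has not completed yet (see
 * sd_zbc_update_wp_offset_workfn() and sd_zbc_prepare_zone_append()).
 */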

static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
                    void *data)
{
    struct scsi_disk *sdkp = data;

    lockdep_assert_held(&sdkp->zones_wp_offset_lock);

    sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);

    return 0;
}

/*
 * An attempt to append to a zone triggered an invalid write pointer error.
 * Reread the write pointer of the zone(s) in which the append failed.
 */
static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
{
    struct scsi_disk *sdkp;
    unsigned long flags;
    sector_t zno;
    int ret;

    sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);

    spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
    for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) {
        if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
            continue;

        spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
        ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
                         SD_BUF_SIZE,
                         zno * sdkp->zone_info.zone_blocks, true);
        spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
        if (!ret)
            sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
                        zno, sd_zbc_update_wp_offset_cb,
                        sdkp);
    }
    spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);

    scsi_device_put(sdkp->device);
}

/**
 * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
 * @cmd: the command to setup
 * @lba: the LBA to patch
 * @nr_blocks: the number of LBAs to be written
 *
 * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
 * sd_zbc_prepare_zone_append() handles the necessary zone write locking and
 * patching of the LBA for an emulated ZONE_APPEND command.
 *
 * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
 * schedule a REPORT ZONES command and return BLK_STS_DEV_RESOURCE.
 */
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
                    unsigned int nr_blocks)
{
    struct request *rq = scsi_cmd_to_rq(cmd);
    struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
    unsigned int wp_offset, zno = blk_rq_zone_no(rq);
    unsigned long flags;
    blk_status_t ret;

    ret = sd_zbc_cmnd_checks(cmd);
    if (ret != BLK_STS_OK)
        return ret;

    if (!blk_rq_zone_is_seq(rq))
        return BLK_STS_IOERR;

    /* Unlock of the write lock will happen in sd_zbc_complete() */
    if (!blk_req_zone_write_trylock(rq))
        return BLK_STS_ZONE_RESOURCE;

    spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
    wp_offset = sdkp->zones_wp_offset[zno];
    switch (wp_offset) {
    case SD_ZBC_INVALID_WP_OFST:
        /*
         * We are about to schedule work to update a zone write pointer
         * offset, which will cause the zone append command to be
         * requeued. So make sure that the scsi device does not go away
         * while the work is being processed.
         */
        if (scsi_device_get(sdkp->device)) {
            ret = BLK_STS_IOERR;
            break;
        }
        sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
        schedule_work(&sdkp->zone_wp_offset_work);
        fallthrough;
    case SD_ZBC_UPDATING_WP_OFST:
        ret = BLK_STS_DEV_RESOURCE;
        break;
    default:
        wp_offset = sectors_to_logical(sdkp->device, wp_offset);
        if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) {
            ret = BLK_STS_IOERR;
            break;
        }

        *lba += wp_offset;
    }
    spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
    if (ret)
        blk_req_zone_write_unlock(rq);
    return ret;
}

/**
 * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
 *          can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
 * @cmd: the command to setup
 * @op: Operation to be performed
 * @all: All zones control
 *
 * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
 * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
 */
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
                     unsigned char op, bool all)
{
    struct request *rq = scsi_cmd_to_rq(cmd);
    sector_t sector = blk_rq_pos(rq);
    struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
    sector_t block = sectors_to_logical(sdkp->device, sector);
    blk_status_t ret;

    ret = sd_zbc_cmnd_checks(cmd);
    if (ret != BLK_STS_OK)
        return ret;

    cmd->cmd_len = 16;
    memset(cmd->cmnd, 0, cmd->cmd_len);
    cmd->cmnd[0] = ZBC_OUT;
    cmd->cmnd[1] = op;
    if (all)
        cmd->cmnd[14] = 0x1;
    else
        put_unaligned_be64(block, &cmd->cmnd[2]);

    rq->timeout = SD_TIMEOUT;
    cmd->sc_data_direction = DMA_NONE;
    cmd->transfersize = 0;
    cmd->allowed = 0;

    return BLK_STS_OK;
}
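
/*
 * For reference, the ZBC_OUT CDB built above: byte 0 = ZBC_OUT opcode,
 * byte 1 = service action selecting the zone operation (@op), bytes 2..9 =
 * zone ID, i.e. the starting LBA of the target zone (big endian), and
 * byte 14 bit 0 = ALL, applying the operation to every zone instead of a
 * single one (the zone ID is left at zero in that case).
 */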

static bool sd_zbc_need_zone_wp_update(struct request *rq)
{
    switch (req_op(rq)) {
    case REQ_OP_ZONE_APPEND:
    case REQ_OP_ZONE_FINISH:
    case REQ_OP_ZONE_RESET:
    case REQ_OP_ZONE_RESET_ALL:
        return true;
    case REQ_OP_WRITE:
    case REQ_OP_WRITE_ZEROES:
        return blk_rq_zone_is_seq(rq);
    default:
        return false;
    }
}
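
/*
 * Rationale for the filter above: reads never move a zone write pointer and
 * writes to conventional zones have no write pointer to track, so only zone
 * management operations, zone appends, and writes targeting sequential
 * zones require an update of the cached write pointer offsets.
 */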

/**
 * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
 * @cmd: Completed command
 * @good_bytes: Command reply bytes
 *
 * Called from sd_zbc_complete() to handle the update of the cached zone write
 * pointer value in case an update is needed.
 */
static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
                      unsigned int good_bytes)
{
    int result = cmd->result;
    struct request *rq = scsi_cmd_to_rq(cmd);
    struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
    unsigned int zno = blk_rq_zone_no(rq);
    enum req_op op = req_op(rq);
    unsigned long flags;

    /*
     * If we got an error for a command that needs updating the write
     * pointer offset cache, we must mark the zone wp offset entry as
     * invalid to force an update from disk the next time a zone append
     * command is issued.
     */
    spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);

    if (result && op != REQ_OP_ZONE_RESET_ALL) {
        if (op == REQ_OP_ZONE_APPEND) {
            /* Force completion of the whole command (no retry) */
            good_bytes = 0;
            scsi_set_resid(cmd, blk_rq_bytes(rq));
        }

        /*
         * Force an update of the zone write pointer offset on
         * the next zone append access.
         */
        if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
            sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
        goto unlock_wp_offset;
    }

    switch (op) {
    case REQ_OP_ZONE_APPEND:
        rq->__sector += sdkp->zones_wp_offset[zno];
        fallthrough;
    case REQ_OP_WRITE_ZEROES:
    case REQ_OP_WRITE:
        if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
            sdkp->zones_wp_offset[zno] +=
                        good_bytes >> SECTOR_SHIFT;
        break;
    case REQ_OP_ZONE_RESET:
        sdkp->zones_wp_offset[zno] = 0;
        break;
    case REQ_OP_ZONE_FINISH:
        sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
        break;
    case REQ_OP_ZONE_RESET_ALL:
        memset(sdkp->zones_wp_offset, 0,
               sdkp->zone_info.nr_zones * sizeof(unsigned int));
        break;
    default:
        break;
    }

unlock_wp_offset:
    spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);

    return good_bytes;
}

/**
 * sd_zbc_complete - ZBC command post processing.
 * @cmd: Completed command
 * @good_bytes: Command reply bytes
 * @sshdr: command sense header
 *
 * Called from sd_done() to handle zone command errors and updates to the
 * device queue zone write pointer offset cache.
 */
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
             struct scsi_sense_hdr *sshdr)
{
    int result = cmd->result;
    struct request *rq = scsi_cmd_to_rq(cmd);

    if (op_is_zone_mgmt(req_op(rq)) &&
        result &&
        sshdr->sense_key == ILLEGAL_REQUEST &&
        sshdr->asc == 0x24) {
        /*
         * INVALID FIELD IN CDB error: a zone management command was
         * attempted on a conventional zone. Nothing to worry about,
         * so be quiet about the error.
         */
        rq->rq_flags |= RQF_QUIET;
    } else if (sd_zbc_need_zone_wp_update(rq))
        good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);

    if (req_op(rq) == REQ_OP_ZONE_APPEND)
        blk_req_zone_write_unlock(rq);

    return good_bytes;
}

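/*
 * Zoned block device characteristics VPD page (B6h) fields consumed below:
 * byte 4 bit 0 = URSWRZ (unrestricted reads); bytes 8..11 and 12..15 =
 * optimal open / non-sequentially-written resources (host-aware only);
 * bytes 16..19 = maximum number of open zones (host-managed only);
 * byte 23 bits 3:0 = zone alignment method; bytes 24..31 = zone starting
 * LBA granularity. All multi-byte fields are big endian.
 */
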
/**
 * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
 * @sdkp: Target disk
 * @buf: Buffer in which to store the VPD page data
 *
 * Read VPD page B6, get information and check that reads are unconstrained.
 */
static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
                          unsigned char *buf)
{
    u64 zone_starting_lba_gran;

    if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
        sd_printk(KERN_NOTICE, sdkp,
              "Read zoned characteristics VPD page failed\n");
        return -ENODEV;
    }

    if (sdkp->device->type != TYPE_ZBC) {
        /* Host-aware */
        sdkp->urswrz = 1;
        sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]);
        sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]);
        sdkp->zones_max_open = 0;
        return 0;
    }

    /* Host-managed */
    sdkp->urswrz = buf[4] & 1;
    sdkp->zones_optimal_open = 0;
    sdkp->zones_optimal_nonseq = 0;
    sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
    /* Check zone alignment method */
    switch (buf[23] & 0xf) {
    case 0:
    case ZBC_CONSTANT_ZONE_LENGTH:
        /* Use zone length */
        break;
    case ZBC_CONSTANT_ZONE_START_OFFSET:
        zone_starting_lba_gran = get_unaligned_be64(&buf[24]);
        if (zone_starting_lba_gran == 0 ||
            !is_power_of_2(zone_starting_lba_gran) ||
            logical_to_sectors(sdkp->device, zone_starting_lba_gran) >
            UINT_MAX) {
            sd_printk(KERN_ERR, sdkp,
                  "Invalid zone starting LBA granularity %llu\n",
                  zone_starting_lba_gran);
            return -ENODEV;
        }
        sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
        break;
    default:
        sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n");
        return -ENODEV;
    }

    /*
     * Check for unconstrained reads: host-managed devices with
     * constrained reads (drives failing reads past the write pointer)
     * are not supported.
     */
    if (!sdkp->urswrz) {
        if (sdkp->first_scan)
            sd_printk(KERN_NOTICE, sdkp,
              "devices with constrained reads are not supported\n");
        return -ENODEV;
    }

    return 0;
}

/**
 * sd_zbc_check_capacity - Check the device capacity
 * @sdkp: Target disk
 * @buf: command buffer
 * @zblocks: zone size in logical blocks
 *
 * Get the device zone size and check that the device capacity as reported
 * by READ CAPACITY matches the max_lba value (plus one) of the report zones
 * command reply for devices with RC_BASIS == 0.
 *
 * Returns 0 upon success or an error code upon failure.
 */
static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
                 u32 *zblocks)
{
    u64 zone_blocks;
    sector_t max_lba;
    unsigned char *rec;
    int ret;

    /* Do a report zone to get max_lba and the size of the first zone */
    ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
    if (ret)
        return ret;

    if (sdkp->rc_basis == 0) {
        /* The max_lba field is the capacity of this device */
        max_lba = get_unaligned_be64(&buf[8]);
        if (sdkp->capacity != max_lba + 1) {
            if (sdkp->first_scan)
                sd_printk(KERN_WARNING, sdkp,
                    "Changing capacity from %llu to max LBA+1 %llu\n",
                    (unsigned long long)sdkp->capacity,
                    (unsigned long long)max_lba + 1);
            sdkp->capacity = max_lba + 1;
        }
    }

    if (sdkp->zone_starting_lba_gran == 0) {
        /* Get the size of the first reported zone */
        rec = buf + 64;
        zone_blocks = get_unaligned_be64(&rec[8]);
        if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
            if (sdkp->first_scan)
                sd_printk(KERN_NOTICE, sdkp,
                      "Zone size too large\n");
            return -EFBIG;
        }
    } else {
        zone_blocks = sdkp->zone_starting_lba_gran;
    }

    if (!is_power_of_2(zone_blocks)) {
        sd_printk(KERN_ERR, sdkp,
              "Zone size %llu is not a power of two.\n",
              zone_blocks);
        return -EINVAL;
    }

    *zblocks = zone_blocks;

    return 0;
}

static void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
    if (!sd_is_zoned(sdkp) || !sdkp->capacity)
        return;

    if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
        sd_printk(KERN_NOTICE, sdkp,
              "%u zones of %u logical blocks + 1 runt zone\n",
              sdkp->zone_info.nr_zones - 1,
              sdkp->zone_info.zone_blocks);
    else
        sd_printk(KERN_NOTICE, sdkp,
              "%u zones of %u logical blocks\n",
              sdkp->zone_info.nr_zones,
              sdkp->zone_info.zone_blocks);
}

static int sd_zbc_init_disk(struct scsi_disk *sdkp)
{
    sdkp->zones_wp_offset = NULL;
    spin_lock_init(&sdkp->zones_wp_offset_lock);
    sdkp->rev_wp_offset = NULL;
    mutex_init(&sdkp->rev_mutex);
    INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
    sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
    if (!sdkp->zone_wp_update_buf)
        return -ENOMEM;

    return 0;
}

void sd_zbc_free_zone_info(struct scsi_disk *sdkp)
{
    if (!sdkp->zone_wp_update_buf)
        return;

    /* Serialize against revalidate zones */
    mutex_lock(&sdkp->rev_mutex);

    kvfree(sdkp->zones_wp_offset);
    sdkp->zones_wp_offset = NULL;
    kfree(sdkp->zone_wp_update_buf);
    sdkp->zone_wp_update_buf = NULL;

    sdkp->early_zone_info = (struct zoned_disk_info){ };
    sdkp->zone_info = (struct zoned_disk_info){ };

    mutex_unlock(&sdkp->rev_mutex);
}

static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
{
    struct scsi_disk *sdkp = scsi_disk(disk);

    swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
}

/*
 * Call blk_revalidate_disk_zones() if any of the zoned disk properties that
 * make such a call necessary have changed. Called by sd_revalidate_disk()
 * after the gendisk capacity has been set.
 */
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
{
    struct gendisk *disk = sdkp->disk;
    struct request_queue *q = disk->queue;
    u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
    unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
    u32 max_append;
    int ret = 0;
    unsigned int flags;

    /*
     * For all zoned disks, initialize zone append emulation data if not
     * already done. This is also necessary for host-aware disks used as
     * regular disks due to the presence of partitions, as these partitions
     * may be deleted and the disk zoned model changed back from
     * BLK_ZONED_NONE to BLK_ZONED_HA.
     */
    if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
        ret = sd_zbc_init_disk(sdkp);
        if (ret)
            return ret;
    }

    /*
     * There is nothing to do for regular disks, including host-aware disks
     * that have partitions.
     */
    if (!blk_queue_is_zoned(q))
        return 0;

    /*
     * Make sure revalidate zones are serialized to ensure exclusive
     * updates of the scsi disk data.
     */
    mutex_lock(&sdkp->rev_mutex);

    if (sdkp->zone_info.zone_blocks == zone_blocks &&
        sdkp->zone_info.nr_zones == nr_zones &&
        disk->nr_zones == nr_zones)
        goto unlock;

    flags = memalloc_noio_save();
    sdkp->zone_info.zone_blocks = zone_blocks;
    sdkp->zone_info.nr_zones = nr_zones;
    sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
    if (!sdkp->rev_wp_offset) {
        ret = -ENOMEM;
        memalloc_noio_restore(flags);
        goto unlock;
    }

    ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);

    memalloc_noio_restore(flags);
    kvfree(sdkp->rev_wp_offset);
    sdkp->rev_wp_offset = NULL;

    if (ret) {
        sdkp->zone_info = (struct zoned_disk_info){ };
        sdkp->capacity = 0;
        goto unlock;
    }

    /*
     * An emulated zone append must fit entirely within its target zone
     * and within the queue hardware limits, so cap max_append by the
     * zone size, the max segments limit, and max_hw_sectors.
     */
    max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
               q->limits.max_segments << (PAGE_SHIFT - 9));
    max_append = min_t(u32, max_append, queue_max_hw_sectors(q));

    blk_queue_max_zone_append_sectors(q, max_append);

    sd_zbc_print_zones(sdkp);

unlock:
    mutex_unlock(&sdkp->rev_mutex);

    return ret;
}

/**
 * sd_zbc_read_zones - Read zone information and update the request queue
 * @sdkp: SCSI disk pointer.
 * @buf: 512-byte buffer used for storing SCSI command output.
 *
 * Read zone information and update the request queue zone characteristics and
 * also the zoned device information in *sdkp. Called by sd_revalidate_disk()
 * before the gendisk capacity has been set.
 */
int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
{
    struct gendisk *disk = sdkp->disk;
    struct request_queue *q = disk->queue;
    unsigned int nr_zones;
    u32 zone_blocks = 0;
    int ret;

    if (!sd_is_zoned(sdkp)) {
        /*
         * Device managed or normal SCSI disk, no special handling
         * required. Nevertheless, free the disk zone information in
         * case the device type changed.
         */
        sd_zbc_free_zone_info(sdkp);
        return 0;
    }

    /* READ16/WRITE16 is mandatory for ZBC disks */
    sdkp->device->use_16_for_rw = 1;
    sdkp->device->use_10_for_rw = 0;

    if (!blk_queue_is_zoned(q)) {
        /*
         * This can happen for a host-aware disk with partitions.
         * The block device zone model was already cleared by
         * disk_set_zoned(). Only free the scsi disk zone
         * information and exit early.
         */
        sd_zbc_free_zone_info(sdkp);
        return 0;
    }

    /* Check zoned block device characteristics (unconstrained reads) */
    ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
    if (ret)
        goto err;

    /* Check the device capacity reported by report zones */
    ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks);
    if (ret != 0)
        goto err;

    /* The drive satisfies the kernel restrictions: set it up */
    blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
    blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
    if (sdkp->zones_max_open == U32_MAX)
        disk_set_max_open_zones(disk, 0);
    else
        disk_set_max_open_zones(disk, sdkp->zones_max_open);
    disk_set_max_active_zones(disk, 0);
    nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);

    /*
     * Per ZBC and ZAC specifications, writes in sequential write required
     * zones of host-managed devices must be aligned to the device physical
     * block size.
     */
    if (blk_queue_zoned_model(q) == BLK_ZONED_HM)
        blk_queue_zone_write_granularity(q, sdkp->physical_block_size);

    sdkp->early_zone_info.nr_zones = nr_zones;
    sdkp->early_zone_info.zone_blocks = zone_blocks;

    return 0;

err:
    sdkp->capacity = 0;

    return ret;
}