0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/module.h>
0009 #include <linux/ctype.h>
0010 #include <linux/fs.h>
0011 #include <linux/kdev_t.h>
0012 #include <linux/kernel.h>
0013 #include <linux/blkdev.h>
0014 #include <linux/backing-dev.h>
0015 #include <linux/init.h>
0016 #include <linux/spinlock.h>
0017 #include <linux/proc_fs.h>
0018 #include <linux/seq_file.h>
0019 #include <linux/slab.h>
0020 #include <linux/kmod.h>
0021 #include <linux/major.h>
0022 #include <linux/mutex.h>
0023 #include <linux/idr.h>
0024 #include <linux/log2.h>
0025 #include <linux/pm_runtime.h>
0026 #include <linux/badblocks.h>
0027 #include <linux/part_stat.h>
0028 #include "blk-throttle.h"
0029
0030 #include "blk.h"
0031 #include "blk-mq-sched.h"
0032 #include "blk-rq-qos.h"
0033 #include "blk-cgroup.h"
0034
/*
 * Parent kobject for the per-disk symlinks that device_add_disk() creates and
 * del_gendisk() removes when sysfs_deprecated is not set.  It is created with
 * the name "block" in genhd_device_init().
 */
static struct kobject *block_depr;
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052 static atomic64_t diskseq;
0053
0054
/* blk_alloc_ext_minor() hands out ids in the range 0 .. NR_EXT_DEVT - 1. */
#define NR_EXT_DEVT		(1 << MINORBITS)
/* Id allocator used by blk_alloc_ext_minor() and blk_free_ext_minor(). */
static DEFINE_IDA(ext_devt_ida);
0057
0058 void set_capacity(struct gendisk *disk, sector_t sectors)
0059 {
0060 struct block_device *bdev = disk->part0;
0061
0062 spin_lock(&bdev->bd_size_lock);
0063 i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
0064 bdev->bd_nr_sectors = sectors;
0065 spin_unlock(&bdev->bd_size_lock);
0066 }
0067 EXPORT_SYMBOL(set_capacity);
0068
0069
0070
0071
0072
0073 bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
0074 {
0075 sector_t capacity = get_capacity(disk);
0076 char *envp[] = { "RESIZE=1", NULL };
0077
0078 set_capacity(disk, size);
0079
0080
0081
0082
0083
0084
0085 if (size == capacity ||
0086 !disk_live(disk) ||
0087 (disk->flags & GENHD_FL_HIDDEN))
0088 return false;
0089
0090 pr_info("%s: detected capacity change from %lld to %lld\n",
0091 disk->disk_name, capacity, size);
0092
0093
0094
0095
0096
0097 if (!capacity || !size)
0098 return false;
0099 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
0100 return true;
0101 }
0102 EXPORT_SYMBOL_GPL(set_capacity_and_notify);
0103
0104 static void part_stat_read_all(struct block_device *part,
0105 struct disk_stats *stat)
0106 {
0107 int cpu;
0108
0109 memset(stat, 0, sizeof(struct disk_stats));
0110 for_each_possible_cpu(cpu) {
0111 struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
0112 int group;
0113
0114 for (group = 0; group < NR_STAT_GROUPS; group++) {
0115 stat->nsecs[group] += ptr->nsecs[group];
0116 stat->sectors[group] += ptr->sectors[group];
0117 stat->ios[group] += ptr->ios[group];
0118 stat->merges[group] += ptr->merges[group];
0119 }
0120
0121 stat->io_ticks += ptr->io_ticks;
0122 }
0123 }
0124
0125 static unsigned int part_in_flight(struct block_device *part)
0126 {
0127 unsigned int inflight = 0;
0128 int cpu;
0129
0130 for_each_possible_cpu(cpu) {
0131 inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
0132 part_stat_local_read_cpu(part, in_flight[1], cpu);
0133 }
0134 if ((int)inflight < 0)
0135 inflight = 0;
0136
0137 return inflight;
0138 }
0139
0140 static void part_in_flight_rw(struct block_device *part,
0141 unsigned int inflight[2])
0142 {
0143 int cpu;
0144
0145 inflight[0] = 0;
0146 inflight[1] = 0;
0147 for_each_possible_cpu(cpu) {
0148 inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
0149 inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
0150 }
0151 if ((int)inflight[0] < 0)
0152 inflight[0] = 0;
0153 if ((int)inflight[1] < 0)
0154 inflight[1] = 0;
0155 }
0156
0157
0158
0159
0160
/*
 * Registered block majors, kept in a chained hash table indexed by
 * major_to_index().  Each entry records the major number, the name given at
 * registration (truncated to fit) and, with CONFIG_BLOCK_LEGACY_AUTOLOAD,
 * an optional probe callback.
 */
#define BLKDEV_MAJOR_HASH_SIZE 255
static struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;
	char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);
#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
/* Held across registration, unregistration and probe lookup. */
static DEFINE_MUTEX(major_names_lock);
/* Held while a bucket chain is walked by blkdev_show() or modified. */
static DEFINE_SPINLOCK(major_names_spinlock);
0172
0173
0174 static inline int major_to_index(unsigned major)
0175 {
0176 return major % BLKDEV_MAJOR_HASH_SIZE;
0177 }
0178
0179 #ifdef CONFIG_PROC_FS
0180 void blkdev_show(struct seq_file *seqf, off_t offset)
0181 {
0182 struct blk_major_name *dp;
0183
0184 spin_lock(&major_names_spinlock);
0185 for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
0186 if (dp->major == offset)
0187 seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
0188 spin_unlock(&major_names_spinlock);
0189 }
0190 #endif
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218 int __register_blkdev(unsigned int major, const char *name,
0219 void (*probe)(dev_t devt))
0220 {
0221 struct blk_major_name **n, *p;
0222 int index, ret = 0;
0223
0224 mutex_lock(&major_names_lock);
0225
0226
0227 if (major == 0) {
0228 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
0229 if (major_names[index] == NULL)
0230 break;
0231 }
0232
0233 if (index == 0) {
0234 printk("%s: failed to get major for %s\n",
0235 __func__, name);
0236 ret = -EBUSY;
0237 goto out;
0238 }
0239 major = index;
0240 ret = major;
0241 }
0242
0243 if (major >= BLKDEV_MAJOR_MAX) {
0244 pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
0245 __func__, major, BLKDEV_MAJOR_MAX-1, name);
0246
0247 ret = -EINVAL;
0248 goto out;
0249 }
0250
0251 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
0252 if (p == NULL) {
0253 ret = -ENOMEM;
0254 goto out;
0255 }
0256
0257 p->major = major;
0258 #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
0259 p->probe = probe;
0260 #endif
0261 strlcpy(p->name, name, sizeof(p->name));
0262 p->next = NULL;
0263 index = major_to_index(major);
0264
0265 spin_lock(&major_names_spinlock);
0266 for (n = &major_names[index]; *n; n = &(*n)->next) {
0267 if ((*n)->major == major)
0268 break;
0269 }
0270 if (!*n)
0271 *n = p;
0272 else
0273 ret = -EBUSY;
0274 spin_unlock(&major_names_spinlock);
0275
0276 if (ret < 0) {
0277 printk("register_blkdev: cannot get major %u for %s\n",
0278 major, name);
0279 kfree(p);
0280 }
0281 out:
0282 mutex_unlock(&major_names_lock);
0283 return ret;
0284 }
0285 EXPORT_SYMBOL(__register_blkdev);
0286
0287 void unregister_blkdev(unsigned int major, const char *name)
0288 {
0289 struct blk_major_name **n;
0290 struct blk_major_name *p = NULL;
0291 int index = major_to_index(major);
0292
0293 mutex_lock(&major_names_lock);
0294 spin_lock(&major_names_spinlock);
0295 for (n = &major_names[index]; *n; n = &(*n)->next)
0296 if ((*n)->major == major)
0297 break;
0298 if (!*n || strcmp((*n)->name, name)) {
0299 WARN_ON(1);
0300 } else {
0301 p = *n;
0302 *n = p->next;
0303 }
0304 spin_unlock(&major_names_spinlock);
0305 mutex_unlock(&major_names_lock);
0306 kfree(p);
0307 }
0308
0309 EXPORT_SYMBOL(unregister_blkdev);
0310
0311 int blk_alloc_ext_minor(void)
0312 {
0313 int idx;
0314
0315 idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL);
0316 if (idx == -ENOSPC)
0317 return -EBUSY;
0318 return idx;
0319 }
0320
0321 void blk_free_ext_minor(unsigned int minor)
0322 {
0323 ida_free(&ext_devt_ida, minor);
0324 }
0325
0326 static char *bdevt_str(dev_t devt, char *buf)
0327 {
0328 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
0329 char tbuf[BDEVT_SIZE];
0330 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
0331 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
0332 } else
0333 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
0334
0335 return buf;
0336 }
0337
0338 void disk_uevent(struct gendisk *disk, enum kobject_action action)
0339 {
0340 struct block_device *part;
0341 unsigned long idx;
0342
0343 rcu_read_lock();
0344 xa_for_each(&disk->part_tbl, idx, part) {
0345 if (bdev_is_partition(part) && !bdev_nr_sectors(part))
0346 continue;
0347 if (!kobject_get_unless_zero(&part->bd_device.kobj))
0348 continue;
0349
0350 rcu_read_unlock();
0351 kobject_uevent(bdev_kobj(part), action);
0352 put_device(&part->bd_device);
0353 rcu_read_lock();
0354 }
0355 rcu_read_unlock();
0356 }
0357 EXPORT_SYMBOL_GPL(disk_uevent);
0358
0359 int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
0360 {
0361 struct block_device *bdev;
0362
0363 if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
0364 return -EINVAL;
0365 if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
0366 return -EINVAL;
0367 if (disk->open_partitions)
0368 return -EBUSY;
0369
0370 set_bit(GD_NEED_PART_SCAN, &disk->state);
0371 bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
0372 if (IS_ERR(bdev))
0373 return PTR_ERR(bdev);
0374 blkdev_put(bdev, mode);
0375 return 0;
0376 }
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387 int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
0388 const struct attribute_group **groups)
0389
0390 {
0391 struct device *ddev = disk_to_dev(disk);
0392 int ret;
0393
0394
0395 if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
0396 return -EINVAL;
0397
0398
0399
0400
0401
0402
0403
0404 elevator_init_mq(disk->queue);
0405
0406
0407
0408
0409
0410
0411
0412
0413 if (disk->major) {
0414 if (WARN_ON(!disk->minors))
0415 return -EINVAL;
0416
0417 if (disk->minors > DISK_MAX_PARTS) {
0418 pr_err("block: can't allocate more than %d partitions\n",
0419 DISK_MAX_PARTS);
0420 disk->minors = DISK_MAX_PARTS;
0421 }
0422 if (disk->first_minor + disk->minors > MINORMASK + 1)
0423 return -EINVAL;
0424 } else {
0425 if (WARN_ON(disk->minors))
0426 return -EINVAL;
0427
0428 ret = blk_alloc_ext_minor();
0429 if (ret < 0)
0430 return ret;
0431 disk->major = BLOCK_EXT_MAJOR;
0432 disk->first_minor = ret;
0433 }
0434
0435
0436 dev_set_uevent_suppress(ddev, 1);
0437
0438 ddev->parent = parent;
0439 ddev->groups = groups;
0440 dev_set_name(ddev, "%s", disk->disk_name);
0441 if (!(disk->flags & GENHD_FL_HIDDEN))
0442 ddev->devt = MKDEV(disk->major, disk->first_minor);
0443 ret = device_add(ddev);
0444 if (ret)
0445 goto out_free_ext_minor;
0446
0447 ret = disk_alloc_events(disk);
0448 if (ret)
0449 goto out_device_del;
0450
0451 if (!sysfs_deprecated) {
0452 ret = sysfs_create_link(block_depr, &ddev->kobj,
0453 kobject_name(&ddev->kobj));
0454 if (ret)
0455 goto out_device_del;
0456 }
0457
0458
0459
0460
0461
0462
0463 pm_runtime_set_memalloc_noio(ddev, true);
0464
0465 ret = blk_integrity_add(disk);
0466 if (ret)
0467 goto out_del_block_link;
0468
0469 disk->part0->bd_holder_dir =
0470 kobject_create_and_add("holders", &ddev->kobj);
0471 if (!disk->part0->bd_holder_dir) {
0472 ret = -ENOMEM;
0473 goto out_del_integrity;
0474 }
0475 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
0476 if (!disk->slave_dir) {
0477 ret = -ENOMEM;
0478 goto out_put_holder_dir;
0479 }
0480
0481 ret = bd_register_pending_holders(disk);
0482 if (ret < 0)
0483 goto out_put_slave_dir;
0484
0485 ret = blk_register_queue(disk);
0486 if (ret)
0487 goto out_put_slave_dir;
0488
0489 if (!(disk->flags & GENHD_FL_HIDDEN)) {
0490 ret = bdi_register(disk->bdi, "%u:%u",
0491 disk->major, disk->first_minor);
0492 if (ret)
0493 goto out_unregister_queue;
0494 bdi_set_owner(disk->bdi, ddev);
0495 ret = sysfs_create_link(&ddev->kobj,
0496 &disk->bdi->dev->kobj, "bdi");
0497 if (ret)
0498 goto out_unregister_bdi;
0499
0500 bdev_add(disk->part0, ddev->devt);
0501 if (get_capacity(disk))
0502 disk_scan_partitions(disk, FMODE_READ);
0503
0504
0505
0506
0507
0508 dev_set_uevent_suppress(ddev, 0);
0509 disk_uevent(disk, KOBJ_ADD);
0510 }
0511
0512 disk_update_readahead(disk);
0513 disk_add_events(disk);
0514 set_bit(GD_ADDED, &disk->state);
0515 return 0;
0516
0517 out_unregister_bdi:
0518 if (!(disk->flags & GENHD_FL_HIDDEN))
0519 bdi_unregister(disk->bdi);
0520 out_unregister_queue:
0521 blk_unregister_queue(disk);
0522 out_put_slave_dir:
0523 kobject_put(disk->slave_dir);
0524 out_put_holder_dir:
0525 kobject_put(disk->part0->bd_holder_dir);
0526 out_del_integrity:
0527 blk_integrity_del(disk);
0528 out_del_block_link:
0529 if (!sysfs_deprecated)
0530 sysfs_remove_link(block_depr, dev_name(ddev));
0531 out_device_del:
0532 device_del(ddev);
0533 out_free_ext_minor:
0534 if (disk->major == BLOCK_EXT_MAJOR)
0535 blk_free_ext_minor(disk->first_minor);
0536 return ret;
0537 }
0538 EXPORT_SYMBOL(device_add_disk);
0539
0540
0541
0542
0543
0544
0545
0546
0547 void blk_mark_disk_dead(struct gendisk *disk)
0548 {
0549 set_bit(GD_DEAD, &disk->state);
0550 blk_queue_start_drain(disk->queue);
0551 }
0552 EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
0553
0554
0555
0556
0557
0558
0559
0560
0561
0562
0563
0564
0565
0566
0567
0568
0569
0570
0571
0572
0573 void del_gendisk(struct gendisk *disk)
0574 {
0575 struct request_queue *q = disk->queue;
0576
0577 might_sleep();
0578
0579 if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
0580 return;
0581
0582 blk_integrity_del(disk);
0583 disk_del_events(disk);
0584
0585 mutex_lock(&disk->open_mutex);
0586 remove_inode_hash(disk->part0->bd_inode);
0587 blk_drop_partitions(disk);
0588 mutex_unlock(&disk->open_mutex);
0589
0590 fsync_bdev(disk->part0);
0591 __invalidate_device(disk->part0, true);
0592
0593
0594
0595
0596 set_bit(GD_DEAD, &disk->state);
0597 if (test_bit(GD_OWNS_QUEUE, &disk->state))
0598 blk_queue_flag_set(QUEUE_FLAG_DYING, q);
0599 set_capacity(disk, 0);
0600
0601
0602
0603
0604 blk_queue_start_drain(q);
0605
0606 if (!(disk->flags & GENHD_FL_HIDDEN)) {
0607 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
0608
0609
0610
0611
0612
0613 bdi_unregister(disk->bdi);
0614 }
0615
0616 blk_unregister_queue(disk);
0617
0618 kobject_put(disk->part0->bd_holder_dir);
0619 kobject_put(disk->slave_dir);
0620
0621 part_stat_set_all(disk->part0, 0);
0622 disk->part0->bd_stamp = 0;
0623 if (!sysfs_deprecated)
0624 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
0625 pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
0626 device_del(disk_to_dev(disk));
0627
0628 blk_mq_freeze_queue_wait(q);
0629
0630 blk_throtl_cancel_bios(disk->queue);
0631
0632 blk_sync_queue(q);
0633 blk_flush_integrity();
0634 blk_mq_cancel_work_sync(q);
0635
0636 blk_mq_quiesce_queue(q);
0637 if (q->elevator) {
0638 mutex_lock(&q->sysfs_lock);
0639 elevator_exit(q);
0640 mutex_unlock(&q->sysfs_lock);
0641 }
0642 rq_qos_exit(q);
0643 blk_mq_unquiesce_queue(q);
0644
0645
0646
0647
0648
0649 if (!test_bit(GD_OWNS_QUEUE, &disk->state)) {
0650 blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
0651 __blk_mq_unfreeze_queue(q, true);
0652 } else {
0653 if (queue_is_mq(q))
0654 blk_mq_exit_queue(q);
0655 }
0656 }
0657 EXPORT_SYMBOL(del_gendisk);
0658
0659
0660
0661
0662
0663
0664
0665
0666
0667
0668
0669 void invalidate_disk(struct gendisk *disk)
0670 {
0671 struct block_device *bdev = disk->part0;
0672
0673 invalidate_bdev(bdev);
0674 bdev->bd_inode->i_mapping->wb_err = 0;
0675 set_capacity(disk, 0);
0676 }
0677 EXPORT_SYMBOL(invalidate_disk);
0678
0679
0680 static ssize_t disk_badblocks_show(struct device *dev,
0681 struct device_attribute *attr,
0682 char *page)
0683 {
0684 struct gendisk *disk = dev_to_disk(dev);
0685
0686 if (!disk->bb)
0687 return sprintf(page, "\n");
0688
0689 return badblocks_show(disk->bb, page, 0);
0690 }
0691
0692 static ssize_t disk_badblocks_store(struct device *dev,
0693 struct device_attribute *attr,
0694 const char *page, size_t len)
0695 {
0696 struct gendisk *disk = dev_to_disk(dev);
0697
0698 if (!disk->bb)
0699 return -ENXIO;
0700
0701 return badblocks_store(disk->bb, page, len, 0);
0702 }
0703
0704 #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
0705 void blk_request_module(dev_t devt)
0706 {
0707 unsigned int major = MAJOR(devt);
0708 struct blk_major_name **n;
0709
0710 mutex_lock(&major_names_lock);
0711 for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
0712 if ((*n)->major == major && (*n)->probe) {
0713 (*n)->probe(devt);
0714 mutex_unlock(&major_names_lock);
0715 return;
0716 }
0717 }
0718 mutex_unlock(&major_names_lock);
0719
0720 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
0721
0722 request_module("block-major-%d", MAJOR(devt));
0723 }
0724 #endif
0725
0726
0727
0728
0729
0730
0731 void __init printk_all_partitions(void)
0732 {
0733 struct class_dev_iter iter;
0734 struct device *dev;
0735
0736 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
0737 while ((dev = class_dev_iter_next(&iter))) {
0738 struct gendisk *disk = dev_to_disk(dev);
0739 struct block_device *part;
0740 char devt_buf[BDEVT_SIZE];
0741 unsigned long idx;
0742
0743
0744
0745
0746
0747 if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
0748 continue;
0749
0750
0751
0752
0753
0754 rcu_read_lock();
0755 xa_for_each(&disk->part_tbl, idx, part) {
0756 if (!bdev_nr_sectors(part))
0757 continue;
0758 printk("%s%s %10llu %pg %s",
0759 bdev_is_partition(part) ? " " : "",
0760 bdevt_str(part->bd_dev, devt_buf),
0761 bdev_nr_sectors(part) >> 1, part,
0762 part->bd_meta_info ?
0763 part->bd_meta_info->uuid : "");
0764 if (bdev_is_partition(part))
0765 printk("\n");
0766 else if (dev->parent && dev->parent->driver)
0767 printk(" driver: %s\n",
0768 dev->parent->driver->name);
0769 else
0770 printk(" (driver?)\n");
0771 }
0772 rcu_read_unlock();
0773 }
0774 class_dev_iter_exit(&iter);
0775 }
0776
0777 #ifdef CONFIG_PROC_FS
0778
0779 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
0780 {
0781 loff_t skip = *pos;
0782 struct class_dev_iter *iter;
0783 struct device *dev;
0784
0785 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
0786 if (!iter)
0787 return ERR_PTR(-ENOMEM);
0788
0789 seqf->private = iter;
0790 class_dev_iter_init(iter, &block_class, NULL, &disk_type);
0791 do {
0792 dev = class_dev_iter_next(iter);
0793 if (!dev)
0794 return NULL;
0795 } while (skip--);
0796
0797 return dev_to_disk(dev);
0798 }
0799
0800 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
0801 {
0802 struct device *dev;
0803
0804 (*pos)++;
0805 dev = class_dev_iter_next(seqf->private);
0806 if (dev)
0807 return dev_to_disk(dev);
0808
0809 return NULL;
0810 }
0811
0812 static void disk_seqf_stop(struct seq_file *seqf, void *v)
0813 {
0814 struct class_dev_iter *iter = seqf->private;
0815
0816
0817 if (iter) {
0818 class_dev_iter_exit(iter);
0819 kfree(iter);
0820 seqf->private = NULL;
0821 }
0822 }
0823
0824 static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
0825 {
0826 void *p;
0827
0828 p = disk_seqf_start(seqf, pos);
0829 if (!IS_ERR_OR_NULL(p) && !*pos)
0830 seq_puts(seqf, "major minor #blocks name\n\n");
0831 return p;
0832 }
0833
0834 static int show_partition(struct seq_file *seqf, void *v)
0835 {
0836 struct gendisk *sgp = v;
0837 struct block_device *part;
0838 unsigned long idx;
0839
0840 if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
0841 return 0;
0842
0843 rcu_read_lock();
0844 xa_for_each(&sgp->part_tbl, idx, part) {
0845 if (!bdev_nr_sectors(part))
0846 continue;
0847 seq_printf(seqf, "%4d %7d %10llu %pg\n",
0848 MAJOR(part->bd_dev), MINOR(part->bd_dev),
0849 bdev_nr_sectors(part) >> 1, part);
0850 }
0851 rcu_read_unlock();
0852 return 0;
0853 }
0854
0855 static const struct seq_operations partitions_op = {
0856 .start = show_partition_start,
0857 .next = disk_seqf_next,
0858 .stop = disk_seqf_stop,
0859 .show = show_partition
0860 };
0861 #endif
0862
0863 static int __init genhd_device_init(void)
0864 {
0865 int error;
0866
0867 block_class.dev_kobj = sysfs_dev_block_kobj;
0868 error = class_register(&block_class);
0869 if (unlikely(error))
0870 return error;
0871 blk_dev_init();
0872
0873 register_blkdev(BLOCK_EXT_MAJOR, "blkext");
0874
0875
0876 if (!sysfs_deprecated)
0877 block_depr = kobject_create_and_add("block", NULL);
0878 return 0;
0879 }
0880
0881 subsys_initcall(genhd_device_init);
0882
0883 static ssize_t disk_range_show(struct device *dev,
0884 struct device_attribute *attr, char *buf)
0885 {
0886 struct gendisk *disk = dev_to_disk(dev);
0887
0888 return sprintf(buf, "%d\n", disk->minors);
0889 }
0890
0891 static ssize_t disk_ext_range_show(struct device *dev,
0892 struct device_attribute *attr, char *buf)
0893 {
0894 struct gendisk *disk = dev_to_disk(dev);
0895
0896 return sprintf(buf, "%d\n",
0897 (disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
0898 }
0899
0900 static ssize_t disk_removable_show(struct device *dev,
0901 struct device_attribute *attr, char *buf)
0902 {
0903 struct gendisk *disk = dev_to_disk(dev);
0904
0905 return sprintf(buf, "%d\n",
0906 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
0907 }
0908
0909 static ssize_t disk_hidden_show(struct device *dev,
0910 struct device_attribute *attr, char *buf)
0911 {
0912 struct gendisk *disk = dev_to_disk(dev);
0913
0914 return sprintf(buf, "%d\n",
0915 (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
0916 }
0917
0918 static ssize_t disk_ro_show(struct device *dev,
0919 struct device_attribute *attr, char *buf)
0920 {
0921 struct gendisk *disk = dev_to_disk(dev);
0922
0923 return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
0924 }
0925
0926 ssize_t part_size_show(struct device *dev,
0927 struct device_attribute *attr, char *buf)
0928 {
0929 return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
0930 }
0931
0932 ssize_t part_stat_show(struct device *dev,
0933 struct device_attribute *attr, char *buf)
0934 {
0935 struct block_device *bdev = dev_to_bdev(dev);
0936 struct request_queue *q = bdev_get_queue(bdev);
0937 struct disk_stats stat;
0938 unsigned int inflight;
0939
0940 if (queue_is_mq(q))
0941 inflight = blk_mq_in_flight(q, bdev);
0942 else
0943 inflight = part_in_flight(bdev);
0944
0945 if (inflight) {
0946 part_stat_lock();
0947 update_io_ticks(bdev, jiffies, true);
0948 part_stat_unlock();
0949 }
0950 part_stat_read_all(bdev, &stat);
0951 return sprintf(buf,
0952 "%8lu %8lu %8llu %8u "
0953 "%8lu %8lu %8llu %8u "
0954 "%8u %8u %8u "
0955 "%8lu %8lu %8llu %8u "
0956 "%8lu %8u"
0957 "\n",
0958 stat.ios[STAT_READ],
0959 stat.merges[STAT_READ],
0960 (unsigned long long)stat.sectors[STAT_READ],
0961 (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
0962 stat.ios[STAT_WRITE],
0963 stat.merges[STAT_WRITE],
0964 (unsigned long long)stat.sectors[STAT_WRITE],
0965 (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
0966 inflight,
0967 jiffies_to_msecs(stat.io_ticks),
0968 (unsigned int)div_u64(stat.nsecs[STAT_READ] +
0969 stat.nsecs[STAT_WRITE] +
0970 stat.nsecs[STAT_DISCARD] +
0971 stat.nsecs[STAT_FLUSH],
0972 NSEC_PER_MSEC),
0973 stat.ios[STAT_DISCARD],
0974 stat.merges[STAT_DISCARD],
0975 (unsigned long long)stat.sectors[STAT_DISCARD],
0976 (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
0977 stat.ios[STAT_FLUSH],
0978 (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
0979 }
0980
0981 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
0982 char *buf)
0983 {
0984 struct block_device *bdev = dev_to_bdev(dev);
0985 struct request_queue *q = bdev_get_queue(bdev);
0986 unsigned int inflight[2];
0987
0988 if (queue_is_mq(q))
0989 blk_mq_in_flight_rw(q, bdev, inflight);
0990 else
0991 part_in_flight_rw(bdev, inflight);
0992
0993 return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
0994 }
0995
0996 static ssize_t disk_capability_show(struct device *dev,
0997 struct device_attribute *attr, char *buf)
0998 {
0999 struct gendisk *disk = dev_to_disk(dev);
1000
1001 return sprintf(buf, "%x\n", disk->flags);
1002 }
1003
1004 static ssize_t disk_alignment_offset_show(struct device *dev,
1005 struct device_attribute *attr,
1006 char *buf)
1007 {
1008 struct gendisk *disk = dev_to_disk(dev);
1009
1010 return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
1011 }
1012
1013 static ssize_t disk_discard_alignment_show(struct device *dev,
1014 struct device_attribute *attr,
1015 char *buf)
1016 {
1017 struct gendisk *disk = dev_to_disk(dev);
1018
1019 return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
1020 }
1021
1022 static ssize_t diskseq_show(struct device *dev,
1023 struct device_attribute *attr, char *buf)
1024 {
1025 struct gendisk *disk = dev_to_disk(dev);
1026
1027 return sprintf(buf, "%llu\n", disk->diskseq);
1028 }
1029
1030 static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
1031 static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
1032 static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
1033 static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
1034 static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
1035 static DEVICE_ATTR(size, 0444, part_size_show, NULL);
1036 static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
1037 static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
1038 static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
1039 static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
1040 static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
1041 static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
1042 static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
1043
1044 #ifdef CONFIG_FAIL_MAKE_REQUEST
1045 ssize_t part_fail_show(struct device *dev,
1046 struct device_attribute *attr, char *buf)
1047 {
1048 return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
1049 }
1050
1051 ssize_t part_fail_store(struct device *dev,
1052 struct device_attribute *attr,
1053 const char *buf, size_t count)
1054 {
1055 int i;
1056
1057 if (count > 0 && sscanf(buf, "%d", &i) > 0)
1058 dev_to_bdev(dev)->bd_make_it_fail = i;
1059
1060 return count;
1061 }
1062
1063 static struct device_attribute dev_attr_fail =
1064 __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
1065 #endif
1066
1067 #ifdef CONFIG_FAIL_IO_TIMEOUT
1068 static struct device_attribute dev_attr_fail_timeout =
1069 __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
1070 #endif
1071
1072 static struct attribute *disk_attrs[] = {
1073 &dev_attr_range.attr,
1074 &dev_attr_ext_range.attr,
1075 &dev_attr_removable.attr,
1076 &dev_attr_hidden.attr,
1077 &dev_attr_ro.attr,
1078 &dev_attr_size.attr,
1079 &dev_attr_alignment_offset.attr,
1080 &dev_attr_discard_alignment.attr,
1081 &dev_attr_capability.attr,
1082 &dev_attr_stat.attr,
1083 &dev_attr_inflight.attr,
1084 &dev_attr_badblocks.attr,
1085 &dev_attr_events.attr,
1086 &dev_attr_events_async.attr,
1087 &dev_attr_events_poll_msecs.attr,
1088 &dev_attr_diskseq.attr,
1089 #ifdef CONFIG_FAIL_MAKE_REQUEST
1090 &dev_attr_fail.attr,
1091 #endif
1092 #ifdef CONFIG_FAIL_IO_TIMEOUT
1093 &dev_attr_fail_timeout.attr,
1094 #endif
1095 NULL
1096 };
1097
1098 static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
1099 {
1100 struct device *dev = container_of(kobj, typeof(*dev), kobj);
1101 struct gendisk *disk = dev_to_disk(dev);
1102
1103 if (a == &dev_attr_badblocks.attr && !disk->bb)
1104 return 0;
1105 return a->mode;
1106 }
1107
1108 static struct attribute_group disk_attr_group = {
1109 .attrs = disk_attrs,
1110 .is_visible = disk_visible,
1111 };
1112
1113 static const struct attribute_group *disk_attr_groups[] = {
1114 &disk_attr_group,
1115 #ifdef CONFIG_BLK_DEV_IO_TRACE
1116 &blk_trace_attr_group,
1117 #endif
1118 NULL
1119 };
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135 static void disk_release(struct device *dev)
1136 {
1137 struct gendisk *disk = dev_to_disk(dev);
1138
1139 might_sleep();
1140 WARN_ON_ONCE(disk_live(disk));
1141
1142
1143
1144
1145
1146
1147
1148
1149 if (queue_is_mq(disk->queue) &&
1150 test_bit(GD_OWNS_QUEUE, &disk->state) &&
1151 !test_bit(GD_ADDED, &disk->state))
1152 blk_mq_exit_queue(disk->queue);
1153
1154 blkcg_exit_queue(disk->queue);
1155 bioset_exit(&disk->bio_split);
1156
1157 disk_release_events(disk);
1158 kfree(disk->random);
1159 disk_free_zone_bitmaps(disk);
1160 xa_destroy(&disk->part_tbl);
1161
1162 disk->queue->disk = NULL;
1163 blk_put_queue(disk->queue);
1164
1165 if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
1166 disk->fops->free_disk(disk);
1167
1168 iput(disk->part0->bd_inode);
1169 }
1170
1171 static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
1172 {
1173 struct gendisk *disk = dev_to_disk(dev);
1174
1175 return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
1176 }
1177
1178 struct class block_class = {
1179 .name = "block",
1180 .dev_uevent = block_uevent,
1181 };
1182
1183 static char *block_devnode(struct device *dev, umode_t *mode,
1184 kuid_t *uid, kgid_t *gid)
1185 {
1186 struct gendisk *disk = dev_to_disk(dev);
1187
1188 if (disk->fops->devnode)
1189 return disk->fops->devnode(disk, mode);
1190 return NULL;
1191 }
1192
1193 const struct device_type disk_type = {
1194 .name = "disk",
1195 .groups = disk_attr_groups,
1196 .release = disk_release,
1197 .devnode = block_devnode,
1198 };
1199
1200 #ifdef CONFIG_PROC_FS
1201
1202
1203
1204
1205
1206
1207
1208 static int diskstats_show(struct seq_file *seqf, void *v)
1209 {
1210 struct gendisk *gp = v;
1211 struct block_device *hd;
1212 unsigned int inflight;
1213 struct disk_stats stat;
1214 unsigned long idx;
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224 rcu_read_lock();
1225 xa_for_each(&gp->part_tbl, idx, hd) {
1226 if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
1227 continue;
1228 if (queue_is_mq(gp->queue))
1229 inflight = blk_mq_in_flight(gp->queue, hd);
1230 else
1231 inflight = part_in_flight(hd);
1232
1233 if (inflight) {
1234 part_stat_lock();
1235 update_io_ticks(hd, jiffies, true);
1236 part_stat_unlock();
1237 }
1238 part_stat_read_all(hd, &stat);
1239 seq_printf(seqf, "%4d %7d %pg "
1240 "%lu %lu %lu %u "
1241 "%lu %lu %lu %u "
1242 "%u %u %u "
1243 "%lu %lu %lu %u "
1244 "%lu %u"
1245 "\n",
1246 MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
1247 stat.ios[STAT_READ],
1248 stat.merges[STAT_READ],
1249 stat.sectors[STAT_READ],
1250 (unsigned int)div_u64(stat.nsecs[STAT_READ],
1251 NSEC_PER_MSEC),
1252 stat.ios[STAT_WRITE],
1253 stat.merges[STAT_WRITE],
1254 stat.sectors[STAT_WRITE],
1255 (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
1256 NSEC_PER_MSEC),
1257 inflight,
1258 jiffies_to_msecs(stat.io_ticks),
1259 (unsigned int)div_u64(stat.nsecs[STAT_READ] +
1260 stat.nsecs[STAT_WRITE] +
1261 stat.nsecs[STAT_DISCARD] +
1262 stat.nsecs[STAT_FLUSH],
1263 NSEC_PER_MSEC),
1264 stat.ios[STAT_DISCARD],
1265 stat.merges[STAT_DISCARD],
1266 stat.sectors[STAT_DISCARD],
1267 (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
1268 NSEC_PER_MSEC),
1269 stat.ios[STAT_FLUSH],
1270 (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
1271 NSEC_PER_MSEC)
1272 );
1273 }
1274 rcu_read_unlock();
1275
1276 return 0;
1277 }
1278
1279 static const struct seq_operations diskstats_op = {
1280 .start = disk_seqf_start,
1281 .next = disk_seqf_next,
1282 .stop = disk_seqf_stop,
1283 .show = diskstats_show
1284 };
1285
1286 static int __init proc_genhd_init(void)
1287 {
1288 proc_create_seq("diskstats", 0, NULL, &diskstats_op);
1289 proc_create_seq("partitions", 0, NULL, &partitions_op);
1290 return 0;
1291 }
1292 module_init(proc_genhd_init);
1293 #endif
1294
1295 dev_t part_devt(struct gendisk *disk, u8 partno)
1296 {
1297 struct block_device *part;
1298 dev_t devt = 0;
1299
1300 rcu_read_lock();
1301 part = xa_load(&disk->part_tbl, partno);
1302 if (part)
1303 devt = part->bd_dev;
1304 rcu_read_unlock();
1305
1306 return devt;
1307 }
1308
1309 dev_t blk_lookup_devt(const char *name, int partno)
1310 {
1311 dev_t devt = MKDEV(0, 0);
1312 struct class_dev_iter iter;
1313 struct device *dev;
1314
1315 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1316 while ((dev = class_dev_iter_next(&iter))) {
1317 struct gendisk *disk = dev_to_disk(dev);
1318
1319 if (strcmp(dev_name(dev), name))
1320 continue;
1321
1322 if (partno < disk->minors) {
1323
1324
1325
1326 devt = MKDEV(MAJOR(dev->devt),
1327 MINOR(dev->devt) + partno);
1328 } else {
1329 devt = part_devt(disk, partno);
1330 if (devt)
1331 break;
1332 }
1333 }
1334 class_dev_iter_exit(&iter);
1335 return devt;
1336 }
1337
1338 struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
1339 struct lock_class_key *lkclass)
1340 {
1341 struct gendisk *disk;
1342
1343 disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1344 if (!disk)
1345 return NULL;
1346
1347 if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
1348 goto out_free_disk;
1349
1350 disk->bdi = bdi_alloc(node_id);
1351 if (!disk->bdi)
1352 goto out_free_bioset;
1353
1354
1355 disk->queue = q;
1356
1357 disk->part0 = bdev_alloc(disk, 0);
1358 if (!disk->part0)
1359 goto out_free_bdi;
1360
1361 disk->node_id = node_id;
1362 mutex_init(&disk->open_mutex);
1363 xa_init(&disk->part_tbl);
1364 if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
1365 goto out_destroy_part_tbl;
1366
1367 if (blkcg_init_queue(q))
1368 goto out_erase_part0;
1369
1370 rand_initialize_disk(disk);
1371 disk_to_dev(disk)->class = &block_class;
1372 disk_to_dev(disk)->type = &disk_type;
1373 device_initialize(disk_to_dev(disk));
1374 inc_diskseq(disk);
1375 q->disk = disk;
1376 lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
1377 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
1378 INIT_LIST_HEAD(&disk->slave_bdevs);
1379 #endif
1380 return disk;
1381
1382 out_erase_part0:
1383 xa_erase(&disk->part_tbl, 0);
1384 out_destroy_part_tbl:
1385 xa_destroy(&disk->part_tbl);
1386 disk->part0->bd_disk = NULL;
1387 iput(disk->part0->bd_inode);
1388 out_free_bdi:
1389 bdi_put(disk->bdi);
1390 out_free_bioset:
1391 bioset_exit(&disk->bio_split);
1392 out_free_disk:
1393 kfree(disk);
1394 return NULL;
1395 }
1396
1397 struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
1398 {
1399 struct request_queue *q;
1400 struct gendisk *disk;
1401
1402 q = blk_alloc_queue(node, false);
1403 if (!q)
1404 return NULL;
1405
1406 disk = __alloc_disk_node(q, node, lkclass);
1407 if (!disk) {
1408 blk_put_queue(q);
1409 return NULL;
1410 }
1411 set_bit(GD_OWNS_QUEUE, &disk->state);
1412 return disk;
1413 }
1414 EXPORT_SYMBOL(__blk_alloc_disk);
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
/**
 * put_disk - drop a reference to a gendisk
 * @disk: disk to release; NULL is accepted and ignored
 *
 * Drops one reference on the disk's embedded device.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1435
1436 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1437 {
1438 char event[] = "DISK_RO=1";
1439 char *envp[] = { event, NULL };
1440
1441 if (!ro)
1442 event[8] = '0';
1443 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1444 }
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455 void set_disk_ro(struct gendisk *disk, bool read_only)
1456 {
1457 if (read_only) {
1458 if (test_and_set_bit(GD_READ_ONLY, &disk->state))
1459 return;
1460 } else {
1461 if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
1462 return;
1463 }
1464 set_disk_ro_uevent(disk, read_only);
1465 }
1466 EXPORT_SYMBOL(set_disk_ro);
1467
1468 void inc_diskseq(struct gendisk *disk)
1469 {
1470 disk->diskseq = atomic64_inc_return(&diskseq);
1471 }