Back to home page

LXR

 
 

    


0001 /*
0002  *  Code extracted from drivers/block/genhd.c
0003  *  Copyright (C) 1991-1998  Linus Torvalds
0004  *  Re-organised Feb 1998 Russell King
0005  *
0006  *  We now have independent partition support from the
0007  *  block drivers, which allows all the partition code to
0008  *  be grouped in one location, and it to be mostly self
0009  *  contained.
0010  */
0011 
0012 #include <linux/init.h>
0013 #include <linux/module.h>
0014 #include <linux/fs.h>
0015 #include <linux/slab.h>
0016 #include <linux/kmod.h>
0017 #include <linux/ctype.h>
0018 #include <linux/genhd.h>
0019 #include <linux/dax.h>
0020 #include <linux/blktrace_api.h>
0021 
0022 #include "partitions/check.h"
0023 
0024 #ifdef CONFIG_BLK_DEV_MD
0025 extern void md_autodetect_dev(dev_t dev);
0026 #endif
0027  
0028 /*
0029  * disk_name() is used by partition check code and the genhd driver.
0030  * It formats the devicename of the indicated disk into
0031  * the supplied buffer (of size at least 32), and returns
0032  * a pointer to that same buffer (for convenience).
0033  */
0034 
0035 char *disk_name(struct gendisk *hd, int partno, char *buf)
0036 {
0037     if (!partno)
0038         snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
0039     else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
0040         snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
0041     else
0042         snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
0043 
0044     return buf;
0045 }
0046 
0047 const char *bdevname(struct block_device *bdev, char *buf)
0048 {
0049     return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
0050 }
0051 
0052 EXPORT_SYMBOL(bdevname);
0053 
0054 /*
0055  * There's very little reason to use this, you should really
0056  * have a struct block_device just about everywhere and use
0057  * bdevname() instead.
0058  */
0059 const char *__bdevname(dev_t dev, char *buffer)
0060 {
0061     scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
0062                 MAJOR(dev), MINOR(dev));
0063     return buffer;
0064 }
0065 
0066 EXPORT_SYMBOL(__bdevname);
0067 
0068 static ssize_t part_partition_show(struct device *dev,
0069                    struct device_attribute *attr, char *buf)
0070 {
0071     struct hd_struct *p = dev_to_part(dev);
0072 
0073     return sprintf(buf, "%d\n", p->partno);
0074 }
0075 
0076 static ssize_t part_start_show(struct device *dev,
0077                    struct device_attribute *attr, char *buf)
0078 {
0079     struct hd_struct *p = dev_to_part(dev);
0080 
0081     return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
0082 }
0083 
0084 ssize_t part_size_show(struct device *dev,
0085                struct device_attribute *attr, char *buf)
0086 {
0087     struct hd_struct *p = dev_to_part(dev);
0088     return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
0089 }
0090 
0091 static ssize_t part_ro_show(struct device *dev,
0092                 struct device_attribute *attr, char *buf)
0093 {
0094     struct hd_struct *p = dev_to_part(dev);
0095     return sprintf(buf, "%d\n", p->policy ? 1 : 0);
0096 }
0097 
0098 static ssize_t part_alignment_offset_show(struct device *dev,
0099                       struct device_attribute *attr, char *buf)
0100 {
0101     struct hd_struct *p = dev_to_part(dev);
0102     return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
0103 }
0104 
0105 static ssize_t part_discard_alignment_show(struct device *dev,
0106                        struct device_attribute *attr, char *buf)
0107 {
0108     struct hd_struct *p = dev_to_part(dev);
0109     return sprintf(buf, "%u\n", p->discard_alignment);
0110 }
0111 
0112 ssize_t part_stat_show(struct device *dev,
0113                struct device_attribute *attr, char *buf)
0114 {
0115     struct hd_struct *p = dev_to_part(dev);
0116     int cpu;
0117 
0118     cpu = part_stat_lock();
0119     part_round_stats(cpu, p);
0120     part_stat_unlock();
0121     return sprintf(buf,
0122         "%8lu %8lu %8llu %8u "
0123         "%8lu %8lu %8llu %8u "
0124         "%8u %8u %8u"
0125         "\n",
0126         part_stat_read(p, ios[READ]),
0127         part_stat_read(p, merges[READ]),
0128         (unsigned long long)part_stat_read(p, sectors[READ]),
0129         jiffies_to_msecs(part_stat_read(p, ticks[READ])),
0130         part_stat_read(p, ios[WRITE]),
0131         part_stat_read(p, merges[WRITE]),
0132         (unsigned long long)part_stat_read(p, sectors[WRITE]),
0133         jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
0134         part_in_flight(p),
0135         jiffies_to_msecs(part_stat_read(p, io_ticks)),
0136         jiffies_to_msecs(part_stat_read(p, time_in_queue)));
0137 }
0138 
0139 ssize_t part_inflight_show(struct device *dev,
0140             struct device_attribute *attr, char *buf)
0141 {
0142     struct hd_struct *p = dev_to_part(dev);
0143 
0144     return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
0145         atomic_read(&p->in_flight[1]));
0146 }
0147 
0148 #ifdef CONFIG_FAIL_MAKE_REQUEST
0149 ssize_t part_fail_show(struct device *dev,
0150                struct device_attribute *attr, char *buf)
0151 {
0152     struct hd_struct *p = dev_to_part(dev);
0153 
0154     return sprintf(buf, "%d\n", p->make_it_fail);
0155 }
0156 
0157 ssize_t part_fail_store(struct device *dev,
0158             struct device_attribute *attr,
0159             const char *buf, size_t count)
0160 {
0161     struct hd_struct *p = dev_to_part(dev);
0162     int i;
0163 
0164     if (count > 0 && sscanf(buf, "%d", &i) > 0)
0165         p->make_it_fail = (i == 0) ? 0 : 1;
0166 
0167     return count;
0168 }
0169 #endif
0170 
0171 static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
0172 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
0173 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
0174 static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
0175 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
0176 static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
0177            NULL);
0178 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
0179 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
0180 #ifdef CONFIG_FAIL_MAKE_REQUEST
0181 static struct device_attribute dev_attr_fail =
0182     __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
0183 #endif
0184 
0185 static struct attribute *part_attrs[] = {
0186     &dev_attr_partition.attr,
0187     &dev_attr_start.attr,
0188     &dev_attr_size.attr,
0189     &dev_attr_ro.attr,
0190     &dev_attr_alignment_offset.attr,
0191     &dev_attr_discard_alignment.attr,
0192     &dev_attr_stat.attr,
0193     &dev_attr_inflight.attr,
0194 #ifdef CONFIG_FAIL_MAKE_REQUEST
0195     &dev_attr_fail.attr,
0196 #endif
0197     NULL
0198 };
0199 
0200 static struct attribute_group part_attr_group = {
0201     .attrs = part_attrs,
0202 };
0203 
0204 static const struct attribute_group *part_attr_groups[] = {
0205     &part_attr_group,
0206 #ifdef CONFIG_BLK_DEV_IO_TRACE
0207     &blk_trace_attr_group,
0208 #endif
0209     NULL
0210 };
0211 
0212 static void part_release(struct device *dev)
0213 {
0214     struct hd_struct *p = dev_to_part(dev);
0215     blk_free_devt(dev->devt);
0216     hd_free_part(p);
0217     kfree(p);
0218 }
0219 
0220 static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
0221 {
0222     struct hd_struct *part = dev_to_part(dev);
0223 
0224     add_uevent_var(env, "PARTN=%u", part->partno);
0225     if (part->info && part->info->volname[0])
0226         add_uevent_var(env, "PARTNAME=%s", part->info->volname);
0227     return 0;
0228 }
0229 
0230 struct device_type part_type = {
0231     .name       = "partition",
0232     .groups     = part_attr_groups,
0233     .release    = part_release,
0234     .uevent     = part_uevent,
0235 };
0236 
0237 static void delete_partition_rcu_cb(struct rcu_head *head)
0238 {
0239     struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
0240 
0241     part->start_sect = 0;
0242     part->nr_sects = 0;
0243     part_stat_set_all(part, 0);
0244     put_device(part_to_dev(part));
0245 }
0246 
0247 void __delete_partition(struct percpu_ref *ref)
0248 {
0249     struct hd_struct *part = container_of(ref, struct hd_struct, ref);
0250     call_rcu(&part->rcu_head, delete_partition_rcu_cb);
0251 }
0252 
0253 void delete_partition(struct gendisk *disk, int partno)
0254 {
0255     struct disk_part_tbl *ptbl = disk->part_tbl;
0256     struct hd_struct *part;
0257 
0258     if (partno >= ptbl->len)
0259         return;
0260 
0261     part = ptbl->part[partno];
0262     if (!part)
0263         return;
0264 
0265     rcu_assign_pointer(ptbl->part[partno], NULL);
0266     rcu_assign_pointer(ptbl->last_lookup, NULL);
0267     kobject_put(part->holder_dir);
0268     device_del(part_to_dev(part));
0269 
0270     hd_struct_kill(part);
0271 }
0272 
0273 static ssize_t whole_disk_show(struct device *dev,
0274                    struct device_attribute *attr, char *buf)
0275 {
0276     return 0;
0277 }
0278 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
0279            whole_disk_show, NULL);
0280 
0281 struct hd_struct *add_partition(struct gendisk *disk, int partno,
0282                 sector_t start, sector_t len, int flags,
0283                 struct partition_meta_info *info)
0284 {
0285     struct hd_struct *p;
0286     dev_t devt = MKDEV(0, 0);
0287     struct device *ddev = disk_to_dev(disk);
0288     struct device *pdev;
0289     struct disk_part_tbl *ptbl;
0290     const char *dname;
0291     int err;
0292 
0293     err = disk_expand_part_tbl(disk, partno);
0294     if (err)
0295         return ERR_PTR(err);
0296     ptbl = disk->part_tbl;
0297 
0298     if (ptbl->part[partno])
0299         return ERR_PTR(-EBUSY);
0300 
0301     p = kzalloc(sizeof(*p), GFP_KERNEL);
0302     if (!p)
0303         return ERR_PTR(-EBUSY);
0304 
0305     if (!init_part_stats(p)) {
0306         err = -ENOMEM;
0307         goto out_free;
0308     }
0309 
0310     seqcount_init(&p->nr_sects_seq);
0311     pdev = part_to_dev(p);
0312 
0313     p->start_sect = start;
0314     p->alignment_offset =
0315         queue_limit_alignment_offset(&disk->queue->limits, start);
0316     p->discard_alignment =
0317         queue_limit_discard_alignment(&disk->queue->limits, start);
0318     p->nr_sects = len;
0319     p->partno = partno;
0320     p->policy = get_disk_ro(disk);
0321 
0322     if (info) {
0323         struct partition_meta_info *pinfo = alloc_part_info(disk);
0324         if (!pinfo)
0325             goto out_free_stats;
0326         memcpy(pinfo, info, sizeof(*info));
0327         p->info = pinfo;
0328     }
0329 
0330     dname = dev_name(ddev);
0331     if (isdigit(dname[strlen(dname) - 1]))
0332         dev_set_name(pdev, "%sp%d", dname, partno);
0333     else
0334         dev_set_name(pdev, "%s%d", dname, partno);
0335 
0336     device_initialize(pdev);
0337     pdev->class = &block_class;
0338     pdev->type = &part_type;
0339     pdev->parent = ddev;
0340 
0341     err = blk_alloc_devt(p, &devt);
0342     if (err)
0343         goto out_free_info;
0344     pdev->devt = devt;
0345 
0346     /* delay uevent until 'holders' subdir is created */
0347     dev_set_uevent_suppress(pdev, 1);
0348     err = device_add(pdev);
0349     if (err)
0350         goto out_put;
0351 
0352     err = -ENOMEM;
0353     p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
0354     if (!p->holder_dir)
0355         goto out_del;
0356 
0357     dev_set_uevent_suppress(pdev, 0);
0358     if (flags & ADDPART_FLAG_WHOLEDISK) {
0359         err = device_create_file(pdev, &dev_attr_whole_disk);
0360         if (err)
0361             goto out_del;
0362     }
0363 
0364     err = hd_ref_init(p);
0365     if (err) {
0366         if (flags & ADDPART_FLAG_WHOLEDISK)
0367             goto out_remove_file;
0368         goto out_del;
0369     }
0370 
0371     /* everything is up and running, commence */
0372     rcu_assign_pointer(ptbl->part[partno], p);
0373 
0374     /* suppress uevent if the disk suppresses it */
0375     if (!dev_get_uevent_suppress(ddev))
0376         kobject_uevent(&pdev->kobj, KOBJ_ADD);
0377     return p;
0378 
0379 out_free_info:
0380     free_part_info(p);
0381 out_free_stats:
0382     free_part_stats(p);
0383 out_free:
0384     kfree(p);
0385     return ERR_PTR(err);
0386 out_remove_file:
0387     device_remove_file(pdev, &dev_attr_whole_disk);
0388 out_del:
0389     kobject_put(p->holder_dir);
0390     device_del(pdev);
0391 out_put:
0392     put_device(pdev);
0393     blk_free_devt(devt);
0394     return ERR_PTR(err);
0395 }
0396 
0397 static bool disk_unlock_native_capacity(struct gendisk *disk)
0398 {
0399     const struct block_device_operations *bdops = disk->fops;
0400 
0401     if (bdops->unlock_native_capacity &&
0402         !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
0403         printk(KERN_CONT "enabling native capacity\n");
0404         bdops->unlock_native_capacity(disk);
0405         disk->flags |= GENHD_FL_NATIVE_CAPACITY;
0406         return true;
0407     } else {
0408         printk(KERN_CONT "truncated\n");
0409         return false;
0410     }
0411 }
0412 
0413 static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
0414 {
0415     struct disk_part_iter piter;
0416     struct hd_struct *part;
0417     int res;
0418 
0419     if (bdev->bd_part_count || bdev->bd_super)
0420         return -EBUSY;
0421     res = invalidate_partition(disk, 0);
0422     if (res)
0423         return res;
0424 
0425     disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
0426     while ((part = disk_part_iter_next(&piter)))
0427         delete_partition(disk, part->partno);
0428     disk_part_iter_exit(&piter);
0429 
0430     return 0;
0431 }
0432 
0433 static bool part_zone_aligned(struct gendisk *disk,
0434                   struct block_device *bdev,
0435                   sector_t from, sector_t size)
0436 {
0437     unsigned int zone_sectors = bdev_zone_sectors(bdev);
0438 
0439     /*
0440      * If this function is called, then the disk is a zoned block device
0441      * (host-aware or host-managed). This can be detected even if the
0442      * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not
0443      * set). In this case, however, only host-aware devices will be seen
0444      * as a block device is not created for host-managed devices. Without
0445      * zoned block device support, host-aware drives can still be used as
0446      * regular block devices (no zone operation) and their zone size will
0447      * be reported as 0. Allow this case.
0448      */
0449     if (!zone_sectors)
0450         return true;
0451 
0452     /*
0453      * Check partition start and size alignement. If the drive has a
0454      * smaller last runt zone, ignore it and allow the partition to
0455      * use it. Check the zone size too: it should be a power of 2 number
0456      * of sectors.
0457      */
0458     if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
0459         u32 rem;
0460 
0461         div_u64_rem(from, zone_sectors, &rem);
0462         if (rem)
0463             return false;
0464         if ((from + size) < get_capacity(disk)) {
0465             div_u64_rem(size, zone_sectors, &rem);
0466             if (rem)
0467                 return false;
0468         }
0469 
0470     } else {
0471 
0472         if (from & (zone_sectors - 1))
0473             return false;
0474         if ((from + size) < get_capacity(disk) &&
0475             (size & (zone_sectors - 1)))
0476             return false;
0477 
0478     }
0479 
0480     return true;
0481 }
0482 
0483 int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
0484 {
0485     struct parsed_partitions *state = NULL;
0486     struct hd_struct *part;
0487     int p, highest, res;
0488 rescan:
0489     if (state && !IS_ERR(state)) {
0490         free_partitions(state);
0491         state = NULL;
0492     }
0493 
0494     res = drop_partitions(disk, bdev);
0495     if (res)
0496         return res;
0497 
0498     if (disk->fops->revalidate_disk)
0499         disk->fops->revalidate_disk(disk);
0500     blk_integrity_revalidate(disk);
0501     check_disk_size_change(disk, bdev);
0502     bdev->bd_invalidated = 0;
0503     if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
0504         return 0;
0505     if (IS_ERR(state)) {
0506         /*
0507          * I/O error reading the partition table.  If any
0508          * partition code tried to read beyond EOD, retry
0509          * after unlocking native capacity.
0510          */
0511         if (PTR_ERR(state) == -ENOSPC) {
0512             printk(KERN_WARNING "%s: partition table beyond EOD, ",
0513                    disk->disk_name);
0514             if (disk_unlock_native_capacity(disk))
0515                 goto rescan;
0516         }
0517         return -EIO;
0518     }
0519     /*
0520      * If any partition code tried to read beyond EOD, try
0521      * unlocking native capacity even if partition table is
0522      * successfully read as we could be missing some partitions.
0523      */
0524     if (state->access_beyond_eod) {
0525         printk(KERN_WARNING
0526                "%s: partition table partially beyond EOD, ",
0527                disk->disk_name);
0528         if (disk_unlock_native_capacity(disk))
0529             goto rescan;
0530     }
0531 
0532     /* tell userspace that the media / partition table may have changed */
0533     kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
0534 
0535     /* Detect the highest partition number and preallocate
0536      * disk->part_tbl.  This is an optimization and not strictly
0537      * necessary.
0538      */
0539     for (p = 1, highest = 0; p < state->limit; p++)
0540         if (state->parts[p].size)
0541             highest = p;
0542 
0543     disk_expand_part_tbl(disk, highest);
0544 
0545     /* add partitions */
0546     for (p = 1; p < state->limit; p++) {
0547         sector_t size, from;
0548 
0549         size = state->parts[p].size;
0550         if (!size)
0551             continue;
0552 
0553         from = state->parts[p].from;
0554         if (from >= get_capacity(disk)) {
0555             printk(KERN_WARNING
0556                    "%s: p%d start %llu is beyond EOD, ",
0557                    disk->disk_name, p, (unsigned long long) from);
0558             if (disk_unlock_native_capacity(disk))
0559                 goto rescan;
0560             continue;
0561         }
0562 
0563         if (from + size > get_capacity(disk)) {
0564             printk(KERN_WARNING
0565                    "%s: p%d size %llu extends beyond EOD, ",
0566                    disk->disk_name, p, (unsigned long long) size);
0567 
0568             if (disk_unlock_native_capacity(disk)) {
0569                 /* free state and restart */
0570                 goto rescan;
0571             } else {
0572                 /*
0573                  * we can not ignore partitions of broken tables
0574                  * created by for example camera firmware, but
0575                  * we limit them to the end of the disk to avoid
0576                  * creating invalid block devices
0577                  */
0578                 size = get_capacity(disk) - from;
0579             }
0580         }
0581 
0582         /*
0583          * On a zoned block device, partitions should be aligned on the
0584          * device zone size (i.e. zone boundary crossing not allowed).
0585          * Otherwise, resetting the write pointer of the last zone of
0586          * one partition may impact the following partition.
0587          */
0588         if (bdev_is_zoned(bdev) &&
0589             !part_zone_aligned(disk, bdev, from, size)) {
0590             printk(KERN_WARNING
0591                    "%s: p%d start %llu+%llu is not zone aligned\n",
0592                    disk->disk_name, p, (unsigned long long) from,
0593                    (unsigned long long) size);
0594             continue;
0595         }
0596 
0597         part = add_partition(disk, p, from, size,
0598                      state->parts[p].flags,
0599                      &state->parts[p].info);
0600         if (IS_ERR(part)) {
0601             printk(KERN_ERR " %s: p%d could not be added: %ld\n",
0602                    disk->disk_name, p, -PTR_ERR(part));
0603             continue;
0604         }
0605 #ifdef CONFIG_BLK_DEV_MD
0606         if (state->parts[p].flags & ADDPART_FLAG_RAID)
0607             md_autodetect_dev(part_to_dev(part)->devt);
0608 #endif
0609     }
0610     free_partitions(state);
0611     return 0;
0612 }
0613 
0614 int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
0615 {
0616     int res;
0617 
0618     if (!bdev->bd_invalidated)
0619         return 0;
0620 
0621     res = drop_partitions(disk, bdev);
0622     if (res)
0623         return res;
0624 
0625     set_capacity(disk, 0);
0626     check_disk_size_change(disk, bdev);
0627     bdev->bd_invalidated = 0;
0628     /* tell userspace that the media / partition table may have changed */
0629     kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
0630 
0631     return 0;
0632 }
0633 
0634 static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
0635 {
0636     struct address_space *mapping = bdev->bd_inode->i_mapping;
0637 
0638     return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
0639                  NULL);
0640 }
0641 
0642 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
0643 {
0644     struct page *page;
0645 
0646     /* don't populate page cache for dax capable devices */
0647     if (IS_DAX(bdev->bd_inode))
0648         page = read_dax_sector(bdev, n);
0649     else
0650         page = read_pagecache_sector(bdev, n);
0651 
0652     if (!IS_ERR(page)) {
0653         if (PageError(page))
0654             goto fail;
0655         p->v = page;
0656         return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
0657 fail:
0658         put_page(page);
0659     }
0660     p->v = NULL;
0661     return NULL;
0662 }
0663 
0664 EXPORT_SYMBOL(read_dev_sector);