0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #include <linux/init.h>
0013 #include <linux/initrd.h>
0014 #include <linux/module.h>
0015 #include <linux/moduleparam.h>
0016 #include <linux/major.h>
0017 #include <linux/blkdev.h>
0018 #include <linux/bio.h>
0019 #include <linux/highmem.h>
0020 #include <linux/mutex.h>
0021 #include <linux/pagemap.h>
0022 #include <linux/radix-tree.h>
0023 #include <linux/fs.h>
0024 #include <linux/slab.h>
0025 #include <linux/backing-dev.h>
0026 #include <linux/debugfs.h>
0027
0028 #include <linux/uaccess.h>
0029
0030
0031
0032
0033
0034
0035
0036
/*
 * Per-device state for one RAM disk. Backing pages live in a radix
 * tree indexed by page offset within the disk and are allocated
 * lazily on first write (see brd_insert_page()).
 */
struct brd_device {
	int brd_number;			/* device index; names "ram<N>" */
	struct gendisk *brd_disk;	/* gendisk registered for this device */
	struct list_head brd_list;	/* link in the global brd_devices list */

	/*
	 * Backing store of pages — this is the contents of the block
	 * device. brd_lock serializes insertions into the radix tree;
	 * lookups run locklessly under RCU.
	 */
	spinlock_t brd_lock;
	struct radix_tree_root brd_pages;
	u64 brd_nr_pages;		/* pages currently allocated (debugfs-visible) */
};
0050
0051
0052
0053
0054 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
0055 {
0056 pgoff_t idx;
0057 struct page *page;
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070 rcu_read_lock();
0071 idx = sector >> PAGE_SECTORS_SHIFT;
0072 page = radix_tree_lookup(&brd->brd_pages, idx);
0073 rcu_read_unlock();
0074
0075 BUG_ON(page && page->index != idx);
0076
0077 return page;
0078 }
0079
0080
0081
0082
0083
0084
/*
 * Return the backing page for @sector, allocating and inserting a new
 * zeroed page if none exists yet.
 *
 * Returns the page on success, or NULL if the page allocation or the
 * radix tree preload fails. May sleep (GFP_NOIO allocation), so it
 * must not be called from atomic context.
 */
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
{
	pgoff_t idx;
	struct page *page;
	gfp_t gfp_flags;

	page = brd_lookup_page(brd, sector);
	if (page)
		return page;

	/*
	 * GFP_NOIO: we are on the I/O path, so recursing into
	 * reclaim-initiated I/O could deadlock. __GFP_ZERO provides the
	 * "unwritten blocks read as zero" semantics of a fresh disk.
	 */
	gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
	page = alloc_page(gfp_flags);
	if (!page)
		return NULL;

	/* Preload so the insertion under the spinlock cannot hit -ENOMEM. */
	if (radix_tree_preload(GFP_NOIO)) {
		__free_page(page);
		return NULL;
	}

	spin_lock(&brd->brd_lock);
	idx = sector >> PAGE_SECTORS_SHIFT;
	page->index = idx;
	if (radix_tree_insert(&brd->brd_pages, idx, page)) {
		/* Lost a race with a concurrent inserter: use the winner's page. */
		__free_page(page);
		page = radix_tree_lookup(&brd->brd_pages, idx);
		BUG_ON(!page);
		BUG_ON(page->index != idx);
	} else {
		brd->brd_nr_pages++;
	}
	spin_unlock(&brd->brd_lock);

	radix_tree_preload_end();

	return page;
}
0126
0127
0128
0129
0130
/*
 * Free all backing pages and remove them from the radix tree, in
 * batches of FREE_BATCH. Only called at device teardown, so no
 * concurrent lookups or insertions are possible and no locking is
 * taken here.
 */
#define FREE_BATCH 16
static void brd_free_pages(struct brd_device *brd)
{
	unsigned long pos = 0;
	struct page *pages[FREE_BATCH];
	int nr_pages;

	do {
		int i;

		/* Fetch up to FREE_BATCH pages at index >= pos. */
		nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
				(void **)pages, pos, FREE_BATCH);

		for (i = 0; i < nr_pages; i++) {
			void *ret;

			/* Gang lookup returns entries in ascending index order. */
			BUG_ON(pages[i]->index < pos);
			pos = pages[i]->index;
			ret = radix_tree_delete(&brd->brd_pages, pos);
			BUG_ON(!ret || ret != pages[i]);
			__free_page(pages[i]);
		}

		/* Resume the next batch just past the last index freed. */
		pos++;

		/* Freeing a large device can take a while; don't hog the CPU. */
		cond_resched();

		/*
		 * A short batch means the tree held fewer than FREE_BATCH
		 * entries at or beyond pos, i.e. we are done.
		 */
	} while (nr_pages == FREE_BATCH);
}
0169
0170
0171
0172
0173 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
0174 {
0175 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
0176 size_t copy;
0177
0178 copy = min_t(size_t, n, PAGE_SIZE - offset);
0179 if (!brd_insert_page(brd, sector))
0180 return -ENOSPC;
0181 if (copy < n) {
0182 sector += copy >> SECTOR_SHIFT;
0183 if (!brd_insert_page(brd, sector))
0184 return -ENOSPC;
0185 }
0186 return 0;
0187 }
0188
0189
0190
0191
0192 static void copy_to_brd(struct brd_device *brd, const void *src,
0193 sector_t sector, size_t n)
0194 {
0195 struct page *page;
0196 void *dst;
0197 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
0198 size_t copy;
0199
0200 copy = min_t(size_t, n, PAGE_SIZE - offset);
0201 page = brd_lookup_page(brd, sector);
0202 BUG_ON(!page);
0203
0204 dst = kmap_atomic(page);
0205 memcpy(dst + offset, src, copy);
0206 kunmap_atomic(dst);
0207
0208 if (copy < n) {
0209 src += copy;
0210 sector += copy >> SECTOR_SHIFT;
0211 copy = n - copy;
0212 page = brd_lookup_page(brd, sector);
0213 BUG_ON(!page);
0214
0215 dst = kmap_atomic(page);
0216 memcpy(dst, src, copy);
0217 kunmap_atomic(dst);
0218 }
0219 }
0220
0221
0222
0223
0224 static void copy_from_brd(void *dst, struct brd_device *brd,
0225 sector_t sector, size_t n)
0226 {
0227 struct page *page;
0228 void *src;
0229 unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
0230 size_t copy;
0231
0232 copy = min_t(size_t, n, PAGE_SIZE - offset);
0233 page = brd_lookup_page(brd, sector);
0234 if (page) {
0235 src = kmap_atomic(page);
0236 memcpy(dst, src + offset, copy);
0237 kunmap_atomic(src);
0238 } else
0239 memset(dst, 0, copy);
0240
0241 if (copy < n) {
0242 dst += copy;
0243 sector += copy >> SECTOR_SHIFT;
0244 copy = n - copy;
0245 page = brd_lookup_page(brd, sector);
0246 if (page) {
0247 src = kmap_atomic(page);
0248 memcpy(dst, src, copy);
0249 kunmap_atomic(src);
0250 } else
0251 memset(dst, 0, copy);
0252 }
0253 }
0254
0255
0256
0257
0258 static int brd_do_bvec(struct brd_device *brd, struct page *page,
0259 unsigned int len, unsigned int off, enum req_op op,
0260 sector_t sector)
0261 {
0262 void *mem;
0263 int err = 0;
0264
0265 if (op_is_write(op)) {
0266 err = copy_to_brd_setup(brd, sector, len);
0267 if (err)
0268 goto out;
0269 }
0270
0271 mem = kmap_atomic(page);
0272 if (!op_is_write(op)) {
0273 copy_from_brd(mem + off, brd, sector, len);
0274 flush_dcache_page(page);
0275 } else {
0276 flush_dcache_page(page);
0277 copy_to_brd(brd, mem + off, sector, len);
0278 }
0279 kunmap_atomic(mem);
0280
0281 out:
0282 return err;
0283 }
0284
0285 static void brd_submit_bio(struct bio *bio)
0286 {
0287 struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
0288 sector_t sector = bio->bi_iter.bi_sector;
0289 struct bio_vec bvec;
0290 struct bvec_iter iter;
0291
0292 bio_for_each_segment(bvec, bio, iter) {
0293 unsigned int len = bvec.bv_len;
0294 int err;
0295
0296
0297 WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
0298 (len & (SECTOR_SIZE - 1)));
0299
0300 err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
0301 bio_op(bio), sector);
0302 if (err) {
0303 bio_io_error(bio);
0304 return;
0305 }
0306 sector += len >> SECTOR_SHIFT;
0307 }
0308
0309 bio_endio(bio);
0310 }
0311
0312 static int brd_rw_page(struct block_device *bdev, sector_t sector,
0313 struct page *page, enum req_op op)
0314 {
0315 struct brd_device *brd = bdev->bd_disk->private_data;
0316 int err;
0317
0318 if (PageTransHuge(page))
0319 return -ENOTSUPP;
0320 err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
0321 page_endio(page, op_is_write(op), err);
0322 return err;
0323 }
0324
/* Block device operations: brd is bio-based and also supports ->rw_page. */
static const struct block_device_operations brd_fops = {
	.owner = THIS_MODULE,
	.submit_bio = brd_submit_bio,
	.rw_page = brd_rw_page,
};
0330
0331
0332
0333
/* Module parameters: set at load time, read-only (0444) via sysfs. */

/* Number of devices created up front at module init. */
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");

/* Size of each device in KiB. Non-static: presumably referenced by the
 * initrd code (<linux/initrd.h> is included above) — verify before
 * making it static. */
unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");

/* Minor numbers reserved per device (i.e. max partitions per disk). */
static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");

MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
0349
#ifndef MODULE
/* Legacy kernel command line parameter: "ramdisk_size=<kbytes>". */
static int __init ramdisk_size(char *str)
{
	rd_size = simple_strtol(str, NULL, 0);
	return 1;
}
__setup("ramdisk_size=", ramdisk_size);
#endif
0359
0360
0361
0362
0363
/* All allocated devices, linked through brd_device::brd_list. */
static LIST_HEAD(brd_devices);
/* Parent debugfs dir ("ramdisk_pages") holding per-device page counters. */
static struct dentry *brd_debugfs_dir;
0366
0367 static int brd_alloc(int i)
0368 {
0369 struct brd_device *brd;
0370 struct gendisk *disk;
0371 char buf[DISK_NAME_LEN];
0372 int err = -ENOMEM;
0373
0374 list_for_each_entry(brd, &brd_devices, brd_list)
0375 if (brd->brd_number == i)
0376 return -EEXIST;
0377 brd = kzalloc(sizeof(*brd), GFP_KERNEL);
0378 if (!brd)
0379 return -ENOMEM;
0380 brd->brd_number = i;
0381 list_add_tail(&brd->brd_list, &brd_devices);
0382
0383 spin_lock_init(&brd->brd_lock);
0384 INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
0385
0386 snprintf(buf, DISK_NAME_LEN, "ram%d", i);
0387 if (!IS_ERR_OR_NULL(brd_debugfs_dir))
0388 debugfs_create_u64(buf, 0444, brd_debugfs_dir,
0389 &brd->brd_nr_pages);
0390
0391 disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
0392 if (!disk)
0393 goto out_free_dev;
0394
0395 disk->major = RAMDISK_MAJOR;
0396 disk->first_minor = i * max_part;
0397 disk->minors = max_part;
0398 disk->fops = &brd_fops;
0399 disk->private_data = brd;
0400 strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
0401 set_capacity(disk, rd_size * 2);
0402
0403
0404
0405
0406
0407
0408
0409
0410 blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
0411
0412
0413 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
0414 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
0415 err = add_disk(disk);
0416 if (err)
0417 goto out_cleanup_disk;
0418
0419 return 0;
0420
0421 out_cleanup_disk:
0422 put_disk(disk);
0423 out_free_dev:
0424 list_del(&brd->brd_list);
0425 kfree(brd);
0426 return err;
0427 }
0428
/*
 * Probe callback registered via __register_blkdev(): allocate the brd
 * device backing a minor number on demand. The brd_alloc() return value
 * is deliberately ignored — if allocation fails, no gendisk exists and
 * the triggering open simply fails.
 */
static void brd_probe(dev_t dev)
{
	brd_alloc(MINOR(dev) / max_part);
}
0433
/*
 * Tear down every brd device: remove the debugfs tree, unregister and
 * drop each gendisk, free all backing pages, and release the per-device
 * structures. Used both on module exit and on a failed init.
 */
static void brd_cleanup(void)
{
	struct brd_device *brd, *next;

	debugfs_remove_recursive(brd_debugfs_dir);

	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
		del_gendisk(brd->brd_disk);
		put_disk(brd->brd_disk);
		brd_free_pages(brd);
		list_del(&brd->brd_list);
		kfree(brd);
	}
}
0448
/*
 * Sanitize the max_part module parameter:
 *  - zero makes no sense, fall back to 1;
 *  - round up to a power of two when it does not divide the minor
 *    space evenly;
 *  - clamp to DISK_MAX_PARTS.
 */
static inline void brd_check_and_reset_par(void)
{
	if (unlikely(!max_part))
		max_part = 1;

	/*
	 * If max_part does not divide (1U << MINORBITS) evenly, devices
	 * would get overlapping minor ranges; round it up to the next
	 * power of two so the division is exact.
	 */
	if ((1U << MINORBITS) % max_part != 0)
		max_part = 1UL << fls(max_part);

	if (max_part > DISK_MAX_PARTS) {
		pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
			DISK_MAX_PARTS, DISK_MAX_PARTS);
		max_part = DISK_MAX_PARTS;
	}
}
0467
/*
 * Module init: create rd_nr devices up front, then register the
 * RAMDISK major with a probe callback so further minors can be
 * instantiated on demand.
 *
 * Returns 0 on success; on failure every partially-created device is
 * torn down via brd_cleanup().
 */
static int __init brd_init(void)
{
	int err, i;

	brd_check_and_reset_par();

	/* Parent directory for the per-device page counters. */
	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);

	for (i = 0; i < rd_nr; i++) {
		err = brd_alloc(i);
		if (err)
			goto out_free;
	}

	/*
	 * Registering with a probe callback (brd_probe) lets additional
	 * devices be created lazily when their dev node is first
	 * accessed, beyond the rd_nr created above.
	 */
	if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
		err = -EIO;
		goto out_free;
	}

	pr_info("brd: module loaded\n");
	return 0;

out_free:
	brd_cleanup();

	pr_info("brd: module NOT loaded !!!\n");
	return err;
}
0511
/* Module exit: drop the major registration, then free every device. */
static void __exit brd_exit(void)
{

	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
	brd_cleanup();

	pr_info("brd: module unloaded\n");
}
0520
/* Standard module entry/exit hookup. */
module_init(brd_init);
module_exit(brd_exit);
0523