0001
0002
0003
0004
0005
0006
0007 #include "dm-thin-metadata.h"
0008 #include "persistent-data/dm-btree.h"
0009 #include "persistent-data/dm-space-map.h"
0010 #include "persistent-data/dm-space-map-disk.h"
0011 #include "persistent-data/dm-transaction-manager.h"
0012
0013 #include <linux/list.h>
0014 #include <linux/device-mapper.h>
0015 #include <linux/workqueue.h>
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
/* Prefix for this file's device-mapper log messages. */
#define DM_MSG_PREFIX "thin metadata"

/*
 * Superblock identity: the magic number is verified by sb_check(), and the
 * superblock is always locked at block THIN_SUPERBLOCK_LOCATION.
 */
#define THIN_SUPERBLOCK_MAGIC 27022010
#define THIN_SUPERBLOCK_LOCATION 0

/* Version written into a freshly formatted superblock. */
#define THIN_VERSION 2

/*
 * Right shift used to turn the metadata device's sector count into the
 * block count stored in the superblock.
 */
#define SECTOR_TO_BLOCK_SHIFT 3

/* Passed as the last argument to dm_block_manager_create(). */
#define THIN_MAX_CONCURRENT_LOCKS 6

/* Number of bytes reserved for each saved space map root. */
#define SPACE_MAP_ROOT_SIZE 128
0094
0095
0096
0097
0098 struct thin_disk_superblock {
0099 __le32 csum;
0100 __le32 flags;
0101 __le64 blocknr;
0102
0103 __u8 uuid[16];
0104 __le64 magic;
0105 __le32 version;
0106 __le32 time;
0107
0108 __le64 trans_id;
0109
0110
0111
0112
0113 __le64 held_root;
0114
0115 __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
0116 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
0117
0118
0119
0120
0121 __le64 data_mapping_root;
0122
0123
0124
0125
0126 __le64 device_details_root;
0127
0128 __le32 data_block_size;
0129
0130 __le32 metadata_block_size;
0131 __le64 metadata_nr_blocks;
0132
0133 __le32 compat_flags;
0134 __le32 compat_ro_flags;
0135 __le32 incompat_flags;
0136 } __packed;
0137
0138 struct disk_device_details {
0139 __le64 mapped_blocks;
0140 __le64 transaction_id;
0141 __le32 creation_time;
0142 __le32 snapshotted_time;
0143 } __packed;
0144
0145 struct dm_pool_metadata {
0146 struct hlist_node hash;
0147
0148 struct block_device *bdev;
0149 struct dm_block_manager *bm;
0150 struct dm_space_map *metadata_sm;
0151 struct dm_space_map *data_sm;
0152 struct dm_transaction_manager *tm;
0153 struct dm_transaction_manager *nb_tm;
0154
0155
0156
0157
0158
0159
0160 struct dm_btree_info info;
0161
0162
0163
0164
0165 struct dm_btree_info nb_info;
0166
0167
0168
0169
0170 struct dm_btree_info tl_info;
0171
0172
0173
0174
0175 struct dm_btree_info bl_info;
0176
0177
0178
0179
0180 struct dm_btree_info details_info;
0181
0182 struct rw_semaphore root_lock;
0183 uint32_t time;
0184 dm_block_t root;
0185 dm_block_t details_root;
0186 struct list_head thin_devices;
0187 uint64_t trans_id;
0188 unsigned long flags;
0189 sector_t data_block_size;
0190
0191
0192
0193
0194
0195
0196
0197 dm_pool_pre_commit_fn pre_commit_fn;
0198 void *pre_commit_context;
0199
0200
0201
0202
0203
0204 dm_block_t metadata_reserve;
0205
0206
0207
0208
0209
0210
0211 bool fail_io:1;
0212
0213
0214
0215
0216
0217
0218 bool in_service:1;
0219
0220
0221
0222
0223
0224 __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
0225 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
0226 };
0227
0228 struct dm_thin_device {
0229 struct list_head list;
0230 struct dm_pool_metadata *pmd;
0231 dm_thin_id id;
0232
0233 int open_count;
0234 bool changed:1;
0235 bool aborted_with_changes:1;
0236 uint64_t mapped_blocks;
0237 uint64_t transaction_id;
0238 uint32_t creation_time;
0239 uint32_t snapshotted_time;
0240 };
0241
0242
0243
0244
0245
0246 #define SUPERBLOCK_CSUM_XOR 160774
0247
0248 static void sb_prepare_for_write(struct dm_block_validator *v,
0249 struct dm_block *b,
0250 size_t block_size)
0251 {
0252 struct thin_disk_superblock *disk_super = dm_block_data(b);
0253
0254 disk_super->blocknr = cpu_to_le64(dm_block_location(b));
0255 disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
0256 block_size - sizeof(__le32),
0257 SUPERBLOCK_CSUM_XOR));
0258 }
0259
0260 static int sb_check(struct dm_block_validator *v,
0261 struct dm_block *b,
0262 size_t block_size)
0263 {
0264 struct thin_disk_superblock *disk_super = dm_block_data(b);
0265 __le32 csum_le;
0266
0267 if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
0268 DMERR("sb_check failed: blocknr %llu: "
0269 "wanted %llu", le64_to_cpu(disk_super->blocknr),
0270 (unsigned long long)dm_block_location(b));
0271 return -ENOTBLK;
0272 }
0273
0274 if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) {
0275 DMERR("sb_check failed: magic %llu: "
0276 "wanted %llu", le64_to_cpu(disk_super->magic),
0277 (unsigned long long)THIN_SUPERBLOCK_MAGIC);
0278 return -EILSEQ;
0279 }
0280
0281 csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
0282 block_size - sizeof(__le32),
0283 SUPERBLOCK_CSUM_XOR));
0284 if (csum_le != disk_super->csum) {
0285 DMERR("sb_check failed: csum %u: wanted %u",
0286 le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
0287 return -EILSEQ;
0288 }
0289
0290 return 0;
0291 }
0292
0293 static struct dm_block_validator sb_validator = {
0294 .name = "superblock",
0295 .prepare_for_write = sb_prepare_for_write,
0296 .check = sb_check
0297 };
0298
0299
0300
0301
0302
0303 static uint64_t pack_block_time(dm_block_t b, uint32_t t)
0304 {
0305 return (b << 24) | t;
0306 }
0307
0308 static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
0309 {
0310 *b = v >> 24;
0311 *t = v & ((1 << 24) - 1);
0312 }
0313
0314
0315
0316
0317
0318
0319 typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t);
0320
0321 static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn)
0322 {
0323 uint64_t b, begin, end;
0324 uint32_t t;
0325 bool in_run = false;
0326 unsigned i;
0327
0328 for (i = 0; i < count; i++, value_le++) {
0329
0330 unpack_block_time(le64_to_cpu(*value_le), &b, &t);
0331
0332 if (in_run) {
0333 if (b == end) {
0334 end++;
0335 } else {
0336 fn(sm, begin, end);
0337 begin = b;
0338 end = b + 1;
0339 }
0340 } else {
0341 in_run = true;
0342 begin = b;
0343 end = b + 1;
0344 }
0345 }
0346
0347 if (in_run)
0348 fn(sm, begin, end);
0349 }
0350
0351 static void data_block_inc(void *context, const void *value_le, unsigned count)
0352 {
0353 with_runs((struct dm_space_map *) context,
0354 (const __le64 *) value_le, count, dm_sm_inc_blocks);
0355 }
0356
0357 static void data_block_dec(void *context, const void *value_le, unsigned count)
0358 {
0359 with_runs((struct dm_space_map *) context,
0360 (const __le64 *) value_le, count, dm_sm_dec_blocks);
0361 }
0362
0363 static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
0364 {
0365 __le64 v1_le, v2_le;
0366 uint64_t b1, b2;
0367 uint32_t t;
0368
0369 memcpy(&v1_le, value1_le, sizeof(v1_le));
0370 memcpy(&v2_le, value2_le, sizeof(v2_le));
0371 unpack_block_time(le64_to_cpu(v1_le), &b1, &t);
0372 unpack_block_time(le64_to_cpu(v2_le), &b2, &t);
0373
0374 return b1 == b2;
0375 }
0376
0377 static void subtree_inc(void *context, const void *value, unsigned count)
0378 {
0379 struct dm_btree_info *info = context;
0380 const __le64 *root_le = value;
0381 unsigned i;
0382
0383 for (i = 0; i < count; i++, root_le++)
0384 dm_tm_inc(info->tm, le64_to_cpu(*root_le));
0385 }
0386
0387 static void subtree_dec(void *context, const void *value, unsigned count)
0388 {
0389 struct dm_btree_info *info = context;
0390 const __le64 *root_le = value;
0391 unsigned i;
0392
0393 for (i = 0; i < count; i++, root_le++)
0394 if (dm_btree_del(info, le64_to_cpu(*root_le)))
0395 DMERR("btree delete failed");
0396 }
0397
0398 static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
0399 {
0400 __le64 v1_le, v2_le;
0401 memcpy(&v1_le, value1_le, sizeof(v1_le));
0402 memcpy(&v2_le, value2_le, sizeof(v2_le));
0403
0404 return v1_le == v2_le;
0405 }
0406
0407
0408
0409
0410
0411
0412
0413 static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd)
0414 __acquires(pmd->root_lock)
0415 {
0416 down_write(&pmd->root_lock);
0417 }
0418
0419 static inline void pmd_write_lock(struct dm_pool_metadata *pmd)
0420 {
0421 pmd_write_lock_in_core(pmd);
0422 if (unlikely(!pmd->in_service))
0423 pmd->in_service = true;
0424 }
0425
0426 static inline void pmd_write_unlock(struct dm_pool_metadata *pmd)
0427 __releases(pmd->root_lock)
0428 {
0429 up_write(&pmd->root_lock);
0430 }
0431
0432
0433
0434 static int superblock_lock_zero(struct dm_pool_metadata *pmd,
0435 struct dm_block **sblock)
0436 {
0437 return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION,
0438 &sb_validator, sblock);
0439 }
0440
0441 static int superblock_lock(struct dm_pool_metadata *pmd,
0442 struct dm_block **sblock)
0443 {
0444 return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
0445 &sb_validator, sblock);
0446 }
0447
0448 static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
0449 {
0450 int r;
0451 unsigned i;
0452 struct dm_block *b;
0453 __le64 *data_le, zero = cpu_to_le64(0);
0454 unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64);
0455
0456
0457
0458
0459 r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b);
0460 if (r)
0461 return r;
0462
0463 data_le = dm_block_data(b);
0464 *result = 1;
0465 for (i = 0; i < block_size; i++) {
0466 if (data_le[i] != zero) {
0467 *result = 0;
0468 break;
0469 }
0470 }
0471
0472 dm_bm_unlock(b);
0473
0474 return 0;
0475 }
0476
0477 static void __setup_btree_details(struct dm_pool_metadata *pmd)
0478 {
0479 pmd->info.tm = pmd->tm;
0480 pmd->info.levels = 2;
0481 pmd->info.value_type.context = pmd->data_sm;
0482 pmd->info.value_type.size = sizeof(__le64);
0483 pmd->info.value_type.inc = data_block_inc;
0484 pmd->info.value_type.dec = data_block_dec;
0485 pmd->info.value_type.equal = data_block_equal;
0486
0487 memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info));
0488 pmd->nb_info.tm = pmd->nb_tm;
0489
0490 pmd->tl_info.tm = pmd->tm;
0491 pmd->tl_info.levels = 1;
0492 pmd->tl_info.value_type.context = &pmd->bl_info;
0493 pmd->tl_info.value_type.size = sizeof(__le64);
0494 pmd->tl_info.value_type.inc = subtree_inc;
0495 pmd->tl_info.value_type.dec = subtree_dec;
0496 pmd->tl_info.value_type.equal = subtree_equal;
0497
0498 pmd->bl_info.tm = pmd->tm;
0499 pmd->bl_info.levels = 1;
0500 pmd->bl_info.value_type.context = pmd->data_sm;
0501 pmd->bl_info.value_type.size = sizeof(__le64);
0502 pmd->bl_info.value_type.inc = data_block_inc;
0503 pmd->bl_info.value_type.dec = data_block_dec;
0504 pmd->bl_info.value_type.equal = data_block_equal;
0505
0506 pmd->details_info.tm = pmd->tm;
0507 pmd->details_info.levels = 1;
0508 pmd->details_info.value_type.context = NULL;
0509 pmd->details_info.value_type.size = sizeof(struct disk_device_details);
0510 pmd->details_info.value_type.inc = NULL;
0511 pmd->details_info.value_type.dec = NULL;
0512 pmd->details_info.value_type.equal = NULL;
0513 }
0514
0515 static int save_sm_roots(struct dm_pool_metadata *pmd)
0516 {
0517 int r;
0518 size_t len;
0519
0520 r = dm_sm_root_size(pmd->metadata_sm, &len);
0521 if (r < 0)
0522 return r;
0523
0524 r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len);
0525 if (r < 0)
0526 return r;
0527
0528 r = dm_sm_root_size(pmd->data_sm, &len);
0529 if (r < 0)
0530 return r;
0531
0532 return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len);
0533 }
0534
0535 static void copy_sm_roots(struct dm_pool_metadata *pmd,
0536 struct thin_disk_superblock *disk)
0537 {
0538 memcpy(&disk->metadata_space_map_root,
0539 &pmd->metadata_space_map_root,
0540 sizeof(pmd->metadata_space_map_root));
0541
0542 memcpy(&disk->data_space_map_root,
0543 &pmd->data_space_map_root,
0544 sizeof(pmd->data_space_map_root));
0545 }
0546
0547 static int __write_initial_superblock(struct dm_pool_metadata *pmd)
0548 {
0549 int r;
0550 struct dm_block *sblock;
0551 struct thin_disk_superblock *disk_super;
0552 sector_t bdev_size = bdev_nr_sectors(pmd->bdev);
0553
0554 if (bdev_size > THIN_METADATA_MAX_SECTORS)
0555 bdev_size = THIN_METADATA_MAX_SECTORS;
0556
0557 r = dm_sm_commit(pmd->data_sm);
0558 if (r < 0)
0559 return r;
0560
0561 r = dm_tm_pre_commit(pmd->tm);
0562 if (r < 0)
0563 return r;
0564
0565 r = save_sm_roots(pmd);
0566 if (r < 0)
0567 return r;
0568
0569 r = superblock_lock_zero(pmd, &sblock);
0570 if (r)
0571 return r;
0572
0573 disk_super = dm_block_data(sblock);
0574 disk_super->flags = 0;
0575 memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
0576 disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC);
0577 disk_super->version = cpu_to_le32(THIN_VERSION);
0578 disk_super->time = 0;
0579 disk_super->trans_id = 0;
0580 disk_super->held_root = 0;
0581
0582 copy_sm_roots(pmd, disk_super);
0583
0584 disk_super->data_mapping_root = cpu_to_le64(pmd->root);
0585 disk_super->device_details_root = cpu_to_le64(pmd->details_root);
0586 disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE);
0587 disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT);
0588 disk_super->data_block_size = cpu_to_le32(pmd->data_block_size);
0589
0590 return dm_tm_commit(pmd->tm, sblock);
0591 }
0592
0593 static int __format_metadata(struct dm_pool_metadata *pmd)
0594 {
0595 int r;
0596
0597 r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
0598 &pmd->tm, &pmd->metadata_sm);
0599 if (r < 0) {
0600 DMERR("tm_create_with_sm failed");
0601 return r;
0602 }
0603
0604 pmd->data_sm = dm_sm_disk_create(pmd->tm, 0);
0605 if (IS_ERR(pmd->data_sm)) {
0606 DMERR("sm_disk_create failed");
0607 r = PTR_ERR(pmd->data_sm);
0608 goto bad_cleanup_tm;
0609 }
0610
0611 pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm);
0612 if (!pmd->nb_tm) {
0613 DMERR("could not create non-blocking clone tm");
0614 r = -ENOMEM;
0615 goto bad_cleanup_data_sm;
0616 }
0617
0618 __setup_btree_details(pmd);
0619
0620 r = dm_btree_empty(&pmd->info, &pmd->root);
0621 if (r < 0)
0622 goto bad_cleanup_nb_tm;
0623
0624 r = dm_btree_empty(&pmd->details_info, &pmd->details_root);
0625 if (r < 0) {
0626 DMERR("couldn't create devices root");
0627 goto bad_cleanup_nb_tm;
0628 }
0629
0630 r = __write_initial_superblock(pmd);
0631 if (r)
0632 goto bad_cleanup_nb_tm;
0633
0634 return 0;
0635
0636 bad_cleanup_nb_tm:
0637 dm_tm_destroy(pmd->nb_tm);
0638 bad_cleanup_data_sm:
0639 dm_sm_destroy(pmd->data_sm);
0640 bad_cleanup_tm:
0641 dm_tm_destroy(pmd->tm);
0642 dm_sm_destroy(pmd->metadata_sm);
0643
0644 return r;
0645 }
0646
0647 static int __check_incompat_features(struct thin_disk_superblock *disk_super,
0648 struct dm_pool_metadata *pmd)
0649 {
0650 uint32_t features;
0651
0652 features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP;
0653 if (features) {
0654 DMERR("could not access metadata due to unsupported optional features (%lx).",
0655 (unsigned long)features);
0656 return -EINVAL;
0657 }
0658
0659
0660
0661
0662 if (bdev_read_only(pmd->bdev))
0663 return 0;
0664
0665 features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP;
0666 if (features) {
0667 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
0668 (unsigned long)features);
0669 return -EINVAL;
0670 }
0671
0672 return 0;
0673 }
0674
0675 static int __open_metadata(struct dm_pool_metadata *pmd)
0676 {
0677 int r;
0678 struct dm_block *sblock;
0679 struct thin_disk_superblock *disk_super;
0680
0681 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
0682 &sb_validator, &sblock);
0683 if (r < 0) {
0684 DMERR("couldn't read superblock");
0685 return r;
0686 }
0687
0688 disk_super = dm_block_data(sblock);
0689
0690
0691 if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) {
0692 DMERR("changing the data block size (from %u to %llu) is not supported",
0693 le32_to_cpu(disk_super->data_block_size),
0694 (unsigned long long)pmd->data_block_size);
0695 r = -EINVAL;
0696 goto bad_unlock_sblock;
0697 }
0698
0699 r = __check_incompat_features(disk_super, pmd);
0700 if (r < 0)
0701 goto bad_unlock_sblock;
0702
0703 r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
0704 disk_super->metadata_space_map_root,
0705 sizeof(disk_super->metadata_space_map_root),
0706 &pmd->tm, &pmd->metadata_sm);
0707 if (r < 0) {
0708 DMERR("tm_open_with_sm failed");
0709 goto bad_unlock_sblock;
0710 }
0711
0712 pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root,
0713 sizeof(disk_super->data_space_map_root));
0714 if (IS_ERR(pmd->data_sm)) {
0715 DMERR("sm_disk_open failed");
0716 r = PTR_ERR(pmd->data_sm);
0717 goto bad_cleanup_tm;
0718 }
0719
0720 pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm);
0721 if (!pmd->nb_tm) {
0722 DMERR("could not create non-blocking clone tm");
0723 r = -ENOMEM;
0724 goto bad_cleanup_data_sm;
0725 }
0726
0727 __setup_btree_details(pmd);
0728 dm_bm_unlock(sblock);
0729
0730 return 0;
0731
0732 bad_cleanup_data_sm:
0733 dm_sm_destroy(pmd->data_sm);
0734 bad_cleanup_tm:
0735 dm_tm_destroy(pmd->tm);
0736 dm_sm_destroy(pmd->metadata_sm);
0737 bad_unlock_sblock:
0738 dm_bm_unlock(sblock);
0739
0740 return r;
0741 }
0742
0743 static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device)
0744 {
0745 int r, unformatted;
0746
0747 r = __superblock_all_zeroes(pmd->bm, &unformatted);
0748 if (r)
0749 return r;
0750
0751 if (unformatted)
0752 return format_device ? __format_metadata(pmd) : -EPERM;
0753
0754 return __open_metadata(pmd);
0755 }
0756
0757 static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device)
0758 {
0759 int r;
0760
0761 pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
0762 THIN_MAX_CONCURRENT_LOCKS);
0763 if (IS_ERR(pmd->bm)) {
0764 DMERR("could not create block manager");
0765 r = PTR_ERR(pmd->bm);
0766 pmd->bm = NULL;
0767 return r;
0768 }
0769
0770 r = __open_or_format_metadata(pmd, format_device);
0771 if (r) {
0772 dm_block_manager_destroy(pmd->bm);
0773 pmd->bm = NULL;
0774 }
0775
0776 return r;
0777 }
0778
0779 static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
0780 {
0781 dm_sm_destroy(pmd->data_sm);
0782 dm_sm_destroy(pmd->metadata_sm);
0783 dm_tm_destroy(pmd->nb_tm);
0784 dm_tm_destroy(pmd->tm);
0785 dm_block_manager_destroy(pmd->bm);
0786 }
0787
0788 static int __begin_transaction(struct dm_pool_metadata *pmd)
0789 {
0790 int r;
0791 struct thin_disk_superblock *disk_super;
0792 struct dm_block *sblock;
0793
0794
0795
0796
0797
0798 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
0799 &sb_validator, &sblock);
0800 if (r)
0801 return r;
0802
0803 disk_super = dm_block_data(sblock);
0804 pmd->time = le32_to_cpu(disk_super->time);
0805 pmd->root = le64_to_cpu(disk_super->data_mapping_root);
0806 pmd->details_root = le64_to_cpu(disk_super->device_details_root);
0807 pmd->trans_id = le64_to_cpu(disk_super->trans_id);
0808 pmd->flags = le32_to_cpu(disk_super->flags);
0809 pmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
0810
0811 dm_bm_unlock(sblock);
0812 return 0;
0813 }
0814
0815 static int __write_changed_details(struct dm_pool_metadata *pmd)
0816 {
0817 int r;
0818 struct dm_thin_device *td, *tmp;
0819 struct disk_device_details details;
0820 uint64_t key;
0821
0822 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
0823 if (!td->changed)
0824 continue;
0825
0826 key = td->id;
0827
0828 details.mapped_blocks = cpu_to_le64(td->mapped_blocks);
0829 details.transaction_id = cpu_to_le64(td->transaction_id);
0830 details.creation_time = cpu_to_le32(td->creation_time);
0831 details.snapshotted_time = cpu_to_le32(td->snapshotted_time);
0832 __dm_bless_for_disk(&details);
0833
0834 r = dm_btree_insert(&pmd->details_info, pmd->details_root,
0835 &key, &details, &pmd->details_root);
0836 if (r)
0837 return r;
0838
0839 if (td->open_count)
0840 td->changed = false;
0841 else {
0842 list_del(&td->list);
0843 kfree(td);
0844 }
0845 }
0846
0847 return 0;
0848 }
0849
0850 static int __commit_transaction(struct dm_pool_metadata *pmd)
0851 {
0852 int r;
0853 struct thin_disk_superblock *disk_super;
0854 struct dm_block *sblock;
0855
0856
0857
0858
0859 BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
0860 BUG_ON(!rwsem_is_locked(&pmd->root_lock));
0861
0862 if (unlikely(!pmd->in_service))
0863 return 0;
0864
0865 if (pmd->pre_commit_fn) {
0866 r = pmd->pre_commit_fn(pmd->pre_commit_context);
0867 if (r < 0) {
0868 DMERR("pre-commit callback failed");
0869 return r;
0870 }
0871 }
0872
0873 r = __write_changed_details(pmd);
0874 if (r < 0)
0875 return r;
0876
0877 r = dm_sm_commit(pmd->data_sm);
0878 if (r < 0)
0879 return r;
0880
0881 r = dm_tm_pre_commit(pmd->tm);
0882 if (r < 0)
0883 return r;
0884
0885 r = save_sm_roots(pmd);
0886 if (r < 0)
0887 return r;
0888
0889 r = superblock_lock(pmd, &sblock);
0890 if (r)
0891 return r;
0892
0893 disk_super = dm_block_data(sblock);
0894 disk_super->time = cpu_to_le32(pmd->time);
0895 disk_super->data_mapping_root = cpu_to_le64(pmd->root);
0896 disk_super->device_details_root = cpu_to_le64(pmd->details_root);
0897 disk_super->trans_id = cpu_to_le64(pmd->trans_id);
0898 disk_super->flags = cpu_to_le32(pmd->flags);
0899
0900 copy_sm_roots(pmd, disk_super);
0901
0902 return dm_tm_commit(pmd->tm, sblock);
0903 }
0904
0905 static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
0906 {
0907 int r;
0908 dm_block_t total;
0909 dm_block_t max_blocks = 4096;
0910
0911 r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
0912 if (r) {
0913 DMERR("could not get size of metadata device");
0914 pmd->metadata_reserve = max_blocks;
0915 } else
0916 pmd->metadata_reserve = min(max_blocks, div_u64(total, 10));
0917 }
0918
0919 struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
0920 sector_t data_block_size,
0921 bool format_device)
0922 {
0923 int r;
0924 struct dm_pool_metadata *pmd;
0925
0926 pmd = kmalloc(sizeof(*pmd), GFP_KERNEL);
0927 if (!pmd) {
0928 DMERR("could not allocate metadata struct");
0929 return ERR_PTR(-ENOMEM);
0930 }
0931
0932 init_rwsem(&pmd->root_lock);
0933 pmd->time = 0;
0934 INIT_LIST_HEAD(&pmd->thin_devices);
0935 pmd->fail_io = false;
0936 pmd->in_service = false;
0937 pmd->bdev = bdev;
0938 pmd->data_block_size = data_block_size;
0939 pmd->pre_commit_fn = NULL;
0940 pmd->pre_commit_context = NULL;
0941
0942 r = __create_persistent_data_objects(pmd, format_device);
0943 if (r) {
0944 kfree(pmd);
0945 return ERR_PTR(r);
0946 }
0947
0948 r = __begin_transaction(pmd);
0949 if (r < 0) {
0950 if (dm_pool_metadata_close(pmd) < 0)
0951 DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
0952 return ERR_PTR(r);
0953 }
0954
0955 __set_metadata_reserve(pmd);
0956
0957 return pmd;
0958 }
0959
0960 int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
0961 {
0962 int r;
0963 unsigned open_devices = 0;
0964 struct dm_thin_device *td, *tmp;
0965
0966 down_read(&pmd->root_lock);
0967 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
0968 if (td->open_count)
0969 open_devices++;
0970 else {
0971 list_del(&td->list);
0972 kfree(td);
0973 }
0974 }
0975 up_read(&pmd->root_lock);
0976
0977 if (open_devices) {
0978 DMERR("attempt to close pmd when %u device(s) are still open",
0979 open_devices);
0980 return -EBUSY;
0981 }
0982
0983 pmd_write_lock_in_core(pmd);
0984 if (!pmd->fail_io && !dm_bm_is_read_only(pmd->bm)) {
0985 r = __commit_transaction(pmd);
0986 if (r < 0)
0987 DMWARN("%s: __commit_transaction() failed, error = %d",
0988 __func__, r);
0989 }
0990 pmd_write_unlock(pmd);
0991 if (!pmd->fail_io)
0992 __destroy_persistent_data_objects(pmd);
0993
0994 kfree(pmd);
0995 return 0;
0996 }
0997
0998
0999
1000
1001
1002
1003 static int __open_device(struct dm_pool_metadata *pmd,
1004 dm_thin_id dev, int create,
1005 struct dm_thin_device **td)
1006 {
1007 int r, changed = 0;
1008 struct dm_thin_device *td2;
1009 uint64_t key = dev;
1010 struct disk_device_details details_le;
1011
1012
1013
1014
1015 list_for_each_entry(td2, &pmd->thin_devices, list)
1016 if (td2->id == dev) {
1017
1018
1019
1020 if (create)
1021 return -EEXIST;
1022
1023 td2->open_count++;
1024 *td = td2;
1025 return 0;
1026 }
1027
1028
1029
1030
1031 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1032 &key, &details_le);
1033 if (r) {
1034 if (r != -ENODATA || !create)
1035 return r;
1036
1037
1038
1039
1040 changed = 1;
1041 details_le.mapped_blocks = 0;
1042 details_le.transaction_id = cpu_to_le64(pmd->trans_id);
1043 details_le.creation_time = cpu_to_le32(pmd->time);
1044 details_le.snapshotted_time = cpu_to_le32(pmd->time);
1045 }
1046
1047 *td = kmalloc(sizeof(**td), GFP_NOIO);
1048 if (!*td)
1049 return -ENOMEM;
1050
1051 (*td)->pmd = pmd;
1052 (*td)->id = dev;
1053 (*td)->open_count = 1;
1054 (*td)->changed = changed;
1055 (*td)->aborted_with_changes = false;
1056 (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks);
1057 (*td)->transaction_id = le64_to_cpu(details_le.transaction_id);
1058 (*td)->creation_time = le32_to_cpu(details_le.creation_time);
1059 (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time);
1060
1061 list_add(&(*td)->list, &pmd->thin_devices);
1062
1063 return 0;
1064 }
1065
1066 static void __close_device(struct dm_thin_device *td)
1067 {
1068 --td->open_count;
1069 }
1070
1071 static int __create_thin(struct dm_pool_metadata *pmd,
1072 dm_thin_id dev)
1073 {
1074 int r;
1075 dm_block_t dev_root;
1076 uint64_t key = dev;
1077 struct dm_thin_device *td;
1078 __le64 value;
1079
1080 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1081 &key, NULL);
1082 if (!r)
1083 return -EEXIST;
1084
1085
1086
1087
1088 r = dm_btree_empty(&pmd->bl_info, &dev_root);
1089 if (r)
1090 return r;
1091
1092
1093
1094
1095 value = cpu_to_le64(dev_root);
1096 __dm_bless_for_disk(&value);
1097 r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
1098 if (r) {
1099 dm_btree_del(&pmd->bl_info, dev_root);
1100 return r;
1101 }
1102
1103 r = __open_device(pmd, dev, 1, &td);
1104 if (r) {
1105 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1106 dm_btree_del(&pmd->bl_info, dev_root);
1107 return r;
1108 }
1109 __close_device(td);
1110
1111 return r;
1112 }
1113
1114 int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev)
1115 {
1116 int r = -EINVAL;
1117
1118 pmd_write_lock(pmd);
1119 if (!pmd->fail_io)
1120 r = __create_thin(pmd, dev);
1121 pmd_write_unlock(pmd);
1122
1123 return r;
1124 }
1125
1126 static int __set_snapshot_details(struct dm_pool_metadata *pmd,
1127 struct dm_thin_device *snap,
1128 dm_thin_id origin, uint32_t time)
1129 {
1130 int r;
1131 struct dm_thin_device *td;
1132
1133 r = __open_device(pmd, origin, 0, &td);
1134 if (r)
1135 return r;
1136
1137 td->changed = true;
1138 td->snapshotted_time = time;
1139
1140 snap->mapped_blocks = td->mapped_blocks;
1141 snap->snapshotted_time = time;
1142 __close_device(td);
1143
1144 return 0;
1145 }
1146
1147 static int __create_snap(struct dm_pool_metadata *pmd,
1148 dm_thin_id dev, dm_thin_id origin)
1149 {
1150 int r;
1151 dm_block_t origin_root;
1152 uint64_t key = origin, dev_key = dev;
1153 struct dm_thin_device *td;
1154 __le64 value;
1155
1156
1157 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1158 &dev_key, NULL);
1159 if (!r)
1160 return -EEXIST;
1161
1162
1163 r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value);
1164 if (r)
1165 return r;
1166 origin_root = le64_to_cpu(value);
1167
1168
1169 dm_tm_inc(pmd->tm, origin_root);
1170
1171
1172 value = cpu_to_le64(origin_root);
1173 __dm_bless_for_disk(&value);
1174 key = dev;
1175 r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
1176 if (r) {
1177 dm_tm_dec(pmd->tm, origin_root);
1178 return r;
1179 }
1180
1181 pmd->time++;
1182
1183 r = __open_device(pmd, dev, 1, &td);
1184 if (r)
1185 goto bad;
1186
1187 r = __set_snapshot_details(pmd, td, origin, pmd->time);
1188 __close_device(td);
1189
1190 if (r)
1191 goto bad;
1192
1193 return 0;
1194
1195 bad:
1196 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1197 dm_btree_remove(&pmd->details_info, pmd->details_root,
1198 &key, &pmd->details_root);
1199 return r;
1200 }
1201
1202 int dm_pool_create_snap(struct dm_pool_metadata *pmd,
1203 dm_thin_id dev,
1204 dm_thin_id origin)
1205 {
1206 int r = -EINVAL;
1207
1208 pmd_write_lock(pmd);
1209 if (!pmd->fail_io)
1210 r = __create_snap(pmd, dev, origin);
1211 pmd_write_unlock(pmd);
1212
1213 return r;
1214 }
1215
1216 static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev)
1217 {
1218 int r;
1219 uint64_t key = dev;
1220 struct dm_thin_device *td;
1221
1222
1223 r = __open_device(pmd, dev, 0, &td);
1224 if (r)
1225 return r;
1226
1227 if (td->open_count > 1) {
1228 __close_device(td);
1229 return -EBUSY;
1230 }
1231
1232 list_del(&td->list);
1233 kfree(td);
1234 r = dm_btree_remove(&pmd->details_info, pmd->details_root,
1235 &key, &pmd->details_root);
1236 if (r)
1237 return r;
1238
1239 r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1240 if (r)
1241 return r;
1242
1243 return 0;
1244 }
1245
1246 int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
1247 dm_thin_id dev)
1248 {
1249 int r = -EINVAL;
1250
1251 pmd_write_lock(pmd);
1252 if (!pmd->fail_io)
1253 r = __delete_device(pmd, dev);
1254 pmd_write_unlock(pmd);
1255
1256 return r;
1257 }
1258
1259 int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
1260 uint64_t current_id,
1261 uint64_t new_id)
1262 {
1263 int r = -EINVAL;
1264
1265 pmd_write_lock(pmd);
1266
1267 if (pmd->fail_io)
1268 goto out;
1269
1270 if (pmd->trans_id != current_id) {
1271 DMERR("mismatched transaction id");
1272 goto out;
1273 }
1274
1275 pmd->trans_id = new_id;
1276 r = 0;
1277
1278 out:
1279 pmd_write_unlock(pmd);
1280
1281 return r;
1282 }
1283
1284 int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
1285 uint64_t *result)
1286 {
1287 int r = -EINVAL;
1288
1289 down_read(&pmd->root_lock);
1290 if (!pmd->fail_io) {
1291 *result = pmd->trans_id;
1292 r = 0;
1293 }
1294 up_read(&pmd->root_lock);
1295
1296 return r;
1297 }
1298
1299 static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
1300 {
1301 int r, inc;
1302 struct thin_disk_superblock *disk_super;
1303 struct dm_block *copy, *sblock;
1304 dm_block_t held_root;
1305
1306
1307
1308
1309
1310 r = __commit_transaction(pmd);
1311 if (r < 0) {
1312 DMWARN("%s: __commit_transaction() failed, error = %d",
1313 __func__, r);
1314 return r;
1315 }
1316
1317
1318
1319
1320 dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
1321 r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
1322 &sb_validator, ©, &inc);
1323 if (r)
1324 return r;
1325
1326 BUG_ON(!inc);
1327
1328 held_root = dm_block_location(copy);
1329 disk_super = dm_block_data(copy);
1330
1331 if (le64_to_cpu(disk_super->held_root)) {
1332 DMWARN("Pool metadata snapshot already exists: release this before taking another.");
1333
1334 dm_tm_dec(pmd->tm, held_root);
1335 dm_tm_unlock(pmd->tm, copy);
1336 return -EBUSY;
1337 }
1338
1339
1340
1341
1342 memset(&disk_super->data_space_map_root, 0,
1343 sizeof(disk_super->data_space_map_root));
1344 memset(&disk_super->metadata_space_map_root, 0,
1345 sizeof(disk_super->metadata_space_map_root));
1346
1347
1348
1349
1350 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
1351 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
1352 dm_tm_unlock(pmd->tm, copy);
1353
1354
1355
1356
1357 r = superblock_lock(pmd, &sblock);
1358 if (r) {
1359 dm_tm_dec(pmd->tm, held_root);
1360 return r;
1361 }
1362
1363 disk_super = dm_block_data(sblock);
1364 disk_super->held_root = cpu_to_le64(held_root);
1365 dm_bm_unlock(sblock);
1366 return 0;
1367 }
1368
1369 int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
1370 {
1371 int r = -EINVAL;
1372
1373 pmd_write_lock(pmd);
1374 if (!pmd->fail_io)
1375 r = __reserve_metadata_snap(pmd);
1376 pmd_write_unlock(pmd);
1377
1378 return r;
1379 }
1380
1381 static int __release_metadata_snap(struct dm_pool_metadata *pmd)
1382 {
1383 int r;
1384 struct thin_disk_superblock *disk_super;
1385 struct dm_block *sblock, *copy;
1386 dm_block_t held_root;
1387
1388 r = superblock_lock(pmd, &sblock);
1389 if (r)
1390 return r;
1391
1392 disk_super = dm_block_data(sblock);
1393 held_root = le64_to_cpu(disk_super->held_root);
1394 disk_super->held_root = cpu_to_le64(0);
1395
1396 dm_bm_unlock(sblock);
1397
1398 if (!held_root) {
1399 DMWARN("No pool metadata snapshot found: nothing to release.");
1400 return -EINVAL;
1401 }
1402
1403 r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, ©);
1404 if (r)
1405 return r;
1406
1407 disk_super = dm_block_data(copy);
1408 dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root));
1409 dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root));
1410 dm_sm_dec_block(pmd->metadata_sm, held_root);
1411
1412 dm_tm_unlock(pmd->tm, copy);
1413
1414 return 0;
1415 }
1416
1417 int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
1418 {
1419 int r = -EINVAL;
1420
1421 pmd_write_lock(pmd);
1422 if (!pmd->fail_io)
1423 r = __release_metadata_snap(pmd);
1424 pmd_write_unlock(pmd);
1425
1426 return r;
1427 }
1428
1429 static int __get_metadata_snap(struct dm_pool_metadata *pmd,
1430 dm_block_t *result)
1431 {
1432 int r;
1433 struct thin_disk_superblock *disk_super;
1434 struct dm_block *sblock;
1435
1436 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1437 &sb_validator, &sblock);
1438 if (r)
1439 return r;
1440
1441 disk_super = dm_block_data(sblock);
1442 *result = le64_to_cpu(disk_super->held_root);
1443
1444 dm_bm_unlock(sblock);
1445
1446 return 0;
1447 }
1448
1449 int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
1450 dm_block_t *result)
1451 {
1452 int r = -EINVAL;
1453
1454 down_read(&pmd->root_lock);
1455 if (!pmd->fail_io)
1456 r = __get_metadata_snap(pmd, result);
1457 up_read(&pmd->root_lock);
1458
1459 return r;
1460 }
1461
1462 int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
1463 struct dm_thin_device **td)
1464 {
1465 int r = -EINVAL;
1466
1467 pmd_write_lock_in_core(pmd);
1468 if (!pmd->fail_io)
1469 r = __open_device(pmd, dev, 0, td);
1470 pmd_write_unlock(pmd);
1471
1472 return r;
1473 }
1474
1475 int dm_pool_close_thin_device(struct dm_thin_device *td)
1476 {
1477 pmd_write_lock_in_core(td->pmd);
1478 __close_device(td);
1479 pmd_write_unlock(td->pmd);
1480
1481 return 0;
1482 }
1483
1484 dm_thin_id dm_thin_dev_id(struct dm_thin_device *td)
1485 {
1486 return td->id;
1487 }
1488
1489
1490
1491
1492
1493
1494
1495 static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
1496 {
1497 return td->snapshotted_time > time;
1498 }
1499
1500 static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
1501 struct dm_thin_lookup_result *result)
1502 {
1503 uint64_t block_time = 0;
1504 dm_block_t exception_block;
1505 uint32_t exception_time;
1506
1507 block_time = le64_to_cpu(value);
1508 unpack_block_time(block_time, &exception_block, &exception_time);
1509 result->block = exception_block;
1510 result->shared = __snapshotted_since(td, exception_time);
1511 }
1512
1513 static int __find_block(struct dm_thin_device *td, dm_block_t block,
1514 int can_issue_io, struct dm_thin_lookup_result *result)
1515 {
1516 int r;
1517 __le64 value;
1518 struct dm_pool_metadata *pmd = td->pmd;
1519 dm_block_t keys[2] = { td->id, block };
1520 struct dm_btree_info *info;
1521
1522 if (can_issue_io) {
1523 info = &pmd->info;
1524 } else
1525 info = &pmd->nb_info;
1526
1527 r = dm_btree_lookup(info, pmd->root, keys, &value);
1528 if (!r)
1529 unpack_lookup_result(td, value, result);
1530
1531 return r;
1532 }
1533
1534 int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
1535 int can_issue_io, struct dm_thin_lookup_result *result)
1536 {
1537 int r;
1538 struct dm_pool_metadata *pmd = td->pmd;
1539
1540 down_read(&pmd->root_lock);
1541 if (pmd->fail_io) {
1542 up_read(&pmd->root_lock);
1543 return -EINVAL;
1544 }
1545
1546 r = __find_block(td, block, can_issue_io, result);
1547
1548 up_read(&pmd->root_lock);
1549 return r;
1550 }
1551
1552 static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
1553 dm_block_t *vblock,
1554 struct dm_thin_lookup_result *result)
1555 {
1556 int r;
1557 __le64 value;
1558 struct dm_pool_metadata *pmd = td->pmd;
1559 dm_block_t keys[2] = { td->id, block };
1560
1561 r = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
1562 if (!r)
1563 unpack_lookup_result(td, value, result);
1564
1565 return r;
1566 }
1567
1568 static int __find_mapped_range(struct dm_thin_device *td,
1569 dm_block_t begin, dm_block_t end,
1570 dm_block_t *thin_begin, dm_block_t *thin_end,
1571 dm_block_t *pool_begin, bool *maybe_shared)
1572 {
1573 int r;
1574 dm_block_t pool_end;
1575 struct dm_thin_lookup_result lookup;
1576
1577 if (end < begin)
1578 return -ENODATA;
1579
1580 r = __find_next_mapped_block(td, begin, &begin, &lookup);
1581 if (r)
1582 return r;
1583
1584 if (begin >= end)
1585 return -ENODATA;
1586
1587 *thin_begin = begin;
1588 *pool_begin = lookup.block;
1589 *maybe_shared = lookup.shared;
1590
1591 begin++;
1592 pool_end = *pool_begin + 1;
1593 while (begin != end) {
1594 r = __find_block(td, begin, true, &lookup);
1595 if (r) {
1596 if (r == -ENODATA)
1597 break;
1598 else
1599 return r;
1600 }
1601
1602 if ((lookup.block != pool_end) ||
1603 (lookup.shared != *maybe_shared))
1604 break;
1605
1606 pool_end++;
1607 begin++;
1608 }
1609
1610 *thin_end = begin;
1611 return 0;
1612 }
1613
1614 int dm_thin_find_mapped_range(struct dm_thin_device *td,
1615 dm_block_t begin, dm_block_t end,
1616 dm_block_t *thin_begin, dm_block_t *thin_end,
1617 dm_block_t *pool_begin, bool *maybe_shared)
1618 {
1619 int r = -EINVAL;
1620 struct dm_pool_metadata *pmd = td->pmd;
1621
1622 down_read(&pmd->root_lock);
1623 if (!pmd->fail_io) {
1624 r = __find_mapped_range(td, begin, end, thin_begin, thin_end,
1625 pool_begin, maybe_shared);
1626 }
1627 up_read(&pmd->root_lock);
1628
1629 return r;
1630 }
1631
1632 static int __insert(struct dm_thin_device *td, dm_block_t block,
1633 dm_block_t data_block)
1634 {
1635 int r, inserted;
1636 __le64 value;
1637 struct dm_pool_metadata *pmd = td->pmd;
1638 dm_block_t keys[2] = { td->id, block };
1639
1640 value = cpu_to_le64(pack_block_time(data_block, pmd->time));
1641 __dm_bless_for_disk(&value);
1642
1643 r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value,
1644 &pmd->root, &inserted);
1645 if (r)
1646 return r;
1647
1648 td->changed = true;
1649 if (inserted)
1650 td->mapped_blocks++;
1651
1652 return 0;
1653 }
1654
1655 int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
1656 dm_block_t data_block)
1657 {
1658 int r = -EINVAL;
1659
1660 pmd_write_lock(td->pmd);
1661 if (!td->pmd->fail_io)
1662 r = __insert(td, block, data_block);
1663 pmd_write_unlock(td->pmd);
1664
1665 return r;
1666 }
1667
1668 static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
1669 {
1670 int r;
1671 unsigned count, total_count = 0;
1672 struct dm_pool_metadata *pmd = td->pmd;
1673 dm_block_t keys[1] = { td->id };
1674 __le64 value;
1675 dm_block_t mapping_root;
1676
1677
1678
1679
1680 r = dm_btree_lookup(&pmd->tl_info, pmd->root, keys, &value);
1681 if (r)
1682 return r;
1683
1684
1685
1686
1687
1688 mapping_root = le64_to_cpu(value);
1689 dm_tm_inc(pmd->tm, mapping_root);
1690 r = dm_btree_remove(&pmd->tl_info, pmd->root, keys, &pmd->root);
1691 if (r)
1692 return r;
1693
1694
1695
1696
1697
1698 while (begin < end) {
1699 r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value);
1700 if (r == -ENODATA)
1701 break;
1702
1703 if (r)
1704 return r;
1705
1706 if (begin >= end)
1707 break;
1708
1709 r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
1710 if (r)
1711 return r;
1712
1713 total_count += count;
1714 }
1715
1716 td->mapped_blocks -= total_count;
1717 td->changed = true;
1718
1719
1720
1721
1722 value = cpu_to_le64(mapping_root);
1723 __dm_bless_for_disk(&value);
1724 return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root);
1725 }
1726
1727 int dm_thin_remove_range(struct dm_thin_device *td,
1728 dm_block_t begin, dm_block_t end)
1729 {
1730 int r = -EINVAL;
1731
1732 pmd_write_lock(td->pmd);
1733 if (!td->pmd->fail_io)
1734 r = __remove_range(td, begin, end);
1735 pmd_write_unlock(td->pmd);
1736
1737 return r;
1738 }
1739
1740 int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
1741 {
1742 int r;
1743 uint32_t ref_count;
1744
1745 down_read(&pmd->root_lock);
1746 r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
1747 if (!r)
1748 *result = (ref_count > 1);
1749 up_read(&pmd->root_lock);
1750
1751 return r;
1752 }
1753
1754 int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
1755 {
1756 int r = 0;
1757
1758 pmd_write_lock(pmd);
1759 r = dm_sm_inc_blocks(pmd->data_sm, b, e);
1760 pmd_write_unlock(pmd);
1761
1762 return r;
1763 }
1764
1765 int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
1766 {
1767 int r = 0;
1768
1769 pmd_write_lock(pmd);
1770 r = dm_sm_dec_blocks(pmd->data_sm, b, e);
1771 pmd_write_unlock(pmd);
1772
1773 return r;
1774 }
1775
1776 bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
1777 {
1778 int r;
1779
1780 down_read(&td->pmd->root_lock);
1781 r = td->changed;
1782 up_read(&td->pmd->root_lock);
1783
1784 return r;
1785 }
1786
1787 bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd)
1788 {
1789 bool r = false;
1790 struct dm_thin_device *td, *tmp;
1791
1792 down_read(&pmd->root_lock);
1793 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
1794 if (td->changed) {
1795 r = td->changed;
1796 break;
1797 }
1798 }
1799 up_read(&pmd->root_lock);
1800
1801 return r;
1802 }
1803
1804 bool dm_thin_aborted_changes(struct dm_thin_device *td)
1805 {
1806 bool r;
1807
1808 down_read(&td->pmd->root_lock);
1809 r = td->aborted_with_changes;
1810 up_read(&td->pmd->root_lock);
1811
1812 return r;
1813 }
1814
1815 int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
1816 {
1817 int r = -EINVAL;
1818
1819 pmd_write_lock(pmd);
1820 if (!pmd->fail_io)
1821 r = dm_sm_new_block(pmd->data_sm, result);
1822 pmd_write_unlock(pmd);
1823
1824 return r;
1825 }
1826
1827 int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
1828 {
1829 int r = -EINVAL;
1830
1831
1832
1833
1834
1835 pmd_write_lock_in_core(pmd);
1836 if (pmd->fail_io)
1837 goto out;
1838
1839 r = __commit_transaction(pmd);
1840 if (r < 0)
1841 goto out;
1842
1843
1844
1845
1846 r = __begin_transaction(pmd);
1847 out:
1848 pmd_write_unlock(pmd);
1849 return r;
1850 }
1851
1852 static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
1853 {
1854 struct dm_thin_device *td;
1855
1856 list_for_each_entry(td, &pmd->thin_devices, list)
1857 td->aborted_with_changes = td->changed;
1858 }
1859
1860 int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
1861 {
1862 int r = -EINVAL;
1863
1864 pmd_write_lock(pmd);
1865 if (pmd->fail_io)
1866 goto out;
1867
1868 __set_abort_with_changes_flags(pmd);
1869 __destroy_persistent_data_objects(pmd);
1870 r = __create_persistent_data_objects(pmd, false);
1871 if (r)
1872 pmd->fail_io = true;
1873
1874 out:
1875 pmd_write_unlock(pmd);
1876
1877 return r;
1878 }
1879
1880 int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result)
1881 {
1882 int r = -EINVAL;
1883
1884 down_read(&pmd->root_lock);
1885 if (!pmd->fail_io)
1886 r = dm_sm_get_nr_free(pmd->data_sm, result);
1887 up_read(&pmd->root_lock);
1888
1889 return r;
1890 }
1891
1892 int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
1893 dm_block_t *result)
1894 {
1895 int r = -EINVAL;
1896
1897 down_read(&pmd->root_lock);
1898 if (!pmd->fail_io)
1899 r = dm_sm_get_nr_free(pmd->metadata_sm, result);
1900
1901 if (!r) {
1902 if (*result < pmd->metadata_reserve)
1903 *result = 0;
1904 else
1905 *result -= pmd->metadata_reserve;
1906 }
1907 up_read(&pmd->root_lock);
1908
1909 return r;
1910 }
1911
1912 int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
1913 dm_block_t *result)
1914 {
1915 int r = -EINVAL;
1916
1917 down_read(&pmd->root_lock);
1918 if (!pmd->fail_io)
1919 r = dm_sm_get_nr_blocks(pmd->metadata_sm, result);
1920 up_read(&pmd->root_lock);
1921
1922 return r;
1923 }
1924
1925 int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
1926 {
1927 int r = -EINVAL;
1928
1929 down_read(&pmd->root_lock);
1930 if (!pmd->fail_io)
1931 r = dm_sm_get_nr_blocks(pmd->data_sm, result);
1932 up_read(&pmd->root_lock);
1933
1934 return r;
1935 }
1936
1937 int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result)
1938 {
1939 int r = -EINVAL;
1940 struct dm_pool_metadata *pmd = td->pmd;
1941
1942 down_read(&pmd->root_lock);
1943 if (!pmd->fail_io) {
1944 *result = td->mapped_blocks;
1945 r = 0;
1946 }
1947 up_read(&pmd->root_lock);
1948
1949 return r;
1950 }
1951
1952 static int __highest_block(struct dm_thin_device *td, dm_block_t *result)
1953 {
1954 int r;
1955 __le64 value_le;
1956 dm_block_t thin_root;
1957 struct dm_pool_metadata *pmd = td->pmd;
1958
1959 r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le);
1960 if (r)
1961 return r;
1962
1963 thin_root = le64_to_cpu(value_le);
1964
1965 return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result);
1966 }
1967
1968 int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
1969 dm_block_t *result)
1970 {
1971 int r = -EINVAL;
1972 struct dm_pool_metadata *pmd = td->pmd;
1973
1974 down_read(&pmd->root_lock);
1975 if (!pmd->fail_io)
1976 r = __highest_block(td, result);
1977 up_read(&pmd->root_lock);
1978
1979 return r;
1980 }
1981
1982 static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
1983 {
1984 int r;
1985 dm_block_t old_count;
1986
1987 r = dm_sm_get_nr_blocks(sm, &old_count);
1988 if (r)
1989 return r;
1990
1991 if (new_count == old_count)
1992 return 0;
1993
1994 if (new_count < old_count) {
1995 DMERR("cannot reduce size of space map");
1996 return -EINVAL;
1997 }
1998
1999 return dm_sm_extend(sm, new_count - old_count);
2000 }
2001
2002 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
2003 {
2004 int r = -EINVAL;
2005
2006 pmd_write_lock(pmd);
2007 if (!pmd->fail_io)
2008 r = __resize_space_map(pmd->data_sm, new_count);
2009 pmd_write_unlock(pmd);
2010
2011 return r;
2012 }
2013
2014 int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
2015 {
2016 int r = -EINVAL;
2017
2018 pmd_write_lock(pmd);
2019 if (!pmd->fail_io) {
2020 r = __resize_space_map(pmd->metadata_sm, new_count);
2021 if (!r)
2022 __set_metadata_reserve(pmd);
2023 }
2024 pmd_write_unlock(pmd);
2025
2026 return r;
2027 }
2028
2029 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
2030 {
2031 pmd_write_lock_in_core(pmd);
2032 dm_bm_set_read_only(pmd->bm);
2033 pmd_write_unlock(pmd);
2034 }
2035
2036 void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
2037 {
2038 pmd_write_lock_in_core(pmd);
2039 dm_bm_set_read_write(pmd->bm);
2040 pmd_write_unlock(pmd);
2041 }
2042
2043 int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
2044 dm_block_t threshold,
2045 dm_sm_threshold_fn fn,
2046 void *context)
2047 {
2048 int r = -EINVAL;
2049
2050 pmd_write_lock_in_core(pmd);
2051 if (!pmd->fail_io) {
2052 r = dm_sm_register_threshold_callback(pmd->metadata_sm,
2053 threshold, fn, context);
2054 }
2055 pmd_write_unlock(pmd);
2056
2057 return r;
2058 }
2059
2060 void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
2061 dm_pool_pre_commit_fn fn,
2062 void *context)
2063 {
2064 pmd_write_lock_in_core(pmd);
2065 pmd->pre_commit_fn = fn;
2066 pmd->pre_commit_context = context;
2067 pmd_write_unlock(pmd);
2068 }
2069
2070 int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
2071 {
2072 int r = -EINVAL;
2073 struct dm_block *sblock;
2074 struct thin_disk_superblock *disk_super;
2075
2076 pmd_write_lock(pmd);
2077 if (pmd->fail_io)
2078 goto out;
2079
2080 pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
2081
2082 r = superblock_lock(pmd, &sblock);
2083 if (r) {
2084 DMERR("couldn't lock superblock");
2085 goto out;
2086 }
2087
2088 disk_super = dm_block_data(sblock);
2089 disk_super->flags = cpu_to_le32(pmd->flags);
2090
2091 dm_bm_unlock(sblock);
2092 out:
2093 pmd_write_unlock(pmd);
2094 return r;
2095 }
2096
2097 bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd)
2098 {
2099 bool needs_check;
2100
2101 down_read(&pmd->root_lock);
2102 needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG;
2103 up_read(&pmd->root_lock);
2104
2105 return needs_check;
2106 }
2107
2108 void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd)
2109 {
2110 down_read(&pmd->root_lock);
2111 if (!pmd->fail_io)
2112 dm_tm_issue_prefetches(pmd->tm);
2113 up_read(&pmd->root_lock);
2114 }