0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
0004  */
0005 
0006 #include <linux/mm.h>
0007 #include <linux/err.h>
0008 #include <linux/slab.h>
0009 #include <linux/rwsem.h>
0010 #include <linux/bitops.h>
0011 #include <linux/bitmap.h>
0012 #include <linux/device-mapper.h>
0013 
0014 #include "persistent-data/dm-bitset.h"
0015 #include "persistent-data/dm-space-map.h"
0016 #include "persistent-data/dm-block-manager.h"
0017 #include "persistent-data/dm-transaction-manager.h"
0018 
0019 #include "dm-clone-metadata.h"
0020 
0021 #define DM_MSG_PREFIX "clone metadata"
0022 
0023 #define SUPERBLOCK_LOCATION 0
0024 #define SUPERBLOCK_MAGIC 0x8af27f64
0025 #define SUPERBLOCK_CSUM_XOR 257649492
0026 
0027 #define DM_CLONE_MAX_CONCURRENT_LOCKS 5
0028 
0029 #define UUID_LEN 16
0030 
0031 /* Min and max dm-clone metadata versions supported */
0032 #define DM_CLONE_MIN_METADATA_VERSION 1
0033 #define DM_CLONE_MAX_METADATA_VERSION 1
0034 
0035 /*
0036  * On-disk metadata layout
0037  */
0038 struct superblock_disk {
0039     __le32 csum;
0040     __le32 flags;
0041     __le64 blocknr;
0042 
0043     __u8 uuid[UUID_LEN];
0044     __le64 magic;
0045     __le32 version;
0046 
0047     __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
0048 
0049     __le64 region_size;
0050     __le64 target_size;
0051 
0052     __le64 bitset_root;
0053 } __packed;
0054 
0055 /*
0056  * Region and Dirty bitmaps.
0057  *
0058  * dm-clone logically splits the source and destination devices into regions of
0059  * fixed size. The destination device's regions are gradually hydrated, i.e.,
0060  * we copy (clone) the source's regions to the destination device. Eventually,
0061  * all regions will get hydrated and all I/O will be served from the
0062  * destination device.
0063  *
0064  * We maintain an on-disk bitmap which tracks the state of each of the
0065  * destination device's regions, i.e., whether they are hydrated or not.
0066  *
0067  * To avoid constantly doing lookups on disk, we keep an in-core copy of the
0068  * on-disk bitmap, the region_map.
0069  *
0070  * In order to track which regions are hydrated during a metadata transaction,
0071  * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
0072  * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
0073  * tracks the regions that got hydrated during the current metadata
0074  * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
0075  * the dirty_regions bitmap.
0076  *
0077  * This allows us to precisely track the regions that were hydrated during the
0078  * current metadata transaction and update the metadata accordingly, when we
0079  * commit the current transaction. This is important because dm-clone should
0080  * only commit the metadata of regions that were properly flushed to the
0081  * destination device beforehand. Otherwise, in case of a crash, we could end
0082  * up with a corrupted dm-clone device.
0083  *
0084  * When a region finishes hydrating, dm-clone calls
0085  * dm_clone_set_region_hydrated(), or for discard requests
0086  * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
0087  * and dmap.
0088  *
0089  * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
0090  * and update the on-disk metadata accordingly. Thus, we don't have to flush
0091  * the whole region_map to disk; we can just flush its dirty bits.
0092  *
0093  * We use the helper dmap->dirty_words bitmap, which is smaller than the
0094  * original region_map, to reduce the number of memory accesses during a
0095  * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
0096  * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
0097  * accesses.
0098  *
0099  * We could update the on-disk bitmap directly when dm-clone calls either
0100  * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
0101  * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
0102  * these two functions don't block, we can call them in interrupt context,
0103  * e.g., in a hooked overwrite bio's completion routine, and further reduce the
0104  * I/O completion latency.
0105  *
0106  * We maintain two dirty bitmap sets. During a metadata commit we atomically
0107  * swap the currently used dmap with the unused one. This allows the metadata
0108  * update functions to run concurrently with an ongoing commit.
0109  */
0110 struct dirty_map {
0111     unsigned long *dirty_words;
0112     unsigned long *dirty_regions;
0113     unsigned int changed;
0114 };
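/*
 * Example (illustration only): on a 64-bit machine, hydrating region 130
 * sets bit 130 in dmap->dirty_regions and bit 130 / BITS_PER_LONG == 2 in
 * dmap->dirty_words, so the next commit only has to examine the third word
 * of dirty_regions.
 */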
0115 
0116 struct dm_clone_metadata {
0117     /* The metadata block device */
0118     struct block_device *bdev;
0119 
0120     sector_t target_size;
0121     sector_t region_size;
0122     unsigned long nr_regions;
0123     unsigned long nr_words;
0124 
0125     /* Spinlock protecting the region and dirty bitmaps. */
0126     spinlock_t bitmap_lock;
0127     struct dirty_map dmap[2];
0128     struct dirty_map *current_dmap;
0129 
0130     /* Protected by lock */
0131     struct dirty_map *committing_dmap;
0132 
0133     /*
0134      * In-core copy of the on-disk bitmap, to avoid constantly doing
0135      * lookups on disk.
0136      */
0137     unsigned long *region_map;
0138 
0139     /* Protected by bitmap_lock */
0140     unsigned int read_only;
0141 
0142     struct dm_block_manager *bm;
0143     struct dm_space_map *sm;
0144     struct dm_transaction_manager *tm;
0145 
0146     struct rw_semaphore lock;
0147 
0148     struct dm_disk_bitset bitset_info;
0149     dm_block_t bitset_root;
0150 
0151     /*
0152      * Reading the space map root can fail, so we read it into this
0153      * buffer before the superblock is locked and updated.
0154      */
0155     __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
0156 
0157     bool hydration_done:1;
0158     bool fail_io:1;
0159 };
0160 
0161 /*---------------------------------------------------------------------------*/
0162 
0163 /*
0164  * Superblock validation.
0165  */
0166 static void sb_prepare_for_write(struct dm_block_validator *v,
0167                  struct dm_block *b, size_t sb_block_size)
0168 {
0169     struct superblock_disk *sb;
0170     u32 csum;
0171 
0172     sb = dm_block_data(b);
0173     sb->blocknr = cpu_to_le64(dm_block_location(b));
0174 
0175     csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
0176                   SUPERBLOCK_CSUM_XOR);
0177     sb->csum = cpu_to_le32(csum);
0178 }
0179 
0180 static int sb_check(struct dm_block_validator *v, struct dm_block *b,
0181             size_t sb_block_size)
0182 {
0183     struct superblock_disk *sb;
0184     u32 csum, metadata_version;
0185 
0186     sb = dm_block_data(b);
0187 
0188     if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
0189         DMERR("Superblock check failed: blocknr %llu, expected %llu",
0190               le64_to_cpu(sb->blocknr),
0191               (unsigned long long)dm_block_location(b));
0192         return -ENOTBLK;
0193     }
0194 
0195     if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
0196         DMERR("Superblock check failed: magic %llu, expected %llu",
0197               le64_to_cpu(sb->magic),
0198               (unsigned long long)SUPERBLOCK_MAGIC);
0199         return -EILSEQ;
0200     }
0201 
0202     csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
0203                   SUPERBLOCK_CSUM_XOR);
0204     if (sb->csum != cpu_to_le32(csum)) {
0205         DMERR("Superblock check failed: checksum %u, expected %u",
0206               csum, le32_to_cpu(sb->csum));
0207         return -EILSEQ;
0208     }
0209 
0210     /* Check metadata version */
0211     metadata_version = le32_to_cpu(sb->version);
0212     if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
0213         metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
0214         DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
0215               metadata_version, DM_CLONE_MIN_METADATA_VERSION,
0216               DM_CLONE_MAX_METADATA_VERSION);
0217         return -EINVAL;
0218     }
0219 
0220     return 0;
0221 }
0222 
0223 static struct dm_block_validator sb_validator = {
0224     .name = "superblock",
0225     .prepare_for_write = sb_prepare_for_write,
0226     .check = sb_check
0227 };
0228 
0229 /*
0230  * Check whether the superblock is formatted. We consider the superblock to
0231  * be formatted if we find non-zero bytes in it.
0232  */
0233 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
0234 {
0235     int r;
0236     unsigned int i, nr_words;
0237     struct dm_block *sblock;
0238     __le64 *data_le, zero = cpu_to_le64(0);
0239 
0240     /*
0241      * We don't use a validator here because the superblock could be all
0242      * zeroes.
0243      */
0244     r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
0245     if (r) {
0246         DMERR("Failed to read_lock superblock");
0247         return r;
0248     }
0249 
0250     data_le = dm_block_data(sblock);
0251     *formatted = false;
0252 
0253     /* This assumes that the block size is a multiple of 8 bytes */
0254     BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
0255     nr_words = dm_bm_block_size(bm) / sizeof(__le64);
0256     for (i = 0; i < nr_words; i++) {
0257         if (data_le[i] != zero) {
0258             *formatted = true;
0259             break;
0260         }
0261     }
0262 
0263     dm_bm_unlock(sblock);
0264 
0265     return 0;
0266 }
0267 
0268 /*---------------------------------------------------------------------------*/
0269 
0270 /*
0271  * Low-level metadata handling.
0272  */
0273 static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
0274                        struct dm_block **sblock)
0275 {
0276     return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
0277 }
0278 
0279 static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
0280                          struct dm_block **sblock)
0281 {
0282     return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
0283 }
0284 
0285 static int __copy_sm_root(struct dm_clone_metadata *cmd)
0286 {
0287     int r;
0288     size_t root_size;
0289 
0290     r = dm_sm_root_size(cmd->sm, &root_size);
0291     if (r)
0292         return r;
0293 
0294     return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
0295 }
0296 
0297 /* Save dm-clone metadata in superblock */
0298 static void __prepare_superblock(struct dm_clone_metadata *cmd,
0299                  struct superblock_disk *sb)
0300 {
0301     sb->flags = cpu_to_le32(0UL);
0302 
0303     /* FIXME: UUID is currently unused */
0304     memset(sb->uuid, 0, sizeof(sb->uuid));
0305 
0306     sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
0307     sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
0308 
0309     /* Save the metadata space_map root */
0310     memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
0311            sizeof(cmd->metadata_space_map_root));
0312 
0313     sb->region_size = cpu_to_le64(cmd->region_size);
0314     sb->target_size = cpu_to_le64(cmd->target_size);
0315     sb->bitset_root = cpu_to_le64(cmd->bitset_root);
0316 }
0317 
0318 static int __open_metadata(struct dm_clone_metadata *cmd)
0319 {
0320     int r;
0321     struct dm_block *sblock;
0322     struct superblock_disk *sb;
0323 
0324     r = superblock_read_lock(cmd, &sblock);
0325 
0326     if (r) {
0327         DMERR("Failed to read_lock superblock");
0328         return r;
0329     }
0330 
0331     sb = dm_block_data(sblock);
0332 
0333     /* Verify that target_size and region_size haven't changed. */
0334     if (cmd->region_size != le64_to_cpu(sb->region_size) ||
0335         cmd->target_size != le64_to_cpu(sb->target_size)) {
0336         DMERR("Region and/or target size don't match the ones in metadata");
0337         r = -EINVAL;
0338         goto out_with_lock;
0339     }
0340 
0341     r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
0342                    sb->metadata_space_map_root,
0343                    sizeof(sb->metadata_space_map_root),
0344                    &cmd->tm, &cmd->sm);
0345 
0346     if (r) {
0347         DMERR("dm_tm_open_with_sm failed");
0348         goto out_with_lock;
0349     }
0350 
0351     dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
0352     cmd->bitset_root = le64_to_cpu(sb->bitset_root);
0353 
0354 out_with_lock:
0355     dm_bm_unlock(sblock);
0356 
0357     return r;
0358 }
0359 
0360 static int __format_metadata(struct dm_clone_metadata *cmd)
0361 {
0362     int r;
0363     struct dm_block *sblock;
0364     struct superblock_disk *sb;
0365 
0366     r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
0367     if (r) {
0368         DMERR("Failed to create transaction manager");
0369         return r;
0370     }
0371 
0372     dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
0373 
0374     r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
0375     if (r) {
0376         DMERR("Failed to create empty on-disk bitset");
0377         goto err_with_tm;
0378     }
0379 
0380     r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
0381                  cmd->nr_regions, false, &cmd->bitset_root);
0382     if (r) {
0383         DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
0384         goto err_with_tm;
0385     }
0386 
0387     /* Flush all blocks to disk, except the superblock */
0388     r = dm_tm_pre_commit(cmd->tm);
0389     if (r) {
0390         DMERR("dm_tm_pre_commit failed");
0391         goto err_with_tm;
0392     }
0393 
0394     r = __copy_sm_root(cmd);
0395     if (r) {
0396         DMERR("__copy_sm_root failed");
0397         goto err_with_tm;
0398     }
0399 
0400     r = superblock_write_lock_zero(cmd, &sblock);
0401     if (r) {
0402         DMERR("Failed to write_lock superblock");
0403         goto err_with_tm;
0404     }
0405 
0406     sb = dm_block_data(sblock);
0407     __prepare_superblock(cmd, sb);
0408     r = dm_tm_commit(cmd->tm, sblock);
0409     if (r) {
0410         DMERR("Failed to commit superblock");
0411         goto err_with_tm;
0412     }
0413 
0414     return 0;
0415 
0416 err_with_tm:
0417     dm_sm_destroy(cmd->sm);
0418     dm_tm_destroy(cmd->tm);
0419 
0420     return r;
0421 }
0422 
0423 static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
0424 {
0425     int r;
0426     bool formatted = false;
0427 
0428     r = __superblock_all_zeroes(cmd->bm, &formatted);
0429     if (r)
0430         return r;
0431 
0432     if (!formatted)
0433         return may_format_device ? __format_metadata(cmd) : -EPERM;
0434 
0435     return __open_metadata(cmd);
0436 }
0437 
0438 static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
0439                            bool may_format_device)
0440 {
0441     int r;
0442 
0443     /* Create block manager */
0444     cmd->bm = dm_block_manager_create(cmd->bdev,
0445                      DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
0446                      DM_CLONE_MAX_CONCURRENT_LOCKS);
0447     if (IS_ERR(cmd->bm)) {
0448         DMERR("Failed to create block manager");
0449         return PTR_ERR(cmd->bm);
0450     }
0451 
0452     r = __open_or_format_metadata(cmd, may_format_device);
0453     if (r)
0454         dm_block_manager_destroy(cmd->bm);
0455 
0456     return r;
0457 }
0458 
0459 static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
0460 {
0461     dm_sm_destroy(cmd->sm);
0462     dm_tm_destroy(cmd->tm);
0463     dm_block_manager_destroy(cmd->bm);
0464 }
0465 
0466 /*---------------------------------------------------------------------------*/
0467 
0468 static size_t bitmap_size(unsigned long nr_bits)
0469 {
0470     return BITS_TO_LONGS(nr_bits) * sizeof(long);
0471 }
0472 
0473 static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
0474                 unsigned long nr_regions)
0475 {
0476     dmap->changed = 0;
0477 
0478     dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
0479     if (!dmap->dirty_words)
0480         return -ENOMEM;
0481 
0482     dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
0483     if (!dmap->dirty_regions) {
0484         kvfree(dmap->dirty_words);
0485         return -ENOMEM;
0486     }
0487 
0488     return 0;
0489 }
0490 
0491 static void __dirty_map_exit(struct dirty_map *dmap)
0492 {
0493     kvfree(dmap->dirty_words);
0494     kvfree(dmap->dirty_regions);
0495 }
0496 
0497 static int dirty_map_init(struct dm_clone_metadata *cmd)
0498 {
0499     if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
0500         DMERR("Failed to allocate dirty bitmap");
0501         return -ENOMEM;
0502     }
0503 
0504     if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
0505         DMERR("Failed to allocate dirty bitmap");
0506         __dirty_map_exit(&cmd->dmap[0]);
0507         return -ENOMEM;
0508     }
0509 
0510     cmd->current_dmap = &cmd->dmap[0];
0511     cmd->committing_dmap = NULL;
0512 
0513     return 0;
0514 }
0515 
0516 static void dirty_map_exit(struct dm_clone_metadata *cmd)
0517 {
0518     __dirty_map_exit(&cmd->dmap[0]);
0519     __dirty_map_exit(&cmd->dmap[1]);
0520 }
0521 
0522 static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
0523 {
0524     int r;
0525     unsigned long i;
0526     struct dm_bitset_cursor c;
0527 
0528     /* Flush bitset cache */
0529     r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
0530     if (r)
0531         return r;
0532 
0533     r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
0534     if (r)
0535         return r;
0536 
0537     for (i = 0; ; i++) {
0538         if (dm_bitset_cursor_get_value(&c))
0539             __set_bit(i, cmd->region_map);
0540         else
0541             __clear_bit(i, cmd->region_map);
0542 
0543         if (i >= (cmd->nr_regions - 1))
0544             break;
0545 
0546         r = dm_bitset_cursor_next(&c);
0547 
0548         if (r)
0549             break;
0550     }
0551 
0552     dm_bitset_cursor_end(&c);
0553 
0554     return r;
0555 }
0556 
0557 struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
0558                          sector_t target_size,
0559                          sector_t region_size)
0560 {
0561     int r;
0562     struct dm_clone_metadata *cmd;
0563 
0564     cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
0565     if (!cmd) {
0566         DMERR("Failed to allocate memory for dm-clone metadata");
0567         return ERR_PTR(-ENOMEM);
0568     }
0569 
0570     cmd->bdev = bdev;
0571     cmd->target_size = target_size;
0572     cmd->region_size = region_size;
0573     cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
0574     cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
0575 
0576     init_rwsem(&cmd->lock);
0577     spin_lock_init(&cmd->bitmap_lock);
0578     cmd->read_only = 0;
0579     cmd->fail_io = false;
0580     cmd->hydration_done = false;
0581 
0582     cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
0583     if (!cmd->region_map) {
0584         DMERR("Failed to allocate memory for region bitmap");
0585         r = -ENOMEM;
0586         goto out_with_md;
0587     }
0588 
0589     r = __create_persistent_data_structures(cmd, true);
0590     if (r)
0591         goto out_with_region_map;
0592 
0593     r = __load_bitset_in_core(cmd);
0594     if (r) {
0595         DMERR("Failed to load on-disk region map");
0596         goto out_with_pds;
0597     }
0598 
0599     r = dirty_map_init(cmd);
0600     if (r)
0601         goto out_with_pds;
0602 
0603     if (bitmap_full(cmd->region_map, cmd->nr_regions))
0604         cmd->hydration_done = true;
0605 
0606     return cmd;
0607 
0608 out_with_pds:
0609     __destroy_persistent_data_structures(cmd);
0610 
0611 out_with_region_map:
0612     kvfree(cmd->region_map);
0613 
0614 out_with_md:
0615     kfree(cmd);
0616 
0617     return ERR_PTR(r);
0618 }
0619 
0620 void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
0621 {
0622     if (!cmd->fail_io)
0623         __destroy_persistent_data_structures(cmd);
0624 
0625     dirty_map_exit(cmd);
0626     kvfree(cmd->region_map);
0627     kfree(cmd);
0628 }
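/*
 * Illustrative sketch, not part of the driver: how a caller, e.g., the
 * dm-clone target, might open and later close the metadata. The device and
 * size arguments are placeholders.
 */
static int example_open_and_close(struct block_device *metadata_dev,
                                  sector_t target_size, sector_t region_size)
{
    struct dm_clone_metadata *cmd;

    cmd = dm_clone_metadata_open(metadata_dev, target_size, region_size);
    if (IS_ERR(cmd))
        return PTR_ERR(cmd);

    /* ... issue metadata updates and commits here ... */

    dm_clone_metadata_close(cmd);

    return 0;
}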
0629 
0630 bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
0631 {
0632     return cmd->hydration_done;
0633 }
0634 
0635 bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
0636 {
0637     return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
0638 }
0639 
0640 bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
0641                 unsigned long start, unsigned long nr_regions)
0642 {
0643     unsigned long bit;
0644 
0645     if (dm_clone_is_hydration_done(cmd))
0646         return true;
0647 
0648     bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
0649 
0650     return (bit >= (start + nr_regions));
0651 }
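/*
 * Example (illustration only): with start == 4 and nr_regions == 3 the range
 * covers regions 4, 5 and 6; it is considered hydrated iff the first zero bit
 * at or after bit 4 of region_map lies at position 7 or beyond.
 */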
0652 
0653 unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
0654 {
0655     return bitmap_weight(cmd->region_map, cmd->nr_regions);
0656 }
0657 
0658 unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
0659                            unsigned long start)
0660 {
0661     return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
0662 }
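/*
 * Illustrative sketch, not part of the driver: walking every unhydrated
 * region with the lookup above. hydrate_region() is a hypothetical
 * placeholder for the caller's copy logic.
 */
static void example_walk_unhydrated(struct dm_clone_metadata *cmd)
{
    unsigned long region_nr;

    for (region_nr = dm_clone_find_next_unhydrated_region(cmd, 0);
         region_nr < cmd->nr_regions;
         region_nr = dm_clone_find_next_unhydrated_region(cmd, region_nr + 1)) {
        /* hydrate_region(cmd, region_nr); */
    }
}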
0663 
0664 static int __update_metadata_word(struct dm_clone_metadata *cmd,
0665                   unsigned long *dirty_regions,
0666                   unsigned long word)
0667 {
0668     int r;
0669     unsigned long index = word * BITS_PER_LONG;
0670     unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
0671 
0672     while (index < max_index) {
0673         if (test_bit(index, dirty_regions)) {
0674             r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
0675                           index, &cmd->bitset_root);
0676             if (r) {
0677                 DMERR("dm_bitset_set_bit failed");
0678                 return r;
0679             }
0680             __clear_bit(index, dirty_regions);
0681         }
0682         index++;
0683     }
0684 
0685     return 0;
0686 }
0687 
0688 static int __metadata_commit(struct dm_clone_metadata *cmd)
0689 {
0690     int r;
0691     struct dm_block *sblock;
0692     struct superblock_disk *sb;
0693 
0694     /* Flush bitset cache */
0695     r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
0696     if (r) {
0697         DMERR("dm_bitset_flush failed");
0698         return r;
0699     }
0700 
0701     /* Flush all blocks to disk, except the superblock */
0702     r = dm_tm_pre_commit(cmd->tm);
0703     if (r) {
0704         DMERR("dm_tm_pre_commit failed");
0705         return r;
0706     }
0707 
0708     /* Save the space map root in cmd->metadata_space_map_root */
0709     r = __copy_sm_root(cmd);
0710     if (r) {
0711         DMERR("__copy_sm_root failed");
0712         return r;
0713     }
0714 
0715     /* Lock the superblock */
0716     r = superblock_write_lock_zero(cmd, &sblock);
0717     if (r) {
0718         DMERR("Failed to write_lock superblock");
0719         return r;
0720     }
0721 
0722     /* Save the metadata in superblock */
0723     sb = dm_block_data(sblock);
0724     __prepare_superblock(cmd, sb);
0725 
0726     /* Unlock superblock and commit it to disk */
0727     r = dm_tm_commit(cmd->tm, sblock);
0728     if (r) {
0729         DMERR("Failed to commit superblock");
0730         return r;
0731     }
0732 
0733     /*
0734      * FIXME: Find a more efficient way to check if the hydration is done.
0735      */
0736     if (bitmap_full(cmd->region_map, cmd->nr_regions))
0737         cmd->hydration_done = true;
0738 
0739     return 0;
0740 }
0741 
0742 static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
0743 {
0744     int r;
0745     unsigned long word;
0746 
0747     word = 0;
0748     do {
0749         word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
0750 
0751         if (word == cmd->nr_words)
0752             break;
0753 
0754         r = __update_metadata_word(cmd, dmap->dirty_regions, word);
0755 
0756         if (r)
0757             return r;
0758 
0759         __clear_bit(word, dmap->dirty_words);
0760         word++;
0761     } while (word < cmd->nr_words);
0762 
0763     r = __metadata_commit(cmd);
0764 
0765     if (r)
0766         return r;
0767 
0768     /* Update the changed flag */
0769     spin_lock_irq(&cmd->bitmap_lock);
0770     dmap->changed = 0;
0771     spin_unlock_irq(&cmd->bitmap_lock);
0772 
0773     return 0;
0774 }
0775 
0776 int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
0777 {
0778     int r = 0;
0779     struct dirty_map *dmap, *next_dmap;
0780 
0781     down_write(&cmd->lock);
0782 
0783     if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
0784         r = -EPERM;
0785         goto out;
0786     }
0787 
0788     /* Get current dirty bitmap */
0789     dmap = cmd->current_dmap;
0790 
0791     /* Get next dirty bitmap */
0792     next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
0793 
0794     /*
0795      * The last commit failed, so we don't have a clean dirty-bitmap to
0796      * use.
0797      */
0798     if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
0799         r = -EINVAL;
0800         goto out;
0801     }
0802 
0803     /* Swap dirty bitmaps */
0804     spin_lock_irq(&cmd->bitmap_lock);
0805     cmd->current_dmap = next_dmap;
0806     spin_unlock_irq(&cmd->bitmap_lock);
0807 
0808     /* Set old dirty bitmap as currently committing */
0809     cmd->committing_dmap = dmap;
0810 out:
0811     up_write(&cmd->lock);
0812 
0813     return r;
0814 }
0815 
0816 int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
0817 {
0818     int r = -EPERM;
0819 
0820     down_write(&cmd->lock);
0821 
0822     if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
0823         goto out;
0824 
0825     if (WARN_ON(!cmd->committing_dmap)) {
0826         r = -EINVAL;
0827         goto out;
0828     }
0829 
0830     r = __flush_dmap(cmd, cmd->committing_dmap);
0831     if (!r) {
0832         /* Clear committing dmap */
0833         cmd->committing_dmap = NULL;
0834     }
0835 out:
0836     up_write(&cmd->lock);
0837 
0838     return r;
0839 }
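/*
 * Illustrative sketch, not part of the driver: the two-phase commit sequence
 * a caller is expected to follow. How the caller flushes the destination
 * device between the two steps is outside the scope of this file.
 */
static int example_commit_metadata(struct dm_clone_metadata *cmd)
{
    int r;

    /* Swap the dirty bitmaps, so updates can continue concurrently. */
    r = dm_clone_metadata_pre_commit(cmd);
    if (r)
        return r;

    /*
     * Before committing, the caller must ensure that the regions marked
     * hydrated in the committing dmap have actually reached the
     * destination device, e.g., by issuing a flush and waiting for it.
     */

    /* Write the dirty bits and the new superblock to disk. */
    return dm_clone_metadata_commit(cmd);
}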
0840 
0841 int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
0842 {
0843     int r = 0;
0844     struct dirty_map *dmap;
0845     unsigned long word, flags;
0846 
0847     if (unlikely(region_nr >= cmd->nr_regions)) {
0848         DMERR("Region %lu out of range (total number of regions %lu)",
0849               region_nr, cmd->nr_regions);
0850         return -ERANGE;
0851     }
0852 
0853     word = region_nr / BITS_PER_LONG;
0854 
0855     spin_lock_irqsave(&cmd->bitmap_lock, flags);
0856 
0857     if (cmd->read_only) {
0858         r = -EPERM;
0859         goto out;
0860     }
0861 
0862     dmap = cmd->current_dmap;
0863 
0864     __set_bit(word, dmap->dirty_words);
0865     __set_bit(region_nr, dmap->dirty_regions);
0866     __set_bit(region_nr, cmd->region_map);
0867     dmap->changed = 1;
0868 
0869 out:
0870     spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
0871 
0872     return r;
0873 }
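/*
 * Illustrative sketch (hypothetical completion handler): because
 * dm_clone_set_region_hydrated() never blocks and takes bitmap_lock with
 * irqsave, it can be called from a hydration bio's completion routine.
 */
static void example_hydration_complete(struct dm_clone_metadata *cmd,
                                       unsigned long region_nr,
                                       blk_status_t status)
{
    if (status)
        return; /* don't mark a failed region as hydrated */

    if (dm_clone_set_region_hydrated(cmd, region_nr))
        DMERR("Failed to mark region %lu as hydrated", region_nr);
}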
0874 
0875 int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
0876                 unsigned long nr_regions)
0877 {
0878     int r = 0;
0879     struct dirty_map *dmap;
0880     unsigned long word, region_nr;
0881 
0882     if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
0883              (start + nr_regions) > cmd->nr_regions)) {
0884         DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
0885               start, nr_regions, cmd->nr_regions);
0886         return -ERANGE;
0887     }
0888 
0889     spin_lock_irq(&cmd->bitmap_lock);
0890 
0891     if (cmd->read_only) {
0892         r = -EPERM;
0893         goto out;
0894     }
0895 
0896     dmap = cmd->current_dmap;
0897     for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
0898         if (!test_bit(region_nr, cmd->region_map)) {
0899             word = region_nr / BITS_PER_LONG;
0900             __set_bit(word, dmap->dirty_words);
0901             __set_bit(region_nr, dmap->dirty_regions);
0902             __set_bit(region_nr, cmd->region_map);
0903             dmap->changed = 1;
0904         }
0905     }
0906 out:
0907     spin_unlock_irq(&cmd->bitmap_lock);
0908 
0909     return r;
0910 }
0911 
0912 /*
0913  * WARNING: This must not be called concurrently with either
0914  * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
0915  * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
0916  * exception is after setting the metadata to read-only mode, using
0917  * dm_clone_metadata_set_read_only().
0918  *
0919  * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
0920  * may block.
0921  */
0922 int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
0923 {
0924     int r = -EINVAL;
0925 
0926     down_write(&cmd->lock);
0927 
0928     if (cmd->fail_io)
0929         goto out;
0930 
0931     r = __load_bitset_in_core(cmd);
0932 out:
0933     up_write(&cmd->lock);
0934 
0935     return r;
0936 }
0937 
0938 bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
0939 {
0940     bool r;
0941     unsigned long flags;
0942 
0943     spin_lock_irqsave(&cmd->bitmap_lock, flags);
0944     r = cmd->dmap[0].changed || cmd->dmap[1].changed;
0945     spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
0946 
0947     return r;
0948 }
0949 
0950 int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
0951 {
0952     int r = -EPERM;
0953 
0954     down_write(&cmd->lock);
0955 
0956     if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
0957         goto out;
0958 
0959     __destroy_persistent_data_structures(cmd);
0960 
0961     r = __create_persistent_data_structures(cmd, false);
0962     if (r) {
0963         /* If something went wrong we can neither write nor read the metadata */
0964         cmd->fail_io = true;
0965     }
0966 out:
0967     up_write(&cmd->lock);
0968 
0969     return r;
0970 }
0971 
0972 void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
0973 {
0974     down_write(&cmd->lock);
0975 
0976     spin_lock_irq(&cmd->bitmap_lock);
0977     cmd->read_only = 1;
0978     spin_unlock_irq(&cmd->bitmap_lock);
0979 
0980     if (!cmd->fail_io)
0981         dm_bm_set_read_only(cmd->bm);
0982 
0983     up_write(&cmd->lock);
0984 }
0985 
0986 void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
0987 {
0988     down_write(&cmd->lock);
0989 
0990     spin_lock_irq(&cmd->bitmap_lock);
0991     cmd->read_only = 0;
0992     spin_unlock_irq(&cmd->bitmap_lock);
0993 
0994     if (!cmd->fail_io)
0995         dm_bm_set_read_write(cmd->bm);
0996 
0997     up_write(&cmd->lock);
0998 }
0999 
1000 int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
1001                        dm_block_t *result)
1002 {
1003     int r = -EINVAL;
1004 
1005     down_read(&cmd->lock);
1006 
1007     if (!cmd->fail_io)
1008         r = dm_sm_get_nr_free(cmd->sm, result);
1009 
1010     up_read(&cmd->lock);
1011 
1012     return r;
1013 }
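/*
 * Illustrative sketch (hypothetical policy, placeholder threshold): a caller
 * could poll the free metadata block count and switch the metadata to
 * read-only mode when space runs low, instead of failing a later commit.
 */
static void example_check_metadata_space(struct dm_clone_metadata *cmd)
{
    dm_block_t nr_free;

    if (dm_clone_get_free_metadata_block_count(cmd, &nr_free))
        return;

    if (nr_free < 16) /* placeholder threshold */
        dm_clone_metadata_set_read_only(cmd);
}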
1014 
1015 int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
1016                    dm_block_t *result)
1017 {
1018     int r = -EINVAL;
1019 
1020     down_read(&cmd->lock);
1021 
1022     if (!cmd->fail_io)
1023         r = dm_sm_get_nr_blocks(cmd->sm, result);
1024 
1025     up_read(&cmd->lock);
1026 
1027     return r;
1028 }