Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (C) 2012 Red Hat, Inc.
0003  *
0004  * This file is released under the GPL.
0005  */
0006 
0007 #include "dm-cache-metadata.h"
0008 
0009 #include "persistent-data/dm-array.h"
0010 #include "persistent-data/dm-bitset.h"
0011 #include "persistent-data/dm-space-map.h"
0012 #include "persistent-data/dm-space-map-disk.h"
0013 #include "persistent-data/dm-transaction-manager.h"
0014 
0015 #include <linux/device-mapper.h>
0016 #include <linux/refcount.h>
0017 
0018 /*----------------------------------------------------------------*/
0019 
#define DM_MSG_PREFIX   "cache metadata"

/*
 * Where the superblock lives and how it is recognised.  Note the leading
 * zero: the magic is an octal literal, and the value is stored in (and
 * compared against) the on-disk superblock, so it must not be "tidied up".
 */
#define CACHE_SUPERBLOCK_LOCATION 0
#define CACHE_SUPERBLOCK_MAGIC 06142003

/*
 * Inclusive range of metadata versions that this module can handle.
 */
#define MIN_CACHE_VERSION 1
#define MAX_CACHE_VERSION 2

/*
 * Lock count passed to dm_block_manager_create():
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 */
#define CACHE_MAX_CONCURRENT_LOCKS 5

/* Size in bytes of the buffers that hold the metadata space map root. */
#define SPACE_MAP_ROOT_SIZE 128
0037 
/*
 * Bit numbers (not masks) within the superblock's flags word; they are
 * used with set_bit()/clear_bit()/test_bit().
 */
enum superblock_flag_bits {
	/* for spotting crashes that would invalidate the dirty bitset */
	CLEAN_SHUTDOWN = 0,

	/* metadata must be checked using the tools */
	NEEDS_CHECK = 1,
};
0044 
/*
 * Flag masks carried by each cache block -> origin block mapping.
 */
enum mapping_bits {
	/*
	 * The mapping is valid.  Mappings live in an array, so an entry
	 * always exists; a nonexistent mapping is one with this flag clear.
	 */
	M_VALID = 1 << 0,

	/*
	 * The data on the cache is different from that on the origin.
	 * This flag is only used by metadata format 1.
	 */
	M_DIRTY = 1 << 1
};
0061 
0062 struct cache_disk_superblock {
0063     __le32 csum;
0064     __le32 flags;
0065     __le64 blocknr;
0066 
0067     __u8 uuid[16];
0068     __le64 magic;
0069     __le32 version;
0070 
0071     __u8 policy_name[CACHE_POLICY_NAME_SIZE];
0072     __le32 policy_hint_size;
0073 
0074     __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
0075     __le64 mapping_root;
0076     __le64 hint_root;
0077 
0078     __le64 discard_root;
0079     __le64 discard_block_size;
0080     __le64 discard_nr_blocks;
0081 
0082     __le32 data_block_size;
0083     __le32 metadata_block_size;
0084     __le32 cache_blocks;
0085 
0086     __le32 compat_flags;
0087     __le32 compat_ro_flags;
0088     __le32 incompat_flags;
0089 
0090     __le32 read_hits;
0091     __le32 read_misses;
0092     __le32 write_hits;
0093     __le32 write_misses;
0094 
0095     __le32 policy_version[CACHE_POLICY_VERSION_SIZE];
0096 
0097     /*
0098      * Metadata format 2 fields.
0099      */
0100     __le64 dirty_root;
0101 } __packed;
0102 
0103 struct dm_cache_metadata {
0104     refcount_t ref_count;
0105     struct list_head list;
0106 
0107     unsigned version;
0108     struct block_device *bdev;
0109     struct dm_block_manager *bm;
0110     struct dm_space_map *metadata_sm;
0111     struct dm_transaction_manager *tm;
0112 
0113     struct dm_array_info info;
0114     struct dm_array_info hint_info;
0115     struct dm_disk_bitset discard_info;
0116 
0117     struct rw_semaphore root_lock;
0118     unsigned long flags;
0119     dm_block_t root;
0120     dm_block_t hint_root;
0121     dm_block_t discard_root;
0122 
0123     sector_t discard_block_size;
0124     dm_dblock_t discard_nr_blocks;
0125 
0126     sector_t data_block_size;
0127     dm_cblock_t cache_blocks;
0128     bool changed:1;
0129     bool clean_when_opened:1;
0130 
0131     char policy_name[CACHE_POLICY_NAME_SIZE];
0132     unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
0133     size_t policy_hint_size;
0134     struct dm_cache_statistics stats;
0135 
0136     /*
0137      * Reading the space map root can fail, so we read it into this
0138      * buffer before the superblock is locked and updated.
0139      */
0140     __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
0141 
0142     /*
0143      * Set if a transaction has to be aborted but the attempt to roll
0144      * back to the previous (good) transaction failed.  The only
0145      * metadata operation permissible in this state is the closing of
0146      * the device.
0147      */
0148     bool fail_io:1;
0149 
0150     /*
0151      * Metadata format 2 fields.
0152      */
0153     dm_block_t dirty_root;
0154     struct dm_disk_bitset dirty_info;
0155 
0156     /*
0157      * These structures are used when loading metadata.  They're too
0158      * big to put on the stack.
0159      */
0160     struct dm_array_cursor mapping_cursor;
0161     struct dm_array_cursor hint_cursor;
0162     struct dm_bitset_cursor dirty_cursor;
0163 };
0164 
0165 /*-------------------------------------------------------------------
0166  * superblock validator
0167  *-----------------------------------------------------------------*/
0168 
/* Third argument to dm_bm_checksum() whenever the superblock is summed. */
#define SUPERBLOCK_CSUM_XOR 9031977
0170 
0171 static void sb_prepare_for_write(struct dm_block_validator *v,
0172                  struct dm_block *b,
0173                  size_t sb_block_size)
0174 {
0175     struct cache_disk_superblock *disk_super = dm_block_data(b);
0176 
0177     disk_super->blocknr = cpu_to_le64(dm_block_location(b));
0178     disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
0179                               sb_block_size - sizeof(__le32),
0180                               SUPERBLOCK_CSUM_XOR));
0181 }
0182 
0183 static int check_metadata_version(struct cache_disk_superblock *disk_super)
0184 {
0185     uint32_t metadata_version = le32_to_cpu(disk_super->version);
0186 
0187     if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
0188         DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
0189               metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
0190         return -EINVAL;
0191     }
0192 
0193     return 0;
0194 }
0195 
0196 static int sb_check(struct dm_block_validator *v,
0197             struct dm_block *b,
0198             size_t sb_block_size)
0199 {
0200     struct cache_disk_superblock *disk_super = dm_block_data(b);
0201     __le32 csum_le;
0202 
0203     if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
0204         DMERR("sb_check failed: blocknr %llu: wanted %llu",
0205               le64_to_cpu(disk_super->blocknr),
0206               (unsigned long long)dm_block_location(b));
0207         return -ENOTBLK;
0208     }
0209 
0210     if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
0211         DMERR("sb_check failed: magic %llu: wanted %llu",
0212               le64_to_cpu(disk_super->magic),
0213               (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
0214         return -EILSEQ;
0215     }
0216 
0217     csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
0218                          sb_block_size - sizeof(__le32),
0219                          SUPERBLOCK_CSUM_XOR));
0220     if (csum_le != disk_super->csum) {
0221         DMERR("sb_check failed: csum %u: wanted %u",
0222               le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
0223         return -EILSEQ;
0224     }
0225 
0226     return check_metadata_version(disk_super);
0227 }
0228 
0229 static struct dm_block_validator sb_validator = {
0230     .name = "superblock",
0231     .prepare_for_write = sb_prepare_for_write,
0232     .check = sb_check
0233 };
0234 
0235 /*----------------------------------------------------------------*/
0236 
0237 static int superblock_read_lock(struct dm_cache_metadata *cmd,
0238                 struct dm_block **sblock)
0239 {
0240     return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
0241                    &sb_validator, sblock);
0242 }
0243 
0244 static int superblock_lock_zero(struct dm_cache_metadata *cmd,
0245                 struct dm_block **sblock)
0246 {
0247     return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
0248                      &sb_validator, sblock);
0249 }
0250 
0251 static int superblock_lock(struct dm_cache_metadata *cmd,
0252                struct dm_block **sblock)
0253 {
0254     return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
0255                 &sb_validator, sblock);
0256 }
0257 
0258 /*----------------------------------------------------------------*/
0259 
0260 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
0261 {
0262     int r;
0263     unsigned i;
0264     struct dm_block *b;
0265     __le64 *data_le, zero = cpu_to_le64(0);
0266     unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
0267 
0268     /*
0269      * We can't use a validator here - it may be all zeroes.
0270      */
0271     r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
0272     if (r)
0273         return r;
0274 
0275     data_le = dm_block_data(b);
0276     *result = true;
0277     for (i = 0; i < sb_block_size; i++) {
0278         if (data_le[i] != zero) {
0279             *result = false;
0280             break;
0281         }
0282     }
0283 
0284     dm_bm_unlock(b);
0285 
0286     return 0;
0287 }
0288 
0289 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
0290 {
0291     struct dm_btree_value_type vt;
0292 
0293     vt.context = NULL;
0294     vt.size = sizeof(__le64);
0295     vt.inc = NULL;
0296     vt.dec = NULL;
0297     vt.equal = NULL;
0298     dm_array_info_init(&cmd->info, cmd->tm, &vt);
0299 
0300     if (cmd->policy_hint_size) {
0301         vt.size = sizeof(__le32);
0302         dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
0303     }
0304 }
0305 
0306 static int __save_sm_root(struct dm_cache_metadata *cmd)
0307 {
0308     int r;
0309     size_t metadata_len;
0310 
0311     r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
0312     if (r < 0)
0313         return r;
0314 
0315     return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
0316                    metadata_len);
0317 }
0318 
0319 static void __copy_sm_root(struct dm_cache_metadata *cmd,
0320                struct cache_disk_superblock *disk_super)
0321 {
0322     memcpy(&disk_super->metadata_space_map_root,
0323            &cmd->metadata_space_map_root,
0324            sizeof(cmd->metadata_space_map_root));
0325 }
0326 
0327 static bool separate_dirty_bits(struct dm_cache_metadata *cmd)
0328 {
0329     return cmd->version >= 2;
0330 }
0331 
0332 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
0333 {
0334     int r;
0335     struct dm_block *sblock;
0336     struct cache_disk_superblock *disk_super;
0337     sector_t bdev_size = bdev_nr_sectors(cmd->bdev);
0338 
0339     /* FIXME: see if we can lose the max sectors limit */
0340     if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
0341         bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
0342 
0343     r = dm_tm_pre_commit(cmd->tm);
0344     if (r < 0)
0345         return r;
0346 
0347     /*
0348      * dm_sm_copy_root() can fail.  So we need to do it before we start
0349      * updating the superblock.
0350      */
0351     r = __save_sm_root(cmd);
0352     if (r)
0353         return r;
0354 
0355     r = superblock_lock_zero(cmd, &sblock);
0356     if (r)
0357         return r;
0358 
0359     disk_super = dm_block_data(sblock);
0360     disk_super->flags = 0;
0361     memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
0362     disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
0363     disk_super->version = cpu_to_le32(cmd->version);
0364     memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
0365     memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
0366     disk_super->policy_hint_size = cpu_to_le32(0);
0367 
0368     __copy_sm_root(cmd, disk_super);
0369 
0370     disk_super->mapping_root = cpu_to_le64(cmd->root);
0371     disk_super->hint_root = cpu_to_le64(cmd->hint_root);
0372     disk_super->discard_root = cpu_to_le64(cmd->discard_root);
0373     disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
0374     disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
0375     disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
0376     disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
0377     disk_super->cache_blocks = cpu_to_le32(0);
0378 
0379     disk_super->read_hits = cpu_to_le32(0);
0380     disk_super->read_misses = cpu_to_le32(0);
0381     disk_super->write_hits = cpu_to_le32(0);
0382     disk_super->write_misses = cpu_to_le32(0);
0383 
0384     if (separate_dirty_bits(cmd))
0385         disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
0386 
0387     return dm_tm_commit(cmd->tm, sblock);
0388 }
0389 
0390 static int __format_metadata(struct dm_cache_metadata *cmd)
0391 {
0392     int r;
0393 
0394     r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
0395                  &cmd->tm, &cmd->metadata_sm);
0396     if (r < 0) {
0397         DMERR("tm_create_with_sm failed");
0398         return r;
0399     }
0400 
0401     __setup_mapping_info(cmd);
0402 
0403     r = dm_array_empty(&cmd->info, &cmd->root);
0404     if (r < 0)
0405         goto bad;
0406 
0407     if (separate_dirty_bits(cmd)) {
0408         dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
0409         r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root);
0410         if (r < 0)
0411             goto bad;
0412     }
0413 
0414     dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
0415     r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
0416     if (r < 0)
0417         goto bad;
0418 
0419     cmd->discard_block_size = 0;
0420     cmd->discard_nr_blocks = 0;
0421 
0422     r = __write_initial_superblock(cmd);
0423     if (r)
0424         goto bad;
0425 
0426     cmd->clean_when_opened = true;
0427     return 0;
0428 
0429 bad:
0430     dm_tm_destroy(cmd->tm);
0431     dm_sm_destroy(cmd->metadata_sm);
0432 
0433     return r;
0434 }
0435 
0436 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
0437                      struct dm_cache_metadata *cmd)
0438 {
0439     uint32_t incompat_flags, features;
0440 
0441     incompat_flags = le32_to_cpu(disk_super->incompat_flags);
0442     features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
0443     if (features) {
0444         DMERR("could not access metadata due to unsupported optional features (%lx).",
0445               (unsigned long)features);
0446         return -EINVAL;
0447     }
0448 
0449     /*
0450      * Check for read-only metadata to skip the following RDWR checks.
0451      */
0452     if (bdev_read_only(cmd->bdev))
0453         return 0;
0454 
0455     features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
0456     if (features) {
0457         DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
0458               (unsigned long)features);
0459         return -EINVAL;
0460     }
0461 
0462     return 0;
0463 }
0464 
0465 static int __open_metadata(struct dm_cache_metadata *cmd)
0466 {
0467     int r;
0468     struct dm_block *sblock;
0469     struct cache_disk_superblock *disk_super;
0470     unsigned long sb_flags;
0471 
0472     r = superblock_read_lock(cmd, &sblock);
0473     if (r < 0) {
0474         DMERR("couldn't read lock superblock");
0475         return r;
0476     }
0477 
0478     disk_super = dm_block_data(sblock);
0479 
0480     /* Verify the data block size hasn't changed */
0481     if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
0482         DMERR("changing the data block size (from %u to %llu) is not supported",
0483               le32_to_cpu(disk_super->data_block_size),
0484               (unsigned long long)cmd->data_block_size);
0485         r = -EINVAL;
0486         goto bad;
0487     }
0488 
0489     r = __check_incompat_features(disk_super, cmd);
0490     if (r < 0)
0491         goto bad;
0492 
0493     r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
0494                    disk_super->metadata_space_map_root,
0495                    sizeof(disk_super->metadata_space_map_root),
0496                    &cmd->tm, &cmd->metadata_sm);
0497     if (r < 0) {
0498         DMERR("tm_open_with_sm failed");
0499         goto bad;
0500     }
0501 
0502     __setup_mapping_info(cmd);
0503     dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
0504     dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
0505     sb_flags = le32_to_cpu(disk_super->flags);
0506     cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
0507     dm_bm_unlock(sblock);
0508 
0509     return 0;
0510 
0511 bad:
0512     dm_bm_unlock(sblock);
0513     return r;
0514 }
0515 
0516 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
0517                      bool format_device)
0518 {
0519     int r;
0520     bool unformatted = false;
0521 
0522     r = __superblock_all_zeroes(cmd->bm, &unformatted);
0523     if (r)
0524         return r;
0525 
0526     if (unformatted)
0527         return format_device ? __format_metadata(cmd) : -EPERM;
0528 
0529     return __open_metadata(cmd);
0530 }
0531 
0532 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
0533                         bool may_format_device)
0534 {
0535     int r;
0536     cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
0537                       CACHE_MAX_CONCURRENT_LOCKS);
0538     if (IS_ERR(cmd->bm)) {
0539         DMERR("could not create block manager");
0540         r = PTR_ERR(cmd->bm);
0541         cmd->bm = NULL;
0542         return r;
0543     }
0544 
0545     r = __open_or_format_metadata(cmd, may_format_device);
0546     if (r) {
0547         dm_block_manager_destroy(cmd->bm);
0548         cmd->bm = NULL;
0549     }
0550 
0551     return r;
0552 }
0553 
0554 static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
0555 {
0556     dm_sm_destroy(cmd->metadata_sm);
0557     dm_tm_destroy(cmd->tm);
0558     dm_block_manager_destroy(cmd->bm);
0559 }
0560 
/* Maps a CPU-endian superblock flags word to its updated value. */
typedef unsigned long (*flags_mutator)(unsigned long);
0562 
0563 static void update_flags(struct cache_disk_superblock *disk_super,
0564              flags_mutator mutator)
0565 {
0566     uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
0567     disk_super->flags = cpu_to_le32(sb_flags);
0568 }
0569 
0570 static unsigned long set_clean_shutdown(unsigned long flags)
0571 {
0572     set_bit(CLEAN_SHUTDOWN, &flags);
0573     return flags;
0574 }
0575 
0576 static unsigned long clear_clean_shutdown(unsigned long flags)
0577 {
0578     clear_bit(CLEAN_SHUTDOWN, &flags);
0579     return flags;
0580 }
0581 
0582 static void read_superblock_fields(struct dm_cache_metadata *cmd,
0583                    struct cache_disk_superblock *disk_super)
0584 {
0585     cmd->version = le32_to_cpu(disk_super->version);
0586     cmd->flags = le32_to_cpu(disk_super->flags);
0587     cmd->root = le64_to_cpu(disk_super->mapping_root);
0588     cmd->hint_root = le64_to_cpu(disk_super->hint_root);
0589     cmd->discard_root = le64_to_cpu(disk_super->discard_root);
0590     cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
0591     cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
0592     cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
0593     cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
0594     strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
0595     cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
0596     cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
0597     cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
0598     cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
0599 
0600     cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
0601     cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
0602     cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
0603     cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
0604 
0605     if (separate_dirty_bits(cmd))
0606         cmd->dirty_root = le64_to_cpu(disk_super->dirty_root);
0607 
0608     cmd->changed = false;
0609 }
0610 
0611 /*
0612  * The mutator updates the superblock flags.
0613  */
0614 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
0615                      flags_mutator mutator)
0616 {
0617     int r;
0618     struct cache_disk_superblock *disk_super;
0619     struct dm_block *sblock;
0620 
0621     r = superblock_lock(cmd, &sblock);
0622     if (r)
0623         return r;
0624 
0625     disk_super = dm_block_data(sblock);
0626     update_flags(disk_super, mutator);
0627     read_superblock_fields(cmd, disk_super);
0628     dm_bm_unlock(sblock);
0629 
0630     return dm_bm_flush(cmd->bm);
0631 }
0632 
/*
 * Reload the in-core fields from the superblock under a read lock.
 * Returns 0 on success or the error from taking the lock.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
	struct dm_block *sblock;
	int r;

	/*
	 * We re-read the superblock every time.  Shouldn't need to do this
	 * really.
	 */
	r = superblock_read_lock(cmd, &sblock);
	if (r)
		return r;

	read_superblock_fields(cmd, dm_block_data(sblock));
	dm_bm_unlock(sblock);

	return 0;
}
0653 
0654 static int __commit_transaction(struct dm_cache_metadata *cmd,
0655                 flags_mutator mutator)
0656 {
0657     int r;
0658     struct cache_disk_superblock *disk_super;
0659     struct dm_block *sblock;
0660 
0661     /*
0662      * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
0663      */
0664     BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
0665 
0666     if (separate_dirty_bits(cmd)) {
0667         r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root,
0668                     &cmd->dirty_root);
0669         if (r)
0670             return r;
0671     }
0672 
0673     r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
0674                 &cmd->discard_root);
0675     if (r)
0676         return r;
0677 
0678     r = dm_tm_pre_commit(cmd->tm);
0679     if (r < 0)
0680         return r;
0681 
0682     r = __save_sm_root(cmd);
0683     if (r)
0684         return r;
0685 
0686     r = superblock_lock(cmd, &sblock);
0687     if (r)
0688         return r;
0689 
0690     disk_super = dm_block_data(sblock);
0691 
0692     disk_super->flags = cpu_to_le32(cmd->flags);
0693     if (mutator)
0694         update_flags(disk_super, mutator);
0695 
0696     disk_super->mapping_root = cpu_to_le64(cmd->root);
0697     if (separate_dirty_bits(cmd))
0698         disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
0699     disk_super->hint_root = cpu_to_le64(cmd->hint_root);
0700     disk_super->discard_root = cpu_to_le64(cmd->discard_root);
0701     disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
0702     disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
0703     disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
0704     strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
0705     disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
0706     disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
0707     disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
0708     disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size);
0709 
0710     disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
0711     disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
0712     disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
0713     disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
0714     __copy_sm_root(cmd, disk_super);
0715 
0716     return dm_tm_commit(cmd->tm, sblock);
0717 }
0718 
0719 /*----------------------------------------------------------------*/
0720 
/*
 * The mappings are held in a dm-array of 64-bit little-endian values
 * indexed by cblock.  Within each value the high 48 bits hold the oblock
 * and the low 16 bits hold the flags; FLAGS_MASK selects the latter.
 */
#define FLAGS_MASK ((1 << 16) - 1)
0727 
0728 static __le64 pack_value(dm_oblock_t block, unsigned flags)
0729 {
0730     uint64_t value = from_oblock(block);
0731     value <<= 16;
0732     value = value | (flags & FLAGS_MASK);
0733     return cpu_to_le64(value);
0734 }
0735 
0736 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
0737 {
0738     uint64_t value = le64_to_cpu(value_le);
0739     uint64_t b = value >> 16;
0740     *block = to_oblock(b);
0741     *flags = value & FLAGS_MASK;
0742 }
0743 
0744 /*----------------------------------------------------------------*/
0745 
0746 static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
0747                            sector_t data_block_size,
0748                            bool may_format_device,
0749                            size_t policy_hint_size,
0750                            unsigned metadata_version)
0751 {
0752     int r;
0753     struct dm_cache_metadata *cmd;
0754 
0755     cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
0756     if (!cmd) {
0757         DMERR("could not allocate metadata struct");
0758         return ERR_PTR(-ENOMEM);
0759     }
0760 
0761     cmd->version = metadata_version;
0762     refcount_set(&cmd->ref_count, 1);
0763     init_rwsem(&cmd->root_lock);
0764     cmd->bdev = bdev;
0765     cmd->data_block_size = data_block_size;
0766     cmd->cache_blocks = 0;
0767     cmd->policy_hint_size = policy_hint_size;
0768     cmd->changed = true;
0769     cmd->fail_io = false;
0770 
0771     r = __create_persistent_data_objects(cmd, may_format_device);
0772     if (r) {
0773         kfree(cmd);
0774         return ERR_PTR(r);
0775     }
0776 
0777     r = __begin_transaction_flags(cmd, clear_clean_shutdown);
0778     if (r < 0) {
0779         dm_cache_metadata_close(cmd);
0780         return ERR_PTR(r);
0781     }
0782 
0783     return cmd;
0784 }
0785 
/*
 * We keep a little list of ref counted metadata objects to prevent two
 * different target instances creating separate bufio instances.  This is
 * an issue if a table is reloaded before the suspend.
 *
 * 'table' is the list itself; 'table_lock' is held around every lookup,
 * insertion and removal in this file.
 */
static DEFINE_MUTEX(table_lock);
static LIST_HEAD(table);
0793 
0794 static struct dm_cache_metadata *lookup(struct block_device *bdev)
0795 {
0796     struct dm_cache_metadata *cmd;
0797 
0798     list_for_each_entry(cmd, &table, list)
0799         if (cmd->bdev == bdev) {
0800             refcount_inc(&cmd->ref_count);
0801             return cmd;
0802         }
0803 
0804     return NULL;
0805 }
0806 
0807 static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
0808                         sector_t data_block_size,
0809                         bool may_format_device,
0810                         size_t policy_hint_size,
0811                         unsigned metadata_version)
0812 {
0813     struct dm_cache_metadata *cmd, *cmd2;
0814 
0815     mutex_lock(&table_lock);
0816     cmd = lookup(bdev);
0817     mutex_unlock(&table_lock);
0818 
0819     if (cmd)
0820         return cmd;
0821 
0822     cmd = metadata_open(bdev, data_block_size, may_format_device,
0823                 policy_hint_size, metadata_version);
0824     if (!IS_ERR(cmd)) {
0825         mutex_lock(&table_lock);
0826         cmd2 = lookup(bdev);
0827         if (cmd2) {
0828             mutex_unlock(&table_lock);
0829             __destroy_persistent_data_objects(cmd);
0830             kfree(cmd);
0831             return cmd2;
0832         }
0833         list_add(&cmd->list, &table);
0834         mutex_unlock(&table_lock);
0835     }
0836 
0837     return cmd;
0838 }
0839 
0840 static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
0841 {
0842     if (cmd->data_block_size != data_block_size) {
0843         DMERR("data_block_size (%llu) different from that in metadata (%llu)",
0844               (unsigned long long) data_block_size,
0845               (unsigned long long) cmd->data_block_size);
0846         return false;
0847     }
0848 
0849     return true;
0850 }
0851 
0852 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
0853                          sector_t data_block_size,
0854                          bool may_format_device,
0855                          size_t policy_hint_size,
0856                          unsigned metadata_version)
0857 {
0858     struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device,
0859                                policy_hint_size, metadata_version);
0860 
0861     if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
0862         dm_cache_metadata_close(cmd);
0863         return ERR_PTR(-EINVAL);
0864     }
0865 
0866     return cmd;
0867 }
0868 
0869 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
0870 {
0871     if (refcount_dec_and_test(&cmd->ref_count)) {
0872         mutex_lock(&table_lock);
0873         list_del(&cmd->list);
0874         mutex_unlock(&table_lock);
0875 
0876         if (!cmd->fail_io)
0877             __destroy_persistent_data_objects(cmd);
0878         kfree(cmd);
0879     }
0880 }
0881 
0882 /*
0883  * Checks that the given cache block is either unmapped or clean.
0884  */
0885 static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b,
0886                       bool *result)
0887 {
0888     int r;
0889     __le64 value;
0890     dm_oblock_t ob;
0891     unsigned flags;
0892 
0893     r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
0894     if (r)
0895         return r;
0896 
0897     unpack_value(value, &ob, &flags);
0898     *result = !((flags & M_VALID) && (flags & M_DIRTY));
0899 
0900     return 0;
0901 }
0902 
0903 static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd,
0904                        dm_cblock_t begin, dm_cblock_t end,
0905                        bool *result)
0906 {
0907     int r;
0908     *result = true;
0909 
0910     while (begin != end) {
0911         r = block_clean_combined_dirty(cmd, begin, result);
0912         if (r) {
0913             DMERR("block_clean_combined_dirty failed");
0914             return r;
0915         }
0916 
0917         if (!*result) {
0918             DMERR("cache block %llu is dirty",
0919                   (unsigned long long) from_cblock(begin));
0920             return 0;
0921         }
0922 
0923         begin = to_cblock(from_cblock(begin) + 1);
0924     }
0925 
0926     return 0;
0927 }
0928 
0929 static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
0930                        dm_cblock_t begin, dm_cblock_t end,
0931                        bool *result)
0932 {
0933     int r;
0934     bool dirty_flag;
0935     *result = true;
0936 
0937     if (from_cblock(cmd->cache_blocks) == 0)
0938         /* Nothing to do */
0939         return 0;
0940 
0941     r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
0942                    from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
0943     if (r) {
0944         DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
0945         return r;
0946     }
0947 
0948     r = dm_bitset_cursor_skip(&cmd->dirty_cursor, from_cblock(begin));
0949     if (r) {
0950         DMERR("%s: dm_bitset_cursor_skip for dirty failed", __func__);
0951         dm_bitset_cursor_end(&cmd->dirty_cursor);
0952         return r;
0953     }
0954 
0955     while (begin != end) {
0956         /*
0957          * We assume that unmapped blocks have their dirty bit
0958          * cleared.
0959          */
0960         dirty_flag = dm_bitset_cursor_get_value(&cmd->dirty_cursor);
0961         if (dirty_flag) {
0962             DMERR("%s: cache block %llu is dirty", __func__,
0963                   (unsigned long long) from_cblock(begin));
0964             dm_bitset_cursor_end(&cmd->dirty_cursor);
0965             *result = false;
0966             return 0;
0967         }
0968 
0969         begin = to_cblock(from_cblock(begin) + 1);
0970         if (begin == end)
0971             break;
0972 
0973         r = dm_bitset_cursor_next(&cmd->dirty_cursor);
0974         if (r) {
0975             DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
0976             dm_bitset_cursor_end(&cmd->dirty_cursor);
0977             return r;
0978         }
0979     }
0980 
0981     dm_bitset_cursor_end(&cmd->dirty_cursor);
0982 
0983     return 0;
0984 }
0985 
0986 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
0987                     dm_cblock_t begin, dm_cblock_t end,
0988                     bool *result)
0989 {
0990     if (separate_dirty_bits(cmd))
0991         return blocks_are_clean_separate_dirty(cmd, begin, end, result);
0992     else
0993         return blocks_are_clean_combined_dirty(cmd, begin, end, result);
0994 }
0995 
0996 static bool cmd_write_lock(struct dm_cache_metadata *cmd)
0997 {
0998     down_write(&cmd->root_lock);
0999     if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
1000         up_write(&cmd->root_lock);
1001         return false;
1002     }
1003     return true;
1004 }
1005 
/*
 * Acquire the write lock via cmd_write_lock(); if it is refused, make the
 * *enclosing* function return -EINVAL.
 */
#define WRITE_LOCK(cmd)                     \
    do {                                    \
        if (!cmd_write_lock((cmd)))         \
            return -EINVAL;                 \
    } while (0)

/*
 * As WRITE_LOCK(), for enclosing functions that return void.
 */
#define WRITE_LOCK_VOID(cmd)                \
    do {                                    \
        if (!cmd_write_lock((cmd)))         \
            return;                         \
    } while (0)

/*
 * Release a write lock obtained through WRITE_LOCK()/WRITE_LOCK_VOID().
 */
#define WRITE_UNLOCK(cmd) \
    up_write(&(cmd)->root_lock)
1020 
1021 static bool cmd_read_lock(struct dm_cache_metadata *cmd)
1022 {
1023     down_read(&cmd->root_lock);
1024     if (cmd->fail_io) {
1025         up_read(&cmd->root_lock);
1026         return false;
1027     }
1028     return true;
1029 }
1030 
/*
 * Acquire the read lock via cmd_read_lock(); if it is refused, make the
 * *enclosing* function return -EINVAL.
 */
#define READ_LOCK(cmd)                      \
    do {                                    \
        if (!cmd_read_lock((cmd)))          \
            return -EINVAL;                 \
    } while (0)

/*
 * As READ_LOCK(), for enclosing functions that return void.
 */
#define READ_LOCK_VOID(cmd)                 \
    do {                                    \
        if (!cmd_read_lock((cmd)))          \
            return;                         \
    } while (0)

/*
 * Release a read lock obtained through READ_LOCK()/READ_LOCK_VOID().
 */
#define READ_UNLOCK(cmd) \
    up_read(&(cmd)->root_lock)
1045 
1046 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
1047 {
1048     int r;
1049     bool clean;
1050     __le64 null_mapping = pack_value(0, 0);
1051 
1052     WRITE_LOCK(cmd);
1053     __dm_bless_for_disk(&null_mapping);
1054 
1055     if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
1056         r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
1057         if (r) {
1058             __dm_unbless_for_disk(&null_mapping);
1059             goto out;
1060         }
1061 
1062         if (!clean) {
1063             DMERR("unable to shrink cache due to dirty blocks");
1064             r = -EINVAL;
1065             __dm_unbless_for_disk(&null_mapping);
1066             goto out;
1067         }
1068     }
1069 
1070     r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
1071                 from_cblock(new_cache_size),
1072                 &null_mapping, &cmd->root);
1073     if (r)
1074         goto out;
1075 
1076     if (separate_dirty_bits(cmd)) {
1077         r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root,
1078                      from_cblock(cmd->cache_blocks), from_cblock(new_cache_size),
1079                      false, &cmd->dirty_root);
1080         if (r)
1081             goto out;
1082     }
1083 
1084     cmd->cache_blocks = new_cache_size;
1085     cmd->changed = true;
1086 
1087 out:
1088     WRITE_UNLOCK(cmd);
1089 
1090     return r;
1091 }
1092 
1093 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
1094                    sector_t discard_block_size,
1095                    dm_dblock_t new_nr_entries)
1096 {
1097     int r;
1098 
1099     WRITE_LOCK(cmd);
1100     r = dm_bitset_resize(&cmd->discard_info,
1101                  cmd->discard_root,
1102                  from_dblock(cmd->discard_nr_blocks),
1103                  from_dblock(new_nr_entries),
1104                  false, &cmd->discard_root);
1105     if (!r) {
1106         cmd->discard_block_size = discard_block_size;
1107         cmd->discard_nr_blocks = new_nr_entries;
1108     }
1109 
1110     cmd->changed = true;
1111     WRITE_UNLOCK(cmd);
1112 
1113     return r;
1114 }
1115 
1116 static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
1117 {
1118     return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
1119                  from_dblock(b), &cmd->discard_root);
1120 }
1121 
1122 static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
1123 {
1124     return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
1125                    from_dblock(b), &cmd->discard_root);
1126 }
1127 
1128 static int __discard(struct dm_cache_metadata *cmd,
1129              dm_dblock_t dblock, bool discard)
1130 {
1131     int r;
1132 
1133     r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
1134     if (r)
1135         return r;
1136 
1137     cmd->changed = true;
1138     return 0;
1139 }
1140 
1141 int dm_cache_set_discard(struct dm_cache_metadata *cmd,
1142              dm_dblock_t dblock, bool discard)
1143 {
1144     int r;
1145 
1146     WRITE_LOCK(cmd);
1147     r = __discard(cmd, dblock, discard);
1148     WRITE_UNLOCK(cmd);
1149 
1150     return r;
1151 }
1152 
1153 static int __load_discards(struct dm_cache_metadata *cmd,
1154                load_discard_fn fn, void *context)
1155 {
1156     int r = 0;
1157     uint32_t b;
1158     struct dm_bitset_cursor c;
1159 
1160     if (from_dblock(cmd->discard_nr_blocks) == 0)
1161         /* nothing to do */
1162         return 0;
1163 
1164     if (cmd->clean_when_opened) {
1165         r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root);
1166         if (r)
1167             return r;
1168 
1169         r = dm_bitset_cursor_begin(&cmd->discard_info, cmd->discard_root,
1170                        from_dblock(cmd->discard_nr_blocks), &c);
1171         if (r)
1172             return r;
1173 
1174         for (b = 0; ; b++) {
1175             r = fn(context, cmd->discard_block_size, to_dblock(b),
1176                    dm_bitset_cursor_get_value(&c));
1177             if (r)
1178                 break;
1179 
1180             if (b >= (from_dblock(cmd->discard_nr_blocks) - 1))
1181                 break;
1182 
1183             r = dm_bitset_cursor_next(&c);
1184             if (r)
1185                 break;
1186         }
1187 
1188         dm_bitset_cursor_end(&c);
1189 
1190     } else {
1191         for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
1192             r = fn(context, cmd->discard_block_size, to_dblock(b), false);
1193             if (r)
1194                 return r;
1195         }
1196     }
1197 
1198     return r;
1199 }
1200 
1201 int dm_cache_load_discards(struct dm_cache_metadata *cmd,
1202                load_discard_fn fn, void *context)
1203 {
1204     int r;
1205 
1206     READ_LOCK(cmd);
1207     r = __load_discards(cmd, fn, context);
1208     READ_UNLOCK(cmd);
1209 
1210     return r;
1211 }
1212 
1213 int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
1214 {
1215     READ_LOCK(cmd);
1216     *result = cmd->cache_blocks;
1217     READ_UNLOCK(cmd);
1218 
1219     return 0;
1220 }
1221 
1222 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1223 {
1224     int r;
1225     __le64 value = pack_value(0, 0);
1226 
1227     __dm_bless_for_disk(&value);
1228     r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1229                    &value, &cmd->root);
1230     if (r)
1231         return r;
1232 
1233     cmd->changed = true;
1234     return 0;
1235 }
1236 
1237 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1238 {
1239     int r;
1240 
1241     WRITE_LOCK(cmd);
1242     r = __remove(cmd, cblock);
1243     WRITE_UNLOCK(cmd);
1244 
1245     return r;
1246 }
1247 
1248 static int __insert(struct dm_cache_metadata *cmd,
1249             dm_cblock_t cblock, dm_oblock_t oblock)
1250 {
1251     int r;
1252     __le64 value = pack_value(oblock, M_VALID);
1253     __dm_bless_for_disk(&value);
1254 
1255     r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1256                    &value, &cmd->root);
1257     if (r)
1258         return r;
1259 
1260     cmd->changed = true;
1261     return 0;
1262 }
1263 
1264 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
1265                 dm_cblock_t cblock, dm_oblock_t oblock)
1266 {
1267     int r;
1268 
1269     WRITE_LOCK(cmd);
1270     r = __insert(cmd, cblock, oblock);
1271     WRITE_UNLOCK(cmd);
1272 
1273     return r;
1274 }
1275 
/*
 * Bundle of a load_mapping_fn callback, its context and related state.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the field descriptions below are inferred from names and types
 * only and should be confirmed.
 */
struct thunk {
    load_mapping_fn fn;     /* callback to invoke */
    void *context;          /* opaque argument for fn */

    struct dm_cache_metadata *cmd;
    bool respect_dirty_flags;   /* presumably: trust on-disk dirty state */
    bool hints_valid;           /* presumably: stored hints may be used */
};
1284 
1285 static bool policy_unchanged(struct dm_cache_metadata *cmd,
1286                  struct dm_cache_policy *policy)
1287 {
1288     const char *policy_name = dm_cache_policy_get_name(policy);
1289     const unsigned *policy_version = dm_cache_policy_get_version(policy);
1290     size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
1291 
1292     /*
1293      * Ensure policy names match.
1294      */
1295     if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
1296         return false;
1297 
1298     /*
1299      * Ensure policy major versions match.
1300      */
1301     if (cmd->policy_version[0] != policy_version[0])
1302         return false;
1303 
1304     /*
1305      * Ensure policy hint sizes match.
1306      */
1307     if (cmd->policy_hint_size != policy_hint_size)
1308         return false;
1309 
1310     return true;
1311 }
1312 
1313 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
1314 {
1315     return cmd->hint_root && cmd->policy_hint_size;
1316 }
1317 
1318 static bool hints_array_available(struct dm_cache_metadata *cmd,
1319                   struct dm_cache_policy *policy)
1320 {
1321     return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
1322         hints_array_initialized(cmd);
1323 }
1324 
1325 static int __load_mapping_v1(struct dm_cache_metadata *cmd,
1326                  uint64_t cb, bool hints_valid,
1327                  struct dm_array_cursor *mapping_cursor,
1328                  struct dm_array_cursor *hint_cursor,
1329                  load_mapping_fn fn, void *context)
1330 {
1331     int r = 0;
1332 
1333     __le64 mapping;
1334     __le32 hint = 0;
1335 
1336     __le64 *mapping_value_le;
1337     __le32 *hint_value_le;
1338 
1339     dm_oblock_t oblock;
1340     unsigned flags;
1341     bool dirty = true;
1342 
1343     dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1344     memcpy(&mapping, mapping_value_le, sizeof(mapping));
1345     unpack_value(mapping, &oblock, &flags);
1346 
1347     if (flags & M_VALID) {
1348         if (hints_valid) {
1349             dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1350             memcpy(&hint, hint_value_le, sizeof(hint));
1351         }
1352         if (cmd->clean_when_opened)
1353             dirty = flags & M_DIRTY;
1354 
1355         r = fn(context, oblock, to_cblock(cb), dirty,
1356                le32_to_cpu(hint), hints_valid);
1357         if (r) {
1358             DMERR("policy couldn't load cache block %llu",
1359                   (unsigned long long) from_cblock(to_cblock(cb)));
1360         }
1361     }
1362 
1363     return r;
1364 }
1365 
1366 static int __load_mapping_v2(struct dm_cache_metadata *cmd,
1367                  uint64_t cb, bool hints_valid,
1368                  struct dm_array_cursor *mapping_cursor,
1369                  struct dm_array_cursor *hint_cursor,
1370                  struct dm_bitset_cursor *dirty_cursor,
1371                  load_mapping_fn fn, void *context)
1372 {
1373     int r = 0;
1374 
1375     __le64 mapping;
1376     __le32 hint = 0;
1377 
1378     __le64 *mapping_value_le;
1379     __le32 *hint_value_le;
1380 
1381     dm_oblock_t oblock;
1382     unsigned flags;
1383     bool dirty = true;
1384 
1385     dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1386     memcpy(&mapping, mapping_value_le, sizeof(mapping));
1387     unpack_value(mapping, &oblock, &flags);
1388 
1389     if (flags & M_VALID) {
1390         if (hints_valid) {
1391             dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1392             memcpy(&hint, hint_value_le, sizeof(hint));
1393         }
1394         if (cmd->clean_when_opened)
1395             dirty = dm_bitset_cursor_get_value(dirty_cursor);
1396 
1397         r = fn(context, oblock, to_cblock(cb), dirty,
1398                le32_to_cpu(hint), hints_valid);
1399         if (r) {
1400             DMERR("policy couldn't load cache block %llu",
1401                   (unsigned long long) from_cblock(to_cblock(cb)));
1402         }
1403     }
1404 
1405     return r;
1406 }
1407 
1408 static int __load_mappings(struct dm_cache_metadata *cmd,
1409                struct dm_cache_policy *policy,
1410                load_mapping_fn fn, void *context)
1411 {
1412     int r;
1413     uint64_t cb;
1414 
1415     bool hints_valid = hints_array_available(cmd, policy);
1416 
1417     if (from_cblock(cmd->cache_blocks) == 0)
1418         /* Nothing to do */
1419         return 0;
1420 
1421     r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
1422     if (r)
1423         return r;
1424 
1425     if (hints_valid) {
1426         r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
1427         if (r) {
1428             dm_array_cursor_end(&cmd->mapping_cursor);
1429             return r;
1430         }
1431     }
1432 
1433     if (separate_dirty_bits(cmd)) {
1434         r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
1435                        from_cblock(cmd->cache_blocks),
1436                        &cmd->dirty_cursor);
1437         if (r) {
1438             dm_array_cursor_end(&cmd->hint_cursor);
1439             dm_array_cursor_end(&cmd->mapping_cursor);
1440             return r;
1441         }
1442     }
1443 
1444     for (cb = 0; ; cb++) {
1445         if (separate_dirty_bits(cmd))
1446             r = __load_mapping_v2(cmd, cb, hints_valid,
1447                           &cmd->mapping_cursor,
1448                           &cmd->hint_cursor,
1449                           &cmd->dirty_cursor,
1450                           fn, context);
1451         else
1452             r = __load_mapping_v1(cmd, cb, hints_valid,
1453                           &cmd->mapping_cursor, &cmd->hint_cursor,
1454                           fn, context);
1455         if (r)
1456             goto out;
1457 
1458         /*
1459          * We need to break out before we move the cursors.
1460          */
1461         if (cb >= (from_cblock(cmd->cache_blocks) - 1))
1462             break;
1463 
1464         r = dm_array_cursor_next(&cmd->mapping_cursor);
1465         if (r) {
1466             DMERR("dm_array_cursor_next for mapping failed");
1467             goto out;
1468         }
1469 
1470         if (hints_valid) {
1471             r = dm_array_cursor_next(&cmd->hint_cursor);
1472             if (r) {
1473                 dm_array_cursor_end(&cmd->hint_cursor);
1474                 hints_valid = false;
1475             }
1476         }
1477 
1478         if (separate_dirty_bits(cmd)) {
1479             r = dm_bitset_cursor_next(&cmd->dirty_cursor);
1480             if (r) {
1481                 DMERR("dm_bitset_cursor_next for dirty failed");
1482                 goto out;
1483             }
1484         }
1485     }
1486 out:
1487     dm_array_cursor_end(&cmd->mapping_cursor);
1488     if (hints_valid)
1489         dm_array_cursor_end(&cmd->hint_cursor);
1490 
1491     if (separate_dirty_bits(cmd))
1492         dm_bitset_cursor_end(&cmd->dirty_cursor);
1493 
1494     return r;
1495 }
1496 
1497 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1498                struct dm_cache_policy *policy,
1499                load_mapping_fn fn, void *context)
1500 {
1501     int r;
1502 
1503     READ_LOCK(cmd);
1504     r = __load_mappings(cmd, policy, fn, context);
1505     READ_UNLOCK(cmd);
1506 
1507     return r;
1508 }
1509 
1510 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1511 {
1512     __le64 value;
1513     dm_oblock_t oblock;
1514     unsigned flags;
1515 
1516     memcpy(&value, leaf, sizeof(value));
1517     unpack_value(value, &oblock, &flags);
1518 
1519     return 0;
1520 }
1521 
1522 static int __dump_mappings(struct dm_cache_metadata *cmd)
1523 {
1524     return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1525 }
1526 
/*
 * Walk all mappings under the read lock.  Does nothing if the read lock
 * cannot be taken; the result of the walk is ignored.
 */
void dm_cache_dump(struct dm_cache_metadata *cmd)
{
    READ_LOCK_VOID(cmd);

    __dump_mappings(cmd);

    READ_UNLOCK(cmd);
}
1533 
1534 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1535 {
1536     int r;
1537 
1538     READ_LOCK(cmd);
1539     r = cmd->changed;
1540     READ_UNLOCK(cmd);
1541 
1542     return r;
1543 }
1544 
1545 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1546 {
1547     int r;
1548     unsigned flags;
1549     dm_oblock_t oblock;
1550     __le64 value;
1551 
1552     r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1553     if (r)
1554         return r;
1555 
1556     unpack_value(value, &oblock, &flags);
1557 
1558     if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1559         /* nothing to be done */
1560         return 0;
1561 
1562     value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1563     __dm_bless_for_disk(&value);
1564 
1565     r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1566                    &value, &cmd->root);
1567     if (r)
1568         return r;
1569 
1570     cmd->changed = true;
1571     return 0;
1572 
1573 }
1574 
/*
 * Format 1: apply the first nr_bits bits of 'bits' to the mapping array,
 * one cache block at a time, stopping at the first error.
 */
static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
{
    int r = 0;
    unsigned i = 0;

    while (i < nr_bits) {
        r = __dirty(cmd, to_cblock(i), test_bit(i, bits));
        if (r)
            break;
        i++;
    }

    return r;
}
1587 
/*
 * Value callback passed to dm_bitset_new(): report bit 'index' of the
 * bitmap supplied as context.  Always returns 0.
 */
static int is_dirty_callback(uint32_t index, bool *value, void *context)
{
    unsigned long *bitmap = context;

    *value = test_bit(index, bitmap);

    return 0;
}
1594 
1595 static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
1596 {
1597     int r = 0;
1598 
1599     /* nr_bits is really just a sanity check */
1600     if (nr_bits != from_cblock(cmd->cache_blocks)) {
1601         DMERR("dirty bitset is wrong size");
1602         return -EINVAL;
1603     }
1604 
1605     r = dm_bitset_del(&cmd->dirty_info, cmd->dirty_root);
1606     if (r)
1607         return r;
1608 
1609     cmd->changed = true;
1610     return dm_bitset_new(&cmd->dirty_info, &cmd->dirty_root, nr_bits, is_dirty_callback, bits);
1611 }
1612 
/*
 * Store the supplied dirty bitmap using whichever on-disk representation
 * this metadata uses.
 *
 * Returns -EINVAL if the write lock cannot be taken, otherwise the result
 * of the format-specific helper.
 */
int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
                            unsigned nr_bits,
                            unsigned long *bits)
{
    int r;

    WRITE_LOCK(cmd);

    r = separate_dirty_bits(cmd) ?
        __set_dirty_bits_v2(cmd, nr_bits, bits) :
        __set_dirty_bits_v1(cmd, nr_bits, bits);

    WRITE_UNLOCK(cmd);
    return r;
}
1628 
1629 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1630                  struct dm_cache_statistics *stats)
1631 {
1632     READ_LOCK_VOID(cmd);
1633     *stats = cmd->stats;
1634     READ_UNLOCK(cmd);
1635 }
1636 
1637 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1638                  struct dm_cache_statistics *stats)
1639 {
1640     WRITE_LOCK_VOID(cmd);
1641     cmd->stats = *stats;
1642     WRITE_UNLOCK(cmd);
1643 }
1644 
1645 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1646 {
1647     int r = -EINVAL;
1648     flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1649                  clear_clean_shutdown);
1650 
1651     WRITE_LOCK(cmd);
1652     if (cmd->fail_io)
1653         goto out;
1654 
1655     r = __commit_transaction(cmd, mutator);
1656     if (r)
1657         goto out;
1658 
1659     r = __begin_transaction(cmd);
1660 out:
1661     WRITE_UNLOCK(cmd);
1662     return r;
1663 }
1664 
1665 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1666                        dm_block_t *result)
1667 {
1668     int r = -EINVAL;
1669 
1670     READ_LOCK(cmd);
1671     if (!cmd->fail_io)
1672         r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1673     READ_UNLOCK(cmd);
1674 
1675     return r;
1676 }
1677 
1678 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1679                    dm_block_t *result)
1680 {
1681     int r = -EINVAL;
1682 
1683     READ_LOCK(cmd);
1684     if (!cmd->fail_io)
1685         r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1686     READ_UNLOCK(cmd);
1687 
1688     return r;
1689 }
1690 
1691 /*----------------------------------------------------------------*/
1692 
1693 static int get_hint(uint32_t index, void *value_le, void *context)
1694 {
1695     uint32_t value;
1696     struct dm_cache_policy *policy = context;
1697 
1698     value = policy_get_hint(policy, to_cblock(index));
1699     *((__le32 *) value_le) = cpu_to_le32(value);
1700 
1701     return 0;
1702 }
1703 
1704 /*
1705  * It's quicker to always delete the hint array, and recreate with
1706  * dm_array_new().
1707  */
1708 static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1709 {
1710     int r;
1711     size_t hint_size;
1712     const char *policy_name = dm_cache_policy_get_name(policy);
1713     const unsigned *policy_version = dm_cache_policy_get_version(policy);
1714 
1715     if (!policy_name[0] ||
1716         (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1717         return -EINVAL;
1718 
1719     strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1720     memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1721 
1722     hint_size = dm_cache_policy_get_hint_size(policy);
1723     if (!hint_size)
1724         return 0; /* short-circuit hints initialization */
1725     cmd->policy_hint_size = hint_size;
1726 
1727     if (cmd->hint_root) {
1728         r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1729         if (r)
1730             return r;
1731     }
1732 
1733     return dm_array_new(&cmd->hint_info, &cmd->hint_root,
1734                 from_cblock(cmd->cache_blocks),
1735                 get_hint, policy);
1736 }
1737 
/*
 * Locked wrapper around write_hints().
 *
 * Returns -EINVAL if the write lock cannot be taken, otherwise the result
 * of write_hints().
 */
int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
    int r;

    WRITE_LOCK(cmd);

    r = write_hints(cmd, policy);

    WRITE_UNLOCK(cmd);
    return r;
}
1748 
1749 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1750 {
1751     int r;
1752 
1753     READ_LOCK(cmd);
1754     r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1755     READ_UNLOCK(cmd);
1756 
1757     return r;
1758 }
1759 
1760 void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
1761 {
1762     WRITE_LOCK_VOID(cmd);
1763     dm_bm_set_read_only(cmd->bm);
1764     WRITE_UNLOCK(cmd);
1765 }
1766 
1767 void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
1768 {
1769     WRITE_LOCK_VOID(cmd);
1770     dm_bm_set_read_write(cmd->bm);
1771     WRITE_UNLOCK(cmd);
1772 }
1773 
1774 int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
1775 {
1776     int r;
1777     struct dm_block *sblock;
1778     struct cache_disk_superblock *disk_super;
1779 
1780     WRITE_LOCK(cmd);
1781     set_bit(NEEDS_CHECK, &cmd->flags);
1782 
1783     r = superblock_lock(cmd, &sblock);
1784     if (r) {
1785         DMERR("couldn't read superblock");
1786         goto out;
1787     }
1788 
1789     disk_super = dm_block_data(sblock);
1790     disk_super->flags = cpu_to_le32(cmd->flags);
1791 
1792     dm_bm_unlock(sblock);
1793 
1794 out:
1795     WRITE_UNLOCK(cmd);
1796     return r;
1797 }
1798 
1799 int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
1800 {
1801     READ_LOCK(cmd);
1802     *result = !!test_bit(NEEDS_CHECK, &cmd->flags);
1803     READ_UNLOCK(cmd);
1804 
1805     return 0;
1806 }
1807 
1808 int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
1809 {
1810     int r;
1811 
1812     WRITE_LOCK(cmd);
1813     __destroy_persistent_data_objects(cmd);
1814     r = __create_persistent_data_objects(cmd, false);
1815     if (r)
1816         cmd->fail_io = true;
1817     WRITE_UNLOCK(cmd);
1818 
1819     return r;
1820 }