Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (C) 2011 Red Hat, Inc.
0003  *
0004  * This file is released under the GPL.
0005  */
0006 
0007 #include "dm-space-map-common.h"
0008 #include "dm-transaction-manager.h"
0009 #include "dm-btree-internal.h"
0010 #include "dm-persistent-data-internal.h"
0011 
0012 #include <linux/bitops.h>
0013 #include <linux/device-mapper.h>
0014 
0015 #define DM_MSG_PREFIX "space map common"
0016 
0017 /*----------------------------------------------------------------*/
0018 
0019 /*
0020  * Index validator.
0021  */
0022 #define INDEX_CSUM_XOR 160478
0023 
0024 static void index_prepare_for_write(struct dm_block_validator *v,
0025                     struct dm_block *b,
0026                     size_t block_size)
0027 {
0028     struct disk_metadata_index *mi_le = dm_block_data(b);
0029 
0030     mi_le->blocknr = cpu_to_le64(dm_block_location(b));
0031     mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
0032                          block_size - sizeof(__le32),
0033                          INDEX_CSUM_XOR));
0034 }
0035 
0036 static int index_check(struct dm_block_validator *v,
0037                struct dm_block *b,
0038                size_t block_size)
0039 {
0040     struct disk_metadata_index *mi_le = dm_block_data(b);
0041     __le32 csum_disk;
0042 
0043     if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
0044         DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
0045                 le64_to_cpu(mi_le->blocknr), dm_block_location(b));
0046         return -ENOTBLK;
0047     }
0048 
0049     csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
0050                            block_size - sizeof(__le32),
0051                            INDEX_CSUM_XOR));
0052     if (csum_disk != mi_le->csum) {
0053         DMERR_LIMIT("index_check failed: csum %u != wanted %u",
0054                 le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
0055         return -EILSEQ;
0056     }
0057 
0058     return 0;
0059 }
0060 
/* Validator stamping/location-and-checksum checking for index blocks. */
static struct dm_block_validator index_validator = {
	.name = "index",
	.prepare_for_write = index_prepare_for_write,
	.check = index_check
};
0066 
0067 /*----------------------------------------------------------------*/
0068 
0069 /*
0070  * Bitmap validator
0071  */
0072 #define BITMAP_CSUM_XOR 240779
0073 
0074 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
0075                     struct dm_block *b,
0076                     size_t block_size)
0077 {
0078     struct disk_bitmap_header *disk_header = dm_block_data(b);
0079 
0080     disk_header->blocknr = cpu_to_le64(dm_block_location(b));
0081     disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
0082                                block_size - sizeof(__le32),
0083                                BITMAP_CSUM_XOR));
0084 }
0085 
0086 static int dm_bitmap_check(struct dm_block_validator *v,
0087                struct dm_block *b,
0088                size_t block_size)
0089 {
0090     struct disk_bitmap_header *disk_header = dm_block_data(b);
0091     __le32 csum_disk;
0092 
0093     if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
0094         DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
0095                 le64_to_cpu(disk_header->blocknr), dm_block_location(b));
0096         return -ENOTBLK;
0097     }
0098 
0099     csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
0100                            block_size - sizeof(__le32),
0101                            BITMAP_CSUM_XOR));
0102     if (csum_disk != disk_header->csum) {
0103         DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
0104                 le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
0105         return -EILSEQ;
0106     }
0107 
0108     return 0;
0109 }
0110 
/* Validator for space-map bitmap blocks. */
static struct dm_block_validator dm_sm_bitmap_validator = {
	.name = "sm_bitmap",
	.prepare_for_write = dm_bitmap_prepare_for_write,
	.check = dm_bitmap_check,
};
0116 
0117 /*----------------------------------------------------------------*/
0118 
0119 #define ENTRIES_PER_WORD 32
0120 #define ENTRIES_SHIFT   5
0121 
0122 static void *dm_bitmap_data(struct dm_block *b)
0123 {
0124     return dm_block_data(b) + sizeof(struct disk_bitmap_header);
0125 }
0126 
0127 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
0128 
0129 static unsigned dm_bitmap_word_used(void *addr, unsigned b)
0130 {
0131     __le64 *words_le = addr;
0132     __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
0133 
0134     uint64_t bits = le64_to_cpu(*w_le);
0135     uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
0136 
0137     return !(~bits & mask);
0138 }
0139 
0140 static unsigned sm_lookup_bitmap(void *addr, unsigned b)
0141 {
0142     __le64 *words_le = addr;
0143     __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
0144     unsigned hi, lo;
0145 
0146     b = (b & (ENTRIES_PER_WORD - 1)) << 1;
0147     hi = !!test_bit_le(b, (void *) w_le);
0148     lo = !!test_bit_le(b + 1, (void *) w_le);
0149     return (hi << 1) | lo;
0150 }
0151 
0152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
0153 {
0154     __le64 *words_le = addr;
0155     __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
0156 
0157     b = (b & (ENTRIES_PER_WORD - 1)) << 1;
0158 
0159     if (val & 2)
0160         __set_bit_le(b, (void *) w_le);
0161     else
0162         __clear_bit_le(b, (void *) w_le);
0163 
0164     if (val & 1)
0165         __set_bit_le(b + 1, (void *) w_le);
0166     else
0167         __clear_bit_le(b + 1, (void *) w_le);
0168 }
0169 
0170 static int sm_find_free(void *addr, unsigned begin, unsigned end,
0171             unsigned *result)
0172 {
0173     while (begin < end) {
0174         if (!(begin & (ENTRIES_PER_WORD - 1)) &&
0175             dm_bitmap_word_used(addr, begin)) {
0176             begin += ENTRIES_PER_WORD;
0177             continue;
0178         }
0179 
0180         if (!sm_lookup_bitmap(addr, begin)) {
0181             *result = begin;
0182             return 0;
0183         }
0184 
0185         begin++;
0186     }
0187 
0188     return -ENOSPC;
0189 }
0190 
0191 /*----------------------------------------------------------------*/
0192 
0193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
0194 {
0195     memset(ll, 0, sizeof(struct ll_disk));
0196 
0197     ll->tm = tm;
0198 
0199     ll->bitmap_info.tm = tm;
0200     ll->bitmap_info.levels = 1;
0201 
0202     /*
0203      * Because the new bitmap blocks are created via a shadow
0204      * operation, the old entry has already had its reference count
0205      * decremented and we don't need the btree to do any bookkeeping.
0206      */
0207     ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
0208     ll->bitmap_info.value_type.inc = NULL;
0209     ll->bitmap_info.value_type.dec = NULL;
0210     ll->bitmap_info.value_type.equal = NULL;
0211 
0212     ll->ref_count_info.tm = tm;
0213     ll->ref_count_info.levels = 1;
0214     ll->ref_count_info.value_type.size = sizeof(uint32_t);
0215     ll->ref_count_info.value_type.inc = NULL;
0216     ll->ref_count_info.value_type.dec = NULL;
0217     ll->ref_count_info.value_type.equal = NULL;
0218 
0219     ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
0220 
0221     if (ll->block_size > (1 << 30)) {
0222         DMERR("block size too big to hold bitmaps");
0223         return -EINVAL;
0224     }
0225 
0226     ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
0227         ENTRIES_PER_BYTE;
0228     ll->nr_blocks = 0;
0229     ll->bitmap_root = 0;
0230     ll->ref_count_root = 0;
0231     ll->bitmap_index_changed = false;
0232 
0233     return 0;
0234 }
0235 
0236 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
0237 {
0238     int r;
0239     dm_block_t i, nr_blocks, nr_indexes;
0240     unsigned old_blocks, blocks;
0241 
0242     nr_blocks = ll->nr_blocks + extra_blocks;
0243     old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
0244     blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
0245 
0246     nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
0247     if (nr_indexes > ll->max_entries(ll)) {
0248         DMERR("space map too large");
0249         return -EINVAL;
0250     }
0251 
0252     /*
0253      * We need to set this before the dm_tm_new_block() call below.
0254      */
0255     ll->nr_blocks = nr_blocks;
0256     for (i = old_blocks; i < blocks; i++) {
0257         struct dm_block *b;
0258         struct disk_index_entry idx;
0259 
0260         r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
0261         if (r < 0)
0262             return r;
0263 
0264         idx.blocknr = cpu_to_le64(dm_block_location(b));
0265 
0266         dm_tm_unlock(ll->tm, b);
0267 
0268         idx.nr_free = cpu_to_le32(ll->entries_per_block);
0269         idx.none_free_before = 0;
0270 
0271         r = ll->save_ie(ll, i, &idx);
0272         if (r < 0)
0273             return r;
0274     }
0275 
0276     return 0;
0277 }
0278 
0279 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
0280 {
0281     int r;
0282     dm_block_t index = b;
0283     struct disk_index_entry ie_disk;
0284     struct dm_block *blk;
0285 
0286     if (b >= ll->nr_blocks) {
0287         DMERR_LIMIT("metadata block out of bounds");
0288         return -EINVAL;
0289     }
0290 
0291     b = do_div(index, ll->entries_per_block);
0292     r = ll->load_ie(ll, index, &ie_disk);
0293     if (r < 0)
0294         return r;
0295 
0296     r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
0297                 &dm_sm_bitmap_validator, &blk);
0298     if (r < 0)
0299         return r;
0300 
0301     *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
0302 
0303     dm_tm_unlock(ll->tm, blk);
0304 
0305     return 0;
0306 }
0307 
0308 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
0309                       uint32_t *result)
0310 {
0311     __le32 le_rc;
0312     int r;
0313 
0314     r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
0315     if (r < 0)
0316         return r;
0317 
0318     *result = le32_to_cpu(le_rc);
0319 
0320     return r;
0321 }
0322 
0323 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
0324 {
0325     int r = sm_ll_lookup_bitmap(ll, b, result);
0326 
0327     if (r)
0328         return r;
0329 
0330     if (*result != 3)
0331         return r;
0332 
0333     return sm_ll_lookup_big_ref_count(ll, b, result);
0334 }
0335 
/*
 * Scans [begin, end) for a block with a zero reference count.  Walks
 * the index entries, skipping bitmaps with no free entries, and uses
 * each entry's none_free_before hint to start the per-bitmap search.
 * Returns 0 with *result set, -ENOSPC if nothing free, or a negative
 * errno from I/O.
 */
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
			  dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	/*
	 * FIXME: Use shifts
	 */
	/* Convert begin/end into (index entry, bit offset) coordinates. */
	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);
	/* A zero remainder means 'end' falls exactly on a bitmap boundary. */
	if (end == 0)
		end = ll->entries_per_block;

	/* After the first bitmap, every search starts from bit 0. */
	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *blk;
		unsigned position;
		uint32_t bit_end;

		r = ll->load_ie(ll, i, &ie_disk);
		if (r < 0)
			return r;

		/* Entirely allocated bitmap; nothing to scan. */
		if (le32_to_cpu(ie_disk.nr_free) == 0)
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
				    &dm_sm_bitmap_validator, &blk);
		if (r < 0)
			return r;

		/* Only the final bitmap is clipped by 'end'. */
		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;

		r = sm_find_free(dm_bitmap_data(blk),
				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
				 bit_end, &position);
		if (r == -ENOSPC) {
			/*
			 * This might happen because we started searching
			 * part way through the bitmap.
			 */
			dm_tm_unlock(ll->tm, blk);
			continue;
		}

		dm_tm_unlock(ll->tm, blk);

		*result = i * ll->entries_per_block + (dm_block_t) position;
		return 0;
	}

	return -ENOSPC;
}
0391 
0392 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
0393                              dm_block_t begin, dm_block_t end, dm_block_t *b)
0394 {
0395     int r;
0396     uint32_t count;
0397 
0398     do {
0399         r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
0400         if (r)
0401             break;
0402 
0403         /* double check this block wasn't used in the old transaction */
0404         if (*b >= old_ll->nr_blocks)
0405             count = 0;
0406         else {
0407             r = sm_ll_lookup(old_ll, *b, &count);
0408             if (r)
0409                 break;
0410 
0411             if (count)
0412                 begin = *b + 1;
0413         }
0414     } while (count);
0415 
0416     return r;
0417 }
0418 
0419 /*----------------------------------------------------------------*/
0420 
0421 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
0422          uint32_t ref_count, int32_t *nr_allocations)
0423 {
0424     int r;
0425     uint32_t bit, old;
0426     struct dm_block *nb;
0427     dm_block_t index = b;
0428     struct disk_index_entry ie_disk;
0429     void *bm_le;
0430     int inc;
0431 
0432     bit = do_div(index, ll->entries_per_block);
0433     r = ll->load_ie(ll, index, &ie_disk);
0434     if (r < 0)
0435         return r;
0436 
0437     r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
0438                    &dm_sm_bitmap_validator, &nb, &inc);
0439     if (r < 0) {
0440         DMERR("dm_tm_shadow_block() failed");
0441         return r;
0442     }
0443     ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
0444     bm_le = dm_bitmap_data(nb);
0445 
0446     old = sm_lookup_bitmap(bm_le, bit);
0447     if (old > 2) {
0448         r = sm_ll_lookup_big_ref_count(ll, b, &old);
0449         if (r < 0) {
0450             dm_tm_unlock(ll->tm, nb);
0451             return r;
0452         }
0453     }
0454 
0455     if (r) {
0456         dm_tm_unlock(ll->tm, nb);
0457         return r;
0458     }
0459 
0460     if (ref_count <= 2) {
0461         sm_set_bitmap(bm_le, bit, ref_count);
0462         dm_tm_unlock(ll->tm, nb);
0463 
0464         if (old > 2) {
0465             r = dm_btree_remove(&ll->ref_count_info,
0466                         ll->ref_count_root,
0467                         &b, &ll->ref_count_root);
0468             if (r)
0469                 return r;
0470         }
0471 
0472     } else {
0473         __le32 le_rc = cpu_to_le32(ref_count);
0474 
0475         sm_set_bitmap(bm_le, bit, 3);
0476         dm_tm_unlock(ll->tm, nb);
0477 
0478         __dm_bless_for_disk(&le_rc);
0479         r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
0480                     &b, &le_rc, &ll->ref_count_root);
0481         if (r < 0) {
0482             DMERR("ref count insert failed");
0483             return r;
0484         }
0485     }
0486 
0487     if (ref_count && !old) {
0488         *nr_allocations = 1;
0489         ll->nr_allocated++;
0490         le32_add_cpu(&ie_disk.nr_free, -1);
0491         if (le32_to_cpu(ie_disk.none_free_before) == bit)
0492             ie_disk.none_free_before = cpu_to_le32(bit + 1);
0493 
0494     } else if (old && !ref_count) {
0495         *nr_allocations = -1;
0496         ll->nr_allocated--;
0497         le32_add_cpu(&ie_disk.nr_free, 1);
0498         ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
0499     } else
0500         *nr_allocations = 0;
0501 
0502     return ll->save_ie(ll, index, &ie_disk);
0503 }
0504 
0505 /*----------------------------------------------------------------*/
0506 
0507 /*
0508  * Holds useful intermediate results for the range based inc and dec
0509  * operations.
0510  */
struct inc_context {
	struct disk_index_entry ie_disk;	/* index entry for the bitmap being updated */
	struct dm_block *bitmap_block;		/* write-locked bitmap block, or NULL if dropped */
	void *bitmap;				/* payload of bitmap_block */

	struct dm_block *overflow_leaf;		/* locked overflow btree leaf, or NULL */
};
0518 
/* Starts an inc/dec operation with no blocks held. */
static inline void init_inc_context(struct inc_context *ic)
{
	ic->bitmap_block = NULL;
	ic->bitmap = NULL;
	ic->overflow_leaf = NULL;
}
0525 
/* Releases whichever locks the context still holds. */
static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	if (ic->bitmap_block)
		dm_tm_unlock(ll->tm, ic->bitmap_block);
	if (ic->overflow_leaf)
		dm_tm_unlock(ll->tm, ic->overflow_leaf);
}
0533 
/* Drops all held locks and returns the context to its initial state. */
static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}
0539 
0540 /*
0541  * Confirms a btree node contains a particular key at an index.
0542  */
0543 static bool contains_key(struct btree_node *n, uint64_t key, int index)
0544 {
0545     return index >= 0 &&
0546         index < le32_to_cpu(n->header.nr_entries) &&
0547         le64_to_cpu(n->keys[index]) == key;
0548 }
0549 
0550 static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
0551 {
0552     int r;
0553     int index;
0554     struct btree_node *n;
0555     __le32 *v_ptr;
0556     uint32_t rc;
0557 
0558     /*
0559      * bitmap_block needs to be unlocked because getting the
0560      * overflow_leaf may need to allocate, and thus use the space map.
0561      */
0562     reset_inc_context(ll, ic);
0563 
0564     r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
0565                      b, &index, &ll->ref_count_root, &ic->overflow_leaf);
0566     if (r < 0)
0567         return r;
0568 
0569     n = dm_block_data(ic->overflow_leaf);
0570 
0571     if (!contains_key(n, b, index)) {
0572         DMERR("overflow btree is missing an entry");
0573         return -EINVAL;
0574     }
0575 
0576     v_ptr = value_ptr(n, index);
0577     rc = le32_to_cpu(*v_ptr) + 1;
0578     *v_ptr = cpu_to_le32(rc);
0579 
0580     return 0;
0581 }
0582 
0583 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
0584 {
0585     int index;
0586     struct btree_node *n;
0587     __le32 *v_ptr;
0588     uint32_t rc;
0589 
0590     /*
0591      * Do we already have the correct overflow leaf?
0592      */
0593     if (ic->overflow_leaf) {
0594         n = dm_block_data(ic->overflow_leaf);
0595         index = lower_bound(n, b);
0596         if (contains_key(n, b, index)) {
0597             v_ptr = value_ptr(n, index);
0598             rc = le32_to_cpu(*v_ptr) + 1;
0599             *v_ptr = cpu_to_le32(rc);
0600 
0601             return 0;
0602         }
0603     }
0604 
0605     return __sm_ll_inc_overflow(ll, b, ic);
0606 }
0607 
0608 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
0609 {
0610     int r, inc;
0611     r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
0612                    &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
0613     if (r < 0) {
0614         DMERR("dm_tm_shadow_block() failed");
0615         return r;
0616     }
0617     ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
0618     ic->bitmap = dm_bitmap_data(ic->bitmap_block);
0619     return 0;
0620 }
0621 
0622 /*
0623  * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
0624  * we can reopen the bitmap with a simple write lock, rather than re calling
0625  * dm_tm_shadow_block().
0626  */
0627 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
0628 {
0629     if (!ic->bitmap_block) {
0630         int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
0631                      &dm_sm_bitmap_validator, &ic->bitmap_block);
0632         if (r) {
0633             DMERR("unable to re-get write lock for bitmap");
0634             return r;
0635         }
0636         ic->bitmap = dm_bitmap_data(ic->bitmap_block);
0637     }
0638 
0639     return 0;
0640 }
0641 
0642 /*
0643  * Loops round incrementing entries in a single bitmap.
0644  */
static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   int32_t *nr_allocations, dm_block_t *new_b,
				   struct inc_context *ic)
{
	int r;
	__le32 le_rc;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* inc bitmap, adjust nr_allocated */
			sm_set_bitmap(ic->bitmap, bit, 1);
			(*nr_allocations)++;
			ll->nr_allocated++;
			le32_add_cpu(&ic->ie_disk.nr_free, -1);
			/* We may have just consumed the first free entry. */
			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
			break;

		case 1:
			/* inc bitmap */
			sm_set_bitmap(ic->bitmap, bit, 2);
			break;

		case 2:
			/* inc bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 3);
			/* Drop locks: the insert below may need to allocate. */
			reset_inc_context(ll, ic);

			le_rc = cpu_to_le32(3);
			__dm_bless_for_disk(&le_rc);
			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
					    &b, &le_rc, &ll->ref_count_root);
			if (r < 0) {
				DMERR("ref count insert failed");
				return r;
			}
			break;

		default:
			/*
			 * inc within the overflow tree only.
			 */
			r = sm_ll_inc_overflow(ll, b, ic);
			if (r < 0)
				return r;
		}
	}

	/* Report how far we got so the caller can continue from here. */
	*new_b = b;
	return 0;
}
0709 
0710 /*
0711  * Finds a bitmap that contains entries in the block range, and increments
0712  * them.
0713  */
0714 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
0715                int32_t *nr_allocations, dm_block_t *new_b)
0716 {
0717     int r;
0718     struct inc_context ic;
0719     uint32_t bit, bit_end;
0720     dm_block_t index = b;
0721 
0722     init_inc_context(&ic);
0723 
0724     bit = do_div(index, ll->entries_per_block);
0725     r = ll->load_ie(ll, index, &ic.ie_disk);
0726     if (r < 0)
0727         return r;
0728 
0729     r = shadow_bitmap(ll, &ic);
0730     if (r)
0731         return r;
0732 
0733     bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
0734     r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
0735 
0736     exit_inc_context(ll, &ic);
0737 
0738     if (r)
0739         return r;
0740 
0741     return ll->save_ie(ll, index, &ic.ie_disk);
0742 }
0743 
0744 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
0745           int32_t *nr_allocations)
0746 {
0747     *nr_allocations = 0;
0748     while (b != e) {
0749         int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
0750         if (r)
0751             return r;
0752     }
0753 
0754     return 0;
0755 }
0756 
0757 /*----------------------------------------------------------------*/
0758 
/* Removes b's overflow entry; drops held locks first since the btree
 * removal may itself need to allocate via the space map. */
static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic)
{
	reset_inc_context(ll, ic);
	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
			       &b, &ll->ref_count_root);
}
0766 
0767 static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
0768                 struct inc_context *ic, uint32_t *old_rc)
0769 {
0770     int r;
0771     int index = -1;
0772     struct btree_node *n;
0773     __le32 *v_ptr;
0774     uint32_t rc;
0775 
0776     reset_inc_context(ll, ic);
0777     r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
0778                      b, &index, &ll->ref_count_root, &ic->overflow_leaf);
0779     if (r < 0)
0780         return r;
0781 
0782     n = dm_block_data(ic->overflow_leaf);
0783 
0784     if (!contains_key(n, b, index)) {
0785         DMERR("overflow btree is missing an entry");
0786         return -EINVAL;
0787     }
0788 
0789     v_ptr = value_ptr(n, index);
0790     rc = le32_to_cpu(*v_ptr);
0791     *old_rc = rc;
0792 
0793     if (rc == 3) {
0794         return __sm_ll_del_overflow(ll, b, ic);
0795     } else {
0796         rc--;
0797         *v_ptr = cpu_to_le32(rc);
0798         return 0;
0799     }
0800 }
0801 
0802 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
0803                   struct inc_context *ic, uint32_t *old_rc)
0804 {
0805     /*
0806      * Do we already have the correct overflow leaf?
0807      */
0808     if (ic->overflow_leaf) {
0809         int index;
0810         struct btree_node *n;
0811         __le32 *v_ptr;
0812         uint32_t rc;
0813 
0814         n = dm_block_data(ic->overflow_leaf);
0815         index = lower_bound(n, b);
0816         if (contains_key(n, b, index)) {
0817             v_ptr = value_ptr(n, index);
0818             rc = le32_to_cpu(*v_ptr);
0819             *old_rc = rc;
0820 
0821             if (rc > 3) {
0822                 rc--;
0823                 *v_ptr = cpu_to_le32(rc);
0824                 return 0;
0825             } else {
0826                 return __sm_ll_del_overflow(ll, b, ic);
0827             }
0828 
0829         }
0830     }
0831 
0832     return __sm_ll_dec_overflow(ll, b, ic, old_rc);
0833 }
0834 
/*
 * Loops round decrementing entries in a single bitmap.
 */
static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   struct inc_context *ic,
				   int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* Can't dec a block that is already free. */
			DMERR("unable to decrement block");
			return -EINVAL;

		case 1:
			/* dec bitmap */
			sm_set_bitmap(ic->bitmap, bit, 0);
			(*nr_allocations)--;
			ll->nr_allocated--;
			le32_add_cpu(&ic->ie_disk.nr_free, 1);
			/* This bit may now be the first free entry. */
			ic->ie_disk.none_free_before =
				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
			break;

		case 2:
			/* dec bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 1);
			break;

		case 3:
			/* Real count is in the overflow btree. */
			r = sm_ll_dec_overflow(ll, b, ic, &old);
			if (r < 0)
				return r;

			/*
			 * Count dropped from 3 to 2, so it moves back
			 * into the bitmap (locks may have been dropped
			 * by the overflow dec; re-get them first).
			 */
			if (old == 3) {
				r = ensure_bitmap(ll, ic);
				if (r)
					return r;

				sm_set_bitmap(ic->bitmap, bit, 2);
			}
			break;
		}
	}

	/* Report how far we got so the caller can continue from here. */
	*new_b = b;
	return 0;
}
0896 
0897 static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
0898                int32_t *nr_allocations, dm_block_t *new_b)
0899 {
0900     int r;
0901     uint32_t bit, bit_end;
0902     struct inc_context ic;
0903     dm_block_t index = b;
0904 
0905     init_inc_context(&ic);
0906 
0907     bit = do_div(index, ll->entries_per_block);
0908     r = ll->load_ie(ll, index, &ic.ie_disk);
0909     if (r < 0)
0910         return r;
0911 
0912     r = shadow_bitmap(ll, &ic);
0913     if (r)
0914         return r;
0915 
0916     bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
0917     r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
0918     exit_inc_context(ll, &ic);
0919 
0920     if (r)
0921         return r;
0922 
0923     return ll->save_ie(ll, index, &ic.ie_disk);
0924 }
0925 
0926 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
0927           int32_t *nr_allocations)
0928 {
0929     *nr_allocations = 0;
0930     while (b != e) {
0931         int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
0932         if (r)
0933             return r;
0934     }
0935 
0936     return 0;
0937 }
0938 
0939 /*----------------------------------------------------------------*/
0940 
0941 int sm_ll_commit(struct ll_disk *ll)
0942 {
0943     int r = 0;
0944 
0945     if (ll->bitmap_index_changed) {
0946         r = ll->commit(ll);
0947         if (!r)
0948             ll->bitmap_index_changed = false;
0949     }
0950 
0951     return r;
0952 }
0953 
0954 /*----------------------------------------------------------------*/
0955 
0956 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
0957                    struct disk_index_entry *ie)
0958 {
0959     memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
0960     return 0;
0961 }
0962 
0963 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
0964                    struct disk_index_entry *ie)
0965 {
0966     ll->bitmap_index_changed = true;
0967     memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
0968     return 0;
0969 }
0970 
0971 static int metadata_ll_init_index(struct ll_disk *ll)
0972 {
0973     int r;
0974     struct dm_block *b;
0975 
0976     r = dm_tm_new_block(ll->tm, &index_validator, &b);
0977     if (r < 0)
0978         return r;
0979 
0980     ll->bitmap_root = dm_block_location(b);
0981 
0982     dm_tm_unlock(ll->tm, b);
0983 
0984     return 0;
0985 }
0986 
0987 static int metadata_ll_open(struct ll_disk *ll)
0988 {
0989     int r;
0990     struct dm_block *block;
0991 
0992     r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
0993                 &index_validator, &block);
0994     if (r)
0995         return r;
0996 
0997     memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
0998     dm_tm_unlock(ll->tm, block);
0999 
1000     return 0;
1001 }
1002 
/* The metadata index is a single block with a fixed entry capacity. */
static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
{
	return MAX_METADATA_BITMAPS;
}
1007 
1008 static int metadata_ll_commit(struct ll_disk *ll)
1009 {
1010     int r, inc;
1011     struct dm_block *b;
1012 
1013     r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1014     if (r)
1015         return r;
1016 
1017     memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1018     ll->bitmap_root = dm_block_location(b);
1019 
1020     dm_tm_unlock(ll->tm, b);
1021 
1022     return 0;
1023 }
1024 
1025 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1026 {
1027     int r;
1028 
1029     r = sm_ll_init(ll, tm);
1030     if (r < 0)
1031         return r;
1032 
1033     ll->load_ie = metadata_ll_load_ie;
1034     ll->save_ie = metadata_ll_save_ie;
1035     ll->init_index = metadata_ll_init_index;
1036     ll->open_index = metadata_ll_open;
1037     ll->max_entries = metadata_ll_max_entries;
1038     ll->commit = metadata_ll_commit;
1039 
1040     ll->nr_blocks = 0;
1041     ll->nr_allocated = 0;
1042 
1043     r = ll->init_index(ll);
1044     if (r < 0)
1045         return r;
1046 
1047     r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1048     if (r < 0)
1049         return r;
1050 
1051     return 0;
1052 }
1053 
1054 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1055             void *root_le, size_t len)
1056 {
1057     int r;
1058     struct disk_sm_root smr;
1059 
1060     if (len < sizeof(struct disk_sm_root)) {
1061         DMERR("sm_metadata root too small");
1062         return -ENOMEM;
1063     }
1064 
1065     /*
1066      * We don't know the alignment of the root_le buffer, so need to
1067      * copy into a new structure.
1068      */
1069     memcpy(&smr, root_le, sizeof(smr));
1070 
1071     r = sm_ll_init(ll, tm);
1072     if (r < 0)
1073         return r;
1074 
1075     ll->load_ie = metadata_ll_load_ie;
1076     ll->save_ie = metadata_ll_save_ie;
1077     ll->init_index = metadata_ll_init_index;
1078     ll->open_index = metadata_ll_open;
1079     ll->max_entries = metadata_ll_max_entries;
1080     ll->commit = metadata_ll_commit;
1081 
1082     ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1083     ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1084     ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1085     ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1086 
1087     return ll->open_index(ll);
1088 }
1089 
1090 /*----------------------------------------------------------------*/
1091 
/*
 * Flush a dirty index-entry cache slot into the bitmap btree.
 *
 * NOTE: the dirty flag is cleared *before* the insert, so if the insert
 * fails the cached update is dropped rather than retried later.
 */
static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
{
    iec->dirty = false;
    __dm_bless_for_disk(iec->ie);
    return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
                   &iec->index, &iec->ie, &ll->bitmap_root);
}
1099 
/* Map a bitmap index to its (direct-mapped) slot in ll->ie_cache. */
static inline unsigned hash_index(dm_block_t index)
{
    return dm_hash_block(index, IE_CACHE_MASK);
}
1104 
1105 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1106                struct disk_index_entry *ie)
1107 {
1108     int r;
1109     unsigned h = hash_index(index);
1110     struct ie_cache *iec = ll->ie_cache + h;
1111 
1112     if (iec->valid) {
1113         if (iec->index == index) {
1114             memcpy(ie, &iec->ie, sizeof(*ie));
1115             return 0;
1116         }
1117 
1118         if (iec->dirty) {
1119             r = ie_cache_writeback(ll, iec);
1120             if (r)
1121                 return r;
1122         }
1123     }
1124 
1125     r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1126     if (!r) {
1127         iec->valid = true;
1128         iec->dirty = false;
1129         iec->index = index;
1130         memcpy(&iec->ie, ie, sizeof(*ie));
1131     }
1132 
1133     return r;
1134 }
1135 
1136 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1137                struct disk_index_entry *ie)
1138 {
1139     int r;
1140     unsigned h = hash_index(index);
1141     struct ie_cache *iec = ll->ie_cache + h;
1142 
1143     ll->bitmap_index_changed = true;
1144     if (iec->valid) {
1145         if (iec->index == index) {
1146             memcpy(&iec->ie, ie, sizeof(*ie));
1147             iec->dirty = true;
1148             return 0;
1149         }
1150 
1151         if (iec->dirty) {
1152             r = ie_cache_writeback(ll, iec);
1153             if (r)
1154                 return r;
1155         }
1156     }
1157 
1158     iec->valid = true;
1159     iec->dirty = true;
1160     iec->index = index;
1161     memcpy(&iec->ie, ie, sizeof(*ie));
1162     return 0;
1163 }
1164 
1165 static int disk_ll_init_index(struct ll_disk *ll)
1166 {
1167     unsigned i;
1168     for (i = 0; i < IE_CACHE_SIZE; i++) {
1169         struct ie_cache *iec = ll->ie_cache + i;
1170         iec->valid = false;
1171         iec->dirty = false;
1172     }
1173     return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1174 }
1175 
/*
 * Nothing to read at open time: unlike the metadata flavour, the disk
 * space map has no in-core index block to load.
 */
static int disk_ll_open(struct ll_disk *ll)
{
    return 0;
}
1180 
/*
 * The disk space map keeps its index entries in a btree, so there is no
 * fixed cap on the number of entries.
 */
static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
{
    return -1ULL;   /* the maximum dm_block_t value, i.e. "unlimited" */
}
1185 
1186 static int disk_ll_commit(struct ll_disk *ll)
1187 {
1188     int r = 0;
1189     unsigned i;
1190 
1191     for (i = 0; i < IE_CACHE_SIZE; i++) {
1192         struct ie_cache *iec = ll->ie_cache + i;
1193         if (iec->valid && iec->dirty)
1194             r = ie_cache_writeback(ll, iec);
1195     }
1196 
1197     return r;
1198 }
1199 
1200 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1201 {
1202     int r;
1203 
1204     r = sm_ll_init(ll, tm);
1205     if (r < 0)
1206         return r;
1207 
1208     ll->load_ie = disk_ll_load_ie;
1209     ll->save_ie = disk_ll_save_ie;
1210     ll->init_index = disk_ll_init_index;
1211     ll->open_index = disk_ll_open;
1212     ll->max_entries = disk_ll_max_entries;
1213     ll->commit = disk_ll_commit;
1214 
1215     ll->nr_blocks = 0;
1216     ll->nr_allocated = 0;
1217 
1218     r = ll->init_index(ll);
1219     if (r < 0)
1220         return r;
1221 
1222     r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1223     if (r < 0)
1224         return r;
1225 
1226     return 0;
1227 }
1228 
1229 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1230             void *root_le, size_t len)
1231 {
1232     int r;
1233     struct disk_sm_root *smr = root_le;
1234 
1235     if (len < sizeof(struct disk_sm_root)) {
1236         DMERR("sm_metadata root too small");
1237         return -ENOMEM;
1238     }
1239 
1240     r = sm_ll_init(ll, tm);
1241     if (r < 0)
1242         return r;
1243 
1244     ll->load_ie = disk_ll_load_ie;
1245     ll->save_ie = disk_ll_save_ie;
1246     ll->init_index = disk_ll_init_index;
1247     ll->open_index = disk_ll_open;
1248     ll->max_entries = disk_ll_max_entries;
1249     ll->commit = disk_ll_commit;
1250 
1251     ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1252     ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1253     ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1254     ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1255 
1256     return ll->open_index(ll);
1257 }
1258 
1259 /*----------------------------------------------------------------*/