0001 /*
0002  * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
0003  * Copyright (C) 2016-2017 Milan Broz
0004  * Copyright (C) 2016-2017 Mikulas Patocka
0005  *
0006  * This file is released under the GPL.
0007  */
0008 
0009 #include "dm-bio-record.h"
0010 
0011 #include <linux/compiler.h>
0012 #include <linux/module.h>
0013 #include <linux/device-mapper.h>
0014 #include <linux/dm-io.h>
0015 #include <linux/vmalloc.h>
0016 #include <linux/sort.h>
0017 #include <linux/rbtree.h>
0018 #include <linux/delay.h>
0019 #include <linux/random.h>
0020 #include <linux/reboot.h>
0021 #include <crypto/hash.h>
0022 #include <crypto/skcipher.h>
0023 #include <linux/async_tx.h>
0024 #include <linux/dm-bufio.h>
0025 
0026 #include "dm-audit.h"
0027 
0028 #define DM_MSG_PREFIX "integrity"
0029 
0030 #define DEFAULT_INTERLEAVE_SECTORS  32768
0031 #define DEFAULT_JOURNAL_SIZE_FACTOR 7
0032 #define DEFAULT_SECTORS_PER_BITMAP_BIT  32768
0033 #define DEFAULT_BUFFER_SECTORS      128
0034 #define DEFAULT_JOURNAL_WATERMARK   50
0035 #define DEFAULT_SYNC_MSEC       10000
0036 #define DEFAULT_MAX_JOURNAL_SECTORS 131072
0037 #define MIN_LOG2_INTERLEAVE_SECTORS 3
0038 #define MAX_LOG2_INTERLEAVE_SECTORS 31
0039 #define METADATA_WORKQUEUE_MAX_ACTIVE   16
0040 #define RECALC_SECTORS          32768
0041 #define RECALC_WRITE_SUPER      16
0042 #define BITMAP_BLOCK_SIZE       4096    /* don't change it */
0043 #define BITMAP_FLUSH_INTERVAL       (10 * HZ)
0044 #define DISCARD_FILLER          0xf6
0045 #define SALT_SIZE           16
0046 
0047 /*
0048  * Warning - DEBUG_PRINT prints security-sensitive data to the log,
0049  * so it should not be enabled in the official kernel
0050  */
0051 //#define DEBUG_PRINT
0052 //#define INTERNAL_VERIFY
0053 
0054 /*
0055  * On-disk structures
0056  */
0057 
0058 #define SB_MAGIC            "integrt"
0059 #define SB_VERSION_1            1
0060 #define SB_VERSION_2            2
0061 #define SB_VERSION_3            3
0062 #define SB_VERSION_4            4
0063 #define SB_VERSION_5            5
0064 #define SB_SECTORS          8
0065 #define MAX_SECTORS_PER_BLOCK       8
0066 
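     /*
      * On-disk superblock. It occupies SB_SECTORS (8) sectors at ic->start on
      * the metadata device (or on the data device when no separate metadata
      * device is used); see sync_rw_sb(). With the SB_FLAG_FIXED_HMAC feature,
      * a MAC over the superblock is stored at the end of its first 512-byte
      * sector (see sb_mac()).
      */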
0067 struct superblock {
0068     __u8 magic[8];
0069     __u8 version;
0070     __u8 log2_interleave_sectors;
0071     __le16 integrity_tag_size;
0072     __le32 journal_sections;
0073     __le64 provided_data_sectors;   /* userspace uses this value */
0074     __le32 flags;
0075     __u8 log2_sectors_per_block;
0076     __u8 log2_blocks_per_bitmap_bit;
0077     __u8 pad[2];
0078     __le64 recalc_sector;
0079     __u8 pad2[8];
0080     __u8 salt[SALT_SIZE];
0081 };
0082 
0083 #define SB_FLAG_HAVE_JOURNAL_MAC    0x1
0084 #define SB_FLAG_RECALCULATING       0x2
0085 #define SB_FLAG_DIRTY_BITMAP        0x4
0086 #define SB_FLAG_FIXED_PADDING       0x8
0087 #define SB_FLAG_FIXED_HMAC      0x10
0088 
0089 #define JOURNAL_ENTRY_ROUNDUP       8
0090 
0091 typedef __le64 commit_id_t;
0092 #define JOURNAL_MAC_PER_SECTOR      8
0093 
0094 struct journal_entry {
0095     union {
0096         struct {
0097             __le32 sector_lo;
0098             __le32 sector_hi;
0099         } s;
0100         __le64 sector;
0101     } u;
0102     commit_id_t last_bytes[];
0103     /* __u8 tag[0]; */
0104 };
0105 
0106 #define journal_entry_tag(ic, je)       ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
0107 
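     /*
      * A journal entry slot is marked through u.s.sector_hi: all-ones means
      * the slot is unused, -2 means a write into it is in progress.
      * journal_entry_set_sector() publishes the sector number behind a write
      * barrier (as two ordered 32-bit stores on 32-bit machines) so that a
      * reader does not observe a torn value. The per-block integrity tag is
      * stored directly after last_bytes[]; see journal_entry_tag().
      */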
0108 #if BITS_PER_LONG == 64
0109 #define journal_entry_set_sector(je, x)     do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0)
0110 #else
0111 #define journal_entry_set_sector(je, x)     do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
0112 #endif
0113 #define journal_entry_get_sector(je)        le64_to_cpu((je)->u.sector)
0114 #define journal_entry_is_unused(je)     ((je)->u.s.sector_hi == cpu_to_le32(-1))
0115 #define journal_entry_set_unused(je)        do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0)
0116 #define journal_entry_is_inprogress(je)     ((je)->u.s.sector_hi == cpu_to_le32(-2))
0117 #define journal_entry_set_inprogress(je)    do { ((je)->u.s.sector_hi = cpu_to_le32(-2)); } while (0)
0118 
0119 #define JOURNAL_BLOCK_SECTORS       8
0120 #define JOURNAL_SECTOR_DATA     ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
0121 #define JOURNAL_MAC_SIZE        (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)
0122 
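     /*
      * Layout of one 512-byte journal sector: packed journal entries, then
      * JOURNAL_MAC_PER_SECTOR (8) bytes holding this sector's slice of the
      * section MAC, then an 8-byte commit_id at the very end.
      */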
0123 struct journal_sector {
0124     struct_group(sectors,
0125         __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
0126         __u8 mac[JOURNAL_MAC_PER_SECTOR];
0127     );
0128     commit_id_t commit_id;
0129 };
0130 
0131 #define MAX_TAG_SIZE            (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
0132 
0133 #define METADATA_PADDING_SECTORS    8
0134 
0135 #define N_COMMIT_IDS            4
0136 
0137 static unsigned char prev_commit_seq(unsigned char seq)
0138 {
0139     return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS;
0140 }
0141 
0142 static unsigned char next_commit_seq(unsigned char seq)
0143 {
0144     return (seq + 1) % N_COMMIT_IDS;
0145 }
0146 
0147 /*
0148  * In-memory structures
0149  */
0150 
0151 struct journal_node {
0152     struct rb_node node;
0153     sector_t sector;
0154 };
0155 
0156 struct alg_spec {
0157     char *alg_string;
0158     char *key_string;
0159     __u8 *key;
0160     unsigned key_size;
0161 };
0162 
0163 struct dm_integrity_c {
0164     struct dm_dev *dev;
0165     struct dm_dev *meta_dev;
0166     unsigned tag_size;
0167     __s8 log2_tag_size;
0168     sector_t start;
0169     mempool_t journal_io_mempool;
0170     struct dm_io_client *io;
0171     struct dm_bufio_client *bufio;
0172     struct workqueue_struct *metadata_wq;
0173     struct superblock *sb;
0174     unsigned journal_pages;
0175     unsigned n_bitmap_blocks;
0176 
0177     struct page_list *journal;
0178     struct page_list *journal_io;
0179     struct page_list *journal_xor;
0180     struct page_list *recalc_bitmap;
0181     struct page_list *may_write_bitmap;
0182     struct bitmap_block_status *bbs;
0183     unsigned bitmap_flush_interval;
0184     int synchronous_mode;
0185     struct bio_list synchronous_bios;
0186     struct delayed_work bitmap_flush_work;
0187 
0188     struct crypto_skcipher *journal_crypt;
0189     struct scatterlist **journal_scatterlist;
0190     struct scatterlist **journal_io_scatterlist;
0191     struct skcipher_request **sk_requests;
0192 
0193     struct crypto_shash *journal_mac;
0194 
0195     struct journal_node *journal_tree;
0196     struct rb_root journal_tree_root;
0197 
0198     sector_t provided_data_sectors;
0199 
0200     unsigned short journal_entry_size;
0201     unsigned char journal_entries_per_sector;
0202     unsigned char journal_section_entries;
0203     unsigned short journal_section_sectors;
0204     unsigned journal_sections;
0205     unsigned journal_entries;
0206     sector_t data_device_sectors;
0207     sector_t meta_device_sectors;
0208     unsigned initial_sectors;
0209     unsigned metadata_run;
0210     __s8 log2_metadata_run;
0211     __u8 log2_buffer_sectors;
0212     __u8 sectors_per_block;
0213     __u8 log2_blocks_per_bitmap_bit;
0214 
0215     unsigned char mode;
0216 
0217     int failed;
0218 
0219     struct crypto_shash *internal_hash;
0220 
0221     struct dm_target *ti;
0222 
0223     /* these variables are protected by endio_wait.lock */
0224     struct rb_root in_progress;
0225     struct list_head wait_list;
0226     wait_queue_head_t endio_wait;
0227     struct workqueue_struct *wait_wq;
0228     struct workqueue_struct *offload_wq;
0229 
0230     unsigned char commit_seq;
0231     commit_id_t commit_ids[N_COMMIT_IDS];
0232 
0233     unsigned committed_section;
0234     unsigned n_committed_sections;
0235 
0236     unsigned uncommitted_section;
0237     unsigned n_uncommitted_sections;
0238 
0239     unsigned free_section;
0240     unsigned char free_section_entry;
0241     unsigned free_sectors;
0242 
0243     unsigned free_sectors_threshold;
0244 
0245     struct workqueue_struct *commit_wq;
0246     struct work_struct commit_work;
0247 
0248     struct workqueue_struct *writer_wq;
0249     struct work_struct writer_work;
0250 
0251     struct workqueue_struct *recalc_wq;
0252     struct work_struct recalc_work;
0253     u8 *recalc_buffer;
0254     u8 *recalc_tags;
0255 
0256     struct bio_list flush_bio_list;
0257 
0258     unsigned long autocommit_jiffies;
0259     struct timer_list autocommit_timer;
0260     unsigned autocommit_msec;
0261 
0262     wait_queue_head_t copy_to_journal_wait;
0263 
0264     struct completion crypto_backoff;
0265 
0266     bool journal_uptodate;
0267     bool just_formatted;
0268     bool recalculate_flag;
0269     bool reset_recalculate_flag;
0270     bool discard;
0271     bool fix_padding;
0272     bool fix_hmac;
0273     bool legacy_recalculate;
0274 
0275     struct alg_spec internal_hash_alg;
0276     struct alg_spec journal_crypt_alg;
0277     struct alg_spec journal_mac_alg;
0278 
0279     atomic64_t number_of_mismatches;
0280 
0281     struct notifier_block reboot_notifier;
0282 };
0283 
0284 struct dm_integrity_range {
0285     sector_t logical_sector;
0286     sector_t n_sectors;
0287     bool waiting;
0288     union {
0289         struct rb_node node;
0290         struct {
0291             struct task_struct *task;
0292             struct list_head wait_entry;
0293         };
0294     };
0295 };
0296 
0297 struct dm_integrity_io {
0298     struct work_struct work;
0299 
0300     struct dm_integrity_c *ic;
0301     enum req_op op;
0302     bool fua;
0303 
0304     struct dm_integrity_range range;
0305 
0306     sector_t metadata_block;
0307     unsigned metadata_offset;
0308 
0309     atomic_t in_flight;
0310     blk_status_t bi_status;
0311 
0312     struct completion *completion;
0313 
0314     struct dm_bio_details bio_details;
0315 };
0316 
0317 struct journal_completion {
0318     struct dm_integrity_c *ic;
0319     atomic_t in_flight;
0320     struct completion comp;
0321 };
0322 
0323 struct journal_io {
0324     struct dm_integrity_range range;
0325     struct journal_completion *comp;
0326 };
0327 
0328 struct bitmap_block_status {
0329     struct work_struct work;
0330     struct dm_integrity_c *ic;
0331     unsigned idx;
0332     unsigned long *bitmap;
0333     struct bio_list bio_queue;
0334     spinlock_t bio_queue_lock;
0335 
0336 };
0337 
0338 static struct kmem_cache *journal_io_cache;
0339 
0340 #define JOURNAL_IO_MEMPOOL  32
0341 
0342 #ifdef DEBUG_PRINT
0343 #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
0344 static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
0345 {
0346     va_list args;
0347     va_start(args, msg);
0348     vprintk(msg, args);
0349     va_end(args);
0350     if (len)
0351         pr_cont(":");
0352     while (len) {
0353         pr_cont(" %02x", *bytes);
0354         bytes++;
0355         len--;
0356     }
0357     pr_cont("\n");
0358 }
0359 #define DEBUG_bytes(bytes, len, msg, ...)   __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__)
0360 #else
0361 #define DEBUG_print(x, ...)         do { } while (0)
0362 #define DEBUG_bytes(bytes, len, msg, ...)   do { } while (0)
0363 #endif
0364 
0365 static void dm_integrity_prepare(struct request *rq)
0366 {
0367 }
0368 
0369 static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
0370 {
0371 }
0372 
0373 /*
0374  * DM Integrity profile; protection is performed in the layer above (dm-crypt)
0375  */
0376 static const struct blk_integrity_profile dm_integrity_profile = {
0377     .name           = "DM-DIF-EXT-TAG",
0378     .generate_fn        = NULL,
0379     .verify_fn      = NULL,
0380     .prepare_fn     = dm_integrity_prepare,
0381     .complete_fn        = dm_integrity_complete,
0382 };
0383 
0384 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
0385 static void integrity_bio_wait(struct work_struct *w);
0386 static void dm_integrity_dtr(struct dm_target *ti);
0387 
0388 static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
0389 {
0390     if (err == -EILSEQ)
0391         atomic64_inc(&ic->number_of_mismatches);
0392     if (!cmpxchg(&ic->failed, 0, err))
0393         DMERR("Error on %s: %d", msg, err);
0394 }
0395 
0396 static int dm_integrity_failed(struct dm_integrity_c *ic)
0397 {
0398     return READ_ONCE(ic->failed);
0399 }
0400 
0401 static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
0402 {
0403     if (ic->legacy_recalculate)
0404         return false;
0405     if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ?
0406         ic->internal_hash_alg.key || ic->journal_mac_alg.key :
0407         ic->internal_hash_alg.key && !ic->journal_mac_alg.key)
0408         return true;
0409     return false;
0410 }
0411 
0412 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
0413                       unsigned j, unsigned char seq)
0414 {
0415     /*
0416      * XOR the commit number with the section and sector, so that if a piece
0417      * of the journal is written at the wrong place, it is detected.
0418      */
0419     return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j);
0420 }
0421 
0422 static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
0423                 sector_t *area, sector_t *offset)
0424 {
0425     if (!ic->meta_dev) {
0426         __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
0427         *area = data_sector >> log2_interleave_sectors;
0428         *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
0429     } else {
0430         *area = 0;
0431         *offset = data_sector;
0432     }
0433 }
0434 
0435 #define sector_to_block(ic, n)                      \
0436 do {                                    \
0437     BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1));      \
0438     (n) >>= (ic)->sb->log2_sectors_per_block;           \
0439 } while (0)
0440 
0441 static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
0442                         sector_t offset, unsigned *metadata_offset)
0443 {
0444     __u64 ms;
0445     unsigned mo;
0446 
0447     ms = area << ic->sb->log2_interleave_sectors;
0448     if (likely(ic->log2_metadata_run >= 0))
0449         ms += area << ic->log2_metadata_run;
0450     else
0451         ms += area * ic->metadata_run;
0452     ms >>= ic->log2_buffer_sectors;
0453 
0454     sector_to_block(ic, offset);
0455 
0456     if (likely(ic->log2_tag_size >= 0)) {
0457         ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
0458         mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
0459     } else {
0460         ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors);
0461         mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
0462     }
0463     *metadata_offset = mo;
0464     return ms;
0465 }
0466 
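     /*
      * In interleaved mode (no separate metadata device) the data device is
      * divided into areas of 2^log2_interleave_sectors data sectors, each
      * preceded by its metadata run. get_data_sector() maps (area, offset)
      * back to a device sector by adding ic->start, ic->initial_sectors, the
      * data of all preceding areas and the metadata runs of areas 0..area.
      */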
0467 static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
0468 {
0469     sector_t result;
0470 
0471     if (ic->meta_dev)
0472         return offset;
0473 
0474     result = area << ic->sb->log2_interleave_sectors;
0475     if (likely(ic->log2_metadata_run >= 0))
0476         result += (area + 1) << ic->log2_metadata_run;
0477     else
0478         result += (area + 1) * ic->metadata_run;
0479 
0480     result += (sector_t)ic->initial_sectors + offset;
0481     result += ic->start;
0482 
0483     return result;
0484 }
0485 
0486 static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
0487 {
0488     if (unlikely(*sec_ptr >= ic->journal_sections))
0489         *sec_ptr -= ic->journal_sections;
0490 }
0491 
0492 static void sb_set_version(struct dm_integrity_c *ic)
0493 {
0494     if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
0495         ic->sb->version = SB_VERSION_5;
0496     else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
0497         ic->sb->version = SB_VERSION_4;
0498     else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
0499         ic->sb->version = SB_VERSION_3;
0500     else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
0501         ic->sb->version = SB_VERSION_2;
0502     else
0503         ic->sb->version = SB_VERSION_1;
0504 }
0505 
0506 static int sb_mac(struct dm_integrity_c *ic, bool wr)
0507 {
0508     SHASH_DESC_ON_STACK(desc, ic->journal_mac);
0509     int r;
0510     unsigned size = crypto_shash_digestsize(ic->journal_mac);
0511 
0512     if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) {
0513         dm_integrity_io_error(ic, "digest is too long", -EINVAL);
0514         return -EINVAL;
0515     }
0516 
0517     desc->tfm = ic->journal_mac;
0518 
0519     r = crypto_shash_init(desc);
0520     if (unlikely(r < 0)) {
0521         dm_integrity_io_error(ic, "crypto_shash_init", r);
0522         return r;
0523     }
0524 
0525     r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size);
0526     if (unlikely(r < 0)) {
0527         dm_integrity_io_error(ic, "crypto_shash_update", r);
0528         return r;
0529     }
0530 
0531     if (likely(wr)) {
0532         r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size);
0533         if (unlikely(r < 0)) {
0534             dm_integrity_io_error(ic, "crypto_shash_final", r);
0535             return r;
0536         }
0537     } else {
0538         __u8 result[HASH_MAX_DIGESTSIZE];
0539         r = crypto_shash_final(desc, result);
0540         if (unlikely(r < 0)) {
0541             dm_integrity_io_error(ic, "crypto_shash_final", r);
0542             return r;
0543         }
0544         if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) {
0545             dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
0546             dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
0547             return -EILSEQ;
0548         }
0549     }
0550 
0551     return 0;
0552 }
0553 
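     /*
      * Synchronously read or write the superblock. On write, the version field
      * is refreshed and (with a journal MAC and SB_FLAG_FIXED_HMAC) the
      * superblock MAC is recomputed; on read, the MAC is verified unless the
      * target runs in 'R' mode.
      */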
0554 static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf)
0555 {
0556     struct dm_io_request io_req;
0557     struct dm_io_region io_loc;
0558     const enum req_op op = opf & REQ_OP_MASK;
0559     int r;
0560 
0561     io_req.bi_opf = opf;
0562     io_req.mem.type = DM_IO_KMEM;
0563     io_req.mem.ptr.addr = ic->sb;
0564     io_req.notify.fn = NULL;
0565     io_req.client = ic->io;
0566     io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
0567     io_loc.sector = ic->start;
0568     io_loc.count = SB_SECTORS;
0569 
0570     if (op == REQ_OP_WRITE) {
0571         sb_set_version(ic);
0572         if (ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
0573             r = sb_mac(ic, true);
0574             if (unlikely(r))
0575                 return r;
0576         }
0577     }
0578 
0579     r = dm_io(&io_req, 1, &io_loc, NULL);
0580     if (unlikely(r))
0581         return r;
0582 
0583     if (op == REQ_OP_READ) {
0584         if (ic->mode != 'R' && ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
0585             r = sb_mac(ic, false);
0586             if (unlikely(r))
0587                 return r;
0588         }
0589     }
0590 
0591     return 0;
0592 }
0593 
0594 #define BITMAP_OP_TEST_ALL_SET      0
0595 #define BITMAP_OP_TEST_ALL_CLEAR    1
0596 #define BITMAP_OP_SET           2
0597 #define BITMAP_OP_CLEAR         3
0598 
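     /*
      * Test, set or clear the bitmap bits covering [sector, sector + n_sectors).
      * One bit covers 2^(log2_sectors_per_block + log2_blocks_per_bitmap_bit)
      * sectors. Whole longs (or whole pages for BITMAP_OP_CLEAR) are handled
      * at a time whenever the range permits.
      */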
0599 static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
0600                 sector_t sector, sector_t n_sectors, int mode)
0601 {
0602     unsigned long bit, end_bit, this_end_bit, page, end_page;
0603     unsigned long *data;
0604 
0605     if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
0606         DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
0607             sector,
0608             n_sectors,
0609             ic->sb->log2_sectors_per_block,
0610             ic->log2_blocks_per_bitmap_bit,
0611             mode);
0612         BUG();
0613     }
0614 
0615     if (unlikely(!n_sectors))
0616         return true;
0617 
0618     bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
0619     end_bit = (sector + n_sectors - 1) >>
0620         (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
0621 
0622     page = bit / (PAGE_SIZE * 8);
0623     bit %= PAGE_SIZE * 8;
0624 
0625     end_page = end_bit / (PAGE_SIZE * 8);
0626     end_bit %= PAGE_SIZE * 8;
0627 
0628 repeat:
0629     if (page < end_page) {
0630         this_end_bit = PAGE_SIZE * 8 - 1;
0631     } else {
0632         this_end_bit = end_bit;
0633     }
0634 
0635     data = lowmem_page_address(bitmap[page].page);
0636 
0637     if (mode == BITMAP_OP_TEST_ALL_SET) {
0638         while (bit <= this_end_bit) {
0639             if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
0640                 do {
0641                     if (data[bit / BITS_PER_LONG] != -1)
0642                         return false;
0643                     bit += BITS_PER_LONG;
0644                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
0645                 continue;
0646             }
0647             if (!test_bit(bit, data))
0648                 return false;
0649             bit++;
0650         }
0651     } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
0652         while (bit <= this_end_bit) {
0653             if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
0654                 do {
0655                     if (data[bit / BITS_PER_LONG] != 0)
0656                         return false;
0657                     bit += BITS_PER_LONG;
0658                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
0659                 continue;
0660             }
0661             if (test_bit(bit, data))
0662                 return false;
0663             bit++;
0664         }
0665     } else if (mode == BITMAP_OP_SET) {
0666         while (bit <= this_end_bit) {
0667             if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
0668                 do {
0669                     data[bit / BITS_PER_LONG] = -1;
0670                     bit += BITS_PER_LONG;
0671                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
0672                 continue;
0673             }
0674             __set_bit(bit, data);
0675             bit++;
0676         }
0677     } else if (mode == BITMAP_OP_CLEAR) {
0678         if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
0679             clear_page(data);
0680         else while (bit <= this_end_bit) {
0681             if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
0682                 do {
0683                     data[bit / BITS_PER_LONG] = 0;
0684                     bit += BITS_PER_LONG;
0685                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
0686                 continue;
0687             }
0688             __clear_bit(bit, data);
0689             bit++;
0690         }
0691     } else {
0692         BUG();
0693     }
0694 
0695     if (unlikely(page < end_page)) {
0696         bit = 0;
0697         page++;
0698         goto repeat;
0699     }
0700 
0701     return true;
0702 }
0703 
0704 static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
0705 {
0706     unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
0707     unsigned i;
0708 
0709     for (i = 0; i < n_bitmap_pages; i++) {
0710         unsigned long *dst_data = lowmem_page_address(dst[i].page);
0711         unsigned long *src_data = lowmem_page_address(src[i].page);
0712         copy_page(dst_data, src_data);
0713     }
0714 }
0715 
0716 static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
0717 {
0718     unsigned bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
0719     unsigned bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
0720 
0721     BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
0722     return &ic->bbs[bitmap_block];
0723 }
0724 
0725 static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
0726                  bool e, const char *function)
0727 {
0728 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
0729     unsigned limit = e ? ic->journal_section_entries : ic->journal_section_sectors;
0730 
0731     if (unlikely(section >= ic->journal_sections) ||
0732         unlikely(offset >= limit)) {
0733         DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)",
0734                function, section, offset, ic->journal_sections, limit);
0735         BUG();
0736     }
0737 #endif
0738 }
0739 
0740 static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsigned offset,
0741                    unsigned *pl_index, unsigned *pl_offset)
0742 {
0743     unsigned sector;
0744 
0745     access_journal_check(ic, section, offset, false, "page_list_location");
0746 
0747     sector = section * ic->journal_section_sectors + offset;
0748 
0749     *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
0750     *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
0751 }
0752 
0753 static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
0754                            unsigned section, unsigned offset, unsigned *n_sectors)
0755 {
0756     unsigned pl_index, pl_offset;
0757     char *va;
0758 
0759     page_list_location(ic, section, offset, &pl_index, &pl_offset);
0760 
0761     if (n_sectors)
0762         *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT;
0763 
0764     va = lowmem_page_address(pl[pl_index].page);
0765 
0766     return (struct journal_sector *)(va + pl_offset);
0767 }
0768 
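     /*
      * A journal section consists of JOURNAL_BLOCK_SECTORS (8) sectors of
      * packed journal entries (with their MAC slices and commit ids), followed
      * by the journaled data blocks. access_journal() returns a sector within
      * the section, access_journal_entry() indexes the entry area and
      * access_journal_data() the data area behind it.
      */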
0769 static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset)
0770 {
0771     return access_page_list(ic, ic->journal, section, offset, NULL);
0772 }
0773 
0774 static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned section, unsigned n)
0775 {
0776     unsigned rel_sector, offset;
0777     struct journal_sector *js;
0778 
0779     access_journal_check(ic, section, n, true, "access_journal_entry");
0780 
0781     rel_sector = n % JOURNAL_BLOCK_SECTORS;
0782     offset = n / JOURNAL_BLOCK_SECTORS;
0783 
0784     js = access_journal(ic, section, rel_sector);
0785     return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size);
0786 }
0787 
0788 static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n)
0789 {
0790     n <<= ic->sb->log2_sectors_per_block;
0791 
0792     n += JOURNAL_BLOCK_SECTORS;
0793 
0794     access_journal_check(ic, section, n, false, "access_journal_data");
0795 
0796     return access_journal(ic, section, n);
0797 }
0798 
0799 static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE])
0800 {
0801     SHASH_DESC_ON_STACK(desc, ic->journal_mac);
0802     int r;
0803     unsigned j, size;
0804 
0805     desc->tfm = ic->journal_mac;
0806 
0807     r = crypto_shash_init(desc);
0808     if (unlikely(r < 0)) {
0809         dm_integrity_io_error(ic, "crypto_shash_init", r);
0810         goto err;
0811     }
0812 
0813     if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
0814         __le64 section_le;
0815 
0816         r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
0817         if (unlikely(r < 0)) {
0818             dm_integrity_io_error(ic, "crypto_shash_update", r);
0819             goto err;
0820         }
0821 
0822         section_le = cpu_to_le64(section);
0823         r = crypto_shash_update(desc, (__u8 *)&section_le, sizeof section_le);
0824         if (unlikely(r < 0)) {
0825             dm_integrity_io_error(ic, "crypto_shash_update", r);
0826             goto err;
0827         }
0828     }
0829 
0830     for (j = 0; j < ic->journal_section_entries; j++) {
0831         struct journal_entry *je = access_journal_entry(ic, section, j);
0832         r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof je->u.sector);
0833         if (unlikely(r < 0)) {
0834             dm_integrity_io_error(ic, "crypto_shash_update", r);
0835             goto err;
0836         }
0837     }
0838 
0839     size = crypto_shash_digestsize(ic->journal_mac);
0840 
0841     if (likely(size <= JOURNAL_MAC_SIZE)) {
0842         r = crypto_shash_final(desc, result);
0843         if (unlikely(r < 0)) {
0844             dm_integrity_io_error(ic, "crypto_shash_final", r);
0845             goto err;
0846         }
0847         memset(result + size, 0, JOURNAL_MAC_SIZE - size);
0848     } else {
0849         __u8 digest[HASH_MAX_DIGESTSIZE];
0850 
0851         if (WARN_ON(size > sizeof(digest))) {
0852             dm_integrity_io_error(ic, "digest_size", -EINVAL);
0853             goto err;
0854         }
0855         r = crypto_shash_final(desc, digest);
0856         if (unlikely(r < 0)) {
0857             dm_integrity_io_error(ic, "crypto_shash_final", r);
0858             goto err;
0859         }
0860         memcpy(result, digest, JOURNAL_MAC_SIZE);
0861     }
0862 
0863     return;
0864 err:
0865     memset(result, 0, JOURNAL_MAC_SIZE);
0866 }
0867 
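     /*
      * Compute the MAC of one journal section and either store it (wr) or
      * verify it. The MAC is split into JOURNAL_MAC_PER_SECTOR-byte pieces,
      * one per sector of the section's entry area.
      */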
0868 static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr)
0869 {
0870     __u8 result[JOURNAL_MAC_SIZE];
0871     unsigned j;
0872 
0873     if (!ic->journal_mac)
0874         return;
0875 
0876     section_mac(ic, section, result);
0877 
0878     for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
0879         struct journal_sector *js = access_journal(ic, section, j);
0880 
0881         if (likely(wr))
0882             memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
0883         else {
0884             if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
0885                 dm_integrity_io_error(ic, "journal mac", -EILSEQ);
0886                 dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
0887             }
0888         }
0889     }
0890 }
0891 
0892 static void complete_journal_op(void *context)
0893 {
0894     struct journal_completion *comp = context;
0895     BUG_ON(!atomic_read(&comp->in_flight));
0896     if (likely(atomic_dec_and_test(&comp->in_flight)))
0897         complete(&comp->comp);
0898 }
0899 
0900 static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
0901             unsigned n_sections, struct journal_completion *comp)
0902 {
0903     struct async_submit_ctl submit;
0904     size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
0905     unsigned pl_index, pl_offset, section_index;
0906     struct page_list *source_pl, *target_pl;
0907 
0908     if (likely(encrypt)) {
0909         source_pl = ic->journal;
0910         target_pl = ic->journal_io;
0911     } else {
0912         source_pl = ic->journal_io;
0913         target_pl = ic->journal;
0914     }
0915 
0916     page_list_location(ic, section, 0, &pl_index, &pl_offset);
0917 
0918     atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);
0919 
0920     init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);
0921 
0922     section_index = pl_index;
0923 
0924     do {
0925         size_t this_step;
0926         struct page *src_pages[2];
0927         struct page *dst_page;
0928 
0929         while (unlikely(pl_index == section_index)) {
0930             unsigned dummy;
0931             if (likely(encrypt))
0932                 rw_section_mac(ic, section, true);
0933             section++;
0934             n_sections--;
0935             if (!n_sections)
0936                 break;
0937             page_list_location(ic, section, 0, &section_index, &dummy);
0938         }
0939 
0940         this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
0941         dst_page = target_pl[pl_index].page;
0942         src_pages[0] = source_pl[pl_index].page;
0943         src_pages[1] = ic->journal_xor[pl_index].page;
0944 
0945         async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);
0946 
0947         pl_index++;
0948         pl_offset = 0;
0949         n_bytes -= this_step;
0950     } while (n_bytes);
0951 
0952     BUG_ON(n_sections);
0953 
0954     async_tx_issue_pending_all();
0955 }
0956 
0957 static void complete_journal_encrypt(struct crypto_async_request *req, int err)
0958 {
0959     struct journal_completion *comp = req->data;
0960     if (unlikely(err)) {
0961         if (likely(err == -EINPROGRESS)) {
0962             complete(&comp->ic->crypto_backoff);
0963             return;
0964         }
0965         dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
0966     }
0967     complete_journal_op(comp);
0968 }
0969 
0970 static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
0971 {
0972     int r;
0973     skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
0974                       complete_journal_encrypt, comp);
0975     if (likely(encrypt))
0976         r = crypto_skcipher_encrypt(req);
0977     else
0978         r = crypto_skcipher_decrypt(req);
0979     if (likely(!r))
0980         return false;
0981     if (likely(r == -EINPROGRESS))
0982         return true;
0983     if (likely(r == -EBUSY)) {
0984         wait_for_completion(&comp->ic->crypto_backoff);
0985         reinit_completion(&comp->ic->crypto_backoff);
0986         return true;
0987     }
0988     dm_integrity_io_error(comp->ic, "encrypt", r);
0989     return false;
0990 }
0991 
0992 static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
0993               unsigned n_sections, struct journal_completion *comp)
0994 {
0995     struct scatterlist **source_sg;
0996     struct scatterlist **target_sg;
0997 
0998     atomic_add(2, &comp->in_flight);
0999 
1000     if (likely(encrypt)) {
1001         source_sg = ic->journal_scatterlist;
1002         target_sg = ic->journal_io_scatterlist;
1003     } else {
1004         source_sg = ic->journal_io_scatterlist;
1005         target_sg = ic->journal_scatterlist;
1006     }
1007 
1008     do {
1009         struct skcipher_request *req;
1010         unsigned ivsize;
1011         char *iv;
1012 
1013         if (likely(encrypt))
1014             rw_section_mac(ic, section, true);
1015 
1016         req = ic->sk_requests[section];
1017         ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
1018         iv = req->iv;
1019 
1020         memcpy(iv, iv + ivsize, ivsize);
1021 
1022         req->src = source_sg[section];
1023         req->dst = target_sg[section];
1024 
1025         if (unlikely(do_crypt(encrypt, req, comp)))
1026             atomic_inc(&comp->in_flight);
1027 
1028         section++;
1029         n_sections--;
1030     } while (n_sections);
1031 
1032     atomic_dec(&comp->in_flight);
1033     complete_journal_op(comp);
1034 }
1035 
1036 static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
1037                 unsigned n_sections, struct journal_completion *comp)
1038 {
1039     if (ic->journal_xor)
1040         return xor_journal(ic, encrypt, section, n_sections, comp);
1041     else
1042         return crypt_journal(ic, encrypt, section, n_sections, comp);
1043 }
1044 
1045 static void complete_journal_io(unsigned long error, void *context)
1046 {
1047     struct journal_completion *comp = context;
1048     if (unlikely(error != 0))
1049         dm_integrity_io_error(comp->ic, "writing journal", -EIO);
1050     complete_journal_op(comp);
1051 }
1052 
1053 static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf,
1054                    unsigned sector, unsigned n_sectors,
1055                    struct journal_completion *comp)
1056 {
1057     struct dm_io_request io_req;
1058     struct dm_io_region io_loc;
1059     unsigned pl_index, pl_offset;
1060     int r;
1061 
1062     if (unlikely(dm_integrity_failed(ic))) {
1063         if (comp)
1064             complete_journal_io(-1UL, comp);
1065         return;
1066     }
1067 
1068     pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
1069     pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
1070 
1071     io_req.bi_opf = opf;
1072     io_req.mem.type = DM_IO_PAGE_LIST;
1073     if (ic->journal_io)
1074         io_req.mem.ptr.pl = &ic->journal_io[pl_index];
1075     else
1076         io_req.mem.ptr.pl = &ic->journal[pl_index];
1077     io_req.mem.offset = pl_offset;
1078     if (likely(comp != NULL)) {
1079         io_req.notify.fn = complete_journal_io;
1080         io_req.notify.context = comp;
1081     } else {
1082         io_req.notify.fn = NULL;
1083     }
1084     io_req.client = ic->io;
1085     io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
1086     io_loc.sector = ic->start + SB_SECTORS + sector;
1087     io_loc.count = n_sectors;
1088 
1089     r = dm_io(&io_req, 1, &io_loc, NULL);
1090     if (unlikely(r)) {
1091         dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ?
1092                       "reading journal" : "writing journal", r);
1093         if (comp) {
1094             WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
1095             complete_journal_io(-1UL, comp);
1096         }
1097     }
1098 }
1099 
1100 static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf,
1101                unsigned section, unsigned n_sections,
1102                struct journal_completion *comp)
1103 {
1104     unsigned sector, n_sectors;
1105 
1106     sector = section * ic->journal_section_sectors;
1107     n_sectors = n_sections * ic->journal_section_sectors;
1108 
1109     rw_journal_sectors(ic, opf, sector, n_sectors, comp);
1110 }
1111 
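     /*
      * Write commit_sections journal sections starting at commit_start with
      * FUA, computing the section MACs (or encrypting into the journal_io
      * pages) first. If the range wraps around the end of the journal it is
      * written as two pieces, overlapping encryption with I/O where possible.
      */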
1112 static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
1113 {
1114     struct journal_completion io_comp;
1115     struct journal_completion crypt_comp_1;
1116     struct journal_completion crypt_comp_2;
1117     unsigned i;
1118 
1119     io_comp.ic = ic;
1120     init_completion(&io_comp.comp);
1121 
1122     if (commit_start + commit_sections <= ic->journal_sections) {
1123         io_comp.in_flight = (atomic_t)ATOMIC_INIT(1);
1124         if (ic->journal_io) {
1125             crypt_comp_1.ic = ic;
1126             init_completion(&crypt_comp_1.comp);
1127             crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1128             encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1);
1129             wait_for_completion_io(&crypt_comp_1.comp);
1130         } else {
1131             for (i = 0; i < commit_sections; i++)
1132                 rw_section_mac(ic, commit_start + i, true);
1133         }
1134         rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start,
1135                commit_sections, &io_comp);
1136     } else {
1137         unsigned to_end;
1138         io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
1139         to_end = ic->journal_sections - commit_start;
1140         if (ic->journal_io) {
1141             crypt_comp_1.ic = ic;
1142             init_completion(&crypt_comp_1.comp);
1143             crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1144             encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
1145             if (try_wait_for_completion(&crypt_comp_1.comp)) {
1146                 rw_journal(ic, REQ_OP_WRITE | REQ_FUA,
1147                        commit_start, to_end, &io_comp);
1148                 reinit_completion(&crypt_comp_1.comp);
1149                 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1150                 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
1151                 wait_for_completion_io(&crypt_comp_1.comp);
1152             } else {
1153                 crypt_comp_2.ic = ic;
1154                 init_completion(&crypt_comp_2.comp);
1155                 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
1156                 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
1157                 wait_for_completion_io(&crypt_comp_1.comp);
1158                 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
1159                 wait_for_completion_io(&crypt_comp_2.comp);
1160             }
1161         } else {
1162             for (i = 0; i < to_end; i++)
1163                 rw_section_mac(ic, commit_start + i, true);
1164             rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
1165             for (i = 0; i < commit_sections - to_end; i++)
1166                 rw_section_mac(ic, i, true);
1167         }
1168         rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp);
1169     }
1170 
1171     wait_for_completion_io(&io_comp.comp);
1172 }
1173 
1174 static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset,
1175                   unsigned n_sectors, sector_t target, io_notify_fn fn, void *data)
1176 {
1177     struct dm_io_request io_req;
1178     struct dm_io_region io_loc;
1179     int r;
1180     unsigned sector, pl_index, pl_offset;
1181 
1182     BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1));
1183 
1184     if (unlikely(dm_integrity_failed(ic))) {
1185         fn(-1UL, data);
1186         return;
1187     }
1188 
1189     sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;
1190 
1191     pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
1192     pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
1193 
1194     io_req.bi_opf = REQ_OP_WRITE;
1195     io_req.mem.type = DM_IO_PAGE_LIST;
1196     io_req.mem.ptr.pl = &ic->journal[pl_index];
1197     io_req.mem.offset = pl_offset;
1198     io_req.notify.fn = fn;
1199     io_req.notify.context = data;
1200     io_req.client = ic->io;
1201     io_loc.bdev = ic->dev->bdev;
1202     io_loc.sector = target;
1203     io_loc.count = n_sectors;
1204 
1205     r = dm_io(&io_req, 1, &io_loc, NULL);
1206     if (unlikely(r)) {
1207         WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
1208         fn(-1UL, data);
1209     }
1210 }
1211 
1212 static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
1213 {
1214     return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
1215            range1->logical_sector + range1->n_sectors > range2->logical_sector;
1216 }
1217 
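     /*
      * Ranges of in-flight I/O are kept in the in_progress rb-tree under
      * endio_wait.lock. A new range that overlaps an existing or waiting one
      * cannot be added; the caller is put on wait_list and sleeps until
      * remove_range_unlocked() manages to insert it and wakes the task.
      */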
1218 static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
1219 {
1220     struct rb_node **n = &ic->in_progress.rb_node;
1221     struct rb_node *parent;
1222 
1223     BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
1224 
1225     if (likely(check_waiting)) {
1226         struct dm_integrity_range *range;
1227         list_for_each_entry(range, &ic->wait_list, wait_entry) {
1228             if (unlikely(ranges_overlap(range, new_range)))
1229                 return false;
1230         }
1231     }
1232 
1233     parent = NULL;
1234 
1235     while (*n) {
1236         struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node);
1237 
1238         parent = *n;
1239         if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) {
1240             n = &range->node.rb_left;
1241         } else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) {
1242             n = &range->node.rb_right;
1243         } else {
1244             return false;
1245         }
1246     }
1247 
1248     rb_link_node(&new_range->node, parent, n);
1249     rb_insert_color(&new_range->node, &ic->in_progress);
1250 
1251     return true;
1252 }
1253 
1254 static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
1255 {
1256     rb_erase(&range->node, &ic->in_progress);
1257     while (unlikely(!list_empty(&ic->wait_list))) {
1258         struct dm_integrity_range *last_range =
1259             list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
1260         struct task_struct *last_range_task;
1261         last_range_task = last_range->task;
1262         list_del(&last_range->wait_entry);
1263         if (!add_new_range(ic, last_range, false)) {
1264             last_range->task = last_range_task;
1265             list_add(&last_range->wait_entry, &ic->wait_list);
1266             break;
1267         }
1268         last_range->waiting = false;
1269         wake_up_process(last_range_task);
1270     }
1271 }
1272 
1273 static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
1274 {
1275     unsigned long flags;
1276 
1277     spin_lock_irqsave(&ic->endio_wait.lock, flags);
1278     remove_range_unlocked(ic, range);
1279     spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1280 }
1281 
1282 static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1283 {
1284     new_range->waiting = true;
1285     list_add_tail(&new_range->wait_entry, &ic->wait_list);
1286     new_range->task = current;
1287     do {
1288         __set_current_state(TASK_UNINTERRUPTIBLE);
1289         spin_unlock_irq(&ic->endio_wait.lock);
1290         io_schedule();
1291         spin_lock_irq(&ic->endio_wait.lock);
1292     } while (unlikely(new_range->waiting));
1293 }
1294 
1295 static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1296 {
1297     if (unlikely(!add_new_range(ic, new_range, true)))
1298         wait_and_add_new_range(ic, new_range);
1299 }
1300 
1301 static void init_journal_node(struct journal_node *node)
1302 {
1303     RB_CLEAR_NODE(&node->node);
1304     node->sector = (sector_t)-1;
1305 }
1306 
1307 static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
1308 {
1309     struct rb_node **link;
1310     struct rb_node *parent;
1311 
1312     node->sector = sector;
1313     BUG_ON(!RB_EMPTY_NODE(&node->node));
1314 
1315     link = &ic->journal_tree_root.rb_node;
1316     parent = NULL;
1317 
1318     while (*link) {
1319         struct journal_node *j;
1320         parent = *link;
1321         j = container_of(parent, struct journal_node, node);
1322         if (sector < j->sector)
1323             link = &j->node.rb_left;
1324         else
1325             link = &j->node.rb_right;
1326     }
1327 
1328     rb_link_node(&node->node, parent, link);
1329     rb_insert_color(&node->node, &ic->journal_tree_root);
1330 }
1331 
1332 static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
1333 {
1334     BUG_ON(RB_EMPTY_NODE(&node->node));
1335     rb_erase(&node->node, &ic->journal_tree_root);
1336     init_journal_node(node);
1337 }
1338 
1339 #define NOT_FOUND   (-1U)
1340 
1341 static unsigned find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
1342 {
1343     struct rb_node *n = ic->journal_tree_root.rb_node;
1344     unsigned found = NOT_FOUND;
1345     *next_sector = (sector_t)-1;
1346     while (n) {
1347         struct journal_node *j = container_of(n, struct journal_node, node);
1348         if (sector == j->sector) {
1349             found = j - ic->journal_tree;
1350         }
1351         if (sector < j->sector) {
1352             *next_sector = j->sector;
1353             n = j->node.rb_left;
1354         } else {
1355             n = j->node.rb_right;
1356         }
1357     }
1358 
1359     return found;
1360 }
1361 
1362 static bool test_journal_node(struct dm_integrity_c *ic, unsigned pos, sector_t sector)
1363 {
1364     struct journal_node *node, *next_node;
1365     struct rb_node *next;
1366 
1367     if (unlikely(pos >= ic->journal_entries))
1368         return false;
1369     node = &ic->journal_tree[pos];
1370     if (unlikely(RB_EMPTY_NODE(&node->node)))
1371         return false;
1372     if (unlikely(node->sector != sector))
1373         return false;
1374 
1375     next = rb_next(&node->node);
1376     if (unlikely(!next))
1377         return true;
1378 
1379     next_node = container_of(next, struct journal_node, node);
1380     return next_node->sector != sector;
1381 }
1382 
1383 static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
1384 {
1385     struct rb_node *next;
1386     struct journal_node *next_node;
1387     unsigned next_section;
1388 
1389     BUG_ON(RB_EMPTY_NODE(&node->node));
1390 
1391     next = rb_next(&node->node);
1392     if (unlikely(!next))
1393         return false;
1394 
1395     next_node = container_of(next, struct journal_node, node);
1396 
1397     if (next_node->sector != node->sector)
1398         return false;
1399 
1400     next_section = (unsigned)(next_node - ic->journal_tree) / ic->journal_section_entries;
1401     if (next_section >= ic->committed_section &&
1402         next_section < ic->committed_section + ic->n_committed_sections)
1403         return true;
1404     if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections)
1405         return true;
1406 
1407     return false;
1408 }
1409 
1410 #define TAG_READ    0
1411 #define TAG_WRITE   1
1412 #define TAG_CMP     2
1413 
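     /*
      * Move total_size tag bytes between *tag and the metadata area through
      * dm-bufio. TAG_READ copies metadata into the caller's buffer, TAG_WRITE
      * updates it (marking the buffer dirty only when the stored tag actually
      * changes), and TAG_CMP compares, tolerating (when discards are enabled)
      * blocks whose stored tag still holds the DISCARD_FILLER pattern; on a
      * real mismatch it returns the number of tag bytes that were left to
      * check.
      */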
1414 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
1415                    unsigned *metadata_offset, unsigned total_size, int op)
1416 {
1417 #define MAY_BE_FILLER       1
1418 #define MAY_BE_HASH     2
1419     unsigned hash_offset = 0;
1420     unsigned may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
1421 
1422     do {
1423         unsigned char *data, *dp;
1424         struct dm_buffer *b;
1425         unsigned to_copy;
1426         int r;
1427 
1428         r = dm_integrity_failed(ic);
1429         if (unlikely(r))
1430             return r;
1431 
1432         data = dm_bufio_read(ic->bufio, *metadata_block, &b);
1433         if (IS_ERR(data))
1434             return PTR_ERR(data);
1435 
1436         to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
1437         dp = data + *metadata_offset;
1438         if (op == TAG_READ) {
1439             memcpy(tag, dp, to_copy);
1440         } else if (op == TAG_WRITE) {
1441             if (memcmp(dp, tag, to_copy)) {
1442                 memcpy(dp, tag, to_copy);
1443                 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
1444             }
1445         } else {
1446             /* e.g.: op == TAG_CMP */
1447 
1448             if (likely(is_power_of_2(ic->tag_size))) {
1449                 if (unlikely(memcmp(dp, tag, to_copy)))
1450                     if (unlikely(!ic->discard) ||
1451                         unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) {
1452                         goto thorough_test;
1453                 }
1454             } else {
1455                 unsigned i, ts;
1456 thorough_test:
1457                 ts = total_size;
1458 
1459                 for (i = 0; i < to_copy; i++, ts--) {
1460                     if (unlikely(dp[i] != tag[i]))
1461                         may_be &= ~MAY_BE_HASH;
1462                     if (likely(dp[i] != DISCARD_FILLER))
1463                         may_be &= ~MAY_BE_FILLER;
1464                     hash_offset++;
1465                     if (unlikely(hash_offset == ic->tag_size)) {
1466                         if (unlikely(!may_be)) {
1467                             dm_bufio_release(b);
1468                             return ts;
1469                         }
1470                         hash_offset = 0;
1471                         may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
1472                     }
1473                 }
1474             }
1475         }
1476         dm_bufio_release(b);
1477 
1478         tag += to_copy;
1479         *metadata_offset += to_copy;
1480         if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
1481             (*metadata_block)++;
1482             *metadata_offset = 0;
1483         }
1484 
1485         if (unlikely(!is_power_of_2(ic->tag_size))) {
1486             hash_offset = (hash_offset + to_copy) % ic->tag_size;
1487         }
1488 
1489         total_size -= to_copy;
1490     } while (unlikely(total_size));
1491 
1492     return 0;
1493 #undef MAY_BE_FILLER
1494 #undef MAY_BE_HASH
1495 }
1496 
1497 struct flush_request {
1498     struct dm_io_request io_req;
1499     struct dm_io_region io_reg;
1500     struct dm_integrity_c *ic;
1501     struct completion comp;
1502 };
1503 
1504 static void flush_notify(unsigned long error, void *fr_)
1505 {
1506     struct flush_request *fr = fr_;
1507     if (unlikely(error != 0))
1508         dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
1509     complete(&fr->comp);
1510 }
1511 
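     /*
      * Write out dirty tag buffers via dm-bufio. When a separate metadata
      * device is in use and flush_data is set, an empty REQ_PREFLUSH is also
      * issued to the data device and both operations are waited for.
      */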
1512 static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
1513 {
1514     int r;
1515 
1516     struct flush_request fr;
1517 
1518     if (!ic->meta_dev)
1519         flush_data = false;
1520     if (flush_data) {
1521         fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
1522         fr.io_req.mem.type = DM_IO_KMEM;
1523         fr.io_req.mem.ptr.addr = NULL;
1524         fr.io_req.notify.fn = flush_notify;
1525         fr.io_req.notify.context = &fr;
1526         fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio);
1527         fr.io_reg.bdev = ic->dev->bdev;
1528         fr.io_reg.sector = 0;
1529         fr.io_reg.count = 0;
1530         fr.ic = ic;
1531         init_completion(&fr.comp);
1532         r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
1533         BUG_ON(r);
1534     }
1535 
1536     r = dm_bufio_write_dirty_buffers(ic->bufio);
1537     if (unlikely(r))
1538         dm_integrity_io_error(ic, "writing tags", r);
1539 
1540     if (flush_data)
1541         wait_for_completion(&fr.comp);
1542 }
1543 
1544 static void sleep_on_endio_wait(struct dm_integrity_c *ic)
1545 {
1546     DECLARE_WAITQUEUE(wait, current);
1547     __add_wait_queue(&ic->endio_wait, &wait);
1548     __set_current_state(TASK_UNINTERRUPTIBLE);
1549     spin_unlock_irq(&ic->endio_wait.lock);
1550     io_schedule();
1551     spin_lock_irq(&ic->endio_wait.lock);
1552     __remove_wait_queue(&ic->endio_wait, &wait);
1553 }
1554 
1555 static void autocommit_fn(struct timer_list *t)
1556 {
1557     struct dm_integrity_c *ic = from_timer(ic, t, autocommit_timer);
1558 
1559     if (likely(!dm_integrity_failed(ic)))
1560         queue_work(ic->commit_wq, &ic->commit_work);
1561 }
1562 
1563 static void schedule_autocommit(struct dm_integrity_c *ic)
1564 {
1565     if (!timer_pending(&ic->autocommit_timer))
1566         mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
1567 }
1568 
1569 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
1570 {
1571     struct bio *bio;
1572     unsigned long flags;
1573 
1574     spin_lock_irqsave(&ic->endio_wait.lock, flags);
1575     bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1576     bio_list_add(&ic->flush_bio_list, bio);
1577     spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1578 
1579     queue_work(ic->commit_wq, &ic->commit_work);
1580 }
1581 
1582 static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
1583 {
1584     int r = dm_integrity_failed(ic);
1585     if (unlikely(r) && !bio->bi_status)
1586         bio->bi_status = errno_to_blk_status(r);
1587     if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) {
1588         unsigned long flags;
1589         spin_lock_irqsave(&ic->endio_wait.lock, flags);
1590         bio_list_add(&ic->synchronous_bios, bio);
1591         queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
1592         spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1593         return;
1594     }
1595     bio_endio(bio);
1596 }
1597 
1598 static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
1599 {
1600     struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1601 
1602     if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
1603         submit_flush_bio(ic, dio);
1604     else
1605         do_endio(ic, bio);
1606 }
1607 
1608 static void dec_in_flight(struct dm_integrity_io *dio)
1609 {
1610     if (atomic_dec_and_test(&dio->in_flight)) {
1611         struct dm_integrity_c *ic = dio->ic;
1612         struct bio *bio;
1613 
1614         remove_range(ic, &dio->range);
1615 
1616         if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
1617             schedule_autocommit(ic);
1618 
1619         bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1620 
1621         if (unlikely(dio->bi_status) && !bio->bi_status)
1622             bio->bi_status = dio->bi_status;
1623         if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
1624             dio->range.logical_sector += dio->range.n_sectors;
1625             bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
1626             INIT_WORK(&dio->work, integrity_bio_wait);
1627             queue_work(ic->offload_wq, &dio->work);
1628             return;
1629         }
1630         do_endio_flush(ic, dio);
1631     }
1632 }
1633 
1634 static void integrity_end_io(struct bio *bio)
1635 {
1636     struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
1637 
1638     dm_bio_restore(&dio->bio_details, bio);
1639     if (bio->bi_integrity)
1640         bio->bi_opf |= REQ_INTEGRITY;
1641 
1642     if (dio->completion)
1643         complete(dio->completion);
1644 
1645     dec_in_flight(dio);
1646 }
1647 
1648 static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
1649                       const char *data, char *result)
1650 {
1651     __le64 sector_le = cpu_to_le64(sector);
1652     SHASH_DESC_ON_STACK(req, ic->internal_hash);
1653     int r;
1654     unsigned digest_size;
1655 
1656     req->tfm = ic->internal_hash;
1657 
1658     r = crypto_shash_init(req);
1659     if (unlikely(r < 0)) {
1660         dm_integrity_io_error(ic, "crypto_shash_init", r);
1661         goto failed;
1662     }
1663 
1664     if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
1665         r = crypto_shash_update(req, (__u8 *)&ic->sb->salt, SALT_SIZE);
1666         if (unlikely(r < 0)) {
1667             dm_integrity_io_error(ic, "crypto_shash_update", r);
1668             goto failed;
1669         }
1670     }
1671 
1672     r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof sector_le);
1673     if (unlikely(r < 0)) {
1674         dm_integrity_io_error(ic, "crypto_shash_update", r);
1675         goto failed;
1676     }
1677 
1678     r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
1679     if (unlikely(r < 0)) {
1680         dm_integrity_io_error(ic, "crypto_shash_update", r);
1681         goto failed;
1682     }
1683 
1684     r = crypto_shash_final(req, result);
1685     if (unlikely(r < 0)) {
1686         dm_integrity_io_error(ic, "crypto_shash_final", r);
1687         goto failed;
1688     }
1689 
1690     digest_size = crypto_shash_digestsize(ic->internal_hash);
1691     if (unlikely(digest_size < ic->tag_size))
1692         memset(result + digest_size, 0, ic->tag_size - digest_size);
1693 
1694     return;
1695 
1696 failed:
1697     /* This shouldn't happen; the hash functions have no reason to fail. Random bytes ensure a bogus tag can never verify. */
1698     get_random_bytes(result, ic->tag_size);
1699 }
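
/*
 * Illustrative sketch, not part of the driver: the message hashed above for
 * one block is [salt, only with SB_FLAG_FIXED_HMAC] || le64(sector) || block
 * data, and a digest shorter than ic->tag_size is zero-padded up to the tag
 * size. The helper below only documents that layout; its name is
 * hypothetical.
 */
static inline size_t sketch_checksum_message_len(const struct dm_integrity_c *ic)
{
    size_t len = sizeof(__le64);        /* the little-endian sector number */

    if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
        len += SALT_SIZE;       /* per-device salt is hashed first */

    return len + ((size_t)ic->sectors_per_block << SECTOR_SHIFT);   /* block data */
}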
1700 
1701 static void integrity_metadata(struct work_struct *w)
1702 {
1703     struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
1704     struct dm_integrity_c *ic = dio->ic;
1705 
1706     int r;
1707 
1708     if (ic->internal_hash) {
1709         struct bvec_iter iter;
1710         struct bio_vec bv;
1711         unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
1712         struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1713         char *checksums;
1714         unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
1715         char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
1716         sector_t sector;
1717         unsigned sectors_to_process;
1718 
1719         if (unlikely(ic->mode == 'R'))
1720             goto skip_io;
1721 
1722         if (likely(dio->op != REQ_OP_DISCARD))
1723             checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
1724                         GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1725         else
1726             checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1727         if (!checksums) {
1728             checksums = checksums_onstack;
1729             if (WARN_ON(extra_space &&
1730                     digest_size > sizeof(checksums_onstack))) {
1731                 r = -EINVAL;
1732                 goto error;
1733             }
1734         }
1735 
1736         if (unlikely(dio->op == REQ_OP_DISCARD)) {
1737             sector_t bi_sector = dio->bio_details.bi_iter.bi_sector;
1738             unsigned bi_size = dio->bio_details.bi_iter.bi_size;
1739             unsigned max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE;
1740             unsigned max_blocks = max_size / ic->tag_size;
1741             memset(checksums, DISCARD_FILLER, max_size);
1742 
1743             while (bi_size) {
1744                 unsigned this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1745                 this_step_blocks = min(this_step_blocks, max_blocks);
1746                 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1747                             this_step_blocks * ic->tag_size, TAG_WRITE);
1748                 if (unlikely(r)) {
1749                     if (likely(checksums != checksums_onstack))
1750                         kfree(checksums);
1751                     goto error;
1752                 }
1753 
1754                 /*if (bi_size < this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block)) {
1755                     printk("BUGG: bi_sector: %llx, bi_size: %u\n", bi_sector, bi_size);
1756                     printk("BUGG: this_step_blocks: %u\n", this_step_blocks);
1757                     BUG();
1758                 }*/
1759                 bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1760                 bi_sector += this_step_blocks << ic->sb->log2_sectors_per_block;
1761             }
1762 
1763             if (likely(checksums != checksums_onstack))
1764                 kfree(checksums);
1765             goto skip_io;
1766         }
1767 
1768         sector = dio->range.logical_sector;
1769         sectors_to_process = dio->range.n_sectors;
1770 
1771         __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
1772             unsigned pos;
1773             char *mem, *checksums_ptr;
1774 
1775 again:
1776             mem = bvec_kmap_local(&bv);
1777             pos = 0;
1778             checksums_ptr = checksums;
1779             do {
1780                 integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
1781                 checksums_ptr += ic->tag_size;
1782                 sectors_to_process -= ic->sectors_per_block;
1783                 pos += ic->sectors_per_block << SECTOR_SHIFT;
1784                 sector += ic->sectors_per_block;
1785             } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
1786             kunmap_local(mem);
1787 
1788             r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1789                         checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
1790             if (unlikely(r)) {
1791                 if (r > 0) {
1792                     sector_t s;
1793 
1794                     s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
1795                     DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
1796                             bio->bi_bdev, s);
1797                     r = -EILSEQ;
1798                     atomic64_inc(&ic->number_of_mismatches);
1799                     dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
1800                              bio, s, 0);
1801                 }
1802                 if (likely(checksums != checksums_onstack))
1803                     kfree(checksums);
1804                 goto error;
1805             }
1806 
1807             if (!sectors_to_process)
1808                 break;
1809 
1810             if (unlikely(pos < bv.bv_len)) {
1811                 bv.bv_offset += pos;
1812                 bv.bv_len -= pos;
1813                 goto again;
1814             }
1815         }
1816 
1817         if (likely(checksums != checksums_onstack))
1818             kfree(checksums);
1819     } else {
1820         struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
1821 
1822         if (bip) {
1823             struct bio_vec biv;
1824             struct bvec_iter iter;
1825             unsigned data_to_process = dio->range.n_sectors;
1826             sector_to_block(ic, data_to_process);
1827             data_to_process *= ic->tag_size;
1828 
1829             bip_for_each_vec(biv, bip, iter) {
1830                 unsigned char *tag;
1831                 unsigned this_len;
1832 
1833                 BUG_ON(PageHighMem(biv.bv_page));
1834                 tag = bvec_virt(&biv);
1835                 this_len = min(biv.bv_len, data_to_process);
1836                 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
1837                             this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
1838                 if (unlikely(r))
1839                     goto error;
1840                 data_to_process -= this_len;
1841                 if (!data_to_process)
1842                     break;
1843             }
1844         }
1845     }
1846 skip_io:
1847     dec_in_flight(dio);
1848     return;
1849 error:
1850     dio->bi_status = errno_to_blk_status(r);
1851     dec_in_flight(dio);
1852 }
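
/*
 * Illustrative sketch, not part of the driver: for REQ_OP_DISCARD the loop
 * above replaces the tags of discarded blocks with DISCARD_FILLER in steps,
 * each step bounded by the size of the checksum buffer. The helper name is
 * hypothetical.
 */
static inline unsigned sketch_discard_blocks_per_step(const struct dm_integrity_c *ic,
                                                      unsigned buffer_size)
{
    return buffer_size / ic->tag_size;  /* one filler tag per data block */
}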
1853 
1854 static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1855 {
1856     struct dm_integrity_c *ic = ti->private;
1857     struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
1858     struct bio_integrity_payload *bip;
1859 
1860     sector_t area, offset;
1861 
1862     dio->ic = ic;
1863     dio->bi_status = 0;
1864     dio->op = bio_op(bio);
1865 
1866     if (unlikely(dio->op == REQ_OP_DISCARD)) {
1867         if (ti->max_io_len) {
1868             sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector);
1869             unsigned log2_max_io_len = __fls(ti->max_io_len);
1870             sector_t start_boundary = sec >> log2_max_io_len;
1871             sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len;
1872             if (start_boundary < end_boundary) {
1873                 sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1));
1874                 dm_accept_partial_bio(bio, len);
1875             }
1876         }
1877     }
1878 
1879     if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
1880         submit_flush_bio(ic, dio);
1881         return DM_MAPIO_SUBMITTED;
1882     }
1883 
1884     dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
1885     dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA;
1886     if (unlikely(dio->fua)) {
1887         /*
1888          * Don't pass down the FUA flag because we have to flush
1889          * the disk cache anyway.
1890          */
1891         bio->bi_opf &= ~REQ_FUA;
1892     }
1893     if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
1894         DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
1895               dio->range.logical_sector, bio_sectors(bio),
1896               ic->provided_data_sectors);
1897         return DM_MAPIO_KILL;
1898     }
1899     if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
1900         DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
1901               ic->sectors_per_block,
1902               dio->range.logical_sector, bio_sectors(bio));
1903         return DM_MAPIO_KILL;
1904     }
1905 
1906     if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) {
1907         struct bvec_iter iter;
1908         struct bio_vec bv;
1909         bio_for_each_segment(bv, bio, iter) {
1910             if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
1911                 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
1912                     bv.bv_offset, bv.bv_len, ic->sectors_per_block);
1913                 return DM_MAPIO_KILL;
1914             }
1915         }
1916     }
1917 
1918     bip = bio_integrity(bio);
1919     if (!ic->internal_hash) {
1920         if (bip) {
1921             unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
1922             if (ic->log2_tag_size >= 0)
1923                 wanted_tag_size <<= ic->log2_tag_size;
1924             else
1925                 wanted_tag_size *= ic->tag_size;
1926             if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
1927                 DMERR("Invalid integrity data size %u, expected %u",
1928                       bip->bip_iter.bi_size, wanted_tag_size);
1929                 return DM_MAPIO_KILL;
1930             }
1931         }
1932     } else {
1933         if (unlikely(bip != NULL)) {
1934             DMERR("Unexpected integrity data when using internal hash");
1935             return DM_MAPIO_KILL;
1936         }
1937     }
1938 
1939     if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ))
1940         return DM_MAPIO_KILL;
1941 
1942     get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
1943     dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
1944     bio->bi_iter.bi_sector = get_data_sector(ic, area, offset);
1945 
1946     dm_integrity_map_continue(dio, true);
1947     return DM_MAPIO_SUBMITTED;
1948 }
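
/*
 * Illustrative sketch, not part of the driver: the bip size check above
 * expects one external tag of ic->tag_size bytes per data block, so a bio of
 * n_sectors data sectors must carry this many tag bytes. The helper name is
 * hypothetical.
 */
static inline unsigned sketch_expected_tag_bytes(const struct dm_integrity_c *ic,
                                                 unsigned n_sectors)
{
    unsigned blocks = n_sectors >> ic->sb->log2_sectors_per_block;

    if (ic->log2_tag_size >= 0)     /* power-of-two tag sizes use a shift */
        return blocks << ic->log2_tag_size;
    return blocks * ic->tag_size;       /* otherwise multiply */
}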
1949 
1950 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
1951                  unsigned journal_section, unsigned journal_entry)
1952 {
1953     struct dm_integrity_c *ic = dio->ic;
1954     sector_t logical_sector;
1955     unsigned n_sectors;
1956 
1957     logical_sector = dio->range.logical_sector;
1958     n_sectors = dio->range.n_sectors;
1959     do {
1960         struct bio_vec bv = bio_iovec(bio);
1961         char *mem;
1962 
1963         if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors))
1964             bv.bv_len = n_sectors << SECTOR_SHIFT;
1965         n_sectors -= bv.bv_len >> SECTOR_SHIFT;
1966         bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
1967 retry_kmap:
1968         mem = kmap_local_page(bv.bv_page);
1969         if (likely(dio->op == REQ_OP_WRITE))
1970             flush_dcache_page(bv.bv_page);
1971 
1972         do {
1973             struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);
1974 
1975             if (unlikely(dio->op == REQ_OP_READ)) {
1976                 struct journal_sector *js;
1977                 char *mem_ptr;
1978                 unsigned s;
1979 
1980                 if (unlikely(journal_entry_is_inprogress(je))) {
1981                     flush_dcache_page(bv.bv_page);
1982                     kunmap_local(mem);
1983 
1984                     __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
1985                     goto retry_kmap;
1986                 }
1987                 smp_rmb();
1988                 BUG_ON(journal_entry_get_sector(je) != logical_sector);
1989                 js = access_journal_data(ic, journal_section, journal_entry);
1990                 mem_ptr = mem + bv.bv_offset;
1991                 s = 0;
1992                 do {
1993                     memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
1994                     *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
1995                     js++;
1996                     mem_ptr += 1 << SECTOR_SHIFT;
1997                 } while (++s < ic->sectors_per_block);
1998 #ifdef INTERNAL_VERIFY
1999                 if (ic->internal_hash) {
2000                     char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
2001 
2002                     integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
2003                     if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
2004                         DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
2005                                 logical_sector);
2006                         dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum",
2007                                  bio, logical_sector, 0);
2008                     }
2009                 }
2010 #endif
2011             }
2012 
2013             if (!ic->internal_hash) {
2014                 struct bio_integrity_payload *bip = bio_integrity(bio);
2015                 unsigned tag_todo = ic->tag_size;
2016                 char *tag_ptr = journal_entry_tag(ic, je);
2017 
2018                 if (bip) do {
2019                     struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
2020                     unsigned tag_now = min(biv.bv_len, tag_todo);
2021                     char *tag_addr;
2022                     BUG_ON(PageHighMem(biv.bv_page));
2023                     tag_addr = bvec_virt(&biv);
2024                     if (likely(dio->op == REQ_OP_WRITE))
2025                         memcpy(tag_ptr, tag_addr, tag_now);
2026                     else
2027                         memcpy(tag_addr, tag_ptr, tag_now);
2028                     bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now);
2029                     tag_ptr += tag_now;
2030                     tag_todo -= tag_now;
2031                 } while (unlikely(tag_todo)); else {
2032                     if (likely(dio->op == REQ_OP_WRITE))
2033                         memset(tag_ptr, 0, tag_todo);
2034                 }
2035             }
2036 
2037             if (likely(dio->op == REQ_OP_WRITE)) {
2038                 struct journal_sector *js;
2039                 unsigned s;
2040 
2041                 js = access_journal_data(ic, journal_section, journal_entry);
2042                 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);
2043 
2044                 s = 0;
2045                 do {
2046                     je->last_bytes[s] = js[s].commit_id;
2047                 } while (++s < ic->sectors_per_block);
2048 
2049                 if (ic->internal_hash) {
2050                     unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
2051                     if (unlikely(digest_size > ic->tag_size)) {
2052                         char checksums_onstack[HASH_MAX_DIGESTSIZE];
2053                         integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
2054                         memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
2055                     } else
2056                         integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
2057                 }
2058 
2059                 journal_entry_set_sector(je, logical_sector);
2060             }
2061             logical_sector += ic->sectors_per_block;
2062 
2063             journal_entry++;
2064             if (unlikely(journal_entry == ic->journal_section_entries)) {
2065                 journal_entry = 0;
2066                 journal_section++;
2067                 wraparound_section(ic, &journal_section);
2068             }
2069 
2070             bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
2071         } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
2072 
2073         if (unlikely(dio->op == REQ_OP_READ))
2074             flush_dcache_page(bv.bv_page);
2075         kunmap_local(mem);
2076     } while (n_sectors);
2077 
2078     if (likely(dio->op == REQ_OP_WRITE)) {
2079         smp_mb();
2080         if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
2081             wake_up(&ic->copy_to_journal_wait);
2082         if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) {
2083             queue_work(ic->commit_wq, &ic->commit_work);
2084         } else {
2085             schedule_autocommit(ic);
2086         }
2087     } else {
2088         remove_range(ic, &dio->range);
2089     }
2090 
2091     if (unlikely(bio->bi_iter.bi_size)) {
2092         sector_t area, offset;
2093 
2094         dio->range.logical_sector = logical_sector;
2095         get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
2096         dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
2097         return true;
2098     }
2099 
2100     return false;
2101 }
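
/*
 * Illustrative sketch, not part of the driver: each journal sector stores
 * JOURNAL_SECTOR_DATA bytes of user data plus a commit_id in its last bytes;
 * the displaced tail of every data sector is kept in je->last_bytes[]. This
 * is what the read path above stitches back together. The helper name is
 * hypothetical.
 */
static inline void sketch_copy_sector_from_journal(struct journal_entry *je,
                                                   struct journal_sector *js,
                                                   unsigned s, char *dest)
{
    memcpy(dest, &js[s], JOURNAL_SECTOR_DATA);      /* journalled data bytes */
    memcpy(dest + JOURNAL_SECTOR_DATA, &je->last_bytes[s],
           sizeof(commit_id_t));            /* displaced tail of the sector */
}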
2102 
2103 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
2104 {
2105     struct dm_integrity_c *ic = dio->ic;
2106     struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2107     unsigned journal_section, journal_entry;
2108     unsigned journal_read_pos;
2109     struct completion read_comp;
2110     bool discard_retried = false;
2111     bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ;
2112     if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D')
2113         need_sync_io = true;
2114 
2115     if (need_sync_io && from_map) {
2116         INIT_WORK(&dio->work, integrity_bio_wait);
2117         queue_work(ic->offload_wq, &dio->work);
2118         return;
2119     }
2120 
2121 lock_retry:
2122     spin_lock_irq(&ic->endio_wait.lock);
2123 retry:
2124     if (unlikely(dm_integrity_failed(ic))) {
2125         spin_unlock_irq(&ic->endio_wait.lock);
2126         do_endio(ic, bio);
2127         return;
2128     }
2129     dio->range.n_sectors = bio_sectors(bio);
2130     journal_read_pos = NOT_FOUND;
2131     if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) {
2132         if (dio->op == REQ_OP_WRITE) {
2133             unsigned next_entry, i, pos;
2134             unsigned ws, we, range_sectors;
2135 
2136             dio->range.n_sectors = min(dio->range.n_sectors,
2137                            (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block);
2138             if (unlikely(!dio->range.n_sectors)) {
2139                 if (from_map)
2140                     goto offload_to_thread;
2141                 sleep_on_endio_wait(ic);
2142                 goto retry;
2143             }
2144             range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
2145             ic->free_sectors -= range_sectors;
2146             journal_section = ic->free_section;
2147             journal_entry = ic->free_section_entry;
2148 
2149             next_entry = ic->free_section_entry + range_sectors;
2150             ic->free_section_entry = next_entry % ic->journal_section_entries;
2151             ic->free_section += next_entry / ic->journal_section_entries;
2152             ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
2153             wraparound_section(ic, &ic->free_section);
2154 
2155             pos = journal_section * ic->journal_section_entries + journal_entry;
2156             ws = journal_section;
2157             we = journal_entry;
2158             i = 0;
2159             do {
2160                 struct journal_entry *je;
2161 
2162                 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
2163                 pos++;
2164                 if (unlikely(pos >= ic->journal_entries))
2165                     pos = 0;
2166 
2167                 je = access_journal_entry(ic, ws, we);
2168                 BUG_ON(!journal_entry_is_unused(je));
2169                 journal_entry_set_inprogress(je);
2170                 we++;
2171                 if (unlikely(we == ic->journal_section_entries)) {
2172                     we = 0;
2173                     ws++;
2174                     wraparound_section(ic, &ws);
2175                 }
2176             } while ((i += ic->sectors_per_block) < dio->range.n_sectors);
2177 
2178             spin_unlock_irq(&ic->endio_wait.lock);
2179             goto journal_read_write;
2180         } else {
2181             sector_t next_sector;
2182             journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2183             if (likely(journal_read_pos == NOT_FOUND)) {
2184                 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector))
2185                     dio->range.n_sectors = next_sector - dio->range.logical_sector;
2186             } else {
2187                 unsigned i;
2188                 unsigned jp = journal_read_pos + 1;
2189                 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
2190                     if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
2191                         break;
2192                 }
2193                 dio->range.n_sectors = i;
2194             }
2195         }
2196     }
2197     if (unlikely(!add_new_range(ic, &dio->range, true))) {
2198         /*
2199          * We must not sleep in the request routine because it could
2200          * stall bios on current->bio_list.
2201          * So, we offload the bio to a workqueue if we have to sleep.
2202          */
2203         if (from_map) {
2204 offload_to_thread:
2205             spin_unlock_irq(&ic->endio_wait.lock);
2206             INIT_WORK(&dio->work, integrity_bio_wait);
2207             queue_work(ic->wait_wq, &dio->work);
2208             return;
2209         }
2210         if (journal_read_pos != NOT_FOUND)
2211             dio->range.n_sectors = ic->sectors_per_block;
2212         wait_and_add_new_range(ic, &dio->range);
2213         /*
2214          * wait_and_add_new_range drops the spinlock, so the journal
2215          * may have been changed arbitrarily. We need to recheck.
2216          * To simplify the code, we restrict I/O size to just one block.
2217          */
2218         if (journal_read_pos != NOT_FOUND) {
2219             sector_t next_sector;
2220             unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2221             if (unlikely(new_pos != journal_read_pos)) {
2222                 remove_range_unlocked(ic, &dio->range);
2223                 goto retry;
2224             }
2225         }
2226     }
2227     if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) {
2228         sector_t next_sector;
2229         unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2230         if (unlikely(new_pos != NOT_FOUND) ||
2231             unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) {
2232             remove_range_unlocked(ic, &dio->range);
2233             spin_unlock_irq(&ic->endio_wait.lock);
2234             queue_work(ic->commit_wq, &ic->commit_work);
2235             flush_workqueue(ic->commit_wq);
2236             queue_work(ic->writer_wq, &ic->writer_work);
2237             flush_workqueue(ic->writer_wq);
2238             discard_retried = true;
2239             goto lock_retry;
2240         }
2241     }
2242     spin_unlock_irq(&ic->endio_wait.lock);
2243 
2244     if (unlikely(journal_read_pos != NOT_FOUND)) {
2245         journal_section = journal_read_pos / ic->journal_section_entries;
2246         journal_entry = journal_read_pos % ic->journal_section_entries;
2247         goto journal_read_write;
2248     }
2249 
2250     if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) {
2251         if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2252                      dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2253             struct bitmap_block_status *bbs;
2254 
2255             bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
2256             spin_lock(&bbs->bio_queue_lock);
2257             bio_list_add(&bbs->bio_queue, bio);
2258             spin_unlock(&bbs->bio_queue_lock);
2259             queue_work(ic->writer_wq, &bbs->work);
2260             return;
2261         }
2262     }
2263 
2264     dio->in_flight = (atomic_t)ATOMIC_INIT(2);
2265 
2266     if (need_sync_io) {
2267         init_completion(&read_comp);
2268         dio->completion = &read_comp;
2269     } else
2270         dio->completion = NULL;
2271 
2272     dm_bio_record(&dio->bio_details, bio);
2273     bio_set_dev(bio, ic->dev->bdev);
2274     bio->bi_integrity = NULL;
2275     bio->bi_opf &= ~REQ_INTEGRITY;
2276     bio->bi_end_io = integrity_end_io;
2277     bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
2278 
2279     if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
2280         integrity_metadata(&dio->work);
2281         dm_integrity_flush_buffers(ic, false);
2282 
2283         dio->in_flight = (atomic_t)ATOMIC_INIT(1);
2284         dio->completion = NULL;
2285 
2286         submit_bio_noacct(bio);
2287 
2288         return;
2289     }
2290 
2291     submit_bio_noacct(bio);
2292 
2293     if (need_sync_io) {
2294         wait_for_completion_io(&read_comp);
2295         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
2296             dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
2297             goto skip_check;
2298         if (ic->mode == 'B') {
2299             if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector,
2300                          dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
2301                 goto skip_check;
2302         }
2303 
2304         if (likely(!bio->bi_status))
2305             integrity_metadata(&dio->work);
2306         else
2307 skip_check:
2308             dec_in_flight(dio);
2309 
2310     } else {
2311         INIT_WORK(&dio->work, integrity_metadata);
2312         queue_work(ic->metadata_wq, &dio->work);
2313     }
2314 
2315     return;
2316 
2317 journal_read_write:
2318     if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry)))
2319         goto lock_retry;
2320 
2321     do_endio_flush(ic, dio);
2322 }
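
/*
 * Illustrative sketch, not part of the driver: how a journalled write above
 * reserves its entries. The free position advances entry by entry, spills
 * into following sections when a section fills up, and wraps around at the
 * end of the journal. Must be called under ic->endio_wait.lock; the helper
 * name is hypothetical.
 */
static inline void sketch_reserve_journal_entries(struct dm_integrity_c *ic,
                                                  unsigned range_sectors,
                                                  unsigned *section, unsigned *entry)
{
    unsigned next_entry;

    ic->free_sectors -= range_sectors;
    *section = ic->free_section;
    *entry = ic->free_section_entry;

    next_entry = ic->free_section_entry + range_sectors;
    ic->free_section_entry = next_entry % ic->journal_section_entries;
    ic->free_section += next_entry / ic->journal_section_entries;
    ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
    wraparound_section(ic, &ic->free_section);
}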
2323 
2324 
2325 static void integrity_bio_wait(struct work_struct *w)
2326 {
2327     struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
2328 
2329     dm_integrity_map_continue(dio, false);
2330 }
2331 
2332 static void pad_uncommitted(struct dm_integrity_c *ic)
2333 {
2334     if (ic->free_section_entry) {
2335         ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry;
2336         ic->free_section_entry = 0;
2337         ic->free_section++;
2338         wraparound_section(ic, &ic->free_section);
2339         ic->n_uncommitted_sections++;
2340     }
2341     if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
2342             (ic->n_uncommitted_sections + ic->n_committed_sections) *
2343             ic->journal_section_entries + ic->free_sectors)) {
2344         DMCRIT("journal_sections %u, journal_section_entries %u, "
2345                "n_uncommitted_sections %u, n_committed_sections %u, "
2346                "journal_section_entries %u, free_sectors %u",
2347                ic->journal_sections, ic->journal_section_entries,
2348                ic->n_uncommitted_sections, ic->n_committed_sections,
2349                ic->journal_section_entries, ic->free_sectors);
2350     }
2351 }
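
/*
 * Illustrative sketch, not part of the driver: the invariant checked by the
 * WARN_ON above, written out. Every journal entry is either free or belongs
 * to an uncommitted or committed section. The helper name is hypothetical.
 */
static inline bool sketch_journal_accounting_ok(const struct dm_integrity_c *ic)
{
    unsigned total = ic->journal_sections * ic->journal_section_entries;
    unsigned used = (ic->n_uncommitted_sections + ic->n_committed_sections) *
            ic->journal_section_entries;

    return total == used + ic->free_sectors;
}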
2352 
2353 static void integrity_commit(struct work_struct *w)
2354 {
2355     struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work);
2356     unsigned commit_start, commit_sections;
2357     unsigned i, j, n;
2358     struct bio *flushes;
2359 
2360     del_timer(&ic->autocommit_timer);
2361 
2362     spin_lock_irq(&ic->endio_wait.lock);
2363     flushes = bio_list_get(&ic->flush_bio_list);
2364     if (unlikely(ic->mode != 'J')) {
2365         spin_unlock_irq(&ic->endio_wait.lock);
2366         dm_integrity_flush_buffers(ic, true);
2367         goto release_flush_bios;
2368     }
2369 
2370     pad_uncommitted(ic);
2371     commit_start = ic->uncommitted_section;
2372     commit_sections = ic->n_uncommitted_sections;
2373     spin_unlock_irq(&ic->endio_wait.lock);
2374 
2375     if (!commit_sections)
2376         goto release_flush_bios;
2377 
2378     i = commit_start;
2379     for (n = 0; n < commit_sections; n++) {
2380         for (j = 0; j < ic->journal_section_entries; j++) {
2381             struct journal_entry *je;
2382             je = access_journal_entry(ic, i, j);
2383             io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
2384         }
2385         for (j = 0; j < ic->journal_section_sectors; j++) {
2386             struct journal_sector *js;
2387             js = access_journal(ic, i, j);
2388             js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq);
2389         }
2390         i++;
2391         if (unlikely(i >= ic->journal_sections))
2392             ic->commit_seq = next_commit_seq(ic->commit_seq);
2393         wraparound_section(ic, &i);
2394     }
2395     smp_rmb();
2396 
2397     write_journal(ic, commit_start, commit_sections);
2398 
2399     spin_lock_irq(&ic->endio_wait.lock);
2400     ic->uncommitted_section += commit_sections;
2401     wraparound_section(ic, &ic->uncommitted_section);
2402     ic->n_uncommitted_sections -= commit_sections;
2403     ic->n_committed_sections += commit_sections;
2404     spin_unlock_irq(&ic->endio_wait.lock);
2405 
2406     if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
2407         queue_work(ic->writer_wq, &ic->writer_work);
2408 
2409 release_flush_bios:
2410     while (flushes) {
2411         struct bio *next = flushes->bi_next;
2412         flushes->bi_next = NULL;
2413         do_endio(ic, flushes);
2414         flushes = next;
2415     }
2416 }
2417 
2418 static void complete_copy_from_journal(unsigned long error, void *context)
2419 {
2420     struct journal_io *io = context;
2421     struct journal_completion *comp = io->comp;
2422     struct dm_integrity_c *ic = comp->ic;
2423     remove_range(ic, &io->range);
2424     mempool_free(io, &ic->journal_io_mempool);
2425     if (unlikely(error != 0))
2426         dm_integrity_io_error(ic, "copying from journal", -EIO);
2427     complete_journal_op(comp);
2428 }
2429 
2430 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
2431                    struct journal_entry *je)
2432 {
2433     unsigned s = 0;
2434     do {
2435         js->commit_id = je->last_bytes[s];
2436         js++;
2437     } while (++s < ic->sectors_per_block);
2438 }
2439 
2440 static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
2441                  unsigned write_sections, bool from_replay)
2442 {
2443     unsigned i, j, n;
2444     struct journal_completion comp;
2445     struct blk_plug plug;
2446 
2447     blk_start_plug(&plug);
2448 
2449     comp.ic = ic;
2450     comp.in_flight = (atomic_t)ATOMIC_INIT(1);
2451     init_completion(&comp.comp);
2452 
2453     i = write_start;
2454     for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) {
2455 #ifndef INTERNAL_VERIFY
2456         if (unlikely(from_replay))
2457 #endif
2458             rw_section_mac(ic, i, false);
2459         for (j = 0; j < ic->journal_section_entries; j++) {
2460             struct journal_entry *je = access_journal_entry(ic, i, j);
2461             sector_t sec, area, offset;
2462             unsigned k, l, next_loop;
2463             sector_t metadata_block;
2464             unsigned metadata_offset;
2465             struct journal_io *io;
2466 
2467             if (journal_entry_is_unused(je))
2468                 continue;
2469             BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
2470             sec = journal_entry_get_sector(je);
2471             if (unlikely(from_replay)) {
2472                 if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) {
2473                     dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
2474                     sec &= ~(sector_t)(ic->sectors_per_block - 1);
2475                 }
2476                 if (unlikely(sec >= ic->provided_data_sectors)) {
2477                     journal_entry_set_unused(je);
2478                     continue;
2479                 }
2480             }
2481             get_area_and_offset(ic, sec, &area, &offset);
2482             restore_last_bytes(ic, access_journal_data(ic, i, j), je);
2483             for (k = j + 1; k < ic->journal_section_entries; k++) {
2484                 struct journal_entry *je2 = access_journal_entry(ic, i, k);
2485                 sector_t sec2, area2, offset2;
2486                 if (journal_entry_is_unused(je2))
2487                     break;
2488                 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
2489                 sec2 = journal_entry_get_sector(je2);
2490                 if (unlikely(sec2 >= ic->provided_data_sectors))
2491                     break;
2492                 get_area_and_offset(ic, sec2, &area2, &offset2);
2493                 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
2494                     break;
2495                 restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
2496             }
2497             next_loop = k - 1;
2498 
2499             io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
2500             io->comp = &comp;
2501             io->range.logical_sector = sec;
2502             io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
2503 
2504             spin_lock_irq(&ic->endio_wait.lock);
2505             add_new_range_and_wait(ic, &io->range);
2506 
2507             if (likely(!from_replay)) {
2508                 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
2509 
2510                 /* don't write if there is a newer committed sector */
2511                 while (j < k && find_newer_committed_node(ic, &section_node[j])) {
2512                     struct journal_entry *je2 = access_journal_entry(ic, i, j);
2513 
2514                     journal_entry_set_unused(je2);
2515                     remove_journal_node(ic, &section_node[j]);
2516                     j++;
2517                     sec += ic->sectors_per_block;
2518                     offset += ic->sectors_per_block;
2519                 }
2520                 while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
2521                     struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);
2522 
2523                     journal_entry_set_unused(je2);
2524                     remove_journal_node(ic, &section_node[k - 1]);
2525                     k--;
2526                 }
2527                 if (j == k) {
2528                     remove_range_unlocked(ic, &io->range);
2529                     spin_unlock_irq(&ic->endio_wait.lock);
2530                     mempool_free(io, &ic->journal_io_mempool);
2531                     goto skip_io;
2532                 }
2533                 for (l = j; l < k; l++) {
2534                     remove_journal_node(ic, &section_node[l]);
2535                 }
2536             }
2537             spin_unlock_irq(&ic->endio_wait.lock);
2538 
2539             metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
2540             for (l = j; l < k; l++) {
2541                 int r;
2542                 struct journal_entry *je2 = access_journal_entry(ic, i, l);
2543 
2544                 if (
2545 #ifndef INTERNAL_VERIFY
2546                     unlikely(from_replay) &&
2547 #endif
2548                     ic->internal_hash) {
2549                     char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
2550 
2551                     integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
2552                                   (char *)access_journal_data(ic, i, l), test_tag);
2553                     if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
2554                         dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
2555                         dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
2556                     }
2557                 }
2558 
2559                 journal_entry_set_unused(je2);
2560                 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
2561                             ic->tag_size, TAG_WRITE);
2562                 if (unlikely(r)) {
2563                     dm_integrity_io_error(ic, "reading tags", r);
2564                 }
2565             }
2566 
2567             atomic_inc(&comp.in_flight);
2568             copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
2569                       (k - j) << ic->sb->log2_sectors_per_block,
2570                       get_data_sector(ic, area, offset),
2571                       complete_copy_from_journal, io);
2572 skip_io:
2573             j = next_loop;
2574         }
2575     }
2576 
2577     dm_bufio_write_dirty_buffers_async(ic->bufio);
2578 
2579     blk_finish_plug(&plug);
2580 
2581     complete_journal_op(&comp);
2582     wait_for_completion_io(&comp.comp);
2583 
2584     dm_integrity_flush_buffers(ic, true);
2585 }
2586 
2587 static void integrity_writer(struct work_struct *w)
2588 {
2589     struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work);
2590     unsigned write_start, write_sections;
2591 
2592     unsigned prev_free_sectors;
2593 
2594     /* This early-return test is not needed for correctness, but leaving committed sections unwritten exercises the journal replay code. */
2595     if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev)
2596         return;
2597 
2598     spin_lock_irq(&ic->endio_wait.lock);
2599     write_start = ic->committed_section;
2600     write_sections = ic->n_committed_sections;
2601     spin_unlock_irq(&ic->endio_wait.lock);
2602 
2603     if (!write_sections)
2604         return;
2605 
2606     do_journal_write(ic, write_start, write_sections, false);
2607 
2608     spin_lock_irq(&ic->endio_wait.lock);
2609 
2610     ic->committed_section += write_sections;
2611     wraparound_section(ic, &ic->committed_section);
2612     ic->n_committed_sections -= write_sections;
2613 
2614     prev_free_sectors = ic->free_sectors;
2615     ic->free_sectors += write_sections * ic->journal_section_entries;
2616     if (unlikely(!prev_free_sectors))
2617         wake_up_locked(&ic->endio_wait);
2618 
2619     spin_unlock_irq(&ic->endio_wait.lock);
2620 }
2621 
2622 static void recalc_write_super(struct dm_integrity_c *ic)
2623 {
2624     int r;
2625 
2626     dm_integrity_flush_buffers(ic, false);
2627     if (dm_integrity_failed(ic))
2628         return;
2629 
2630     r = sync_rw_sb(ic, REQ_OP_WRITE);
2631     if (unlikely(r))
2632         dm_integrity_io_error(ic, "writing superblock", r);
2633 }
2634 
2635 static void integrity_recalc(struct work_struct *w)
2636 {
2637     struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
2638     struct dm_integrity_range range;
2639     struct dm_io_request io_req;
2640     struct dm_io_region io_loc;
2641     sector_t area, offset;
2642     sector_t metadata_block;
2643     unsigned metadata_offset;
2644     sector_t logical_sector, n_sectors;
2645     __u8 *t;
2646     unsigned i;
2647     int r;
2648     unsigned super_counter = 0;
2649 
2650     DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
2651 
2652     spin_lock_irq(&ic->endio_wait.lock);
2653 
2654 next_chunk:
2655 
2656     if (unlikely(dm_post_suspending(ic->ti)))
2657         goto unlock_ret;
2658 
2659     range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
2660     if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
2661         if (ic->mode == 'B') {
2662             block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
2663             DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
2664             queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
2665         }
2666         goto unlock_ret;
2667     }
2668 
2669     get_area_and_offset(ic, range.logical_sector, &area, &offset);
2670     range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
2671     if (!ic->meta_dev)
2672         range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
2673 
2674     add_new_range_and_wait(ic, &range);
2675     spin_unlock_irq(&ic->endio_wait.lock);
2676     logical_sector = range.logical_sector;
2677     n_sectors = range.n_sectors;
2678 
2679     if (ic->mode == 'B') {
2680         if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
2681             goto advance_and_next;
2682         }
2683         while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector,
2684                        ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2685             logical_sector += ic->sectors_per_block;
2686             n_sectors -= ic->sectors_per_block;
2687             cond_resched();
2688         }
2689         while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block,
2690                        ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2691             n_sectors -= ic->sectors_per_block;
2692             cond_resched();
2693         }
2694         get_area_and_offset(ic, logical_sector, &area, &offset);
2695     }
2696 
2697     DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors);
2698 
2699     if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
2700         recalc_write_super(ic);
2701         if (ic->mode == 'B') {
2702             queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
2703         }
2704         super_counter = 0;
2705     }
2706 
2707     if (unlikely(dm_integrity_failed(ic)))
2708         goto err;
2709 
2710     io_req.bi_opf = REQ_OP_READ;
2711     io_req.mem.type = DM_IO_VMA;
2712     io_req.mem.ptr.addr = ic->recalc_buffer;
2713     io_req.notify.fn = NULL;
2714     io_req.client = ic->io;
2715     io_loc.bdev = ic->dev->bdev;
2716     io_loc.sector = get_data_sector(ic, area, offset);
2717     io_loc.count = n_sectors;
2718 
2719     r = dm_io(&io_req, 1, &io_loc, NULL);
2720     if (unlikely(r)) {
2721         dm_integrity_io_error(ic, "reading data", r);
2722         goto err;
2723     }
2724 
2725     t = ic->recalc_tags;
2726     for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
2727         integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
2728         t += ic->tag_size;
2729     }
2730 
2731     metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
2732 
2733     r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE);
2734     if (unlikely(r)) {
2735         dm_integrity_io_error(ic, "writing tags", r);
2736         goto err;
2737     }
2738 
2739     if (ic->mode == 'B') {
2740         sector_t start, end;
2741         start = (range.logical_sector >>
2742              (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
2743             (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2744         end = ((range.logical_sector + range.n_sectors) >>
2745                (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
2746             (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2747         block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR);
2748     }
2749 
2750 advance_and_next:
2751     cond_resched();
2752 
2753     spin_lock_irq(&ic->endio_wait.lock);
2754     remove_range_unlocked(ic, &range);
2755     ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
2756     goto next_chunk;
2757 
2758 err:
2759     remove_range(ic, &range);
2760     return;
2761 
2762 unlock_ret:
2763     spin_unlock_irq(&ic->endio_wait.lock);
2764 
2765     recalc_write_super(ic);
2766 }
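
/*
 * Illustrative sketch, not part of the driver: recalculated sectors are
 * cleared from the bitmap only at bitmap-bit granularity. One bit covers
 * 1 << (log2_sectors_per_block + log2_blocks_per_bitmap_bit) sectors, so both
 * ends of the range are rounded down: the leading part was recalculated in an
 * earlier chunk, while a partially covered trailing bit stays set. The helper
 * name is hypothetical.
 */
static inline sector_t sketch_round_down_to_bitmap_bit(const struct dm_integrity_c *ic,
                                                       sector_t sector)
{
    unsigned shift = ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit;

    return (sector >> shift) << shift;
}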
2767 
2768 static void bitmap_block_work(struct work_struct *w)
2769 {
2770     struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
2771     struct dm_integrity_c *ic = bbs->ic;
2772     struct bio *bio;
2773     struct bio_list bio_queue;
2774     struct bio_list waiting;
2775 
2776     bio_list_init(&waiting);
2777 
2778     spin_lock(&bbs->bio_queue_lock);
2779     bio_queue = bbs->bio_queue;
2780     bio_list_init(&bbs->bio_queue);
2781     spin_unlock(&bbs->bio_queue_lock);
2782 
2783     while ((bio = bio_list_pop(&bio_queue))) {
2784         struct dm_integrity_io *dio;
2785 
2786         dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2787 
2788         if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2789                     dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2790             remove_range(ic, &dio->range);
2791             INIT_WORK(&dio->work, integrity_bio_wait);
2792             queue_work(ic->offload_wq, &dio->work);
2793         } else {
2794             block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
2795                     dio->range.n_sectors, BITMAP_OP_SET);
2796             bio_list_add(&waiting, bio);
2797         }
2798     }
2799 
2800     if (bio_list_empty(&waiting))
2801         return;
2802 
2803     rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC,
2804                bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
2805                BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
2806 
2807     while ((bio = bio_list_pop(&waiting))) {
2808         struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2809 
2810         block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2811                 dio->range.n_sectors, BITMAP_OP_SET);
2812 
2813         remove_range(ic, &dio->range);
2814         INIT_WORK(&dio->work, integrity_bio_wait);
2815         queue_work(ic->offload_wq, &dio->work);
2816     }
2817 
2818     queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
2819 }
2820 
2821 static void bitmap_flush_work(struct work_struct *work)
2822 {
2823     struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
2824     struct dm_integrity_range range;
2825     unsigned long limit;
2826     struct bio *bio;
2827 
2828     dm_integrity_flush_buffers(ic, false);
2829 
2830     range.logical_sector = 0;
2831     range.n_sectors = ic->provided_data_sectors;
2832 
2833     spin_lock_irq(&ic->endio_wait.lock);
2834     add_new_range_and_wait(ic, &range);
2835     spin_unlock_irq(&ic->endio_wait.lock);
2836 
2837     dm_integrity_flush_buffers(ic, true);
2838 
2839     limit = ic->provided_data_sectors;
2840     if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2841         limit = le64_to_cpu(ic->sb->recalc_sector)
2842             >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
2843             << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2844     }
2845     /*DEBUG_print("zeroing journal\n");*/
2846     block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
2847     block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
2848 
2849     rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
2850                ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
2851 
2852     spin_lock_irq(&ic->endio_wait.lock);
2853     remove_range_unlocked(ic, &range);
2854     while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) {
2855         bio_endio(bio);
2856         spin_unlock_irq(&ic->endio_wait.lock);
2857         spin_lock_irq(&ic->endio_wait.lock);
2858     }
2859     spin_unlock_irq(&ic->endio_wait.lock);
2860 }
2861 
2862 
2863 static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
2864              unsigned n_sections, unsigned char commit_seq)
2865 {
2866     unsigned i, j, n;
2867 
2868     if (!n_sections)
2869         return;
2870 
2871     for (n = 0; n < n_sections; n++) {
2872         i = start_section + n;
2873         wraparound_section(ic, &i);
2874         for (j = 0; j < ic->journal_section_sectors; j++) {
2875             struct journal_sector *js = access_journal(ic, i, j);
2876             BUILD_BUG_ON(sizeof(js->sectors) != JOURNAL_SECTOR_DATA);
2877             memset(&js->sectors, 0, sizeof(js->sectors));
2878             js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq);
2879         }
2880         for (j = 0; j < ic->journal_section_entries; j++) {
2881             struct journal_entry *je = access_journal_entry(ic, i, j);
2882             journal_entry_set_unused(je);
2883         }
2884     }
2885 
2886     write_journal(ic, start_section, n_sections);
2887 }
2888 
2889 static int find_commit_seq(struct dm_integrity_c *ic, unsigned i, unsigned j, commit_id_t id)
2890 {
2891     unsigned char k;
2892     for (k = 0; k < N_COMMIT_IDS; k++) {
2893         if (dm_integrity_commit_id(ic, i, j, k) == id)
2894             return k;
2895     }
2896     dm_integrity_io_error(ic, "journal commit id", -EIO);
2897     return -EIO;
2898 }
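
/*
 * Illustrative sketch, not part of the driver, and an assumption about
 * helpers defined elsewhere in this file: commit sequence numbers cycle
 * through N_COMMIT_IDS values, and replay_journal() below walks this cycle
 * with prev_commit_seq()/next_commit_seq() to find the newest generation that
 * was written out completely. A condensed view of that wrap-around, with a
 * hypothetical helper name.
 */
static inline unsigned char sketch_next_seq(unsigned char seq)
{
    return (seq + 1) % N_COMMIT_IDS;
}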
2899 
2900 static void replay_journal(struct dm_integrity_c *ic)
2901 {
2902     unsigned i, j;
2903     bool used_commit_ids[N_COMMIT_IDS];
2904     unsigned max_commit_id_sections[N_COMMIT_IDS];
2905     unsigned write_start, write_sections;
2906     unsigned continue_section;
2907     bool journal_empty;
2908     unsigned char unused, last_used, want_commit_seq;
2909 
2910     if (ic->mode == 'R')
2911         return;
2912 
2913     if (ic->journal_uptodate)
2914         return;
2915 
2916     last_used = 0;
2917     write_start = 0;
2918 
2919     if (!ic->just_formatted) {
2920         DEBUG_print("reading journal\n");
2921         rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL);
2922         if (ic->journal_io)
2923             DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal");
2924         if (ic->journal_io) {
2925             struct journal_completion crypt_comp;
2926             crypt_comp.ic = ic;
2927             init_completion(&crypt_comp.comp);
2928             crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0);
2929             encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp);
2930             wait_for_completion(&crypt_comp.comp);
2931         }
2932         DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal");
2933     }
2934 
2935     if (dm_integrity_failed(ic))
2936         goto clear_journal;
2937 
2938     journal_empty = true;
2939     memset(used_commit_ids, 0, sizeof used_commit_ids);
2940     memset(max_commit_id_sections, 0, sizeof max_commit_id_sections);
2941     for (i = 0; i < ic->journal_sections; i++) {
2942         for (j = 0; j < ic->journal_section_sectors; j++) {
2943             int k;
2944             struct journal_sector *js = access_journal(ic, i, j);
2945             k = find_commit_seq(ic, i, j, js->commit_id);
2946             if (k < 0)
2947                 goto clear_journal;
2948             used_commit_ids[k] = true;
2949             max_commit_id_sections[k] = i;
2950         }
2951         if (journal_empty) {
2952             for (j = 0; j < ic->journal_section_entries; j++) {
2953                 struct journal_entry *je = access_journal_entry(ic, i, j);
2954                 if (!journal_entry_is_unused(je)) {
2955                     journal_empty = false;
2956                     break;
2957                 }
2958             }
2959         }
2960     }
2961 
2962     if (!used_commit_ids[N_COMMIT_IDS - 1]) {
2963         unused = N_COMMIT_IDS - 1;
2964         while (unused && !used_commit_ids[unused - 1])
2965             unused--;
2966     } else {
2967         for (unused = 0; unused < N_COMMIT_IDS; unused++)
2968             if (!used_commit_ids[unused])
2969                 break;
2970         if (unused == N_COMMIT_IDS) {
2971             dm_integrity_io_error(ic, "journal commit ids", -EIO);
2972             goto clear_journal;
2973         }
2974     }
2975     DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n",
2976             unused, used_commit_ids[0], used_commit_ids[1],
2977             used_commit_ids[2], used_commit_ids[3]);
2978 
2979     last_used = prev_commit_seq(unused);
2980     want_commit_seq = prev_commit_seq(last_used);
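    /*
     * Commit sequence numbers cycle through N_COMMIT_IDS values.  last_used
     * is taken to be the most recently written sequence and want_commit_seq
     * the one before it; replay starts just past the last section stamped
     * with last_used and expects want_commit_seq from there on (bumping the
     * sequence when wrapping around), stopping at the first mismatch, which
     * indicates a torn write.
     */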
2981 
2982     if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)])
2983         journal_empty = true;
2984 
2985     write_start = max_commit_id_sections[last_used] + 1;
2986     if (unlikely(write_start >= ic->journal_sections))
2987         want_commit_seq = next_commit_seq(want_commit_seq);
2988     wraparound_section(ic, &write_start);
2989 
2990     i = write_start;
2991     for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) {
2992         for (j = 0; j < ic->journal_section_sectors; j++) {
2993             struct journal_sector *js = access_journal(ic, i, j);
2994 
2995             if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) {
2996                 /*
2997                  * This could be caused by a crash during writing.
2998                  * We won't replay the inconsistent part of the
2999                  * journal.
3000                  */
3001                 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n",
3002                         i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq);
3003                 goto brk;
3004             }
3005         }
3006         i++;
3007         if (unlikely(i >= ic->journal_sections))
3008             want_commit_seq = next_commit_seq(want_commit_seq);
3009         wraparound_section(ic, &i);
3010     }
3011 brk:
3012 
3013     if (!journal_empty) {
3014         DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n",
3015                 write_sections, write_start, want_commit_seq);
3016         do_journal_write(ic, write_start, write_sections, true);
3017     }
3018 
3019     if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) {
3020         continue_section = write_start;
3021         ic->commit_seq = want_commit_seq;
3022         DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq);
3023     } else {
3024         unsigned s;
3025         unsigned char erase_seq;
3026 clear_journal:
3027         DEBUG_print("clearing journal\n");
3028 
3029         erase_seq = prev_commit_seq(prev_commit_seq(last_used));
3030         s = write_start;
3031         init_journal(ic, s, 1, erase_seq);
3032         s++;
3033         wraparound_section(ic, &s);
3034         if (ic->journal_sections >= 2) {
3035             init_journal(ic, s, ic->journal_sections - 2, erase_seq);
3036             s += ic->journal_sections - 2;
3037             wraparound_section(ic, &s);
3038             init_journal(ic, s, 1, erase_seq);
3039         }
3040 
3041         continue_section = 0;
3042         ic->commit_seq = next_commit_seq(erase_seq);
3043     }
3044 
3045     ic->committed_section = continue_section;
3046     ic->n_committed_sections = 0;
3047 
3048     ic->uncommitted_section = continue_section;
3049     ic->n_uncommitted_sections = 0;
3050 
3051     ic->free_section = continue_section;
3052     ic->free_section_entry = 0;
3053     ic->free_sectors = ic->journal_entries;
3054 
3055     ic->journal_tree_root = RB_ROOT;
3056     for (i = 0; i < ic->journal_entries; i++)
3057         init_journal_node(&ic->journal_tree[i]);
3058 }
3059 
3060 static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic)
3061 {
3062     DEBUG_print("dm_integrity_enter_synchronous_mode\n");
3063 
3064     if (ic->mode == 'B') {
3065         ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1;
3066         ic->synchronous_mode = 1;
3067 
3068         cancel_delayed_work_sync(&ic->bitmap_flush_work);
3069         queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
3070         flush_workqueue(ic->commit_wq);
3071     }
3072 }
3073 
3074 static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x)
3075 {
3076     struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier);
3077 
3078     DEBUG_print("dm_integrity_reboot\n");
3079 
3080     dm_integrity_enter_synchronous_mode(ic);
3081 
3082     return NOTIFY_DONE;
3083 }
3084 
3085 static void dm_integrity_postsuspend(struct dm_target *ti)
3086 {
3087     struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
3088     int r;
3089 
3090     WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier));
3091 
3092     del_timer_sync(&ic->autocommit_timer);
3093 
3094     if (ic->recalc_wq)
3095         drain_workqueue(ic->recalc_wq);
3096 
3097     if (ic->mode == 'B')
3098         cancel_delayed_work_sync(&ic->bitmap_flush_work);
3099 
3100     queue_work(ic->commit_wq, &ic->commit_work);
3101     drain_workqueue(ic->commit_wq);
3102 
3103     if (ic->mode == 'J') {
3104         if (ic->meta_dev)
3105             queue_work(ic->writer_wq, &ic->writer_work);
3106         drain_workqueue(ic->writer_wq);
3107         dm_integrity_flush_buffers(ic, true);
3108     }
3109 
3110     if (ic->mode == 'B') {
3111         dm_integrity_flush_buffers(ic, true);
3112 #if 1
3113         /* set to 0 to test bitmap replay code */
3114         init_journal(ic, 0, ic->journal_sections, 0);
3115         ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3116         r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3117         if (unlikely(r))
3118             dm_integrity_io_error(ic, "writing superblock", r);
3119 #endif
3120     }
3121 
3122     BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
3123 
3124     ic->journal_uptodate = true;
3125 }
3126 
3127 static void dm_integrity_resume(struct dm_target *ti)
3128 {
3129     struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
3130     __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3131     int r;
3132 
3133     DEBUG_print("resume\n");
3134 
3135     if (ic->provided_data_sectors != old_provided_data_sectors) {
3136         if (ic->provided_data_sectors > old_provided_data_sectors &&
3137             ic->mode == 'B' &&
3138             ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
3139             rw_journal_sectors(ic, REQ_OP_READ, 0,
3140                        ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3141             block_bitmap_op(ic, ic->journal, old_provided_data_sectors,
3142                     ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET);
3143             rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3144                        ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3145         }
3146 
3147         ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
3148         r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3149         if (unlikely(r))
3150             dm_integrity_io_error(ic, "writing superblock", r);
3151     }
3152 
3153     if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
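        /*
         * The previous activation used bitmap mode and did not shut down
         * cleanly: reload the on-disk bitmap.  In bitmap mode it is adopted
         * when the bit granularity matches (otherwise everything is marked
         * for recalculation); when not in bitmap mode, recalculation is
         * scheduled as needed, the journal is reinitialized and the
         * dirty-bitmap flag is cleared.
         */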
3154         DEBUG_print("resume dirty_bitmap\n");
3155         rw_journal_sectors(ic, REQ_OP_READ, 0,
3156                    ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3157         if (ic->mode == 'B') {
3158             if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3159                 !ic->reset_recalculate_flag) {
3160                 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
3161                 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
3162                 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors,
3163                              BITMAP_OP_TEST_ALL_CLEAR)) {
3164                     ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3165                     ic->sb->recalc_sector = cpu_to_le64(0);
3166                 }
3167             } else {
3168                 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n",
3169                         ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
3170                 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3171                 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3172                 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3173                 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3174                 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3175                            ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3176                 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3177                 ic->sb->recalc_sector = cpu_to_le64(0);
3178             }
3179         } else {
3180             if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3181                   block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) ||
3182                 ic->reset_recalculate_flag) {
3183                 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3184                 ic->sb->recalc_sector = cpu_to_le64(0);
3185             }
3186             init_journal(ic, 0, ic->journal_sections, 0);
3187             replay_journal(ic);
3188             ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3189         }
3190         r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3191         if (unlikely(r))
3192             dm_integrity_io_error(ic, "writing superblock", r);
3193     } else {
3194         replay_journal(ic);
3195         if (ic->reset_recalculate_flag) {
3196             ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3197             ic->sb->recalc_sector = cpu_to_le64(0);
3198         }
3199         if (ic->mode == 'B') {
3200             ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3201             ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3202             r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3203             if (unlikely(r))
3204                 dm_integrity_io_error(ic, "writing superblock", r);
3205 
3206             block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3207             block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3208             block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3209             if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
3210                 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
3211                 block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
3212                         ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3213                 block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3214                         ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3215                 block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3216                         ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3217             }
3218             rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3219                        ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3220         }
3221     }
3222 
3223     DEBUG_print("testing recalc: %x\n", ic->sb->flags);
3224     if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3225         __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
3226         DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors);
3227         if (recalc_pos < ic->provided_data_sectors) {
3228             queue_work(ic->recalc_wq, &ic->recalc_work);
3229         } else if (recalc_pos > ic->provided_data_sectors) {
3230             ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors);
3231             recalc_write_super(ic);
3232         }
3233     }
3234 
3235     ic->reboot_notifier.notifier_call = dm_integrity_reboot;
3236     ic->reboot_notifier.next = NULL;
3237     ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */
3238     WARN_ON(register_reboot_notifier(&ic->reboot_notifier));
3239 
3240 #if 0
3241     /* set to 1 to stress test synchronous mode */
3242     dm_integrity_enter_synchronous_mode(ic);
3243 #endif
3244 }
3245 
3246 static void dm_integrity_status(struct dm_target *ti, status_type_t type,
3247                 unsigned status_flags, char *result, unsigned maxlen)
3248 {
3249     struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
3250     unsigned arg_count;
3251     size_t sz = 0;
3252 
3253     switch (type) {
3254     case STATUSTYPE_INFO:
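        /*
         * INFO status line: <number of mismatches> <provided_data_sectors>
         * <recalculation position, or '-' if no recalculation is in progress>.
         */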
3255         DMEMIT("%llu %llu",
3256             (unsigned long long)atomic64_read(&ic->number_of_mismatches),
3257             ic->provided_data_sectors);
3258         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
3259             DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector));
3260         else
3261             DMEMIT(" -");
3262         break;
3263 
3264     case STATUSTYPE_TABLE: {
3265         __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
3266         watermark_percentage += ic->journal_entries / 2;
3267         do_div(watermark_percentage, ic->journal_entries);
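        /*
         * This reverses the constructor's threshold computation
         * (free_sectors_threshold ~= journal_entries * (100 - watermark) / 100),
         * rounding to the nearest percent, so the configured journal_watermark
         * value is reported back.
         */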
3268         arg_count = 3;
3269         arg_count += !!ic->meta_dev;
3270         arg_count += ic->sectors_per_block != 1;
3271         arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
3272         arg_count += ic->reset_recalculate_flag;
3273         arg_count += ic->discard;
3274         arg_count += ic->mode == 'J';
3275         arg_count += ic->mode == 'J';
3276         arg_count += ic->mode == 'B';
3277         arg_count += ic->mode == 'B';
3278         arg_count += !!ic->internal_hash_alg.alg_string;
3279         arg_count += !!ic->journal_crypt_alg.alg_string;
3280         arg_count += !!ic->journal_mac_alg.alg_string;
3281         arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
3282         arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0;
3283         arg_count += ic->legacy_recalculate;
3284         DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start,
3285                ic->tag_size, ic->mode, arg_count);
3286         if (ic->meta_dev)
3287             DMEMIT(" meta_device:%s", ic->meta_dev->name);
3288         if (ic->sectors_per_block != 1)
3289             DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
3290         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
3291             DMEMIT(" recalculate");
3292         if (ic->reset_recalculate_flag)
3293             DMEMIT(" reset_recalculate");
3294         if (ic->discard)
3295             DMEMIT(" allow_discards");
3296         DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
3297         DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
3298         DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
3299         if (ic->mode == 'J') {
3300             DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
3301             DMEMIT(" commit_time:%u", ic->autocommit_msec);
3302         }
3303         if (ic->mode == 'B') {
3304             DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
3305             DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
3306         }
3307         if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
3308             DMEMIT(" fix_padding");
3309         if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0)
3310             DMEMIT(" fix_hmac");
3311         if (ic->legacy_recalculate)
3312             DMEMIT(" legacy_recalculate");
3313 
3314 #define EMIT_ALG(a, n)                          \
3315         do {                            \
3316             if (ic->a.alg_string) {             \
3317                 DMEMIT(" %s:%s", n, ic->a.alg_string);  \
3318                 if (ic->a.key_string)           \
3319                     DMEMIT(":%s", ic->a.key_string);\
3320             }                       \
3321         } while (0)
3322         EMIT_ALG(internal_hash_alg, "internal_hash");
3323         EMIT_ALG(journal_crypt_alg, "journal_crypt");
3324         EMIT_ALG(journal_mac_alg, "journal_mac");
3325         break;
3326     }
3327     case STATUSTYPE_IMA:
3328         DMEMIT_TARGET_NAME_VERSION(ti->type);
3329         DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c",
3330             ic->dev->name, ic->start, ic->tag_size, ic->mode);
3331 
3332         if (ic->meta_dev)
3333             DMEMIT(",meta_device=%s", ic->meta_dev->name);
3334         if (ic->sectors_per_block != 1)
3335             DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT);
3336 
3337         DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ?
3338                'y' : 'n');
3339         DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n');
3340         DMEMIT(",fix_padding=%c",
3341                ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n');
3342         DMEMIT(",fix_hmac=%c",
3343                ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n');
3344         DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n');
3345 
3346         DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS);
3347         DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors);
3348         DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors);
3349         DMEMIT(";");
3350         break;
3351     }
3352 }
3353 
3354 static int dm_integrity_iterate_devices(struct dm_target *ti,
3355                     iterate_devices_callout_fn fn, void *data)
3356 {
3357     struct dm_integrity_c *ic = ti->private;
3358 
3359     if (!ic->meta_dev)
3360         return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
3361     else
3362         return fn(ti, ic->dev, 0, ti->len, data);
3363 }
3364 
3365 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
3366 {
3367     struct dm_integrity_c *ic = ti->private;
3368 
3369     if (ic->sectors_per_block > 1) {
3370         limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
3371         limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
3372         blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
3373     }
3374 }
3375 
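/*
 * Journal sizing, with a rough worked example (illustrative, not normative):
 * with a 32-byte tag and one 512-byte sector per block, a journal entry takes
 * roundup(offsetof(struct journal_entry, last_bytes[1]) + 32, 8) = 48 bytes,
 * so JOURNAL_SECTOR_DATA / 48 entries fit in one journal metadata sector
 * (after subtracting JOURNAL_MAC_PER_SECTOR bytes when a journal MAC is used).
 * A section is JOURNAL_BLOCK_SECTORS metadata sectors plus the data sectors
 * for all of its entries.
 */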
3376 static void calculate_journal_section_size(struct dm_integrity_c *ic)
3377 {
3378     unsigned sector_space = JOURNAL_SECTOR_DATA;
3379 
3380     ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
3381     ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
3382                      JOURNAL_ENTRY_ROUNDUP);
3383 
3384     if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
3385         sector_space -= JOURNAL_MAC_PER_SECTOR;
3386     ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
3387     ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
3388     ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
3389     ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
3390 }
3391 
3392 static int calculate_device_limits(struct dm_integrity_c *ic)
3393 {
3394     __u64 initial_sectors;
3395 
3396     calculate_journal_section_size(ic);
3397     initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
3398     if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX)
3399         return -EINVAL;
3400     ic->initial_sectors = initial_sectors;
3401 
3402     if (!ic->meta_dev) {
3403         sector_t last_sector, last_area, last_offset;
3404 
3405         /* we have to maintain excessive padding for compatibility with existing volumes */
3406         __u64 metadata_run_padding =
3407             ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ?
3408             (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) :
3409             (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS);
3410 
3411         ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
3412                         metadata_run_padding) >> SECTOR_SHIFT;
3413         if (!(ic->metadata_run & (ic->metadata_run - 1)))
3414             ic->log2_metadata_run = __ffs(ic->metadata_run);
3415         else
3416             ic->log2_metadata_run = -1;
3417 
3418         get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
3419         last_sector = get_data_sector(ic, last_area, last_offset);
3420         if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
3421             return -EINVAL;
3422     } else {
3423         __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
3424         meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
3425                 >> (ic->log2_buffer_sectors + SECTOR_SHIFT);
3426         meta_size <<= ic->log2_buffer_sectors;
3427         if (ic->initial_sectors + meta_size < ic->initial_sectors ||
3428             ic->initial_sectors + meta_size > ic->meta_device_sectors)
3429             return -EINVAL;
3430         ic->metadata_run = 1;
3431         ic->log2_metadata_run = 0;
3432     }
3433 
3434     return 0;
3435 }
3436 
3437 static void get_provided_data_sectors(struct dm_integrity_c *ic)
3438 {
3439     if (!ic->meta_dev) {
3440         int test_bit;
3441         ic->provided_data_sectors = 0;
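        /*
         * Greedy search for the largest usable data size: try setting each
         * bit from the highest candidate downwards and keep it only if
         * calculate_device_limits() still succeeds with that size.
         */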
3442         for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
3443             __u64 prev_data_sectors = ic->provided_data_sectors;
3444 
3445             ic->provided_data_sectors |= (sector_t)1 << test_bit;
3446             if (calculate_device_limits(ic))
3447                 ic->provided_data_sectors = prev_data_sectors;
3448         }
3449     } else {
3450         ic->provided_data_sectors = ic->data_device_sectors;
3451         ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
3452     }
3453 }
3454 
3455 static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors)
3456 {
3457     unsigned journal_sections;
3458     int test_bit;
3459 
3460     memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
3461     memcpy(ic->sb->magic, SB_MAGIC, 8);
3462     ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
3463     ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
3464     if (ic->journal_mac_alg.alg_string)
3465         ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
3466 
3467     calculate_journal_section_size(ic);
3468     journal_sections = journal_sectors / ic->journal_section_sectors;
3469     if (!journal_sections)
3470         journal_sections = 1;
3471 
3472     if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) {
3473         ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC);
3474         get_random_bytes(ic->sb->salt, SALT_SIZE);
3475     }
3476 
3477     if (!ic->meta_dev) {
3478         if (ic->fix_padding)
3479             ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING);
3480         ic->sb->journal_sections = cpu_to_le32(journal_sections);
3481         if (!interleave_sectors)
3482             interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
3483         ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
3484         ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
3485         ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
3486 
3487         get_provided_data_sectors(ic);
3488         if (!ic->provided_data_sectors)
3489             return -EINVAL;
3490     } else {
3491         ic->sb->log2_interleave_sectors = 0;
3492 
3493         get_provided_data_sectors(ic);
3494         if (!ic->provided_data_sectors)
3495             return -EINVAL;
3496 
3497 try_smaller_buffer:
3498         ic->sb->journal_sections = cpu_to_le32(0);
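        /*
         * Analogous greedy search: grow journal_sections bit by bit (never
         * exceeding the requested count), keeping each bit only if the
         * resulting layout still fits on the metadata device.
         */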
3499         for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) {
3500             __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections);
3501             __u32 test_journal_sections = prev_journal_sections | (1U << test_bit);
3502             if (test_journal_sections > journal_sections)
3503                 continue;
3504             ic->sb->journal_sections = cpu_to_le32(test_journal_sections);
3505             if (calculate_device_limits(ic))
3506                 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections);
3507 
3508         }
3509         if (!le32_to_cpu(ic->sb->journal_sections)) {
3510             if (ic->log2_buffer_sectors > 3) {
3511                 ic->log2_buffer_sectors--;
3512                 goto try_smaller_buffer;
3513             }
3514             return -EINVAL;
3515         }
3516     }
3517 
3518     ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
3519 
3520     sb_set_version(ic);
3521 
3522     return 0;
3523 }
3524 
3525 static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
3526 {
3527     struct gendisk *disk = dm_disk(dm_table_get_md(ti->table));
3528     struct blk_integrity bi;
3529 
3530     memset(&bi, 0, sizeof(bi));
3531     bi.profile = &dm_integrity_profile;
3532     bi.tuple_size = ic->tag_size;
3533     bi.tag_size = bi.tuple_size;
3534     bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
3535 
3536     blk_integrity_register(disk, &bi);
3537     blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
3538 }
3539 
3540 static void dm_integrity_free_page_list(struct page_list *pl)
3541 {
3542     unsigned i;
3543 
3544     if (!pl)
3545         return;
3546     for (i = 0; pl[i].page; i++)
3547         __free_page(pl[i].page);
3548     kvfree(pl);
3549 }
3550 
3551 static struct page_list *dm_integrity_alloc_page_list(unsigned n_pages)
3552 {
3553     struct page_list *pl;
3554     unsigned i;
3555 
3556     pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO);
3557     if (!pl)
3558         return NULL;
3559 
3560     for (i = 0; i < n_pages; i++) {
3561         pl[i].page = alloc_page(GFP_KERNEL);
3562         if (!pl[i].page) {
3563             dm_integrity_free_page_list(pl);
3564             return NULL;
3565         }
3566         if (i)
3567             pl[i - 1].next = &pl[i];
3568     }
3569     pl[i].page = NULL;
3570     pl[i].next = NULL;
3571 
3572     return pl;
3573 }
3574 
3575 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl)
3576 {
3577     unsigned i;
3578     for (i = 0; i < ic->journal_sections; i++)
3579         kvfree(sl[i]);
3580     kvfree(sl);
3581 }
3582 
3583 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic,
3584                                    struct page_list *pl)
3585 {
3586     struct scatterlist **sl;
3587     unsigned i;
3588 
3589     sl = kvmalloc_array(ic->journal_sections,
3590                 sizeof(struct scatterlist *),
3591                 GFP_KERNEL | __GFP_ZERO);
3592     if (!sl)
3593         return NULL;
3594 
3595     for (i = 0; i < ic->journal_sections; i++) {
3596         struct scatterlist *s;
3597         unsigned start_index, start_offset;
3598         unsigned end_index, end_offset;
3599         unsigned n_pages;
3600         unsigned idx;
3601 
3602         page_list_location(ic, i, 0, &start_index, &start_offset);
3603         page_list_location(ic, i, ic->journal_section_sectors - 1,
3604                    &end_index, &end_offset);
3605 
3606         n_pages = (end_index - start_index + 1);
3607 
3608         s = kvmalloc_array(n_pages, sizeof(struct scatterlist),
3609                    GFP_KERNEL);
3610         if (!s) {
3611             dm_integrity_free_journal_scatterlist(ic, sl);
3612             return NULL;
3613         }
3614 
3615         sg_init_table(s, n_pages);
3616         for (idx = start_index; idx <= end_index; idx++) {
3617             char *va = lowmem_page_address(pl[idx].page);
3618             unsigned start = 0, end = PAGE_SIZE;
3619             if (idx == start_index)
3620                 start = start_offset;
3621             if (idx == end_index)
3622                 end = end_offset + (1 << SECTOR_SHIFT);
3623             sg_set_buf(&s[idx - start_index], va + start, end - start);
3624         }
3625 
3626         sl[i] = s;
3627     }
3628 
3629     return sl;
3630 }
3631 
3632 static void free_alg(struct alg_spec *a)
3633 {
3634     kfree_sensitive(a->alg_string);
3635     kfree_sensitive(a->key);
3636     memset(a, 0, sizeof *a);
3637 }
3638 
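/*
 * Parses an option of the form "name:algorithm[:key]", e.g. (illustrative
 * values only) "internal_hash:sha256" or "journal_mac:hmac(sha256):<hex key>".
 * The key, when present, must be an even-length hex string.
 */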
3639 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval)
3640 {
3641     char *k;
3642 
3643     free_alg(a);
3644 
3645     a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL);
3646     if (!a->alg_string)
3647         goto nomem;
3648 
3649     k = strchr(a->alg_string, ':');
3650     if (k) {
3651         *k = 0;
3652         a->key_string = k + 1;
3653         if (strlen(a->key_string) & 1)
3654             goto inval;
3655 
3656         a->key_size = strlen(a->key_string) / 2;
3657         a->key = kmalloc(a->key_size, GFP_KERNEL);
3658         if (!a->key)
3659             goto nomem;
3660         if (hex2bin(a->key, a->key_string, a->key_size))
3661             goto inval;
3662     }
3663 
3664     return 0;
3665 inval:
3666     *error = error_inval;
3667     return -EINVAL;
3668 nomem:
3669     *error = "Out of memory for an argument";
3670     return -ENOMEM;
3671 }
3672 
3673 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
3674            char *error_alg, char *error_key)
3675 {
3676     int r;
3677 
3678     if (a->alg_string) {
3679         *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
3680         if (IS_ERR(*hash)) {
3681             *error = error_alg;
3682             r = PTR_ERR(*hash);
3683             *hash = NULL;
3684             return r;
3685         }
3686 
3687         if (a->key) {
3688             r = crypto_shash_setkey(*hash, a->key, a->key_size);
3689             if (r) {
3690                 *error = error_key;
3691                 return r;
3692             }
3693         } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) {
3694             *error = error_key;
3695             return -ENOKEY;
3696         }
3697     }
3698 
3699     return 0;
3700 }
3701 
3702 static int create_journal(struct dm_integrity_c *ic, char **error)
3703 {
3704     int r = 0;
3705     unsigned i;
3706     __u64 journal_pages, journal_desc_size, journal_tree_size;
3707     unsigned char *crypt_data = NULL, *crypt_iv = NULL;
3708     struct skcipher_request *req = NULL;
3709 
3710     ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
3711     ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
3712     ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL);
3713     ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL);
3714 
3715     journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
3716                 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
3717     journal_desc_size = journal_pages * sizeof(struct page_list);
3718     if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) {
3719         *error = "Journal doesn't fit into memory";
3720         r = -ENOMEM;
3721         goto bad;
3722     }
3723     ic->journal_pages = journal_pages;
3724 
3725     ic->journal = dm_integrity_alloc_page_list(ic->journal_pages);
3726     if (!ic->journal) {
3727         *error = "Could not allocate memory for journal";
3728         r = -ENOMEM;
3729         goto bad;
3730     }
3731     if (ic->journal_crypt_alg.alg_string) {
3732         unsigned ivsize, blocksize;
3733         struct journal_completion comp;
3734 
3735         comp.ic = ic;
3736         ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
3737         if (IS_ERR(ic->journal_crypt)) {
3738             *error = "Invalid journal cipher";
3739             r = PTR_ERR(ic->journal_crypt);
3740             ic->journal_crypt = NULL;
3741             goto bad;
3742         }
3743         ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
3744         blocksize = crypto_skcipher_blocksize(ic->journal_crypt);
3745 
3746         if (ic->journal_crypt_alg.key) {
3747             r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key,
3748                            ic->journal_crypt_alg.key_size);
3749             if (r) {
3750                 *error = "Error setting encryption key";
3751                 goto bad;
3752             }
3753         }
3754         DEBUG_print("cipher %s, block size %u iv size %u\n",
3755                 ic->journal_crypt_alg.alg_string, blocksize, ivsize);
3756 
3757         ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages);
3758         if (!ic->journal_io) {
3759             *error = "Could not allocate memory for journal io";
3760             r = -ENOMEM;
3761             goto bad;
3762         }
3763 
3764         if (blocksize == 1) {
3765             struct scatterlist *sg;
3766 
3767             req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
3768             if (!req) {
3769                 *error = "Could not allocate crypt request";
3770                 r = -ENOMEM;
3771                 goto bad;
3772             }
3773 
3774             crypt_iv = kzalloc(ivsize, GFP_KERNEL);
3775             if (!crypt_iv) {
3776                 *error = "Could not allocate iv";
3777                 r = -ENOMEM;
3778                 goto bad;
3779             }
3780 
3781             ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages);
3782             if (!ic->journal_xor) {
3783                 *error = "Could not allocate memory for journal xor";
3784                 r = -ENOMEM;
3785                 goto bad;
3786             }
3787 
3788             sg = kvmalloc_array(ic->journal_pages + 1,
3789                         sizeof(struct scatterlist),
3790                         GFP_KERNEL);
3791             if (!sg) {
3792                 *error = "Unable to allocate sg list";
3793                 r = -ENOMEM;
3794                 goto bad;
3795             }
3796             sg_init_table(sg, ic->journal_pages + 1);
3797             for (i = 0; i < ic->journal_pages; i++) {
3798                 char *va = lowmem_page_address(ic->journal_xor[i].page);
3799                 clear_page(va);
3800                 sg_set_buf(&sg[i], va, PAGE_SIZE);
3801             }
3802             sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
3803 
3804             skcipher_request_set_crypt(req, sg, sg,
3805                            PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
3806             init_completion(&comp.comp);
3807             comp.in_flight = (atomic_t)ATOMIC_INIT(1);
3808             if (do_crypt(true, req, &comp))
3809                 wait_for_completion(&comp.comp);
3810             kvfree(sg);
3811             r = dm_integrity_failed(ic);
3812             if (r) {
3813                 *error = "Unable to encrypt journal";
3814                 goto bad;
3815             }
3816             DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data");
3817 
3818             crypto_free_skcipher(ic->journal_crypt);
3819             ic->journal_crypt = NULL;
3820         } else {
3821             unsigned crypt_len = roundup(ivsize, blocksize);
3822 
3823             req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
3824             if (!req) {
3825                 *error = "Could not allocate crypt request";
3826                 r = -ENOMEM;
3827                 goto bad;
3828             }
3829 
3830             crypt_iv = kmalloc(ivsize, GFP_KERNEL);
3831             if (!crypt_iv) {
3832                 *error = "Could not allocate iv";
3833                 r = -ENOMEM;
3834                 goto bad;
3835             }
3836 
3837             crypt_data = kmalloc(crypt_len, GFP_KERNEL);
3838             if (!crypt_data) {
3839                 *error = "Unable to allocate crypt data";
3840                 r = -ENOMEM;
3841                 goto bad;
3842             }
3843 
3844             ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
3845             if (!ic->journal_scatterlist) {
3846                 *error = "Unable to allocate sg list";
3847                 r = -ENOMEM;
3848                 goto bad;
3849             }
3850             ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io);
3851             if (!ic->journal_io_scatterlist) {
3852                 *error = "Unable to allocate sg list";
3853                 r = -ENOMEM;
3854                 goto bad;
3855             }
3856             ic->sk_requests = kvmalloc_array(ic->journal_sections,
3857                              sizeof(struct skcipher_request *),
3858                              GFP_KERNEL | __GFP_ZERO);
3859             if (!ic->sk_requests) {
3860                 *error = "Unable to allocate sk requests";
3861                 r = -ENOMEM;
3862                 goto bad;
3863             }
3864             for (i = 0; i < ic->journal_sections; i++) {
3865                 struct scatterlist sg;
3866                 struct skcipher_request *section_req;
3867                 __le32 section_le = cpu_to_le32(i);
3868 
3869                 memset(crypt_iv, 0x00, ivsize);
3870                 memset(crypt_data, 0x00, crypt_len);
3871                 memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le)));
3872 
3873                 sg_init_one(&sg, crypt_data, crypt_len);
3874                 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv);
3875                 init_completion(&comp.comp);
3876                 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
3877                 if (do_crypt(true, req, &comp))
3878                     wait_for_completion(&comp.comp);
3879 
3880                 r = dm_integrity_failed(ic);
3881                 if (r) {
3882                     *error = "Unable to generate iv";
3883                     goto bad;
3884                 }
3885 
3886                 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
3887                 if (!section_req) {
3888                     *error = "Unable to allocate crypt request";
3889                     r = -ENOMEM;
3890                     goto bad;
3891                 }
3892                 section_req->iv = kmalloc_array(ivsize, 2,
3893                                 GFP_KERNEL);
3894                 if (!section_req->iv) {
3895                     skcipher_request_free(section_req);
3896                     *error = "Unable to allocate iv";
3897                     r = -ENOMEM;
3898                     goto bad;
3899                 }
3900                 memcpy(section_req->iv + ivsize, crypt_data, ivsize);
3901                 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT;
3902                 ic->sk_requests[i] = section_req;
3903                 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i);
3904             }
3905         }
3906     }
3907 
3908     for (i = 0; i < N_COMMIT_IDS; i++) {
3909         unsigned j;
3910 retest_commit_id:
3911         for (j = 0; j < i; j++) {
3912             if (ic->commit_ids[j] == ic->commit_ids[i]) {
3913                 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1);
3914                 goto retest_commit_id;
3915             }
3916         }
3917         DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]);
3918     }
3919 
3920     journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node);
3921     if (journal_tree_size > ULONG_MAX) {
3922         *error = "Journal doesn't fit into memory";
3923         r = -ENOMEM;
3924         goto bad;
3925     }
3926     ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
3927     if (!ic->journal_tree) {
3928         *error = "Could not allocate memory for journal tree";
3929         r = -ENOMEM;
3930     }
3931 bad:
3932     kfree(crypt_data);
3933     kfree(crypt_iv);
3934     skcipher_request_free(req);
3935 
3936     return r;
3937 }
3938 
3939 /*
3940  * Construct an integrity mapping
3941  *
3942  * Arguments:
3943  *  device
3944  *  offset from the start of the device
3945  *  tag size
3946  *  D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode
3947  *  number of optional arguments
3948  *  optional arguments:
3949  *      journal_sectors
3950  *      interleave_sectors
3951  *      buffer_sectors
3952  *      journal_watermark
3953  *      commit_time
3954  *      meta_device
3955  *      block_size
3956  *      sectors_per_bit
3957  *      bitmap_flush_interval
3958  *      internal_hash
3959  *      journal_crypt
3960  *      journal_mac
3961  *      recalculate
      *      reset_recalculate
      *      allow_discards
      *      fix_padding
      *      fix_hmac
      *      legacy_recalculate
3962  */
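/*
 * Illustrative table line (device path and sector counts are examples only,
 * not taken from this code):
 *
 *     0 1953792 integrity /dev/sdb 0 32 J 2 journal_sectors:1024 internal_hash:sha256
 *
 * i.e. <logical start> <length> integrity <device> <offset> <tag size> <mode>
 * <number of optional arguments> <optional arguments...>
 */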
3963 static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3964 {
3965     struct dm_integrity_c *ic;
3966     char dummy;
3967     int r;
3968     unsigned extra_args;
3969     struct dm_arg_set as;
3970     static const struct dm_arg _args[] = {
3971         {0, 18, "Invalid number of feature args"},
3972     };
3973     unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
3974     bool should_write_sb;
3975     __u64 threshold;
3976     unsigned long long start;
3977     __s8 log2_sectors_per_bitmap_bit = -1;
3978     __s8 log2_blocks_per_bitmap_bit;
3979     __u64 bits_in_journal;
3980     __u64 n_bitmap_bits;
3981 
3982 #define DIRECT_ARGUMENTS    4
3983 
3984     if (argc <= DIRECT_ARGUMENTS) {
3985         ti->error = "Invalid argument count";
3986         return -EINVAL;
3987     }
3988 
3989     ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL);
3990     if (!ic) {
3991         ti->error = "Cannot allocate integrity context";
3992         return -ENOMEM;
3993     }
3994     ti->private = ic;
3995     ti->per_io_data_size = sizeof(struct dm_integrity_io);
3996     ic->ti = ti;
3997 
3998     ic->in_progress = RB_ROOT;
3999     INIT_LIST_HEAD(&ic->wait_list);
4000     init_waitqueue_head(&ic->endio_wait);
4001     bio_list_init(&ic->flush_bio_list);
4002     init_waitqueue_head(&ic->copy_to_journal_wait);
4003     init_completion(&ic->crypto_backoff);
4004     atomic64_set(&ic->number_of_mismatches, 0);
4005     ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
4006 
4007     r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
4008     if (r) {
4009         ti->error = "Device lookup failed";
4010         goto bad;
4011     }
4012 
4013     if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
4014         ti->error = "Invalid starting offset";
4015         r = -EINVAL;
4016         goto bad;
4017     }
4018     ic->start = start;
4019 
4020     if (strcmp(argv[2], "-")) {
4021         if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) {
4022             ti->error = "Invalid tag size";
4023             r = -EINVAL;
4024             goto bad;
4025         }
4026     }
4027 
4028     if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") ||
4029         !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) {
4030         ic->mode = argv[3][0];
4031     } else {
4032         ti->error = "Invalid mode (expecting J, B, D, R)";
4033         r = -EINVAL;
4034         goto bad;
4035     }
4036 
4037     journal_sectors = 0;
4038     interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
4039     buffer_sectors = DEFAULT_BUFFER_SECTORS;
4040     journal_watermark = DEFAULT_JOURNAL_WATERMARK;
4041     sync_msec = DEFAULT_SYNC_MSEC;
4042     ic->sectors_per_block = 1;
4043 
4044     as.argc = argc - DIRECT_ARGUMENTS;
4045     as.argv = argv + DIRECT_ARGUMENTS;
4046     r = dm_read_arg_group(_args, &as, &extra_args, &ti->error);
4047     if (r)
4048         goto bad;
4049 
4050     while (extra_args--) {
4051         const char *opt_string;
4052         unsigned val;
4053         unsigned long long llval;
4054         opt_string = dm_shift_arg(&as);
4055         if (!opt_string) {
4056             r = -EINVAL;
4057             ti->error = "Not enough feature arguments";
4058             goto bad;
4059         }
4060         if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
4061             journal_sectors = val ? val : 1;
4062         else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
4063             interleave_sectors = val;
4064         else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
4065             buffer_sectors = val;
4066         else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100)
4067             journal_watermark = val;
4068         else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
4069             sync_msec = val;
4070         else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
4071             if (ic->meta_dev) {
4072                 dm_put_device(ti, ic->meta_dev);
4073                 ic->meta_dev = NULL;
4074             }
4075             r = dm_get_device(ti, strchr(opt_string, ':') + 1,
4076                       dm_table_get_mode(ti->table), &ic->meta_dev);
4077             if (r) {
4078                 ti->error = "Device lookup failed";
4079                 goto bad;
4080             }
4081         } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
4082             if (val < 1 << SECTOR_SHIFT ||
4083                 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
4084                 (val & (val - 1))) {
4085                 r = -EINVAL;
4086                 ti->error = "Invalid block_size argument";
4087                 goto bad;
4088             }
4089             ic->sectors_per_block = val >> SECTOR_SHIFT;
4090         } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
4091             log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
4092         } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
4093             if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
4094                 r = -EINVAL;
4095                 ti->error = "Invalid bitmap_flush_interval argument";
4096                 goto bad;
4097             }
4098             ic->bitmap_flush_interval = msecs_to_jiffies(val);
4099         } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
4100             r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
4101                         "Invalid internal_hash argument");
4102             if (r)
4103                 goto bad;
4104         } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
4105             r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
4106                         "Invalid journal_crypt argument");
4107             if (r)
4108                 goto bad;
4109         } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
4110             r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
4111                         "Invalid journal_mac argument");
4112             if (r)
4113                 goto bad;
4114         } else if (!strcmp(opt_string, "recalculate")) {
4115             ic->recalculate_flag = true;
4116         } else if (!strcmp(opt_string, "reset_recalculate")) {
4117             ic->recalculate_flag = true;
4118             ic->reset_recalculate_flag = true;
4119         } else if (!strcmp(opt_string, "allow_discards")) {
4120             ic->discard = true;
4121         } else if (!strcmp(opt_string, "fix_padding")) {
4122             ic->fix_padding = true;
4123         } else if (!strcmp(opt_string, "fix_hmac")) {
4124             ic->fix_hmac = true;
4125         } else if (!strcmp(opt_string, "legacy_recalculate")) {
4126             ic->legacy_recalculate = true;
4127         } else {
4128             r = -EINVAL;
4129             ti->error = "Invalid argument";
4130             goto bad;
4131         }
4132     }
4133 
4134     ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev);
4135     if (!ic->meta_dev)
4136         ic->meta_device_sectors = ic->data_device_sectors;
4137     else
4138         ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev);
4139 
4140     if (!journal_sectors) {
4141         journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
4142                       ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
4143     }
4144 
4145     if (!buffer_sectors)
4146         buffer_sectors = 1;
4147     ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
4148 
4149     r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
4150             "Invalid internal hash", "Error setting internal hash key");
4151     if (r)
4152         goto bad;
4153 
4154     r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error,
4155             "Invalid journal mac", "Error setting journal mac key");
4156     if (r)
4157         goto bad;
4158 
4159     if (!ic->tag_size) {
4160         if (!ic->internal_hash) {
4161             ti->error = "Unknown tag size";
4162             r = -EINVAL;
4163             goto bad;
4164         }
4165         ic->tag_size = crypto_shash_digestsize(ic->internal_hash);
4166     }
4167     if (ic->tag_size > MAX_TAG_SIZE) {
4168         ti->error = "Too big tag size";
4169         r = -EINVAL;
4170         goto bad;
4171     }
4172     if (!(ic->tag_size & (ic->tag_size - 1)))
4173         ic->log2_tag_size = __ffs(ic->tag_size);
4174     else
4175         ic->log2_tag_size = -1;
4176 
4177     if (ic->mode == 'B' && !ic->internal_hash) {
4178         r = -EINVAL;
4179         ti->error = "Bitmap mode can be only used with internal hash";
4180         goto bad;
4181     }
4182 
4183     if (ic->discard && !ic->internal_hash) {
4184         r = -EINVAL;
4185         ti->error = "Discard can be only used with internal hash";
4186         goto bad;
4187     }
4188 
4189     ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
4190     ic->autocommit_msec = sync_msec;
4191     timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
4192 
4193     ic->io = dm_io_client_create();
4194     if (IS_ERR(ic->io)) {
4195         r = PTR_ERR(ic->io);
4196         ic->io = NULL;
4197         ti->error = "Cannot allocate dm io";
4198         goto bad;
4199     }
4200 
4201     r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache);
4202     if (r) {
4203         ti->error = "Cannot allocate mempool";
4204         goto bad;
4205     }
4206 
4207     ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
4208                       WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
4209     if (!ic->metadata_wq) {
4210         ti->error = "Cannot allocate workqueue";
4211         r = -ENOMEM;
4212         goto bad;
4213     }
4214 
4215     /*
4216      * If this workqueue were per-CPU, bios could be reordered,
4217      * reducing performance.
4218      */
4219     ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4220     if (!ic->wait_wq) {
4221         ti->error = "Cannot allocate workqueue";
4222         r = -ENOMEM;
4223         goto bad;
4224     }
4225 
4226     ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
4227                       METADATA_WORKQUEUE_MAX_ACTIVE);
4228     if (!ic->offload_wq) {
4229         ti->error = "Cannot allocate workqueue";
4230         r = -ENOMEM;
4231         goto bad;
4232     }
4233 
4234     ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
4235     if (!ic->commit_wq) {
4236         ti->error = "Cannot allocate workqueue";
4237         r = -ENOMEM;
4238         goto bad;
4239     }
4240     INIT_WORK(&ic->commit_work, integrity_commit);
4241 
4242     if (ic->mode == 'J' || ic->mode == 'B') {
4243         ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
4244         if (!ic->writer_wq) {
4245             ti->error = "Cannot allocate workqueue";
4246             r = -ENOMEM;
4247             goto bad;
4248         }
4249         INIT_WORK(&ic->writer_work, integrity_writer);
4250     }
4251 
4252     ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL);
4253     if (!ic->sb) {
4254         r = -ENOMEM;
4255         ti->error = "Cannot allocate superblock area";
4256         goto bad;
4257     }
4258 
4259     r = sync_rw_sb(ic, REQ_OP_READ);
4260     if (r) {
4261         ti->error = "Error reading superblock";
4262         goto bad;
4263     }
4264     should_write_sb = false;
4265     if (memcmp(ic->sb->magic, SB_MAGIC, 8)) {
4266         if (ic->mode != 'R') {
4267             if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) {
4268                 r = -EINVAL;
4269                 ti->error = "The device is not initialized";
4270                 goto bad;
4271             }
4272         }
4273 
4274         r = initialize_superblock(ic, journal_sectors, interleave_sectors);
4275         if (r) {
4276             ti->error = "Could not initialize superblock";
4277             goto bad;
4278         }
4279         if (ic->mode != 'R')
4280             should_write_sb = true;
4281     }
4282 
4283     if (!ic->sb->version || ic->sb->version > SB_VERSION_5) {
4284         r = -EINVAL;
4285         ti->error = "Unknown version";
4286         goto bad;
4287     }
4288     if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
4289         r = -EINVAL;
4290         ti->error = "Tag size doesn't match the information in superblock";
4291         goto bad;
4292     }
4293     if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
4294         r = -EINVAL;
4295         ti->error = "Block size doesn't match the information in superblock";
4296         goto bad;
4297     }
4298     if (!le32_to_cpu(ic->sb->journal_sections)) {
4299         r = -EINVAL;
4300         ti->error = "Corrupted superblock, journal_sections is 0";
4301         goto bad;
4302     }
4303     /* make sure that ti->max_io_len doesn't overflow */
4304     if (!ic->meta_dev) {
4305         if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
4306             ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
4307             r = -EINVAL;
4308             ti->error = "Invalid interleave_sectors in the superblock";
4309             goto bad;
4310         }
4311     } else {
4312         if (ic->sb->log2_interleave_sectors) {
4313             r = -EINVAL;
4314             ti->error = "Invalid interleave_sectors in the superblock";
4315             goto bad;
4316         }
4317     }
4318     if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
4319         r = -EINVAL;
4320         ti->error = "Journal mac mismatch";
4321         goto bad;
4322     }
4323 
4324     get_provided_data_sectors(ic);
4325     if (!ic->provided_data_sectors) {
4326         r = -EINVAL;
4327         ti->error = "The device is too small";
4328         goto bad;
4329     }
4330 
4331 try_smaller_buffer:
4332     r = calculate_device_limits(ic);
4333     if (r) {
4334         if (ic->meta_dev) {
4335             if (ic->log2_buffer_sectors > 3) {
4336                 ic->log2_buffer_sectors--;
4337                 goto try_smaller_buffer;
4338             }
4339         }
4340         ti->error = "The device is too small";
4341         goto bad;
4342     }
4343 
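    /*
     * In bitmap mode every bit covers 2^log2_sectors_per_bitmap_bit
     * sectors; the granularity is increased until the whole bitmap fits
     * into the space reserved for the journal.
     */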
4344     if (log2_sectors_per_bitmap_bit < 0)
4345         log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
4346     if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
4347         log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
4348 
4349     bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
4350     if (bits_in_journal > UINT_MAX)
4351         bits_in_journal = UINT_MAX;
4352     while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
4353         log2_sectors_per_bitmap_bit++;
4354 
4355     log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
4356     ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
4357     if (should_write_sb) {
4358         ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
4359     }
4360     n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
4361                 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
4362     ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
4363 
4364     if (!ic->meta_dev)
4365         ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
4366 
4367     if (ti->len > ic->provided_data_sectors) {
4368         r = -EINVAL;
4369         ti->error = "Not enough provided sectors for requested mapping size";
4370         goto bad;
4371     }
4372 
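    /*
     * Convert journal_watermark (the percentage of the journal that may
     * fill before a commit is forced) into an absolute number of free
     * entries, rounded to the nearest integer.
     */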
4374     threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
4375     threshold += 50;
4376     do_div(threshold, 100);
4377     ic->free_sectors_threshold = threshold;
4378 
4379     DEBUG_print("initialized:\n");
4380     DEBUG_print("   integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size));
4381     DEBUG_print("   journal_entry_size %u\n", ic->journal_entry_size);
4382     DEBUG_print("   journal_entries_per_sector %u\n", ic->journal_entries_per_sector);
4383     DEBUG_print("   journal_section_entries %u\n", ic->journal_section_entries);
4384     DEBUG_print("   journal_section_sectors %u\n", ic->journal_section_sectors);
4385     DEBUG_print("   journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
4386     DEBUG_print("   journal_entries %u\n", ic->journal_entries);
4387     DEBUG_print("   log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
4388     DEBUG_print("   data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev));
4389     DEBUG_print("   initial_sectors 0x%x\n", ic->initial_sectors);
4390     DEBUG_print("   metadata_run 0x%x\n", ic->metadata_run);
4391     DEBUG_print("   log2_metadata_run %d\n", ic->log2_metadata_run);
4392     DEBUG_print("   provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors);
4393     DEBUG_print("   log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
4394     DEBUG_print("   bits_in_journal %llu\n", bits_in_journal);
4395 
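    /*
     * If the user asked for recalculation and none is already in
     * progress, flag it in the superblock and start from sector 0.
     */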
4396     if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
4397         ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
4398         ic->sb->recalc_sector = cpu_to_le64(0);
4399     }
4400 
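    /*
     * Recalculation is only possible with an internal hash; it needs a
     * dedicated workqueue, a data buffer of RECALC_SECTORS sectors and
     * a buffer for the recomputed tags (padded if the digest is larger
     * than the stored tag).
     */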
4401     if (ic->internal_hash) {
4402         size_t recalc_tags_size;
4403         ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
4404         if (!ic->recalc_wq) {
4405             ti->error = "Cannot allocate workqueue";
4406             r = -ENOMEM;
4407             goto bad;
4408         }
4409         INIT_WORK(&ic->recalc_work, integrity_recalc);
4410         ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
4411         if (!ic->recalc_buffer) {
4412             ti->error = "Cannot allocate buffer for recalculating";
4413             r = -ENOMEM;
4414             goto bad;
4415         }
4416         recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
4417         if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
4418             recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
4419         ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
4420         if (!ic->recalc_tags) {
4421             ti->error = "Cannot allocate tags for recalculating";
4422             r = -ENOMEM;
4423             goto bad;
4424         }
4425     } else {
4426         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
4427             ti->error = "Recalculate can only be specified with internal_hash";
4428             r = -EINVAL;
4429             goto bad;
4430         }
4431     }
4432 
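    /*
     * Resuming an unfinished recalculation with a keyed (HMAC) hash is
     * refused for security reasons unless "legacy_recalculate" was
     * specified (see dm_integrity_disable_recalculate()).
     */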
4433     if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
4434         le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
4435         dm_integrity_disable_recalculate(ic)) {
4436         ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
4437         r = -EOPNOTSUPP;
4438         goto bad;
4439     }
4440 
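    /*
     * The tag (metadata) area is accessed through dm-bufio with buffers
     * of 2^log2_buffer_sectors sectors; the area begins after the
     * superblock and journal (ic->start + ic->initial_sectors).
     */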
4441     ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
4442             1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
4443     if (IS_ERR(ic->bufio)) {
4444         r = PTR_ERR(ic->bufio);
4445         ti->error = "Cannot initialize dm-bufio";
4446         ic->bufio = NULL;
4447         goto bad;
4448     }
4449     dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
4450 
4451     if (ic->mode != 'R') {
4452         r = create_journal(ic, &ti->error);
4453         if (r)
4454             goto bad;
4456     }
4457 
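    /*
     * Bitmap mode: the journal area holds the on-disk bitmap, so each
     * bitmap_block_status entry points into the already allocated
     * journal pages.  Separate in-memory bitmaps track regions being
     * recalculated and regions that may currently be written.
     */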
4458     if (ic->mode == 'B') {
4459         unsigned i;
4460         unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
4461 
4462         ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
4463         if (!ic->recalc_bitmap) {
4464             r = -ENOMEM;
4465             goto bad;
4466         }
4467         ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
4468         if (!ic->may_write_bitmap) {
4469             r = -ENOMEM;
4470             goto bad;
4471         }
4472         ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
4473         if (!ic->bbs) {
4474             r = -ENOMEM;
4475             goto bad;
4476         }
4477         INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
4478         for (i = 0; i < ic->n_bitmap_blocks; i++) {
4479             struct bitmap_block_status *bbs = &ic->bbs[i];
4480             unsigned sector, pl_index, pl_offset;
4481 
4482             INIT_WORK(&bbs->work, bitmap_block_work);
4483             bbs->ic = ic;
4484             bbs->idx = i;
4485             bio_list_init(&bbs->bio_queue);
4486             spin_lock_init(&bbs->bio_queue_lock);
4487 
4488             sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
4489             pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
4490             pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
4491 
4492             bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
4493         }
4494     }
4495 
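    /*
     * A freshly formatted device: initialize the journal first, then
     * write the new superblock with FUA so the format is durable before
     * any I/O is accepted.
     */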
4496     if (should_write_sb) {
4497         init_journal(ic, 0, ic->journal_sections, 0);
4498         r = dm_integrity_failed(ic);
4499         if (unlikely(r)) {
4500             ti->error = "Error initializing journal";
4501             goto bad;
4502         }
4503         r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
4504         if (r) {
4505             ti->error = "Error initializing superblock";
4506             goto bad;
4507         }
4508         ic->just_formatted = true;
4509     }
4510 
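    /*
     * Cap the bio size: an interleaved layout limits a bio to one
     * interleave unit of data, and bitmap mode ensures a bio never
     * spans more than one bitmap block.
     */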
4511     if (!ic->meta_dev) {
4512         r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
4513         if (r)
4514             goto bad;
4515     }
4516     if (ic->mode == 'B') {
4517         unsigned max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
4518         if (!max_io_len)
4519             max_io_len = 1U << 31;
4520         DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
4521         if (!ti->max_io_len || ti->max_io_len > max_io_len) {
4522             r = dm_set_target_max_io_len(ti, max_io_len);
4523             if (r)
4524                 goto bad;
4525         }
4526     }
4527 
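    /*
     * Without an internal hash the integrity tags are supplied by the
     * upper layer, so register a block integrity profile on the mapped
     * device.
     */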
4528     if (!ic->internal_hash)
4529         dm_integrity_set(ti, ic);
4530 
4531     ti->num_flush_bios = 1;
4532     ti->flush_supported = true;
4533     if (ic->discard)
4534         ti->num_discard_bios = 1;
4535 
4536     dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1);
4537     return 0;
4538 
4539 bad:
4540     dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0);
4541     dm_integrity_dtr(ti);
4542     return r;
4543 }
4544 
4545 static void dm_integrity_dtr(struct dm_target *ti)
4546 {
4547     struct dm_integrity_c *ic = ti->private;
4548 
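    /*
     * All I/O must have completed before the target is destroyed.
     * Resources are released roughly in the reverse order of the
     * constructor; the NULL checks allow tearing down a target whose
     * constructor failed partway through.
     */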
4549     BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
4550     BUG_ON(!list_empty(&ic->wait_list));
4551 
4552     if (ic->metadata_wq)
4553         destroy_workqueue(ic->metadata_wq);
4554     if (ic->wait_wq)
4555         destroy_workqueue(ic->wait_wq);
4556     if (ic->offload_wq)
4557         destroy_workqueue(ic->offload_wq);
4558     if (ic->commit_wq)
4559         destroy_workqueue(ic->commit_wq);
4560     if (ic->writer_wq)
4561         destroy_workqueue(ic->writer_wq);
4562     if (ic->recalc_wq)
4563         destroy_workqueue(ic->recalc_wq);
4564     vfree(ic->recalc_buffer);
4565     kvfree(ic->recalc_tags);
4566     kvfree(ic->bbs);
4567     if (ic->bufio)
4568         dm_bufio_client_destroy(ic->bufio);
4569     mempool_exit(&ic->journal_io_mempool);
4570     if (ic->io)
4571         dm_io_client_destroy(ic->io);
4572     if (ic->dev)
4573         dm_put_device(ti, ic->dev);
4574     if (ic->meta_dev)
4575         dm_put_device(ti, ic->meta_dev);
4576     dm_integrity_free_page_list(ic->journal);
4577     dm_integrity_free_page_list(ic->journal_io);
4578     dm_integrity_free_page_list(ic->journal_xor);
4579     dm_integrity_free_page_list(ic->recalc_bitmap);
4580     dm_integrity_free_page_list(ic->may_write_bitmap);
4581     if (ic->journal_scatterlist)
4582         dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
4583     if (ic->journal_io_scatterlist)
4584         dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist);
4585     if (ic->sk_requests) {
4586         unsigned i;
4587 
4588         for (i = 0; i < ic->journal_sections; i++) {
4589             struct skcipher_request *req = ic->sk_requests[i];
4590             if (req) {
4591                 kfree_sensitive(req->iv);
4592                 skcipher_request_free(req);
4593             }
4594         }
4595         kvfree(ic->sk_requests);
4596     }
4597     kvfree(ic->journal_tree);
4598     if (ic->sb)
4599         free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);
4600 
4601     if (ic->internal_hash)
4602         crypto_free_shash(ic->internal_hash);
4603     free_alg(&ic->internal_hash_alg);
4604 
4605     if (ic->journal_crypt)
4606         crypto_free_skcipher(ic->journal_crypt);
4607     free_alg(&ic->journal_crypt_alg);
4608 
4609     if (ic->journal_mac)
4610         crypto_free_shash(ic->journal_mac);
4611     free_alg(&ic->journal_mac_alg);
4612 
4613     kfree(ic);
4614     dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1);
4615 }
4616 
4617 static struct target_type integrity_target = {
4618     .name           = "integrity",
4619     .version        = {1, 10, 0},
4620     .module         = THIS_MODULE,
4621     .features       = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
4622     .ctr            = dm_integrity_ctr,
4623     .dtr            = dm_integrity_dtr,
4624     .map            = dm_integrity_map,
4625     .postsuspend        = dm_integrity_postsuspend,
4626     .resume         = dm_integrity_resume,
4627     .status         = dm_integrity_status,
4628     .iterate_devices    = dm_integrity_iterate_devices,
4629     .io_hints       = dm_integrity_io_hints,
4630 };
4631 
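/*
 * Module init/exit: create the kmem cache used for journal I/O bookkeeping
 * (struct journal_io) and register/unregister the "integrity" target.
 */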
4632 static int __init dm_integrity_init(void)
4633 {
4634     int r;
4635 
4636     journal_io_cache = kmem_cache_create("integrity_journal_io",
4637                          sizeof(struct journal_io), 0, 0, NULL);
4638     if (!journal_io_cache) {
4639         DMERR("can't allocate journal io cache");
4640         return -ENOMEM;
4641     }
4642 
4643     r = dm_register_target(&integrity_target);
4644 
4645     if (r < 0)
4646         DMERR("register failed %d", r);
4647 
4648     return r;
4649 }
4650 
4651 static void __exit dm_integrity_exit(void)
4652 {
4653     dm_unregister_target(&integrity_target);
4654     kmem_cache_destroy(journal_io_cache);
4655 }
4656 
4657 module_init(dm_integrity_init);
4658 module_exit(dm_integrity_exit);
4659 
4660 MODULE_AUTHOR("Milan Broz");
4661 MODULE_AUTHOR("Mikulas Patocka");
4662 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension");
4663 MODULE_LICENSE("GPL");