Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
0003  *
0004  * This file is released under the GPL.
0005  */
0006 
0007 #include <linux/blkdev.h>
0008 #include <linux/device-mapper.h>
0009 #include <linux/delay.h>
0010 #include <linux/fs.h>
0011 #include <linux/init.h>
0012 #include <linux/kdev_t.h>
0013 #include <linux/list.h>
0014 #include <linux/list_bl.h>
0015 #include <linux/mempool.h>
0016 #include <linux/module.h>
0017 #include <linux/slab.h>
0018 #include <linux/vmalloc.h>
0019 #include <linux/log2.h>
0020 #include <linux/dm-kcopyd.h>
0021 
0022 #include "dm.h"
0023 
0024 #include "dm-exception-store.h"
0025 
0026 #define DM_MSG_PREFIX "snapshots"
0027 
0028 static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
0029 
0030 #define dm_target_is_snapshot_merge(ti) \
0031     ((ti)->type->name == dm_snapshot_merge_target_name)
0032 
0033 /*
0034  * The size of the mempool used to track chunks in use.
0035  */
0036 #define MIN_IOS 256
0037 
0038 #define DM_TRACKED_CHUNK_HASH_SIZE  16
0039 #define DM_TRACKED_CHUNK_HASH(x)    ((unsigned long)(x) & \
0040                      (DM_TRACKED_CHUNK_HASH_SIZE - 1))
0041 
0042 struct dm_exception_table {
0043     uint32_t hash_mask;
0044     unsigned hash_shift;
0045     struct hlist_bl_head *table;
0046 };
0047 
0048 struct dm_snapshot {
0049     struct rw_semaphore lock;
0050 
0051     struct dm_dev *origin;
0052     struct dm_dev *cow;
0053 
0054     struct dm_target *ti;
0055 
0056     /* List of snapshots per Origin */
0057     struct list_head list;
0058 
0059     /*
0060      * You can't use a snapshot if this is 0 (e.g. if full).
0061      * A snapshot-merge target never clears this.
0062      */
0063     int valid;
0064 
0065     /*
0066      * The snapshot overflowed because of a write to the snapshot device.
0067      * We don't have to invalidate the snapshot in this case, but we need
0068      * to prevent further writes.
0069      */
0070     int snapshot_overflowed;
0071 
0072     /* Origin writes don't trigger exceptions until this is set */
0073     int active;
0074 
0075     atomic_t pending_exceptions_count;
0076 
0077     spinlock_t pe_allocation_lock;
0078 
0079     /* Protected by "pe_allocation_lock" */
0080     sector_t exception_start_sequence;
0081 
0082     /* Protected by kcopyd single-threaded callback */
0083     sector_t exception_complete_sequence;
0084 
0085     /*
0086      * A list of pending exceptions that completed out of order.
0087      * Protected by kcopyd single-threaded callback.
0088      */
0089     struct rb_root out_of_order_tree;
0090 
0091     mempool_t pending_pool;
0092 
0093     struct dm_exception_table pending;
0094     struct dm_exception_table complete;
0095 
0096     /*
0097      * pe_lock protects all pending_exception operations and access
0098      * as well as the snapshot_bios list.
0099      */
0100     spinlock_t pe_lock;
0101 
0102     /* Chunks with outstanding reads */
0103     spinlock_t tracked_chunk_lock;
0104     struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
0105 
0106     /* The on disk metadata handler */
0107     struct dm_exception_store *store;
0108 
0109     unsigned in_progress;
0110     struct wait_queue_head in_progress_wait;
0111 
0112     struct dm_kcopyd_client *kcopyd_client;
0113 
0114     /* Wait for events based on state_bits */
0115     unsigned long state_bits;
0116 
0117     /* Range of chunks currently being merged. */
0118     chunk_t first_merging_chunk;
0119     int num_merging_chunks;
0120 
0121     /*
0122      * The merge operation failed if this flag is set.
0123      * Failure modes are handled as follows:
0124      * - I/O error reading the header
0125      *      => don't load the target; abort.
0126      * - Header does not have "valid" flag set
0127      *      => use the origin; forget about the snapshot.
0128      * - I/O error when reading exceptions
0129      *      => don't load the target; abort.
0130      *         (We can't use the intermediate origin state.)
0131      * - I/O error while merging
0132      *  => stop merging; set merge_failed; process I/O normally.
0133      */
0134     bool merge_failed:1;
0135 
0136     bool discard_zeroes_cow:1;
0137     bool discard_passdown_origin:1;
0138 
0139     /*
0140      * Incoming bios that overlap with chunks being merged must wait
0141      * for them to be committed.
0142      */
0143     struct bio_list bios_queued_during_merge;
0144 };
0145 
0146 /*
0147  * state_bits:
0148  *   RUNNING_MERGE  - Merge operation is in progress.
0149  *   SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
0150  *                    cleared afterwards.
0151  */
0152 #define RUNNING_MERGE          0
0153 #define SHUTDOWN_MERGE         1
0154 
0155 /*
0156  * Maximum number of chunks being copied on write.
0157  *
0158  * The value was decided experimentally as a trade-off between memory
0159  * consumption, stalling the kernel's workqueues and maintaining a high enough
0160  * throughput.
0161  */
0162 #define DEFAULT_COW_THRESHOLD 2048
0163 
0164 static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
0165 module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
0166 MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
0167 
0168 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
0169         "A percentage of time allocated for copy on write");
0170 
0171 struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
0172 {
0173     return s->origin;
0174 }
0175 EXPORT_SYMBOL(dm_snap_origin);
0176 
0177 struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
0178 {
0179     return s->cow;
0180 }
0181 EXPORT_SYMBOL(dm_snap_cow);
0182 
0183 static sector_t chunk_to_sector(struct dm_exception_store *store,
0184                 chunk_t chunk)
0185 {
0186     return chunk << store->chunk_shift;
0187 }
0188 
0189 static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
0190 {
0191     /*
0192      * There is only ever one instance of a particular block
0193      * device so we can compare pointers safely.
0194      */
0195     return lhs == rhs;
0196 }
0197 
0198 struct dm_snap_pending_exception {
0199     struct dm_exception e;
0200 
0201     /*
0202      * Origin buffers waiting for this to complete are held
0203      * in a bio list
0204      */
0205     struct bio_list origin_bios;
0206     struct bio_list snapshot_bios;
0207 
0208     /* Pointer back to snapshot context */
0209     struct dm_snapshot *snap;
0210 
0211     /*
0212      * 1 indicates the exception has already been sent to
0213      * kcopyd.
0214      */
0215     int started;
0216 
0217     /* There was copying error. */
0218     int copy_error;
0219 
0220     /* A sequence number, it is used for in-order completion. */
0221     sector_t exception_sequence;
0222 
0223     struct rb_node out_of_order_node;
0224 
0225     /*
0226      * For writing a complete chunk, bypassing the copy.
0227      */
0228     struct bio *full_bio;
0229     bio_end_io_t *full_bio_end_io;
0230 };
0231 
0232 /*
0233  * Hash table mapping origin volumes to lists of snapshots and
0234  * a lock to protect it
0235  */
0236 static struct kmem_cache *exception_cache;
0237 static struct kmem_cache *pending_cache;
0238 
0239 struct dm_snap_tracked_chunk {
0240     struct hlist_node node;
0241     chunk_t chunk;
0242 };
0243 
0244 static void init_tracked_chunk(struct bio *bio)
0245 {
0246     struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
0247     INIT_HLIST_NODE(&c->node);
0248 }
0249 
0250 static bool is_bio_tracked(struct bio *bio)
0251 {
0252     struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
0253     return !hlist_unhashed(&c->node);
0254 }
0255 
0256 static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
0257 {
0258     struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
0259 
0260     c->chunk = chunk;
0261 
0262     spin_lock_irq(&s->tracked_chunk_lock);
0263     hlist_add_head(&c->node,
0264                &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
0265     spin_unlock_irq(&s->tracked_chunk_lock);
0266 }
0267 
0268 static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
0269 {
0270     struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
0271     unsigned long flags;
0272 
0273     spin_lock_irqsave(&s->tracked_chunk_lock, flags);
0274     hlist_del(&c->node);
0275     spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
0276 }
0277 
0278 static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
0279 {
0280     struct dm_snap_tracked_chunk *c;
0281     int found = 0;
0282 
0283     spin_lock_irq(&s->tracked_chunk_lock);
0284 
0285     hlist_for_each_entry(c,
0286         &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
0287         if (c->chunk == chunk) {
0288             found = 1;
0289             break;
0290         }
0291     }
0292 
0293     spin_unlock_irq(&s->tracked_chunk_lock);
0294 
0295     return found;
0296 }
0297 
0298 /*
0299  * This conflicting I/O is extremely improbable in the caller,
0300  * so msleep(1) is sufficient and there is no need for a wait queue.
0301  */
0302 static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
0303 {
0304     while (__chunk_is_tracked(s, chunk))
0305         msleep(1);
0306 }
0307 
0308 /*
0309  * One of these per registered origin, held in the snapshot_origins hash
0310  */
0311 struct origin {
0312     /* The origin device */
0313     struct block_device *bdev;
0314 
0315     struct list_head hash_list;
0316 
0317     /* List of snapshots for this origin */
0318     struct list_head snapshots;
0319 };
0320 
0321 /*
0322  * This structure is allocated for each origin target
0323  */
0324 struct dm_origin {
0325     struct dm_dev *dev;
0326     struct dm_target *ti;
0327     unsigned split_boundary;
0328     struct list_head hash_list;
0329 };
0330 
0331 /*
0332  * Size of the hash table for origin volumes. If we make this
0333  * the size of the minors list then it should be nearly perfect
0334  */
0335 #define ORIGIN_HASH_SIZE 256
0336 #define ORIGIN_MASK      0xFF
0337 static struct list_head *_origins;
0338 static struct list_head *_dm_origins;
0339 static struct rw_semaphore _origins_lock;
0340 
0341 static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
0342 static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
0343 static uint64_t _pending_exceptions_done_count;
0344 
0345 static int init_origin_hash(void)
0346 {
0347     int i;
0348 
0349     _origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
0350                  GFP_KERNEL);
0351     if (!_origins) {
0352         DMERR("unable to allocate memory for _origins");
0353         return -ENOMEM;
0354     }
0355     for (i = 0; i < ORIGIN_HASH_SIZE; i++)
0356         INIT_LIST_HEAD(_origins + i);
0357 
0358     _dm_origins = kmalloc_array(ORIGIN_HASH_SIZE,
0359                     sizeof(struct list_head),
0360                     GFP_KERNEL);
0361     if (!_dm_origins) {
0362         DMERR("unable to allocate memory for _dm_origins");
0363         kfree(_origins);
0364         return -ENOMEM;
0365     }
0366     for (i = 0; i < ORIGIN_HASH_SIZE; i++)
0367         INIT_LIST_HEAD(_dm_origins + i);
0368 
0369     init_rwsem(&_origins_lock);
0370 
0371     return 0;
0372 }
0373 
0374 static void exit_origin_hash(void)
0375 {
0376     kfree(_origins);
0377     kfree(_dm_origins);
0378 }
0379 
0380 static unsigned origin_hash(struct block_device *bdev)
0381 {
0382     return bdev->bd_dev & ORIGIN_MASK;
0383 }
0384 
0385 static struct origin *__lookup_origin(struct block_device *origin)
0386 {
0387     struct list_head *ol;
0388     struct origin *o;
0389 
0390     ol = &_origins[origin_hash(origin)];
0391     list_for_each_entry (o, ol, hash_list)
0392         if (bdev_equal(o->bdev, origin))
0393             return o;
0394 
0395     return NULL;
0396 }
0397 
0398 static void __insert_origin(struct origin *o)
0399 {
0400     struct list_head *sl = &_origins[origin_hash(o->bdev)];
0401     list_add_tail(&o->hash_list, sl);
0402 }
0403 
0404 static struct dm_origin *__lookup_dm_origin(struct block_device *origin)
0405 {
0406     struct list_head *ol;
0407     struct dm_origin *o;
0408 
0409     ol = &_dm_origins[origin_hash(origin)];
0410     list_for_each_entry (o, ol, hash_list)
0411         if (bdev_equal(o->dev->bdev, origin))
0412             return o;
0413 
0414     return NULL;
0415 }
0416 
0417 static void __insert_dm_origin(struct dm_origin *o)
0418 {
0419     struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)];
0420     list_add_tail(&o->hash_list, sl);
0421 }
0422 
0423 static void __remove_dm_origin(struct dm_origin *o)
0424 {
0425     list_del(&o->hash_list);
0426 }
0427 
0428 /*
0429  * _origins_lock must be held when calling this function.
0430  * Returns number of snapshots registered using the supplied cow device, plus:
0431  * snap_src - a snapshot suitable for use as a source of exception handover
0432  * snap_dest - a snapshot capable of receiving exception handover.
0433  * snap_merge - an existing snapshot-merge target linked to the same origin.
0434  *   There can be at most one snapshot-merge target. The parameter is optional.
0435  *
0436  * Possible return values and states of snap_src and snap_dest.
0437  *   0: NULL, NULL  - first new snapshot
0438  *   1: snap_src, NULL - normal snapshot
0439  *   2: snap_src, snap_dest  - waiting for handover
0440  *   2: snap_src, NULL - handed over, waiting for old to be deleted
0441  *   1: NULL, snap_dest - source got destroyed without handover
0442  */
0443 static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
0444                     struct dm_snapshot **snap_src,
0445                     struct dm_snapshot **snap_dest,
0446                     struct dm_snapshot **snap_merge)
0447 {
0448     struct dm_snapshot *s;
0449     struct origin *o;
0450     int count = 0;
0451     int active;
0452 
0453     o = __lookup_origin(snap->origin->bdev);
0454     if (!o)
0455         goto out;
0456 
0457     list_for_each_entry(s, &o->snapshots, list) {
0458         if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
0459             *snap_merge = s;
0460         if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
0461             continue;
0462 
0463         down_read(&s->lock);
0464         active = s->active;
0465         up_read(&s->lock);
0466 
0467         if (active) {
0468             if (snap_src)
0469                 *snap_src = s;
0470         } else if (snap_dest)
0471             *snap_dest = s;
0472 
0473         count++;
0474     }
0475 
0476 out:
0477     return count;
0478 }
0479 
0480 /*
0481  * On success, returns 1 if this snapshot is a handover destination,
0482  * otherwise returns 0.
0483  */
0484 static int __validate_exception_handover(struct dm_snapshot *snap)
0485 {
0486     struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
0487     struct dm_snapshot *snap_merge = NULL;
0488 
0489     /* Does snapshot need exceptions handed over to it? */
0490     if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
0491                       &snap_merge) == 2) ||
0492         snap_dest) {
0493         snap->ti->error = "Snapshot cow pairing for exception "
0494                   "table handover failed";
0495         return -EINVAL;
0496     }
0497 
0498     /*
0499      * If no snap_src was found, snap cannot become a handover
0500      * destination.
0501      */
0502     if (!snap_src)
0503         return 0;
0504 
0505     /*
0506      * Non-snapshot-merge handover?
0507      */
0508     if (!dm_target_is_snapshot_merge(snap->ti))
0509         return 1;
0510 
0511     /*
0512      * Do not allow more than one merging snapshot.
0513      */
0514     if (snap_merge) {
0515         snap->ti->error = "A snapshot is already merging.";
0516         return -EINVAL;
0517     }
0518 
0519     if (!snap_src->store->type->prepare_merge ||
0520         !snap_src->store->type->commit_merge) {
0521         snap->ti->error = "Snapshot exception store does not "
0522                   "support snapshot-merge.";
0523         return -EINVAL;
0524     }
0525 
0526     return 1;
0527 }
0528 
0529 static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
0530 {
0531     struct dm_snapshot *l;
0532 
0533     /* Sort the list according to chunk size, largest-first smallest-last */
0534     list_for_each_entry(l, &o->snapshots, list)
0535         if (l->store->chunk_size < s->store->chunk_size)
0536             break;
0537     list_add_tail(&s->list, &l->list);
0538 }
0539 
0540 /*
0541  * Make a note of the snapshot and its origin so we can look it
0542  * up when the origin has a write on it.
0543  *
0544  * Also validate snapshot exception store handovers.
0545  * On success, returns 1 if this registration is a handover destination,
0546  * otherwise returns 0.
0547  */
0548 static int register_snapshot(struct dm_snapshot *snap)
0549 {
0550     struct origin *o, *new_o = NULL;
0551     struct block_device *bdev = snap->origin->bdev;
0552     int r = 0;
0553 
0554     new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
0555     if (!new_o)
0556         return -ENOMEM;
0557 
0558     down_write(&_origins_lock);
0559 
0560     r = __validate_exception_handover(snap);
0561     if (r < 0) {
0562         kfree(new_o);
0563         goto out;
0564     }
0565 
0566     o = __lookup_origin(bdev);
0567     if (o)
0568         kfree(new_o);
0569     else {
0570         /* New origin */
0571         o = new_o;
0572 
0573         /* Initialise the struct */
0574         INIT_LIST_HEAD(&o->snapshots);
0575         o->bdev = bdev;
0576 
0577         __insert_origin(o);
0578     }
0579 
0580     __insert_snapshot(o, snap);
0581 
0582 out:
0583     up_write(&_origins_lock);
0584 
0585     return r;
0586 }
0587 
0588 /*
0589  * Move snapshot to correct place in list according to chunk size.
0590  */
0591 static void reregister_snapshot(struct dm_snapshot *s)
0592 {
0593     struct block_device *bdev = s->origin->bdev;
0594 
0595     down_write(&_origins_lock);
0596 
0597     list_del(&s->list);
0598     __insert_snapshot(__lookup_origin(bdev), s);
0599 
0600     up_write(&_origins_lock);
0601 }
0602 
0603 static void unregister_snapshot(struct dm_snapshot *s)
0604 {
0605     struct origin *o;
0606 
0607     down_write(&_origins_lock);
0608     o = __lookup_origin(s->origin->bdev);
0609 
0610     list_del(&s->list);
0611     if (o && list_empty(&o->snapshots)) {
0612         list_del(&o->hash_list);
0613         kfree(o);
0614     }
0615 
0616     up_write(&_origins_lock);
0617 }
0618 
0619 /*
0620  * Implementation of the exception hash tables.
0621  * The lowest hash_shift bits of the chunk number are ignored, allowing
0622  * some consecutive chunks to be grouped together.
0623  */
0624 static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
0625 
0626 /* Lock to protect access to the completed and pending exception hash tables. */
0627 struct dm_exception_table_lock {
0628     struct hlist_bl_head *complete_slot;
0629     struct hlist_bl_head *pending_slot;
0630 };
0631 
0632 static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
0633                      struct dm_exception_table_lock *lock)
0634 {
0635     struct dm_exception_table *complete = &s->complete;
0636     struct dm_exception_table *pending = &s->pending;
0637 
0638     lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
0639     lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
0640 }
0641 
0642 static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
0643 {
0644     hlist_bl_lock(lock->complete_slot);
0645     hlist_bl_lock(lock->pending_slot);
0646 }
0647 
0648 static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
0649 {
0650     hlist_bl_unlock(lock->pending_slot);
0651     hlist_bl_unlock(lock->complete_slot);
0652 }
0653 
0654 static int dm_exception_table_init(struct dm_exception_table *et,
0655                    uint32_t size, unsigned hash_shift)
0656 {
0657     unsigned int i;
0658 
0659     et->hash_shift = hash_shift;
0660     et->hash_mask = size - 1;
0661     et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head),
0662                    GFP_KERNEL);
0663     if (!et->table)
0664         return -ENOMEM;
0665 
0666     for (i = 0; i < size; i++)
0667         INIT_HLIST_BL_HEAD(et->table + i);
0668 
0669     return 0;
0670 }
0671 
0672 static void dm_exception_table_exit(struct dm_exception_table *et,
0673                     struct kmem_cache *mem)
0674 {
0675     struct hlist_bl_head *slot;
0676     struct dm_exception *ex;
0677     struct hlist_bl_node *pos, *n;
0678     int i, size;
0679 
0680     size = et->hash_mask + 1;
0681     for (i = 0; i < size; i++) {
0682         slot = et->table + i;
0683 
0684         hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
0685             kmem_cache_free(mem, ex);
0686     }
0687 
0688     kvfree(et->table);
0689 }
0690 
0691 static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
0692 {
0693     return (chunk >> et->hash_shift) & et->hash_mask;
0694 }
0695 
0696 static void dm_remove_exception(struct dm_exception *e)
0697 {
0698     hlist_bl_del(&e->hash_list);
0699 }
0700 
0701 /*
0702  * Return the exception data for a sector, or NULL if not
0703  * remapped.
0704  */
0705 static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
0706                         chunk_t chunk)
0707 {
0708     struct hlist_bl_head *slot;
0709     struct hlist_bl_node *pos;
0710     struct dm_exception *e;
0711 
0712     slot = &et->table[exception_hash(et, chunk)];
0713     hlist_bl_for_each_entry(e, pos, slot, hash_list)
0714         if (chunk >= e->old_chunk &&
0715             chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
0716             return e;
0717 
0718     return NULL;
0719 }
0720 
0721 static struct dm_exception *alloc_completed_exception(gfp_t gfp)
0722 {
0723     struct dm_exception *e;
0724 
0725     e = kmem_cache_alloc(exception_cache, gfp);
0726     if (!e && gfp == GFP_NOIO)
0727         e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
0728 
0729     return e;
0730 }
0731 
0732 static void free_completed_exception(struct dm_exception *e)
0733 {
0734     kmem_cache_free(exception_cache, e);
0735 }
0736 
0737 static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
0738 {
0739     struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
0740                                  GFP_NOIO);
0741 
0742     atomic_inc(&s->pending_exceptions_count);
0743     pe->snap = s;
0744 
0745     return pe;
0746 }
0747 
0748 static void free_pending_exception(struct dm_snap_pending_exception *pe)
0749 {
0750     struct dm_snapshot *s = pe->snap;
0751 
0752     mempool_free(pe, &s->pending_pool);
0753     smp_mb__before_atomic();
0754     atomic_dec(&s->pending_exceptions_count);
0755 }
0756 
0757 static void dm_insert_exception(struct dm_exception_table *eh,
0758                 struct dm_exception *new_e)
0759 {
0760     struct hlist_bl_head *l;
0761     struct hlist_bl_node *pos;
0762     struct dm_exception *e = NULL;
0763 
0764     l = &eh->table[exception_hash(eh, new_e->old_chunk)];
0765 
0766     /* Add immediately if this table doesn't support consecutive chunks */
0767     if (!eh->hash_shift)
0768         goto out;
0769 
0770     /* List is ordered by old_chunk */
0771     hlist_bl_for_each_entry(e, pos, l, hash_list) {
0772         /* Insert after an existing chunk? */
0773         if (new_e->old_chunk == (e->old_chunk +
0774                      dm_consecutive_chunk_count(e) + 1) &&
0775             new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
0776                      dm_consecutive_chunk_count(e) + 1)) {
0777             dm_consecutive_chunk_count_inc(e);
0778             free_completed_exception(new_e);
0779             return;
0780         }
0781 
0782         /* Insert before an existing chunk? */
0783         if (new_e->old_chunk == (e->old_chunk - 1) &&
0784             new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
0785             dm_consecutive_chunk_count_inc(e);
0786             e->old_chunk--;
0787             e->new_chunk--;
0788             free_completed_exception(new_e);
0789             return;
0790         }
0791 
0792         if (new_e->old_chunk < e->old_chunk)
0793             break;
0794     }
0795 
0796 out:
0797     if (!e) {
0798         /*
0799          * Either the table doesn't support consecutive chunks or slot
0800          * l is empty.
0801          */
0802         hlist_bl_add_head(&new_e->hash_list, l);
0803     } else if (new_e->old_chunk < e->old_chunk) {
0804         /* Add before an existing exception */
0805         hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
0806     } else {
0807         /* Add to l's tail: e is the last exception in this slot */
0808         hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
0809     }
0810 }
0811 
0812 /*
0813  * Callback used by the exception stores to load exceptions when
0814  * initialising.
0815  */
0816 static int dm_add_exception(void *context, chunk_t old, chunk_t new)
0817 {
0818     struct dm_exception_table_lock lock;
0819     struct dm_snapshot *s = context;
0820     struct dm_exception *e;
0821 
0822     e = alloc_completed_exception(GFP_KERNEL);
0823     if (!e)
0824         return -ENOMEM;
0825 
0826     e->old_chunk = old;
0827 
0828     /* Consecutive_count is implicitly initialised to zero */
0829     e->new_chunk = new;
0830 
0831     /*
0832      * Although there is no need to lock access to the exception tables
0833      * here, if we don't then hlist_bl_add_head(), called by
0834      * dm_insert_exception(), will complain about accessing the
0835      * corresponding list without locking it first.
0836      */
0837     dm_exception_table_lock_init(s, old, &lock);
0838 
0839     dm_exception_table_lock(&lock);
0840     dm_insert_exception(&s->complete, e);
0841     dm_exception_table_unlock(&lock);
0842 
0843     return 0;
0844 }
0845 
0846 /*
0847  * Return a minimum chunk size of all snapshots that have the specified origin.
0848  * Return zero if the origin has no snapshots.
0849  */
0850 static uint32_t __minimum_chunk_size(struct origin *o)
0851 {
0852     struct dm_snapshot *snap;
0853     unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);
0854 
0855     if (o)
0856         list_for_each_entry(snap, &o->snapshots, list)
0857             chunk_size = min_not_zero(chunk_size,
0858                           snap->store->chunk_size);
0859 
0860     return (uint32_t) chunk_size;
0861 }
0862 
0863 /*
0864  * Hard coded magic.
0865  */
0866 static int calc_max_buckets(void)
0867 {
0868     /* use a fixed size of 2MB */
0869     unsigned long mem = 2 * 1024 * 1024;
0870     mem /= sizeof(struct hlist_bl_head);
0871 
0872     return mem;
0873 }
0874 
0875 /*
0876  * Allocate room for a suitable hash table.
0877  */
0878 static int init_hash_tables(struct dm_snapshot *s)
0879 {
0880     sector_t hash_size, cow_dev_size, max_buckets;
0881 
0882     /*
0883      * Calculate based on the size of the original volume or
0884      * the COW volume...
0885      */
0886     cow_dev_size = get_dev_size(s->cow->bdev);
0887     max_buckets = calc_max_buckets();
0888 
0889     hash_size = cow_dev_size >> s->store->chunk_shift;
0890     hash_size = min(hash_size, max_buckets);
0891 
0892     if (hash_size < 64)
0893         hash_size = 64;
0894     hash_size = rounddown_pow_of_two(hash_size);
0895     if (dm_exception_table_init(&s->complete, hash_size,
0896                     DM_CHUNK_CONSECUTIVE_BITS))
0897         return -ENOMEM;
0898 
0899     /*
0900      * Allocate hash table for in-flight exceptions
0901      * Make this smaller than the real hash table
0902      */
0903     hash_size >>= 3;
0904     if (hash_size < 64)
0905         hash_size = 64;
0906 
0907     if (dm_exception_table_init(&s->pending, hash_size, 0)) {
0908         dm_exception_table_exit(&s->complete, exception_cache);
0909         return -ENOMEM;
0910     }
0911 
0912     return 0;
0913 }
0914 
0915 static void merge_shutdown(struct dm_snapshot *s)
0916 {
0917     clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
0918     smp_mb__after_atomic();
0919     wake_up_bit(&s->state_bits, RUNNING_MERGE);
0920 }
0921 
0922 static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s)
0923 {
0924     s->first_merging_chunk = 0;
0925     s->num_merging_chunks = 0;
0926 
0927     return bio_list_get(&s->bios_queued_during_merge);
0928 }
0929 
0930 /*
0931  * Remove one chunk from the index of completed exceptions.
0932  */
0933 static int __remove_single_exception_chunk(struct dm_snapshot *s,
0934                        chunk_t old_chunk)
0935 {
0936     struct dm_exception *e;
0937 
0938     e = dm_lookup_exception(&s->complete, old_chunk);
0939     if (!e) {
0940         DMERR("Corruption detected: exception for block %llu is "
0941               "on disk but not in memory",
0942               (unsigned long long)old_chunk);
0943         return -EINVAL;
0944     }
0945 
0946     /*
0947      * If this is the only chunk using this exception, remove exception.
0948      */
0949     if (!dm_consecutive_chunk_count(e)) {
0950         dm_remove_exception(e);
0951         free_completed_exception(e);
0952         return 0;
0953     }
0954 
0955     /*
0956      * The chunk may be either at the beginning or the end of a
0957      * group of consecutive chunks - never in the middle.  We are
0958      * removing chunks in the opposite order to that in which they
0959      * were added, so this should always be true.
0960      * Decrement the consecutive chunk counter and adjust the
0961      * starting point if necessary.
0962      */
0963     if (old_chunk == e->old_chunk) {
0964         e->old_chunk++;
0965         e->new_chunk++;
0966     } else if (old_chunk != e->old_chunk +
0967            dm_consecutive_chunk_count(e)) {
0968         DMERR("Attempt to merge block %llu from the "
0969               "middle of a chunk range [%llu - %llu]",
0970               (unsigned long long)old_chunk,
0971               (unsigned long long)e->old_chunk,
0972               (unsigned long long)
0973               e->old_chunk + dm_consecutive_chunk_count(e));
0974         return -EINVAL;
0975     }
0976 
0977     dm_consecutive_chunk_count_dec(e);
0978 
0979     return 0;
0980 }
0981 
0982 static void flush_bios(struct bio *bio);
0983 
0984 static int remove_single_exception_chunk(struct dm_snapshot *s)
0985 {
0986     struct bio *b = NULL;
0987     int r;
0988     chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
0989 
0990     down_write(&s->lock);
0991 
0992     /*
0993      * Process chunks (and associated exceptions) in reverse order
0994      * so that dm_consecutive_chunk_count_dec() accounting works.
0995      */
0996     do {
0997         r = __remove_single_exception_chunk(s, old_chunk);
0998         if (r)
0999             goto out;
1000     } while (old_chunk-- > s->first_merging_chunk);
1001 
1002     b = __release_queued_bios_after_merge(s);
1003 
1004 out:
1005     up_write(&s->lock);
1006     if (b)
1007         flush_bios(b);
1008 
1009     return r;
1010 }
1011 
1012 static int origin_write_extent(struct dm_snapshot *merging_snap,
1013                    sector_t sector, unsigned chunk_size);
1014 
1015 static void merge_callback(int read_err, unsigned long write_err,
1016                void *context);
1017 
1018 static uint64_t read_pending_exceptions_done_count(void)
1019 {
1020     uint64_t pending_exceptions_done;
1021 
1022     spin_lock(&_pending_exceptions_done_spinlock);
1023     pending_exceptions_done = _pending_exceptions_done_count;
1024     spin_unlock(&_pending_exceptions_done_spinlock);
1025 
1026     return pending_exceptions_done;
1027 }
1028 
1029 static void increment_pending_exceptions_done_count(void)
1030 {
1031     spin_lock(&_pending_exceptions_done_spinlock);
1032     _pending_exceptions_done_count++;
1033     spin_unlock(&_pending_exceptions_done_spinlock);
1034 
1035     wake_up_all(&_pending_exceptions_done);
1036 }
1037 
1038 static void snapshot_merge_next_chunks(struct dm_snapshot *s)
1039 {
1040     int i, linear_chunks;
1041     chunk_t old_chunk, new_chunk;
1042     struct dm_io_region src, dest;
1043     sector_t io_size;
1044     uint64_t previous_count;
1045 
1046     BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
1047     if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
1048         goto shut;
1049 
1050     /*
1051      * valid flag never changes during merge, so no lock required.
1052      */
1053     if (!s->valid) {
1054         DMERR("Snapshot is invalid: can't merge");
1055         goto shut;
1056     }
1057 
1058     linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
1059                               &new_chunk);
1060     if (linear_chunks <= 0) {
1061         if (linear_chunks < 0) {
1062             DMERR("Read error in exception store: "
1063                   "shutting down merge");
1064             down_write(&s->lock);
1065             s->merge_failed = true;
1066             up_write(&s->lock);
1067         }
1068         goto shut;
1069     }
1070 
1071     /* Adjust old_chunk and new_chunk to reflect start of linear region */
1072     old_chunk = old_chunk + 1 - linear_chunks;
1073     new_chunk = new_chunk + 1 - linear_chunks;
1074 
1075     /*
1076      * Use one (potentially large) I/O to copy all 'linear_chunks'
1077      * from the exception store to the origin
1078      */
1079     io_size = linear_chunks * s->store->chunk_size;
1080 
1081     dest.bdev = s->origin->bdev;
1082     dest.sector = chunk_to_sector(s->store, old_chunk);
1083     dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
1084 
1085     src.bdev = s->cow->bdev;
1086     src.sector = chunk_to_sector(s->store, new_chunk);
1087     src.count = dest.count;
1088 
1089     /*
1090      * Reallocate any exceptions needed in other snapshots then
1091      * wait for the pending exceptions to complete.
1092      * Each time any pending exception (globally on the system)
1093      * completes we are woken and repeat the process to find out
1094      * if we can proceed.  While this may not seem a particularly
1095      * efficient algorithm, it is not expected to have any
1096      * significant impact on performance.
1097      */
1098     previous_count = read_pending_exceptions_done_count();
1099     while (origin_write_extent(s, dest.sector, io_size)) {
1100         wait_event(_pending_exceptions_done,
1101                (read_pending_exceptions_done_count() !=
1102                 previous_count));
1103         /* Retry after the wait, until all exceptions are done. */
1104         previous_count = read_pending_exceptions_done_count();
1105     }
1106 
1107     down_write(&s->lock);
1108     s->first_merging_chunk = old_chunk;
1109     s->num_merging_chunks = linear_chunks;
1110     up_write(&s->lock);
1111 
1112     /* Wait until writes to all 'linear_chunks' drain */
1113     for (i = 0; i < linear_chunks; i++)
1114         __check_for_conflicting_io(s, old_chunk + i);
1115 
1116     dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
1117     return;
1118 
1119 shut:
1120     merge_shutdown(s);
1121 }
1122 
1123 static void error_bios(struct bio *bio);
1124 
1125 static void merge_callback(int read_err, unsigned long write_err, void *context)
1126 {
1127     struct dm_snapshot *s = context;
1128     struct bio *b = NULL;
1129 
1130     if (read_err || write_err) {
1131         if (read_err)
1132             DMERR("Read error: shutting down merge.");
1133         else
1134             DMERR("Write error: shutting down merge.");
1135         goto shut;
1136     }
1137 
1138     if (blkdev_issue_flush(s->origin->bdev) < 0) {
1139         DMERR("Flush after merge failed: shutting down merge");
1140         goto shut;
1141     }
1142 
1143     if (s->store->type->commit_merge(s->store,
1144                      s->num_merging_chunks) < 0) {
1145         DMERR("Write error in exception store: shutting down merge");
1146         goto shut;
1147     }
1148 
1149     if (remove_single_exception_chunk(s) < 0)
1150         goto shut;
1151 
1152     snapshot_merge_next_chunks(s);
1153 
1154     return;
1155 
1156 shut:
1157     down_write(&s->lock);
1158     s->merge_failed = true;
1159     b = __release_queued_bios_after_merge(s);
1160     up_write(&s->lock);
1161     error_bios(b);
1162 
1163     merge_shutdown(s);
1164 }
1165 
1166 static void start_merge(struct dm_snapshot *s)
1167 {
1168     if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
1169         snapshot_merge_next_chunks(s);
1170 }
1171 
1172 /*
1173  * Stop the merging process and wait until it finishes.
1174  */
1175 static void stop_merge(struct dm_snapshot *s)
1176 {
1177     set_bit(SHUTDOWN_MERGE, &s->state_bits);
1178     wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
1179     clear_bit(SHUTDOWN_MERGE, &s->state_bits);
1180 }
1181 
1182 static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
1183                    struct dm_target *ti)
1184 {
1185     int r;
1186     unsigned argc;
1187     const char *arg_name;
1188 
1189     static const struct dm_arg _args[] = {
1190         {0, 2, "Invalid number of feature arguments"},
1191     };
1192 
1193     /*
1194      * No feature arguments supplied.
1195      */
1196     if (!as->argc)
1197         return 0;
1198 
1199     r = dm_read_arg_group(_args, as, &argc, &ti->error);
1200     if (r)
1201         return -EINVAL;
1202 
1203     while (argc && !r) {
1204         arg_name = dm_shift_arg(as);
1205         argc--;
1206 
1207         if (!strcasecmp(arg_name, "discard_zeroes_cow"))
1208             s->discard_zeroes_cow = true;
1209 
1210         else if (!strcasecmp(arg_name, "discard_passdown_origin"))
1211             s->discard_passdown_origin = true;
1212 
1213         else {
1214             ti->error = "Unrecognised feature requested";
1215             r = -EINVAL;
1216             break;
1217         }
1218     }
1219 
1220     if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
1221         /*
1222          * TODO: really these are disjoint.. but ti->num_discard_bios
1223          * and dm_bio_get_target_bio_nr() require rigid constraints.
1224          */
1225         ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
1226         r = -EINVAL;
1227     }
1228 
1229     return r;
1230 }
1231 
1232 /*
1233  * Construct a snapshot mapping:
1234  * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
1235  */
1236 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1237 {
1238     struct dm_snapshot *s;
1239     struct dm_arg_set as;
1240     int i;
1241     int r = -EINVAL;
1242     char *origin_path, *cow_path;
1243     dev_t origin_dev, cow_dev;
1244     unsigned args_used, num_flush_bios = 1;
1245     fmode_t origin_mode = FMODE_READ;
1246 
1247     if (argc < 4) {
1248         ti->error = "requires 4 or more arguments";
1249         r = -EINVAL;
1250         goto bad;
1251     }
1252 
1253     if (dm_target_is_snapshot_merge(ti)) {
1254         num_flush_bios = 2;
1255         origin_mode = FMODE_WRITE;
1256     }
1257 
1258     s = kzalloc(sizeof(*s), GFP_KERNEL);
1259     if (!s) {
1260         ti->error = "Cannot allocate private snapshot structure";
1261         r = -ENOMEM;
1262         goto bad;
1263     }
1264 
1265     as.argc = argc;
1266     as.argv = argv;
1267     dm_consume_args(&as, 4);
1268     r = parse_snapshot_features(&as, s, ti);
1269     if (r)
1270         goto bad_features;
1271 
1272     origin_path = argv[0];
1273     argv++;
1274     argc--;
1275 
1276     r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
1277     if (r) {
1278         ti->error = "Cannot get origin device";
1279         goto bad_origin;
1280     }
1281     origin_dev = s->origin->bdev->bd_dev;
1282 
1283     cow_path = argv[0];
1284     argv++;
1285     argc--;
1286 
1287     cow_dev = dm_get_dev_t(cow_path);
1288     if (cow_dev && cow_dev == origin_dev) {
1289         ti->error = "COW device cannot be the same as origin device";
1290         r = -EINVAL;
1291         goto bad_cow;
1292     }
1293 
1294     r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
1295     if (r) {
1296         ti->error = "Cannot get COW device";
1297         goto bad_cow;
1298     }
1299 
1300     r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
1301     if (r) {
1302         ti->error = "Couldn't create exception store";
1303         r = -EINVAL;
1304         goto bad_store;
1305     }
1306 
1307     argv += args_used;
1308     argc -= args_used;
1309 
1310     s->ti = ti;
1311     s->valid = 1;
1312     s->snapshot_overflowed = 0;
1313     s->active = 0;
1314     atomic_set(&s->pending_exceptions_count, 0);
1315     spin_lock_init(&s->pe_allocation_lock);
1316     s->exception_start_sequence = 0;
1317     s->exception_complete_sequence = 0;
1318     s->out_of_order_tree = RB_ROOT;
1319     init_rwsem(&s->lock);
1320     INIT_LIST_HEAD(&s->list);
1321     spin_lock_init(&s->pe_lock);
1322     s->state_bits = 0;
1323     s->merge_failed = false;
1324     s->first_merging_chunk = 0;
1325     s->num_merging_chunks = 0;
1326     bio_list_init(&s->bios_queued_during_merge);
1327 
1328     /* Allocate hash table for COW data */
1329     if (init_hash_tables(s)) {
1330         ti->error = "Unable to allocate hash table space";
1331         r = -ENOMEM;
1332         goto bad_hash_tables;
1333     }
1334 
1335     init_waitqueue_head(&s->in_progress_wait);
1336 
1337     s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
1338     if (IS_ERR(s->kcopyd_client)) {
1339         r = PTR_ERR(s->kcopyd_client);
1340         ti->error = "Could not create kcopyd client";
1341         goto bad_kcopyd;
1342     }
1343 
1344     r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
1345     if (r) {
1346         ti->error = "Could not allocate mempool for pending exceptions";
1347         goto bad_pending_pool;
1348     }
1349 
1350     for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1351         INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
1352 
1353     spin_lock_init(&s->tracked_chunk_lock);
1354 
1355     ti->private = s;
1356     ti->num_flush_bios = num_flush_bios;
1357     if (s->discard_zeroes_cow)
1358         ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
1359     ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
1360 
1361     /* Add snapshot to the list of snapshots for this origin */
1362     /* Exceptions aren't triggered till snapshot_resume() is called */
1363     r = register_snapshot(s);
1364     if (r == -ENOMEM) {
1365         ti->error = "Snapshot origin struct allocation failed";
1366         goto bad_load_and_register;
1367     } else if (r < 0) {
1368         /* invalid handover, register_snapshot has set ti->error */
1369         goto bad_load_and_register;
1370     }
1371 
1372     /*
1373      * Metadata must only be loaded into one table at once, so skip this
1374      * if metadata will be handed over during resume.
1375      * Chunk size will be set during the handover - set it to zero to
1376      * ensure it's ignored.
1377      */
1378     if (r > 0) {
1379         s->store->chunk_size = 0;
1380         return 0;
1381     }
1382 
1383     r = s->store->type->read_metadata(s->store, dm_add_exception,
1384                       (void *)s);
1385     if (r < 0) {
1386         ti->error = "Failed to read snapshot metadata";
1387         goto bad_read_metadata;
1388     } else if (r > 0) {
1389         s->valid = 0;
1390         DMWARN("Snapshot is marked invalid.");
1391     }
1392 
1393     if (!s->store->chunk_size) {
1394         ti->error = "Chunk size not set";
1395         r = -EINVAL;
1396         goto bad_read_metadata;
1397     }
1398 
1399     r = dm_set_target_max_io_len(ti, s->store->chunk_size);
1400     if (r)
1401         goto bad_read_metadata;
1402 
1403     return 0;
1404 
1405 bad_read_metadata:
1406     unregister_snapshot(s);
1407 bad_load_and_register:
1408     mempool_exit(&s->pending_pool);
1409 bad_pending_pool:
1410     dm_kcopyd_client_destroy(s->kcopyd_client);
1411 bad_kcopyd:
1412     dm_exception_table_exit(&s->pending, pending_cache);
1413     dm_exception_table_exit(&s->complete, exception_cache);
1414 bad_hash_tables:
1415     dm_exception_store_destroy(s->store);
1416 bad_store:
1417     dm_put_device(ti, s->cow);
1418 bad_cow:
1419     dm_put_device(ti, s->origin);
1420 bad_origin:
1421 bad_features:
1422     kfree(s);
1423 bad:
1424     return r;
1425 }
1426 
1427 static void __free_exceptions(struct dm_snapshot *s)
1428 {
1429     dm_kcopyd_client_destroy(s->kcopyd_client);
1430     s->kcopyd_client = NULL;
1431 
1432     dm_exception_table_exit(&s->pending, pending_cache);
1433     dm_exception_table_exit(&s->complete, exception_cache);
1434 }
1435 
1436 static void __handover_exceptions(struct dm_snapshot *snap_src,
1437                   struct dm_snapshot *snap_dest)
1438 {
1439     union {
1440         struct dm_exception_table table_swap;
1441         struct dm_exception_store *store_swap;
1442     } u;
1443 
1444     /*
1445      * Swap all snapshot context information between the two instances.
1446      */
1447     u.table_swap = snap_dest->complete;
1448     snap_dest->complete = snap_src->complete;
1449     snap_src->complete = u.table_swap;
1450 
1451     u.store_swap = snap_dest->store;
1452     snap_dest->store = snap_src->store;
1453     snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
1454     snap_src->store = u.store_swap;
1455 
1456     snap_dest->store->snap = snap_dest;
1457     snap_src->store->snap = snap_src;
1458 
1459     snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
1460     snap_dest->valid = snap_src->valid;
1461     snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;
1462 
1463     /*
1464      * Set source invalid to ensure it receives no further I/O.
1465      */
1466     snap_src->valid = 0;
1467 }
1468 
1469 static void snapshot_dtr(struct dm_target *ti)
1470 {
1471 #ifdef CONFIG_DM_DEBUG
1472     int i;
1473 #endif
1474     struct dm_snapshot *s = ti->private;
1475     struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
1476 
1477     down_read(&_origins_lock);
1478     /* Check whether exception handover must be cancelled */
1479     (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
1480     if (snap_src && snap_dest && (s == snap_src)) {
1481         down_write(&snap_dest->lock);
1482         snap_dest->valid = 0;
1483         up_write(&snap_dest->lock);
1484         DMERR("Cancelling snapshot handover.");
1485     }
1486     up_read(&_origins_lock);
1487 
1488     if (dm_target_is_snapshot_merge(ti))
1489         stop_merge(s);
1490 
1491     /* Prevent further origin writes from using this snapshot. */
1492     /* After this returns there can be no new kcopyd jobs. */
1493     unregister_snapshot(s);
1494 
1495     while (atomic_read(&s->pending_exceptions_count))
1496         msleep(1);
1497     /*
1498      * Ensure instructions in mempool_exit aren't reordered
1499      * before atomic_read.
1500      */
1501     smp_mb();
1502 
1503 #ifdef CONFIG_DM_DEBUG
1504     for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1505         BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
1506 #endif
1507 
1508     __free_exceptions(s);
1509 
1510     mempool_exit(&s->pending_pool);
1511 
1512     dm_exception_store_destroy(s->store);
1513 
1514     dm_put_device(ti, s->cow);
1515 
1516     dm_put_device(ti, s->origin);
1517 
1518     WARN_ON(s->in_progress);
1519 
1520     kfree(s);
1521 }
1522 
1523 static void account_start_copy(struct dm_snapshot *s)
1524 {
1525     spin_lock(&s->in_progress_wait.lock);
1526     s->in_progress++;
1527     spin_unlock(&s->in_progress_wait.lock);
1528 }
1529 
1530 static void account_end_copy(struct dm_snapshot *s)
1531 {
1532     spin_lock(&s->in_progress_wait.lock);
1533     BUG_ON(!s->in_progress);
1534     s->in_progress--;
1535     if (likely(s->in_progress <= cow_threshold) &&
1536         unlikely(waitqueue_active(&s->in_progress_wait)))
1537         wake_up_locked(&s->in_progress_wait);
1538     spin_unlock(&s->in_progress_wait.lock);
1539 }
1540 
1541 static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
1542 {
1543     if (unlikely(s->in_progress > cow_threshold)) {
1544         spin_lock(&s->in_progress_wait.lock);
1545         if (likely(s->in_progress > cow_threshold)) {
1546             /*
1547              * NOTE: this throttle doesn't account for whether
1548              * the caller is servicing an IO that will trigger a COW
1549              * so excess throttling may result for chunks not required
1550              * to be COW'd.  But if cow_threshold was reached, extra
1551              * throttling is unlikely to negatively impact performance.
1552              */
1553             DECLARE_WAITQUEUE(wait, current);
1554             __add_wait_queue(&s->in_progress_wait, &wait);
1555             __set_current_state(TASK_UNINTERRUPTIBLE);
1556             spin_unlock(&s->in_progress_wait.lock);
1557             if (unlock_origins)
1558                 up_read(&_origins_lock);
1559             io_schedule();
1560             remove_wait_queue(&s->in_progress_wait, &wait);
1561             return false;
1562         }
1563         spin_unlock(&s->in_progress_wait.lock);
1564     }
1565     return true;
1566 }
1567 
1568 /*
1569  * Flush a list of buffers.
1570  */
1571 static void flush_bios(struct bio *bio)
1572 {
1573     struct bio *n;
1574 
1575     while (bio) {
1576         n = bio->bi_next;
1577         bio->bi_next = NULL;
1578         submit_bio_noacct(bio);
1579         bio = n;
1580     }
1581 }
1582 
1583 static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
1584 
1585 /*
1586  * Flush a list of buffers.
1587  */
1588 static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
1589 {
1590     struct bio *n;
1591     int r;
1592 
1593     while (bio) {
1594         n = bio->bi_next;
1595         bio->bi_next = NULL;
1596         r = do_origin(s->origin, bio, false);
1597         if (r == DM_MAPIO_REMAPPED)
1598             submit_bio_noacct(bio);
1599         bio = n;
1600     }
1601 }
1602 
1603 /*
1604  * Error a list of buffers.
1605  */
1606 static void error_bios(struct bio *bio)
1607 {
1608     struct bio *n;
1609 
1610     while (bio) {
1611         n = bio->bi_next;
1612         bio->bi_next = NULL;
1613         bio_io_error(bio);
1614         bio = n;
1615     }
1616 }
1617 
1618 static void __invalidate_snapshot(struct dm_snapshot *s, int err)
1619 {
1620     if (!s->valid)
1621         return;
1622 
1623     if (err == -EIO)
1624         DMERR("Invalidating snapshot: Error reading/writing.");
1625     else if (err == -ENOMEM)
1626         DMERR("Invalidating snapshot: Unable to allocate exception.");
1627 
1628     if (s->store->type->drop_snapshot)
1629         s->store->type->drop_snapshot(s->store);
1630 
1631     s->valid = 0;
1632 
1633     dm_table_event(s->ti->table);
1634 }
1635 
1636 static void invalidate_snapshot(struct dm_snapshot *s, int err)
1637 {
1638     down_write(&s->lock);
1639     __invalidate_snapshot(s, err);
1640     up_write(&s->lock);
1641 }
1642 
1643 static void pending_complete(void *context, int success)
1644 {
1645     struct dm_snap_pending_exception *pe = context;
1646     struct dm_exception *e;
1647     struct dm_snapshot *s = pe->snap;
1648     struct bio *origin_bios = NULL;
1649     struct bio *snapshot_bios = NULL;
1650     struct bio *full_bio = NULL;
1651     struct dm_exception_table_lock lock;
1652     int error = 0;
1653 
1654     dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);
1655 
1656     if (!success) {
1657         /* Read/write error - snapshot is unusable */
1658         invalidate_snapshot(s, -EIO);
1659         error = 1;
1660 
1661         dm_exception_table_lock(&lock);
1662         goto out;
1663     }
1664 
1665     e = alloc_completed_exception(GFP_NOIO);
1666     if (!e) {
1667         invalidate_snapshot(s, -ENOMEM);
1668         error = 1;
1669 
1670         dm_exception_table_lock(&lock);
1671         goto out;
1672     }
1673     *e = pe->e;
1674 
1675     down_read(&s->lock);
1676     dm_exception_table_lock(&lock);
1677     if (!s->valid) {
1678         up_read(&s->lock);
1679         free_completed_exception(e);
1680         error = 1;
1681 
1682         goto out;
1683     }
1684 
1685     /*
1686      * Add a proper exception. After inserting the completed exception all
1687      * subsequent snapshot reads to this chunk will be redirected to the
1688      * COW device.  This ensures that we do not starve. Moreover, as long
1689      * as the pending exception exists, neither origin writes nor snapshot
1690      * merging can overwrite the chunk in origin.
1691      */
1692     dm_insert_exception(&s->complete, e);
1693     up_read(&s->lock);
1694 
1695     /* Wait for conflicting reads to drain */
1696     if (__chunk_is_tracked(s, pe->e.old_chunk)) {
1697         dm_exception_table_unlock(&lock);
1698         __check_for_conflicting_io(s, pe->e.old_chunk);
1699         dm_exception_table_lock(&lock);
1700     }
1701 
1702 out:
1703     /* Remove the in-flight exception from the list */
1704     dm_remove_exception(&pe->e);
1705 
1706     dm_exception_table_unlock(&lock);
1707 
1708     snapshot_bios = bio_list_get(&pe->snapshot_bios);
1709     origin_bios = bio_list_get(&pe->origin_bios);
1710     full_bio = pe->full_bio;
1711     if (full_bio)
1712         full_bio->bi_end_io = pe->full_bio_end_io;
1713     increment_pending_exceptions_done_count();
1714 
1715     /* Submit any pending write bios */
1716     if (error) {
1717         if (full_bio)
1718             bio_io_error(full_bio);
1719         error_bios(snapshot_bios);
1720     } else {
1721         if (full_bio)
1722             bio_endio(full_bio);
1723         flush_bios(snapshot_bios);
1724     }
1725 
1726     retry_origin_bios(s, origin_bios);
1727 
1728     free_pending_exception(pe);
1729 }
1730 
1731 static void complete_exception(struct dm_snap_pending_exception *pe)
1732 {
1733     struct dm_snapshot *s = pe->snap;
1734 
1735     /* Update the metadata if we are persistent */
1736     s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
1737                      pending_complete, pe);
1738 }
1739 
1740 /*
1741  * Called when the copy I/O has finished.  kcopyd actually runs
1742  * this code so don't block.
1743  */
1744 static void copy_callback(int read_err, unsigned long write_err, void *context)
1745 {
1746     struct dm_snap_pending_exception *pe = context;
1747     struct dm_snapshot *s = pe->snap;
1748 
1749     pe->copy_error = read_err || write_err;
1750 
1751     if (pe->exception_sequence == s->exception_complete_sequence) {
1752         struct rb_node *next;
1753 
1754         s->exception_complete_sequence++;
1755         complete_exception(pe);
1756 
1757         next = rb_first(&s->out_of_order_tree);
1758         while (next) {
1759             pe = rb_entry(next, struct dm_snap_pending_exception,
1760                     out_of_order_node);
1761             if (pe->exception_sequence != s->exception_complete_sequence)
1762                 break;
1763             next = rb_next(next);
1764             s->exception_complete_sequence++;
1765             rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
1766             complete_exception(pe);
1767             cond_resched();
1768         }
1769     } else {
1770         struct rb_node *parent = NULL;
1771         struct rb_node **p = &s->out_of_order_tree.rb_node;
1772         struct dm_snap_pending_exception *pe2;
1773 
1774         while (*p) {
1775             pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node);
1776             parent = *p;
1777 
1778             BUG_ON(pe->exception_sequence == pe2->exception_sequence);
1779             if (pe->exception_sequence < pe2->exception_sequence)
1780                 p = &((*p)->rb_left);
1781             else
1782                 p = &((*p)->rb_right);
1783         }
1784 
1785         rb_link_node(&pe->out_of_order_node, parent, p);
1786         rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1787     }
1788     account_end_copy(s);
1789 }
1790 
1791 /*
1792  * Dispatches the copy operation to kcopyd.
1793  */
1794 static void start_copy(struct dm_snap_pending_exception *pe)
1795 {
1796     struct dm_snapshot *s = pe->snap;
1797     struct dm_io_region src, dest;
1798     struct block_device *bdev = s->origin->bdev;
1799     sector_t dev_size;
1800 
1801     dev_size = get_dev_size(bdev);
1802 
1803     src.bdev = bdev;
1804     src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
1805     src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
1806 
1807     dest.bdev = s->cow->bdev;
1808     dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
1809     dest.count = src.count;
1810 
1811     /* Hand over to kcopyd */
1812     account_start_copy(s);
1813     dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
1814 }
1815 
1816 static void full_bio_end_io(struct bio *bio)
1817 {
1818     void *callback_data = bio->bi_private;
1819 
1820     dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
1821 }
1822 
1823 static void start_full_bio(struct dm_snap_pending_exception *pe,
1824                struct bio *bio)
1825 {
1826     struct dm_snapshot *s = pe->snap;
1827     void *callback_data;
1828 
1829     pe->full_bio = bio;
1830     pe->full_bio_end_io = bio->bi_end_io;
1831 
1832     account_start_copy(s);
1833     callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
1834                            copy_callback, pe);
1835 
1836     bio->bi_end_io = full_bio_end_io;
1837     bio->bi_private = callback_data;
1838 
1839     submit_bio_noacct(bio);
1840 }
1841 
1842 static struct dm_snap_pending_exception *
1843 __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
1844 {
1845     struct dm_exception *e = dm_lookup_exception(&s->pending, chunk);
1846 
1847     if (!e)
1848         return NULL;
1849 
1850     return container_of(e, struct dm_snap_pending_exception, e);
1851 }
1852 
1853 /*
1854  * Inserts a pending exception into the pending table.
1855  *
1856  * NOTE: a write lock must be held on the chunk's pending exception table slot
1857  * before calling this.
1858  */
1859 static struct dm_snap_pending_exception *
1860 __insert_pending_exception(struct dm_snapshot *s,
1861                struct dm_snap_pending_exception *pe, chunk_t chunk)
1862 {
1863     pe->e.old_chunk = chunk;
1864     bio_list_init(&pe->origin_bios);
1865     bio_list_init(&pe->snapshot_bios);
1866     pe->started = 0;
1867     pe->full_bio = NULL;
1868 
1869     spin_lock(&s->pe_allocation_lock);
1870     if (s->store->type->prepare_exception(s->store, &pe->e)) {
1871         spin_unlock(&s->pe_allocation_lock);
1872         free_pending_exception(pe);
1873         return NULL;
1874     }
1875 
1876     pe->exception_sequence = s->exception_start_sequence++;
1877     spin_unlock(&s->pe_allocation_lock);
1878 
1879     dm_insert_exception(&s->pending, &pe->e);
1880 
1881     return pe;
1882 }
1883 
1884 /*
1885  * Looks to see if this snapshot already has a pending exception
1886  * for this chunk, otherwise it allocates a new one and inserts
1887  * it into the pending table.
1888  *
1889  * NOTE: a write lock must be held on the chunk's pending exception table slot
1890  * before calling this.
1891  */
1892 static struct dm_snap_pending_exception *
1893 __find_pending_exception(struct dm_snapshot *s,
1894              struct dm_snap_pending_exception *pe, chunk_t chunk)
1895 {
1896     struct dm_snap_pending_exception *pe2;
1897 
1898     pe2 = __lookup_pending_exception(s, chunk);
1899     if (pe2) {
1900         free_pending_exception(pe);
1901         return pe2;
1902     }
1903 
1904     return __insert_pending_exception(s, pe, chunk);
1905 }
1906 
1907 static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
1908                 struct bio *bio, chunk_t chunk)
1909 {
1910     bio_set_dev(bio, s->cow->bdev);
1911     bio->bi_iter.bi_sector =
1912         chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
1913                 (chunk - e->old_chunk)) +
1914         (bio->bi_iter.bi_sector & s->store->chunk_mask);
1915 }
1916 
1917 static void zero_callback(int read_err, unsigned long write_err, void *context)
1918 {
1919     struct bio *bio = context;
1920     struct dm_snapshot *s = bio->bi_private;
1921 
1922     account_end_copy(s);
1923     bio->bi_status = write_err ? BLK_STS_IOERR : 0;
1924     bio_endio(bio);
1925 }
1926 
1927 static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
1928                struct bio *bio, chunk_t chunk)
1929 {
1930     struct dm_io_region dest;
1931 
1932     dest.bdev = s->cow->bdev;
1933     dest.sector = bio->bi_iter.bi_sector;
1934     dest.count = s->store->chunk_size;
1935 
1936     account_start_copy(s);
1937     WARN_ON_ONCE(bio->bi_private);
1938     bio->bi_private = s;
1939     dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
1940 }
1941 
1942 static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
1943 {
1944     return bio->bi_iter.bi_size ==
1945         (s->store->chunk_size << SECTOR_SHIFT);
1946 }
1947 
1948 static int snapshot_map(struct dm_target *ti, struct bio *bio)
1949 {
1950     struct dm_exception *e;
1951     struct dm_snapshot *s = ti->private;
1952     int r = DM_MAPIO_REMAPPED;
1953     chunk_t chunk;
1954     struct dm_snap_pending_exception *pe = NULL;
1955     struct dm_exception_table_lock lock;
1956 
1957     init_tracked_chunk(bio);
1958 
1959     if (bio->bi_opf & REQ_PREFLUSH) {
1960         bio_set_dev(bio, s->cow->bdev);
1961         return DM_MAPIO_REMAPPED;
1962     }
1963 
1964     chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
1965     dm_exception_table_lock_init(s, chunk, &lock);
1966 
1967     /* Full snapshots are not usable */
1968     /* To get here the table must be live so s->active is always set. */
1969     if (!s->valid)
1970         return DM_MAPIO_KILL;
1971 
1972     if (bio_data_dir(bio) == WRITE) {
1973         while (unlikely(!wait_for_in_progress(s, false)))
1974             ; /* wait_for_in_progress() has slept */
1975     }
1976 
1977     down_read(&s->lock);
1978     dm_exception_table_lock(&lock);
1979 
1980     if (!s->valid || (unlikely(s->snapshot_overflowed) &&
1981         bio_data_dir(bio) == WRITE)) {
1982         r = DM_MAPIO_KILL;
1983         goto out_unlock;
1984     }
1985 
1986     if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
1987         if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
1988             /*
1989              * passdown discard to origin (without triggering
1990              * snapshot exceptions via do_origin; doing so would
1991              * defeat the goal of freeing space in origin that is
1992              * implied by the "discard_passdown_origin" feature)
1993              */
1994             bio_set_dev(bio, s->origin->bdev);
1995             track_chunk(s, bio, chunk);
1996             goto out_unlock;
1997         }
1998         /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
1999     }
2000 
2001     /* If the block is already remapped - use that, else remap it */
2002     e = dm_lookup_exception(&s->complete, chunk);
2003     if (e) {
2004         remap_exception(s, e, bio, chunk);
2005         if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
2006             io_overlaps_chunk(s, bio)) {
2007             dm_exception_table_unlock(&lock);
2008             up_read(&s->lock);
2009             zero_exception(s, e, bio, chunk);
2010             r = DM_MAPIO_SUBMITTED; /* discard is not issued */
2011             goto out;
2012         }
2013         goto out_unlock;
2014     }
2015 
2016     if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2017         /*
2018          * If no exception exists, complete discard immediately
2019          * otherwise it'll trigger copy-out.
2020          */
2021         bio_endio(bio);
2022         r = DM_MAPIO_SUBMITTED;
2023         goto out_unlock;
2024     }
2025 
2026     /*
2027      * Write to snapshot - higher level takes care of RW/RO
2028      * flags so we should only get this if we are
2029      * writable.
2030      */
2031     if (bio_data_dir(bio) == WRITE) {
2032         pe = __lookup_pending_exception(s, chunk);
2033         if (!pe) {
2034             dm_exception_table_unlock(&lock);
2035             pe = alloc_pending_exception(s);
2036             dm_exception_table_lock(&lock);
2037 
2038             e = dm_lookup_exception(&s->complete, chunk);
2039             if (e) {
2040                 free_pending_exception(pe);
2041                 remap_exception(s, e, bio, chunk);
2042                 goto out_unlock;
2043             }
2044 
2045             pe = __find_pending_exception(s, pe, chunk);
2046             if (!pe) {
2047                 dm_exception_table_unlock(&lock);
2048                 up_read(&s->lock);
2049 
2050                 down_write(&s->lock);
2051 
2052                 if (s->store->userspace_supports_overflow) {
2053                     if (s->valid && !s->snapshot_overflowed) {
2054                         s->snapshot_overflowed = 1;
2055                         DMERR("Snapshot overflowed: Unable to allocate exception.");
2056                     }
2057                 } else
2058                     __invalidate_snapshot(s, -ENOMEM);
2059                 up_write(&s->lock);
2060 
2061                 r = DM_MAPIO_KILL;
2062                 goto out;
2063             }
2064         }
2065 
2066         remap_exception(s, &pe->e, bio, chunk);
2067 
2068         r = DM_MAPIO_SUBMITTED;
2069 
2070         if (!pe->started && io_overlaps_chunk(s, bio)) {
2071             pe->started = 1;
2072 
2073             dm_exception_table_unlock(&lock);
2074             up_read(&s->lock);
2075 
2076             start_full_bio(pe, bio);
2077             goto out;
2078         }
2079 
2080         bio_list_add(&pe->snapshot_bios, bio);
2081 
2082         if (!pe->started) {
2083             /* this is protected by the exception table lock */
2084             pe->started = 1;
2085 
2086             dm_exception_table_unlock(&lock);
2087             up_read(&s->lock);
2088 
2089             start_copy(pe);
2090             goto out;
2091         }
2092     } else {
2093         bio_set_dev(bio, s->origin->bdev);
2094         track_chunk(s, bio, chunk);
2095     }
2096 
2097 out_unlock:
2098     dm_exception_table_unlock(&lock);
2099     up_read(&s->lock);
2100 out:
2101     return r;
2102 }
2103 
2104 /*
2105  * A snapshot-merge target behaves like a combination of a snapshot
2106  * target and a snapshot-origin target.  It only generates new
2107  * exceptions in other snapshots and not in the one that is being
2108  * merged.
2109  *
2110  * For each chunk, if there is an existing exception, it is used to
2111  * redirect I/O to the cow device.  Otherwise I/O is sent to the origin,
2112  * which in turn might generate exceptions in other snapshots.
2113  * If merging is currently taking place on the chunk in question, the
2114  * I/O is deferred by adding it to s->bios_queued_during_merge.
2115  */
2116 static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
2117 {
2118     struct dm_exception *e;
2119     struct dm_snapshot *s = ti->private;
2120     int r = DM_MAPIO_REMAPPED;
2121     chunk_t chunk;
2122 
2123     init_tracked_chunk(bio);
2124 
2125     if (bio->bi_opf & REQ_PREFLUSH) {
2126         if (!dm_bio_get_target_bio_nr(bio))
2127             bio_set_dev(bio, s->origin->bdev);
2128         else
2129             bio_set_dev(bio, s->cow->bdev);
2130         return DM_MAPIO_REMAPPED;
2131     }
2132 
2133     if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2134         /* Once merging, discards no longer effect change */
2135         bio_endio(bio);
2136         return DM_MAPIO_SUBMITTED;
2137     }
2138 
2139     chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
2140 
2141     down_write(&s->lock);
2142 
2143     /* Full merging snapshots are redirected to the origin */
2144     if (!s->valid)
2145         goto redirect_to_origin;
2146 
2147     /* If the block is already remapped - use that */
2148     e = dm_lookup_exception(&s->complete, chunk);
2149     if (e) {
2150         /* Queue writes overlapping with chunks being merged */
2151         if (bio_data_dir(bio) == WRITE &&
2152             chunk >= s->first_merging_chunk &&
2153             chunk < (s->first_merging_chunk +
2154                  s->num_merging_chunks)) {
2155             bio_set_dev(bio, s->origin->bdev);
2156             bio_list_add(&s->bios_queued_during_merge, bio);
2157             r = DM_MAPIO_SUBMITTED;
2158             goto out_unlock;
2159         }
2160 
2161         remap_exception(s, e, bio, chunk);
2162 
2163         if (bio_data_dir(bio) == WRITE)
2164             track_chunk(s, bio, chunk);
2165         goto out_unlock;
2166     }
2167 
2168 redirect_to_origin:
2169     bio_set_dev(bio, s->origin->bdev);
2170 
2171     if (bio_data_dir(bio) == WRITE) {
2172         up_write(&s->lock);
2173         return do_origin(s->origin, bio, false);
2174     }
2175 
2176 out_unlock:
2177     up_write(&s->lock);
2178 
2179     return r;
2180 }
2181 
2182 static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
2183         blk_status_t *error)
2184 {
2185     struct dm_snapshot *s = ti->private;
2186 
2187     if (is_bio_tracked(bio))
2188         stop_tracking_chunk(s, bio);
2189 
2190     return DM_ENDIO_DONE;
2191 }
2192 
2193 static void snapshot_merge_presuspend(struct dm_target *ti)
2194 {
2195     struct dm_snapshot *s = ti->private;
2196 
2197     stop_merge(s);
2198 }
2199 
2200 static int snapshot_preresume(struct dm_target *ti)
2201 {
2202     int r = 0;
2203     struct dm_snapshot *s = ti->private;
2204     struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
2205 
2206     down_read(&_origins_lock);
2207     (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
2208     if (snap_src && snap_dest) {
2209         down_read(&snap_src->lock);
2210         if (s == snap_src) {
2211             DMERR("Unable to resume snapshot source until "
2212                   "handover completes.");
2213             r = -EINVAL;
2214         } else if (!dm_suspended(snap_src->ti)) {
2215             DMERR("Unable to perform snapshot handover until "
2216                   "source is suspended.");
2217             r = -EINVAL;
2218         }
2219         up_read(&snap_src->lock);
2220     }
2221     up_read(&_origins_lock);
2222 
2223     return r;
2224 }
2225 
2226 static void snapshot_resume(struct dm_target *ti)
2227 {
2228     struct dm_snapshot *s = ti->private;
2229     struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
2230     struct dm_origin *o;
2231     struct mapped_device *origin_md = NULL;
2232     bool must_restart_merging = false;
2233 
2234     down_read(&_origins_lock);
2235 
2236     o = __lookup_dm_origin(s->origin->bdev);
2237     if (o)
2238         origin_md = dm_table_get_md(o->ti->table);
2239     if (!origin_md) {
2240         (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
2241         if (snap_merging)
2242             origin_md = dm_table_get_md(snap_merging->ti->table);
2243     }
2244     if (origin_md == dm_table_get_md(ti->table))
2245         origin_md = NULL;
2246     if (origin_md) {
2247         if (dm_hold(origin_md))
2248             origin_md = NULL;
2249     }
2250 
2251     up_read(&_origins_lock);
2252 
2253     if (origin_md) {
2254         dm_internal_suspend_fast(origin_md);
2255         if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
2256             must_restart_merging = true;
2257             stop_merge(snap_merging);
2258         }
2259     }
2260 
2261     down_read(&_origins_lock);
2262 
2263     (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
2264     if (snap_src && snap_dest) {
2265         down_write(&snap_src->lock);
2266         down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
2267         __handover_exceptions(snap_src, snap_dest);
2268         up_write(&snap_dest->lock);
2269         up_write(&snap_src->lock);
2270     }
2271 
2272     up_read(&_origins_lock);
2273 
2274     if (origin_md) {
2275         if (must_restart_merging)
2276             start_merge(snap_merging);
2277         dm_internal_resume_fast(origin_md);
2278         dm_put(origin_md);
2279     }
2280 
2281     /* Now we have correct chunk size, reregister */
2282     reregister_snapshot(s);
2283 
2284     down_write(&s->lock);
2285     s->active = 1;
2286     up_write(&s->lock);
2287 }
2288 
2289 static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
2290 {
2291     uint32_t min_chunksize;
2292 
2293     down_read(&_origins_lock);
2294     min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
2295     up_read(&_origins_lock);
2296 
2297     return min_chunksize;
2298 }
2299 
2300 static void snapshot_merge_resume(struct dm_target *ti)
2301 {
2302     struct dm_snapshot *s = ti->private;
2303 
2304     /*
2305      * Handover exceptions from existing snapshot.
2306      */
2307     snapshot_resume(ti);
2308 
2309     /*
2310      * snapshot-merge acts as an origin, so set ti->max_io_len
2311      */
2312     ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev);
2313 
2314     start_merge(s);
2315 }
2316 
2317 static void snapshot_status(struct dm_target *ti, status_type_t type,
2318                 unsigned status_flags, char *result, unsigned maxlen)
2319 {
2320     unsigned sz = 0;
2321     struct dm_snapshot *snap = ti->private;
2322     unsigned num_features;
2323 
2324     switch (type) {
2325     case STATUSTYPE_INFO:
2326 
2327         down_write(&snap->lock);
2328 
2329         if (!snap->valid)
2330             DMEMIT("Invalid");
2331         else if (snap->merge_failed)
2332             DMEMIT("Merge failed");
2333         else if (snap->snapshot_overflowed)
2334             DMEMIT("Overflow");
2335         else {
2336             if (snap->store->type->usage) {
2337                 sector_t total_sectors, sectors_allocated,
2338                      metadata_sectors;
2339                 snap->store->type->usage(snap->store,
2340                              &total_sectors,
2341                              &sectors_allocated,
2342                              &metadata_sectors);
2343                 DMEMIT("%llu/%llu %llu",
2344                        (unsigned long long)sectors_allocated,
2345                        (unsigned long long)total_sectors,
2346                        (unsigned long long)metadata_sectors);
2347             }
2348             else
2349                 DMEMIT("Unknown");
2350         }
2351 
2352         up_write(&snap->lock);
2353 
2354         break;
2355 
2356     case STATUSTYPE_TABLE:
2357         /*
2358          * kdevname returns a static pointer so we need
2359          * to make private copies if the output is to
2360          * make sense.
2361          */
2362         DMEMIT("%s %s", snap->origin->name, snap->cow->name);
2363         sz += snap->store->type->status(snap->store, type, result + sz,
2364                         maxlen - sz);
2365         num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
2366         if (num_features) {
2367             DMEMIT(" %u", num_features);
2368             if (snap->discard_zeroes_cow)
2369                 DMEMIT(" discard_zeroes_cow");
2370             if (snap->discard_passdown_origin)
2371                 DMEMIT(" discard_passdown_origin");
2372         }
2373         break;
2374 
2375     case STATUSTYPE_IMA:
2376         DMEMIT_TARGET_NAME_VERSION(ti->type);
2377         DMEMIT(",snap_origin_name=%s", snap->origin->name);
2378         DMEMIT(",snap_cow_name=%s", snap->cow->name);
2379         DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n');
2380         DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 'y' : 'n');
2381         DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n');
2382         DMEMIT(";");
2383         break;
2384     }
2385 }
2386 
2387 static int snapshot_iterate_devices(struct dm_target *ti,
2388                     iterate_devices_callout_fn fn, void *data)
2389 {
2390     struct dm_snapshot *snap = ti->private;
2391     int r;
2392 
2393     r = fn(ti, snap->origin, 0, ti->len, data);
2394 
2395     if (!r)
2396         r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);
2397 
2398     return r;
2399 }
2400 
2401 static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
2402 {
2403     struct dm_snapshot *snap = ti->private;
2404 
2405     if (snap->discard_zeroes_cow) {
2406         struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
2407 
2408         down_read(&_origins_lock);
2409 
2410         (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
2411         if (snap_src && snap_dest)
2412             snap = snap_src;
2413 
2414         /* All discards are split on chunk_size boundary */
2415         limits->discard_granularity = snap->store->chunk_size;
2416         limits->max_discard_sectors = snap->store->chunk_size;
2417 
2418         up_read(&_origins_lock);
2419     }
2420 }
2421 
2422 /*-----------------------------------------------------------------
2423  * Origin methods
2424  *---------------------------------------------------------------*/
2425 
2426 /*
2427  * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
2428  * supplied bio was ignored.  The caller may submit it immediately.
2429  * (No remapping actually occurs as the origin is always a direct linear
2430  * map.)
2431  *
2432  * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
2433  * and any supplied bio is added to a list to be submitted once all
2434  * the necessary exceptions exist.
2435  */
2436 static int __origin_write(struct list_head *snapshots, sector_t sector,
2437               struct bio *bio)
2438 {
2439     int r = DM_MAPIO_REMAPPED;
2440     struct dm_snapshot *snap;
2441     struct dm_exception *e;
2442     struct dm_snap_pending_exception *pe, *pe2;
2443     struct dm_snap_pending_exception *pe_to_start_now = NULL;
2444     struct dm_snap_pending_exception *pe_to_start_last = NULL;
2445     struct dm_exception_table_lock lock;
2446     chunk_t chunk;
2447 
2448     /* Do all the snapshots on this origin */
2449     list_for_each_entry (snap, snapshots, list) {
2450         /*
2451          * Don't make new exceptions in a merging snapshot
2452          * because it has effectively been deleted
2453          */
2454         if (dm_target_is_snapshot_merge(snap->ti))
2455             continue;
2456 
2457         /* Nothing to do if writing beyond end of snapshot */
2458         if (sector >= dm_table_get_size(snap->ti->table))
2459             continue;
2460 
2461         /*
2462          * Remember, different snapshots can have
2463          * different chunk sizes.
2464          */
2465         chunk = sector_to_chunk(snap->store, sector);
2466         dm_exception_table_lock_init(snap, chunk, &lock);
2467 
2468         down_read(&snap->lock);
2469         dm_exception_table_lock(&lock);
2470 
2471         /* Only deal with valid and active snapshots */
2472         if (!snap->valid || !snap->active)
2473             goto next_snapshot;
2474 
2475         pe = __lookup_pending_exception(snap, chunk);
2476         if (!pe) {
2477             /*
2478              * Check exception table to see if block is already
2479              * remapped in this snapshot and trigger an exception
2480              * if not.
2481              */
2482             e = dm_lookup_exception(&snap->complete, chunk);
2483             if (e)
2484                 goto next_snapshot;
2485 
2486             dm_exception_table_unlock(&lock);
2487             pe = alloc_pending_exception(snap);
2488             dm_exception_table_lock(&lock);
2489 
2490             pe2 = __lookup_pending_exception(snap, chunk);
2491 
2492             if (!pe2) {
2493                 e = dm_lookup_exception(&snap->complete, chunk);
2494                 if (e) {
2495                     free_pending_exception(pe);
2496                     goto next_snapshot;
2497                 }
2498 
2499                 pe = __insert_pending_exception(snap, pe, chunk);
2500                 if (!pe) {
2501                     dm_exception_table_unlock(&lock);
2502                     up_read(&snap->lock);
2503 
2504                     invalidate_snapshot(snap, -ENOMEM);
2505                     continue;
2506                 }
2507             } else {
2508                 free_pending_exception(pe);
2509                 pe = pe2;
2510             }
2511         }
2512 
2513         r = DM_MAPIO_SUBMITTED;
2514 
2515         /*
2516          * If an origin bio was supplied, queue it to wait for the
2517          * completion of this exception, and start this one last,
2518          * at the end of the function.
2519          */
2520         if (bio) {
2521             bio_list_add(&pe->origin_bios, bio);
2522             bio = NULL;
2523 
2524             if (!pe->started) {
2525                 pe->started = 1;
2526                 pe_to_start_last = pe;
2527             }
2528         }
2529 
2530         if (!pe->started) {
2531             pe->started = 1;
2532             pe_to_start_now = pe;
2533         }
2534 
2535 next_snapshot:
2536         dm_exception_table_unlock(&lock);
2537         up_read(&snap->lock);
2538 
2539         if (pe_to_start_now) {
2540             start_copy(pe_to_start_now);
2541             pe_to_start_now = NULL;
2542         }
2543     }
2544 
2545     /*
2546      * Submit the exception against which the bio is queued last,
2547      * to give the other exceptions a head start.
2548      */
2549     if (pe_to_start_last)
2550         start_copy(pe_to_start_last);
2551 
2552     return r;
2553 }
2554 
2555 /*
2556  * Called on a write from the origin driver.
2557  */
2558 static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
2559 {
2560     struct origin *o;
2561     int r = DM_MAPIO_REMAPPED;
2562 
2563 again:
2564     down_read(&_origins_lock);
2565     o = __lookup_origin(origin->bdev);
2566     if (o) {
2567         if (limit) {
2568             struct dm_snapshot *s;
2569             list_for_each_entry(s, &o->snapshots, list)
2570                 if (unlikely(!wait_for_in_progress(s, true)))
2571                     goto again;
2572         }
2573 
2574         r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
2575     }
2576     up_read(&_origins_lock);
2577 
2578     return r;
2579 }
2580 
2581 /*
2582  * Trigger exceptions in all non-merging snapshots.
2583  *
2584  * The chunk size of the merging snapshot may be larger than the chunk
2585  * size of some other snapshot so we may need to reallocate multiple
2586  * chunks in other snapshots.
2587  *
2588  * We scan all the overlapping exceptions in the other snapshots.
2589  * Returns 1 if anything was reallocated and must be waited for,
2590  * otherwise returns 0.
2591  *
2592  * size must be a multiple of merging_snap's chunk_size.
2593  */
2594 static int origin_write_extent(struct dm_snapshot *merging_snap,
2595                    sector_t sector, unsigned size)
2596 {
2597     int must_wait = 0;
2598     sector_t n;
2599     struct origin *o;
2600 
2601     /*
2602      * The origin's __minimum_chunk_size() got stored in max_io_len
2603      * by snapshot_merge_resume().
2604      */
2605     down_read(&_origins_lock);
2606     o = __lookup_origin(merging_snap->origin->bdev);
2607     for (n = 0; n < size; n += merging_snap->ti->max_io_len)
2608         if (__origin_write(&o->snapshots, sector + n, NULL) ==
2609             DM_MAPIO_SUBMITTED)
2610             must_wait = 1;
2611     up_read(&_origins_lock);
2612 
2613     return must_wait;
2614 }
2615 
2616 /*
2617  * Origin: maps a linear range of a device, with hooks for snapshotting.
2618  */
2619 
2620 /*
2621  * Construct an origin mapping: <dev_path>
2622  * The context for an origin is merely a 'struct dm_dev *'
2623  * pointing to the real device.
2624  */
2625 static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2626 {
2627     int r;
2628     struct dm_origin *o;
2629 
2630     if (argc != 1) {
2631         ti->error = "origin: incorrect number of arguments";
2632         return -EINVAL;
2633     }
2634 
2635     o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
2636     if (!o) {
2637         ti->error = "Cannot allocate private origin structure";
2638         r = -ENOMEM;
2639         goto bad_alloc;
2640     }
2641 
2642     r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
2643     if (r) {
2644         ti->error = "Cannot get target device";
2645         goto bad_open;
2646     }
2647 
2648     o->ti = ti;
2649     ti->private = o;
2650     ti->num_flush_bios = 1;
2651 
2652     return 0;
2653 
2654 bad_open:
2655     kfree(o);
2656 bad_alloc:
2657     return r;
2658 }
2659 
2660 static void origin_dtr(struct dm_target *ti)
2661 {
2662     struct dm_origin *o = ti->private;
2663 
2664     dm_put_device(ti, o->dev);
2665     kfree(o);
2666 }
2667 
2668 static int origin_map(struct dm_target *ti, struct bio *bio)
2669 {
2670     struct dm_origin *o = ti->private;
2671     unsigned available_sectors;
2672 
2673     bio_set_dev(bio, o->dev->bdev);
2674 
2675     if (unlikely(bio->bi_opf & REQ_PREFLUSH))
2676         return DM_MAPIO_REMAPPED;
2677 
2678     if (bio_data_dir(bio) != WRITE)
2679         return DM_MAPIO_REMAPPED;
2680 
2681     available_sectors = o->split_boundary -
2682         ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));
2683 
2684     if (bio_sectors(bio) > available_sectors)
2685         dm_accept_partial_bio(bio, available_sectors);
2686 
2687     /* Only tell snapshots if this is a write */
2688     return do_origin(o->dev, bio, true);
2689 }
2690 
2691 /*
2692  * Set the target "max_io_len" field to the minimum of all the snapshots'
2693  * chunk sizes.
2694  */
2695 static void origin_resume(struct dm_target *ti)
2696 {
2697     struct dm_origin *o = ti->private;
2698 
2699     o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
2700 
2701     down_write(&_origins_lock);
2702     __insert_dm_origin(o);
2703     up_write(&_origins_lock);
2704 }
2705 
2706 static void origin_postsuspend(struct dm_target *ti)
2707 {
2708     struct dm_origin *o = ti->private;
2709 
2710     down_write(&_origins_lock);
2711     __remove_dm_origin(o);
2712     up_write(&_origins_lock);
2713 }
2714 
2715 static void origin_status(struct dm_target *ti, status_type_t type,
2716               unsigned status_flags, char *result, unsigned maxlen)
2717 {
2718     struct dm_origin *o = ti->private;
2719 
2720     switch (type) {
2721     case STATUSTYPE_INFO:
2722         result[0] = '\0';
2723         break;
2724 
2725     case STATUSTYPE_TABLE:
2726         snprintf(result, maxlen, "%s", o->dev->name);
2727         break;
2728     case STATUSTYPE_IMA:
2729         result[0] = '\0';
2730         break;
2731     }
2732 }
2733 
2734 static int origin_iterate_devices(struct dm_target *ti,
2735                   iterate_devices_callout_fn fn, void *data)
2736 {
2737     struct dm_origin *o = ti->private;
2738 
2739     return fn(ti, o->dev, 0, ti->len, data);
2740 }
2741 
2742 static struct target_type origin_target = {
2743     .name    = "snapshot-origin",
2744     .version = {1, 9, 0},
2745     .module  = THIS_MODULE,
2746     .ctr     = origin_ctr,
2747     .dtr     = origin_dtr,
2748     .map     = origin_map,
2749     .resume  = origin_resume,
2750     .postsuspend = origin_postsuspend,
2751     .status  = origin_status,
2752     .iterate_devices = origin_iterate_devices,
2753 };
2754 
2755 static struct target_type snapshot_target = {
2756     .name    = "snapshot",
2757     .version = {1, 16, 0},
2758     .module  = THIS_MODULE,
2759     .ctr     = snapshot_ctr,
2760     .dtr     = snapshot_dtr,
2761     .map     = snapshot_map,
2762     .end_io  = snapshot_end_io,
2763     .preresume  = snapshot_preresume,
2764     .resume  = snapshot_resume,
2765     .status  = snapshot_status,
2766     .iterate_devices = snapshot_iterate_devices,
2767     .io_hints = snapshot_io_hints,
2768 };
2769 
2770 static struct target_type merge_target = {
2771     .name    = dm_snapshot_merge_target_name,
2772     .version = {1, 5, 0},
2773     .module  = THIS_MODULE,
2774     .ctr     = snapshot_ctr,
2775     .dtr     = snapshot_dtr,
2776     .map     = snapshot_merge_map,
2777     .end_io  = snapshot_end_io,
2778     .presuspend = snapshot_merge_presuspend,
2779     .preresume  = snapshot_preresume,
2780     .resume  = snapshot_merge_resume,
2781     .status  = snapshot_status,
2782     .iterate_devices = snapshot_iterate_devices,
2783     .io_hints = snapshot_io_hints,
2784 };
2785 
2786 static int __init dm_snapshot_init(void)
2787 {
2788     int r;
2789 
2790     r = dm_exception_store_init();
2791     if (r) {
2792         DMERR("Failed to initialize exception stores");
2793         return r;
2794     }
2795 
2796     r = init_origin_hash();
2797     if (r) {
2798         DMERR("init_origin_hash failed.");
2799         goto bad_origin_hash;
2800     }
2801 
2802     exception_cache = KMEM_CACHE(dm_exception, 0);
2803     if (!exception_cache) {
2804         DMERR("Couldn't create exception cache.");
2805         r = -ENOMEM;
2806         goto bad_exception_cache;
2807     }
2808 
2809     pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
2810     if (!pending_cache) {
2811         DMERR("Couldn't create pending cache.");
2812         r = -ENOMEM;
2813         goto bad_pending_cache;
2814     }
2815 
2816     r = dm_register_target(&snapshot_target);
2817     if (r < 0) {
2818         DMERR("snapshot target register failed %d", r);
2819         goto bad_register_snapshot_target;
2820     }
2821 
2822     r = dm_register_target(&origin_target);
2823     if (r < 0) {
2824         DMERR("Origin target register failed %d", r);
2825         goto bad_register_origin_target;
2826     }
2827 
2828     r = dm_register_target(&merge_target);
2829     if (r < 0) {
2830         DMERR("Merge target register failed %d", r);
2831         goto bad_register_merge_target;
2832     }
2833 
2834     return 0;
2835 
2836 bad_register_merge_target:
2837     dm_unregister_target(&origin_target);
2838 bad_register_origin_target:
2839     dm_unregister_target(&snapshot_target);
2840 bad_register_snapshot_target:
2841     kmem_cache_destroy(pending_cache);
2842 bad_pending_cache:
2843     kmem_cache_destroy(exception_cache);
2844 bad_exception_cache:
2845     exit_origin_hash();
2846 bad_origin_hash:
2847     dm_exception_store_exit();
2848 
2849     return r;
2850 }
2851 
2852 static void __exit dm_snapshot_exit(void)
2853 {
2854     dm_unregister_target(&snapshot_target);
2855     dm_unregister_target(&origin_target);
2856     dm_unregister_target(&merge_target);
2857 
2858     exit_origin_hash();
2859     kmem_cache_destroy(pending_cache);
2860     kmem_cache_destroy(exception_cache);
2861 
2862     dm_exception_store_exit();
2863 }
2864 
2865 /* Module hooks */
2866 module_init(dm_snapshot_init);
2867 module_exit(dm_snapshot_exit);
2868 
2869 MODULE_DESCRIPTION(DM_NAME " snapshot target");
2870 MODULE_AUTHOR("Joe Thornber");
2871 MODULE_LICENSE("GPL");
2872 MODULE_ALIAS("dm-snapshot-origin");
2873 MODULE_ALIAS("dm-snapshot-merge");