/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

struct stack_store {
    unsigned int    nr_entries;
    unsigned long   entries[MAX_STACK];
};

struct block_lock {
    spinlock_t lock;
    __s32 count;    /* -1 = write locked, 0 = unlocked, > 0 = number of read holders */
    struct list_head waiters;
    struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
    struct stack_store traces[MAX_HOLDERS];
#endif
};

struct waiter {
    struct list_head list;
    struct task_struct *task;
    int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
                  struct task_struct *task)
{
    unsigned i;

    for (i = 0; i < MAX_HOLDERS; i++)
        if (lock->holders[i] == task)
            break;

    BUG_ON(i == MAX_HOLDERS);
    return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
    unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
    struct stack_store *t;
#endif

    get_task_struct(task);
    lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
    t = lock->traces + h;
    t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
    unsigned h = __find_holder(lock, task);
    lock->holders[h] = NULL;
    put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
    unsigned i;

    for (i = 0; i < MAX_HOLDERS; i++) {
        if (lock->holders[i] == current) {
            DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
            DMERR("previously held here:");
            stack_trace_print(lock->traces[i].entries,
                      lock->traces[i].nr_entries, 4);

            DMERR("subsequent acquisition attempted here:");
            dump_stack();
#endif
            return -EINVAL;
        }
    }

    return 0;
}

static void __wait(struct waiter *w)
{
    for (;;) {
        set_current_state(TASK_UNINTERRUPTIBLE);

        if (!w->task)
            break;

        schedule();
    }

    set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
    struct task_struct *task;

    list_del(&w->list);
    task = w->task;
    /*
     * Order the list_del() before clearing w->task: once ->task is NULL
     * the waiter may return from __wait() and reuse its on-stack waiter.
     */
    smp_mb();
    w->task = NULL;
    wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
    struct waiter *w, *tmp;

    BUG_ON(lock->count < 0);
    list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
        if (lock->count >= MAX_HOLDERS)
            return;

        if (w->wants_write) {
            if (lock->count > 0)
                return; /* still read locked */

            lock->count = -1;
            __add_holder(lock, w->task);
            __wake_waiter(w);
            return;
        }

        lock->count++;
        __add_holder(lock, w->task);
        __wake_waiter(w);
    }
}

static void bl_init(struct block_lock *lock)
{
    int i;

    spin_lock_init(&lock->lock);
    lock->count = 0;
    INIT_LIST_HEAD(&lock->waiters);
    for (i = 0; i < MAX_HOLDERS; i++)
        lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
    return lock->count >= 0 &&
        lock->count < MAX_HOLDERS &&
        list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
    int r;
    struct waiter w;

    spin_lock(&lock->lock);
    r = __check_holder(lock);
    if (r) {
        spin_unlock(&lock->lock);
        return r;
    }

    if (__available_for_read(lock)) {
        lock->count++;
        __add_holder(lock, current);
        spin_unlock(&lock->lock);
        return 0;
    }

    get_task_struct(current);

    w.task = current;
    w.wants_write = 0;
    list_add_tail(&w.list, &lock->waiters);
    spin_unlock(&lock->lock);

    __wait(&w);
    put_task_struct(current);
    return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
    int r;

    spin_lock(&lock->lock);
    r = __check_holder(lock);
    if (r)
        goto out;

    if (__available_for_read(lock)) {
        lock->count++;
        __add_holder(lock, current);
        r = 0;
    } else
        r = -EWOULDBLOCK;

out:
    spin_unlock(&lock->lock);
    return r;
}

static void bl_up_read(struct block_lock *lock)
{
    spin_lock(&lock->lock);
    BUG_ON(lock->count <= 0);
    __del_holder(lock, current);
    --lock->count;
    if (!list_empty(&lock->waiters))
        __wake_many(lock);
    spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
    int r;
    struct waiter w;

    spin_lock(&lock->lock);
    r = __check_holder(lock);
    if (r) {
        spin_unlock(&lock->lock);
        return r;
    }

    if (lock->count == 0 && list_empty(&lock->waiters)) {
        lock->count = -1;
        __add_holder(lock, current);
        spin_unlock(&lock->lock);
        return 0;
    }

    get_task_struct(current);
    w.task = current;
    w.wants_write = 1;

    /*
     * Writers are given priority.  We know there's only one mutator in
     * the system, so we can ignore the ordering reversal.
     */
    list_add(&w.list, &lock->waiters);
    spin_unlock(&lock->lock);

    __wait(&w);
    put_task_struct(current);

    return 0;
}

static void bl_up_write(struct block_lock *lock)
{
    spin_lock(&lock->lock);
    __del_holder(lock, current);
    lock->count = 0;
    if (!list_empty(&lock->waiters))
        __wake_many(lock);
    spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
    if (r == -EINVAL)
        DMERR("recursive acquisition of block %llu requested.",
              (unsigned long long) b);
}

#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
    return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
    return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
    return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
    struct dm_block_validator *validator;
    int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
    struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
    struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

    aux->validator = NULL;
    bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
    struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

    if (aux->validator) {
        aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
             dm_bufio_get_block_size(dm_bufio_get_client(buf)));
    }
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
    struct dm_bufio_client *bufio;
    bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
                         unsigned block_size,
                         unsigned max_held_per_thread)
{
    int r;
    struct dm_block_manager *bm;

    bm = kmalloc(sizeof(*bm), GFP_KERNEL);
    if (!bm) {
        r = -ENOMEM;
        goto bad;
    }

    bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
                       sizeof(struct buffer_aux),
                       dm_block_manager_alloc_callback,
                       dm_block_manager_write_callback,
                       0);
    if (IS_ERR(bm->bufio)) {
        r = PTR_ERR(bm->bufio);
        kfree(bm);
        goto bad;
    }

    bm->read_only = false;

    return bm;

bad:
    return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
    dm_bufio_client_destroy(bm->bufio);
    kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
    return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
    return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
                 struct dm_buffer *buf,
                 struct buffer_aux *aux,
                 struct dm_block_validator *v)
{
    if (unlikely(!aux->validator)) {
        int r;

        if (!v)
            return 0;

        r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
        if (unlikely(r)) {
            DMERR_LIMIT("%s validator check failed for block %llu", v->name,
                    (unsigned long long) dm_bufio_get_block_number(buf));
            return r;
        }
        aux->validator = v;
    } else {
        if (unlikely(aux->validator != v)) {
            DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
                    aux->validator->name, v ? v->name : "NULL",
                    (unsigned long long) dm_bufio_get_block_number(buf));
            return -EINVAL;
        }
    }

    return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
            struct dm_block_validator *v,
            struct dm_block **result)
{
    struct buffer_aux *aux;
    void *p;
    int r;

    p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
    if (IS_ERR(p))
        return PTR_ERR(p);

    aux = dm_bufio_get_aux_data(to_buffer(*result));
    r = bl_down_read(&aux->lock);
    if (unlikely(r)) {
        dm_bufio_release(to_buffer(*result));
        report_recursive_bug(b, r);
        return r;
    }

    aux->write_locked = 0;

    r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
    if (unlikely(r)) {
        bl_up_read(&aux->lock);
        dm_bufio_release(to_buffer(*result));
        return r;
    }

    return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
             dm_block_t b, struct dm_block_validator *v,
             struct dm_block **result)
{
    struct buffer_aux *aux;
    void *p;
    int r;

    if (dm_bm_is_read_only(bm))
        return -EPERM;

    p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
    if (IS_ERR(p))
        return PTR_ERR(p);

    aux = dm_bufio_get_aux_data(to_buffer(*result));
    r = bl_down_write(&aux->lock);
    if (r) {
        dm_bufio_release(to_buffer(*result));
        report_recursive_bug(b, r);
        return r;
    }

    aux->write_locked = 1;

    r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
    if (unlikely(r)) {
        bl_up_write(&aux->lock);
        dm_bufio_release(to_buffer(*result));
        return r;
    }

    return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
            dm_block_t b, struct dm_block_validator *v,
            struct dm_block **result)
{
    struct buffer_aux *aux;
    void *p;
    int r;

    p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
    if (IS_ERR(p))
        return PTR_ERR(p);
    if (unlikely(!p))
        return -EWOULDBLOCK;

    aux = dm_bufio_get_aux_data(to_buffer(*result));
    r = bl_down_read_nonblock(&aux->lock);
    if (r < 0) {
        dm_bufio_release(to_buffer(*result));
        report_recursive_bug(b, r);
        return r;
    }
    aux->write_locked = 0;

    r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
    if (unlikely(r)) {
        bl_up_read(&aux->lock);
        dm_bufio_release(to_buffer(*result));
        return r;
    }

    return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
              dm_block_t b, struct dm_block_validator *v,
              struct dm_block **result)
{
    int r;
    struct buffer_aux *aux;
    void *p;

    if (dm_bm_is_read_only(bm))
        return -EPERM;

    p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
    if (IS_ERR(p))
        return PTR_ERR(p);

    memset(p, 0, dm_bm_block_size(bm));

    aux = dm_bufio_get_aux_data(to_buffer(*result));
    r = bl_down_write(&aux->lock);
    if (r) {
        dm_bufio_release(to_buffer(*result));
        return r;
    }

    aux->write_locked = 1;
    aux->validator = v;

    return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
    struct buffer_aux *aux;
    aux = dm_bufio_get_aux_data(to_buffer(b));

    if (aux->write_locked) {
        dm_bufio_mark_buffer_dirty(to_buffer(b));
        bl_up_write(&aux->lock);
    } else
        bl_up_read(&aux->lock);

    dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
    if (dm_bm_is_read_only(bm))
        return -EPERM;

    return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
    dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
    return (bm ? bm->read_only : true);
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
    if (bm)
        bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
    if (bm)
        bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
    return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
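
/*
 * Illustrative example (not part of the original file): a minimal
 * dm_block_validator sketch that protects a block with dm_bm_checksum().
 * The on-disk layout, magic number and checksum salt below are made up
 * purely for this example; the callback signatures are assumed to match
 * those used above (prepare_for_write()/check() taking the validator,
 * the block and the block size).  Real validators, such as the
 * superblock validators in the thin-provisioning and cache metadata
 * code, follow the same pattern.
 */
struct example_disk_header {
    __le32 csum;    /* covers everything after this field */
    __le32 magic;
} __packed;

#define EXAMPLE_MAGIC 0x12345678        /* hypothetical */
#define EXAMPLE_CSUM_XOR 0xdeadbeef     /* hypothetical salt */

static void example_prepare_for_write(struct dm_block_validator *v,
                                      struct dm_block *b, size_t block_size)
{
    struct example_disk_header *h = dm_block_data(b);

    h->csum = cpu_to_le32(dm_bm_checksum(&h->magic,
                                         block_size - sizeof(__le32),
                                         EXAMPLE_CSUM_XOR));
}

static int example_check(struct dm_block_validator *v,
                         struct dm_block *b, size_t block_size)
{
    struct example_disk_header *h = dm_block_data(b);
    __le32 csum;

    if (le32_to_cpu(h->magic) != EXAMPLE_MAGIC)
        return -EILSEQ;

    csum = cpu_to_le32(dm_bm_checksum(&h->magic,
                                      block_size - sizeof(__le32),
                                      EXAMPLE_CSUM_XOR));
    if (csum != h->csum)
        return -EILSEQ;

    return 0;
}

static struct dm_block_validator example_validator = {
    .name = "example",
    .prepare_for_write = example_prepare_for_write,
    .check = example_check,
};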

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/
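
/*
 * Illustrative usage sketch (not part of the original file): how a
 * hypothetical client might drive the public interface above, using the
 * example_validator sketched earlier.  The block device, the 4096 byte
 * block size and the limit of one held block per thread are assumptions
 * made purely for the example.
 */
static int __maybe_unused example_use_block_manager(struct block_device *bdev)
{
    struct dm_block_manager *bm;
    struct dm_block *sblock;
    struct example_disk_header *h;
    int r;

    bm = dm_block_manager_create(bdev, 4096, 1);
    if (IS_ERR(bm))
        return PTR_ERR(bm);

    /* Obtain block 0 zeroed and write locked, then initialise it. */
    r = dm_bm_write_lock_zero(bm, 0, &example_validator, &sblock);
    if (r)
        goto out;

    h = dm_block_data(sblock);
    h->magic = cpu_to_le32(EXAMPLE_MAGIC);

    /* Unlocking a write-locked block marks it dirty ... */
    dm_bm_unlock(sblock);

    /* ... and flushing writes it out; prepare_for_write() stamps the csum. */
    r = dm_bm_flush(bm);
    if (r)
        goto out;

    /* Re-acquire under a read lock; the validator association is checked. */
    r = dm_bm_read_lock(bm, 0, &example_validator, &sblock);
    if (r)
        goto out;

    dm_bm_unlock(sblock);
out:
    dm_block_manager_destroy(bm);
    return r;
}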