#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * A read/write lock used to debug metadata block locking.  It differs
 * from an ordinary rwsem in a few ways:
 *
 * i) The number of concurrent read locks on a block is capped at
 *    MAX_HOLDERS.
 *
 * ii) Recursive locking attempts by the same task are detected and fail
 *     with -EINVAL; with CONFIG_DM_DEBUG_BLOCK_STACK_TRACING a stack
 *     trace of the earlier acquisition is printed as well.
 *
 * iii) Write lock requests take priority over queued readers.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

struct stack_store {
	unsigned int nr_entries;
	unsigned long entries[MAX_STACK];
};

struct block_lock {
	spinlock_t lock;
	__s32 count;		/* -1 when write locked, otherwise the number of readers */
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store traces[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* Record @task as a holder; called with lock->lock held. */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2);
#endif
}

/* Drop @task from the holders array; called with lock->lock held. */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);

	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			stack_trace_print(lock->traces[i].entries,
					  lock->traces[i].nr_entries, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	/*
	 * Order all accesses to the waiter before clearing w->task: once it
	 * is NULL the woken task may return from __wait() and its on-stack
	 * struct waiter disappears.
	 */
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}

/*
 * Wake waiters in queue order: either a single writer, or readers up to
 * the next queued writer (bounded by MAX_HOLDERS).
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
	       lock->count < MAX_HOLDERS &&
	       list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers are added to the front of the queue, so they are granted
	 * the lock ahead of any queued readers.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*
 * The block manager is a thin layer over dm-bufio: a struct dm_block is
 * simply a struct dm_buffer, and the per-block state (validator, lock)
 * lives in the buffer's aux data.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
						  dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;	/* when set, write locks and flushes fail with -EPERM */
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback,
					   0);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
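/*
 * Illustrative usage sketch (not part of this file): a dm target typically
 * creates one block manager for its metadata device during construction
 * and destroys it on teardown.  "metadata_dev" is a hypothetical struct
 * dm_dev; the 4096-byte block size and the limit of 5 blocks held per
 * thread are made-up values for the example.
 *
 *	struct dm_block_manager *bm;
 *
 *	bm = dm_block_manager_create(metadata_dev->bdev, 4096, 5);
 *	if (IS_ERR(bm))
 *		return PTR_ERR(bm);
 *
 *	...
 *
 *	dm_block_manager_destroy(bm);
 */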

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;

		if (!v)
			return 0;

		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;

	/*
	 * The block has just been zeroed, so there is nothing to check;
	 * simply install the validator.
	 */
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;

	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
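/*
 * Illustrative usage sketch (not part of this file): the usual read pattern
 * is lock, copy or inspect the data, then unlock.  "sb_validator" and the
 * superblock living in block 0 are hypothetical details borrowed for the
 * example.
 *
 *	struct dm_block *b;
 *	int r;
 *
 *	r = dm_bm_read_lock(bm, 0, &sb_validator, &b);
 *	if (r)
 *		return r;
 *
 *	memcpy(&sb, dm_block_data(b), sizeof(sb));
 *	dm_bm_unlock(b);
 */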

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (dm_bm_is_read_only(bm))
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);
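/*
 * Illustrative usage sketch (not part of this file): a metadata update is
 * made under a write lock and committed by flushing dirty buffers.
 * Unlocking a write-locked block marks the underlying buffer dirty;
 * dm_bm_flush() then writes it out.  "node_validator", "block_nr" and
 * "new_node" are hypothetical.
 *
 *	struct dm_block *b;
 *	int r;
 *
 *	r = dm_bm_write_lock(bm, block_nr, &node_validator, &b);
 *	if (r)
 *		return r;
 *
 *	memcpy(dm_block_data(b), new_node, dm_bm_block_size(bm));
 *	dm_bm_unlock(b);
 *
 *	return dm_bm_flush(bm);
 */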

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return (bm ? bm->read_only : true);
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
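/*
 * Illustrative usage sketch (not part of this file): validators built on
 * this helper typically checksum everything in the block after the csum
 * field itself, xoring in a per-format constant so blocks of different
 * types cannot be confused.  The header layout and XOR value below are
 * hypothetical.
 *
 *	struct node_header {
 *		__le32 csum;
 *		__le32 flags;
 *		__le64 blocknr;
 *	} __packed;
 *
 *	#define NODE_CSUM_XOR 0xdeadbeef
 *
 *	static void node_prepare_for_write(struct dm_block_validator *v,
 *					   struct dm_block *b, size_t block_size)
 *	{
 *		struct node_header *h = dm_block_data(b);
 *
 *		h->blocknr = cpu_to_le64(dm_block_location(b));
 *		h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *						     block_size - sizeof(__le32),
 *						     NODE_CSUM_XOR));
 *	}
 */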

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");