/*
 * Partial Parity Log for closing the RAID5 write hole.
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/async_tx.h>
#include <linux/raid/md_p.h>
#include "md.h"
#include "raid5.h"
#include "raid5-log.h"

/*
 * Partial Parity Log (PPL) overview:
 *
 * Partial parity for a write operation is the XOR of the stripe data chunks
 * that are not modified by the write. It is just enough information to close
 * the RAID5 write hole: XOR-ing the partial parity with the modified chunks
 * produces parity consistent with the stripe's state from before the write,
 * no matter which of the member-disk writes actually completed before an
 * unclean shutdown.
 *
 * Each member disk reserves a PPL area (rdev->ppl.sector / rdev->ppl.size).
 * A logged write (io_unit) consists of a single header page (struct
 * ppl_header, containing up to PPL_HDR_MAX_ENTRIES entries that describe the
 * logged stripes) followed by the partial parity pages for those entries.
 * The header and partial parity are written with FUA before the data and
 * parity writes to the array are allowed to proceed. If the PPL area is
 * large enough, consecutive io_units are written to consecutive locations
 * and the log wraps around when the space runs out (multi-PPL mode);
 * otherwise every io_unit overwrites the previous one.
 *
 * After an unclean shutdown the newest valid header is located and its
 * entries are replayed: the partial parity and the corresponding data chunks
 * are read back, XOR-ed together and the result is written to the parity
 * disk. Only stripes that were being written at the time of the crash need
 * their parity recalculated, so a full array resync is not required. If a
 * data chunk needed for the recovery of an entry is not available (failed or
 * not yet recovered disk), that entry is skipped.
 */

#define PPL_SPACE_SIZE (128 * 1024)

struct ppl_conf {
	struct mddev *mddev;

	/* array of child logs, one for each raid disk */
	struct ppl_log *child_logs;
	int count;

	int block_size;		/* the logical block size used for data_sector
				 * in ppl_header_entry */

	u32 signature;		/* raid array identifier */
	atomic64_t seq;		/* current log write sequence number */

	struct kmem_cache *io_kc;
	mempool_t io_pool;
	struct bio_set bs;
	struct bio_set flush_bs;

	/* used only for recovery */
	int recovered_entries;
	int mismatch_count;

	/* stripes to retry if failed to allocate io_unit */
	struct list_head no_mem_stripes;
	spinlock_t no_mem_stripes_lock;

	unsigned short write_hint;
};

struct ppl_log {
	struct ppl_conf *ppl_conf;	/* shared between all log instances */

	struct md_rdev *rdev;		/* array member disk associated with
					 * this log instance */

	struct mutex io_mutex;
	struct ppl_io_unit *current_io;	/* current io_unit accepting new data
					 * always at the end of io_list */

	spinlock_t io_list_lock;
	struct list_head io_list;	/* all io_units of this log */

	sector_t next_io_sector;
	unsigned int entry_space;
	bool use_multippl;
	bool wb_cache_on;
	unsigned long disk_flush_bitmap;
};

#define PPL_IO_INLINE_BVECS 32

struct ppl_io_unit {
	struct ppl_log *log;

	struct page *header_page;	/* for ppl_header */

	unsigned int entries_count;	/* number of entries in ppl_header */
	unsigned int pp_size;		/* total size of partial parity pages */

	u64 seq;			/* sequence number of this log write */
	struct list_head log_sibling;	/* log->io_list */

	struct list_head stripe_list;	/* stripes added to the io_unit */
	atomic_t pending_stripes;	/* how many stripes not written to raid */
	atomic_t pending_flushes;	/* how many disk flushes are in progress */

	bool submitted;			/* true if write to log started */

	/* inline bio and its biovec for submitting the io_unit */
	struct bio bio;
	struct bio_vec biovec[PPL_IO_INLINE_BVECS];
};

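/*
 * ops_run_partial_parity() - compute the partial parity for a stripe being
 * written and store it in sh->ppl_page. For a read-modify-write the parity
 * buffer already holds the XOR of the old parity and the old data of the
 * modified chunks, so it is copied directly; for a reconstruct-write the data
 * of all not-modified (still up to date) chunks is XOR-ed together. Runs as
 * one of the async stripe operations; @tx is the previous descriptor and the
 * new tail descriptor is returned.
 */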
struct dma_async_tx_descriptor *
ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
		       struct dma_async_tx_descriptor *tx)
{
	int disks = sh->disks;
	struct page **srcs = percpu->scribble;
	int count = 0, pd_idx = sh->pd_idx, i;
	struct async_submit_ctl submit;

	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);

	/*
	 * Partial parity is the XOR of stripe data chunks that are not changed
	 * by the write request. Depending on available data
	 * (read-modify-write vs. reconstruct-write case) we calculate it
	 * differently.
	 */
	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
		/*
		 * rmw: xor old data and parity from updated disks
		 * This is calculated earlier by ops_run_prexor5() so just copy
		 * the parity dev page.
		 */
		srcs[count++] = sh->dev[pd_idx].page;
	} else if (sh->reconstruct_state == reconstruct_state_drain_run) {
		/* rcw: xor data from all not updated disks */
		for (i = disks; i--;) {
			struct r5dev *dev = &sh->dev[i];
			if (test_bit(R5_UPTODATE, &dev->flags))
				srcs[count++] = dev->page;
		}
	} else {
		return tx;
	}

	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx,
			  NULL, sh, (void *) (srcs + sh->disks + 2));

	if (count == 1)
		tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE,
				  &submit);
	else
		tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE,
			       &submit);

	return tx;
}

static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
{
	struct kmem_cache *kc = pool_data;
	struct ppl_io_unit *io;

	io = kmem_cache_alloc(kc, gfp_mask);
	if (!io)
		return NULL;

	io->header_page = alloc_page(gfp_mask);
	if (!io->header_page) {
		kmem_cache_free(kc, io);
		return NULL;
	}

	return io;
}

static void ppl_io_pool_free(void *element, void *pool_data)
{
	struct kmem_cache *kc = pool_data;
	struct ppl_io_unit *io = element;

	__free_page(io->header_page);
	kmem_cache_free(kc, io);
}

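/*
 * Take a free io_unit from the mempool, reset it and initialize the PPL
 * header page (signature, incremented generation number, reserved bytes set
 * to 0xff). Returns NULL if no io_unit can be allocated without sleeping.
 */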
static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
					  struct stripe_head *sh)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct ppl_io_unit *io;
	struct ppl_header *pplhdr;
	struct page *header_page;

	io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT);
	if (!io)
		return NULL;

	/* keep the preallocated header page across the memset */
	header_page = io->header_page;
	memset(io, 0, sizeof(*io));
	io->header_page = header_page;

	io->log = log;
	INIT_LIST_HEAD(&io->log_sibling);
	INIT_LIST_HEAD(&io->stripe_list);
	atomic_set(&io->pending_stripes, 0);
	atomic_set(&io->pending_flushes, 0);
	bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS,
		 REQ_OP_WRITE | REQ_FUA);

	pplhdr = page_address(io->header_page);
	clear_page(pplhdr);
	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
	pplhdr->signature = cpu_to_le32(ppl_conf->signature);

	io->seq = atomic64_add_return(1, &ppl_conf->seq);
	pplhdr->generation = cpu_to_le64(io->seq);

	return io;
}

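/*
 * Add a stripe to the current io_unit of the log. A new io_unit is started
 * if there is none or if the current one is full (no entry space left or
 * PPL_HDR_MAX_ENTRIES reached). Consecutive stripes writing to the same
 * data chunks are merged into a single header entry. Returns -ENOMEM if a
 * new io_unit could not be allocated.
 */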
static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
{
	struct ppl_io_unit *io = log->current_io;
	struct ppl_header_entry *e = NULL;
	struct ppl_header *pplhdr;
	int i;
	sector_t data_sector = 0;
	int data_disks = 0;
	struct r5conf *conf = sh->raid_conf;

	pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);

	/* check if current io_unit is full */
	if (io && (io->pp_size == log->entry_space ||
		   io->entries_count == PPL_HDR_MAX_ENTRIES)) {
		pr_debug("%s: add io_unit blocked by seq: %llu\n",
			 __func__, io->seq);
		io = NULL;
	}

	/* add a new unit if there is none or the current is full */
	if (!io) {
		io = ppl_new_iounit(log, sh);
		if (!io)
			return -ENOMEM;
		spin_lock_irq(&log->io_list_lock);
		list_add_tail(&io->log_sibling, &log->io_list);
		spin_unlock_irq(&log->io_list_lock);

		log->current_io = io;
	}

	for (i = 0; i < sh->disks; i++) {
		struct r5dev *dev = &sh->dev[i];

		if (i != sh->pd_idx && test_bit(R5_Wantwrite, &dev->flags)) {
			if (!data_disks || dev->sector < data_sector)
				data_sector = dev->sector;
			data_disks++;
		}
	}
	BUG_ON(!data_disks);

	pr_debug("%s: seq: %llu data_sector: %llu data_disks: %d\n", __func__,
		 io->seq, (unsigned long long)data_sector, data_disks);

	pplhdr = page_address(io->header_page);

	if (io->entries_count > 0) {
		struct ppl_header_entry *last =
				&pplhdr->entries[io->entries_count - 1];
		struct stripe_head *sh_last = list_last_entry(
				&io->stripe_list, struct stripe_head, log_list);
		u64 data_sector_last = le64_to_cpu(last->data_sector);
		u32 data_size_last = le32_to_cpu(last->data_size);

		/*
		 * Check if we can append the stripe to the last entry. It must
		 * be just after the last logged stripe and write to the same
		 * disks. Use bit shift and logarithm to avoid 64-bit division.
		 */
		if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) &&
		    (data_sector >> ilog2(conf->chunk_sectors) ==
		     data_sector_last >> ilog2(conf->chunk_sectors)) &&
		    ((data_sector - data_sector_last) * data_disks ==
		     data_size_last >> 9))
			e = last;
	}

	if (!e) {
		e = &pplhdr->entries[io->entries_count++];
		e->data_sector = cpu_to_le64(data_sector);
		e->parity_disk = cpu_to_le32(sh->pd_idx);
		e->checksum = cpu_to_le32(~0);
	}

	le32_add_cpu(&e->data_size, data_disks << PAGE_SHIFT);

	/* don't write any PP if full stripe write */
	if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) {
		le32_add_cpu(&e->pp_size, PAGE_SIZE);
		io->pp_size += PAGE_SIZE;
		e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum),
						    page_address(sh->ppl_page),
						    PAGE_SIZE));
	}

	list_add_tail(&sh->log_list, &io->stripe_list);
	atomic_inc(&io->pending_stripes);
	sh->ppl_io = io;

	return 0;
}

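/*
 * Log a stripe before its data and parity are written to the array. Returns
 * -EAGAIN if the stripe cannot or does not need to be logged (already
 * logged, being synced, no partial parity page, or the parity disk is not
 * being written / not in sync), in which case the stripe is handled without
 * PPL. If the io_unit allocation fails, the stripe is queued on
 * no_mem_stripes and retried when an io_unit completes.
 */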
int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
{
	struct ppl_conf *ppl_conf = conf->log_private;
	struct ppl_io_unit *io = sh->ppl_io;
	struct ppl_log *log;

	if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page ||
	    !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
	    !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) {
		clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
		return -EAGAIN;
	}

	log = &ppl_conf->child_logs[sh->pd_idx];

	mutex_lock(&log->io_mutex);

	if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) {
		mutex_unlock(&log->io_mutex);
		return -EAGAIN;
	}

	set_bit(STRIPE_LOG_TRAPPED, &sh->state);
	clear_bit(STRIPE_DELAYED, &sh->state);
	atomic_inc(&sh->count);

	if (ppl_log_stripe(log, sh)) {
		spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
		list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes);
		spin_unlock_irq(&ppl_conf->no_mem_stripes_lock);
	}

	mutex_unlock(&log->io_mutex);

	return 0;
}

static void ppl_log_endio(struct bio *bio)
{
	struct ppl_io_unit *io = bio->bi_private;
	struct ppl_log *log = io->log;
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct stripe_head *sh, *next;

	pr_debug("%s: seq: %llu\n", __func__, io->seq);

	if (bio->bi_status)
		md_error(ppl_conf->mddev, log->rdev);

	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
		list_del_init(&sh->log_list);

		set_bit(STRIPE_HANDLE, &sh->state);
		raid5_release_stripe(sh);
	}
}

static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio)
{
	pr_debug("%s: seq: %llu size: %u sector: %llu dev: %pg\n",
		 __func__, io->seq, bio->bi_iter.bi_size,
		 (unsigned long long)bio->bi_iter.bi_sector,
		 bio->bi_bdev);

	submit_bio(bio);
}

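/*
 * Finalize and submit an io_unit: convert the header entries to their
 * on-disk format (data_sector in logical blocks, final checksum), pick the
 * target sector in the PPL area (rewinding first in multi-PPL mode if the
 * remaining space is too small), mark the member disks that will need a
 * cache flush and chain additional bios if the partial parity pages do not
 * fit in one bio.
 */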
static void ppl_submit_iounit(struct ppl_io_unit *io)
{
	struct ppl_log *log = io->log;
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct ppl_header *pplhdr = page_address(io->header_page);
	struct bio *bio = &io->bio;
	struct stripe_head *sh;
	int i;

	bio->bi_private = io;

	if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) {
		ppl_log_endio(bio);
		return;
	}

	for (i = 0; i < io->entries_count; i++) {
		struct ppl_header_entry *e = &pplhdr->entries[i];

		pr_debug("%s: seq: %llu entry: %d data_sector: %llu pp_size: %u data_size: %u\n",
			 __func__, io->seq, i, le64_to_cpu(e->data_sector),
			 le32_to_cpu(e->pp_size), le32_to_cpu(e->data_size));

		e->data_sector = cpu_to_le64(le64_to_cpu(e->data_sector) >>
					     ilog2(ppl_conf->block_size >> 9));
		e->checksum = cpu_to_le32(~le32_to_cpu(e->checksum));
	}

	pplhdr->entries_count = cpu_to_le32(io->entries_count);
	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));

	/* Rewind the buffer if current PPL is larger than remaining space */
	if (log->use_multippl &&
	    log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
	    (PPL_HEADER_SIZE + io->pp_size) >> 9)
		log->next_io_sector = log->rdev->ppl.sector;

	bio->bi_end_io = ppl_log_endio;
	bio->bi_iter.bi_sector = log->next_io_sector;
	bio_add_page(bio, io->header_page, PAGE_SIZE, 0);

	pr_debug("%s: log->current_io_sector: %llu\n", __func__,
		 (unsigned long long)log->next_io_sector);

	if (log->use_multippl)
		log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;

	WARN_ON(log->disk_flush_bitmap != 0);

	list_for_each_entry(sh, &io->stripe_list, log_list) {
		for (i = 0; i < sh->disks; i++) {
			struct r5dev *dev = &sh->dev[i];

			if ((ppl_conf->child_logs[i].wb_cache_on) &&
			    (test_bit(R5_Wantwrite, &dev->flags))) {
				set_bit(i, &log->disk_flush_bitmap);
			}
		}

		/* entries for full stripe writes have no partial parity */
		if (test_bit(STRIPE_FULL_WRITE, &sh->state))
			continue;

		if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
			struct bio *prev = bio;

			bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS,
					       prev->bi_opf, GFP_NOIO,
					       &ppl_conf->bs);
			bio->bi_iter.bi_sector = bio_end_sector(prev);
			bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);

			bio_chain(bio, prev);
			ppl_submit_iounit_bio(io, prev);
		}
	}

	ppl_submit_iounit_bio(io, bio);
}

static void ppl_submit_current_io(struct ppl_log *log)
{
	struct ppl_io_unit *io;

	spin_lock_irq(&log->io_list_lock);

	io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
				      log_sibling);
	if (io && io->submitted)
		io = NULL;

	spin_unlock_irq(&log->io_list_lock);

	if (io) {
		io->submitted = true;

		if (io == log->current_io)
			log->current_io = NULL;

		ppl_submit_iounit(io);
	}
}

void ppl_write_stripe_run(struct r5conf *conf)
{
	struct ppl_conf *ppl_conf = conf->log_private;
	struct ppl_log *log;
	int i;

	for (i = 0; i < ppl_conf->count; i++) {
		log = &ppl_conf->child_logs[i];

		mutex_lock(&log->io_mutex);
		ppl_submit_current_io(log);
		mutex_unlock(&log->io_mutex);
	}
}

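/*
 * Called when all stripes of an io_unit have been written to the array (and
 * flushed, if needed). Removes the io_unit from the log, returns it to the
 * mempool and retries one stripe from the no_mem_stripes list, if any.
 */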
static void ppl_io_unit_finished(struct ppl_io_unit *io)
{
	struct ppl_log *log = io->log;
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct r5conf *conf = ppl_conf->mddev->private;
	unsigned long flags;

	pr_debug("%s: seq: %llu\n", __func__, io->seq);

	local_irq_save(flags);

	spin_lock(&log->io_list_lock);
	list_del(&io->log_sibling);
	spin_unlock(&log->io_list_lock);

	mempool_free(io, &ppl_conf->io_pool);

	spin_lock(&ppl_conf->no_mem_stripes_lock);
	if (!list_empty(&ppl_conf->no_mem_stripes)) {
		struct stripe_head *sh;

		sh = list_first_entry(&ppl_conf->no_mem_stripes,
				      struct stripe_head, log_list);
		list_del_init(&sh->log_list);
		set_bit(STRIPE_HANDLE, &sh->state);
		raid5_release_stripe(sh);
	}
	spin_unlock(&ppl_conf->no_mem_stripes_lock);

	local_irq_restore(flags);

	wake_up(&conf->wait_for_quiescent);
}

static void ppl_flush_endio(struct bio *bio)
{
	struct ppl_io_unit *io = bio->bi_private;
	struct ppl_log *log = io->log;
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct r5conf *conf = ppl_conf->mddev->private;

	pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev);

	if (bio->bi_status) {
		struct md_rdev *rdev;

		rcu_read_lock();
		rdev = md_find_rdev_rcu(conf->mddev, bio_dev(bio));
		if (rdev)
			md_error(rdev->mddev, rdev);
		rcu_read_unlock();
	}

	bio_put(bio);

	if (atomic_dec_and_test(&io->pending_flushes)) {
		ppl_io_unit_finished(io);
		md_wakeup_thread(conf->mddev->thread);
	}
}

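/*
 * Issue a PREFLUSH to every member disk with a volatile write-back cache
 * that was written to by this io_unit (tracked in log->disk_flush_bitmap).
 * The io_unit is finished once all flushes complete; the counters for disks
 * that need no flush are dropped immediately.
 */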
static void ppl_do_flush(struct ppl_io_unit *io)
{
	struct ppl_log *log = io->log;
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct r5conf *conf = ppl_conf->mddev->private;
	int raid_disks = conf->raid_disks;
	int flushed_disks = 0;
	int i;

	atomic_set(&io->pending_flushes, raid_disks);

	for_each_set_bit(i, &log->disk_flush_bitmap, raid_disks) {
		struct md_rdev *rdev;
		struct block_device *bdev = NULL;

		rcu_read_lock();
		rdev = rcu_dereference(conf->disks[i].rdev);
		if (rdev && !test_bit(Faulty, &rdev->flags))
			bdev = rdev->bdev;
		rcu_read_unlock();

		if (bdev) {
			struct bio *bio;

			bio = bio_alloc_bioset(bdev, 0,
					       REQ_OP_WRITE | REQ_PREFLUSH,
					       GFP_NOIO, &ppl_conf->flush_bs);
			bio->bi_private = io;
			bio->bi_end_io = ppl_flush_endio;

			pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev);

			submit_bio(bio);
			flushed_disks++;
		}
	}

	log->disk_flush_bitmap = 0;

	for (i = flushed_disks; i < raid_disks; i++) {
		if (atomic_dec_and_test(&io->pending_flushes))
			ppl_io_unit_finished(io);
	}
}

static inline bool ppl_no_io_unit_submitted(struct r5conf *conf,
					    struct ppl_log *log)
{
	struct ppl_io_unit *io;

	io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit,
				      log_sibling);

	return !io || !io->submitted;
}

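/*
 * When quiescing the array, wait until no submitted io_unit remains on any
 * child log. Nothing needs to be done when unquiescing.
 */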
void ppl_quiesce(struct r5conf *conf, int quiesce)
{
	struct ppl_conf *ppl_conf = conf->log_private;
	int i;

	if (quiesce) {
		for (i = 0; i < ppl_conf->count; i++) {
			struct ppl_log *log = &ppl_conf->child_logs[i];

			spin_lock_irq(&log->io_list_lock);
			wait_event_lock_irq(conf->wait_for_quiescent,
					    ppl_no_io_unit_submitted(conf, log),
					    log->io_list_lock);
			spin_unlock_irq(&log->io_list_lock);
		}
	}
}

int ppl_handle_flush_request(struct bio *bio)
{
	if (bio->bi_iter.bi_size == 0) {
		/* flush request without data attached */
		bio_endio(bio);
		return 0;
	}
	bio->bi_opf &= ~REQ_PREFLUSH;
	return -EAGAIN;
}

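/*
 * Called when a logged stripe has been written to the array. When the last
 * pending stripe of an io_unit completes, either flush the member disk
 * caches (if any disk needs it) or finish the io_unit right away.
 */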
void ppl_stripe_write_finished(struct stripe_head *sh)
{
	struct ppl_io_unit *io;

	io = sh->ppl_io;
	sh->ppl_io = NULL;

	if (io && atomic_dec_and_test(&io->pending_stripes)) {
		if (io->log->disk_flush_bitmap)
			ppl_do_flush(io);
		else
			ppl_io_unit_finished(io);
	}
}

static void ppl_xor(int size, struct page *page1, struct page *page2)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	struct page *xor_srcs[] = { page1, page2 };

	init_async_submit(&submit, ASYNC_TX_ACK|ASYNC_TX_XOR_DROP_DST,
			  NULL, NULL, NULL, NULL);
	tx = async_xor(page1, xor_srcs, 0, 2, size, &submit);

	async_tx_quiesce(&tx);
}

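/*
 * Replay a single PPL header entry: for every block covered by the entry,
 * read the logged partial parity from the PPL area and the data from each
 * member disk that the entry describes, XOR everything together and write
 * the result to the parity disk. An entry with pp_size == 0 describes a
 * full stripe write, so the parity is recalculated from the data chunks
 * alone. If a required data chunk is not available (missing or not yet
 * recovered disk), the block is skipped. 512-byte blocks are used unless
 * the entry is stripe-aligned, in which case full stripe-sized blocks are
 * processed.
 */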
static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
			     sector_t ppl_sector)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct mddev *mddev = ppl_conf->mddev;
	struct r5conf *conf = mddev->private;
	int block_size = ppl_conf->block_size;
	struct page *page1;
	struct page *page2;
	sector_t r_sector_first;
	sector_t r_sector_last;
	int strip_sectors;
	int data_disks;
	int i;
	int ret = 0;
	unsigned int pp_size = le32_to_cpu(e->pp_size);
	unsigned int data_size = le32_to_cpu(e->data_size);

	page1 = alloc_page(GFP_KERNEL);
	page2 = alloc_page(GFP_KERNEL);

	if (!page1 || !page2) {
		ret = -ENOMEM;
		goto out;
	}

	r_sector_first = le64_to_cpu(e->data_sector) * (block_size >> 9);

	if ((pp_size >> 9) < conf->chunk_sectors) {
		if (pp_size > 0) {
			data_disks = data_size / pp_size;
			strip_sectors = pp_size >> 9;
		} else {
			data_disks = conf->raid_disks - conf->max_degraded;
			strip_sectors = (data_size >> 9) / data_disks;
		}
		r_sector_last = r_sector_first +
				(data_disks - 1) * conf->chunk_sectors +
				strip_sectors;
	} else {
		data_disks = conf->raid_disks - conf->max_degraded;
		strip_sectors = conf->chunk_sectors;
		r_sector_last = r_sector_first + (data_size >> 9);
	}

	pr_debug("%s: array sector first: %llu last: %llu\n", __func__,
		 (unsigned long long)r_sector_first,
		 (unsigned long long)r_sector_last);

	/* if start and end is 4k aligned, use a 4k block */
	if (block_size == 512 &&
	    (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 &&
	    (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0)
		block_size = RAID5_STRIPE_SIZE(conf);

	/* iterate through blocks in strip */
	for (i = 0; i < strip_sectors; i += (block_size >> 9)) {
		bool update_parity = false;
		sector_t parity_sector;
		struct md_rdev *parity_rdev;
		struct stripe_head sh;
		int disk;
		int indent = 0;

		pr_debug("%s:%*s iter %d start\n", __func__, indent, "", i);
		indent += 2;

		memset(page_address(page1), 0, PAGE_SIZE);

		/* get data from each data disk of the stripe */
		for (disk = 0; disk < data_disks; disk++) {
			int dd_idx;
			struct md_rdev *rdev;
			sector_t sector;
			sector_t r_sector = r_sector_first + i +
					    (disk * conf->chunk_sectors);

			pr_debug("%s:%*s data member disk %d start\n",
				 __func__, indent, "", disk);
			indent += 2;

			if (r_sector >= r_sector_last) {
				pr_debug("%s:%*s array sector %llu doesn't need parity update\n",
					 __func__, indent, "",
					 (unsigned long long)r_sector);
				indent -= 2;
				continue;
			}

			update_parity = true;

			/* map raid sector to member disk */
			sector = raid5_compute_sector(conf, r_sector, 0,
						      &dd_idx, NULL);
			pr_debug("%s:%*s processing array sector %llu => data member disk %d, sector %llu\n",
				 __func__, indent, "",
				 (unsigned long long)r_sector, dd_idx,
				 (unsigned long long)sector);

			/* Array has not started so rcu dereference is safe */
			rdev = rcu_dereference_protected(
					conf->disks[dd_idx].rdev, 1);
			if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
				      sector >= rdev->recovery_offset)) {
				pr_debug("%s:%*s data member disk %d missing\n",
					 __func__, indent, "", dd_idx);
				update_parity = false;
				break;
			}

			pr_debug("%s:%*s reading data member disk %pg sector %llu\n",
				 __func__, indent, "", rdev->bdev,
				 (unsigned long long)sector);
			if (!sync_page_io(rdev, sector, block_size, page2,
					  REQ_OP_READ, false)) {
				md_error(mddev, rdev);
				pr_debug("%s:%*s read failed!\n", __func__,
					 indent, "");
				ret = -EIO;
				goto out;
			}

			ppl_xor(block_size, page1, page2);

			indent -= 2;
		}

		if (!update_parity)
			continue;

		if (pp_size > 0) {
			pr_debug("%s:%*s reading pp disk sector %llu\n",
				 __func__, indent, "",
				 (unsigned long long)(ppl_sector + i));
			if (!sync_page_io(log->rdev,
					  ppl_sector - log->rdev->data_offset + i,
					  block_size, page2, REQ_OP_READ,
					  false)) {
				pr_debug("%s:%*s read failed!\n", __func__,
					 indent, "");
				md_error(mddev, log->rdev);
				ret = -EIO;
				goto out;
			}

			ppl_xor(block_size, page1, page2);
		}

		/* map raid sector to parity disk */
		parity_sector = raid5_compute_sector(conf, r_sector_first + i,
						     0, &disk, &sh);
		BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));

		/* Array has not started so rcu dereference is safe */
		parity_rdev = rcu_dereference_protected(
					conf->disks[sh.pd_idx].rdev, 1);

		BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
		pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
			 __func__, indent, "",
			 (unsigned long long)parity_sector,
			 parity_rdev->bdev);
		if (!sync_page_io(parity_rdev, parity_sector, block_size,
				  page1, REQ_OP_WRITE, false)) {
			pr_debug("%s:%*s parity write error!\n", __func__,
				 indent, "");
			md_error(mddev, parity_rdev);
			ret = -EIO;
			goto out;
		}
	}
out:
	if (page1)
		__free_page(page1);
	if (page2)
		__free_page(page2);
	return ret;
}

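/*
 * Replay all entries of one PPL header and flush the log disk's cache
 * afterwards so the recovered parity is durable. Entries whose partial
 * parity checksum does not match are counted in mismatch_count and skipped;
 * successfully replayed entries are counted in recovered_entries.
 */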
static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
		       sector_t offset)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct md_rdev *rdev = log->rdev;
	struct mddev *mddev = rdev->mddev;
	sector_t ppl_sector = rdev->ppl.sector + offset +
			      (PPL_HEADER_SIZE >> 9);
	struct page *page;
	int i;
	int ret = 0;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* iterate through all PPL entries saved */
	for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) {
		struct ppl_header_entry *e = &pplhdr->entries[i];
		u32 pp_size = le32_to_cpu(e->pp_size);
		sector_t sector = ppl_sector;
		int ppl_entry_sectors = pp_size >> 9;
		u32 crc, crc_stored;

		pr_debug("%s: disk: %d entry: %d ppl_sector: %llu pp_size: %u\n",
			 __func__, rdev->raid_disk, i,
			 (unsigned long long)ppl_sector, pp_size);

		crc = ~0;
		crc_stored = le32_to_cpu(e->checksum);

		/* calculate checksum of the ppl entry */
		while (pp_size) {
			int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size;

			if (!sync_page_io(rdev, sector - rdev->data_offset,
					  s, page, REQ_OP_READ, false)) {
				md_error(mddev, rdev);
				ret = -EIO;
				goto out;
			}

			crc = crc32c_le(crc, page_address(page), s);

			pp_size -= s;
			sector += s >> 9;
		}

		crc = ~crc;

		if (crc != crc_stored) {
			/*
			 * Don't recover this entry if the checksum does not
			 * match, but keep going and try to recover other
			 * entries.
			 */
			pr_debug("%s: ppl entry crc does not match: stored: 0x%x calculated: 0x%x\n",
				 __func__, crc_stored, crc);
			ppl_conf->mismatch_count++;
		} else {
			ret = ppl_recover_entry(log, e, ppl_sector);
			if (ret)
				goto out;
			ppl_conf->recovered_entries++;
		}

		ppl_sector += ppl_entry_sectors;
	}

	/* flush the disk cache after recovery */
	ret = blkdev_issue_flush(rdev->bdev);
out:
	__free_page(page);
	return ret;
}

static int ppl_write_empty_header(struct ppl_log *log)
{
	struct page *page;
	struct ppl_header *pplhdr;
	struct md_rdev *rdev = log->rdev;
	int ret = 0;

	pr_debug("%s: disk: %d ppl_sector: %llu\n", __func__,
		 rdev->raid_disk, (unsigned long long)rdev->ppl.sector);

	page = alloc_page(GFP_NOIO | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	pplhdr = page_address(page);
	/* zero out PPL space to avoid replay of old entries */
	blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
			     log->rdev->ppl.size, GFP_NOIO, 0);
	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
	pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));

	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
			  REQ_FUA, false)) {
		md_error(rdev->mddev, rdev);
		ret = -EIO;
	}

	__free_page(page);
	return ret;
}

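/*
 * Scan the PPL area of one member disk for the most recent valid header by
 * walking the chain of headers and checking their checksum, signature and
 * generation number. If the array is being started dirty, the newest header
 * is replayed, and an empty header is written afterwards so the old entries
 * are not replayed again.
 */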
static int ppl_load_distributed(struct ppl_log *log)
{
	struct ppl_conf *ppl_conf = log->ppl_conf;
	struct md_rdev *rdev = log->rdev;
	struct mddev *mddev = rdev->mddev;
	struct page *page, *page2;
	struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
	u32 crc, crc_stored;
	u32 signature;
	int ret = 0, i;
	sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;

	pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	page2 = alloc_page(GFP_KERNEL);
	if (!page2) {
		__free_page(page);
		return -ENOMEM;
	}

	/* searching ppl area for latest ppl */
	while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
		if (!sync_page_io(rdev,
				  rdev->ppl.sector - rdev->data_offset +
				  pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
				  false)) {
			md_error(mddev, rdev);
			ret = -EIO;
			/* if not able to read - don't recover any PPL */
			pplhdr = NULL;
			break;
		}
		pplhdr = page_address(page);

		/* check header validity */
		crc_stored = le32_to_cpu(pplhdr->checksum);
		pplhdr->checksum = 0;
		crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);

		if (crc_stored != crc) {
			pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
				 __func__, crc_stored, crc,
				 (unsigned long long)pplhdr_offset);
			pplhdr = prev_pplhdr;
			pplhdr_offset = prev_pplhdr_offset;
			break;
		}

		signature = le32_to_cpu(pplhdr->signature);

		if (mddev->external) {
			/*
			 * For external metadata just remember the signature
			 * here; ppl_load() checks that it is the same on all
			 * member drives.
			 */
			ppl_conf->signature = signature;
		} else if (ppl_conf->signature != signature) {
			pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
				 __func__, signature, ppl_conf->signature,
				 (unsigned long long)pplhdr_offset);
			pplhdr = prev_pplhdr;
			pplhdr_offset = prev_pplhdr_offset;
			break;
		}

		if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
		    le64_to_cpu(pplhdr->generation)) {
			/* previous was newest */
			pplhdr = prev_pplhdr;
			pplhdr_offset = prev_pplhdr_offset;
			break;
		}

		prev_pplhdr_offset = pplhdr_offset;
		prev_pplhdr = pplhdr;

		swap(page, page2);

		/* calculate next potential ppl offset */
		for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
			pplhdr_offset +=
			    le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
		pplhdr_offset += PPL_HEADER_SIZE >> 9;
	}

	/* no valid ppl found */
	if (!pplhdr)
		ppl_conf->mismatch_count++;
	else
		pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
			 __func__, (unsigned long long)pplhdr_offset,
			 le64_to_cpu(pplhdr->generation));

	/* attempt to recover from log if we are starting a dirty array */
	if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
		ret = ppl_recover(log, pplhdr, pplhdr_offset);

	/* write empty header if we are starting the array */
	if (!ret && !mddev->pers)
		ret = ppl_write_empty_header(log);

	__free_page(page);
	__free_page(page2);

	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
		 __func__, ret, ppl_conf->mismatch_count,
		 ppl_conf->recovered_entries);
	return ret;
}

static int ppl_load(struct ppl_conf *ppl_conf)
{
	int ret = 0;
	u32 signature = 0;
	bool signature_set = false;
	int i;

	for (i = 0; i < ppl_conf->count; i++) {
		struct ppl_log *log = &ppl_conf->child_logs[i];

		/* skip missing drive */
		if (!log->rdev)
			continue;

		ret = ppl_load_distributed(log);
		if (ret)
			break;

		/*
		 * For external metadata we can't check if the signature is
		 * correct on a single drive, but we can check if it is the
		 * same for all drives.
		 */
		if (ppl_conf->mddev->external) {
			if (!signature_set) {
				signature = ppl_conf->signature;
				signature_set = true;
			} else if (signature != ppl_conf->signature) {
				pr_warn("md/raid:%s: PPL header signature does not match on all member drives\n",
					mdname(ppl_conf->mddev));
				ret = -EINVAL;
				break;
			}
		}
	}

	pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
		 __func__, ret, ppl_conf->mismatch_count,
		 ppl_conf->recovered_entries);
	return ret;
}

static void __ppl_exit_log(struct ppl_conf *ppl_conf)
{
	clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
	clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);

	kfree(ppl_conf->child_logs);

	bioset_exit(&ppl_conf->bs);
	bioset_exit(&ppl_conf->flush_bs);
	mempool_exit(&ppl_conf->io_pool);
	kmem_cache_destroy(ppl_conf->io_kc);

	kfree(ppl_conf);
}

void ppl_exit_log(struct r5conf *conf)
{
	struct ppl_conf *ppl_conf = conf->log_private;

	if (ppl_conf) {
		__ppl_exit_log(ppl_conf);
		conf->log_private = NULL;
	}
}

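/*
 * Check that the PPL area configured for a member disk is usable: it must be
 * large enough for the header plus at least one stripe of partial parity and
 * must not overlap with the data area or (for non-external metadata) with
 * the superblock. On success rdev->ppl.size is trimmed to a whole number of
 * stripes plus the header.
 */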
static int ppl_validate_rdev(struct md_rdev *rdev)
{
	int ppl_data_sectors;
	int ppl_size_new;

	/*
	 * The configured PPL size must be enough to store the header and
	 * (at the very least) partial parity for one stripe.
	 */
	ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9);

	if (ppl_data_sectors > 0)
		ppl_data_sectors = rounddown(ppl_data_sectors,
				RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private));

	if (ppl_data_sectors <= 0) {
		pr_warn("md/raid:%s: PPL space too small on %pg\n",
			mdname(rdev->mddev), rdev->bdev);
		return -ENOSPC;
	}

	ppl_size_new = ppl_data_sectors + (PPL_HEADER_SIZE >> 9);

	if ((rdev->ppl.sector < rdev->data_offset &&
	     rdev->ppl.sector + ppl_size_new > rdev->data_offset) ||
	    (rdev->ppl.sector >= rdev->data_offset &&
	     rdev->data_offset + rdev->sectors > rdev->ppl.sector)) {
		pr_warn("md/raid:%s: PPL space overlaps with data on %pg\n",
			mdname(rdev->mddev), rdev->bdev);
		return -EINVAL;
	}

	if (!rdev->mddev->external &&
	    ((rdev->ppl.offset > 0 && rdev->ppl.offset < (rdev->sb_size >> 9)) ||
	     (rdev->ppl.offset <= 0 && rdev->ppl.offset + ppl_size_new > 0))) {
		pr_warn("md/raid:%s: PPL space overlaps with superblock on %pg\n",
			mdname(rdev->mddev), rdev->bdev);
		return -EINVAL;
	}

	rdev->ppl.size = ppl_size_new;

	return 0;
}

static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
{
	struct request_queue *q;

	if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
				      PPL_HEADER_SIZE) * 2) {
		log->use_multippl = true;
		set_bit(MD_HAS_MULTIPLE_PPLS,
			&log->ppl_conf->mddev->flags);
		log->entry_space = PPL_SPACE_SIZE;
	} else {
		log->use_multippl = false;
		log->entry_space = (log->rdev->ppl.size << 9) -
				   PPL_HEADER_SIZE;
	}
	log->next_io_sector = rdev->ppl.sector;

	q = bdev_get_queue(rdev->bdev);
	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
		log->wb_cache_on = true;
}

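/*
 * Set up PPL for a raid5 array: allocate the ppl_conf with its io_unit
 * cache, mempool and bio sets, create one child log per member disk,
 * validate each disk's PPL space and load (and possibly replay) the existing
 * logs. PPL is refused for configurations it cannot support: page size other
 * than 4K, raid levels other than 5, arrays with a write-intent bitmap or a
 * journal, and arrays with more member disks than disk_flush_bitmap can
 * track.
 */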
int ppl_init_log(struct r5conf *conf)
{
	struct ppl_conf *ppl_conf;
	struct mddev *mddev = conf->mddev;
	int ret = 0;
	int max_disks;
	int i;

	pr_debug("md/raid:%s: enabling distributed Partial Parity Log\n",
		 mdname(conf->mddev));

	if (PAGE_SIZE != 4096)
		return -EINVAL;

	if (mddev->level != 5) {
		pr_warn("md/raid:%s PPL is not compatible with raid level %d\n",
			mdname(mddev), mddev->level);
		return -EINVAL;
	}

	if (mddev->bitmap_info.file || mddev->bitmap_info.offset) {
		pr_warn("md/raid:%s PPL is not compatible with bitmap\n",
			mdname(mddev));
		return -EINVAL;
	}

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
		pr_warn("md/raid:%s PPL is not compatible with journal\n",
			mdname(mddev));
		return -EINVAL;
	}

	max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) *
		BITS_PER_BYTE;
	if (conf->raid_disks > max_disks) {
		pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
			mdname(mddev), max_disks);
		return -EINVAL;
	}

	ppl_conf = kzalloc(sizeof(struct ppl_conf), GFP_KERNEL);
	if (!ppl_conf)
		return -ENOMEM;

	ppl_conf->mddev = mddev;

	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
	if (!ppl_conf->io_kc) {
		ret = -ENOMEM;
		goto err;
	}

	ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc,
			   ppl_io_pool_free, ppl_conf->io_kc);
	if (ret)
		goto err;

	ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS);
	if (ret)
		goto err;

	ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0);
	if (ret)
		goto err;

	ppl_conf->count = conf->raid_disks;
	ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log),
				       GFP_KERNEL);
	if (!ppl_conf->child_logs) {
		ret = -ENOMEM;
		goto err;
	}

	atomic64_set(&ppl_conf->seq, 0);
	INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
	spin_lock_init(&ppl_conf->no_mem_stripes_lock);

	if (!mddev->external) {
		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
		ppl_conf->block_size = 512;
	} else {
		ppl_conf->block_size = queue_logical_block_size(mddev->queue);
	}

	for (i = 0; i < ppl_conf->count; i++) {
		struct ppl_log *log = &ppl_conf->child_logs[i];
		/* Array has not started so rcu dereference is safe */
		struct md_rdev *rdev =
			rcu_dereference_protected(conf->disks[i].rdev, 1);

		mutex_init(&log->io_mutex);
		spin_lock_init(&log->io_list_lock);
		INIT_LIST_HEAD(&log->io_list);

		log->ppl_conf = ppl_conf;
		log->rdev = rdev;

		if (rdev) {
			ret = ppl_validate_rdev(rdev);
			if (ret)
				goto err;

			ppl_init_child_log(log, rdev);
		}
	}

	/* load and possibly recover the logs from the member disks */
	ret = ppl_load(ppl_conf);

	if (ret) {
		goto err;
	} else if (!mddev->pers && mddev->recovery_cp == 0 &&
		   ppl_conf->recovered_entries > 0 &&
		   ppl_conf->mismatch_count == 0) {
		/*
		 * If we are starting a dirty array and the recovery succeeds
		 * without any issues, set the array as clean.
		 */
		mddev->recovery_cp = MaxSector;
		set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
	} else if (mddev->pers && ppl_conf->mismatch_count > 0) {
		/* no mismatch allowed when enabling PPL was requested */
		ret = -EINVAL;
		goto err;
	}

	conf->log_private = ppl_conf;
	set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);

	return 0;
err:
	__ppl_exit_log(ppl_conf);
	return ret;
}

int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
{
	struct ppl_conf *ppl_conf = conf->log_private;
	struct ppl_log *log;
	int ret = 0;

	if (!rdev)
		return -EINVAL;

	pr_debug("%s: disk: %d operation: %s dev: %pg\n",
		 __func__, rdev->raid_disk, add ? "add" : "remove",
		 rdev->bdev);

	if (rdev->raid_disk < 0)
		return 0;

	if (rdev->raid_disk >= ppl_conf->count)
		return -ENODEV;

	log = &ppl_conf->child_logs[rdev->raid_disk];

	mutex_lock(&log->io_mutex);
	if (add) {
		ret = ppl_validate_rdev(rdev);
		if (!ret) {
			log->rdev = rdev;
			ret = ppl_write_empty_header(log);
			ppl_init_child_log(log, rdev);
		}
	} else {
		log->rdev = NULL;
	}
	mutex_unlock(&log->io_mutex);

	return ret;
}

/*
 * The ppl_write_hint sysfs attribute is kept only for backwards
 * compatibility: reads always return 0 and the value written is validated
 * but otherwise ignored.
 */
static ssize_t
ppl_write_hint_show(struct mddev *mddev, char *buf)
{
	return sprintf(buf, "%d\n", 0);
}

static ssize_t
ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
{
	struct r5conf *conf;
	int err = 0;
	unsigned short new;

	if (len >= PAGE_SIZE)
		return -EINVAL;
	if (kstrtou16(page, 10, &new))
		return -EINVAL;

	err = mddev_lock(mddev);
	if (err)
		return err;

	conf = mddev->private;
	if (!conf)
		err = -ENODEV;
	else if (!raid5_has_ppl(conf) || !conf->log_private)
		err = -EINVAL;

	mddev_unlock(mddev);

	return err ?: len;
}

struct md_sysfs_entry
ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR,
			ppl_write_hint_show,
			ppl_write_hint_store);