/* SPDX-License-Identifier: GPL-2.0 */
/*
   md.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
*/

#ifndef _MD_MD_H
#define _MD_MD_H

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)

/*
 * These flags bound the number of retries (to zero) rather than the
 * time an I/O may take, so "NO_RETRY" would be a more accurate name.
 * REQ_FAILFAST_DRIVER is deliberately not included, so that "normal"
 * but slow driver-level errors are still treated as failures.
 */
#define MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
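
/*
 * Illustrative sketch (not part of this header): a personality that
 * supports failfast typically ORs these flags into a bio for a device
 * marked FailFast, but only while at least one other device remains:
 *
 *	if (test_bit(FailFast, &rdev->flags) &&
 *	    !test_bit(LastDev, &rdev->flags))
 *		bio->bi_opf |= MD_FAILFAST;
 */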

/*
 * Per-rdev bookkeeping used to serialize overlapping write requests:
 * in-flight writes are tracked in an interval tree under serial_lock,
 * and later writers wait on serial_io_wait for conflicting ranges to
 * complete.
 */
struct serial_in_rdev {
	struct rb_root_cached serial_rb;
	spinlock_t serial_lock;
	wait_queue_head_t serial_io_wait;
};

/*
 * MD's 'extended' device
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512-byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data
	 * (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page *sb_page, *bb_page;
	int sb_loaded;
	__u64 sb_events;
	sector_t data_offset;		/* start of data in array */
	sector_t new_data_offset;	/* relevant address if reshaping */
	sector_t sb_start;		/* offset of the super block (in 512-byte sectors) */
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/*
	 * A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long flags;		/* bit set of 'enum flag_bits' bits */
	wait_queue_head_t blocked_wait;

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change
					 * completes.
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a
					 * partial resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors
					 * that we have tried to ignore.
					 */
	time64_t last_read_error;	/* monotonic time since our
					 * last read error
					 */
	atomic_t corrected_errors;	/* number of corrected read errors,
					 * for reporting to userspace and
					 * storing in superblock.
					 */

	struct serial_in_rdev *serial;	/* used for raid1 io serialization */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state;	/* handle for 'state' sysfs entry */
	/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_unack_badblocks;
	/* handle for 'bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_badblocks;
	struct badblocks badblocks;

	struct {
		short offset;		/* Offset from superblock to start of PPL.
					 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite "in_sync".  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, preferably because
				 * it is failing or has failed.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't use FailFast
				 * any more for metadata
				 */
	CollisionCheck,		/*
				 * check if there is collision between raid1
				 * serial bios.
				 */
};
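
/*
 * Illustrative sketch: rdev->flags holds the bits above and is
 * manipulated with the standard atomic bitops, e.g.
 *
 *	set_bit(Faulty, &rdev->flags);
 *	if (test_and_clear_bit(Blocked, &rdev->flags))
 *		wake_up(&rdev->blocked_wait);
 */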

/*
 * Check part of @rdev against its bad-block list.  Returns 0 when the
 * range is clean; a non-zero result means the range overlaps recorded
 * bad blocks, with *first_bad and *bad_sectors describing the first
 * such extent (translated back to data-relative sectors).
 */
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
			      sector_t *first_bad, int *bad_sectors)
{
	if (unlikely(rdev->badblocks.count)) {
		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
					sectors,
					first_bad, bad_sectors);
		if (rv)
			*first_bad -= rdev->data_offset;
		return rv;
	}
	return 0;
}
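
/*
 * Hypothetical caller sketch: read paths can clip an I/O so that it
 * stops just short of the first known bad block, e.g.
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (is_badblock(rdev, this_sector, sectors,
 *			&first_bad, &bad_sectors) &&
 *	    first_bad > this_sector)
 *		sectors = first_bad - this_sector;
 */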
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/**
 * enum mddev_flags - md device flags.
 * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
 * @MD_CLOSING: If set, we are closing the array, do not open it then.
 * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
 * @MD_HAS_JOURNAL: The raid array has journal feature set.
 * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node already took
 *			       the resync lock and needs to release it.
 * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
 *			    calls to md_error() will never cause the array to
 *			    become failed.
 * @MD_HAS_PPL: The raid array has PPL feature set.
 * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
 * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
 *			 without taking reconfig_mutex.
 * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
 *		     explicitly holding reconfig_mutex.
 * @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
 *		   the array is ready yet.
 * @MD_BROKEN: This is used to stop writes and mark array as failed.
 * @MD_DELETED: This device is being deleted.
 */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,
	MD_CLOSING,
	MD_JOURNAL_CLEAN,
	MD_HAS_JOURNAL,
	MD_CLUSTER_RESYNC_LOCKED,
	MD_FAILFAST_SUPPORTED,
	MD_HAS_PPL,
	MD_HAS_MULTIPLE_PPLS,
	MD_ALLOW_SB_UPDATE,
	MD_UPDATING_SB,
	MD_NOT_READY,
	MD_BROKEN,
	MD_DELETED,
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};

#define NR_SERIAL_INFOS		8
/* record current range of serialized IOs */
struct serial_info {
	struct rb_node node;
	sector_t start;		/* start sector of rb node */
	sector_t last;		/* end sector of rb node */
	sector_t _subtree_last;	/* highest sector in subtree of rb node */
};
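
/*
 * Sketch of how this node type plugs into the generic interval-tree
 * machinery from <linux/interval_tree_generic.h> (md.c instantiates
 * something along these lines; the names below are illustrative):
 *
 *	#define START(node) ((node)->start)
 *	#define LAST(node)  ((node)->last)
 *	INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t,
 *			     _subtree_last, START, LAST,
 *			     static inline, raid1_rb);
 */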

/*
 * mddev->curr_resync usually holds the current resync sector, but a
 * few small values are overloaded with special meanings.
 */
enum {
	/* No resync in progress */
	MD_RESYNC_NONE = 0,
	/* Yielded to allow another conflicting resync to commence */
	MD_RESYNC_YIELDED = 1,
	/* Delayed to check that there is no conflict with another sync */
	MD_RESYNC_DELAYED = 2,
	/* Any value greater than or equal to this is in an active resync */
	MD_RESYNC_ACTIVE = 3,
};
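
/*
 * Illustrative consequence of the overloading above: progress tests
 * must first check that curr_resync holds a real sector, e.g.
 *
 *	if (mddev->curr_resync >= MD_RESYNC_ACTIVE &&
 *	    mddev->curr_resync > target_sector)
 *		...everything below target_sector has been scheduled...
 */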

struct mddev {
	void *private;
	struct md_personality *pers;
	dev_t unit;
	int md_minor;
	struct list_head disks;
	unsigned long flags;		/* bit set of 'enum mddev_flags' */
	unsigned long sb_flags;		/* bit set of 'enum mddev_sb_flags' */

	int suspended;
	atomic_t active_io;
	int ro;
	int sysfs_active;		/* set when sysfs deletes
					 * are happening, so run/
					 * takeover/stop are not safe
					 */
	struct gendisk *gendisk;

	struct kobject kobj;
	int hold_active;
#define UNTIL_IOCTL	1
#define UNTIL_STOP	2

	/* Superblock information */
	int major_version,
	    minor_version,
	    patch_version;
	int persistent;
	int external;			/* metadata is managed externally */
	char metadata_type[17];		/* externally set */
	int chunk_sectors;
	time64_t ctime, utime;
	int level, layout;
	char clevel[16];
	int raid_disks;
	int max_disks;
	sector_t dev_sectors;		/* used size of component devices */
	sector_t array_sectors;		/* exported array size */
	int external_size;		/* size managed externally */
	__u64 events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrease the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int can_decrease_events;

	char uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t reshape_position;
	int delta_disks, new_level, new_layout;
	int new_chunk_sectors;
	int reshape_backwards;

	struct md_thread *thread;	/* management thread */
	struct md_thread *sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is
	 * begun.
	 */
	char *last_sync_action;
	sector_t curr_resync;		/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a
	 * value we are certain of.
	 */
	sector_t curr_resync_completed;
	unsigned long resync_mark;	/* a recent timestamp */
	sector_t resync_mark_cnt;	/* blocks written at resync_mark */
	sector_t curr_mark_cnt;		/* blocks scheduled now */

	sector_t resync_max_sectors;	/* may be set by personality */

	atomic64_t resync_mismatches;	/* count of sectors where
					 * parity/replica mismatch found
					 */

	/* allow user-space to request suspension of IO to regions of the
	 * array
	 */
	sector_t suspend_lo;
	sector_t suspend_hi;
	/* if zero, use the system-wide default */
	int sync_speed_min;
	int sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int parallel_resync;

	int ok_start_degraded;

	unsigned long recovery;		/* bit set of 'enum recovery_flags' */
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int recovery_disabled;

	int in_sync;			/* know to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with disk->open_mutex.
	 */
	struct mutex open_mutex;
	struct mutex reconfig_mutex;
	atomic_t active;		/* general refcount */
	atomic_t openers;		/* number of active opens */

	int changed;			/* True if we might need to
					 * reread partition info */
	int degraded;			/* whether md should consider
					 * adding a spare
					 */

	atomic_t recovery_active;	/* blocks scheduled, but not written */
	wait_queue_head_t recovery_wait;
	sector_t recovery_cp;
	sector_t resync_min;		/* user requested sync
					 * starts here */
	sector_t resync_max;		/* resync should pause
					 * when it gets here */

	struct kernfs_node *sysfs_state;	/* handle for 'array_state'
						 * file in sysfs.
						 */
	struct kernfs_node *sysfs_action;	/* handle for 'sync_action' */
	struct kernfs_node *sysfs_completed;	/* handle for 'sync_completed' */
	struct kernfs_node *sysfs_degraded;	/* handle for 'degraded' */
	struct kernfs_node *sysfs_level;	/* handle for 'level' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_CHANGE_CLEAN/PENDING
	 *   in_sync - and related safemode and MD_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t lock;
	wait_queue_head_t sb_wait;	/* for waiting on superblock updates */
	atomic_t pending_writes;	/* number of active superblock writes */

	unsigned int safemode;		/* if set, update "clean" superblock
					 * when no writes pending.
					 */
	unsigned int safemode_delay;
	struct timer_list safemode_timer;
	struct percpu_ref writes_pending;
	int sync_checkers;		/* # of threads checking
					 * writes_pending
					 */
	struct request_queue *queue;	/* for plugging ... */

	struct bitmap *bitmap;		/* the bitmap for the device */
	struct {
		struct file *file;	/* the bitmap file */
		loff_t offset;		/* offset from superblock of
					 * start of bitmap. May be
					 * negative, but not '0'
					 * For external metadata, offset
					 * from start of device.
					 */
		unsigned long space;	/* space available at this offset */
		loff_t default_offset;	/* this is the offset to use when
					 * hot-adding a bitmap.  It should
					 * eventually be settable by sysfs.
					 */
		unsigned long default_space;	/* space available at
						 * default offset */
		struct mutex mutex;
		unsigned long chunksize;
		unsigned long daemon_sleep;	/* how many jiffies between updates? */
		unsigned long max_write_behind;	/* write-behind mode */
		int external;
		int nodes;		/* Maximum number of nodes in the cluster */
		char cluster_name[64];	/* Name of the cluster */
	} bitmap_info;

	atomic_t max_corr_read_errors;	/* max read retries */
	struct list_head all_mddevs;

	const struct attribute_group *to_remove;

	struct bio_set bio_set;
	struct bio_set sync_set;	/* for sync operations like
					 * metadata and bitmap writes
					 */
	struct bio_set io_acct_set;	/* for raid0 and raid5 io accounting */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	ktime_t start_flush, prev_flush_start;	/* prev_flush_start is when the
						 * previous completed flush was
						 * started.
						 */
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	mempool_t *serial_info_pool;
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info *cluster_info;
	unsigned int good_device_nr;	/* good device num within cluster raid */
	unsigned int noio_flag;		/* for memalloc scope API */

	bool has_superblocks:1;
	bool fail_last_dev:1;
	bool serialize_policy:1;
};

enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE are set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
	MD_RESYNCING_REMOTE,	/* remote node is running resync thread */
};

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
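
/*
 * Typical caller pattern (illustrative sketch): reconfiguration paths
 * such as sysfs store methods take the mutex interruptibly and bail
 * out on a signal:
 *
 *	err = mddev_lock(mddev);
 *	if (err)
 *		return err;
 *	...modify the array configuration...
 *	mddev_unlock(mddev);
 */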

/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
}

static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	md_sync_acct(bio->bi_bdev, nr_sectors);
}
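
/*
 * Illustrative use: resync/recovery paths account each bio they submit
 * so that sync I/O can be told apart from application I/O (the sync_io
 * counter feeds the idle detection in md.c):
 *
 *	md_sync_acct_bio(bio, nr_sectors);
 *	submit_bio(bio);
 */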

struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
	/*
	 * start up works that do NOT require md_thread. tasks that
	 * require md_thread should go into start()
	 */
	int (*run)(struct mddev *mddev);
	/* start up works that require md threads */
	int (*start)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/*
	 * error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	void (*update_reshape_pos) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
	 */
	void (*quiesce) (struct mddev *mddev, int quiesce);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};

struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
extern const struct attribute_group md_bitmap_group;

static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}

static inline char *mdname(struct mddev *mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];

	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	} else
		return 0;
}

static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];

	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
}

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)				\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

#define rdev_for_each_safe(rdev, tmp, mddev)			\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

#define rdev_for_each_rcu(rdev, mddev)				\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
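
/*
 * Illustrative sketch: the _rcu variant must run under rcu_read_lock(),
 * e.g.
 *
 *	rcu_read_lock();
 *	rdev_for_each_rcu(rdev, mddev)
 *		if (test_bit(Faulty, &rdev->flags))
 *			degraded++;
 *	rcu_read_unlock();
 */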

struct md_thread {
	void (*run) (struct md_thread *thread);
	struct mddev *mddev;
	wait_queue_head_t wqueue;
	unsigned long flags;
	struct task_struct *tsk;
	unsigned long timeout;
	void *private;
};

struct md_io_acct {
	struct bio *orig_bio;
	unsigned long start_time;
	struct bio bio_clone;
};

#define THREAD_WAKEUP  0

static inline void safe_put_page(struct page *p)
{
	if (p) put_page(p);
}

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
			struct bio *bio, sector_t start, sector_t size);
int acct_bioset_init(struct mddev *mddev);
void acct_bioset_exit(struct mddev *mddev);
void md_account_bio(struct mddev *mddev, struct bio **bio);

extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, blk_opf_t opf, bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(void);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
struct mddev *md_alloc(dev_t dev, char *name);
void mddev_put(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void md_handle_request(struct mddev *mddev, struct bio *bio);
extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev *rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				     bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				      bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);

static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
	if (!disk_live(rdev->bdev->bd_disk)) {
		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
			pr_warn("md: %s: %s array has a missing/failed member\n",
				mdname(rdev->mddev), md_type);
		return true;
	}
	return false;
}
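
/*
 * Illustrative sketch: personalities without redundancy can call this
 * from their I/O path to fail the array once a member disappears;
 * raid0's make_request does roughly:
 *
 *	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
 *		bio_io_error(bio);
 *		return true;
 *	}
 */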

static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);

	if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
}
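
/*
 * Pairs with the atomic_inc(&rdev->nr_pending) taken when I/O was
 * issued to the device (illustrative sketch):
 *
 *	atomic_inc(&rdev->nr_pending);
 *	...submit and complete I/O on rdev...
 *	rdev_dec_pending(rdev, mddev);
 */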

extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev_flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
}

struct mdu_array_info_s;
struct mdu_disk_info_s;

extern int mdp_major;
void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);

extern const struct block_device_operations md_fops;

#endif /* _MD_MD_H */