Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
0004  *
0005  * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
0006  */
0007 #ifndef BITMAP_H
0008 #define BITMAP_H 1
0009 
0010 #define BITMAP_MAJOR_LO 3
0011 /* Version 4 insists the bitmap is in little-endian order.
0012  * With version 3 it is host-endian, which is non-portable.
0013  * Version 5 is currently set only for clustered devices.
0014  */
0015 #define BITMAP_MAJOR_HI 4
0016 #define BITMAP_MAJOR_CLUSTERED 5
0017 #define BITMAP_MAJOR_HOSTENDIAN 3
0018 
0019 /*
0020  * in-memory bitmap:
0021  *
0022  * Use 16 bit block counters to track pending writes to each "chunk".
0023  * The 2 high order bits are special-purpose, the first is a flag indicating
0024  * whether a resync is needed.  The second is a flag indicating whether a
0025  * resync is active.
0026  * This means that the counter is actually 14 bits:
0027  *
0028  * +--------+--------+------------------------------------------------+
0029  * | resync | resync |               counter                          |
0030  * | needed | active |                                                |
0031  * |  (0-1) |  (0-1) |              (0-16383)                         |
0032  * +--------+--------+------------------------------------------------+
0033  *
0034  * The "resync needed" bit is set when:
0035  *    a '1' bit is read from storage at startup.
0036  *    a write request fails on some drives
0037  *    a resync is aborted on a chunk with 'resync active' set
0038  * It is cleared (and resync-active set) when a resync starts across all drives
0039  * of the chunk.
0040  *
0041  *
0042  * The "resync active" bit is set when:
0043  *    a resync is started on all drives, and resync_needed is set.
0044  *       resync_needed will be cleared (as long as resync_active wasn't already set).
0045  * It is cleared when a resync completes.
0046  *
0047  * The counter counts pending write requests, plus the on-disk bit.
0048  * When the counter is '1' and the resync bits are clear, the on-disk
0049  * bit can be cleared as well, thus setting the counter to 0.
0050  * When we set a bit, or increment the counter (to start a write), if the field is
0051  * 0, we first set the disk bit and set the counter to 1.
0052  *
0053  * If the counter is 0, the on-disk bit is clear and the stripe is clean
0054  * Anything that dirties the stripe pushes the counter to 2 (at least)
0055  * and sets the on-disk bit (lazily).
0056  * If a periodic sweep finds the counter at 2, it is decremented to 1.
0057  * If the sweep finds the counter at 1, the on-disk bit is cleared and the
0058  * counter goes to zero.
0059  *
0060  * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
0061  * counters as a fallback when "page" memory cannot be allocated:
0062  *
0063  * Normal case (page memory allocated):
0064  *
0065  *     page pointer (32-bit)
0066  *
0067  *     [ ] ------+
0068  *               |
0069  *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
0070  *                          c1   c2    c2048
0071  *
0072  * Hijacked case (page memory allocation failed):
0073  *
0074  *     hijacked page pointer (32-bit)
0075  *
0076  *     [          ][          ] (no page memory allocated)
0077  *      counter #1 (16-bit) counter #2 (16-bit)
0078  *
0079  */
0080 
0081 #ifdef __KERNEL__
0082 
0083 #define PAGE_BITS (PAGE_SIZE << 3) /* PAGE_SIZE * 8; PAGE_SIZE is defined outside this header */
0084 #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) /* shift form of the above; assumes PAGE_SIZE == 1 << PAGE_SHIFT - confirm */
0085 
0086 typedef __u16 bitmap_counter_t; /* one per-chunk counter: 2 flag bits + 14-bit count */
0087 #define COUNTER_BITS 16
0088 #define COUNTER_BIT_SHIFT 4 /* log2(COUNTER_BITS) */
0089 #define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) /* log2(bytes per counter) */
0090 
0091 #define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) /* highest bit: "resync needed" flag */
0092 #define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) /* next bit: "resync active" flag */
0093 #define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) /* 16383: largest count, also the mask of the count bits */
0094 #define NEEDED(x) (((bitmap_counter_t) (x)) & NEEDED_MASK) /* argument parenthesized so the cast covers the whole expression */
0095 #define RESYNC(x) (((bitmap_counter_t) (x)) & RESYNC_MASK) /* non-zero when the "resync active" flag is set in x */
0096 #define COUNTER(x) (((bitmap_counter_t) (x)) & COUNTER_MAX) /* the 14-bit count with both flag bits stripped */
0097 
0098 /* how many counters fit in one page? */
0099 #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
0100 /* same quantity as a shift value (log2), for more efficient bitops */
0101 #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
0102 /* mask selecting a counter's index within its page, for more efficient bitops */
0103 #define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)
0104 
0105 #define BITMAP_BLOCK_SHIFT 9 /* 1 << 9 == 512; presumably the 512-byte block size - confirm */
0106 
0107 #endif
0108 
0109 /*
0110  * bitmap structures:
0111  */
0112 
0113 #define BITMAP_MAGIC 0x6d746962 /* bytes 'b','i','t','m' when stored as a little-endian 32-bit value */
0114 
0115 /* use these for the bitmap->flags and bitmap->sb->state bit-fields */
0116 enum bitmap_state {
0117     BITMAP_STALE       = 1,  /* the bitmap file is out of date or had -EIO */
0118     BITMAP_WRITE_ERROR = 2, /* a write error has occurred */
0119     BITMAP_HOSTENDIAN  =15, /* presumably: bitmap is in host-endian order (cf. BITMAP_MAJOR_HOSTENDIAN) - confirm */
0120 };
0121 
0122 /* the superblock at the front of the bitmap file -- little endian; the leading number in each field comment is its byte offset */
0123 typedef struct bitmap_super_s {
0124     __le32 magic;        /*  0  BITMAP_MAGIC */
0125     __le32 version;      /*  4  the bitmap major for now, could change... */
0126     __u8  uuid[16];      /*  8  128 bit uuid - must match md device uuid */
0127     __le64 events;       /* 24  event counter for the bitmap (1) */
0128     __le64 events_cleared;/* 32  event counter when last bit cleared (2) */
0129     __le64 sync_size;    /* 40  the size of the md device's sync range (3) */
0130     __le32 state;        /* 48  bitmap state information */
0131     __le32 chunksize;    /* 52  the bitmap chunk size in bytes */
0132     __le32 daemon_sleep; /* 56  seconds between disk flushes */
0133     __le32 write_behind; /* 60  number of outstanding write-behind writes */
0134     __le32 sectors_reserved; /* 64  number of 512-byte sectors that are
0135                   * reserved for the bitmap. */
0136     __le32 nodes;        /* 68  the maximum number of nodes in cluster. */
0137     __u8 cluster_name[64]; /* 72  cluster name to which this md belongs */
0138     __u8  pad[256 - 136]; /* 136  set to zero; pads the structure out to 256 bytes */
0139 } bitmap_super_t;
0140 
0141 /* notes:
0142  * (1) This event counter is updated before the eventcounter in the md superblock
0143  *    When a bitmap is loaded, it is only accepted if this event counter is equal
0144  *    to, or one greater than, the event counter in the superblock.
0145  * (2) This event counter is updated when the other one is *if*and*only*if* the
0146  *    array is not degraded.  As bits are not cleared when the array is degraded,
0147  *    this represents the last time that any bits were cleared.
0148  *    If a device is being added that has an event count with this value or
0149  *    higher, it is accepted as conforming to the bitmap.
0150  * (3) This is the number of sectors represented by the bitmap, and is the range that
0151  *    resync happens across.  For raid1 and raid5/6 it is the size of individual
0152  *    devices.  For raid10 it is the size of the array.
0153  */
0154 
0155 #ifdef __KERNEL__
0156 
0157 /* the in-memory bitmap is represented by an array of bitmap_pages */
0158 struct bitmap_page {
0159     /*
0160      * map points to the actual memory page holding the counters
0161      */
0162     char *map;
0163     /*
0164      * in emergencies (when map cannot be allocated), hijack the map
0165      * pointer itself and use its storage as two counters
0166      */
0167     unsigned int hijacked:1;
0168     /*
0169      * If any counter in this page is '1' or '2' - and so could be
0170      * cleared - then that page is marked as 'pending'
0171      */
0172     unsigned int pending:1;
0173     /*
0174      * count of dirty bits on the page (30 bits wide; the three bit-fields total 32 bits)
0175      */
0176     unsigned int  count:30;
0177 };
0178 
0179 /* the main bitmap structure - one per mddev */
0180 struct bitmap {
0181 
0182     struct bitmap_counts {
0183         spinlock_t lock;
0184         struct bitmap_page *bp;
0185         unsigned long pages;        /* total number of pages
0186                          * in the bitmap */
0187         unsigned long missing_pages;    /* number of pages
0188                          * not yet allocated */
0189         unsigned long chunkshift;   /* chunksize = 2^chunkshift
0190                          * (for bitops) */
0191         unsigned long chunks;       /* Total number of data
0192                          * chunks for the array */
0193     } counts;
0194 
0195     struct mddev *mddev; /* the md device that the bitmap is for */
0196 
0197     __u64   events_cleared; /* NOTE(review): presumably mirrors events_cleared in bitmap_super_t - confirm */
0198     int need_sync;
0199 
0200     struct bitmap_storage {
0201         struct file *file;      /* backing disk file */
0202         struct page *sb_page;       /* cached copy of the bitmap
0203                          * file superblock */
0204         struct page **filemap;      /* list of cache pages for
0205                          * the file */
0206         unsigned long *filemap_attr;    /* attributes associated
0207                          * with filemap pages */
0208         unsigned long file_pages;   /* number of pages in the file */
0209         unsigned long bytes;        /* total bytes in the bitmap */
0210     } storage;
0211 
0212     unsigned long flags; /* enum bitmap_state values are used with this field */
0213 
0214     int allclean;
0215 
0216     atomic_t behind_writes; /* NOTE(review): presumably the current write-behind count; see the high-water mark below - confirm */
0217     unsigned long behind_writes_used; /* highest actual value at runtime */
0218 
0219     /*
0220      * the bitmap daemon - periodically wakes up and sweeps the bitmap
0221      * file, cleaning up bits and flushing out pages to disk as necessary
0222      */
0223     unsigned long daemon_lastrun; /* jiffies of last run */
0224     unsigned long last_end_sync; /* when we last called end_sync to
0225                       * update bitmap with resync progress */
0226 
0227     atomic_t pending_writes; /* pending writes to the bitmap file */
0228     wait_queue_head_t write_wait;
0229     wait_queue_head_t overflow_wait;
0230     wait_queue_head_t behind_wait;
0231 
0232     struct kernfs_node *sysfs_can_clear;
0233     int cluster_slot;       /* Slot offset for clustered env */
0234 };
0235 
0236 /* the bitmap API - declarations only; the definitions are not part of this header */
0237 
0238 /* these are used only by md/bitmap */
0239 struct bitmap *md_bitmap_create(struct mddev *mddev, int slot);
0240 int md_bitmap_load(struct mddev *mddev);
0241 void md_bitmap_flush(struct mddev *mddev);
0242 void md_bitmap_destroy(struct mddev *mddev);
0243 
0244 void md_bitmap_print_sb(struct bitmap *bitmap);
0245 void md_bitmap_update_sb(struct bitmap *bitmap);
0246 void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap);
0247 
0248 int  md_bitmap_setallbits(struct bitmap *bitmap);
0249 void md_bitmap_write_all(struct bitmap *bitmap);
0250 
0251 void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e);
0252 
0253 /* these are exported */
0254 int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
0255              unsigned long sectors, int behind);
0256 void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
0257             unsigned long sectors, int success, int behind);
0258 int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
0259 void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
0260 void md_bitmap_close_sync(struct bitmap *bitmap);
0261 void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
0262 void md_bitmap_sync_with_cluster(struct mddev *mddev,
0263                  sector_t old_lo, sector_t old_hi,
0264                  sector_t new_lo, sector_t new_hi);
0265 
0266 void md_bitmap_unplug(struct bitmap *bitmap);
0267 void md_bitmap_daemon_work(struct mddev *mddev);
0268 
0269 int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
0270              int chunksize, int init);
0271 struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot); /* NOTE(review): only declaration here without the md_bitmap_ prefix */
0272 int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
0273                  sector_t *lo, sector_t *hi, bool clear_bits);
0274 void md_bitmap_free(struct bitmap *bitmap);
0275 void md_bitmap_wait_behind_writes(struct mddev *mddev);
0276 #endif
0277 
0278 #endif