// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (or "before the sector after the target range"),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
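
/*
 * Illustrative sketch, not part of this file: one way the 64-bit entry
 * layout described above could be packed and unpacked.  The low 9 bits
 * store length-1 (so lengths 1-512), the next 54 bits store the start
 * sector, and the most significant bit is the 'acknowledged' flag.  The
 * real accessors are the BB_MAKE/BB_OFFSET/BB_LEN/BB_ACK macros from
 * <linux/badblocks.h>; the helper names below are hypothetical.
 */
#if 0
static inline u64 example_bb_make(sector_t start, int len, int ack)
{
	/* len is 1..512, stored as 0..511 in the low 9 bits */
	return ((u64)start << 9) | (u64)(len - 1) | ((u64)!!ack << 63);
}

static inline sector_t example_bb_offset(u64 entry)
{
	return (entry >> 9) & ((1ULL << 54) - 1);	/* 54-bit start */
}

static inline int example_bb_len(u64 entry)
{
	return (entry & 511) + 1;			/* 1..512 sectors */
}

static inline int example_bb_ack(u64 entry)
{
	return !!(entry >> 63);				/* MSB */
}
#endif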

static void badblocks_update_acked(struct badblocks *bb)
{
	u64 *p = bb->page;
	int i;
	bool unacked = false;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count ; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	else
		badblocks_update_acked(bb);
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
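
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * because badblocks_set() merges with the ranges on either side of 's',
 * two abutting calls collapse into a single table entry.
 */
#if 0
static void example_set_merge(struct badblocks *bb)
{
	badblocks_set(bb, 100, 4, 1);	/* records sectors 100..103, acked */
	badblocks_set(bb, 104, 4, 1);	/* merges: one entry, 100..107 */
	/* bb->count is still 1 and BB_LEN() of that entry is now 8 */
}
#endif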

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (no room to split a range)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range.  Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 */
		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
		    (BB_OFFSET(p[lo]) < target)) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
		       (BB_OFFSET(p[lo]) < target)) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now 'lo' doesn't overlap, so.. */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
			bb->count -= (hi - lo - 1);
		}
	}

	badblocks_update_acked(bb);
	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);
	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
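
/*
 * Illustrative sketch (hypothetical caller): clearing the middle of a
 * recorded range splits it in two, which is why badblocks_clear() may
 * need a spare table slot and can fail with -ENOSPC even though it only
 * removes bad blocks.
 */
#if 0
static void example_clear_split(struct badblocks *bb)
{
	badblocks_set(bb, 100, 8, 1);	/* one entry covering 100..107 */
	badblocks_clear(bb, 103, 2);	/* leaves 100..102 and 105..107 */
	/* bb->count went from 1 to 2 */
}
#endif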

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count ; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);
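
/*
 * Illustrative flow (hypothetical caller, locking and error handling
 * elided): a metadata writer is expected to persist the table, clear
 * ->changed, and only then call ack_all_badblocks().  The ->changed test
 * above turns the call into a no-op if new entries raced in after the
 * snapshot was taken.
 */
#if 0
static void example_persist(struct badblocks *bb)
{
	/* ... write bb->page to stable storage ... */
	bb->changed = 0;	/* on-media metadata now matches the table */
	ack_all_badblocks(bb);	/* safe: nothing changed since the write */
}
#endif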

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	whether to show only unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);
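
/*
 * The loop above prints one "start length" pair per line, scaled back to
 * 512-byte sectors by ->shift.  A read of the sysfs attribute for a device
 * with two recorded ranges might return (values illustrative):
 *
 *	512 8
 *	2048 16
 */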

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to record the bad blocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
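
/*
 * Accepted input, per the sscanf() above: "<sector> <length>" with an
 * optional trailing newline.  For example, writing "512 8\n" to the sysfs
 * attribute records an 8-sector bad range starting at sector 512; anything
 * else is rejected with -EINVAL, and a full table yields -ENOSPC.
 */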

static int __badblocks_init(struct device *dev, struct badblocks *bb,
		int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);
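
/*
 * Illustrative driver usage (hypothetical probe function and structure,
 * not part of this file): with the devm variant the page is device-managed
 * and released automatically on detach; badblocks_exit() also handles that
 * case via devm_kfree().
 */
#if 0
struct example_dev {
	struct badblocks bb;
};

static int example_probe(struct device *dev, struct example_dev *ed)
{
	int rc = devm_init_badblocks(dev, &ed->bb);

	if (rc)
		return rc;
	/* later, on a media error: record one unacknowledged bad sector */
	badblocks_set(&ed->bb, 12345, 1, 0);
	return 0;
}
#endif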

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);