Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * blockcheck.c
0004  *
0005  * Checksum and ECC codes for the OCFS2 userspace library.
0006  *
0007  * Copyright (C) 2006, 2008 Oracle.  All rights reserved.
0008  */
0009 
0010 #include <linux/kernel.h>
0011 #include <linux/types.h>
0012 #include <linux/crc32.h>
0013 #include <linux/buffer_head.h>
0014 #include <linux/bitops.h>
0015 #include <linux/debugfs.h>
0016 #include <linux/module.h>
0017 #include <linux/fs.h>
0018 #include <asm/byteorder.h>
0019 
0020 #include <cluster/masklog.h>
0021 
0022 #include "ocfs2.h"
0023 
0024 #include "blockcheck.h"
0025 
0026 
0027 /*
0028  * We use the following conventions:
0029  *
0030  * d = # data bits
0031  * p = # parity bits
0032  * c = # total code bits (d + p)
0033  */
0034 
0035 
/*
 * Map a 0-based data bit offset to its 1-based bit number in the hamming
 * code buffer.  Hamming code reserves every power-of-two bit number for
 * parity, so a data bit is pushed up by one position for each parity bit
 * that sits below it in the code.
 *
 * An example.  Code bit 1 is a power of two (2^0), so it's a parity bit.
 * Code bit 2 is a power of two (2^1), so it's a parity bit.  3 is not a
 * power of two, so data bit 0 ends up as code bit 3.  Code bit 4 (2^2) is
 * parity again, so data bit 1 ends up as code bit 5.
 *
 * i is the 0-based data bit offset.
 *
 * p_cache may be NULL.  If it is not, *p_cache is read as the number of
 * parity bits already accounted for and is updated with the new count
 * before returning.  This allows the function to start the calculation
 * at the last place.  The cached count is trusted as-is, so it should
 * come from a call made with an i no larger than this one.
 *
 * Returns the 1-based code bit number.
 */
static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
{
    unsigned int b, p = 0;

    /* Use the cache if it is there */
    if (p_cache)
        p = *p_cache;

    /*
     * Data bits are 0-based, but we're talking code bits, which are
     * 1-based.  The p parity bits already counted sit below this bit.
     * Without a cache p is 0, so this sum is deliberately unconditional.
     */
    b = i + 1 + p;

    /*
     * For every power of two below our bit number, bump our bit.
     *
     * We compare with (b + 1) because we have to compare with what b
     * would be _if_ it were bumped up by the parity bit.
     *
     * The shift is done on an unsigned 1 so that p == 31 stays well
     * defined.
     */
    for (; (1U << p) < (b + 1); p++)
        b++;

    if (p_cache)
        *p_cache = p;

    return b;
}
0085 
0086 /*
0087  * This is the low level encoder function.  It can be called across
0088  * multiple hunks just like the crc32 code.  'd' is the number of bits
0089  * _in_this_hunk_.  nr is the bit offset of this hunk.  So, if you had
0090  * two 512B buffers, you would do it like so:
0091  *
0092  * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
0093  * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
0094  *
0095  * If you just have one buffer, use ocfs2_hamming_encode_block().
0096  */
0097 u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
0098 {
0099     unsigned int i, b, p = 0;
0100 
0101     BUG_ON(!d);
0102 
0103     /*
0104      * b is the hamming code bit number.  Hamming code specifies a
0105      * 1-based array, but C uses 0-based.  So 'i' is for C, and 'b' is
0106      * for the algorithm.
0107      *
0108      * The i++ in the for loop is so that the start offset passed
0109      * to ocfs2_find_next_bit_set() is one greater than the previously
0110      * found bit.
0111      */
0112     for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
0113     {
0114         /*
0115          * i is the offset in this hunk, nr + i is the total bit
0116          * offset.
0117          */
0118         b = calc_code_bit(nr + i, &p);
0119 
0120         /*
0121          * Data bits in the resultant code are checked by
0122          * parity bits that are part of the bit number
0123          * representation.  Huh?
0124          *
0125          * <wikipedia href="https://en.wikipedia.org/wiki/Hamming_code">
0126          * In other words, the parity bit at position 2^k
0127          * checks bits in positions having bit k set in
0128          * their binary representation.  Conversely, for
0129          * instance, bit 13, i.e. 1101(2), is checked by
0130          * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
0131          * </wikipedia>
0132          *
0133          * Note that 'k' is the _code_ bit number.  'b' in
0134          * our loop.
0135          */
0136         parity ^= b;
0137     }
0138 
0139     /* While the data buffer was treated as little endian, the
0140      * return value is in host endian. */
0141     return parity;
0142 }
0143 
0144 u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
0145 {
0146     return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
0147 }
0148 
0149 /*
0150  * Like ocfs2_hamming_encode(), this can handle hunks.  nr is the bit
0151  * offset of the current hunk.  If bit to be fixed is not part of the
0152  * current hunk, this does nothing.
0153  *
0154  * If you only have one hunk, use ocfs2_hamming_fix_block().
0155  */
0156 void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
0157                unsigned int fix)
0158 {
0159     unsigned int i, b;
0160 
0161     BUG_ON(!d);
0162 
0163     /*
0164      * If the bit to fix has an hweight of 1, it's a parity bit.  One
0165      * busted parity bit is its own error.  Nothing to do here.
0166      */
0167     if (hweight32(fix) == 1)
0168         return;
0169 
0170     /*
0171      * nr + d is the bit right past the data hunk we're looking at.
0172      * If fix after that, nothing to do
0173      */
0174     if (fix >= calc_code_bit(nr + d, NULL))
0175         return;
0176 
0177     /*
0178      * nr is the offset in the data hunk we're starting at.  Let's
0179      * start b at the offset in the code buffer.  See hamming_encode()
0180      * for a more detailed description of 'b'.
0181      */
0182     b = calc_code_bit(nr, NULL);
0183     /* If the fix is before this hunk, nothing to do */
0184     if (fix < b)
0185         return;
0186 
0187     for (i = 0; i < d; i++, b++)
0188     {
0189         /* Skip past parity bits */
0190         while (hweight32(b) == 1)
0191             b++;
0192 
0193         /*
0194          * i is the offset in this data hunk.
0195          * nr + i is the offset in the total data buffer.
0196          * b is the offset in the total code buffer.
0197          *
0198          * Thus, when b == fix, bit i in the current hunk needs
0199          * fixing.
0200          */
0201         if (b == fix)
0202         {
0203             if (ocfs2_test_bit(i, data))
0204                 ocfs2_clear_bit(i, data);
0205             else
0206                 ocfs2_set_bit(i, data);
0207             break;
0208         }
0209     }
0210 }
0211 
/*
 * Apply 'fix' to one complete buffer of blocksize bytes located at bit
 * offset 0.
 */
void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
                 unsigned int fix)
{
    unsigned int nbits = blocksize * 8;

    ocfs2_hamming_fix(data, nbits, 0, fix);
}
0217 
0218 
0219 /*
0220  * Debugfs handling.
0221  */
0222 
0223 #ifdef CONFIG_DEBUG_FS
0224 
0225 static int blockcheck_u64_get(void *data, u64 *val)
0226 {
0227     *val = *(u64 *)data;
0228     return 0;
0229 }
0230 DEFINE_DEBUGFS_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
0231 
0232 static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
0233 {
0234     if (stats) {
0235         debugfs_remove_recursive(stats->b_debug_dir);
0236         stats->b_debug_dir = NULL;
0237     }
0238 }
0239 
0240 static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
0241                        struct dentry *parent)
0242 {
0243     struct dentry *dir;
0244 
0245     dir = debugfs_create_dir("blockcheck", parent);
0246     stats->b_debug_dir = dir;
0247 
0248     debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir,
0249                 &stats->b_check_count, &blockcheck_fops);
0250 
0251     debugfs_create_file("checksums_failed", S_IFREG | S_IRUSR, dir,
0252                 &stats->b_failure_count, &blockcheck_fops);
0253 
0254     debugfs_create_file("ecc_recoveries", S_IFREG | S_IRUSR, dir,
0255                 &stats->b_recover_count, &blockcheck_fops);
0256 
0257 }
0258 #else
/* Without CONFIG_DEBUG_FS there is nothing to install. */
static inline void
ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
                   struct dentry *parent)
{
}
0263 
/* Without CONFIG_DEBUG_FS there is nothing to remove. */
static inline void
ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
{
}
0267 #endif  /* CONFIG_DEBUG_FS */
0268 
0269 /* Always-called wrappers for starting and stopping the debugfs files */
/*
 * Public entry point for creating the blockcheck debugfs files for stats
 * under parent.  Forwards to ocfs2_blockcheck_debug_install(), which is
 * an empty stub when CONFIG_DEBUG_FS is not set.
 */
void
ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
                       struct dentry *parent)
{
    ocfs2_blockcheck_debug_install(stats, parent);
}
0275 
/*
 * Public entry point for tearing down the blockcheck debugfs files of
 * stats.  Forwards to ocfs2_blockcheck_debug_remove(), which is an empty
 * stub when CONFIG_DEBUG_FS is not set.
 */
void
ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
{
    ocfs2_blockcheck_debug_remove(stats);
}
0280 
0281 static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats)
0282 {
0283     u64 new_count;
0284 
0285     if (!stats)
0286         return;
0287 
0288     spin_lock(&stats->b_lock);
0289     stats->b_check_count++;
0290     new_count = stats->b_check_count;
0291     spin_unlock(&stats->b_lock);
0292 
0293     if (!new_count)
0294         mlog(ML_NOTICE, "Block check count has wrapped\n");
0295 }
0296 
0297 static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats)
0298 {
0299     u64 new_count;
0300 
0301     if (!stats)
0302         return;
0303 
0304     spin_lock(&stats->b_lock);
0305     stats->b_failure_count++;
0306     new_count = stats->b_failure_count;
0307     spin_unlock(&stats->b_lock);
0308 
0309     if (!new_count)
0310         mlog(ML_NOTICE, "Checksum failure count has wrapped\n");
0311 }
0312 
0313 static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats)
0314 {
0315     u64 new_count;
0316 
0317     if (!stats)
0318         return;
0319 
0320     spin_lock(&stats->b_lock);
0321     stats->b_recover_count++;
0322     new_count = stats->b_recover_count;
0323     spin_unlock(&stats->b_lock);
0324 
0325     if (!new_count)
0326         mlog(ML_NOTICE, "ECC recovery count has wrapped\n");
0327 }
0328 
0329 
0330 
0331 /*
0332  * These are the low-level APIs for using the ocfs2_block_check structure.
0333  */
0334 
0335 /*
0336  * This function generates check information for a block.
0337  * data is the block to be checked.  bc is a pointer to the
0338  * ocfs2_block_check structure describing the crc32 and the ecc.
0339  *
0340  * bc should be a pointer inside data, as the function will
0341  * take care of zeroing it before calculating the check information.  If
0342  * bc does not point inside data, the caller must make sure any inline
0343  * ocfs2_block_check structures are zeroed.
0344  *
0345  * The data buffer must be in on-disk endian (little endian for ocfs2).
0346  * bc will be filled with little-endian values and will be ready to go to
0347  * disk.
0348  */
0349 void ocfs2_block_check_compute(void *data, size_t blocksize,
0350                    struct ocfs2_block_check *bc)
0351 {
0352     u32 crc;
0353     u32 ecc;
0354 
0355     memset(bc, 0, sizeof(struct ocfs2_block_check));
0356 
0357     crc = crc32_le(~0, data, blocksize);
0358     ecc = ocfs2_hamming_encode_block(data, blocksize);
0359 
0360     /*
0361      * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
0362      * larger than 16 bits.
0363      */
0364     BUG_ON(ecc > USHRT_MAX);
0365 
0366     bc->bc_crc32e = cpu_to_le32(crc);
0367     bc->bc_ecc = cpu_to_le16((u16)ecc);
0368 }
0369 
0370 /*
0371  * This function validates existing check information.  Like _compute,
0372  * the function will take care of zeroing bc before calculating check codes.
0373  * If bc is not a pointer inside data, the caller must have zeroed any
0374  * inline ocfs2_block_check structures.
0375  *
0376  * Again, the data passed in should be the on-disk endian.
0377  */
0378 int ocfs2_block_check_validate(void *data, size_t blocksize,
0379                    struct ocfs2_block_check *bc,
0380                    struct ocfs2_blockcheck_stats *stats)
0381 {
0382     int rc = 0;
0383     u32 bc_crc32e;
0384     u16 bc_ecc;
0385     u32 crc, ecc;
0386 
0387     ocfs2_blockcheck_inc_check(stats);
0388 
0389     bc_crc32e = le32_to_cpu(bc->bc_crc32e);
0390     bc_ecc = le16_to_cpu(bc->bc_ecc);
0391 
0392     memset(bc, 0, sizeof(struct ocfs2_block_check));
0393 
0394     /* Fast path - if the crc32 validates, we're good to go */
0395     crc = crc32_le(~0, data, blocksize);
0396     if (crc == bc_crc32e)
0397         goto out;
0398 
0399     ocfs2_blockcheck_inc_failure(stats);
0400     mlog(ML_ERROR,
0401          "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
0402          (unsigned int)bc_crc32e, (unsigned int)crc);
0403 
0404     /* Ok, try ECC fixups */
0405     ecc = ocfs2_hamming_encode_block(data, blocksize);
0406     ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc);
0407 
0408     /* And check the crc32 again */
0409     crc = crc32_le(~0, data, blocksize);
0410     if (crc == bc_crc32e) {
0411         ocfs2_blockcheck_inc_recover(stats);
0412         goto out;
0413     }
0414 
0415     mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
0416          (unsigned int)bc_crc32e, (unsigned int)crc);
0417 
0418     rc = -EIO;
0419 
0420 out:
0421     bc->bc_crc32e = cpu_to_le32(bc_crc32e);
0422     bc->bc_ecc = cpu_to_le16(bc_ecc);
0423 
0424     return rc;
0425 }
0426 
0427 /*
0428  * This function generates check information for a list of buffer_heads.
0429  * bhs is the blocks to be checked.  bc is a pointer to the
0430  * ocfs2_block_check structure describing the crc32 and the ecc.
0431  *
0432  * bc should be a pointer inside data, as the function will
0433  * take care of zeroing it before calculating the check information.  If
0434  * bc does not point inside data, the caller must make sure any inline
0435  * ocfs2_block_check structures are zeroed.
0436  *
0437  * The data buffer must be in on-disk endian (little endian for ocfs2).
0438  * bc will be filled with little-endian values and will be ready to go to
0439  * disk.
0440  */
0441 void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
0442                    struct ocfs2_block_check *bc)
0443 {
0444     int i;
0445     u32 crc, ecc;
0446 
0447     BUG_ON(nr < 0);
0448 
0449     if (!nr)
0450         return;
0451 
0452     memset(bc, 0, sizeof(struct ocfs2_block_check));
0453 
0454     for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
0455         crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
0456         /*
0457          * The number of bits in a buffer is obviously b_size*8.
0458          * The offset of this buffer is b_size*i, so the bit offset
0459          * of this buffer is b_size*8*i.
0460          */
0461         ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
0462                         bhs[i]->b_size * 8,
0463                         bhs[i]->b_size * 8 * i);
0464     }
0465 
0466     /*
0467      * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
0468      * larger than 16 bits.
0469      */
0470     BUG_ON(ecc > USHRT_MAX);
0471 
0472     bc->bc_crc32e = cpu_to_le32(crc);
0473     bc->bc_ecc = cpu_to_le16((u16)ecc);
0474 }
0475 
0476 /*
0477  * This function validates existing check information on a list of
0478  * buffer_heads.  Like _compute_bhs, the function will take care of
0479  * zeroing bc before calculating check codes.  If bc is not a pointer
0480  * inside data, the caller must have zeroed any inline
0481  * ocfs2_block_check structures.
0482  *
0483  * Again, the data passed in should be the on-disk endian.
0484  */
0485 int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
0486                    struct ocfs2_block_check *bc,
0487                    struct ocfs2_blockcheck_stats *stats)
0488 {
0489     int i, rc = 0;
0490     u32 bc_crc32e;
0491     u16 bc_ecc;
0492     u32 crc, ecc, fix;
0493 
0494     BUG_ON(nr < 0);
0495 
0496     if (!nr)
0497         return 0;
0498 
0499     ocfs2_blockcheck_inc_check(stats);
0500 
0501     bc_crc32e = le32_to_cpu(bc->bc_crc32e);
0502     bc_ecc = le16_to_cpu(bc->bc_ecc);
0503 
0504     memset(bc, 0, sizeof(struct ocfs2_block_check));
0505 
0506     /* Fast path - if the crc32 validates, we're good to go */
0507     for (i = 0, crc = ~0; i < nr; i++)
0508         crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
0509     if (crc == bc_crc32e)
0510         goto out;
0511 
0512     ocfs2_blockcheck_inc_failure(stats);
0513     mlog(ML_ERROR,
0514          "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",
0515          (unsigned int)bc_crc32e, (unsigned int)crc);
0516 
0517     /* Ok, try ECC fixups */
0518     for (i = 0, ecc = 0; i < nr; i++) {
0519         /*
0520          * The number of bits in a buffer is obviously b_size*8.
0521          * The offset of this buffer is b_size*i, so the bit offset
0522          * of this buffer is b_size*8*i.
0523          */
0524         ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
0525                         bhs[i]->b_size * 8,
0526                         bhs[i]->b_size * 8 * i);
0527     }
0528     fix = ecc ^ bc_ecc;
0529     for (i = 0; i < nr; i++) {
0530         /*
0531          * Try the fix against each buffer.  It will only affect
0532          * one of them.
0533          */
0534         ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
0535                   bhs[i]->b_size * 8 * i, fix);
0536     }
0537 
0538     /* And check the crc32 again */
0539     for (i = 0, crc = ~0; i < nr; i++)
0540         crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
0541     if (crc == bc_crc32e) {
0542         ocfs2_blockcheck_inc_recover(stats);
0543         goto out;
0544     }
0545 
0546     mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
0547          (unsigned int)bc_crc32e, (unsigned int)crc);
0548 
0549     rc = -EIO;
0550 
0551 out:
0552     bc->bc_crc32e = cpu_to_le32(bc_crc32e);
0553     bc->bc_ecc = cpu_to_le16(bc_ecc);
0554 
0555     return rc;
0556 }
0557 
0558 /*
0559  * These are the main API.  They check the superblock flag before
0560  * calling the underlying operations.
0561  *
0562  * They expect the buffer(s) to be in disk format.
0563  */
0564 void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
0565                 struct ocfs2_block_check *bc)
0566 {
0567     if (ocfs2_meta_ecc(OCFS2_SB(sb)))
0568         ocfs2_block_check_compute(data, sb->s_blocksize, bc);
0569 }
0570 
0571 int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
0572                 struct ocfs2_block_check *bc)
0573 {
0574     int rc = 0;
0575     struct ocfs2_super *osb = OCFS2_SB(sb);
0576 
0577     if (ocfs2_meta_ecc(osb))
0578         rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc,
0579                         &osb->osb_ecc_stats);
0580 
0581     return rc;
0582 }
0583 
/*
 * Compute the check information across nr buffer_heads, but only if
 * ocfs2_meta_ecc() reports it enabled for this superblock.
 */
void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
                struct buffer_head **bhs, int nr,
                struct ocfs2_block_check *bc)
{
    if (!ocfs2_meta_ecc(OCFS2_SB(sb)))
        return;

    ocfs2_block_check_compute_bhs(bhs, nr, bc);
}
0591 
0592 int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
0593                 struct buffer_head **bhs, int nr,
0594                 struct ocfs2_block_check *bc)
0595 {
0596     int rc = 0;
0597     struct ocfs2_super *osb = OCFS2_SB(sb);
0598 
0599     if (ocfs2_meta_ecc(osb))
0600         rc = ocfs2_block_check_validate_bhs(bhs, nr, bc,
0601                             &osb->osb_ecc_stats);
0602 
0603     return rc;
0604 }
0605