Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <linux/fs.h>
0003 #include <linux/random.h>
0004 #include <linux/buffer_head.h>
0005 #include <linux/utsname.h>
0006 #include <linux/kthread.h>
0007 
0008 #include "ext4.h"
0009 
0010 /* Checksumming functions */
0011 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
0012 {
0013     struct ext4_sb_info *sbi = EXT4_SB(sb);
0014     int offset = offsetof(struct mmp_struct, mmp_checksum);
0015     __u32 csum;
0016 
0017     csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
0018 
0019     return cpu_to_le32(csum);
0020 }
0021 
0022 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
0023 {
0024     if (!ext4_has_metadata_csum(sb))
0025         return 1;
0026 
0027     return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
0028 }
0029 
0030 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
0031 {
0032     if (!ext4_has_metadata_csum(sb))
0033         return;
0034 
0035     mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
0036 }
0037 
0038 /*
0039  * Write the MMP block using REQ_SYNC to try to get the block on-disk
0040  * faster.
0041  */
0042 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
0043 {
0044     struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
0045 
0046     /*
0047      * We protect against freezing so that we don't create dirty buffers
0048      * on frozen filesystem.
0049      */
0050     sb_start_write(sb);
0051     ext4_mmp_csum_set(sb, mmp);
0052     lock_buffer(bh);
0053     bh->b_end_io = end_buffer_write_sync;
0054     get_bh(bh);
0055     submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
0056     wait_on_buffer(bh);
0057     sb_end_write(sb);
0058     if (unlikely(!buffer_uptodate(bh)))
0059         return -EIO;
0060 
0061     return 0;
0062 }
0063 
0064 /*
0065  * Read the MMP block. It _must_ be read from disk and hence we clear the
0066  * uptodate flag on the buffer.
0067  */
0068 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
0069               ext4_fsblk_t mmp_block)
0070 {
0071     struct mmp_struct *mmp;
0072     int ret;
0073 
0074     if (*bh)
0075         clear_buffer_uptodate(*bh);
0076 
0077     /* This would be sb_bread(sb, mmp_block), except we need to be sure
0078      * that the MD RAID device cache has been bypassed, and that the read
0079      * is not blocked in the elevator. */
0080     if (!*bh) {
0081         *bh = sb_getblk(sb, mmp_block);
0082         if (!*bh) {
0083             ret = -ENOMEM;
0084             goto warn_exit;
0085         }
0086     }
0087 
0088     lock_buffer(*bh);
0089     ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
0090     if (ret)
0091         goto warn_exit;
0092 
0093     mmp = (struct mmp_struct *)((*bh)->b_data);
0094     if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
0095         ret = -EFSCORRUPTED;
0096         goto warn_exit;
0097     }
0098     if (!ext4_mmp_csum_verify(sb, mmp)) {
0099         ret = -EFSBADCRC;
0100         goto warn_exit;
0101     }
0102     return 0;
0103 warn_exit:
0104     brelse(*bh);
0105     *bh = NULL;
0106     ext4_warning(sb, "Error %d while reading MMP block %llu",
0107              ret, mmp_block);
0108     return ret;
0109 }
0110 
0111 /*
0112  * Dump as much information as possible to help the admin.
0113  */
0114 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
0115             const char *function, unsigned int line, const char *msg)
0116 {
0117     __ext4_warning(sb, function, line, "%s", msg);
0118     __ext4_warning(sb, function, line,
0119                "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
0120                (unsigned long long)le64_to_cpu(mmp->mmp_time),
0121                (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
0122                (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
0123 }
0124 
0125 /*
0126  * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
0127  */
0128 static int kmmpd(void *data)
0129 {
0130     struct super_block *sb = data;
0131     struct ext4_super_block *es = EXT4_SB(sb)->s_es;
0132     struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh;
0133     struct mmp_struct *mmp;
0134     ext4_fsblk_t mmp_block;
0135     u32 seq = 0;
0136     unsigned long failed_writes = 0;
0137     int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
0138     unsigned mmp_check_interval;
0139     unsigned long last_update_time;
0140     unsigned long diff;
0141     int retval = 0;
0142 
0143     mmp_block = le64_to_cpu(es->s_mmp_block);
0144     mmp = (struct mmp_struct *)(bh->b_data);
0145     mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
0146     /*
0147      * Start with the higher mmp_check_interval and reduce it if
0148      * the MMP block is being updated on time.
0149      */
0150     mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
0151                  EXT4_MMP_MIN_CHECK_INTERVAL);
0152     mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
0153 
0154     memcpy(mmp->mmp_nodename, init_utsname()->nodename,
0155            sizeof(mmp->mmp_nodename));
0156 
0157     while (!kthread_should_stop() && !sb_rdonly(sb)) {
0158         if (!ext4_has_feature_mmp(sb)) {
0159             ext4_warning(sb, "kmmpd being stopped since MMP feature"
0160                      " has been disabled.");
0161             goto wait_to_exit;
0162         }
0163         if (++seq > EXT4_MMP_SEQ_MAX)
0164             seq = 1;
0165 
0166         mmp->mmp_seq = cpu_to_le32(seq);
0167         mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
0168         last_update_time = jiffies;
0169 
0170         retval = write_mmp_block(sb, bh);
0171         /*
0172          * Don't spew too many error messages. Print one every
0173          * (s_mmp_update_interval * 60) seconds.
0174          */
0175         if (retval) {
0176             if ((failed_writes % 60) == 0) {
0177                 ext4_error_err(sb, -retval,
0178                            "Error writing to MMP block");
0179             }
0180             failed_writes++;
0181         }
0182 
0183         diff = jiffies - last_update_time;
0184         if (diff < mmp_update_interval * HZ)
0185             schedule_timeout_interruptible(mmp_update_interval *
0186                                HZ - diff);
0187 
0188         /*
0189          * We need to make sure that more than mmp_check_interval
0190          * seconds have not passed since writing. If that has happened
0191          * we need to check if the MMP block is as we left it.
0192          */
0193         diff = jiffies - last_update_time;
0194         if (diff > mmp_check_interval * HZ) {
0195             struct buffer_head *bh_check = NULL;
0196             struct mmp_struct *mmp_check;
0197 
0198             retval = read_mmp_block(sb, &bh_check, mmp_block);
0199             if (retval) {
0200                 ext4_error_err(sb, -retval,
0201                            "error reading MMP data: %d",
0202                            retval);
0203                 goto wait_to_exit;
0204             }
0205 
0206             mmp_check = (struct mmp_struct *)(bh_check->b_data);
0207             if (mmp->mmp_seq != mmp_check->mmp_seq ||
0208                 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
0209                    sizeof(mmp->mmp_nodename))) {
0210                 dump_mmp_msg(sb, mmp_check,
0211                          "Error while updating MMP info. "
0212                          "The filesystem seems to have been"
0213                          " multiply mounted.");
0214                 ext4_error_err(sb, EBUSY, "abort");
0215                 put_bh(bh_check);
0216                 retval = -EBUSY;
0217                 goto wait_to_exit;
0218             }
0219             put_bh(bh_check);
0220         }
0221 
0222          /*
0223          * Adjust the mmp_check_interval depending on how much time
0224          * it took for the MMP block to be written.
0225          */
0226         mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
0227                          EXT4_MMP_MAX_CHECK_INTERVAL),
0228                      EXT4_MMP_MIN_CHECK_INTERVAL);
0229         mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
0230     }
0231 
0232     /*
0233      * Unmount seems to be clean.
0234      */
0235     mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
0236     mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
0237 
0238     retval = write_mmp_block(sb, bh);
0239 
0240 wait_to_exit:
0241     while (!kthread_should_stop()) {
0242         set_current_state(TASK_INTERRUPTIBLE);
0243         if (!kthread_should_stop())
0244             schedule();
0245     }
0246     set_current_state(TASK_RUNNING);
0247     return retval;
0248 }
0249 
0250 void ext4_stop_mmpd(struct ext4_sb_info *sbi)
0251 {
0252     if (sbi->s_mmp_tsk) {
0253         kthread_stop(sbi->s_mmp_tsk);
0254         brelse(sbi->s_mmp_bh);
0255         sbi->s_mmp_tsk = NULL;
0256     }
0257 }
0258 
0259 /*
0260  * Get a random new sequence number but make sure it is not greater than
0261  * EXT4_MMP_SEQ_MAX.
0262  */
0263 static unsigned int mmp_new_seq(void)
0264 {
0265     u32 new_seq;
0266 
0267     do {
0268         new_seq = prandom_u32();
0269     } while (new_seq > EXT4_MMP_SEQ_MAX);
0270 
0271     return new_seq;
0272 }
0273 
0274 /*
0275  * Protect the filesystem from being mounted more than once.
0276  */
0277 int ext4_multi_mount_protect(struct super_block *sb,
0278                     ext4_fsblk_t mmp_block)
0279 {
0280     struct ext4_super_block *es = EXT4_SB(sb)->s_es;
0281     struct buffer_head *bh = NULL;
0282     struct mmp_struct *mmp = NULL;
0283     u32 seq;
0284     unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
0285     unsigned int wait_time = 0;
0286     int retval;
0287 
0288     if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
0289         mmp_block >= ext4_blocks_count(es)) {
0290         ext4_warning(sb, "Invalid MMP block in superblock");
0291         goto failed;
0292     }
0293 
0294     retval = read_mmp_block(sb, &bh, mmp_block);
0295     if (retval)
0296         goto failed;
0297 
0298     mmp = (struct mmp_struct *)(bh->b_data);
0299 
0300     if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
0301         mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
0302 
0303     /*
0304      * If check_interval in MMP block is larger, use that instead of
0305      * update_interval from the superblock.
0306      */
0307     if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
0308         mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
0309 
0310     seq = le32_to_cpu(mmp->mmp_seq);
0311     if (seq == EXT4_MMP_SEQ_CLEAN)
0312         goto skip;
0313 
0314     if (seq == EXT4_MMP_SEQ_FSCK) {
0315         dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
0316         goto failed;
0317     }
0318 
0319     wait_time = min(mmp_check_interval * 2 + 1,
0320             mmp_check_interval + 60);
0321 
0322     /* Print MMP interval if more than 20 secs. */
0323     if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
0324         ext4_warning(sb, "MMP interval %u higher than expected, please"
0325                  " wait.\n", wait_time * 2);
0326 
0327     if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
0328         ext4_warning(sb, "MMP startup interrupted, failing mount\n");
0329         goto failed;
0330     }
0331 
0332     retval = read_mmp_block(sb, &bh, mmp_block);
0333     if (retval)
0334         goto failed;
0335     mmp = (struct mmp_struct *)(bh->b_data);
0336     if (seq != le32_to_cpu(mmp->mmp_seq)) {
0337         dump_mmp_msg(sb, mmp,
0338                  "Device is already active on another node.");
0339         goto failed;
0340     }
0341 
0342 skip:
0343     /*
0344      * write a new random sequence number.
0345      */
0346     seq = mmp_new_seq();
0347     mmp->mmp_seq = cpu_to_le32(seq);
0348 
0349     retval = write_mmp_block(sb, bh);
0350     if (retval)
0351         goto failed;
0352 
0353     /*
0354      * wait for MMP interval and check mmp_seq.
0355      */
0356     if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
0357         ext4_warning(sb, "MMP startup interrupted, failing mount");
0358         goto failed;
0359     }
0360 
0361     retval = read_mmp_block(sb, &bh, mmp_block);
0362     if (retval)
0363         goto failed;
0364     mmp = (struct mmp_struct *)(bh->b_data);
0365     if (seq != le32_to_cpu(mmp->mmp_seq)) {
0366         dump_mmp_msg(sb, mmp,
0367                  "Device is already active on another node.");
0368         goto failed;
0369     }
0370 
0371     EXT4_SB(sb)->s_mmp_bh = bh;
0372 
0373     BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
0374     snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname),
0375          "%pg", bh->b_bdev);
0376 
0377     /*
0378      * Start a kernel thread to update the MMP block periodically.
0379      */
0380     EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
0381                          (int)sizeof(mmp->mmp_bdevname),
0382                          mmp->mmp_bdevname);
0383     if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
0384         EXT4_SB(sb)->s_mmp_tsk = NULL;
0385         ext4_warning(sb, "Unable to create kmmpd thread for %s.",
0386                  sb->s_id);
0387         goto failed;
0388     }
0389 
0390     return 0;
0391 
0392 failed:
0393     brelse(bh);
0394     return 1;
0395 }