Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2007 Oracle.  All rights reserved.
0004  */
0005 
0006 #include <linux/blkdev.h>
0007 #include <linux/module.h>
0008 #include <linux/fs.h>
0009 #include <linux/pagemap.h>
0010 #include <linux/highmem.h>
0011 #include <linux/time.h>
0012 #include <linux/init.h>
0013 #include <linux/seq_file.h>
0014 #include <linux/string.h>
0015 #include <linux/backing-dev.h>
0016 #include <linux/mount.h>
0017 #include <linux/writeback.h>
0018 #include <linux/statfs.h>
0019 #include <linux/compat.h>
0020 #include <linux/parser.h>
0021 #include <linux/ctype.h>
0022 #include <linux/namei.h>
0023 #include <linux/miscdevice.h>
0024 #include <linux/magic.h>
0025 #include <linux/slab.h>
0026 #include <linux/ratelimit.h>
0027 #include <linux/crc32c.h>
0028 #include <linux/btrfs.h>
0029 #include "delayed-inode.h"
0030 #include "ctree.h"
0031 #include "disk-io.h"
0032 #include "transaction.h"
0033 #include "btrfs_inode.h"
0034 #include "print-tree.h"
0035 #include "props.h"
0036 #include "xattr.h"
0037 #include "volumes.h"
0038 #include "export.h"
0039 #include "compression.h"
0040 #include "rcu-string.h"
0041 #include "dev-replace.h"
0042 #include "free-space-cache.h"
0043 #include "backref.h"
0044 #include "space-info.h"
0045 #include "sysfs.h"
0046 #include "zoned.h"
0047 #include "tests/btrfs-tests.h"
0048 #include "block-group.h"
0049 #include "discard.h"
0050 #include "qgroup.h"
0051 #include "raid56.h"
0052 #define CREATE_TRACE_POINTS
0053 #include <trace/events/btrfs.h>
0054 
0055 static const struct super_operations btrfs_super_ops;
0056 
0057 /*
0058  * Types for mounting the default subvolume and a subvolume explicitly
0059  * requested by subvol=/path. That way the callchain is straightforward and we
0060  * don't have to play tricks with the mount options and recursive calls to
0061  * btrfs_mount.
0062  *
0063  * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
0064  */
0065 static struct file_system_type btrfs_fs_type;
0066 static struct file_system_type btrfs_root_fs_type;
0067 
0068 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
0069 
0070 #ifdef CONFIG_PRINTK
0071 
/*
 * STATE_STRING_PREFACE is the text that introduces the state characters in a
 * log message.  STATE_STRING_BUF_LEN is the buffer size callers must provide:
 * the preface including its NUL terminator (sizeof() counts it) plus one
 * character for each possible state bit.
 */
0072 #define STATE_STRING_PREFACE    ": state "
0073 #define STATE_STRING_BUF_LEN    (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
0074 
0075 /*
0076  * Characters to print to indicate error conditions or uncommon filesystem state.
0077  * RO is not an error.
 *
 * The table is indexed by state bit number.  A zero entry means the state is
 * skipped when the state string is built.
0078  */
0079 static const char fs_state_chars[] = {
0080     [BTRFS_FS_STATE_ERROR]          = 'E',
0081     [BTRFS_FS_STATE_REMOUNTING]     = 'M',
0082     [BTRFS_FS_STATE_RO]         = 0,
0083     [BTRFS_FS_STATE_TRANS_ABORTED]      = 'A',
0084     [BTRFS_FS_STATE_DEV_REPLACING]      = 'R',
0085     [BTRFS_FS_STATE_DUMMY_FS_INFO]      = 0,
0086     [BTRFS_FS_STATE_NO_CSUMS]       = 'C',
0087     [BTRFS_FS_STATE_LOG_CLEANUP_ERROR]  = 'L',
0088 };
0089 
0090 static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
0091 {
0092     unsigned int bit;
0093     bool states_printed = false;
0094     unsigned long fs_state = READ_ONCE(info->fs_state);
0095     char *curr = buf;
0096 
0097     memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
0098     curr += sizeof(STATE_STRING_PREFACE) - 1;
0099 
0100     for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
0101         WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
0102         if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
0103             *curr++ = fs_state_chars[bit];
0104             states_printed = true;
0105         }
0106     }
0107 
0108     /* If no states were printed, reset the buffer */
0109     if (!states_printed)
0110         curr = buf;
0111 
0112     *curr++ = 0;
0113 }
0114 #endif
0115 
0116 /*
0117  * Generally the error codes correspond to their respective errors, but there
0118  * are a few special cases.
0119  *
0120  * EUCLEAN: Any sort of corruption that we encounter.  The tree-checker for
0121  *          instance will return EUCLEAN if any of the blocks are corrupted in
0122  *          a way that is problematic.  We want to reserve EUCLEAN for these
0123  *          sort of corruptions.
0124  *
0125  * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
0126  *        need to use EROFS for this case.  We will have no idea of the
0127  *        original failure, that will have been reported at the time we tripped
0128  *        over the error.  Each subsequent error that doesn't have any context
0129  *        of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
0130  */
0131 const char * __attribute_const__ btrfs_decode_error(int errno)
0132 {
0133     char *errstr = "unknown";
0134 
0135     switch (errno) {
0136     case -ENOENT:       /* -2 */
0137         errstr = "No such entry";
0138         break;
0139     case -EIO:      /* -5 */
0140         errstr = "IO failure";
0141         break;
0142     case -ENOMEM:       /* -12*/
0143         errstr = "Out of memory";
0144         break;
0145     case -EEXIST:       /* -17 */
0146         errstr = "Object already exists";
0147         break;
0148     case -ENOSPC:       /* -28 */
0149         errstr = "No space left";
0150         break;
0151     case -EROFS:        /* -30 */
0152         errstr = "Readonly filesystem";
0153         break;
0154     case -EOPNOTSUPP:   /* -95 */
0155         errstr = "Operation not supported";
0156         break;
0157     case -EUCLEAN:      /* -117 */
0158         errstr = "Filesystem corrupted";
0159         break;
0160     case -EDQUOT:       /* -122 */
0161         errstr = "Quota exceeded";
0162         break;
0163     }
0164 
0165     return errstr;
0166 }
0167 
0168 /*
0169  * __btrfs_handle_fs_error decodes expected errors from the caller and
0170  * invokes the appropriate error response.
0171  */
0172 __cold
0173 void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
0174                unsigned int line, int errno, const char *fmt, ...)
0175 {
0176     struct super_block *sb = fs_info->sb;
0177 #ifdef CONFIG_PRINTK
0178     char statestr[STATE_STRING_BUF_LEN];
0179     const char *errstr;
0180 #endif
0181 
0182     /*
0183      * Special case: if the error is EROFS, and we're already
0184      * under SB_RDONLY, then it is safe here.
0185      */
0186     if (errno == -EROFS && sb_rdonly(sb))
0187         return;
0188 
0189 #ifdef CONFIG_PRINTK
0190     errstr = btrfs_decode_error(errno);
0191     btrfs_state_to_string(fs_info, statestr);
0192     if (fmt) {
0193         struct va_format vaf;
0194         va_list args;
0195 
0196         va_start(args, fmt);
0197         vaf.fmt = fmt;
0198         vaf.va = &args;
0199 
0200         pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
0201             sb->s_id, statestr, function, line, errno, errstr, &vaf);
0202         va_end(args);
0203     } else {
0204         pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
0205             sb->s_id, statestr, function, line, errno, errstr);
0206     }
0207 #endif
0208 
0209     /*
0210      * Today we only save the error info to memory.  Long term we'll
0211      * also send it down to the disk
0212      */
0213     set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
0214 
0215     /* Don't go through full error handling during mount */
0216     if (!(sb->s_flags & SB_BORN))
0217         return;
0218 
0219     if (sb_rdonly(sb))
0220         return;
0221 
0222     btrfs_discard_stop(fs_info);
0223 
0224     /* btrfs handle error by forcing the filesystem readonly */
0225     btrfs_set_sb_rdonly(sb);
0226     btrfs_info(fs_info, "forced readonly");
0227     /*
0228      * Note that a running device replace operation is not canceled here
0229      * although there is no way to update the progress. It would add the
0230      * risk of a deadlock, therefore the canceling is omitted. The only
0231      * penalty is that some I/O remains active until the procedure
0232      * completes. The next time when the filesystem is mounted writable
0233      * again, the device replace operation continues.
0234      */
0235 }
0236 
0237 #ifdef CONFIG_PRINTK
/*
 * Names printed after "BTRFS" in log messages, indexed by the digit of the
 * kernel log level ('0' .. '7').
 */
0238 static const char * const logtypes[] = {
0239     "emergency",
0240     "alert",
0241     "critical",
0242     "error",
0243     "warning",
0244     "notice",
0245     "info",
0246     "debug",
0247 };
0248 
0249 
0250 /*
0251  * Use one ratelimit state per log level so that a flood of less important
0252  * messages doesn't cause more important ones to be dropped.
 *
 * The array is indexed by the same log level digit as logtypes[].
0253  */
0254 static struct ratelimit_state printk_limits[] = {
0255     RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
0256     RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
0257     RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
0258     RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
0259     RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
0260     RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
0261     RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
0262     RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
0263 };
0264 
0265 void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
0266 {
0267     char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
0268     struct va_format vaf;
0269     va_list args;
0270     int kern_level;
0271     const char *type = logtypes[4];
0272     struct ratelimit_state *ratelimit = &printk_limits[4];
0273 
0274     va_start(args, fmt);
0275 
0276     while ((kern_level = printk_get_level(fmt)) != 0) {
0277         size_t size = printk_skip_level(fmt) - fmt;
0278 
0279         if (kern_level >= '0' && kern_level <= '7') {
0280             memcpy(lvl, fmt,  size);
0281             lvl[size] = '\0';
0282             type = logtypes[kern_level - '0'];
0283             ratelimit = &printk_limits[kern_level - '0'];
0284         }
0285         fmt += size;
0286     }
0287 
0288     vaf.fmt = fmt;
0289     vaf.va = &args;
0290 
0291     if (__ratelimit(ratelimit)) {
0292         if (fs_info) {
0293             char statestr[STATE_STRING_BUF_LEN];
0294 
0295             btrfs_state_to_string(fs_info, statestr);
0296             _printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
0297                 fs_info->sb->s_id, statestr, &vaf);
0298         } else {
0299             _printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
0300         }
0301     }
0302 
0303     va_end(args);
0304 }
0305 #endif
0306 
0307 #if BITS_PER_LONG == 32
0308 void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
0309 {
0310     if (!test_and_set_bit(BTRFS_FS_32BIT_WARN, &fs_info->flags)) {
0311         btrfs_warn(fs_info, "reaching 32bit limit for logical addresses");
0312         btrfs_warn(fs_info,
0313 "due to page cache limit on 32bit systems, btrfs can't access metadata at or beyond %lluT",
0314                BTRFS_32BIT_MAX_FILE_SIZE >> 40);
0315         btrfs_warn(fs_info,
0316                "please consider upgrading to 64bit kernel/hardware");
0317     }
0318 }
0319 
0320 void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
0321 {
0322     if (!test_and_set_bit(BTRFS_FS_32BIT_ERROR, &fs_info->flags)) {
0323         btrfs_err(fs_info, "reached 32bit limit for logical addresses");
0324         btrfs_err(fs_info,
0325 "due to page cache limit on 32bit systems, metadata beyond %lluT can't be accessed",
0326               BTRFS_32BIT_MAX_FILE_SIZE >> 40);
0327         btrfs_err(fs_info,
0328                "please consider upgrading to 64bit kernel/hardware");
0329     }
0330 }
0331 #endif
0332 
0333 /*
0334  * We only mark the transaction aborted and then set the file system read-only.
0335  * This will prevent new transactions from starting or trying to join this
0336  * one.
0337  *
0338  * This means that error recovery at the call site is limited to freeing
0339  * any local memory allocations and passing the error code up without
0340  * further cleanup. The transaction should complete as it normally would
0341  * in the call path but will return -EIO.
0342  *
0343  * We'll complete the cleanup in btrfs_end_transaction and
0344  * btrfs_commit_transaction.
0345  */
0346 __cold
0347 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
0348                    const char *function,
0349                    unsigned int line, int errno)
0350 {
0351     struct btrfs_fs_info *fs_info = trans->fs_info;
0352 
0353     WRITE_ONCE(trans->aborted, errno);
0354     WRITE_ONCE(trans->transaction->aborted, errno);
0355     /* Wake up anybody who may be waiting on this transaction */
0356     wake_up(&fs_info->transaction_wait);
0357     wake_up(&fs_info->transaction_blocked_wait);
0358     __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
0359 }
0360 /*
0361  * __btrfs_panic decodes unexpected, fatal errors from the caller,
0362  * issues an alert, and either panics or BUGs, depending on mount options.
0363  */
0364 __cold
0365 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
0366            unsigned int line, int errno, const char *fmt, ...)
0367 {
0368     char *s_id = "<unknown>";
0369     const char *errstr;
0370     struct va_format vaf = { .fmt = fmt };
0371     va_list args;
0372 
0373     if (fs_info)
0374         s_id = fs_info->sb->s_id;
0375 
0376     va_start(args, fmt);
0377     vaf.va = &args;
0378 
0379     errstr = btrfs_decode_error(errno);
0380     if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
0381         panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
0382             s_id, function, line, &vaf, errno, errstr);
0383 
0384     btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
0385            function, line, &vaf, errno, errstr);
0386     va_end(args);
0387     /* Caller calls BUG() */
0388 }
0389 
/* Tear down the filesystem that belongs to @sb by closing its tree. */
static void btrfs_put_super(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	close_ctree(fs_info);
}
0394 
/*
 * Identifiers for the mount option tokens.  These are the values stored in
 * the match tables in this file and switched on by the option parsers after
 * match_token(); Opt_err is the entry that terminates each table and is what
 * the parsers treat as "not recognized".
 */
0395 enum {
0396     Opt_acl, Opt_noacl,
0397     Opt_clear_cache,
0398     Opt_commit_interval,
0399     Opt_compress,
0400     Opt_compress_force,
0401     Opt_compress_force_type,
0402     Opt_compress_type,
0403     Opt_degraded,
0404     Opt_device,
0405     Opt_fatal_errors,
0406     Opt_flushoncommit, Opt_noflushoncommit,
0407     Opt_max_inline,
0408     Opt_barrier, Opt_nobarrier,
0409     Opt_datacow, Opt_nodatacow,
0410     Opt_datasum, Opt_nodatasum,
0411     Opt_defrag, Opt_nodefrag,
0412     Opt_discard, Opt_nodiscard,
0413     Opt_discard_mode,
0414     Opt_norecovery,
0415     Opt_ratio,
0416     Opt_rescan_uuid_tree,
0417     Opt_skip_balance,
0418     Opt_space_cache, Opt_no_space_cache,
0419     Opt_space_cache_version,
0420     Opt_ssd, Opt_nossd,
0421     Opt_ssd_spread, Opt_nossd_spread,
0422     Opt_subvol,
0423     Opt_subvol_empty,
0424     Opt_subvolid,
0425     Opt_thread_pool,
0426     Opt_treelog, Opt_notreelog,
0427     Opt_user_subvol_rm_allowed,
0428 
0429     /* Rescue options */
0430     Opt_rescue,
0431     Opt_usebackuproot,
0432     Opt_nologreplay,
0433     Opt_ignorebadroots,
0434     Opt_ignoredatacsums,
0435     Opt_rescue_all,
0436 
0437     /* Deprecated options */
0438     Opt_recovery,
0439     Opt_inode_cache, Opt_noinode_cache,
0440 
0441     /* Debugging options */
0442     Opt_check_integrity,
0443     Opt_check_integrity_including_extent_data,
0444     Opt_check_integrity_print_mask,
0445     Opt_enospc_debug, Opt_noenospc_debug,
0446 #ifdef CONFIG_BTRFS_DEBUG
0447     Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
0448 #endif
0449 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
0450     Opt_ref_verify,
0451 #endif
0452     Opt_err,
0453 };
0454 
/*
 * Patterns for the regular mount options.  btrfs_parse_options() matches one
 * comma-separated option at a time against this table.  Patterns with a
 * %s or %u placeholder take a value; the {Opt_err, NULL} entry ends the table.
 */
0455 static const match_table_t tokens = {
0456     {Opt_acl, "acl"},
0457     {Opt_noacl, "noacl"},
0458     {Opt_clear_cache, "clear_cache"},
0459     {Opt_commit_interval, "commit=%u"},
0460     {Opt_compress, "compress"},
0461     {Opt_compress_type, "compress=%s"},
0462     {Opt_compress_force, "compress-force"},
0463     {Opt_compress_force_type, "compress-force=%s"},
0464     {Opt_degraded, "degraded"},
0465     {Opt_device, "device=%s"},
0466     {Opt_fatal_errors, "fatal_errors=%s"},
0467     {Opt_flushoncommit, "flushoncommit"},
0468     {Opt_noflushoncommit, "noflushoncommit"},
0469     {Opt_inode_cache, "inode_cache"},
0470     {Opt_noinode_cache, "noinode_cache"},
0471     {Opt_max_inline, "max_inline=%s"},
0472     {Opt_barrier, "barrier"},
0473     {Opt_nobarrier, "nobarrier"},
0474     {Opt_datacow, "datacow"},
0475     {Opt_nodatacow, "nodatacow"},
0476     {Opt_datasum, "datasum"},
0477     {Opt_nodatasum, "nodatasum"},
0478     {Opt_defrag, "autodefrag"},
0479     {Opt_nodefrag, "noautodefrag"},
0480     {Opt_discard, "discard"},
0481     {Opt_discard_mode, "discard=%s"},
0482     {Opt_nodiscard, "nodiscard"},
0483     {Opt_norecovery, "norecovery"},
0484     {Opt_ratio, "metadata_ratio=%u"},
0485     {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
0486     {Opt_skip_balance, "skip_balance"},
0487     {Opt_space_cache, "space_cache"},
0488     {Opt_no_space_cache, "nospace_cache"},
0489     {Opt_space_cache_version, "space_cache=%s"},
0490     {Opt_ssd, "ssd"},
0491     {Opt_nossd, "nossd"},
0492     {Opt_ssd_spread, "ssd_spread"},
0493     {Opt_nossd_spread, "nossd_spread"},
0494     {Opt_subvol, "subvol=%s"},
0495     {Opt_subvol_empty, "subvol="},
0496     {Opt_subvolid, "subvolid=%s"},
0497     {Opt_thread_pool, "thread_pool=%u"},
0498     {Opt_treelog, "treelog"},
0499     {Opt_notreelog, "notreelog"},
0500     {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
0501 
0502     /* Rescue options */
0503     {Opt_rescue, "rescue=%s"},
0504     /* Deprecated, with alias rescue=nologreplay */
0505     {Opt_nologreplay, "nologreplay"},
0506     /* Deprecated, with alias rescue=usebackuproot */
0507     {Opt_usebackuproot, "usebackuproot"},
0508 
0509     /* Deprecated options */
0510     {Opt_recovery, "recovery"},
0511 
0512     /* Debugging options */
0513     {Opt_check_integrity, "check_int"},
0514     {Opt_check_integrity_including_extent_data, "check_int_data"},
0515     {Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
0516     {Opt_enospc_debug, "enospc_debug"},
0517     {Opt_noenospc_debug, "noenospc_debug"},
0518 #ifdef CONFIG_BTRFS_DEBUG
0519     {Opt_fragment_data, "fragment=data"},
0520     {Opt_fragment_metadata, "fragment=metadata"},
0521     {Opt_fragment_all, "fragment=all"},
0522 #endif
0523 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
0524     {Opt_ref_verify, "ref_verify"},
0525 #endif
0526     {Opt_err, NULL},
0527 };
0528 
/*
 * Patterns matched by parse_rescue_options(), one colon-separated item at a
 * time (presumably the value given to the "rescue=" mount option -- confirm
 * against the caller).  Two spellings may map to the same token, giving a
 * long and a short name for one option.
 */
0529 static const match_table_t rescue_tokens = {
0530     {Opt_usebackuproot, "usebackuproot"},
0531     {Opt_nologreplay, "nologreplay"},
0532     {Opt_ignorebadroots, "ignorebadroots"},
0533     {Opt_ignorebadroots, "ibadroots"},
0534     {Opt_ignoredatacsums, "ignoredatacsums"},
0535     {Opt_ignoredatacsums, "idatacsums"},
0536     {Opt_rescue_all, "all"},
0537     {Opt_err, NULL},
0538 };
0539 
0540 static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt,
0541                 const char *opt_name)
0542 {
0543     if (fs_info->mount_opt & opt) {
0544         btrfs_err(fs_info, "%s must be used with ro mount option",
0545               opt_name);
0546         return true;
0547     }
0548     return false;
0549 }
0550 
0551 static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
0552 {
0553     char *opts;
0554     char *orig;
0555     char *p;
0556     substring_t args[MAX_OPT_ARGS];
0557     int ret = 0;
0558 
0559     opts = kstrdup(options, GFP_KERNEL);
0560     if (!opts)
0561         return -ENOMEM;
0562     orig = opts;
0563 
0564     while ((p = strsep(&opts, ":")) != NULL) {
0565         int token;
0566 
0567         if (!*p)
0568             continue;
0569         token = match_token(p, rescue_tokens, args);
0570         switch (token){
0571         case Opt_usebackuproot:
0572             btrfs_info(info,
0573                    "trying to use backup root at mount time");
0574             btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
0575             break;
0576         case Opt_nologreplay:
0577             btrfs_set_and_info(info, NOLOGREPLAY,
0578                        "disabling log replay at mount time");
0579             break;
0580         case Opt_ignorebadroots:
0581             btrfs_set_and_info(info, IGNOREBADROOTS,
0582                        "ignoring bad roots");
0583             break;
0584         case Opt_ignoredatacsums:
0585             btrfs_set_and_info(info, IGNOREDATACSUMS,
0586                        "ignoring data csums");
0587             break;
0588         case Opt_rescue_all:
0589             btrfs_info(info, "enabling all of the rescue options");
0590             btrfs_set_and_info(info, IGNOREDATACSUMS,
0591                        "ignoring data csums");
0592             btrfs_set_and_info(info, IGNOREBADROOTS,
0593                        "ignoring bad roots");
0594             btrfs_set_and_info(info, NOLOGREPLAY,
0595                        "disabling log replay at mount time");
0596             break;
0597         case Opt_err:
0598             btrfs_info(info, "unrecognized rescue option '%s'", p);
0599             ret = -EINVAL;
0600             goto out;
0601         default:
0602             break;
0603         }
0604 
0605     }
0606 out:
0607     kfree(orig);
0608     return ret;
0609 }
0610 
0611 /*
0612  * Regular mount options parser.  Everything that is needed only when
0613  * reading in a new superblock is parsed here.
0614  * XXX JDM: This needs to be cleaned up for remount.
0615  */
0616 int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
0617             unsigned long new_flags)
0618 {
0619     substring_t args[MAX_OPT_ARGS];
0620     char *p, *num;
0621     int intarg;
0622     int ret = 0;
0623     char *compress_type;
0624     bool compress_force = false;
0625     enum btrfs_compression_type saved_compress_type;
0626     int saved_compress_level;
0627     bool saved_compress_force;
0628     int no_compress = 0;
0629 
0630     if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
0631         btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
0632     else if (btrfs_free_space_cache_v1_active(info)) {
0633         if (btrfs_is_zoned(info)) {
0634             btrfs_info(info,
0635             "zoned: clearing existing space cache");
0636             btrfs_set_super_cache_generation(info->super_copy, 0);
0637         } else {
0638             btrfs_set_opt(info->mount_opt, SPACE_CACHE);
0639         }
0640     }
0641 
0642     /*
0643      * Even the options are empty, we still need to do extra check
0644      * against new flags
0645      */
0646     if (!options)
0647         goto check;
0648 
0649     while ((p = strsep(&options, ",")) != NULL) {
0650         int token;
0651         if (!*p)
0652             continue;
0653 
0654         token = match_token(p, tokens, args);
0655         switch (token) {
0656         case Opt_degraded:
0657             btrfs_info(info, "allowing degraded mounts");
0658             btrfs_set_opt(info->mount_opt, DEGRADED);
0659             break;
0660         case Opt_subvol:
0661         case Opt_subvol_empty:
0662         case Opt_subvolid:
0663         case Opt_device:
0664             /*
0665              * These are parsed by btrfs_parse_subvol_options or
0666              * btrfs_parse_device_options and can be ignored here.
0667              */
0668             break;
0669         case Opt_nodatasum:
0670             btrfs_set_and_info(info, NODATASUM,
0671                        "setting nodatasum");
0672             break;
0673         case Opt_datasum:
0674             if (btrfs_test_opt(info, NODATASUM)) {
0675                 if (btrfs_test_opt(info, NODATACOW))
0676                     btrfs_info(info,
0677                            "setting datasum, datacow enabled");
0678                 else
0679                     btrfs_info(info, "setting datasum");
0680             }
0681             btrfs_clear_opt(info->mount_opt, NODATACOW);
0682             btrfs_clear_opt(info->mount_opt, NODATASUM);
0683             break;
0684         case Opt_nodatacow:
0685             if (!btrfs_test_opt(info, NODATACOW)) {
0686                 if (!btrfs_test_opt(info, COMPRESS) ||
0687                     !btrfs_test_opt(info, FORCE_COMPRESS)) {
0688                     btrfs_info(info,
0689                            "setting nodatacow, compression disabled");
0690                 } else {
0691                     btrfs_info(info, "setting nodatacow");
0692                 }
0693             }
0694             btrfs_clear_opt(info->mount_opt, COMPRESS);
0695             btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
0696             btrfs_set_opt(info->mount_opt, NODATACOW);
0697             btrfs_set_opt(info->mount_opt, NODATASUM);
0698             break;
0699         case Opt_datacow:
0700             btrfs_clear_and_info(info, NODATACOW,
0701                          "setting datacow");
0702             break;
0703         case Opt_compress_force:
0704         case Opt_compress_force_type:
0705             compress_force = true;
0706             fallthrough;
0707         case Opt_compress:
0708         case Opt_compress_type:
0709             saved_compress_type = btrfs_test_opt(info,
0710                                  COMPRESS) ?
0711                 info->compress_type : BTRFS_COMPRESS_NONE;
0712             saved_compress_force =
0713                 btrfs_test_opt(info, FORCE_COMPRESS);
0714             saved_compress_level = info->compress_level;
0715             if (token == Opt_compress ||
0716                 token == Opt_compress_force ||
0717                 strncmp(args[0].from, "zlib", 4) == 0) {
0718                 compress_type = "zlib";
0719 
0720                 info->compress_type = BTRFS_COMPRESS_ZLIB;
0721                 info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
0722                 /*
0723                  * args[0] contains uninitialized data since
0724                  * for these tokens we don't expect any
0725                  * parameter.
0726                  */
0727                 if (token != Opt_compress &&
0728                     token != Opt_compress_force)
0729                     info->compress_level =
0730                       btrfs_compress_str2level(
0731                             BTRFS_COMPRESS_ZLIB,
0732                             args[0].from + 4);
0733                 btrfs_set_opt(info->mount_opt, COMPRESS);
0734                 btrfs_clear_opt(info->mount_opt, NODATACOW);
0735                 btrfs_clear_opt(info->mount_opt, NODATASUM);
0736                 no_compress = 0;
0737             } else if (strncmp(args[0].from, "lzo", 3) == 0) {
0738                 compress_type = "lzo";
0739                 info->compress_type = BTRFS_COMPRESS_LZO;
0740                 info->compress_level = 0;
0741                 btrfs_set_opt(info->mount_opt, COMPRESS);
0742                 btrfs_clear_opt(info->mount_opt, NODATACOW);
0743                 btrfs_clear_opt(info->mount_opt, NODATASUM);
0744                 btrfs_set_fs_incompat(info, COMPRESS_LZO);
0745                 no_compress = 0;
0746             } else if (strncmp(args[0].from, "zstd", 4) == 0) {
0747                 compress_type = "zstd";
0748                 info->compress_type = BTRFS_COMPRESS_ZSTD;
0749                 info->compress_level =
0750                     btrfs_compress_str2level(
0751                              BTRFS_COMPRESS_ZSTD,
0752                              args[0].from + 4);
0753                 btrfs_set_opt(info->mount_opt, COMPRESS);
0754                 btrfs_clear_opt(info->mount_opt, NODATACOW);
0755                 btrfs_clear_opt(info->mount_opt, NODATASUM);
0756                 btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
0757                 no_compress = 0;
0758             } else if (strncmp(args[0].from, "no", 2) == 0) {
0759                 compress_type = "no";
0760                 info->compress_level = 0;
0761                 info->compress_type = 0;
0762                 btrfs_clear_opt(info->mount_opt, COMPRESS);
0763                 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
0764                 compress_force = false;
0765                 no_compress++;
0766             } else {
0767                 btrfs_err(info, "unrecognized compression value %s",
0768                       args[0].from);
0769                 ret = -EINVAL;
0770                 goto out;
0771             }
0772 
0773             if (compress_force) {
0774                 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
0775             } else {
0776                 /*
0777                  * If we remount from compress-force=xxx to
0778                  * compress=xxx, we need clear FORCE_COMPRESS
0779                  * flag, otherwise, there is no way for users
0780                  * to disable forcible compression separately.
0781                  */
0782                 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
0783             }
0784             if (no_compress == 1) {
0785                 btrfs_info(info, "use no compression");
0786             } else if ((info->compress_type != saved_compress_type) ||
0787                    (compress_force != saved_compress_force) ||
0788                    (info->compress_level != saved_compress_level)) {
0789                 btrfs_info(info, "%s %s compression, level %d",
0790                        (compress_force) ? "force" : "use",
0791                        compress_type, info->compress_level);
0792             }
0793             compress_force = false;
0794             break;
0795         case Opt_ssd:
0796             btrfs_set_and_info(info, SSD,
0797                        "enabling ssd optimizations");
0798             btrfs_clear_opt(info->mount_opt, NOSSD);
0799             break;
0800         case Opt_ssd_spread:
0801             btrfs_set_and_info(info, SSD,
0802                        "enabling ssd optimizations");
0803             btrfs_set_and_info(info, SSD_SPREAD,
0804                        "using spread ssd allocation scheme");
0805             btrfs_clear_opt(info->mount_opt, NOSSD);
0806             break;
0807         case Opt_nossd:
0808             btrfs_set_opt(info->mount_opt, NOSSD);
0809             btrfs_clear_and_info(info, SSD,
0810                          "not using ssd optimizations");
0811             fallthrough;
0812         case Opt_nossd_spread:
0813             btrfs_clear_and_info(info, SSD_SPREAD,
0814                          "not using spread ssd allocation scheme");
0815             break;
0816         case Opt_barrier:
0817             btrfs_clear_and_info(info, NOBARRIER,
0818                          "turning on barriers");
0819             break;
0820         case Opt_nobarrier:
0821             btrfs_set_and_info(info, NOBARRIER,
0822                        "turning off barriers");
0823             break;
0824         case Opt_thread_pool:
0825             ret = match_int(&args[0], &intarg);
0826             if (ret) {
0827                 btrfs_err(info, "unrecognized thread_pool value %s",
0828                       args[0].from);
0829                 goto out;
0830             } else if (intarg == 0) {
0831                 btrfs_err(info, "invalid value 0 for thread_pool");
0832                 ret = -EINVAL;
0833                 goto out;
0834             }
0835             info->thread_pool_size = intarg;
0836             break;
0837         case Opt_max_inline:
0838             num = match_strdup(&args[0]);
0839             if (num) {
0840                 info->max_inline = memparse(num, NULL);
0841                 kfree(num);
0842 
0843                 if (info->max_inline) {
0844                     info->max_inline = min_t(u64,
0845                         info->max_inline,
0846                         info->sectorsize);
0847                 }
0848                 btrfs_info(info, "max_inline at %llu",
0849                        info->max_inline);
0850             } else {
0851                 ret = -ENOMEM;
0852                 goto out;
0853             }
0854             break;
0855         case Opt_acl:
0856 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
0857             info->sb->s_flags |= SB_POSIXACL;
0858             break;
0859 #else
0860             btrfs_err(info, "support for ACL not compiled in!");
0861             ret = -EINVAL;
0862             goto out;
0863 #endif
0864         case Opt_noacl:
0865             info->sb->s_flags &= ~SB_POSIXACL;
0866             break;
0867         case Opt_notreelog:
0868             btrfs_set_and_info(info, NOTREELOG,
0869                        "disabling tree log");
0870             break;
0871         case Opt_treelog:
0872             btrfs_clear_and_info(info, NOTREELOG,
0873                          "enabling tree log");
0874             break;
0875         case Opt_norecovery:
0876         case Opt_nologreplay:
0877             btrfs_warn(info,
0878         "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
0879             btrfs_set_and_info(info, NOLOGREPLAY,
0880                        "disabling log replay at mount time");
0881             break;
0882         case Opt_flushoncommit:
0883             btrfs_set_and_info(info, FLUSHONCOMMIT,
0884                        "turning on flush-on-commit");
0885             break;
0886         case Opt_noflushoncommit:
0887             btrfs_clear_and_info(info, FLUSHONCOMMIT,
0888                          "turning off flush-on-commit");
0889             break;
0890         case Opt_ratio:
0891             ret = match_int(&args[0], &intarg);
0892             if (ret) {
0893                 btrfs_err(info, "unrecognized metadata_ratio value %s",
0894                       args[0].from);
0895                 goto out;
0896             }
0897             info->metadata_ratio = intarg;
0898             btrfs_info(info, "metadata ratio %u",
0899                    info->metadata_ratio);
0900             break;
0901         case Opt_discard:
0902         case Opt_discard_mode:
0903             if (token == Opt_discard ||
0904                 strcmp(args[0].from, "sync") == 0) {
0905                 btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
0906                 btrfs_set_and_info(info, DISCARD_SYNC,
0907                            "turning on sync discard");
0908             } else if (strcmp(args[0].from, "async") == 0) {
0909                 btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
0910                 btrfs_set_and_info(info, DISCARD_ASYNC,
0911                            "turning on async discard");
0912             } else {
0913                 btrfs_err(info, "unrecognized discard mode value %s",
0914                       args[0].from);
0915                 ret = -EINVAL;
0916                 goto out;
0917             }
0918             break;
0919         case Opt_nodiscard:
0920             btrfs_clear_and_info(info, DISCARD_SYNC,
0921                          "turning off discard");
0922             btrfs_clear_and_info(info, DISCARD_ASYNC,
0923                          "turning off async discard");
0924             break;
0925         case Opt_space_cache:
0926         case Opt_space_cache_version:
0927             /*
0928              * We already set FREE_SPACE_TREE above because we have
0929              * compat_ro(FREE_SPACE_TREE) set, and we aren't going
0930              * to allow v1 to be set for extent tree v2, simply
0931              * ignore this setting if we're extent tree v2.
0932              */
0933             if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
0934                 break;
0935             if (token == Opt_space_cache ||
0936                 strcmp(args[0].from, "v1") == 0) {
0937                 btrfs_clear_opt(info->mount_opt,
0938                         FREE_SPACE_TREE);
0939                 btrfs_set_and_info(info, SPACE_CACHE,
0940                        "enabling disk space caching");
0941             } else if (strcmp(args[0].from, "v2") == 0) {
0942                 btrfs_clear_opt(info->mount_opt,
0943                         SPACE_CACHE);
0944                 btrfs_set_and_info(info, FREE_SPACE_TREE,
0945                            "enabling free space tree");
0946             } else {
0947                 btrfs_err(info, "unrecognized space_cache value %s",
0948                       args[0].from);
0949                 ret = -EINVAL;
0950                 goto out;
0951             }
0952             break;
0953         case Opt_rescan_uuid_tree:
0954             btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
0955             break;
0956         case Opt_no_space_cache:
0957             /*
0958              * We cannot operate without the free space tree with
0959              * extent tree v2, ignore this option.
0960              */
0961             if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
0962                 break;
0963             if (btrfs_test_opt(info, SPACE_CACHE)) {
0964                 btrfs_clear_and_info(info, SPACE_CACHE,
0965                          "disabling disk space caching");
0966             }
0967             if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
0968                 btrfs_clear_and_info(info, FREE_SPACE_TREE,
0969                          "disabling free space tree");
0970             }
0971             break;
0972         case Opt_inode_cache:
0973         case Opt_noinode_cache:
0974             btrfs_warn(info,
0975     "the 'inode_cache' option is deprecated and has no effect since 5.11");
0976             break;
0977         case Opt_clear_cache:
0978             /*
0979              * We cannot clear the free space tree with extent tree
0980              * v2, ignore this option.
0981              */
0982             if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
0983                 break;
0984             btrfs_set_and_info(info, CLEAR_CACHE,
0985                        "force clearing of disk cache");
0986             break;
0987         case Opt_user_subvol_rm_allowed:
0988             btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
0989             break;
0990         case Opt_enospc_debug:
0991             btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
0992             break;
0993         case Opt_noenospc_debug:
0994             btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
0995             break;
0996         case Opt_defrag:
0997             btrfs_set_and_info(info, AUTO_DEFRAG,
0998                        "enabling auto defrag");
0999             break;
1000         case Opt_nodefrag:
1001             btrfs_clear_and_info(info, AUTO_DEFRAG,
1002                          "disabling auto defrag");
1003             break;
1004         case Opt_recovery:
1005         case Opt_usebackuproot:
1006             btrfs_warn(info,
1007             "'%s' is deprecated, use 'rescue=usebackuproot' instead",
1008                    token == Opt_recovery ? "recovery" :
1009                    "usebackuproot");
1010             btrfs_info(info,
1011                    "trying to use backup root at mount time");
1012             btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
1013             break;
1014         case Opt_skip_balance:
1015             btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
1016             break;
1017 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1018         case Opt_check_integrity_including_extent_data:
1019             btrfs_info(info,
1020                    "enabling check integrity including extent data");
1021             btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
1022             btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
1023             break;
1024         case Opt_check_integrity:
1025             btrfs_info(info, "enabling check integrity");
1026             btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
1027             break;
1028         case Opt_check_integrity_print_mask:
1029             ret = match_int(&args[0], &intarg);
1030             if (ret) {
1031                 btrfs_err(info,
1032                 "unrecognized check_integrity_print_mask value %s",
1033                     args[0].from);
1034                 goto out;
1035             }
1036             info->check_integrity_print_mask = intarg;
1037             btrfs_info(info, "check_integrity_print_mask 0x%x",
1038                    info->check_integrity_print_mask);
1039             break;
1040 #else
1041         case Opt_check_integrity_including_extent_data:
1042         case Opt_check_integrity:
1043         case Opt_check_integrity_print_mask:
1044             btrfs_err(info,
1045                   "support for check_integrity* not compiled in!");
1046             ret = -EINVAL;
1047             goto out;
1048 #endif
1049         case Opt_fatal_errors:
1050             if (strcmp(args[0].from, "panic") == 0) {
1051                 btrfs_set_opt(info->mount_opt,
1052                           PANIC_ON_FATAL_ERROR);
1053             } else if (strcmp(args[0].from, "bug") == 0) {
1054                 btrfs_clear_opt(info->mount_opt,
1055                           PANIC_ON_FATAL_ERROR);
1056             } else {
1057                 btrfs_err(info, "unrecognized fatal_errors value %s",
1058                       args[0].from);
1059                 ret = -EINVAL;
1060                 goto out;
1061             }
1062             break;
1063         case Opt_commit_interval:
1064             intarg = 0;
1065             ret = match_int(&args[0], &intarg);
1066             if (ret) {
1067                 btrfs_err(info, "unrecognized commit_interval value %s",
1068                       args[0].from);
1069                 ret = -EINVAL;
1070                 goto out;
1071             }
1072             if (intarg == 0) {
1073                 btrfs_info(info,
1074                        "using default commit interval %us",
1075                        BTRFS_DEFAULT_COMMIT_INTERVAL);
1076                 intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
1077             } else if (intarg > 300) {
1078                 btrfs_warn(info, "excessive commit interval %d",
1079                        intarg);
1080             }
1081             info->commit_interval = intarg;
1082             break;
1083         case Opt_rescue:
1084             ret = parse_rescue_options(info, args[0].from);
1085             if (ret < 0) {
1086                 btrfs_err(info, "unrecognized rescue value %s",
1087                       args[0].from);
1088                 goto out;
1089             }
1090             break;
1091 #ifdef CONFIG_BTRFS_DEBUG
1092         case Opt_fragment_all:
1093             btrfs_info(info, "fragmenting all space");
1094             btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1095             btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
1096             break;
1097         case Opt_fragment_metadata:
1098             btrfs_info(info, "fragmenting metadata");
1099             btrfs_set_opt(info->mount_opt,
1100                       FRAGMENT_METADATA);
1101             break;
1102         case Opt_fragment_data:
1103             btrfs_info(info, "fragmenting data");
1104             btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1105             break;
1106 #endif
1107 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
1108         case Opt_ref_verify:
1109             btrfs_info(info, "doing ref verification");
1110             btrfs_set_opt(info->mount_opt, REF_VERIFY);
1111             break;
1112 #endif
1113         case Opt_err:
1114             btrfs_err(info, "unrecognized mount option '%s'", p);
1115             ret = -EINVAL;
1116             goto out;
1117         default:
1118             break;
1119         }
1120     }
1121 check:
1122     /* We're read-only, don't have to check. */
1123     if (new_flags & SB_RDONLY)
1124         goto out;
1125 
1126     if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
1127         check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
1128         check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums"))
1129         ret = -EINVAL;
1130 out:
1131     if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
1132         !btrfs_test_opt(info, FREE_SPACE_TREE) &&
1133         !btrfs_test_opt(info, CLEAR_CACHE)) {
1134         btrfs_err(info, "cannot disable free space tree");
1135         ret = -EINVAL;
1136 
1137     }
1138     if (!ret)
1139         ret = btrfs_check_mountopts_zoned(info);
1140     if (!ret && btrfs_test_opt(info, SPACE_CACHE))
1141         btrfs_info(info, "disk space caching is enabled");
1142     if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
1143         btrfs_info(info, "using free space tree");
1144     return ret;
1145 }
1146 
1147 /*
1148  * Parse mount options that are required early in the mount process.
1149  *
1150  * All other options will be parsed on much later in the mount process and
1151  * only when we need to allocate a new super block.
1152  */
1153 static int btrfs_parse_device_options(const char *options, fmode_t flags,
1154                       void *holder)
1155 {
1156     substring_t args[MAX_OPT_ARGS];
1157     char *device_name, *opts, *orig, *p;
1158     struct btrfs_device *device = NULL;
1159     int error = 0;
1160 
1161     lockdep_assert_held(&uuid_mutex);
1162 
1163     if (!options)
1164         return 0;
1165 
1166     /*
1167      * strsep changes the string, duplicate it because btrfs_parse_options
1168      * gets called later
1169      */
1170     opts = kstrdup(options, GFP_KERNEL);
1171     if (!opts)
1172         return -ENOMEM;
1173     orig = opts;
1174 
1175     while ((p = strsep(&opts, ",")) != NULL) {
1176         int token;
1177 
1178         if (!*p)
1179             continue;
1180 
1181         token = match_token(p, tokens, args);
1182         if (token == Opt_device) {
1183             device_name = match_strdup(&args[0]);
1184             if (!device_name) {
1185                 error = -ENOMEM;
1186                 goto out;
1187             }
1188             device = btrfs_scan_one_device(device_name, flags,
1189                     holder);
1190             kfree(device_name);
1191             if (IS_ERR(device)) {
1192                 error = PTR_ERR(device);
1193                 goto out;
1194             }
1195         }
1196     }
1197 
1198 out:
1199     kfree(orig);
1200     return error;
1201 }
1202 
1203 /*
1204  * Parse mount options that are related to subvolume id
1205  *
1206  * The value is later passed to mount_subvol()
1207  */
1208 static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
1209         u64 *subvol_objectid)
1210 {
1211     substring_t args[MAX_OPT_ARGS];
1212     char *opts, *orig, *p;
1213     int error = 0;
1214     u64 subvolid;
1215 
1216     if (!options)
1217         return 0;
1218 
1219     /*
1220      * strsep changes the string, duplicate it because
1221      * btrfs_parse_device_options gets called later
1222      */
1223     opts = kstrdup(options, GFP_KERNEL);
1224     if (!opts)
1225         return -ENOMEM;
1226     orig = opts;
1227 
1228     while ((p = strsep(&opts, ",")) != NULL) {
1229         int token;
1230         if (!*p)
1231             continue;
1232 
1233         token = match_token(p, tokens, args);
1234         switch (token) {
1235         case Opt_subvol:
1236             kfree(*subvol_name);
1237             *subvol_name = match_strdup(&args[0]);
1238             if (!*subvol_name) {
1239                 error = -ENOMEM;
1240                 goto out;
1241             }
1242             break;
1243         case Opt_subvolid:
1244             error = match_u64(&args[0], &subvolid);
1245             if (error)
1246                 goto out;
1247 
1248             /* we want the original fs_tree */
1249             if (subvolid == 0)
1250                 subvolid = BTRFS_FS_TREE_OBJECTID;
1251 
1252             *subvol_objectid = subvolid;
1253             break;
1254         default:
1255             break;
1256         }
1257     }
1258 
1259 out:
1260     kfree(orig);
1261     return error;
1262 }
1263 
1264 char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
1265                       u64 subvol_objectid)
1266 {
1267     struct btrfs_root *root = fs_info->tree_root;
1268     struct btrfs_root *fs_root = NULL;
1269     struct btrfs_root_ref *root_ref;
1270     struct btrfs_inode_ref *inode_ref;
1271     struct btrfs_key key;
1272     struct btrfs_path *path = NULL;
1273     char *name = NULL, *ptr;
1274     u64 dirid;
1275     int len;
1276     int ret;
1277 
1278     path = btrfs_alloc_path();
1279     if (!path) {
1280         ret = -ENOMEM;
1281         goto err;
1282     }
1283 
1284     name = kmalloc(PATH_MAX, GFP_KERNEL);
1285     if (!name) {
1286         ret = -ENOMEM;
1287         goto err;
1288     }
1289     ptr = name + PATH_MAX - 1;
1290     ptr[0] = '\0';
1291 
1292     /*
1293      * Walk up the subvolume trees in the tree of tree roots by root
1294      * backrefs until we hit the top-level subvolume.
1295      */
1296     while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
1297         key.objectid = subvol_objectid;
1298         key.type = BTRFS_ROOT_BACKREF_KEY;
1299         key.offset = (u64)-1;
1300 
1301         ret = btrfs_search_backwards(root, &key, path);
1302         if (ret < 0) {
1303             goto err;
1304         } else if (ret > 0) {
1305             ret = -ENOENT;
1306             goto err;
1307         }
1308 
1309         subvol_objectid = key.offset;
1310 
1311         root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1312                       struct btrfs_root_ref);
1313         len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
1314         ptr -= len + 1;
1315         if (ptr < name) {
1316             ret = -ENAMETOOLONG;
1317             goto err;
1318         }
1319         read_extent_buffer(path->nodes[0], ptr + 1,
1320                    (unsigned long)(root_ref + 1), len);
1321         ptr[0] = '/';
1322         dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
1323         btrfs_release_path(path);
1324 
1325         fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
1326         if (IS_ERR(fs_root)) {
1327             ret = PTR_ERR(fs_root);
1328             fs_root = NULL;
1329             goto err;
1330         }
1331 
1332         /*
1333          * Walk up the filesystem tree by inode refs until we hit the
1334          * root directory.
1335          */
1336         while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
1337             key.objectid = dirid;
1338             key.type = BTRFS_INODE_REF_KEY;
1339             key.offset = (u64)-1;
1340 
1341             ret = btrfs_search_backwards(fs_root, &key, path);
1342             if (ret < 0) {
1343                 goto err;
1344             } else if (ret > 0) {
1345                 ret = -ENOENT;
1346                 goto err;
1347             }
1348 
1349             dirid = key.offset;
1350 
1351             inode_ref = btrfs_item_ptr(path->nodes[0],
1352                            path->slots[0],
1353                            struct btrfs_inode_ref);
1354             len = btrfs_inode_ref_name_len(path->nodes[0],
1355                                inode_ref);
1356             ptr -= len + 1;
1357             if (ptr < name) {
1358                 ret = -ENAMETOOLONG;
1359                 goto err;
1360             }
1361             read_extent_buffer(path->nodes[0], ptr + 1,
1362                        (unsigned long)(inode_ref + 1), len);
1363             ptr[0] = '/';
1364             btrfs_release_path(path);
1365         }
1366         btrfs_put_root(fs_root);
1367         fs_root = NULL;
1368     }
1369 
1370     btrfs_free_path(path);
1371     if (ptr == name + PATH_MAX - 1) {
1372         name[0] = '/';
1373         name[1] = '\0';
1374     } else {
1375         memmove(name, ptr, name + PATH_MAX - ptr);
1376     }
1377     return name;
1378 
1379 err:
1380     btrfs_put_root(fs_root);
1381     btrfs_free_path(path);
1382     kfree(name);
1383     return ERR_PTR(ret);
1384 }
1385 
1386 static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
1387 {
1388     struct btrfs_root *root = fs_info->tree_root;
1389     struct btrfs_dir_item *di;
1390     struct btrfs_path *path;
1391     struct btrfs_key location;
1392     u64 dir_id;
1393 
1394     path = btrfs_alloc_path();
1395     if (!path)
1396         return -ENOMEM;
1397 
1398     /*
1399      * Find the "default" dir item which points to the root item that we
1400      * will mount by default if we haven't been given a specific subvolume
1401      * to mount.
1402      */
1403     dir_id = btrfs_super_root_dir(fs_info->super_copy);
1404     di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
1405     if (IS_ERR(di)) {
1406         btrfs_free_path(path);
1407         return PTR_ERR(di);
1408     }
1409     if (!di) {
1410         /*
1411          * Ok the default dir item isn't there.  This is weird since
1412          * it's always been there, but don't freak out, just try and
1413          * mount the top-level subvolume.
1414          */
1415         btrfs_free_path(path);
1416         *objectid = BTRFS_FS_TREE_OBJECTID;
1417         return 0;
1418     }
1419 
1420     btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
1421     btrfs_free_path(path);
1422     *objectid = location.objectid;
1423     return 0;
1424 }
1425 
1426 static int btrfs_fill_super(struct super_block *sb,
1427                 struct btrfs_fs_devices *fs_devices,
1428                 void *data)
1429 {
1430     struct inode *inode;
1431     struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1432     int err;
1433 
1434     sb->s_maxbytes = MAX_LFS_FILESIZE;
1435     sb->s_magic = BTRFS_SUPER_MAGIC;
1436     sb->s_op = &btrfs_super_ops;
1437     sb->s_d_op = &btrfs_dentry_operations;
1438     sb->s_export_op = &btrfs_export_ops;
1439 #ifdef CONFIG_FS_VERITY
1440     sb->s_vop = &btrfs_verityops;
1441 #endif
1442     sb->s_xattr = btrfs_xattr_handlers;
1443     sb->s_time_gran = 1;
1444 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
1445     sb->s_flags |= SB_POSIXACL;
1446 #endif
1447     sb->s_flags |= SB_I_VERSION;
1448     sb->s_iflags |= SB_I_CGROUPWB;
1449 
1450     err = super_setup_bdi(sb);
1451     if (err) {
1452         btrfs_err(fs_info, "super_setup_bdi failed");
1453         return err;
1454     }
1455 
1456     err = open_ctree(sb, fs_devices, (char *)data);
1457     if (err) {
1458         btrfs_err(fs_info, "open_ctree failed");
1459         return err;
1460     }
1461 
1462     inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
1463     if (IS_ERR(inode)) {
1464         err = PTR_ERR(inode);
1465         goto fail_close;
1466     }
1467 
1468     sb->s_root = d_make_root(inode);
1469     if (!sb->s_root) {
1470         err = -ENOMEM;
1471         goto fail_close;
1472     }
1473 
1474     sb->s_flags |= SB_ACTIVE;
1475     return 0;
1476 
1477 fail_close:
1478     close_ctree(fs_info);
1479     return err;
1480 }
1481 
1482 int btrfs_sync_fs(struct super_block *sb, int wait)
1483 {
1484     struct btrfs_trans_handle *trans;
1485     struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1486     struct btrfs_root *root = fs_info->tree_root;
1487 
1488     trace_btrfs_sync_fs(fs_info, wait);
1489 
1490     if (!wait) {
1491         filemap_flush(fs_info->btree_inode->i_mapping);
1492         return 0;
1493     }
1494 
1495     btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1496 
1497     trans = btrfs_attach_transaction_barrier(root);
1498     if (IS_ERR(trans)) {
1499         /* no transaction, don't bother */
1500         if (PTR_ERR(trans) == -ENOENT) {
1501             /*
1502              * Exit unless we have some pending changes
1503              * that need to go through commit
1504              */
1505             if (fs_info->pending_changes == 0)
1506                 return 0;
1507             /*
1508              * A non-blocking test if the fs is frozen. We must not
1509              * start a new transaction here otherwise a deadlock
1510              * happens. The pending operations are delayed to the
1511              * next commit after thawing.
1512              */
1513             if (sb_start_write_trylock(sb))
1514                 sb_end_write(sb);
1515             else
1516                 return 0;
1517             trans = btrfs_start_transaction(root, 0);
1518         }
1519         if (IS_ERR(trans))
1520             return PTR_ERR(trans);
1521     }
1522     return btrfs_commit_transaction(trans);
1523 }
1524 
1525 static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed)
1526 {
1527     seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s);
1528     *printed = true;
1529 }
1530 
1531 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1532 {
1533     struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1534     const char *compress_type;
1535     const char *subvol_name;
1536     bool printed = false;
1537 
1538     if (btrfs_test_opt(info, DEGRADED))
1539         seq_puts(seq, ",degraded");
1540     if (btrfs_test_opt(info, NODATASUM))
1541         seq_puts(seq, ",nodatasum");
1542     if (btrfs_test_opt(info, NODATACOW))
1543         seq_puts(seq, ",nodatacow");
1544     if (btrfs_test_opt(info, NOBARRIER))
1545         seq_puts(seq, ",nobarrier");
1546     if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1547         seq_printf(seq, ",max_inline=%llu", info->max_inline);
1548     if (info->thread_pool_size !=  min_t(unsigned long,
1549                          num_online_cpus() + 2, 8))
1550         seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1551     if (btrfs_test_opt(info, COMPRESS)) {
1552         compress_type = btrfs_compress_type2str(info->compress_type);
1553         if (btrfs_test_opt(info, FORCE_COMPRESS))
1554             seq_printf(seq, ",compress-force=%s", compress_type);
1555         else
1556             seq_printf(seq, ",compress=%s", compress_type);
1557         if (info->compress_level)
1558             seq_printf(seq, ":%d", info->compress_level);
1559     }
1560     if (btrfs_test_opt(info, NOSSD))
1561         seq_puts(seq, ",nossd");
1562     if (btrfs_test_opt(info, SSD_SPREAD))
1563         seq_puts(seq, ",ssd_spread");
1564     else if (btrfs_test_opt(info, SSD))
1565         seq_puts(seq, ",ssd");
1566     if (btrfs_test_opt(info, NOTREELOG))
1567         seq_puts(seq, ",notreelog");
1568     if (btrfs_test_opt(info, NOLOGREPLAY))
1569         print_rescue_option(seq, "nologreplay", &printed);
1570     if (btrfs_test_opt(info, USEBACKUPROOT))
1571         print_rescue_option(seq, "usebackuproot", &printed);
1572     if (btrfs_test_opt(info, IGNOREBADROOTS))
1573         print_rescue_option(seq, "ignorebadroots", &printed);
1574     if (btrfs_test_opt(info, IGNOREDATACSUMS))
1575         print_rescue_option(seq, "ignoredatacsums", &printed);
1576     if (btrfs_test_opt(info, FLUSHONCOMMIT))
1577         seq_puts(seq, ",flushoncommit");
1578     if (btrfs_test_opt(info, DISCARD_SYNC))
1579         seq_puts(seq, ",discard");
1580     if (btrfs_test_opt(info, DISCARD_ASYNC))
1581         seq_puts(seq, ",discard=async");
1582     if (!(info->sb->s_flags & SB_POSIXACL))
1583         seq_puts(seq, ",noacl");
1584     if (btrfs_free_space_cache_v1_active(info))
1585         seq_puts(seq, ",space_cache");
1586     else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
1587         seq_puts(seq, ",space_cache=v2");
1588     else
1589         seq_puts(seq, ",nospace_cache");
1590     if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1591         seq_puts(seq, ",rescan_uuid_tree");
1592     if (btrfs_test_opt(info, CLEAR_CACHE))
1593         seq_puts(seq, ",clear_cache");
1594     if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
1595         seq_puts(seq, ",user_subvol_rm_allowed");
1596     if (btrfs_test_opt(info, ENOSPC_DEBUG))
1597         seq_puts(seq, ",enospc_debug");
1598     if (btrfs_test_opt(info, AUTO_DEFRAG))
1599         seq_puts(seq, ",autodefrag");
1600     if (btrfs_test_opt(info, SKIP_BALANCE))
1601         seq_puts(seq, ",skip_balance");
1602 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1603     if (btrfs_test_opt(info, CHECK_INTEGRITY_DATA))
1604         seq_puts(seq, ",check_int_data");
1605     else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1606         seq_puts(seq, ",check_int");
1607     if (info->check_integrity_print_mask)
1608         seq_printf(seq, ",check_int_print_mask=%d",
1609                 info->check_integrity_print_mask);
1610 #endif
1611     if (info->metadata_ratio)
1612         seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1613     if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
1614         seq_puts(seq, ",fatal_errors=panic");
1615     if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1616         seq_printf(seq, ",commit=%u", info->commit_interval);
1617 #ifdef CONFIG_BTRFS_DEBUG
1618     if (btrfs_test_opt(info, FRAGMENT_DATA))
1619         seq_puts(seq, ",fragment=data");
1620     if (btrfs_test_opt(info, FRAGMENT_METADATA))
1621         seq_puts(seq, ",fragment=metadata");
1622 #endif
1623     if (btrfs_test_opt(info, REF_VERIFY))
1624         seq_puts(seq, ",ref_verify");
1625     seq_printf(seq, ",subvolid=%llu",
1626           BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1627     subvol_name = btrfs_get_subvol_name_from_objectid(info,
1628             BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1629     if (!IS_ERR(subvol_name)) {
1630         seq_puts(seq, ",subvol=");
1631         seq_escape(seq, subvol_name, " \t\n\\");
1632         kfree(subvol_name);
1633     }
1634     return 0;
1635 }
1636 
1637 static int btrfs_test_super(struct super_block *s, void *data)
1638 {
1639     struct btrfs_fs_info *p = data;
1640     struct btrfs_fs_info *fs_info = btrfs_sb(s);
1641 
1642     return fs_info->fs_devices == p->fs_devices;
1643 }
1644 
1645 static int btrfs_set_super(struct super_block *s, void *data)
1646 {
1647     int err = set_anon_super(s, data);
1648     if (!err)
1649         s->s_fs_info = data;
1650     return err;
1651 }
1652 
1653 /*
1654  * subvolumes are identified by ino 256
1655  */
1656 static inline int is_subvolume_inode(struct inode *inode)
1657 {
1658     if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
1659         return 1;
1660     return 0;
1661 }
1662 
1663 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
1664                    struct vfsmount *mnt)
1665 {
1666     struct dentry *root;
1667     int ret;
1668 
1669     if (!subvol_name) {
1670         if (!subvol_objectid) {
1671             ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
1672                               &subvol_objectid);
1673             if (ret) {
1674                 root = ERR_PTR(ret);
1675                 goto out;
1676             }
1677         }
1678         subvol_name = btrfs_get_subvol_name_from_objectid(
1679                     btrfs_sb(mnt->mnt_sb), subvol_objectid);
1680         if (IS_ERR(subvol_name)) {
1681             root = ERR_CAST(subvol_name);
1682             subvol_name = NULL;
1683             goto out;
1684         }
1685 
1686     }
1687 
1688     root = mount_subtree(mnt, subvol_name);
1689     /* mount_subtree() drops our reference on the vfsmount. */
1690     mnt = NULL;
1691 
1692     if (!IS_ERR(root)) {
1693         struct super_block *s = root->d_sb;
1694         struct btrfs_fs_info *fs_info = btrfs_sb(s);
1695         struct inode *root_inode = d_inode(root);
1696         u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
1697 
1698         ret = 0;
1699         if (!is_subvolume_inode(root_inode)) {
1700             btrfs_err(fs_info, "'%s' is not a valid subvolume",
1701                    subvol_name);
1702             ret = -EINVAL;
1703         }
1704         if (subvol_objectid && root_objectid != subvol_objectid) {
1705             /*
1706              * This will also catch a race condition where a
1707              * subvolume which was passed by ID is renamed and
1708              * another subvolume is renamed over the old location.
1709              */
1710             btrfs_err(fs_info,
1711                   "subvol '%s' does not match subvolid %llu",
1712                   subvol_name, subvol_objectid);
1713             ret = -EINVAL;
1714         }
1715         if (ret) {
1716             dput(root);
1717             root = ERR_PTR(ret);
1718             deactivate_locked_super(s);
1719         }
1720     }
1721 
1722 out:
1723     mntput(mnt);
1724     kfree(subvol_name);
1725     return root;
1726 }
1727 
1728 /*
1729  * Find a superblock for the given device / mount point.
1730  *
1731  * Note: This is based on mount_bdev from fs/super.c with a few additions
1732  *       for multiple device setup.  Make sure to keep it in sync.
1733  */
1734 static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1735         int flags, const char *device_name, void *data)
1736 {
1737     struct block_device *bdev = NULL;
1738     struct super_block *s;
1739     struct btrfs_device *device = NULL;
1740     struct btrfs_fs_devices *fs_devices = NULL;
1741     struct btrfs_fs_info *fs_info = NULL;
1742     void *new_sec_opts = NULL;
1743     fmode_t mode = FMODE_READ;
1744     int error = 0;
1745 
1746     if (!(flags & SB_RDONLY))
1747         mode |= FMODE_WRITE;
1748 
1749     if (data) {
1750         error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1751         if (error)
1752             return ERR_PTR(error);
1753     }
1754 
1755     /*
1756      * Setup a dummy root and fs_info for test/set super.  This is because
1757      * we don't actually fill this stuff out until open_ctree, but we need
1758      * then open_ctree will properly initialize the file system specific
1759      * settings later.  btrfs_init_fs_info initializes the static elements
1760      * of the fs_info (locks and such) to make cleanup easier if we find a
1761      * superblock with our given fs_devices later on at sget() time.
1762      */
1763     fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1764     if (!fs_info) {
1765         error = -ENOMEM;
1766         goto error_sec_opts;
1767     }
1768     btrfs_init_fs_info(fs_info);
1769 
1770     fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1771     fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1772     if (!fs_info->super_copy || !fs_info->super_for_commit) {
1773         error = -ENOMEM;
1774         goto error_fs_info;
1775     }
1776 
1777     mutex_lock(&uuid_mutex);
1778     error = btrfs_parse_device_options(data, mode, fs_type);
1779     if (error) {
1780         mutex_unlock(&uuid_mutex);
1781         goto error_fs_info;
1782     }
1783 
1784     device = btrfs_scan_one_device(device_name, mode, fs_type);
1785     if (IS_ERR(device)) {
1786         mutex_unlock(&uuid_mutex);
1787         error = PTR_ERR(device);
1788         goto error_fs_info;
1789     }
1790 
1791     fs_devices = device->fs_devices;
1792     fs_info->fs_devices = fs_devices;
1793 
1794     error = btrfs_open_devices(fs_devices, mode, fs_type);
1795     mutex_unlock(&uuid_mutex);
1796     if (error)
1797         goto error_fs_info;
1798 
1799     if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
1800         error = -EACCES;
1801         goto error_close_devices;
1802     }
1803 
1804     bdev = fs_devices->latest_dev->bdev;
1805     s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
1806          fs_info);
1807     if (IS_ERR(s)) {
1808         error = PTR_ERR(s);
1809         goto error_close_devices;
1810     }
1811 
1812     if (s->s_root) {
1813         btrfs_close_devices(fs_devices);
1814         btrfs_free_fs_info(fs_info);
1815         if ((flags ^ s->s_flags) & SB_RDONLY)
1816             error = -EBUSY;
1817     } else {
1818         snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1819         shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name,
1820                     s->s_id);
1821         btrfs_sb(s)->bdev_holder = fs_type;
1822         if (!strstr(crc32c_impl(), "generic"))
1823             set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
1824         error = btrfs_fill_super(s, fs_devices, data);
1825     }
1826     if (!error)
1827         error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
1828     security_free_mnt_opts(&new_sec_opts);
1829     if (error) {
1830         deactivate_locked_super(s);
1831         return ERR_PTR(error);
1832     }
1833 
1834     return dget(s->s_root);
1835 
1836 error_close_devices:
1837     btrfs_close_devices(fs_devices);
1838 error_fs_info:
1839     btrfs_free_fs_info(fs_info);
1840 error_sec_opts:
1841     security_free_mnt_opts(&new_sec_opts);
1842     return ERR_PTR(error);
1843 }
1844 
1845 /*
1846  * Mount function which is called by VFS layer.
1847  *
1848  * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
1849  * which needs vfsmount* of device's root (/).  This means device's root has to
1850  * be mounted internally in any case.
1851  *
1852  * Operation flow:
1853  *   1. Parse subvol id related options for later use in mount_subvol().
1854  *
1855  *   2. Mount device's root (/) by calling vfs_kern_mount().
1856  *
1857  *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
1858  *      first place. In order to avoid calling btrfs_mount() again, we use
1859  *      different file_system_type which is not registered to VFS by
1860  *      register_filesystem() (btrfs_root_fs_type). As a result,
1861  *      btrfs_mount_root() is called. The return value will be used by
1862  *      mount_subtree() in mount_subvol().
1863  *
1864  *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
1865  *      "btrfs subvolume set-default", mount_subvol() is called always.
1866  */
1867 static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1868         const char *device_name, void *data)
1869 {
1870     struct vfsmount *mnt_root;
1871     struct dentry *root;
1872     char *subvol_name = NULL;
1873     u64 subvol_objectid = 0;
1874     int error = 0;
1875 
1876     error = btrfs_parse_subvol_options(data, &subvol_name,
1877                     &subvol_objectid);
1878     if (error) {
1879         kfree(subvol_name);
1880         return ERR_PTR(error);
1881     }
1882 
1883     /* mount device's root (/) */
1884     mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
1885     if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
1886         if (flags & SB_RDONLY) {
1887             mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
1888                 flags & ~SB_RDONLY, device_name, data);
1889         } else {
1890             mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
1891                 flags | SB_RDONLY, device_name, data);
1892             if (IS_ERR(mnt_root)) {
1893                 root = ERR_CAST(mnt_root);
1894                 kfree(subvol_name);
1895                 goto out;
1896             }
1897 
1898             down_write(&mnt_root->mnt_sb->s_umount);
1899             error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
1900             up_write(&mnt_root->mnt_sb->s_umount);
1901             if (error < 0) {
1902                 root = ERR_PTR(error);
1903                 mntput(mnt_root);
1904                 kfree(subvol_name);
1905                 goto out;
1906             }
1907         }
1908     }
1909     if (IS_ERR(mnt_root)) {
1910         root = ERR_CAST(mnt_root);
1911         kfree(subvol_name);
1912         goto out;
1913     }
1914 
1915     /* mount_subvol() will free subvol_name and mnt_root */
1916     root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
1917 
1918 out:
1919     return root;
1920 }
1921 
1922 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1923                      u32 new_pool_size, u32 old_pool_size)
1924 {
1925     if (new_pool_size == old_pool_size)
1926         return;
1927 
1928     fs_info->thread_pool_size = new_pool_size;
1929 
1930     btrfs_info(fs_info, "resize thread pool %d -> %d",
1931            old_pool_size, new_pool_size);
1932 
1933     btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
1934     btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
1935     btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
1936     btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
1937     btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
1938     btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
1939     btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
1940 }
1941 
1942 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
1943                        unsigned long old_opts, int flags)
1944 {
1945     if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1946         (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1947          (flags & SB_RDONLY))) {
1948         /* wait for any defraggers to finish */
1949         wait_event(fs_info->transaction_wait,
1950                (atomic_read(&fs_info->defrag_running) == 0));
1951         if (flags & SB_RDONLY)
1952             sync_filesystem(fs_info->sb);
1953     }
1954 }
1955 
1956 static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
1957                      unsigned long old_opts)
1958 {
1959     const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
1960 
1961     /*
1962      * We need to cleanup all defragable inodes if the autodefragment is
1963      * close or the filesystem is read only.
1964      */
1965     if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1966         (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
1967         btrfs_cleanup_defrag_inodes(fs_info);
1968     }
1969 
1970     /* If we toggled discard async */
1971     if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
1972         btrfs_test_opt(fs_info, DISCARD_ASYNC))
1973         btrfs_discard_resume(fs_info);
1974     else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
1975          !btrfs_test_opt(fs_info, DISCARD_ASYNC))
1976         btrfs_discard_cleanup(fs_info);
1977 
1978     /* If we toggled space cache */
1979     if (cache_opt != btrfs_free_space_cache_v1_active(fs_info))
1980         btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
1981 }
1982 
/*
 * Remount the filesystem with new options and/or a new read-only state.
 *
 * @sb:    superblock being remounted
 * @flags: requested SB_* flags; SB_I_VERSION is OR'ed in on success
 * @data:  mount option string, may be NULL (btrfs_mount() passes NULL)
 *
 * Parses the new options, then, if the read-only state changes, performs the
 * rw->ro or ro->rw transition.  On any failure the flags and options saved at
 * entry are put back in the restore path.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
    struct btrfs_fs_info *fs_info = btrfs_sb(sb);
    /* Snapshot of the current settings, used by the restore path below. */
    unsigned old_flags = sb->s_flags;
    unsigned long old_opts = fs_info->mount_opt;
    unsigned long old_compress_type = fs_info->compress_type;
    u64 old_max_inline = fs_info->max_inline;
    u32 old_thread_pool_size = fs_info->thread_pool_size;
    u32 old_metadata_ratio = fs_info->metadata_ratio;
    int ret;

    sync_filesystem(sb);
    /* Set for the whole remount; cleared again on both exit paths. */
    set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);

    if (data) {
        void *new_sec_opts = NULL;

        /* Pass the LSM options to the security layer's remount check. */
        ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
        if (!ret)
            ret = security_sb_remount(sb, new_sec_opts);
        security_free_mnt_opts(&new_sec_opts);
        if (ret)
            goto restore;
    }

    ret = btrfs_parse_options(fs_info, data, *flags);
    if (ret)
        goto restore;

    /* V1 cache is not supported for subpage mount. */
    if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
        btrfs_warn(fs_info,
    "v1 space cache is not supported for page size %lu with sectorsize %u",
               PAGE_SIZE, fs_info->sectorsize);
        ret = -EINVAL;
        goto restore;
    }
    btrfs_remount_begin(fs_info, old_opts, *flags);
    btrfs_resize_thread_pool(fs_info,
        fs_info->thread_pool_size, old_thread_pool_size);

    /*
     * The requested FREE_SPACE_TREE option differs from the compat_ro bit,
     * and this is not a ro->rw remount: warn and put the options back in
     * line with the current state.
     */
    if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
        (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
        (!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
        btrfs_warn(fs_info,
        "remount supports changing free space tree only from ro to rw");
        /* Make sure free space cache options match the state on disk */
        if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
            btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
            btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
        }
        if (btrfs_free_space_cache_v1_active(fs_info)) {
            btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
            btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
        }
    }

    /* No change of the read-only state requested: skip the transition. */
    if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
        goto out;

    if (*flags & SB_RDONLY) {
        /*
         * this also happens on 'umount -rf' or on shutdown, when
         * the filesystem is busy.
         */
        cancel_work_sync(&fs_info->async_reclaim_work);
        cancel_work_sync(&fs_info->async_data_reclaim_work);

        btrfs_discard_cleanup(fs_info);

        /* wait for the uuid_scan task to finish */
        down(&fs_info->uuid_tree_rescan_sem);
        /* avoid complaints from lockdep et al. */
        up(&fs_info->uuid_tree_rescan_sem);

        btrfs_set_sb_rdonly(sb);

        /*
         * Setting SB_RDONLY will put the cleaner thread to
         * sleep at the next loop if it's already active.
         * If it's already asleep, we'll leave unused block
         * groups on disk until we're mounted read-write again
         * unless we clean them up here.
         */
        btrfs_delete_unused_bgs(fs_info);

        /*
         * The cleaner task could be already running before we set the
         * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock).
         * We must make sure that after we finish the remount, i.e. after
         * we call btrfs_commit_super(), the cleaner can no longer start
         * a transaction - either because it was dropping a dead root,
         * running delayed iputs or deleting an unused block group (the
         * cleaner picked a block group from the list of unused block
         * groups before we were able to in the previous call to
         * btrfs_delete_unused_bgs()).
         */
        wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING,
                TASK_UNINTERRUPTIBLE);

        /*
         * We've set the superblock to RO mode, so we might have made
         * the cleaner task sleep without running all pending delayed
         * iputs. Go through all the delayed iputs here, so that if an
         * unmount happens without remounting RW we don't end up at
         * finishing close_ctree() with a non-empty list of delayed
         * iputs.
         */
        btrfs_run_delayed_iputs(fs_info);

        btrfs_dev_replace_suspend_for_unmount(fs_info);
        btrfs_scrub_cancel(fs_info);
        btrfs_pause_balance(fs_info);

        /*
         * Pause the qgroup rescan worker if it is running. We don't want
         * it to be still running after we are in RO mode, as after that,
         * by the time we unmount, it might have left a transaction open,
         * so we would leak the transaction and/or crash.
         */
        btrfs_qgroup_wait_for_completion(fs_info, false);

        ret = btrfs_commit_super(fs_info);
        if (ret)
            goto restore;
    } else {
        /* Read-only to read-write: refuse when writing would not be safe. */
        if (BTRFS_FS_ERROR(fs_info)) {
            btrfs_err(fs_info,
                "Remounting read-write after error is not allowed");
            ret = -EINVAL;
            goto restore;
        }
        if (fs_info->fs_devices->rw_devices == 0) {
            ret = -EACCES;
            goto restore;
        }

        if (!btrfs_check_rw_degradable(fs_info, NULL)) {
            btrfs_warn(fs_info,
        "too many missing devices, writable remount is not allowed");
            ret = -EACCES;
            goto restore;
        }

        if (btrfs_super_log_root(fs_info->super_copy) != 0) {
            btrfs_warn(fs_info,
        "mount required to replay tree-log, cannot remount read-write");
            ret = -EINVAL;
            goto restore;
        }

        /*
         * NOTE: when remounting with a change that does writes, don't
         * put it anywhere above this point, as we are not sure to be
         * safe to write until we pass the above checks.
         */
        ret = btrfs_start_pre_rw_mount(fs_info);
        if (ret)
            goto restore;

        btrfs_clear_sb_rdonly(sb);

        set_bit(BTRFS_FS_OPEN, &fs_info->flags);
    }
out:
    /*
     * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
     * since the absence of the flag means it can be toggled off by remount.
     */
    *flags |= SB_I_VERSION;

    wake_up_process(fs_info->transaction_kthread);
    btrfs_remount_cleanup(fs_info, old_opts);
    btrfs_clear_oneshot_options(fs_info);
    clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);

    return 0;

restore:
    /* We've hit an error - don't reset SB_RDONLY */
    if (sb_rdonly(sb))
        old_flags |= SB_RDONLY;
    if (!(old_flags & SB_RDONLY))
        clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
    /* Put back everything that was saved at entry. */
    sb->s_flags = old_flags;
    fs_info->mount_opt = old_opts;
    fs_info->compress_type = old_compress_type;
    fs_info->max_inline = old_max_inline;
    btrfs_resize_thread_pool(fs_info,
        old_thread_pool_size, fs_info->thread_pool_size);
    fs_info->metadata_ratio = old_metadata_ratio;
    btrfs_remount_cleanup(fs_info, old_opts);
    clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);

    return ret;
}
2179 
2180 /* Used to sort the devices by max_avail(descending sort) */
2181 static int btrfs_cmp_device_free_bytes(const void *a, const void *b)
2182 {
2183     const struct btrfs_device_info *dev_info1 = a;
2184     const struct btrfs_device_info *dev_info2 = b;
2185 
2186     if (dev_info1->max_avail > dev_info2->max_avail)
2187         return -1;
2188     else if (dev_info1->max_avail < dev_info2->max_avail)
2189         return 1;
2190     return 0;
2191 }
2192 
2193 /*
2194  * sort the devices by max_avail, in which max free extent size of each device
2195  * is stored.(Descending Sort)
2196  */
2197 static inline void btrfs_descending_sort_devices(
2198                     struct btrfs_device_info *devices,
2199                     size_t nr_devices)
2200 {
2201     sort(devices, nr_devices, sizeof(struct btrfs_device_info),
2202          btrfs_cmp_device_free_bytes, NULL);
2203 }
2204 
2205 /*
2206  * The helper to calc the free space on the devices that can be used to store
2207  * file data.
2208  */
2209 static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
2210                           u64 *free_bytes)
2211 {
2212     struct btrfs_device_info *devices_info;
2213     struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2214     struct btrfs_device *device;
2215     u64 type;
2216     u64 avail_space;
2217     u64 min_stripe_size;
2218     int num_stripes = 1;
2219     int i = 0, nr_devices;
2220     const struct btrfs_raid_attr *rattr;
2221 
2222     /*
2223      * We aren't under the device list lock, so this is racy-ish, but good
2224      * enough for our purposes.
2225      */
2226     nr_devices = fs_info->fs_devices->open_devices;
2227     if (!nr_devices) {
2228         smp_mb();
2229         nr_devices = fs_info->fs_devices->open_devices;
2230         ASSERT(nr_devices);
2231         if (!nr_devices) {
2232             *free_bytes = 0;
2233             return 0;
2234         }
2235     }
2236 
2237     devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
2238                    GFP_KERNEL);
2239     if (!devices_info)
2240         return -ENOMEM;
2241 
2242     /* calc min stripe number for data space allocation */
2243     type = btrfs_data_alloc_profile(fs_info);
2244     rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
2245 
2246     if (type & BTRFS_BLOCK_GROUP_RAID0)
2247         num_stripes = nr_devices;
2248     else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK)
2249         num_stripes = rattr->ncopies;
2250     else if (type & BTRFS_BLOCK_GROUP_RAID10)
2251         num_stripes = 4;
2252 
2253     /* Adjust for more than 1 stripe per device */
2254     min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
2255 
2256     rcu_read_lock();
2257     list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2258         if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2259                         &device->dev_state) ||
2260             !device->bdev ||
2261             test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
2262             continue;
2263 
2264         if (i >= nr_devices)
2265             break;
2266 
2267         avail_space = device->total_bytes - device->bytes_used;
2268 
2269         /* align with stripe_len */
2270         avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
2271 
2272         /*
2273          * Ensure we have at least min_stripe_size on top of the
2274          * reserved space on the device.
2275          */
2276         if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size)
2277             continue;
2278 
2279         avail_space -= BTRFS_DEVICE_RANGE_RESERVED;
2280 
2281         devices_info[i].dev = device;
2282         devices_info[i].max_avail = avail_space;
2283 
2284         i++;
2285     }
2286     rcu_read_unlock();
2287 
2288     nr_devices = i;
2289 
2290     btrfs_descending_sort_devices(devices_info, nr_devices);
2291 
2292     i = nr_devices - 1;
2293     avail_space = 0;
2294     while (nr_devices >= rattr->devs_min) {
2295         num_stripes = min(num_stripes, nr_devices);
2296 
2297         if (devices_info[i].max_avail >= min_stripe_size) {
2298             int j;
2299             u64 alloc_size;
2300 
2301             avail_space += devices_info[i].max_avail * num_stripes;
2302             alloc_size = devices_info[i].max_avail;
2303             for (j = i + 1 - num_stripes; j <= i; j++)
2304                 devices_info[j].max_avail -= alloc_size;
2305         }
2306         i--;
2307         nr_devices--;
2308     }
2309 
2310     kfree(devices_info);
2311     *free_bytes = avail_space;
2312     return 0;
2313 }
2314 
2315 /*
2316  * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
2317  *
2318  * If there's a redundant raid level at DATA block groups, use the respective
2319  * multiplier to scale the sizes.
2320  *
2321  * Unused device space usage is based on simulating the chunk allocator
2322  * algorithm that respects the device sizes and order of allocations.  This is
2323  * a close approximation of the actual use but there are other factors that may
2324  * change the result (like a new metadata chunk).
2325  *
2326  * If metadata is exhausted, f_bavail will be 0.
2327  */
2328 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2329 {
2330     struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
2331     struct btrfs_super_block *disk_super = fs_info->super_copy;
2332     struct btrfs_space_info *found;
2333     u64 total_used = 0;
2334     u64 total_free_data = 0;
2335     u64 total_free_meta = 0;
2336     u32 bits = fs_info->sectorsize_bits;
2337     __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2338     unsigned factor = 1;
2339     struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2340     int ret;
2341     u64 thresh = 0;
2342     int mixed = 0;
2343 
2344     list_for_each_entry(found, &fs_info->space_info, list) {
2345         if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2346             int i;
2347 
2348             total_free_data += found->disk_total - found->disk_used;
2349             total_free_data -=
2350                 btrfs_account_ro_block_groups_free_space(found);
2351 
2352             for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2353                 if (!list_empty(&found->block_groups[i]))
2354                     factor = btrfs_bg_type_to_factor(
2355                         btrfs_raid_array[i].bg_flag);
2356             }
2357         }
2358 
2359         /*
2360          * Metadata in mixed block goup profiles are accounted in data
2361          */
2362         if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
2363             if (found->flags & BTRFS_BLOCK_GROUP_DATA)
2364                 mixed = 1;
2365             else
2366                 total_free_meta += found->disk_total -
2367                     found->disk_used;
2368         }
2369 
2370         total_used += found->disk_used;
2371     }
2372 
2373     buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
2374     buf->f_blocks >>= bits;
2375     buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
2376 
2377     /* Account global block reserve as used, it's in logical size already */
2378     spin_lock(&block_rsv->lock);
2379     /* Mixed block groups accounting is not byte-accurate, avoid overflow */
2380     if (buf->f_bfree >= block_rsv->size >> bits)
2381         buf->f_bfree -= block_rsv->size >> bits;
2382     else
2383         buf->f_bfree = 0;
2384     spin_unlock(&block_rsv->lock);
2385 
2386     buf->f_bavail = div_u64(total_free_data, factor);
2387     ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2388     if (ret)
2389         return ret;
2390     buf->f_bavail += div_u64(total_free_data, factor);
2391     buf->f_bavail = buf->f_bavail >> bits;
2392 
2393     /*
2394      * We calculate the remaining metadata space minus global reserve. If
2395      * this is (supposedly) smaller than zero, there's no space. But this
2396      * does not hold in practice, the exhausted state happens where's still
2397      * some positive delta. So we apply some guesswork and compare the
2398      * delta to a 4M threshold.  (Practically observed delta was ~2M.)
2399      *
2400      * We probably cannot calculate the exact threshold value because this
2401      * depends on the internal reservations requested by various
2402      * operations, so some operations that consume a few metadata will
2403      * succeed even if the Avail is zero. But this is better than the other
2404      * way around.
2405      */
2406     thresh = SZ_4M;
2407 
2408     /*
2409      * We only want to claim there's no available space if we can no longer
2410      * allocate chunks for our metadata profile and our global reserve will
2411      * not fit in the free metadata space.  If we aren't ->full then we
2412      * still can allocate chunks and thus are fine using the currently
2413      * calculated f_bavail.
2414      */
2415     if (!mixed && block_rsv->space_info->full &&
2416         total_free_meta - thresh < block_rsv->size)
2417         buf->f_bavail = 0;
2418 
2419     buf->f_type = BTRFS_SUPER_MAGIC;
2420     buf->f_bsize = dentry->d_sb->s_blocksize;
2421     buf->f_namelen = BTRFS_NAME_LEN;
2422 
2423     /* We treat it as constant endianness (it doesn't matter _which_)
2424        because we want the fsid to come out the same whether mounted
2425        on a big-endian or little-endian host */
2426     buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
2427     buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2428     /* Mask in the root object ID too, to disambiguate subvols */
2429     buf->f_fsid.val[0] ^=
2430         BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
2431     buf->f_fsid.val[1] ^=
2432         BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2433 
2434     return 0;
2435 }
2436 
/*
 * ->kill_sb callback: tear down the anonymous superblock, then free the
 * fs_info that was attached to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
    /* Fetch the pointer first: it is needed after the superblock is killed. */
    struct btrfs_fs_info *info = btrfs_sb(sb);

    kill_anon_super(sb);
    btrfs_free_fs_info(info);
}
2443 
2444 static struct file_system_type btrfs_fs_type = {
2445     .owner      = THIS_MODULE,
2446     .name       = "btrfs",
2447     .mount      = btrfs_mount,
2448     .kill_sb    = btrfs_kill_super,
2449     .fs_flags   = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2450 };
2451 
2452 static struct file_system_type btrfs_root_fs_type = {
2453     .owner      = THIS_MODULE,
2454     .name       = "btrfs",
2455     .mount      = btrfs_mount_root,
2456     .kill_sb    = btrfs_kill_super,
2457     .fs_flags   = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
2458 };
2459 
2460 MODULE_ALIAS_FS("btrfs");
2461 
2462 static int btrfs_control_open(struct inode *inode, struct file *file)
2463 {
2464     /*
2465      * The control file's private_data is used to hold the
2466      * transaction when it is started and is used to keep
2467      * track of whether a transaction is already in progress.
2468      */
2469     file->private_data = NULL;
2470     return 0;
2471 }
2472 
2473 /*
2474  * Used by /dev/btrfs-control for devices ioctls.
2475  */
2476 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
2477                 unsigned long arg)
2478 {
2479     struct btrfs_ioctl_vol_args *vol;
2480     struct btrfs_device *device = NULL;
2481     dev_t devt = 0;
2482     int ret = -ENOTTY;
2483 
2484     if (!capable(CAP_SYS_ADMIN))
2485         return -EPERM;
2486 
2487     vol = memdup_user((void __user *)arg, sizeof(*vol));
2488     if (IS_ERR(vol))
2489         return PTR_ERR(vol);
2490     vol->name[BTRFS_PATH_NAME_MAX] = '\0';
2491 
2492     switch (cmd) {
2493     case BTRFS_IOC_SCAN_DEV:
2494         mutex_lock(&uuid_mutex);
2495         device = btrfs_scan_one_device(vol->name, FMODE_READ,
2496                            &btrfs_root_fs_type);
2497         ret = PTR_ERR_OR_ZERO(device);
2498         mutex_unlock(&uuid_mutex);
2499         break;
2500     case BTRFS_IOC_FORGET_DEV:
2501         if (vol->name[0] != 0) {
2502             ret = lookup_bdev(vol->name, &devt);
2503             if (ret)
2504                 break;
2505         }
2506         ret = btrfs_forget_devices(devt);
2507         break;
2508     case BTRFS_IOC_DEVICES_READY:
2509         mutex_lock(&uuid_mutex);
2510         device = btrfs_scan_one_device(vol->name, FMODE_READ,
2511                            &btrfs_root_fs_type);
2512         if (IS_ERR(device)) {
2513             mutex_unlock(&uuid_mutex);
2514             ret = PTR_ERR(device);
2515             break;
2516         }
2517         ret = !(device->fs_devices->num_devices ==
2518             device->fs_devices->total_devices);
2519         mutex_unlock(&uuid_mutex);
2520         break;
2521     case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2522         ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2523         break;
2524     }
2525 
2526     kfree(vol);
2527     return ret;
2528 }
2529 
2530 static int btrfs_freeze(struct super_block *sb)
2531 {
2532     struct btrfs_trans_handle *trans;
2533     struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2534     struct btrfs_root *root = fs_info->tree_root;
2535 
2536     set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2537     /*
2538      * We don't need a barrier here, we'll wait for any transaction that
2539      * could be in progress on other threads (and do delayed iputs that
2540      * we want to avoid on a frozen filesystem), or do the commit
2541      * ourselves.
2542      */
2543     trans = btrfs_attach_transaction_barrier(root);
2544     if (IS_ERR(trans)) {
2545         /* no transaction, don't bother */
2546         if (PTR_ERR(trans) == -ENOENT)
2547             return 0;
2548         return PTR_ERR(trans);
2549     }
2550     return btrfs_commit_transaction(trans);
2551 }
2552 
2553 static int btrfs_unfreeze(struct super_block *sb)
2554 {
2555     struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2556 
2557     clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2558     return 0;
2559 }
2560 
2561 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2562 {
2563     struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
2564 
2565     /*
2566      * There should be always a valid pointer in latest_dev, it may be stale
2567      * for a short moment in case it's being deleted but still valid until
2568      * the end of RCU grace period.
2569      */
2570     rcu_read_lock();
2571     seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
2572     rcu_read_unlock();
2573 
2574     return 0;
2575 }
2576 
2577 static const struct super_operations btrfs_super_ops = {
2578     .drop_inode = btrfs_drop_inode,
2579     .evict_inode    = btrfs_evict_inode,
2580     .put_super  = btrfs_put_super,
2581     .sync_fs    = btrfs_sync_fs,
2582     .show_options   = btrfs_show_options,
2583     .show_devname   = btrfs_show_devname,
2584     .alloc_inode    = btrfs_alloc_inode,
2585     .destroy_inode  = btrfs_destroy_inode,
2586     .free_inode = btrfs_free_inode,
2587     .statfs     = btrfs_statfs,
2588     .remount_fs = btrfs_remount,
2589     .freeze_fs  = btrfs_freeze,
2590     .unfreeze_fs    = btrfs_unfreeze,
2591 };
2592 
2593 static const struct file_operations btrfs_ctl_fops = {
2594     .open = btrfs_control_open,
2595     .unlocked_ioctl  = btrfs_control_ioctl,
2596     .compat_ioctl = compat_ptr_ioctl,
2597     .owner   = THIS_MODULE,
2598     .llseek = noop_llseek,
2599 };
2600 
2601 static struct miscdevice btrfs_misc = {
2602     .minor      = BTRFS_MINOR,
2603     .name       = "btrfs-control",
2604     .fops       = &btrfs_ctl_fops
2605 };
2606 
2607 MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
2608 MODULE_ALIAS("devname:btrfs-control");
2609 
2610 static int __init btrfs_interface_init(void)
2611 {
2612     return misc_register(&btrfs_misc);
2613 }
2614 
2615 static __cold void btrfs_interface_exit(void)
2616 {
2617     misc_deregister(&btrfs_misc);
2618 }
2619 
2620 static void __init btrfs_print_mod_info(void)
2621 {
2622     static const char options[] = ""
2623 #ifdef CONFIG_BTRFS_DEBUG
2624             ", debug=on"
2625 #endif
2626 #ifdef CONFIG_BTRFS_ASSERT
2627             ", assert=on"
2628 #endif
2629 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2630             ", integrity-checker=on"
2631 #endif
2632 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
2633             ", ref-verify=on"
2634 #endif
2635 #ifdef CONFIG_BLK_DEV_ZONED
2636             ", zoned=yes"
2637 #else
2638             ", zoned=no"
2639 #endif
2640 #ifdef CONFIG_FS_VERITY
2641             ", fsverity=yes"
2642 #else
2643             ", fsverity=no"
2644 #endif
2645             ;
2646     pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
2647 }
2648 
2649 static int __init init_btrfs_fs(void)
2650 {
2651     int err;
2652 
2653     btrfs_props_init();
2654 
2655     err = btrfs_init_sysfs();
2656     if (err)
2657         return err;
2658 
2659     btrfs_init_compress();
2660 
2661     err = btrfs_init_cachep();
2662     if (err)
2663         goto free_compress;
2664 
2665     err = extent_io_init();
2666     if (err)
2667         goto free_cachep;
2668 
2669     err = extent_state_cache_init();
2670     if (err)
2671         goto free_extent_io;
2672 
2673     err = extent_map_init();
2674     if (err)
2675         goto free_extent_state_cache;
2676 
2677     err = ordered_data_init();
2678     if (err)
2679         goto free_extent_map;
2680 
2681     err = btrfs_delayed_inode_init();
2682     if (err)
2683         goto free_ordered_data;
2684 
2685     err = btrfs_auto_defrag_init();
2686     if (err)
2687         goto free_delayed_inode;
2688 
2689     err = btrfs_delayed_ref_init();
2690     if (err)
2691         goto free_auto_defrag;
2692 
2693     err = btrfs_prelim_ref_init();
2694     if (err)
2695         goto free_delayed_ref;
2696 
2697     err = btrfs_interface_init();
2698     if (err)
2699         goto free_prelim_ref;
2700 
2701     btrfs_print_mod_info();
2702 
2703     err = btrfs_run_sanity_tests();
2704     if (err)
2705         goto unregister_ioctl;
2706 
2707     err = register_filesystem(&btrfs_fs_type);
2708     if (err)
2709         goto unregister_ioctl;
2710 
2711     return 0;
2712 
2713 unregister_ioctl:
2714     btrfs_interface_exit();
2715 free_prelim_ref:
2716     btrfs_prelim_ref_exit();
2717 free_delayed_ref:
2718     btrfs_delayed_ref_exit();
2719 free_auto_defrag:
2720     btrfs_auto_defrag_exit();
2721 free_delayed_inode:
2722     btrfs_delayed_inode_exit();
2723 free_ordered_data:
2724     ordered_data_exit();
2725 free_extent_map:
2726     extent_map_exit();
2727 free_extent_state_cache:
2728     extent_state_cache_exit();
2729 free_extent_io:
2730     extent_io_exit();
2731 free_cachep:
2732     btrfs_destroy_cachep();
2733 free_compress:
2734     btrfs_exit_compress();
2735     btrfs_exit_sysfs();
2736 
2737     return err;
2738 }
2739 
/*
 * Module exit: tear down everything init_btrfs_fs() set up, unregister the
 * control device and the filesystem type, and release the remaining
 * fs_devices state through btrfs_cleanup_fs_uuids().
 *
 * NOTE(review): the calls below are not in the reverse order of
 * init_btrfs_fs() (e.g. the caches go away before the filesystem type is
 * unregistered). Confirm that the order is irrelevant at module unload
 * before rearranging anything here.
 */
static void __exit exit_btrfs_fs(void)
{
    btrfs_destroy_cachep();
    btrfs_delayed_ref_exit();
    btrfs_auto_defrag_exit();
    btrfs_delayed_inode_exit();
    btrfs_prelim_ref_exit();
    ordered_data_exit();
    extent_map_exit();
    extent_state_cache_exit();
    extent_io_exit();
    btrfs_interface_exit();
    unregister_filesystem(&btrfs_fs_type);
    btrfs_exit_sysfs();
    btrfs_cleanup_fs_uuids();
    btrfs_exit_compress();
}
2757 
/*
 * Hook up the module entry points: init_btrfs_fs() runs as a late initcall,
 * exit_btrfs_fs() runs on module exit.
 */
late_initcall(init_btrfs_fs);
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");
/*
 * Soft dependencies ("pre:") on the listed modules.
 * NOTE(review): presumably these are the checksum algorithm
 * implementations btrfs may need - confirm against the checksum code.
 */
MODULE_SOFTDEP("pre: crc32c");
MODULE_SOFTDEP("pre: xxhash64");
MODULE_SOFTDEP("pre: sha256");
MODULE_SOFTDEP("pre: blake2b-256");