// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_iunlink_item.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

static const struct super_operations xfs_super_operations;

static struct kset *xfs_kset;       /* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
#endif

#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(xfs_mount_list);
static DEFINE_SPINLOCK(xfs_mount_list_lock);

static inline void xfs_mount_list_add(struct xfs_mount *mp)
{
    spin_lock(&xfs_mount_list_lock);
    list_add(&mp->m_mount_list, &xfs_mount_list);
    spin_unlock(&xfs_mount_list_lock);
}

static inline void xfs_mount_list_del(struct xfs_mount *mp)
{
    spin_lock(&xfs_mount_list_lock);
    list_del(&mp->m_mount_list);
    spin_unlock(&xfs_mount_list_lock);
}
#else /* !CONFIG_HOTPLUG_CPU */
static inline void xfs_mount_list_add(struct xfs_mount *mp) {}
static inline void xfs_mount_list_del(struct xfs_mount *mp) {}
#endif
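
/*
 * The global mount list exists so per-cpu state with a hotplug dependency
 * can be torn down when a CPU goes offline: the cpu-dead handler walks
 * xfs_mount_list and cleans up each registered mount.  Mounts are added
 * in xfs_fs_fill_super() and removed again in xfs_fs_put_super().
 */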

enum xfs_dax_mode {
    XFS_DAX_INODE = 0,
    XFS_DAX_ALWAYS = 1,
    XFS_DAX_NEVER = 2,
};

static void
xfs_mount_set_dax_mode(
    struct xfs_mount    *mp,
    enum xfs_dax_mode   mode)
{
    switch (mode) {
    case XFS_DAX_INODE:
        mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
        break;
    case XFS_DAX_ALWAYS:
        mp->m_features |= XFS_FEAT_DAX_ALWAYS;
        mp->m_features &= ~XFS_FEAT_DAX_NEVER;
        break;
    case XFS_DAX_NEVER:
        mp->m_features |= XFS_FEAT_DAX_NEVER;
        mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
        break;
    }
}

static const struct constant_table dax_param_enums[] = {
    {"inode",   XFS_DAX_INODE },
    {"always",  XFS_DAX_ALWAYS },
    {"never",   XFS_DAX_NEVER },
    {}
};
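
/*
 * These strings back the "dax" mount option: -o dax=inode, -o dax=always
 * or -o dax=never.  A bare -o dax is treated as dax=always; see the
 * Opt_dax handling in xfs_fs_parse_param() below.
 */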

/*
 * Table driven mount option parser.
 */
enum {
    Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
    Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
    Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
    Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
    Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
    Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
    Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
    Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
    Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};

static const struct fs_parameter_spec xfs_fs_parameters[] = {
    fsparam_u32("logbufs",      Opt_logbufs),
    fsparam_string("logbsize",  Opt_logbsize),
    fsparam_string("logdev",    Opt_logdev),
    fsparam_string("rtdev",     Opt_rtdev),
    fsparam_flag("wsync",       Opt_wsync),
    fsparam_flag("noalign",     Opt_noalign),
    fsparam_flag("swalloc",     Opt_swalloc),
    fsparam_u32("sunit",        Opt_sunit),
    fsparam_u32("swidth",       Opt_swidth),
    fsparam_flag("nouuid",      Opt_nouuid),
    fsparam_flag("grpid",       Opt_grpid),
    fsparam_flag("nogrpid",     Opt_nogrpid),
    fsparam_flag("bsdgroups",   Opt_bsdgroups),
    fsparam_flag("sysvgroups",  Opt_sysvgroups),
    fsparam_string("allocsize", Opt_allocsize),
    fsparam_flag("norecovery",  Opt_norecovery),
    fsparam_flag("inode64",     Opt_inode64),
    fsparam_flag("inode32",     Opt_inode32),
    fsparam_flag("ikeep",       Opt_ikeep),
    fsparam_flag("noikeep",     Opt_noikeep),
    fsparam_flag("largeio",     Opt_largeio),
    fsparam_flag("nolargeio",   Opt_nolargeio),
    fsparam_flag("attr2",       Opt_attr2),
    fsparam_flag("noattr2",     Opt_noattr2),
    fsparam_flag("filestreams", Opt_filestreams),
    fsparam_flag("quota",       Opt_quota),
    fsparam_flag("noquota",     Opt_noquota),
    fsparam_flag("usrquota",    Opt_usrquota),
    fsparam_flag("grpquota",    Opt_grpquota),
    fsparam_flag("prjquota",    Opt_prjquota),
    fsparam_flag("uquota",      Opt_uquota),
    fsparam_flag("gquota",      Opt_gquota),
    fsparam_flag("pquota",      Opt_pquota),
    fsparam_flag("uqnoenforce", Opt_uqnoenforce),
    fsparam_flag("gqnoenforce", Opt_gqnoenforce),
    fsparam_flag("pqnoenforce", Opt_pqnoenforce),
    fsparam_flag("qnoenforce",  Opt_qnoenforce),
    fsparam_flag("discard",     Opt_discard),
    fsparam_flag("nodiscard",   Opt_nodiscard),
    fsparam_flag("dax",     Opt_dax),
    fsparam_enum("dax",     Opt_dax_enum, dax_param_enums),
    {}
};
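
/*
 * For example, a mount invocation such as:
 *
 *	mount -o logbufs=8,logbsize=256k,noalign /dev/sda1 /mnt
 *
 * is parsed against this table: fs_parse() matches each comma-separated
 * option and xfs_fs_parse_param() applies it to the mount in progress.
 */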

struct proc_xfs_info {
    uint64_t    flag;
    char        *str;
};

static int
xfs_fs_show_options(
    struct seq_file     *m,
    struct dentry       *root)
{
    static struct proc_xfs_info xfs_info_set[] = {
        /* the few simple ones we can get from the mount struct */
        { XFS_FEAT_IKEEP,       ",ikeep" },
        { XFS_FEAT_WSYNC,       ",wsync" },
        { XFS_FEAT_NOALIGN,     ",noalign" },
        { XFS_FEAT_SWALLOC,     ",swalloc" },
        { XFS_FEAT_NOUUID,      ",nouuid" },
        { XFS_FEAT_NORECOVERY,      ",norecovery" },
        { XFS_FEAT_ATTR2,       ",attr2" },
        { XFS_FEAT_FILESTREAMS,     ",filestreams" },
        { XFS_FEAT_GRPID,       ",grpid" },
        { XFS_FEAT_DISCARD,     ",discard" },
        { XFS_FEAT_LARGE_IOSIZE,    ",largeio" },
        { XFS_FEAT_DAX_ALWAYS,      ",dax=always" },
        { XFS_FEAT_DAX_NEVER,       ",dax=never" },
        { 0, NULL }
    };
    struct xfs_mount    *mp = XFS_M(root->d_sb);
    struct proc_xfs_info    *xfs_infop;

    for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
        if (mp->m_features & xfs_infop->flag)
            seq_puts(m, xfs_infop->str);
    }

    seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);

    if (xfs_has_allocsize(mp))
        seq_printf(m, ",allocsize=%dk",
               (1 << mp->m_allocsize_log) >> 10);

    if (mp->m_logbufs > 0)
        seq_printf(m, ",logbufs=%d", mp->m_logbufs);
    if (mp->m_logbsize > 0)
        seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

    if (mp->m_logname)
        seq_show_option(m, "logdev", mp->m_logname);
    if (mp->m_rtname)
        seq_show_option(m, "rtdev", mp->m_rtname);

    if (mp->m_dalign > 0)
        seq_printf(m, ",sunit=%d",
                (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
    if (mp->m_swidth > 0)
        seq_printf(m, ",swidth=%d",
                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));

    if (mp->m_qflags & XFS_UQUOTA_ENFD)
        seq_puts(m, ",usrquota");
    else if (mp->m_qflags & XFS_UQUOTA_ACCT)
        seq_puts(m, ",uqnoenforce");

    if (mp->m_qflags & XFS_PQUOTA_ENFD)
        seq_puts(m, ",prjquota");
    else if (mp->m_qflags & XFS_PQUOTA_ACCT)
        seq_puts(m, ",pqnoenforce");

    if (mp->m_qflags & XFS_GQUOTA_ENFD)
        seq_puts(m, ",grpquota");
    else if (mp->m_qflags & XFS_GQUOTA_ACCT)
        seq_puts(m, ",gqnoenforce");

    if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
        seq_puts(m, ",noquota");

    return 0;
}
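
/*
 * For example, a filesystem mounted with -o ikeep,usrquota shows up in
 * /proc/mounts with ",ikeep" and ",usrquota" in the option string, plus
 * the always-emitted ",inode64" (or ",inode32") in between.
 */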

/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
    struct xfs_mount *mp,
    xfs_agnumber_t  agcount)
{
    xfs_agnumber_t  index;
    xfs_agnumber_t  maxagi = 0;
    xfs_sb_t    *sbp = &mp->m_sb;
    xfs_agnumber_t  max_metadata;
    xfs_agino_t agino;
    xfs_ino_t   ino;

    /*
     * Calculate how much should be reserved for inodes to meet
     * the max inode percentage.  Used only for inode32.
     */
    if (M_IGEO(mp)->maxicount) {
        uint64_t    icount;

        icount = sbp->sb_dblocks * sbp->sb_imax_pct;
        do_div(icount, 100);
        icount += sbp->sb_agblocks - 1;
        do_div(icount, sbp->sb_agblocks);
        max_metadata = icount;
    } else {
        max_metadata = agcount;
    }

    /* Get the last possible inode in the filesystem */
    agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
    ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

    /*
     * If user asked for no more than 32-bit inodes, and the fs is
     * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
     * the allocator to accommodate the request.
     */
    if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
        set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
    else
        clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);

    for (index = 0; index < agcount; index++) {
        struct xfs_perag    *pag;

        ino = XFS_AGINO_TO_INO(mp, index, agino);

        pag = xfs_perag_get(mp, index);

        if (xfs_is_inode32(mp)) {
            if (ino > XFS_MAXINUMBER_32) {
                pag->pagi_inodeok = 0;
                pag->pagf_metadata = 0;
            } else {
                pag->pagi_inodeok = 1;
                maxagi++;
                if (index < max_metadata)
                    pag->pagf_metadata = 1;
                else
                    pag->pagf_metadata = 0;
            }
        } else {
            pag->pagi_inodeok = 1;
            pag->pagf_metadata = 0;
        }

        xfs_perag_put(pag);
    }

    return xfs_is_inode32(mp) ? maxagi : agcount;
}
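
/*
 * Worked example for the function above: with inode32 on a filesystem
 * large enough that the last possible inode number exceeds
 * XFS_MAXINUMBER_32, only the lower AGs whose highest inode number still
 * fits in 32 bits keep pagi_inodeok set, and the returned maxagi counts
 * just those AGs.
 */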

static int
xfs_setup_dax_always(
    struct xfs_mount    *mp)
{
    if (!mp->m_ddev_targp->bt_daxdev &&
        (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
        xfs_alert(mp,
            "DAX unsupported by block device. Turning off DAX.");
        goto disable_dax;
    }

    if (mp->m_super->s_blocksize != PAGE_SIZE) {
        xfs_alert(mp,
            "DAX not supported for blocksize. Turning off DAX.");
        goto disable_dax;
    }

    if (xfs_has_reflink(mp) &&
        bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
        xfs_alert(mp,
            "DAX and reflink cannot work with multi-partitions!");
        return -EINVAL;
    }

    xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
    return 0;

disable_dax:
    xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
    return 0;
}

STATIC int
xfs_blkdev_get(
    xfs_mount_t     *mp,
    const char      *name,
    struct block_device **bdevp)
{
    int         error = 0;

    *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
                    mp);
    if (IS_ERR(*bdevp)) {
        error = PTR_ERR(*bdevp);
        xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
    }

    return error;
}

STATIC void
xfs_blkdev_put(
    struct block_device *bdev)
{
    if (bdev)
        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

STATIC void
xfs_close_devices(
    struct xfs_mount    *mp)
{
    if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
        struct block_device *logdev = mp->m_logdev_targp->bt_bdev;

        xfs_free_buftarg(mp->m_logdev_targp);
        xfs_blkdev_put(logdev);
    }
    if (mp->m_rtdev_targp) {
        struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;

        xfs_free_buftarg(mp->m_rtdev_targp);
        xfs_blkdev_put(rtdev);
    }
    xfs_free_buftarg(mp->m_ddev_targp);
}

/*
 * The file system configurations are:
 *  (1) device (partition) with data and internal log
 *  (2) logical volume with data and log subvolumes.
 *  (3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
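/*
 * Configuration (3), for instance, corresponds to a mount like:
 *
 *	mount -o logdev=/dev/sdb1,rtdev=/dev/sdc1 /dev/sda1 /mnt
 *
 * where /dev/sda1 holds the data subvolume and the log and realtime
 * subvolumes live on separate block devices.
 */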
STATIC int
xfs_open_devices(
    struct xfs_mount    *mp)
{
    struct block_device *ddev = mp->m_super->s_bdev;
    struct block_device *logdev = NULL, *rtdev = NULL;
    int         error;

    /*
     * Open real time and log devices - order is important.
     */
    if (mp->m_logname) {
        error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
        if (error)
            return error;
    }

    if (mp->m_rtname) {
        error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
        if (error)
            goto out_close_logdev;

        if (rtdev == ddev || rtdev == logdev) {
            xfs_warn(mp,
    "Cannot mount filesystem with identical rtdev and ddev/logdev.");
            error = -EINVAL;
            goto out_close_rtdev;
        }
    }

    /*
     * Setup xfs_mount buffer target pointers
     */
    error = -ENOMEM;
    mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
    if (!mp->m_ddev_targp)
        goto out_close_rtdev;

    if (rtdev) {
        mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
        if (!mp->m_rtdev_targp)
            goto out_free_ddev_targ;
    }

    if (logdev && logdev != ddev) {
        mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
        if (!mp->m_logdev_targp)
            goto out_free_rtdev_targ;
    } else {
        mp->m_logdev_targp = mp->m_ddev_targp;
    }

    return 0;

 out_free_rtdev_targ:
    if (mp->m_rtdev_targp)
        xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
    xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
    xfs_blkdev_put(rtdev);
 out_close_logdev:
    if (logdev && logdev != ddev)
        xfs_blkdev_put(logdev);
    return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
    struct xfs_mount    *mp)
{
    int         error;

    error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
    if (error)
        return error;

    if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
        unsigned int    log_sector_size = BBSIZE;

        if (xfs_has_sector(mp))
            log_sector_size = mp->m_sb.sb_logsectsize;
        error = xfs_setsize_buftarg(mp->m_logdev_targp,
                        log_sector_size);
        if (error)
            return error;
    }
    if (mp->m_rtdev_targp) {
        error = xfs_setsize_buftarg(mp->m_rtdev_targp,
                        mp->m_sb.sb_sectsize);
        if (error)
            return error;
    }

    return 0;
}

STATIC int
xfs_init_mount_workqueues(
    struct xfs_mount    *mp)
{
    mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
            XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
            1, mp->m_super->s_id);
    if (!mp->m_buf_workqueue)
        goto out;

    mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
            XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
            0, mp->m_super->s_id);
    if (!mp->m_unwritten_workqueue)
        goto out_destroy_buf;

    mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
            XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
            0, mp->m_super->s_id);
    if (!mp->m_reclaim_workqueue)
        goto out_destroy_unwritten;

    mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
            XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
            0, mp->m_super->s_id);
    if (!mp->m_blockgc_wq)
        goto out_destroy_reclaim;

    mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
            XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
            1, mp->m_super->s_id);
    if (!mp->m_inodegc_wq)
        goto out_destroy_blockgc;

    mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
            XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
    if (!mp->m_sync_workqueue)
        goto out_destroy_inodegc;

    return 0;

out_destroy_inodegc:
    destroy_workqueue(mp->m_inodegc_wq);
out_destroy_blockgc:
    destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
    destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_unwritten:
    destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
    destroy_workqueue(mp->m_buf_workqueue);
out:
    return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
    struct xfs_mount    *mp)
{
    destroy_workqueue(mp->m_sync_workqueue);
    destroy_workqueue(mp->m_blockgc_wq);
    destroy_workqueue(mp->m_inodegc_wq);
    destroy_workqueue(mp->m_reclaim_workqueue);
    destroy_workqueue(mp->m_unwritten_workqueue);
    destroy_workqueue(mp->m_buf_workqueue);
}

static void
xfs_flush_inodes_worker(
    struct work_struct  *work)
{
    struct xfs_mount    *mp = container_of(work, struct xfs_mount,
                           m_flush_inodes_work);
    struct super_block  *sb = mp->m_super;

    if (down_read_trylock(&sb->s_umount)) {
        sync_inodes_sb(sb);
        up_read(&sb->s_umount);
    }
}

/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 * for IO to complete so that we effectively throttle multiple callers to the
 * rate at which IO is completing.
 */
void
xfs_flush_inodes(
    struct xfs_mount    *mp)
{
    /*
     * If flush_work() returns true then that means we waited for a flush
     * which was already in progress.  Don't bother running another scan.
     */
    if (flush_work(&mp->m_flush_inodes_work))
        return;

    queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
    flush_work(&mp->m_flush_inodes_work);
}

/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
    struct super_block  *sb)
{
    BUG();
    return NULL;
}

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
    struct inode        *inode)
{
    struct xfs_inode    *ip = XFS_I(inode);

    trace_xfs_destroy_inode(ip);

    ASSERT(!rwsem_is_locked(&inode->i_rwsem));
    XFS_STATS_INC(ip->i_mount, vn_rele);
    XFS_STATS_INC(ip->i_mount, vn_remove);
    xfs_inode_mark_reclaimable(ip);
}

static void
xfs_fs_dirty_inode(
    struct inode            *inode,
    int             flag)
{
    struct xfs_inode        *ip = XFS_I(inode);
    struct xfs_mount        *mp = ip->i_mount;
    struct xfs_trans        *tp;

    if (!(inode->i_sb->s_flags & SB_LAZYTIME))
        return;
    if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
        return;

    if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
        return;
    xfs_ilock(ip, XFS_ILOCK_EXCL);
    xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
    xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
    xfs_trans_commit(tp);
}
/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
    void            *inode)
{
    struct xfs_inode    *ip = inode;

    memset(ip, 0, sizeof(struct xfs_inode));

    /* vfs inode */
    inode_init_once(VFS_I(ip));

    /* xfs inode */
    atomic_set(&ip->i_pincount, 0);
    spin_lock_init(&ip->i_flags_lock);

    mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
             "xfsino", ip->i_ino);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
    struct inode        *inode)
{
    struct xfs_inode    *ip = XFS_I(inode);

    /*
     * If this unlinked inode is in the middle of recovery, don't
     * drop the inode just yet; log recovery will take care of
     * that.  See the comment for this inode flag.
     */
    if (ip->i_flags & XFS_IRECOVERY) {
        ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
        return 0;
    }

    return generic_drop_inode(inode);
}

static void
xfs_mount_free(
    struct xfs_mount    *mp)
{
    kfree(mp->m_rtname);
    kfree(mp->m_logname);
    kmem_free(mp);
}

STATIC int
xfs_fs_sync_fs(
    struct super_block  *sb,
    int         wait)
{
    struct xfs_mount    *mp = XFS_M(sb);
    int         error;

    trace_xfs_fs_sync_fs(mp, __return_address);

    /*
     * Doing anything during the async pass would be counterproductive.
     */
    if (!wait)
        return 0;

    error = xfs_log_force(mp, XFS_LOG_SYNC);
    if (error)
        return error;

    if (laptop_mode) {
        /*
         * The disk must be active because we're syncing.
         * We schedule log work now (now that the disk is
         * active) instead of later (when it might not be).
         */
        flush_delayed_work(&mp->m_log->l_work);
    }

    /*
     * If we are called with page faults frozen out, it means we are about
     * to freeze the transaction subsystem. Take the opportunity to shut
     * down inodegc because once SB_FREEZE_FS is set it's too late to
     * prevent inactivation races with freeze. The fs doesn't get called
     * again by the freezing process until after SB_FREEZE_FS has been set,
     * so it's now or never.  Same logic applies to speculative allocation
     * garbage collection.
     *
     * We don't care if this is a normal syncfs call that does this or
     * freeze that does this - we can run this multiple times without issue
     * and we won't race with a restart because a restart can only occur
     * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
     */
    if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
        xfs_inodegc_stop(mp);
        xfs_blockgc_stop(mp);
    }

    return 0;
}

STATIC int
xfs_fs_statfs(
    struct dentry       *dentry,
    struct kstatfs      *statp)
{
    struct xfs_mount    *mp = XFS_M(dentry->d_sb);
    xfs_sb_t        *sbp = &mp->m_sb;
    struct xfs_inode    *ip = XFS_I(d_inode(dentry));
    uint64_t        fakeinos, id;
    uint64_t        icount;
    uint64_t        ifree;
    uint64_t        fdblocks;
    xfs_extlen_t        lsize;
    int64_t         ffree;

    /*
     * Expedite background inodegc but don't wait. We do not want to block
     * here waiting hours for a billion extent file to be truncated.
     */
    xfs_inodegc_push(mp);

    statp->f_type = XFS_SUPER_MAGIC;
    statp->f_namelen = MAXNAMELEN - 1;

    id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
    statp->f_fsid = u64_to_fsid(id);

    icount = percpu_counter_sum(&mp->m_icount);
    ifree = percpu_counter_sum(&mp->m_ifree);
    fdblocks = percpu_counter_sum(&mp->m_fdblocks);

    spin_lock(&mp->m_sb_lock);
    statp->f_bsize = sbp->sb_blocksize;
    lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
    statp->f_blocks = sbp->sb_dblocks - lsize;
    spin_unlock(&mp->m_sb_lock);

    /* make sure statp->f_bfree does not underflow */
    statp->f_bfree = max_t(int64_t, 0,
                fdblocks - xfs_fdblocks_unavailable(mp));
    statp->f_bavail = statp->f_bfree;

    fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
    statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
    if (M_IGEO(mp)->maxicount)
        statp->f_files = min_t(typeof(statp->f_files),
                    statp->f_files,
                    M_IGEO(mp)->maxicount);

    /* If sb_icount overshot maxicount, report actual allocation */
    statp->f_files = max_t(typeof(statp->f_files),
                    statp->f_files,
                    sbp->sb_icount);

    /* make sure statp->f_ffree does not underflow */
    ffree = statp->f_files - (icount - ifree);
    statp->f_ffree = max_t(int64_t, ffree, 0);

    if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
        ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
                  (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
        xfs_qm_statvfs(ip, statp);

    if (XFS_IS_REALTIME_MOUNT(mp) &&
        (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
        s64 freertx;

        statp->f_blocks = sbp->sb_rblocks;
        freertx = percpu_counter_sum_positive(&mp->m_frextents);
        statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize;
    }

    return 0;
}

STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
    uint64_t resblks = 0;

    mp->m_resblks_save = mp->m_resblks;
    xfs_reserve_blocks(mp, &resblks, NULL);
}

STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
    uint64_t resblks;

    if (mp->m_resblks_save) {
        resblks = mp->m_resblks_save;
        mp->m_resblks_save = 0;
    } else
        resblks = xfs_default_resblks(mp);

    xfs_reserve_blocks(mp, &resblks, NULL);
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
    struct super_block  *sb)
{
    struct xfs_mount    *mp = XFS_M(sb);
    unsigned int        flags;
    int         ret;

    /*
     * The filesystem is now frozen far enough that memory reclaim
     * cannot safely operate on the filesystem. Hence we need to
     * set a GFP_NOFS context here to avoid recursion deadlocks.
     */
    flags = memalloc_nofs_save();
    xfs_save_resvblks(mp);
    ret = xfs_log_quiesce(mp);
    memalloc_nofs_restore(flags);

    /*
     * For read-write filesystems, we need to restart the inodegc on error
     * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
     * going to be run to restart it now.  We are at SB_FREEZE_FS level
     * here, so we can restart safely without racing with a stop in
     * xfs_fs_sync_fs().
     */
    if (ret && !xfs_is_readonly(mp)) {
        xfs_blockgc_start(mp);
        xfs_inodegc_start(mp);
    }

    return ret;
}

STATIC int
xfs_fs_unfreeze(
    struct super_block  *sb)
{
    struct xfs_mount    *mp = XFS_M(sb);

    xfs_restore_resvblks(mp);
    xfs_log_work_queue(mp);

    /*
     * Don't reactivate the inodegc worker on a readonly filesystem because
     * inodes are sent directly to reclaim.  Don't reactivate the blockgc
     * worker because there are no speculative preallocations on a readonly
     * filesystem.
     */
    if (!xfs_is_readonly(mp)) {
        xfs_blockgc_start(mp);
        xfs_inodegc_start(mp);
    }

    return 0;
}

/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
    struct xfs_mount    *mp)
{
    /* Fail a mount where the logbuf is smaller than the log stripe */
    if (xfs_has_logv2(mp)) {
        if (mp->m_logbsize <= 0 &&
            mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
            mp->m_logbsize = mp->m_sb.sb_logsunit;
        } else if (mp->m_logbsize > 0 &&
               mp->m_logbsize < mp->m_sb.sb_logsunit) {
            xfs_warn(mp,
        "logbuf size must be greater than or equal to log stripe size");
            return -EINVAL;
        }
    } else {
        /* Fail a mount if the logbuf is larger than 32K */
        if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
            xfs_warn(mp,
        "logbuf size for version 1 logs must be 16K or 32K");
            return -EINVAL;
        }
    }

    /*
     * V5 filesystems always use attr2 format for attributes.
     */
    if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
        xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
                 "attr2 is always enabled for V5 filesystems.");
        return -EINVAL;
    }

    /*
     * prohibit r/w mounts of read-only filesystems
     */
    if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
        xfs_warn(mp,
            "cannot mount a read-only filesystem as read-write");
        return -EROFS;
    }

    if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
        (mp->m_qflags & XFS_PQUOTA_ACCT) &&
        !xfs_has_pquotino(mp)) {
        xfs_warn(mp,
          "Super block does not support project and group quota together");
        return -EINVAL;
    }

    return 0;
}

static int
xfs_init_percpu_counters(
    struct xfs_mount    *mp)
{
    int     error;

    error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
    if (error)
        return -ENOMEM;

    error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
    if (error)
        goto free_icount;

    error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
    if (error)
        goto free_ifree;

    error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
    if (error)
        goto free_fdblocks;

    error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
    if (error)
        goto free_delalloc;

    return 0;

free_delalloc:
    percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
    percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
    percpu_counter_destroy(&mp->m_ifree);
free_icount:
    percpu_counter_destroy(&mp->m_icount);
    return -ENOMEM;
}
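
/*
 * Note the error unwinding above: each free_* label destroys exactly the
 * counters initialised before the failing step, in reverse order of
 * allocation, so a failure at any point leaks nothing.
 */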

void
xfs_reinit_percpu_counters(
    struct xfs_mount    *mp)
{
    percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
    percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
    percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
    percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
}

static void
xfs_destroy_percpu_counters(
    struct xfs_mount    *mp)
{
    percpu_counter_destroy(&mp->m_icount);
    percpu_counter_destroy(&mp->m_ifree);
    percpu_counter_destroy(&mp->m_fdblocks);
    ASSERT(xfs_is_shutdown(mp) ||
           percpu_counter_sum(&mp->m_delalloc_blks) == 0);
    percpu_counter_destroy(&mp->m_delalloc_blks);
    percpu_counter_destroy(&mp->m_frextents);
}

static int
xfs_inodegc_init_percpu(
    struct xfs_mount    *mp)
{
    struct xfs_inodegc  *gc;
    int         cpu;

    mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
    if (!mp->m_inodegc)
        return -ENOMEM;

    for_each_possible_cpu(cpu) {
        gc = per_cpu_ptr(mp->m_inodegc, cpu);
        init_llist_head(&gc->list);
        gc->items = 0;
        INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
    }
    return 0;
}
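
/*
 * Each possible CPU thus gets a private xfs_inodegc context: a lockless
 * llist of inodes queued for inactivation, an item count, and a delayed
 * work item that runs xfs_inodegc_worker() to process the backlog.
 */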

static void
xfs_inodegc_free_percpu(
    struct xfs_mount    *mp)
{
    if (!mp->m_inodegc)
        return;
    free_percpu(mp->m_inodegc);
}

static void
xfs_fs_put_super(
    struct super_block  *sb)
{
    struct xfs_mount    *mp = XFS_M(sb);

    /* if ->fill_super failed, we have no mount to tear down */
    if (!sb->s_fs_info)
        return;

    xfs_notice(mp, "Unmounting Filesystem");
    xfs_filestream_unmount(mp);
    xfs_unmountfs(mp);

    xfs_freesb(mp);
    free_percpu(mp->m_stats.xs_stats);
    xfs_mount_list_del(mp);
    xfs_inodegc_free_percpu(mp);
    xfs_destroy_percpu_counters(mp);
    xfs_destroy_mount_workqueues(mp);
    xfs_close_devices(mp);

    sb->s_fs_info = NULL;
    xfs_mount_free(mp);
}

static long
xfs_fs_nr_cached_objects(
    struct super_block  *sb,
    struct shrink_control   *sc)
{
    /* Paranoia: catch incorrect calls during mount setup or teardown */
    if (WARN_ON_ONCE(!sb->s_fs_info))
        return 0;
    return xfs_reclaim_inodes_count(XFS_M(sb));
}

static long
xfs_fs_free_cached_objects(
    struct super_block  *sb,
    struct shrink_control   *sc)
{
    return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
}

static const struct super_operations xfs_super_operations = {
    .alloc_inode        = xfs_fs_alloc_inode,
    .destroy_inode      = xfs_fs_destroy_inode,
    .dirty_inode        = xfs_fs_dirty_inode,
    .drop_inode     = xfs_fs_drop_inode,
    .put_super      = xfs_fs_put_super,
    .sync_fs        = xfs_fs_sync_fs,
    .freeze_fs      = xfs_fs_freeze,
    .unfreeze_fs        = xfs_fs_unfreeze,
    .statfs         = xfs_fs_statfs,
    .show_options       = xfs_fs_show_options,
    .nr_cached_objects  = xfs_fs_nr_cached_objects,
    .free_cached_objects    = xfs_fs_free_cached_objects,
};

static int
suffix_kstrtoint(
    const char  *s,
    unsigned int    base,
    int     *res)
{
    int     last, shift_left_factor = 0, _res;
    char        *value;
    int     ret = 0;

    value = kstrdup(s, GFP_KERNEL);
    if (!value)
        return -ENOMEM;

    last = strlen(value) - 1;
    if (value[last] == 'K' || value[last] == 'k') {
        shift_left_factor = 10;
        value[last] = '\0';
    }
    if (value[last] == 'M' || value[last] == 'm') {
        shift_left_factor = 20;
        value[last] = '\0';
    }
    if (value[last] == 'G' || value[last] == 'g') {
        shift_left_factor = 30;
        value[last] = '\0';
    }

    /* Only store a result on success; _res is uninitialised on failure. */
    if (kstrtoint(value, base, &_res))
        ret = -EINVAL;
    else
        *res = _res << shift_left_factor;
    kfree(value);
    return ret;
}
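
/*
 * Example: suffix_kstrtoint("64k", 10, &res) strips the 'k', parses "64"
 * in base 10 and shifts it left by 10 bits, so res ends up as 65536.
 * Plain numbers ("4096") pass through unshifted.
 */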

static inline void
xfs_fs_warn_deprecated(
    struct fs_context   *fc,
    struct fs_parameter *param,
    uint64_t        flag,
    bool            value)
{
    /*
     * Don't print the warning if reconfiguring and the current mount
     * point already had the flag set.
     */
    if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
            !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
        return;
    xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}
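
/*
 * Example: mounting with -o ikeep logs "ikeep mount option is
 * deprecated." unless this is a reconfigure of a mount that already
 * has XFS_FEAT_IKEEP set.
 */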

/*
 * Set mount state from a mount option.
 *
 * NOTE: mp->m_super is NULL here!
 */
static int
xfs_fs_parse_param(
    struct fs_context   *fc,
    struct fs_parameter *param)
{
    struct xfs_mount    *parsing_mp = fc->s_fs_info;
    struct fs_parse_result  result;
    int         size = 0;
    int         opt;

    opt = fs_parse(fc, xfs_fs_parameters, param, &result);
    if (opt < 0)
        return opt;

    switch (opt) {
    case Opt_logbufs:
        parsing_mp->m_logbufs = result.uint_32;
        return 0;
    case Opt_logbsize:
        if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
            return -EINVAL;
        return 0;
    case Opt_logdev:
        kfree(parsing_mp->m_logname);
        parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
        if (!parsing_mp->m_logname)
            return -ENOMEM;
        return 0;
    case Opt_rtdev:
        kfree(parsing_mp->m_rtname);
        parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
        if (!parsing_mp->m_rtname)
            return -ENOMEM;
        return 0;
    case Opt_allocsize:
        if (suffix_kstrtoint(param->string, 10, &size))
            return -EINVAL;
        parsing_mp->m_allocsize_log = ffs(size) - 1;
        parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
        return 0;
    case Opt_grpid:
    case Opt_bsdgroups:
        parsing_mp->m_features |= XFS_FEAT_GRPID;
        return 0;
    case Opt_nogrpid:
    case Opt_sysvgroups:
        parsing_mp->m_features &= ~XFS_FEAT_GRPID;
        return 0;
    case Opt_wsync:
        parsing_mp->m_features |= XFS_FEAT_WSYNC;
        return 0;
    case Opt_norecovery:
        parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
        return 0;
    case Opt_noalign:
        parsing_mp->m_features |= XFS_FEAT_NOALIGN;
        return 0;
    case Opt_swalloc:
        parsing_mp->m_features |= XFS_FEAT_SWALLOC;
        return 0;
    case Opt_sunit:
        parsing_mp->m_dalign = result.uint_32;
        return 0;
    case Opt_swidth:
        parsing_mp->m_swidth = result.uint_32;
        return 0;
    case Opt_inode32:
        parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
        return 0;
    case Opt_inode64:
        parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
        return 0;
    case Opt_nouuid:
        parsing_mp->m_features |= XFS_FEAT_NOUUID;
        return 0;
    case Opt_largeio:
        parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
        return 0;
    case Opt_nolargeio:
        parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
        return 0;
    case Opt_filestreams:
        parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
        return 0;
    case Opt_noquota:
        parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
        parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
        return 0;
    case Opt_quota:
    case Opt_uquota:
    case Opt_usrquota:
        parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
        return 0;
    case Opt_qnoenforce:
    case Opt_uqnoenforce:
        parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
        parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
        return 0;
    case Opt_pquota:
    case Opt_prjquota:
        parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
        return 0;
    case Opt_pqnoenforce:
        parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
        parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
        return 0;
    case Opt_gquota:
    case Opt_grpquota:
        parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
        return 0;
    case Opt_gqnoenforce:
        parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
        parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
        return 0;
    case Opt_discard:
        parsing_mp->m_features |= XFS_FEAT_DISCARD;
        return 0;
    case Opt_nodiscard:
        parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
        return 0;
#ifdef CONFIG_FS_DAX
    case Opt_dax:
        xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
        return 0;
    case Opt_dax_enum:
        xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
        return 0;
#endif
    /* Following mount options will be removed in September 2025 */
    case Opt_ikeep:
        xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
        parsing_mp->m_features |= XFS_FEAT_IKEEP;
        return 0;
    case Opt_noikeep:
        xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
        parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
        return 0;
    case Opt_attr2:
        xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
        parsing_mp->m_features |= XFS_FEAT_ATTR2;
        return 0;
    case Opt_noattr2:
        xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
        parsing_mp->m_features |= XFS_FEAT_NOATTR2;
        return 0;
    default:
        xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
        return -EINVAL;
    }

    return 0;
}

static int
xfs_fs_validate_params(
    struct xfs_mount    *mp)
{
    /* No recovery flag requires a read-only mount */
    if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
        xfs_warn(mp, "no-recovery mounts must be read-only.");
        return -EINVAL;
    }

    /*
     * We have not read the superblock at this point, so only the attr2
     * mount option can set the attr2 feature by this stage.
     */
    if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
        xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
        return -EINVAL;
    }

    if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
        xfs_warn(mp,
    "sunit and swidth options incompatible with the noalign option");
        return -EINVAL;
    }

    if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
        xfs_warn(mp, "quota support not available in this kernel.");
        return -EINVAL;
    }

    if ((mp->m_dalign && !mp->m_swidth) ||
        (!mp->m_dalign && mp->m_swidth)) {
        xfs_warn(mp, "sunit and swidth must be specified together");
        return -EINVAL;
    }

    if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
        xfs_warn(mp,
    "stripe width (%d) must be a multiple of the stripe unit (%d)",
            mp->m_swidth, mp->m_dalign);
        return -EINVAL;
    }

    if (mp->m_logbufs != -1 &&
        mp->m_logbufs != 0 &&
        (mp->m_logbufs < XLOG_MIN_ICLOGS ||
         mp->m_logbufs > XLOG_MAX_ICLOGS)) {
        xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
            mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
        return -EINVAL;
    }

    if (mp->m_logbsize != -1 &&
        mp->m_logbsize !=  0 &&
        (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
         mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
         !is_power_of_2(mp->m_logbsize))) {
        xfs_warn(mp,
            "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
            mp->m_logbsize);
        return -EINVAL;
    }

    if (xfs_has_allocsize(mp) &&
        (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
         mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
        xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
            mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
        return -EINVAL;
    }

    return 0;
}

static int
xfs_fs_fill_super(
    struct super_block  *sb,
    struct fs_context   *fc)
{
    struct xfs_mount    *mp = sb->s_fs_info;
    struct inode        *root;
    int         flags = 0, error;

    mp->m_super = sb;

    error = xfs_fs_validate_params(mp);
    if (error)
        goto out_free_names;

    sb_min_blocksize(sb, BBSIZE);
    sb->s_xattr = xfs_xattr_handlers;
    sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
    sb->s_qcop = &xfs_quotactl_operations;
    sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
    sb->s_op = &xfs_super_operations;

    /*
     * Delay mount work if the debug hook is set. This is debug
     * instrumentation to coordinate simulation of xfs mount failures with
     * VFS superblock operations.
     */
    if (xfs_globals.mount_delay) {
        xfs_notice(mp, "Delaying mount for %d seconds.",
            xfs_globals.mount_delay);
        msleep(xfs_globals.mount_delay * 1000);
    }

    if (fc->sb_flags & SB_SILENT)
        flags |= XFS_MFSI_QUIET;

    error = xfs_open_devices(mp);
    if (error)
        goto out_free_names;

    error = xfs_init_mount_workqueues(mp);
    if (error)
        goto out_close_devices;

    error = xfs_init_percpu_counters(mp);
    if (error)
        goto out_destroy_workqueues;

    error = xfs_inodegc_init_percpu(mp);
    if (error)
        goto out_destroy_counters;

    /*
     * All percpu data structures requiring cleanup when a cpu goes offline
     * must be allocated before adding this @mp to the cpu-dead handler's
     * mount list.
     */
    xfs_mount_list_add(mp);

    /* Allocate stats memory before we do operations that might use it */
    mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
    if (!mp->m_stats.xs_stats) {
        error = -ENOMEM;
        goto out_destroy_inodegc;
    }

    error = xfs_readsb(mp, flags);
    if (error)
        goto out_free_stats;

    error = xfs_finish_flags(mp);
    if (error)
        goto out_free_sb;

    error = xfs_setup_devices(mp);
    if (error)
        goto out_free_sb;

    /* V4 support is undergoing deprecation. */
    if (!xfs_has_crc(mp)) {
#ifdef CONFIG_XFS_SUPPORT_V4
        xfs_warn_once(mp,
    "Deprecated V4 format (crc=0) will not be supported after September 2030.");
#else
        xfs_warn(mp,
    "Deprecated V4 format (crc=0) not supported by kernel.");
        error = -EINVAL;
        goto out_free_sb;
#endif
    }

    /* Filesystem claims it needs repair, so refuse the mount. */
    if (xfs_has_needsrepair(mp)) {
        xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
        error = -EFSCORRUPTED;
        goto out_free_sb;
    }

    /*
     * Don't touch the filesystem if a user tool thinks it owns the primary
     * superblock.  mkfs doesn't clear the flag from secondary supers, so
     * we don't check them at all.
     */
    if (mp->m_sb.sb_inprogress) {
        xfs_warn(mp, "Offline file system operation in progress!");
        error = -EFSCORRUPTED;
        goto out_free_sb;
    }

    /*
     * Until this is fixed only page-sized or smaller data blocks work.
     */
    if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
        xfs_warn(mp,
        "File system with blocksize %d bytes. "
        "Only pagesize (%ld) or less will currently work.",
                mp->m_sb.sb_blocksize, PAGE_SIZE);
        error = -ENOSYS;
        goto out_free_sb;
    }

    /* Ensure this filesystem fits in the page cache limits */
    if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
        xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
        xfs_warn(mp,
        "file system too large to be mounted on this system.");
        error = -EFBIG;
        goto out_free_sb;
    }

    /*
     * XFS block mappings use 54 bits to store the logical block offset.
     * This should suffice to handle the maximum file size that the VFS
     * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
     * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
     * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
     * to check this assertion.
     *
     * Avoid integer overflow by comparing the maximum bmbt offset to the
     * maximum pagecache offset in units of fs blocks.
     */
    if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
        xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
             XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
             XFS_MAX_FILEOFF);
        error = -EINVAL;
        goto out_free_sb;
    }

    error = xfs_filestream_mount(mp);
    if (error)
        goto out_free_sb;

    /*
     * we must configure the block size in the superblock before we run the
     * full mount process as the mount process can lookup and cache inodes.
     */
    sb->s_magic = XFS_SUPER_MAGIC;
    sb->s_blocksize = mp->m_sb.sb_blocksize;
    sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
    sb->s_maxbytes = MAX_LFS_FILESIZE;
    sb->s_max_links = XFS_MAXLINK;
    sb->s_time_gran = 1;
    if (xfs_has_bigtime(mp)) {
        sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
        sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
    } else {
        sb->s_time_min = XFS_LEGACY_TIME_MIN;
        sb->s_time_max = XFS_LEGACY_TIME_MAX;
    }
    trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
    sb->s_iflags |= SB_I_CGROUPWB;

    set_posix_acl_flag(sb);

    /* version 5 superblocks support inode version counters. */
    if (xfs_has_crc(mp))
        sb->s_flags |= SB_I_VERSION;

    if (xfs_has_dax_always(mp)) {
        error = xfs_setup_dax_always(mp);
        if (error)
            goto out_filestream_unmount;
    }

    if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
        xfs_warn(mp,
    "mounting with \"discard\" option, but the device does not support discard");
        mp->m_features &= ~XFS_FEAT_DISCARD;
    }

    if (xfs_has_reflink(mp)) {
        if (mp->m_sb.sb_rblocks) {
            xfs_alert(mp,
    "reflink not compatible with realtime device!");
            error = -EINVAL;
            goto out_filestream_unmount;
        }

        if (xfs_globals.always_cow) {
            xfs_info(mp, "using DEBUG-only always_cow mode.");
            mp->m_always_cow = true;
        }
    }

    if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
        xfs_alert(mp,
    "reverse mapping btree not compatible with realtime device!");
        error = -EINVAL;
        goto out_filestream_unmount;
    }

    if (xfs_has_large_extent_counts(mp))
        xfs_warn(mp,
    "EXPERIMENTAL Large extent counts feature in use. Use at your own risk!");

    error = xfs_mountfs(mp);
    if (error)
        goto out_filestream_unmount;

    root = igrab(VFS_I(mp->m_rootip));
    if (!root) {
        error = -ENOENT;
        goto out_unmount;
    }
    sb->s_root = d_make_root(root);
    if (!sb->s_root) {
        error = -ENOMEM;
        goto out_unmount;
    }

    return 0;

 out_filestream_unmount:
    xfs_filestream_unmount(mp);
 out_free_sb:
    xfs_freesb(mp);
 out_free_stats:
    free_percpu(mp->m_stats.xs_stats);
 out_destroy_inodegc:
    xfs_mount_list_del(mp);
    xfs_inodegc_free_percpu(mp);
 out_destroy_counters:
    xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
    xfs_destroy_mount_workqueues(mp);
 out_close_devices:
    xfs_close_devices(mp);
 out_free_names:
    sb->s_fs_info = NULL;
    xfs_mount_free(mp);
    return error;

 out_unmount:
    xfs_filestream_unmount(mp);
    xfs_unmountfs(mp);
    goto out_free_sb;
}
1702 
1703 static int
1704 xfs_fs_get_tree(
1705     struct fs_context   *fc)
1706 {
1707     return get_tree_bdev(fc, xfs_fs_fill_super);
1708 }
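/*
 * Editorial sketch (hypothetical "examplefs", not part of this file):
 * the minimal shape of a block-device filesystem in the fs_context
 * mount API.  get_tree_bdev() opens the device recorded in the
 * context and hands a fresh superblock to the supplied fill_super
 * callback, exactly as xfs_fs_get_tree() does above with
 * xfs_fs_fill_super.
 */
#include <linux/fs.h>
#include <linux/fs_context.h>

static int examplefs_fill_super(struct super_block *sb,
                struct fs_context *fc)
{
    sb->s_magic = 0x45584d50;   /* hypothetical magic number */
    /* ... read on-disk metadata, then allocate and set sb->s_root ... */
    return 0;
}

static int examplefs_get_tree(struct fs_context *fc)
{
    return get_tree_bdev(fc, examplefs_fill_super);
}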
1709 
1710 static int
1711 xfs_remount_rw(
1712     struct xfs_mount    *mp)
1713 {
1714     struct xfs_sb       *sbp = &mp->m_sb;
1715     int error;
1716 
1717     if (xfs_has_norecovery(mp)) {
1718         xfs_warn(mp,
1719             "ro->rw transition prohibited on norecovery mount");
1720         return -EINVAL;
1721     }
1722 
1723     if (xfs_sb_is_v5(sbp) &&
1724         xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1725         xfs_warn(mp,
1726     "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1727             (sbp->sb_features_ro_compat &
1728                 XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1729         return -EINVAL;
1730     }
1731 
1732     clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
1733 
1734     /*
1735      * If this is the first remount to writeable state, we might have some
1736      * superblock changes to update.
1737      */
1738     if (mp->m_update_sb) {
1739         error = xfs_sync_sb(mp, false);
1740         if (error) {
1741             xfs_warn(mp, "failed to write sb changes");
1742             return error;
1743         }
1744         mp->m_update_sb = false;
1745     }
1746 
1747     /*
1748      * Fill out the reserve pool if it is empty. Use the stashed value if
1749      * it is non-zero, otherwise go with the default.
1750      */
1751     xfs_restore_resvblks(mp);
1752     xfs_log_work_queue(mp);
1753     xfs_blockgc_start(mp);
1754 
1755     /* Create the per-AG metadata reservation pool. */
1756     error = xfs_fs_reserve_ag_blocks(mp);
1757     if (error && error != -ENOSPC)
1758         return error;
1759 
1760     /* Re-enable the background inode inactivation worker. */
1761     xfs_inodegc_start(mp);
1762 
1763     return 0;
1764 }
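/*
 * Editorial note on the ro-compat check above: a feature bit in
 * sb_features_ro_compat that the running kernel does not recognize
 * is, by definition, safe to read but not to write.  That is why an
 * unknown bit never blocks the initial read-only mount but must veto
 * any later ro->rw transition here.
 */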
1765 
1766 static int
1767 xfs_remount_ro(
1768     struct xfs_mount    *mp)
1769 {
1770     struct xfs_icwalk   icw = {
1771         .icw_flags  = XFS_ICWALK_FLAG_SYNC,
1772     };
1773     int         error;
1774 
1775     /* Flush all the dirty data to disk. */
1776     error = sync_filesystem(mp->m_super);
1777     if (error)
1778         return error;
1779 
1780     /*
1781      * Cancel background eofb scanning so it cannot race with the final
1782      * log force+buftarg wait and deadlock the remount.
1783      */
1784     xfs_blockgc_stop(mp);
1785 
1786     /*
1787      * Clear out all remaining COW staging extents and speculative post-EOF
1788      * preallocations so that we don't leave inodes requiring inactivation
1789      * cleanups during reclaim on a read-only mount.  We must process every
1790      * cached inode, so this requires a synchronous cache scan.
1791      */
1792     error = xfs_blockgc_free_space(mp, &icw);
1793     if (error) {
1794         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1795         return error;
1796     }
1797 
1798     /*
1799      * Stop the inodegc background worker.  xfs_fs_reconfigure already
1800      * flushed all pending inodegc work when it sync'd the filesystem.
1801      * The VFS holds s_umount, so we know that inodes cannot enter
1802      * xfs_fs_destroy_inode during a remount operation.  In readonly mode
1803      * we send inodes straight to reclaim, so no inodes will be queued.
1804      */
1805     xfs_inodegc_stop(mp);
1806 
1807     /* Free the per-AG metadata reservation pool. */
1808     error = xfs_fs_unreserve_ag_blocks(mp);
1809     if (error) {
1810         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1811         return error;
1812     }
1813 
1814     /*
1815      * Before we sync the metadata, we need to free up the reserve block
1816      * pool so that the used block count in the superblock on disk is
1817      * correct at the end of the remount. Stash the current reserve pool
1818      * size so that if we get remounted rw, we can return it to the same
1819      * size.
1820      */
1821     xfs_save_resvblks(mp);
1822 
1823     xfs_log_clean(mp);
1824     set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
1825 
1826     return 0;
1827 }
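/*
 * Editorial sketch: XFS_ICWALK_FLAG_SYNC in the icw above is what
 * turns xfs_blockgc_free_space() into a synchronous scan of every
 * cached inode.  The same on-demand flush could be wrapped in a
 * helper (hypothetical name, same calls as above):
 */
static int example_blockgc_flush_all(struct xfs_mount *mp)
{
    struct xfs_icwalk   icw = {
        .icw_flags  = XFS_ICWALK_FLAG_SYNC,
    };

    /* Visits every cached inode before returning. */
    return xfs_blockgc_free_space(mp, &icw);
}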
1828 
1829 /*
1830  * Logically we would return an error here to prevent users from believing
1831  * they might have changed mount options using remount which can't be changed.
1832  *
1833  * But unfortunately mount(8) adds all options from mtab and fstab to the mount
1834  * arguments in some cases so we can't blindly reject options, but have to
1835  * check for each specified option if it actually differs from the currently
1836  * set option and only reject it if that's the case.
1837  *
1838  * Until that is implemented we return success for every remount request, and
1839  * silently ignore all options that we can't actually change.
1840  */
1841 static int
1842 xfs_fs_reconfigure(
1843     struct fs_context *fc)
1844 {
1845     struct xfs_mount    *mp = XFS_M(fc->root->d_sb);
1846     struct xfs_mount    *new_mp = fc->s_fs_info;
1847     int         flags = fc->sb_flags;
1848     int         error;
1849 
1850     /* Version 5 superblocks always support version counters. */
1851     if (xfs_has_crc(mp))
1852         fc->sb_flags |= SB_I_VERSION;
1853 
1854     error = xfs_fs_validate_params(new_mp);
1855     if (error)
1856         return error;
1857 
1858     /* inode32 -> inode64 */
1859     if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
1860         mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
1861         mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
1862     }
1863 
1864     /* inode64 -> inode32 */
1865     if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
1866         mp->m_features |= XFS_FEAT_SMALL_INUMS;
1867         mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
1868     }
1869 
1870     /* ro -> rw */
1871     if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
1872         error = xfs_remount_rw(mp);
1873         if (error)
1874             return error;
1875     }
1876 
1877     /* rw -> ro */
1878     if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
1879         error = xfs_remount_ro(mp);
1880         if (error)
1881             return error;
1882     }
1883 
1884     return 0;
1885 }
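/*
 * Editorial sketch (userspace; the mount point is a placeholder):
 * the function above runs when an existing mount is updated, e.g. a
 * classic remount via mount(2), which the kernel routes through the
 * fs_context ->reconfigure() hook.
 */
#include <sys/mount.h>

int example_remount_readonly(void)
{
    /* "/mnt/scratch" stands in for any mounted XFS filesystem. */
    return mount(NULL, "/mnt/scratch", NULL,
             MS_REMOUNT | MS_RDONLY, NULL);
}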
1886 
1887 static void xfs_fs_free(
1888     struct fs_context   *fc)
1889 {
1890     struct xfs_mount    *mp = fc->s_fs_info;
1891 
1892     /*
1893      * mp is stored in the fs_context when it is initialized.
1894      * mp is transferred to the superblock on a successful mount,
1895      * but if an error occurs before the transfer we have to free
1896      * it here.
1897      */
1898     if (mp)
1899         xfs_mount_free(mp);
1900 }
1901 
1902 static const struct fs_context_operations xfs_context_ops = {
1903     .parse_param = xfs_fs_parse_param,
1904     .get_tree    = xfs_fs_get_tree,
1905     .reconfigure = xfs_fs_reconfigure,
1906     .free        = xfs_fs_free,
1907 };
1908 
1909 static int xfs_init_fs_context(
1910     struct fs_context   *fc)
1911 {
1912     struct xfs_mount    *mp;
1913 
1914     mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
1915     if (!mp)
1916         return -ENOMEM;
1917 
1918     spin_lock_init(&mp->m_sb_lock);
1919     spin_lock_init(&mp->m_agirotor_lock);
1920     INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1921     spin_lock_init(&mp->m_perag_lock);
1922     mutex_init(&mp->m_growlock);
1923     INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
1924     INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1925     mp->m_kobj.kobject.kset = xfs_kset;
1926     /*
1927      * We don't create the finobt per-ag space reservation until after log
1928      * recovery, so we must set this to true so that an ifree transaction
1929      * started during log recovery will not depend on space reservations
1930      * for finobt expansion.
1931      */
1932     mp->m_finobt_nores = true;
1933 
1934     /*
1935      * These can be overridden by the mount option parsing.
1936      */
1937     mp->m_logbufs = -1;
1938     mp->m_logbsize = -1;
1939     mp->m_allocsize_log = 16; /* 64k */
1940 
1941     /*
1942      * Copy binary VFS mount flags we are interested in.
1943      */
1944     if (fc->sb_flags & SB_RDONLY)
1945         set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
1946     if (fc->sb_flags & SB_DIRSYNC)
1947         mp->m_features |= XFS_FEAT_DIRSYNC;
1948     if (fc->sb_flags & SB_SYNCHRONOUS)
1949         mp->m_features |= XFS_FEAT_WSYNC;
1950 
1951     fc->s_fs_info = mp;
1952     fc->ops = &xfs_context_ops;
1953 
1954     return 0;
1955 }
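/*
 * Editorial sketch (userspace; device path is a placeholder, error
 * handling trimmed): the fs_context built by xfs_init_fs_context()
 * is driven step by step by the new mount syscalls.  fsopen()
 * triggers ->init_fs_context(), each fsconfig() string lands in
 * ->parse_param(), and FSCONFIG_CMD_CREATE invokes ->get_tree().
 * Raw syscall(2) is used in case libc lacks wrappers.
 */
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>

int example_new_api_mount(void)
{
    int fsfd, mntfd;

    fsfd = syscall(SYS_fsopen, "xfs", 0);
    if (fsfd < 0)
        return -1;
    syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "source",
        "/dev/sdb1", 0);
    syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
    mntfd = syscall(SYS_fsmount, fsfd, 0, 0);
    return mntfd;   /* attach it with move_mount(2) */
}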
1956 
1957 static struct file_system_type xfs_fs_type = {
1958     .owner          = THIS_MODULE,
1959     .name           = "xfs",
1960     .init_fs_context    = xfs_init_fs_context,
1961     .parameters     = xfs_fs_parameters,
1962     .kill_sb        = kill_block_super,
1963     .fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
1964 };
1965 MODULE_ALIAS_FS("xfs");
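/*
 * Editorial note: MODULE_ALIAS_FS("xfs") expands to
 * MODULE_ALIAS("fs-xfs"), and get_fs_type() issues
 * request_module("fs-xfs") for an unknown filesystem type, which is
 * what lets "mount -t xfs" autoload this module on demand.
 */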
1966 
1967 STATIC int __init
1968 xfs_init_caches(void)
1969 {
1970     int     error;
1971 
1972     xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
1973                      SLAB_HWCACHE_ALIGN |
1974                      SLAB_RECLAIM_ACCOUNT |
1975                      SLAB_MEM_SPREAD,
1976                      NULL);
1977     if (!xfs_buf_cache)
1978         goto out;
1979 
1980     xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
1981                         sizeof(struct xlog_ticket),
1982                         0, 0, NULL);
1983     if (!xfs_log_ticket_cache)
1984         goto out_destroy_buf_cache;
1985 
1986     error = xfs_btree_init_cur_caches();
1987     if (error)
1988         goto out_destroy_log_ticket_cache;
1989 
1990     error = xfs_defer_init_item_caches();
1991     if (error)
1992         goto out_destroy_btree_cur_cache;
1993 
1994     xfs_da_state_cache = kmem_cache_create("xfs_da_state",
1995                           sizeof(struct xfs_da_state),
1996                           0, 0, NULL);
1997     if (!xfs_da_state_cache)
1998         goto out_destroy_defer_item_cache;
1999 
2000     xfs_ifork_cache = kmem_cache_create("xfs_ifork",
2001                        sizeof(struct xfs_ifork),
2002                        0, 0, NULL);
2003     if (!xfs_ifork_cache)
2004         goto out_destroy_da_state_cache;
2005 
2006     xfs_trans_cache = kmem_cache_create("xfs_trans",
2007                        sizeof(struct xfs_trans),
2008                        0, 0, NULL);
2009     if (!xfs_trans_cache)
2010         goto out_destroy_ifork_cache;
2011 
2013     /*
2014      * The size of the cache-allocated buf log item is the maximum
2015      * size possible under XFS.  This wastes a little bit of memory,
2016      * but it is much faster.
2017      */
2018     xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
2019                           sizeof(struct xfs_buf_log_item),
2020                           0, 0, NULL);
2021     if (!xfs_buf_item_cache)
2022         goto out_destroy_trans_cache;
2023 
2024     xfs_efd_cache = kmem_cache_create("xfs_efd_item",
2025                     (sizeof(struct xfs_efd_log_item) +
2026                     (XFS_EFD_MAX_FAST_EXTENTS - 1) *
2027                     sizeof(struct xfs_extent)),
2028                     0, 0, NULL);
2029     if (!xfs_efd_cache)
2030         goto out_destroy_buf_item_cache;
2031 
2032     xfs_efi_cache = kmem_cache_create("xfs_efi_item",
2033                      (sizeof(struct xfs_efi_log_item) +
2034                      (XFS_EFI_MAX_FAST_EXTENTS - 1) *
2035                      sizeof(struct xfs_extent)),
2036                      0, 0, NULL);
2037     if (!xfs_efi_cache)
2038         goto out_destroy_efd_cache;
2039 
2040     xfs_inode_cache = kmem_cache_create("xfs_inode",
2041                        sizeof(struct xfs_inode), 0,
2042                        (SLAB_HWCACHE_ALIGN |
2043                         SLAB_RECLAIM_ACCOUNT |
2044                         SLAB_MEM_SPREAD | SLAB_ACCOUNT),
2045                        xfs_fs_inode_init_once);
2046     if (!xfs_inode_cache)
2047         goto out_destroy_efi_cache;
2048 
2049     xfs_ili_cache = kmem_cache_create("xfs_ili",
2050                      sizeof(struct xfs_inode_log_item), 0,
2051                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
2052                      NULL);
2053     if (!xfs_ili_cache)
2054         goto out_destroy_inode_cache;
2055 
2056     xfs_icreate_cache = kmem_cache_create("xfs_icr",
2057                          sizeof(struct xfs_icreate_item),
2058                          0, 0, NULL);
2059     if (!xfs_icreate_cache)
2060         goto out_destroy_ili_cache;
2061 
2062     xfs_rud_cache = kmem_cache_create("xfs_rud_item",
2063                      sizeof(struct xfs_rud_log_item),
2064                      0, 0, NULL);
2065     if (!xfs_rud_cache)
2066         goto out_destroy_icreate_cache;
2067 
2068     xfs_rui_cache = kmem_cache_create("xfs_rui_item",
2069             xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
2070             0, 0, NULL);
2071     if (!xfs_rui_cache)
2072         goto out_destroy_rud_cache;
2073 
2074     xfs_cud_cache = kmem_cache_create("xfs_cud_item",
2075                      sizeof(struct xfs_cud_log_item),
2076                      0, 0, NULL);
2077     if (!xfs_cud_cache)
2078         goto out_destroy_rui_cache;
2079 
2080     xfs_cui_cache = kmem_cache_create("xfs_cui_item",
2081             xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
2082             0, 0, NULL);
2083     if (!xfs_cui_cache)
2084         goto out_destroy_cud_cache;
2085 
2086     xfs_bud_cache = kmem_cache_create("xfs_bud_item",
2087                      sizeof(struct xfs_bud_log_item),
2088                      0, 0, NULL);
2089     if (!xfs_bud_cache)
2090         goto out_destroy_cui_cache;
2091 
2092     xfs_bui_cache = kmem_cache_create("xfs_bui_item",
2093             xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
2094             0, 0, NULL);
2095     if (!xfs_bui_cache)
2096         goto out_destroy_bud_cache;
2097 
2098     xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
2099                         sizeof(struct xfs_attrd_log_item),
2100                         0, 0, NULL);
2101     if (!xfs_attrd_cache)
2102         goto out_destroy_bui_cache;
2103 
2104     xfs_attri_cache = kmem_cache_create("xfs_attri_item",
2105                         sizeof(struct xfs_attri_log_item),
2106                         0, 0, NULL);
2107     if (!xfs_attri_cache)
2108         goto out_destroy_attrd_cache;
2109 
2110     xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
2111                          sizeof(struct xfs_iunlink_item),
2112                          0, 0, NULL);
2113     if (!xfs_iunlink_cache)
2114         goto out_destroy_attri_cache;
2115 
2116     return 0;
2117 
2118  out_destroy_attri_cache:
2119     kmem_cache_destroy(xfs_attri_cache);
2120  out_destroy_attrd_cache:
2121     kmem_cache_destroy(xfs_attrd_cache);
2122  out_destroy_bui_cache:
2123     kmem_cache_destroy(xfs_bui_cache);
2124  out_destroy_bud_cache:
2125     kmem_cache_destroy(xfs_bud_cache);
2126  out_destroy_cui_cache:
2127     kmem_cache_destroy(xfs_cui_cache);
2128  out_destroy_cud_cache:
2129     kmem_cache_destroy(xfs_cud_cache);
2130  out_destroy_rui_cache:
2131     kmem_cache_destroy(xfs_rui_cache);
2132  out_destroy_rud_cache:
2133     kmem_cache_destroy(xfs_rud_cache);
2134  out_destroy_icreate_cache:
2135     kmem_cache_destroy(xfs_icreate_cache);
2136  out_destroy_ili_cache:
2137     kmem_cache_destroy(xfs_ili_cache);
2138  out_destroy_inode_cache:
2139     kmem_cache_destroy(xfs_inode_cache);
2140  out_destroy_efi_cache:
2141     kmem_cache_destroy(xfs_efi_cache);
2142  out_destroy_efd_cache:
2143     kmem_cache_destroy(xfs_efd_cache);
2144  out_destroy_buf_item_cache:
2145     kmem_cache_destroy(xfs_buf_item_cache);
2146  out_destroy_trans_cache:
2147     kmem_cache_destroy(xfs_trans_cache);
2148  out_destroy_ifork_cache:
2149     kmem_cache_destroy(xfs_ifork_cache);
2150  out_destroy_da_state_cache:
2151     kmem_cache_destroy(xfs_da_state_cache);
2152  out_destroy_defer_item_cache:
2153     xfs_defer_destroy_item_caches();
2154  out_destroy_btree_cur_cache:
2155     xfs_btree_destroy_cur_caches();
2156  out_destroy_log_ticket_cache:
2157     kmem_cache_destroy(xfs_log_ticket_cache);
2158  out_destroy_buf_cache:
2159     kmem_cache_destroy(xfs_buf_cache);
2160  out:
2161     return -ENOMEM;
2162 }
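/*
 * Editorial sketch of the idiom used throughout xfs_init_caches():
 * each allocation is paired with a cleanup label placed below it,
 * and the labels run in reverse order, so a failure at step N
 * unwinds exactly the N-1 steps that succeeded.  A hypothetical
 * two-step version (names invented, same slab API as above):
 */
struct example_item {
    int payload;
};

static struct kmem_cache *example_a_cache;
static struct kmem_cache *example_b_cache;

static int __init example_init_caches(void)
{
    example_a_cache = kmem_cache_create("example_a",
                    sizeof(struct example_item), 0, 0, NULL);
    if (!example_a_cache)
        goto out;

    example_b_cache = kmem_cache_create("example_b",
                    sizeof(struct example_item), 0, 0, NULL);
    if (!example_b_cache)
        goto out_destroy_a;

    return 0;

 out_destroy_a:
    kmem_cache_destroy(example_a_cache);
 out:
    return -ENOMEM;
}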
2163 
2164 STATIC void
2165 xfs_destroy_caches(void)
2166 {
2167     /*
2168      * Make sure all delayed rcu free are flushed before we
2169      * destroy caches.
2170      */
2171     rcu_barrier();
2172     kmem_cache_destroy(xfs_iunlink_cache);
2173     kmem_cache_destroy(xfs_attri_cache);
2174     kmem_cache_destroy(xfs_attrd_cache);
2175     kmem_cache_destroy(xfs_bui_cache);
2176     kmem_cache_destroy(xfs_bud_cache);
2177     kmem_cache_destroy(xfs_cui_cache);
2178     kmem_cache_destroy(xfs_cud_cache);
2179     kmem_cache_destroy(xfs_rui_cache);
2180     kmem_cache_destroy(xfs_rud_cache);
2181     kmem_cache_destroy(xfs_icreate_cache);
2182     kmem_cache_destroy(xfs_ili_cache);
2183     kmem_cache_destroy(xfs_inode_cache);
2184     kmem_cache_destroy(xfs_efi_cache);
2185     kmem_cache_destroy(xfs_efd_cache);
2186     kmem_cache_destroy(xfs_buf_item_cache);
2187     kmem_cache_destroy(xfs_trans_cache);
2188     kmem_cache_destroy(xfs_ifork_cache);
2189     kmem_cache_destroy(xfs_da_state_cache);
2190     xfs_defer_destroy_item_caches();
2191     xfs_btree_destroy_cur_caches();
2192     kmem_cache_destroy(xfs_log_ticket_cache);
2193     kmem_cache_destroy(xfs_buf_cache);
2194 }
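/*
 * Editorial note on the rcu_barrier() above: objects such as inodes
 * are returned to their caches from call_rcu() callbacks, so frees
 * can still be in flight at module exit.  The general pattern for
 * any cache whose objects are freed under RCU (hypothetical helper):
 */
static void example_destroy_rcu_backed_cache(struct kmem_cache *cache)
{
    /* Wait for every pending call_rcu() callback to run... */
    rcu_barrier();
    /* ...only then is it safe to destroy the backing cache. */
    kmem_cache_destroy(cache);
}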
2195 
2196 STATIC int __init
2197 xfs_init_workqueues(void)
2198 {
2199     /*
2200      * The allocation workqueue can be used in memory reclaim situations
2201      * (writepage path), and parallelism is only limited by the number of
2202      * AGs in all the filesystems mounted. Hence use the default large
2203      * max_active value for this workqueue.
2204      */
2205     xfs_alloc_wq = alloc_workqueue("xfsalloc",
2206             XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
2207     if (!xfs_alloc_wq)
2208         return -ENOMEM;
2209 
2210     xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
2211             0);
2212     if (!xfs_discard_wq)
2213         goto out_free_alloc_wq;
2214 
2215     return 0;
2216 out_free_alloc_wq:
2217     destroy_workqueue(xfs_alloc_wq);
2218     return -ENOMEM;
2219 }
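/*
 * Editorial note: a max_active of 0 asks alloc_workqueue() for the
 * default concurrency limit, which is how xfs_alloc_wq gets the
 * "large max_active" behaviour described above.  A strictly
 * serialized variant would look like this (hypothetical name):
 */
static struct workqueue_struct *example_serial_wq;

static int __init example_init_serial_wq(void)
{
    /* WQ_UNBOUND with max_active == 1: one item at a time, any CPU. */
    example_serial_wq = alloc_workqueue("example_serial",
            WQ_UNBOUND, 1);
    return example_serial_wq ? 0 : -ENOMEM;
}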
2220 
2221 STATIC void
2222 xfs_destroy_workqueues(void)
2223 {
2224     destroy_workqueue(xfs_discard_wq);
2225     destroy_workqueue(xfs_alloc_wq);
2226 }
2227 
2228 #ifdef CONFIG_HOTPLUG_CPU
2229 static int
2230 xfs_cpu_dead(
2231     unsigned int        cpu)
2232 {
2233     struct xfs_mount    *mp, *n;
2234 
2235     spin_lock(&xfs_mount_list_lock);
2236     list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
2237         spin_unlock(&xfs_mount_list_lock);
2238         xfs_inodegc_cpu_dead(mp, cpu);
2239         xlog_cil_pcp_dead(mp->m_log, cpu);
2240         spin_lock(&xfs_mount_list_lock);
2241     }
2242     spin_unlock(&xfs_mount_list_lock);
2243     return 0;
2244 }
2245 
2246 static int __init
2247 xfs_cpu_hotplug_init(void)
2248 {
2249     int error;
2250 
2251     error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
2252             xfs_cpu_dead);
2253     if (error < 0)
2254         xfs_alert(NULL,
2255 "Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
2256             error);
2257     return error;
2258 }
2259 
2260 static void
2261 xfs_cpu_hotplug_destroy(void)
2262 {
2263     cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
2264 }
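/*
 * Editorial sketch: subsystems without a dedicated constant in enum
 * cpuhp_state request a dynamic slot instead; cpuhp_setup_state_nocalls()
 * then returns the allocated state number, which must be stashed for
 * later removal (hypothetical names, reusing xfs_cpu_dead from above):
 */
static int example_cpuhp_state;

static int __init example_hotplug_init(void)
{
    int ret;

    ret = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
            "example:dead", NULL, xfs_cpu_dead);
    if (ret < 0)
        return ret;
    example_cpuhp_state = ret;  /* dynamic states return their number */
    return 0;
}

static void example_hotplug_destroy(void)
{
    cpuhp_remove_state_nocalls(example_cpuhp_state);
}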
2265 
2266 #else /* !CONFIG_HOTPLUG_CPU */
2267 static inline int xfs_cpu_hotplug_init(void) { return 0; }
2268 static inline void xfs_cpu_hotplug_destroy(void) {}
2269 #endif
2270 
2271 STATIC int __init
2272 init_xfs_fs(void)
2273 {
2274     int         error;
2275 
2276     xfs_check_ondisk_structs();
2277 
2278     printk(KERN_INFO XFS_VERSION_STRING " with "
2279              XFS_BUILD_OPTIONS " enabled\n");
2280 
2281     xfs_dir_startup();
2282 
2283     error = xfs_cpu_hotplug_init();
2284     if (error)
2285         goto out;
2286 
2287     error = xfs_init_caches();
2288     if (error)
2289         goto out_destroy_hp;
2290 
2291     error = xfs_init_workqueues();
2292     if (error)
2293         goto out_destroy_caches;
2294 
2295     error = xfs_mru_cache_init();
2296     if (error)
2297         goto out_destroy_wq;
2298 
2299     error = xfs_init_procfs();
2300     if (error)
2301         goto out_mru_cache_uninit;
2302 
2303     error = xfs_sysctl_register();
2304     if (error)
2305         goto out_cleanup_procfs;
2306 
2307     xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2308     if (!xfs_kset) {
2309         error = -ENOMEM;
2310         goto out_sysctl_unregister;
2311     }
2312 
2313     xfsstats.xs_kobj.kobject.kset = xfs_kset;
2314 
2315     xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2316     if (!xfsstats.xs_stats) {
2317         error = -ENOMEM;
2318         goto out_kset_unregister;
2319     }
2320 
2321     error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2322                    "stats");
2323     if (error)
2324         goto out_free_stats;
2325 
2326 #ifdef DEBUG
2327     xfs_dbg_kobj.kobject.kset = xfs_kset;
2328     error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2329     if (error)
2330         goto out_remove_stats_kobj;
2331 #endif
2332 
2333     error = xfs_qm_init();
2334     if (error)
2335         goto out_remove_dbg_kobj;
2336 
2337     error = register_filesystem(&xfs_fs_type);
2338     if (error)
2339         goto out_qm_exit;
2340     return 0;
2341 
2342  out_qm_exit:
2343     xfs_qm_exit();
2344  out_remove_dbg_kobj:
2345 #ifdef DEBUG
2346     xfs_sysfs_del(&xfs_dbg_kobj);
2347  out_remove_stats_kobj:
2348 #endif
2349     xfs_sysfs_del(&xfsstats.xs_kobj);
2350  out_free_stats:
2351     free_percpu(xfsstats.xs_stats);
2352  out_kset_unregister:
2353     kset_unregister(xfs_kset);
2354  out_sysctl_unregister:
2355     xfs_sysctl_unregister();
2356  out_cleanup_procfs:
2357     xfs_cleanup_procfs();
2358  out_mru_cache_uninit:
2359     xfs_mru_cache_uninit();
2360  out_destroy_wq:
2361     xfs_destroy_workqueues();
2362  out_destroy_caches:
2363     xfs_destroy_caches();
2364  out_destroy_hp:
2365     xfs_cpu_hotplug_destroy();
2366  out:
2367     return error;
2368 }
2369 
2370 STATIC void __exit
2371 exit_xfs_fs(void)
2372 {
2373     xfs_qm_exit();
2374     unregister_filesystem(&xfs_fs_type);
2375 #ifdef DEBUG
2376     xfs_sysfs_del(&xfs_dbg_kobj);
2377 #endif
2378     xfs_sysfs_del(&xfsstats.xs_kobj);
2379     free_percpu(xfsstats.xs_stats);
2380     kset_unregister(xfs_kset);
2381     xfs_sysctl_unregister();
2382     xfs_cleanup_procfs();
2383     xfs_mru_cache_uninit();
2384     xfs_destroy_workqueues();
2385     xfs_destroy_caches();
2386     xfs_uuid_table_free();
2387     xfs_cpu_hotplug_destroy();
2388 }
2389 
2390 module_init(init_xfs_fs);
2391 module_exit(exit_xfs_fs);
2392 
2393 MODULE_AUTHOR("Silicon Graphics, Inc.");
2394 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2395 MODULE_LICENSE("GPL");