Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 
0003 #include <linux/ceph/ceph_debug.h>
0004 
0005 #include <linux/backing-dev.h>
0006 #include <linux/ctype.h>
0007 #include <linux/fs.h>
0008 #include <linux/inet.h>
0009 #include <linux/in6.h>
0010 #include <linux/module.h>
0011 #include <linux/mount.h>
0012 #include <linux/fs_context.h>
0013 #include <linux/fs_parser.h>
0014 #include <linux/sched.h>
0015 #include <linux/seq_file.h>
0016 #include <linux/slab.h>
0017 #include <linux/statfs.h>
0018 #include <linux/string.h>
0019 
0020 #include "super.h"
0021 #include "mds_client.h"
0022 #include "cache.h"
0023 
0024 #include <linux/ceph/ceph_features.h>
0025 #include <linux/ceph/decode.h>
0026 #include <linux/ceph/mon_client.h>
0027 #include <linux/ceph/auth.h>
0028 #include <linux/ceph/debugfs.h>
0029 
0030 #include <uapi/linux/magic.h>
0031 
/*
 * List of ceph_fs_client instances, linked through their metric_wakeup
 * member.  create_fs_client() appends to it and destroy_fs_client()
 * removes from it; both do so with ceph_fsc_lock held.
 */
static LIST_HEAD(ceph_fsc_list);
static DEFINE_SPINLOCK(ceph_fsc_lock);
0034 
0035 /*
0036  * Ceph superblock operations
0037  *
0038  * Handle the basics of mounting, unmounting.
0039  */
0040 
0041 /*
0042  * super ops
0043  */
0044 static void ceph_put_super(struct super_block *s)
0045 {
0046     struct ceph_fs_client *fsc = ceph_sb_to_client(s);
0047 
0048     dout("put_super\n");
0049     ceph_mdsc_close_sessions(fsc->mdsc);
0050 }
0051 
0052 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
0053 {
0054     struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
0055     struct ceph_mon_client *monc = &fsc->client->monc;
0056     struct ceph_statfs st;
0057     int i, err;
0058     u64 data_pool;
0059 
0060     if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
0061         data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
0062     } else {
0063         data_pool = CEPH_NOPOOL;
0064     }
0065 
0066     dout("statfs\n");
0067     err = ceph_monc_do_statfs(monc, data_pool, &st);
0068     if (err < 0)
0069         return err;
0070 
0071     /* fill in kstatfs */
0072     buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
0073 
0074     /*
0075      * Express utilization in terms of large blocks to avoid
0076      * overflow on 32-bit machines.
0077      */
0078     buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
0079 
0080     /*
0081      * By default use root quota for stats; fallback to overall filesystem
0082      * usage if using 'noquotadf' mount option or if the root dir doesn't
0083      * have max_bytes quota set.
0084      */
0085     if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
0086         !ceph_quota_update_statfs(fsc, buf)) {
0087         buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
0088         buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
0089         buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
0090     }
0091 
0092     /*
0093      * NOTE: for the time being, we make bsize == frsize to humor
0094      * not-yet-ancient versions of glibc that are broken.
0095      * Someday, we will probably want to report a real block
0096      * size...  whatever that may mean for a network file system!
0097      */
0098     buf->f_bsize = buf->f_frsize;
0099 
0100     buf->f_files = le64_to_cpu(st.num_objects);
0101     buf->f_ffree = -1;
0102     buf->f_namelen = NAME_MAX;
0103 
0104     /* Must convert the fsid, for consistent values across arches */
0105     buf->f_fsid.val[0] = 0;
0106     mutex_lock(&monc->mutex);
0107     for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
0108         buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
0109     mutex_unlock(&monc->mutex);
0110 
0111     /* fold the fs_cluster_id into the upper bits */
0112     buf->f_fsid.val[1] = monc->fs_cluster_id;
0113 
0114     return 0;
0115 }
0116 
0117 static int ceph_sync_fs(struct super_block *sb, int wait)
0118 {
0119     struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
0120 
0121     if (!wait) {
0122         dout("sync_fs (non-blocking)\n");
0123         ceph_flush_dirty_caps(fsc->mdsc);
0124         dout("sync_fs (non-blocking) done\n");
0125         return 0;
0126     }
0127 
0128     dout("sync_fs (blocking)\n");
0129     ceph_osdc_sync(&fsc->client->osdc);
0130     ceph_mdsc_sync(fsc->mdsc);
0131     dout("sync_fs (blocking) done\n");
0132     return 0;
0133 }
0134 
/*
 * mount options
 *
 * Tokens returned by fs_parse() for the entries of
 * ceph_mount_parameters[] and dispatched on in ceph_parse_mount_param().
 */
enum {
	/* options taking an integer argument */
	Opt_wsize,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_caps_max,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,

	/* options taking a string argument */
	Opt_snapdirname,
	Opt_mds_namespace,
	Opt_recover_session,
	Opt_source,
	Opt_mon_addr,

	/* flag options (Opt_fscache additionally has a "fsc=<string>" form) */
	Opt_dirstat,
	Opt_rbytes,
	Opt_asyncreaddir,
	Opt_dcache,
	Opt_ino32,
	Opt_fscache,
	Opt_poolperm,
	Opt_require_active_mds,
	Opt_acl,
	Opt_quotadf,
	Opt_copyfrom,
	Opt_wsync,
	Opt_pagecache,
};
0169 
/*
 * Values of the "recover_session" mount option; see ceph_param_recover[]
 * for the option strings that map to them.
 */
enum ceph_recover_session_mode {
	ceph_recover_session_no,	/* "recover_session=no" */
	ceph_recover_session_clean	/* "recover_session=clean" */
};
0174 
0175 static const struct constant_table ceph_param_recover[] = {
0176     { "no",     ceph_recover_session_no },
0177     { "clean",  ceph_recover_session_clean },
0178     {}
0179 };
0180 
0181 static const struct fs_parameter_spec ceph_mount_parameters[] = {
0182     fsparam_flag_no ("acl",             Opt_acl),
0183     fsparam_flag_no ("asyncreaddir",        Opt_asyncreaddir),
0184     fsparam_s32 ("caps_max",            Opt_caps_max),
0185     fsparam_u32 ("caps_wanted_delay_max",   Opt_caps_wanted_delay_max),
0186     fsparam_u32 ("caps_wanted_delay_min",   Opt_caps_wanted_delay_min),
0187     fsparam_u32 ("write_congestion_kb",     Opt_congestion_kb),
0188     fsparam_flag_no ("copyfrom",            Opt_copyfrom),
0189     fsparam_flag_no ("dcache",          Opt_dcache),
0190     fsparam_flag_no ("dirstat",         Opt_dirstat),
0191     fsparam_flag_no ("fsc",             Opt_fscache), // fsc|nofsc
0192     fsparam_string  ("fsc",             Opt_fscache), // fsc=...
0193     fsparam_flag_no ("ino32",           Opt_ino32),
0194     fsparam_string  ("mds_namespace",       Opt_mds_namespace),
0195     fsparam_flag_no ("poolperm",            Opt_poolperm),
0196     fsparam_flag_no ("quotadf",         Opt_quotadf),
0197     fsparam_u32 ("rasize",          Opt_rasize),
0198     fsparam_flag_no ("rbytes",          Opt_rbytes),
0199     fsparam_u32 ("readdir_max_bytes",       Opt_readdir_max_bytes),
0200     fsparam_u32 ("readdir_max_entries",     Opt_readdir_max_entries),
0201     fsparam_enum    ("recover_session",     Opt_recover_session, ceph_param_recover),
0202     fsparam_flag_no ("require_active_mds",      Opt_require_active_mds),
0203     fsparam_u32 ("rsize",           Opt_rsize),
0204     fsparam_string  ("snapdirname",         Opt_snapdirname),
0205     fsparam_string  ("source",          Opt_source),
0206     fsparam_string  ("mon_addr",            Opt_mon_addr),
0207     fsparam_u32 ("wsize",           Opt_wsize),
0208     fsparam_flag_no ("wsync",           Opt_wsync),
0209     fsparam_flag_no ("pagecache",           Opt_pagecache),
0210     {}
0211 };
0212 
/*
 * Parsing state stored in fs_context::fs_private while mount
 * parameters are being parsed.
 */
struct ceph_parse_opts_ctx {
	/* options passed to ceph_parse_param() and ceph_parse_mon_ips() */
	struct ceph_options *copts;
	/* filesystem-level options filled in by ceph_parse_mount_param() */
	struct ceph_mount_options *opts;
};
0217 
/*
 * Canonicalize @path in place: collapse every run of '/' into a single
 * '/', then drop a trailing '/' unless it is the only character left.
 *
 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
 */
static void canonicalize_path(char *path)
{
	const char *in;
	char *out = path;

	for (in = path; *in != '\0'; in++) {
		/* skip a slash that directly follows an already kept slash */
		if (*in == '/' && out > path && out[-1] == '/')
			continue;
		*out++ = *in;
	}

	if (out - path > 1 && out[-1] == '/')
		out--;
	*out = '\0';
}
0237 
0238 /*
0239  * Check if the mds namespace in ceph_mount_options matches
0240  * the passed in namespace string. First time match (when
0241  * ->mds_namespace is NULL) is treated specially, since
0242  * ->mds_namespace needs to be initialized by the caller.
0243  */
0244 static int namespace_equals(struct ceph_mount_options *fsopt,
0245                 const char *namespace, size_t len)
0246 {
0247     return !(fsopt->mds_namespace &&
0248          (strlen(fsopt->mds_namespace) != len ||
0249           strncmp(fsopt->mds_namespace, namespace, len)));
0250 }
0251 
0252 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
0253                  struct fs_context *fc)
0254 {
0255     int r;
0256     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
0257     struct ceph_mount_options *fsopt = pctx->opts;
0258 
0259     if (*dev_name_end != ':')
0260         return invalfc(fc, "separator ':' missing in source");
0261 
0262     r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
0263                    pctx->copts, fc->log.log, ',');
0264     if (r)
0265         return r;
0266 
0267     fsopt->new_dev_syntax = false;
0268     return 0;
0269 }
0270 
0271 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
0272                  struct fs_context *fc)
0273 {
0274     size_t len;
0275     struct ceph_fsid fsid;
0276     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
0277     struct ceph_mount_options *fsopt = pctx->opts;
0278     char *fsid_start, *fs_name_start;
0279 
0280     if (*dev_name_end != '=') {
0281         dout("separator '=' missing in source");
0282         return -EINVAL;
0283     }
0284 
0285     fsid_start = strchr(dev_name, '@');
0286     if (!fsid_start)
0287         return invalfc(fc, "missing cluster fsid");
0288     ++fsid_start; /* start of cluster fsid */
0289 
0290     fs_name_start = strchr(fsid_start, '.');
0291     if (!fs_name_start)
0292         return invalfc(fc, "missing file system name");
0293 
0294     if (ceph_parse_fsid(fsid_start, &fsid))
0295         return invalfc(fc, "Invalid FSID");
0296 
0297     ++fs_name_start; /* start of file system name */
0298     len = dev_name_end - fs_name_start;
0299 
0300     if (!namespace_equals(fsopt, fs_name_start, len))
0301         return invalfc(fc, "Mismatching mds_namespace");
0302     kfree(fsopt->mds_namespace);
0303     fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
0304     if (!fsopt->mds_namespace)
0305         return -ENOMEM;
0306     dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
0307 
0308     fsopt->new_dev_syntax = true;
0309     return 0;
0310 }
0311 
0312 /*
0313  * Parse the source parameter for new device format. Distinguish the device
0314  * spec from the path. Try parsing new device format and fallback to old
0315  * format if needed.
0316  *
0317  * New device syntax will looks like:
0318  *     <device_spec>=/<path>
0319  * where
0320  *     <device_spec> is name@fsid.fsname
0321  *     <path> is optional, but if present must begin with '/'
0322  * (monitor addresses are passed via mount option)
0323  *
0324  * Old device syntax is:
0325  *     <server_spec>[,<server_spec>...]:[<path>]
0326  * where
0327  *     <server_spec> is <ip>[:<port>]
0328  *     <path> is optional, but if present must begin with '/'
0329  */
0330 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
0331 {
0332     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
0333     struct ceph_mount_options *fsopt = pctx->opts;
0334     char *dev_name = param->string, *dev_name_end;
0335     int ret;
0336 
0337     dout("%s '%s'\n", __func__, dev_name);
0338     if (!dev_name || !*dev_name)
0339         return invalfc(fc, "Empty source");
0340 
0341     dev_name_end = strchr(dev_name, '/');
0342     if (dev_name_end) {
0343         /*
0344          * The server_path will include the whole chars from userland
0345          * including the leading '/'.
0346          */
0347         kfree(fsopt->server_path);
0348         fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
0349         if (!fsopt->server_path)
0350             return -ENOMEM;
0351 
0352         canonicalize_path(fsopt->server_path);
0353     } else {
0354         dev_name_end = dev_name + strlen(dev_name);
0355     }
0356 
0357     dev_name_end--;     /* back up to separator */
0358     if (dev_name_end < dev_name)
0359         return invalfc(fc, "Path missing in source");
0360 
0361     dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
0362     if (fsopt->server_path)
0363         dout("server path '%s'\n", fsopt->server_path);
0364 
0365     dout("trying new device syntax");
0366     ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
0367     if (ret) {
0368         if (ret != -EINVAL)
0369             return ret;
0370         dout("trying old device syntax");
0371         ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
0372         if (ret)
0373             return ret;
0374     }
0375 
0376     fc->source = param->string;
0377     param->string = NULL;
0378     return 0;
0379 }
0380 
0381 static int ceph_parse_mon_addr(struct fs_parameter *param,
0382                    struct fs_context *fc)
0383 {
0384     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
0385     struct ceph_mount_options *fsopt = pctx->opts;
0386 
0387     kfree(fsopt->mon_addr);
0388     fsopt->mon_addr = param->string;
0389     param->string = NULL;
0390 
0391     return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
0392                   pctx->copts, fc->log.log, '/');
0393 }
0394 
0395 static int ceph_parse_mount_param(struct fs_context *fc,
0396                   struct fs_parameter *param)
0397 {
0398     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
0399     struct ceph_mount_options *fsopt = pctx->opts;
0400     struct fs_parse_result result;
0401     unsigned int mode;
0402     int token, ret;
0403 
0404     ret = ceph_parse_param(param, pctx->copts, fc->log.log);
0405     if (ret != -ENOPARAM)
0406         return ret;
0407 
0408     token = fs_parse(fc, ceph_mount_parameters, param, &result);
0409     dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
0410     if (token < 0)
0411         return token;
0412 
0413     switch (token) {
0414     case Opt_snapdirname:
0415         kfree(fsopt->snapdir_name);
0416         fsopt->snapdir_name = param->string;
0417         param->string = NULL;
0418         break;
0419     case Opt_mds_namespace:
0420         if (!namespace_equals(fsopt, param->string, strlen(param->string)))
0421             return invalfc(fc, "Mismatching mds_namespace");
0422         kfree(fsopt->mds_namespace);
0423         fsopt->mds_namespace = param->string;
0424         param->string = NULL;
0425         break;
0426     case Opt_recover_session:
0427         mode = result.uint_32;
0428         if (mode == ceph_recover_session_no)
0429             fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
0430         else if (mode == ceph_recover_session_clean)
0431             fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
0432         else
0433             BUG();
0434         break;
0435     case Opt_source:
0436         if (fc->source)
0437             return invalfc(fc, "Multiple sources specified");
0438         return ceph_parse_source(param, fc);
0439     case Opt_mon_addr:
0440         return ceph_parse_mon_addr(param, fc);
0441     case Opt_wsize:
0442         if (result.uint_32 < PAGE_SIZE ||
0443             result.uint_32 > CEPH_MAX_WRITE_SIZE)
0444             goto out_of_range;
0445         fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE);
0446         break;
0447     case Opt_rsize:
0448         if (result.uint_32 < PAGE_SIZE ||
0449             result.uint_32 > CEPH_MAX_READ_SIZE)
0450             goto out_of_range;
0451         fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE);
0452         break;
0453     case Opt_rasize:
0454         fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE);
0455         break;
0456     case Opt_caps_wanted_delay_min:
0457         if (result.uint_32 < 1)
0458             goto out_of_range;
0459         fsopt->caps_wanted_delay_min = result.uint_32;
0460         break;
0461     case Opt_caps_wanted_delay_max:
0462         if (result.uint_32 < 1)
0463             goto out_of_range;
0464         fsopt->caps_wanted_delay_max = result.uint_32;
0465         break;
0466     case Opt_caps_max:
0467         if (result.int_32 < 0)
0468             goto out_of_range;
0469         fsopt->caps_max = result.int_32;
0470         break;
0471     case Opt_readdir_max_entries:
0472         if (result.uint_32 < 1)
0473             goto out_of_range;
0474         fsopt->max_readdir = result.uint_32;
0475         break;
0476     case Opt_readdir_max_bytes:
0477         if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0)
0478             goto out_of_range;
0479         fsopt->max_readdir_bytes = result.uint_32;
0480         break;
0481     case Opt_congestion_kb:
0482         if (result.uint_32 < 1024) /* at least 1M */
0483             goto out_of_range;
0484         fsopt->congestion_kb = result.uint_32;
0485         break;
0486     case Opt_dirstat:
0487         if (!result.negated)
0488             fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
0489         else
0490             fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
0491         break;
0492     case Opt_rbytes:
0493         if (!result.negated)
0494             fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
0495         else
0496             fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
0497         break;
0498     case Opt_asyncreaddir:
0499         if (!result.negated)
0500             fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
0501         else
0502             fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
0503         break;
0504     case Opt_dcache:
0505         if (!result.negated)
0506             fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
0507         else
0508             fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
0509         break;
0510     case Opt_ino32:
0511         if (!result.negated)
0512             fsopt->flags |= CEPH_MOUNT_OPT_INO32;
0513         else
0514             fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
0515         break;
0516 
0517     case Opt_fscache:
0518 #ifdef CONFIG_CEPH_FSCACHE
0519         kfree(fsopt->fscache_uniq);
0520         fsopt->fscache_uniq = NULL;
0521         if (result.negated) {
0522             fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
0523         } else {
0524             fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
0525             fsopt->fscache_uniq = param->string;
0526             param->string = NULL;
0527         }
0528         break;
0529 #else
0530         return invalfc(fc, "fscache support is disabled");
0531 #endif
0532     case Opt_poolperm:
0533         if (!result.negated)
0534             fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
0535         else
0536             fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
0537         break;
0538     case Opt_require_active_mds:
0539         if (!result.negated)
0540             fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
0541         else
0542             fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
0543         break;
0544     case Opt_quotadf:
0545         if (!result.negated)
0546             fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
0547         else
0548             fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
0549         break;
0550     case Opt_copyfrom:
0551         if (!result.negated)
0552             fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
0553         else
0554             fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
0555         break;
0556     case Opt_acl:
0557         if (!result.negated) {
0558 #ifdef CONFIG_CEPH_FS_POSIX_ACL
0559             fc->sb_flags |= SB_POSIXACL;
0560 #else
0561             return invalfc(fc, "POSIX ACL support is disabled");
0562 #endif
0563         } else {
0564             fc->sb_flags &= ~SB_POSIXACL;
0565         }
0566         break;
0567     case Opt_wsync:
0568         if (!result.negated)
0569             fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS;
0570         else
0571             fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
0572         break;
0573     case Opt_pagecache:
0574         if (result.negated)
0575             fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
0576         else
0577             fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
0578         break;
0579     default:
0580         BUG();
0581     }
0582     return 0;
0583 
0584 out_of_range:
0585     return invalfc(fc, "%s out of range", param->key);
0586 }
0587 
0588 static void destroy_mount_options(struct ceph_mount_options *args)
0589 {
0590     dout("destroy_mount_options %p\n", args);
0591     if (!args)
0592         return;
0593 
0594     kfree(args->snapdir_name);
0595     kfree(args->mds_namespace);
0596     kfree(args->server_path);
0597     kfree(args->fscache_uniq);
0598     kfree(args->mon_addr);
0599     kfree(args);
0600 }
0601 
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Returns 0 when both are NULL, -1 when only @s2 is NULL, 1 when only
 * @s1 is NULL, and the strcmp() result otherwise.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
0612 
0613 static int compare_mount_options(struct ceph_mount_options *new_fsopt,
0614                  struct ceph_options *new_opt,
0615                  struct ceph_fs_client *fsc)
0616 {
0617     struct ceph_mount_options *fsopt1 = new_fsopt;
0618     struct ceph_mount_options *fsopt2 = fsc->mount_options;
0619     int ofs = offsetof(struct ceph_mount_options, snapdir_name);
0620     int ret;
0621 
0622     ret = memcmp(fsopt1, fsopt2, ofs);
0623     if (ret)
0624         return ret;
0625 
0626     ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
0627     if (ret)
0628         return ret;
0629 
0630     ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
0631     if (ret)
0632         return ret;
0633 
0634     ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
0635     if (ret)
0636         return ret;
0637 
0638     ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
0639     if (ret)
0640         return ret;
0641 
0642     ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
0643     if (ret)
0644         return ret;
0645 
0646     return ceph_compare_options(new_opt, fsc->client);
0647 }
0648 
0649 /**
0650  * ceph_show_options - Show mount options in /proc/mounts
0651  * @m: seq_file to write to
0652  * @root: root of that (sub)tree
0653  */
0654 static int ceph_show_options(struct seq_file *m, struct dentry *root)
0655 {
0656     struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
0657     struct ceph_mount_options *fsopt = fsc->mount_options;
0658     size_t pos;
0659     int ret;
0660 
0661     /* a comma between MNT/MS and client options */
0662     seq_putc(m, ',');
0663     pos = m->count;
0664 
0665     ret = ceph_print_client_options(m, fsc->client, false);
0666     if (ret)
0667         return ret;
0668 
0669     /* retract our comma if no client options */
0670     if (m->count == pos)
0671         m->count--;
0672 
0673     if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
0674         seq_puts(m, ",dirstat");
0675     if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
0676         seq_puts(m, ",rbytes");
0677     if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
0678         seq_puts(m, ",noasyncreaddir");
0679     if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
0680         seq_puts(m, ",nodcache");
0681     if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
0682         seq_puts(m, ",ino32");
0683     if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
0684         seq_show_option(m, "fsc", fsopt->fscache_uniq);
0685     }
0686     if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
0687         seq_puts(m, ",nopoolperm");
0688     if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
0689         seq_puts(m, ",noquotadf");
0690 
0691 #ifdef CONFIG_CEPH_FS_POSIX_ACL
0692     if (root->d_sb->s_flags & SB_POSIXACL)
0693         seq_puts(m, ",acl");
0694     else
0695         seq_puts(m, ",noacl");
0696 #endif
0697 
0698     if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
0699         seq_puts(m, ",copyfrom");
0700 
0701     /* dump mds_namespace when old device syntax is in use */
0702     if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
0703         seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
0704 
0705     if (fsopt->mon_addr)
0706         seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
0707 
0708     if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
0709         seq_show_option(m, "recover_session", "clean");
0710 
0711     if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
0712         seq_puts(m, ",wsync");
0713 
0714     if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
0715         seq_puts(m, ",nopagecache");
0716 
0717     if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
0718         seq_printf(m, ",wsize=%u", fsopt->wsize);
0719     if (fsopt->rsize != CEPH_MAX_READ_SIZE)
0720         seq_printf(m, ",rsize=%u", fsopt->rsize);
0721     if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
0722         seq_printf(m, ",rasize=%u", fsopt->rasize);
0723     if (fsopt->congestion_kb != default_congestion_kb())
0724         seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
0725     if (fsopt->caps_max)
0726         seq_printf(m, ",caps_max=%d", fsopt->caps_max);
0727     if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
0728         seq_printf(m, ",caps_wanted_delay_min=%u",
0729              fsopt->caps_wanted_delay_min);
0730     if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
0731         seq_printf(m, ",caps_wanted_delay_max=%u",
0732                fsopt->caps_wanted_delay_max);
0733     if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
0734         seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
0735     if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
0736         seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
0737     if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
0738         seq_show_option(m, "snapdirname", fsopt->snapdir_name);
0739 
0740     return 0;
0741 }
0742 
0743 /*
0744  * handle any mon messages the standard library doesn't understand.
0745  * return error if we don't either.
0746  */
0747 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
0748 {
0749     struct ceph_fs_client *fsc = client->private;
0750     int type = le16_to_cpu(msg->hdr.type);
0751 
0752     switch (type) {
0753     case CEPH_MSG_MDS_MAP:
0754         ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
0755         return 0;
0756     case CEPH_MSG_FS_MAP_USER:
0757         ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
0758         return 0;
0759     default:
0760         return -1;
0761     }
0762 }
0763 
0764 /*
0765  * create a new fs client
0766  *
0767  * Success or not, this function consumes @fsopt and @opt.
0768  */
0769 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
0770                     struct ceph_options *opt)
0771 {
0772     struct ceph_fs_client *fsc;
0773     int err;
0774 
0775     fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
0776     if (!fsc) {
0777         err = -ENOMEM;
0778         goto fail;
0779     }
0780 
0781     fsc->client = ceph_create_client(opt, fsc);
0782     if (IS_ERR(fsc->client)) {
0783         err = PTR_ERR(fsc->client);
0784         goto fail;
0785     }
0786     opt = NULL; /* fsc->client now owns this */
0787 
0788     fsc->client->extra_mon_dispatch = extra_mon_dispatch;
0789     ceph_set_opt(fsc->client, ABORT_ON_FULL);
0790 
0791     if (!fsopt->mds_namespace) {
0792         ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
0793                    0, true);
0794     } else {
0795         ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
0796                    0, false);
0797     }
0798 
0799     fsc->mount_options = fsopt;
0800 
0801     fsc->sb = NULL;
0802     fsc->mount_state = CEPH_MOUNT_MOUNTING;
0803     fsc->filp_gen = 1;
0804     fsc->have_copy_from2 = true;
0805 
0806     atomic_long_set(&fsc->writeback_count, 0);
0807     fsc->write_congested = false;
0808 
0809     err = -ENOMEM;
0810     /*
0811      * The number of concurrent works can be high but they don't need
0812      * to be processed in parallel, limit concurrency.
0813      */
0814     fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
0815     if (!fsc->inode_wq)
0816         goto fail_client;
0817     fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
0818     if (!fsc->cap_wq)
0819         goto fail_inode_wq;
0820 
0821     hash_init(fsc->async_unlink_conflict);
0822     spin_lock_init(&fsc->async_unlink_conflict_lock);
0823 
0824     spin_lock(&ceph_fsc_lock);
0825     list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
0826     spin_unlock(&ceph_fsc_lock);
0827 
0828     return fsc;
0829 
0830 fail_inode_wq:
0831     destroy_workqueue(fsc->inode_wq);
0832 fail_client:
0833     ceph_destroy_client(fsc->client);
0834 fail:
0835     kfree(fsc);
0836     if (opt)
0837         ceph_destroy_options(opt);
0838     destroy_mount_options(fsopt);
0839     return ERR_PTR(err);
0840 }
0841 
0842 static void flush_fs_workqueues(struct ceph_fs_client *fsc)
0843 {
0844     flush_workqueue(fsc->inode_wq);
0845     flush_workqueue(fsc->cap_wq);
0846 }
0847 
0848 static void destroy_fs_client(struct ceph_fs_client *fsc)
0849 {
0850     dout("destroy_fs_client %p\n", fsc);
0851 
0852     spin_lock(&ceph_fsc_lock);
0853     list_del(&fsc->metric_wakeup);
0854     spin_unlock(&ceph_fsc_lock);
0855 
0856     ceph_mdsc_destroy(fsc);
0857     destroy_workqueue(fsc->inode_wq);
0858     destroy_workqueue(fsc->cap_wq);
0859 
0860     destroy_mount_options(fsc->mount_options);
0861 
0862     ceph_destroy_client(fsc->client);
0863 
0864     kfree(fsc);
0865     dout("destroy_fs_client %p done\n", fsc);
0866 }
0867 
0868 /*
0869  * caches
0870  */
0871 struct kmem_cache *ceph_inode_cachep;
0872 struct kmem_cache *ceph_cap_cachep;
0873 struct kmem_cache *ceph_cap_snap_cachep;
0874 struct kmem_cache *ceph_cap_flush_cachep;
0875 struct kmem_cache *ceph_dentry_cachep;
0876 struct kmem_cache *ceph_file_cachep;
0877 struct kmem_cache *ceph_dir_file_cachep;
0878 struct kmem_cache *ceph_mds_request_cachep;
0879 mempool_t *ceph_wb_pagevec_pool;
0880 
0881 static void ceph_inode_init_once(void *foo)
0882 {
0883     struct ceph_inode_info *ci = foo;
0884     inode_init_once(&ci->netfs.inode);
0885 }
0886 
0887 static int __init init_caches(void)
0888 {
0889     int error = -ENOMEM;
0890 
0891     ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
0892                       sizeof(struct ceph_inode_info),
0893                       __alignof__(struct ceph_inode_info),
0894                       SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
0895                       SLAB_ACCOUNT, ceph_inode_init_once);
0896     if (!ceph_inode_cachep)
0897         return -ENOMEM;
0898 
0899     ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
0900     if (!ceph_cap_cachep)
0901         goto bad_cap;
0902     ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD);
0903     if (!ceph_cap_snap_cachep)
0904         goto bad_cap_snap;
0905     ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
0906                        SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
0907     if (!ceph_cap_flush_cachep)
0908         goto bad_cap_flush;
0909 
0910     ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
0911                     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
0912     if (!ceph_dentry_cachep)
0913         goto bad_dentry;
0914 
0915     ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
0916     if (!ceph_file_cachep)
0917         goto bad_file;
0918 
0919     ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
0920     if (!ceph_dir_file_cachep)
0921         goto bad_dir_file;
0922 
0923     ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD);
0924     if (!ceph_mds_request_cachep)
0925         goto bad_mds_req;
0926 
0927     ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT);
0928     if (!ceph_wb_pagevec_pool)
0929         goto bad_pagevec_pool;
0930 
0931     return 0;
0932 
0933 bad_pagevec_pool:
0934     kmem_cache_destroy(ceph_mds_request_cachep);
0935 bad_mds_req:
0936     kmem_cache_destroy(ceph_dir_file_cachep);
0937 bad_dir_file:
0938     kmem_cache_destroy(ceph_file_cachep);
0939 bad_file:
0940     kmem_cache_destroy(ceph_dentry_cachep);
0941 bad_dentry:
0942     kmem_cache_destroy(ceph_cap_flush_cachep);
0943 bad_cap_flush:
0944     kmem_cache_destroy(ceph_cap_snap_cachep);
0945 bad_cap_snap:
0946     kmem_cache_destroy(ceph_cap_cachep);
0947 bad_cap:
0948     kmem_cache_destroy(ceph_inode_cachep);
0949     return error;
0950 }
0951 
0952 static void destroy_caches(void)
0953 {
0954     /*
0955      * Make sure all delayed rcu free inodes are flushed before we
0956      * destroy cache.
0957      */
0958     rcu_barrier();
0959 
0960     kmem_cache_destroy(ceph_inode_cachep);
0961     kmem_cache_destroy(ceph_cap_cachep);
0962     kmem_cache_destroy(ceph_cap_snap_cachep);
0963     kmem_cache_destroy(ceph_cap_flush_cachep);
0964     kmem_cache_destroy(ceph_dentry_cachep);
0965     kmem_cache_destroy(ceph_file_cachep);
0966     kmem_cache_destroy(ceph_dir_file_cachep);
0967     kmem_cache_destroy(ceph_mds_request_cachep);
0968     mempool_destroy(ceph_wb_pagevec_pool);
0969 }
0970 
0971 static void __ceph_umount_begin(struct ceph_fs_client *fsc)
0972 {
0973     ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
0974     ceph_mdsc_force_umount(fsc->mdsc);
0975     fsc->filp_gen++; // invalidate open files
0976 }
0977 
0978 /*
0979  * ceph_umount_begin - initiate forced umount.  Tear down the
0980  * mount, skipping steps that may hang while waiting for server(s).
0981  */
0982 void ceph_umount_begin(struct super_block *sb)
0983 {
0984     struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
0985 
0986     dout("ceph_umount_begin - starting forced umount\n");
0987     if (!fsc)
0988         return;
0989     fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
0990     __ceph_umount_begin(fsc);
0991 }
0992 
/*
 * Superblock operations for ceph; installed on each new superblock by
 * ceph_set_super().
 */
static const struct super_operations ceph_super_ops = {
    .alloc_inode    = ceph_alloc_inode,
    .free_inode = ceph_free_inode,
    .write_inode    = ceph_write_inode,
    .drop_inode = generic_delete_inode,
    .evict_inode    = ceph_evict_inode,
    .sync_fs        = ceph_sync_fs,
    .put_super  = ceph_put_super,
    .show_options   = ceph_show_options,
    .statfs     = ceph_statfs,
    .umount_begin   = ceph_umount_begin,
};
1005 
1006 /*
1007  * Bootstrap mount by opening the root directory.  Note the mount
1008  * @started time from caller, and time out if this takes too long.
1009  */
1010 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
1011                        const char *path,
1012                        unsigned long started)
1013 {
1014     struct ceph_mds_client *mdsc = fsc->mdsc;
1015     struct ceph_mds_request *req = NULL;
1016     int err;
1017     struct dentry *root;
1018 
1019     /* open dir */
1020     dout("open_root_inode opening '%s'\n", path);
1021     req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1022     if (IS_ERR(req))
1023         return ERR_CAST(req);
1024     req->r_path1 = kstrdup(path, GFP_NOFS);
1025     if (!req->r_path1) {
1026         root = ERR_PTR(-ENOMEM);
1027         goto out;
1028     }
1029 
1030     req->r_ino1.ino = CEPH_INO_ROOT;
1031     req->r_ino1.snap = CEPH_NOSNAP;
1032     req->r_started = started;
1033     req->r_timeout = fsc->client->options->mount_timeout;
1034     req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
1035     req->r_num_caps = 2;
1036     err = ceph_mdsc_do_request(mdsc, NULL, req);
1037     if (err == 0) {
1038         struct inode *inode = req->r_target_inode;
1039         req->r_target_inode = NULL;
1040         dout("open_root_inode success\n");
1041         root = d_make_root(inode);
1042         if (!root) {
1043             root = ERR_PTR(-ENOMEM);
1044             goto out;
1045         }
1046         dout("open_root_inode success, root dentry is %p\n", root);
1047     } else {
1048         root = ERR_PTR(err);
1049     }
1050 out:
1051     ceph_mdsc_put_request(req);
1052     return root;
1053 }
1054 
/*
 * mount: join the ceph cluster, and open root directory.
 *
 * Runs entirely under fsc->client->mount_mutex.  If the superblock has no
 * root dentry yet, a session is opened, fscache is registered when the
 * FSCACHE mount option is set, debugfs is initialised and the root dentry
 * is opened and stored in sb->s_root.  Otherwise the existing s_root is
 * reused.
 *
 * Returns a referenced root dentry with the client marked
 * CEPH_MOUNT_MOUNTED, or an ERR_PTR() on failure.
 */
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
                      struct fs_context *fc)
{
    int err;
    unsigned long started = jiffies;  /* note the start time */
    struct dentry *root;

    dout("mount start %p\n", fsc);
    mutex_lock(&fsc->client->mount_mutex);

    if (!fsc->sb->s_root) {
        /*
         * Skip the first character of server_path (presumably the
         * leading '/' -- confirm against the option parser); fall
         * back to the empty path when none was given.
         */
        const char *path = fsc->mount_options->server_path ?
                     fsc->mount_options->server_path + 1 : "";

        err = __ceph_open_session(fsc->client, started);
        if (err < 0)
            goto out;

        /* setup fscache */
        if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
            err = ceph_fscache_register_fs(fsc, fc);
            if (err < 0)
                goto out;
        }

        dout("mount opening path '%s'\n", path);

        ceph_fs_debugfs_init(fsc);

        root = open_root_dentry(fsc, path, started);
        if (IS_ERR(root)) {
            err = PTR_ERR(root);
            goto out;
        }
        /* the superblock takes its own reference on the new root */
        fsc->sb->s_root = dget(root);
    } else {
        /* superblock already has a root: just take a reference */
        root = dget(fsc->sb->s_root);
    }

    fsc->mount_state = CEPH_MOUNT_MOUNTED;
    dout("mount success\n");
    mutex_unlock(&fsc->client->mount_mutex);
    return root;

out:
    mutex_unlock(&fsc->client->mount_mutex);
    return ERR_PTR(err);
}
1106 
1107 static int ceph_set_super(struct super_block *s, struct fs_context *fc)
1108 {
1109     struct ceph_fs_client *fsc = s->s_fs_info;
1110     int ret;
1111 
1112     dout("set_super %p\n", s);
1113 
1114     s->s_maxbytes = MAX_LFS_FILESIZE;
1115 
1116     s->s_xattr = ceph_xattr_handlers;
1117     fsc->sb = s;
1118     fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
1119 
1120     s->s_op = &ceph_super_ops;
1121     s->s_d_op = &ceph_dentry_ops;
1122     s->s_export_op = &ceph_export_ops;
1123 
1124     s->s_time_gran = 1;
1125     s->s_time_min = 0;
1126     s->s_time_max = U32_MAX;
1127     s->s_flags |= SB_NODIRATIME | SB_NOATIME;
1128 
1129     ret = set_anon_super_fc(s, fc);
1130     if (ret != 0)
1131         fsc->sb = NULL;
1132     return ret;
1133 }
1134 
1135 /*
1136  * share superblock if same fs AND options
1137  */
1138 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
1139 {
1140     struct ceph_fs_client *new = fc->s_fs_info;
1141     struct ceph_mount_options *fsopt = new->mount_options;
1142     struct ceph_options *opt = new->client->options;
1143     struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
1144 
1145     dout("ceph_compare_super %p\n", sb);
1146 
1147     if (compare_mount_options(fsopt, opt, fsc)) {
1148         dout("monitor(s)/mount options don't match\n");
1149         return 0;
1150     }
1151     if ((opt->flags & CEPH_OPT_FSID) &&
1152         ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) {
1153         dout("fsid doesn't match\n");
1154         return 0;
1155     }
1156     if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) {
1157         dout("flags differ\n");
1158         return 0;
1159     }
1160 
1161     if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) {
1162         dout("client is blocklisted (and CLEANRECOVER is not set)\n");
1163         return 0;
1164     }
1165 
1166     if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
1167         dout("client has been forcibly unmounted\n");
1168         return 0;
1169     }
1170 
1171     return 1;
1172 }
1173 
1174 /*
1175  * construct our own bdi so we can control readahead, etc.
1176  */
1177 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1178 
1179 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
1180 {
1181     int err;
1182 
1183     err = super_setup_bdi_name(sb, "ceph-%ld",
1184                    atomic_long_inc_return(&bdi_seq));
1185     if (err)
1186         return err;
1187 
1188     /* set ra_pages based on rasize mount option? */
1189     sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
1190 
1191     /* set io_pages based on max osd read size */
1192     sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
1193 
1194     return 0;
1195 }
1196 
/*
 * fs_context ->get_tree: create (or find) the ceph superblock for @fc and
 * mount it.
 *
 * A new ceph_fs_client is always built from the parsed options; whether it
 * ends up being used depends on whether sget_fc() returns a new superblock
 * or an existing one that ceph_compare_super() accepted.
 *
 * Returns 0 with fc->root set on success, or a negative errno.
 */
static int ceph_get_tree(struct fs_context *fc)
{
    struct ceph_parse_opts_ctx *pctx = fc->fs_private;
    struct ceph_mount_options *fsopt = pctx->opts;
    struct super_block *sb;
    struct ceph_fs_client *fsc;
    struct dentry *res;
    int (*compare_super)(struct super_block *, struct fs_context *) =
        ceph_compare_super;
    int err;

    dout("ceph_get_tree\n");

    if (!fc->source)
        return invalfc(fc, "No source");
    if (fsopt->new_dev_syntax && !fsopt->mon_addr)
        return invalfc(fc, "No monitor address");

    /* create client (which we may/may not use) */
    fsc = create_fs_client(pctx->opts, pctx->copts);
    /*
     * Drop our pointers to both option structures so that
     * ceph_free_fc() no longer sees them (presumably create_fs_client()
     * has taken them over or released them -- confirm there).
     */
    pctx->opts = NULL;
    pctx->copts = NULL;
    if (IS_ERR(fsc)) {
        err = PTR_ERR(fsc);
        goto out_final;
    }

    err = ceph_mdsc_init(fsc);
    if (err < 0)
        goto out;

    /* with NOSHARE no comparison callback is passed to sget_fc() */
    if (ceph_test_opt(fsc->client, NOSHARE))
        compare_super = NULL;

    /* hand the client to the sget_fc() callbacks via fc->s_fs_info */
    fc->s_fs_info = fsc;
    sb = sget_fc(fc, compare_super, ceph_set_super);
    fc->s_fs_info = NULL;
    if (IS_ERR(sb)) {
        err = PTR_ERR(sb);
        goto out;
    }

    if (ceph_sb_to_client(sb) != fsc) {
        /* an existing superblock matched: discard the new client */
        destroy_fs_client(fsc);
        fsc = ceph_sb_to_client(sb);
        dout("get_sb got existing client %p\n", fsc);
    } else {
        dout("get_sb using new client %p\n", fsc);
        err = ceph_setup_bdi(sb, fsc);
        if (err < 0)
            goto out_splat;
    }

    res = ceph_real_mount(fsc, fc);
    if (IS_ERR(res)) {
        err = PTR_ERR(res);
        goto out_splat;
    }
    dout("root %p inode %p ino %llx.%llx\n", res,
         d_inode(res), ceph_vinop(d_inode(res)));
    fc->root = fsc->sb->s_root;
    return 0;

out_splat:
    /*
     * Failure after the superblock exists.  If the mdsmap says the
     * cluster is not available, report that instead of the original
     * error.
     */
    if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
        pr_info("No mds server is up or the cluster is laggy\n");
        err = -EHOSTUNREACH;
    }

    ceph_mdsc_close_sessions(fsc->mdsc);
    deactivate_locked_super(sb);
    goto out_final;

out:
    /* failure before any superblock was obtained: free the client here */
    destroy_fs_client(fsc);
out_final:
    dout("ceph_get_tree fail %d\n", err);
    return err;
}
1276 
1277 static void ceph_free_fc(struct fs_context *fc)
1278 {
1279     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1280 
1281     if (pctx) {
1282         destroy_mount_options(pctx->opts);
1283         ceph_destroy_options(pctx->copts);
1284         kfree(pctx);
1285     }
1286 }
1287 
1288 static int ceph_reconfigure_fc(struct fs_context *fc)
1289 {
1290     struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1291     struct ceph_mount_options *fsopt = pctx->opts;
1292     struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb);
1293 
1294     if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
1295         ceph_set_mount_opt(fsc, ASYNC_DIROPS);
1296     else
1297         ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
1298 
1299     if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
1300         kfree(fsc->mount_options->mon_addr);
1301         fsc->mount_options->mon_addr = fsopt->mon_addr;
1302         fsopt->mon_addr = NULL;
1303         pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
1304     }
1305 
1306     sync_filesystem(fc->root->d_sb);
1307     return 0;
1308 }
1309 
/*
 * fs_context operations for ceph; attached to each new context by
 * ceph_init_fs_context().
 */
static const struct fs_context_operations ceph_context_ops = {
    .free       = ceph_free_fc,
    .parse_param    = ceph_parse_mount_param,
    .get_tree   = ceph_get_tree,
    .reconfigure    = ceph_reconfigure_fc,
};
1316 
1317 /*
1318  * Set up the filesystem mount context.
1319  */
1320 static int ceph_init_fs_context(struct fs_context *fc)
1321 {
1322     struct ceph_parse_opts_ctx *pctx;
1323     struct ceph_mount_options *fsopt;
1324 
1325     pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
1326     if (!pctx)
1327         return -ENOMEM;
1328 
1329     pctx->copts = ceph_alloc_options();
1330     if (!pctx->copts)
1331         goto nomem;
1332 
1333     pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL);
1334     if (!pctx->opts)
1335         goto nomem;
1336 
1337     fsopt = pctx->opts;
1338     fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
1339 
1340     fsopt->wsize = CEPH_MAX_WRITE_SIZE;
1341     fsopt->rsize = CEPH_MAX_READ_SIZE;
1342     fsopt->rasize = CEPH_RASIZE_DEFAULT;
1343     fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
1344     if (!fsopt->snapdir_name)
1345         goto nomem;
1346 
1347     fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
1348     fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
1349     fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
1350     fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
1351     fsopt->congestion_kb = default_congestion_kb();
1352 
1353 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1354     fc->sb_flags |= SB_POSIXACL;
1355 #endif
1356 
1357     fc->fs_private = pctx;
1358     fc->ops = &ceph_context_ops;
1359     return 0;
1360 
1361 nomem:
1362     destroy_mount_options(pctx->opts);
1363     ceph_destroy_options(pctx->copts);
1364     kfree(pctx);
1365     return -ENOMEM;
1366 }
1367 
/*
 * file_system_type ->kill_sb: tear down superblock @s and the
 * ceph_fs_client attached to it.
 *
 * The MDS client pre-umount step and the workqueue flush run before the
 * superblock is killed; the mon dispatch hook, debugfs entries, fscache
 * registration and finally the client itself are released afterwards.
 */
static void ceph_kill_sb(struct super_block *s)
{
    struct ceph_fs_client *fsc = ceph_sb_to_client(s);

    dout("kill_sb %p\n", s);

    ceph_mdsc_pre_umount(fsc->mdsc);
    flush_fs_workqueues(fsc);

    kill_anon_super(s);

    /* stop routing extra monitor messages to this client */
    fsc->client->extra_mon_dispatch = NULL;
    ceph_fs_debugfs_cleanup(fsc);

    ceph_fscache_unregister_fs(fsc);

    destroy_fs_client(fsc);
}
1386 
/*
 * The "ceph" filesystem type; registered in init_ceph() and unregistered
 * in exit_ceph().
 */
static struct file_system_type ceph_fs_type = {
    .owner      = THIS_MODULE,
    .name       = "ceph",
    .init_fs_context = ceph_init_fs_context,
    .kill_sb    = ceph_kill_sb,
    .fs_flags   = FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("ceph");
1395 
1396 int ceph_force_reconnect(struct super_block *sb)
1397 {
1398     struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
1399     int err = 0;
1400 
1401     fsc->mount_state = CEPH_MOUNT_RECOVER;
1402     __ceph_umount_begin(fsc);
1403 
1404     /* Make sure all page caches get invalidated.
1405      * see remove_session_caps_cb() */
1406     flush_workqueue(fsc->inode_wq);
1407 
1408     /* In case that we were blocklisted. This also reset
1409      * all mon/osd connections */
1410     ceph_reset_client_addr(fsc->client);
1411 
1412     ceph_osdc_clear_abort_err(&fsc->client->osdc);
1413 
1414     fsc->blocklisted = false;
1415     fsc->mount_state = CEPH_MOUNT_MOUNTED;
1416 
1417     if (sb->s_root) {
1418         err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
1419                     CEPH_STAT_CAP_INODE, true);
1420     }
1421     return err;
1422 }
1423 
1424 static int __init init_ceph(void)
1425 {
1426     int ret = init_caches();
1427     if (ret)
1428         goto out;
1429 
1430     ceph_flock_init();
1431     ret = register_filesystem(&ceph_fs_type);
1432     if (ret)
1433         goto out_caches;
1434 
1435     pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1436 
1437     return 0;
1438 
1439 out_caches:
1440     destroy_caches();
1441 out:
1442     return ret;
1443 }
1444 
1445 static void __exit exit_ceph(void)
1446 {
1447     dout("exit_ceph\n");
1448     unregister_filesystem(&ceph_fs_type);
1449     destroy_caches();
1450 }
1451 
1452 static int param_set_metrics(const char *val, const struct kernel_param *kp)
1453 {
1454     struct ceph_fs_client *fsc;
1455     int ret;
1456 
1457     ret = param_set_bool(val, kp);
1458     if (ret) {
1459         pr_err("Failed to parse sending metrics switch value '%s'\n",
1460                val);
1461         return ret;
1462     } else if (!disable_send_metrics) {
1463         // wake up all the mds clients
1464         spin_lock(&ceph_fsc_lock);
1465         list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) {
1466             metric_schedule_delayed(&fsc->mdsc->metric);
1467         }
1468         spin_unlock(&ceph_fsc_lock);
1469     }
1470 
1471     return 0;
1472 }
1473 
/*
 * Parameter ops for disable_send_metrics: a custom setter that also
 * reschedules the metric work, with the stock bool getter.
 */
static const struct kernel_param_ops param_ops_metrics = {
    .set = param_set_metrics,
    .get = param_get_bool,
};
1478 
1479 bool disable_send_metrics = false;
1480 module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
1481 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
1482 
/*
 * for both v1 and v2 syntax
 *
 * Two read-only (0444) module parameters, mount_syntax_v1 and
 * mount_syntax_v2, both backed by the same always-true flag.  Only a
 * getter is provided, so the values cannot be changed.
 */
static bool mount_support = true;
static const struct kernel_param_ops param_ops_mount_syntax = {
    .get = param_get_bool,
};
module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
1490 
/* Module entry and exit points. */
module_init(init_ceph);
module_exit(exit_ceph);

/* Module metadata. */
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");