Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * (C) 2001 Clemson University and The University of Chicago
0004  * Copyright 2018 Omnibond Systems, L.L.C.
0005  *
0006  * See COPYING in top-level directory.
0007  */
0008 #include <linux/kernel.h>
0009 #include "protocol.h"
0010 #include "orangefs-kernel.h"
0011 #include "orangefs-dev-proto.h"
0012 #include "orangefs-bufmap.h"
0013 
0014 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
0015 {
0016     __s32 fsid = ORANGEFS_FS_ID_NULL;
0017 
0018     if (op) {
0019         switch (op->upcall.type) {
0020         case ORANGEFS_VFS_OP_FILE_IO:
0021             fsid = op->upcall.req.io.refn.fs_id;
0022             break;
0023         case ORANGEFS_VFS_OP_LOOKUP:
0024             fsid = op->upcall.req.lookup.parent_refn.fs_id;
0025             break;
0026         case ORANGEFS_VFS_OP_CREATE:
0027             fsid = op->upcall.req.create.parent_refn.fs_id;
0028             break;
0029         case ORANGEFS_VFS_OP_GETATTR:
0030             fsid = op->upcall.req.getattr.refn.fs_id;
0031             break;
0032         case ORANGEFS_VFS_OP_REMOVE:
0033             fsid = op->upcall.req.remove.parent_refn.fs_id;
0034             break;
0035         case ORANGEFS_VFS_OP_MKDIR:
0036             fsid = op->upcall.req.mkdir.parent_refn.fs_id;
0037             break;
0038         case ORANGEFS_VFS_OP_READDIR:
0039             fsid = op->upcall.req.readdir.refn.fs_id;
0040             break;
0041         case ORANGEFS_VFS_OP_SETATTR:
0042             fsid = op->upcall.req.setattr.refn.fs_id;
0043             break;
0044         case ORANGEFS_VFS_OP_SYMLINK:
0045             fsid = op->upcall.req.sym.parent_refn.fs_id;
0046             break;
0047         case ORANGEFS_VFS_OP_RENAME:
0048             fsid = op->upcall.req.rename.old_parent_refn.fs_id;
0049             break;
0050         case ORANGEFS_VFS_OP_STATFS:
0051             fsid = op->upcall.req.statfs.fs_id;
0052             break;
0053         case ORANGEFS_VFS_OP_TRUNCATE:
0054             fsid = op->upcall.req.truncate.refn.fs_id;
0055             break;
0056         case ORANGEFS_VFS_OP_RA_FLUSH:
0057             fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
0058             break;
0059         case ORANGEFS_VFS_OP_FS_UMOUNT:
0060             fsid = op->upcall.req.fs_umount.fs_id;
0061             break;
0062         case ORANGEFS_VFS_OP_GETXATTR:
0063             fsid = op->upcall.req.getxattr.refn.fs_id;
0064             break;
0065         case ORANGEFS_VFS_OP_SETXATTR:
0066             fsid = op->upcall.req.setxattr.refn.fs_id;
0067             break;
0068         case ORANGEFS_VFS_OP_LISTXATTR:
0069             fsid = op->upcall.req.listxattr.refn.fs_id;
0070             break;
0071         case ORANGEFS_VFS_OP_REMOVEXATTR:
0072             fsid = op->upcall.req.removexattr.refn.fs_id;
0073             break;
0074         case ORANGEFS_VFS_OP_FSYNC:
0075             fsid = op->upcall.req.fsync.refn.fs_id;
0076             break;
0077         default:
0078             break;
0079         }
0080     }
0081     return fsid;
0082 }
0083 
0084 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
0085 {
0086     int flags = 0;
0087     if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
0088         flags |= S_IMMUTABLE;
0089     else
0090         flags &= ~S_IMMUTABLE;
0091     if (attrs->flags & ORANGEFS_APPEND_FL)
0092         flags |= S_APPEND;
0093     else
0094         flags &= ~S_APPEND;
0095     if (attrs->flags & ORANGEFS_NOATIME_FL)
0096         flags |= S_NOATIME;
0097     else
0098         flags &= ~S_NOATIME;
0099     return flags;
0100 }
0101 
0102 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
0103 {
0104     int perm_mode = 0;
0105 
0106     if (attrs->perms & ORANGEFS_O_EXECUTE)
0107         perm_mode |= S_IXOTH;
0108     if (attrs->perms & ORANGEFS_O_WRITE)
0109         perm_mode |= S_IWOTH;
0110     if (attrs->perms & ORANGEFS_O_READ)
0111         perm_mode |= S_IROTH;
0112 
0113     if (attrs->perms & ORANGEFS_G_EXECUTE)
0114         perm_mode |= S_IXGRP;
0115     if (attrs->perms & ORANGEFS_G_WRITE)
0116         perm_mode |= S_IWGRP;
0117     if (attrs->perms & ORANGEFS_G_READ)
0118         perm_mode |= S_IRGRP;
0119 
0120     if (attrs->perms & ORANGEFS_U_EXECUTE)
0121         perm_mode |= S_IXUSR;
0122     if (attrs->perms & ORANGEFS_U_WRITE)
0123         perm_mode |= S_IWUSR;
0124     if (attrs->perms & ORANGEFS_U_READ)
0125         perm_mode |= S_IRUSR;
0126 
0127     if (attrs->perms & ORANGEFS_G_SGID)
0128         perm_mode |= S_ISGID;
0129     if (attrs->perms & ORANGEFS_U_SUID)
0130         perm_mode |= S_ISUID;
0131 
0132     return perm_mode;
0133 }
0134 
0135 /*
0136  * NOTE: in kernel land, we never use the sys_attr->link_target for
0137  * anything, so don't bother copying it into the sys_attr object here.
0138  */
0139 static inline void copy_attributes_from_inode(struct inode *inode,
0140     struct ORANGEFS_sys_attr_s *attrs)
0141 {
0142     struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
0143     attrs->mask = 0;
0144     if (orangefs_inode->attr_valid & ATTR_UID) {
0145         attrs->owner = from_kuid(&init_user_ns, inode->i_uid);
0146         attrs->mask |= ORANGEFS_ATTR_SYS_UID;
0147         gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
0148     }
0149     if (orangefs_inode->attr_valid & ATTR_GID) {
0150         attrs->group = from_kgid(&init_user_ns, inode->i_gid);
0151         attrs->mask |= ORANGEFS_ATTR_SYS_GID;
0152         gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
0153     }
0154 
0155     if (orangefs_inode->attr_valid & ATTR_ATIME) {
0156         attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
0157         if (orangefs_inode->attr_valid & ATTR_ATIME_SET) {
0158             attrs->atime = (time64_t)inode->i_atime.tv_sec;
0159             attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
0160         }
0161     }
0162     if (orangefs_inode->attr_valid & ATTR_MTIME) {
0163         attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
0164         if (orangefs_inode->attr_valid & ATTR_MTIME_SET) {
0165             attrs->mtime = (time64_t)inode->i_mtime.tv_sec;
0166             attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
0167         }
0168     }
0169     if (orangefs_inode->attr_valid & ATTR_CTIME)
0170         attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
0171 
0172     /*
0173      * ORANGEFS cannot set size with a setattr operation. Probably not
0174      * likely to be requested through the VFS, but just in case, don't
0175      * worry about ATTR_SIZE
0176      */
0177 
0178     if (orangefs_inode->attr_valid & ATTR_MODE) {
0179         attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode);
0180         attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
0181     }
0182 }
0183 
0184 static int orangefs_inode_type(enum orangefs_ds_type objtype)
0185 {
0186     if (objtype == ORANGEFS_TYPE_METAFILE)
0187         return S_IFREG;
0188     else if (objtype == ORANGEFS_TYPE_DIRECTORY)
0189         return S_IFDIR;
0190     else if (objtype == ORANGEFS_TYPE_SYMLINK)
0191         return S_IFLNK;
0192     else
0193         return -1;
0194 }
0195 
0196 static void orangefs_make_bad_inode(struct inode *inode)
0197 {
0198     if (is_root_handle(inode)) {
0199         /*
0200          * if this occurs, the pvfs2-client-core was killed but we
0201          * can't afford to lose the inode operations and such
0202          * associated with the root handle in any case.
0203          */
0204         gossip_debug(GOSSIP_UTILS_DEBUG,
0205                  "*** NOT making bad root inode %pU\n",
0206                  get_khandle_from_ino(inode));
0207     } else {
0208         gossip_debug(GOSSIP_UTILS_DEBUG,
0209                  "*** making bad inode %pU\n",
0210                  get_khandle_from_ino(inode));
0211         make_bad_inode(inode);
0212     }
0213 }
0214 
0215 static int orangefs_inode_is_stale(struct inode *inode,
0216     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
0217 {
0218     struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
0219     int type = orangefs_inode_type(attrs->objtype);
0220     /*
0221      * If the inode type or symlink target have changed then this
0222      * inode is stale.
0223      */
0224     if (type == -1 || inode_wrong_type(inode, type)) {
0225         orangefs_make_bad_inode(inode);
0226         return 1;
0227     }
0228     if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
0229         link_target, ORANGEFS_NAME_MAX)) {
0230         orangefs_make_bad_inode(inode);
0231         return 1;
0232     }
0233     return 0;
0234 }
0235 
0236 int orangefs_inode_getattr(struct inode *inode, int flags)
0237 {
0238     struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
0239     struct orangefs_kernel_op_s *new_op;
0240     loff_t inode_size;
0241     int ret, type;
0242 
0243     gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n",
0244         __func__, get_khandle_from_ino(inode), flags);
0245 
0246 again:
0247     spin_lock(&inode->i_lock);
0248     /* Must have all the attributes in the mask and be within cache time. */
0249     if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
0250         orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
0251         if (orangefs_inode->attr_valid) {
0252             spin_unlock(&inode->i_lock);
0253             write_inode_now(inode, 1);
0254             goto again;
0255         }
0256         spin_unlock(&inode->i_lock);
0257         return 0;
0258     }
0259     spin_unlock(&inode->i_lock);
0260 
0261     new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
0262     if (!new_op)
0263         return -ENOMEM;
0264     new_op->upcall.req.getattr.refn = orangefs_inode->refn;
0265     /*
0266      * Size is the hardest attribute to get.  The incremental cost of any
0267      * other attribute is essentially zero.
0268      */
0269     if (flags)
0270         new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
0271     else
0272         new_op->upcall.req.getattr.mask =
0273             ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
0274 
0275     ret = service_operation(new_op, __func__,
0276         get_interruptible_flag(inode));
0277     if (ret != 0)
0278         goto out;
0279 
0280 again2:
0281     spin_lock(&inode->i_lock);
0282     /* Must have all the attributes in the mask and be within cache time. */
0283     if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
0284         orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
0285         if (orangefs_inode->attr_valid) {
0286             spin_unlock(&inode->i_lock);
0287             write_inode_now(inode, 1);
0288             goto again2;
0289         }
0290         if (inode->i_state & I_DIRTY_PAGES) {
0291             ret = 0;
0292             goto out_unlock;
0293         }
0294         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n",
0295             __func__);
0296         ret = 0;
0297         goto out_unlock;
0298     }
0299 
0300     if (!(flags & ORANGEFS_GETATTR_NEW)) {
0301         ret = orangefs_inode_is_stale(inode,
0302             &new_op->downcall.resp.getattr.attributes,
0303             new_op->downcall.resp.getattr.link_target);
0304         if (ret) {
0305             ret = -ESTALE;
0306             goto out_unlock;
0307         }
0308     }
0309 
0310     type = orangefs_inode_type(new_op->
0311         downcall.resp.getattr.attributes.objtype);
0312     switch (type) {
0313     case S_IFREG:
0314         inode->i_flags = orangefs_inode_flags(&new_op->
0315             downcall.resp.getattr.attributes);
0316         if (flags) {
0317             inode_size = (loff_t)new_op->
0318                 downcall.resp.getattr.attributes.size;
0319             inode->i_size = inode_size;
0320             inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
0321                 attributes.blksize);
0322             inode->i_bytes = inode_size;
0323             inode->i_blocks =
0324                 (inode_size + 512 - inode_size % 512)/512;
0325         }
0326         break;
0327     case S_IFDIR:
0328         if (flags) {
0329             inode->i_size = PAGE_SIZE;
0330             inode_set_bytes(inode, inode->i_size);
0331         }
0332         set_nlink(inode, 1);
0333         break;
0334     case S_IFLNK:
0335         if (flags & ORANGEFS_GETATTR_NEW) {
0336             inode->i_size = (loff_t)strlen(new_op->
0337                 downcall.resp.getattr.link_target);
0338             ret = strscpy(orangefs_inode->link_target,
0339                 new_op->downcall.resp.getattr.link_target,
0340                 ORANGEFS_NAME_MAX);
0341             if (ret == -E2BIG) {
0342                 ret = -EIO;
0343                 goto out_unlock;
0344             }
0345             inode->i_link = orangefs_inode->link_target;
0346         }
0347         break;
0348     /* i.e. -1 */
0349     default:
0350         /* XXX: ESTALE?  This is what is done if it is not new. */
0351         orangefs_make_bad_inode(inode);
0352         ret = -ESTALE;
0353         goto out_unlock;
0354     }
0355 
0356     inode->i_uid = make_kuid(&init_user_ns, new_op->
0357         downcall.resp.getattr.attributes.owner);
0358     inode->i_gid = make_kgid(&init_user_ns, new_op->
0359         downcall.resp.getattr.attributes.group);
0360     inode->i_atime.tv_sec = (time64_t)new_op->
0361         downcall.resp.getattr.attributes.atime;
0362     inode->i_mtime.tv_sec = (time64_t)new_op->
0363         downcall.resp.getattr.attributes.mtime;
0364     inode->i_ctime.tv_sec = (time64_t)new_op->
0365         downcall.resp.getattr.attributes.ctime;
0366     inode->i_atime.tv_nsec = 0;
0367     inode->i_mtime.tv_nsec = 0;
0368     inode->i_ctime.tv_nsec = 0;
0369 
0370     /* special case: mark the root inode as sticky */
0371     inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
0372         orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
0373 
0374     orangefs_inode->getattr_time = jiffies +
0375         orangefs_getattr_timeout_msecs*HZ/1000;
0376     ret = 0;
0377 out_unlock:
0378     spin_unlock(&inode->i_lock);
0379 out:
0380     op_release(new_op);
0381     return ret;
0382 }
0383 
0384 int orangefs_inode_check_changed(struct inode *inode)
0385 {
0386     struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
0387     struct orangefs_kernel_op_s *new_op;
0388     int ret;
0389 
0390     gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
0391         get_khandle_from_ino(inode));
0392 
0393     new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
0394     if (!new_op)
0395         return -ENOMEM;
0396     new_op->upcall.req.getattr.refn = orangefs_inode->refn;
0397     new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
0398         ORANGEFS_ATTR_SYS_LNK_TARGET;
0399 
0400     ret = service_operation(new_op, __func__,
0401         get_interruptible_flag(inode));
0402     if (ret != 0)
0403         goto out;
0404 
0405     ret = orangefs_inode_is_stale(inode,
0406         &new_op->downcall.resp.getattr.attributes,
0407         new_op->downcall.resp.getattr.link_target);
0408 out:
0409     op_release(new_op);
0410     return ret;
0411 }
0412 
0413 /*
0414  * issues a orangefs setattr request to make sure the new attribute values
0415  * take effect if successful.  returns 0 on success; -errno otherwise
0416  */
0417 int orangefs_inode_setattr(struct inode *inode)
0418 {
0419     struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
0420     struct orangefs_kernel_op_s *new_op;
0421     int ret;
0422 
0423     new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
0424     if (!new_op)
0425         return -ENOMEM;
0426 
0427     spin_lock(&inode->i_lock);
0428     new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid);
0429     new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid);
0430     new_op->upcall.req.setattr.refn = orangefs_inode->refn;
0431     copy_attributes_from_inode(inode,
0432         &new_op->upcall.req.setattr.attributes);
0433     orangefs_inode->attr_valid = 0;
0434     if (!new_op->upcall.req.setattr.attributes.mask) {
0435         spin_unlock(&inode->i_lock);
0436         op_release(new_op);
0437         return 0;
0438     }
0439     spin_unlock(&inode->i_lock);
0440 
0441     ret = service_operation(new_op, __func__,
0442         get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK);
0443     gossip_debug(GOSSIP_UTILS_DEBUG,
0444         "orangefs_inode_setattr: returning %d\n", ret);
0445     if (ret)
0446         orangefs_make_bad_inode(inode);
0447 
0448     op_release(new_op);
0449 
0450     if (ret == 0)
0451         orangefs_inode->getattr_time = jiffies - 1;
0452     return ret;
0453 }
0454 
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * Lookup table from an ORANGEFS error number (the error code with its
 * error and class bits masked off) to the corresponding errno value.
 * The order matches include/orangefs-types.h in the OrangeFS source.
 *
 * The table is only ever read, so it is declared const.
 */
static const int PINT_errno_mapping[] = {
    0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
    EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
    EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
    ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
    EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
    EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
    ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
    EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
    ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
    EACCES, ECONNRESET, ERANGE
};
0473 
0474 int orangefs_normalize_to_errno(__s32 error_code)
0475 {
0476     __u32 i;
0477 
0478     /* Success */
0479     if (error_code == 0) {
0480         return 0;
0481     /*
0482      * This shouldn't ever happen. If it does it should be fixed on the
0483      * server.
0484      */
0485     } else if (error_code > 0) {
0486         gossip_err("orangefs: error status received.\n");
0487         gossip_err("orangefs: assuming error code is inverted.\n");
0488         error_code = -error_code;
0489     }
0490 
0491     /*
0492      * XXX: This is very bad since error codes from ORANGEFS may not be
0493      * suitable for return into userspace.
0494      */
0495 
0496     /*
0497      * Convert ORANGEFS error values into errno values suitable for return
0498      * from the kernel.
0499      */
0500     if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
0501         if (((-error_code) &
0502             (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
0503             ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
0504             /*
0505              * cancellation error codes generally correspond to
0506              * a timeout from the client's perspective
0507              */
0508             error_code = -ETIMEDOUT;
0509         } else {
0510             /* assume a default error code */
0511             gossip_err("%s: bad error code :%d:.\n",
0512                 __func__,
0513                 error_code);
0514             error_code = -EINVAL;
0515         }
0516 
0517     /* Convert ORANGEFS encoded errno values into regular errno values. */
0518     } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
0519         i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
0520         if (i < ARRAY_SIZE(PINT_errno_mapping))
0521             error_code = -PINT_errno_mapping[i];
0522         else
0523             error_code = -EINVAL;
0524 
0525     /*
0526      * Only ORANGEFS protocol error codes should ever come here. Otherwise
0527      * there is a bug somewhere.
0528      */
0529     } else {
0530         gossip_err("%s: unknown error code.\n", __func__);
0531         error_code = -EINVAL;
0532     }
0533     return error_code;
0534 }
0535 
0536 #define NUM_MODES 11
0537 __s32 ORANGEFS_util_translate_mode(int mode)
0538 {
0539     int ret = 0;
0540     int i = 0;
0541     static int modes[NUM_MODES] = {
0542         S_IXOTH, S_IWOTH, S_IROTH,
0543         S_IXGRP, S_IWGRP, S_IRGRP,
0544         S_IXUSR, S_IWUSR, S_IRUSR,
0545         S_ISGID, S_ISUID
0546     };
0547     static int orangefs_modes[NUM_MODES] = {
0548         ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
0549         ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
0550         ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
0551         ORANGEFS_G_SGID, ORANGEFS_U_SUID
0552     };
0553 
0554     for (i = 0; i < NUM_MODES; i++)
0555         if (mode & modes[i])
0556             ret |= orangefs_modes[i];
0557 
0558     return ret;
0559 }
0560 #undef NUM_MODES