/*
 * POSIX message queues filesystem for Linux (fs magic "mqueue").
 *
 * Implements the mq_open()/mq_unlink()/mq_timedsend()/mq_timedreceive()/
 * mq_notify()/mq_getsetattr() system calls on top of a small in-kernel
 * filesystem, normally mounted at /dev/mqueue.
 */
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/mqueue.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/netlink.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
#include <linux/user_namespace.h>
#include <linux/slab.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/user.h>

#include <net/sock.h>
#include "util.h"

struct mqueue_fs_context {
	struct ipc_namespace	*ipc_ns;
	bool			 newns;	/* Set if newly created ipc namespace */
};

#define MQUEUE_MAGIC	0x19800202
#define DIRENT_SIZE	20
#define FILENT_SIZE	80

/* Indices into mqueue_inode_info->e_wait_q[] */
#define SEND		0
#define RECV		1

/* ext_wait_queue->state values for the sleeping send/receive protocol */
#define STATE_NONE	0
#define STATE_READY	1

struct posix_msg_tree_node {
	struct rb_node		rb_node;
	struct list_head	msg_list;	/* FIFO of messages at this priority */
	int			priority;
};

/*
 * Locking:
 *
 * Accesses to a message queue are synchronized by acquiring info->lock.
 *
 * There is one exception to that rule: a pipelined send or receive hands
 * a message directly to a sleeping task without going through the queue.
 * The woken task reads ext_wait_queue->state and ->msg without retaking
 * info->lock, so the waker must publish them carefully:
 *
 * - the waker writes ->msg first, then sets ->state to STATE_READY with
 *   smp_store_release();
 * - the sleeper polls ->state with READ_ONCE() and, once it observes
 *   STATE_READY, issues the matching acquire barrier
 *   (smp_acquire__after_ctrl_dep()) before touching ->msg.
 *
 * This release/acquire pairing is what makes the lockless part of
 * wq_sleep(), in combination with __pipelined_op(), safe.
 */

struct ext_wait_queue {		/* queue of sleeping tasks */
	struct task_struct *task;
	struct list_head list;
	struct msg_msg *msg;	/* ptr of loaded message */
	int state;		/* one of STATE_* values */
};

struct mqueue_inode_info {
	spinlock_t lock;
	struct inode vfs_inode;
	wait_queue_head_t wait_q;

	struct rb_root msg_tree;
	struct rb_node *msg_tree_rightmost;
	struct posix_msg_tree_node *node_cache;
	struct mq_attr attr;

	struct sigevent notify;
	struct pid *notify_owner;
	u32 notify_self_exec_id;
	struct user_namespace *notify_user_ns;
	struct ucounts *ucounts;	/* user who created, for accounting */
	struct sock *notify_sock;
	struct sk_buff *notify_cookie;

	/* for tasks waiting for free space and messages, respectively */
	struct ext_wait_queue e_wait_q[2];

	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
};

0157
0158 static struct file_system_type mqueue_fs_type;
0159 static const struct inode_operations mqueue_dir_inode_operations;
0160 static const struct file_operations mqueue_file_operations;
0161 static const struct super_operations mqueue_super_ops;
0162 static const struct fs_context_operations mqueue_fs_context_ops;
0163 static void remove_notification(struct mqueue_inode_info *info);
0164
0165 static struct kmem_cache *mqueue_inode_cachep;
0166
0167 static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
0168 {
0169 return container_of(inode, struct mqueue_inode_info, vfs_inode);
0170 }
0171
0172
0173
0174
0175 static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
0176 {
0177 return get_ipc_ns(inode->i_sb->s_fs_info);
0178 }
0179
0180 static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
0181 {
0182 struct ipc_namespace *ns;
0183
0184 spin_lock(&mq_lock);
0185 ns = __get_ns_from_inode(inode);
0186 spin_unlock(&mq_lock);
0187 return ns;
0188 }
0189
0190
static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{
	struct rb_node **p, *parent = NULL;
	struct posix_msg_tree_node *leaf;
	bool rightmost = true;

	p = &info->msg_tree.rb_node;
	while (*p) {
		parent = *p;
		leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);

		if (likely(leaf->priority == msg->m_type))
			goto insert_msg;
		else if (msg->m_type < leaf->priority) {
			p = &(*p)->rb_left;
			rightmost = false;
		} else
			p = &(*p)->rb_right;
	}
	if (info->node_cache) {
		leaf = info->node_cache;
		info->node_cache = NULL;
	} else {
		/* called with info->lock held, so GFP_ATOMIC is required */
		leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
		if (!leaf)
			return -ENOMEM;
		INIT_LIST_HEAD(&leaf->msg_list);
	}
	leaf->priority = msg->m_type;

	if (rightmost)
		info->msg_tree_rightmost = &leaf->rb_node;

	rb_link_node(&leaf->rb_node, parent, p);
	rb_insert_color(&leaf->rb_node, &info->msg_tree);
insert_msg:
	info->attr.mq_curmsgs++;
	info->qsize += msg->m_ts;
	list_add_tail(&msg->m_list, &leaf->msg_list);
	return 0;
}

static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
				  struct mqueue_inode_info *info)
{
	struct rb_node *node = &leaf->rb_node;

	if (info->msg_tree_rightmost == node)
		info->msg_tree_rightmost = rb_prev(node);

	rb_erase(node, &info->msg_tree);
	if (info->node_cache)
		kfree(leaf);
	else
		/* keep one spare node around for the next insert */
		info->node_cache = leaf;
}

static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{
	struct rb_node *parent = NULL;
	struct posix_msg_tree_node *leaf;
	struct msg_msg *msg;

try_again:
	/*
	 * During insert, low priorities go to the left and high to the
	 * right.  On receive, we want the highest priority first, so use
	 * the cached rightmost node.
	 */
	parent = info->msg_tree_rightmost;
	if (!parent) {
		if (info->attr.mq_curmsgs) {
			pr_warn_once("Inconsistency in POSIX message queue, no tree element, but supposedly messages should exist!\n");
			info->attr.mq_curmsgs = 0;
		}
		return NULL;
	}
	leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
	if (unlikely(list_empty(&leaf->msg_list))) {
		pr_warn_once("Inconsistency in POSIX message queue, empty leaf node but we haven't implemented lazy leaf delete!\n");
		msg_tree_erase(leaf, info);
		goto try_again;
	} else {
		msg = list_first_entry(&leaf->msg_list,
				       struct msg_msg, m_list);
		list_del(&msg->m_list);
		if (list_empty(&leaf->msg_list))
			msg_tree_erase(leaf, info);
	}
	info->attr.mq_curmsgs--;
	info->qsize -= msg->m_ts;
	return msg;
}
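
/*
 * Illustrative userspace sketch (not part of this file; queue name and
 * payloads are made up): messages of different priorities land in
 * different tree nodes and are always received highest-priority-first,
 * FIFO within a single priority, exactly as msg_insert()/msg_get()
 * implement above.
 *
 *	#include <mqueue.h>	// link with -lrt
 *
 *	mqd_t q = mq_open("/prio-demo", O_CREAT | O_RDWR, 0600, NULL);
 *	mq_send(q, "low-1", 5, 1);
 *	mq_send(q, "high", 4, 9);
 *	mq_send(q, "low-2", 5, 1);
 *
 *	char buf[8192];		// at least mq_msgsize (8192 by default)
 *	unsigned int prio;
 *	mq_receive(q, buf, sizeof(buf), &prio);	// "high",  prio == 9
 *	mq_receive(q, buf, sizeof(buf), &prio);	// "low-1", prio == 1
 *	mq_receive(q, buf, sizeof(buf), &prio);	// "low-2", prio == 1
 */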

static struct inode *mqueue_get_inode(struct super_block *sb,
		struct ipc_namespace *ipc_ns, umode_t mode,
		struct mq_attr *attr)
{
	struct inode *inode;
	int ret = -ENOMEM;

	inode = new_inode(sb);
	if (!inode)
		goto err;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode);

	if (S_ISREG(mode)) {
		struct mqueue_inode_info *info;
		unsigned long mq_bytes, mq_treesize;

		inode->i_fop = &mqueue_file_operations;
		inode->i_size = FILENT_SIZE;
		/* mqueue specific info */
		info = MQUEUE_I(inode);
		spin_lock_init(&info->lock);
		init_waitqueue_head(&info->wait_q);
		INIT_LIST_HEAD(&info->e_wait_q[0].list);
		INIT_LIST_HEAD(&info->e_wait_q[1].list);
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
		info->qsize = 0;
		info->ucounts = NULL;	/* set when all is ok */
		info->msg_tree = RB_ROOT;
		info->msg_tree_rightmost = NULL;
		info->node_cache = NULL;
		memset(&info->attr, 0, sizeof(info->attr));
		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
					   ipc_ns->mq_msg_default);
		info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
					    ipc_ns->mq_msgsize_default);
		if (attr) {
			info->attr.mq_maxmsg = attr->mq_maxmsg;
			info->attr.mq_msgsize = attr->mq_msgsize;
		}

		ret = -EINVAL;
		if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
			goto out_inode;
		if (capable(CAP_SYS_RESOURCE)) {
			if (info->attr.mq_maxmsg > HARD_MSGMAX ||
			    info->attr.mq_msgsize > HARD_MSGSIZEMAX)
				goto out_inode;
		} else {
			if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
			    info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
				goto out_inode;
		}
		ret = -EOVERFLOW;
		/* check for overflow */
		if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
			goto out_inode;
		/*
		 * Charge the worst case against RLIMIT_MSGQUEUE: one
		 * struct msg_msg per possible message, plus up to one tree
		 * node per message (capped at MQ_PRIO_MAX distinct
		 * priorities), plus the message payload itself.  E.g. with
		 * mq_maxmsg = 10 and mq_msgsize = 8192 this pins a bit
		 * over 80 KiB per queue.
		 */
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);
		mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
		if (mq_bytes + mq_treesize < mq_bytes)
			goto out_inode;
		mq_bytes += mq_treesize;
		info->ucounts = get_ucounts(current_ucounts());
		if (info->ucounts) {
			long msgqueue;

			spin_lock(&mq_lock);
			msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
			if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
				dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
				spin_unlock(&mq_lock);
				put_ucounts(info->ucounts);
				info->ucounts = NULL;
				/* mqueue_evict_inode() releases info->messages */
				ret = -EMFILE;
				goto out_inode;
			}
			spin_unlock(&mq_lock);
		}
	} else if (S_ISDIR(mode)) {
		inc_nlink(inode);
		/* Some things misbehave if size == 0 on a directory */
		inode->i_size = 2 * DIRENT_SIZE;
		inode->i_op = &mqueue_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
	}

	return inode;
out_inode:
	iput(inode);
err:
	return ERR_PTR(ret);
}

static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct ipc_namespace *ns = sb->s_fs_info;

	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = MQUEUE_MAGIC;
	sb->s_op = &mqueue_super_ops;

	inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}

static int mqueue_get_tree(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx = fc->fs_private;

	/*
	 * With a newly created ipc namespace, we don't need to do a search
	 * for an ipc namespace match, but we still need to set s_fs_info.
	 */
	if (ctx->newns) {
		fc->s_fs_info = ctx->ipc_ns;
		return get_tree_nodev(fc, mqueue_fill_super);
	}
	return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
}

static void mqueue_fs_context_free(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx = fc->fs_private;

	put_ipc_ns(ctx->ipc_ns);
	kfree(ctx);
}

static int mqueue_init_fs_context(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx;

	ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
	fc->fs_private = ctx;
	fc->ops = &mqueue_fs_context_ops;
	return 0;
}

/*
 * mq_init_ns() uses this to create the kernel-internal mount for a
 * freshly created ipc namespace; userspace mounts go through
 * mqueue_init_fs_context() instead.
 */
static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
{
	struct mqueue_fs_context *ctx;
	struct fs_context *fc;
	struct vfsmount *mnt;

	fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	ctx = fc->fs_private;
	ctx->newns = true;
	put_ipc_ns(ctx->ipc_ns);
	ctx->ipc_ns = get_ipc_ns(ns);
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);

	mnt = fc_mount(fc);
	put_fs_context(fc);
	return mnt;
}

static void init_once(void *foo)
{
	struct mqueue_inode_info *p = foo;

	inode_init_once(&p->vfs_inode);
}

static struct inode *mqueue_alloc_inode(struct super_block *sb)
{
	struct mqueue_inode_info *ei;

	ei = alloc_inode_sb(sb, mqueue_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void mqueue_free_inode(struct inode *inode)
{
	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}

static void mqueue_evict_inode(struct inode *inode)
{
	struct mqueue_inode_info *info;
	struct ipc_namespace *ipc_ns;
	struct msg_msg *msg, *nmsg;
	LIST_HEAD(tmp_msg);

	clear_inode(inode);

	if (S_ISDIR(inode->i_mode))
		return;

	ipc_ns = get_ns_from_inode(inode);
	info = MQUEUE_I(inode);
	spin_lock(&info->lock);
	while ((msg = msg_get(info)) != NULL)
		list_add_tail(&msg->m_list, &tmp_msg);
	kfree(info->node_cache);
	spin_unlock(&info->lock);

	list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
		list_del(&msg->m_list);
		free_msg(msg);
	}

	if (info->ucounts) {
		unsigned long mq_bytes, mq_treesize;

		/* Total amount of bytes accounted for the mqueue */
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);

		mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
					  info->attr.mq_msgsize);

		spin_lock(&mq_lock);
		dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
		/*
		 * get_ns_from_inode() returned either a valid ipc_ns, to
		 * which we now hold a reference, or NULL if the superblock
		 * is already being torn down; only adjust the queue count
		 * while the namespace is still valid.
		 */
		if (ipc_ns)
			ipc_ns->mq_queues_count--;
		spin_unlock(&mq_lock);
		put_ucounts(info->ucounts);
		info->ucounts = NULL;
	}
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
}

static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode;
	struct mq_attr *attr = arg;
	int error;
	struct ipc_namespace *ipc_ns;

	spin_lock(&mq_lock);
	ipc_ns = __get_ns_from_inode(dir);
	if (!ipc_ns) {
		error = -EACCES;
		goto out_unlock;
	}

	if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
	    !capable(CAP_SYS_RESOURCE)) {
		error = -ENOSPC;
		goto out_unlock;
	}
	ipc_ns->mq_queues_count++;
	spin_unlock(&mq_lock);

	inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		spin_lock(&mq_lock);
		ipc_ns->mq_queues_count--;
		goto out_unlock;
	}

	put_ipc_ns(ipc_ns);
	dir->i_size += DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);

	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
out_unlock:
	spin_unlock(&mq_lock);
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
	return error;
}

static int mqueue_create(struct user_namespace *mnt_userns, struct inode *dir,
			 struct dentry *dentry, umode_t mode, bool excl)
{
	return mqueue_create_attr(dentry, mode, NULL);
}

static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
	dir->i_size -= DIRENT_SIZE;
	drop_nlink(inode);
	dput(dentry);
	return 0;
}

/*
 * This is a reading routine for the queue file itself: rather than
 * implementing some flavour of mq_receive() here, reading returns only
 * the queue status (size and notification registration), which is the
 * only state of interest that isn't available through the standard
 * mq_* calls.
 */
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
				size_t count, loff_t *off)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	char buffer[FILENT_SIZE];
	ssize_t ret;

	spin_lock(&info->lock);
	snprintf(buffer, sizeof(buffer),
		 "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
		 info->qsize,
		 info->notify_owner ? info->notify.sigev_notify : 0,
		 (info->notify_owner &&
		  info->notify.sigev_notify == SIGEV_SIGNAL) ?
			info->notify.sigev_signo : 0,
		 pid_vnr(info->notify_owner));
	spin_unlock(&info->lock);
	buffer[sizeof(buffer)-1] = '\0';	/* paranoia; snprintf() NUL-terminates */

	ret = simple_read_from_buffer(u_data, count, off, buffer,
				      strlen(buffer));
	if (ret <= 0)
		return ret;

	file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp));
	return ret;
}
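
/*
 * Roughly what the status format produced above looks like from
 * userspace (queue name hypothetical):
 *
 *	$ cat /dev/mqueue/prio-demo
 *	QSIZE:129 NOTIFY:2 SIGNO:0 NOTIFY_PID:8260
 *
 * QSIZE is the total number of message bytes queued; the NOTIFY* fields
 * describe a registration made with mq_notify(), all zero when none is
 * active.
 */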

static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));

	spin_lock(&info->lock);
	if (task_tgid(current) == info->notify_owner)
		remove_notification(info);

	spin_unlock(&info->lock);
	return 0;
}

static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	__poll_t retval = 0;

	poll_wait(filp, &info->wait_q, poll_tab);

	spin_lock(&info->lock);
	if (info->attr.mq_curmsgs)
		retval = EPOLLIN | EPOLLRDNORM;

	if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
		retval |= EPOLLOUT | EPOLLWRNORM;
	spin_unlock(&info->lock);

	return retval;
}
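
/*
 * Illustrative userspace sketch (not part of this file; queue name is
 * hypothetical): because an mqd_t is a file descriptor on Linux, it can
 * be multiplexed with poll()/select()/epoll, serviced by
 * mqueue_poll_file() above.
 *
 *	#include <mqueue.h>
 *	#include <poll.h>
 *
 *	mqd_t q = mq_open("/prio-demo", O_RDONLY);
 *	struct pollfd pfd = { .fd = q, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN)) {
 *		// at least one message is queued; mq_receive() won't block
 *	}
 */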

/* Adds current to info->e_wait_q[sr] before element with smaller prio */
static void wq_add(struct mqueue_inode_info *info, int sr,
		   struct ext_wait_queue *ewp)
{
	struct ext_wait_queue *walk;

	list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
		if (walk->task->prio <= current->prio) {
			list_add_tail(&ewp->list, &walk->list);
			return;
		}
	}
	list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
}

/*
 * Puts current task to sleep. Caller must hold queue lock. After return
 * lock isn't held.
 * sr: SEND or RECV
 */
static int wq_sleep(struct mqueue_inode_info *info, int sr,
		    ktime_t *timeout, struct ext_wait_queue *ewp)
	__releases(&info->lock)
{
	int retval;
	signed long time;

	wq_add(info, sr, ewp);

	for (;;) {
		/* memory barrier not required, we hold info->lock */
		__set_current_state(TASK_INTERRUPTIBLE);

		spin_unlock(&info->lock);
		time = schedule_hrtimeout_range_clock(timeout, 0,
			HRTIMER_MODE_ABS, CLOCK_REALTIME);

		if (READ_ONCE(ewp->state) == STATE_READY) {
			/* pairs with smp_store_release() in __pipelined_op() */
			smp_acquire__after_ctrl_dep();
			retval = 0;
			goto out;
		}
		spin_lock(&info->lock);

		/* we hold info->lock, so no memory barrier required */
		if (READ_ONCE(ewp->state) == STATE_READY) {
			retval = 0;
			goto out_unlock;
		}
		if (signal_pending(current)) {
			retval = -ERESTARTSYS;
			break;
		}
		if (time == 0) {
			retval = -ETIMEDOUT;
			break;
		}
	}
	list_del(&ewp->list);
out_unlock:
	spin_unlock(&info->lock);
out:
	return retval;
}

/*
 * Returns waiting task that should be serviced first or NULL if none exists
 */
static struct ext_wait_queue *wq_get_first_waiter(
		struct mqueue_inode_info *info, int sr)
{
	struct list_head *ptr;

	ptr = info->e_wait_q[sr].list.prev;
	if (ptr == &info->e_wait_q[sr].list)
		return NULL;
	return list_entry(ptr, struct ext_wait_queue, list);
}


static inline void set_cookie(struct sk_buff *skb, char code)
{
	((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
}

/*
 * The next function is only to split too long sys_mq_timedsend
 */
static void __do_notify(struct mqueue_inode_info *info)
{
	/* notification
	 * invoked when there is registered process and there isn't process
	 * waiting synchronously for message AND state of queue changed from
	 * empty to not empty. Here we are sure that no one is waiting
	 * synchronously. */
	if (info->notify_owner &&
	    info->attr.mq_curmsgs == 1) {
		switch (info->notify.sigev_notify) {
		case SIGEV_NONE:
			break;
		case SIGEV_SIGNAL: {
			struct kernel_siginfo sig_i;
			struct task_struct *task;

			/* do_mq_notify() accepts sigev_signo == 0, being able
			   to silently discard the notification */
			if (!info->notify.sigev_signo)
				break;

			clear_siginfo(&sig_i);
			sig_i.si_signo = info->notify.sigev_signo;
			sig_i.si_errno = 0;
			sig_i.si_code = SI_MESGQ;
			sig_i.si_value = info->notify.sigev_value;
			rcu_read_lock();
			/* map current pid/uid into receiver's namespaces */
			sig_i.si_pid = task_tgid_nr_ns(current,
						ns_of_pid(info->notify_owner));
			sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
						current_uid());
			/*
			 * The registering task may have exec'd a different
			 * image since mq_notify(); in that case the signal
			 * must be suppressed.  self_exec_id was saved at
			 * registration time for exactly this check.
			 */
			task = pid_task(info->notify_owner, PIDTYPE_TGID);
			if (task && task->self_exec_id ==
						info->notify_self_exec_id) {
				do_send_sig_info(info->notify.sigev_signo,
						&sig_i, task, PIDTYPE_TGID);
			}
			rcu_read_unlock();
			break;
		}
		case SIGEV_THREAD:
			set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
			netlink_sendskb(info->notify_sock, info->notify_cookie);
			break;
		}
		/* after notification unregisters process */
		put_pid(info->notify_owner);
		put_user_ns(info->notify_user_ns);
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
	}
	wake_up(&info->wait_q);
}

static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
			   struct timespec64 *ts)
{
	if (get_timespec64(ts, u_abs_timeout))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}
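
/*
 * Illustrative userspace sketch (not part of this file): the *timed*
 * syscalls take an absolute CLOCK_REALTIME timeout, so a relative "wait
 * up to five seconds" must be built on top of clock_gettime().
 *
 *	#include <mqueue.h>
 *	#include <time.h>
 *	#include <errno.h>
 *
 *	struct timespec ts;
 *	clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec += 5;
 *	if (mq_timedsend(q, msg, len, 0, &ts) == -1 && errno == ETIMEDOUT) {
 *		// the queue stayed full for the whole five seconds
 *	}
 */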

static void remove_notification(struct mqueue_inode_info *info)
{
	if (info->notify_owner != NULL &&
	    info->notify.sigev_notify == SIGEV_THREAD) {
		set_cookie(info->notify_cookie, NOTIFY_REMOVED);
		netlink_sendskb(info->notify_sock, info->notify_cookie);
	}
	put_pid(info->notify_owner);
	put_user_ns(info->notify_user_ns);
	info->notify_owner = NULL;
	info->notify_user_ns = NULL;
}

static int prepare_open(struct dentry *dentry, int oflag, int ro,
			umode_t mode, struct filename *name,
			struct mq_attr *attr)
{
	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
						  MAY_READ | MAY_WRITE };
	int acc;

	if (d_really_is_negative(dentry)) {
		if (!(oflag & O_CREAT))
			return -ENOENT;
		if (ro)
			return ro;
		audit_inode_parent_hidden(name, dentry->d_parent);
		return vfs_mkobj(dentry, mode & ~current_umask(),
				  mqueue_create_attr, attr);
	}
	/* it already existed */
	audit_inode(name, dentry, 0);
	if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
		return -EEXIST;
	/* reject the invalid fourth O_ACCMODE value (3) */
	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
		return -EINVAL;
	acc = oflag2acc[oflag & O_ACCMODE];
	return inode_permission(&init_user_ns, d_inode(dentry), acc);
}

static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		      struct mq_attr *attr)
{
	struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt;
	struct dentry *root = mnt->mnt_root;
	struct filename *name;
	struct path path;
	int fd, error;
	int ro;

	audit_mq_open(oflag, mode, attr);

	if (IS_ERR(name = getname(u_name)))
		return PTR_ERR(name);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out_putname;

	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
	inode_lock(d_inode(root));
	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
	if (IS_ERR(path.dentry)) {
		error = PTR_ERR(path.dentry);
		goto out_putfd;
	}
	path.mnt = mntget(mnt);
	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
	if (!error) {
		struct file *file = dentry_open(&path, oflag, current_cred());
		if (!IS_ERR(file))
			fd_install(fd, file);
		else
			error = PTR_ERR(file);
	}
	path_put(&path);
out_putfd:
	if (error) {
		put_unused_fd(fd);
		fd = error;
	}
	inode_unlock(d_inode(root));
	if (!ro)
		mnt_drop_write(mnt);
out_putname:
	putname(name);
	return fd;
}

SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
		struct mq_attr __user *, u_attr)
{
	struct mq_attr attr;
	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
		return -EFAULT;

	return do_mq_open(u_name, oflag, mode, u_attr ? &attr : NULL);
}
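
/*
 * Illustrative userspace sketch (not part of this file; the queue name
 * and sizes are made up): creating a queue with explicit attributes,
 * which feed the validation in mqueue_get_inode().
 *
 *	#include <mqueue.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	struct mq_attr attr = {
 *		.mq_maxmsg  = 10,	// <= /proc/sys/fs/mqueue/msg_max
 *		.mq_msgsize = 1024,	// <= /proc/sys/fs/mqueue/msgsize_max
 *	};
 *	mqd_t q = mq_open("/prio-demo", O_CREAT | O_EXCL | O_RDWR,
 *			  0600, &attr);
 *	if (q == (mqd_t)-1)
 *		perror("mq_open");	// e.g. EINVAL from the checks above
 */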

SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
{
	int err;
	struct filename *name;
	struct dentry *dentry;
	struct inode *inode = NULL;
	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ipc_ns->mq_mnt;

	name = getname(u_name);
	if (IS_ERR(name))
		return PTR_ERR(name);

	audit_inode_parent_hidden(name, mnt->mnt_root);
	err = mnt_want_write(mnt);
	if (err)
		goto out_name;
	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
	dentry = lookup_one_len(name->name, mnt->mnt_root,
				strlen(name->name));
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out_unlock;
	}

	inode = d_inode(dentry);
	if (!inode) {
		err = -ENOENT;
	} else {
		ihold(inode);
		err = vfs_unlink(&init_user_ns, d_inode(dentry->d_parent),
				 dentry, NULL);
	}
	dput(dentry);

out_unlock:
	inode_unlock(d_inode(mnt->mnt_root));
	if (inode)
		iput(inode);
	mnt_drop_write(mnt);
out_name:
	putname(name);

	return err;
}
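
/*
 * Illustrative userspace counterpart (queue name made up): like a file,
 * a queue persists until unlinked; mq_unlink() only removes the name,
 * and the queue itself lives on until the last descriptor is closed.
 *
 *	mq_close(q);
 *	mq_unlink("/prio-demo");
 */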

/*
 * The same wakeup protocol is used for senders and receivers: a task
 * that must block enqueues an ext_wait_queue entry with state STATE_NONE
 * and calls wq_sleep().  Its counterpart completes the operation on the
 * sleeper's behalf (handing over or consuming ->msg) and then publishes
 * STATE_READY with smp_store_release(); from that point the woken task
 * no longer takes info->lock and simply returns.  wq_sleep() pairs the
 * release with an acquire barrier before it reads ->msg; see the
 * "Locking" comment near the top of this file.
 */
static inline void __pipelined_op(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct ext_wait_queue *this)
{
	struct task_struct *task;

	list_del(&this->list);
	task = get_task_struct(this->task);

	/* pairs with the acquire in wq_sleep() */
	smp_store_release(&this->state, STATE_READY);
	wake_q_add_safe(wake_q, task);
}

/* pipelined_send() - send a message directly to the task waiting in
 * sys_mq_timedreceive() (without inserting message into a queue).
 */
static inline void pipelined_send(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct msg_msg *message,
				  struct ext_wait_queue *receiver)
{
	receiver->msg = message;
	__pipelined_op(wake_q, info, receiver);
}

/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
 * gets its message and put to the queue (we have reference to the task).
 */
static inline void pipelined_receive(struct wake_q_head *wake_q,
				     struct mqueue_inode_info *info)
{
	struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);

	if (!sender) {
		/* for poll */
		wake_up_interruptible(&info->wait_q);
		return;
	}
	if (msg_insert(sender->msg, info))
		return;

	__pipelined_op(wake_q, info, sender);
}

static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
		size_t msg_len, unsigned int msg_prio,
		struct timespec64 *ts)
{
	struct fd f;
	struct inode *inode;
	struct ext_wait_queue wait;
	struct ext_wait_queue *receiver;
	struct msg_msg *msg_ptr;
	struct mqueue_inode_info *info;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;
	int ret = 0;
	DEFINE_WAKE_Q(wake_q);

	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
		return -EINVAL;

	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	if (unlikely(!(f.file->f_mode & FMODE_WRITE))) {
		ret = -EBADF;
		goto out_fput;
	}

	if (unlikely(msg_len > info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/* First try to allocate memory, before doing anything with
	 * existing queues. */
	msg_ptr = load_msg(u_msg_ptr, msg_len);
	if (IS_ERR(msg_ptr)) {
		ret = PTR_ERR(msg_ptr);
		goto out_fput;
	}
	msg_ptr->m_ts = msg_len;
	msg_ptr->m_type = msg_prio;

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
		new_leaf = NULL;
	} else {
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
		if (f.file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
		} else {
			wait.task = current;
			wait.msg = (void *) msg_ptr;

			/* memory barrier not required, we hold info->lock */
			WRITE_ONCE(wait.state, STATE_NONE);
			ret = wq_sleep(info, SEND, timeout, &wait);
			/*
			 * wq_sleep must be called with info->lock held, and
			 * returns with the lock released
			 */
			goto out_free;
		}
	} else {
		receiver = wq_get_first_waiter(info, RECV);
		if (receiver) {
			pipelined_send(&wake_q, info, msg_ptr, receiver);
		} else {
			/* adds message to the queue */
			ret = msg_insert(msg_ptr, info);
			if (ret)
				goto out_unlock;
			__do_notify(info);
		}
		inode->i_atime = inode->i_mtime = inode->i_ctime =
				current_time(inode);
	}
out_unlock:
	spin_unlock(&info->lock);
	wake_up_q(&wake_q);
out_free:
	if (ret)
		free_msg(msg_ptr);
out_fput:
	fdput(f);
out:
	return ret;
}

static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
		size_t msg_len, unsigned int __user *u_msg_prio,
		struct timespec64 *ts)
{
	ssize_t ret;
	struct msg_msg *msg_ptr;
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct ext_wait_queue wait;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;

	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, 0, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	if (unlikely(!(f.file->f_mode & FMODE_READ))) {
		ret = -EBADF;
		goto out_fput;
	}

	/* checks if buffer is big enough */
	if (unlikely(msg_len < info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
	} else {
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == 0) {
		if (f.file->f_flags & O_NONBLOCK) {
			spin_unlock(&info->lock);
			ret = -EAGAIN;
		} else {
			wait.task = current;

			/* memory barrier not required, we hold info->lock */
			WRITE_ONCE(wait.state, STATE_NONE);
			ret = wq_sleep(info, RECV, timeout, &wait);
			msg_ptr = wait.msg;
		}
	} else {
		DEFINE_WAKE_Q(wake_q);

		msg_ptr = msg_get(info);

		inode->i_atime = inode->i_mtime = inode->i_ctime =
				current_time(inode);

		/* There is now free space in queue. */
		pipelined_receive(&wake_q, info);
		spin_unlock(&info->lock);
		wake_up_q(&wake_q);
		ret = 0;
	}
	if (ret == 0) {
		ret = msg_ptr->m_ts;

		if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
			store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
			ret = -EFAULT;
		}
		free_msg(msg_ptr);
	}
out_fput:
	fdput(f);
out:
	return ret;
}

SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int, msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int __user *, u_msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}
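
/*
 * Illustrative userspace sketch (not part of this file): the receive
 * buffer must be at least mq_msgsize bytes or the call fails with
 * EMSGSIZE (checked in do_mq_timedreceive() above); the return value is
 * the length of the received message.
 *
 *	#include <mqueue.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	struct mq_attr attr;
 *	mq_getattr(q, &attr);
 *
 *	char *buf = malloc(attr.mq_msgsize);
 *	unsigned int prio;
 *	ssize_t n = mq_receive(q, buf, attr.mq_msgsize, &prio);
 *	if (n >= 0)
 *		printf("%zd bytes at priority %u\n", n, prio);
 */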

/*
 * Notes: the case when user wants us to deregister (with NULL sigevent)
 * and he isn't currently owner of notification, will be silently discarded.
 * It isn't explicitly defined in the POSIX.
 */
static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
{
	int ret;
	struct fd f;
	struct sock *sock;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct sk_buff *nc;

	audit_mq_notify(mqdes, notification);

	nc = NULL;
	sock = NULL;
	if (notification != NULL) {
		if (unlikely(notification->sigev_notify != SIGEV_NONE &&
			     notification->sigev_notify != SIGEV_SIGNAL &&
			     notification->sigev_notify != SIGEV_THREAD))
			return -EINVAL;
		if (notification->sigev_notify == SIGEV_SIGNAL &&
			!valid_signal(notification->sigev_signo)) {
			return -EINVAL;
		}
		if (notification->sigev_notify == SIGEV_THREAD) {
			long timeo;

			/* create the notify skb */
			nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
			if (!nc)
				return -ENOMEM;

			if (copy_from_user(nc->data,
					notification->sigev_value.sival_ptr,
					NOTIFY_COOKIE_LEN)) {
				ret = -EFAULT;
				goto free_skb;
			}

			skb_put(nc, NOTIFY_COOKIE_LEN);
			/*
			 * Attach the cookie to the netlink socket; for
			 * SIGEV_THREAD, sigev_signo carries the socket fd
			 * set up by the C library.
			 */
retry:
			f = fdget(notification->sigev_signo);
			if (!f.file) {
				ret = -EBADF;
				goto out;
			}
			sock = netlink_getsockbyfilp(f.file);
			fdput(f);
			if (IS_ERR(sock)) {
				ret = PTR_ERR(sock);
				goto free_skb;
			}

			timeo = MAX_SCHEDULE_TIMEOUT;
			ret = netlink_attachskb(sock, nc, &timeo, NULL);
			if (ret == 1) {
				sock = NULL;
				goto retry;
			}
			if (ret)
				return ret;
		}
	}

	f = fdget(mqdes);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);

	ret = 0;
	spin_lock(&info->lock);
	if (notification == NULL) {
		if (info->notify_owner == task_tgid(current)) {
			remove_notification(info);
			inode->i_atime = inode->i_ctime = current_time(inode);
		}
	} else if (info->notify_owner != NULL) {
		ret = -EBUSY;
	} else {
		switch (notification->sigev_notify) {
		case SIGEV_NONE:
			info->notify.sigev_notify = SIGEV_NONE;
			break;
		case SIGEV_THREAD:
			info->notify_sock = sock;
			info->notify_cookie = nc;
			sock = NULL;
			nc = NULL;
			info->notify.sigev_notify = SIGEV_THREAD;
			break;
		case SIGEV_SIGNAL:
			info->notify.sigev_signo = notification->sigev_signo;
			info->notify.sigev_value = notification->sigev_value;
			info->notify.sigev_notify = SIGEV_SIGNAL;
			info->notify_self_exec_id = current->self_exec_id;
			break;
		}

		info->notify_owner = get_pid(task_tgid(current));
		info->notify_user_ns = get_user_ns(current_user_ns());
		inode->i_atime = inode->i_ctime = current_time(inode);
	}
	spin_unlock(&info->lock);
out_fput:
	fdput(f);
out:
	if (sock)
		netlink_detachskb(sock, nc);
	else
free_skb:	/* only reached via the gotos above; frees an unattached skb */
		dev_kfree_skb(nc);

	return ret;
}

SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		const struct sigevent __user *, u_notification)
{
	struct sigevent n, *p = NULL;
	if (u_notification) {
		if (copy_from_user(&n, u_notification, sizeof(struct sigevent)))
			return -EFAULT;
		p = &n;
	}
	return do_mq_notify(mqdes, p);
}
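
/*
 * Illustrative userspace sketch (not part of this file): registering for
 * SIGEV_SIGNAL notification.  Only one process may be registered per
 * queue at a time (-EBUSY above), and the registration is consumed after
 * one empty-to-non-empty transition, so it must be re-armed each time.
 *
 *	#include <mqueue.h>
 *	#include <signal.h>
 *	#include <stdio.h>
 *
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGUSR1,
 *	};
 *	if (mq_notify(q, &sev) == -1)
 *		perror("mq_notify");
 */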

static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
{
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;

	/* only the blocking mode can be changed after creation */
	if (new && (new->mq_flags & (~O_NONBLOCK)))
		return -EINVAL;

	f = fdget(mqdes);
	if (!f.file)
		return -EBADF;

	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		fdput(f);
		return -EBADF;
	}

	inode = file_inode(f.file);
	info = MQUEUE_I(inode);

	spin_lock(&info->lock);

	if (old) {
		*old = info->attr;
		old->mq_flags = f.file->f_flags & O_NONBLOCK;
	}
	if (new) {
		audit_mq_getsetattr(mqdes, new);
		spin_lock(&f.file->f_lock);
		if (new->mq_flags & O_NONBLOCK)
			f.file->f_flags |= O_NONBLOCK;
		else
			f.file->f_flags &= ~O_NONBLOCK;
		spin_unlock(&f.file->f_lock);

		inode->i_atime = inode->i_ctime = current_time(inode);
	}

	spin_unlock(&info->lock);
	fdput(f);
	return 0;
}

SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		const struct mq_attr __user *, u_mqstat,
		struct mq_attr __user *, u_omqstat)
{
	int ret;
	struct mq_attr mqstat, omqstat;
	struct mq_attr *new = NULL, *old = NULL;

	if (u_mqstat) {
		new = &mqstat;
		if (copy_from_user(new, u_mqstat, sizeof(struct mq_attr)))
			return -EFAULT;
	}
	if (u_omqstat)
		old = &omqstat;

	ret = do_mq_getsetattr(mqdes, new, old);
	if (ret || !old)
		return ret;

	if (copy_to_user(u_omqstat, old, sizeof(struct mq_attr)))
		return -EFAULT;
	return 0;
}
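
/*
 * Illustrative userspace sketch (not part of this file): toggling
 * O_NONBLOCK at runtime, the only attribute do_mq_getsetattr() allows
 * to change.
 *
 *	#include <mqueue.h>
 *
 *	struct mq_attr attr, old;
 *	mq_getattr(q, &attr);
 *	attr.mq_flags |= O_NONBLOCK;
 *	mq_setattr(q, &attr, &old);	// old receives the previous attrs
 */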

#ifdef CONFIG_COMPAT

struct compat_mq_attr {
	compat_long_t mq_flags;
	compat_long_t mq_maxmsg;
	compat_long_t mq_msgsize;
	compat_long_t mq_curmsgs;
	compat_long_t __reserved[4];
};

static inline int get_compat_mq_attr(struct mq_attr *attr,
			const struct compat_mq_attr __user *uattr)
{
	struct compat_mq_attr v;

	if (copy_from_user(&v, uattr, sizeof(*uattr)))
		return -EFAULT;

	memset(attr, 0, sizeof(*attr));
	attr->mq_flags = v.mq_flags;
	attr->mq_maxmsg = v.mq_maxmsg;
	attr->mq_msgsize = v.mq_msgsize;
	attr->mq_curmsgs = v.mq_curmsgs;
	return 0;
}

static inline int put_compat_mq_attr(const struct mq_attr *attr,
			struct compat_mq_attr __user *uattr)
{
	struct compat_mq_attr v;

	memset(&v, 0, sizeof(v));
	v.mq_flags = attr->mq_flags;
	v.mq_maxmsg = attr->mq_maxmsg;
	v.mq_msgsize = attr->mq_msgsize;
	v.mq_curmsgs = attr->mq_curmsgs;
	if (copy_to_user(uattr, &v, sizeof(*uattr)))
		return -EFAULT;
	return 0;
}

COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name,
		       int, oflag, compat_mode_t, mode,
		       struct compat_mq_attr __user *, u_attr)
{
	struct mq_attr attr, *p = NULL;
	if (u_attr && oflag & O_CREAT) {
		p = &attr;
		if (get_compat_mq_attr(&attr, u_attr))
			return -EFAULT;
	}
	return do_mq_open(u_name, oflag, mode, p);
}

COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		       const struct compat_sigevent __user *, u_notification)
{
	struct sigevent n, *p = NULL;
	if (u_notification) {
		if (get_compat_sigevent(&n, u_notification))
			return -EFAULT;
		if (n.sigev_notify == SIGEV_THREAD)
			n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
		p = &n;
	}
	return do_mq_notify(mqdes, p);
}

COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		       const struct compat_mq_attr __user *, u_mqstat,
		       struct compat_mq_attr __user *, u_omqstat)
{
	int ret;
	struct mq_attr mqstat, omqstat;
	struct mq_attr *new = NULL, *old = NULL;

	if (u_mqstat) {
		new = &mqstat;
		if (get_compat_mq_attr(new, u_mqstat))
			return -EFAULT;
	}
	if (u_omqstat)
		old = &omqstat;

	ret = do_mq_getsetattr(mqdes, new, old);
	if (ret || !old)
		return ret;

	if (put_compat_mq_attr(old, u_omqstat))
		return -EFAULT;
	return 0;
}
#endif

#ifdef CONFIG_COMPAT_32BIT_TIME
static int compat_prepare_timeout(const struct old_timespec32 __user *p,
				   struct timespec64 *ts)
{
	if (get_old_timespec32(ts, p))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}

SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
		const char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int, msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = compat_prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
		char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int __user *, u_msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = compat_prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}
#endif

static const struct inode_operations mqueue_dir_inode_operations = {
	.lookup = simple_lookup,
	.create = mqueue_create,
	.unlink = mqueue_unlink,
};

static const struct file_operations mqueue_file_operations = {
	.flush = mqueue_flush_file,
	.poll = mqueue_poll_file,
	.read = mqueue_read_file,
	.llseek = default_llseek,
};

static const struct super_operations mqueue_super_ops = {
	.alloc_inode = mqueue_alloc_inode,
	.free_inode = mqueue_free_inode,
	.evict_inode = mqueue_evict_inode,
	.statfs = simple_statfs,
};

static const struct fs_context_operations mqueue_fs_context_ops = {
	.free = mqueue_fs_context_free,
	.get_tree = mqueue_get_tree,
};

static struct file_system_type mqueue_fs_type = {
	.name = "mqueue",
	.init_fs_context = mqueue_init_fs_context,
	.kill_sb = kill_litter_super,
	.fs_flags = FS_USERNS_MOUNT,
};
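
/*
 * Userspace normally accesses this filesystem via a mount at
 * /dev/mqueue, where queue names such as "/foo" appear as files:
 *
 *	# mkdir -p /dev/mqueue
 *	# mount -t mqueue none /dev/mqueue
 */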

int mq_init_ns(struct ipc_namespace *ns)
{
	struct vfsmount *m;

	ns->mq_queues_count  = 0;
	ns->mq_queues_max    = DFLT_QUEUESMAX;
	ns->mq_msg_max       = DFLT_MSGMAX;
	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
	ns->mq_msg_default   = DFLT_MSG;
	ns->mq_msgsize_default  = DFLT_MSGSIZE;

	m = mq_create_mount(ns);
	if (IS_ERR(m))
		return PTR_ERR(m);
	ns->mq_mnt = m;
	return 0;
}

void mq_clear_sbinfo(struct ipc_namespace *ns)
{
	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
}

void mq_put_mnt(struct ipc_namespace *ns)
{
	kern_unmount(ns->mq_mnt);
}

static int __init init_mqueue_fs(void)
{
	int error;

	mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
				sizeof(struct mqueue_inode_info), 0,
				SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once);
	if (mqueue_inode_cachep == NULL)
		return -ENOMEM;

	if (!setup_mq_sysctls(&init_ipc_ns)) {
		pr_warn("sysctl registration failed\n");
		error = -ENOMEM;
		goto out_cache;
	}

	error = register_filesystem(&mqueue_fs_type);
	if (error)
		goto out_sysctl;

	spin_lock_init(&mq_lock);

	error = mq_init_ns(&init_ipc_ns);
	if (error)
		goto out_filesystem;

	return 0;

	/* unwind in reverse order so neither the sysctls nor the cache leak */
out_filesystem:
	unregister_filesystem(&mqueue_fs_type);
out_sysctl:
	retire_mq_sysctls(&init_ipc_ns);
out_cache:
	kmem_cache_destroy(mqueue_inode_cachep);
	return error;
}

device_initcall(init_mqueue_fs);