Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * linux/ipc/shm.c
0004  * Copyright (C) 1992, 1993 Krishna Balasubramanian
0005  *   Many improvements/fixes by Bruno Haible.
0006  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
0007  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
0008  *
0009  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
0010  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
0011  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
0012  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
0013  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
0014  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
0015  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
0016  *
0017  * support for audit of ipc object properties and permission changes
0018  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
0019  *
0020  * namespaces support
0021  * OpenVZ, SWsoft Inc.
0022  * Pavel Emelianov <xemul@openvz.org>
0023  *
0024  * Better ipc lock (kern_ipc_perm.lock) handling
0025  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
0026  */
0027 
0028 #include <linux/slab.h>
0029 #include <linux/mm.h>
0030 #include <linux/hugetlb.h>
0031 #include <linux/shm.h>
0032 #include <linux/init.h>
0033 #include <linux/file.h>
0034 #include <linux/mman.h>
0035 #include <linux/shmem_fs.h>
0036 #include <linux/security.h>
0037 #include <linux/syscalls.h>
0038 #include <linux/audit.h>
0039 #include <linux/capability.h>
0040 #include <linux/ptrace.h>
0041 #include <linux/seq_file.h>
0042 #include <linux/rwsem.h>
0043 #include <linux/nsproxy.h>
0044 #include <linux/mount.h>
0045 #include <linux/ipc_namespace.h>
0046 #include <linux/rhashtable.h>
0047 
0048 #include <linux/uaccess.h>
0049 
0050 #include "util.h"
0051 
0052 struct shmid_kernel /* private to the kernel */
0053 {
0054     struct kern_ipc_perm    shm_perm;
0055     struct file     *shm_file;
0056     unsigned long       shm_nattch;
0057     unsigned long       shm_segsz;
0058     time64_t        shm_atim;
0059     time64_t        shm_dtim;
0060     time64_t        shm_ctim;
0061     struct pid      *shm_cprid;
0062     struct pid      *shm_lprid;
0063     struct ucounts      *mlock_ucounts;
0064 
0065     /*
0066      * The task created the shm object, for
0067      * task_lock(shp->shm_creator)
0068      */
0069     struct task_struct  *shm_creator;
0070 
0071     /*
0072      * List by creator. task_lock(->shm_creator) required for read/write.
0073      * If list_empty(), then the creator is dead already.
0074      */
0075     struct list_head    shm_clist;
0076     struct ipc_namespace    *ns;
0077 } __randomize_layout;
0078 
/*
 * Flags stored in the upper byte of shm_mode.
 */
/* Segment will be destroyed on last detach. */
#define SHM_DEST	01000
/* Segment will not be swapped. */
#define SHM_LOCKED	02000
0082 
/*
 * Private data of a shm file, reached through ->private_data
 * (see the shm_file_data() accessor).
 */
struct shm_file_data {
	/* Segment id, looked up again via shm_lock(ns, id). */
	int id;
	/* Namespace the id belongs to. */
	struct ipc_namespace *ns;
	/* Underlying file that the shm file operations forward to. */
	struct file *file;
	/* vm_ops of the underlying mapping, saved by shm_mmap(). */
	const struct vm_operations_struct *vm_ops;
};
0089 
0090 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
0091 
0092 static const struct file_operations shm_file_operations;
0093 static const struct vm_operations_struct shm_vm_ops;
0094 
0095 #define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])
0096 
0097 #define shm_unlock(shp)         \
0098     ipc_unlock(&(shp)->shm_perm)
0099 
0100 static int newseg(struct ipc_namespace *, struct ipc_params *);
0101 static void shm_open(struct vm_area_struct *vma);
0102 static void shm_close(struct vm_area_struct *vma);
0103 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
0104 #ifdef CONFIG_PROC_FS
0105 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
0106 #endif
0107 
0108 void shm_init_ns(struct ipc_namespace *ns)
0109 {
0110     ns->shm_ctlmax = SHMMAX;
0111     ns->shm_ctlall = SHMALL;
0112     ns->shm_ctlmni = SHMMNI;
0113     ns->shm_rmid_forced = 0;
0114     ns->shm_tot = 0;
0115     ipc_init_ids(&shm_ids(ns));
0116 }
0117 
0118 /*
0119  * Called with shm_ids.rwsem (writer) and the shp structure locked.
0120  * Only shm_ids.rwsem remains locked on exit.
0121  */
0122 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
0123 {
0124     struct shmid_kernel *shp;
0125 
0126     shp = container_of(ipcp, struct shmid_kernel, shm_perm);
0127     WARN_ON(ns != shp->ns);
0128 
0129     if (shp->shm_nattch) {
0130         shp->shm_perm.mode |= SHM_DEST;
0131         /* Do not find it any more */
0132         ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
0133         shm_unlock(shp);
0134     } else
0135         shm_destroy(ns, shp);
0136 }
0137 
0138 #ifdef CONFIG_IPC_NS
0139 void shm_exit_ns(struct ipc_namespace *ns)
0140 {
0141     free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
0142     idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
0143     rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
0144 }
0145 #endif
0146 
0147 static int __init ipc_ns_init(void)
0148 {
0149     shm_init_ns(&init_ipc_ns);
0150     return 0;
0151 }
0152 
0153 pure_initcall(ipc_ns_init);
0154 
0155 void __init shm_init(void)
0156 {
0157     ipc_init_proc_interface("sysvipc/shm",
0158 #if BITS_PER_LONG <= 32
0159                 "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
0160 #else
0161                 "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
0162 #endif
0163                 IPC_SHM_IDS, sysvipc_shm_proc_show);
0164 }
0165 
0166 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
0167 {
0168     struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
0169 
0170     if (IS_ERR(ipcp))
0171         return ERR_CAST(ipcp);
0172 
0173     return container_of(ipcp, struct shmid_kernel, shm_perm);
0174 }
0175 
0176 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
0177 {
0178     struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
0179 
0180     if (IS_ERR(ipcp))
0181         return ERR_CAST(ipcp);
0182 
0183     return container_of(ipcp, struct shmid_kernel, shm_perm);
0184 }
0185 
0186 /*
0187  * shm_lock_(check_) routines are called in the paths where the rwsem
0188  * is not necessarily held.
0189  */
0190 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
0191 {
0192     struct kern_ipc_perm *ipcp;
0193 
0194     rcu_read_lock();
0195     ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
0196     if (IS_ERR(ipcp))
0197         goto err;
0198 
0199     ipc_lock_object(ipcp);
0200     /*
0201      * ipc_rmid() may have already freed the ID while ipc_lock_object()
0202      * was spinning: here verify that the structure is still valid.
0203      * Upon races with RMID, return -EIDRM, thus indicating that
0204      * the ID points to a removed identifier.
0205      */
0206     if (ipc_valid_object(ipcp)) {
0207         /* return a locked ipc object upon success */
0208         return container_of(ipcp, struct shmid_kernel, shm_perm);
0209     }
0210 
0211     ipc_unlock_object(ipcp);
0212     ipcp = ERR_PTR(-EIDRM);
0213 err:
0214     rcu_read_unlock();
0215     /*
0216      * Callers of shm_lock() must validate the status of the returned ipc
0217      * object pointer and error out as appropriate.
0218      */
0219     return ERR_CAST(ipcp);
0220 }
0221 
0222 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
0223 {
0224     rcu_read_lock();
0225     ipc_lock_object(&ipcp->shm_perm);
0226 }
0227 
0228 static void shm_rcu_free(struct rcu_head *head)
0229 {
0230     struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
0231                             rcu);
0232     struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
0233                             shm_perm);
0234     security_shm_free(&shp->shm_perm);
0235     kfree(shp);
0236 }
0237 
0238 /*
0239  * It has to be called with shp locked.
0240  * It must be called before ipc_rmid()
0241  */
0242 static inline void shm_clist_rm(struct shmid_kernel *shp)
0243 {
0244     struct task_struct *creator;
0245 
0246     /* ensure that shm_creator does not disappear */
0247     rcu_read_lock();
0248 
0249     /*
0250      * A concurrent exit_shm may do a list_del_init() as well.
0251      * Just do nothing if exit_shm already did the work
0252      */
0253     if (!list_empty(&shp->shm_clist)) {
0254         /*
0255          * shp->shm_creator is guaranteed to be valid *only*
0256          * if shp->shm_clist is not empty.
0257          */
0258         creator = shp->shm_creator;
0259 
0260         task_lock(creator);
0261         /*
0262          * list_del_init() is a nop if the entry was already removed
0263          * from the list.
0264          */
0265         list_del_init(&shp->shm_clist);
0266         task_unlock(creator);
0267     }
0268     rcu_read_unlock();
0269 }
0270 
0271 static inline void shm_rmid(struct shmid_kernel *s)
0272 {
0273     shm_clist_rm(s);
0274     ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
0275 }
0276 
0277 
0278 static int __shm_open(struct vm_area_struct *vma)
0279 {
0280     struct file *file = vma->vm_file;
0281     struct shm_file_data *sfd = shm_file_data(file);
0282     struct shmid_kernel *shp;
0283 
0284     shp = shm_lock(sfd->ns, sfd->id);
0285 
0286     if (IS_ERR(shp))
0287         return PTR_ERR(shp);
0288 
0289     if (shp->shm_file != sfd->file) {
0290         /* ID was reused */
0291         shm_unlock(shp);
0292         return -EINVAL;
0293     }
0294 
0295     shp->shm_atim = ktime_get_real_seconds();
0296     ipc_update_pid(&shp->shm_lprid, task_tgid(current));
0297     shp->shm_nattch++;
0298     shm_unlock(shp);
0299     return 0;
0300 }
0301 
/*
 * vm_ops->open callback; called by fork, once for every shm attach.
 */
static void shm_open(struct vm_area_struct *vma)
{
	int ret;

	ret = __shm_open(vma);

	/*
	 * A failure means we raced in the idr lookup or with
	 * shm_destroy(); either way the ID is busted.
	 */
	WARN_ON_ONCE(ret != 0);
}
0312 
0313 /*
0314  * shm_destroy - free the struct shmid_kernel
0315  *
0316  * @ns: namespace
0317  * @shp: struct to free
0318  *
0319  * It has to be called with shp and shm_ids.rwsem (writer) locked,
0320  * but returns with shp unlocked and freed.
0321  */
0322 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
0323 {
0324     struct file *shm_file;
0325 
0326     shm_file = shp->shm_file;
0327     shp->shm_file = NULL;
0328     ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
0329     shm_rmid(shp);
0330     shm_unlock(shp);
0331     if (!is_file_hugepages(shm_file))
0332         shmem_lock(shm_file, 0, shp->mlock_ucounts);
0333     fput(shm_file);
0334     ipc_update_pid(&shp->shm_cprid, NULL);
0335     ipc_update_pid(&shp->shm_lprid, NULL);
0336     ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
0337 }
0338 
0339 /*
0340  * shm_may_destroy - identifies whether shm segment should be destroyed now
0341  *
0342  * Returns true if and only if there are no active users of the segment and
0343  * one of the following is true:
0344  *
0345  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
0346  *
0347  * 2) sysctl kernel.shm_rmid_forced is set to 1.
0348  */
0349 static bool shm_may_destroy(struct shmid_kernel *shp)
0350 {
0351     return (shp->shm_nattch == 0) &&
0352            (shp->ns->shm_rmid_forced ||
0353         (shp->shm_perm.mode & SHM_DEST));
0354 }
0355 
0356 /*
0357  * remove the attach descriptor vma.
0358  * free memory for segment if it is marked destroyed.
0359  * The descriptor has already been removed from the current->mm->mmap list
0360  * and will later be kfree()d.
0361  */
0362 static void shm_close(struct vm_area_struct *vma)
0363 {
0364     struct file *file = vma->vm_file;
0365     struct shm_file_data *sfd = shm_file_data(file);
0366     struct shmid_kernel *shp;
0367     struct ipc_namespace *ns = sfd->ns;
0368 
0369     down_write(&shm_ids(ns).rwsem);
0370     /* remove from the list of attaches of the shm segment */
0371     shp = shm_lock(ns, sfd->id);
0372 
0373     /*
0374      * We raced in the idr lookup or with shm_destroy().
0375      * Either way, the ID is busted.
0376      */
0377     if (WARN_ON_ONCE(IS_ERR(shp)))
0378         goto done; /* no-op */
0379 
0380     ipc_update_pid(&shp->shm_lprid, task_tgid(current));
0381     shp->shm_dtim = ktime_get_real_seconds();
0382     shp->shm_nattch--;
0383     if (shm_may_destroy(shp))
0384         shm_destroy(ns, shp);
0385     else
0386         shm_unlock(shp);
0387 done:
0388     up_write(&shm_ids(ns).rwsem);
0389 }
0390 
0391 /* Called with ns->shm_ids(ns).rwsem locked */
0392 static int shm_try_destroy_orphaned(int id, void *p, void *data)
0393 {
0394     struct ipc_namespace *ns = data;
0395     struct kern_ipc_perm *ipcp = p;
0396     struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
0397 
0398     /*
0399      * We want to destroy segments without users and with already
0400      * exit'ed originating process.
0401      *
0402      * As shp->* are changed under rwsem, it's safe to skip shp locking.
0403      */
0404     if (!list_empty(&shp->shm_clist))
0405         return 0;
0406 
0407     if (shm_may_destroy(shp)) {
0408         shm_lock_by_ptr(shp);
0409         shm_destroy(ns, shp);
0410     }
0411     return 0;
0412 }
0413 
0414 void shm_destroy_orphaned(struct ipc_namespace *ns)
0415 {
0416     down_write(&shm_ids(ns).rwsem);
0417     if (shm_ids(ns).in_use)
0418         idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
0419     up_write(&shm_ids(ns).rwsem);
0420 }
0421 
0422 /* Locking assumes this will only be called with task == current */
0423 void exit_shm(struct task_struct *task)
0424 {
0425     for (;;) {
0426         struct shmid_kernel *shp;
0427         struct ipc_namespace *ns;
0428 
0429         task_lock(task);
0430 
0431         if (list_empty(&task->sysvshm.shm_clist)) {
0432             task_unlock(task);
0433             break;
0434         }
0435 
0436         shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
0437                 shm_clist);
0438 
0439         /*
0440          * 1) Get pointer to the ipc namespace. It is worth to say
0441          * that this pointer is guaranteed to be valid because
0442          * shp lifetime is always shorter than namespace lifetime
0443          * in which shp lives.
0444          * We taken task_lock it means that shp won't be freed.
0445          */
0446         ns = shp->ns;
0447 
0448         /*
0449          * 2) If kernel.shm_rmid_forced is not set then only keep track of
0450          * which shmids are orphaned, so that a later set of the sysctl
0451          * can clean them up.
0452          */
0453         if (!ns->shm_rmid_forced)
0454             goto unlink_continue;
0455 
0456         /*
0457          * 3) get a reference to the namespace.
0458          *    The refcount could be already 0. If it is 0, then
0459          *    the shm objects will be free by free_ipc_work().
0460          */
0461         ns = get_ipc_ns_not_zero(ns);
0462         if (!ns) {
0463 unlink_continue:
0464             list_del_init(&shp->shm_clist);
0465             task_unlock(task);
0466             continue;
0467         }
0468 
0469         /*
0470          * 4) get a reference to shp.
0471          *   This cannot fail: shm_clist_rm() is called before
0472          *   ipc_rmid(), thus the refcount cannot be 0.
0473          */
0474         WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
0475 
0476         /*
0477          * 5) unlink the shm segment from the list of segments
0478          *    created by current.
0479          *    This must be done last. After unlinking,
0480          *    only the refcounts obtained above prevent IPC_RMID
0481          *    from destroying the segment or the namespace.
0482          */
0483         list_del_init(&shp->shm_clist);
0484 
0485         task_unlock(task);
0486 
0487         /*
0488          * 6) we have all references
0489          *    Thus lock & if needed destroy shp.
0490          */
0491         down_write(&shm_ids(ns).rwsem);
0492         shm_lock_by_ptr(shp);
0493         /*
0494          * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
0495          * safe to call ipc_rcu_putref here
0496          */
0497         ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
0498 
0499         if (ipc_valid_object(&shp->shm_perm)) {
0500             if (shm_may_destroy(shp))
0501                 shm_destroy(ns, shp);
0502             else
0503                 shm_unlock(shp);
0504         } else {
0505             /*
0506              * Someone else deleted the shp from namespace
0507              * idr/kht while we have waited.
0508              * Just unlock and continue.
0509              */
0510             shm_unlock(shp);
0511         }
0512 
0513         up_write(&shm_ids(ns).rwsem);
0514         put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
0515     }
0516 }
0517 
0518 static vm_fault_t shm_fault(struct vm_fault *vmf)
0519 {
0520     struct file *file = vmf->vma->vm_file;
0521     struct shm_file_data *sfd = shm_file_data(file);
0522 
0523     return sfd->vm_ops->fault(vmf);
0524 }
0525 
0526 static int shm_may_split(struct vm_area_struct *vma, unsigned long addr)
0527 {
0528     struct file *file = vma->vm_file;
0529     struct shm_file_data *sfd = shm_file_data(file);
0530 
0531     if (sfd->vm_ops->may_split)
0532         return sfd->vm_ops->may_split(vma, addr);
0533 
0534     return 0;
0535 }
0536 
0537 static unsigned long shm_pagesize(struct vm_area_struct *vma)
0538 {
0539     struct file *file = vma->vm_file;
0540     struct shm_file_data *sfd = shm_file_data(file);
0541 
0542     if (sfd->vm_ops->pagesize)
0543         return sfd->vm_ops->pagesize(vma);
0544 
0545     return PAGE_SIZE;
0546 }
0547 
0548 #ifdef CONFIG_NUMA
0549 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
0550 {
0551     struct file *file = vma->vm_file;
0552     struct shm_file_data *sfd = shm_file_data(file);
0553     int err = 0;
0554 
0555     if (sfd->vm_ops->set_policy)
0556         err = sfd->vm_ops->set_policy(vma, new);
0557     return err;
0558 }
0559 
0560 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
0561                     unsigned long addr)
0562 {
0563     struct file *file = vma->vm_file;
0564     struct shm_file_data *sfd = shm_file_data(file);
0565     struct mempolicy *pol = NULL;
0566 
0567     if (sfd->vm_ops->get_policy)
0568         pol = sfd->vm_ops->get_policy(vma, addr);
0569     else if (vma->vm_policy)
0570         pol = vma->vm_policy;
0571 
0572     return pol;
0573 }
0574 #endif
0575 
0576 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
0577 {
0578     struct shm_file_data *sfd = shm_file_data(file);
0579     int ret;
0580 
0581     /*
0582      * In case of remap_file_pages() emulation, the file can represent an
0583      * IPC ID that was removed, and possibly even reused by another shm
0584      * segment already.  Propagate this case as an error to caller.
0585      */
0586     ret = __shm_open(vma);
0587     if (ret)
0588         return ret;
0589 
0590     ret = call_mmap(sfd->file, vma);
0591     if (ret) {
0592         shm_close(vma);
0593         return ret;
0594     }
0595     sfd->vm_ops = vma->vm_ops;
0596 #ifdef CONFIG_MMU
0597     WARN_ON(!sfd->vm_ops->fault);
0598 #endif
0599     vma->vm_ops = &shm_vm_ops;
0600     return 0;
0601 }
0602 
0603 static int shm_release(struct inode *ino, struct file *file)
0604 {
0605     struct shm_file_data *sfd = shm_file_data(file);
0606 
0607     put_ipc_ns(sfd->ns);
0608     fput(sfd->file);
0609     shm_file_data(file) = NULL;
0610     kfree(sfd);
0611     return 0;
0612 }
0613 
0614 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
0615 {
0616     struct shm_file_data *sfd = shm_file_data(file);
0617 
0618     if (!sfd->file->f_op->fsync)
0619         return -EINVAL;
0620     return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
0621 }
0622 
0623 static long shm_fallocate(struct file *file, int mode, loff_t offset,
0624               loff_t len)
0625 {
0626     struct shm_file_data *sfd = shm_file_data(file);
0627 
0628     if (!sfd->file->f_op->fallocate)
0629         return -EOPNOTSUPP;
0630     return sfd->file->f_op->fallocate(file, mode, offset, len);
0631 }
0632 
0633 static unsigned long shm_get_unmapped_area(struct file *file,
0634     unsigned long addr, unsigned long len, unsigned long pgoff,
0635     unsigned long flags)
0636 {
0637     struct shm_file_data *sfd = shm_file_data(file);
0638 
0639     return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
0640                         pgoff, flags);
0641 }
0642 
0643 static const struct file_operations shm_file_operations = {
0644     .mmap       = shm_mmap,
0645     .fsync      = shm_fsync,
0646     .release    = shm_release,
0647     .get_unmapped_area  = shm_get_unmapped_area,
0648     .llseek     = noop_llseek,
0649     .fallocate  = shm_fallocate,
0650 };
0651 
0652 /*
0653  * shm_file_operations_huge is now identical to shm_file_operations,
0654  * but we keep it distinct for the sake of is_file_shm_hugepages().
0655  */
0656 static const struct file_operations shm_file_operations_huge = {
0657     .mmap       = shm_mmap,
0658     .fsync      = shm_fsync,
0659     .release    = shm_release,
0660     .get_unmapped_area  = shm_get_unmapped_area,
0661     .llseek     = noop_llseek,
0662     .fallocate  = shm_fallocate,
0663 };
0664 
0665 bool is_file_shm_hugepages(struct file *file)
0666 {
0667     return file->f_op == &shm_file_operations_huge;
0668 }
0669 
0670 static const struct vm_operations_struct shm_vm_ops = {
0671     .open   = shm_open, /* callback for a new vm-area open */
0672     .close  = shm_close,    /* callback for when the vm-area is released */
0673     .fault  = shm_fault,
0674     .may_split = shm_may_split,
0675     .pagesize = shm_pagesize,
0676 #if defined(CONFIG_NUMA)
0677     .set_policy = shm_set_policy,
0678     .get_policy = shm_get_policy,
0679 #endif
0680 };
0681 
0682 /**
0683  * newseg - Create a new shared memory segment
0684  * @ns: namespace
0685  * @params: ptr to the structure that contains key, size and shmflg
0686  *
0687  * Called with shm_ids.rwsem held as a writer.
0688  */
0689 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
0690 {
0691     key_t key = params->key;
0692     int shmflg = params->flg;
0693     size_t size = params->u.size;
0694     int error;
0695     struct shmid_kernel *shp;
0696     size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
0697     struct file *file;
0698     char name[13];
0699     vm_flags_t acctflag = 0;
0700 
0701     if (size < SHMMIN || size > ns->shm_ctlmax)
0702         return -EINVAL;
0703 
0704     if (numpages << PAGE_SHIFT < size)
0705         return -ENOSPC;
0706 
0707     if (ns->shm_tot + numpages < ns->shm_tot ||
0708             ns->shm_tot + numpages > ns->shm_ctlall)
0709         return -ENOSPC;
0710 
0711     shp = kmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT);
0712     if (unlikely(!shp))
0713         return -ENOMEM;
0714 
0715     shp->shm_perm.key = key;
0716     shp->shm_perm.mode = (shmflg & S_IRWXUGO);
0717     shp->mlock_ucounts = NULL;
0718 
0719     shp->shm_perm.security = NULL;
0720     error = security_shm_alloc(&shp->shm_perm);
0721     if (error) {
0722         kfree(shp);
0723         return error;
0724     }
0725 
0726     sprintf(name, "SYSV%08x", key);
0727     if (shmflg & SHM_HUGETLB) {
0728         struct hstate *hs;
0729         size_t hugesize;
0730 
0731         hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
0732         if (!hs) {
0733             error = -EINVAL;
0734             goto no_file;
0735         }
0736         hugesize = ALIGN(size, huge_page_size(hs));
0737 
0738         /* hugetlb_file_setup applies strict accounting */
0739         if (shmflg & SHM_NORESERVE)
0740             acctflag = VM_NORESERVE;
0741         file = hugetlb_file_setup(name, hugesize, acctflag,
0742                 HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
0743     } else {
0744         /*
0745          * Do not allow no accounting for OVERCOMMIT_NEVER, even
0746          * if it's asked for.
0747          */
0748         if  ((shmflg & SHM_NORESERVE) &&
0749                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
0750             acctflag = VM_NORESERVE;
0751         file = shmem_kernel_file_setup(name, size, acctflag);
0752     }
0753     error = PTR_ERR(file);
0754     if (IS_ERR(file))
0755         goto no_file;
0756 
0757     shp->shm_cprid = get_pid(task_tgid(current));
0758     shp->shm_lprid = NULL;
0759     shp->shm_atim = shp->shm_dtim = 0;
0760     shp->shm_ctim = ktime_get_real_seconds();
0761     shp->shm_segsz = size;
0762     shp->shm_nattch = 0;
0763     shp->shm_file = file;
0764     shp->shm_creator = current;
0765 
0766     /* ipc_addid() locks shp upon success. */
0767     error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
0768     if (error < 0)
0769         goto no_id;
0770 
0771     shp->ns = ns;
0772 
0773     task_lock(current);
0774     list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
0775     task_unlock(current);
0776 
0777     /*
0778      * shmid gets reported as "inode#" in /proc/pid/maps.
0779      * proc-ps tools use this. Changing this will break them.
0780      */
0781     file_inode(file)->i_ino = shp->shm_perm.id;
0782 
0783     ns->shm_tot += numpages;
0784     error = shp->shm_perm.id;
0785 
0786     ipc_unlock_object(&shp->shm_perm);
0787     rcu_read_unlock();
0788     return error;
0789 
0790 no_id:
0791     ipc_update_pid(&shp->shm_cprid, NULL);
0792     ipc_update_pid(&shp->shm_lprid, NULL);
0793     fput(file);
0794     ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
0795     return error;
0796 no_file:
0797     call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
0798     return error;
0799 }
0800 
0801 /*
0802  * Called with shm_ids.rwsem and ipcp locked.
0803  */
0804 static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
0805 {
0806     struct shmid_kernel *shp;
0807 
0808     shp = container_of(ipcp, struct shmid_kernel, shm_perm);
0809     if (shp->shm_segsz < params->u.size)
0810         return -EINVAL;
0811 
0812     return 0;
0813 }
0814 
0815 long ksys_shmget(key_t key, size_t size, int shmflg)
0816 {
0817     struct ipc_namespace *ns;
0818     static const struct ipc_ops shm_ops = {
0819         .getnew = newseg,
0820         .associate = security_shm_associate,
0821         .more_checks = shm_more_checks,
0822     };
0823     struct ipc_params shm_params;
0824 
0825     ns = current->nsproxy->ipc_ns;
0826 
0827     shm_params.key = key;
0828     shm_params.flg = shmflg;
0829     shm_params.u.size = size;
0830 
0831     return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
0832 }
0833 
0834 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
0835 {
0836     return ksys_shmget(key, size, shmflg);
0837 }
0838 
0839 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
0840 {
0841     switch (version) {
0842     case IPC_64:
0843         return copy_to_user(buf, in, sizeof(*in));
0844     case IPC_OLD:
0845         {
0846         struct shmid_ds out;
0847 
0848         memset(&out, 0, sizeof(out));
0849         ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
0850         out.shm_segsz   = in->shm_segsz;
0851         out.shm_atime   = in->shm_atime;
0852         out.shm_dtime   = in->shm_dtime;
0853         out.shm_ctime   = in->shm_ctime;
0854         out.shm_cpid    = in->shm_cpid;
0855         out.shm_lpid    = in->shm_lpid;
0856         out.shm_nattch  = in->shm_nattch;
0857 
0858         return copy_to_user(buf, &out, sizeof(out));
0859         }
0860     default:
0861         return -EINVAL;
0862     }
0863 }
0864 
0865 static inline unsigned long
0866 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
0867 {
0868     switch (version) {
0869     case IPC_64:
0870         if (copy_from_user(out, buf, sizeof(*out)))
0871             return -EFAULT;
0872         return 0;
0873     case IPC_OLD:
0874         {
0875         struct shmid_ds tbuf_old;
0876 
0877         if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
0878             return -EFAULT;
0879 
0880         out->shm_perm.uid   = tbuf_old.shm_perm.uid;
0881         out->shm_perm.gid   = tbuf_old.shm_perm.gid;
0882         out->shm_perm.mode  = tbuf_old.shm_perm.mode;
0883 
0884         return 0;
0885         }
0886     default:
0887         return -EINVAL;
0888     }
0889 }
0890 
0891 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
0892 {
0893     switch (version) {
0894     case IPC_64:
0895         return copy_to_user(buf, in, sizeof(*in));
0896     case IPC_OLD:
0897         {
0898         struct shminfo out;
0899 
0900         if (in->shmmax > INT_MAX)
0901             out.shmmax = INT_MAX;
0902         else
0903             out.shmmax = (int)in->shmmax;
0904 
0905         out.shmmin  = in->shmmin;
0906         out.shmmni  = in->shmmni;
0907         out.shmseg  = in->shmseg;
0908         out.shmall  = in->shmall;
0909 
0910         return copy_to_user(buf, &out, sizeof(out));
0911         }
0912     default:
0913         return -EINVAL;
0914     }
0915 }
0916 
0917 /*
0918  * Calculate and add used RSS and swap pages of a shm.
0919  * Called with shm_ids.rwsem held as a reader
0920  */
0921 static void shm_add_rss_swap(struct shmid_kernel *shp,
0922     unsigned long *rss_add, unsigned long *swp_add)
0923 {
0924     struct inode *inode;
0925 
0926     inode = file_inode(shp->shm_file);
0927 
0928     if (is_file_hugepages(shp->shm_file)) {
0929         struct address_space *mapping = inode->i_mapping;
0930         struct hstate *h = hstate_file(shp->shm_file);
0931         *rss_add += pages_per_huge_page(h) * mapping->nrpages;
0932     } else {
0933 #ifdef CONFIG_SHMEM
0934         struct shmem_inode_info *info = SHMEM_I(inode);
0935 
0936         spin_lock_irq(&info->lock);
0937         *rss_add += inode->i_mapping->nrpages;
0938         *swp_add += info->swapped;
0939         spin_unlock_irq(&info->lock);
0940 #else
0941         *rss_add += inode->i_mapping->nrpages;
0942 #endif
0943     }
0944 }
0945 
0946 /*
0947  * Called with shm_ids.rwsem held as a reader
0948  */
0949 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
0950         unsigned long *swp)
0951 {
0952     int next_id;
0953     int total, in_use;
0954 
0955     *rss = 0;
0956     *swp = 0;
0957 
0958     in_use = shm_ids(ns).in_use;
0959 
0960     for (total = 0, next_id = 0; total < in_use; next_id++) {
0961         struct kern_ipc_perm *ipc;
0962         struct shmid_kernel *shp;
0963 
0964         ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
0965         if (ipc == NULL)
0966             continue;
0967         shp = container_of(ipc, struct shmid_kernel, shm_perm);
0968 
0969         shm_add_rss_swap(shp, rss, swp);
0970 
0971         total++;
0972     }
0973 }
0974 
0975 /*
0976  * This function handles some shmctl commands which require the rwsem
0977  * to be held in write mode.
0978  * NOTE: no locks must be held, the rwsem is taken inside this function.
0979  */
0980 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
0981                struct shmid64_ds *shmid64)
0982 {
0983     struct kern_ipc_perm *ipcp;
0984     struct shmid_kernel *shp;
0985     int err;
0986 
0987     down_write(&shm_ids(ns).rwsem);
0988     rcu_read_lock();
0989 
0990     ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
0991                       &shmid64->shm_perm, 0);
0992     if (IS_ERR(ipcp)) {
0993         err = PTR_ERR(ipcp);
0994         goto out_unlock1;
0995     }
0996 
0997     shp = container_of(ipcp, struct shmid_kernel, shm_perm);
0998 
0999     err = security_shm_shmctl(&shp->shm_perm, cmd);
1000     if (err)
1001         goto out_unlock1;
1002 
1003     switch (cmd) {
1004     case IPC_RMID:
1005         ipc_lock_object(&shp->shm_perm);
1006         /* do_shm_rmid unlocks the ipc object and rcu */
1007         do_shm_rmid(ns, ipcp);
1008         goto out_up;
1009     case IPC_SET:
1010         ipc_lock_object(&shp->shm_perm);
1011         err = ipc_update_perm(&shmid64->shm_perm, ipcp);
1012         if (err)
1013             goto out_unlock0;
1014         shp->shm_ctim = ktime_get_real_seconds();
1015         break;
1016     default:
1017         err = -EINVAL;
1018         goto out_unlock1;
1019     }
1020 
1021 out_unlock0:
1022     ipc_unlock_object(&shp->shm_perm);
1023 out_unlock1:
1024     rcu_read_unlock();
1025 out_up:
1026     up_write(&shm_ids(ns).rwsem);
1027     return err;
1028 }
1029 
1030 static int shmctl_ipc_info(struct ipc_namespace *ns,
1031                struct shminfo64 *shminfo)
1032 {
1033     int err = security_shm_shmctl(NULL, IPC_INFO);
1034     if (!err) {
1035         memset(shminfo, 0, sizeof(*shminfo));
1036         shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
1037         shminfo->shmmax = ns->shm_ctlmax;
1038         shminfo->shmall = ns->shm_ctlall;
1039         shminfo->shmmin = SHMMIN;
1040         down_read(&shm_ids(ns).rwsem);
1041         err = ipc_get_maxidx(&shm_ids(ns));
1042         up_read(&shm_ids(ns).rwsem);
1043         if (err < 0)
1044             err = 0;
1045     }
1046     return err;
1047 }
1048 
1049 static int shmctl_shm_info(struct ipc_namespace *ns,
1050                struct shm_info *shm_info)
1051 {
1052     int err = security_shm_shmctl(NULL, SHM_INFO);
1053     if (!err) {
1054         memset(shm_info, 0, sizeof(*shm_info));
1055         down_read(&shm_ids(ns).rwsem);
1056         shm_info->used_ids = shm_ids(ns).in_use;
1057         shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
1058         shm_info->shm_tot = ns->shm_tot;
1059         shm_info->swap_attempts = 0;
1060         shm_info->swap_successes = 0;
1061         err = ipc_get_maxidx(&shm_ids(ns));
1062         up_read(&shm_ids(ns).rwsem);
1063         if (err < 0)
1064             err = 0;
1065     }
1066     return err;
1067 }
1068 
1069 static int shmctl_stat(struct ipc_namespace *ns, int shmid,
1070             int cmd, struct shmid64_ds *tbuf)
1071 {
1072     struct shmid_kernel *shp;
1073     int err;
1074 
1075     memset(tbuf, 0, sizeof(*tbuf));
1076 
1077     rcu_read_lock();
1078     if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
1079         shp = shm_obtain_object(ns, shmid);
1080         if (IS_ERR(shp)) {
1081             err = PTR_ERR(shp);
1082             goto out_unlock;
1083         }
1084     } else { /* IPC_STAT */
1085         shp = shm_obtain_object_check(ns, shmid);
1086         if (IS_ERR(shp)) {
1087             err = PTR_ERR(shp);
1088             goto out_unlock;
1089         }
1090     }
1091 
1092     /*
1093      * Semantically SHM_STAT_ANY ought to be identical to
1094      * that functionality provided by the /proc/sysvipc/
1095      * interface. As such, only audit these calls and
1096      * do not do traditional S_IRUGO permission checks on
1097      * the ipc object.
1098      */
1099     if (cmd == SHM_STAT_ANY)
1100         audit_ipc_obj(&shp->shm_perm);
1101     else {
1102         err = -EACCES;
1103         if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
1104             goto out_unlock;
1105     }
1106 
1107     err = security_shm_shmctl(&shp->shm_perm, cmd);
1108     if (err)
1109         goto out_unlock;
1110 
1111     ipc_lock_object(&shp->shm_perm);
1112 
1113     if (!ipc_valid_object(&shp->shm_perm)) {
1114         ipc_unlock_object(&shp->shm_perm);
1115         err = -EIDRM;
1116         goto out_unlock;
1117     }
1118 
1119     kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1120     tbuf->shm_segsz = shp->shm_segsz;
1121     tbuf->shm_atime = shp->shm_atim;
1122     tbuf->shm_dtime = shp->shm_dtim;
1123     tbuf->shm_ctime = shp->shm_ctim;
1124 #ifndef CONFIG_64BIT
1125     tbuf->shm_atime_high = shp->shm_atim >> 32;
1126     tbuf->shm_dtime_high = shp->shm_dtim >> 32;
1127     tbuf->shm_ctime_high = shp->shm_ctim >> 32;
1128 #endif
1129     tbuf->shm_cpid  = pid_vnr(shp->shm_cprid);
1130     tbuf->shm_lpid  = pid_vnr(shp->shm_lprid);
1131     tbuf->shm_nattch = shp->shm_nattch;
1132 
1133     if (cmd == IPC_STAT) {
1134         /*
1135          * As defined in SUS:
1136          * Return 0 on success
1137          */
1138         err = 0;
1139     } else {
1140         /*
1141          * SHM_STAT and SHM_STAT_ANY (both Linux specific)
1142          * Return the full id, including the sequence number
1143          */
1144         err = shp->shm_perm.id;
1145     }
1146 
1147     ipc_unlock_object(&shp->shm_perm);
1148 out_unlock:
1149     rcu_read_unlock();
1150     return err;
1151 }
1152 
1153 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1154 {
1155     struct shmid_kernel *shp;
1156     struct file *shm_file;
1157     int err;
1158 
1159     rcu_read_lock();
1160     shp = shm_obtain_object_check(ns, shmid);
1161     if (IS_ERR(shp)) {
1162         err = PTR_ERR(shp);
1163         goto out_unlock1;
1164     }
1165 
1166     audit_ipc_obj(&(shp->shm_perm));
1167     err = security_shm_shmctl(&shp->shm_perm, cmd);
1168     if (err)
1169         goto out_unlock1;
1170 
1171     ipc_lock_object(&shp->shm_perm);
1172 
1173     /* check if shm_destroy() is tearing down shp */
1174     if (!ipc_valid_object(&shp->shm_perm)) {
1175         err = -EIDRM;
1176         goto out_unlock0;
1177     }
1178 
1179     if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1180         kuid_t euid = current_euid();
1181 
1182         if (!uid_eq(euid, shp->shm_perm.uid) &&
1183             !uid_eq(euid, shp->shm_perm.cuid)) {
1184             err = -EPERM;
1185             goto out_unlock0;
1186         }
1187         if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1188             err = -EPERM;
1189             goto out_unlock0;
1190         }
1191     }
1192 
1193     shm_file = shp->shm_file;
1194     if (is_file_hugepages(shm_file))
1195         goto out_unlock0;
1196 
1197     if (cmd == SHM_LOCK) {
1198         struct ucounts *ucounts = current_ucounts();
1199 
1200         err = shmem_lock(shm_file, 1, ucounts);
1201         if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1202             shp->shm_perm.mode |= SHM_LOCKED;
1203             shp->mlock_ucounts = ucounts;
1204         }
1205         goto out_unlock0;
1206     }
1207 
1208     /* SHM_UNLOCK */
1209     if (!(shp->shm_perm.mode & SHM_LOCKED))
1210         goto out_unlock0;
1211     shmem_lock(shm_file, 0, shp->mlock_ucounts);
1212     shp->shm_perm.mode &= ~SHM_LOCKED;
1213     shp->mlock_ucounts = NULL;
1214     get_file(shm_file);
1215     ipc_unlock_object(&shp->shm_perm);
1216     rcu_read_unlock();
1217     shmem_unlock_mapping(shm_file->f_mapping);
1218 
1219     fput(shm_file);
1220     return err;
1221 
1222 out_unlock0:
1223     ipc_unlock_object(&shp->shm_perm);
1224 out_unlock1:
1225     rcu_read_unlock();
1226     return err;
1227 }
1228 
1229 static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version)
1230 {
1231     int err;
1232     struct ipc_namespace *ns;
1233     struct shmid64_ds sem64;
1234 
1235     if (cmd < 0 || shmid < 0)
1236         return -EINVAL;
1237 
1238     ns = current->nsproxy->ipc_ns;
1239 
1240     switch (cmd) {
1241     case IPC_INFO: {
1242         struct shminfo64 shminfo;
1243         err = shmctl_ipc_info(ns, &shminfo);
1244         if (err < 0)
1245             return err;
1246         if (copy_shminfo_to_user(buf, &shminfo, version))
1247             err = -EFAULT;
1248         return err;
1249     }
1250     case SHM_INFO: {
1251         struct shm_info shm_info;
1252         err = shmctl_shm_info(ns, &shm_info);
1253         if (err < 0)
1254             return err;
1255         if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1256             err = -EFAULT;
1257         return err;
1258     }
1259     case SHM_STAT:
1260     case SHM_STAT_ANY:
1261     case IPC_STAT: {
1262         err = shmctl_stat(ns, shmid, cmd, &sem64);
1263         if (err < 0)
1264             return err;
1265         if (copy_shmid_to_user(buf, &sem64, version))
1266             err = -EFAULT;
1267         return err;
1268     }
1269     case IPC_SET:
1270         if (copy_shmid_from_user(&sem64, buf, version))
1271             return -EFAULT;
1272         fallthrough;
1273     case IPC_RMID:
1274         return shmctl_down(ns, shmid, cmd, &sem64);
1275     case SHM_LOCK:
1276     case SHM_UNLOCK:
1277         return shmctl_do_lock(ns, shmid, cmd);
1278     default:
1279         return -EINVAL;
1280     }
1281 }
1282 
1283 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1284 {
         /* Native shmctl entry point: always passes IPC_64 as the version. */
1285     return ksys_shmctl(shmid, cmd, buf, IPC_64);
1286 }
1287 
1288 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
     /*
      * Variant of shmctl for architectures that define
      * CONFIG_ARCH_WANT_IPC_PARSE_VERSION: the structure version is
      * derived from cmd instead of being fixed to IPC_64.
      */
1289 long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1290 {
         /*
          * ipc_parse_version() is handed &cmd and so may rewrite it; it has
          * to run before cmd is passed on below.
          */
1291     int version = ipc_parse_version(&cmd);
1292 
1293     return ksys_shmctl(shmid, cmd, buf, version);
1294 }
1295 
1296 SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1297 {
         /* Syscall wrapper around ksys_old_shmctl(). */
1298     return ksys_old_shmctl(shmid, cmd, buf);
1299 }
1300 #endif
1301 
1302 #ifdef CONFIG_COMPAT
1303 
1304 struct compat_shmid_ds {
         /*
          * Compat layout used when the version is not IPC_64: written by
          * copy_compat_shmid_to_user(), and its shm_perm member is read by
          * copy_compat_shmid_from_user().
          */
1305     struct compat_ipc_perm shm_perm;    /* filled via to_compat_ipc_perm() */
1306     int shm_segsz;                      /* copied from shmid64_ds.shm_segsz */
1307     old_time32_t shm_atime;             /* copied from shmid64_ds.shm_atime */
1308     old_time32_t shm_dtime;             /* copied from shmid64_ds.shm_dtime */
1309     old_time32_t shm_ctime;             /* copied from shmid64_ds.shm_ctime */
1310     compat_ipc_pid_t shm_cpid;          /* copied from shmid64_ds.shm_cpid */
1311     compat_ipc_pid_t shm_lpid;          /* copied from shmid64_ds.shm_lpid */
1312     unsigned short shm_nattch;          /* copied from shmid64_ds.shm_nattch */
1313     unsigned short shm_unused;          /* left zero by the memset in the copy helper */
1314     compat_uptr_t shm_unused2;          /* left zero by the memset in the copy helper */
1315     compat_uptr_t shm_unused3;          /* left zero by the memset in the copy helper */
1316 };
1317 
1318 struct compat_shminfo64 {
         /*
          * Compat layout written by copy_compat_shminfo_to_user() when the
          * version is IPC_64.  That helper zeroes the whole structure and
          * then copies the five named limits from struct shminfo64.
          */
1319     compat_ulong_t shmmax;      /* helper caps the source value at INT_MAX first */
1320     compat_ulong_t shmmin;
1321     compat_ulong_t shmmni;
1322     compat_ulong_t shmseg;
1323     compat_ulong_t shmall;
1324     compat_ulong_t __unused1;   /* never assigned by the helper; stays zero */
1325     compat_ulong_t __unused2;   /* never assigned by the helper; stays zero */
1326     compat_ulong_t __unused3;   /* never assigned by the helper; stays zero */
1327     compat_ulong_t __unused4;   /* never assigned by the helper; stays zero */
1328 };
1329 
1330 struct compat_shm_info {
1331     compat_int_t used_ids;
1332     compat_ulong_t shm_tot, shm_rss, shm_swp;
1333     compat_ulong_t swap_attempts, swap_successes;
1334 };
1335 
1336 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1337                     int version)
1338 {
1339     if (in->shmmax > INT_MAX)
1340         in->shmmax = INT_MAX;
1341     if (version == IPC_64) {
1342         struct compat_shminfo64 info;
1343         memset(&info, 0, sizeof(info));
1344         info.shmmax = in->shmmax;
1345         info.shmmin = in->shmmin;
1346         info.shmmni = in->shmmni;
1347         info.shmseg = in->shmseg;
1348         info.shmall = in->shmall;
1349         return copy_to_user(buf, &info, sizeof(info));
1350     } else {
1351         struct shminfo info;
1352         memset(&info, 0, sizeof(info));
1353         info.shmmax = in->shmmax;
1354         info.shmmin = in->shmmin;
1355         info.shmmni = in->shmmni;
1356         info.shmseg = in->shmseg;
1357         info.shmall = in->shmall;
1358         return copy_to_user(buf, &info, sizeof(info));
1359     }
1360 }
1361 
1362 static int put_compat_shm_info(struct shm_info *ip,
1363                 struct compat_shm_info __user *uip)
1364 {
1365     struct compat_shm_info info;
1366 
1367     memset(&info, 0, sizeof(info));
1368     info.used_ids = ip->used_ids;
1369     info.shm_tot = ip->shm_tot;
1370     info.shm_rss = ip->shm_rss;
1371     info.shm_swp = ip->shm_swp;
1372     info.swap_attempts = ip->swap_attempts;
1373     info.swap_successes = ip->swap_successes;
1374     return copy_to_user(uip, &info, sizeof(info));
1375 }
1376 
1377 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1378                     int version)
1379 {
1380     if (version == IPC_64) {
1381         struct compat_shmid64_ds v;
1382         memset(&v, 0, sizeof(v));
1383         to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1384         v.shm_atime  = lower_32_bits(in->shm_atime);
1385         v.shm_atime_high = upper_32_bits(in->shm_atime);
1386         v.shm_dtime  = lower_32_bits(in->shm_dtime);
1387         v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1388         v.shm_ctime  = lower_32_bits(in->shm_ctime);
1389         v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1390         v.shm_segsz = in->shm_segsz;
1391         v.shm_nattch = in->shm_nattch;
1392         v.shm_cpid = in->shm_cpid;
1393         v.shm_lpid = in->shm_lpid;
1394         return copy_to_user(buf, &v, sizeof(v));
1395     } else {
1396         struct compat_shmid_ds v;
1397         memset(&v, 0, sizeof(v));
1398         to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1399         v.shm_perm.key = in->shm_perm.key;
1400         v.shm_atime = in->shm_atime;
1401         v.shm_dtime = in->shm_dtime;
1402         v.shm_ctime = in->shm_ctime;
1403         v.shm_segsz = in->shm_segsz;
1404         v.shm_nattch = in->shm_nattch;
1405         v.shm_cpid = in->shm_cpid;
1406         v.shm_lpid = in->shm_lpid;
1407         return copy_to_user(buf, &v, sizeof(v));
1408     }
1409 }
1410 
1411 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1412                     int version)
1413 {
1414     memset(out, 0, sizeof(*out));
1415     if (version == IPC_64) {
1416         struct compat_shmid64_ds __user *p = buf;
1417         return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1418     } else {
1419         struct compat_shmid_ds __user *p = buf;
1420         return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1421     }
1422 }
1423 
1424 static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
1425 {
1426     struct ipc_namespace *ns;
1427     struct shmid64_ds sem64;
1428     int err;
1429 
1430     ns = current->nsproxy->ipc_ns;
1431 
1432     if (cmd < 0 || shmid < 0)
1433         return -EINVAL;
1434 
1435     switch (cmd) {
1436     case IPC_INFO: {
1437         struct shminfo64 shminfo;
1438         err = shmctl_ipc_info(ns, &shminfo);
1439         if (err < 0)
1440             return err;
1441         if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1442             err = -EFAULT;
1443         return err;
1444     }
1445     case SHM_INFO: {
1446         struct shm_info shm_info;
1447         err = shmctl_shm_info(ns, &shm_info);
1448         if (err < 0)
1449             return err;
1450         if (put_compat_shm_info(&shm_info, uptr))
1451             err = -EFAULT;
1452         return err;
1453     }
1454     case IPC_STAT:
1455     case SHM_STAT_ANY:
1456     case SHM_STAT:
1457         err = shmctl_stat(ns, shmid, cmd, &sem64);
1458         if (err < 0)
1459             return err;
1460         if (copy_compat_shmid_to_user(uptr, &sem64, version))
1461             err = -EFAULT;
1462         return err;
1463 
1464     case IPC_SET:
1465         if (copy_compat_shmid_from_user(&sem64, uptr, version))
1466             return -EFAULT;
1467         fallthrough;
1468     case IPC_RMID:
1469         return shmctl_down(ns, shmid, cmd, &sem64);
1470     case SHM_LOCK:
1471     case SHM_UNLOCK:
1472         return shmctl_do_lock(ns, shmid, cmd);
1473     default:
1474         return -EINVAL;
1475     }
1476     return err;
1477 }
1478 
1479 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1480 {
         /* Compat shmctl entry point: always passes IPC_64 as the version. */
1481     return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64);
1482 }
1483 
1484 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
     /*
      * Compat variant of shmctl for architectures that define
      * CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION: the structure version is
      * derived from cmd instead of being fixed to IPC_64.
      */
1485 long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr)
1486 {
         /*
          * compat_ipc_parse_version() is handed &cmd and so may rewrite it;
          * it has to run before cmd is passed on below.
          */
1487     int version = compat_ipc_parse_version(&cmd);
1488 
1489     return compat_ksys_shmctl(shmid, cmd, uptr, version);
1490 }
1491 
1492 COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
1493 {
         /* Syscall wrapper around compat_ksys_old_shmctl(). */
1494     return compat_ksys_old_shmctl(shmid, cmd, uptr);
1495 }
1496 #endif
1497 #endif
1498 
1499 /*
1500  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1501  *
1502  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1503  * "raddr" thing points to kernel space, and there has to be a wrapper around
1504  * this.
      *
      * @shmid:   segment id; a negative value is rejected with -EINVAL.
      * @shmaddr: requested attach address.  If non-zero the mapping is made
      *           with MAP_FIXED; if zero, SHM_REMAP is rejected with -EINVAL.
      * @shmflg:  SHM_RND, SHM_REMAP, SHM_RDONLY and SHM_EXEC are examined here.
      * @raddr:   receives the value returned by do_mmap(); it is only written
      *           once do_mmap() has been called.
      * @shmlba:  alignment used to check @shmaddr and to round it down when
      *           SHM_RND is set.
      *
      * Returns 0 on success or a negative errno.
1505  */
1506 long do_shmat(int shmid, char __user *shmaddr, int shmflg,
1507           ulong *raddr, unsigned long shmlba)
1508 {
1509     struct shmid_kernel *shp;
1510     unsigned long addr = (unsigned long)shmaddr;
1511     unsigned long size;
1512     struct file *file, *base;
1513     int    err;
1514     unsigned long flags = MAP_SHARED;
1515     unsigned long prot;
1516     int acc_mode;
1517     struct ipc_namespace *ns;
1518     struct shm_file_data *sfd;
1519     int f_flags;
1520     unsigned long populate = 0;
1521 
         /* Every "goto out" in the address checks below returns -EINVAL. */
1522     err = -EINVAL;
1523     if (shmid < 0)
1524         goto out;
1525 
1526     if (addr) {
1527         if (addr & (shmlba - 1)) {
1528             if (shmflg & SHM_RND) {
1529                 addr &= ~(shmlba - 1);  /* round down */
1530 
1531                 /*
1532                  * Ensure that the round-down is non-nil
1533                  * when remapping. This can happen for
1534                  * cases when addr < shmlba.
1535                  */
1536                 if (!addr && (shmflg & SHM_REMAP))
1537                     goto out;
1538             } else
                     /*
                      * Without SHM_RND a misaligned address is rejected.
                      * When __ARCH_FORCE_SHMLBA is not defined, only an
                      * address that is not page aligned is rejected.
                      */
1539 #ifndef __ARCH_FORCE_SHMLBA
1540                 if (addr & ~PAGE_MASK)
1541 #endif
1542                     goto out;
1543         }
1544 
1545         flags |= MAP_FIXED;
1546     } else if ((shmflg & SHM_REMAP))
1547         goto out;
1548 
         /*
          * Derive the mmap protection, the IPC permission bits to check and
          * the open flags for the clone file from shmflg.
          */
1549     if (shmflg & SHM_RDONLY) {
1550         prot = PROT_READ;
1551         acc_mode = S_IRUGO;
1552         f_flags = O_RDONLY;
1553     } else {
1554         prot = PROT_READ | PROT_WRITE;
1555         acc_mode = S_IRUGO | S_IWUGO;
1556         f_flags = O_RDWR;
1557     }
1558     if (shmflg & SHM_EXEC) {
1559         prot |= PROT_EXEC;
1560         acc_mode |= S_IXUGO;
1561     }
1562 
1563     /*
1564      * We cannot rely on the fs check since SYSV IPC does have an
1565      * additional creator id...
1566      */
1567     ns = current->nsproxy->ipc_ns;
1568     rcu_read_lock();
1569     shp = shm_obtain_object_check(ns, shmid);
1570     if (IS_ERR(shp)) {
1571         err = PTR_ERR(shp);
1572         goto out_unlock;
1573     }
1574 
1575     err = -EACCES;
1576     if (ipcperms(ns, &shp->shm_perm, acc_mode))
1577         goto out_unlock;
1578 
1579     err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
1580     if (err)
1581         goto out_unlock;
1582 
1583     ipc_lock_object(&shp->shm_perm);
1584 
1585     /* check if shm_destroy() is tearing down shp */
1586     if (!ipc_valid_object(&shp->shm_perm)) {
1587         ipc_unlock_object(&shp->shm_perm);
1588         err = -EIDRM;
1589         goto out_unlock;
1590     }
1591 
1592     /*
1593      * We need to take a reference to the real shm file to prevent the
1594      * pointer from becoming stale in cases where the lifetime of the outer
1595      * file extends beyond that of the shm segment.  It's not usually
1596      * possible, but it can happen during remap_file_pages() emulation as
1597      * that unmaps the memory, then does ->mmap() via file reference only.
1598      * We'll deny the ->mmap() if the shm segment was since removed, but to
1599      * detect shm ID reuse we need to compare the file pointers.
1600      */
1601     base = get_file(shp->shm_file);
         /*
          * Temporary attach count; it is dropped again at out_nattch, which
          * every path from here on (success included) runs through.
          */
1602     shp->shm_nattch++;
1603     size = i_size_read(file_inode(base));
1604     ipc_unlock_object(&shp->shm_perm);
1605     rcu_read_unlock();
1606 
1607     err = -ENOMEM;
1608     sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1609     if (!sfd) {
1610         fput(base);
1611         goto out_nattch;
1612     }
1613 
1614     file = alloc_file_clone(base, f_flags,
1615               is_file_hugepages(base) ?
1616                 &shm_file_operations_huge :
1617                 &shm_file_operations);
1618     err = PTR_ERR(file);
1619     if (IS_ERR(file)) {
1620         kfree(sfd);
1621         fput(base);
1622         goto out_nattch;
1623     }
1624 
         /*
          * From here on sfd and the reference on base hang off the clone
          * file: the error paths below only fput(file).
          * NOTE(review): presumably the clone's release handler frees sfd
          * and drops base -- confirm against the file operations.
          */
1625     sfd->id = shp->shm_perm.id;
1626     sfd->ns = get_ipc_ns(ns);
1627     sfd->file = base;
1628     sfd->vm_ops = NULL;
1629     file->private_data = sfd;
1630 
1631     err = security_mmap_file(file, prot, flags);
1632     if (err)
1633         goto out_fput;
1634 
1635     if (mmap_write_lock_killable(current->mm)) {
1636         err = -EINTR;
1637         goto out_fput;
1638     }
1639 
         /*
          * Without SHM_REMAP, a fixed-address attach must neither wrap
          * around the address space nor intersect an existing mapping.
          */
1640     if (addr && !(shmflg & SHM_REMAP)) {
1641         err = -EINVAL;
1642         if (addr + size < addr)
1643             goto invalid;
1644 
1645         if (find_vma_intersection(current->mm, addr, addr + size))
1646             goto invalid;
1647     }
1648 
1649     addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL);
1650     *raddr = addr;
1651     err = 0;
1652     if (IS_ERR_VALUE(addr))
1653         err = (long)addr;
1654 invalid:
1655     mmap_write_unlock(current->mm);
1656     if (populate)
1657         mm_populate(addr, populate);
1658 
1659 out_fput:
1660     fput(file);
1661 
1662 out_nattch:
         /*
          * Drop the attach count taken above and destroy the segment if
          * shm_may_destroy() says so.
          * NOTE(review): the result of shm_lock() is used unchecked here,
          * presumably because the attach count keeps the segment alive --
          * confirm.
          */
1663     down_write(&shm_ids(ns).rwsem);
1664     shp = shm_lock(ns, shmid);
1665     shp->shm_nattch--;
1666 
1667     if (shm_may_destroy(shp))
1668         shm_destroy(ns, shp);
1669     else
1670         shm_unlock(shp);
1671     up_write(&shm_ids(ns).rwsem);
1672     return err;
1673 
1674 out_unlock:
1675     rcu_read_unlock();
1676 out:
1677     return err;
1678 }
1679 
1680 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1681 {
1682     unsigned long ret;
1683     long err;
1684 
1685     err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1686     if (err)
1687         return err;
1688     force_successful_syscall_return();
1689     return (long)ret;
1690 }
1691 
1692 #ifdef CONFIG_COMPAT
1693 
1694 #ifndef COMPAT_SHMLBA
1695 #define COMPAT_SHMLBA   SHMLBA
1696 #endif
1697 
1698 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1699 {
1700     unsigned long ret;
1701     long err;
1702 
1703     err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1704     if (err)
1705         return err;
1706     force_successful_syscall_return();
1707     return (long)ret;
1708 }
1709 #endif
1710 
1711 /*
1712  * detach and kill segment if marked destroyed.
1713  * The work is done in shm_close.
      *
      * @shmaddr: address the segment was attached at; it must be page
      *           aligned, otherwise -EINVAL is returned.
      *
      * Returns 0 if a matching shm mapping was found and unmapped, -EINVAL
      * if none was found, or -EINTR if taking the mmap write lock failed.
1714  */
1715 long ksys_shmdt(char __user *shmaddr)
1716 {
1717     struct mm_struct *mm = current->mm;
1718     struct vm_area_struct *vma;
1719     unsigned long addr = (unsigned long)shmaddr;
1720     int retval = -EINVAL;
1721 #ifdef CONFIG_MMU
1722     loff_t size = 0;
1723     struct file *file;
1724     struct vm_area_struct *next;
1725 #endif
1726 
1727     if (addr & ~PAGE_MASK)
1728         return retval;
1729 
1730     if (mmap_write_lock_killable(mm))
1731         return -EINTR;
1732 
1733     /*
1734      * This function tries to be smart and unmap shm segments that
1735      * were modified by partial mlock or munmap calls:
1736      * - It first determines the size of the shm segment that should be
1737      *   unmapped: It searches for a vma that is backed by shm and that
1738      *   started at address shmaddr. It records its size and then unmaps
1739      *   it.
1740      * - Then it unmaps all shm vmas that started at shmaddr and that
1741      *   are within the initially determined size and that are from the
1742      *   same shm segment from which we determined the size.
1743      * Errors from do_munmap are ignored: the function only fails if
1744      * it's called with invalid parameters or if it's called to unmap
1745      * a part of a vma. Both calls in this function are for full vmas,
1746      * the parameters are directly copied from the vma itself and always
1747      * valid - therefore do_munmap cannot fail. (famous last words?)
1748      */
1749     /*
1750      * If it had been mremap()'d, the starting address would not
1751      * match the usual checks anyway. So assume all vma's are
1752      * above the starting address given.
1753      */
1754     vma = find_vma(mm, addr);
1755 
1756 #ifdef CONFIG_MMU
1757     while (vma) {
             /* Read the successor first: do_munmap() below removes vma. */
1758         next = vma->vm_next;
1759 
1760         /*
1761          * Check if the starting address would match, i.e. it's
1762          * a fragment created by mprotect() and/or munmap(), or it
1763          * otherwise starts at this address with no hassles.
1764          */
1765         if ((vma->vm_ops == &shm_vm_ops) &&
1766             (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1767 
1768             /*
1769              * Record the file of the shm segment being
1770              * unmapped.  With mremap(), someone could place
1771              * pages from another segment but with equal offsets
1772              * in the range we are unmapping.
1773              */
1774             file = vma->vm_file;
1775             size = i_size_read(file_inode(vma->vm_file));
1776             do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1777             /*
1778              * We discovered the size of the shm segment, so
1779              * break out of here and fall through to the next
1780              * loop that uses the size information to stop
1781              * searching for matching vma's.
1782              */
1783             retval = 0;
1784             vma = next;
1785             break;
1786         }
1787         vma = next;
1788     }
1789 
1790     /*
1791      * We need look no further than the maximum address a fragment
1792      * could possibly have landed at. Also cast things to loff_t to
1793      * prevent overflows and make comparisons vs. equal-width types.
          *
          * If the loop above found no match it ran until vma was NULL, so
          * this loop does not execute and 'file' is never read while
          * unset.
1794      */
1795     size = PAGE_ALIGN(size);
1796     while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1797         next = vma->vm_next;
1798 
1799         /* finding a matching vma now does not alter retval */
1800         if ((vma->vm_ops == &shm_vm_ops) &&
1801             ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
1802             (vma->vm_file == file))
1803             do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1804         vma = next;
1805     }
1806 
1807 #else   /* CONFIG_MMU */
1808     /* under NOMMU conditions, the exact address to be destroyed must be
1809      * given
1810      */
1811     if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1812         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1813         retval = 0;
1814     }
1815 
1816 #endif
1817 
1818     mmap_write_unlock(mm);
1819     return retval;
1820 }
1821 
1822 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1823 {
         /* shmdt(2) entry point: all the work is done in ksys_shmdt(). */
1824     return ksys_shmdt(shmaddr);
1825 }
1826 
1827 #ifdef CONFIG_PROC_FS
1828 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1829 {
1830     struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1831     struct user_namespace *user_ns = seq_user_ns(s);
1832     struct kern_ipc_perm *ipcp = it;
1833     struct shmid_kernel *shp;
1834     unsigned long rss = 0, swp = 0;
1835 
1836     shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1837     shm_add_rss_swap(shp, &rss, &swp);
1838 
1839 #if BITS_PER_LONG <= 32
1840 #define SIZE_SPEC "%10lu"
1841 #else
1842 #define SIZE_SPEC "%21lu"
1843 #endif
1844 
1845     seq_printf(s,
1846            "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1847            "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1848            SIZE_SPEC " " SIZE_SPEC "\n",
1849            shp->shm_perm.key,
1850            shp->shm_perm.id,
1851            shp->shm_perm.mode,
1852            shp->shm_segsz,
1853            pid_nr_ns(shp->shm_cprid, pid_ns),
1854            pid_nr_ns(shp->shm_lprid, pid_ns),
1855            shp->shm_nattch,
1856            from_kuid_munged(user_ns, shp->shm_perm.uid),
1857            from_kgid_munged(user_ns, shp->shm_perm.gid),
1858            from_kuid_munged(user_ns, shp->shm_perm.cuid),
1859            from_kgid_munged(user_ns, shp->shm_perm.cgid),
1860            shp->shm_atim,
1861            shp->shm_dtim,
1862            shp->shm_ctim,
1863            rss * PAGE_SIZE,
1864            swp * PAGE_SIZE);
1865 
1866     return 0;
1867 }
1868 #endif