// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 Nadia Yvette Chambers, IBM
 * (C) 2004 Nadia Yvette Chambers, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given IDs on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, whose bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. The worst-case
 * allocation scenario is when all but one of 1 million possible PIDs are
 * already allocated: scanning 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 *
 * Pid namespaces:
 *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
 *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
 *     Many thanks to Oleg Nesterov for comments and help
 *
 */
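
/*
 * Data-layout sketch for the structures described above (fields
 * abbreviated; see <linux/pid.h> for the authoritative definitions):
 *
 *     struct upid {
 *         int nr;                    // PID value in one namespace
 *         struct pid_namespace *ns;  // namespace that value belongs to
 *     };
 *
 *     struct pid {
 *         refcount_t count;
 *         unsigned int level;        // depth of the deepest namespace
 *         struct hlist_head tasks[PIDTYPE_MAX];  // tasks using this ID
 *         struct upid numbers[1];    // one upid per namespace level
 *     };
 */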

#include <linux/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/memblock.h>
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
#include <linux/proc_ns.h>
#include <linux/refcount.h>
#include <linux/anon_inodes.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/idr.h>
#include <net/sock.h>
#include <uapi/linux/pidfd.h>

struct pid init_struct_pid = {
    .count      = REFCOUNT_INIT(1),
    .tasks      = {
        { .first = NULL },
        { .first = NULL },
        { .first = NULL },
    },
    .level      = 0,
    .numbers    = { {
        .nr     = 0,
        .ns     = &init_pid_ns,
    }, }
};

int pid_max = PID_MAX_DEFAULT;

#define RESERVED_PIDS       300

int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales up to 4 million PIDs at runtime.
 */
struct pid_namespace init_pid_ns = {
    .ns.count = REFCOUNT_INIT(2),
    .idr = IDR_INIT(init_pid_ns.idr),
    .pid_allocated = PIDNS_ADDING,
    .level = 0,
    .child_reaper = &init_task,
    .user_ns = &init_user_ns,
    .ns.inum = PROC_PID_INIT_INO,
#ifdef CONFIG_PID_NS
    .ns.ops = &pidns_operations,
#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);

/*
 * Note: disable interrupts while the pidmap_lock is held as an
 * interrupt might come in and do read_lock(&tasklist_lock).
 *
 * If we don't disable interrupts there is a nasty deadlock between
 * detach_pid()->free_pid() and another cpu that does
 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
 * read_lock(&tasklist_lock);
 *
 * After we clean up the tasklist_lock and know there are no
 * irq handlers that take it we can leave the interrupts enabled.
 * For now it is easier to be safe than to prove it can't happen.
 */
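
/*
 * Illustrative interleaving of the deadlock described above (a sketch,
 * not code from this file):
 *
 *     CPU 0                          CPU 1
 *     -----                          -----
 *     spin_lock(&pidmap_lock)
 *                                    write_lock_irq(&tasklist_lock)
 *     <interrupt>                    detach_pid()->free_pid()
 *     read_lock(&tasklist_lock)        spin_lock(&pidmap_lock)
 *       spins: CPU 1 holds it          spins: CPU 0 holds it
 *
 * Disabling interrupts while pidmap_lock is held keeps CPU 0 from
 * entering the interrupt handler, which breaks the cycle.
 */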

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);

void put_pid(struct pid *pid)
{
    struct pid_namespace *ns;

    if (!pid)
        return;

    ns = pid->numbers[pid->level].ns;
    if (refcount_dec_and_test(&pid->count)) {
        kmem_cache_free(ns->pid_cachep, pid);
        put_pid_ns(ns);
    }
}
EXPORT_SYMBOL_GPL(put_pid);

static void delayed_put_pid(struct rcu_head *rhp)
{
    struct pid *pid = container_of(rhp, struct pid, rcu);
    put_pid(pid);
}

void free_pid(struct pid *pid)
{
    /* We can be called with write_lock_irq(&tasklist_lock) held */
    int i;
    unsigned long flags;

    spin_lock_irqsave(&pidmap_lock, flags);
    for (i = 0; i <= pid->level; i++) {
        struct upid *upid = pid->numbers + i;
        struct pid_namespace *ns = upid->ns;
        switch (--ns->pid_allocated) {
        case 2:
        case 1:
            /* When all that is left in the pid namespace
             * is the reaper, wake up the reaper. The reaper
             * may be sleeping in zap_pid_ns_processes().
             */
            wake_up_process(ns->child_reaper);
            break;
        case PIDNS_ADDING:
            /* Handle a fork failure of the first process */
            WARN_ON(ns->child_reaper);
            ns->pid_allocated = 0;
            break;
        }

        idr_remove(&ns->idr, upid->nr);
    }
    spin_unlock_irqrestore(&pidmap_lock, flags);

    call_rcu(&pid->rcu, delayed_put_pid);
}

struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
              size_t set_tid_size)
{
    struct pid *pid;
    enum pid_type type;
    int i, nr;
    struct pid_namespace *tmp;
    struct upid *upid;
    int retval = -ENOMEM;

    /*
     * set_tid_size contains the size of the set_tid array. Starting at
     * the most nested currently active PID namespace it tells alloc_pid()
     * which PID to set for a process in that most nested PID namespace
     * up to set_tid_size PID namespaces. It does not have to set the PID
     * for a process in all nested PID namespaces but set_tid_size must
     * never be greater than the current ns->level + 1.
     */
    if (set_tid_size > ns->level + 1)
        return ERR_PTR(-EINVAL);

    pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
    if (!pid)
        return ERR_PTR(retval);

    tmp = ns;
    pid->level = ns->level;

    for (i = ns->level; i >= 0; i--) {
        int tid = 0;

        if (set_tid_size) {
            tid = set_tid[ns->level - i];

            retval = -EINVAL;
            if (tid < 1 || tid >= pid_max)
                goto out_free;
            /*
             * Also fail if a PID != 1 is requested and
             * no PID 1 exists.
             */
            if (tid != 1 && !tmp->child_reaper)
                goto out_free;
            retval = -EPERM;
            if (!checkpoint_restore_ns_capable(tmp->user_ns))
                goto out_free;
            set_tid_size--;
        }

        idr_preload(GFP_KERNEL);
        spin_lock_irq(&pidmap_lock);

        if (tid) {
            nr = idr_alloc(&tmp->idr, NULL, tid,
                       tid + 1, GFP_ATOMIC);
            /*
             * If ENOSPC is returned it means that the PID is
             * already in use. Return EEXIST in that case.
             */
            if (nr == -ENOSPC)
                nr = -EEXIST;
        } else {
            int pid_min = 1;
            /*
             * init really needs pid 1, but after reaching the
             * maximum, wrap back to RESERVED_PIDS.
             */
            if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
                pid_min = RESERVED_PIDS;

            /*
             * Store a null pointer so find_pid_ns does not find
             * a partially initialized PID (see below).
             */
            nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
                          pid_max, GFP_ATOMIC);
        }
        spin_unlock_irq(&pidmap_lock);
        idr_preload_end();

        if (nr < 0) {
            retval = (nr == -ENOSPC) ? -EAGAIN : nr;
            goto out_free;
        }

        pid->numbers[i].nr = nr;
        pid->numbers[i].ns = tmp;
        tmp = tmp->parent;
    }

    /*
     * ENOMEM is not the most obvious choice especially for the case
     * where the child subreaper has already exited and the pid
     * namespace denies the creation of any new processes. But ENOMEM
     * is what we have exposed to userspace for a long time and it is
     * documented behavior for pid namespaces. So we can't easily
     * change it even if there were an error code better suited.
     */
    retval = -ENOMEM;

    get_pid_ns(ns);
    refcount_set(&pid->count, 1);
    spin_lock_init(&pid->lock);
    for (type = 0; type < PIDTYPE_MAX; ++type)
        INIT_HLIST_HEAD(&pid->tasks[type]);

    init_waitqueue_head(&pid->wait_pidfd);
    INIT_HLIST_HEAD(&pid->inodes);

    upid = pid->numbers + ns->level;
    spin_lock_irq(&pidmap_lock);
    if (!(ns->pid_allocated & PIDNS_ADDING))
        goto out_unlock;
    for ( ; upid >= pid->numbers; --upid) {
        /* Make the PID visible to find_pid_ns. */
        idr_replace(&upid->ns->idr, pid, upid->nr);
        upid->ns->pid_allocated++;
    }
    spin_unlock_irq(&pidmap_lock);

    return pid;

out_unlock:
    spin_unlock_irq(&pidmap_lock);
    put_pid_ns(ns);

out_free:
    spin_lock_irq(&pidmap_lock);
    while (++i <= ns->level) {
        upid = pid->numbers + i;
        idr_remove(&upid->ns->idr, upid->nr);
    }

    /* On failure to allocate the first pid, reset the state */
    if (ns->pid_allocated == PIDNS_ADDING)
        idr_set_cursor(&ns->idr, 0);

    spin_unlock_irq(&pidmap_lock);

    kmem_cache_free(ns->pid_cachep, pid);
    return ERR_PTR(retval);
}
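
/*
 * Illustrative caller, a sketch of the fork path rather than the actual
 * copy_process() code: the pid is allocated in the child's active PID
 * namespace, and any requested set_tid values are passed straight through.
 *
 *     pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
 *                     args->set_tid_size);
 *     if (IS_ERR(pid))
 *         return PTR_ERR(pid);
 */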

void disable_pid_allocation(struct pid_namespace *ns)
{
    spin_lock_irq(&pidmap_lock);
    ns->pid_allocated &= ~PIDNS_ADDING;
    spin_unlock_irq(&pidmap_lock);
}

struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
    return idr_find(&ns->idr, nr);
}
EXPORT_SYMBOL_GPL(find_pid_ns);

struct pid *find_vpid(int nr)
{
    return find_pid_ns(nr, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(find_vpid);

static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
{
    return (type == PIDTYPE_PID) ?
        &task->thread_pid :
        &task->signal->pids[type];
}

/*
 * attach_pid() must be called with the tasklist_lock write-held.
 */
void attach_pid(struct task_struct *task, enum pid_type type)
{
    struct pid *pid = *task_pid_ptr(task, type);
    hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
}

static void __change_pid(struct task_struct *task, enum pid_type type,
            struct pid *new)
{
    struct pid **pid_ptr = task_pid_ptr(task, type);
    struct pid *pid;
    int tmp;

    pid = *pid_ptr;

    hlist_del_rcu(&task->pid_links[type]);
    *pid_ptr = new;

    for (tmp = PIDTYPE_MAX; --tmp >= 0; )
        if (pid_has_task(pid, tmp))
            return;

    free_pid(pid);
}

void detach_pid(struct task_struct *task, enum pid_type type)
{
    __change_pid(task, type, NULL);
}

void change_pid(struct task_struct *task, enum pid_type type,
        struct pid *pid)
{
    __change_pid(task, type, pid);
    attach_pid(task, type);
}

void exchange_tids(struct task_struct *left, struct task_struct *right)
{
    struct pid *pid1 = left->thread_pid;
    struct pid *pid2 = right->thread_pid;
    struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
    struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];

    /* Swap the single entry tid lists */
    hlists_swap_heads_rcu(head1, head2);

    /* Swap the per task_struct pid */
    rcu_assign_pointer(left->thread_pid, pid2);
    rcu_assign_pointer(right->thread_pid, pid1);

    /* Swap the cached value */
    WRITE_ONCE(left->pid, pid_nr(pid2));
    WRITE_ONCE(right->pid, pid_nr(pid1));
}

/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void transfer_pid(struct task_struct *old, struct task_struct *new,
               enum pid_type type)
{
    if (type == PIDTYPE_PID)
        new->thread_pid = old->thread_pid;
    hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
}

struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
    struct task_struct *result = NULL;
    if (pid) {
        struct hlist_node *first;
        first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
                          lockdep_tasklist_lock_is_held());
        if (first)
            result = hlist_entry(first, struct task_struct, pid_links[(type)]);
    }
    return result;
}
EXPORT_SYMBOL(pid_task);
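
/*
 * Usage sketch: pid_task() returns an RCU-protected pointer, so the
 * result is only stable inside the read-side critical section unless a
 * reference is taken first (this is exactly what get_pid_task() below
 * does):
 *
 *     rcu_read_lock();
 *     task = pid_task(pid, PIDTYPE_PID);
 *     if (task)
 *         get_task_struct(task);
 *     rcu_read_unlock();
 */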

/*
 * Must be called under rcu_read_lock().
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
    RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
             "find_task_by_pid_ns() needs rcu_read_lock() protection");
    return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
}

struct task_struct *find_task_by_vpid(pid_t vnr)
{
    return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
}

struct task_struct *find_get_task_by_vpid(pid_t nr)
{
    struct task_struct *task;

    rcu_read_lock();
    task = find_task_by_vpid(nr);
    if (task)
        get_task_struct(task);
    rcu_read_unlock();

    return task;
}

struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
    struct pid *pid;
    rcu_read_lock();
    pid = get_pid(rcu_dereference(*task_pid_ptr(task, type)));
    rcu_read_unlock();
    return pid;
}
EXPORT_SYMBOL_GPL(get_task_pid);

struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
    struct task_struct *result;
    rcu_read_lock();
    result = pid_task(pid, type);
    if (result)
        get_task_struct(result);
    rcu_read_unlock();
    return result;
}
EXPORT_SYMBOL_GPL(get_pid_task);

struct pid *find_get_pid(pid_t nr)
{
    struct pid *pid;

    rcu_read_lock();
    pid = get_pid(find_vpid(nr));
    rcu_read_unlock();

    return pid;
}
EXPORT_SYMBOL_GPL(find_get_pid);

pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
    struct upid *upid;
    pid_t nr = 0;

    if (pid && ns->level <= pid->level) {
        upid = &pid->numbers[ns->level];
        if (upid->ns == ns)
            nr = upid->nr;
    }
    return nr;
}
EXPORT_SYMBOL_GPL(pid_nr_ns);
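
/*
 * Example (values illustrative): a task living in a child PID namespace
 * has one struct pid with a upid per level, say
 *
 *     numbers[0] = { .nr = 1042, .ns = &init_pid_ns };
 *     numbers[1] = { .nr = 1,    .ns = &child_ns };
 *
 * pid_nr_ns() then returns 1042 when asked from the initial namespace,
 * 1 from the child namespace, and 0 from any unrelated namespace.
 */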

pid_t pid_vnr(struct pid *pid)
{
    return pid_nr_ns(pid, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(pid_vnr);

pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
            struct pid_namespace *ns)
{
    pid_t nr = 0;

    rcu_read_lock();
    if (!ns)
        ns = task_active_pid_ns(current);
    nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
    rcu_read_unlock();

    return nr;
}
EXPORT_SYMBOL(__task_pid_nr_ns);

struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
    return ns_of_pid(task_pid(tsk));
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);

/*
 * Used by proc to find the first pid that is greater than or equal to nr.
 *
 * If there is a pid at nr this function is exactly the same as find_pid_ns.
 */
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
    return idr_get_next(&ns->idr, &nr);
}

struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
{
    struct fd f;
    struct pid *pid;

    f = fdget(fd);
    if (!f.file)
        return ERR_PTR(-EBADF);

    pid = pidfd_pid(f.file);
    if (!IS_ERR(pid)) {
        get_pid(pid);
        *flags = f.file->f_flags;
    }

    fdput(f);
    return pid;
}

/**
 * pidfd_get_task() - Get the task associated with a pidfd
 *
 * @pidfd: pidfd for which to get the task
 * @flags: flags associated with this pidfd
 *
 * Return the task associated with @pidfd. The function takes a reference on
 * the returned task. The caller is responsible for releasing that reference.
 *
 * Currently, the process identified by @pidfd is always a thread-group leader.
 * This restriction currently exists for all aspects of pidfds including pidfd
 * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
 * (only supports thread group leaders).
 *
 * Return: On success, the task_struct associated with the pidfd.
 *     On error, a negative errno number will be returned.
 */
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
{
    unsigned int f_flags;
    struct pid *pid;
    struct task_struct *task;

    pid = pidfd_get_pid(pidfd, &f_flags);
    if (IS_ERR(pid))
        return ERR_CAST(pid);

    task = get_pid_task(pid, PIDTYPE_TGID);
    put_pid(pid);
    if (!task)
        return ERR_PTR(-ESRCH);

    *flags = f_flags;
    return task;
}
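
/*
 * Caller pattern sketch: per the kernel-doc above, the returned
 * reference must be dropped when the caller is done with the task.
 *
 *     unsigned int f_flags;
 *     struct task_struct *task = pidfd_get_task(pidfd, &f_flags);
 *     if (IS_ERR(task))
 *         return PTR_ERR(task);
 *     ...
 *     put_task_struct(task);
 */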

/**
 * pidfd_create() - Create a new pid file descriptor.
 *
 * @pid:   struct pid that the pidfd will reference
 * @flags: flags to pass
 *
 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
 *
 * Note that this function can only be called after the fd table has
 * been unshared to avoid leaking the pidfd to the new process.
 *
 * This symbol should not be explicitly exported to loadable modules.
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
int pidfd_create(struct pid *pid, unsigned int flags)
{
    int fd;

    if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
        return -EINVAL;

    if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
        return -EINVAL;

    fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
                  flags | O_RDWR | O_CLOEXEC);
    if (fd < 0)
        put_pid(pid);

    return fd;
}

/**
 * pidfd_open() - Open new pid file descriptor.
 *
 * @pid:   pid for which to retrieve a pidfd
 * @flags: flags to pass
 *
 * This creates a new pid file descriptor with the O_CLOEXEC flag set for
 * the process identified by @pid. Currently, the process identified by
 * @pid must be a thread-group leader. This restriction currently exists
 * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
 * be used with CLONE_THREAD) and pidfd polling (only supports thread group
 * leaders).
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
{
    int fd;
    struct pid *p;

    if (flags & ~PIDFD_NONBLOCK)
        return -EINVAL;

    if (pid <= 0)
        return -EINVAL;

    p = find_get_pid(pid);
    if (!p)
        return -ESRCH;

    fd = pidfd_create(p, flags);

    put_pid(p);
    return fd;
}
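
/*
 * Userspace sketch (illustrative, error handling elided): open a pidfd
 * for a process and wait for it to exit; a pidfd becomes readable once
 * the process terminates.
 *
 *     int pidfd = syscall(SYS_pidfd_open, pid, 0);
 *     struct pollfd pfd = { .fd = pidfd, .events = POLLIN };
 *     poll(&pfd, 1, -1);
 *     close(pidfd);
 */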

void __init pid_idr_init(void)
{
    /* Verify no one has done anything silly: */
    BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);

    /* bump default and minimum pid_max based on number of cpus */
    pid_max = min(pid_max_max, max_t(int, pid_max,
                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
    pid_max_min = max_t(int, pid_max_min,
                PIDS_PER_CPU_MIN * num_possible_cpus());
    pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

    idr_init(&init_pid_ns.idr);

    init_pid_ns.pid_cachep = KMEM_CACHE(pid,
            SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
}

static struct file *__pidfd_fget(struct task_struct *task, int fd)
{
    struct file *file;
    int ret;

    ret = down_read_killable(&task->signal->exec_update_lock);
    if (ret)
        return ERR_PTR(ret);

    if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS))
        file = fget_task(task, fd);
    else
        file = ERR_PTR(-EPERM);

    up_read(&task->signal->exec_update_lock);

    return file ?: ERR_PTR(-EBADF);
}

static int pidfd_getfd(struct pid *pid, int fd)
{
    struct task_struct *task;
    struct file *file;
    int ret;

    task = get_pid_task(pid, PIDTYPE_PID);
    if (!task)
        return -ESRCH;

    file = __pidfd_fget(task, fd);
    put_task_struct(task);
    if (IS_ERR(file))
        return PTR_ERR(file);

    ret = receive_fd(file, O_CLOEXEC);
    fput(file);

    return ret;
}

/**
 * sys_pidfd_getfd() - Get a file descriptor from another process
 *
 * @pidfd:  the pidfd file descriptor of the process
 * @fd:     the file descriptor number to get
 * @flags:  flags on how to get the fd (reserved)
 *
 * This syscall gets a copy of a file descriptor from another process
 * based on the pidfd and file descriptor number. It requires that
 * the calling process has the ability to ptrace the process represented
 * by the pidfd. The process which is having its file descriptor copied
 * is otherwise unaffected.
 *
 * Return: On success, a cloexec file descriptor is returned.
 *         On error, a negative errno number will be returned.
 */
SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
        unsigned int, flags)
{
    struct pid *pid;
    struct fd f;
    int ret;

    /* flags is currently unused - make sure it's unset */
    if (flags)
        return -EINVAL;

    f = fdget(pidfd);
    if (!f.file)
        return -EBADF;

    pid = pidfd_pid(f.file);
    if (IS_ERR(pid))
        ret = PTR_ERR(pid);
    else
        ret = pidfd_getfd(pid, fd);

    fdput(f);
    return ret;
}
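
/*
 * Userspace sketch (illustrative): copy descriptor targetfd from the
 * process behind pidfd into the caller. This requires ptrace-attach
 * permission over the target, and flags must be 0.
 *
 *     int newfd = syscall(SYS_pidfd_getfd, pidfd, targetfd, 0);
 *     if (newfd < 0)
 *         perror("pidfd_getfd");
 */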