0001
0002
0003
0004
0005 #include <linux/capability.h>
0006 #include <linux/audit.h>
0007 #include <linux/init.h>
0008 #include <linux/kernel.h>
0009 #include <linux/lsm_hooks.h>
0010 #include <linux/file.h>
0011 #include <linux/mm.h>
0012 #include <linux/mman.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/swap.h>
0015 #include <linux/skbuff.h>
0016 #include <linux/netlink.h>
0017 #include <linux/ptrace.h>
0018 #include <linux/xattr.h>
0019 #include <linux/hugetlb.h>
0020 #include <linux/mount.h>
0021 #include <linux/sched.h>
0022 #include <linux/prctl.h>
0023 #include <linux/securebits.h>
0024 #include <linux/user_namespace.h>
0025 #include <linux/binfmts.h>
0026 #include <linux/personality.h>
0027 #include <linux/mnt_idmapping.h>
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040 static void warn_setuid_and_fcaps_mixed(const char *fname)
0041 {
0042 static int warned;
0043 if (!warned) {
0044 printk(KERN_INFO "warning: `%s' has both setuid-root and"
0045 " effective capabilities. Therefore not raising all"
0046 " capabilities.\n", fname);
0047 warned = 1;
0048 }
0049 }
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
0067 int cap, unsigned int opts)
0068 {
0069 struct user_namespace *ns = targ_ns;
0070
0071
0072
0073
0074
0075 for (;;) {
0076
0077 if (ns == cred->user_ns)
0078 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
0079
0080
0081
0082
0083
0084 if (ns->level <= cred->user_ns->level)
0085 return -EPERM;
0086
0087
0088
0089
0090
0091 if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
0092 return 0;
0093
0094
0095
0096
0097
0098 ns = ns->parent;
0099 }
0100
0101
0102 }
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112 int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
0113 {
0114 if (!capable(CAP_SYS_TIME))
0115 return -EPERM;
0116 return 0;
0117 }
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134 int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
0135 {
0136 int ret = 0;
0137 const struct cred *cred, *child_cred;
0138 const kernel_cap_t *caller_caps;
0139
0140 rcu_read_lock();
0141 cred = current_cred();
0142 child_cred = __task_cred(child);
0143 if (mode & PTRACE_MODE_FSCREDS)
0144 caller_caps = &cred->cap_effective;
0145 else
0146 caller_caps = &cred->cap_permitted;
0147 if (cred->user_ns == child_cred->user_ns &&
0148 cap_issubset(child_cred->cap_permitted, *caller_caps))
0149 goto out;
0150 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
0151 goto out;
0152 ret = -EPERM;
0153 out:
0154 rcu_read_unlock();
0155 return ret;
0156 }
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170
0171 int cap_ptrace_traceme(struct task_struct *parent)
0172 {
0173 int ret = 0;
0174 const struct cred *cred, *child_cred;
0175
0176 rcu_read_lock();
0177 cred = __task_cred(parent);
0178 child_cred = current_cred();
0179 if (cred->user_ns == child_cred->user_ns &&
0180 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
0181 goto out;
0182 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
0183 goto out;
0184 ret = -EPERM;
0185 out:
0186 rcu_read_unlock();
0187 return ret;
0188 }
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200 int cap_capget(struct task_struct *target, kernel_cap_t *effective,
0201 kernel_cap_t *inheritable, kernel_cap_t *permitted)
0202 {
0203 const struct cred *cred;
0204
0205
0206 rcu_read_lock();
0207 cred = __task_cred(target);
0208 *effective = cred->cap_effective;
0209 *inheritable = cred->cap_inheritable;
0210 *permitted = cred->cap_permitted;
0211 rcu_read_unlock();
0212 return 0;
0213 }
0214
0215
0216
0217
0218
0219 static inline int cap_inh_is_capped(void)
0220 {
0221
0222
0223
0224 if (cap_capable(current_cred(), current_cred()->user_ns,
0225 CAP_SETPCAP, CAP_OPT_NONE) == 0)
0226 return 0;
0227 return 1;
0228 }
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242 int cap_capset(struct cred *new,
0243 const struct cred *old,
0244 const kernel_cap_t *effective,
0245 const kernel_cap_t *inheritable,
0246 const kernel_cap_t *permitted)
0247 {
0248 if (cap_inh_is_capped() &&
0249 !cap_issubset(*inheritable,
0250 cap_combine(old->cap_inheritable,
0251 old->cap_permitted)))
0252
0253 return -EPERM;
0254
0255 if (!cap_issubset(*inheritable,
0256 cap_combine(old->cap_inheritable,
0257 old->cap_bset)))
0258
0259 return -EPERM;
0260
0261
0262 if (!cap_issubset(*permitted, old->cap_permitted))
0263 return -EPERM;
0264
0265
0266 if (!cap_issubset(*effective, *permitted))
0267 return -EPERM;
0268
0269 new->cap_effective = *effective;
0270 new->cap_inheritable = *inheritable;
0271 new->cap_permitted = *permitted;
0272
0273
0274
0275
0276
0277 new->cap_ambient = cap_intersect(new->cap_ambient,
0278 cap_intersect(*permitted,
0279 *inheritable));
0280 if (WARN_ON(!cap_ambient_invariant_ok(new)))
0281 return -EINVAL;
0282 return 0;
0283 }
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296 int cap_inode_need_killpriv(struct dentry *dentry)
0297 {
0298 struct inode *inode = d_backing_inode(dentry);
0299 int error;
0300
0301 error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
0302 return error > 0;
0303 }
0304
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314
0315
0316
0317
0318
0319
0320
0321 int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry)
0322 {
0323 int error;
0324
0325 error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS);
0326 if (error == -EOPNOTSUPP)
0327 error = 0;
0328 return error;
0329 }
0330
0331 static bool rootid_owns_currentns(kuid_t kroot)
0332 {
0333 struct user_namespace *ns;
0334
0335 if (!uid_valid(kroot))
0336 return false;
0337
0338 for (ns = current_user_ns(); ; ns = ns->parent) {
0339 if (from_kuid(ns, kroot) == 0)
0340 return true;
0341 if (ns == &init_user_ns)
0342 break;
0343 }
0344
0345 return false;
0346 }
0347
0348 static __u32 sansflags(__u32 m)
0349 {
0350 return m & ~VFS_CAP_FLAGS_EFFECTIVE;
0351 }
0352
0353 static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
0354 {
0355 if (size != XATTR_CAPS_SZ_2)
0356 return false;
0357 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
0358 }
0359
0360 static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
0361 {
0362 if (size != XATTR_CAPS_SZ_3)
0363 return false;
0364 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
0365 }
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378 int cap_inode_getsecurity(struct user_namespace *mnt_userns,
0379 struct inode *inode, const char *name, void **buffer,
0380 bool alloc)
0381 {
0382 int size, ret;
0383 kuid_t kroot;
0384 u32 nsmagic, magic;
0385 uid_t root, mappedroot;
0386 char *tmpbuf = NULL;
0387 struct vfs_cap_data *cap;
0388 struct vfs_ns_cap_data *nscap = NULL;
0389 struct dentry *dentry;
0390 struct user_namespace *fs_ns;
0391
0392 if (strcmp(name, "capability") != 0)
0393 return -EOPNOTSUPP;
0394
0395 dentry = d_find_any_alias(inode);
0396 if (!dentry)
0397 return -EINVAL;
0398
0399 size = sizeof(struct vfs_ns_cap_data);
0400 ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS,
0401 &tmpbuf, size, GFP_NOFS);
0402 dput(dentry);
0403
0404 if (ret < 0 || !tmpbuf)
0405 return ret;
0406
0407 fs_ns = inode->i_sb->s_user_ns;
0408 cap = (struct vfs_cap_data *) tmpbuf;
0409 if (is_v2header((size_t) ret, cap)) {
0410 root = 0;
0411 } else if (is_v3header((size_t) ret, cap)) {
0412 nscap = (struct vfs_ns_cap_data *) tmpbuf;
0413 root = le32_to_cpu(nscap->rootid);
0414 } else {
0415 size = -EINVAL;
0416 goto out_free;
0417 }
0418
0419 kroot = make_kuid(fs_ns, root);
0420
0421
0422 kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
0423
0424
0425
0426 mappedroot = from_kuid(current_user_ns(), kroot);
0427 if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
0428 size = sizeof(struct vfs_ns_cap_data);
0429 if (alloc) {
0430 if (!nscap) {
0431
0432 nscap = kzalloc(size, GFP_ATOMIC);
0433 if (!nscap) {
0434 size = -ENOMEM;
0435 goto out_free;
0436 }
0437 nsmagic = VFS_CAP_REVISION_3;
0438 magic = le32_to_cpu(cap->magic_etc);
0439 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
0440 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
0441 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
0442 nscap->magic_etc = cpu_to_le32(nsmagic);
0443 } else {
0444
0445 tmpbuf = NULL;
0446 }
0447 nscap->rootid = cpu_to_le32(mappedroot);
0448 *buffer = nscap;
0449 }
0450 goto out_free;
0451 }
0452
0453 if (!rootid_owns_currentns(kroot)) {
0454 size = -EOVERFLOW;
0455 goto out_free;
0456 }
0457
0458
0459 size = sizeof(struct vfs_cap_data);
0460 if (alloc) {
0461 if (nscap) {
0462
0463 cap = kzalloc(size, GFP_ATOMIC);
0464 if (!cap) {
0465 size = -ENOMEM;
0466 goto out_free;
0467 }
0468 magic = VFS_CAP_REVISION_2;
0469 nsmagic = le32_to_cpu(nscap->magic_etc);
0470 if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
0471 magic |= VFS_CAP_FLAGS_EFFECTIVE;
0472 memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
0473 cap->magic_etc = cpu_to_le32(magic);
0474 } else {
0475
0476 tmpbuf = NULL;
0477 }
0478 *buffer = cap;
0479 }
0480 out_free:
0481 kfree(tmpbuf);
0482 return size;
0483 }
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500 static kuid_t rootid_from_xattr(const void *value, size_t size,
0501 struct user_namespace *task_ns,
0502 struct user_namespace *mnt_userns,
0503 struct user_namespace *fs_userns)
0504 {
0505 const struct vfs_ns_cap_data *nscap = value;
0506 kuid_t rootkid;
0507 uid_t rootid = 0;
0508
0509 if (size == XATTR_CAPS_SZ_3)
0510 rootid = le32_to_cpu(nscap->rootid);
0511
0512 rootkid = make_kuid(task_ns, rootid);
0513 return mapped_kuid_user(mnt_userns, fs_userns, rootkid);
0514 }
0515
0516 static bool validheader(size_t size, const struct vfs_cap_data *cap)
0517 {
0518 return is_v2header(size, cap) || is_v3header(size, cap);
0519 }
0520
0521
0522
0523
0524
0525
0526
0527
0528
0529
0530
0531
0532
0533
0534
0535
0536
0537
0538
0539
0540 int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
0541 const void **ivalue, size_t size)
0542 {
0543 struct vfs_ns_cap_data *nscap;
0544 uid_t nsrootid;
0545 const struct vfs_cap_data *cap = *ivalue;
0546 __u32 magic, nsmagic;
0547 struct inode *inode = d_backing_inode(dentry);
0548 struct user_namespace *task_ns = current_user_ns(),
0549 *fs_ns = inode->i_sb->s_user_ns;
0550 kuid_t rootid;
0551 size_t newsize;
0552
0553 if (!*ivalue)
0554 return -EINVAL;
0555 if (!validheader(size, cap))
0556 return -EINVAL;
0557 if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
0558 return -EPERM;
0559 if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
0560 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
0561
0562 return size;
0563
0564 rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
0565 if (!uid_valid(rootid))
0566 return -EINVAL;
0567
0568 nsrootid = from_kuid(fs_ns, rootid);
0569 if (nsrootid == -1)
0570 return -EINVAL;
0571
0572 newsize = sizeof(struct vfs_ns_cap_data);
0573 nscap = kmalloc(newsize, GFP_ATOMIC);
0574 if (!nscap)
0575 return -ENOMEM;
0576 nscap->rootid = cpu_to_le32(nsrootid);
0577 nsmagic = VFS_CAP_REVISION_3;
0578 magic = le32_to_cpu(cap->magic_etc);
0579 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
0580 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
0581 nscap->magic_etc = cpu_to_le32(nsmagic);
0582 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
0583
0584 *ivalue = nscap;
0585 return newsize;
0586 }
0587
0588
0589
0590
0591
0592 static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
0593 struct linux_binprm *bprm,
0594 bool *effective,
0595 bool *has_fcap)
0596 {
0597 struct cred *new = bprm->cred;
0598 unsigned i;
0599 int ret = 0;
0600
0601 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
0602 *effective = true;
0603
0604 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
0605 *has_fcap = true;
0606
0607 CAP_FOR_EACH_U32(i) {
0608 __u32 permitted = caps->permitted.cap[i];
0609 __u32 inheritable = caps->inheritable.cap[i];
0610
0611
0612
0613
0614
0615 new->cap_permitted.cap[i] =
0616 (new->cap_bset.cap[i] & permitted) |
0617 (new->cap_inheritable.cap[i] & inheritable);
0618
0619 if (permitted & ~new->cap_permitted.cap[i])
0620
0621 ret = -EPERM;
0622 }
0623
0624
0625
0626
0627
0628
0629 return *effective ? ret : 0;
0630 }
0631
0632
0633
0634
0635
0636
0637
0638
0639
0640
0641
0642
0643
0644
0645
0646
0647 int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
0648 const struct dentry *dentry,
0649 struct cpu_vfs_cap_data *cpu_caps)
0650 {
0651 struct inode *inode = d_backing_inode(dentry);
0652 __u32 magic_etc;
0653 unsigned tocopy, i;
0654 int size;
0655 struct vfs_ns_cap_data data, *nscaps = &data;
0656 struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
0657 kuid_t rootkuid;
0658 struct user_namespace *fs_ns;
0659
0660 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
0661
0662 if (!inode)
0663 return -ENODATA;
0664
0665 fs_ns = inode->i_sb->s_user_ns;
0666 size = __vfs_getxattr((struct dentry *)dentry, inode,
0667 XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
0668 if (size == -ENODATA || size == -EOPNOTSUPP)
0669
0670 return -ENODATA;
0671
0672 if (size < 0)
0673 return size;
0674
0675 if (size < sizeof(magic_etc))
0676 return -EINVAL;
0677
0678 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
0679
0680 rootkuid = make_kuid(fs_ns, 0);
0681 switch (magic_etc & VFS_CAP_REVISION_MASK) {
0682 case VFS_CAP_REVISION_1:
0683 if (size != XATTR_CAPS_SZ_1)
0684 return -EINVAL;
0685 tocopy = VFS_CAP_U32_1;
0686 break;
0687 case VFS_CAP_REVISION_2:
0688 if (size != XATTR_CAPS_SZ_2)
0689 return -EINVAL;
0690 tocopy = VFS_CAP_U32_2;
0691 break;
0692 case VFS_CAP_REVISION_3:
0693 if (size != XATTR_CAPS_SZ_3)
0694 return -EINVAL;
0695 tocopy = VFS_CAP_U32_3;
0696 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
0697 break;
0698
0699 default:
0700 return -EINVAL;
0701 }
0702
0703
0704
0705 rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
0706 if (!rootid_owns_currentns(rootkuid))
0707 return -ENODATA;
0708
0709 CAP_FOR_EACH_U32(i) {
0710 if (i >= tocopy)
0711 break;
0712 cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
0713 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
0714 }
0715
0716 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
0717 cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
0718
0719 cpu_caps->rootid = rootkuid;
0720
0721 return 0;
0722 }
0723
0724
0725
0726
0727
0728
0729 static int get_file_caps(struct linux_binprm *bprm, struct file *file,
0730 bool *effective, bool *has_fcap)
0731 {
0732 int rc = 0;
0733 struct cpu_vfs_cap_data vcaps;
0734
0735 cap_clear(bprm->cred->cap_permitted);
0736
0737 if (!file_caps_enabled)
0738 return 0;
0739
0740 if (!mnt_may_suid(file->f_path.mnt))
0741 return 0;
0742
0743
0744
0745
0746
0747
0748 if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
0749 return 0;
0750
0751 rc = get_vfs_caps_from_disk(file_mnt_user_ns(file),
0752 file->f_path.dentry, &vcaps);
0753 if (rc < 0) {
0754 if (rc == -EINVAL)
0755 printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
0756 bprm->filename);
0757 else if (rc == -ENODATA)
0758 rc = 0;
0759 goto out;
0760 }
0761
0762 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
0763
0764 out:
0765 if (rc)
0766 cap_clear(bprm->cred->cap_permitted);
0767
0768 return rc;
0769 }
0770
0771 static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
0772
0773 static inline bool __is_real(kuid_t uid, struct cred *cred)
0774 { return uid_eq(cred->uid, uid); }
0775
0776 static inline bool __is_eff(kuid_t uid, struct cred *cred)
0777 { return uid_eq(cred->euid, uid); }
0778
0779 static inline bool __is_suid(kuid_t uid, struct cred *cred)
0780 { return !__is_real(uid, cred) && __is_eff(uid, cred); }
0781
0782
0783
0784
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794 static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
0795 bool *effective, kuid_t root_uid)
0796 {
0797 const struct cred *old = current_cred();
0798 struct cred *new = bprm->cred;
0799
0800 if (!root_privileged())
0801 return;
0802
0803
0804
0805
0806
0807 if (has_fcap && __is_suid(root_uid, new)) {
0808 warn_setuid_and_fcaps_mixed(bprm->filename);
0809 return;
0810 }
0811
0812
0813
0814
0815
0816 if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
0817
0818 new->cap_permitted = cap_combine(old->cap_bset,
0819 old->cap_inheritable);
0820 }
0821
0822
0823
0824 if (__is_eff(root_uid, new))
0825 *effective = true;
0826 }
0827
/*
 * Capability-set comparison helpers. @field/@target/@source in the first
 * position(s) name a cap_* member of struct cred and are token-pasted.
 *
 * __cap_gained(field, target, source): target's cap_<field> has a bit that
 *	source's cap_<field> lacks.
 * __cap_grew(target, source, cred): within one cred, cap_<target> has a
 *	bit that cap_<source> lacks.
 * __cap_full(field, cred): cred's cap_<field> contains CAP_FULL_SET.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
0834
0835 static inline bool __is_setuid(struct cred *new, const struct cred *old)
0836 { return !uid_eq(new->euid, old->uid); }
0837
0838 static inline bool __is_setgid(struct cred *new, const struct cred *old)
0839 { return !gid_eq(new->egid, old->gid); }
0840
0841
0842
0843
0844
0845
0846
0847
0848
0849
0850
0851
0852
0853
0854
0855
0856
0857
0858 static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
0859 kuid_t root, bool has_fcap)
0860 {
0861 bool ret = false;
0862
0863 if ((__cap_grew(effective, ambient, new) &&
0864 !(__cap_full(effective, new) &&
0865 (__is_eff(root, new) || __is_real(root, new)) &&
0866 root_privileged())) ||
0867 (root_privileged() &&
0868 __is_suid(root, new) &&
0869 !__cap_full(effective, new)) ||
0870 (!__is_setuid(new, old) &&
0871 ((has_fcap &&
0872 __cap_gained(permitted, new, old)) ||
0873 __cap_gained(ambient, new, old))))
0874
0875 ret = true;
0876
0877 return ret;
0878 }
0879
0880
0881
0882
0883
0884
0885
0886
0887
0888
0889
0890
0891 int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file)
0892 {
0893
0894 const struct cred *old = current_cred();
0895 struct cred *new = bprm->cred;
0896 bool effective = false, has_fcap = false, is_setid;
0897 int ret;
0898 kuid_t root_uid;
0899
0900 if (WARN_ON(!cap_ambient_invariant_ok(old)))
0901 return -EPERM;
0902
0903 ret = get_file_caps(bprm, file, &effective, &has_fcap);
0904 if (ret < 0)
0905 return ret;
0906
0907 root_uid = make_kuid(new->user_ns, 0);
0908
0909 handle_privileged_root(bprm, has_fcap, &effective, root_uid);
0910
0911
0912 if (__cap_gained(permitted, new, old))
0913 bprm->per_clear |= PER_CLEAR_ON_SETID;
0914
0915
0916
0917
0918
0919
0920 is_setid = __is_setuid(new, old) || __is_setgid(new, old);
0921
0922 if ((is_setid || __cap_gained(permitted, new, old)) &&
0923 ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
0924 !ptracer_capable(current, new->user_ns))) {
0925
0926 if (!ns_capable(new->user_ns, CAP_SETUID) ||
0927 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
0928 new->euid = new->uid;
0929 new->egid = new->gid;
0930 }
0931 new->cap_permitted = cap_intersect(new->cap_permitted,
0932 old->cap_permitted);
0933 }
0934
0935 new->suid = new->fsuid = new->euid;
0936 new->sgid = new->fsgid = new->egid;
0937
0938
0939 if (has_fcap || is_setid)
0940 cap_clear(new->cap_ambient);
0941
0942
0943
0944
0945
0946 new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
0947
0948
0949
0950
0951
0952 if (effective)
0953 new->cap_effective = new->cap_permitted;
0954 else
0955 new->cap_effective = new->cap_ambient;
0956
0957 if (WARN_ON(!cap_ambient_invariant_ok(new)))
0958 return -EPERM;
0959
0960 if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
0961 ret = audit_log_bprm_fcaps(bprm, new, old);
0962 if (ret < 0)
0963 return ret;
0964 }
0965
0966 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
0967
0968 if (WARN_ON(!cap_ambient_invariant_ok(new)))
0969 return -EPERM;
0970
0971
0972 if (is_setid ||
0973 (!__is_real(root_uid, new) &&
0974 (effective ||
0975 __cap_grew(permitted, ambient, new))))
0976 bprm->secureexec = 1;
0977
0978 return 0;
0979 }
0980
0981
0982
0983
0984
0985
0986
0987
0988
0989
0990
0991
0992
0993
0994
0995 int cap_inode_setxattr(struct dentry *dentry, const char *name,
0996 const void *value, size_t size, int flags)
0997 {
0998 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
0999
1000
1001 if (strncmp(name, XATTR_SECURITY_PREFIX,
1002 XATTR_SECURITY_PREFIX_LEN) != 0)
1003 return 0;
1004
1005
1006
1007
1008
1009 if (strcmp(name, XATTR_NAME_CAPS) == 0)
1010 return 0;
1011
1012 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1013 return -EPERM;
1014 return 0;
1015 }
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036 int cap_inode_removexattr(struct user_namespace *mnt_userns,
1037 struct dentry *dentry, const char *name)
1038 {
1039 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1040
1041
1042 if (strncmp(name, XATTR_SECURITY_PREFIX,
1043 XATTR_SECURITY_PREFIX_LEN) != 0)
1044 return 0;
1045
1046 if (strcmp(name, XATTR_NAME_CAPS) == 0) {
1047
1048 struct inode *inode = d_backing_inode(dentry);
1049 if (!inode)
1050 return -EINVAL;
1051 if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
1052 return -EPERM;
1053 return 0;
1054 }
1055
1056 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1057 return -EPERM;
1058 return 0;
1059 }
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090 static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
1091 {
1092 kuid_t root_uid = make_kuid(old->user_ns, 0);
1093
1094 if ((uid_eq(old->uid, root_uid) ||
1095 uid_eq(old->euid, root_uid) ||
1096 uid_eq(old->suid, root_uid)) &&
1097 (!uid_eq(new->uid, root_uid) &&
1098 !uid_eq(new->euid, root_uid) &&
1099 !uid_eq(new->suid, root_uid))) {
1100 if (!issecure(SECURE_KEEP_CAPS)) {
1101 cap_clear(new->cap_permitted);
1102 cap_clear(new->cap_effective);
1103 }
1104
1105
1106
1107
1108
1109
1110 cap_clear(new->cap_ambient);
1111 }
1112 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1113 cap_clear(new->cap_effective);
1114 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1115 new->cap_effective = new->cap_permitted;
1116 }
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129 int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1130 {
1131 switch (flags) {
1132 case LSM_SETID_RE:
1133 case LSM_SETID_ID:
1134 case LSM_SETID_RES:
1135
1136
1137 if (!issecure(SECURE_NO_SETUID_FIXUP))
1138 cap_emulate_setxuid(new, old);
1139 break;
1140
1141 case LSM_SETID_FS:
1142
1143
1144
1145
1146
1147
1148 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1149 kuid_t root_uid = make_kuid(old->user_ns, 0);
1150 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1151 new->cap_effective =
1152 cap_drop_fs_set(new->cap_effective);
1153
1154 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1155 new->cap_effective =
1156 cap_raise_fs_set(new->cap_effective,
1157 new->cap_permitted);
1158 }
1159 break;
1160
1161 default:
1162 return -EINVAL;
1163 }
1164
1165 return 0;
1166 }
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178 static int cap_safe_nice(struct task_struct *p)
1179 {
1180 int is_subset, ret = 0;
1181
1182 rcu_read_lock();
1183 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1184 current_cred()->cap_permitted);
1185 if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1186 ret = -EPERM;
1187 rcu_read_unlock();
1188
1189 return ret;
1190 }
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
/*
 * cap_task_setscheduler - may the current task change @p's scheduler
 *                         settings?
 * @p: target task
 *
 * Delegates to cap_safe_nice(). Returns 0 when allowed, -EPERM otherwise.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
/*
 * cap_task_setioprio - may the current task change @p's I/O priority?
 * @p:      target task
 * @ioprio: requested priority (not examined here)
 *
 * Delegates to cap_safe_nice(). Returns 0 when allowed, -EPERM otherwise.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
/*
 * cap_task_setnice - may the current task change @p's nice value?
 * @p:    target task
 * @nice: requested nice value (not examined here)
 *
 * Delegates to cap_safe_nice(). Returns 0 when allowed, -EPERM otherwise.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1235
1236
1237
1238
1239
1240 static int cap_prctl_drop(unsigned long cap)
1241 {
1242 struct cred *new;
1243
1244 if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1245 return -EPERM;
1246 if (!cap_valid(cap))
1247 return -EINVAL;
1248
1249 new = prepare_creds();
1250 if (!new)
1251 return -ENOMEM;
1252 cap_lower(new->cap_bset, cap);
1253 return commit_creds(new);
1254 }
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1272 unsigned long arg4, unsigned long arg5)
1273 {
1274 const struct cred *old = current_cred();
1275 struct cred *new;
1276
1277 switch (option) {
1278 case PR_CAPBSET_READ:
1279 if (!cap_valid(arg2))
1280 return -EINVAL;
1281 return !!cap_raised(old->cap_bset, arg2);
1282
1283 case PR_CAPBSET_DROP:
1284 return cap_prctl_drop(arg2);
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305 case PR_SET_SECUREBITS:
1306 if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1307 & (old->securebits ^ arg2))
1308 || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))
1309 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))
1310 || (cap_capable(current_cred(),
1311 current_cred()->user_ns,
1312 CAP_SETPCAP,
1313 CAP_OPT_NONE) != 0)
1314
1315
1316
1317
1318
1319
1320
1321 )
1322
1323 return -EPERM;
1324
1325 new = prepare_creds();
1326 if (!new)
1327 return -ENOMEM;
1328 new->securebits = arg2;
1329 return commit_creds(new);
1330
1331 case PR_GET_SECUREBITS:
1332 return old->securebits;
1333
1334 case PR_GET_KEEPCAPS:
1335 return !!issecure(SECURE_KEEP_CAPS);
1336
1337 case PR_SET_KEEPCAPS:
1338 if (arg2 > 1)
1339 return -EINVAL;
1340 if (issecure(SECURE_KEEP_CAPS_LOCKED))
1341 return -EPERM;
1342
1343 new = prepare_creds();
1344 if (!new)
1345 return -ENOMEM;
1346 if (arg2)
1347 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1348 else
1349 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1350 return commit_creds(new);
1351
1352 case PR_CAP_AMBIENT:
1353 if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1354 if (arg3 | arg4 | arg5)
1355 return -EINVAL;
1356
1357 new = prepare_creds();
1358 if (!new)
1359 return -ENOMEM;
1360 cap_clear(new->cap_ambient);
1361 return commit_creds(new);
1362 }
1363
1364 if (((!cap_valid(arg3)) | arg4 | arg5))
1365 return -EINVAL;
1366
1367 if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1368 return !!cap_raised(current_cred()->cap_ambient, arg3);
1369 } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1370 arg2 != PR_CAP_AMBIENT_LOWER) {
1371 return -EINVAL;
1372 } else {
1373 if (arg2 == PR_CAP_AMBIENT_RAISE &&
1374 (!cap_raised(current_cred()->cap_permitted, arg3) ||
1375 !cap_raised(current_cred()->cap_inheritable,
1376 arg3) ||
1377 issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1378 return -EPERM;
1379
1380 new = prepare_creds();
1381 if (!new)
1382 return -ENOMEM;
1383 if (arg2 == PR_CAP_AMBIENT_RAISE)
1384 cap_raise(new->cap_ambient, arg3);
1385 else
1386 cap_lower(new->cap_ambient, arg3);
1387 return commit_creds(new);
1388 }
1389
1390 default:
1391
1392 return -ENOSYS;
1393 }
1394 }
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406 int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1407 {
1408 int cap_sys_admin = 0;
1409
1410 if (cap_capable(current_cred(), &init_user_ns,
1411 CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
1412 cap_sys_admin = 1;
1413
1414 return cap_sys_admin;
1415 }
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427 int cap_mmap_addr(unsigned long addr)
1428 {
1429 int ret = 0;
1430
1431 if (addr < dac_mmap_min_addr) {
1432 ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1433 CAP_OPT_NONE);
1434
1435 if (ret == 0)
1436 current->flags |= PF_SUPERPRIV;
1437 }
1438 return ret;
1439 }
1440
/*
 * cap_mmap_file - file-mapping hook; imposes no restriction
 * @file:    file being mapped (ignored)
 * @reqprot: protection requested by the caller (ignored)
 * @prot:    protection that will be applied (ignored)
 * @flags:   mapping flags (ignored)
 *
 * Always returns 0.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	const int allowed = 0;

	return allowed;
}
1446
1447 #ifdef CONFIG_SECURITY
1448
1449 static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
1450 LSM_HOOK_INIT(capable, cap_capable),
1451 LSM_HOOK_INIT(settime, cap_settime),
1452 LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1453 LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1454 LSM_HOOK_INIT(capget, cap_capget),
1455 LSM_HOOK_INIT(capset, cap_capset),
1456 LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
1457 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1458 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1459 LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1460 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1461 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1462 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1463 LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1464 LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1465 LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1466 LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1467 LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1468 };
1469
1470 static int __init capability_init(void)
1471 {
1472 security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1473 "capability");
1474 return 0;
1475 }
1476
1477 DEFINE_LSM(capability) = {
1478 .name = "capability",
1479 .order = LSM_ORDER_FIRST,
1480 .init = capability_init,
1481 };
1482
1483 #endif