/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 *
 * This file is subject to the terms and conditions of version 2 of the GNU
 * General Public License.  See the file COPYING in the main directory of the
 * Linux distribution for more details.
 */
0008 #include <linux/kernel.h>
0009 #include <linux/atomic.h>
0010 #include <linux/cgroup.h>
0011 #include <linux/filter.h>
0012 #include <linux/slab.h>
0013 #include <linux/sysctl.h>
0014 #include <linux/string.h>
0015 #include <linux/bpf.h>
0016 #include <linux/bpf-cgroup.h>
0017 #include <linux/bpf_lsm.h>
0018 #include <linux/bpf_verifier.h>
0019 #include <net/sock.h>
0020 #include <net/bpf_sk_storage.h>
0021
0022 #include "../cgroup/cgroup-internal.h"
0023
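/* One static key per attach type, bumped while at least one program of that
 * type is attached somewhere; lets the cgroup_bpf_enabled() fast paths skip
 * the hooks entirely otherwise.
 */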
0024 DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
0025 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
0026
/* __always_inline is necessary to prevent indirect call through run_prog
 * function pointer.
 */
0030 static __always_inline int
0031 bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
0032 enum cgroup_bpf_attach_type atype,
0033 const void *ctx, bpf_prog_run_fn run_prog,
0034 int retval, u32 *ret_flags)
0035 {
0036 const struct bpf_prog_array_item *item;
0037 const struct bpf_prog *prog;
0038 const struct bpf_prog_array *array;
0039 struct bpf_run_ctx *old_run_ctx;
0040 struct bpf_cg_run_ctx run_ctx;
0041 u32 func_ret;
0042
0043 run_ctx.retval = retval;
0044 migrate_disable();
0045 rcu_read_lock();
0046 array = rcu_dereference(cgrp->effective[atype]);
0047 item = &array->items[0];
0048 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
0049 while ((prog = READ_ONCE(item->prog))) {
0050 run_ctx.prog_item = item;
0051 func_ret = run_prog(prog, ctx);
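/* The upper bits of a program's return value are optional flags
 * (e.g. BPF_RET_SET_CN); collect them for the caller and keep only
 * bit 0 as the allow/deny verdict.
 */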
0052 if (ret_flags) {
0053 *(ret_flags) |= (func_ret >> 1);
0054 func_ret &= 1;
0055 }
0056 if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval))
0057 run_ctx.retval = -EPERM;
0058 item++;
0059 }
0060 bpf_reset_run_ctx(old_run_ctx);
0061 rcu_read_unlock();
0062 migrate_enable();
0063 return run_ctx.retval;
0064 }
0065
0066 unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
0067 const struct bpf_insn *insn)
0068 {
0069 const struct bpf_prog *shim_prog;
0070 struct sock *sk;
0071 struct cgroup *cgrp;
0072 int ret = 0;
0073 u64 *args;
0074
0075 args = (u64 *)ctx;
0076 sk = (void *)(unsigned long)args[0];
/* Equivalent to container_of(insn, struct bpf_prog, insnsi). */
0078 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
0079
0080 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
0081 if (likely(cgrp))
0082 ret = bpf_prog_run_array_cg(&cgrp->bpf,
0083 shim_prog->aux->cgroup_atype,
0084 ctx, bpf_prog_run, 0, NULL);
0085 return ret;
0086 }
0087
0088 unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
0089 const struct bpf_insn *insn)
0090 {
0091 const struct bpf_prog *shim_prog;
0092 struct socket *sock;
0093 struct cgroup *cgrp;
0094 int ret = 0;
0095 u64 *args;
0096
0097 args = (u64 *)ctx;
0098 sock = (void *)(unsigned long)args[0];
/* Equivalent to container_of(insn, struct bpf_prog, insnsi). */
0100 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
0101
0102 cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
0103 if (likely(cgrp))
0104 ret = bpf_prog_run_array_cg(&cgrp->bpf,
0105 shim_prog->aux->cgroup_atype,
0106 ctx, bpf_prog_run, 0, NULL);
0107 return ret;
0108 }
0109
0110 unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
0111 const struct bpf_insn *insn)
0112 {
0113 const struct bpf_prog *shim_prog;
0114 struct cgroup *cgrp;
0115 int ret = 0;
0116
/* Equivalent to container_of(insn, struct bpf_prog, insnsi). */
0118 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
0119
/* We rely on the BPF trampoline holding the RCU read lock around LSM_CGROUP programs. */
0121 cgrp = task_dfl_cgroup(current);
0122 if (likely(cgrp))
0123 ret = bpf_prog_run_array_cg(&cgrp->bpf,
0124 shim_prog->aux->cgroup_atype,
0125 ctx, bpf_prog_run, 0, NULL);
0126 return ret;
0127 }
0128
0129 #ifdef CONFIG_BPF_LSM
0130 struct cgroup_lsm_atype {
0131 u32 attach_btf_id;
0132 int refcnt;
0133 };
0134
0135 static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];
0136
0137 static enum cgroup_bpf_attach_type
0138 bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
0139 {
0140 int i;
0141
0142 lockdep_assert_held(&cgroup_mutex);
0143
0144 if (attach_type != BPF_LSM_CGROUP)
0145 return to_cgroup_bpf_attach_type(attach_type);
0146
0147 for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
0148 if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
0149 return CGROUP_LSM_START + i;
0150
0151 for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
0152 if (cgroup_lsm_atype[i].attach_btf_id == 0)
0153 return CGROUP_LSM_START + i;
0154
0155 return -E2BIG;
0156
0157 }
0158
0159 void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
0160 {
0161 int i = cgroup_atype - CGROUP_LSM_START;
0162
0163 lockdep_assert_held(&cgroup_mutex);
0164
0165 WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
0166 cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);
0167
0168 cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
0169 cgroup_lsm_atype[i].refcnt++;
0170 }
0171
0172 void bpf_cgroup_atype_put(int cgroup_atype)
0173 {
0174 int i = cgroup_atype - CGROUP_LSM_START;
0175
0176 mutex_lock(&cgroup_mutex);
0177 if (--cgroup_lsm_atype[i].refcnt <= 0)
0178 cgroup_lsm_atype[i].attach_btf_id = 0;
0179 WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
0180 mutex_unlock(&cgroup_mutex);
0181 }
0182 #else
0183 static enum cgroup_bpf_attach_type
0184 bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
0185 {
0186 if (attach_type != BPF_LSM_CGROUP)
0187 return to_cgroup_bpf_attach_type(attach_type);
0188 return -EOPNOTSUPP;
0189 }
0190 #endif
0191
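/* Kill the percpu refcount to start tearing down the cgroup's bpf state;
 * the extra cgroup reference taken here is dropped in cgroup_bpf_release()
 * once all users are gone.
 */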
0192 void cgroup_bpf_offline(struct cgroup *cgrp)
0193 {
0194 cgroup_get(cgrp);
0195 percpu_ref_kill(&cgrp->bpf.refcnt);
0196 }
0197
0198 static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
0199 {
0200 enum bpf_cgroup_storage_type stype;
0201
0202 for_each_cgroup_storage_type(stype)
0203 bpf_cgroup_storage_free(storages[stype]);
0204 }
0205
0206 static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
0207 struct bpf_cgroup_storage *new_storages[],
0208 enum bpf_attach_type type,
0209 struct bpf_prog *prog,
0210 struct cgroup *cgrp)
0211 {
0212 enum bpf_cgroup_storage_type stype;
0213 struct bpf_cgroup_storage_key key;
0214 struct bpf_map *map;
0215
0216 key.cgroup_inode_id = cgroup_id(cgrp);
0217 key.attach_type = type;
0218
0219 for_each_cgroup_storage_type(stype) {
0220 map = prog->aux->cgroup_storage[stype];
0221 if (!map)
0222 continue;
0223
0224 storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
0225 if (storages[stype])
0226 continue;
0227
0228 storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
0229 if (IS_ERR(storages[stype])) {
0230 bpf_cgroup_storages_free(new_storages);
0231 return -ENOMEM;
0232 }
0233
0234 new_storages[stype] = storages[stype];
0235 }
0236
0237 return 0;
0238 }
0239
0240 static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
0241 struct bpf_cgroup_storage *src[])
0242 {
0243 enum bpf_cgroup_storage_type stype;
0244
0245 for_each_cgroup_storage_type(stype)
0246 dst[stype] = src[stype];
0247 }
0248
0249 static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
0250 struct cgroup *cgrp,
0251 enum bpf_attach_type attach_type)
0252 {
0253 enum bpf_cgroup_storage_type stype;
0254
0255 for_each_cgroup_storage_type(stype)
0256 bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
0257 }
0258
/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
 * doesn't free link memory, which will eventually be done by bpf_link's
 * release() callback, when its last FD is closed.
 */
0264 static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
0265 {
0266 cgroup_put(link->cgroup);
0267 link->cgroup = NULL;
0268 }
0269
/**
 * cgroup_bpf_release() - put references of all bpf programs and
 *                        release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
0275 static void cgroup_bpf_release(struct work_struct *work)
0276 {
0277 struct cgroup *p, *cgrp = container_of(work, struct cgroup,
0278 bpf.release_work);
0279 struct bpf_prog_array *old_array;
0280 struct list_head *storages = &cgrp->bpf.storages;
0281 struct bpf_cgroup_storage *storage, *stmp;
0282
0283 unsigned int atype;
0284
0285 mutex_lock(&cgroup_mutex);
0286
0287 for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
0288 struct hlist_head *progs = &cgrp->bpf.progs[atype];
0289 struct bpf_prog_list *pl;
0290 struct hlist_node *pltmp;
0291
0292 hlist_for_each_entry_safe(pl, pltmp, progs, node) {
0293 hlist_del(&pl->node);
0294 if (pl->prog) {
0295 if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
0296 bpf_trampoline_unlink_cgroup_shim(pl->prog);
0297 bpf_prog_put(pl->prog);
0298 }
0299 if (pl->link) {
0300 if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
0301 bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
0302 bpf_cgroup_link_auto_detach(pl->link);
0303 }
0304 kfree(pl);
0305 static_branch_dec(&cgroup_bpf_enabled_key[atype]);
0306 }
0307 old_array = rcu_dereference_protected(
0308 cgrp->bpf.effective[atype],
0309 lockdep_is_held(&cgroup_mutex));
0310 bpf_prog_array_free(old_array);
0311 }
0312
0313 list_for_each_entry_safe(storage, stmp, storages, list_cg) {
0314 bpf_cgroup_storage_unlink(storage);
0315 bpf_cgroup_storage_free(storage);
0316 }
0317
0318 mutex_unlock(&cgroup_mutex);
0319
0320 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
0321 cgroup_bpf_put(p);
0322
0323 percpu_ref_exit(&cgrp->bpf.refcnt);
0324 cgroup_put(cgrp);
0325 }
0326
/**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 *                           of bpf cgroup data
 * @ref: percpu ref counter structure
 */
0332 static void cgroup_bpf_release_fn(struct percpu_ref *ref)
0333 {
0334 struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
0335
0336 INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
0337 queue_work(system_wq, &cgrp->bpf.release_work);
0338 }
0339
/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
 * link or direct prog.
 */
0343 static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
0344 {
0345 if (pl->prog)
0346 return pl->prog;
0347 if (pl->link)
0348 return pl->link->link.prog;
0349 return NULL;
0350 }
0351
/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
0355 static u32 prog_list_length(struct hlist_head *head)
0356 {
0357 struct bpf_prog_list *pl;
0358 u32 cnt = 0;
0359
0360 hlist_for_each_entry(pl, head, node) {
0361 if (!prog_list_prog(pl))
0362 continue;
0363 cnt++;
0364 }
0365 return cnt;
0366 }
0367
/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendent cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
0372 static bool hierarchy_allows_attach(struct cgroup *cgrp,
0373 enum cgroup_bpf_attach_type atype)
0374 {
0375 struct cgroup *p;
0376
0377 p = cgroup_parent(cgrp);
0378 if (!p)
0379 return true;
0380 do {
0381 u32 flags = p->bpf.flags[atype];
0382 u32 cnt;
0383
0384 if (flags & BPF_F_ALLOW_MULTI)
0385 return true;
0386 cnt = prog_list_length(&p->bpf.progs[atype]);
0387 WARN_ON_ONCE(cnt > 1);
0388 if (cnt == 1)
0389 return !!(flags & BPF_F_ALLOW_OVERRIDE);
0390 p = cgroup_parent(p);
0391 } while (p);
0392 return true;
0393 }
0394
/* compute a set of effective programs based on
 * a) enabled (attached) programs
 * b) attach flags
 * walking from @cgrp up to the root, and return it in *@array.
 */
0401 static int compute_effective_progs(struct cgroup *cgrp,
0402 enum cgroup_bpf_attach_type atype,
0403 struct bpf_prog_array **array)
0404 {
0405 struct bpf_prog_array_item *item;
0406 struct bpf_prog_array *progs;
0407 struct bpf_prog_list *pl;
0408 struct cgroup *p = cgrp;
0409 int cnt = 0;
0410
/* count number of effective programs by walking parents */
0412 do {
0413 if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
0414 cnt += prog_list_length(&p->bpf.progs[atype]);
0415 p = cgroup_parent(p);
0416 } while (p);
0417
0418 progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
0419 if (!progs)
0420 return -ENOMEM;
0421
/* populate effective progs from descendant to root */
0423 cnt = 0;
0424 p = cgrp;
0425 do {
0426 if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
0427 continue;
0428
0429 hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
0430 if (!prog_list_prog(pl))
0431 continue;
0432
0433 item = &progs->items[cnt];
0434 item->prog = prog_list_prog(pl);
0435 bpf_cgroup_storages_assign(item->cgroup_storage,
0436 pl->storage);
0437 cnt++;
0438 }
0439 } while ((p = cgroup_parent(p)));
0440
0441 *array = progs;
0442 return 0;
0443 }
0444
0445 static void activate_effective_progs(struct cgroup *cgrp,
0446 enum cgroup_bpf_attach_type atype,
0447 struct bpf_prog_array *old_array)
0448 {
0449 old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
0450 lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
 * might be still walking the array
 */
0454 bpf_prog_array_free(old_array);
0455 }
0456
/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
0461 int cgroup_bpf_inherit(struct cgroup *cgrp)
0462 {
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
0466 #define NR ARRAY_SIZE(cgrp->bpf.effective)
0467 struct bpf_prog_array *arrays[NR] = {};
0468 struct cgroup *p;
0469 int ret, i;
0470
0471 ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
0472 GFP_KERNEL);
0473 if (ret)
0474 return ret;
0475
0476 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
0477 cgroup_bpf_get(p);
0478
0479 for (i = 0; i < NR; i++)
0480 INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
0481
0482 INIT_LIST_HEAD(&cgrp->bpf.storages);
0483
0484 for (i = 0; i < NR; i++)
0485 if (compute_effective_progs(cgrp, i, &arrays[i]))
0486 goto cleanup;
0487
0488 for (i = 0; i < NR; i++)
0489 activate_effective_progs(cgrp, i, arrays[i]);
0490
0491 return 0;
0492 cleanup:
0493 for (i = 0; i < NR; i++)
0494 bpf_prog_array_free(arrays[i]);
0495
0496 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
0497 cgroup_bpf_put(p);
0498
0499 percpu_ref_exit(&cgrp->bpf.refcnt);
0500
0501 return -ENOMEM;
0502 }
0503
0504 static int update_effective_progs(struct cgroup *cgrp,
0505 enum cgroup_bpf_attach_type atype)
0506 {
0507 struct cgroup_subsys_state *css;
0508 int err;
0509
/* allocate and recompute effective prog arrays */
0511 css_for_each_descendant_pre(css, &cgrp->self) {
0512 struct cgroup *desc = container_of(css, struct cgroup, self);
0513
0514 if (percpu_ref_is_zero(&desc->bpf.refcnt))
0515 continue;
0516
0517 err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
0518 if (err)
0519 goto cleanup;
0520 }
0521
/* all allocations were successful. Activate all prog arrays */
0523 css_for_each_descendant_pre(css, &cgrp->self) {
0524 struct cgroup *desc = container_of(css, struct cgroup, self);
0525
0526 if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
0527 if (unlikely(desc->bpf.inactive)) {
0528 bpf_prog_array_free(desc->bpf.inactive);
0529 desc->bpf.inactive = NULL;
0530 }
0531 continue;
0532 }
0533
0534 activate_effective_progs(desc, atype, desc->bpf.inactive);
0535 desc->bpf.inactive = NULL;
0536 }
0537
0538 return 0;
0539
0540 cleanup:
/* oom while computing effective. Free all computed effective arrays
 * since they were not activated
 */
0544 css_for_each_descendant_pre(css, &cgrp->self) {
0545 struct cgroup *desc = container_of(css, struct cgroup, self);
0546
0547 bpf_prog_array_free(desc->bpf.inactive);
0548 desc->bpf.inactive = NULL;
0549 }
0550
0551 return err;
0552 }
0553
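/* Hard limit on the number of programs that may be attached directly to one
 * cgroup for a single attach type.
 */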
0554 #define BPF_CGROUP_MAX_PROGS 64
0555
0556 static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
0557 struct bpf_prog *prog,
0558 struct bpf_cgroup_link *link,
0559 struct bpf_prog *replace_prog,
0560 bool allow_multi)
0561 {
0562 struct bpf_prog_list *pl;
0563
/* single-attach case */
0565 if (!allow_multi) {
0566 if (hlist_empty(progs))
0567 return NULL;
0568 return hlist_entry(progs->first, typeof(*pl), node);
0569 }
0570
0571 hlist_for_each_entry(pl, progs, node) {
0572 if (prog && pl->prog == prog && prog != replace_prog)
/* disallow attaching the same prog twice */
0574 return ERR_PTR(-EINVAL);
0575 if (link && pl->link == link)
/* disallow attaching the same link twice */
0577 return ERR_PTR(-EINVAL);
0578 }
0579
/* direct prog multi-attach w/ replacement case */
0581 if (replace_prog) {
0582 hlist_for_each_entry(pl, progs, node) {
0583 if (pl->prog == replace_prog)
0584
0585 return pl;
0586 }
/* prog to replace not found for cgroup */
0588 return ERR_PTR(-ENOENT);
0589 }
0590
0591 return NULL;
0592 }
0593
/**
 * __cgroup_bpf_attach() - Attach the bpf prog or link to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @link: A link to attach
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
0607 static int __cgroup_bpf_attach(struct cgroup *cgrp,
0608 struct bpf_prog *prog, struct bpf_prog *replace_prog,
0609 struct bpf_cgroup_link *link,
0610 enum bpf_attach_type type, u32 flags)
0611 {
0612 u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
0613 struct bpf_prog *old_prog = NULL;
0614 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
0615 struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
0616 struct bpf_prog *new_prog = prog ? : link->link.prog;
0617 enum cgroup_bpf_attach_type atype;
0618 struct bpf_prog_list *pl;
0619 struct hlist_head *progs;
0620 int err;
0621
0622 if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
0623 ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */
0625 return -EINVAL;
0626 if (link && (prog || replace_prog))
/* only either link or prog/replace_prog can be specified */
0628 return -EINVAL;
0629 if (!!replace_prog != !!(flags & BPF_F_REPLACE))
/* replace_prog implies BPF_F_REPLACE, and vice versa */
0631 return -EINVAL;
0632
0633 atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
0634 if (atype < 0)
0635 return -EINVAL;
0636
0637 progs = &cgrp->bpf.progs[atype];
0638
0639 if (!hierarchy_allows_attach(cgrp, atype))
0640 return -EPERM;
0641
0642 if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
/* Disallow attaching non-overridable on top
 * of existing overridable in this cgroup.
 * Disallow attaching multi-prog if overridable or none
 */
0647 return -EPERM;
0648
0649 if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
0650 return -E2BIG;
0651
0652 pl = find_attach_entry(progs, prog, link, replace_prog,
0653 flags & BPF_F_ALLOW_MULTI);
0654 if (IS_ERR(pl))
0655 return PTR_ERR(pl);
0656
0657 if (bpf_cgroup_storages_alloc(storage, new_storage, type,
0658 prog ? : link->link.prog, cgrp))
0659 return -ENOMEM;
0660
0661 if (pl) {
0662 old_prog = pl->prog;
0663 } else {
0664 struct hlist_node *last = NULL;
0665
0666 pl = kmalloc(sizeof(*pl), GFP_KERNEL);
0667 if (!pl) {
0668 bpf_cgroup_storages_free(new_storage);
0669 return -ENOMEM;
0670 }
0671 if (hlist_empty(progs))
0672 hlist_add_head(&pl->node, progs);
0673 else
0674 hlist_for_each(last, progs) {
0675 if (last->next)
0676 continue;
0677 hlist_add_behind(&pl->node, last);
0678 break;
0679 }
0680 }
0681
0682 pl->prog = prog;
0683 pl->link = link;
0684 bpf_cgroup_storages_assign(pl->storage, storage);
0685 cgrp->bpf.flags[atype] = saved_flags;
0686
0687 if (type == BPF_LSM_CGROUP) {
0688 err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
0689 if (err)
0690 goto cleanup;
0691 }
0692
0693 err = update_effective_progs(cgrp, atype);
0694 if (err)
0695 goto cleanup_trampoline;
0696
0697 if (old_prog) {
0698 if (type == BPF_LSM_CGROUP)
0699 bpf_trampoline_unlink_cgroup_shim(old_prog);
0700 bpf_prog_put(old_prog);
0701 } else {
0702 static_branch_inc(&cgroup_bpf_enabled_key[atype]);
0703 }
0704 bpf_cgroup_storages_link(new_storage, cgrp, type);
0705 return 0;
0706
0707 cleanup_trampoline:
0708 if (type == BPF_LSM_CGROUP)
0709 bpf_trampoline_unlink_cgroup_shim(new_prog);
0710
0711 cleanup:
0712 if (old_prog) {
0713 pl->prog = old_prog;
0714 pl->link = NULL;
0715 }
0716 bpf_cgroup_storages_free(new_storage);
0717 if (!old_prog) {
0718 hlist_del(&pl->node);
0719 kfree(pl);
0720 }
0721 return err;
0722 }
0723
0724 static int cgroup_bpf_attach(struct cgroup *cgrp,
0725 struct bpf_prog *prog, struct bpf_prog *replace_prog,
0726 struct bpf_cgroup_link *link,
0727 enum bpf_attach_type type,
0728 u32 flags)
0729 {
0730 int ret;
0731
0732 mutex_lock(&cgroup_mutex);
0733 ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
0734 mutex_unlock(&cgroup_mutex);
0735 return ret;
0736 }
0737
/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
0741 static void replace_effective_prog(struct cgroup *cgrp,
0742 enum cgroup_bpf_attach_type atype,
0743 struct bpf_cgroup_link *link)
0744 {
0745 struct bpf_prog_array_item *item;
0746 struct cgroup_subsys_state *css;
0747 struct bpf_prog_array *progs;
0748 struct bpf_prog_list *pl;
0749 struct hlist_head *head;
0750 struct cgroup *cg;
0751 int pos;
0752
0753 css_for_each_descendant_pre(css, &cgrp->self) {
0754 struct cgroup *desc = container_of(css, struct cgroup, self);
0755
0756 if (percpu_ref_is_zero(&desc->bpf.refcnt))
0757 continue;
0758
/* find position of link in effective progs array */
0760 for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
0761 if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
0762 continue;
0763
0764 head = &cg->bpf.progs[atype];
0765 hlist_for_each_entry(pl, head, node) {
0766 if (!prog_list_prog(pl))
0767 continue;
0768 if (pl->link == link)
0769 goto found;
0770 pos++;
0771 }
0772 }
0773 found:
0774 BUG_ON(!cg);
0775 progs = rcu_dereference_protected(
0776 desc->bpf.effective[atype],
0777 lockdep_is_held(&cgroup_mutex));
0778 item = &progs->items[pos];
0779 WRITE_ONCE(item->prog, link->link.prog);
0780 }
0781 }
0782
/**
 * __cgroup_bpf_replace() - Replace link's program and propagate the change
 *                          to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @link: A link for which to replace BPF program
 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
 *            incremented
 *
 * Must be called with cgroup_mutex held.
 */
0792 static int __cgroup_bpf_replace(struct cgroup *cgrp,
0793 struct bpf_cgroup_link *link,
0794 struct bpf_prog *new_prog)
0795 {
0796 enum cgroup_bpf_attach_type atype;
0797 struct bpf_prog *old_prog;
0798 struct bpf_prog_list *pl;
0799 struct hlist_head *progs;
0800 bool found = false;
0801
0802 atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
0803 if (atype < 0)
0804 return -EINVAL;
0805
0806 progs = &cgrp->bpf.progs[atype];
0807
0808 if (link->link.prog->type != new_prog->type)
0809 return -EINVAL;
0810
0811 hlist_for_each_entry(pl, progs, node) {
0812 if (pl->link == link) {
0813 found = true;
0814 break;
0815 }
0816 }
0817 if (!found)
0818 return -ENOENT;
0819
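/* Publish the new program on the link first, then patch it into the
 * effective arrays of all descendant cgroups; only afterwards drop the
 * reference on the program being replaced.
 */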
0820 old_prog = xchg(&link->link.prog, new_prog);
0821 replace_effective_prog(cgrp, atype, link);
0822 bpf_prog_put(old_prog);
0823 return 0;
0824 }
0825
0826 static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
0827 struct bpf_prog *old_prog)
0828 {
0829 struct bpf_cgroup_link *cg_link;
0830 int ret;
0831
0832 cg_link = container_of(link, struct bpf_cgroup_link, link);
0833
0834 mutex_lock(&cgroup_mutex);
0835
0836 if (!cg_link->cgroup) {
0837 ret = -ENOLINK;
0838 goto out_unlock;
0839 }
0840 if (old_prog && link->prog != old_prog) {
0841 ret = -EPERM;
0842 goto out_unlock;
0843 }
0844 ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
0845 out_unlock:
0846 mutex_unlock(&cgroup_mutex);
0847 return ret;
0848 }
0849
0850 static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
0851 struct bpf_prog *prog,
0852 struct bpf_cgroup_link *link,
0853 bool allow_multi)
0854 {
0855 struct bpf_prog_list *pl;
0856
0857 if (!allow_multi) {
0858 if (hlist_empty(progs))
/* report error when trying to detach and nothing is attached */
0860 return ERR_PTR(-ENOENT);
/* to maintain backward compatibility NONE and OVERRIDE cgroups
 * allow detaching with invalid FD (prog==NULL) in legacy mode
 */
0865 return hlist_entry(progs->first, typeof(*pl), node);
0866 }
0867
0868 if (!prog && !link)
/* to detach MULTI prog the user has to specify valid FD
 * of the program or link to be detached
 */
0872 return ERR_PTR(-EINVAL);
0873
/* find the prog or link and detach it */
0875 hlist_for_each_entry(pl, progs, node) {
0876 if (pl->prog == prog && pl->link == link)
0877 return pl;
0878 }
0879 return ERR_PTR(-ENOENT);
0880 }
0881
/**
 * purge_effective_progs() - After compute_effective_progs fails to alloc new
 *                           effective progs, delete the prog or link from the
 *                           effective arrays of all descendant cgroups in place
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 *
 * Must be called with cgroup_mutex held.
 */
0892 static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
0893 struct bpf_cgroup_link *link,
0894 enum cgroup_bpf_attach_type atype)
0895 {
0896 struct cgroup_subsys_state *css;
0897 struct bpf_prog_array *progs;
0898 struct bpf_prog_list *pl;
0899 struct hlist_head *head;
0900 struct cgroup *cg;
0901 int pos;
0902
/* recompute effective prog array in place */
0904 css_for_each_descendant_pre(css, &cgrp->self) {
0905 struct cgroup *desc = container_of(css, struct cgroup, self);
0906
0907 if (percpu_ref_is_zero(&desc->bpf.refcnt))
0908 continue;
0909
/* find position of link or prog in effective progs array */
0911 for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
0912 if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
0913 continue;
0914
0915 head = &cg->bpf.progs[atype];
0916 hlist_for_each_entry(pl, head, node) {
0917 if (!prog_list_prog(pl))
0918 continue;
0919 if (pl->prog == prog && pl->link == link)
0920 goto found;
0921 pos++;
0922 }
0923 }
0924
/* no link or prog match, skip the cgroup of this layer */
0926 continue;
0927 found:
0928 progs = rcu_dereference_protected(
0929 desc->bpf.effective[atype],
0930 lockdep_is_held(&cgroup_mutex));
0931
/* Remove the program from the array */
0933 WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
0934 "Failed to purge a prog from array at index %d", pos);
0935 }
0936 }
0937
/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
0949 static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
0950 struct bpf_cgroup_link *link, enum bpf_attach_type type)
0951 {
0952 enum cgroup_bpf_attach_type atype;
0953 struct bpf_prog *old_prog;
0954 struct bpf_prog_list *pl;
0955 struct hlist_head *progs;
0956 u32 attach_btf_id = 0;
0957 u32 flags;
0958
0959 if (prog)
0960 attach_btf_id = prog->aux->attach_btf_id;
0961 if (link)
0962 attach_btf_id = link->link.prog->aux->attach_btf_id;
0963
0964 atype = bpf_cgroup_atype_find(type, attach_btf_id);
0965 if (atype < 0)
0966 return -EINVAL;
0967
0968 progs = &cgrp->bpf.progs[atype];
0969 flags = cgrp->bpf.flags[atype];
0970
0971 if (prog && link)
/* only one of prog or link can be specified */
0973 return -EINVAL;
0974
0975 pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
0976 if (IS_ERR(pl))
0977 return PTR_ERR(pl);
0978
/* mark it deleted, so it's ignored while recomputing effective */
0980 old_prog = pl->prog;
0981 pl->prog = NULL;
0982 pl->link = NULL;
0983
0984 if (update_effective_progs(cgrp, atype)) {
/* if update effective array failed replace the prog with a dummy prog */
0986 pl->prog = old_prog;
0987 pl->link = link;
0988 purge_effective_progs(cgrp, old_prog, link, atype);
0989 }
0990
/* now can actually delete it from this cgroup list */
0992 hlist_del(&pl->node);
0993
0994 kfree(pl);
0995 if (hlist_empty(progs))
/* last program was detached, reset flags to zero */
0997 cgrp->bpf.flags[atype] = 0;
0998 if (old_prog) {
0999 if (type == BPF_LSM_CGROUP)
1000 bpf_trampoline_unlink_cgroup_shim(old_prog);
1001 bpf_prog_put(old_prog);
1002 }
1003 static_branch_dec(&cgroup_bpf_enabled_key[atype]);
1004 return 0;
1005 }
1006
1007 static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
1008 enum bpf_attach_type type)
1009 {
1010 int ret;
1011
1012 mutex_lock(&cgroup_mutex);
1013 ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
1014 mutex_unlock(&cgroup_mutex);
1015 return ret;
1016 }
1017
/* Must be called with cgroup_mutex held to avoid races. */
1019 static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
1020 union bpf_attr __user *uattr)
1021 {
1022 __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
1023 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
1024 enum bpf_attach_type type = attr->query.attach_type;
1025 enum cgroup_bpf_attach_type from_atype, to_atype;
1026 enum cgroup_bpf_attach_type atype;
1027 struct bpf_prog_array *effective;
1028 int cnt, ret = 0, i;
1029 int total_cnt = 0;
1030 u32 flags;
1031
1032 if (type == BPF_LSM_CGROUP) {
1033 if (attr->query.prog_cnt && prog_ids && !prog_attach_flags)
1034 return -EINVAL;
1035
1036 from_atype = CGROUP_LSM_START;
1037 to_atype = CGROUP_LSM_END;
1038 flags = 0;
1039 } else {
1040 from_atype = to_cgroup_bpf_attach_type(type);
1041 if (from_atype < 0)
1042 return -EINVAL;
1043 to_atype = from_atype;
1044 flags = cgrp->bpf.flags[from_atype];
1045 }
1046
1047 for (atype = from_atype; atype <= to_atype; atype++) {
1048 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
1049 effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
1050 lockdep_is_held(&cgroup_mutex));
1051 total_cnt += bpf_prog_array_length(effective);
1052 } else {
1053 total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
1054 }
1055 }
1056
1057 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
1058 return -EFAULT;
1059 if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
1060 return -EFAULT;
1061 if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
/* return current flags, but not prog IDs */
1063 return 0;
1064
1065 if (attr->query.prog_cnt < total_cnt) {
1066 total_cnt = attr->query.prog_cnt;
1067 ret = -ENOSPC;
1068 }
1069
1070 for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
1071 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
1072 effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
1073 lockdep_is_held(&cgroup_mutex));
1074 cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
1075 ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
1076 } else {
1077 struct hlist_head *progs;
1078 struct bpf_prog_list *pl;
1079 struct bpf_prog *prog;
1080 u32 id;
1081
1082 progs = &cgrp->bpf.progs[atype];
1083 cnt = min_t(int, prog_list_length(progs), total_cnt);
1084 i = 0;
1085 hlist_for_each_entry(pl, progs, node) {
1086 prog = prog_list_prog(pl);
1087 id = prog->aux->id;
1088 if (copy_to_user(prog_ids + i, &id, sizeof(id)))
1089 return -EFAULT;
1090 if (++i == cnt)
1091 break;
1092 }
1093 }
1094
1095 if (prog_attach_flags) {
1096 flags = cgrp->bpf.flags[atype];
1097
1098 for (i = 0; i < cnt; i++)
1099 if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags)))
1100 return -EFAULT;
1101 prog_attach_flags += cnt;
1102 }
1103
1104 prog_ids += cnt;
1105 total_cnt -= cnt;
1106 }
1107 return ret;
1108 }
1109
1110 static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
1111 union bpf_attr __user *uattr)
1112 {
1113 int ret;
1114
1115 mutex_lock(&cgroup_mutex);
1116 ret = __cgroup_bpf_query(cgrp, attr, uattr);
1117 mutex_unlock(&cgroup_mutex);
1118 return ret;
1119 }
1120
1121 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
1122 enum bpf_prog_type ptype, struct bpf_prog *prog)
1123 {
1124 struct bpf_prog *replace_prog = NULL;
1125 struct cgroup *cgrp;
1126 int ret;
1127
1128 cgrp = cgroup_get_from_fd(attr->target_fd);
1129 if (IS_ERR(cgrp))
1130 return PTR_ERR(cgrp);
1131
1132 if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
1133 (attr->attach_flags & BPF_F_REPLACE)) {
1134 replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
1135 if (IS_ERR(replace_prog)) {
1136 cgroup_put(cgrp);
1137 return PTR_ERR(replace_prog);
1138 }
1139 }
1140
1141 ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
1142 attr->attach_type, attr->attach_flags);
1143
1144 if (replace_prog)
1145 bpf_prog_put(replace_prog);
1146 cgroup_put(cgrp);
1147 return ret;
1148 }
1149
1150 int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
1151 {
1152 struct bpf_prog *prog;
1153 struct cgroup *cgrp;
1154 int ret;
1155
1156 cgrp = cgroup_get_from_fd(attr->target_fd);
1157 if (IS_ERR(cgrp))
1158 return PTR_ERR(cgrp);
1159
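/* An invalid or missing program fd is tolerated: with prog == NULL the
 * detach falls back to removing whatever single program is attached for
 * this type (legacy, non-multi behaviour).
 */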
1160 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1161 if (IS_ERR(prog))
1162 prog = NULL;
1163
1164 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
1165 if (prog)
1166 bpf_prog_put(prog);
1167
1168 cgroup_put(cgrp);
1169 return ret;
1170 }
1171
1172 static void bpf_cgroup_link_release(struct bpf_link *link)
1173 {
1174 struct bpf_cgroup_link *cg_link =
1175 container_of(link, struct bpf_cgroup_link, link);
1176 struct cgroup *cg;
1177
/* link might have been auto-detached by dying cgroup already,
 * in that case our work is done here
 */
1181 if (!cg_link->cgroup)
1182 return;
1183
1184 mutex_lock(&cgroup_mutex);
1185
/* re-check cgroup under lock again */
1187 if (!cg_link->cgroup) {
1188 mutex_unlock(&cgroup_mutex);
1189 return;
1190 }
1191
1192 WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
1193 cg_link->type));
1194 if (cg_link->type == BPF_LSM_CGROUP)
1195 bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
1196
1197 cg = cg_link->cgroup;
1198 cg_link->cgroup = NULL;
1199
1200 mutex_unlock(&cgroup_mutex);
1201
1202 cgroup_put(cg);
1203 }
1204
1205 static void bpf_cgroup_link_dealloc(struct bpf_link *link)
1206 {
1207 struct bpf_cgroup_link *cg_link =
1208 container_of(link, struct bpf_cgroup_link, link);
1209
1210 kfree(cg_link);
1211 }
1212
1213 static int bpf_cgroup_link_detach(struct bpf_link *link)
1214 {
1215 bpf_cgroup_link_release(link);
1216
1217 return 0;
1218 }
1219
1220 static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
1221 struct seq_file *seq)
1222 {
1223 struct bpf_cgroup_link *cg_link =
1224 container_of(link, struct bpf_cgroup_link, link);
1225 u64 cg_id = 0;
1226
1227 mutex_lock(&cgroup_mutex);
1228 if (cg_link->cgroup)
1229 cg_id = cgroup_id(cg_link->cgroup);
1230 mutex_unlock(&cgroup_mutex);
1231
1232 seq_printf(seq,
1233 "cgroup_id:\t%llu\n"
1234 "attach_type:\t%d\n",
1235 cg_id,
1236 cg_link->type);
1237 }
1238
1239 static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
1240 struct bpf_link_info *info)
1241 {
1242 struct bpf_cgroup_link *cg_link =
1243 container_of(link, struct bpf_cgroup_link, link);
1244 u64 cg_id = 0;
1245
1246 mutex_lock(&cgroup_mutex);
1247 if (cg_link->cgroup)
1248 cg_id = cgroup_id(cg_link->cgroup);
1249 mutex_unlock(&cgroup_mutex);
1250
1251 info->cgroup.cgroup_id = cg_id;
1252 info->cgroup.attach_type = cg_link->type;
1253 return 0;
1254 }
1255
1256 static const struct bpf_link_ops bpf_cgroup_link_lops = {
1257 .release = bpf_cgroup_link_release,
1258 .dealloc = bpf_cgroup_link_dealloc,
1259 .detach = bpf_cgroup_link_detach,
1260 .update_prog = cgroup_bpf_replace,
1261 .show_fdinfo = bpf_cgroup_link_show_fdinfo,
1262 .fill_link_info = bpf_cgroup_link_fill_link_info,
1263 };
1264
1265 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
1266 {
1267 struct bpf_link_primer link_primer;
1268 struct bpf_cgroup_link *link;
1269 struct cgroup *cgrp;
1270 int err;
1271
1272 if (attr->link_create.flags)
1273 return -EINVAL;
1274
1275 cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
1276 if (IS_ERR(cgrp))
1277 return PTR_ERR(cgrp);
1278
1279 link = kzalloc(sizeof(*link), GFP_USER);
1280 if (!link) {
1281 err = -ENOMEM;
1282 goto out_put_cgroup;
1283 }
1284 bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
1285 prog);
1286 link->cgroup = cgrp;
1287 link->type = attr->link_create.attach_type;
1288
1289 err = bpf_link_prime(&link->link, &link_primer);
1290 if (err) {
1291 kfree(link);
1292 goto out_put_cgroup;
1293 }
1294
1295 err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
1296 link->type, BPF_F_ALLOW_MULTI);
1297 if (err) {
1298 bpf_link_cleanup(&link_primer);
1299 goto out_put_cgroup;
1300 }
1301
1302 return bpf_link_settle(&link_primer);
1303
1304 out_put_cgroup:
1305 cgroup_put(cgrp);
1306 return err;
1307 }
1308
1309 int cgroup_bpf_prog_query(const union bpf_attr *attr,
1310 union bpf_attr __user *uattr)
1311 {
1312 struct cgroup *cgrp;
1313 int ret;
1314
1315 cgrp = cgroup_get_from_fd(attr->query.target_fd);
1316 if (IS_ERR(cgrp))
1317 return PTR_ERR(cgrp);
1318
1319 ret = cgroup_bpf_query(cgrp, attr, uattr);
1320
1321 cgroup_put(cgrp);
1322 return ret;
1323 }
1324
/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @atype: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @atype must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * For egress packets, this function can return:
 *   NET_XMIT_SUCCESS    (0)	- continue with packet output
 *   NET_XMIT_DROP       (1)	- drop packet and notify TCP to call cwr
 *   NET_XMIT_CN         (2)	- continue with packet output and notify TCP
 *				  to call cwr
 *   -err			- drop packet
 *
 * For ingress packets, this function will return %-EPERM if any attached
 * program denied the packet. Otherwise 0 is returned.
 */
1348 int __cgroup_bpf_run_filter_skb(struct sock *sk,
1349 struct sk_buff *skb,
1350 enum cgroup_bpf_attach_type atype)
1351 {
1352 unsigned int offset = skb->data - skb_network_header(skb);
1353 struct sock *save_sk;
1354 void *saved_data_end;
1355 struct cgroup *cgrp;
1356 int ret;
1357
1358 if (!sk || !sk_fullsock(sk))
1359 return 0;
1360
1361 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
1362 return 0;
1363
1364 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1365 save_sk = skb->sk;
1366 skb->sk = sk;
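/* Run the program against the network header: push skb->data back to the
 * network header for the duration of the run and pull it again afterwards.
 */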
1367 __skb_push(skb, offset);
1368
/* compute pointers for the bpf prog */
1370 bpf_compute_and_save_data_end(skb, &saved_data_end);
1371
1372 if (atype == CGROUP_INET_EGRESS) {
1373 u32 flags = 0;
1374 bool cn;
1375
1376 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
1377 __bpf_prog_run_save_cb, 0, &flags);
/* Return values of CGROUP EGRESS BPF programs are:
 *   0: drop packet
 *   1: keep packet
 *   2: drop packet and cn
 *   3: keep packet and cn
 *
 * The returned value is then converted to one of the NET_XMIT
 * or an error code that is then interpreted as drop packet
 * (and no cn):
 *   0: NET_XMIT_SUCCESS  skb should be transmitted
 *   1: NET_XMIT_DROP     skb should be dropped and cn
 *   2: NET_XMIT_CN       skb should be transmitted and cn
 *   3: -err              skb should be dropped
 */
1394 cn = flags & BPF_RET_SET_CN;
1395 if (ret && !IS_ERR_VALUE((long)ret))
1396 ret = -EFAULT;
1397 if (!ret)
1398 ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
1399 else
1400 ret = (cn ? NET_XMIT_DROP : ret);
1401 } else {
1402 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
1403 skb, __bpf_prog_run_save_cb, 0,
1404 NULL);
1405 if (ret && !IS_ERR_VALUE((long)ret))
1406 ret = -EFAULT;
1407 }
1408 bpf_restore_data_end(skb, saved_data_end);
1409 __skb_pull(skb, offset);
1410 skb->sk = save_sk;
1411
1412 return ret;
1413 }
1414 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
1415
/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @atype: The type of program to be executed
 *
 * socket is expected to be of type INET or INET6.
 *
 * The program type passed in via @atype must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program denied the
 * operation. In all other cases, 0 is returned.
 */
1429 int __cgroup_bpf_run_filter_sk(struct sock *sk,
1430 enum cgroup_bpf_attach_type atype)
1431 {
1432 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1433
1434 return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
1435 NULL);
1436 }
1437 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
1438
/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
 *                                       sockaddr provided by user
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @atype: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 * @flags: Pointer to u32 which contains higher bits of BPF program
 *         return value (OR'ed together).
 *
 * socket is expected to be of type INET or INET6.
 *
 * This function will return %-EPERM if an attached program denied the
 * operation. In all other cases, 0 is returned.
 */
1454 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
1455 struct sockaddr *uaddr,
1456 enum cgroup_bpf_attach_type atype,
1457 void *t_ctx,
1458 u32 *flags)
1459 {
1460 struct bpf_sock_addr_kern ctx = {
1461 .sk = sk,
1462 .uaddr = uaddr,
1463 .t_ctx = t_ctx,
1464 };
1465 struct sockaddr_storage unspec;
1466 struct cgroup *cgrp;
1467
/* Check socket family since not all sockets represent network
 * endpoints (e.g. PF_UNIX).
 */
1471 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
1472 return 0;
1473
1474 if (!ctx.uaddr) {
1475 memset(&unspec, 0, sizeof(unspec));
1476 ctx.uaddr = (struct sockaddr *)&unspec;
1477 }
1478
1479 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1480 return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
1481 0, flags);
1482 }
1483 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
1484
/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 *            sk with connection information (IP addresses, etc.)
 * @atype: The type of program to be executed
 *
 * socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @atype must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program denied the
 * operation. In all other cases, 0 is returned.
 */
1501 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
1502 struct bpf_sock_ops_kern *sock_ops,
1503 enum cgroup_bpf_attach_type atype)
1504 {
1505 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1506
1507 return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
1508 0, NULL);
1509 }
1510 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
1511
1512 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
1513 short access, enum cgroup_bpf_attach_type atype)
1514 {
1515 struct cgroup *cgrp;
1516 struct bpf_cgroup_dev_ctx ctx = {
1517 .access_type = (access << 16) | dev_type,
1518 .major = major,
1519 .minor = minor,
1520 };
1521 int ret;
1522
1523 rcu_read_lock();
1524 cgrp = task_dfl_cgroup(current);
1525 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1526 NULL);
1527 rcu_read_unlock();
1528
1529 return ret;
1530 }
1531
1532 BPF_CALL_0(bpf_get_retval)
1533 {
1534 struct bpf_cg_run_ctx *ctx =
1535 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1536
1537 return ctx->retval;
1538 }
1539
1540 const struct bpf_func_proto bpf_get_retval_proto = {
1541 .func = bpf_get_retval,
1542 .gpl_only = false,
1543 .ret_type = RET_INTEGER,
1544 };
1545
1546 BPF_CALL_1(bpf_set_retval, int, retval)
1547 {
1548 struct bpf_cg_run_ctx *ctx =
1549 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1550
1551 ctx->retval = retval;
1552 return 0;
1553 }
1554
1555 const struct bpf_func_proto bpf_set_retval_proto = {
1556 .func = bpf_set_retval,
1557 .gpl_only = false,
1558 .ret_type = RET_INTEGER,
1559 .arg1_type = ARG_ANYTHING,
1560 };
1561
1562 static const struct bpf_func_proto *
1563 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1564 {
1565 switch (func_id) {
1566 case BPF_FUNC_get_current_uid_gid:
1567 return &bpf_get_current_uid_gid_proto;
1568 case BPF_FUNC_get_local_storage:
1569 return &bpf_get_local_storage_proto;
1570 case BPF_FUNC_get_current_cgroup_id:
1571 return &bpf_get_current_cgroup_id_proto;
1572 case BPF_FUNC_perf_event_output:
1573 return &bpf_event_output_data_proto;
1574 case BPF_FUNC_get_retval:
1575 return &bpf_get_retval_proto;
1576 case BPF_FUNC_set_retval:
1577 return &bpf_set_retval_proto;
1578 default:
1579 return bpf_base_func_proto(func_id);
1580 }
1581 }
1582
1583 static const struct bpf_func_proto *
1584 cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1585 {
1586 return cgroup_base_func_proto(func_id, prog);
1587 }
1588
1589 static bool cgroup_dev_is_valid_access(int off, int size,
1590 enum bpf_access_type type,
1591 const struct bpf_prog *prog,
1592 struct bpf_insn_access_aux *info)
1593 {
1594 const int size_default = sizeof(__u32);
1595
1596 if (type == BPF_WRITE)
1597 return false;
1598
1599 if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
1600 return false;
1601
1602 if (off % size != 0)
1603 return false;
1604
1605 switch (off) {
1606 case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
1607 bpf_ctx_record_field_size(info, size_default);
1608 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
1609 return false;
1610 break;
1611 default:
1612 if (size != size_default)
1613 return false;
1614 }
1615
1616 return true;
1617 }
1618
1619 const struct bpf_prog_ops cg_dev_prog_ops = {
1620 };
1621
1622 const struct bpf_verifier_ops cg_dev_verifier_ops = {
1623 .get_func_proto = cgroup_dev_func_proto,
1624 .is_valid_access = cgroup_dev_is_valid_access,
1625 };
1626
/**
 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @buf: pointer to buffer passed by user space (in and out)
 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
 *	result is size of the new value if the program overrode it, initial
 *	value otherwise
 * @ppos: value-result argument: value is position at which read from or write
 *	to sysctl is happening, result is new position if program overrode it,
 *	initial value otherwise
 * @atype: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * On denial, a negative errno is returned (%-EPERM by default, or the value
 * set by the program via bpf_set_retval()).
 */
1648 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
1649 struct ctl_table *table, int write,
1650 char **buf, size_t *pcount, loff_t *ppos,
1651 enum cgroup_bpf_attach_type atype)
1652 {
1653 struct bpf_sysctl_kern ctx = {
1654 .head = head,
1655 .table = table,
1656 .write = write,
1657 .ppos = ppos,
1658 .cur_val = NULL,
1659 .cur_len = PAGE_SIZE,
1660 .new_val = NULL,
1661 .new_len = 0,
1662 .new_updated = 0,
1663 };
1664 struct cgroup *cgrp;
1665 loff_t pos = 0;
1666 int ret;
1667
1668 ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
1669 if (!ctx.cur_val ||
1670 table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
/* Let BPF program decide how to proceed. */
1672 ctx.cur_len = 0;
1673 }
1674
1675 if (write && *buf && *pcount) {
/* BPF program should be able to override new value with a
 * buffer bigger than provided by user.
 */
1679 ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
1680 ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
1681 if (ctx.new_val) {
1682 memcpy(ctx.new_val, *buf, ctx.new_len);
1683 } else {
/* Let BPF program decide how to proceed. */
1685 ctx.new_len = 0;
1686 }
1687 }
1688
1689 rcu_read_lock();
1690 cgrp = task_dfl_cgroup(current);
1691 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1692 NULL);
1693 rcu_read_unlock();
1694
1695 kfree(ctx.cur_val);
1696
1697 if (ret == 1 && ctx.new_updated) {
1698 kfree(*buf);
1699 *buf = ctx.new_val;
1700 *pcount = ctx.new_len;
1701 } else {
1702 kfree(ctx.new_val);
1703 }
1704
1705 return ret;
1706 }
1707
1708 #ifdef CONFIG_NET
1709 static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
1710 struct bpf_sockopt_buf *buf)
1711 {
1712 if (unlikely(max_optlen < 0))
1713 return -EINVAL;
1714
1715 if (unlikely(max_optlen > PAGE_SIZE)) {
/* We don't expose optvals that are greater than PAGE_SIZE
 * to the BPF program.
 */
1719 max_optlen = PAGE_SIZE;
1720 }
1721
1722 if (max_optlen <= sizeof(buf->data)) {
/* When the optval fits into the on-stack bpf_sockopt_buf,
 * avoid the cost of kzalloc.
 */
1726 ctx->optval = buf->data;
1727 ctx->optval_end = ctx->optval + max_optlen;
1728 return max_optlen;
1729 }
1730
1731 ctx->optval = kzalloc(max_optlen, GFP_USER);
1732 if (!ctx->optval)
1733 return -ENOMEM;
1734
1735 ctx->optval_end = ctx->optval + max_optlen;
1736
1737 return max_optlen;
1738 }
1739
1740 static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
1741 struct bpf_sockopt_buf *buf)
1742 {
1743 if (ctx->optval == buf->data)
1744 return;
1745 kfree(ctx->optval);
1746 }
1747
1748 static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
1749 struct bpf_sockopt_buf *buf)
1750 {
1751 return ctx->optval != buf->data;
1752 }
1753
1754 int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
1755 int *optname, char __user *optval,
1756 int *optlen, char **kernel_optval)
1757 {
1758 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1759 struct bpf_sockopt_buf buf = {};
1760 struct bpf_sockopt_kern ctx = {
1761 .sk = sk,
1762 .level = *level,
1763 .optname = *optname,
1764 };
1765 int ret, max_optlen;
1766
/* Allocate a bit more than the initial user buffer for
 * BPF program. The canonical use case is overriding
 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
 */
1771 max_optlen = max_t(int, 16, *optlen);
1772 max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
1773 if (max_optlen < 0)
1774 return max_optlen;
1775
1776 ctx.optlen = *optlen;
1777
1778 if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
1779 ret = -EFAULT;
1780 goto out;
1781 }
1782
1783 lock_sock(sk);
1784 ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
1785 &ctx, bpf_prog_run, 0, NULL);
1786 release_sock(sk);
1787
1788 if (ret)
1789 goto out;
1790
1791 if (ctx.optlen == -1) {
/* optlen set to -1, bypass kernel */
1793 ret = 1;
1794 } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
/* optlen is out of bounds */
1796 ret = -EFAULT;
1797 } else {
1798
1799 ret = 0;
1800
/* export any potential modifications */
1802 *level = ctx.level;
1803 *optname = ctx.optname;
1804
/* optlen == 0 from BPF indicates that we should
 * use original userspace data
 */
1808 if (ctx.optlen != 0) {
1809 *optlen = ctx.optlen;
/* We've used bpf_sockopt_kern->buf as an intermediary
 * storage, but the BPF program indicates that we need
 * to pass this data to the kernel setsockopt handler.
 * No way to export on-stack buf, have to allocate a
 * new buffer.
 */
1816 if (!sockopt_buf_allocated(&ctx, &buf)) {
1817 void *p = kmalloc(ctx.optlen, GFP_USER);
1818
1819 if (!p) {
1820 ret = -ENOMEM;
1821 goto out;
1822 }
1823 memcpy(p, ctx.optval, ctx.optlen);
1824 *kernel_optval = p;
1825 } else {
1826 *kernel_optval = ctx.optval;
1827 }
1828
1829 return 0;
1830 }
1831 }
1832
1833 out:
1834 sockopt_free_buf(&ctx, &buf);
1835 return ret;
1836 }
1837
1838 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
1839 int optname, char __user *optval,
1840 int __user *optlen, int max_optlen,
1841 int retval)
1842 {
1843 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1844 struct bpf_sockopt_buf buf = {};
1845 struct bpf_sockopt_kern ctx = {
1846 .sk = sk,
1847 .level = level,
1848 .optname = optname,
1849 .current_task = current,
1850 };
1851 int ret;
1852
1853 ctx.optlen = max_optlen;
1854 max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
1855 if (max_optlen < 0)
1856 return max_optlen;
1857
1858 if (!retval) {
/* If kernel getsockopt finished successfully,
 * copy whatever was returned to the user back
 * into our temporary buffer. Set optlen to the
 * one that kernel returned as well to let
 * BPF programs inspect the value.
 */
1866 if (get_user(ctx.optlen, optlen)) {
1867 ret = -EFAULT;
1868 goto out;
1869 }
1870
1871 if (ctx.optlen < 0) {
1872 ret = -EFAULT;
1873 goto out;
1874 }
1875
1876 if (copy_from_user(ctx.optval, optval,
1877 min(ctx.optlen, max_optlen)) != 0) {
1878 ret = -EFAULT;
1879 goto out;
1880 }
1881 }
1882
1883 lock_sock(sk);
1884 ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
1885 &ctx, bpf_prog_run, retval, NULL);
1886 release_sock(sk);
1887
1888 if (ret < 0)
1889 goto out;
1890
1891 if (ctx.optlen > max_optlen || ctx.optlen < 0) {
1892 ret = -EFAULT;
1893 goto out;
1894 }
1895
1896 if (ctx.optlen != 0) {
1897 if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
1898 put_user(ctx.optlen, optlen)) {
1899 ret = -EFAULT;
1900 goto out;
1901 }
1902 }
1903
1904 out:
1905 sockopt_free_buf(&ctx, &buf);
1906 return ret;
1907 }
1908
1909 int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
1910 int optname, void *optval,
1911 int *optlen, int retval)
1912 {
1913 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1914 struct bpf_sockopt_kern ctx = {
1915 .sk = sk,
1916 .level = level,
1917 .optname = optname,
1918 .optlen = *optlen,
1919 .optval = optval,
1920 .optval_end = optval + *optlen,
1921 .current_task = current,
1922 };
1923 int ret;
1924
/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
 * user data back into BPF buffer when retval != 0. This is
 * done as an optimization to avoid extra copy, assuming
 * kernel won't populate the data in case of an error.
 * Here we always pass the data and memset() should
 * be called if that data shouldn't be "exported".
 */
1933 ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
1934 &ctx, bpf_prog_run, retval, NULL);
1935 if (ret < 0)
1936 return ret;
1937
1938 if (ctx.optlen > *optlen)
1939 return -EFAULT;
1940
/* BPF programs can shrink the buffer, export the modifications.
 */
1943 if (ctx.optlen != 0)
1944 *optlen = ctx.optlen;
1945
1946 return ret;
1947 }
1948 #endif
1949
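/* Recursively prepend the "dir/" path components of a sysctl table directory
 * to *bufp, consuming *lenp; returns the number of bytes written or a
 * negative error.
 */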
1950 static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
1951 size_t *lenp)
1952 {
1953 ssize_t tmp_ret = 0, ret;
1954
1955 if (dir->header.parent) {
1956 tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
1957 if (tmp_ret < 0)
1958 return tmp_ret;
1959 }
1960
1961 ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
1962 if (ret < 0)
1963 return ret;
1964 *bufp += ret;
1965 *lenp -= ret;
1966 ret += tmp_ret;
1967
/* Avoid leading slash. */
1969 if (!ret)
1970 return ret;
1971
1972 tmp_ret = strscpy(*bufp, "/", *lenp);
1973 if (tmp_ret < 0)
1974 return tmp_ret;
1975 *bufp += tmp_ret;
1976 *lenp -= tmp_ret;
1977
1978 return ret + tmp_ret;
1979 }
1980
1981 BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
1982 size_t, buf_len, u64, flags)
1983 {
1984 ssize_t tmp_ret = 0, ret;
1985
1986 if (!buf)
1987 return -EINVAL;
1988
1989 if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
1990 if (!ctx->head)
1991 return -EINVAL;
1992 tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
1993 if (tmp_ret < 0)
1994 return tmp_ret;
1995 }
1996
1997 ret = strscpy(buf, ctx->table->procname, buf_len);
1998
1999 return ret < 0 ? ret : tmp_ret + ret;
2000 }
2001
2002 static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
2003 .func = bpf_sysctl_get_name,
2004 .gpl_only = false,
2005 .ret_type = RET_INTEGER,
2006 .arg1_type = ARG_PTR_TO_CTX,
2007 .arg2_type = ARG_PTR_TO_MEM,
2008 .arg3_type = ARG_CONST_SIZE,
2009 .arg4_type = ARG_ANYTHING,
2010 };
2011
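/* Copy the sysctl value @src into @dst, NUL-padding or truncating as needed.
 * Returns the number of copied bytes on success, -E2BIG if @dst was too
 * small and -EINVAL if there is no value to copy.
 */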
2012 static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
2013 size_t src_len)
2014 {
2015 if (!dst)
2016 return -EINVAL;
2017
2018 if (!dst_len)
2019 return -E2BIG;
2020
2021 if (!src || !src_len) {
2022 memset(dst, 0, dst_len);
2023 return -EINVAL;
2024 }
2025
2026 memcpy(dst, src, min(dst_len, src_len));
2027
2028 if (dst_len > src_len) {
2029 memset(dst + src_len, '\0', dst_len - src_len);
2030 return src_len;
2031 }
2032
2033 dst[dst_len - 1] = '\0';
2034
2035 return -E2BIG;
2036 }
2037
2038 BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
2039 char *, buf, size_t, buf_len)
2040 {
2041 return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
2042 }
2043
2044 static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
2045 .func = bpf_sysctl_get_current_value,
2046 .gpl_only = false,
2047 .ret_type = RET_INTEGER,
2048 .arg1_type = ARG_PTR_TO_CTX,
2049 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
2050 .arg3_type = ARG_CONST_SIZE,
2051 };
2052
2053 BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
2054 size_t, buf_len)
2055 {
2056 if (!ctx->write) {
2057 if (buf && buf_len)
2058 memset(buf, '\0', buf_len);
2059 return -EINVAL;
2060 }
2061 return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
2062 }
2063
2064 static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
2065 .func = bpf_sysctl_get_new_value,
2066 .gpl_only = false,
2067 .ret_type = RET_INTEGER,
2068 .arg1_type = ARG_PTR_TO_CTX,
2069 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
2070 .arg3_type = ARG_CONST_SIZE,
2071 };
2072
2073 BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
2074 const char *, buf, size_t, buf_len)
2075 {
2076 if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
2077 return -EINVAL;
2078
2079 if (buf_len > PAGE_SIZE - 1)
2080 return -E2BIG;
2081
2082 memcpy(ctx->new_val, buf, buf_len);
2083 ctx->new_len = buf_len;
2084 ctx->new_updated = 1;
2085
2086 return 0;
2087 }
2088
2089 static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
2090 .func = bpf_sysctl_set_new_value,
2091 .gpl_only = false,
2092 .ret_type = RET_INTEGER,
2093 .arg1_type = ARG_PTR_TO_CTX,
2094 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
2095 .arg3_type = ARG_CONST_SIZE,
2096 };
2097
2098 static const struct bpf_func_proto *
2099 sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2100 {
2101 switch (func_id) {
2102 case BPF_FUNC_strtol:
2103 return &bpf_strtol_proto;
2104 case BPF_FUNC_strtoul:
2105 return &bpf_strtoul_proto;
2106 case BPF_FUNC_sysctl_get_name:
2107 return &bpf_sysctl_get_name_proto;
2108 case BPF_FUNC_sysctl_get_current_value:
2109 return &bpf_sysctl_get_current_value_proto;
2110 case BPF_FUNC_sysctl_get_new_value:
2111 return &bpf_sysctl_get_new_value_proto;
2112 case BPF_FUNC_sysctl_set_new_value:
2113 return &bpf_sysctl_set_new_value_proto;
2114 case BPF_FUNC_ktime_get_coarse_ns:
2115 return &bpf_ktime_get_coarse_ns_proto;
2116 default:
2117 return cgroup_base_func_proto(func_id, prog);
2118 }
2119 }
2120
2121 static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
2122 const struct bpf_prog *prog,
2123 struct bpf_insn_access_aux *info)
2124 {
2125 const int size_default = sizeof(__u32);
2126
2127 if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
2128 return false;
2129
2130 switch (off) {
2131 case bpf_ctx_range(struct bpf_sysctl, write):
2132 if (type != BPF_READ)
2133 return false;
2134 bpf_ctx_record_field_size(info, size_default);
2135 return bpf_ctx_narrow_access_ok(off, size, size_default);
2136 case bpf_ctx_range(struct bpf_sysctl, file_pos):
2137 if (type == BPF_READ) {
2138 bpf_ctx_record_field_size(info, size_default);
2139 return bpf_ctx_narrow_access_ok(off, size, size_default);
2140 } else {
2141 return size == size_default;
2142 }
2143 default:
2144 return false;
2145 }
2146 }
2147
2148 static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
2149 const struct bpf_insn *si,
2150 struct bpf_insn *insn_buf,
2151 struct bpf_prog *prog, u32 *target_size)
2152 {
2153 struct bpf_insn *insn = insn_buf;
2154 u32 read_size;
2155
2156 switch (si->off) {
2157 case offsetof(struct bpf_sysctl, write):
2158 *insn++ = BPF_LDX_MEM(
2159 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
2160 bpf_target_off(struct bpf_sysctl_kern, write,
2161 sizeof_field(struct bpf_sysctl_kern,
2162 write),
2163 target_size));
2164 break;
2165 case offsetof(struct bpf_sysctl, file_pos):
/* ppos is a pointer so it should be accessed via indirect
 * loads and stores. Also for stores additional temporary
 * register is used since neither src_reg nor dst_reg can be
 * overridden.
 */
2171 if (type == BPF_WRITE) {
2172 int treg = BPF_REG_9;
2173
2174 if (si->src_reg == treg || si->dst_reg == treg)
2175 --treg;
2176 if (si->src_reg == treg || si->dst_reg == treg)
2177 --treg;
2178 *insn++ = BPF_STX_MEM(
2179 BPF_DW, si->dst_reg, treg,
2180 offsetof(struct bpf_sysctl_kern, tmp_reg));
2181 *insn++ = BPF_LDX_MEM(
2182 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
2183 treg, si->dst_reg,
2184 offsetof(struct bpf_sysctl_kern, ppos));
2185 *insn++ = BPF_STX_MEM(
2186 BPF_SIZEOF(u32), treg, si->src_reg,
2187 bpf_ctx_narrow_access_offset(
2188 0, sizeof(u32), sizeof(loff_t)));
2189 *insn++ = BPF_LDX_MEM(
2190 BPF_DW, treg, si->dst_reg,
2191 offsetof(struct bpf_sysctl_kern, tmp_reg));
2192 } else {
2193 *insn++ = BPF_LDX_MEM(
2194 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
2195 si->dst_reg, si->src_reg,
2196 offsetof(struct bpf_sysctl_kern, ppos));
2197 read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
2198 *insn++ = BPF_LDX_MEM(
2199 BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
2200 bpf_ctx_narrow_access_offset(
2201 0, read_size, sizeof(loff_t)));
2202 }
2203 *target_size = sizeof(u32);
2204 break;
2205 }
2206
2207 return insn - insn_buf;
2208 }
2209
2210 const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
2211 .get_func_proto = sysctl_func_proto,
2212 .is_valid_access = sysctl_is_valid_access,
2213 .convert_ctx_access = sysctl_convert_ctx_access,
2214 };
2215
2216 const struct bpf_prog_ops cg_sysctl_prog_ops = {
2217 };
2218
2219 #ifdef CONFIG_NET
2220 BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
2221 {
2222 const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;
2223
2224 return net->net_cookie;
2225 }
2226
2227 static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
2228 .func = bpf_get_netns_cookie_sockopt,
2229 .gpl_only = false,
2230 .ret_type = RET_INTEGER,
2231 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
2232 };
2233 #endif
2234
2235 static const struct bpf_func_proto *
2236 cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2237 {
2238 switch (func_id) {
2239 #ifdef CONFIG_NET
2240 case BPF_FUNC_get_netns_cookie:
2241 return &bpf_get_netns_cookie_sockopt_proto;
2242 case BPF_FUNC_sk_storage_get:
2243 return &bpf_sk_storage_get_proto;
2244 case BPF_FUNC_sk_storage_delete:
2245 return &bpf_sk_storage_delete_proto;
2246 case BPF_FUNC_setsockopt:
2247 if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
2248 return &bpf_sk_setsockopt_proto;
2249 return NULL;
2250 case BPF_FUNC_getsockopt:
2251 if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
2252 return &bpf_sk_getsockopt_proto;
2253 return NULL;
2254 #endif
2255 #ifdef CONFIG_INET
2256 case BPF_FUNC_tcp_sock:
2257 return &bpf_tcp_sock_proto;
2258 #endif
2259 default:
2260 return cgroup_base_func_proto(func_id, prog);
2261 }
2262 }
2263
2264 static bool cg_sockopt_is_valid_access(int off, int size,
2265 enum bpf_access_type type,
2266 const struct bpf_prog *prog,
2267 struct bpf_insn_access_aux *info)
2268 {
2269 const int size_default = sizeof(__u32);
2270
2271 if (off < 0 || off >= sizeof(struct bpf_sockopt))
2272 return false;
2273
2274 if (off % size != 0)
2275 return false;
2276
2277 if (type == BPF_WRITE) {
2278 switch (off) {
2279 case offsetof(struct bpf_sockopt, retval):
2280 if (size != size_default)
2281 return false;
2282 return prog->expected_attach_type ==
2283 BPF_CGROUP_GETSOCKOPT;
2284 case offsetof(struct bpf_sockopt, optname):
2285 fallthrough;
2286 case offsetof(struct bpf_sockopt, level):
2287 if (size != size_default)
2288 return false;
2289 return prog->expected_attach_type ==
2290 BPF_CGROUP_SETSOCKOPT;
2291 case offsetof(struct bpf_sockopt, optlen):
2292 return size == size_default;
2293 default:
2294 return false;
2295 }
2296 }
2297
2298 switch (off) {
2299 case offsetof(struct bpf_sockopt, sk):
2300 if (size != sizeof(__u64))
2301 return false;
2302 info->reg_type = PTR_TO_SOCKET;
2303 break;
2304 case offsetof(struct bpf_sockopt, optval):
2305 if (size != sizeof(__u64))
2306 return false;
2307 info->reg_type = PTR_TO_PACKET;
2308 break;
2309 case offsetof(struct bpf_sockopt, optval_end):
2310 if (size != sizeof(__u64))
2311 return false;
2312 info->reg_type = PTR_TO_PACKET_END;
2313 break;
2314 case offsetof(struct bpf_sockopt, retval):
2315 if (size != size_default)
2316 return false;
2317 return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
2318 default:
2319 if (size != size_default)
2320 return false;
2321 break;
2322 }
2323 return true;
2324 }
2325
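/* Load (BPF_LDX_MEM) or store (BPF_STX_MEM) a bpf_sockopt_kern field using
 * the field's own size.
 */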
2326 #define CG_SOCKOPT_ACCESS_FIELD(T, F) \
2327 T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
2328 si->dst_reg, si->src_reg, \
2329 offsetof(struct bpf_sockopt_kern, F))
2330
2331 static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
2332 const struct bpf_insn *si,
2333 struct bpf_insn *insn_buf,
2334 struct bpf_prog *prog,
2335 u32 *target_size)
2336 {
2337 struct bpf_insn *insn = insn_buf;
2338
2339 switch (si->off) {
2340 case offsetof(struct bpf_sockopt, sk):
2341 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
2342 break;
2343 case offsetof(struct bpf_sockopt, level):
2344 if (type == BPF_WRITE)
2345 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
2346 else
2347 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
2348 break;
2349 case offsetof(struct bpf_sockopt, optname):
2350 if (type == BPF_WRITE)
2351 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
2352 else
2353 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
2354 break;
2355 case offsetof(struct bpf_sockopt, optlen):
2356 if (type == BPF_WRITE)
2357 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
2358 else
2359 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
2360 break;
2361 case offsetof(struct bpf_sockopt, retval):
2362 BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
/* retval is stored in the per-task bpf_cg_run_ctx (current->bpf_ctx),
 * not in bpf_sockopt_kern, so load/store it through current_task.
 */
2364 if (type == BPF_WRITE) {
2365 int treg = BPF_REG_9;
2366
2367 if (si->src_reg == treg || si->dst_reg == treg)
2368 --treg;
2369 if (si->src_reg == treg || si->dst_reg == treg)
2370 --treg;
2371 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
2372 offsetof(struct bpf_sockopt_kern, tmp_reg));
2373 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
2374 treg, si->dst_reg,
2375 offsetof(struct bpf_sockopt_kern, current_task));
2376 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
2377 treg, treg,
2378 offsetof(struct task_struct, bpf_ctx));
2379 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
2380 treg, si->src_reg,
2381 offsetof(struct bpf_cg_run_ctx, retval));
2382 *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
2383 offsetof(struct bpf_sockopt_kern, tmp_reg));
2384 } else {
2385 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
2386 si->dst_reg, si->src_reg,
2387 offsetof(struct bpf_sockopt_kern, current_task));
2388 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
2389 si->dst_reg, si->dst_reg,
2390 offsetof(struct task_struct, bpf_ctx));
2391 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
2392 si->dst_reg, si->dst_reg,
2393 offsetof(struct bpf_cg_run_ctx, retval));
2394 }
2395 break;
2396 case offsetof(struct bpf_sockopt, optval):
2397 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
2398 break;
2399 case offsetof(struct bpf_sockopt, optval_end):
2400 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
2401 break;
2402 }
2403
2404 return insn - insn_buf;
2405 }
2406
2407 static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
2408 bool direct_write,
2409 const struct bpf_prog *prog)
2410 {
/* Nothing to do for sockopt argument. The data is kzalloc'ated.
 */
2413 return 0;
2414 }
2415
2416 const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
2417 .get_func_proto = cg_sockopt_func_proto,
2418 .is_valid_access = cg_sockopt_is_valid_access,
2419 .convert_ctx_access = cg_sockopt_convert_ctx_access,
2420 .gen_prologue = cg_sockopt_get_prologue,
2421 };
2422
2423 const struct bpf_prog_ops cg_sockopt_prog_ops = {
2424 };