0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #include <linux/bitops.h>
0012 #include <linux/slab.h>
0013 #include <linux/seq_file.h>
0014 #include <linux/cgroup.h>
0015 #include <linux/parser.h>
0016 #include <linux/cgroup_rdma.h>
0017
/*
 * Keyword used in the control files in place of a number for an unlimited
 * resource: parsed into S32_MAX on write and printed for S32_MAX on read.
 */
#define RDMACG_MAX_STR "max"
0019
0020
0021
0022
0023
/*
 * rdmacg_mutex is held around every walk or modification of the global
 * device list below and of the resource-pool lists linked from each cgroup
 * and each device.
 */
static DEFINE_MUTEX(rdmacg_mutex);
/* Devices added by rdmacg_register_device(), linked through dev_node. */
static LIST_HEAD(rdmacg_devices);
0026
/*
 * Which view of a resource pool a control file shows. The value is stored
 * in cftype->private and read back in print_rpool_values().
 */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX,	/* configured limits (the "max" file) */
	RDMACG_RESOURCE_TYPE_STAT,	/* current usage (the "current" file) */
};
0031
0032
0033
0034
0035
0036
0037 static char const *rdmacg_resource_names[] = {
0038 [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle",
0039 [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object",
0040 };
0041
0042
/* Limit and current charge count for one resource type within a pool. */
struct rdmacg_resource {
	int max;	/* limit; S32_MAX means unlimited ("max") */
	int usage;	/* number of charges currently outstanding */
};
0047
0048
0049
0050
0051
0052
0053
/*
 * Resource accounting for one (cgroup, device) pair. A pool is linked on
 * both its cgroup's rpools list and its device's rpools list; it is
 * allocated on demand by get_cg_rpool_locked() and released by
 * free_cg_rpool_locked().
 */
struct rdmacg_resource_pool {
	struct rdmacg_device *device;	/* device this pool accounts for */
	struct rdmacg_resource resources[RDMACG_RESOURCE_MAX];

	struct list_head cg_node;	/* entry in rdma_cgroup->rpools */
	struct list_head dev_node;	/* entry in rdmacg_device->rpools */

	/* total charges outstanding in this pool, summed over all resources */
	u64 usage_sum;

	/* number of resources whose limit is currently S32_MAX */
	int num_max_cnt;
};
0066
0067 static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
0068 {
0069 return container_of(css, struct rdma_cgroup, css);
0070 }
0071
0072 static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
0073 {
0074 return css_rdmacg(cg->css.parent);
0075 }
0076
0077 static inline struct rdma_cgroup *get_current_rdmacg(void)
0078 {
0079 return css_rdmacg(task_get_css(current, rdma_cgrp_id));
0080 }
0081
0082 static void set_resource_limit(struct rdmacg_resource_pool *rpool,
0083 int index, int new_max)
0084 {
0085 if (new_max == S32_MAX) {
0086 if (rpool->resources[index].max != S32_MAX)
0087 rpool->num_max_cnt++;
0088 } else {
0089 if (rpool->resources[index].max == S32_MAX)
0090 rpool->num_max_cnt--;
0091 }
0092 rpool->resources[index].max = new_max;
0093 }
0094
0095 static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
0096 {
0097 int i;
0098
0099 for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
0100 set_resource_limit(rpool, i, S32_MAX);
0101 }
0102
0103 static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
0104 {
0105 lockdep_assert_held(&rdmacg_mutex);
0106
0107 list_del(&rpool->cg_node);
0108 list_del(&rpool->dev_node);
0109 kfree(rpool);
0110 }
0111
0112 static struct rdmacg_resource_pool *
0113 find_cg_rpool_locked(struct rdma_cgroup *cg,
0114 struct rdmacg_device *device)
0115
0116 {
0117 struct rdmacg_resource_pool *pool;
0118
0119 lockdep_assert_held(&rdmacg_mutex);
0120
0121 list_for_each_entry(pool, &cg->rpools, cg_node)
0122 if (pool->device == device)
0123 return pool;
0124
0125 return NULL;
0126 }
0127
0128 static struct rdmacg_resource_pool *
0129 get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
0130 {
0131 struct rdmacg_resource_pool *rpool;
0132
0133 rpool = find_cg_rpool_locked(cg, device);
0134 if (rpool)
0135 return rpool;
0136
0137 rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
0138 if (!rpool)
0139 return ERR_PTR(-ENOMEM);
0140
0141 rpool->device = device;
0142 set_all_resource_max_limit(rpool);
0143
0144 INIT_LIST_HEAD(&rpool->cg_node);
0145 INIT_LIST_HEAD(&rpool->dev_node);
0146 list_add_tail(&rpool->cg_node, &cg->rpools);
0147 list_add_tail(&rpool->dev_node, &device->rpools);
0148 return rpool;
0149 }
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161 static void
0162 uncharge_cg_locked(struct rdma_cgroup *cg,
0163 struct rdmacg_device *device,
0164 enum rdmacg_resource_type index)
0165 {
0166 struct rdmacg_resource_pool *rpool;
0167
0168 rpool = find_cg_rpool_locked(cg, device);
0169
0170
0171
0172
0173
0174
0175 if (unlikely(!rpool)) {
0176 pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
0177 return;
0178 }
0179
0180 rpool->resources[index].usage--;
0181
0182
0183
0184
0185
0186 WARN_ON_ONCE(rpool->resources[index].usage < 0);
0187 rpool->usage_sum--;
0188 if (rpool->usage_sum == 0 &&
0189 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
0190
0191
0192
0193
0194 free_cg_rpool_locked(rpool);
0195 }
0196 }
0197
0198
0199
0200
0201
0202
0203
0204
0205 static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
0206 struct rdmacg_device *device,
0207 struct rdma_cgroup *stop_cg,
0208 enum rdmacg_resource_type index)
0209 {
0210 struct rdma_cgroup *p;
0211
0212 mutex_lock(&rdmacg_mutex);
0213
0214 for (p = cg; p != stop_cg; p = parent_rdmacg(p))
0215 uncharge_cg_locked(p, device, index);
0216
0217 mutex_unlock(&rdmacg_mutex);
0218
0219 css_put(&cg->css);
0220 }
0221
0222
0223
0224
0225
0226
0227 void rdmacg_uncharge(struct rdma_cgroup *cg,
0228 struct rdmacg_device *device,
0229 enum rdmacg_resource_type index)
0230 {
0231 if (index >= RDMACG_RESOURCE_MAX)
0232 return;
0233
0234 rdmacg_uncharge_hierarchy(cg, device, NULL, index);
0235 }
0236 EXPORT_SYMBOL(rdmacg_uncharge);
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259 int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
0260 struct rdmacg_device *device,
0261 enum rdmacg_resource_type index)
0262 {
0263 struct rdma_cgroup *cg, *p;
0264 struct rdmacg_resource_pool *rpool;
0265 s64 new;
0266 int ret = 0;
0267
0268 if (index >= RDMACG_RESOURCE_MAX)
0269 return -EINVAL;
0270
0271
0272
0273
0274
0275 cg = get_current_rdmacg();
0276
0277 mutex_lock(&rdmacg_mutex);
0278 for (p = cg; p; p = parent_rdmacg(p)) {
0279 rpool = get_cg_rpool_locked(p, device);
0280 if (IS_ERR(rpool)) {
0281 ret = PTR_ERR(rpool);
0282 goto err;
0283 } else {
0284 new = rpool->resources[index].usage + 1;
0285 if (new > rpool->resources[index].max) {
0286 ret = -EAGAIN;
0287 goto err;
0288 } else {
0289 rpool->resources[index].usage = new;
0290 rpool->usage_sum++;
0291 }
0292 }
0293 }
0294 mutex_unlock(&rdmacg_mutex);
0295
0296 *rdmacg = cg;
0297 return 0;
0298
0299 err:
0300 mutex_unlock(&rdmacg_mutex);
0301 rdmacg_uncharge_hierarchy(cg, device, p, index);
0302 return ret;
0303 }
0304 EXPORT_SYMBOL(rdmacg_try_charge);
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314 void rdmacg_register_device(struct rdmacg_device *device)
0315 {
0316 INIT_LIST_HEAD(&device->dev_node);
0317 INIT_LIST_HEAD(&device->rpools);
0318
0319 mutex_lock(&rdmacg_mutex);
0320 list_add_tail(&device->dev_node, &rdmacg_devices);
0321 mutex_unlock(&rdmacg_mutex);
0322 }
0323 EXPORT_SYMBOL(rdmacg_register_device);
0324
0325
0326
0327
0328
0329
0330
0331
0332
0333
0334 void rdmacg_unregister_device(struct rdmacg_device *device)
0335 {
0336 struct rdmacg_resource_pool *rpool, *tmp;
0337
0338
0339
0340
0341
0342 mutex_lock(&rdmacg_mutex);
0343 list_del_init(&device->dev_node);
0344
0345
0346
0347
0348
0349 list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
0350 free_cg_rpool_locked(rpool);
0351
0352 mutex_unlock(&rdmacg_mutex);
0353 }
0354 EXPORT_SYMBOL(rdmacg_unregister_device);
0355
0356 static int parse_resource(char *c, int *intval)
0357 {
0358 substring_t argstr;
0359 char *name, *value = c;
0360 size_t len;
0361 int ret, i;
0362
0363 name = strsep(&value, "=");
0364 if (!name || !value)
0365 return -EINVAL;
0366
0367 i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
0368 if (i < 0)
0369 return i;
0370
0371 len = strlen(value);
0372
0373 argstr.from = value;
0374 argstr.to = value + len;
0375
0376 ret = match_int(&argstr, intval);
0377 if (ret >= 0) {
0378 if (*intval < 0)
0379 return -EINVAL;
0380 return i;
0381 }
0382 if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
0383 *intval = S32_MAX;
0384 return i;
0385 }
0386 return -EINVAL;
0387 }
0388
0389 static int rdmacg_parse_limits(char *options,
0390 int *new_limits, unsigned long *enables)
0391 {
0392 char *c;
0393 int err = -EINVAL;
0394
0395
0396 while ((c = strsep(&options, " ")) != NULL) {
0397 int index, intval;
0398
0399 index = parse_resource(c, &intval);
0400 if (index < 0)
0401 goto err;
0402
0403 new_limits[index] = intval;
0404 *enables |= BIT(index);
0405 }
0406 return 0;
0407
0408 err:
0409 return err;
0410 }
0411
0412 static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
0413 {
0414 struct rdmacg_device *device;
0415
0416 lockdep_assert_held(&rdmacg_mutex);
0417
0418 list_for_each_entry(device, &rdmacg_devices, dev_node)
0419 if (!strcmp(name, device->name))
0420 return device;
0421
0422 return NULL;
0423 }
0424
0425 static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
0426 char *buf, size_t nbytes, loff_t off)
0427 {
0428 struct rdma_cgroup *cg = css_rdmacg(of_css(of));
0429 const char *dev_name;
0430 struct rdmacg_resource_pool *rpool;
0431 struct rdmacg_device *device;
0432 char *options = strstrip(buf);
0433 int *new_limits;
0434 unsigned long enables = 0;
0435 int i = 0, ret = 0;
0436
0437
0438 dev_name = strsep(&options, " ");
0439 if (!dev_name) {
0440 ret = -EINVAL;
0441 goto err;
0442 }
0443
0444 new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
0445 if (!new_limits) {
0446 ret = -ENOMEM;
0447 goto err;
0448 }
0449
0450 ret = rdmacg_parse_limits(options, new_limits, &enables);
0451 if (ret)
0452 goto parse_err;
0453
0454
0455 mutex_lock(&rdmacg_mutex);
0456
0457 device = rdmacg_get_device_locked(dev_name);
0458 if (!device) {
0459 ret = -ENODEV;
0460 goto dev_err;
0461 }
0462
0463 rpool = get_cg_rpool_locked(cg, device);
0464 if (IS_ERR(rpool)) {
0465 ret = PTR_ERR(rpool);
0466 goto dev_err;
0467 }
0468
0469
0470 for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
0471 set_resource_limit(rpool, i, new_limits[i]);
0472
0473 if (rpool->usage_sum == 0 &&
0474 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
0475
0476
0477
0478
0479 free_cg_rpool_locked(rpool);
0480 }
0481
0482 dev_err:
0483 mutex_unlock(&rdmacg_mutex);
0484
0485 parse_err:
0486 kfree(new_limits);
0487
0488 err:
0489 return ret ?: nbytes;
0490 }
0491
0492 static void print_rpool_values(struct seq_file *sf,
0493 struct rdmacg_resource_pool *rpool)
0494 {
0495 enum rdmacg_file_type sf_type;
0496 int i;
0497 u32 value;
0498
0499 sf_type = seq_cft(sf)->private;
0500
0501 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
0502 seq_puts(sf, rdmacg_resource_names[i]);
0503 seq_putc(sf, '=');
0504 if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
0505 if (rpool)
0506 value = rpool->resources[i].max;
0507 else
0508 value = S32_MAX;
0509 } else {
0510 if (rpool)
0511 value = rpool->resources[i].usage;
0512 else
0513 value = 0;
0514 }
0515
0516 if (value == S32_MAX)
0517 seq_puts(sf, RDMACG_MAX_STR);
0518 else
0519 seq_printf(sf, "%d", value);
0520 seq_putc(sf, ' ');
0521 }
0522 }
0523
0524 static int rdmacg_resource_read(struct seq_file *sf, void *v)
0525 {
0526 struct rdmacg_device *device;
0527 struct rdmacg_resource_pool *rpool;
0528 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
0529
0530 mutex_lock(&rdmacg_mutex);
0531
0532 list_for_each_entry(device, &rdmacg_devices, dev_node) {
0533 seq_printf(sf, "%s ", device->name);
0534
0535 rpool = find_cg_rpool_locked(cg, device);
0536 print_rpool_values(sf, rpool);
0537
0538 seq_putc(sf, '\n');
0539 }
0540
0541 mutex_unlock(&rdmacg_mutex);
0542 return 0;
0543 }
0544
/*
 * Control files of the rdma controller. Both files share one show
 * handler; .private tells it whether to print limits or usage. Neither
 * file is created on the root cgroup (CFTYPE_NOT_ON_ROOT).
 */
static struct cftype rdmacg_files[] = {
	{
		/* read/write: per-device resource limits */
		.name = "max",
		.write = rdmacg_resource_set_max,
		.seq_show = rdmacg_resource_read,
		.private = RDMACG_RESOURCE_TYPE_MAX,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		/* read-only: per-device current usage */
		.name = "current",
		.seq_show = rdmacg_resource_read,
		.private = RDMACG_RESOURCE_TYPE_STAT,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ }	/* terminating empty entry */
};
0561
0562 static struct cgroup_subsys_state *
0563 rdmacg_css_alloc(struct cgroup_subsys_state *parent)
0564 {
0565 struct rdma_cgroup *cg;
0566
0567 cg = kzalloc(sizeof(*cg), GFP_KERNEL);
0568 if (!cg)
0569 return ERR_PTR(-ENOMEM);
0570
0571 INIT_LIST_HEAD(&cg->rpools);
0572 return &cg->css;
0573 }
0574
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591 static void rdmacg_css_offline(struct cgroup_subsys_state *css)
0592 {
0593 struct rdma_cgroup *cg = css_rdmacg(css);
0594 struct rdmacg_resource_pool *rpool;
0595
0596 mutex_lock(&rdmacg_mutex);
0597
0598 list_for_each_entry(rpool, &cg->rpools, cg_node)
0599 set_all_resource_max_limit(rpool);
0600
0601 mutex_unlock(&rdmacg_mutex);
0602 }
0603
/*
 * Subsystem descriptor for the rdma controller: wires up the css
 * lifecycle callbacks defined above and exposes the same control files
 * through both the legacy and the default cftype sets.
 */
struct cgroup_subsys rdma_cgrp_subsys = {
	.css_alloc = rdmacg_css_alloc,
	.css_free = rdmacg_css_free,
	.css_offline = rdmacg_css_offline,
	.legacy_cftypes = rdmacg_files,
	.dfl_cftypes = rdmacg_files,
};