/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val)    (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)   ((val) & 0xffff)

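/*
 * Illustrative sketch (not part of the original file): each cftype's
 * ->private word packs the hstate index into its high 16 bits and the
 * resource attribute into its low 16 bits, so a single int routes a file
 * operation to both the right hstate and the right counter. The helper
 * below is hypothetical and only demonstrates the round trip.
 */
static inline void memfile_private_example(void)
{
    int priv = MEMFILE_PRIVATE(1, 2);   /* hstate index 1, attribute 2 */

    WARN_ON(MEMFILE_IDX(priv) != 1);    /* high 16 bits: hstate index */
    WARN_ON(MEMFILE_ATTR(priv) != 2);   /* low 16 bits: attribute */
}
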
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
                     bool rsvd)
{
    if (rsvd)
        return &h_cg->rsvd_hugepage[idx];
    return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
    return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
    return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
    return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
    return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
    return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
    return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
    int idx;

    for (idx = 0; idx < hugetlb_max_hstate; idx++) {
        if (page_counter_read(
                hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
            return true;
    }
    return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
                struct hugetlb_cgroup *parent_h_cgroup)
{
    int idx;

    for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
        struct page_counter *fault_parent = NULL;
        struct page_counter *rsvd_parent = NULL;
        unsigned long limit;
        int ret;

        if (parent_h_cgroup) {
            fault_parent = hugetlb_cgroup_counter_from_cgroup(
                parent_h_cgroup, idx);
            rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
                parent_h_cgroup, idx);
        }
        page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
                                     idx),
                  fault_parent);
        page_counter_init(
            hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
            rsvd_parent);

        limit = round_down(PAGE_COUNTER_MAX,
                   pages_per_huge_page(&hstates[idx]));

        ret = page_counter_set_max(
            hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
            limit);
        VM_BUG_ON(ret);
        ret = page_counter_set_max(
            hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
            limit);
        VM_BUG_ON(ret);
    }
}

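/*
 * Worked example (illustrative, assuming x86-64 with 2MB huge pages):
 * pages_per_huge_page() is 512, so the default limit set above is
 * round_down(PAGE_COUNTER_MAX, 512), i.e. the largest multiple of 512
 * base pages that fits in a page_counter. Keeping every limit a whole
 * number of huge pages means a limit can never cover half a huge page.
 */
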
static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
    int node;

    for_each_node(node)
        kfree(h_cgroup->nodeinfo[node]);
    kfree(h_cgroup);
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
    struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
    struct hugetlb_cgroup *h_cgroup;
    int node;

    h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
               GFP_KERNEL);

    if (!h_cgroup)
        return ERR_PTR(-ENOMEM);

    if (!parent_h_cgroup)
        root_h_cgroup = h_cgroup;

    /*
     * TODO: this routine can waste a lot of memory for nodes that will
     * never be onlined. It would be better to use a memory hotplug
     * callback function.
     */
    for_each_node(node) {
        /* Set node_to_alloc to -1 for offline nodes. */
        int node_to_alloc =
            node_state(node, N_NORMAL_MEMORY) ? node : -1;
        h_cgroup->nodeinfo[node] =
            kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
                     GFP_KERNEL, node_to_alloc);
        if (!h_cgroup->nodeinfo[node])
            goto fail_alloc_nodeinfo;
    }

    hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
    return &h_cgroup->css;

fail_alloc_nodeinfo:
    hugetlb_cgroup_free(h_cgroup);
    return ERR_PTR(-ENOMEM);
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
    hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}

/*
 * Should be called with hugetlb_lock held.
 * Since we hold hugetlb_lock, pages cannot be moved off the active list
 * or uncharged from the cgroup, so there is no need to take a page
 * reference or test whether the page is active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
                       struct page *page)
{
    unsigned int nr_pages;
    struct page_counter *counter;
    struct hugetlb_cgroup *page_hcg;
    struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

    page_hcg = hugetlb_cgroup_from_page(page);
    /*
     * We can have pages on the active list without any cgroup,
     * i.e., hugepages with fewer than 3 pages. We can safely
     * ignore those pages.
     */
    if (!page_hcg || page_hcg != h_cg)
        goto out;

    nr_pages = compound_nr(page);
    if (!parent) {
        parent = root_h_cgroup;
        /* root has no limit */
        page_counter_charge(&parent->hugepage[idx], nr_pages);
    }
    counter = &h_cg->hugepage[idx];
    /* Take the pages off the local counter */
    page_counter_cancel(counter, nr_pages);

    set_hugetlb_cgroup(page, parent);
out:
    return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
    struct hstate *h;
    struct page *page;
    int idx;

    do {
        idx = 0;
        for_each_hstate(h) {
            spin_lock_irq(&hugetlb_lock);
            list_for_each_entry(page, &h->hugepage_activelist, lru)
                hugetlb_cgroup_move_parent(idx, h_cg, page);

            spin_unlock_irq(&hugetlb_lock);
            idx++;
        }
        cond_resched();
    } while (hugetlb_cgroup_have_usage(h_cg));
}

static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
                 enum hugetlb_memory_event event)
{
    atomic_long_inc(&hugetlb->events_local[idx][event]);
    cgroup_file_notify(&hugetlb->events_local_file[idx]);

    do {
        atomic_long_inc(&hugetlb->events[idx][event]);
        cgroup_file_notify(&hugetlb->events_file[idx]);
    } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
         !hugetlb_cgroup_is_root(hugetlb));
}

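/*
 * Illustration of the event semantics above: when a cgroup C hits its
 * limit, hugetlb_event() bumps C's events_local counter once, then walks
 * up the tree bumping the hierarchical events counter of C and of every
 * non-root ancestor. So "max" in C's <size>.events.local counts failures
 * that happened in C itself, while "max" in an ancestor's <size>.events
 * also includes failures inherited from descendants such as C.
 */
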
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                      struct hugetlb_cgroup **ptr,
                      bool rsvd)
{
    int ret = 0;
    struct page_counter *counter;
    struct hugetlb_cgroup *h_cg = NULL;

    if (hugetlb_cgroup_disabled())
        goto done;
    /*
     * We don't charge any cgroup if the compound page has fewer
     * than 3 pages.
     */
    if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
        goto done;
again:
    rcu_read_lock();
    h_cg = hugetlb_cgroup_from_task(current);
    if (!css_tryget(&h_cg->css)) {
        rcu_read_unlock();
        goto again;
    }
    rcu_read_unlock();

    if (!page_counter_try_charge(
            __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
            nr_pages, &counter)) {
        ret = -ENOMEM;
        hugetlb_event(h_cg, idx, HUGETLB_MAX);
        css_put(&h_cg->css);
        goto done;
    }
    /*
     * Reservations take a reference to the css because they do not get
     * reparented.
     */
    if (!rsvd)
        css_put(&h_cg->css);
done:
    *ptr = h_cg;
    return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                 struct hugetlb_cgroup **ptr)
{
    return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
                      struct hugetlb_cgroup **ptr)
{
    return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                       struct hugetlb_cgroup *h_cg,
                       struct page *page, bool rsvd)
{
    if (hugetlb_cgroup_disabled() || !h_cg)
        return;

    __set_hugetlb_cgroup(page, h_cg, rsvd);
    if (!rsvd) {
        unsigned long usage =
            h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
        /*
         * This write is not atomic due to fetching usage and writing
         * to it, but that's fine because we call this with
         * hugetlb_lock held anyway.
         */
        WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
               usage + nr_pages);
    }
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                  struct hugetlb_cgroup *h_cg,
                  struct page *page)
{
    __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
                       struct hugetlb_cgroup *h_cg,
                       struct page *page)
{
    __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

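/*
 * Sketch of the two-phase caller protocol (illustrative only; this function
 * is hypothetical, modeled on the real callers in mm/hugetlb.c): charge the
 * cgroup first, allocate, then commit the charge to the page under
 * hugetlb_lock so the page and the counter stay consistent.
 */
static int __maybe_unused example_charge_and_commit(struct hstate *h,
                            struct page *page)
{
    struct hugetlb_cgroup *h_cg;
    int idx = hstate_index(h);
    int ret;

    /* Phase 1: reserve room in the fault counter (may fail with -ENOMEM). */
    ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
    if (ret)
        return ret;

    /* Phase 2: tie the charge to the page, holding hugetlb_lock. */
    spin_lock_irq(&hugetlb_lock);
    hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
    spin_unlock_irq(&hugetlb_lock);
    return 0;
}
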
/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                       struct page *page, bool rsvd)
{
    struct hugetlb_cgroup *h_cg;

    if (hugetlb_cgroup_disabled())
        return;
    lockdep_assert_held(&hugetlb_lock);
    h_cg = __hugetlb_cgroup_from_page(page, rsvd);
    if (unlikely(!h_cg))
        return;
    __set_hugetlb_cgroup(page, NULL, rsvd);

    page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                   rsvd),
                  nr_pages);

    if (rsvd)
        css_put(&h_cg->css);
    else {
        unsigned long usage =
            h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
        /*
         * This write is not atomic due to fetching usage and writing
         * to it, but that's fine because we call this with
         * hugetlb_lock held anyway.
         */
        WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
               usage - nr_pages);
    }
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                  struct page *page)
{
    __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
                       struct page *page)
{
    __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                         struct hugetlb_cgroup *h_cg,
                         bool rsvd)
{
    if (hugetlb_cgroup_disabled() || !h_cg)
        return;

    if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
        return;

    page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                   rsvd),
                  nr_pages);

    if (rsvd)
        css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                    struct hugetlb_cgroup *h_cg)
{
    __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
                     struct hugetlb_cgroup *h_cg)
{
    __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

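/*
 * How the two uncharge paths relate: hugetlb_cgroup_uncharge_page() is the
 * free-path counterpart of commit_charge() and reads the owning cgroup back
 * from the page itself, while hugetlb_cgroup_uncharge_cgroup() rolls back a
 * charge that was never committed to a page, e.g. when allocation fails
 * after a successful hugetlb_cgroup_charge_cgroup().
 */
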
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
                     unsigned long end)
{
    if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
        !resv->css)
        return;

    page_counter_uncharge(resv->reservation_counter,
                  (end - start) * resv->pages_per_hpage);
    css_put(resv->css);
}

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
                     struct file_region *rg,
                     unsigned long nr_pages,
                     bool region_del)
{
    if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
        return;

    if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
        !resv->reservation_counter) {
        page_counter_uncharge(rg->reservation_counter,
                      nr_pages * resv->pages_per_hpage);
        /*
         * Only do css_put(rg->css) when we delete the entire region
         * because one file_region must hold exactly one css reference.
         */
        if (region_del)
            css_put(rg->css);
    }
}

enum {
    RES_USAGE,
    RES_RSVD_USAGE,
    RES_LIMIT,
    RES_RSVD_LIMIT,
    RES_MAX_USAGE,
    RES_RSVD_MAX_USAGE,
    RES_FAILCNT,
    RES_RSVD_FAILCNT,
};

static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
    int nid;
    struct cftype *cft = seq_cft(seq);
    int idx = MEMFILE_IDX(cft->private);
    bool legacy = MEMFILE_ATTR(cft->private);
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
    struct cgroup_subsys_state *css;
    unsigned long usage;

    if (legacy) {
        /* Add up usage across all nodes for the non-hierarchical total. */
        usage = 0;
        for_each_node_state(nid, N_MEMORY)
            usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
        seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

        /* Simply print the per-node usage for the non-hierarchical total. */
        for_each_node_state(nid, N_MEMORY)
            seq_printf(seq, " N%d=%lu", nid,
                   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
                       PAGE_SIZE);
        seq_putc(seq, '\n');
    }

    /*
     * The hierarchical total is pretty much the value recorded by the
     * counter, so use that.
     */
    seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
           page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

    /*
     * For each node, traverse the css tree to obtain the hierarchical
     * node usage.
     */
    for_each_node_state(nid, N_MEMORY) {
        usage = 0;
        rcu_read_lock();
        css_for_each_descendant_pre(css, &h_cg->css) {
            usage += READ_ONCE(hugetlb_cgroup_from_css(css)
                           ->nodeinfo[nid]
                           ->usage[idx]);
        }
        rcu_read_unlock();
        seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
    }

    seq_putc(seq, '\n');

    return 0;
}

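/*
 * Example output of the reader above (illustrative; two-node system, byte
 * values made up):
 *
 * cgroup v2, hugetlb.2MB.numa_stat:
 *	total=2097152 N0=2097152 N1=0
 *
 * cgroup v1, hugetlb.2MB.numa_stat (non-hierarchical line first):
 *	total=2097152 N0=2097152 N1=0
 *	hierarchical_total=6291456 N0=4194304 N1=2097152
 */
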
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
                   struct cftype *cft)
{
    struct page_counter *counter;
    struct page_counter *rsvd_counter;
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

    counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
    rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

    switch (MEMFILE_ATTR(cft->private)) {
    case RES_USAGE:
        return (u64)page_counter_read(counter) * PAGE_SIZE;
    case RES_RSVD_USAGE:
        return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
    case RES_LIMIT:
        return (u64)counter->max * PAGE_SIZE;
    case RES_RSVD_LIMIT:
        return (u64)rsvd_counter->max * PAGE_SIZE;
    case RES_MAX_USAGE:
        return (u64)counter->watermark * PAGE_SIZE;
    case RES_RSVD_MAX_USAGE:
        return (u64)rsvd_counter->watermark * PAGE_SIZE;
    case RES_FAILCNT:
        return counter->failcnt;
    case RES_RSVD_FAILCNT:
        return rsvd_counter->failcnt;
    default:
        BUG();
    }
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
    int idx;
    u64 val;
    struct cftype *cft = seq_cft(seq);
    unsigned long limit;
    struct page_counter *counter;
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

    idx = MEMFILE_IDX(cft->private);
    counter = &h_cg->hugepage[idx];

    limit = round_down(PAGE_COUNTER_MAX,
               pages_per_huge_page(&hstates[idx]));

    switch (MEMFILE_ATTR(cft->private)) {
    case RES_RSVD_USAGE:
        counter = &h_cg->rsvd_hugepage[idx];
        fallthrough;
    case RES_USAGE:
        val = (u64)page_counter_read(counter);
        seq_printf(seq, "%llu\n", val * PAGE_SIZE);
        break;
    case RES_RSVD_LIMIT:
        counter = &h_cg->rsvd_hugepage[idx];
        fallthrough;
    case RES_LIMIT:
        val = (u64)counter->max;
        if (val == limit)
            seq_puts(seq, "max\n");
        else
            seq_printf(seq, "%llu\n", val * PAGE_SIZE);
        break;
    default:
        BUG();
    }

    return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
                    char *buf, size_t nbytes, loff_t off,
                    const char *max)
{
    int ret, idx;
    unsigned long nr_pages;
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
    bool rsvd = false;

    if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
        return -EINVAL;

    buf = strstrip(buf);
    ret = page_counter_memparse(buf, max, &nr_pages);
    if (ret)
        return ret;

    idx = MEMFILE_IDX(of_cft(of)->private);
    nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

    switch (MEMFILE_ATTR(of_cft(of)->private)) {
    case RES_RSVD_LIMIT:
        rsvd = true;
        fallthrough;
    case RES_LIMIT:
        mutex_lock(&hugetlb_limit_mutex);
        ret = page_counter_set_max(
            __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
            nr_pages);
        mutex_unlock(&hugetlb_limit_mutex);
        break;
    default:
        ret = -EINVAL;
        break;
    }
    return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
                       char *buf, size_t nbytes, loff_t off)
{
    return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
                    char *buf, size_t nbytes, loff_t off)
{
    return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

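/*
 * Usage example (illustrative; the cgroup path is hypothetical). On cgroup
 * v2 the "unlimited" keyword is "max", on v1 it is "-1", which is why the
 * two wrappers above pass different strings to page_counter_memparse():
 *
 *	# v2: cap 2MB hugepage usage at 1G, then remove the cap
 *	echo 1G  > /sys/fs/cgroup/mygrp/hugetlb.2MB.max
 *	echo max > /sys/fs/cgroup/mygrp/hugetlb.2MB.max
 *
 * Values are rounded down to a multiple of the huge page size before the
 * limit is applied.
 */
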
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
                    char *buf, size_t nbytes, loff_t off)
{
    int ret = 0;
    struct page_counter *counter, *rsvd_counter;
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

    counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
    rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

    switch (MEMFILE_ATTR(of_cft(of)->private)) {
    case RES_MAX_USAGE:
        page_counter_reset_watermark(counter);
        break;
    case RES_RSVD_MAX_USAGE:
        page_counter_reset_watermark(rsvd_counter);
        break;
    case RES_FAILCNT:
        counter->failcnt = 0;
        break;
    case RES_RSVD_FAILCNT:
        rsvd_counter->failcnt = 0;
        break;
    default:
        ret = -EINVAL;
        break;
    }
    return ret ?: nbytes;
}

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
    if (hsize >= (1UL << 30))
        snprintf(buf, size, "%luGB", hsize >> 30);
    else if (hsize >= (1UL << 20))
        snprintf(buf, size, "%luMB", hsize >> 20);
    else
        snprintf(buf, size, "%luKB", hsize >> 10);
    return buf;
}

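/*
 * Examples (illustrative): mem_fmt() picks the largest binary unit that
 * fits, so 2MB huge pages yield "2MB", 1GB pages yield "1GB", and 64KB
 * pages yield "64KB". The resulting string becomes the size component of
 * control file names such as hugetlb.2MB.max below.
 */
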
static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
    int idx;
    long max;
    struct cftype *cft = seq_cft(seq);
    struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

    idx = MEMFILE_IDX(cft->private);

    if (local)
        max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
    else
        max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

    seq_printf(seq, "max %lu\n", max);

    return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
    return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
    return __hugetlb_events_show(seq, true);
}

static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
    char buf[32];
    struct cftype *cft;
    struct hstate *h = &hstates[idx];

    /* format the size */
    mem_fmt(buf, sizeof(buf), huge_page_size(h));

    /* Add the limit file */
    cft = &h->cgroup_files_dfl[0];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
    cft->seq_show = hugetlb_cgroup_read_u64_max;
    cft->write = hugetlb_cgroup_write_dfl;
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* Add the reservation limit file */
    cft = &h->cgroup_files_dfl[1];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
    cft->seq_show = hugetlb_cgroup_read_u64_max;
    cft->write = hugetlb_cgroup_write_dfl;
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* Add the current usage file */
    cft = &h->cgroup_files_dfl[2];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
    cft->seq_show = hugetlb_cgroup_read_u64_max;
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* Add the current reservation usage file */
    cft = &h->cgroup_files_dfl[3];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
    cft->seq_show = hugetlb_cgroup_read_u64_max;
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* Add the events file */
    cft = &h->cgroup_files_dfl[4];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
    cft->private = MEMFILE_PRIVATE(idx, 0);
    cft->seq_show = hugetlb_events_show;
    cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* Add the events.local file */
    cft = &h->cgroup_files_dfl[5];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
    cft->private = MEMFILE_PRIVATE(idx, 0);
    cft->seq_show = hugetlb_events_local_show;
    cft->file_offset = offsetof(struct hugetlb_cgroup,
                    events_local_file[idx]);
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* Add the numa stat file */
    cft = &h->cgroup_files_dfl[6];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
    cft->private = MEMFILE_PRIVATE(idx, 0);
    cft->seq_show = hugetlb_cgroup_read_numa_stat;
    cft->flags = CFTYPE_NOT_ON_ROOT;

    /* NULL terminate the last cft */
    cft = &h->cgroup_files_dfl[7];
    memset(cft, 0, sizeof(*cft));

    WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
                       h->cgroup_files_dfl));
}

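/*
 * For illustration, with 2MB huge pages the function above registers the
 * following cgroup v2 control files (absent on the root cgroup):
 *
 *	hugetlb.2MB.max			hugetlb.2MB.rsvd.max
 *	hugetlb.2MB.current		hugetlb.2MB.rsvd.current
 *	hugetlb.2MB.events		hugetlb.2MB.events.local
 *	hugetlb.2MB.numa_stat
 */
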
static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
    char buf[32];
    struct cftype *cft;
    struct hstate *h = &hstates[idx];

    /* format the size */
    mem_fmt(buf, sizeof(buf), huge_page_size(h));

    /* Add the limit file */
    cft = &h->cgroup_files_legacy[0];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
    cft->read_u64 = hugetlb_cgroup_read_u64;
    cft->write = hugetlb_cgroup_write_legacy;

    /* Add the reservation limit file */
    cft = &h->cgroup_files_legacy[1];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
    cft->read_u64 = hugetlb_cgroup_read_u64;
    cft->write = hugetlb_cgroup_write_legacy;

    /* Add the usage file */
    cft = &h->cgroup_files_legacy[2];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
    cft->read_u64 = hugetlb_cgroup_read_u64;

    /* Add the reservation usage file */
    cft = &h->cgroup_files_legacy[3];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
    cft->read_u64 = hugetlb_cgroup_read_u64;

    /* Add the MAX usage file */
    cft = &h->cgroup_files_legacy[4];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
    cft->write = hugetlb_cgroup_reset;
    cft->read_u64 = hugetlb_cgroup_read_u64;

    /* Add the MAX reservation usage file */
    cft = &h->cgroup_files_legacy[5];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
    cft->write = hugetlb_cgroup_reset;
    cft->read_u64 = hugetlb_cgroup_read_u64;

    /* Add the failcnt file */
    cft = &h->cgroup_files_legacy[6];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
    cft->write = hugetlb_cgroup_reset;
    cft->read_u64 = hugetlb_cgroup_read_u64;

    /* Add the reservation failcnt file */
    cft = &h->cgroup_files_legacy[7];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
    cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
    cft->write = hugetlb_cgroup_reset;
    cft->read_u64 = hugetlb_cgroup_read_u64;

    /* Add the numa stat file */
    cft = &h->cgroup_files_legacy[8];
    snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
    cft->private = MEMFILE_PRIVATE(idx, 1);
    cft->seq_show = hugetlb_cgroup_read_numa_stat;

    /* NULL terminate the last cft */
    cft = &h->cgroup_files_legacy[9];
    memset(cft, 0, sizeof(*cft));

    WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
                      h->cgroup_files_legacy));
}

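/*
 * For illustration, with 2MB huge pages the function above registers the
 * corresponding cgroup v1 files, e.g. hugetlb.2MB.limit_in_bytes,
 * hugetlb.2MB.usage_in_bytes, hugetlb.2MB.max_usage_in_bytes,
 * hugetlb.2MB.failcnt, their .rsvd. variants, and hugetlb.2MB.numa_stat.
 */
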
static void __init __hugetlb_cgroup_file_init(int idx)
{
    __hugetlb_cgroup_file_dfl_init(idx);
    __hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
    struct hstate *h;

    for_each_hstate(h) {
        /*
         * Add cgroup control files only if the huge page consists
         * of more than two normal pages. This is because we use
         * page[2].private for storing cgroup details.
         */
        if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
            __hugetlb_cgroup_file_init(hstate_index(h));
    }
}

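/*
 * Concrete example of the order check above (illustrative, x86-64): a 2MB
 * huge page is order 9, i.e. 512 base pages, so it easily clears
 * HUGETLB_CGROUP_MIN_ORDER and gets control files; a hypothetical order-0
 * or order-1 "huge" page would not, since there would be no page[2] to
 * store the cgroup pointer in.
 */
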
/*
 * hugetlb_lock makes sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages.
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
    struct hugetlb_cgroup *h_cg;
    struct hugetlb_cgroup *h_cg_rsvd;
    struct hstate *h = page_hstate(oldhpage);

    if (hugetlb_cgroup_disabled())
        return;

    spin_lock_irq(&hugetlb_lock);
    h_cg = hugetlb_cgroup_from_page(oldhpage);
    h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
    set_hugetlb_cgroup(oldhpage, NULL);
    set_hugetlb_cgroup_rsvd(oldhpage, NULL);

    /* move the h_cg details to the new page */
    set_hugetlb_cgroup(newhpage, h_cg);
    set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
    list_move(&newhpage->lru, &h->hugepage_activelist);
    spin_unlock_irq(&hugetlb_lock);
    return;
}

static struct cftype hugetlb_files[] = {
    {} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
    .css_alloc  = hugetlb_cgroup_css_alloc,
    .css_offline    = hugetlb_cgroup_css_offline,
    .css_free   = hugetlb_cgroup_css_free,
    .dfl_cftypes    = hugetlb_files,
    .legacy_cftypes = hugetlb_files,
};