// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

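/*
 * Editorial note (not from the original source): propagate_protected_usage()
 * keeps this counter's min_usage/low_usage -- the share of its usage that is
 * covered by the configured min/low protection -- up to date and feeds the
 * delta into the parent's children_min_usage/children_low_usage aggregates.
 *
 * Illustrative walk-through: with c->min == 100 pages and usage growing from
 * 80 to 120 pages, min_usage moves from min(80, 100) == 80 to
 * min(120, 100) == 100, so a delta of +20 is added to the parent's
 * children_min_usage.
 */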
static void propagate_protected_usage(struct page_counter *c,
                      unsigned long usage)
{
    unsigned long protected, old_protected;
    unsigned long low, min;
    long delta;

    if (!c->parent)
        return;

    min = READ_ONCE(c->min);
    if (min || atomic_long_read(&c->min_usage)) {
        protected = min(usage, min);
        old_protected = atomic_long_xchg(&c->min_usage, protected);
        delta = protected - old_protected;
        if (delta)
            atomic_long_add(delta, &c->parent->children_min_usage);
    }

    low = READ_ONCE(c->low);
    if (low || atomic_long_read(&c->low_usage)) {
        protected = min(usage, low);
        old_protected = atomic_long_xchg(&c->low_usage, protected);
        delta = protected - old_protected;
        if (delta)
            atomic_long_add(delta, &c->parent->children_low_usage);
    }
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
    long new;

    new = atomic_long_sub_return(nr_pages, &counter->usage);
    /* More uncharges than charges? */
    if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n",
              new, nr_pages)) {
        new = 0;
        atomic_long_set(&counter->usage, new);
    }
    propagate_protected_usage(counter, new);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
    struct page_counter *c;

    for (c = counter; c; c = c->parent) {
        long new;

        new = atomic_long_add_return(nr_pages, &c->usage);
        propagate_protected_usage(c, new);
        /*
         * This is indeed racy, but we can live with some
         * inaccuracy in the watermark.
         */
        if (new > READ_ONCE(c->watermark))
            WRITE_ONCE(c->watermark, new);
    }
}

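/*
 * Editorial sketch (not part of this file, kept out of the build with
 * #if 0): page_counter_charge() is the unconditional variant -- a caller
 * that must not fail can use it and deal with any overage separately.
 * The example_* name below is made up for illustration.
 */
#if 0
static void example_force_charge(struct page_counter *counter,
                 unsigned long nr_pages)
{
    /* Unconditionally bumps usage on @counter and all ancestors. */
    page_counter_charge(counter, nr_pages);

    /* ... use the pages ... */

    /* Every charge must eventually be balanced by an uncharge. */
    page_counter_uncharge(counter, nr_pages);
}
#endif
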
/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                 unsigned long nr_pages,
                 struct page_counter **fail)
{
    struct page_counter *c;

    for (c = counter; c; c = c->parent) {
        long new;
        /*
         * Charge speculatively to avoid an expensive CAS.  If
         * a bigger charge fails, it might falsely lock out a
         * racing smaller charge and send it into reclaim
         * early, but the error is limited to the difference
         * between the two sizes, which is less than 2M/4M in
         * case of a THP locking out a regular page charge.
         *
         * The atomic_long_add_return() implies a full memory
         * barrier between incrementing the count and reading
         * the limit.  When racing with page_counter_set_max(),
         * we either see the new limit or the setter sees the
         * counter has changed and retries.
         */
        new = atomic_long_add_return(nr_pages, &c->usage);
        if (new > c->max) {
            atomic_long_sub(nr_pages, &c->usage);
            /*
             * This is racy, but we can live with some
             * inaccuracy in the failcnt which is only used
             * to report stats.
             */
            data_race(c->failcnt++);
            *fail = c;
            goto failed;
        }
        propagate_protected_usage(c, new);
        /*
         * Just like with failcnt, we can live with some
         * inaccuracy in the watermark.
         */
        if (new > READ_ONCE(c->watermark))
            WRITE_ONCE(c->watermark, new);
    }
    return true;

failed:
    for (c = counter; c != *fail; c = c->parent)
        page_counter_cancel(c, nr_pages);

    return false;
}

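/*
 * Editorial sketch (not part of this file, kept out of the build with
 * #if 0): the usual pattern around page_counter_try_charge() is to retry
 * after making progress against the counter reported in @fail, e.g. by
 * reclaiming from the corresponding hierarchy.  The example_reclaim_from()
 * helper is hypothetical.
 */
#if 0
static int example_charge_with_retry(struct page_counter *counter,
                 unsigned long nr_pages, int retries)
{
    struct page_counter *fail;

    while (!page_counter_try_charge(counter, nr_pages, &fail)) {
        if (retries-- <= 0)
            return -ENOMEM;
        /* Make room under the limit that was hit before retrying. */
        example_reclaim_from(fail, nr_pages);
    }
    return 0;
}
#endif
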
/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
    struct page_counter *c;

    for (c = counter; c; c = c->parent)
        page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
    for (;;) {
        unsigned long old;
        long usage;

        /*
         * Update the limit while making sure that it's not
         * below the concurrently-changing counter value.
         *
         * The xchg implies two full memory barriers before
         * and after, so the read-swap-read is ordered and
         * ensures coherency with page_counter_try_charge():
         * that function modifies the count before checking
         * the limit, so if it sees the old limit, we see the
         * modified counter and retry.
         */
        usage = page_counter_read(counter);

        if (usage > nr_pages)
            return -EBUSY;

        old = xchg(&counter->max, nr_pages);

        if (page_counter_read(counter) <= usage)
            return 0;

        counter->max = old;
        cond_resched();
    }
}

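/*
 * Editorial sketch (not part of this file, kept out of the build with
 * #if 0): a limit writer typically loops, shrinking usage when
 * page_counter_set_max() returns -EBUSY because usage is still above the
 * requested limit.  example_shrink_usage() is a hypothetical stand-in for
 * reclaim.
 */
#if 0
static int example_set_limit(struct page_counter *counter,
                 unsigned long limit, int retries)
{
    while (page_counter_set_max(counter, limit)) {
        if (retries-- <= 0)
            return -EBUSY;
        /* Push usage below @limit, then try installing it again. */
        example_shrink_usage(counter, limit);
    }
    return 0;
}
#endif
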
/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
    struct page_counter *c;

    WRITE_ONCE(counter->min, nr_pages);

    for (c = counter; c; c = c->parent)
        propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of best-effort protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
    struct page_counter *c;

    WRITE_ONCE(counter->low, nr_pages);

    for (c = counter; c; c = c->parent)
        propagate_protected_usage(c, atomic_long_read(&c->usage));
}

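/*
 * Editorial sketch (not part of this file, kept out of the build with
 * #if 0): the protection setters only record the configuration and
 * re-propagate the already charged usage up the hierarchy; they never
 * fail.  The example_* name is made up for illustration.
 */
#if 0
static void example_protect(struct page_counter *child,
                unsigned long min_pages, unsigned long low_pages)
{
    page_counter_set_min(child, min_pages);    /* hard protection */
    page_counter_set_low(child, low_pages);    /* best-effort protection */
}
#endif
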
/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
              unsigned long *nr_pages)
{
    char *end;
    u64 bytes;

    if (!strcmp(buf, max)) {
        *nr_pages = PAGE_COUNTER_MAX;
        return 0;
    }

    bytes = memparse(buf, &end);
    if (*end != '\0')
        return -EINVAL;

    *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

    return 0;
}
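
/*
 * Editorial sketch (not part of this file, kept out of the build with
 * #if 0): a typical cgroup-style interface write parses the user string
 * with page_counter_memparse() and then installs the result as a limit.
 * The "max" keyword maps to PAGE_COUNTER_MAX; byte values such as "512M"
 * are converted to whole pages.  The example_* name is made up.
 */
#if 0
static int example_write_limit(struct page_counter *counter, const char *buf)
{
    unsigned long nr_pages;
    int err;

    err = page_counter_memparse(buf, "max", &nr_pages);
    if (err)
        return err;

    return page_counter_set_max(counter, nr_pages);
}
#endif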