/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->count);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->count);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
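
/*
 * Illustrative sketch (not part of this file): charging a child
 * counter propagates to every ancestor via the ->parent chain.  The
 * counters and the page count below are hypothetical.
 */
static __maybe_unused void page_counter_charge_example(void)
{
	static struct page_counter parent, child;

	child.parent = &parent;		/* link child under parent */
	/* Both child.count and parent.count grow by 8 pages: */
	page_counter_charge(&child, 8);
}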

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_limit(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->count);
		if (new > c->limit) {
			atomic_long_sub(nr_pages, &c->count);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}
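
/*
 * Illustrative sketch: the usual calling pattern is to try the charge
 * and, on failure, reclaim against the counter that hit its limit
 * before retrying.  try_to_reclaim_pages() is a hypothetical stand-in
 * for the caller's reclaim logic (e.g. memcg reclaim); the retry count
 * is arbitrary.
 */
static __maybe_unused bool charge_with_retry(struct page_counter *counter,
					     unsigned long nr_pages)
{
	struct page_counter *fail;
	int retries = 3;

	while (!page_counter_try_charge(counter, nr_pages, &fail)) {
		if (!retries--)
			return false;
		/* Reclaim from the limited ancestor, then retry: */
		/* try_to_reclaim_pages(fail, nr_pages); */
	}
	return true;
}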

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_limit - limit the number of pages allowed
 * @counter: counter
 * @limit: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
	for (;;) {
		unsigned long old;
		long count;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		count = atomic_long_read(&counter->count);

		if (count > limit)
			return -EBUSY;

		old = xchg(&counter->limit, limit);

		if (atomic_long_read(&counter->count) <= count)
			return 0;

		counter->limit = old;
		cond_resched();
	}
}
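
/*
 * Illustrative sketch: shrinking a limit below current usage fails
 * with -EBUSY, so callers typically reclaim and retry, all under a
 * lock providing the caller-side serialization required above.  The
 * mutex is hypothetical and only shown in comments.
 */
static __maybe_unused int shrink_limit_example(struct page_counter *counter,
					       unsigned long new_limit)
{
	int ret;

	/* mutex_lock(&example_limit_mutex); */
	ret = page_counter_limit(counter, new_limit);
	/* mutex_unlock(&example_limit_mutex); */

	return ret;	/* 0 on success, -EBUSY if usage exceeds new_limit */
}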

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
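
/*
 * Illustrative sketch: parsing a cgroup-style limit string.  The
 * string @max (here "max") maps to PAGE_COUNTER_MAX; anything else
 * goes through memparse(), so suffixes like "512M" work and the
 * byte value is converted to pages.
 */
static __maybe_unused int parse_limit_example(const char *buf,
					      unsigned long *nr_pages)
{
	/* e.g. buf == "max"  -> *nr_pages = PAGE_COUNTER_MAX, returns 0
	 *      buf == "512M" -> *nr_pages = 512M / PAGE_SIZE, returns 0
	 *      buf == "junk" -> returns -EINVAL
	 */
	return page_counter_memparse(buf, "max", nr_pages);
}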