Back to home page

LXR

 
 

    


0001 /*
0002  * Percpu IDA library
0003  *
0004  * Copyright (C) 2013 Datera, Inc. Kent Overstreet
0005  *
0006  * This program is free software; you can redistribute it and/or
0007  * modify it under the terms of the GNU General Public License as
0008  * published by the Free Software Foundation; either version 2, or (at
0009  * your option) any later version.
0010  *
0011  * This program is distributed in the hope that it will be useful, but
0012  * WITHOUT ANY WARRANTY; without even the implied warranty of
0013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0014  * General Public License for more details.
0015  */
0016 
0017 #include <linux/bitmap.h>
0018 #include <linux/bitops.h>
0019 #include <linux/bug.h>
0020 #include <linux/err.h>
0021 #include <linux/export.h>
0022 #include <linux/init.h>
0023 #include <linux/kernel.h>
0024 #include <linux/percpu.h>
0025 #include <linux/sched.h>
0026 #include <linux/string.h>
0027 #include <linux/spinlock.h>
0028 #include <linux/percpu_ida.h>
0029 
0030 struct percpu_ida_cpu {
0031     /*
0032      * Even though this is percpu, we need a lock for tag stealing by remote
0033      * CPUs:
0034      */
0035     spinlock_t          lock;
0036 
0037     /* nr_free/freelist form a stack of free IDs */
0038     unsigned            nr_free;
0039     unsigned            freelist[];
0040 };
0041 
0042 static inline void move_tags(unsigned *dst, unsigned *dst_nr,
0043                  unsigned *src, unsigned *src_nr,
0044                  unsigned nr)
0045 {
0046     *src_nr -= nr;
0047     memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr);
0048     *dst_nr += nr;
0049 }
0050 
0051 /*
0052  * Try to steal tags from a remote cpu's percpu freelist.
0053  *
0054  * We first check how many percpu freelists have tags
0055  *
0056  * Then we iterate through the cpus until we find some tags - we don't attempt
0057  * to find the "best" cpu to steal from, to keep cacheline bouncing to a
0058  * minimum.
0059  */
0060 static inline void steal_tags(struct percpu_ida *pool,
0061                   struct percpu_ida_cpu *tags)
0062 {
0063     unsigned cpus_have_tags, cpu = pool->cpu_last_stolen;
0064     struct percpu_ida_cpu *remote;
0065 
0066     for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
0067          cpus_have_tags; cpus_have_tags--) {
0068         cpu = cpumask_next(cpu, &pool->cpus_have_tags);
0069 
0070         if (cpu >= nr_cpu_ids) {
0071             cpu = cpumask_first(&pool->cpus_have_tags);
0072             if (cpu >= nr_cpu_ids)
0073                 BUG();
0074         }
0075 
0076         pool->cpu_last_stolen = cpu;
0077         remote = per_cpu_ptr(pool->tag_cpu, cpu);
0078 
0079         cpumask_clear_cpu(cpu, &pool->cpus_have_tags);
0080 
0081         if (remote == tags)
0082             continue;
0083 
0084         spin_lock(&remote->lock);
0085 
0086         if (remote->nr_free) {
0087             memcpy(tags->freelist,
0088                    remote->freelist,
0089                    sizeof(unsigned) * remote->nr_free);
0090 
0091             tags->nr_free = remote->nr_free;
0092             remote->nr_free = 0;
0093         }
0094 
0095         spin_unlock(&remote->lock);
0096 
0097         if (tags->nr_free)
0098             break;
0099     }
0100 }
0101 
0102 /*
0103  * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto
0104  * our percpu freelist:
0105  */
0106 static inline void alloc_global_tags(struct percpu_ida *pool,
0107                      struct percpu_ida_cpu *tags)
0108 {
0109     move_tags(tags->freelist, &tags->nr_free,
0110           pool->freelist, &pool->nr_free,
0111           min(pool->nr_free, pool->percpu_batch_size));
0112 }
0113 
0114 static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
0115 {
0116     int tag = -ENOSPC;
0117 
0118     spin_lock(&tags->lock);
0119     if (tags->nr_free)
0120         tag = tags->freelist[--tags->nr_free];
0121     spin_unlock(&tags->lock);
0122 
0123     return tag;
0124 }
0125 
0126 /**
0127  * percpu_ida_alloc - allocate a tag
0128  * @pool: pool to allocate from
0129  * @state: task state for prepare_to_wait
0130  *
0131  * Returns a tag - an integer in the range [0..nr_tags) (passed to
0132  * tag_pool_init()), or otherwise -ENOSPC on allocation failure.
0133  *
0134  * Safe to be called from interrupt context (assuming it isn't passed
0135  * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
0136  *
0137  * @gfp indicates whether or not to wait until a free id is available (it's not
0138  * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
0139  * however long it takes until another thread frees an id (same semantics as a
0140  * mempool).
0141  *
0142  * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE.
0143  */
0144 int percpu_ida_alloc(struct percpu_ida *pool, int state)
0145 {
0146     DEFINE_WAIT(wait);
0147     struct percpu_ida_cpu *tags;
0148     unsigned long flags;
0149     int tag;
0150 
0151     local_irq_save(flags);
0152     tags = this_cpu_ptr(pool->tag_cpu);
0153 
0154     /* Fastpath */
0155     tag = alloc_local_tag(tags);
0156     if (likely(tag >= 0)) {
0157         local_irq_restore(flags);
0158         return tag;
0159     }
0160 
0161     while (1) {
0162         spin_lock(&pool->lock);
0163 
0164         /*
0165          * prepare_to_wait() must come before steal_tags(), in case
0166          * percpu_ida_free() on another cpu flips a bit in
0167          * cpus_have_tags
0168          *
0169          * global lock held and irqs disabled, don't need percpu lock
0170          */
0171         if (state != TASK_RUNNING)
0172             prepare_to_wait(&pool->wait, &wait, state);
0173 
0174         if (!tags->nr_free)
0175             alloc_global_tags(pool, tags);
0176         if (!tags->nr_free)
0177             steal_tags(pool, tags);
0178 
0179         if (tags->nr_free) {
0180             tag = tags->freelist[--tags->nr_free];
0181             if (tags->nr_free)
0182                 cpumask_set_cpu(smp_processor_id(),
0183                         &pool->cpus_have_tags);
0184         }
0185 
0186         spin_unlock(&pool->lock);
0187         local_irq_restore(flags);
0188 
0189         if (tag >= 0 || state == TASK_RUNNING)
0190             break;
0191 
0192         if (signal_pending_state(state, current)) {
0193             tag = -ERESTARTSYS;
0194             break;
0195         }
0196 
0197         schedule();
0198 
0199         local_irq_save(flags);
0200         tags = this_cpu_ptr(pool->tag_cpu);
0201     }
0202     if (state != TASK_RUNNING)
0203         finish_wait(&pool->wait, &wait);
0204 
0205     return tag;
0206 }
0207 EXPORT_SYMBOL_GPL(percpu_ida_alloc);
0208 
0209 /**
0210  * percpu_ida_free - free a tag
0211  * @pool: pool @tag was allocated from
0212  * @tag: a tag previously allocated with percpu_ida_alloc()
0213  *
0214  * Safe to be called from interrupt context.
0215  */
0216 void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
0217 {
0218     struct percpu_ida_cpu *tags;
0219     unsigned long flags;
0220     unsigned nr_free;
0221 
0222     BUG_ON(tag >= pool->nr_tags);
0223 
0224     local_irq_save(flags);
0225     tags = this_cpu_ptr(pool->tag_cpu);
0226 
0227     spin_lock(&tags->lock);
0228     tags->freelist[tags->nr_free++] = tag;
0229 
0230     nr_free = tags->nr_free;
0231     spin_unlock(&tags->lock);
0232 
0233     if (nr_free == 1) {
0234         cpumask_set_cpu(smp_processor_id(),
0235                 &pool->cpus_have_tags);
0236         wake_up(&pool->wait);
0237     }
0238 
0239     if (nr_free == pool->percpu_max_size) {
0240         spin_lock(&pool->lock);
0241 
0242         /*
0243          * Global lock held and irqs disabled, don't need percpu
0244          * lock
0245          */
0246         if (tags->nr_free == pool->percpu_max_size) {
0247             move_tags(pool->freelist, &pool->nr_free,
0248                   tags->freelist, &tags->nr_free,
0249                   pool->percpu_batch_size);
0250 
0251             wake_up(&pool->wait);
0252         }
0253         spin_unlock(&pool->lock);
0254     }
0255 
0256     local_irq_restore(flags);
0257 }
0258 EXPORT_SYMBOL_GPL(percpu_ida_free);
0259 
0260 /**
0261  * percpu_ida_destroy - release a tag pool's resources
0262  * @pool: pool to free
0263  *
0264  * Frees the resources allocated by percpu_ida_init().
0265  */
0266 void percpu_ida_destroy(struct percpu_ida *pool)
0267 {
0268     free_percpu(pool->tag_cpu);
0269     free_pages((unsigned long) pool->freelist,
0270            get_order(pool->nr_tags * sizeof(unsigned)));
0271 }
0272 EXPORT_SYMBOL_GPL(percpu_ida_destroy);
0273 
0274 /**
0275  * percpu_ida_init - initialize a percpu tag pool
0276  * @pool: pool to initialize
0277  * @nr_tags: number of tags that will be available for allocation
0278  *
0279  * Initializes @pool so that it can be used to allocate tags - integers in the
0280  * range [0, nr_tags). Typically, they'll be used by driver code to refer to a
0281  * preallocated array of tag structures.
0282  *
0283  * Allocation is percpu, but sharding is limited by nr_tags - for best
0284  * performance, the workload should not span more cpus than nr_tags / 128.
0285  */
0286 int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
0287     unsigned long max_size, unsigned long batch_size)
0288 {
0289     unsigned i, cpu, order;
0290 
0291     memset(pool, 0, sizeof(*pool));
0292 
0293     init_waitqueue_head(&pool->wait);
0294     spin_lock_init(&pool->lock);
0295     pool->nr_tags = nr_tags;
0296     pool->percpu_max_size = max_size;
0297     pool->percpu_batch_size = batch_size;
0298 
0299     /* Guard against overflow */
0300     if (nr_tags > (unsigned) INT_MAX + 1) {
0301         pr_err("percpu_ida_init(): nr_tags too large\n");
0302         return -EINVAL;
0303     }
0304 
0305     order = get_order(nr_tags * sizeof(unsigned));
0306     pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order);
0307     if (!pool->freelist)
0308         return -ENOMEM;
0309 
0310     for (i = 0; i < nr_tags; i++)
0311         pool->freelist[i] = i;
0312 
0313     pool->nr_free = nr_tags;
0314 
0315     pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
0316                        pool->percpu_max_size * sizeof(unsigned),
0317                        sizeof(unsigned));
0318     if (!pool->tag_cpu)
0319         goto err;
0320 
0321     for_each_possible_cpu(cpu)
0322         spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock);
0323 
0324     return 0;
0325 err:
0326     percpu_ida_destroy(pool);
0327     return -ENOMEM;
0328 }
0329 EXPORT_SYMBOL_GPL(__percpu_ida_init);
0330 
0331 /**
0332  * percpu_ida_for_each_free - iterate free ids of a pool
0333  * @pool: pool to iterate
0334  * @fn: interate callback function
0335  * @data: parameter for @fn
0336  *
0337  * Note, this doesn't guarantee to iterate all free ids restrictly. Some free
0338  * ids might be missed, some might be iterated duplicated, and some might
0339  * be iterated and not free soon.
0340  */
0341 int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
0342     void *data)
0343 {
0344     unsigned long flags;
0345     struct percpu_ida_cpu *remote;
0346     unsigned cpu, i, err = 0;
0347 
0348     local_irq_save(flags);
0349     for_each_possible_cpu(cpu) {
0350         remote = per_cpu_ptr(pool->tag_cpu, cpu);
0351         spin_lock(&remote->lock);
0352         for (i = 0; i < remote->nr_free; i++) {
0353             err = fn(remote->freelist[i], data);
0354             if (err)
0355                 break;
0356         }
0357         spin_unlock(&remote->lock);
0358         if (err)
0359             goto out;
0360     }
0361 
0362     spin_lock(&pool->lock);
0363     for (i = 0; i < pool->nr_free; i++) {
0364         err = fn(pool->freelist[i], data);
0365         if (err)
0366             break;
0367     }
0368     spin_unlock(&pool->lock);
0369 out:
0370     local_irq_restore(flags);
0371     return err;
0372 }
0373 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
0374 
0375 /**
0376  * percpu_ida_free_tags - return free tags number of a specific cpu or global pool
0377  * @pool: pool related
0378  * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
0379  *
0380  * Note: this just returns a snapshot of free tags number.
0381  */
0382 unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
0383 {
0384     struct percpu_ida_cpu *remote;
0385     if (cpu == nr_cpu_ids)
0386         return pool->nr_free;
0387     remote = per_cpu_ptr(pool->tag_cpu, cpu);
0388     return remote->nr_free;
0389 }
0390 EXPORT_SYMBOL_GPL(percpu_ida_free_tags);