/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities.  This can be seen as a reverse-map of
 * CPU affinity.  However, we do not assume that the object affinities
 * cover all CPUs in the system.  For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
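
/*
 * For illustration (a conceptual sketch, not part of the API): the map
 * answers "which object is nearest to this CPU?".  On a 6-CPU system
 * with two objects whose affinities are {0,1} and {2,3}, CPUs 4 and 5
 * are not covered directly and are resolved through the topology
 * fallback in cpu_rmap_update():
 *
 *	CPU:    0  1  2  3  4  5
 *	index:  0  0  1  1  ?  ?   (? filled via sibling/core/node masks)
 */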

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are.  Use infinite distance, so
	 * any object with known distance is preferable.  Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);
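
/*
 * Example use (a minimal sketch; "priv" and its fields are hypothetical
 * driver state): allocate one map entry per receive queue at probe time.
 *
 *	priv->rmap = alloc_cpu_rmap(priv->n_rx_queues, GFP_KERNEL);
 *	if (!priv->rmap)
 *		return -ENOMEM;
 */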

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);

	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}
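
/*
 * For reference, the distance values used by cpu_rmap_update() below;
 * smaller means nearer, and a neighbour's object is copied only when it
 * is strictly nearer than what the CPU already has:
 *
 *	0                  CPU is in the object's own affinity mask
 *	1                  SMT sibling of such a CPU
 *	2                  same core/package
 *	3                  same NUMA node
 *	CPU_RMAP_DIST_INF  no known relation yet
 */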

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %u -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return: index of the object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);
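
/*
 * Example use (a minimal sketch; "queue" and "rmap_index" are
 * hypothetical driver names): the returned index identifies the object
 * in later calls to cpu_rmap_update().
 *
 *	queue->rmap_index = cpu_rmap_add(priv->rmap, queue);
 */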

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 *
 * Return: 0 on success, or -ENOMEM if a temporary cpumask could not be
 * allocated.
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object.  Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_sibling_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
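
/*
 * A fast path can then resolve the running CPU to an object in O(1).
 * A minimal sketch, assuming the lookup helpers declared in
 * <linux/cpu_rmap.h>:
 *
 *	u16 index = cpu_rmap_lookup_index(rmap, raw_smp_processor_id());
 *	void *obj = cpu_rmap_lookup_obj(rmap, raw_smp_processor_id());
 */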

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);
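
/*
 * Example teardown order (a minimal sketch; the "priv" fields are
 * hypothetical): unregister the notifiers via free_irq_cpu_rmap()
 * before the IRQs themselves are freed.
 *
 *	free_irq_cpu_rmap(priv->rmap);
 *	for (i = 0; i < priv->n_irqs; i++)
 *		free_irq(priv->irqs[i], priv);
 */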

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 *
 * Return: 0 on success, or a negative error code.
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;

	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);

	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
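
/*
 * Example setup (a minimal sketch; "priv", "n_rx_queues" and "irqs"
 * are hypothetical driver fields): register a notifier for each queue
 * IRQ after allocation and before request_irq().
 *
 *	for (i = 0; i < priv->n_rx_queues; i++) {
 *		rc = irq_cpu_rmap_add(priv->rmap, priv->irqs[i]);
 *		if (rc)
 *			goto fail;
 *	}
 */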