// SPDX-License-Identifier: GPL-2.0
/*
 * Manage cache of swap slots to be used for and returned from
 * swap.
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * We allocate swap slots from the global pool and put
 * them into local per-cpu caches.  This has the advantage
 * of not needing to acquire the swap_info lock every time
 * we need a new slot.
 *
 * There is also the opportunity to simply return a slot
 * to the local cache without needing to acquire the swap_info
 * lock.  We do not reuse the returned slots directly but
 * move them back to the global pool in a batch.  This
 * allows the slots to coalesce and reduces fragmentation.
 *
 * An allocated swap entry is marked with the SWAP_HAS_CACHE
 * flag in its swap_map count, which prevents it from being
 * allocated again from the global pool.
 *
 * The swap slots cache is protected by a mutex instead of
 * a spin lock, as we can possibly sleep when we search for
 * slots with scan_swap_map.
 */
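
/*
 * Illustrative usage sketch (not part of this file's control flow; error
 * handling and swap-cache bookkeeping are elided):
 *
 *	swp_entry_t entry = folio_alloc_swap(folio);
 *	if (!entry.val)
 *		return -ENOMEM;	// no slot in the per-cpu cache or the global pool
 *	// ... add the folio to the swap cache and start writeback ...
 *	free_swap_slot(entry);	// goes back via the per-cpu cache, freed in a batch
 */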

#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>

static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
static bool swap_slot_cache_active;
bool    swap_slot_cache_enabled;
static bool swap_slot_cache_initialized;
static DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
static DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(unsigned int type);

#define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
#define SLOTS_CACHE 0x1
#define SLOTS_CACHE_RET 0x2

static void deactivate_swap_slots_cache(void)
{
    mutex_lock(&swap_slots_cache_mutex);
    swap_slot_cache_active = false;
    __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
    mutex_unlock(&swap_slots_cache_mutex);
}

static void reactivate_swap_slots_cache(void)
{
    mutex_lock(&swap_slots_cache_mutex);
    swap_slot_cache_active = true;
    mutex_unlock(&swap_slots_cache_mutex);
}

/* Must not be called with cpu hot plug lock */
void disable_swap_slots_cache_lock(void)
{
    mutex_lock(&swap_slots_cache_enable_mutex);
    swap_slot_cache_enabled = false;
    if (swap_slot_cache_initialized) {
        /* serialize with cpu hotplug operations */
        cpus_read_lock();
        __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
        cpus_read_unlock();
    }
}

static void __reenable_swap_slots_cache(void)
{
    swap_slot_cache_enabled = has_usable_swap();
}

void reenable_swap_slots_cache_unlock(void)
{
    __reenable_swap_slots_cache();
    mutex_unlock(&swap_slots_cache_enable_mutex);
}

static bool check_cache_active(void)
{
    long pages;

    if (!swap_slot_cache_enabled)
        return false;

    pages = get_nr_swap_pages();
    if (!swap_slot_cache_active) {
        if (pages > num_online_cpus() *
            THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
            reactivate_swap_slots_cache();
        goto out;
    }

    /* if the global pool of free swap slots is too low, deactivate the cache */
    if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
        deactivate_swap_slots_cache();
out:
    return swap_slot_cache_active;
}
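
/*
 * Worked example (the numbers are illustrative; the actual values are the
 * THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE and THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE
 * definitions in <linux/swap_slots.h>): assuming an activation threshold of
 * 320 slots and a deactivation threshold of 128 slots per cpu, a system with
 * 4 online cpus turns the cache on once more than 4 * 320 = 1280 free swap
 * pages remain, and turns it back off once fewer than 4 * 128 = 512 remain.
 * The gap between the two thresholds keeps the cache from flapping on and off.
 */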

static int alloc_swap_slot_cache(unsigned int cpu)
{
    struct swap_slots_cache *cache;
    swp_entry_t *slots, *slots_ret;

    /*
     * Do the allocations outside swap_slots_cache_mutex,
     * as kvcalloc could trigger reclaim and folio_alloc_swap,
     * which can lock swap_slots_cache_mutex.
     */
    slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
             GFP_KERNEL);
    if (!slots)
        return -ENOMEM;

    slots_ret = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
                 GFP_KERNEL);
    if (!slots_ret) {
        kvfree(slots);
        return -ENOMEM;
    }

    mutex_lock(&swap_slots_cache_mutex);
    cache = &per_cpu(swp_slots, cpu);
    if (cache->slots || cache->slots_ret) {
        /* cache already allocated */
        mutex_unlock(&swap_slots_cache_mutex);

        kvfree(slots);
        kvfree(slots_ret);

        return 0;
    }

    if (!cache->lock_initialized) {
        mutex_init(&cache->alloc_lock);
        spin_lock_init(&cache->free_lock);
        cache->lock_initialized = true;
    }
    cache->nr = 0;
    cache->cur = 0;
    cache->n_ret = 0;
    /*
     * We initialized alloc_lock and free_lock earlier.  We use
     * !cache->slots or !cache->slots_ret to know if it is safe to acquire
     * the corresponding lock and use the cache.  Memory barrier below
     * ensures the assumption.
     */
    mb();
    cache->slots = slots;
    cache->slots_ret = slots_ret;
    mutex_unlock(&swap_slots_cache_mutex);
    return 0;
}
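
/*
 * Ordering sketch (illustrative): the mb() above pairs with readers such as
 * folio_alloc_swap() and free_swap_slot(), which test cache->slots or
 * cache->slots_ret before taking the corresponding lock:
 *
 *	alloc_swap_slot_cache() (writer)	reader
 *	mutex_init(&cache->alloc_lock);
 *	spin_lock_init(&cache->free_lock);
 *	mb();
 *	cache->slots = slots;			if (cache->slots)
 *							mutex_lock(&cache->alloc_lock);
 *
 * A reader that observes a non-NULL pointer therefore also observes the
 * initialized locks.
 */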

static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
                  bool free_slots)
{
    struct swap_slots_cache *cache;
    swp_entry_t *slots = NULL;

    cache = &per_cpu(swp_slots, cpu);
    if ((type & SLOTS_CACHE) && cache->slots) {
        mutex_lock(&cache->alloc_lock);
        swapcache_free_entries(cache->slots + cache->cur, cache->nr);
        cache->cur = 0;
        cache->nr = 0;
        if (free_slots && cache->slots) {
            kvfree(cache->slots);
            cache->slots = NULL;
        }
        mutex_unlock(&cache->alloc_lock);
    }
    if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
        spin_lock_irq(&cache->free_lock);
        swapcache_free_entries(cache->slots_ret, cache->n_ret);
        cache->n_ret = 0;
        if (free_slots && cache->slots_ret) {
            slots = cache->slots_ret;
            cache->slots_ret = NULL;
        }
        spin_unlock_irq(&cache->free_lock);
        kvfree(slots);
    }
}

static void __drain_swap_slots_cache(unsigned int type)
{
    unsigned int cpu;

    /*
     * This function is called during
     *  1) swapoff, when we have to make sure no
     *     left over slots are in the cache when we remove
     *     a swap device;
     *  2) disabling of the swap slot cache, when we run low
     *     on swap slots while allocating memory and need
     *     to return swap slots to the global pool.
     *
     * We cannot acquire the cpu hot plug lock here, as
     * this function can be invoked in the cpu
     * hot plug path:
     * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
     *   -> memory allocation -> direct reclaim -> folio_alloc_swap
     *   -> drain_swap_slots_cache
     *
     * Hence the loop over the currently online cpus below could miss a cpu
     * that is being brought online but not yet marked as online.
     * That is okay, as we do not schedule and run anything on a
     * cpu before it has been marked online.  Hence, we will not
     * fill any swap slots in the slots cache of such a cpu.
     * There are no slots on such a cpu that need to be drained.
     */
    for_each_online_cpu(cpu)
        drain_slots_cache_cpu(cpu, type, false);
}

static int free_slot_cache(unsigned int cpu)
{
    mutex_lock(&swap_slots_cache_mutex);
    drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
    mutex_unlock(&swap_slots_cache_mutex);
    return 0;
}

void enable_swap_slots_cache(void)
{
    mutex_lock(&swap_slots_cache_enable_mutex);
    if (!swap_slot_cache_initialized) {
        int ret;

        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
                    alloc_swap_slot_cache, free_slot_cache);
        if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
                       "without swap slots cache.\n", __func__))
            goto out_unlock;

        swap_slot_cache_initialized = true;
    }

    __reenable_swap_slots_cache();
out_unlock:
    mutex_unlock(&swap_slots_cache_enable_mutex);
}

/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
    if (!use_swap_slot_cache)
        return 0;

    cache->cur = 0;
    if (swap_slot_cache_active)
        cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
                       cache->slots, 1);

    return cache->nr;
}

void free_swap_slot(swp_entry_t entry)
{
    struct swap_slots_cache *cache;

    cache = raw_cpu_ptr(&swp_slots);
    if (likely(use_swap_slot_cache && cache->slots_ret)) {
        spin_lock_irq(&cache->free_lock);
        /* Swap slots cache may be deactivated before acquiring lock */
        if (!use_swap_slot_cache || !cache->slots_ret) {
            spin_unlock_irq(&cache->free_lock);
            goto direct_free;
        }
        if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
            /*
             * Return slots to the global pool.
             * The current swap_map value is SWAP_HAS_CACHE.
             * Set it to 0 to indicate it is available for
             * allocation in the global pool.
             */
            swapcache_free_entries(cache->slots_ret, cache->n_ret);
            cache->n_ret = 0;
        }
        cache->slots_ret[cache->n_ret++] = entry;
        spin_unlock_irq(&cache->free_lock);
    } else {
direct_free:
        swapcache_free_entries(&entry, 1);
    }
}
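
/*
 * Batching example (illustrative; assumes SWAP_SLOTS_CACHE_SIZE is 64):
 * the first 64 freed entries only fill cache->slots_ret.  The 65th call
 * finds the cache full, hands all 64 cached entries to
 * swapcache_free_entries() in one batch, and then stores itself as the
 * first entry of the next batch.
 */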

swp_entry_t folio_alloc_swap(struct folio *folio)
{
    swp_entry_t entry;
    struct swap_slots_cache *cache;

    entry.val = 0;

    if (folio_test_large(folio)) {
        if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
            get_swap_pages(1, &entry, folio_nr_pages(folio));
        goto out;
    }

    /*
     * Preemption is allowed here, because we may sleep
     * in refill_swap_slots_cache().  But it is safe, because
     * accesses to the per-CPU data structure are protected by the
     * mutex cache->alloc_lock.
     *
     * The alloc path here does not touch cache->slots_ret
     * so cache->free_lock is not taken.
     */
    cache = raw_cpu_ptr(&swp_slots);

    if (likely(check_cache_active() && cache->slots)) {
        mutex_lock(&cache->alloc_lock);
        if (cache->slots) {
repeat:
            if (cache->nr) {
                entry = cache->slots[cache->cur];
                cache->slots[cache->cur++].val = 0;
                cache->nr--;
            } else if (refill_swap_slots_cache(cache)) {
                goto repeat;
            }
        }
        mutex_unlock(&cache->alloc_lock);
        if (entry.val)
            goto out;
    }

    get_swap_pages(1, &entry, 1);
out:
    if (mem_cgroup_try_charge_swap(folio, entry)) {
        put_swap_page(&folio->page, entry);
        entry.val = 0;
    }
    return entry;
}