0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * zswap.c - zswap driver file
0004  *
0005  * zswap is a backend for frontswap that takes pages that are in the process
0006  * of being swapped out and attempts to compress and store them in a
0007  * RAM-based memory pool.  This can result in a significant I/O reduction on
0008  * the swap device and, in the case where decompressing from RAM is faster
0009  * than reading from the swap device, can also improve workload performance.
0010  *
0011  * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
0012 */
0013 
0014 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0015 
0016 #include <linux/module.h>
0017 #include <linux/cpu.h>
0018 #include <linux/highmem.h>
0019 #include <linux/slab.h>
0020 #include <linux/spinlock.h>
0021 #include <linux/types.h>
0022 #include <linux/atomic.h>
0023 #include <linux/frontswap.h>
0024 #include <linux/rbtree.h>
0025 #include <linux/swap.h>
0026 #include <linux/crypto.h>
0027 #include <linux/scatterlist.h>
0028 #include <linux/mempool.h>
0029 #include <linux/zpool.h>
0030 #include <crypto/acompress.h>
0031 
0032 #include <linux/mm_types.h>
0033 #include <linux/page-flags.h>
0034 #include <linux/swapops.h>
0035 #include <linux/writeback.h>
0036 #include <linux/pagemap.h>
0037 #include <linux/workqueue.h>
0038 
0039 #include "swap.h"
0040 
0041 /*********************************
0042 * statistics
0043 **********************************/
0044 /* Total bytes used by the compressed storage */
0045 u64 zswap_pool_total_size;
0046 /* The number of compressed pages currently stored in zswap */
0047 atomic_t zswap_stored_pages = ATOMIC_INIT(0);
0048 /* The number of same-value filled pages currently stored in zswap */
0049 static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
0050 
0051 /*
0052  * The statistics below are not protected from concurrent access for
0053  * performance reasons so they may not be 100% accurate.  However,
0054  * they do provide useful information on roughly how many times a
0055  * certain event is occurring.
0056 */
0057 
0058 /* Pool limit was hit (see zswap_max_pool_percent) */
0059 static u64 zswap_pool_limit_hit;
0060 /* Pages written back when pool limit was reached */
0061 static u64 zswap_written_back_pages;
0062 /* Store failed due to a reclaim failure after pool limit was reached */
0063 static u64 zswap_reject_reclaim_fail;
0064 /* Compressed page was too big for the allocator to (optimally) store */
0065 static u64 zswap_reject_compress_poor;
0066 /* Store failed because underlying allocator could not get memory */
0067 static u64 zswap_reject_alloc_fail;
0068 /* Store failed because the entry metadata could not be allocated (rare) */
0069 static u64 zswap_reject_kmemcache_fail;
0070 /* Duplicate store was encountered (rare) */
0071 static u64 zswap_duplicate_entry;
0072 
0073 /* Shrinker work queue */
0074 static struct workqueue_struct *shrink_wq;
0075 /* Pool limit was hit, we need to calm down */
0076 static bool zswap_pool_reached_full;
0077 
0078 /*********************************
0079 * tunables
0080 **********************************/
0081 
0082 #define ZSWAP_PARAM_UNSET ""
0083 
0084 /* Enable/disable zswap */
0085 static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
0086 static int zswap_enabled_param_set(const char *,
0087                    const struct kernel_param *);
0088 static const struct kernel_param_ops zswap_enabled_param_ops = {
0089     .set =      zswap_enabled_param_set,
0090     .get =      param_get_bool,
0091 };
0092 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
0093 
0094 /* Crypto compressor to use */
0095 static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
0096 static int zswap_compressor_param_set(const char *,
0097                       const struct kernel_param *);
0098 static const struct kernel_param_ops zswap_compressor_param_ops = {
0099     .set =      zswap_compressor_param_set,
0100     .get =      param_get_charp,
0101     .free =     param_free_charp,
0102 };
0103 module_param_cb(compressor, &zswap_compressor_param_ops,
0104         &zswap_compressor, 0644);
0105 
0106 /* Compressed storage zpool to use */
0107 static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
0108 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
0109 static const struct kernel_param_ops zswap_zpool_param_ops = {
0110     .set =      zswap_zpool_param_set,
0111     .get =      param_get_charp,
0112     .free =     param_free_charp,
0113 };
0114 module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
0115 
0116 /* The maximum percentage of memory that the compressed pool can occupy */
0117 static unsigned int zswap_max_pool_percent = 20;
0118 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
0119 
0120 /* The threshold for accepting new pages after the max_pool_percent was hit */
0121 static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
0122 module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
0123            uint, 0644);
0124 
0125 /*
0126  * Enable/disable handling same-value filled pages (enabled by default).
0127  * If disabled every page is considered non-same-value filled.
0128  */
0129 static bool zswap_same_filled_pages_enabled = true;
0130 module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
0131            bool, 0644);
0132 
0133 /* Enable/disable handling non-same-value filled pages (enabled by default) */
0134 static bool zswap_non_same_filled_pages_enabled = true;
0135 module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
0136            bool, 0644);
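/*
 * Usage note: each tunable above is a module parameter, so (assuming the
 * standard module-parameter paths) it can be set on the kernel command line,
 * e.g. "zswap.enabled=1 zswap.max_pool_percent=25", or changed at runtime:
 *
 *      echo 1  > /sys/module/zswap/parameters/enabled
 *      echo 25 > /sys/module/zswap/parameters/max_pool_percent
 */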
0137 
0138 /*********************************
0139 * data structures
0140 **********************************/
0141 
0142 struct crypto_acomp_ctx {
0143     struct crypto_acomp *acomp;
0144     struct acomp_req *req;
0145     struct crypto_wait wait;
0146     u8 *dstmem;
0147     struct mutex *mutex;
0148 };
0149 
0150 struct zswap_pool {
0151     struct zpool *zpool;
0152     struct crypto_acomp_ctx __percpu *acomp_ctx;
0153     struct kref kref;
0154     struct list_head list;
0155     struct work_struct release_work;
0156     struct work_struct shrink_work;
0157     struct hlist_node node;
0158     char tfm_name[CRYPTO_MAX_ALG_NAME];
0159 };
0160 
0161 /*
0162  * struct zswap_entry
0163  *
0164  * This structure contains the metadata for tracking a single compressed
0165  * page within zswap.
0166  *
0167  * rbnode - links the entry into red-black tree for the appropriate swap type
0168  * offset - the swap offset for the entry.  Index into the red-black tree.
0169  * refcount - the number of outstanding references to the entry. This is
0170  *            needed to protect against premature freeing of the entry by
0171  *            concurrent calls to load, invalidate, and writeback.  The lock
0172  *            for the zswap_tree structure that contains the entry must
0173  *            be held while changing the refcount.  Since the lock must
0174  *            be held, there is no reason to also make refcount atomic.
0175  * length - the length in bytes of the compressed page data.  Needed during
0176  *          decompression. For a same value filled page length is 0.
0177  * pool - the zswap_pool the entry's data is in
0178  * handle - zpool allocation handle that stores the compressed page data
0179  * value - the value with which a same-value filled page is filled
0180  */
0181 struct zswap_entry {
0182     struct rb_node rbnode;
0183     pgoff_t offset;
0184     int refcount;
0185     unsigned int length;
0186     struct zswap_pool *pool;
0187     union {
0188         unsigned long handle;
0189         unsigned long value;
0190     };
0191     struct obj_cgroup *objcg;
0192 };
0193 
0194 struct zswap_header {
0195     swp_entry_t swpentry;
0196 };
0197 
0198 /*
0199  * The tree lock in the zswap_tree struct protects a few things:
0200  * - the rbtree
0201  * - the refcount field of each entry in the tree
0202  */
0203 struct zswap_tree {
0204     struct rb_root rbroot;
0205     spinlock_t lock;
0206 };
0207 
0208 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
0209 
0210 /* RCU-protected iteration */
0211 static LIST_HEAD(zswap_pools);
0212 /* protects zswap_pools list modification */
0213 static DEFINE_SPINLOCK(zswap_pools_lock);
0214 /* pool counter to provide unique names to zpool */
0215 static atomic_t zswap_pools_count = ATOMIC_INIT(0);
0216 
0217 /* used by param callback function */
0218 static bool zswap_init_started;
0219 
0220 /* fatal error during init */
0221 static bool zswap_init_failed;
0222 
0223 /* init completed, but couldn't create the initial pool */
0224 static bool zswap_has_pool;
0225 
0226 /*********************************
0227 * helpers and fwd declarations
0228 **********************************/
0229 
0230 #define zswap_pool_debug(msg, p)                \
0231     pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,     \
0232          zpool_get_type((p)->zpool))
0233 
0234 static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
0235 static int zswap_pool_get(struct zswap_pool *pool);
0236 static void zswap_pool_put(struct zswap_pool *pool);
0237 
0238 static const struct zpool_ops zswap_zpool_ops = {
0239     .evict = zswap_writeback_entry
0240 };
0241 
0242 static bool zswap_is_full(void)
0243 {
0244     return totalram_pages() * zswap_max_pool_percent / 100 <
0245             DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
0246 }
0247 
0248 static bool zswap_can_accept(void)
0249 {
0250     return totalram_pages() * zswap_accept_thr_percent / 100 *
0251                 zswap_max_pool_percent / 100 >
0252             DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
0253 }
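/*
 * Worked example of the limits above: on a machine with 8 GiB of RAM and the
 * default max_pool_percent = 20, zswap_is_full() reports full once the
 * compressed pool grows past roughly 1.6 GiB.  With accept_threshold_percent
 * = 90, new stores are accepted again only after the pool shrinks below about
 * 1.44 GiB (90% of the 1.6 GiB cap).
 */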
0254 
0255 static void zswap_update_total_size(void)
0256 {
0257     struct zswap_pool *pool;
0258     u64 total = 0;
0259 
0260     rcu_read_lock();
0261 
0262     list_for_each_entry_rcu(pool, &zswap_pools, list)
0263         total += zpool_get_total_size(pool->zpool);
0264 
0265     rcu_read_unlock();
0266 
0267     zswap_pool_total_size = total;
0268 }
0269 
0270 /*********************************
0271 * zswap entry functions
0272 **********************************/
0273 static struct kmem_cache *zswap_entry_cache;
0274 
0275 static int __init zswap_entry_cache_create(void)
0276 {
0277     zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
0278     return zswap_entry_cache == NULL;
0279 }
0280 
0281 static void __init zswap_entry_cache_destroy(void)
0282 {
0283     kmem_cache_destroy(zswap_entry_cache);
0284 }
0285 
0286 static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
0287 {
0288     struct zswap_entry *entry;
0289     entry = kmem_cache_alloc(zswap_entry_cache, gfp);
0290     if (!entry)
0291         return NULL;
0292     entry->refcount = 1;
0293     RB_CLEAR_NODE(&entry->rbnode);
0294     return entry;
0295 }
0296 
0297 static void zswap_entry_cache_free(struct zswap_entry *entry)
0298 {
0299     kmem_cache_free(zswap_entry_cache, entry);
0300 }
0301 
0302 /*********************************
0303 * rbtree functions
0304 **********************************/
0305 static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
0306 {
0307     struct rb_node *node = root->rb_node;
0308     struct zswap_entry *entry;
0309 
0310     while (node) {
0311         entry = rb_entry(node, struct zswap_entry, rbnode);
0312         if (entry->offset > offset)
0313             node = node->rb_left;
0314         else if (entry->offset < offset)
0315             node = node->rb_right;
0316         else
0317             return entry;
0318     }
0319     return NULL;
0320 }
0321 
0322 /*
0323  * In the case that an entry with the same offset is found, a pointer to
0324  * the existing entry is stored in dupentry and the function returns -EEXIST
0325  */
0326 static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
0327             struct zswap_entry **dupentry)
0328 {
0329     struct rb_node **link = &root->rb_node, *parent = NULL;
0330     struct zswap_entry *myentry;
0331 
0332     while (*link) {
0333         parent = *link;
0334         myentry = rb_entry(parent, struct zswap_entry, rbnode);
0335         if (myentry->offset > entry->offset)
0336             link = &(*link)->rb_left;
0337         else if (myentry->offset < entry->offset)
0338             link = &(*link)->rb_right;
0339         else {
0340             *dupentry = myentry;
0341             return -EEXIST;
0342         }
0343     }
0344     rb_link_node(&entry->rbnode, parent, link);
0345     rb_insert_color(&entry->rbnode, root);
0346     return 0;
0347 }
0348 
0349 static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
0350 {
0351     if (!RB_EMPTY_NODE(&entry->rbnode)) {
0352         rb_erase(&entry->rbnode, root);
0353         RB_CLEAR_NODE(&entry->rbnode);
0354     }
0355 }
0356 
0357 /*
0358  * Carries out the common pattern of freeing an entry's zpool allocation,
0359  * freeing the entry itself, and decrementing the number of stored pages.
0360  */
0361 static void zswap_free_entry(struct zswap_entry *entry)
0362 {
0363     if (entry->objcg) {
0364         obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
0365         obj_cgroup_put(entry->objcg);
0366     }
0367     if (!entry->length)
0368         atomic_dec(&zswap_same_filled_pages);
0369     else {
0370         zpool_free(entry->pool->zpool, entry->handle);
0371         zswap_pool_put(entry->pool);
0372     }
0373     zswap_entry_cache_free(entry);
0374     atomic_dec(&zswap_stored_pages);
0375     zswap_update_total_size();
0376 }
0377 
0378 /* caller must hold the tree lock */
0379 static void zswap_entry_get(struct zswap_entry *entry)
0380 {
0381     entry->refcount++;
0382 }
0383 
0384 /* caller must hold the tree lock
0385  * remove from the tree and free it, if nobody references the entry
0386  */
0387 static void zswap_entry_put(struct zswap_tree *tree,
0388             struct zswap_entry *entry)
0389 {
0390     int refcount = --entry->refcount;
0391 
0392     BUG_ON(refcount < 0);
0393     if (refcount == 0) {
0394         zswap_rb_erase(&tree->rbroot, entry);
0395         zswap_free_entry(entry);
0396     }
0397 }
0398 
0399 /* caller must hold the tree lock */
0400 static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
0401                 pgoff_t offset)
0402 {
0403     struct zswap_entry *entry;
0404 
0405     entry = zswap_rb_search(root, offset);
0406     if (entry)
0407         zswap_entry_get(entry);
0408 
0409     return entry;
0410 }
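/*
 * Typical caller pattern (a minimal sketch mirroring zswap_frontswap_load()):
 *
 *      spin_lock(&tree->lock);
 *      entry = zswap_entry_find_get(&tree->rbroot, offset);
 *      spin_unlock(&tree->lock);
 *      if (entry) {
 *              ... use entry->handle or entry->value ...
 *              spin_lock(&tree->lock);
 *              zswap_entry_put(tree, entry);
 *              spin_unlock(&tree->lock);
 *      }
 */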
0411 
0412 /*********************************
0413 * per-cpu code
0414 **********************************/
0415 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
0416 /*
0417  * If users dynamically change the zpool type and compressor at runtime, i.e.
0418  * while zswap is running, zswap can have more than one zpool on one cpu,
0419  * but they share dstmem. So we need this mutex to be per-cpu.
0420  */
0421 static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
0422 
0423 static int zswap_dstmem_prepare(unsigned int cpu)
0424 {
0425     struct mutex *mutex;
0426     u8 *dst;
0427 
0428     dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
0429     if (!dst)
0430         return -ENOMEM;
0431 
0432     mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
0433     if (!mutex) {
0434         kfree(dst);
0435         return -ENOMEM;
0436     }
0437 
0438     mutex_init(mutex);
0439     per_cpu(zswap_dstmem, cpu) = dst;
0440     per_cpu(zswap_mutex, cpu) = mutex;
0441     return 0;
0442 }
0443 
0444 static int zswap_dstmem_dead(unsigned int cpu)
0445 {
0446     struct mutex *mutex;
0447     u8 *dst;
0448 
0449     mutex = per_cpu(zswap_mutex, cpu);
0450     kfree(mutex);
0451     per_cpu(zswap_mutex, cpu) = NULL;
0452 
0453     dst = per_cpu(zswap_dstmem, cpu);
0454     kfree(dst);
0455     per_cpu(zswap_dstmem, cpu) = NULL;
0456 
0457     return 0;
0458 }
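/*
 * zswap_dstmem_prepare()/zswap_dstmem_dead() are paired as CPU hotplug
 * callbacks under CPUHP_MM_ZSWP_MEM_PREPARE (see init_zswap() below), so each
 * online CPU gets a two-page destination buffer and its own mutex.
 */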
0459 
0460 static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
0461 {
0462     struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
0463     struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
0464     struct crypto_acomp *acomp;
0465     struct acomp_req *req;
0466 
0467     acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
0468     if (IS_ERR(acomp)) {
0469         pr_err("could not alloc crypto acomp %s : %ld\n",
0470                 pool->tfm_name, PTR_ERR(acomp));
0471         return PTR_ERR(acomp);
0472     }
0473     acomp_ctx->acomp = acomp;
0474 
0475     req = acomp_request_alloc(acomp_ctx->acomp);
0476     if (!req) {
0477         pr_err("could not alloc crypto acomp_request %s\n",
0478                pool->tfm_name);
0479         crypto_free_acomp(acomp_ctx->acomp);
0480         return -ENOMEM;
0481     }
0482     acomp_ctx->req = req;
0483 
0484     crypto_init_wait(&acomp_ctx->wait);
0485     /*
0486      * if the acomp backend is async (e.g. a hardware compressor), crypto_req_done()
0487      * will wake up crypto_wait_req(); if the backend is a synchronous scomp, the
0488      * callback won't be called and crypto_wait_req() returns without blocking.
0489      */
0490     acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
0491                    crypto_req_done, &acomp_ctx->wait);
0492 
0493     acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
0494     acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
0495 
0496     return 0;
0497 }
0498 
0499 static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
0500 {
0501     struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
0502     struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
0503 
0504     if (!IS_ERR_OR_NULL(acomp_ctx)) {
0505         if (!IS_ERR_OR_NULL(acomp_ctx->req))
0506             acomp_request_free(acomp_ctx->req);
0507         if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
0508             crypto_free_acomp(acomp_ctx->acomp);
0509     }
0510 
0511     return 0;
0512 }
0513 
0514 /*********************************
0515 * pool functions
0516 **********************************/
0517 
0518 static struct zswap_pool *__zswap_pool_current(void)
0519 {
0520     struct zswap_pool *pool;
0521 
0522     pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
0523     WARN_ONCE(!pool && zswap_has_pool,
0524           "%s: no page storage pool!\n", __func__);
0525 
0526     return pool;
0527 }
0528 
0529 static struct zswap_pool *zswap_pool_current(void)
0530 {
0531     assert_spin_locked(&zswap_pools_lock);
0532 
0533     return __zswap_pool_current();
0534 }
0535 
0536 static struct zswap_pool *zswap_pool_current_get(void)
0537 {
0538     struct zswap_pool *pool;
0539 
0540     rcu_read_lock();
0541 
0542     pool = __zswap_pool_current();
0543     if (!zswap_pool_get(pool))
0544         pool = NULL;
0545 
0546     rcu_read_unlock();
0547 
0548     return pool;
0549 }
0550 
0551 static struct zswap_pool *zswap_pool_last_get(void)
0552 {
0553     struct zswap_pool *pool, *last = NULL;
0554 
0555     rcu_read_lock();
0556 
0557     list_for_each_entry_rcu(pool, &zswap_pools, list)
0558         last = pool;
0559     WARN_ONCE(!last && zswap_has_pool,
0560           "%s: no page storage pool!\n", __func__);
0561     if (!zswap_pool_get(last))
0562         last = NULL;
0563 
0564     rcu_read_unlock();
0565 
0566     return last;
0567 }
0568 
0569 /* type and compressor must be null-terminated */
0570 static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
0571 {
0572     struct zswap_pool *pool;
0573 
0574     assert_spin_locked(&zswap_pools_lock);
0575 
0576     list_for_each_entry_rcu(pool, &zswap_pools, list) {
0577         if (strcmp(pool->tfm_name, compressor))
0578             continue;
0579         if (strcmp(zpool_get_type(pool->zpool), type))
0580             continue;
0581         /* if we can't get it, it's about to be destroyed */
0582         if (!zswap_pool_get(pool))
0583             continue;
0584         return pool;
0585     }
0586 
0587     return NULL;
0588 }
0589 
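/*
 * Queued from zswap_frontswap_store() when a store cannot be accepted.
 * zpool_shrink() asks the allocator to evict one stored object, which lands
 * in zswap_writeback_entry() via the .evict callback in zswap_zpool_ops
 * above.
 */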
0590 static void shrink_worker(struct work_struct *w)
0591 {
0592     struct zswap_pool *pool = container_of(w, typeof(*pool),
0593                         shrink_work);
0594 
0595     if (zpool_shrink(pool->zpool, 1, NULL))
0596         zswap_reject_reclaim_fail++;
0597     zswap_pool_put(pool);
0598 }
0599 
0600 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
0601 {
0602     struct zswap_pool *pool;
0603     char name[38]; /* 'zswap' + 32 char (max) num + \0 */
0604     gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
0605     int ret;
0606 
0607     if (!zswap_has_pool) {
0608         /* if either is unset, pool initialization failed, and we
0609          * need both params to be set correctly before trying to
0610          * create a pool.
0611          */
0612         if (!strcmp(type, ZSWAP_PARAM_UNSET))
0613             return NULL;
0614         if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
0615             return NULL;
0616     }
0617 
0618     pool = kzalloc(sizeof(*pool), GFP_KERNEL);
0619     if (!pool)
0620         return NULL;
0621 
0622     /* unique name for each pool specifically required by zsmalloc */
0623     snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
0624 
0625     pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
0626     if (!pool->zpool) {
0627         pr_err("%s zpool not available\n", type);
0628         goto error;
0629     }
0630     pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
0631 
0632     strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
0633 
0634     pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
0635     if (!pool->acomp_ctx) {
0636         pr_err("percpu alloc failed\n");
0637         goto error;
0638     }
0639 
0640     ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
0641                        &pool->node);
0642     if (ret)
0643         goto error;
0644     pr_debug("using %s compressor\n", pool->tfm_name);
0645 
0646     /* being the current pool takes 1 ref; this func expects the
0647      * caller to always add the new pool as the current pool
0648      */
0649     kref_init(&pool->kref);
0650     INIT_LIST_HEAD(&pool->list);
0651     INIT_WORK(&pool->shrink_work, shrink_worker);
0652 
0653     zswap_pool_debug("created", pool);
0654 
0655     return pool;
0656 
0657 error:
0658     if (pool->acomp_ctx)
0659         free_percpu(pool->acomp_ctx);
0660     if (pool->zpool)
0661         zpool_destroy_pool(pool->zpool);
0662     kfree(pool);
0663     return NULL;
0664 }
0665 
0666 static __init struct zswap_pool *__zswap_pool_create_fallback(void)
0667 {
0668     bool has_comp, has_zpool;
0669 
0670     has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
0671     if (!has_comp && strcmp(zswap_compressor,
0672                 CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
0673         pr_err("compressor %s not available, using default %s\n",
0674                zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
0675         param_free_charp(&zswap_compressor);
0676         zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
0677         has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
0678     }
0679     if (!has_comp) {
0680         pr_err("default compressor %s not available\n",
0681                zswap_compressor);
0682         param_free_charp(&zswap_compressor);
0683         zswap_compressor = ZSWAP_PARAM_UNSET;
0684     }
0685 
0686     has_zpool = zpool_has_pool(zswap_zpool_type);
0687     if (!has_zpool && strcmp(zswap_zpool_type,
0688                  CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
0689         pr_err("zpool %s not available, using default %s\n",
0690                zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
0691         param_free_charp(&zswap_zpool_type);
0692         zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
0693         has_zpool = zpool_has_pool(zswap_zpool_type);
0694     }
0695     if (!has_zpool) {
0696         pr_err("default zpool %s not available\n",
0697                zswap_zpool_type);
0698         param_free_charp(&zswap_zpool_type);
0699         zswap_zpool_type = ZSWAP_PARAM_UNSET;
0700     }
0701 
0702     if (!has_comp || !has_zpool)
0703         return NULL;
0704 
0705     return zswap_pool_create(zswap_zpool_type, zswap_compressor);
0706 }
0707 
0708 static void zswap_pool_destroy(struct zswap_pool *pool)
0709 {
0710     zswap_pool_debug("destroying", pool);
0711 
0712     cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
0713     free_percpu(pool->acomp_ctx);
0714     zpool_destroy_pool(pool->zpool);
0715     kfree(pool);
0716 }
0717 
0718 static int __must_check zswap_pool_get(struct zswap_pool *pool)
0719 {
0720     if (!pool)
0721         return 0;
0722 
0723     return kref_get_unless_zero(&pool->kref);
0724 }
0725 
0726 static void __zswap_pool_release(struct work_struct *work)
0727 {
0728     struct zswap_pool *pool = container_of(work, typeof(*pool),
0729                         release_work);
0730 
0731     synchronize_rcu();
0732 
0733     /* nobody should have been able to get a kref... */
0734     WARN_ON(kref_get_unless_zero(&pool->kref));
0735 
0736     /* pool is now off zswap_pools list and has no references. */
0737     zswap_pool_destroy(pool);
0738 }
0739 
0740 static void __zswap_pool_empty(struct kref *kref)
0741 {
0742     struct zswap_pool *pool;
0743 
0744     pool = container_of(kref, typeof(*pool), kref);
0745 
0746     spin_lock(&zswap_pools_lock);
0747 
0748     WARN_ON(pool == zswap_pool_current());
0749 
0750     list_del_rcu(&pool->list);
0751 
0752     INIT_WORK(&pool->release_work, __zswap_pool_release);
0753     schedule_work(&pool->release_work);
0754 
0755     spin_unlock(&zswap_pools_lock);
0756 }
0757 
0758 static void zswap_pool_put(struct zswap_pool *pool)
0759 {
0760     kref_put(&pool->kref, __zswap_pool_empty);
0761 }
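/*
 * Pool teardown sequence: the final zswap_pool_put() drops the kref to zero,
 * __zswap_pool_empty() unlinks the pool from zswap_pools under the spinlock
 * and schedules __zswap_pool_release(), which waits out an RCU grace period
 * (so concurrent list walkers are gone) before zswap_pool_destroy() frees
 * everything.
 */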
0762 
0763 /*********************************
0764 * param callbacks
0765 **********************************/
0766 
0767 /* val must be a null-terminated string */
0768 static int __zswap_param_set(const char *val, const struct kernel_param *kp,
0769                  char *type, char *compressor)
0770 {
0771     struct zswap_pool *pool, *put_pool = NULL;
0772     char *s = strstrip((char *)val);
0773     int ret;
0774 
0775     if (zswap_init_failed) {
0776         pr_err("can't set param, initialization failed\n");
0777         return -ENODEV;
0778     }
0779 
0780     /* no change required */
0781     if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
0782         return 0;
0783 
0784     /* if this is load-time (pre-init) param setting,
0785      * don't create a pool; that's done during init.
0786      */
0787     if (!zswap_init_started)
0788         return param_set_charp(s, kp);
0789 
0790     if (!type) {
0791         if (!zpool_has_pool(s)) {
0792             pr_err("zpool %s not available\n", s);
0793             return -ENOENT;
0794         }
0795         type = s;
0796     } else if (!compressor) {
0797         if (!crypto_has_acomp(s, 0, 0)) {
0798             pr_err("compressor %s not available\n", s);
0799             return -ENOENT;
0800         }
0801         compressor = s;
0802     } else {
0803         WARN_ON(1);
0804         return -EINVAL;
0805     }
0806 
0807     spin_lock(&zswap_pools_lock);
0808 
0809     pool = zswap_pool_find_get(type, compressor);
0810     if (pool) {
0811         zswap_pool_debug("using existing", pool);
0812         WARN_ON(pool == zswap_pool_current());
0813         list_del_rcu(&pool->list);
0814     }
0815 
0816     spin_unlock(&zswap_pools_lock);
0817 
0818     if (!pool)
0819         pool = zswap_pool_create(type, compressor);
0820 
0821     if (pool)
0822         ret = param_set_charp(s, kp);
0823     else
0824         ret = -EINVAL;
0825 
0826     spin_lock(&zswap_pools_lock);
0827 
0828     if (!ret) {
0829         put_pool = zswap_pool_current();
0830         list_add_rcu(&pool->list, &zswap_pools);
0831         zswap_has_pool = true;
0832     } else if (pool) {
0833         /* add the possibly pre-existing pool to the end of the pools
0834          * list; if it's new (and empty) then it'll be removed and
0835          * destroyed by the put after we drop the lock
0836          */
0837         list_add_tail_rcu(&pool->list, &zswap_pools);
0838         put_pool = pool;
0839     }
0840 
0841     spin_unlock(&zswap_pools_lock);
0842 
0843     if (!zswap_has_pool && !pool) {
0844         /* if initial pool creation failed, and this pool creation also
0845          * failed, maybe both compressor and zpool params were bad.
0846          * Allow changing this param, so pool creation will succeed
0847          * when the other param is changed. We already verified this
0848          * param is ok in the zpool_has_pool() or crypto_has_acomp()
0849          * checks above.
0850          */
0851         ret = param_set_charp(s, kp);
0852     }
0853 
0854     /* drop the ref from either the old current pool,
0855      * or the new pool we failed to add
0856      */
0857     if (put_pool)
0858         zswap_pool_put(put_pool);
0859 
0860     return ret;
0861 }
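/*
 * Runtime example (assuming the standard module-parameter path): writing a
 * new compressor name, e.g.
 *
 *      echo zstd > /sys/module/zswap/parameters/compressor
 *
 * ends up here.  A pool for (current zpool, "zstd") is looked up or created
 * and becomes the new current pool; the old pool stays on the list until the
 * entries still holding references to it are freed.
 */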
0862 
0863 static int zswap_compressor_param_set(const char *val,
0864                       const struct kernel_param *kp)
0865 {
0866     return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
0867 }
0868 
0869 static int zswap_zpool_param_set(const char *val,
0870                  const struct kernel_param *kp)
0871 {
0872     return __zswap_param_set(val, kp, NULL, zswap_compressor);
0873 }
0874 
0875 static int zswap_enabled_param_set(const char *val,
0876                    const struct kernel_param *kp)
0877 {
0878     if (zswap_init_failed) {
0879         pr_err("can't enable, initialization failed\n");
0880         return -ENODEV;
0881     }
0882     if (!zswap_has_pool && zswap_init_started) {
0883         pr_err("can't enable, no pool configured\n");
0884         return -ENODEV;
0885     }
0886 
0887     return param_set_bool(val, kp);
0888 }
0889 
0890 /*********************************
0891 * writeback code
0892 **********************************/
0893 /* return enum for zswap_get_swap_cache_page */
0894 enum zswap_get_swap_ret {
0895     ZSWAP_SWAPCACHE_NEW,
0896     ZSWAP_SWAPCACHE_EXIST,
0897     ZSWAP_SWAPCACHE_FAIL,
0898 };
0899 
0900 /*
0901  * zswap_get_swap_cache_page
0902  *
0903  * This is an adaptation of read_swap_cache_async()
0904  *
0905  * This function tries to find a page with the given swap entry
0906  * in the swapper_space address space (the swap cache).  If the page
0907  * is found, it is returned in retpage.  Otherwise, a page is allocated,
0908  * added to the swap cache, and returned in retpage.
0909  *
0910  * On success, the swap cache page is returned in retpage
0911  * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
0912  * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
0913  *     the new page is added to swapcache and locked
0914  * Returns ZSWAP_SWAPCACHE_FAIL on error
0915  */
0916 static int zswap_get_swap_cache_page(swp_entry_t entry,
0917                 struct page **retpage)
0918 {
0919     bool page_was_allocated;
0920 
0921     *retpage = __read_swap_cache_async(entry, GFP_KERNEL,
0922             NULL, 0, &page_was_allocated);
0923     if (page_was_allocated)
0924         return ZSWAP_SWAPCACHE_NEW;
0925     if (!*retpage)
0926         return ZSWAP_SWAPCACHE_FAIL;
0927     return ZSWAP_SWAPCACHE_EXIST;
0928 }
0929 
0930 /*
0931  * Attempts to free an entry by adding a page to the swap cache,
0932  * decompressing the entry data into the page, and issuing a
0933  * bio write to write the page back to the swap device.
0934  *
0935  * This can be thought of as a "resumed writeback" of the page
0936  * to the swap device.  We are basically resuming the same swap
0937  * writeback path that was intercepted with the frontswap_store()
0938  * in the first place.  After the page has been decompressed into
0939  * the swap cache, the compressed version stored by zswap can be
0940  * freed.
0941  */
0942 static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
0943 {
0944     struct zswap_header *zhdr;
0945     swp_entry_t swpentry;
0946     struct zswap_tree *tree;
0947     pgoff_t offset;
0948     struct zswap_entry *entry;
0949     struct page *page;
0950     struct scatterlist input, output;
0951     struct crypto_acomp_ctx *acomp_ctx;
0952 
0953     u8 *src, *tmp = NULL;
0954     unsigned int dlen;
0955     int ret;
0956     struct writeback_control wbc = {
0957         .sync_mode = WB_SYNC_NONE,
0958     };
0959 
0960     if (!zpool_can_sleep_mapped(pool)) {
0961         tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
0962         if (!tmp)
0963             return -ENOMEM;
0964     }
0965 
0966     /* extract swpentry from data */
0967     zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
0968     swpentry = zhdr->swpentry; /* here */
0969     tree = zswap_trees[swp_type(swpentry)];
0970     offset = swp_offset(swpentry);
0971 
0972     /* find and ref zswap entry */
0973     spin_lock(&tree->lock);
0974     entry = zswap_entry_find_get(&tree->rbroot, offset);
0975     if (!entry) {
0976         /* entry was invalidated */
0977         spin_unlock(&tree->lock);
0978         zpool_unmap_handle(pool, handle);
0979         kfree(tmp);
0980         return 0;
0981     }
0982     spin_unlock(&tree->lock);
0983     BUG_ON(offset != entry->offset);
0984 
0985     src = (u8 *)zhdr + sizeof(struct zswap_header);
0986     if (!zpool_can_sleep_mapped(pool)) {
0987         memcpy(tmp, src, entry->length);
0988         src = tmp;
0989         zpool_unmap_handle(pool, handle);
0990     }
0991 
0992     /* try to allocate swap cache page */
0993     switch (zswap_get_swap_cache_page(swpentry, &page)) {
0994     case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
0995         ret = -ENOMEM;
0996         goto fail;
0997 
0998     case ZSWAP_SWAPCACHE_EXIST:
0999         /* page is already in the swap cache, ignore for now */
1000         put_page(page);
1001         ret = -EEXIST;
1002         goto fail;
1003 
1004     case ZSWAP_SWAPCACHE_NEW: /* page is locked */
1005         /* decompress */
1006         acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1007         dlen = PAGE_SIZE;
1008 
1009         mutex_lock(acomp_ctx->mutex);
1010         sg_init_one(&input, src, entry->length);
1011         sg_init_table(&output, 1);
1012         sg_set_page(&output, page, PAGE_SIZE, 0);
1013         acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
1014         ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
1015         dlen = acomp_ctx->req->dlen;
1016         mutex_unlock(acomp_ctx->mutex);
1017 
1018         BUG_ON(ret);
1019         BUG_ON(dlen != PAGE_SIZE);
1020 
1021         /* page is up to date */
1022         SetPageUptodate(page);
1023     }
1024 
1025     /* move it to the tail of the inactive list after end_writeback */
1026     SetPageReclaim(page);
1027 
1028     /* start writeback */
1029     __swap_writepage(page, &wbc, end_swap_bio_write);
1030     put_page(page);
1031     zswap_written_back_pages++;
1032 
1033     spin_lock(&tree->lock);
1034     /* drop local reference */
1035     zswap_entry_put(tree, entry);
1036 
1037     /*
1038     * There are two possible situations for the entry here:
1039     * (1) refcount is 1 (normal case), entry is valid and on the tree
1040     * (2) refcount is 0, entry was freed and removed from the tree
1041     *     because an invalidate happened during writeback
1042     * search the tree and drop the tree reference if the entry is found
1043     */
1044     if (entry == zswap_rb_search(&tree->rbroot, offset))
1045         zswap_entry_put(tree, entry);
1046     spin_unlock(&tree->lock);
1047 
1048     goto end;
1049 
1050     /*
1051     * if we get here due to ZSWAP_SWAPCACHE_EXIST,
1052     * a load may be happening concurrently;
1053     * it is safe and okay to not free the entry.
1054     * if we free the entry in the following put,
1055     * it is also okay to return !0
1056     */
1057 fail:
1058     spin_lock(&tree->lock);
1059     zswap_entry_put(tree, entry);
1060     spin_unlock(&tree->lock);
1061 
1062 end:
1063     if (zpool_can_sleep_mapped(pool))
1064         zpool_unmap_handle(pool, handle);
1065     else
1066         kfree(tmp);
1067 
1068     return ret;
1069 }
1070 
1071 static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1072 {
1073     unsigned int pos;
1074     unsigned long *page;
1075 
1076     page = (unsigned long *)ptr;
1077     for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
1078         if (page[pos] != page[0])
1079             return 0;
1080     }
1081     *value = page[0];
1082     return 1;
1083 }
1084 
1085 static void zswap_fill_page(void *ptr, unsigned long value)
1086 {
1087     unsigned long *page;
1088 
1089     page = (unsigned long *)ptr;
1090     memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1091 }
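/*
 * Example: a page of all zero bytes is detected as same-value filled with
 * value == 0, so no compressed copy is kept; only entry->value is recorded
 * (with entry->length == 0) and zswap_fill_page() rebuilds the page with
 * memset_l() on load.
 */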
1092 
1093 /*********************************
1094 * frontswap hooks
1095 **********************************/
1096 /* attempts to compress and store a single page */
1097 static int zswap_frontswap_store(unsigned type, pgoff_t offset,
1098                 struct page *page)
1099 {
1100     struct zswap_tree *tree = zswap_trees[type];
1101     struct zswap_entry *entry, *dupentry;
1102     struct scatterlist input, output;
1103     struct crypto_acomp_ctx *acomp_ctx;
1104     struct obj_cgroup *objcg = NULL;
1105     struct zswap_pool *pool;
1106     int ret;
1107     unsigned int hlen, dlen = PAGE_SIZE;
1108     unsigned long handle, value;
1109     char *buf;
1110     u8 *src, *dst;
1111     struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1112     gfp_t gfp;
1113 
1114     /* THP isn't supported */
1115     if (PageTransHuge(page)) {
1116         ret = -EINVAL;
1117         goto reject;
1118     }
1119 
1120     if (!zswap_enabled || !tree) {
1121         ret = -ENODEV;
1122         goto reject;
1123     }
1124 
1125     objcg = get_obj_cgroup_from_page(page);
1126     if (objcg && !obj_cgroup_may_zswap(objcg))
1127         goto shrink;
1128 
1129     /* reclaim space if needed */
1130     if (zswap_is_full()) {
1131         zswap_pool_limit_hit++;
1132         zswap_pool_reached_full = true;
1133         goto shrink;
1134     }
1135 
1136     if (zswap_pool_reached_full) {
1137         if (!zswap_can_accept()) {
1138             ret = -ENOMEM;
1139             goto reject;
1140         } else
1141             zswap_pool_reached_full = false;
1142     }
1143 
1144     /* allocate entry */
1145     entry = zswap_entry_cache_alloc(GFP_KERNEL);
1146     if (!entry) {
1147         zswap_reject_kmemcache_fail++;
1148         ret = -ENOMEM;
1149         goto reject;
1150     }
1151 
1152     if (zswap_same_filled_pages_enabled) {
1153         src = kmap_atomic(page);
1154         if (zswap_is_page_same_filled(src, &value)) {
1155             kunmap_atomic(src);
1156             entry->offset = offset;
1157             entry->length = 0;
1158             entry->value = value;
1159             atomic_inc(&zswap_same_filled_pages);
1160             goto insert_entry;
1161         }
1162         kunmap_atomic(src);
1163     }
1164 
1165     if (!zswap_non_same_filled_pages_enabled) {
1166         ret = -EINVAL;
1167         goto freepage;
1168     }
1169 
1170     /* if entry is successfully added, it keeps the reference */
1171     entry->pool = zswap_pool_current_get();
1172     if (!entry->pool) {
1173         ret = -EINVAL;
1174         goto freepage;
1175     }
1176 
1177     /* compress */
1178     acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1179 
1180     mutex_lock(acomp_ctx->mutex);
1181 
1182     dst = acomp_ctx->dstmem;
1183     sg_init_table(&input, 1);
1184     sg_set_page(&input, page, PAGE_SIZE, 0);
1185 
1186     /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect that in the sg_list */
1187     sg_init_one(&output, dst, PAGE_SIZE * 2);
1188     acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
1189     /*
1190      * It may look a little silly that we send an asynchronous request and
1191      * then wait for its completion synchronously; in effect the process is
1192      * synchronous.
1193      * Theoretically, acomp lets users submit multiple requests to one
1194      * acomp instance and have them completed simultaneously, but in this
1195      * case frontswap stores and loads page by page, so a single thread
1196      * doing frontswap has no way to send the second page before the
1197      * first page is done.
1198      * However, different threads running on different cpus have different
1199      * acomp instances, so multiple threads can do (de)compression in parallel.
1200      */
1201     ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
1202     dlen = acomp_ctx->req->dlen;
1203 
1204     if (ret) {
1205         ret = -EINVAL;
1206         goto put_dstmem;
1207     }
1208 
1209     /* store */
1210     hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1211     gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1212     if (zpool_malloc_support_movable(entry->pool->zpool))
1213         gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1214     ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
1215     if (ret == -ENOSPC) {
1216         zswap_reject_compress_poor++;
1217         goto put_dstmem;
1218     }
1219     if (ret) {
1220         zswap_reject_alloc_fail++;
1221         goto put_dstmem;
1222     }
1223     buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
1224     memcpy(buf, &zhdr, hlen);
1225     memcpy(buf + hlen, dst, dlen);
1226     zpool_unmap_handle(entry->pool->zpool, handle);
1227     mutex_unlock(acomp_ctx->mutex);
1228 
1229     /* populate entry */
1230     entry->offset = offset;
1231     entry->handle = handle;
1232     entry->length = dlen;
1233 
1234 insert_entry:
1235     entry->objcg = objcg;
1236     if (objcg) {
1237         obj_cgroup_charge_zswap(objcg, entry->length);
1238         /* Account before objcg ref is moved to tree */
1239         count_objcg_event(objcg, ZSWPOUT);
1240     }
1241 
1242     /* map */
1243     spin_lock(&tree->lock);
1244     do {
1245         ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
1246         if (ret == -EEXIST) {
1247             zswap_duplicate_entry++;
1248             /* remove from rbtree */
1249             zswap_rb_erase(&tree->rbroot, dupentry);
1250             zswap_entry_put(tree, dupentry);
1251         }
1252     } while (ret == -EEXIST);
1253     spin_unlock(&tree->lock);
1254 
1255     /* update stats */
1256     atomic_inc(&zswap_stored_pages);
1257     zswap_update_total_size();
1258     count_vm_event(ZSWPOUT);
1259 
1260     return 0;
1261 
1262 put_dstmem:
1263     mutex_unlock(acomp_ctx->mutex);
1264     zswap_pool_put(entry->pool);
1265 freepage:
1266     zswap_entry_cache_free(entry);
1267 reject:
1268     if (objcg)
1269         obj_cgroup_put(objcg);
1270     return ret;
1271 
1272 shrink:
1273     pool = zswap_pool_last_get();
1274     if (pool)
1275         queue_work(shrink_wq, &pool->shrink_work);
1276     ret = -ENOMEM;
1277     goto reject;
1278 }
1279 
1280 /*
1281  * returns 0 if the page was successfully decompressed
1282  * returns -1 if the entry was not found or on error
1283  */
1284 static int zswap_frontswap_load(unsigned type, pgoff_t offset,
1285                 struct page *page)
1286 {
1287     struct zswap_tree *tree = zswap_trees[type];
1288     struct zswap_entry *entry;
1289     struct scatterlist input, output;
1290     struct crypto_acomp_ctx *acomp_ctx;
1291     u8 *src, *dst, *tmp;
1292     unsigned int dlen;
1293     int ret;
1294 
1295     /* find */
1296     spin_lock(&tree->lock);
1297     entry = zswap_entry_find_get(&tree->rbroot, offset);
1298     if (!entry) {
1299         /* entry was written back */
1300         spin_unlock(&tree->lock);
1301         return -1;
1302     }
1303     spin_unlock(&tree->lock);
1304 
1305     if (!entry->length) {
1306         dst = kmap_atomic(page);
1307         zswap_fill_page(dst, entry->value);
1308         kunmap_atomic(dst);
1309         ret = 0;
1310         goto stats;
1311     }
1312 
1313     if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
1314         tmp = kmalloc(entry->length, GFP_ATOMIC);
1315         if (!tmp) {
1316             ret = -ENOMEM;
1317             goto freeentry;
1318         }
1319     }
1320 
1321     /* decompress */
1322     dlen = PAGE_SIZE;
1323     src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
1324     if (zpool_evictable(entry->pool->zpool))
1325         src += sizeof(struct zswap_header);
1326 
1327     if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
1328         memcpy(tmp, src, entry->length);
1329         src = tmp;
1330         zpool_unmap_handle(entry->pool->zpool, entry->handle);
1331     }
1332 
1333     acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1334     mutex_lock(acomp_ctx->mutex);
1335     sg_init_one(&input, src, entry->length);
1336     sg_init_table(&output, 1);
1337     sg_set_page(&output, page, PAGE_SIZE, 0);
1338     acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
1339     ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
1340     mutex_unlock(acomp_ctx->mutex);
1341 
1342     if (zpool_can_sleep_mapped(entry->pool->zpool))
1343         zpool_unmap_handle(entry->pool->zpool, entry->handle);
1344     else
1345         kfree(tmp);
1346 
1347     BUG_ON(ret);
1348 stats:
1349     count_vm_event(ZSWPIN);
1350     if (entry->objcg)
1351         count_objcg_event(entry->objcg, ZSWPIN);
1352 freeentry:
1353     spin_lock(&tree->lock);
1354     zswap_entry_put(tree, entry);
1355     spin_unlock(&tree->lock);
1356 
1357     return ret;
1358 }
1359 
1360 /* frees an entry in zswap */
1361 static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
1362 {
1363     struct zswap_tree *tree = zswap_trees[type];
1364     struct zswap_entry *entry;
1365 
1366     /* find */
1367     spin_lock(&tree->lock);
1368     entry = zswap_rb_search(&tree->rbroot, offset);
1369     if (!entry) {
1370         /* entry was written back */
1371         spin_unlock(&tree->lock);
1372         return;
1373     }
1374 
1375     /* remove from rbtree */
1376     zswap_rb_erase(&tree->rbroot, entry);
1377 
1378     /* drop the initial reference from entry creation */
1379     zswap_entry_put(tree, entry);
1380 
1381     spin_unlock(&tree->lock);
1382 }
1383 
1384 /* frees all zswap entries for the given swap type */
1385 static void zswap_frontswap_invalidate_area(unsigned type)
1386 {
1387     struct zswap_tree *tree = zswap_trees[type];
1388     struct zswap_entry *entry, *n;
1389 
1390     if (!tree)
1391         return;
1392 
1393     /* walk the tree and free everything */
1394     spin_lock(&tree->lock);
1395     rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
1396         zswap_free_entry(entry);
1397     tree->rbroot = RB_ROOT;
1398     spin_unlock(&tree->lock);
1399     kfree(tree);
1400     zswap_trees[type] = NULL;
1401 }
1402 
1403 static void zswap_frontswap_init(unsigned type)
1404 {
1405     struct zswap_tree *tree;
1406 
1407     tree = kzalloc(sizeof(*tree), GFP_KERNEL);
1408     if (!tree) {
1409         pr_err("alloc failed, zswap disabled for swap type %d\n", type);
1410         return;
1411     }
1412 
1413     tree->rbroot = RB_ROOT;
1414     spin_lock_init(&tree->lock);
1415     zswap_trees[type] = tree;
1416 }
1417 
1418 static const struct frontswap_ops zswap_frontswap_ops = {
1419     .store = zswap_frontswap_store,
1420     .load = zswap_frontswap_load,
1421     .invalidate_page = zswap_frontswap_invalidate_page,
1422     .invalidate_area = zswap_frontswap_invalidate_area,
1423     .init = zswap_frontswap_init
1424 };
1425 
1426 /*********************************
1427 * debugfs functions
1428 **********************************/
1429 #ifdef CONFIG_DEBUG_FS
1430 #include <linux/debugfs.h>
1431 
1432 static struct dentry *zswap_debugfs_root;
1433 
1434 static int __init zswap_debugfs_init(void)
1435 {
1436     if (!debugfs_initialized())
1437         return -ENODEV;
1438 
1439     zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1440 
1441     debugfs_create_u64("pool_limit_hit", 0444,
1442                zswap_debugfs_root, &zswap_pool_limit_hit);
1443     debugfs_create_u64("reject_reclaim_fail", 0444,
1444                zswap_debugfs_root, &zswap_reject_reclaim_fail);
1445     debugfs_create_u64("reject_alloc_fail", 0444,
1446                zswap_debugfs_root, &zswap_reject_alloc_fail);
1447     debugfs_create_u64("reject_kmemcache_fail", 0444,
1448                zswap_debugfs_root, &zswap_reject_kmemcache_fail);
1449     debugfs_create_u64("reject_compress_poor", 0444,
1450                zswap_debugfs_root, &zswap_reject_compress_poor);
1451     debugfs_create_u64("written_back_pages", 0444,
1452                zswap_debugfs_root, &zswap_written_back_pages);
1453     debugfs_create_u64("duplicate_entry", 0444,
1454                zswap_debugfs_root, &zswap_duplicate_entry);
1455     debugfs_create_u64("pool_total_size", 0444,
1456                zswap_debugfs_root, &zswap_pool_total_size);
1457     debugfs_create_atomic_t("stored_pages", 0444,
1458                 zswap_debugfs_root, &zswap_stored_pages);
1459     debugfs_create_atomic_t("same_filled_pages", 0444,
1460                 zswap_debugfs_root, &zswap_same_filled_pages);
1461 
1462     return 0;
1463 }
1464 #else
1465 static int __init zswap_debugfs_init(void)
1466 {
1467     return 0;
1468 }
1469 #endif
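/*
 * With CONFIG_DEBUG_FS and debugfs mounted at the usual location, the
 * counters above can be inspected at runtime, e.g.:
 *
 *      cat /sys/kernel/debug/zswap/pool_total_size
 *      cat /sys/kernel/debug/zswap/stored_pages
 */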
1470 
1471 /*********************************
1472 * module init and exit
1473 **********************************/
1474 static int __init init_zswap(void)
1475 {
1476     struct zswap_pool *pool;
1477     int ret;
1478 
1479     zswap_init_started = true;
1480 
1481     if (zswap_entry_cache_create()) {
1482         pr_err("entry cache creation failed\n");
1483         goto cache_fail;
1484     }
1485 
1486     ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
1487                 zswap_dstmem_prepare, zswap_dstmem_dead);
1488     if (ret) {
1489         pr_err("dstmem alloc failed\n");
1490         goto dstmem_fail;
1491     }
1492 
1493     ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
1494                       "mm/zswap_pool:prepare",
1495                       zswap_cpu_comp_prepare,
1496                       zswap_cpu_comp_dead);
1497     if (ret)
1498         goto hp_fail;
1499 
1500     pool = __zswap_pool_create_fallback();
1501     if (pool) {
1502         pr_info("loaded using pool %s/%s\n", pool->tfm_name,
1503             zpool_get_type(pool->zpool));
1504         list_add(&pool->list, &zswap_pools);
1505         zswap_has_pool = true;
1506     } else {
1507         pr_err("pool creation failed\n");
1508         zswap_enabled = false;
1509     }
1510 
1511     shrink_wq = create_workqueue("zswap-shrink");
1512     if (!shrink_wq)
1513         goto fallback_fail;
1514 
1515     ret = frontswap_register_ops(&zswap_frontswap_ops);
1516     if (ret)
1517         goto destroy_wq;
1518     if (zswap_debugfs_init())
1519         pr_warn("debugfs initialization failed\n");
1520     return 0;
1521 
1522 destroy_wq:
1523     destroy_workqueue(shrink_wq);
1524 fallback_fail:
1525     if (pool)
1526         zswap_pool_destroy(pool);
1527 hp_fail:
1528     cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
1529 dstmem_fail:
1530     zswap_entry_cache_destroy();
1531 cache_fail:
1532     /* if built-in, we aren't unloaded on failure; don't allow use */
1533     zswap_init_failed = true;
1534     zswap_enabled = false;
1535     return -ENOMEM;
1536 }
1537 /* must be late so crypto has time to come up */
1538 late_initcall(init_zswap);
1539 
1540 MODULE_LICENSE("GPL");
1541 MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
1542 MODULE_DESCRIPTION("Compressed cache for swap pages");