/*
 * zsmalloc memory allocator
 *
 * Released under a dual BSD/GPL license (see MODULE_LICENSE below).
 *
 * How fields and flags of the underlying struct page(s) are used to form
 * a zspage:
 *
 *	page->private: points to the zspage
 *	page->index: links together all component pages of a zspage;
 *		for a huge zspage it instead stores the object handle
 *	page->page_type: offset of the first object in a subpage
 *
 *	PG_private: identifies the first component page
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

/*
 * Lock ordering:
 *	page_lock
 *	pool->migrate_lock
 *	class->lock
 *	zspage->lock
 */

0041 #include <linux/module.h>
0042 #include <linux/kernel.h>
0043 #include <linux/sched.h>
0044 #include <linux/bitops.h>
0045 #include <linux/errno.h>
0046 #include <linux/highmem.h>
0047 #include <linux/string.h>
0048 #include <linux/slab.h>
0049 #include <linux/pgtable.h>
0050 #include <asm/tlbflush.h>
0051 #include <linux/cpumask.h>
0052 #include <linux/cpu.h>
0053 #include <linux/vmalloc.h>
0054 #include <linux/preempt.h>
0055 #include <linux/spinlock.h>
0056 #include <linux/shrinker.h>
0057 #include <linux/types.h>
0058 #include <linux/debugfs.h>
0059 #include <linux/zsmalloc.h>
0060 #include <linux/zpool.h>
0061 #include <linux/migrate.h>
0062 #include <linux/wait.h>
0063 #include <linux/pagemap.h>
0064 #include <linux/fs.h>
0065 #include <linux/local_lock.h>
0066
0067 #define ZSPAGE_MAGIC 0x58
0068
0069
0070
0071
0072
0073
0074
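/*
 * ZS_ALIGN must be a power of two and at least sizeof(struct link_free),
 * so that a struct link_free placed at the start of a free object never
 * spans two pages.
 */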
0075 #define ZS_ALIGN 8
0076
0077
0078
0079
0080
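/*
 * A single zspage is composed of up to 2^ZS_MAX_ZSPAGE_ORDER discontiguous
 * 0-order (single) pages.
 */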
0081 #define ZS_MAX_ZSPAGE_ORDER 2
0082 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
0083
0084 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
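/*
 * An object location (<PFN>, <obj_idx>) is encoded as a single unsigned
 * long handle value, so the number of index bits available depends on how
 * many bits a PFN can occupy, which in turn depends on the memory model
 * and the maximum physical address width.
 */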
0095 #ifndef MAX_POSSIBLE_PHYSMEM_BITS
0096 #ifdef MAX_PHYSMEM_BITS
0097 #define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
0098 #else
0099
0100
0101
0102
0103 #define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
0104 #endif
0105 #endif
0106
0107 #define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
0108
0109
0110
0111
0112
0113
0114
0115
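/*
 * The least significant bit of the value stored in an object's header tags
 * it as allocated. Handles are at least word-aligned addresses, so the low
 * bit is otherwise always zero.
 */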
0116 #define OBJ_ALLOCATED_TAG 1
0117 #define OBJ_TAG_BITS 1
0118 #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
0119 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
0120
0121 #define HUGE_BITS 1
0122 #define FULLNESS_BITS 2
0123 #define CLASS_BITS 8
0124 #define ISOLATED_BITS 3
0125 #define MAGIC_VAL_BITS 8
0126
0127 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
0128
0129 #define ZS_MIN_ALLOC_SIZE \
0130 MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
0131
0132 #define ZS_MAX_ALLOC_SIZE PAGE_SIZE
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
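/*
 * With 4K pages this yields 255 size classes, one every
 * ZS_SIZE_CLASS_DELTA bytes. More classes spread free space thinly across
 * them, fewer classes increase internal fragmentation; one class per
 * (PAGE_SIZE >> CLASS_BITS) step is the compromise used here.
 */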
0147 #define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> CLASS_BITS)
0148 #define ZS_SIZE_CLASSES (DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \
0149 ZS_SIZE_CLASS_DELTA) + 1)
0150
0151 enum fullness_group {
0152 ZS_EMPTY,
0153 ZS_ALMOST_EMPTY,
0154 ZS_ALMOST_FULL,
0155 ZS_FULL,
0156 NR_ZS_FULLNESS,
0157 };
0158
0159 enum class_stat_type {
0160 CLASS_EMPTY,
0161 CLASS_ALMOST_EMPTY,
0162 CLASS_ALMOST_FULL,
0163 CLASS_FULL,
0164 OBJ_ALLOCATED,
0165 OBJ_USED,
0166 NR_ZS_STAT_TYPE,
0167 };
0168
0169 struct zs_size_stat {
0170 unsigned long objs[NR_ZS_STAT_TYPE];
0171 };
0172
0173 #ifdef CONFIG_ZSMALLOC_STAT
0174 static struct dentry *zs_stat_root;
0175 #endif
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
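/*
 * A zspage is placed in ZS_ALMOST_EMPTY when
 *	n <= 3 * N / fullness_threshold_frac, where
 * n = number of allocated objects and N = total objects the zspage can
 * store; otherwise it is ZS_ALMOST_FULL, with ZS_EMPTY/ZS_FULL at the
 * extremes (see get_fullness_group()).
 */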
0191 static const int fullness_threshold_frac = 4;
0192 static size_t huge_class_size;
0193
struct size_class {
	spinlock_t lock;
	struct list_head fullness_list[NR_ZS_FULLNESS];
	/*
	 * Size of objects stored in this class. Must be a multiple
	 * of ZS_ALIGN.
	 */
	int size;
	int objs_per_zspage;
	/* Number of PAGE_SIZE sized pages combined to form a 'zspage' */
	int pages_per_zspage;

	unsigned int index;
	struct zs_size_stat stats;
};
0209
0210
0211
0212
0213
0214
0215
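/*
 * struct link_free is placed at the start of every free object; for each
 * zspage, zspage->freeobj gives the head of the resulting singly linked
 * free list. While an object is free, "next" holds the encoded index of
 * the next free object; once allocated, the slot holds the object's handle
 * instead.
 */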
0216 struct link_free {
0217 union {
0218
0219
0220
0221
0222 unsigned long next;
0223
0224
0225
0226 unsigned long handle;
0227 };
0228 };
0229
0230 struct zs_pool {
0231 const char *name;
0232
0233 struct size_class *size_class[ZS_SIZE_CLASSES];
0234 struct kmem_cache *handle_cachep;
0235 struct kmem_cache *zspage_cachep;
0236
0237 atomic_long_t pages_allocated;
0238
0239 struct zs_pool_stats stats;
0240
0241
0242 struct shrinker shrinker;
0243
0244 #ifdef CONFIG_ZSMALLOC_STAT
0245 struct dentry *stat_dentry;
0246 #endif
0247 #ifdef CONFIG_COMPACTION
0248 struct work_struct free_work;
0249 #endif
0250
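	/*
	 * Protects the race between zspage migration and
	 * zs_free()/zs_map_object().
	 */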
0251 rwlock_t migrate_lock;
0252 };
0253
0254 struct zspage {
0255 struct {
0256 unsigned int huge:HUGE_BITS;
0257 unsigned int fullness:FULLNESS_BITS;
0258 unsigned int class:CLASS_BITS + 1;
0259 unsigned int isolated:ISOLATED_BITS;
0260 unsigned int magic:MAGIC_VAL_BITS;
0261 };
0262 unsigned int inuse;
0263 unsigned int freeobj;
0264 struct page *first_page;
0265 struct list_head list;
0266 struct zs_pool *pool;
0267 #ifdef CONFIG_COMPACTION
0268 rwlock_t lock;
0269 #endif
0270 };
0271
0272 struct mapping_area {
0273 local_lock_t lock;
0274 char *vm_buf;
0275 char *vm_addr;
0276 enum zs_mapmode vm_mm;
0277 };
0278
0279
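/* huge object: pages_per_zspage == 1 && objs_per_zspage == 1 */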
0280 static void SetZsHugePage(struct zspage *zspage)
0281 {
0282 zspage->huge = 1;
0283 }
0284
0285 static bool ZsHugePage(struct zspage *zspage)
0286 {
0287 return zspage->huge;
0288 }
0289
0290 #ifdef CONFIG_COMPACTION
0291 static void migrate_lock_init(struct zspage *zspage);
0292 static void migrate_read_lock(struct zspage *zspage);
0293 static void migrate_read_unlock(struct zspage *zspage);
0294 static void migrate_write_lock(struct zspage *zspage);
0295 static void migrate_write_lock_nested(struct zspage *zspage);
0296 static void migrate_write_unlock(struct zspage *zspage);
0297 static void kick_deferred_free(struct zs_pool *pool);
0298 static void init_deferred_free(struct zs_pool *pool);
0299 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
0300 #else
0301 static void migrate_lock_init(struct zspage *zspage) {}
0302 static void migrate_read_lock(struct zspage *zspage) {}
0303 static void migrate_read_unlock(struct zspage *zspage) {}
0304 static void migrate_write_lock(struct zspage *zspage) {}
0305 static void migrate_write_lock_nested(struct zspage *zspage) {}
0306 static void migrate_write_unlock(struct zspage *zspage) {}
0307 static void kick_deferred_free(struct zs_pool *pool) {}
0308 static void init_deferred_free(struct zs_pool *pool) {}
0309 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
0310 #endif
0311
0312 static int create_cache(struct zs_pool *pool)
0313 {
0314 pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
0315 0, 0, NULL);
0316 if (!pool->handle_cachep)
0317 return 1;
0318
0319 pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
0320 0, 0, NULL);
0321 if (!pool->zspage_cachep) {
0322 kmem_cache_destroy(pool->handle_cachep);
0323 pool->handle_cachep = NULL;
0324 return 1;
0325 }
0326
0327 return 0;
0328 }
0329
0330 static void destroy_cache(struct zs_pool *pool)
0331 {
0332 kmem_cache_destroy(pool->handle_cachep);
0333 kmem_cache_destroy(pool->zspage_cachep);
0334 }
0335
0336 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
0337 {
0338 return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
0339 gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
0340 }
0341
0342 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
0343 {
0344 kmem_cache_free(pool->handle_cachep, (void *)handle);
0345 }
0346
0347 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
0348 {
0349 return kmem_cache_zalloc(pool->zspage_cachep,
0350 flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
0351 }
0352
0353 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
0354 {
0355 kmem_cache_free(pool->zspage_cachep, zspage);
0356 }
0357
0358
0359 static void record_obj(unsigned long handle, unsigned long obj)
0360 {
0361 *(unsigned long *)handle = obj;
0362 }
0363
0364
0365
0366 #ifdef CONFIG_ZPOOL
0367
0368 static void *zs_zpool_create(const char *name, gfp_t gfp,
0369 const struct zpool_ops *zpool_ops,
0370 struct zpool *zpool)
0371 {
0372
0373
0374
0375
0376
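	/*
	 * The gfp passed in here is not used: zs_malloc() callers provide
	 * their own gfp mask per allocation.
	 */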
0377 return zs_create_pool(name);
0378 }
0379
0380 static void zs_zpool_destroy(void *pool)
0381 {
0382 zs_destroy_pool(pool);
0383 }
0384
0385 static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
0386 unsigned long *handle)
0387 {
0388 *handle = zs_malloc(pool, size, gfp);
0389
0390 if (IS_ERR((void *)(*handle)))
0391 return PTR_ERR((void *)*handle);
0392 return 0;
0393 }
0394 static void zs_zpool_free(void *pool, unsigned long handle)
0395 {
0396 zs_free(pool, handle);
0397 }
0398
0399 static void *zs_zpool_map(void *pool, unsigned long handle,
0400 enum zpool_mapmode mm)
0401 {
0402 enum zs_mapmode zs_mm;
0403
0404 switch (mm) {
0405 case ZPOOL_MM_RO:
0406 zs_mm = ZS_MM_RO;
0407 break;
0408 case ZPOOL_MM_WO:
0409 zs_mm = ZS_MM_WO;
0410 break;
0411 case ZPOOL_MM_RW:
0412 default:
0413 zs_mm = ZS_MM_RW;
0414 break;
0415 }
0416
0417 return zs_map_object(pool, handle, zs_mm);
0418 }
0419 static void zs_zpool_unmap(void *pool, unsigned long handle)
0420 {
0421 zs_unmap_object(pool, handle);
0422 }
0423
0424 static u64 zs_zpool_total_size(void *pool)
0425 {
0426 return zs_get_total_pages(pool) << PAGE_SHIFT;
0427 }
0428
0429 static struct zpool_driver zs_zpool_driver = {
0430 .type = "zsmalloc",
0431 .owner = THIS_MODULE,
0432 .create = zs_zpool_create,
0433 .destroy = zs_zpool_destroy,
0434 .malloc_support_movable = true,
0435 .malloc = zs_zpool_malloc,
0436 .free = zs_zpool_free,
0437 .map = zs_zpool_map,
0438 .unmap = zs_zpool_unmap,
0439 .total_size = zs_zpool_total_size,
0440 };
0441
0442 MODULE_ALIAS("zpool-zsmalloc");
0443 #endif
0444
0445
0446 static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
0447 .lock = INIT_LOCAL_LOCK(lock),
0448 };
0449
0450 static __maybe_unused int is_first_page(struct page *page)
0451 {
0452 return PagePrivate(page);
0453 }
0454
0455
0456 static inline int get_zspage_inuse(struct zspage *zspage)
0457 {
0458 return zspage->inuse;
0459 }
0460
0461
0462 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
0463 {
0464 zspage->inuse += val;
0465 }
0466
0467 static inline struct page *get_first_page(struct zspage *zspage)
0468 {
0469 struct page *first_page = zspage->first_page;
0470
0471 VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
0472 return first_page;
0473 }
0474
0475 static inline int get_first_obj_offset(struct page *page)
0476 {
0477 return page->page_type;
0478 }
0479
0480 static inline void set_first_obj_offset(struct page *page, int offset)
0481 {
0482 page->page_type = offset;
0483 }
0484
0485 static inline unsigned int get_freeobj(struct zspage *zspage)
0486 {
0487 return zspage->freeobj;
0488 }
0489
0490 static inline void set_freeobj(struct zspage *zspage, unsigned int obj)
0491 {
0492 zspage->freeobj = obj;
0493 }
0494
0495 static void get_zspage_mapping(struct zspage *zspage,
0496 unsigned int *class_idx,
0497 enum fullness_group *fullness)
0498 {
0499 BUG_ON(zspage->magic != ZSPAGE_MAGIC);
0500
0501 *fullness = zspage->fullness;
0502 *class_idx = zspage->class;
0503 }
0504
0505 static struct size_class *zspage_class(struct zs_pool *pool,
0506 struct zspage *zspage)
0507 {
0508 return pool->size_class[zspage->class];
0509 }
0510
0511 static void set_zspage_mapping(struct zspage *zspage,
0512 unsigned int class_idx,
0513 enum fullness_group fullness)
0514 {
0515 zspage->class = class_idx;
0516 zspage->fullness = fullness;
0517 }
0518
0519
0520
0521
0522
0523
0524
0525
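/*
 * Map an allocation size to the index of the size class whose chunk size
 * is the smallest one able to hold it.
 */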
0526 static int get_size_class_index(int size)
0527 {
0528 int idx = 0;
0529
0530 if (likely(size > ZS_MIN_ALLOC_SIZE))
0531 idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
0532 ZS_SIZE_CLASS_DELTA);
0533
0534 return min_t(int, ZS_SIZE_CLASSES - 1, idx);
0535 }
0536
0537
0538 static inline void class_stat_inc(struct size_class *class,
0539 int type, unsigned long cnt)
0540 {
0541 class->stats.objs[type] += cnt;
0542 }
0543
0544
0545 static inline void class_stat_dec(struct size_class *class,
0546 int type, unsigned long cnt)
0547 {
0548 class->stats.objs[type] -= cnt;
0549 }
0550
0551
0552 static inline unsigned long zs_stat_get(struct size_class *class,
0553 int type)
0554 {
0555 return class->stats.objs[type];
0556 }
0557
0558 #ifdef CONFIG_ZSMALLOC_STAT
0559
0560 static void __init zs_stat_init(void)
0561 {
0562 if (!debugfs_initialized()) {
0563 pr_warn("debugfs not available, stat dir not created\n");
0564 return;
0565 }
0566
0567 zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
0568 }
0569
0570 static void __exit zs_stat_exit(void)
0571 {
0572 debugfs_remove_recursive(zs_stat_root);
0573 }
0574
0575 static unsigned long zs_can_compact(struct size_class *class);
0576
0577 static int zs_stats_size_show(struct seq_file *s, void *v)
0578 {
0579 int i;
0580 struct zs_pool *pool = s->private;
0581 struct size_class *class;
0582 int objs_per_zspage;
0583 unsigned long class_almost_full, class_almost_empty;
0584 unsigned long obj_allocated, obj_used, pages_used, freeable;
0585 unsigned long total_class_almost_full = 0, total_class_almost_empty = 0;
0586 unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
0587 unsigned long total_freeable = 0;
0588
0589 seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s %8s\n",
0590 "class", "size", "almost_full", "almost_empty",
0591 "obj_allocated", "obj_used", "pages_used",
0592 "pages_per_zspage", "freeable");
0593
0594 for (i = 0; i < ZS_SIZE_CLASSES; i++) {
0595 class = pool->size_class[i];
0596
0597 if (class->index != i)
0598 continue;
0599
0600 spin_lock(&class->lock);
0601 class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
0602 class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
0603 obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
0604 obj_used = zs_stat_get(class, OBJ_USED);
0605 freeable = zs_can_compact(class);
0606 spin_unlock(&class->lock);
0607
0608 objs_per_zspage = class->objs_per_zspage;
0609 pages_used = obj_allocated / objs_per_zspage *
0610 class->pages_per_zspage;
0611
0612 seq_printf(s, " %5u %5u %11lu %12lu %13lu"
0613 " %10lu %10lu %16d %8lu\n",
0614 i, class->size, class_almost_full, class_almost_empty,
0615 obj_allocated, obj_used, pages_used,
0616 class->pages_per_zspage, freeable);
0617
0618 total_class_almost_full += class_almost_full;
0619 total_class_almost_empty += class_almost_empty;
0620 total_objs += obj_allocated;
0621 total_used_objs += obj_used;
0622 total_pages += pages_used;
0623 total_freeable += freeable;
0624 }
0625
0626 seq_puts(s, "\n");
0627 seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu %16s %8lu\n",
0628 "Total", "", total_class_almost_full,
0629 total_class_almost_empty, total_objs,
0630 total_used_objs, total_pages, "", total_freeable);
0631
0632 return 0;
0633 }
0634 DEFINE_SHOW_ATTRIBUTE(zs_stats_size);
0635
0636 static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
0637 {
0638 if (!zs_stat_root) {
0639 pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
0640 return;
0641 }
0642
0643 pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
0644
0645 debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
0646 &zs_stats_size_fops);
0647 }
0648
0649 static void zs_pool_stat_destroy(struct zs_pool *pool)
0650 {
0651 debugfs_remove_recursive(pool->stat_dentry);
0652 }
0653
0654 #else
0655 static void __init zs_stat_init(void)
0656 {
0657 }
0658
0659 static void __exit zs_stat_exit(void)
0660 {
0661 }
0662
0663 static inline void zs_pool_stat_create(struct zs_pool *pool, const char *name)
0664 {
0665 }
0666
0667 static inline void zs_pool_stat_destroy(struct zs_pool *pool)
0668 {
0669 }
0670 #endif
0671
0672
0673
0674
0675
0676
0677
0678
0679
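/*
 * For each size class, zspages are divided into fullness groups depending
 * on how "full" they are, so nearly empty zspages can be found quickly
 * when shrinking or compacting the pool.
 */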
0680 static enum fullness_group get_fullness_group(struct size_class *class,
0681 struct zspage *zspage)
0682 {
0683 int inuse, objs_per_zspage;
0684 enum fullness_group fg;
0685
0686 inuse = get_zspage_inuse(zspage);
0687 objs_per_zspage = class->objs_per_zspage;
0688
0689 if (inuse == 0)
0690 fg = ZS_EMPTY;
0691 else if (inuse == objs_per_zspage)
0692 fg = ZS_FULL;
0693 else if (inuse <= 3 * objs_per_zspage / fullness_threshold_frac)
0694 fg = ZS_ALMOST_EMPTY;
0695 else
0696 fg = ZS_ALMOST_FULL;
0697
0698 return fg;
0699 }
0700
0701
0702
0703
0704
0705
0706
0707 static void insert_zspage(struct size_class *class,
0708 struct zspage *zspage,
0709 enum fullness_group fullness)
0710 {
0711 struct zspage *head;
0712
0713 class_stat_inc(class, fullness, 1);
0714 head = list_first_entry_or_null(&class->fullness_list[fullness],
0715 struct zspage, list);
0716
0717
0718
0719
0720 if (head && get_zspage_inuse(zspage) < get_zspage_inuse(head))
0721 list_add(&zspage->list, &head->list);
0722 else
0723 list_add(&zspage->list, &class->fullness_list[fullness]);
0724 }
0725
0726
0727
0728
0729
0730 static void remove_zspage(struct size_class *class,
0731 struct zspage *zspage,
0732 enum fullness_group fullness)
0733 {
0734 VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
0735
0736 list_del_init(&zspage->list);
0737 class_stat_dec(class, fullness, 1);
0738 }
0739
0740
0741
0742
0743
0744
0745
0746
0747
0748
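/*
 * Allocating or freeing an object can change a zspage's fullness status
 * (say, from ZS_ALMOST_FULL to ZS_ALMOST_EMPTY). If it did, move the
 * zspage to the list of its new fullness group and return that group.
 */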
0749 static enum fullness_group fix_fullness_group(struct size_class *class,
0750 struct zspage *zspage)
0751 {
	unsigned int class_idx;
0753 enum fullness_group currfg, newfg;
0754
0755 get_zspage_mapping(zspage, &class_idx, &currfg);
0756 newfg = get_fullness_group(class, zspage);
0757 if (newfg == currfg)
0758 goto out;
0759
0760 remove_zspage(class, zspage, currfg);
0761 insert_zspage(class, zspage, newfg);
0762 set_zspage_mapping(zspage, class_idx, newfg);
0763 out:
0764 return newfg;
0765 }
0766
0767
0768
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778
0779
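/*
 * Decide how many 0-order pages to chain together for a zspage of this
 * class. A k-page zspage wastes (k * PAGE_SIZE) % class_size bytes, so
 * pick the k in [1, ZS_MAX_PAGES_PER_ZSPAGE] with the best utilization.
 * For example, a class of size 3/8 * PAGE_SIZE packs 8 objects perfectly
 * into a 3-page zspage.
 */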
0780 static int get_pages_per_zspage(int class_size)
0781 {
0782 int i, max_usedpc = 0;
0783
0784 int max_usedpc_order = 1;
0785
0786 for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
0787 int zspage_size;
0788 int waste, usedpc;
0789
0790 zspage_size = i * PAGE_SIZE;
0791 waste = zspage_size % class_size;
0792 usedpc = (zspage_size - waste) * 100 / zspage_size;
0793
0794 if (usedpc > max_usedpc) {
0795 max_usedpc = usedpc;
0796 max_usedpc_order = i;
0797 }
0798 }
0799
0800 return max_usedpc_order;
0801 }
0802
0803 static struct zspage *get_zspage(struct page *page)
0804 {
0805 struct zspage *zspage = (struct zspage *)page_private(page);
0806
0807 BUG_ON(zspage->magic != ZSPAGE_MAGIC);
0808 return zspage;
0809 }
0810
0811 static struct page *get_next_page(struct page *page)
0812 {
0813 struct zspage *zspage = get_zspage(page);
0814
0815 if (unlikely(ZsHugePage(zspage)))
0816 return NULL;
0817
0818 return (struct page *)page->index;
0819 }
0820
0821
0822
0823
0824
0825
0826
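/*
 * obj_to_location - decode an object value into (<page>, <obj_idx>).
 * The encoding is ((PFN << OBJ_INDEX_BITS) | obj_idx) << OBJ_TAG_BITS.
 */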
0827 static void obj_to_location(unsigned long obj, struct page **page,
0828 unsigned int *obj_idx)
0829 {
0830 obj >>= OBJ_TAG_BITS;
0831 *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
0832 *obj_idx = (obj & OBJ_INDEX_MASK);
0833 }
0834
0835 static void obj_to_page(unsigned long obj, struct page **page)
0836 {
0837 obj >>= OBJ_TAG_BITS;
0838 *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
0839 }
0840
0841
0842
0843
0844
0845
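/*
 * location_to_obj - encode (<page>, <obj_idx>) into an object value;
 * the inverse of obj_to_location().
 */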
0846 static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
0847 {
0848 unsigned long obj;
0849
0850 obj = page_to_pfn(page) << OBJ_INDEX_BITS;
0851 obj |= obj_idx & OBJ_INDEX_MASK;
0852 obj <<= OBJ_TAG_BITS;
0853
0854 return obj;
0855 }
0856
0857 static unsigned long handle_to_obj(unsigned long handle)
0858 {
0859 return *(unsigned long *)handle;
0860 }
0861
0862 static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
0863 {
0864 unsigned long handle;
0865 struct zspage *zspage = get_zspage(page);
0866
0867 if (unlikely(ZsHugePage(zspage))) {
0868 VM_BUG_ON_PAGE(!is_first_page(page), page);
0869 handle = page->index;
0870 } else
0871 handle = *(unsigned long *)obj;
0872
0873 if (!(handle & OBJ_ALLOCATED_TAG))
0874 return false;
0875
0876 *phandle = handle & ~OBJ_ALLOCATED_TAG;
0877 return true;
0878 }
0879
0880 static void reset_page(struct page *page)
0881 {
0882 __ClearPageMovable(page);
0883 ClearPagePrivate(page);
0884 set_page_private(page, 0);
0885 page_mapcount_reset(page);
0886 page->index = 0;
0887 }
0888
0889 static int trylock_zspage(struct zspage *zspage)
0890 {
0891 struct page *cursor, *fail;
0892
0893 for (cursor = get_first_page(zspage); cursor != NULL; cursor =
0894 get_next_page(cursor)) {
0895 if (!trylock_page(cursor)) {
0896 fail = cursor;
0897 goto unlock;
0898 }
0899 }
0900
0901 return 1;
0902 unlock:
0903 for (cursor = get_first_page(zspage); cursor != fail; cursor =
0904 get_next_page(cursor))
0905 unlock_page(cursor);
0906
0907 return 0;
0908 }
0909
0910 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
0911 struct zspage *zspage)
0912 {
0913 struct page *page, *next;
0914 enum fullness_group fg;
0915 unsigned int class_idx;
0916
0917 get_zspage_mapping(zspage, &class_idx, &fg);
0918
0919 assert_spin_locked(&class->lock);
0920
0921 VM_BUG_ON(get_zspage_inuse(zspage));
0922 VM_BUG_ON(fg != ZS_EMPTY);
0923
0924 next = page = get_first_page(zspage);
0925 do {
0926 VM_BUG_ON_PAGE(!PageLocked(page), page);
0927 next = get_next_page(page);
0928 reset_page(page);
0929 unlock_page(page);
0930 dec_zone_page_state(page, NR_ZSPAGES);
0931 put_page(page);
0932 page = next;
0933 } while (page != NULL);
0934
0935 cache_free_zspage(pool, zspage);
0936
0937 class_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
0938 atomic_long_sub(class->pages_per_zspage,
0939 &pool->pages_allocated);
0940 }
0941
0942 static void free_zspage(struct zs_pool *pool, struct size_class *class,
0943 struct zspage *zspage)
0944 {
0945 VM_BUG_ON(get_zspage_inuse(zspage));
0946 VM_BUG_ON(list_empty(&zspage->list));
0947
0948
0949
0950
0951
0952
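	/*
	 * zs_free() cannot sleep, so lock_page() cannot be used here. If
	 * trylock_zspage() fails, defer the free to the workqueue; on
	 * success the page locks it took are released by __free_zspage().
	 */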
0953 if (!trylock_zspage(zspage)) {
0954 kick_deferred_free(pool);
0955 return;
0956 }
0957
0958 remove_zspage(class, zspage, ZS_EMPTY);
0959 __free_zspage(pool, class, zspage);
0960 }
0961
0962
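/* Initialize the free-object list that threads through a fresh zspage. */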
0963 static void init_zspage(struct size_class *class, struct zspage *zspage)
0964 {
0965 unsigned int freeobj = 1;
0966 unsigned long off = 0;
0967 struct page *page = get_first_page(zspage);
0968
0969 while (page) {
0970 struct page *next_page;
0971 struct link_free *link;
0972 void *vaddr;
0973
0974 set_first_obj_offset(page, off);
0975
0976 vaddr = kmap_atomic(page);
0977 link = (struct link_free *)vaddr + off / sizeof(*link);
0978
0979 while ((off += class->size) < PAGE_SIZE) {
0980 link->next = freeobj++ << OBJ_TAG_BITS;
0981 link += class->size / sizeof(*link);
0982 }
0983
0984
0985
0986
0987
0988
0989 next_page = get_next_page(page);
0990 if (next_page) {
0991 link->next = freeobj++ << OBJ_TAG_BITS;
0992 } else {
0993
0994
0995
0996
0997 link->next = -1UL << OBJ_TAG_BITS;
0998 }
0999 kunmap_atomic(vaddr);
1000 page = next_page;
1001 off %= PAGE_SIZE;
1002 }
1003
1004 set_freeobj(zspage, 0);
1005 }
1006
1007 static void create_page_chain(struct size_class *class, struct zspage *zspage,
1008 struct page *pages[])
1009 {
1010 int i;
1011 struct page *page;
1012 struct page *prev_page = NULL;
1013 int nr_pages = class->pages_per_zspage;
1014
1015
1016
1017
1018
1019
1020
1021
1022
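	/*
	 * 1. all pages are linked together using page->index
	 * 2. each sub-page points to the zspage via page->private
	 * 3. PG_private identifies the first page
	 */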
1023 for (i = 0; i < nr_pages; i++) {
1024 page = pages[i];
1025 set_page_private(page, (unsigned long)zspage);
1026 page->index = 0;
1027 if (i == 0) {
1028 zspage->first_page = page;
1029 SetPagePrivate(page);
1030 if (unlikely(class->objs_per_zspage == 1 &&
1031 class->pages_per_zspage == 1))
1032 SetZsHugePage(zspage);
1033 } else {
1034 prev_page->index = (unsigned long)page;
1035 }
1036 prev_page = page;
1037 }
1038 }
1039
1040
1041
1042
1043 static struct zspage *alloc_zspage(struct zs_pool *pool,
1044 struct size_class *class,
1045 gfp_t gfp)
1046 {
1047 int i;
1048 struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE];
1049 struct zspage *zspage = cache_alloc_zspage(pool, gfp);
1050
1051 if (!zspage)
1052 return NULL;
1053
1054 zspage->magic = ZSPAGE_MAGIC;
1055 migrate_lock_init(zspage);
1056
1057 for (i = 0; i < class->pages_per_zspage; i++) {
1058 struct page *page;
1059
1060 page = alloc_page(gfp);
1061 if (!page) {
1062 while (--i >= 0) {
1063 dec_zone_page_state(pages[i], NR_ZSPAGES);
1064 __free_page(pages[i]);
1065 }
1066 cache_free_zspage(pool, zspage);
1067 return NULL;
1068 }
1069
1070 inc_zone_page_state(page, NR_ZSPAGES);
1071 pages[i] = page;
1072 }
1073
1074 create_page_chain(class, zspage, pages);
1075 init_zspage(class, zspage);
1076 zspage->pool = pool;
1077
1078 return zspage;
1079 }
1080
1081 static struct zspage *find_get_zspage(struct size_class *class)
1082 {
1083 int i;
1084 struct zspage *zspage;
1085
1086 for (i = ZS_ALMOST_FULL; i >= ZS_EMPTY; i--) {
1087 zspage = list_first_entry_or_null(&class->fullness_list[i],
1088 struct zspage, list);
1089 if (zspage)
1090 break;
1091 }
1092
1093 return zspage;
1094 }
1095
1096 static inline int __zs_cpu_up(struct mapping_area *area)
1097 {
1098
1099
1100
1101
1102 if (area->vm_buf)
1103 return 0;
1104 area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);
1105 if (!area->vm_buf)
1106 return -ENOMEM;
1107 return 0;
1108 }
1109
1110 static inline void __zs_cpu_down(struct mapping_area *area)
1111 {
1112 kfree(area->vm_buf);
1113 area->vm_buf = NULL;
1114 }
1115
1116 static void *__zs_map_object(struct mapping_area *area,
1117 struct page *pages[2], int off, int size)
1118 {
1119 int sizes[2];
1120 void *addr;
1121 char *buf = area->vm_buf;
1122
1123
1124 pagefault_disable();
1125
1126
1127 if (area->vm_mm == ZS_MM_WO)
1128 goto out;
1129
1130 sizes[0] = PAGE_SIZE - off;
1131 sizes[1] = size - sizes[0];
1132
1133
1134 addr = kmap_atomic(pages[0]);
1135 memcpy(buf, addr + off, sizes[0]);
1136 kunmap_atomic(addr);
1137 addr = kmap_atomic(pages[1]);
1138 memcpy(buf + sizes[0], addr, sizes[1]);
1139 kunmap_atomic(addr);
1140 out:
1141 return area->vm_buf;
1142 }
1143
1144 static void __zs_unmap_object(struct mapping_area *area,
1145 struct page *pages[2], int off, int size)
1146 {
1147 int sizes[2];
1148 void *addr;
1149 char *buf;
1150
1151
1152 if (area->vm_mm == ZS_MM_RO)
1153 goto out;
1154
1155 buf = area->vm_buf;
1156 buf = buf + ZS_HANDLE_SIZE;
1157 size -= ZS_HANDLE_SIZE;
1158 off += ZS_HANDLE_SIZE;
1159
1160 sizes[0] = PAGE_SIZE - off;
1161 sizes[1] = size - sizes[0];
1162
1163
1164 addr = kmap_atomic(pages[0]);
1165 memcpy(addr + off, buf, sizes[0]);
1166 kunmap_atomic(addr);
1167 addr = kmap_atomic(pages[1]);
1168 memcpy(addr, buf + sizes[0], sizes[1]);
1169 kunmap_atomic(addr);
1170
1171 out:
1172
1173 pagefault_enable();
1174 }
1175
1176 static int zs_cpu_prepare(unsigned int cpu)
1177 {
1178 struct mapping_area *area;
1179
1180 area = &per_cpu(zs_map_area, cpu);
1181 return __zs_cpu_up(area);
1182 }
1183
1184 static int zs_cpu_dead(unsigned int cpu)
1185 {
1186 struct mapping_area *area;
1187
1188 area = &per_cpu(zs_map_area, cpu);
1189 __zs_cpu_down(area);
1190 return 0;
1191 }
1192
1193 static bool can_merge(struct size_class *prev, int pages_per_zspage,
1194 int objs_per_zspage)
1195 {
1196 if (prev->pages_per_zspage == pages_per_zspage &&
1197 prev->objs_per_zspage == objs_per_zspage)
1198 return true;
1199
1200 return false;
1201 }
1202
1203 static bool zspage_full(struct size_class *class, struct zspage *zspage)
1204 {
1205 return get_zspage_inuse(zspage) == class->objs_per_zspage;
1206 }
1207
1208 unsigned long zs_get_total_pages(struct zs_pool *pool)
1209 {
1210 return atomic_long_read(&pool->pages_allocated);
1211 }
1212 EXPORT_SYMBOL_GPL(zs_get_total_pages);
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
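/**
 * zs_map_object - get address of allocated object from handle.
 * @pool: pool from which the object was allocated
 * @handle: handle returned from zs_malloc
 * @mm: mapping mode to use
 *
 * Before using an object allocated from zs_malloc, it must be mapped using
 * this function, and unmapped with zs_unmap_object when done. Only one
 * object can be mapped per cpu at a time; the function returns with
 * preemption and page faults disabled.
 */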
1229 void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1230 enum zs_mapmode mm)
1231 {
1232 struct zspage *zspage;
1233 struct page *page;
1234 unsigned long obj, off;
1235 unsigned int obj_idx;
1236
1237 struct size_class *class;
1238 struct mapping_area *area;
1239 struct page *pages[2];
1240 void *ret;
1241
1242
1243
1244
1245
1246
1247 BUG_ON(in_interrupt());
1248
1249
1250 read_lock(&pool->migrate_lock);
1251 obj = handle_to_obj(handle);
1252 obj_to_location(obj, &page, &obj_idx);
1253 zspage = get_zspage(page);
1254
1255
1256
1257
1258
1259
1260
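	/*
	 * Migration cannot move any object in this zspage while the zspage
	 * lock is held. class->lock would be too heavy to keep until
	 * zs_unmap_object(), so pin the zspage with this finer-grained lock
	 * and then drop pool->migrate_lock.
	 */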
1261 migrate_read_lock(zspage);
1262 read_unlock(&pool->migrate_lock);
1263
1264 class = zspage_class(pool, zspage);
1265 off = (class->size * obj_idx) & ~PAGE_MASK;
1266
1267 local_lock(&zs_map_area.lock);
1268 area = this_cpu_ptr(&zs_map_area);
1269 area->vm_mm = mm;
1270 if (off + class->size <= PAGE_SIZE) {
1271
1272 area->vm_addr = kmap_atomic(page);
1273 ret = area->vm_addr + off;
1274 goto out;
1275 }
1276
1277
1278 pages[0] = page;
1279 pages[1] = get_next_page(page);
1280 BUG_ON(!pages[1]);
1281
1282 ret = __zs_map_object(area, pages, off, class->size);
1283 out:
1284 if (likely(!ZsHugePage(zspage)))
1285 ret += ZS_HANDLE_SIZE;
1286
1287 return ret;
1288 }
1289 EXPORT_SYMBOL_GPL(zs_map_object);
1290
1291 void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1292 {
1293 struct zspage *zspage;
1294 struct page *page;
1295 unsigned long obj, off;
1296 unsigned int obj_idx;
1297
1298 struct size_class *class;
1299 struct mapping_area *area;
1300
1301 obj = handle_to_obj(handle);
1302 obj_to_location(obj, &page, &obj_idx);
1303 zspage = get_zspage(page);
1304 class = zspage_class(pool, zspage);
1305 off = (class->size * obj_idx) & ~PAGE_MASK;
1306
1307 area = this_cpu_ptr(&zs_map_area);
1308 if (off + class->size <= PAGE_SIZE)
1309 kunmap_atomic(area->vm_addr);
1310 else {
1311 struct page *pages[2];
1312
1313 pages[0] = page;
1314 pages[1] = get_next_page(page);
1315 BUG_ON(!pages[1]);
1316
1317 __zs_unmap_object(area, pages, off, class->size);
1318 }
1319 local_unlock(&zs_map_area.lock);
1320
1321 migrate_read_unlock(zspage);
1322 }
1323 EXPORT_SYMBOL_GPL(zs_unmap_object);
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
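/**
 * zs_huge_class_size() - Returns the size (in bytes) of the first huge
 *                        zsmalloc &size_class.
 * @pool: zsmalloc pool to use
 *
 * Any object of equal or bigger size is stored in a zspage consisting of
 * a single physical page.
 */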
1338 size_t zs_huge_class_size(struct zs_pool *pool)
1339 {
1340 return huge_class_size;
1341 }
1342 EXPORT_SYMBOL_GPL(zs_huge_class_size);
1343
1344 static unsigned long obj_malloc(struct zs_pool *pool,
1345 struct zspage *zspage, unsigned long handle)
1346 {
1347 int i, nr_page, offset;
1348 unsigned long obj;
1349 struct link_free *link;
1350 struct size_class *class;
1351
1352 struct page *m_page;
1353 unsigned long m_offset;
1354 void *vaddr;
1355
1356 class = pool->size_class[zspage->class];
1357 handle |= OBJ_ALLOCATED_TAG;
1358 obj = get_freeobj(zspage);
1359
1360 offset = obj * class->size;
1361 nr_page = offset >> PAGE_SHIFT;
1362 m_offset = offset & ~PAGE_MASK;
1363 m_page = get_first_page(zspage);
1364
1365 for (i = 0; i < nr_page; i++)
1366 m_page = get_next_page(m_page);
1367
1368 vaddr = kmap_atomic(m_page);
1369 link = (struct link_free *)vaddr + m_offset / sizeof(*link);
1370 set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
1371 if (likely(!ZsHugePage(zspage)))
1372
1373 link->handle = handle;
1374 else
1375
1376 zspage->first_page->index = handle;
1377
1378 kunmap_atomic(vaddr);
1379 mod_zspage_inuse(zspage, 1);
1380
1381 obj = location_to_obj(m_page, obj);
1382
1383 return obj;
1384 }
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
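/**
 * zs_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @gfp: gfp flags when allocating object
 *
 * On success, the handle to the allocated object is returned, otherwise an
 * ERR_PTR() cast to unsigned long. Allocation requests with
 * size > ZS_MAX_ALLOC_SIZE fail.
 */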
1397 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
1398 {
1399 unsigned long handle, obj;
1400 struct size_class *class;
1401 enum fullness_group newfg;
1402 struct zspage *zspage;
1403
1404 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
1405 return (unsigned long)ERR_PTR(-EINVAL);
1406
1407 handle = cache_alloc_handle(pool, gfp);
1408 if (!handle)
1409 return (unsigned long)ERR_PTR(-ENOMEM);
1410
1411
1412 size += ZS_HANDLE_SIZE;
1413 class = pool->size_class[get_size_class_index(size)];
1414
1415
1416 spin_lock(&class->lock);
1417 zspage = find_get_zspage(class);
1418 if (likely(zspage)) {
1419 obj = obj_malloc(pool, zspage, handle);
1420
1421 fix_fullness_group(class, zspage);
1422 record_obj(handle, obj);
1423 class_stat_inc(class, OBJ_USED, 1);
1424 spin_unlock(&class->lock);
1425
1426 return handle;
1427 }
1428
1429 spin_unlock(&class->lock);
1430
1431 zspage = alloc_zspage(pool, class, gfp);
1432 if (!zspage) {
1433 cache_free_handle(pool, handle);
1434 return (unsigned long)ERR_PTR(-ENOMEM);
1435 }
1436
1437 spin_lock(&class->lock);
1438 obj = obj_malloc(pool, zspage, handle);
1439 newfg = get_fullness_group(class, zspage);
1440 insert_zspage(class, zspage, newfg);
1441 set_zspage_mapping(zspage, class->index, newfg);
1442 record_obj(handle, obj);
1443 atomic_long_add(class->pages_per_zspage,
1444 &pool->pages_allocated);
1445 class_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
1446 class_stat_inc(class, OBJ_USED, 1);
1447
1448
1449 SetZsPageMovable(pool, zspage);
1450 spin_unlock(&class->lock);
1451
1452 return handle;
1453 }
1454 EXPORT_SYMBOL_GPL(zs_malloc);
1455
1456 static void obj_free(int class_size, unsigned long obj)
1457 {
1458 struct link_free *link;
1459 struct zspage *zspage;
1460 struct page *f_page;
1461 unsigned long f_offset;
1462 unsigned int f_objidx;
1463 void *vaddr;
1464
1465 obj_to_location(obj, &f_page, &f_objidx);
1466 f_offset = (class_size * f_objidx) & ~PAGE_MASK;
1467 zspage = get_zspage(f_page);
1468
1469 vaddr = kmap_atomic(f_page);
1470
1471
1472 link = (struct link_free *)(vaddr + f_offset);
1473 if (likely(!ZsHugePage(zspage)))
1474 link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
1475 else
1476 f_page->index = 0;
1477 kunmap_atomic(vaddr);
1478 set_freeobj(zspage, f_objidx);
1479 mod_zspage_inuse(zspage, -1);
1480 }
1481
1482 void zs_free(struct zs_pool *pool, unsigned long handle)
1483 {
1484 struct zspage *zspage;
1485 struct page *f_page;
1486 unsigned long obj;
1487 struct size_class *class;
1488 enum fullness_group fullness;
1489
1490 if (IS_ERR_OR_NULL((void *)handle))
1491 return;
1492
1493
1494
1495
1496
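	/*
	 * pool->migrate_lock protects against racing zspage migration, so it
	 * is safe to look up the page from the handle here.
	 */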
1497 read_lock(&pool->migrate_lock);
1498 obj = handle_to_obj(handle);
1499 obj_to_page(obj, &f_page);
1500 zspage = get_zspage(f_page);
1501 class = zspage_class(pool, zspage);
1502 spin_lock(&class->lock);
1503 read_unlock(&pool->migrate_lock);
1504
1505 obj_free(class->size, obj);
1506 class_stat_dec(class, OBJ_USED, 1);
1507 fullness = fix_fullness_group(class, zspage);
1508 if (fullness != ZS_EMPTY)
1509 goto out;
1510
1511 free_zspage(pool, class, zspage);
1512 out:
1513 spin_unlock(&class->lock);
1514 cache_free_handle(pool, handle);
1515 }
1516 EXPORT_SYMBOL_GPL(zs_free);
1517
1518 static void zs_object_copy(struct size_class *class, unsigned long dst,
1519 unsigned long src)
1520 {
1521 struct page *s_page, *d_page;
1522 unsigned int s_objidx, d_objidx;
1523 unsigned long s_off, d_off;
1524 void *s_addr, *d_addr;
1525 int s_size, d_size, size;
1526 int written = 0;
1527
1528 s_size = d_size = class->size;
1529
1530 obj_to_location(src, &s_page, &s_objidx);
1531 obj_to_location(dst, &d_page, &d_objidx);
1532
1533 s_off = (class->size * s_objidx) & ~PAGE_MASK;
1534 d_off = (class->size * d_objidx) & ~PAGE_MASK;
1535
1536 if (s_off + class->size > PAGE_SIZE)
1537 s_size = PAGE_SIZE - s_off;
1538
1539 if (d_off + class->size > PAGE_SIZE)
1540 d_size = PAGE_SIZE - d_off;
1541
1542 s_addr = kmap_atomic(s_page);
1543 d_addr = kmap_atomic(d_page);
1544
1545 while (1) {
1546 size = min(s_size, d_size);
1547 memcpy(d_addr + d_off, s_addr + s_off, size);
1548 written += size;
1549
1550 if (written == class->size)
1551 break;
1552
1553 s_off += size;
1554 s_size -= size;
1555 d_off += size;
1556 d_size -= size;
1557
1558 if (s_off >= PAGE_SIZE) {
1559 kunmap_atomic(d_addr);
1560 kunmap_atomic(s_addr);
1561 s_page = get_next_page(s_page);
1562 s_addr = kmap_atomic(s_page);
1563 d_addr = kmap_atomic(d_page);
1564 s_size = class->size - written;
1565 s_off = 0;
1566 }
1567
1568 if (d_off >= PAGE_SIZE) {
1569 kunmap_atomic(d_addr);
1570 d_page = get_next_page(d_page);
1571 d_addr = kmap_atomic(d_page);
1572 d_size = class->size - written;
1573 d_off = 0;
1574 }
1575 }
1576
1577 kunmap_atomic(d_addr);
1578 kunmap_atomic(s_addr);
1579 }
1580
1581
1582
1583
1584
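/*
 * Find the first allocated object at or after index *obj_idx in the given
 * subpage and return its handle, or 0 if there is none.
 */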
1585 static unsigned long find_alloced_obj(struct size_class *class,
1586 struct page *page, int *obj_idx)
1587 {
1588 int offset = 0;
1589 int index = *obj_idx;
1590 unsigned long handle = 0;
1591 void *addr = kmap_atomic(page);
1592
1593 offset = get_first_obj_offset(page);
1594 offset += class->size * index;
1595
1596 while (offset < PAGE_SIZE) {
1597 if (obj_allocated(page, addr + offset, &handle))
1598 break;
1599
1600 offset += class->size;
1601 index++;
1602 }
1603
1604 kunmap_atomic(addr);
1605
1606 *obj_idx = index;
1607
1608 return handle;
1609 }
1610
struct zs_compact_control {
	/* Source subpage for migration */
	struct page *s_page;
	/* Destination page for migration; always the first page of a zspage */
	struct page *d_page;
	/* Starting object index within @s_page used for live objects */
	int obj_idx;
};
1621
1622 static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
1623 struct zs_compact_control *cc)
1624 {
1625 unsigned long used_obj, free_obj;
1626 unsigned long handle;
1627 struct page *s_page = cc->s_page;
1628 struct page *d_page = cc->d_page;
1629 int obj_idx = cc->obj_idx;
1630 int ret = 0;
1631
1632 while (1) {
1633 handle = find_alloced_obj(class, s_page, &obj_idx);
1634 if (!handle) {
1635 s_page = get_next_page(s_page);
1636 if (!s_page)
1637 break;
1638 obj_idx = 0;
1639 continue;
1640 }
1641
1642
1643 if (zspage_full(class, get_zspage(d_page))) {
1644 ret = -ENOMEM;
1645 break;
1646 }
1647
1648 used_obj = handle_to_obj(handle);
1649 free_obj = obj_malloc(pool, get_zspage(d_page), handle);
1650 zs_object_copy(class, free_obj, used_obj);
1651 obj_idx++;
1652 record_obj(handle, free_obj);
1653 obj_free(class->size, used_obj);
1654 }
1655
1656
1657 cc->s_page = s_page;
1658 cc->obj_idx = obj_idx;
1659
1660 return ret;
1661 }
1662
1663 static struct zspage *isolate_zspage(struct size_class *class, bool source)
1664 {
1665 int i;
1666 struct zspage *zspage;
1667 enum fullness_group fg[2] = {ZS_ALMOST_EMPTY, ZS_ALMOST_FULL};
1668
1669 if (!source) {
1670 fg[0] = ZS_ALMOST_FULL;
1671 fg[1] = ZS_ALMOST_EMPTY;
1672 }
1673
1674 for (i = 0; i < 2; i++) {
1675 zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
1676 struct zspage, list);
1677 if (zspage) {
1678 remove_zspage(class, zspage, fg[i]);
1679 return zspage;
1680 }
1681 }
1682
1683 return zspage;
1684 }
1685
1686
1687
1688
1689
1690
1691
1692
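/*
 * putback_zspage - add @zspage into the right class's fullness list
 * @class: destination class
 * @zspage: target zspage
 *
 * Return: @zspage's fullness group
 */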
1693 static enum fullness_group putback_zspage(struct size_class *class,
1694 struct zspage *zspage)
1695 {
1696 enum fullness_group fullness;
1697
1698 fullness = get_fullness_group(class, zspage);
1699 insert_zspage(class, zspage, fullness);
1700 set_zspage_mapping(zspage, class->index, fullness);
1701
1702 return fullness;
1703 }
1704
1705 #ifdef CONFIG_COMPACTION
1706
1707
1708
1709
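/*
 * To prevent a zspage from being destroyed while it is migrated, freeing
 * paths must hold the page locks of all pages in the zspage; lock_zspage()
 * acquires them all, sleeping if necessary.
 */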
1710 static void lock_zspage(struct zspage *zspage)
1711 {
1712 struct page *curr_page, *page;
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722 while (1) {
1723 migrate_read_lock(zspage);
1724 page = get_first_page(zspage);
1725 if (trylock_page(page))
1726 break;
1727 get_page(page);
1728 migrate_read_unlock(zspage);
1729 wait_on_page_locked(page);
1730 put_page(page);
1731 }
1732
1733 curr_page = page;
1734 while ((page = get_next_page(curr_page))) {
1735 if (trylock_page(page)) {
1736 curr_page = page;
1737 } else {
1738 get_page(page);
1739 migrate_read_unlock(zspage);
1740 wait_on_page_locked(page);
1741 put_page(page);
1742 migrate_read_lock(zspage);
1743 }
1744 }
1745 migrate_read_unlock(zspage);
1746 }
1747
1748 static void migrate_lock_init(struct zspage *zspage)
1749 {
1750 rwlock_init(&zspage->lock);
1751 }
1752
1753 static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
1754 {
1755 read_lock(&zspage->lock);
1756 }
1757
1758 static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
1759 {
1760 read_unlock(&zspage->lock);
1761 }
1762
1763 static void migrate_write_lock(struct zspage *zspage)
1764 {
1765 write_lock(&zspage->lock);
1766 }
1767
1768 static void migrate_write_lock_nested(struct zspage *zspage)
1769 {
1770 write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
1771 }
1772
1773 static void migrate_write_unlock(struct zspage *zspage)
1774 {
1775 write_unlock(&zspage->lock);
1776 }
1777
1778
1779 static void inc_zspage_isolation(struct zspage *zspage)
1780 {
1781 zspage->isolated++;
1782 }
1783
1784 static void dec_zspage_isolation(struct zspage *zspage)
1785 {
1786 VM_BUG_ON(zspage->isolated == 0);
1787 zspage->isolated--;
1788 }
1789
1790 static const struct movable_operations zsmalloc_mops;
1791
1792 static void replace_sub_page(struct size_class *class, struct zspage *zspage,
1793 struct page *newpage, struct page *oldpage)
1794 {
1795 struct page *page;
1796 struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
1797 int idx = 0;
1798
1799 page = get_first_page(zspage);
1800 do {
1801 if (page == oldpage)
1802 pages[idx] = newpage;
1803 else
1804 pages[idx] = page;
1805 idx++;
1806 } while ((page = get_next_page(page)) != NULL);
1807
1808 create_page_chain(class, zspage, pages);
1809 set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
1810 if (unlikely(ZsHugePage(zspage)))
1811 newpage->index = oldpage->index;
1812 __SetPageMovable(newpage, &zsmalloc_mops);
1813 }
1814
1815 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
1816 {
1817 struct zspage *zspage;
1818
1819
1820
1821
1822
1823 VM_BUG_ON_PAGE(!PageMovable(page), page);
1824 VM_BUG_ON_PAGE(PageIsolated(page), page);
1825
1826 zspage = get_zspage(page);
1827 migrate_write_lock(zspage);
1828 inc_zspage_isolation(zspage);
1829 migrate_write_unlock(zspage);
1830
1831 return true;
1832 }
1833
1834 static int zs_page_migrate(struct page *newpage, struct page *page,
1835 enum migrate_mode mode)
1836 {
1837 struct zs_pool *pool;
1838 struct size_class *class;
1839 struct zspage *zspage;
1840 struct page *dummy;
1841 void *s_addr, *d_addr, *addr;
1842 int offset;
1843 unsigned long handle;
1844 unsigned long old_obj, new_obj;
1845 unsigned int obj_idx;
1846
1847
1848
1849
1850
1851
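	/*
	 * MIGRATE_SYNC_NO_COPY is not supported: the copy has to happen while
	 * the zspage locks are held, which that mode does not allow.
	 */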
1852 if (mode == MIGRATE_SYNC_NO_COPY)
1853 return -EINVAL;
1854
1855 VM_BUG_ON_PAGE(!PageMovable(page), page);
1856 VM_BUG_ON_PAGE(!PageIsolated(page), page);
1857
1858
1859 zspage = get_zspage(page);
1860 pool = zspage->pool;
1861
1862
1863
1864
1865
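	/*
	 * The pool migrate_lock protects the race between this migration and
	 * zs_free()/zs_map_object().
	 */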
1866 write_lock(&pool->migrate_lock);
1867 class = zspage_class(pool, zspage);
1868
1869
1870
1871
1872 spin_lock(&class->lock);
1873
1874 migrate_write_lock(zspage);
1875
1876 offset = get_first_obj_offset(page);
1877 s_addr = kmap_atomic(page);
1878
1879
1880
1881
1882 d_addr = kmap_atomic(newpage);
1883 memcpy(d_addr, s_addr, PAGE_SIZE);
1884 kunmap_atomic(d_addr);
1885
1886 for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
1887 addr += class->size) {
1888 if (obj_allocated(page, addr, &handle)) {
1889
1890 old_obj = handle_to_obj(handle);
1891 obj_to_location(old_obj, &dummy, &obj_idx);
1892 new_obj = (unsigned long)location_to_obj(newpage,
1893 obj_idx);
1894 record_obj(handle, new_obj);
1895 }
1896 }
1897 kunmap_atomic(s_addr);
1898
1899 replace_sub_page(class, zspage, newpage, page);
1900
1901
1902
1903
1904 write_unlock(&pool->migrate_lock);
1905 spin_unlock(&class->lock);
1906 dec_zspage_isolation(zspage);
1907 migrate_write_unlock(zspage);
1908
1909 get_page(newpage);
1910 if (page_zone(newpage) != page_zone(page)) {
1911 dec_zone_page_state(page, NR_ZSPAGES);
1912 inc_zone_page_state(newpage, NR_ZSPAGES);
1913 }
1914
1915 reset_page(page);
1916 put_page(page);
1917
1918 return MIGRATEPAGE_SUCCESS;
1919 }
1920
1921 static void zs_page_putback(struct page *page)
1922 {
1923 struct zspage *zspage;
1924
1925 VM_BUG_ON_PAGE(!PageMovable(page), page);
1926 VM_BUG_ON_PAGE(!PageIsolated(page), page);
1927
1928 zspage = get_zspage(page);
1929 migrate_write_lock(zspage);
1930 dec_zspage_isolation(zspage);
1931 migrate_write_unlock(zspage);
1932 }
1933
1934 static const struct movable_operations zsmalloc_mops = {
1935 .isolate_page = zs_page_isolate,
1936 .migrate_page = zs_page_migrate,
1937 .putback_page = zs_page_putback,
1938 };
1939
1940
1941
1942
1943
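/*
 * Deferred-free worker: frees the ZS_EMPTY zspages queued up by
 * kick_deferred_free(). All component page locks are taken via
 * lock_zspage() before the zspage metadata is torn down.
 */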
1944 static void async_free_zspage(struct work_struct *work)
1945 {
1946 int i;
1947 struct size_class *class;
1948 unsigned int class_idx;
1949 enum fullness_group fullness;
1950 struct zspage *zspage, *tmp;
1951 LIST_HEAD(free_pages);
1952 struct zs_pool *pool = container_of(work, struct zs_pool,
1953 free_work);
1954
1955 for (i = 0; i < ZS_SIZE_CLASSES; i++) {
1956 class = pool->size_class[i];
1957 if (class->index != i)
1958 continue;
1959
1960 spin_lock(&class->lock);
1961 list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);
1962 spin_unlock(&class->lock);
1963 }
1964
1965 list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
1966 list_del(&zspage->list);
1967 lock_zspage(zspage);
1968
1969 get_zspage_mapping(zspage, &class_idx, &fullness);
1970 VM_BUG_ON(fullness != ZS_EMPTY);
1971 class = pool->size_class[class_idx];
1972 spin_lock(&class->lock);
1973 __free_zspage(pool, class, zspage);
1974 spin_unlock(&class->lock);
1975 }
}
1977
1978 static void kick_deferred_free(struct zs_pool *pool)
1979 {
1980 schedule_work(&pool->free_work);
1981 }
1982
1983 static void zs_flush_migration(struct zs_pool *pool)
1984 {
1985 flush_work(&pool->free_work);
1986 }
1987
1988 static void init_deferred_free(struct zs_pool *pool)
1989 {
1990 INIT_WORK(&pool->free_work, async_free_zspage);
1991 }
1992
1993 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)
1994 {
1995 struct page *page = get_first_page(zspage);
1996
1997 do {
1998 WARN_ON(!trylock_page(page));
1999 __SetPageMovable(page, &zsmalloc_mops);
2000 unlock_page(page);
2001 } while ((page = get_next_page(page)) != NULL);
2002 }
2003 #else
2004 static inline void zs_flush_migration(struct zs_pool *pool) { }
2005 #endif
2006
2007
2008
2009
2010
2011
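/*
 * Based on the number of allocated-but-unused objects, calculate how many
 * pages compaction of this class could free.
 */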
2012 static unsigned long zs_can_compact(struct size_class *class)
2013 {
2014 unsigned long obj_wasted;
2015 unsigned long obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
2016 unsigned long obj_used = zs_stat_get(class, OBJ_USED);
2017
2018 if (obj_allocated <= obj_used)
2019 return 0;
2020
2021 obj_wasted = obj_allocated - obj_used;
2022 obj_wasted /= class->objs_per_zspage;
2023
2024 return obj_wasted * class->pages_per_zspage;
2025 }
2026
2027 static unsigned long __zs_compact(struct zs_pool *pool,
2028 struct size_class *class)
2029 {
2030 struct zs_compact_control cc;
2031 struct zspage *src_zspage;
2032 struct zspage *dst_zspage = NULL;
2033 unsigned long pages_freed = 0;
2034
2035
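	/*
	 * protect the race between zspage migration and zs_free as well as
	 * zspage allocation/free
	 */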
2036 write_lock(&pool->migrate_lock);
2037
2038 spin_lock(&class->lock);
2039 while ((src_zspage = isolate_zspage(class, true))) {
2040
2041 migrate_write_lock(src_zspage);
2042
2043 if (!zs_can_compact(class))
2044 break;
2045
2046 cc.obj_idx = 0;
2047 cc.s_page = get_first_page(src_zspage);
2048
2049 while ((dst_zspage = isolate_zspage(class, false))) {
2050 migrate_write_lock_nested(dst_zspage);
2051
2052 cc.d_page = get_first_page(dst_zspage);
2053
2054
2055
2056
2057 if (!migrate_zspage(pool, class, &cc))
2058 break;
2059
2060 putback_zspage(class, dst_zspage);
2061 migrate_write_unlock(dst_zspage);
2062 dst_zspage = NULL;
2063 if (rwlock_is_contended(&pool->migrate_lock))
2064 break;
2065 }
2066
2067
2068 if (dst_zspage == NULL)
2069 break;
2070
2071 putback_zspage(class, dst_zspage);
2072 migrate_write_unlock(dst_zspage);
2073
2074 if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
2075 migrate_write_unlock(src_zspage);
2076 free_zspage(pool, class, src_zspage);
2077 pages_freed += class->pages_per_zspage;
2078 } else
2079 migrate_write_unlock(src_zspage);
2080 spin_unlock(&class->lock);
2081 write_unlock(&pool->migrate_lock);
2082 cond_resched();
2083 write_lock(&pool->migrate_lock);
2084 spin_lock(&class->lock);
2085 }
2086
2087 if (src_zspage) {
2088 putback_zspage(class, src_zspage);
2089 migrate_write_unlock(src_zspage);
2090 }
2091
2092 spin_unlock(&class->lock);
2093 write_unlock(&pool->migrate_lock);
2094
2095 return pages_freed;
2096 }
2097
2098 unsigned long zs_compact(struct zs_pool *pool)
2099 {
2100 int i;
2101 struct size_class *class;
2102 unsigned long pages_freed = 0;
2103
2104 for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
2105 class = pool->size_class[i];
2106 if (!class)
2107 continue;
2108 if (class->index != i)
2109 continue;
2110 pages_freed += __zs_compact(pool, class);
2111 }
2112 atomic_long_add(pages_freed, &pool->stats.pages_compacted);
2113
2114 return pages_freed;
2115 }
2116 EXPORT_SYMBOL_GPL(zs_compact);
2117
2118 void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats)
2119 {
2120 memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats));
2121 }
2122 EXPORT_SYMBOL_GPL(zs_pool_stats);
2123
2124 static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
2125 struct shrink_control *sc)
2126 {
2127 unsigned long pages_freed;
2128 struct zs_pool *pool = container_of(shrinker, struct zs_pool,
2129 shrinker);
2130
2131
2132
2133
2134
2135
2136 pages_freed = zs_compact(pool);
2137
2138 return pages_freed ? pages_freed : SHRINK_STOP;
2139 }
2140
2141 static unsigned long zs_shrinker_count(struct shrinker *shrinker,
2142 struct shrink_control *sc)
2143 {
2144 int i;
2145 struct size_class *class;
2146 unsigned long pages_to_free = 0;
2147 struct zs_pool *pool = container_of(shrinker, struct zs_pool,
2148 shrinker);
2149
2150 for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
2151 class = pool->size_class[i];
2152 if (!class)
2153 continue;
2154 if (class->index != i)
2155 continue;
2156
2157 pages_to_free += zs_can_compact(class);
2158 }
2159
2160 return pages_to_free;
2161 }
2162
2163 static void zs_unregister_shrinker(struct zs_pool *pool)
2164 {
2165 unregister_shrinker(&pool->shrinker);
2166 }
2167
2168 static int zs_register_shrinker(struct zs_pool *pool)
2169 {
2170 pool->shrinker.scan_objects = zs_shrinker_scan;
2171 pool->shrinker.count_objects = zs_shrinker_count;
2172 pool->shrinker.batch = 0;
2173 pool->shrinker.seeks = DEFAULT_SEEKS;
2174
2175 return register_shrinker(&pool->shrinker, "mm-zspool:%s",
2176 pool->name);
2177 }
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
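/**
 * zs_create_pool - Creates an allocation pool to work from.
 * @name: pool name to be created
 *
 * This function must be called before anything when using
 * the zsmalloc allocator.
 *
 * On success, a pointer to the newly created pool is returned,
 * otherwise NULL.
 */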
2189 struct zs_pool *zs_create_pool(const char *name)
2190 {
2191 int i;
2192 struct zs_pool *pool;
2193 struct size_class *prev_class = NULL;
2194
2195 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
2196 if (!pool)
2197 return NULL;
2198
2199 init_deferred_free(pool);
2200 rwlock_init(&pool->migrate_lock);
2201
2202 pool->name = kstrdup(name, GFP_KERNEL);
2203 if (!pool->name)
2204 goto err;
2205
2206 if (create_cache(pool))
2207 goto err;
2208
2209
2210
2211
2212
2213 for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
2214 int size;
2215 int pages_per_zspage;
2216 int objs_per_zspage;
2217 struct size_class *class;
2218 int fullness = 0;
2219
2220 size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
2221 if (size > ZS_MAX_ALLOC_SIZE)
2222 size = ZS_MAX_ALLOC_SIZE;
2223 pages_per_zspage = get_pages_per_zspage(size);
2224 objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
2225
2226
2227
2228
2229
2230
2231
2232 if (pages_per_zspage != 1 && objs_per_zspage != 1 &&
2233 !huge_class_size) {
2234 huge_class_size = size;
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244 huge_class_size -= (ZS_HANDLE_SIZE - 1);
2245 }
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
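		/*
		 * Classes that share the same pages_per_zspage and
		 * objs_per_zspage behave identically, so merge them with the
		 * previously created (larger) class instead of allocating a
		 * new one.
		 */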
2256 if (prev_class) {
2257 if (can_merge(prev_class, pages_per_zspage, objs_per_zspage)) {
2258 pool->size_class[i] = prev_class;
2259 continue;
2260 }
2261 }
2262
2263 class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
2264 if (!class)
2265 goto err;
2266
2267 class->size = size;
2268 class->index = i;
2269 class->pages_per_zspage = pages_per_zspage;
2270 class->objs_per_zspage = objs_per_zspage;
2271 spin_lock_init(&class->lock);
2272 pool->size_class[i] = class;
2273 for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
2274 fullness++)
2275 INIT_LIST_HEAD(&class->fullness_list[fullness]);
2276
2277 prev_class = class;
2278 }
2279
2280
2281 zs_pool_stat_create(pool, name);
2282
2283
2284
2285
2286
2287
2288
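	/*
	 * The shrinker only triggers internal defragmentation of the pool,
	 * which is optional: if registration fails, the pool still works and
	 * compaction can be triggered manually, so the return code is
	 * ignored.
	 */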
2289 zs_register_shrinker(pool);
2290
2291 return pool;
2292
2293 err:
2294 zs_destroy_pool(pool);
2295 return NULL;
2296 }
2297 EXPORT_SYMBOL_GPL(zs_create_pool);
2298
2299 void zs_destroy_pool(struct zs_pool *pool)
2300 {
2301 int i;
2302
2303 zs_unregister_shrinker(pool);
2304 zs_flush_migration(pool);
2305 zs_pool_stat_destroy(pool);
2306
2307 for (i = 0; i < ZS_SIZE_CLASSES; i++) {
2308 int fg;
2309 struct size_class *class = pool->size_class[i];
2310
2311 if (!class)
2312 continue;
2313
2314 if (class->index != i)
2315 continue;
2316
2317 for (fg = ZS_EMPTY; fg < NR_ZS_FULLNESS; fg++) {
2318 if (!list_empty(&class->fullness_list[fg])) {
2319 pr_info("Freeing non-empty class with size %db, fullness group %d\n",
2320 class->size, fg);
2321 }
2322 }
2323 kfree(class);
2324 }
2325
2326 destroy_cache(pool);
2327 kfree(pool->name);
2328 kfree(pool);
2329 }
2330 EXPORT_SYMBOL_GPL(zs_destroy_pool);
2331
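/*
 * Minimal usage sketch (illustrative only, compiled out, not part of the
 * allocator): how a typical client such as zram or zswap drives the API
 * above. The function name and the "src"/"len" parameters are hypothetical
 * and error handling is trimmed to the essentials.
 */
#if 0
static int zsmalloc_usage_example(const void *src, size_t len)
{
	struct zs_pool *pool;
	unsigned long handle;
	void *dst;

	pool = zs_create_pool("example");
	if (!pool)
		return -ENOMEM;

	/* Allocations larger than ZS_MAX_ALLOC_SIZE (PAGE_SIZE) fail. */
	handle = zs_malloc(pool, len, GFP_KERNEL);
	if (IS_ERR((void *)handle)) {
		zs_destroy_pool(pool);
		return PTR_ERR((void *)handle);
	}

	/* Objects must be mapped before access and unmapped promptly. */
	dst = zs_map_object(pool, handle, ZS_MM_WO);
	memcpy(dst, src, len);
	zs_unmap_object(pool, handle);

	zs_free(pool, handle);
	zs_destroy_pool(pool);
	return 0;
}
#endif
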
2332 static int __init zs_init(void)
2333 {
2334 int ret;
2335
2336 ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
2337 zs_cpu_prepare, zs_cpu_dead);
2338 if (ret)
2339 goto out;
2340
2341 #ifdef CONFIG_ZPOOL
2342 zpool_register_driver(&zs_zpool_driver);
2343 #endif
2344
2345 zs_stat_init();
2346
2347 return 0;
2348
2349 out:
2350 return ret;
2351 }
2352
2353 static void __exit zs_exit(void)
2354 {
2355 #ifdef CONFIG_ZPOOL
2356 zpool_unregister_driver(&zs_zpool_driver);
2357 #endif
2358 cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
2359
2360 zs_stat_exit();
2361 }
2362
2363 module_init(zs_init);
2364 module_exit(zs_exit);
2365
2366 MODULE_LICENSE("Dual BSD/GPL");
2367 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");