0001
0002 #ifndef _LINUX_SWAP_H
0003 #define _LINUX_SWAP_H
0004
0005 #include <linux/spinlock.h>
0006 #include <linux/linkage.h>
0007 #include <linux/mmzone.h>
0008 #include <linux/list.h>
0009 #include <linux/memcontrol.h>
0010 #include <linux/sched.h>
0011 #include <linux/node.h>
0012 #include <linux/fs.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/atomic.h>
0015 #include <linux/page-flags.h>
0016 #include <uapi/linux/mempolicy.h>
0017 #include <asm/page.h>
0018
0019 struct notifier_block;
0020
0021 struct bio;
0022
0023 struct pagevec;
0024
0025 #define SWAP_FLAG_PREFER 0x8000
0026 #define SWAP_FLAG_PRIO_MASK 0x7fff
0027 #define SWAP_FLAG_PRIO_SHIFT 0
0028 #define SWAP_FLAG_DISCARD 0x10000
0029 #define SWAP_FLAG_DISCARD_ONCE 0x20000
0030 #define SWAP_FLAG_DISCARD_PAGES 0x40000
0031
0032 #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
0033 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
0034 SWAP_FLAG_DISCARD_PAGES)
0035 #define SWAP_BATCH 64
0036
0037 static inline int current_is_kswapd(void)
0038 {
0039 return current->flags & PF_KSWAPD;
0040 }
0041
0042
0043
0044
0045
0046
0047
0048
0049
/*
 * MAX_SWAPFILES_SHIFT sets the width of the swap "type" field; the
 * highest type values are reserved below for special swp_entry_t
 * encodings rather than for real swap files.
 */
#define MAX_SWAPFILES_SHIFT 5

/*
 * The special entries below are carved out of the top of the type
 * space.  Each definition references *_NUM counts that appear later
 * in this file; that is fine because macros are expanded lazily, at
 * the point of use rather than the point of definition.
 */

/* Entry marking a page whose swapin failed. */
#define SWP_SWAPIN_ERROR_NUM 1
#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
			  SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
			  SWP_PTE_MARKER_NUM)

/*
 * PTE markers (CONFIG_PTE_MARKER): non-present ptes that carry extra
 * per-pte information in the swap entry's offset bits.
 */
#ifdef CONFIG_PTE_MARKER
#define SWP_PTE_MARKER_NUM 1
#define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
			SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
#else
#define SWP_PTE_MARKER_NUM 0
#endif

/*
 * Device-private memory (CONFIG_DEVICE_PRIVATE): unaddressable device
 * pages are represented by swap-like entries so they can be kept in
 * the page tables while the data lives on the device.  Read/write and
 * exclusive variants consume four type values.
 */
#ifdef CONFIG_DEVICE_PRIVATE
#define SWP_DEVICE_NUM 4
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
#else
#define SWP_DEVICE_NUM 0
#endif

/*
 * Page-migration entries (CONFIG_MIGRATION): while a page is being
 * migrated its ptes are temporarily replaced by these entries.  Read,
 * exclusive-read and write flavours use three type values.
 */
#ifdef CONFIG_MIGRATION
#define SWP_MIGRATION_NUM 3
#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
#define SWP_MIGRATION_READ_EXCLUSIVE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 2)
#else
#define SWP_MIGRATION_NUM 0
#endif

/*
 * Hardware-poisoned memory (CONFIG_MEMORY_FAILURE): ptes of poisoned
 * pages are replaced with this entry type.
 */
#ifdef CONFIG_MEMORY_FAILURE
#define SWP_HWPOISON_NUM 1
#define SWP_HWPOISON MAX_SWAPFILES
#else
#define SWP_HWPOISON_NUM 0
#endif

/* Number of type values left over for real swap files. */
#define MAX_SWAPFILES \
	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
	 SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
	 SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
/*
 * On-disk layout of the first page of a swap area.
 *
 * The two views overlap: the "magic" view places the signature string
 * in the last 10 bytes of the page, while the "info" view describes
 * the header fields occupying the rest of the same page.
 */
union swap_header {
	struct {
		char reserved[PAGE_SIZE - 10];
		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
	} magic;
	struct {
		char		bootbits[1024];	/* Space for disklabel etc. */
		__u32		version;
		__u32		last_page;
		__u32		nr_badpages;
		unsigned char	sws_uuid[16];
		unsigned char	sws_volume[16];
		__u32		padding[117];
		__u32		badpages[1];
	} info;
};
0158
0159
0160
0161
0162
/*
 * Book-keeping carried through a reclaim pass; reclaimed_slab
 * accumulates the number of slab pages freed by shrinkers.
 */
struct reclaim_state {
	unsigned long reclaimed_slab;
};
0166
0167 #ifdef __KERNEL__
0168
0169 struct address_space;
0170 struct sysinfo;
0171 struct writeback_control;
0172 struct zone;
0173
0174
0175
0176
0177
0178
0179
0180
0181
/*
 * A swap extent maps a range of a swapfile's page offsets onto a
 * contiguous range of disk blocks.  Extents are kept in an rbtree
 * (rb_node), keyed by start_page.
 */
struct swap_extent {
	struct rb_node rb_node;
	pgoff_t start_page;
	pgoff_t nr_pages;
	sector_t start_block;
};

/*
 * Max bad pages in the new format: bounded by the space between the
 * info.badpages array and the magic string at the end of the header
 * page (see union swap_header above).
 */
#define MAX_SWAP_BADPAGES \
	((offsetof(union swap_header, magic.magic) - \
	  offsetof(union swap_header, info.badpages)) / sizeof(int))
0195
/*
 * Per-swap-area state flags, kept in swap_info_struct::flags.
 */
enum {
	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap? */
	SWP_DISCARDABLE	= (1 << 2),	/* blkdev supports discard */
	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
	SWP_BLKDEV	= (1 << 6),	/* its a block device */
	SWP_ACTIVATED	= (1 << 7),	/* set after swap_activate success */
	SWP_FS_OPS	= (1 << 8),	/* swapfile operations go through fs */
	SWP_AREA_DISCARD = (1 << 9),	/* single-time swap area discards */
	SWP_PAGE_DISCARD = (1 << 10),	/* freed swap page-cluster discards */
	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
					/* add others here before... */
	SWP_SCANNING	= (1 << 14),	/* refcount in scan_swap_map */
};
0213
#define SWAP_CLUSTER_MAX 32UL
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX

/* Bit flags held in a swap_map byte, alongside the count: */
#define SWAP_HAS_CACHE	0x40	/* Flag page is cached, in first swap_map */
#define COUNT_CONTINUED	0x80	/* See swap_map continuation for full count */

/* Special values of the count field in the first swap_map byte: */
#define SWAP_MAP_MAX	0x3e	/* Max duplication count, in first swap_map */
#define SWAP_MAP_BAD	0x3f	/* Note page is bad, in first swap_map */
#define SWAP_MAP_SHMEM	0xbf	/* Owned by shmem/tmpfs, in first swap_map */

/* Maximum count held in one swap_map continuation byte: */
#define SWAP_CONT_MAX	0x7f	/* Max count */
0228
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
/*
 * We use this to track usage of a cluster. A cluster is a block of swap
 * disk space SWAPFILE_CLUSTER pages long that naturally aligns on disk.
 * All free clusters are organized into a list; we fetch an entry from
 * the list to get a free cluster.
 *
 * The data field stores the next cluster if the cluster is free, or the
 * cluster usage counter otherwise.  The flags field says whether a
 * cluster is free.  Protected by swap_info_struct.lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protects the swap_cluster_info fields
				 * and the swap_info_struct->swap_map
				 * elements corresponding to this swap
				 * cluster.
				 */
	unsigned int data:24;
	unsigned int flags:8;
};
#define CLUSTER_FLAG_FREE 1		/* This cluster is free */
#define CLUSTER_FLAG_NEXT_NULL 2	/* This cluster has no next cluster */
#define CLUSTER_FLAG_HUGE 4		/* This cluster backs a huge page */

/*
 * A cluster is assigned to each CPU so each CPU can allocate swap
 * entries from its own cluster and swap out sequentially, optimizing
 * swapout throughput.
 */
struct percpu_cluster {
	struct swap_cluster_info index;	/* Current cluster index */
	unsigned int next;		/* Likely next allocation offset */
};

/* Cluster list, threaded through swap_cluster_info::data. */
struct swap_cluster_list {
	struct swap_cluster_info head;
	struct swap_cluster_info tail;
};
0267
0268
0269
0270
/*
 * The in-memory structure used to track swap areas.
 */
struct swap_info_struct {
	struct percpu_ref users;	/* indicate and keep swap device valid */
	unsigned long flags;		/* SWP_USED etc: see above */
	signed short prio;		/* swap priority of this type */
	struct plist_node list;		/* entry in swap_active_head */
	signed char type;		/* index of this swap area */
	unsigned int max;		/* extent of the swap_map */
	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
	struct swap_cluster_info *cluster_info; /* cluster info, SSD only */
	struct swap_cluster_list free_clusters; /* free clusters list */
	unsigned int lowest_bit;	/* index of first free in swap_map */
	unsigned int highest_bit;	/* index of last free in swap_map */
	unsigned int pages;		/* total of usable pages of swap */
	unsigned int inuse_pages;	/* number of those currently in use */
	unsigned int cluster_next;	/* likely index for next allocation */
	unsigned int cluster_nr;	/* countdown to next cluster search */
	unsigned int __percpu *cluster_next_cpu; /* percpu next-alloc index */
	struct percpu_cluster __percpu *percpu_cluster; /* per-cpu swap location */
	struct rb_root swap_extent_root; /* root of the swap extent rbtree */
	struct block_device *bdev;	/* swap device or bdev of swap file */
	struct file *swap_file;		/* seldom referenced */
	unsigned int old_block_size;	/* seldom referenced */
	struct completion comp;		/* seldom referenced */
#ifdef CONFIG_FRONTSWAP
	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
#endif
	spinlock_t lock;		/*
					 * Protects map-scan related fields:
					 * swap_map, lowest_bit, highest_bit,
					 * inuse_pages, cluster_next,
					 * cluster_nr and the free/discard
					 * cluster lists.  Other fields only
					 * change at swapon/swapoff.
					 * NOTE(review): the full lock
					 * ordering vs. the global swap_lock
					 * lives in mm/swapfile.c — confirm
					 * there before relying on it.
					 */
	spinlock_t cont_lock;		/*
					 * Protects the swap count
					 * continuation page list.
					 */
	struct work_struct discard_work; /* discard worker */
	struct swap_cluster_list discard_clusters; /* discard clusters list */
	struct plist_node avail_lists[]; /*
					  * Entries in swap_avail_heads, one
					  * per node.  Must be last: the
					  * array length is nr_node_ids,
					  * which is not a compile-time
					  * constant, so it is allocated
					  * dynamically as a flexible array
					  * member.
					  */
};
0328
#ifdef CONFIG_64BIT
#define SWAP_RA_ORDER_CEILING 5
#else
/* Smaller ceiling on 32-bit: the pte cache below lives in this struct. */
#define SWAP_RA_ORDER_CEILING 3
#define SWAP_RA_PTE_CACHE_SIZE (1 << SWAP_RA_ORDER_CEILING)
#endif

/*
 * Per-fault VMA swap readahead state: a window of ptes around the
 * faulting address.  On 64-bit the ptes are stored out of line; on
 * 32-bit a small fixed-size cache is embedded.
 */
struct vma_swap_readahead {
	unsigned short win;	/* readahead window size */
	unsigned short offset;	/* offset within the window */
	unsigned short nr_pte;	/* number of ptes captured */
#ifdef CONFIG_64BIT
	pte_t *ptes;
#else
	pte_t ptes[SWAP_RA_PTE_CACHE_SIZE];
#endif
};
0347
0348 static inline swp_entry_t folio_swap_entry(struct folio *folio)
0349 {
0350 swp_entry_t entry = { .val = page_private(&folio->page) };
0351 return entry;
0352 }
0353
/* linux/mm/workingset.c: refault distance / thrashing detection hooks. */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
void workingset_activation(struct folio *folio);

/* Only track the nodes of mappings with shadow entries. */
void workingset_update_node(struct xa_node *node);
extern struct list_lru shadow_nodes;
#define mapping_set_update(xas, mapping) do { \
	if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \
		xas_set_update(xas, workingset_update_node); \
		xas_set_lru(xas, &shadow_nodes); \
	} \
} while (0)
0369
0370
0371 extern unsigned long totalreserve_pages;
0372
0373
0374 #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES)
0375
0376
0377
0378 extern void lru_note_cost(struct lruvec *lruvec, bool file,
0379 unsigned int nr_pages);
0380 extern void lru_note_cost_folio(struct folio *);
0381 extern void folio_add_lru(struct folio *);
0382 extern void lru_cache_add(struct page *);
0383 void mark_page_accessed(struct page *);
0384 void folio_mark_accessed(struct folio *);
0385
0386 extern atomic_t lru_disable_count;
0387
0388 static inline bool lru_cache_disabled(void)
0389 {
0390 return atomic_read(&lru_disable_count);
0391 }
0392
/*
 * Drop one reference on lru_disable_count; pairs with a preceding
 * lru_cache_disable() call (defined elsewhere in mm).
 */
static inline void lru_cache_enable(void)
{
	atomic_dec(&lru_disable_count);
}
0397
0398 extern void lru_cache_disable(void);
0399 extern void lru_add_drain(void);
0400 extern void lru_add_drain_cpu(int cpu);
0401 extern void lru_add_drain_cpu_zone(struct zone *zone);
0402 extern void lru_add_drain_all(void);
0403 extern void deactivate_page(struct page *page);
0404 extern void mark_page_lazyfree(struct page *page);
0405 extern void swap_setup(void);
0406
0407 extern void lru_cache_add_inactive_or_unevictable(struct page *page,
0408 struct vm_area_struct *vma);
0409
0410
0411 extern unsigned long zone_reclaimable_pages(struct zone *zone);
0412 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
0413 gfp_t gfp_mask, nodemask_t *mask);
0414
0415 #define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
0416 #define MEMCG_RECLAIM_PROACTIVE (1 << 2)
0417 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
0418 unsigned long nr_pages,
0419 gfp_t gfp_mask,
0420 unsigned int reclaim_options);
0421 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
0422 gfp_t gfp_mask, bool noswap,
0423 pg_data_t *pgdat,
0424 unsigned long *nr_scanned);
0425 extern unsigned long shrink_all_memory(unsigned long nr_pages);
0426 extern int vm_swappiness;
0427 long remove_mapping(struct address_space *mapping, struct folio *folio);
0428
0429 extern unsigned long reclaim_pages(struct list_head *page_list);
0430 #ifdef CONFIG_NUMA
0431 extern int node_reclaim_mode;
0432 extern int sysctl_min_unmapped_ratio;
0433 extern int sysctl_min_slab_ratio;
0434 #else
0435 #define node_reclaim_mode 0
0436 #endif
0437
0438 static inline bool node_reclaim_enabled(void)
0439 {
0440
0441 return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
0442 }
0443
0444 void check_move_unevictable_folios(struct folio_batch *fbatch);
0445 void check_move_unevictable_pages(struct pagevec *pvec);
0446
0447 extern void kswapd_run(int nid);
0448 extern void kswapd_stop(int nid);
0449
0450 #ifdef CONFIG_SWAP
0451
0452 int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
0453 unsigned long nr_pages, sector_t start_block);
0454 int generic_swapfile_activate(struct swap_info_struct *, struct file *,
0455 sector_t *);
0456
/* Total number of pages currently held in all swap caches. */
static inline unsigned long total_swapcache_pages(void)
{
	return global_node_page_state(NR_SWAPCACHE);
}
0461
0462 extern void free_swap_cache(struct page *page);
0463 extern void free_page_and_swap_cache(struct page *);
0464 extern void free_pages_and_swap_cache(struct page **, int);
0465
0466 extern atomic_long_t nr_swap_pages;
0467 extern long total_swap_pages;
0468 extern atomic_t nr_rotate_swap;
0469 extern bool has_usable_swap(void);
0470
0471
0472 static inline bool vm_swap_full(void)
0473 {
0474 return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
0475 }
0476
/* Number of free swap pages remaining across all swap devices. */
static inline long get_nr_swap_pages(void)
{
	return atomic_long_read(&nr_swap_pages);
}
0481
0482 extern void si_swapinfo(struct sysinfo *);
0483 swp_entry_t folio_alloc_swap(struct folio *folio);
0484 extern void put_swap_page(struct page *page, swp_entry_t entry);
0485 extern swp_entry_t get_swap_page_of_type(int);
0486 extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
0487 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
0488 extern void swap_shmem_alloc(swp_entry_t);
0489 extern int swap_duplicate(swp_entry_t);
0490 extern int swapcache_prepare(swp_entry_t);
0491 extern void swap_free(swp_entry_t);
0492 extern void swapcache_free_entries(swp_entry_t *entries, int n);
0493 extern int free_swap_and_cache(swp_entry_t);
0494 int swap_type_of(dev_t device, sector_t offset);
0495 int find_first_swap(dev_t *device);
0496 extern unsigned int count_swap_pages(int, int);
0497 extern sector_t swapdev_block(int, pgoff_t);
0498 extern int __swap_count(swp_entry_t entry);
0499 extern int __swp_swapcount(swp_entry_t entry);
0500 extern int swp_swapcount(swp_entry_t entry);
0501 extern struct swap_info_struct *page_swap_info(struct page *);
0502 extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
0503 extern int try_to_free_swap(struct page *);
0504 struct backing_dev_info;
0505 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
0506 extern void exit_swap_address_space(unsigned int type);
0507 extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
0508 sector_t swap_page_sector(struct page *page);
0509
/* Drop the reference on a swap device taken by get_swap_device(). */
static inline void put_swap_device(struct swap_info_struct *si)
{
	percpu_ref_put(&si->users);
}
0514
0515 #else
/* !CONFIG_SWAP stubs: behave as if no swap device ever exists. */
static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
	return NULL;
}

static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
{
	return NULL;
}

static inline void put_swap_device(struct swap_info_struct *si)
{
}
0529
/* !CONFIG_SWAP: constant-folding stand-ins for the swap accounting API. */
#define get_nr_swap_pages() 0L
#define total_swap_pages 0L
#define total_swapcache_pages() 0UL
#define vm_swap_full() 0

#define si_swapinfo(val) \
	do { (val)->freeswap = (val)->totalswap = 0; } while (0)

#define free_page_and_swap_cache(page) \
	put_page(page)
/*
 * No trailing semicolon in the expansion: callers write
 * "free_pages_and_swap_cache(p, n);" themselves, and a stray ';' here
 * would break use inside an unbraced if/else (CERT PRE11-C).
 */
#define free_pages_and_swap_cache(pages, nr) \
	release_pages((pages), (nr))

/*
 * Stub that still evaluates its argument once, so callers' expressions
 * keep their side effects and type checking.
 */
#define free_swap_and_cache(e) is_pfn_swap_entry(e)
0546
/*
 * Remaining !CONFIG_SWAP stubs: reference counting, swapcache and
 * allocation helpers all degrade to no-ops reporting "no swap".
 */
static inline void free_swap_cache(struct page *page)
{
}

static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
{
	return 0;
}

static inline void swap_shmem_alloc(swp_entry_t swp)
{
}

static inline int swap_duplicate(swp_entry_t swp)
{
	return 0;
}

static inline void swap_free(swp_entry_t swp)
{
}

static inline void put_swap_page(struct page *page, swp_entry_t swp)
{
}

static inline int __swap_count(swp_entry_t entry)
{
	return 0;
}

static inline int __swp_swapcount(swp_entry_t entry)
{
	return 0;
}

static inline int swp_swapcount(swp_entry_t entry)
{
	return 0;
}

static inline int try_to_free_swap(struct page *page)
{
	return 0;
}

/* Allocation always fails: a zero swp_entry_t val means "no entry". */
static inline swp_entry_t folio_alloc_swap(struct folio *folio)
{
	swp_entry_t entry;
	entry.val = 0;
	return entry;
}

static inline int add_swap_extent(struct swap_info_struct *sis,
				  unsigned long start_page,
				  unsigned long nr_pages, sector_t start_block)
{
	return -EINVAL;
}
0606 #endif
0607
#ifdef CONFIG_THP_SWAP
extern int split_swap_cluster(swp_entry_t entry);
#else
/* Without THP_SWAP there are no huge swap clusters to split. */
static inline int split_swap_cluster(swp_entry_t entry)
{
	return 0;
}
#endif
0616
0617 #ifdef CONFIG_MEMCG
0618 static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
0619 {
0620
0621 if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
0622 return vm_swappiness;
0623
0624
0625 if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg))
0626 return vm_swappiness;
0627
0628 return memcg->swappiness;
0629 }
0630 #else
/* Without memcg, everything uses the global swappiness setting. */
static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
{
	return vm_swappiness;
}
0635 #endif
0636
0637 #ifdef CONFIG_ZSWAP
0638 extern u64 zswap_pool_total_size;
0639 extern atomic_t zswap_stored_pages;
0640 #endif
0641
0642 #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
0643 extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask);
0644 static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
0645 {
0646 if (mem_cgroup_disabled())
0647 return;
0648 __cgroup_throttle_swaprate(page, gfp_mask);
0649 }
0650 #else
/* No swap/memcg/blkcg support: swap rate throttling is a no-op. */
static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
{
}
0654 #endif
/* Folio wrapper around cgroup_throttle_swaprate(). */
static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
{
	cgroup_throttle_swaprate(&folio->page, gfp);
}
0659
0660 #ifdef CONFIG_MEMCG_SWAP
0661 void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
0662 int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
0663 static inline int mem_cgroup_try_charge_swap(struct folio *folio,
0664 swp_entry_t entry)
0665 {
0666 if (mem_cgroup_disabled())
0667 return 0;
0668 return __mem_cgroup_try_charge_swap(folio, entry);
0669 }
0670
0671 extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
0672 static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
0673 {
0674 if (mem_cgroup_disabled())
0675 return;
0676 __mem_cgroup_uncharge_swap(entry, nr_pages);
0677 }
0678
0679 extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
0680 extern bool mem_cgroup_swap_full(struct page *page);
0681 #else
/* !CONFIG_MEMCG_SWAP stubs: no per-cgroup swap accounting. */
static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
{
}

static inline int mem_cgroup_try_charge_swap(struct folio *folio,
					     swp_entry_t entry)
{
	return 0;
}

static inline void mem_cgroup_uncharge_swap(swp_entry_t entry,
					    unsigned int nr_pages)
{
}

/* Fall back to the global free-swap count. */
static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
{
	return get_nr_swap_pages();
}

static inline bool mem_cgroup_swap_full(struct page *page)
{
	return vm_swap_full();
}
0706 #endif
0707
0708 #endif
0709 #endif