0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #include <linux/mm.h>
0015 #include <linux/kernel_stat.h>
0016 #include <linux/gfp.h>
0017 #include <linux/pagemap.h>
0018 #include <linux/swap.h>
0019 #include <linux/bio.h>
0020 #include <linux/swapops.h>
0021 #include <linux/buffer_head.h>
0022 #include <linux/writeback.h>
0023 #include <linux/frontswap.h>
0024 #include <linux/blkdev.h>
0025 #include <linux/psi.h>
0026 #include <linux/uio.h>
0027 #include <linux/sched/task.h>
0028 #include <linux/delayacct.h>
0029 #include "swap.h"
0030
/*
 * Completion handler for swap-out bios submitted by __swap_writepage().
 * Runs in bio completion context; ends writeback on the page and drops
 * the bio reference.
 */
void end_swap_bio_write(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	if (bio->bi_status) {
		SetPageError(page);
		/*
		 * We failed to write the page out to swap-space.
		 * Re-dirty the page in order to avoid it being reclaimed.
		 * Also print a dire warning that things will go BAD (tm)
		 * very quickly.
		 *
		 * Also clear PG_reclaim to avoid folio_rotate_reclaimable()
		 */
		set_page_dirty(page);
		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
		ClearPageReclaim(page);
	}
	end_page_writeback(page);
	bio_put(bio);
}
0054
/*
 * Completion handler for swap-in bios.  For a synchronous read,
 * swap_readpage() stored the submitting task in bio->bi_private and
 * spins on it; clearing bi_private below is what releases that waiter,
 * so it uses WRITE_ONCE() to pair with the waiter's READ_ONCE() poll.
 */
static void end_swap_bio_read(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);
	struct task_struct *waiter = bio->bi_private;

	if (bio->bi_status) {
		SetPageError(page);
		ClearPageUptodate(page);
		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
		goto out;
	}

	SetPageUptodate(page);
out:
	unlock_page(page);
	/* Publish completion to the synchronous waiter (if any). */
	WRITE_ONCE(bio->bi_private, NULL);
	bio_put(bio);
	if (waiter) {
		blk_wake_io_task(waiter);
		/* Drop the task reference taken at submission time. */
		put_task_struct(waiter);
	}
}
0079
/*
 * Build the swap extent map for a swapfile by probing the backing
 * filesystem with bmap().  Every PAGE_SIZE-sized run of blocks that is
 * physically contiguous and page-aligned on disk is added as a swap
 * extent; a hole (bmap failure or block 0) aborts the swapon.
 *
 * Returns the number of extents added, or a negative errno.  On
 * success, *span is set to the page-granular distance between the
 * lowest and highest mapped blocks.
 */
int generic_swapfile_activate(struct swap_info_struct *sis,
			      struct file *swap_file,
			      sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent tree.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
	       page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = probe_block;
		ret = bmap(inode, &first_block);
		if (ret || !first_block)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* All blocks within the page must be physically contiguous. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
		     block_in_page++) {
			sector_t block;

			block = probe_block + block_in_page;
			ret = bmap(inode, &block);
			if (ret || !block)
				goto bad_bmap;

			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* page 0 (the header) doesn't count for span */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* never let max/pages underflow below */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	ret = -EINVAL;
	goto out;
}
0176
0177
0178
0179
0180
/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret = 0;

	/* If nobody else needs the swap copy, skip the write entirely. */
	if (try_to_free_swap(page)) {
		unlock_page(page);
		goto out;
	}
	/*
	 * Arch code may have to preserve more data than just the page
	 * contents, e.g. memory tags.
	 */
	ret = arch_prepare_to_swap(page);
	if (ret) {
		/* Keep the page dirty so reclaim retries it later. */
		set_page_dirty(page);
		unlock_page(page);
		goto out;
	}
	if (frontswap_store(page) == 0) {
		/*
		 * Frontswap took the page; run a zero-length writeback
		 * cycle so callers see a normal completion.
		 */
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
		goto out;
	}
	ret = __swap_writepage(page, wbc, end_swap_bio_write);
out:
	return ret;
}
0209
/*
 * Account a swap-out in vmstat: one PSWPOUT per base page (thp_nr_pages),
 * plus a THP_SWPOUT event when the page is a transparent huge page.
 */
static inline void count_swpout_vm_event(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (unlikely(PageTransHuge(page)))
		count_vm_event(THP_SWPOUT);
#endif
	count_vm_events(PSWPOUT, thp_nr_pages(page));
}
0218
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
/*
 * Attach the bio to the blkcg that corresponds to the page's memory
 * cgroup, so the IO controller can attribute this swap IO correctly.
 * No-op when the page has no memcg.
 */
static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
{
	struct mem_cgroup *memcg = page_memcg(page);
	struct cgroup_subsys_state *blkcg_css;

	if (!memcg)
		return;

	rcu_read_lock();
	blkcg_css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
	bio_associate_blkg_from_css(bio, blkcg_css);
	rcu_read_unlock();
}
#else
#define bio_associate_blkg_from_page(bio, page) do { } while (0)
#endif
0237
/*
 * One batch of swap pages being read or written through the
 * filesystem's ->swap_rw().  The embedded kiocb's ki_filp/ki_pos track
 * where the next contiguous page would land, so callers can keep
 * appending pages to an open batch ("plug") before submitting it.
 */
struct swap_iocb {
	struct kiocb iocb;
	struct bio_vec bvec[SWAP_CLUSTER_MAX];
	int pages;	/* number of bvec slots in use */
	int len;	/* total bytes queued across bvec[] */
};
/* Backing pool for swap_iocb allocation; created lazily by sio_pool_init(). */
static mempool_t *sio_pool;
0245
0246 int sio_pool_init(void)
0247 {
0248 if (!sio_pool) {
0249 mempool_t *pool = mempool_create_kmalloc_pool(
0250 SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
0251 if (cmpxchg(&sio_pool, NULL, pool))
0252 mempool_destroy(pool);
0253 }
0254 if (!sio_pool)
0255 return -ENOMEM;
0256 return 0;
0257 }
0258
/*
 * Completion callback for batched ->swap_rw() writes.  ret is the byte
 * count actually written (or a negative error); anything short of
 * sio->len fails the whole batch.
 */
static void sio_write_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	struct page *page = sio->bvec[0].bv_page;
	int p;

	if (ret != sio->len) {
		/*
		 * In the case of swap-over-nfs, this can be a
		 * temporary failure if the system has limited
		 * memory for allocating transmit buffers.
		 * Mark the page dirty and avoid
		 * folio_rotate_reclaimable but rate-limit the
		 * messages but do not flag PageError like
		 * the normal direct-to-bio case as it could
		 * be temporary.
		 */
		pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
				   ret, page_file_offset(page));
		for (p = 0; p < sio->pages; p++) {
			page = sio->bvec[p].bv_page;
			set_page_dirty(page);
			ClearPageReclaim(page);
		}
	} else {
		for (p = 0; p < sio->pages; p++)
			count_swpout_vm_event(sio->bvec[p].bv_page);
	}

	for (p = 0; p < sio->pages; p++)
		end_page_writeback(sio->bvec[p].bv_page);

	mempool_free(sio, sio_pool);
}
0293
/*
 * Write a swap page through the filesystem's ->swap_rw() instead of the
 * block layer (the SWP_FS_OPS path, e.g. swap over a network fs).
 * When wbc->swap_plug is provided, consecutive pages are batched into a
 * single swap_iocb and submitted later by swap_write_unplug().
 */
static int swap_writepage_fs(struct page *page, struct writeback_control *wbc)
{
	struct swap_iocb *sio = NULL;
	struct swap_info_struct *sis = page_swap_info(page);
	struct file *swap_file = sis->swap_file;
	loff_t pos = page_file_offset(page);

	set_page_writeback(page);
	unlock_page(page);
	if (wbc->swap_plug)
		sio = *wbc->swap_plug;
	if (sio) {
		/*
		 * An open batch can only grow if this page continues the
		 * same file at exactly the next offset; otherwise flush it.
		 */
		if (sio->iocb.ki_filp != swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_write_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_NOIO);
		init_sync_kiocb(&sio->iocb, swap_file);
		sio->iocb.ki_complete = sio_write_complete;
		sio->iocb.ki_pos = pos;
		sio->pages = 0;
		sio->len = 0;
	}
	sio->bvec[sio->pages].bv_page = page;
	sio->bvec[sio->pages].bv_len = thp_size(page);
	sio->bvec[sio->pages].bv_offset = 0;
	sio->len += thp_size(page);
	sio->pages += 1;
	/* Submit when the batch is full, or immediately if unplugged. */
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
		swap_write_unplug(sio);
		sio = NULL;
	}
	if (wbc->swap_plug)
		*wbc->swap_plug = sio;

	return 0;
}
0334
/*
 * Submit a swap-out for @page: via ->swap_rw() for SWP_FS_OPS swap
 * files, via bdev_write_page() when the block device supports it, or
 * via an explicit bio otherwise.  @end_write_func completes the bio path.
 */
int __swap_writepage(struct page *page, struct writeback_control *wbc,
		     bio_end_io_t end_write_func)
{
	struct bio *bio;
	int ret;
	struct swap_info_struct *sis = page_swap_info(page);

	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	/*
	 * ->flags can be updated non-atomicially (scan_swap_map_slots),
	 * but that will never affect SWP_FS_OPS, so the data_race
	 * is safe.
	 */
	if (data_race(sis->flags & SWP_FS_OPS))
		return swap_writepage_fs(page, wbc);

	/* Fast path: let the driver take the page without a bio. */
	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
	if (!ret) {
		count_swpout_vm_event(page);
		return 0;
	}

	bio = bio_alloc(sis->bdev, 1,
			REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
			GFP_NOIO);
	bio->bi_iter.bi_sector = swap_page_sector(page);
	bio->bi_end_io = end_write_func;
	bio_add_page(bio, page, thp_size(page), 0);

	bio_associate_blkg_from_page(bio, page);
	count_swpout_vm_event(page);
	/* Writeback must be set before the page is unlocked and submitted. */
	set_page_writeback(page);
	unlock_page(page);
	submit_bio(bio);

	return 0;
}
0372
0373 void swap_write_unplug(struct swap_iocb *sio)
0374 {
0375 struct iov_iter from;
0376 struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
0377 int ret;
0378
0379 iov_iter_bvec(&from, WRITE, sio->bvec, sio->pages, sio->len);
0380 ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
0381 if (ret != -EIOCBQUEUED)
0382 sio_write_complete(&sio->iocb, ret);
0383 }
0384
/*
 * Completion callback for batched ->swap_rw() reads.  A full-length
 * read marks every page uptodate; a short or failed read fails the
 * whole batch.  Pages are unlocked either way.
 */
static void sio_read_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	int p;

	if (ret == sio->len) {
		for (p = 0; p < sio->pages; p++) {
			struct page *page = sio->bvec[p].bv_page;

			SetPageUptodate(page);
			unlock_page(page);
		}
		count_vm_events(PSWPIN, sio->pages);
	} else {
		for (p = 0; p < sio->pages; p++) {
			struct page *page = sio->bvec[p].bv_page;

			SetPageError(page);
			ClearPageUptodate(page);
			unlock_page(page);
		}
		pr_alert_ratelimited("Read-error on swap-device\n");
	}
	mempool_free(sio, sio_pool);
}
0410
/*
 * Read a swap page through the filesystem's ->swap_rw() (SWP_FS_OPS
 * path).  Mirrors swap_writepage_fs(): when @plug is provided,
 * consecutive pages are batched into one swap_iocb and submitted later
 * by swap_read_unplug().
 */
static void swap_readpage_fs(struct page *page,
			     struct swap_iocb **plug)
{
	struct swap_info_struct *sis = page_swap_info(page);
	struct swap_iocb *sio = NULL;
	loff_t pos = page_file_offset(page);

	if (plug)
		sio = *plug;
	if (sio) {
		/* Only a contiguous page from the same file extends a batch. */
		if (sio->iocb.ki_filp != sis->swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_read_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_KERNEL);
		init_sync_kiocb(&sio->iocb, sis->swap_file);
		sio->iocb.ki_pos = pos;
		sio->iocb.ki_complete = sio_read_complete;
		sio->pages = 0;
		sio->len = 0;
	}
	sio->bvec[sio->pages].bv_page = page;
	sio->bvec[sio->pages].bv_len = thp_size(page);
	sio->bvec[sio->pages].bv_offset = 0;
	sio->len += thp_size(page);
	sio->pages += 1;
	/* Submit when the batch is full, or immediately when unplugged. */
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
		swap_read_unplug(sio);
		sio = NULL;
	}
	if (plug)
		*plug = sio;
}
0447
/*
 * Read a page back in from swap.  Tries, in order: frontswap, the
 * SWP_FS_OPS filesystem path, the bdev_read_page() fast path for
 * SWP_SYNCHRONOUS_IO devices, and finally an explicit bio.  When
 * @synchronous, waits for the bio to complete before returning.
 */
int swap_readpage(struct page *page, bool synchronous,
		  struct swap_iocb **plug)
{
	struct bio *bio;
	int ret = 0;
	struct swap_info_struct *sis = page_swap_info(page);
	bool workingset = PageWorkingset(page);
	unsigned long pflags;

	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageUptodate(page), page);

	/*
	 * Count submission time as memory stall and delay. When the device
	 * is congested, or the submitting cgroup IO-throttled, submission
	 * can be a significant part of overall IO time.
	 */
	if (workingset)
		psi_memstall_enter(&pflags);
	delayacct_swapin_start();

	if (frontswap_load(page) == 0) {
		/* Frontswap had the page; no device IO needed. */
		SetPageUptodate(page);
		unlock_page(page);
		goto out;
	}

	if (data_race(sis->flags & SWP_FS_OPS)) {
		swap_readpage_fs(page, plug);
		goto out;
	}

	if (sis->flags & SWP_SYNCHRONOUS_IO) {
		/* Fast path: driver reads the page without a bio. */
		ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
		if (!ret) {
			count_vm_event(PSWPIN);
			goto out;
		}
	}

	ret = 0;
	bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = swap_page_sector(page);
	bio->bi_end_io = end_swap_bio_read;
	bio_add_page(bio, page, thp_size(page), 0);
	/*
	 * For synchronous reads, stash a task reference in bi_private;
	 * end_swap_bio_read() wakes us and drops the reference.
	 */
	if (synchronous) {
		get_task_struct(current);
		bio->bi_private = current;
	}
	count_vm_event(PSWPIN);
	/* Extra ref so the bio outlives completion for the polling loop. */
	bio_get(bio);
	submit_bio(bio);
	while (synchronous) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* Completion clears bi_private with WRITE_ONCE(). */
		if (!READ_ONCE(bio->bi_private))
			break;

		blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);
	bio_put(bio);

out:
	if (workingset)
		psi_memstall_leave(&pflags);
	delayacct_swapin_end();
	return ret;
}
0521
0522 void __swap_read_unplug(struct swap_iocb *sio)
0523 {
0524 struct iov_iter from;
0525 struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
0526 int ret;
0527
0528 iov_iter_bvec(&from, READ, sio->bvec, sio->pages, sio->len);
0529 ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
0530 if (ret != -EIOCBQUEUED)
0531 sio_read_complete(&sio->iocb, ret);
0532 }