// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/readahead.c - address_space-level file readahead.
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 09Apr2002	Andrew Morton
 *		Initial version.
 */

/**
 * DOC: Readahead Overview
 *
 * Readahead brings data into the page cache before the application has
 * explicitly asked for it, in the expectation that sequential access will
 * continue; this hides storage latency behind the application's processing
 * of earlier data.  Readahead only ever touches indices with no folio in
 * the page cache; a folio which is present but not uptodate is left for
 * the caller to read with ->read_folio().
 *
 * There are two triggers.  Synchronous readahead (page_cache_sync_ra())
 * runs when a read finds no folio at the start of the requested range.
 * Asynchronous readahead (page_cache_async_ra()) runs when a read hits a
 * folio that a previous pass marked with PG_readahead; this lets the next
 * batch of I/O be submitted while the application is still consuming the
 * current one, keeping the pipeline full.
 *
 * The sizing heuristics live in ondemand_readahead().  The mechanics of
 * allocating folios, inserting them into the page cache and starting the
 * I/O live in read_pages() and page_cache_ra_unbounded().  A filesystem's
 * ->readahead() op may stop early; read_pages() cleans up whatever is
 * left over.
 */

#include <linux/blkdev.h>
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>

#include "internal.h"

/*
 * Initialise a struct file's readahead state, using the readahead window
 * size of the backing device.  Called by the VFS when a file is opened.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
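
/*
 * Illustrative sketch (not part of this file): the VFS runs
 * file_ra_state_init() on every struct file at open time; a filesystem
 * would only call it directly to reset the state, e.g. from a
 * hypothetical ->open of its own:
 *
 *	static int example_open(struct inode *inode, struct file *file)
 *	{
 *		file_ra_state_init(&file->f_ra, inode->i_mapping);
 *		return 0;
 *	}
 */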

static void read_pages(struct readahead_control *rac)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct folio *folio;
	struct blk_plug plug;

	if (!readahead_count(rac))
		return;

	blk_start_plug(&plug);

	if (aops->readahead) {
		aops->readahead(rac);
		/*
		 * Clean up the remaining folios.  The sizes in ->ra
		 * may be used to size the next readahead, so make sure
		 * they accurately reflect what happened.
		 */
		while ((folio = readahead_folio(rac)) != NULL) {
			unsigned long nr = folio_nr_pages(folio);

			folio_get(folio);
			rac->ra->size -= nr;
			if (rac->ra->async_size >= nr) {
				rac->ra->async_size -= nr;
				filemap_remove_folio(folio);
			}
			folio_unlock(folio);
			folio_put(folio);
		}
	} else {
		while ((folio = readahead_folio(rac)) != NULL)
			aops->read_folio(rac->file, folio);
	}

	blk_finish_plug(&plug);

	BUG_ON(readahead_count(rac));
}
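
/*
 * Illustrative sketch (not part of this file): a minimal ->readahead
 * implementation drains the request with readahead_folio() and starts
 * I/O on each folio.  readahead_folio() hands back locked folios; the
 * filesystem (typically its I/O completion path) must unlock them.
 * Anything the op leaves unconsumed is mopped up by read_pages() above.
 * example_read_folio_async() is a made-up stand-in for the filesystem's
 * real I/O submission helper.
 *
 *	static void example_readahead(struct readahead_control *rac)
 *	{
 *		struct folio *folio;
 *
 *		while ((folio = readahead_folio(rac)) != NULL)
 *			example_read_folio_async(rac->file, folio);
 *	}
 */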

/**
 * page_cache_ra_unbounded - Start unchecked readahead.
 * @ractl: Readahead control.
 * @nr_to_read: The number of pages to read.
 * @lookahead_size: Where to start the next readahead.
 *
 * This function is for filesystems to call when they want to start
 * readahead beyond a file's stated i_size.  This is almost certainly
 * not the function you want to call.  Use page_cache_async_readahead()
 * or page_cache_sync_readahead() instead.
 *
 * Context: File is referenced by caller.  Mutexes may be held by caller.
 * May sleep, but will not reenter filesystem to reclaim memory.
 */
void page_cache_ra_unbounded(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct address_space *mapping = ractl->mapping;
	unsigned long index = readahead_index(ractl);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);
	unsigned long i;

	/*
	 * Partway through the readahead operation, we will have added
	 * locked folios to the page cache, but will not yet have submitted
	 * them for I/O.  Adding another folio may need to allocate memory,
	 * which can trigger memory reclaim.  If the filesystem is already
	 * holding locks on the folios we have added, re-entering it from
	 * reclaim could deadlock, so run the whole operation with the
	 * memory-allocation scope restricted to GFP_NOFS.
	 */
	unsigned int nofs = memalloc_nofs_save();

	filemap_invalidate_lock_shared(mapping);
	/*
	 * Preallocate as many pages as we will need.
	 */
	for (i = 0; i < nr_to_read; i++) {
		struct folio *folio = xa_load(&mapping->i_pages, index + i);

		if (folio && !xa_is_value(folio)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.  This page may be the one we would
			 * have intended to mark as Readahead, but we don't
			 * have a stable reference to this page, and it's
			 * not worth getting one just for that.
			 */
			read_pages(ractl);
			ractl->_index++;
			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}

		folio = filemap_alloc_folio(gfp_mask, 0);
		if (!folio)
			break;
		if (filemap_add_folio(mapping, folio, index + i,
					gfp_mask) < 0) {
			folio_put(folio);
			read_pages(ractl);
			ractl->_index++;
			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}
		if (i == nr_to_read - lookahead_size)
			folio_set_readahead(folio);
		ractl->_nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the folio is not
	 * uptodate then the caller will launch read_folio again, and
	 * will then handle the error.
	 */
	read_pages(ractl);
	filemap_invalidate_unlock_shared(mapping);
	memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
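
/*
 * Illustrative sketch (not part of this file): per the kernel-doc above,
 * a filesystem that keeps valid data past the file's stated i_size
 * (e.g. packed or compressed trailing blocks) might drive the unbounded
 * variant directly, under its own EOF rules:
 *
 *	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
 *	page_cache_ra_unbounded(&ractl, nr_to_read, 0);
 */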

/*
 * do_page_cache_ra() actually reads a chunk of disk.  It allocates
 * the pages first, then submits them for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 */
static void do_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct inode *inode = ractl->mapping->host;
	unsigned long index = readahead_index(ractl);
	loff_t isize = i_size_read(inode);
	pgoff_t end_index;

	if (isize == 0)
		return;

	end_index = (isize - 1) >> PAGE_SHIFT;
	if (index > end_index)
		return;
	/* Don't read past the page containing the last byte of the file */
	if (nr_to_read > end_index - index)
		nr_to_read = end_index - index + 1;

	page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
}

/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
void force_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read)
{
	struct address_space *mapping = ractl->mapping;
	struct file_ra_state *ra = ractl->ra;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages, index;

	if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
		return;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	index = readahead_index(ractl);
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		ractl->_index = index;
		do_page_cache_ra(ractl, this_chunk, 0);

		index += this_chunk;
		nr_to_read -= this_chunk;
	}
}
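
/*
 * Note: this is the path behind readahead(2) and
 * posix_fadvise(POSIX_FADV_WILLNEED); generic_fadvise() builds a
 * readahead_control for the requested range and forces it in, bypassing
 * the on-demand heuristics so the whole range is brought into the cache.
 */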

/*
 * Set the initial window size: round the request up to the next power of 2,
 * then scale it by how far it is from the maximum (x 4 for small requests,
 * x 2 for medium ones, clamped at max for large ones).  For the default
 * 128k (32 page) maximum:
 * 1-2 page reads = 16k, 3-4 pages = 32k, 5-8 pages = 64k, 9-16 pages = 128k.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}

/*
 * Get the previous window size, ramp it up, and
 * return it as the new window size.
 */
static unsigned long get_next_ra_size(struct file_ra_state *ra,
				      unsigned long max)
{
	unsigned long cur = ra->size;

	if (cur < max / 16)
		return 4 * cur;
	if (cur <= max / 2)
		return 2 * cur;
	return max;
}
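
/*
 * Worked example of the ramp-up, assuming the common 128KiB (32-page)
 * default window: a single-page first read gets an initial window of
 * get_init_ra_size(1, 32) = 4 pages; successive sequential hits then
 * grow it via get_next_ra_size() as 4 -> 8 -> 16 -> 32 pages, where it
 * saturates at the maximum.
 */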

/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': Do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * Instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window.  Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * be invalidating each other's readahead state.  So we flag the new readahead
 * page at (start+size-async_size) with PG_readahead, and use it as the
 * readahead indicator.  The flag won't be set on already cached pages, to
 * avoid the readahead-for-nothing fuss, saving pointless page cache lookups.
 *
 * prev_pos tracks the last visited byte in the _previous_ read request.
 * It should be maintained by the caller, and will be used for detecting
 * small random reads.  Note that the readahead algorithm checks loosely
 * for sequential patterns, so interleaved reads might be served as
 * sequential ones.
 *
 * There is a special-case: if the first page which the application tries
 * to read happens to be the first page of the file, it is assumed that a
 * linear read is about to happen and the window is immediately set to the
 * initial size based on the I/O request size and the max readahead.
 *
 * The code ramps up the readahead size aggressively at first, but slows
 * down as it approaches the maximum.
 */

/*
 * Count contiguously cached pages from @index-1 to @index-@max,
 * this count is a conservative estimation of
 *	- length of the sequential read sequence, or
 *	- thrashing threshold in memory tight systems
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t index, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, index - 1, max);
	rcu_read_unlock();

	return index - 1 - head;
}

/*
 * page cache context based readahead
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t index,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, index, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= index)
		size *= 2;

	ra->start = index;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}

/*
 * There are some parts of the kernel which assume that PMD entries
 * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
 * limit the maximum allocation order to PMD size.  I'm not aware of any
 * assumptions about maximum order if THP are disabled, but 8 seems like
 * a good order (that's 1MB if you're using 4kB pages)
 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
#else
#define MAX_PAGECACHE_ORDER	8
#endif

static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
		pgoff_t mark, unsigned int order, gfp_t gfp)
{
	int err;
	struct folio *folio = filemap_alloc_folio(gfp, order);

	if (!folio)
		return -ENOMEM;
	mark = round_up(mark, 1UL << order);
	if (index == mark)
		folio_set_readahead(folio);
	err = filemap_add_folio(ractl->mapping, folio, index, gfp);
	if (err)
		folio_put(folio);
	else
		ractl->_nr_pages += 1UL << order;
	return err;
}

void page_cache_ra_order(struct readahead_control *ractl,
		struct file_ra_state *ra, unsigned int new_order)
{
	struct address_space *mapping = ractl->mapping;
	pgoff_t index = readahead_index(ractl);
	pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
	pgoff_t mark = index + ra->size - ra->async_size;
	int err = 0;
	gfp_t gfp = readahead_gfp_mask(mapping);

	if (!mapping_large_folio_support(mapping) || ra->size < 4)
		goto fallback;

	limit = min(limit, index + ra->size - 1);

	if (new_order < MAX_PAGECACHE_ORDER) {
		new_order += 2;
		if (new_order > MAX_PAGECACHE_ORDER)
			new_order = MAX_PAGECACHE_ORDER;
		while ((1 << new_order) > ra->size)
			new_order--;
	}

	filemap_invalidate_lock_shared(mapping);
	while (index <= limit) {
		unsigned int order = new_order;

		/* Align with smaller pages if needed */
		if (index & ((1UL << order) - 1)) {
			order = __ffs(index);
			if (order == 1)
				order = 0;
		}
		/* Don't allocate pages past EOF */
		while (index + (1UL << order) - 1 > limit) {
			if (--order == 1)
				order = 0;
		}
		err = ra_alloc_folio(ractl, index, mark, order, gfp);
		if (err)
			break;
		index += 1UL << order;
	}

	if (index > limit) {
		ra->size += index - limit - 1;
		ra->async_size += index - limit - 1;
	}

	read_pages(ractl);
	filemap_invalidate_unlock_shared(mapping);

	/*
	 * If there were already pages in the page cache, then we may have
	 * left some gaps.  Let the regular readahead code take care of this
	 * situation.
	 */
	if (!err)
		return;
fallback:
	do_page_cache_ra(ractl, ra->size, ra->async_size);
}

/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static void ondemand_readahead(struct readahead_control *ractl,
		struct folio *folio, unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
	struct file_ra_state *ra = ractl->ra;
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	pgoff_t index = readahead_index(ractl);
	pgoff_t expected, prev_index;
	unsigned int order = folio ? folio_order(folio) : 0;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!index)
		goto initial_readahead;

	/*
	 * It's the expected callback index, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	expected = round_up(ra->start + ra->size - ra->async_size,
			1UL << order);
	if (index == expected || index == (ra->start + ra->size)) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked folio without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size.  Ramp it up and use it as the new readahead size.
	 */
	if (folio) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(ractl->mapping, index + 1,
				max_pages);
		rcu_read_unlock();

		if (!start || start - index > max_pages)
			return;

		ra->start = start;
		ra->size = start - index;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (index - prev_index) == 1
	 * unaligned reads: (index - prev_index) == 0
	 */
	prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (index - prev_index <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(ractl->mapping, ra, index, req_size,
			max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	do_page_cache_ra(ractl, req_size, 0);
	return;

initial_readahead:
	ra->start = index;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulting next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (index == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	ractl->_index = ra->start;
	page_cache_ra_order(ractl, ra, order);
}

void page_cache_sync_ra(struct readahead_control *ractl,
		unsigned long req_count)
{
	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);

	/*
	 * Even if readahead is disabled, issue this request as readahead
	 * as we'll need it to satisfy the requested range.  The forced
	 * readahead will do the right thing and limit the read to just the
	 * requested range, which we'll set to 1 page for this case.
	 */
	if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
		if (!ractl->file)
			return;
		req_count = 1;
		do_forced_ra = true;
	}

	/* Forced readahead: no heuristics, just read the requested range. */
	if (do_forced_ra) {
		force_page_cache_ra(ractl, req_count);
		return;
	}

	ondemand_readahead(ractl, NULL, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
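
/*
 * Illustrative sketch (not part of this file): callers normally reach
 * page_cache_sync_ra() through the page_cache_sync_readahead() wrapper
 * in <linux/pagemap.h>, which packs the arguments into a
 * readahead_control on the stack:
 *
 *	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
 *	page_cache_sync_ra(&ractl, req_count);
 */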

void page_cache_async_ra(struct readahead_control *ractl,
		struct folio *folio, unsigned long req_count)
{
	/* No readahead to do if readahead is disabled. */
	if (!ractl->ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (folio_test_writeback(folio))
		return;

	folio_clear_readahead(folio);

	if (blk_cgroup_congested())
		return;

	ondemand_readahead(ractl, folio, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);

ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead.  If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}

#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_READAHEAD)
COMPAT_SYSCALL_DEFINE4(readahead, int, fd, compat_arg_u64_dual(offset), size_t, count)
{
	return ksys_readahead(fd, compat_arg_u64_glue(offset), count);
}
#endif

/**
 * readahead_expand - Expand a readahead request
 * @ractl: The request to be expanded
 * @new_start: The revised start
 * @new_len: The revised size of the request
 *
 * Attempt to expand a readahead request outwards from the current size to the
 * specified size by inserting locked pages before and after the current window
 * to increase the size to the new window.  This may involve the insertion of
 * THPs, in which case the window may get expanded even beyond what was
 * requested.
 *
 * The algorithm will stop if it encounters a conflicting page already in the
 * pagecache and leave a smaller expansion than requested.
 *
 * The caller must check for this by examining the revised @ractl object for a
 * different expansion than was requested.
 */
void readahead_expand(struct readahead_control *ractl,
		      loff_t new_start, size_t new_len)
{
	struct address_space *mapping = ractl->mapping;
	struct file_ra_state *ra = ractl->ra;
	pgoff_t new_index, new_nr_pages;
	gfp_t gfp_mask = readahead_gfp_mask(mapping);

	new_index = new_start / PAGE_SIZE;

	/* Expand the leading edge downwards */
	while (ractl->_index > new_index) {
		unsigned long index = ractl->_index - 1;
		struct page *page = xa_load(&mapping->i_pages, index);

		if (page && !xa_is_value(page))
			return; /* Page apparently present */

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return;
		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
			put_page(page);
			return;
		}

		ractl->_nr_pages++;
		ractl->_index = page->index;
	}

	new_len += new_start - readahead_pos(ractl);
	new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);

	/* Expand the trailing edge upwards */
	while (ractl->_nr_pages < new_nr_pages) {
		unsigned long index = ractl->_index + ractl->_nr_pages;
		struct page *page = xa_load(&mapping->i_pages, index);

		if (page && !xa_is_value(page))
			return; /* Page apparently present */

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return;
		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
			put_page(page);
			return;
		}
		ractl->_nr_pages++;
		if (ra) {
			ra->size++;
			ra->async_size++;
		}
	}
}
EXPORT_SYMBOL(readahead_expand);
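
/*
 * Illustrative sketch (not part of this file): a filesystem whose I/O
 * unit is larger than a page (say a 64KiB compression block; the size
 * here is made up for the example) might widen the window from its
 * ->readahead op before submitting I/O:
 *
 *	static void example_readahead(struct readahead_control *ractl)
 *	{
 *		loff_t start = readahead_pos(ractl);
 *		size_t len = readahead_length(ractl);
 *		loff_t rounded = round_down(start, SZ_64K);
 *
 *		readahead_expand(ractl, rounded,
 *				 round_up(start + len, SZ_64K) - rounded);
 *
 *		...then re-read readahead_pos()/readahead_length(), since
 *		expansion stops short if it meets an existing page.
 *	}
 */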