// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem used to do this differently, for example)
 */

0013 #include <linux/export.h>
0014 #include <linux/compiler.h>
0015 #include <linux/dax.h>
0016 #include <linux/fs.h>
0017 #include <linux/sched/signal.h>
0018 #include <linux/uaccess.h>
0019 #include <linux/capability.h>
0020 #include <linux/kernel_stat.h>
0021 #include <linux/gfp.h>
0022 #include <linux/mm.h>
0023 #include <linux/swap.h>
0024 #include <linux/swapops.h>
0025 #include <linux/mman.h>
0026 #include <linux/pagemap.h>
0027 #include <linux/file.h>
0028 #include <linux/uio.h>
0029 #include <linux/error-injection.h>
0030 #include <linux/hash.h>
0031 #include <linux/writeback.h>
0032 #include <linux/backing-dev.h>
0033 #include <linux/pagevec.h>
0034 #include <linux/security.h>
0035 #include <linux/cpuset.h>
0036 #include <linux/hugetlb.h>
0037 #include <linux/memcontrol.h>
0038 #include <linux/shmem_fs.h>
0039 #include <linux/rmap.h>
0040 #include <linux/delayacct.h>
0041 #include <linux/psi.h>
0042 #include <linux/ramfs.h>
0043 #include <linux/page_idle.h>
0044 #include <linux/migrate.h>
0045 #include <asm/pgalloc.h>
0046 #include <asm/tlbflush.h>
0047 #include "internal.h"
0048
0049 #define CREATE_TRACE_POINTS
0050 #include <trace/events/filemap.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
0055 #include <linux/buffer_head.h> /* for try_to_free_buffers */
0056
0057 #include <asm/mman.h>
0058

/*
 * Lock ordering:
 *
 *  ->i_rwsem
 *    ->invalidate_lock		(acquired by fs in truncate path)
 *      ->i_mmap_rwsem		(truncate->unmap_mapping_range)
 *
 *  ->mmap_lock
 *    ->invalidate_lock		(filemap_fault)
 *      ->folio lock		(filemap_fault, access_process_vm)
 *
 *  ->i_rwsem			(generic_perform_write)
 *    ->mmap_lock		(fault_in_readable->do_page_fault)
 *
 * When a folio is removed from the page cache (filemap_remove_folio()
 * and delete_from_page_cache_batch()):
 *
 *  ->folio lock
 *    ->host->i_lock
 *      ->i_pages lock		(xa_lock_irq)
 *
 * so that a mapping which becomes empty can be put back on the inode
 * LRU (inode_add_lru()) under ->i_lock.
 */
0124 static void page_cache_delete(struct address_space *mapping,
0125 struct folio *folio, void *shadow)
0126 {
0127 XA_STATE(xas, &mapping->i_pages, folio->index);
0128 long nr = 1;
0129
0130 mapping_set_update(&xas, mapping);
0131
0132
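	/* hugetlb pages are represented by a single entry in the xarray */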
0133 if (!folio_test_hugetlb(folio)) {
0134 xas_set_order(&xas, folio->index, folio_order(folio));
0135 nr = folio_nr_pages(folio);
0136 }
0137
0138 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
0139
0140 xas_store(&xas, shadow);
0141 xas_init_marks(&xas);
0142
0143 folio->mapping = NULL;
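	/* Leave folio->index set: truncation lookup relies on it */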
0144
0145 mapping->nrpages -= nr;
0146 }
0147
0148 static void filemap_unaccount_folio(struct address_space *mapping,
0149 struct folio *folio)
0150 {
0151 long nr;
0152
0153 VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
0154 if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
0155 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
0156 current->comm, folio_pfn(folio));
0157 dump_page(&folio->page, "still mapped when deleted");
0158 dump_stack();
0159 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
0160
0161 if (mapping_exiting(mapping) && !folio_test_large(folio)) {
0162 int mapcount = page_mapcount(&folio->page);
0163
0164 if (folio_ref_count(folio) >= mapcount + 2) {
				/*
				 * All vmas have already been torn down, so it's
				 * a good bet that actually the page is unmapped
				 * and we'd rather not leak it: if we're wrong,
				 * another bad page check should catch it later.
				 */
0171 page_mapcount_reset(&folio->page);
0172 folio_ref_sub(folio, mapcount);
0173 }
0174 }
0175 }
0176
0177
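	/* hugetlb folios do not participate in page cache accounting. */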
0178 if (folio_test_hugetlb(folio))
0179 return;
0180
0181 nr = folio_nr_pages(folio);
0182
0183 __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
0184 if (folio_test_swapbacked(folio)) {
0185 __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
0186 if (folio_test_pmd_mappable(folio))
0187 __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);
0188 } else if (folio_test_pmd_mappable(folio)) {
0189 __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);
0190 filemap_nr_thps_dec(mapping);
0191 }
0192
	/*
	 * At this point the folio must be either written or cleaned by
	 * truncate.  A dirty folio here signals a bug and loss of
	 * unwritten data - on ordinary filesystems.
	 *
	 * But it's harmless on in-memory filesystems like tmpfs; and can
	 * occur when a driver which did get_user_pages() sets the page
	 * dirty before putting it, while the inode is being finally
	 * evicted.
	 *
	 * Below fixes dirty accounting after removing the folio entirely
	 * but leaves the dirty flag set: it has no effect for a truncated
	 * folio and anyway will be cleared before returning the folio to
	 * the buddy allocator.
	 */
0207 if (WARN_ON_ONCE(folio_test_dirty(folio) &&
0208 mapping_can_writeback(mapping)))
0209 folio_account_cleaned(folio, inode_to_wb(mapping->host));
0210 }
0211
/*
 * Delete a folio from the page cache and free it. Caller has to make
 * sure the folio is locked and that nobody else uses it - or that usage
 * is safe.  The caller must hold the i_pages lock.
 */
0217 void __filemap_remove_folio(struct folio *folio, void *shadow)
0218 {
0219 struct address_space *mapping = folio->mapping;
0220
0221 trace_mm_filemap_delete_from_page_cache(folio);
0222 filemap_unaccount_folio(mapping, folio);
0223 page_cache_delete(mapping, folio, shadow);
0224 }
0225
0226 void filemap_free_folio(struct address_space *mapping, struct folio *folio)
0227 {
0228 void (*free_folio)(struct folio *);
0229 int refs = 1;
0230
0231 free_folio = mapping->a_ops->free_folio;
0232 if (free_folio)
0233 free_folio(folio);
0234
0235 if (folio_test_large(folio) && !folio_test_hugetlb(folio))
0236 refs = folio_nr_pages(folio);
0237 folio_put_refs(folio, refs);
0238 }
0239
/**
 * filemap_remove_folio - Remove folio from page cache.
 * @folio: The folio.
 *
 * This must be called only on folios that are locked and have been
 * verified to be in the page cache.  It will never put the folio into
 * the free list because the caller has a reference on the folio.
 */
0248 void filemap_remove_folio(struct folio *folio)
0249 {
0250 struct address_space *mapping = folio->mapping;
0251
0252 BUG_ON(!folio_test_locked(folio));
0253 spin_lock(&mapping->host->i_lock);
0254 xa_lock_irq(&mapping->i_pages);
0255 __filemap_remove_folio(folio, NULL);
0256 xa_unlock_irq(&mapping->i_pages);
0257 if (mapping_shrinkable(mapping))
0258 inode_add_lru(mapping->host);
0259 spin_unlock(&mapping->host->i_lock);
0260
0261 filemap_free_folio(mapping, folio);
0262 }
0263
/*
 * page_cache_delete_batch - delete several folios from page cache
 * @mapping: the mapping to which folios belong
 * @fbatch: batch of folios to delete
 *
 * The function walks over mapping->i_pages and removes folios passed in
 * @fbatch from the mapping. The function expects @fbatch to be sorted
 * by page index and is optimised for it to be dense.
 * It tolerates holes in @fbatch provided they are sorted, i.e. the batch
 * contains only folios that are in the page cache.
 *
 * The function expects the i_pages lock to be held.
 */
0277 static void page_cache_delete_batch(struct address_space *mapping,
0278 struct folio_batch *fbatch)
0279 {
0280 XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index);
0281 long total_pages = 0;
0282 int i = 0;
0283 struct folio *folio;
0284
0285 mapping_set_update(&xas, mapping);
0286 xas_for_each(&xas, folio, ULONG_MAX) {
0287 if (i >= folio_batch_count(fbatch))
0288 break;
0289
0290
0291 if (xa_is_value(folio))
0292 continue;
0293
		/*
		 * A page got inserted in our range? Skip it. We have our
		 * pages locked so they are protected from being removed.
		 * If we see a page whose index is higher than ours, it
		 * means our page has been removed, which shouldn't be
		 * possible because we're holding the folio lock.
		 */
0300 if (folio != fbatch->folios[i]) {
0301 VM_BUG_ON_FOLIO(folio->index >
0302 fbatch->folios[i]->index, folio);
0303 continue;
0304 }
0305
0306 WARN_ON_ONCE(!folio_test_locked(folio));
0307
0308 folio->mapping = NULL;
0309
0310
0311 i++;
0312 xas_store(&xas, NULL);
0313 total_pages += folio_nr_pages(folio);
0314 }
0315 mapping->nrpages -= total_pages;
0316 }
0317
0318 void delete_from_page_cache_batch(struct address_space *mapping,
0319 struct folio_batch *fbatch)
0320 {
0321 int i;
0322
0323 if (!folio_batch_count(fbatch))
0324 return;
0325
0326 spin_lock(&mapping->host->i_lock);
0327 xa_lock_irq(&mapping->i_pages);
0328 for (i = 0; i < folio_batch_count(fbatch); i++) {
0329 struct folio *folio = fbatch->folios[i];
0330
0331 trace_mm_filemap_delete_from_page_cache(folio);
0332 filemap_unaccount_folio(mapping, folio);
0333 }
0334 page_cache_delete_batch(mapping, fbatch);
0335 xa_unlock_irq(&mapping->i_pages);
0336 if (mapping_shrinkable(mapping))
0337 inode_add_lru(mapping->host);
0338 spin_unlock(&mapping->host->i_lock);
0339
0340 for (i = 0; i < folio_batch_count(fbatch); i++)
0341 filemap_free_folio(mapping, fbatch->folios[i]);
0342 }
0343
0344 int filemap_check_errors(struct address_space *mapping)
0345 {
0346 int ret = 0;
0347
0348 if (test_bit(AS_ENOSPC, &mapping->flags) &&
0349 test_and_clear_bit(AS_ENOSPC, &mapping->flags))
0350 ret = -ENOSPC;
0351 if (test_bit(AS_EIO, &mapping->flags) &&
0352 test_and_clear_bit(AS_EIO, &mapping->flags))
0353 ret = -EIO;
0354 return ret;
0355 }
0356 EXPORT_SYMBOL(filemap_check_errors);
0357
0358 static int filemap_check_and_keep_errors(struct address_space *mapping)
0359 {
0360
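	/* Check for outstanding write errors */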
0361 if (test_bit(AS_EIO, &mapping->flags))
0362 return -EIO;
0363 if (test_bit(AS_ENOSPC, &mapping->flags))
0364 return -ENOSPC;
0365 return 0;
0366 }
0367
/**
 * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @wbc:	the writeback_control controlling the writeout
 *
 * Call writepages on the mapping using the provided wbc to control the
 * writeout.
 *
 * Return: %0 on success, negative error code otherwise.
 */
0378 int filemap_fdatawrite_wbc(struct address_space *mapping,
0379 struct writeback_control *wbc)
0380 {
0381 int ret;
0382
0383 if (!mapping_can_writeback(mapping) ||
0384 !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
0385 return 0;
0386
0387 wbc_attach_fdatawrite_inode(wbc, mapping->host);
0388 ret = do_writepages(mapping, wbc);
0389 wbc_detach_inode(wbc);
0390 return ret;
0391 }
0392 EXPORT_SYMBOL(filemap_fdatawrite_wbc);
0393
/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback.  The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 *
 * Return: %0 on success, negative error code otherwise.
 */
0411 int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
0412 loff_t end, int sync_mode)
0413 {
0414 struct writeback_control wbc = {
0415 .sync_mode = sync_mode,
0416 .nr_to_write = LONG_MAX,
0417 .range_start = start,
0418 .range_end = end,
0419 };
0420
0421 return filemap_fdatawrite_wbc(mapping, &wbc);
0422 }
0423
0424 static inline int __filemap_fdatawrite(struct address_space *mapping,
0425 int sync_mode)
0426 {
0427 return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
0428 }
0429
0430 int filemap_fdatawrite(struct address_space *mapping)
0431 {
0432 return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
0433 }
0434 EXPORT_SYMBOL(filemap_fdatawrite);
0435
0436 int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
0437 loff_t end)
0438 {
0439 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
0440 }
0441 EXPORT_SYMBOL(filemap_fdatawrite_range);
0442
/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping:	target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */
0452 int filemap_flush(struct address_space *mapping)
0453 {
0454 return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
0455 }
0456 EXPORT_SYMBOL(filemap_flush);
0457
/**
 * filemap_range_has_page - check if a page exists in range.
 * @mapping:		address space within which to check
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback.
 *
 * Return: %true if at least one page exists in the specified range,
 * %false otherwise.
 */
0470 bool filemap_range_has_page(struct address_space *mapping,
0471 loff_t start_byte, loff_t end_byte)
0472 {
0473 struct page *page;
0474 XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
0475 pgoff_t max = end_byte >> PAGE_SHIFT;
0476
0477 if (end_byte < start_byte)
0478 return false;
0479
0480 rcu_read_lock();
0481 for (;;) {
0482 page = xas_find(&xas, max);
0483 if (xas_retry(&xas, page))
0484 continue;
0485
0486 if (xa_is_value(page))
0487 continue;
		/*
		 * We don't need to try to pin this page; we're about to
		 * release the RCU lock anyway.  It is enough to know that
		 * there was a page here recently.
		 */
0493 break;
0494 }
0495 rcu_read_unlock();
0496
0497 return page != NULL;
0498 }
0499 EXPORT_SYMBOL(filemap_range_has_page);
0500
0501 static void __filemap_fdatawait_range(struct address_space *mapping,
0502 loff_t start_byte, loff_t end_byte)
0503 {
0504 pgoff_t index = start_byte >> PAGE_SHIFT;
0505 pgoff_t end = end_byte >> PAGE_SHIFT;
0506 struct pagevec pvec;
0507 int nr_pages;
0508
0509 if (end_byte < start_byte)
0510 return;
0511
0512 pagevec_init(&pvec);
0513 while (index <= end) {
0514 unsigned i;
0515
0516 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
0517 end, PAGECACHE_TAG_WRITEBACK);
0518 if (!nr_pages)
0519 break;
0520
0521 for (i = 0; i < nr_pages; i++) {
0522 struct page *page = pvec.pages[i];
0523
0524 wait_on_page_writeback(page);
0525 ClearPageError(page);
0526 }
0527 pagevec_release(&pvec);
0528 cond_resched();
0529 }
0530 }
0531
/**
 * filemap_fdatawait_range - wait for writeback to complete
 * @mapping:		address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space
 * in the given range and wait for all of them.  Check error status of
 * the address space and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space.
 */
0548 int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
0549 loff_t end_byte)
0550 {
0551 __filemap_fdatawait_range(mapping, start_byte, end_byte);
0552 return filemap_check_errors(mapping);
0553 }
0554 EXPORT_SYMBOL(filemap_fdatawait_range);
0555
0556
0557
0558
0559
0560
0561
0562
0563
0564
0565
0566
0567
0568
0569
0570 int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
0571 loff_t start_byte, loff_t end_byte)
0572 {
0573 __filemap_fdatawait_range(mapping, start_byte, end_byte);
0574 return filemap_check_and_keep_errors(mapping);
0575 }
0576 EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
0577
0578
0579
0580
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594 int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
0595 {
0596 struct address_space *mapping = file->f_mapping;
0597
0598 __filemap_fdatawait_range(mapping, start_byte, end_byte);
0599 return file_check_and_advance_wb_err(file);
0600 }
0601 EXPORT_SYMBOL(file_fdatawait_range);
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612
0613
0614
0615
0616
0617 int filemap_fdatawait_keep_errors(struct address_space *mapping)
0618 {
0619 __filemap_fdatawait_range(mapping, 0, LLONG_MAX);
0620 return filemap_check_and_keep_errors(mapping);
0621 }
0622 EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
0623
0624
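/* Returns true if writeback might be needed or already in progress. */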
0625 static bool mapping_needs_writeback(struct address_space *mapping)
0626 {
0627 return mapping->nrpages;
0628 }
0629
0630 bool filemap_range_has_writeback(struct address_space *mapping,
0631 loff_t start_byte, loff_t end_byte)
0632 {
0633 XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
0634 pgoff_t max = end_byte >> PAGE_SHIFT;
0635 struct page *page;
0636
0637 if (end_byte < start_byte)
0638 return false;
0639
0640 rcu_read_lock();
0641 xas_for_each(&xas, page, max) {
0642 if (xas_retry(&xas, page))
0643 continue;
0644 if (xa_is_value(page))
0645 continue;
0646 if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
0647 break;
0648 }
0649 rcu_read_unlock();
0650 return page != NULL;
0651 }
0652 EXPORT_SYMBOL_GPL(filemap_range_has_writeback);
0653
/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping:	the address_space for the pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * Return: error status of the address space.
 */
0667 int filemap_write_and_wait_range(struct address_space *mapping,
0668 loff_t lstart, loff_t lend)
0669 {
0670 int err = 0, err2;
0671
0672 if (mapping_needs_writeback(mapping)) {
0673 err = __filemap_fdatawrite_range(mapping, lstart, lend,
0674 WB_SYNC_ALL);
0675
0676
0677
0678
0679
0680
0681 if (err != -EIO)
0682 __filemap_fdatawait_range(mapping, lstart, lend);
0683 }
0684 err2 = filemap_check_errors(mapping);
0685 if (!err)
0686 err = err2;
0687 return err;
0688 }
0689 EXPORT_SYMBOL(filemap_write_and_wait_range);
0690
0691 void __filemap_set_wb_err(struct address_space *mapping, int err)
0692 {
0693 errseq_t eseq = errseq_set(&mapping->wb_err, err);
0694
0695 trace_filemap_set_wb_err(mapping, eseq);
0696 }
0697 EXPORT_SYMBOL(__filemap_set_wb_err);
0698
0699
0700
0701
0702
0703
0704
0705
0706
0707
0708
0709
0710
0711
0712
0713
0714
0715
0716
0717
0718
0719
0720
0721
0722
0723 int file_check_and_advance_wb_err(struct file *file)
0724 {
0725 int err = 0;
0726 errseq_t old = READ_ONCE(file->f_wb_err);
0727 struct address_space *mapping = file->f_mapping;
0728
0729
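	/* Locklessly handle the common case where nothing has changed */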
0730 if (errseq_check(&mapping->wb_err, old)) {
0731
0732 spin_lock(&file->f_lock);
0733 old = file->f_wb_err;
0734 err = errseq_check_and_advance(&mapping->wb_err,
0735 &file->f_wb_err);
0736 trace_file_check_and_advance_wb_err(file, old);
0737 spin_unlock(&file->f_lock);
0738 }
0739
0740
0741
0742
0743
0744
0745 clear_bit(AS_EIO, &mapping->flags);
0746 clear_bit(AS_ENOSPC, &mapping->flags);
0747 return err;
0748 }
0749 EXPORT_SYMBOL(file_check_and_advance_wb_err);
0750
0751
0752
0753
0754
0755
0756
0757
0758
0759
0760
0761
0762
0763
0764
0765
0766
0767 int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
0768 {
0769 int err = 0, err2;
0770 struct address_space *mapping = file->f_mapping;
0771
0772 if (mapping_needs_writeback(mapping)) {
0773 err = __filemap_fdatawrite_range(mapping, lstart, lend,
0774 WB_SYNC_ALL);
0775
0776 if (err != -EIO)
0777 __filemap_fdatawait_range(mapping, lstart, lend);
0778 }
0779 err2 = file_check_and_advance_wb_err(file);
0780 if (!err)
0781 err = err2;
0782 return err;
0783 }
0784 EXPORT_SYMBOL(file_write_and_wait_range);
0785
/**
 * replace_page_cache_page - replace a pagecache page with a new one
 * @old:	page to be replaced
 * @new:	page to replace with
 *
 * This function replaces a page in the pagecache with a new one.  On
 * success it acquires the pagecache reference for the new page and
 * drops it for the old page.  Both the old and new pages must be
 * locked.  This function does not add the new page to the LRU, the
 * caller must do that.
 *
 * The remove + add is atomic.  This function cannot fail.
 */
0799 void replace_page_cache_page(struct page *old, struct page *new)
0800 {
0801 struct folio *fold = page_folio(old);
0802 struct folio *fnew = page_folio(new);
0803 struct address_space *mapping = old->mapping;
0804 void (*free_folio)(struct folio *) = mapping->a_ops->free_folio;
0805 pgoff_t offset = old->index;
0806 XA_STATE(xas, &mapping->i_pages, offset);
0807
0808 VM_BUG_ON_PAGE(!PageLocked(old), old);
0809 VM_BUG_ON_PAGE(!PageLocked(new), new);
0810 VM_BUG_ON_PAGE(new->mapping, new);
0811
0812 get_page(new);
0813 new->mapping = mapping;
0814 new->index = offset;
0815
0816 mem_cgroup_migrate(fold, fnew);
0817
0818 xas_lock_irq(&xas);
0819 xas_store(&xas, new);
0820
0821 old->mapping = NULL;
0822
0823 if (!PageHuge(old))
0824 __dec_lruvec_page_state(old, NR_FILE_PAGES);
0825 if (!PageHuge(new))
0826 __inc_lruvec_page_state(new, NR_FILE_PAGES);
0827 if (PageSwapBacked(old))
0828 __dec_lruvec_page_state(old, NR_SHMEM);
0829 if (PageSwapBacked(new))
0830 __inc_lruvec_page_state(new, NR_SHMEM);
0831 xas_unlock_irq(&xas);
0832 if (free_folio)
0833 free_folio(fold);
0834 folio_put(fold);
0835 }
0836 EXPORT_SYMBOL_GPL(replace_page_cache_page);
0837
0838 noinline int __filemap_add_folio(struct address_space *mapping,
0839 struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
0840 {
0841 XA_STATE(xas, &mapping->i_pages, index);
0842 int huge = folio_test_hugetlb(folio);
0843 bool charged = false;
0844 long nr = 1;
0845
0846 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
0847 VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
0848 mapping_set_update(&xas, mapping);
0849
0850 if (!huge) {
0851 int error = mem_cgroup_charge(folio, NULL, gfp);
0852 VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
0853 if (error)
0854 return error;
0855 charged = true;
0856 xas_set_order(&xas, index, folio_order(folio));
0857 nr = folio_nr_pages(folio);
0858 }
0859
0860 gfp &= GFP_RECLAIM_MASK;
0861 folio_ref_add(folio, nr);
0862 folio->mapping = mapping;
0863 folio->index = xas.xa_index;
0864
0865 do {
0866 unsigned int order = xa_get_order(xas.xa, xas.xa_index);
0867 void *entry, *old = NULL;
0868
0869 if (order > folio_order(folio))
0870 xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
0871 order, gfp);
0872 xas_lock_irq(&xas);
0873 xas_for_each_conflict(&xas, entry) {
0874 old = entry;
0875 if (!xa_is_value(entry)) {
0876 xas_set_err(&xas, -EEXIST);
0877 goto unlock;
0878 }
0879 }
0880
0881 if (old) {
0882 if (shadowp)
0883 *shadowp = old;
0884
0885 order = xa_get_order(xas.xa, xas.xa_index);
0886 if (order > folio_order(folio)) {
0887
0888 BUG_ON(shmem_mapping(mapping));
0889 xas_split(&xas, old, order);
0890 xas_reset(&xas);
0891 }
0892 }
0893
0894 xas_store(&xas, folio);
0895 if (xas_error(&xas))
0896 goto unlock;
0897
0898 mapping->nrpages += nr;
0899
0900
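		/* hugetlb pages do not participate in page cache accounting */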
0901 if (!huge) {
0902 __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
0903 if (folio_test_pmd_mappable(folio))
0904 __lruvec_stat_mod_folio(folio,
0905 NR_FILE_THPS, nr);
0906 }
0907 unlock:
0908 xas_unlock_irq(&xas);
0909 } while (xas_nomem(&xas, gfp));
0910
0911 if (xas_error(&xas))
0912 goto error;
0913
0914 trace_mm_filemap_add_to_page_cache(folio);
0915 return 0;
0916 error:
0917 if (charged)
0918 mem_cgroup_uncharge(folio);
0919 folio->mapping = NULL;
0920
0921 folio_put_refs(folio, nr);
0922 return xas_error(&xas);
0923 }
0924 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
0925
0926 int filemap_add_folio(struct address_space *mapping, struct folio *folio,
0927 pgoff_t index, gfp_t gfp)
0928 {
0929 void *shadow = NULL;
0930 int ret;
0931
0932 __folio_set_locked(folio);
0933 ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow);
0934 if (unlikely(ret))
0935 __folio_clear_locked(folio);
0936 else {
0937
0938
0939
0940
0941
0942
0943
0944
0945 WARN_ON_ONCE(folio_test_active(folio));
0946 if (!(gfp & __GFP_WRITE) && shadow)
0947 workingset_refault(folio, shadow);
0948 folio_add_lru(folio);
0949 }
0950 return ret;
0951 }
0952 EXPORT_SYMBOL_GPL(filemap_add_folio);
0953
0954 #ifdef CONFIG_NUMA
0955 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
0956 {
0957 int n;
0958 struct folio *folio;
0959
0960 if (cpuset_do_page_mem_spread()) {
0961 unsigned int cpuset_mems_cookie;
0962 do {
0963 cpuset_mems_cookie = read_mems_allowed_begin();
0964 n = cpuset_mem_spread_node();
0965 folio = __folio_alloc_node(gfp, order, n);
0966 } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie));
0967
0968 return folio;
0969 }
0970 return folio_alloc(gfp, order);
0971 }
0972 EXPORT_SYMBOL(filemap_alloc_folio);
0973 #endif
0974
/*
 * filemap_invalidate_lock_two - lock invalidate_lock for two mappings
 *
 * Lock exclusively invalidate_lock of any passed mapping that is not NULL.
 *
 * @mapping1: the first mapping to lock
 * @mapping2: the second mapping to lock
 */
0983 void filemap_invalidate_lock_two(struct address_space *mapping1,
0984 struct address_space *mapping2)
0985 {
0986 if (mapping1 > mapping2)
0987 swap(mapping1, mapping2);
0988 if (mapping1)
0989 down_write(&mapping1->invalidate_lock);
0990 if (mapping2 && mapping1 != mapping2)
0991 down_write_nested(&mapping2->invalidate_lock, 1);
0992 }
0993 EXPORT_SYMBOL(filemap_invalidate_lock_two);
0994
0995
0996
0997
0998
0999
1000
1001
1002
1003 void filemap_invalidate_unlock_two(struct address_space *mapping1,
1004 struct address_space *mapping2)
1005 {
1006 if (mapping1)
1007 up_write(&mapping1->invalidate_lock);
1008 if (mapping2 && mapping1 != mapping2)
1009 up_write(&mapping2->invalidate_lock);
1010 }
1011 EXPORT_SYMBOL(filemap_invalidate_unlock_two);
1012
/*
 * In order to wait for pages to become available there must be
 * waitqueues associated with pages. By using a hash table of
 * waitqueues where the bucket discipline is to maintain all
 * waiters on the same queue and wake all when any of the pages
 * become available, and for the woken contexts to check to be
 * sure the appropriate page became available, this saves space
 * at a cost of "thundering herd" phenomena during rare hash
 * collisions.
 */
1023 #define PAGE_WAIT_TABLE_BITS 8
1024 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
1025 static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
1026
1027 static wait_queue_head_t *folio_waitqueue(struct folio *folio)
1028 {
1029 return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)];
1030 }
1031
1032 void __init pagecache_init(void)
1033 {
1034 int i;
1035
1036 for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
1037 init_waitqueue_head(&folio_wait_table[i]);
1038
1039 page_writeback_init();
1040 }
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
1077 {
1078 unsigned int flags;
1079 struct wait_page_key *key = arg;
1080 struct wait_page_queue *wait_page
1081 = container_of(wait, struct wait_page_queue, wait);
1082
1083 if (!wake_page_match(wait_page, key))
1084 return 0;
1085
	/*
	 * An exclusive waiter must not be woken while the page bit is
	 * still set: either we observe it still set and stop the walk,
	 * or (for a WQ_FLAG_CUSTOM handoff) we take the bit here on the
	 * waiter's behalf and mark the wait WQ_FLAG_DONE.
	 */
1090 flags = wait->flags;
1091 if (flags & WQ_FLAG_EXCLUSIVE) {
1092 if (test_bit(key->bit_nr, &key->folio->flags))
1093 return -1;
1094 if (flags & WQ_FLAG_CUSTOM) {
1095 if (test_and_set_bit(key->bit_nr, &key->folio->flags))
1096 return -1;
1097 flags |= WQ_FLAG_DONE;
1098 }
1099 }
1100
	/*
	 * We are holding the wait-queue lock, but the waiter that
	 * is waiting for this will be checking the flags without
	 * any locking.
	 *
	 * So update the flags atomically, and wake up the waiter
	 * afterwards to avoid any races. This store-release pairs
	 * with the load-acquire in folio_wait_bit_common().
	 */
1110 smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
1111 wake_up_state(wait->private, mode);
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123 list_del_init_careful(&wait->entry);
1124 return (flags & WQ_FLAG_EXCLUSIVE) != 0;
1125 }
1126
1127 static void folio_wake_bit(struct folio *folio, int bit_nr)
1128 {
1129 wait_queue_head_t *q = folio_waitqueue(folio);
1130 struct wait_page_key key;
1131 unsigned long flags;
1132 wait_queue_entry_t bookmark;
1133
1134 key.folio = folio;
1135 key.bit_nr = bit_nr;
1136 key.page_match = 0;
1137
1138 bookmark.flags = 0;
1139 bookmark.private = NULL;
1140 bookmark.func = NULL;
1141 INIT_LIST_HEAD(&bookmark.entry);
1142
1143 spin_lock_irqsave(&q->lock, flags);
1144 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1145
1146 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
1147
1148
1149
1150
1151
1152
1153 spin_unlock_irqrestore(&q->lock, flags);
1154 cpu_relax();
1155 spin_lock_irqsave(&q->lock, flags);
1156 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1157 }
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168 if (!waitqueue_active(q) || !key.page_match)
1169 folio_clear_waiters(folio);
1170
1171 spin_unlock_irqrestore(&q->lock, flags);
1172 }
1173
1174 static void folio_wake(struct folio *folio, int bit)
1175 {
1176 if (!folio_test_waiters(folio))
1177 return;
1178 folio_wake_bit(folio, bit);
1179 }
1180
1181
1182
1183
1184 enum behavior {
1185 EXCLUSIVE,
1186
1187
1188 SHARED,
1189
1190
1191 DROP,
1192
1193
1194 };
1195
1196
1197
1198
1199
1200 static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
1201 struct wait_queue_entry *wait)
1202 {
1203 if (wait->flags & WQ_FLAG_EXCLUSIVE) {
1204 if (test_and_set_bit(bit_nr, &folio->flags))
1205 return false;
1206 } else if (test_bit(bit_nr, &folio->flags))
1207 return false;
1208
1209 wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
1210 return true;
1211 }
1212
1213
1214 int sysctl_page_lock_unfairness = 5;
1215
1216 static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
1217 int state, enum behavior behavior)
1218 {
1219 wait_queue_head_t *q = folio_waitqueue(folio);
1220 int unfairness = sysctl_page_lock_unfairness;
1221 struct wait_page_queue wait_page;
1222 wait_queue_entry_t *wait = &wait_page.wait;
1223 bool thrashing = false;
1224 bool delayacct = false;
1225 unsigned long pflags;
1226
1227 if (bit_nr == PG_locked &&
1228 !folio_test_uptodate(folio) && folio_test_workingset(folio)) {
1229 if (!folio_test_swapbacked(folio)) {
1230 delayacct_thrashing_start();
1231 delayacct = true;
1232 }
1233 psi_memstall_enter(&pflags);
1234 thrashing = true;
1235 }
1236
1237 init_wait(wait);
1238 wait->func = wake_page_function;
1239 wait_page.folio = folio;
1240 wait_page.bit_nr = bit_nr;
1241
1242 repeat:
1243 wait->flags = 0;
1244 if (behavior == EXCLUSIVE) {
1245 wait->flags = WQ_FLAG_EXCLUSIVE;
1246 if (--unfairness < 0)
1247 wait->flags |= WQ_FLAG_CUSTOM;
1248 }
1249
	/*
	 * Do one last check whether we can get the
	 * page bit synchronously.
	 *
	 * Do the folio_set_waiters() marking before that
	 * to let any waker we _just_ missed know they
	 * need to wake us up (otherwise they'll never
	 * even go to the slow case that looks at the
	 * page queue), and add ourselves to the wait
	 * queue if we need to sleep.
	 *
	 * This part needs to be done under the queue
	 * lock to avoid races.
	 */
1264 spin_lock_irq(&q->lock);
1265 folio_set_waiters(folio);
1266 if (!folio_trylock_flag(folio, bit_nr, wait))
1267 __add_wait_queue_entry_tail(q, wait);
1268 spin_unlock_irq(&q->lock);
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278 if (behavior == DROP)
1279 folio_put(folio);
1280
1281
1282
1283
1284
1285
1286
1287 for (;;) {
1288 unsigned int flags;
1289
1290 set_current_state(state);
1291
1292
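		/* Loop until we've been woken or interrupted */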
1293 flags = smp_load_acquire(&wait->flags);
1294 if (!(flags & WQ_FLAG_WOKEN)) {
1295 if (signal_pending_state(state, current))
1296 break;
1297
1298 io_schedule();
1299 continue;
1300 }
1301
1302
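		/* If we were non-exclusive, we're done */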
1303 if (behavior != EXCLUSIVE)
1304 break;
1305
1306
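		/* If the waker got the lock for us, we're done */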
1307 if (flags & WQ_FLAG_DONE)
1308 break;
1309
1310
1311
1312
1313
1314
1315
1316 if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0))))
1317 goto repeat;
1318
1319 wait->flags |= WQ_FLAG_DONE;
1320 break;
1321 }
1322
1323
1324
1325
1326
1327
1328
1329 finish_wait(q, wait);
1330
1331 if (thrashing) {
1332 if (delayacct)
1333 delayacct_thrashing_end();
1334 psi_memstall_leave(&pflags);
1335 }
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350 if (behavior == EXCLUSIVE)
1351 return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR;
1352
1353 return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
1354 }
1355
1356 #ifdef CONFIG_MIGRATION
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375 void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
1376 spinlock_t *ptl)
1377 {
1378 struct wait_page_queue wait_page;
1379 wait_queue_entry_t *wait = &wait_page.wait;
1380 bool thrashing = false;
1381 bool delayacct = false;
1382 unsigned long pflags;
1383 wait_queue_head_t *q;
1384 struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
1385
1386 q = folio_waitqueue(folio);
1387 if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
1388 if (!folio_test_swapbacked(folio)) {
1389 delayacct_thrashing_start();
1390 delayacct = true;
1391 }
1392 psi_memstall_enter(&pflags);
1393 thrashing = true;
1394 }
1395
1396 init_wait(wait);
1397 wait->func = wake_page_function;
1398 wait_page.folio = folio;
1399 wait_page.bit_nr = PG_locked;
1400 wait->flags = 0;
1401
1402 spin_lock_irq(&q->lock);
1403 folio_set_waiters(folio);
1404 if (!folio_trylock_flag(folio, PG_locked, wait))
1405 __add_wait_queue_entry_tail(q, wait);
1406 spin_unlock_irq(&q->lock);
1407
1408
1409
1410
1411
1412
1413 if (ptep)
1414 pte_unmap_unlock(ptep, ptl);
1415 else
1416 spin_unlock(ptl);
1417
1418 for (;;) {
1419 unsigned int flags;
1420
1421 set_current_state(TASK_UNINTERRUPTIBLE);
1422
1423
1424 flags = smp_load_acquire(&wait->flags);
1425 if (!(flags & WQ_FLAG_WOKEN)) {
1426 if (signal_pending_state(TASK_UNINTERRUPTIBLE, current))
1427 break;
1428
1429 io_schedule();
1430 continue;
1431 }
1432 break;
1433 }
1434
1435 finish_wait(q, wait);
1436
1437 if (thrashing) {
1438 if (delayacct)
1439 delayacct_thrashing_end();
1440 psi_memstall_leave(&pflags);
1441 }
1442 }
1443 #endif
1444
1445 void folio_wait_bit(struct folio *folio, int bit_nr)
1446 {
1447 folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
1448 }
1449 EXPORT_SYMBOL(folio_wait_bit);
1450
1451 int folio_wait_bit_killable(struct folio *folio, int bit_nr)
1452 {
1453 return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED);
1454 }
1455 EXPORT_SYMBOL(folio_wait_bit_killable);
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470 int folio_put_wait_locked(struct folio *folio, int state)
1471 {
1472 return folio_wait_bit_common(folio, PG_locked, state, DROP);
1473 }
1474
1475
1476
1477
1478
1479
1480
1481
1482 void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
1483 {
1484 wait_queue_head_t *q = folio_waitqueue(folio);
1485 unsigned long flags;
1486
1487 spin_lock_irqsave(&q->lock, flags);
1488 __add_wait_queue_entry_tail(q, waiter);
1489 folio_set_waiters(folio);
1490 spin_unlock_irqrestore(&q->lock, flags);
1491 }
1492 EXPORT_SYMBOL_GPL(folio_add_wait_queue);
1493
1494 #ifndef clear_bit_unlock_is_negative_byte
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508 static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
1509 {
1510 clear_bit_unlock(nr, mem);
1511
1512 return test_bit(PG_waiters, mem);
1513 }
1514
1515 #endif
1516
/**
 * folio_unlock - Unlock a locked folio.
 * @folio: The folio.
 *
 * Unlocks the folio and wakes up any thread sleeping on the page lock.
 *
 * Context: May be called from interrupt or process context.  May not be
 * called from NMI context.
 */
1526 void folio_unlock(struct folio *folio)
1527 {
1528
1529 BUILD_BUG_ON(PG_waiters != 7);
1530 BUILD_BUG_ON(PG_locked > 7);
1531 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
1532 if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
1533 folio_wake_bit(folio, PG_locked);
1534 }
1535 EXPORT_SYMBOL(folio_unlock);
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548 void folio_end_private_2(struct folio *folio)
1549 {
1550 VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio);
1551 clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
1552 folio_wake_bit(folio, PG_private_2);
1553 folio_put(folio);
1554 }
1555 EXPORT_SYMBOL(folio_end_private_2);
1556
1557
1558
1559
1560
1561
1562
1563 void folio_wait_private_2(struct folio *folio)
1564 {
1565 while (folio_test_private_2(folio))
1566 folio_wait_bit(folio, PG_private_2);
1567 }
1568 EXPORT_SYMBOL(folio_wait_private_2);
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581 int folio_wait_private_2_killable(struct folio *folio)
1582 {
1583 int ret = 0;
1584
1585 while (folio_test_private_2(folio)) {
1586 ret = folio_wait_bit_killable(folio, PG_private_2);
1587 if (ret < 0)
1588 break;
1589 }
1590
1591 return ret;
1592 }
1593 EXPORT_SYMBOL(folio_wait_private_2_killable);
1594
/**
 * folio_end_writeback - End writeback against a folio.
 * @folio: The folio.
 */
1599 void folio_end_writeback(struct folio *folio)
1600 {
1601
1602
1603
1604
1605
1606
1607
1608 if (folio_test_reclaim(folio)) {
1609 folio_clear_reclaim(folio);
1610 folio_rotate_reclaimable(folio);
1611 }
1612
1613
1614
1615
1616
1617
1618
1619 folio_get(folio);
1620 if (!__folio_end_writeback(folio))
1621 BUG();
1622
1623 smp_mb__after_atomic();
1624 folio_wake(folio, PG_writeback);
1625 acct_reclaim_writeback(folio);
1626 folio_put(folio);
1627 }
1628 EXPORT_SYMBOL(folio_end_writeback);
1629
1630
1631
1632
1633
1634 void page_endio(struct page *page, bool is_write, int err)
1635 {
1636 if (!is_write) {
1637 if (!err) {
1638 SetPageUptodate(page);
1639 } else {
1640 ClearPageUptodate(page);
1641 SetPageError(page);
1642 }
1643 unlock_page(page);
1644 } else {
1645 if (err) {
1646 struct address_space *mapping;
1647
1648 SetPageError(page);
1649 mapping = page_mapping(page);
1650 if (mapping)
1651 mapping_set_error(mapping, err);
1652 }
1653 end_page_writeback(page);
1654 }
1655 }
1656 EXPORT_SYMBOL_GPL(page_endio);
1657
1658
1659
1660
1661
1662 void __folio_lock(struct folio *folio)
1663 {
1664 folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE,
1665 EXCLUSIVE);
1666 }
1667 EXPORT_SYMBOL(__folio_lock);
1668
1669 int __folio_lock_killable(struct folio *folio)
1670 {
1671 return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE,
1672 EXCLUSIVE);
1673 }
1674 EXPORT_SYMBOL_GPL(__folio_lock_killable);
1675
1676 static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
1677 {
1678 struct wait_queue_head *q = folio_waitqueue(folio);
1679 int ret = 0;
1680
1681 wait->folio = folio;
1682 wait->bit_nr = PG_locked;
1683
1684 spin_lock_irq(&q->lock);
1685 __add_wait_queue_entry_tail(q, &wait->wait);
1686 folio_set_waiters(folio);
1687 ret = !folio_trylock(folio);
1688
1689
1690
1691
1692
1693
1694 if (!ret)
1695 __remove_wait_queue(q, &wait->wait);
1696 else
1697 ret = -EIOCBQUEUED;
1698 spin_unlock_irq(&q->lock);
1699 return ret;
1700 }
1701
/*
 * Return values:
 * true - folio is locked; mmap_lock is still held.
 * false - folio is not locked.
 *	mmap_lock has been released (mmap_read_unlock()), unless flags had
 *	both FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
 *	which case mmap_lock is still held.
 *
 * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
 * with the folio locked and the mmap_lock unperturbed.
 */
1713 bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
1714 unsigned int flags)
1715 {
1716 if (fault_flag_allow_retry_first(flags)) {
1717
1718
1719
1720
1721 if (flags & FAULT_FLAG_RETRY_NOWAIT)
1722 return false;
1723
1724 mmap_read_unlock(mm);
1725 if (flags & FAULT_FLAG_KILLABLE)
1726 folio_wait_locked_killable(folio);
1727 else
1728 folio_wait_locked(folio);
1729 return false;
1730 }
1731 if (flags & FAULT_FLAG_KILLABLE) {
1732 bool ret;
1733
1734 ret = __folio_lock_killable(folio);
1735 if (ret) {
1736 mmap_read_unlock(mm);
1737 return false;
1738 }
1739 } else {
1740 __folio_lock(folio);
1741 }
1742
1743 return true;
1744 }
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765 pgoff_t page_cache_next_miss(struct address_space *mapping,
1766 pgoff_t index, unsigned long max_scan)
1767 {
1768 XA_STATE(xas, &mapping->i_pages, index);
1769
1770 while (max_scan--) {
1771 void *entry = xas_next(&xas);
1772 if (!entry || xa_is_value(entry))
1773 break;
1774 if (xas.xa_index == 0)
1775 break;
1776 }
1777
1778 return xas.xa_index;
1779 }
1780 EXPORT_SYMBOL(page_cache_next_miss);
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801 pgoff_t page_cache_prev_miss(struct address_space *mapping,
1802 pgoff_t index, unsigned long max_scan)
1803 {
1804 XA_STATE(xas, &mapping->i_pages, index);
1805
1806 while (max_scan--) {
1807 void *entry = xas_prev(&xas);
1808 if (!entry || xa_is_value(entry))
1809 break;
1810 if (xas.xa_index == ULONG_MAX)
1811 break;
1812 }
1813
1814 return xas.xa_index;
1815 }
1816 EXPORT_SYMBOL(page_cache_prev_miss);
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850 static void *mapping_get_entry(struct address_space *mapping, pgoff_t index)
1851 {
1852 XA_STATE(xas, &mapping->i_pages, index);
1853 struct folio *folio;
1854
1855 rcu_read_lock();
1856 repeat:
1857 xas_reset(&xas);
1858 folio = xas_load(&xas);
1859 if (xas_retry(&xas, folio))
1860 goto repeat;
1861
1862
1863
1864
1865 if (!folio || xa_is_value(folio))
1866 goto out;
1867
1868 if (!folio_try_get_rcu(folio))
1869 goto repeat;
1870
1871 if (unlikely(folio != xas_reload(&xas))) {
1872 folio_put(folio);
1873 goto repeat;
1874 }
1875 out:
1876 rcu_read_unlock();
1877
1878 return folio;
1879 }
1880
/**
 * __filemap_get_folio - Find and get a reference to a folio.
 * @mapping: The address_space to search.
 * @index: The page index.
 * @fgp_flags: %FGP flags modify how the folio is returned.
 * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
 *
 * Looks up the page cache entry at @mapping & @index.
 *
 * @fgp_flags can be zero or more of these flags:
 *
 * * %FGP_ACCESSED - The folio will be marked accessed.
 * * %FGP_LOCK - The folio is returned locked.
 * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it
 *   instead of allocating a new folio to replace it.
 * * %FGP_CREAT - If no folio is present then a new folio is allocated,
 *   added to the page cache and the VM's LRU list.  The folio is
 *   returned locked.
 * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
 *   folio is already in cache.  If the folio was allocated, unlock it
 *   before returning so the caller can do the same dance.
 * * %FGP_WRITE - The folio will be written to by the caller.
 * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
 * * %FGP_NOWAIT - Don't block on the folio lock.
 * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
 *
 * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
 * if the %GFP flags specified for %FGP_CREAT are atomic.
 *
 * If there is a page cache folio, it is returned with an increased refcount.
 *
 * Return: The found folio or %NULL otherwise.
 */
1914 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
1915 int fgp_flags, gfp_t gfp)
1916 {
1917 struct folio *folio;
1918
1919 repeat:
1920 folio = mapping_get_entry(mapping, index);
1921 if (xa_is_value(folio)) {
1922 if (fgp_flags & FGP_ENTRY)
1923 return folio;
1924 folio = NULL;
1925 }
1926 if (!folio)
1927 goto no_page;
1928
1929 if (fgp_flags & FGP_LOCK) {
1930 if (fgp_flags & FGP_NOWAIT) {
1931 if (!folio_trylock(folio)) {
1932 folio_put(folio);
1933 return NULL;
1934 }
1935 } else {
1936 folio_lock(folio);
1937 }
1938
1939
1940 if (unlikely(folio->mapping != mapping)) {
1941 folio_unlock(folio);
1942 folio_put(folio);
1943 goto repeat;
1944 }
1945 VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
1946 }
1947
1948 if (fgp_flags & FGP_ACCESSED)
1949 folio_mark_accessed(folio);
1950 else if (fgp_flags & FGP_WRITE) {
1951
1952 if (folio_test_idle(folio))
1953 folio_clear_idle(folio);
1954 }
1955
1956 if (fgp_flags & FGP_STABLE)
1957 folio_wait_stable(folio);
1958 no_page:
1959 if (!folio && (fgp_flags & FGP_CREAT)) {
1960 int err;
1961 if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
1962 gfp |= __GFP_WRITE;
1963 if (fgp_flags & FGP_NOFS)
1964 gfp &= ~__GFP_FS;
1965 if (fgp_flags & FGP_NOWAIT) {
1966 gfp &= ~GFP_KERNEL;
1967 gfp |= GFP_NOWAIT | __GFP_NOWARN;
1968 }
1969
1970 folio = filemap_alloc_folio(gfp, 0);
1971 if (!folio)
1972 return NULL;
1973
1974 if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
1975 fgp_flags |= FGP_LOCK;
1976
1977
1978 if (fgp_flags & FGP_ACCESSED)
1979 __folio_set_referenced(folio);
1980
1981 err = filemap_add_folio(mapping, folio, index, gfp);
1982 if (unlikely(err)) {
1983 folio_put(folio);
1984 folio = NULL;
1985 if (err == -EEXIST)
1986 goto repeat;
1987 }
1988
1989
1990
1991
1992
1993 if (folio && (fgp_flags & FGP_FOR_MMAP))
1994 folio_unlock(folio);
1995 }
1996
1997 return folio;
1998 }
1999 EXPORT_SYMBOL(__filemap_get_folio);
2000
2001 static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
2002 xa_mark_t mark)
2003 {
2004 struct folio *folio;
2005
2006 retry:
2007 if (mark == XA_PRESENT)
2008 folio = xas_find(xas, max);
2009 else
2010 folio = xas_find_marked(xas, max, mark);
2011
2012 if (xas_retry(xas, folio))
2013 goto retry;
2014
2015
2016
2017
2018
2019 if (!folio || xa_is_value(folio))
2020 return folio;
2021
2022 if (!folio_try_get_rcu(folio))
2023 goto reset;
2024
2025 if (unlikely(folio != xas_reload(xas))) {
2026 folio_put(folio);
2027 goto reset;
2028 }
2029
2030 return folio;
2031 reset:
2032 xas_reset(xas);
2033 goto retry;
2034 }
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
2057 pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
2058 {
2059 XA_STATE(xas, &mapping->i_pages, start);
2060 struct folio *folio;
2061
2062 rcu_read_lock();
2063 while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
2064 indices[fbatch->nr] = xas.xa_index;
2065 if (!folio_batch_add(fbatch, folio))
2066 break;
2067 }
2068 rcu_read_unlock();
2069
2070 return folio_batch_count(fbatch);
2071 }
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
2094 pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
2095 {
2096 XA_STATE(xas, &mapping->i_pages, start);
2097 struct folio *folio;
2098
2099 rcu_read_lock();
2100 while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
2101 if (!xa_is_value(folio)) {
2102 if (folio->index < start)
2103 goto put;
2104 if (folio->index + folio_nr_pages(folio) - 1 > end)
2105 goto put;
2106 if (!folio_trylock(folio))
2107 goto put;
2108 if (folio->mapping != mapping ||
2109 folio_test_writeback(folio))
2110 goto unlock;
2111 VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
2112 folio);
2113 }
2114 indices[fbatch->nr] = xas.xa_index;
2115 if (!folio_batch_add(fbatch, folio))
2116 break;
2117 continue;
2118 unlock:
2119 folio_unlock(folio);
2120 put:
2121 folio_put(folio);
2122 }
2123 rcu_read_unlock();
2124
2125 return folio_batch_count(fbatch);
2126 }
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149 unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
2150 pgoff_t end, struct folio_batch *fbatch)
2151 {
2152 XA_STATE(xas, &mapping->i_pages, *start);
2153 struct folio *folio;
2154
2155 rcu_read_lock();
2156 while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
2157
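		/* Skip over shadow, swap and DAX entries */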
2158 if (xa_is_value(folio))
2159 continue;
2160 if (!folio_batch_add(fbatch, folio)) {
2161 unsigned long nr = folio_nr_pages(folio);
2162
2163 if (folio_test_hugetlb(folio))
2164 nr = 1;
2165 *start = folio->index + nr;
2166 goto out;
2167 }
2168 }
2169
2170
2171
2172
2173
2174
2175
2176 if (end == (pgoff_t)-1)
2177 *start = (pgoff_t)-1;
2178 else
2179 *start = end + 1;
2180 out:
2181 rcu_read_unlock();
2182
2183 return folio_batch_count(fbatch);
2184 }
2185 EXPORT_SYMBOL(filemap_get_folios);
2186
2187 static inline
2188 bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
2189 {
2190 if (!folio_test_large(folio) || folio_test_hugetlb(folio))
2191 return false;
2192 if (index >= max)
2193 return false;
2194 return index < folio->index + folio_nr_pages(folio) - 1;
2195 }
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
2211 unsigned int nr_pages, struct page **pages)
2212 {
2213 XA_STATE(xas, &mapping->i_pages, index);
2214 struct folio *folio;
2215 unsigned int ret = 0;
2216
2217 if (unlikely(!nr_pages))
2218 return 0;
2219
2220 rcu_read_lock();
2221 for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
2222 if (xas_retry(&xas, folio))
2223 continue;
2224
2225
2226
2227
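		/*
		 * If the entry has been swapped out, we can stop looking.
		 * No current caller is looking for DAX entries.
		 */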
2228 if (xa_is_value(folio))
2229 break;
2230
2231 if (!folio_try_get_rcu(folio))
2232 goto retry;
2233
2234 if (unlikely(folio != xas_reload(&xas)))
2235 goto put_page;
2236
2237 again:
2238 pages[ret] = folio_file_page(folio, xas.xa_index);
2239 if (++ret == nr_pages)
2240 break;
2241 if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) {
2242 xas.xa_index++;
2243 folio_ref_inc(folio);
2244 goto again;
2245 }
2246 continue;
2247 put_page:
2248 folio_put(folio);
2249 retry:
2250 xas_reset(&xas);
2251 }
2252 rcu_read_unlock();
2253 return ret;
2254 }
2255 EXPORT_SYMBOL(find_get_pages_contig);
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272 unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
2273 pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
2274 struct page **pages)
2275 {
2276 XA_STATE(xas, &mapping->i_pages, *index);
2277 struct folio *folio;
2278 unsigned ret = 0;
2279
2280 if (unlikely(!nr_pages))
2281 return 0;
2282
2283 rcu_read_lock();
2284 while ((folio = find_get_entry(&xas, end, tag))) {
		/*
		 * Shadow entries should never be tagged, but this iteration
		 * is lockless so there is a window for page reclaim to evict
		 * a page we saw tagged.  Skip over it.
		 */
2290 if (xa_is_value(folio))
2291 continue;
2292
2293 pages[ret] = &folio->page;
2294 if (++ret == nr_pages) {
2295 *index = folio->index + folio_nr_pages(folio);
2296 goto out;
2297 }
2298 }
2299
2300
2301
2302
2303
2304
2305
2306 if (end == (pgoff_t)-1)
2307 *index = (pgoff_t)-1;
2308 else
2309 *index = end + 1;
2310 out:
2311 rcu_read_unlock();
2312
2313 return ret;
2314 }
2315 EXPORT_SYMBOL(find_get_pages_range_tag);
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332 static void shrink_readahead_size_eio(struct file_ra_state *ra)
2333 {
2334 ra->ra_pages /= 4;
2335 }
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346 static void filemap_get_read_batch(struct address_space *mapping,
2347 pgoff_t index, pgoff_t max, struct folio_batch *fbatch)
2348 {
2349 XA_STATE(xas, &mapping->i_pages, index);
2350 struct folio *folio;
2351
2352 rcu_read_lock();
2353 for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
2354 if (xas_retry(&xas, folio))
2355 continue;
2356 if (xas.xa_index > max || xa_is_value(folio))
2357 break;
2358 if (xa_is_sibling(folio))
2359 break;
2360 if (!folio_try_get_rcu(folio))
2361 goto retry;
2362
2363 if (unlikely(folio != xas_reload(&xas)))
2364 goto put_folio;
2365
2366 if (!folio_batch_add(fbatch, folio))
2367 break;
2368 if (!folio_test_uptodate(folio))
2369 break;
2370 if (folio_test_readahead(folio))
2371 break;
2372 xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1);
2373 continue;
2374 put_folio:
2375 folio_put(folio);
2376 retry:
2377 xas_reset(&xas);
2378 }
2379 rcu_read_unlock();
2380 }
2381
2382 static int filemap_read_folio(struct file *file, filler_t filler,
2383 struct folio *folio)
2384 {
2385 int error;
2386
2387
2388
2389
2390
2391
2392 folio_clear_error(folio);
2393
2394 error = filler(file, folio);
2395 if (error)
2396 return error;
2397
2398 error = folio_wait_locked_killable(folio);
2399 if (error)
2400 return error;
2401 if (folio_test_uptodate(folio))
2402 return 0;
2403 if (file)
2404 shrink_readahead_size_eio(&file->f_ra);
2405 return -EIO;
2406 }
2407
2408 static bool filemap_range_uptodate(struct address_space *mapping,
2409 loff_t pos, struct iov_iter *iter, struct folio *folio)
2410 {
2411 int count;
2412
2413 if (folio_test_uptodate(folio))
2414 return true;
2415
2416 if (iov_iter_is_pipe(iter))
2417 return false;
2418 if (!mapping->a_ops->is_partially_uptodate)
2419 return false;
2420 if (mapping->host->i_blkbits >= folio_shift(folio))
2421 return false;
2422
2423 count = iter->count;
2424 if (folio_pos(folio) > pos) {
2425 count -= folio_pos(folio) - pos;
2426 pos = 0;
2427 } else {
2428 pos -= folio_pos(folio);
2429 }
2430
2431 return mapping->a_ops->is_partially_uptodate(folio, pos, count);
2432 }
2433
2434 static int filemap_update_page(struct kiocb *iocb,
2435 struct address_space *mapping, struct iov_iter *iter,
2436 struct folio *folio)
2437 {
2438 int error;
2439
2440 if (iocb->ki_flags & IOCB_NOWAIT) {
2441 if (!filemap_invalidate_trylock_shared(mapping))
2442 return -EAGAIN;
2443 } else {
2444 filemap_invalidate_lock_shared(mapping);
2445 }
2446
2447 if (!folio_trylock(folio)) {
2448 error = -EAGAIN;
2449 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
2450 goto unlock_mapping;
2451 if (!(iocb->ki_flags & IOCB_WAITQ)) {
2452 filemap_invalidate_unlock_shared(mapping);
2453
2454
2455
2456
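			/*
			 * This is where we usually end up waiting for a
			 * previously submitted readahead to finish.
			 */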
2457 folio_put_wait_locked(folio, TASK_KILLABLE);
2458 return AOP_TRUNCATED_PAGE;
2459 }
2460 error = __folio_lock_async(folio, iocb->ki_waitq);
2461 if (error)
2462 goto unlock_mapping;
2463 }
2464
2465 error = AOP_TRUNCATED_PAGE;
2466 if (!folio->mapping)
2467 goto unlock;
2468
2469 error = 0;
2470 if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio))
2471 goto unlock;
2472
2473 error = -EAGAIN;
2474 if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
2475 goto unlock;
2476
2477 error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio,
2478 folio);
2479 goto unlock_mapping;
2480 unlock:
2481 folio_unlock(folio);
2482 unlock_mapping:
2483 filemap_invalidate_unlock_shared(mapping);
2484 if (error == AOP_TRUNCATED_PAGE)
2485 folio_put(folio);
2486 return error;
2487 }
2488
2489 static int filemap_create_folio(struct file *file,
2490 struct address_space *mapping, pgoff_t index,
2491 struct folio_batch *fbatch)
2492 {
2493 struct folio *folio;
2494 int error;
2495
2496 folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
2497 if (!folio)
2498 return -ENOMEM;
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513 filemap_invalidate_lock_shared(mapping);
2514 error = filemap_add_folio(mapping, folio, index,
2515 mapping_gfp_constraint(mapping, GFP_KERNEL));
2516 if (error == -EEXIST)
2517 error = AOP_TRUNCATED_PAGE;
2518 if (error)
2519 goto error;
2520
2521 error = filemap_read_folio(file, mapping->a_ops->read_folio, folio);
2522 if (error)
2523 goto error;
2524
2525 filemap_invalidate_unlock_shared(mapping);
2526 folio_batch_add(fbatch, folio);
2527 return 0;
2528 error:
2529 filemap_invalidate_unlock_shared(mapping);
2530 folio_put(folio);
2531 return error;
2532 }
2533
2534 static int filemap_readahead(struct kiocb *iocb, struct file *file,
2535 struct address_space *mapping, struct folio *folio,
2536 pgoff_t last_index)
2537 {
2538 DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);
2539
2540 if (iocb->ki_flags & IOCB_NOIO)
2541 return -EAGAIN;
2542 page_cache_async_ra(&ractl, folio, last_index - folio->index);
2543 return 0;
2544 }
2545
2546 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
2547 struct folio_batch *fbatch)
2548 {
2549 struct file *filp = iocb->ki_filp;
2550 struct address_space *mapping = filp->f_mapping;
2551 struct file_ra_state *ra = &filp->f_ra;
2552 pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
2553 pgoff_t last_index;
2554 struct folio *folio;
2555 int err = 0;
2556
2557 last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
2558 retry:
2559 if (fatal_signal_pending(current))
2560 return -EINTR;
2561
2562 filemap_get_read_batch(mapping, index, last_index, fbatch);
2563 if (!folio_batch_count(fbatch)) {
2564 if (iocb->ki_flags & IOCB_NOIO)
2565 return -EAGAIN;
2566 page_cache_sync_readahead(mapping, ra, filp, index,
2567 last_index - index);
2568 filemap_get_read_batch(mapping, index, last_index, fbatch);
2569 }
2570 if (!folio_batch_count(fbatch)) {
2571 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
2572 return -EAGAIN;
2573 err = filemap_create_folio(filp, mapping,
2574 iocb->ki_pos >> PAGE_SHIFT, fbatch);
2575 if (err == AOP_TRUNCATED_PAGE)
2576 goto retry;
2577 return err;
2578 }
2579
2580 folio = fbatch->folios[folio_batch_count(fbatch) - 1];
2581 if (folio_test_readahead(folio)) {
2582 err = filemap_readahead(iocb, filp, mapping, folio, last_index);
2583 if (err)
2584 goto err;
2585 }
2586 if (!folio_test_uptodate(folio)) {
2587 if ((iocb->ki_flags & IOCB_WAITQ) &&
2588 folio_batch_count(fbatch) > 1)
2589 iocb->ki_flags |= IOCB_NOWAIT;
2590 err = filemap_update_page(iocb, mapping, iter, folio);
2591 if (err)
2592 goto err;
2593 }
2594
2595 return 0;
2596 err:
2597 if (err < 0)
2598 folio_put(folio);
2599 if (likely(--fbatch->nr))
2600 return 0;
2601 if (err == AOP_TRUNCATED_PAGE)
2602 goto retry;
2603 return err;
2604 }
2605
2606 static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio)
2607 {
2608 unsigned int shift = folio_shift(folio);
2609
2610 return (pos1 >> shift == pos2 >> shift);
2611 }
2612
/**
 * filemap_read - Read data from the page cache.
 * @iocb: The iocb to read.
 * @iter: Destination for the data.
 * @already_read: Number of bytes already read by the caller.
 *
 * Copies data from the page cache.  If the data is not currently present,
 * uses the readahead and read_folio address_space operations to fetch it.
 *
 * Return: Total number of bytes copied, including those already read by
 * the caller.  If an error happens before any bytes are copied, returns
 * a negative error number.
 */
2626 ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
2627 ssize_t already_read)
2628 {
2629 struct file *filp = iocb->ki_filp;
2630 struct file_ra_state *ra = &filp->f_ra;
2631 struct address_space *mapping = filp->f_mapping;
2632 struct inode *inode = mapping->host;
2633 struct folio_batch fbatch;
2634 int i, error = 0;
2635 bool writably_mapped;
2636 loff_t isize, end_offset;
2637
2638 if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
2639 return 0;
2640 if (unlikely(!iov_iter_count(iter)))
2641 return 0;
2642
2643 iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
2644 folio_batch_init(&fbatch);
2645
2646 do {
2647 cond_resched();
2648
2649
2650
2651
2652
2653
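		/*
		 * If we've already successfully copied some data, then we
		 * can no longer safely return -EIOCBQUEUED. Hence mark
		 * an async read NOWAIT at that point.
		 */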
2654 if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
2655 iocb->ki_flags |= IOCB_NOWAIT;
2656
2657 if (unlikely(iocb->ki_pos >= i_size_read(inode)))
2658 break;
2659
2660 error = filemap_get_pages(iocb, iter, &fbatch);
2661 if (error < 0)
2662 break;
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672 isize = i_size_read(inode);
2673 if (unlikely(iocb->ki_pos >= isize))
2674 goto put_folios;
2675 end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
2676
2677
2678
2679
2680
2681 writably_mapped = mapping_writably_mapped(mapping);
2682
2683
2684
2685
2686
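		/*
		 * When a read accesses the same folio several times, only
		 * mark it as accessed the first time.
		 */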
2687 if (!pos_same_folio(iocb->ki_pos, ra->prev_pos - 1,
2688 fbatch.folios[0]))
2689 folio_mark_accessed(fbatch.folios[0]);
2690
2691 for (i = 0; i < folio_batch_count(&fbatch); i++) {
2692 struct folio *folio = fbatch.folios[i];
2693 size_t fsize = folio_size(folio);
2694 size_t offset = iocb->ki_pos & (fsize - 1);
2695 size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
2696 fsize - offset);
2697 size_t copied;
2698
2699 if (end_offset < folio_pos(folio))
2700 break;
2701 if (i > 0)
2702 folio_mark_accessed(folio);
2703
2704
2705
2706
2707
2708 if (writably_mapped)
2709 flush_dcache_folio(folio);
2710
2711 copied = copy_folio_to_iter(folio, offset, bytes, iter);
2712
2713 already_read += copied;
2714 iocb->ki_pos += copied;
2715 ra->prev_pos = iocb->ki_pos;
2716
2717 if (copied < bytes) {
2718 error = -EFAULT;
2719 break;
2720 }
2721 }
2722 put_folios:
2723 for (i = 0; i < folio_batch_count(&fbatch); i++)
2724 folio_put(fbatch.folios[i]);
2725 folio_batch_init(&fbatch);
2726 } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
2727
2728 file_accessed(filp);
2729
2730 return already_read ? already_read : error;
2731 }
2732 EXPORT_SYMBOL_GPL(filemap_read);
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755 ssize_t
2756 generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2757 {
2758 size_t count = iov_iter_count(iter);
2759 ssize_t retval = 0;
2760
2761 if (!count)
2762 return 0;
2763
2764 if (iocb->ki_flags & IOCB_DIRECT) {
2765 struct file *file = iocb->ki_filp;
2766 struct address_space *mapping = file->f_mapping;
2767 struct inode *inode = mapping->host;
2768
2769 if (iocb->ki_flags & IOCB_NOWAIT) {
2770 if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
2771 iocb->ki_pos + count - 1))
2772 return -EAGAIN;
2773 } else {
2774 retval = filemap_write_and_wait_range(mapping,
2775 iocb->ki_pos,
2776 iocb->ki_pos + count - 1);
2777 if (retval < 0)
2778 return retval;
2779 }
2780
2781 file_accessed(file);
2782
2783 retval = mapping->a_ops->direct_IO(iocb, iter);
2784 if (retval >= 0) {
2785 iocb->ki_pos += retval;
2786 count -= retval;
2787 }
2788 if (retval != -EIOCBQUEUED)
2789 iov_iter_revert(iter, count - iov_iter_count(iter));
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800 if (retval < 0 || !count || IS_DAX(inode))
2801 return retval;
2802 if (iocb->ki_pos >= i_size_read(inode))
2803 return retval;
2804 }
2805
2806 return filemap_read(iocb, iter, retval);
2807 }
2808 EXPORT_SYMBOL(generic_file_read_iter);
2809
2810 static inline loff_t folio_seek_hole_data(struct xa_state *xas,
2811 struct address_space *mapping, struct folio *folio,
2812 loff_t start, loff_t end, bool seek_data)
2813 {
2814 const struct address_space_operations *ops = mapping->a_ops;
2815 size_t offset, bsz = i_blocksize(mapping->host);
2816
2817 if (xa_is_value(folio) || folio_test_uptodate(folio))
2818 return seek_data ? start : end;
2819 if (!ops->is_partially_uptodate)
2820 return seek_data ? end : start;
2821
2822 xas_pause(xas);
2823 rcu_read_unlock();
2824 folio_lock(folio);
2825 if (unlikely(folio->mapping != mapping))
2826 goto unlock;
2827
2828 offset = offset_in_folio(folio, start) & ~(bsz - 1);
2829
2830 do {
2831 if (ops->is_partially_uptodate(folio, offset, bsz) ==
2832 seek_data)
2833 break;
2834 start = (start + bsz) & ~(bsz - 1);
2835 offset += bsz;
2836 } while (offset < folio_size(folio));
2837 unlock:
2838 folio_unlock(folio);
2839 rcu_read_lock();
2840 return start;
2841 }
2842
2843 static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio)
2844 {
2845 if (xa_is_value(folio))
2846 return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
2847 return folio_size(folio);
2848 }
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868 loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
2869 loff_t end, int whence)
2870 {
2871 XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
2872 pgoff_t max = (end - 1) >> PAGE_SHIFT;
2873 bool seek_data = (whence == SEEK_DATA);
2874 struct folio *folio;
2875
2876 if (end <= start)
2877 return -ENXIO;
2878
2879 rcu_read_lock();
2880 while ((folio = find_get_entry(&xas, max, XA_PRESENT))) {
2881 loff_t pos = (u64)xas.xa_index << PAGE_SHIFT;
2882 size_t seek_size;
2883
2884 if (start < pos) {
2885 if (!seek_data)
2886 goto unlock;
2887 start = pos;
2888 }
2889
2890 seek_size = seek_folio_size(&xas, folio);
2891 pos = round_up((u64)pos + 1, seek_size);
2892 start = folio_seek_hole_data(&xas, mapping, folio, start, pos,
2893 seek_data);
2894 if (start < pos)
2895 goto unlock;
2896 if (start >= end)
2897 break;
2898 if (seek_size > PAGE_SIZE)
2899 xas_set(&xas, pos >> PAGE_SHIFT);
2900 if (!xa_is_value(folio))
2901 folio_put(folio);
2902 }
2903 if (seek_data)
2904 start = -ENXIO;
2905 unlock:
2906 rcu_read_unlock();
2907 if (folio && !xa_is_value(folio))
2908 folio_put(folio);
2909 if (start > end)
2910 return end;
2911 return start;
2912 }
2913
2914 #ifdef CONFIG_MMU
2915 #define MMAP_LOTSAMISS (100)
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928 static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
2929 struct file **fpin)
2930 {
2931 if (folio_trylock(folio))
2932 return 1;
2933
2934
2935
2936
2937
2938
2939 if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
2940 return 0;
2941
2942 *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
2943 if (vmf->flags & FAULT_FLAG_KILLABLE) {
2944 if (__folio_lock_killable(folio)) {
2945 			/*
2946 			 * We didn't have the right flags to drop the mmap_lock,
2947 			 * but all fault handlers only check for fatal signals
2948 			 * if we return VM_FAULT_RETRY, so we need to drop the
2949 			 * mmap_lock here and return 0 if we don't have a fpin.
2950 			 */
2951 if (*fpin == NULL)
2952 mmap_read_unlock(vmf->vma->vm_mm);
2953 return 0;
2954 }
2955 } else
2956 __folio_lock(folio);
2957
2958 return 1;
2959 }
2960
2961 /*
2962  * Synchronous readahead happens when we don't even find a page in the page
2963  * cache at all.  We don't want to perform IO under the mmap lock, so if we
2964  * have to drop the mmap lock we return the file that was pinned in order for
2965  * us to do that.  If we didn't pin a file then we return NULL.  The file that
2966  * is returned needs to be fput()'ed when we're done with it.
2967  */
2968 static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
2969 {
2970 struct file *file = vmf->vma->vm_file;
2971 struct file_ra_state *ra = &file->f_ra;
2972 struct address_space *mapping = file->f_mapping;
2973 DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
2974 struct file *fpin = NULL;
2975 unsigned long vm_flags = vmf->vma->vm_flags;
2976 unsigned int mmap_miss;
2977
2978 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
2979 	/* Use the readahead code, even if readahead is disabled */
2980 if (vm_flags & VM_HUGEPAGE) {
2981 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2982 ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
2983 ra->size = HPAGE_PMD_NR;
2984 		/*
2985 		 * Fetch two PMD folios, so we get the chance to actually
2986 		 * readahead, unless we've been told not to.
2987 		 */
2988 if (!(vm_flags & VM_RAND_READ))
2989 ra->size *= 2;
2990 ra->async_size = HPAGE_PMD_NR;
2991 page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
2992 return fpin;
2993 }
2994 #endif
2995
2996 	/* If we don't want any read-ahead, don't bother */
2997 if (vm_flags & VM_RAND_READ)
2998 return fpin;
2999 if (!ra->ra_pages)
3000 return fpin;
3001
3002 if (vm_flags & VM_SEQ_READ) {
3003 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
3004 page_cache_sync_ra(&ractl, ra->ra_pages);
3005 return fpin;
3006 }
3007
3008 	/* Avoid banging the cache line if not needed */
3009 mmap_miss = READ_ONCE(ra->mmap_miss);
3010 if (mmap_miss < MMAP_LOTSAMISS * 10)
3011 WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
3012
3013 	/*
3014 	 * Do we miss much more than hit in this file? If so,
3015 	 * stop bothering with read-ahead. It will only hurt.
3016 	 */
3017 if (mmap_miss > MMAP_LOTSAMISS)
3018 return fpin;
3019
3020 	/*
3021 	 * mmap read-around
3022 	 */
3023 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
3024 ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
3025 ra->size = ra->ra_pages;
3026 ra->async_size = ra->ra_pages / 4;
3027 ractl._index = ra->start;
3028 page_cache_ra_order(&ractl, ra, 0);
3029 return fpin;
3030 }
3031
3032 /*
3033  * Asynchronous readahead happens when we find the page and PG_readahead,
3034  * so we want to possibly extend the readahead further.  We return the file
3035  * that was pinned if we have to drop the mmap_lock in order to do IO.
3036  */
3037 static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
3038 struct folio *folio)
3039 {
3040 struct file *file = vmf->vma->vm_file;
3041 struct file_ra_state *ra = &file->f_ra;
3042 DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff);
3043 struct file *fpin = NULL;
3044 unsigned int mmap_miss;
3045
3046 	/* If we don't want any read-ahead, don't bother */
3047 if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
3048 return fpin;
3049
3050 mmap_miss = READ_ONCE(ra->mmap_miss);
3051 if (mmap_miss)
3052 WRITE_ONCE(ra->mmap_miss, --mmap_miss);
3053
3054 if (folio_test_readahead(folio)) {
3055 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
3056 page_cache_async_ra(&ractl, folio, ra->ra_pages);
3057 }
3058 return fpin;
3059 }
3060
3061 /**
3062  * filemap_fault - read in file data for page fault handling
3063  * @vmf:	struct vm_fault containing details of the fault
3064  *
3065  * filemap_fault() is invoked via the vma operations vector for a
3066  * mapped memory region to read in file data during a page fault.
3067  *
3068  * The goto's are kind of ugly, but this streamlines the normal case of having
3069  * it in the page cache, and handles the special cases reasonably without
3070  * having a lot of duplicated code.
3071  *
3072  * vma->vm_mm->mmap_lock must be held on entry.
3073  *
3074  * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
3075  * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap().
3076  *
3077  * If our return value does not have VM_FAULT_RETRY set, the mmap_lock
3078  * has not been released.
3079  *
3080  * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
3081  *
3082  * Return: bitwise-OR of %VM_FAULT_ codes.
3083  */
3084 vm_fault_t filemap_fault(struct vm_fault *vmf)
3085 {
3086 int error;
3087 struct file *file = vmf->vma->vm_file;
3088 struct file *fpin = NULL;
3089 struct address_space *mapping = file->f_mapping;
3090 struct inode *inode = mapping->host;
3091 pgoff_t max_idx, index = vmf->pgoff;
3092 struct folio *folio;
3093 vm_fault_t ret = 0;
3094 bool mapping_locked = false;
3095
3096 max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3097 if (unlikely(index >= max_idx))
3098 return VM_FAULT_SIGBUS;
3099
3100 	/*
3101 	 * Do we have something in the page cache already?
3102 	 */
3103 folio = filemap_get_folio(mapping, index);
3104 if (likely(folio)) {
3105 		/*
3106 		 * We found the page, so try async readahead before waiting for
3107 		 * the lock.
3108 		 */
3109 if (!(vmf->flags & FAULT_FLAG_TRIED))
3110 fpin = do_async_mmap_readahead(vmf, folio);
3111 if (unlikely(!folio_test_uptodate(folio))) {
3112 filemap_invalidate_lock_shared(mapping);
3113 mapping_locked = true;
3114 }
3115 } else {
3116 		/* No page in the page cache at all */
3117 count_vm_event(PGMAJFAULT);
3118 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
3119 ret = VM_FAULT_MAJOR;
3120 fpin = do_sync_mmap_readahead(vmf);
3121 retry_find:
3122 		/*
3123 		 * Take the invalidate_lock so that adding a new folio to the
3124 		 * page cache cannot race with hole punching or truncation.
3125 		 */
3126 if (!mapping_locked) {
3127 filemap_invalidate_lock_shared(mapping);
3128 mapping_locked = true;
3129 }
3130 folio = __filemap_get_folio(mapping, index,
3131 FGP_CREAT|FGP_FOR_MMAP,
3132 vmf->gfp_mask);
3133 if (!folio) {
3134 if (fpin)
3135 goto out_retry;
3136 filemap_invalidate_unlock_shared(mapping);
3137 return VM_FAULT_OOM;
3138 }
3139 }
3140
3141 if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin))
3142 goto out_retry;
3143
3144 	/* Did it get truncated? */
3145 if (unlikely(folio->mapping != mapping)) {
3146 folio_unlock(folio);
3147 folio_put(folio);
3148 goto retry_find;
3149 }
3150 VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
3151
3152 	/*
3153 	 * We have a locked folio in the page cache, now we need to check
3154 	 * that it's up-to-date. If not, it is going to be due to an error.
3155 	 */
3156 if (unlikely(!folio_test_uptodate(folio))) {
3157 		/*
3158 		 * If the invalidate lock is not held, the folio was in cache
3159 		 * and uptodate and now it is not.  Strange but possible since
3160 		 * we didn't hold the folio lock all the time.  Drop everything,
3161 		 * take the invalidate lock and try again.
3162 		 */
3163 if (!mapping_locked) {
3164 folio_unlock(folio);
3165 folio_put(folio);
3166 goto retry_find;
3167 }
3168 goto page_not_uptodate;
3169 }
3170
3171 	/*
3172 	 * We've made it this far and we had to drop our mmap_lock, now is the
3173 	 * time to return to the upper layer and have it re-find the vma and
3174 	 * redo the fault.
3175 	 */
3176 if (fpin) {
3177 folio_unlock(folio);
3178 goto out_retry;
3179 }
3180 if (mapping_locked)
3181 filemap_invalidate_unlock_shared(mapping);
3182
3183 	/*
3184 	 * Found the page and have a reference on it.
3185 	 * We must recheck i_size under the folio lock.
3186 	 */
3187 max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3188 if (unlikely(index >= max_idx)) {
3189 folio_unlock(folio);
3190 folio_put(folio);
3191 return VM_FAULT_SIGBUS;
3192 }
3193
3194 vmf->page = folio_file_page(folio, index);
3195 return ret | VM_FAULT_LOCKED;
3196
3197 page_not_uptodate:
3198 	/*
3199 	 * Umm, take care of errors if the page isn't up-to-date.
3200 	 * Try to re-read it _once_.  We do this synchronously,
3201 	 * because there really aren't any performance issues here
3202 	 * and we need to check for errors.
3203 	 */
3204 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
3205 error = filemap_read_folio(file, mapping->a_ops->read_folio, folio);
3206 if (fpin)
3207 goto out_retry;
3208 folio_put(folio);
3209
3210 if (!error || error == AOP_TRUNCATED_PAGE)
3211 goto retry_find;
3212 filemap_invalidate_unlock_shared(mapping);
3213
3214 return VM_FAULT_SIGBUS;
3215
3216 out_retry:
3217 	/*
3218 	 * We dropped the mmap_lock, we need to return to the fault handler to
3219 	 * re-find the vma and come back and find our hopefully still populated
3220 	 * page.
3221 	 */
3222 if (folio)
3223 folio_put(folio);
3224 if (mapping_locked)
3225 filemap_invalidate_unlock_shared(mapping);
3226 if (fpin)
3227 fput(fpin);
3228 return ret | VM_FAULT_RETRY;
3229 }
3230 EXPORT_SYMBOL(filemap_fault);
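/*
 * Usage sketch (not from this file): filesystems that need their own
 * ->page_mkwrite() (for example to reserve blocks before a shared mapping is
 * dirtied) typically still reuse filemap_fault() and filemap_map_pages() for
 * the read side.  "examplefs_page_mkwrite" is a hypothetical callback.
 *
 *	static const struct vm_operations_struct examplefs_vm_ops = {
 *		.fault		= filemap_fault,
 *		.map_pages	= filemap_map_pages,
 *		.page_mkwrite	= examplefs_page_mkwrite,
 *	};
 */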
3231
3232 static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
3233 {
3234 struct mm_struct *mm = vmf->vma->vm_mm;
3235
3236 	/* Huge page is mapped? No need to proceed. */
3237 if (pmd_trans_huge(*vmf->pmd)) {
3238 unlock_page(page);
3239 put_page(page);
3240 return true;
3241 }
3242
3243 if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
3244 vm_fault_t ret = do_set_pmd(vmf, page);
3245 if (!ret) {
3246 			/* The page is mapped successfully, reference consumed. */
3247 unlock_page(page);
3248 return true;
3249 }
3250 }
3251
3252 if (pmd_none(*vmf->pmd))
3253 pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
3254
3255 	/* See comment in handle_pte_fault() */
3256 if (pmd_devmap_trans_unstable(vmf->pmd)) {
3257 unlock_page(page);
3258 put_page(page);
3259 return true;
3260 }
3261
3262 return false;
3263 }
3264
3265 static struct folio *next_uptodate_page(struct folio *folio,
3266 struct address_space *mapping,
3267 struct xa_state *xas, pgoff_t end_pgoff)
3268 {
3269 unsigned long max_idx;
3270
3271 do {
3272 if (!folio)
3273 return NULL;
3274 if (xas_retry(xas, folio))
3275 continue;
3276 if (xa_is_value(folio))
3277 continue;
3278 if (folio_test_locked(folio))
3279 continue;
3280 if (!folio_try_get_rcu(folio))
3281 continue;
3282 		/* Has the page moved or been split? */
3283 if (unlikely(folio != xas_reload(xas)))
3284 goto skip;
3285 if (!folio_test_uptodate(folio) || folio_test_readahead(folio))
3286 goto skip;
3287 if (!folio_trylock(folio))
3288 goto skip;
3289 if (folio->mapping != mapping)
3290 goto unlock;
3291 if (!folio_test_uptodate(folio))
3292 goto unlock;
3293 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
3294 if (xas->xa_index >= max_idx)
3295 goto unlock;
3296 return folio;
3297 unlock:
3298 folio_unlock(folio);
3299 skip:
3300 folio_put(folio);
3301 } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL);
3302
3303 return NULL;
3304 }
3305
3306 static inline struct folio *first_map_page(struct address_space *mapping,
3307 struct xa_state *xas,
3308 pgoff_t end_pgoff)
3309 {
3310 return next_uptodate_page(xas_find(xas, end_pgoff),
3311 mapping, xas, end_pgoff);
3312 }
3313
3314 static inline struct folio *next_map_page(struct address_space *mapping,
3315 struct xa_state *xas,
3316 pgoff_t end_pgoff)
3317 {
3318 return next_uptodate_page(xas_next_entry(xas, end_pgoff),
3319 mapping, xas, end_pgoff);
3320 }
3321
3322 vm_fault_t filemap_map_pages(struct vm_fault *vmf,
3323 pgoff_t start_pgoff, pgoff_t end_pgoff)
3324 {
3325 struct vm_area_struct *vma = vmf->vma;
3326 struct file *file = vma->vm_file;
3327 struct address_space *mapping = file->f_mapping;
3328 pgoff_t last_pgoff = start_pgoff;
3329 unsigned long addr;
3330 XA_STATE(xas, &mapping->i_pages, start_pgoff);
3331 struct folio *folio;
3332 struct page *page;
3333 unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
3334 vm_fault_t ret = 0;
3335
3336 rcu_read_lock();
3337 folio = first_map_page(mapping, &xas, end_pgoff);
3338 if (!folio)
3339 goto out;
3340
3341 if (filemap_map_pmd(vmf, &folio->page)) {
3342 ret = VM_FAULT_NOPAGE;
3343 goto out;
3344 }
3345
3346 addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
3347 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
3348 do {
3349 again:
3350 page = folio_file_page(folio, xas.xa_index);
3351 if (PageHWPoison(page))
3352 goto unlock;
3353
3354 if (mmap_miss > 0)
3355 mmap_miss--;
3356
3357 addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
3358 vmf->pte += xas.xa_index - last_pgoff;
3359 last_pgoff = xas.xa_index;
3360
3361 		/*
3362 		 * Leave any already-populated PTE (including PTE markers) to
3363 		 * be handled by the regular fault path; don't let fault-around
3364 		 * overwrite it.
3365 		 */
3366 if (!pte_none(*vmf->pte))
3367 goto unlock;
3368
3369 		/* We're about to handle the fault */
3370 if (vmf->address == addr)
3371 ret = VM_FAULT_NOPAGE;
3372
3373 do_set_pte(vmf, page, addr);
3374
3375 update_mmu_cache(vma, addr, vmf->pte);
3376 if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
3377 xas.xa_index++;
3378 folio_ref_inc(folio);
3379 goto again;
3380 }
3381 folio_unlock(folio);
3382 continue;
3383 unlock:
3384 if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
3385 xas.xa_index++;
3386 goto again;
3387 }
3388 folio_unlock(folio);
3389 folio_put(folio);
3390 } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL);
3391 pte_unmap_unlock(vmf->pte, vmf->ptl);
3392 out:
3393 rcu_read_unlock();
3394 WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
3395 return ret;
3396 }
3397 EXPORT_SYMBOL(filemap_map_pages);
3398
3399 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
3400 {
3401 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
3402 struct folio *folio = page_folio(vmf->page);
3403 vm_fault_t ret = VM_FAULT_LOCKED;
3404
3405 sb_start_pagefault(mapping->host->i_sb);
3406 file_update_time(vmf->vma->vm_file);
3407 folio_lock(folio);
3408 if (folio->mapping != mapping) {
3409 folio_unlock(folio);
3410 ret = VM_FAULT_NOPAGE;
3411 goto out;
3412 }
3413 	/*
3414 	 * We mark the folio dirty already here so that when freeze is in
3415 	 * progress, we are guaranteed that writeback during freezing will
3416 	 * see the dirty folio and writeprotect it again.
3417 	 */
3418 folio_mark_dirty(folio);
3419 folio_wait_stable(folio);
3420 out:
3421 sb_end_pagefault(mapping->host->i_sb);
3422 return ret;
3423 }
3424
3425 const struct vm_operations_struct generic_file_vm_ops = {
3426 .fault = filemap_fault,
3427 .map_pages = filemap_map_pages,
3428 .page_mkwrite = filemap_page_mkwrite,
3429 };
3430
3431 /* This is used for a general mmap of a disk file */
3432
3433 int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
3434 {
3435 struct address_space *mapping = file->f_mapping;
3436
3437 if (!mapping->a_ops->read_folio)
3438 return -ENOEXEC;
3439 file_accessed(file);
3440 vma->vm_ops = &generic_file_vm_ops;
3441 return 0;
3442 }
3443
3444 /*
3445  * This is for filesystems which do not implement ->writepage.
3446  */
3447 int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
3448 {
3449 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
3450 return -EINVAL;
3451 return generic_file_mmap(file, vma);
3452 }
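/*
 * Usage sketch (not from this file): a read-only filesystem, i.e. one without
 * a writepage path, would normally point ->mmap at generic_file_readonly_mmap
 * so that shared mappings which could ever become writable are refused up
 * front.  The "examplefs" name is hypothetical.
 *
 *	static const struct file_operations examplefs_ro_fops = {
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= generic_file_read_iter,
 *		.mmap		= generic_file_readonly_mmap,
 *	};
 */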
3453 #else
3454 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
3455 {
3456 return VM_FAULT_SIGBUS;
3457 }
3458 int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
3459 {
3460 return -ENOSYS;
3461 }
3462 int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
3463 {
3464 return -ENOSYS;
3465 }
3466 #endif
3467
3468 EXPORT_SYMBOL(filemap_page_mkwrite);
3469 EXPORT_SYMBOL(generic_file_mmap);
3470 EXPORT_SYMBOL(generic_file_readonly_mmap);
3471
3472 static struct folio *do_read_cache_folio(struct address_space *mapping,
3473 pgoff_t index, filler_t filler, struct file *file, gfp_t gfp)
3474 {
3475 struct folio *folio;
3476 int err;
3477
3478 if (!filler)
3479 filler = mapping->a_ops->read_folio;
3480 repeat:
3481 folio = filemap_get_folio(mapping, index);
3482 if (!folio) {
3483 folio = filemap_alloc_folio(gfp, 0);
3484 if (!folio)
3485 return ERR_PTR(-ENOMEM);
3486 err = filemap_add_folio(mapping, folio, index, gfp);
3487 if (unlikely(err)) {
3488 folio_put(folio);
3489 if (err == -EEXIST)
3490 goto repeat;
3491 			/* Presumably ENOMEM for xarray node */
3492 return ERR_PTR(err);
3493 }
3494
3495 goto filler;
3496 }
3497 if (folio_test_uptodate(folio))
3498 goto out;
3499
3500 if (!folio_trylock(folio)) {
3501 folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
3502 goto repeat;
3503 }
3504
3505 	/* Folio was truncated from mapping */
3506 if (!folio->mapping) {
3507 folio_unlock(folio);
3508 folio_put(folio);
3509 goto repeat;
3510 }
3511
3512 	/* Someone else locked and filled the page in a very small window */
3513 if (folio_test_uptodate(folio)) {
3514 folio_unlock(folio);
3515 goto out;
3516 }
3517
3518 filler:
3519 err = filemap_read_folio(file, filler, folio);
3520 if (err) {
3521 folio_put(folio);
3522 if (err == AOP_TRUNCATED_PAGE)
3523 goto repeat;
3524 return ERR_PTR(err);
3525 }
3526
3527 out:
3528 folio_mark_accessed(folio);
3529 return folio;
3530 }
3531
3532 /**
3533  * read_cache_folio - Read into page cache, fill it if needed.
3534  * @mapping: The address_space to read from.
3535  * @index: The index to read.
3536  * @filler: Function to perform the read, or NULL to use aops->read_folio().
3537  * @file: Passed to filler function, may be NULL if not required.
3538  *
3539  * Read one page into the page cache.  If it succeeds, the folio returned
3540  * will contain @index, but it may not be the first page of the folio.
3541  *
3542  * If the filler function returns an error, it will be returned to the
3543  * caller.
3544  *
3545  * Context: May sleep.  Expects mapping->invalidate_lock to be held.
3546  * Return: An uptodate folio on success, ERR_PTR() on failure.
3547  */
3548 struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index,
3549 filler_t filler, struct file *file)
3550 {
3551 return do_read_cache_folio(mapping, index, filler, file,
3552 mapping_gfp_mask(mapping));
3553 }
3554 EXPORT_SYMBOL(read_cache_folio);
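/*
 * Usage sketch (not from this file): a typical metadata read through the page
 * cache, letting the mapping's own ->read_folio() do the I/O.  Error handling
 * beyond the ERR_PTR check and any locking context are elided; "kaddr" and the
 * parsing step are illustrative.
 *
 *	folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	kaddr = kmap_local_folio(folio, 0);
 *	... parse the on-disk data ...
 *	kunmap_local(kaddr);
 *	folio_put(folio);
 */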
3555
3556 static struct page *do_read_cache_page(struct address_space *mapping,
3557 pgoff_t index, filler_t *filler, struct file *file, gfp_t gfp)
3558 {
3559 struct folio *folio;
3560
3561 folio = do_read_cache_folio(mapping, index, filler, file, gfp);
3562 if (IS_ERR(folio))
3563 return &folio->page;
3564 return folio_file_page(folio, index);
3565 }
3566
3567 struct page *read_cache_page(struct address_space *mapping,
3568 pgoff_t index, filler_t *filler, struct file *file)
3569 {
3570 return do_read_cache_page(mapping, index, filler, file,
3571 mapping_gfp_mask(mapping));
3572 }
3573 EXPORT_SYMBOL(read_cache_page);
3574
3575 /**
3576  * read_cache_page_gfp - read into page cache, using specified page allocation flags.
3577  * @mapping:	the page's address_space
3578  * @index:	the page index
3579  * @gfp:	the page allocator flags to use if allocating
3580  *
3581  * This is the same as "read_mapping_page(mapping, index, NULL)", but with
3582  * any new page allocations done using the specified allocation flags.
3583  *
3584  * If the page does not get brought uptodate, return -EIO.
3585  *
3586  * The function expects mapping->invalidate_lock to be already held.
3587  *
3588  * Return: up to date page on success, ERR_PTR() on failure.
3589  */
3590 struct page *read_cache_page_gfp(struct address_space *mapping,
3591 pgoff_t index,
3592 gfp_t gfp)
3593 {
3594 return do_read_cache_page(mapping, index, NULL, NULL, gfp);
3595 }
3596 EXPORT_SYMBOL(read_cache_page_gfp);
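/*
 * Usage sketch (not from this file): callers that must not recurse into
 * filesystem reclaim while allocating the page can pass a constrained gfp
 * mask, e.g. the mapping's mask with __GFP_FS cleared.
 *
 *	page = read_cache_page_gfp(mapping, index,
 *				   mapping_gfp_constraint(mapping, ~__GFP_FS));
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	... use the uptodate page ...
 *	put_page(page);
 */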
3597
3598 /*
3599  * Warn (at most once per day) about a page cache invalidation failure during direct I/O.
3600  */
3601 void dio_warn_stale_pagecache(struct file *filp)
3602 {
3603 static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
3604 char pathname[128];
3605 char *path;
3606
3607 errseq_set(&filp->f_mapping->wb_err, -EIO);
3608 if (__ratelimit(&_rs)) {
3609 path = file_path(filp, pathname, sizeof(pathname));
3610 if (IS_ERR(path))
3611 path = "(unknown)";
3612 pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
3613 pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
3614 current->comm);
3615 }
3616 }
3617
3618 ssize_t
3619 generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
3620 {
3621 struct file *file = iocb->ki_filp;
3622 struct address_space *mapping = file->f_mapping;
3623 struct inode *inode = mapping->host;
3624 loff_t pos = iocb->ki_pos;
3625 ssize_t written;
3626 size_t write_len;
3627 pgoff_t end;
3628
3629 write_len = iov_iter_count(from);
3630 end = (pos + write_len - 1) >> PAGE_SHIFT;
3631
3632 if (iocb->ki_flags & IOCB_NOWAIT) {
3633 		/* Can't wait: bail out if the range still has cached pages */
3634 if (filemap_range_has_page(file->f_mapping, pos,
3635 pos + write_len - 1))
3636 return -EAGAIN;
3637 } else {
3638 written = filemap_write_and_wait_range(mapping, pos,
3639 pos + write_len - 1);
3640 if (written)
3641 goto out;
3642 }
3643
3644 	/*
3645 	 * After a write we want buffered reads to be sure to go to disk to get
3646 	 * the new data.  We invalidate clean cached pages from the region we're
3647 	 * about to write.  We do this *before* the write so that we can return
3648 	 * without clobbering -EIOCBQUEUED from ->direct_IO().
3649 	 */
3650 written = invalidate_inode_pages2_range(mapping,
3651 pos >> PAGE_SHIFT, end);
3652
3653 	/*
3654 	 * If a page can not be invalidated, return 0 to fall back to buffered write.
3655 	 */
3656 if (written) {
3657 if (written == -EBUSY)
3658 return 0;
3659 goto out;
3660 }
3661
3662 written = mapping->a_ops->direct_IO(iocb, from);
3663
3664 	/*
3665 	 * Finally, try again to invalidate clean pages which might have been
3666 	 * cached by non-direct readahead, or faulted in by get_user_pages()
3667 	 * if the source of the write was an mmap'ed region of the file
3668 	 * we're writing.  Either one is a pretty crazy thing to do,
3669 	 * so we don't support it 100%.  If this invalidation
3670 	 * fails, tough, the write still worked...
3671 	 *
3672 	 * Most of the time we do not need this since dio_complete() will do
3673 	 * the invalidation for us.  However there are some file systems that
3674 	 * do not end up with dio_complete() being called, so let's not break
3675 	 * them by removing it completely.
3676 	 *
3677 	 * Noticeable example is a blkdev_direct_IO().
3678 	 *
3679 	 * Skip invalidation for async writes or if mapping has no pages.
3680 	 */
3681 if (written > 0 && mapping->nrpages &&
3682 invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
3683 dio_warn_stale_pagecache(file);
3684
3685 if (written > 0) {
3686 pos += written;
3687 write_len -= written;
3688 if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
3689 i_size_write(inode, pos);
3690 mark_inode_dirty(inode);
3691 }
3692 iocb->ki_pos = pos;
3693 }
3694 if (written != -EIOCBQUEUED)
3695 iov_iter_revert(from, write_len - iov_iter_count(from));
3696 out:
3697 return written;
3698 }
3699 EXPORT_SYMBOL(generic_file_direct_write);
3700
3701 ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
3702 {
3703 struct file *file = iocb->ki_filp;
3704 loff_t pos = iocb->ki_pos;
3705 struct address_space *mapping = file->f_mapping;
3706 const struct address_space_operations *a_ops = mapping->a_ops;
3707 long status = 0;
3708 ssize_t written = 0;
3709
3710 do {
3711 struct page *page;
3712 unsigned long offset;
3713 unsigned long bytes;
3714 size_t copied;
3715 void *fsdata;
3716
3717 offset = (pos & (PAGE_SIZE - 1));
3718 bytes = min_t(unsigned long, PAGE_SIZE - offset,
3719 iov_iter_count(i));
3720
3721 again:
3722 		/*
3723 		 * Bring in the user page that we will copy from _first_.
3724 		 * Otherwise there's a nasty deadlock on copying from the
3725 		 * same page as we're writing to, without it being marked
3726 		 * up-to-date.
3727 		 */
3728 if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
3729 status = -EFAULT;
3730 break;
3731 }
3732
3733 if (fatal_signal_pending(current)) {
3734 status = -EINTR;
3735 break;
3736 }
3737
3738 status = a_ops->write_begin(file, mapping, pos, bytes,
3739 &page, &fsdata);
3740 if (unlikely(status < 0))
3741 break;
3742
3743 if (mapping_writably_mapped(mapping))
3744 flush_dcache_page(page);
3745
3746 copied = copy_page_from_iter_atomic(page, offset, bytes, i);
3747 flush_dcache_page(page);
3748
3749 status = a_ops->write_end(file, mapping, pos, bytes, copied,
3750 page, fsdata);
3751 if (unlikely(status != copied)) {
3752 iov_iter_revert(i, copied - max(status, 0L));
3753 if (unlikely(status < 0))
3754 break;
3755 }
3756 cond_resched();
3757
3758 if (unlikely(status == 0)) {
3759 			/*
3760 			 * A short copy made ->write_end() reject the
3761 			 * thing entirely.  Might be memory poisoning
3762 			 * halfway through, might be a race with munmap,
3763 			 * might be severe memory pressure.
3764 			 */
3765 if (copied)
3766 bytes = copied;
3767 goto again;
3768 }
3769 pos += status;
3770 written += status;
3771
3772 balance_dirty_pages_ratelimited(mapping);
3773 } while (iov_iter_count(i));
3774
3775 return written ? written : status;
3776 }
3777 EXPORT_SYMBOL(generic_perform_write);
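/*
 * Usage sketch (not from this file): a filesystem that provides
 * ->write_begin()/->write_end() can let generic_perform_write() drive its
 * buffered write path.  The "examplefs" wrapper below is hypothetical;
 * generic_write_checks() and inode locking are assumed to have been done by
 * the caller, as in __generic_file_write_iter() further down.
 *
 *	static ssize_t examplefs_buffered_write(struct kiocb *iocb,
 *						struct iov_iter *from)
 *	{
 *		ssize_t ret = generic_perform_write(iocb, from);
 *
 *		if (ret > 0)
 *			iocb->ki_pos += ret;
 *		return ret;
 *	}
 */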
3778
3779 /**
3780  * __generic_file_write_iter - write data to a file
3781  * @iocb:	IO state structure (file, offset, etc.)
3782  * @from:	iov_iter with data to write
3783  *
3784  * This function does all the work needed for actually writing data to a
3785  * file. It does all basic checks, removes SUID from the file, updates
3786  * modification times and calls proper subroutines depending on whether we
3787  * do direct IO or a standard buffered write.
3788  *
3789  * It expects i_rwsem to be grabbed unless we work on a block device or similar
3790  * object which does not need locking at all.
3791  *
3792  * This function does *not* take care of syncing data in case of O_SYNC write.
3793  * A caller has to handle it. This is mainly due to the fact that we want to
3794  * avoid syncing under i_rwsem.
3795  *
3796  * Return:
3797  * * number of bytes written, even for truncated writes
3798  * * negative error code if no data has been written at all
3799  */
3800 ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3801 {
3802 struct file *file = iocb->ki_filp;
3803 struct address_space *mapping = file->f_mapping;
3804 struct inode *inode = mapping->host;
3805 ssize_t written = 0;
3806 ssize_t err;
3807 ssize_t status;
3808
3809 	/* We can write back this queue in page reclaim */
3810 current->backing_dev_info = inode_to_bdi(inode);
3811 err = file_remove_privs(file);
3812 if (err)
3813 goto out;
3814
3815 err = file_update_time(file);
3816 if (err)
3817 goto out;
3818
3819 if (iocb->ki_flags & IOCB_DIRECT) {
3820 loff_t pos, endbyte;
3821
3822 written = generic_file_direct_write(iocb, from);
3823
3824 		/*
3825 		 * If the write stopped short of completing, fall back to
3826 		 * buffered writes.  Some filesystems do this for writes to
3827 		 * holes, for example.  For DAX files, a buffered write will
3828 		 * not succeed (even if it did, DAX does not handle dirty pages).
3829 		 */
3830 if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
3831 goto out;
3832
3833 pos = iocb->ki_pos;
3834 status = generic_perform_write(iocb, from);
3835
3836 		/*
3837 		 * If generic_perform_write() returned a synchronous error, return
3838 		 * the number of bytes already direct-written, or the error code
3839 		 * if that was zero.  Note this differs from normal direct-io
3840 		 * semantics, which return -EFOO even if some bytes were written.
3841 		 */
3842 if (unlikely(status < 0)) {
3843 err = status;
3844 goto out;
3845 }
3846
3847 		/*
3848 		 * We need to ensure that the page cache pages are written to disk
3849 		 * and invalidated to preserve the expected O_DIRECT semantics.
3850 		 */
3851 endbyte = pos + status - 1;
3852 err = filemap_write_and_wait_range(mapping, pos, endbyte);
3853 if (err == 0) {
3854 iocb->ki_pos = endbyte + 1;
3855 written += status;
3856 invalidate_mapping_pages(mapping,
3857 pos >> PAGE_SHIFT,
3858 endbyte >> PAGE_SHIFT);
3859 } else {
3860 			/*
3861 			 * We don't know how much we wrote, so just return
3862 			 * the number of bytes which were direct-written.
3863 			 */
3864 }
3865 } else {
3866 written = generic_perform_write(iocb, from);
3867 if (likely(written > 0))
3868 iocb->ki_pos += written;
3869 }
3870 out:
3871 current->backing_dev_info = NULL;
3872 return written ? written : err;
3873 }
3874 EXPORT_SYMBOL(__generic_file_write_iter);
3875
3876 /**
3877  * generic_file_write_iter - write data to a file
3878  * @iocb:	IO state structure
3879  * @from:	iov_iter with data to write
3880  *
3881  * This is a wrapper around __generic_file_write_iter() to be used by most
3882  * filesystems.  It takes care of syncing the file in case of O_SYNC file
3883  * and acquires i_rwsem as needed.
3884  * Return:
3885  * * negative error code if no data has been written at all, or if
3886  *   generic_write_sync() failed for a synchronous write
3887  * * number of bytes written, even for truncated writes
3888  */
3889 ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3890 {
3891 struct file *file = iocb->ki_filp;
3892 struct inode *inode = file->f_mapping->host;
3893 ssize_t ret;
3894
3895 inode_lock(inode);
3896 ret = generic_write_checks(iocb, from);
3897 if (ret > 0)
3898 ret = __generic_file_write_iter(iocb, from);
3899 inode_unlock(inode);
3900
3901 if (ret > 0)
3902 ret = generic_write_sync(iocb, ret);
3903 return ret;
3904 }
3905 EXPORT_SYMBOL(generic_file_write_iter);
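/*
 * Usage sketch (not from this file): since generic_file_write_iter() takes
 * i_rwsem and handles O_SYNC itself, simple filesystems can use it as their
 * ->write_iter unchanged, or wrap it to add policy.  The NOWAIT rejection
 * below is an assumed policy of the hypothetical "examplefs", not a rule.
 *
 *	static ssize_t examplefs_write_iter(struct kiocb *iocb, struct iov_iter *from)
 *	{
 *		if (iocb->ki_flags & IOCB_NOWAIT)
 *			return -EOPNOTSUPP;
 *		return generic_file_write_iter(iocb, from);
 *	}
 */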
3906
3907 /**
3908  * filemap_release_folio() - Release fs-specific metadata on a folio.
3909  * @folio: The folio which the kernel is trying to free.
3910  * @gfp: Memory allocation flags (and I/O mode).
3911  *
3912  * The address_space is trying to release any data attached to a folio
3913  * (presumably at folio->private).
3914  *
3915  * A folio that is under writeback cannot be released, since its private
3916  * data may still be needed until the I/O completes.
3917  *
3918  * The @gfp argument specifies whether I/O may be performed to release
3919  * this page (__GFP_IO), and whether the call may block
3920  * (__GFP_RECLAIM & __GFP_FS).
3921  *
3922  * Return: %true if the release was successful, otherwise %false.
3923  */
3924 bool filemap_release_folio(struct folio *folio, gfp_t gfp)
3925 {
3926 struct address_space * const mapping = folio->mapping;
3927
3928 BUG_ON(!folio_test_locked(folio));
3929 if (folio_test_writeback(folio))
3930 return false;
3931
3932 if (mapping && mapping->a_ops->release_folio)
3933 return mapping->a_ops->release_folio(folio, gfp);
3934 return try_to_free_buffers(folio);
3935 }
3936 EXPORT_SYMBOL(filemap_release_folio);
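/*
 * Usage sketch (not from this file): filemap_release_folio() is called by
 * reclaim and invalidation; a filesystem that attaches private data to folios
 * supplies ->release_folio in its address_space_operations, otherwise the
 * buffer-head fallback above is used.  The examplefs_* callbacks are
 * hypothetical; filemap_dirty_folio() is a real generic helper.
 *
 *	static const struct address_space_operations examplefs_aops = {
 *		.read_folio	= examplefs_read_folio,
 *		.writepages	= examplefs_writepages,
 *		.dirty_folio	= filemap_dirty_folio,
 *		.release_folio	= examplefs_release_folio,
 *	};
 */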