// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <trace/events/block.h>
#include <linux/fscrypt.h>

#include "internal.h"

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
			 struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
0059
0060 inline void touch_buffer(struct buffer_head *bh)
0061 {
0062 trace_block_touch_buffer(bh);
0063 mark_page_accessed(bh->b_page);
0064 }
0065 EXPORT_SYMBOL(touch_buffer);
0066
0067 void __lock_buffer(struct buffer_head *bh)
0068 {
0069 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
0070 }
0071 EXPORT_SYMBOL(__lock_buffer);
0072
0073 void unlock_buffer(struct buffer_head *bh)
0074 {
0075 clear_bit_unlock(BH_Lock, &bh->b_state);
0076 smp_mb__after_atomic();
0077 wake_up_bit(&bh->b_state, BH_Lock);
0078 }
0079 EXPORT_SYMBOL(unlock_buffer);
0080
/*
 * Returns whether the folio has dirty or writeback buffers.  If all the
 * buffers are unlocked and clean then the folio_test_dirty information is
 * stale.  If any of the buffers are locked, it is assumed they are locked
 * for I/O.
 */
0086 void buffer_check_dirty_writeback(struct folio *folio,
0087 bool *dirty, bool *writeback)
0088 {
0089 struct buffer_head *head, *bh;
0090 *dirty = false;
0091 *writeback = false;
0092
0093 BUG_ON(!folio_test_locked(folio));
0094
0095 head = folio_buffers(folio);
0096 if (!head)
0097 return;
0098
0099 if (folio_test_writeback(folio))
0100 *writeback = true;
0101
0102 bh = head;
0103 do {
0104 if (buffer_locked(bh))
0105 *writeback = true;
0106
0107 if (buffer_dirty(bh))
0108 *dirty = true;
0109
0110 bh = bh->b_this_page;
0111 } while (bh != head);
0112 }
0113 EXPORT_SYMBOL(buffer_check_dirty_writeback);
0114
/*
 * Block until a buffer comes unlocked.  This doesn't stop it from being
 * re-locked - it merely waits for the current holder to release the lock.
 */
0120 void __wait_on_buffer(struct buffer_head * bh)
0121 {
0122 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
0123 }
0124 EXPORT_SYMBOL(__wait_on_buffer);
0125
0126 static void buffer_io_error(struct buffer_head *bh, char *msg)
0127 {
0128 if (!test_bit(BH_Quiet, &bh->b_state))
0129 printk_ratelimited(KERN_ERR
0130 "Buffer I/O error on dev %pg, logical block %llu%s\n",
0131 bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
0132 }
0133
/*
 * End-of-IO handler helper which marks the buffer up to date (or not) and
 * unlocks it, without touching the bh afterwards.
 */
0142 static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
0143 {
0144 if (uptodate) {
0145 set_buffer_uptodate(bh);
0146 } else {
0147
0148 clear_buffer_uptodate(bh);
0149 }
0150 unlock_buffer(bh);
0151 }
0152
/*
 * Default synchronous end-of-IO handler: mark the buffer up to date (or not),
 * unlock it and drop the reference taken when the I/O was submitted.
 */
0157 void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
0158 {
0159 __end_buffer_read_notouch(bh, uptodate);
0160 put_bh(bh);
0161 }
0162 EXPORT_SYMBOL(end_buffer_read_sync);
0163
0164 void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
0165 {
0166 if (uptodate) {
0167 set_buffer_uptodate(bh);
0168 } else {
0169 buffer_io_error(bh, ", lost sync page write");
0170 mark_buffer_write_io_error(bh);
0171 clear_buffer_uptodate(bh);
0172 }
0173 unlock_buffer(bh);
0174 put_bh(bh);
0175 }
0176 EXPORT_SYMBOL(end_buffer_write_sync);
0177
/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * private_lock.
 */
0188 static struct buffer_head *
0189 __find_get_block_slow(struct block_device *bdev, sector_t block)
0190 {
0191 struct inode *bd_inode = bdev->bd_inode;
0192 struct address_space *bd_mapping = bd_inode->i_mapping;
0193 struct buffer_head *ret = NULL;
0194 pgoff_t index;
0195 struct buffer_head *bh;
0196 struct buffer_head *head;
0197 struct page *page;
0198 int all_mapped = 1;
0199 static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
0200
0201 index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
0202 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
0203 if (!page)
0204 goto out;
0205
0206 spin_lock(&bd_mapping->private_lock);
0207 if (!page_has_buffers(page))
0208 goto out_unlock;
0209 head = page_buffers(page);
0210 bh = head;
0211 do {
0212 if (!buffer_mapped(bh))
0213 all_mapped = 0;
0214 else if (bh->b_blocknr == block) {
0215 ret = bh;
0216 get_bh(bh);
0217 goto out_unlock;
0218 }
0219 bh = bh->b_this_page;
0220 } while (bh != head);
0221
	/*
	 * No buffer matched even though every buffer on the page is mapped:
	 * that indicates a filesystem or page-cache inconsistency, so
	 * complain about it (ratelimited).
	 */
0227 ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
0228 if (all_mapped && __ratelimit(&last_warned)) {
0229 printk("__find_get_block_slow() failed. block=%llu, "
0230 "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
0231 "device %pg blocksize: %d\n",
0232 (unsigned long long)block,
0233 (unsigned long long)bh->b_blocknr,
0234 bh->b_state, bh->b_size, bdev,
0235 1 << bd_inode->i_blkbits);
0236 }
0237 out_unlock:
0238 spin_unlock(&bd_mapping->private_lock);
0239 put_page(page);
0240 out:
0241 return ret;
0242 }
0243
0244 static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
0245 {
0246 unsigned long flags;
0247 struct buffer_head *first;
0248 struct buffer_head *tmp;
0249 struct page *page;
0250 int page_uptodate = 1;
0251
0252 BUG_ON(!buffer_async_read(bh));
0253
0254 page = bh->b_page;
0255 if (uptodate) {
0256 set_buffer_uptodate(bh);
0257 } else {
0258 clear_buffer_uptodate(bh);
0259 buffer_io_error(bh, ", async page read");
0260 SetPageError(page);
0261 }
0262
	/*
	 * Be _very_ careful from here on.  Bad things can happen if two
	 * buffer heads end IO at almost the same time and both decide that
	 * the page is now completely done.
	 */
0268 first = page_buffers(page);
0269 spin_lock_irqsave(&first->b_uptodate_lock, flags);
0270 clear_buffer_async_read(bh);
0271 unlock_buffer(bh);
0272 tmp = bh;
0273 do {
0274 if (!buffer_uptodate(tmp))
0275 page_uptodate = 0;
0276 if (buffer_async_read(tmp)) {
0277 BUG_ON(!buffer_locked(tmp));
0278 goto still_busy;
0279 }
0280 tmp = tmp->b_this_page;
0281 } while (tmp != bh);
0282 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
0283
	/*
	 * If all of the buffers are uptodate then we can set the page
	 * uptodate.
	 */
0288 if (page_uptodate)
0289 SetPageUptodate(page);
0290 unlock_page(page);
0291 return;
0292
0293 still_busy:
0294 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
0295 return;
0296 }
0297
0298 struct decrypt_bh_ctx {
0299 struct work_struct work;
0300 struct buffer_head *bh;
0301 };
0302
0303 static void decrypt_bh(struct work_struct *work)
0304 {
0305 struct decrypt_bh_ctx *ctx =
0306 container_of(work, struct decrypt_bh_ctx, work);
0307 struct buffer_head *bh = ctx->bh;
0308 int err;
0309
0310 err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
0311 bh_offset(bh));
0312 end_buffer_async_read(bh, err == 0);
0313 kfree(ctx);
0314 }
0315
/*
 * Completion handler for buffers taking part in an async read.  If the data
 * needs fs-layer fscrypt decryption, that work is deferred to a workqueue
 * before end_buffer_async_read() is called.
 */
0320 static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
0321 {
0322
0323 if (uptodate &&
0324 fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
0325 struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
0326
0327 if (ctx) {
0328 INIT_WORK(&ctx->work, decrypt_bh);
0329 ctx->bh = bh;
0330 fscrypt_enqueue_decrypt_work(&ctx->work);
0331 return;
0332 }
0333 uptodate = 0;
0334 }
0335 end_buffer_async_read(bh, uptodate);
0336 }
0337
/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 */
0342 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
0343 {
0344 unsigned long flags;
0345 struct buffer_head *first;
0346 struct buffer_head *tmp;
0347 struct page *page;
0348
0349 BUG_ON(!buffer_async_write(bh));
0350
0351 page = bh->b_page;
0352 if (uptodate) {
0353 set_buffer_uptodate(bh);
0354 } else {
0355 buffer_io_error(bh, ", lost async page write");
0356 mark_buffer_write_io_error(bh);
0357 clear_buffer_uptodate(bh);
0358 SetPageError(page);
0359 }
0360
0361 first = page_buffers(page);
0362 spin_lock_irqsave(&first->b_uptodate_lock, flags);
0363
0364 clear_buffer_async_write(bh);
0365 unlock_buffer(bh);
0366 tmp = bh->b_this_page;
0367 while (tmp != bh) {
0368 if (buffer_async_write(tmp)) {
0369 BUG_ON(!buffer_locked(tmp));
0370 goto still_busy;
0371 }
0372 tmp = tmp->b_this_page;
0373 }
0374 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
0375 end_page_writeback(page);
0376 return;
0377
0378 still_busy:
0379 spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
0380 return;
0381 }
0382 EXPORT_SYMBOL(end_buffer_async_write);
0383
/*
 * If a page's buffers are under async readin (end_buffer_async_read
 * completion) then there is a possibility that another thread of control
 * could lock one of the buffers after it has completed but while some of
 * the other buffers have not completed.  This locked buffer would confuse
 * end_buffer_async_read() into not unlocking the page.  So the absence of
 * BH_Async_Read tells end_buffer_async_read() that this buffer is not
 * under async I/O.
 *
 * The page comes unlocked when it has no locked buffer_async buffers left.
 *
 * PageLocked prevents anyone from starting new async I/O reads against any
 * of the buffers, and PageWriteback prevents simultaneous writeout of the
 * same page.
 */
0405 static void mark_buffer_async_read(struct buffer_head *bh)
0406 {
0407 bh->b_end_io = end_buffer_async_read_io;
0408 set_buffer_async_read(bh);
0409 }
0410
0411 static void mark_buffer_async_write_endio(struct buffer_head *bh,
0412 bh_end_io_t *handler)
0413 {
0414 bh->b_end_io = handler;
0415 set_buffer_async_write(bh);
0416 }
0417
0418 void mark_buffer_async_write(struct buffer_head *bh)
0419 {
0420 mark_buffer_async_write_endio(bh, end_buffer_async_write);
0421 }
0422 EXPORT_SYMBOL(mark_buffer_async_write);
0423
/*
 * Buffer-backed filesystems often need to write out and wait upon blockdev
 * buffers (for example indirect blocks) as part of fsync().  To support
 * this, such "associated" buffers are attached to the owning inode's
 * i_mapping->private_list via mark_buffer_dirty_inode(), with
 * bh->b_assoc_map recording the mapping a buffer is associated with.
 * sync_mapping_buffers()/fsync_buffers_list() then write out and wait upon
 * that list, and osync_buffers_list() catches any buffers which were
 * redirtied and written by somebody else while we were waiting.
 *
 * The list is protected by the private_lock of the blockdev's mapping
 * (mapping->private_data points at that mapping), which also gives us
 * exclusion against try_to_free_buffers().
 */
0477 static void __remove_assoc_queue(struct buffer_head *bh)
0478 {
0479 list_del_init(&bh->b_assoc_buffers);
0480 WARN_ON(!bh->b_assoc_map);
0481 bh->b_assoc_map = NULL;
0482 }
0483
0484 int inode_has_buffers(struct inode *inode)
0485 {
0486 return !list_empty(&inode->i_data.private_list);
0487 }
0488
/*
 * osync is designed to support O_SYNC io.  It waits synchronously for all
 * already-submitted IO to complete, but does not queue any new writes to
 * the disk.  To do O_SYNC writes, just queue the buffer writes (for
 * example with write_dirty_buffer()) and then wait upon them here.
 */
0499 static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
0500 {
0501 struct buffer_head *bh;
0502 struct list_head *p;
0503 int err = 0;
0504
0505 spin_lock(lock);
0506 repeat:
0507 list_for_each_prev(p, list) {
0508 bh = BH_ENTRY(p);
0509 if (buffer_locked(bh)) {
0510 get_bh(bh);
0511 spin_unlock(lock);
0512 wait_on_buffer(bh);
0513 if (!buffer_uptodate(bh))
0514 err = -EIO;
0515 brelse(bh);
0516 spin_lock(lock);
0517 goto repeat;
0518 }
0519 }
0520 spin_unlock(lock);
0521 return err;
0522 }
0523
0524 void emergency_thaw_bdev(struct super_block *sb)
0525 {
0526 while (sb->s_bdev && !thaw_bdev(sb->s_bdev))
0527 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
0528 }
0529
/**
 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
 * @mapping: the mapping which wants those buffers written
 *
 * Starts I/O against the buffers at mapping->private_list, and waits upon
 * that I/O.
 *
 * Basically, this is a convenience function for fsync().  @mapping is a
 * file or directory which needs those buffers to be written for a
 * successful fsync().
 */
0541 int sync_mapping_buffers(struct address_space *mapping)
0542 {
0543 struct address_space *buffer_mapping = mapping->private_data;
0544
0545 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
0546 return 0;
0547
0548 return fsync_buffers_list(&buffer_mapping->private_lock,
0549 &mapping->private_list);
0550 }
0551 EXPORT_SYMBOL(sync_mapping_buffers);
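/*
 * Illustrative sketch (not part of the original source): a minimal ->fsync
 * for a buffer_head based filesystem could flush the inode's associated
 * metadata buffers with sync_mapping_buffers() after syncing the data range.
 * The name myfs_fsync is hypothetical, and a real filesystem would normally
 * also write back the inode itself.
 *
 *	static int myfs_fsync(struct file *file, loff_t start, loff_t end,
 *			      int datasync)
 *	{
 *		struct inode *inode = file_inode(file);
 *		int err;
 *
 *		err = file_write_and_wait_range(file, start, end);
 *		if (err)
 *			return err;
 *		return sync_mapping_buffers(inode->i_mapping);
 *	}
 */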
0552
/*
 * Called when we've recently written block `bblock', and it is known that
 * `bblock' was for a buffer_boundary() buffer.  This means that the block
 * at `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if
 * it's dirty, schedule it for IO so that indirects merge nicely with their
 * data.
 */
0559 void write_boundary_block(struct block_device *bdev,
0560 sector_t bblock, unsigned blocksize)
0561 {
0562 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
0563 if (bh) {
0564 if (buffer_dirty(bh))
0565 ll_rw_block(REQ_OP_WRITE, 1, &bh);
0566 put_bh(bh);
0567 }
0568 }
0569
0570 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
0571 {
0572 struct address_space *mapping = inode->i_mapping;
0573 struct address_space *buffer_mapping = bh->b_page->mapping;
0574
0575 mark_buffer_dirty(bh);
0576 if (!mapping->private_data) {
0577 mapping->private_data = buffer_mapping;
0578 } else {
0579 BUG_ON(mapping->private_data != buffer_mapping);
0580 }
0581 if (!bh->b_assoc_map) {
0582 spin_lock(&buffer_mapping->private_lock);
0583 list_move_tail(&bh->b_assoc_buffers,
0584 &mapping->private_list);
0585 bh->b_assoc_map = mapping;
0586 spin_unlock(&buffer_mapping->private_lock);
0587 }
0588 }
0589 EXPORT_SYMBOL(mark_buffer_dirty_inode);
0590
/**
 * block_dirty_folio - Mark a folio as dirty.
 * @mapping: The address space containing this folio.
 * @folio: The folio to mark dirty.
 *
 * Filesystems which use buffer_heads can use this function as their
 * ->dirty_folio implementation.  Some of the buffers may already be dirty;
 * all of them are marked dirty here, and then the folio itself is dirtied.
 *
 * The buffers are walked under mapping->private_lock so that dirtying
 * cannot race with try_to_free_buffers() stripping the buffers from the
 * folio.
 *
 * Return: true if the folio was newly dirtied, false if it was already
 * dirty.
 */
0616 bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
0617 {
0618 struct buffer_head *head;
0619 bool newly_dirty;
0620
0621 spin_lock(&mapping->private_lock);
0622 head = folio_buffers(folio);
0623 if (head) {
0624 struct buffer_head *bh = head;
0625
0626 do {
0627 set_buffer_dirty(bh);
0628 bh = bh->b_this_page;
0629 } while (bh != head);
0630 }
0631
0632
0633
0634
0635 folio_memcg_lock(folio);
0636 newly_dirty = !folio_test_set_dirty(folio);
0637 spin_unlock(&mapping->private_lock);
0638
0639 if (newly_dirty)
0640 __folio_mark_dirty(folio, mapping, 1);
0641
0642 folio_memcg_unlock(folio);
0643
0644 if (newly_dirty)
0645 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
0646
0647 return newly_dirty;
0648 }
0649 EXPORT_SYMBOL(block_dirty_folio);
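/*
 * Illustrative sketch (not part of the original source): block_dirty_folio()
 * and the other generic helpers in this file are normally wired into a
 * filesystem's address_space_operations.  The myfs_* callbacks below are
 * hypothetical; generic_write_end() and block_invalidate_folio() are defined
 * later in this file.
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.dirty_folio		= block_dirty_folio,
 *		.invalidate_folio	= block_invalidate_folio,
 *		.read_folio		= myfs_read_folio,
 *		.writepage		= myfs_writepage,
 *		.write_begin		= myfs_write_begin,
 *		.write_end		= generic_write_end,
 *		.bmap			= myfs_bmap,
 *	};
 */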
0650
/*
 * Write out and wait upon a list of buffers.
 *
 * We have conflicting pressures: we want to make sure that all initially
 * dirty buffers get waited on, but that any subsequently dirtied buffers
 * don't - we don't want fsync to last forever if somebody is actively
 * writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a temporary
 * list, queueing the writes as we go.  Then we clean up, waiting for those
 * writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end up
 * refiling the buffer on the original inode's dirty list again, so there is
 * a chance we will end up with a buffer queued for write but not yet
 * completed on that list.  So, as a final cleanup we go through the osync
 * code to catch these locked, dirty buffers without requeuing any new ones.
 */
0670 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
0671 {
0672 struct buffer_head *bh;
0673 struct list_head tmp;
0674 struct address_space *mapping;
0675 int err = 0, err2;
0676 struct blk_plug plug;
0677
0678 INIT_LIST_HEAD(&tmp);
0679 blk_start_plug(&plug);
0680
0681 spin_lock(lock);
0682 while (!list_empty(list)) {
0683 bh = BH_ENTRY(list->next);
0684 mapping = bh->b_assoc_map;
0685 __remove_assoc_queue(bh);
0686
0687
0688 smp_mb();
0689 if (buffer_dirty(bh) || buffer_locked(bh)) {
0690 list_add(&bh->b_assoc_buffers, &tmp);
0691 bh->b_assoc_map = mapping;
0692 if (buffer_dirty(bh)) {
0693 get_bh(bh);
0694 spin_unlock(lock);
0695
0696
0697
0698
0699
0700
0701
0702 write_dirty_buffer(bh, REQ_SYNC);
0703
0704
0705
0706
0707
0708
0709
0710 brelse(bh);
0711 spin_lock(lock);
0712 }
0713 }
0714 }
0715
0716 spin_unlock(lock);
0717 blk_finish_plug(&plug);
0718 spin_lock(lock);
0719
0720 while (!list_empty(&tmp)) {
0721 bh = BH_ENTRY(tmp.prev);
0722 get_bh(bh);
0723 mapping = bh->b_assoc_map;
0724 __remove_assoc_queue(bh);
0725
0726
0727 smp_mb();
0728 if (buffer_dirty(bh)) {
0729 list_add(&bh->b_assoc_buffers,
0730 &mapping->private_list);
0731 bh->b_assoc_map = mapping;
0732 }
0733 spin_unlock(lock);
0734 wait_on_buffer(bh);
0735 if (!buffer_uptodate(bh))
0736 err = -EIO;
0737 brelse(bh);
0738 spin_lock(lock);
0739 }
0740
0741 spin_unlock(lock);
0742 err2 = osync_buffers_list(lock, list);
0743 if (err)
0744 return err;
0745 else
0746 return err2;
0747 }
0748
/*
 * Invalidate any and all dirty buffers on a given inode.  We are probably
 * unmounting the fs, but that doesn't mean we have already done a sync().
 * Just drop the buffers from the inode list.
 *
 * NOTE: we take the blockdev mapping's private_lock, which assumes that
 * all the associated buffers are against the blockdev.
 */
0758 void invalidate_inode_buffers(struct inode *inode)
0759 {
0760 if (inode_has_buffers(inode)) {
0761 struct address_space *mapping = &inode->i_data;
0762 struct list_head *list = &mapping->private_list;
0763 struct address_space *buffer_mapping = mapping->private_data;
0764
0765 spin_lock(&buffer_mapping->private_lock);
0766 while (!list_empty(list))
0767 __remove_assoc_queue(BH_ENTRY(list->next));
0768 spin_unlock(&buffer_mapping->private_lock);
0769 }
0770 }
0771 EXPORT_SYMBOL(invalidate_inode_buffers);
0772
/*
 * Remove any clean buffers from the inode's buffer list.  This is called
 * when we're trying to free the inode itself.  Those buffers can pin it.
 *
 * Returns true if all buffers were removed.
 */
0779 int remove_inode_buffers(struct inode *inode)
0780 {
0781 int ret = 1;
0782
0783 if (inode_has_buffers(inode)) {
0784 struct address_space *mapping = &inode->i_data;
0785 struct list_head *list = &mapping->private_list;
0786 struct address_space *buffer_mapping = mapping->private_data;
0787
0788 spin_lock(&buffer_mapping->private_lock);
0789 while (!list_empty(list)) {
0790 struct buffer_head *bh = BH_ENTRY(list->next);
0791 if (buffer_dirty(bh)) {
0792 ret = 0;
0793 break;
0794 }
0795 __remove_assoc_queue(bh);
0796 }
0797 spin_unlock(&buffer_mapping->private_lock);
0798 }
0799 return ret;
0800 }
0801
/*
 * Create the appropriate buffers when given a page for data area and the
 * size of each buffer.  Use the bh->b_this_page linked list to follow the
 * buffers created.  Return NULL if unable to create more buffers.
 *
 * The retry flag is used to differentiate async IO (paging, swapping)
 * which may not fail from ordinary buffer allocations.
 */
0811 struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
0812 bool retry)
0813 {
0814 struct buffer_head *bh, *head;
0815 gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
0816 long offset;
0817 struct mem_cgroup *memcg, *old_memcg;
0818
0819 if (retry)
0820 gfp |= __GFP_NOFAIL;
0821
0822
0823 memcg = page_memcg(page);
0824 old_memcg = set_active_memcg(memcg);
0825
0826 head = NULL;
0827 offset = PAGE_SIZE;
0828 while ((offset -= size) >= 0) {
0829 bh = alloc_buffer_head(gfp);
0830 if (!bh)
0831 goto no_grow;
0832
0833 bh->b_this_page = head;
0834 bh->b_blocknr = -1;
0835 head = bh;
0836
0837 bh->b_size = size;
0838
0839
0840 set_bh_page(bh, page, offset);
0841 }
0842 out:
0843 set_active_memcg(old_memcg);
0844 return head;
0845
0846
0847
0848 no_grow:
0849 if (head) {
0850 do {
0851 bh = head;
0852 head = head->b_this_page;
0853 free_buffer_head(bh);
0854 } while (head);
0855 }
0856
0857 goto out;
0858 }
0859 EXPORT_SYMBOL_GPL(alloc_page_buffers);
0860
0861 static inline void
0862 link_dev_buffers(struct page *page, struct buffer_head *head)
0863 {
0864 struct buffer_head *bh, *tail;
0865
0866 bh = head;
0867 do {
0868 tail = bh;
0869 bh = bh->b_this_page;
0870 } while (bh);
0871 tail->b_this_page = head;
0872 attach_page_private(page, head);
0873 }
0874
0875 static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
0876 {
0877 sector_t retval = ~((sector_t)0);
0878 loff_t sz = bdev_nr_bytes(bdev);
0879
0880 if (sz) {
0881 unsigned int sizebits = blksize_bits(size);
0882 retval = (sz >> sizebits);
0883 }
0884 return retval;
0885 }
0886
/*
 * Initialise the state of a blockdev page's buffers.
 */
0890 static sector_t
0891 init_page_buffers(struct page *page, struct block_device *bdev,
0892 sector_t block, int size)
0893 {
0894 struct buffer_head *head = page_buffers(page);
0895 struct buffer_head *bh = head;
0896 int uptodate = PageUptodate(page);
0897 sector_t end_block = blkdev_max_block(bdev, size);
0898
0899 do {
0900 if (!buffer_mapped(bh)) {
0901 bh->b_end_io = NULL;
0902 bh->b_private = NULL;
0903 bh->b_bdev = bdev;
0904 bh->b_blocknr = block;
0905 if (uptodate)
0906 set_buffer_uptodate(bh);
0907 if (block < end_block)
0908 set_buffer_mapped(bh);
0909 }
0910 block++;
0911 bh = bh->b_this_page;
0912 } while (bh != head);
0913
0914
0915
0916
0917 return end_block;
0918 }
0919
/*
 * Create the page-cache page that contains the requested block.
 *
 * This is used purely for blockdev mappings.
 */
0925 static int
0926 grow_dev_page(struct block_device *bdev, sector_t block,
0927 pgoff_t index, int size, int sizebits, gfp_t gfp)
0928 {
0929 struct inode *inode = bdev->bd_inode;
0930 struct page *page;
0931 struct buffer_head *bh;
0932 sector_t end_block;
0933 int ret = 0;
0934 gfp_t gfp_mask;
0935
0936 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
0937
0938
0939
0940
0941
0942
0943
0944 gfp_mask |= __GFP_NOFAIL;
0945
0946 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
0947
0948 BUG_ON(!PageLocked(page));
0949
0950 if (page_has_buffers(page)) {
0951 bh = page_buffers(page);
0952 if (bh->b_size == size) {
0953 end_block = init_page_buffers(page, bdev,
0954 (sector_t)index << sizebits,
0955 size);
0956 goto done;
0957 }
0958 if (!try_to_free_buffers(page_folio(page)))
0959 goto failed;
0960 }
0961
0962
0963
0964
0965 bh = alloc_page_buffers(page, size, true);
0966
0967
0968
0969
0970
0971
0972 spin_lock(&inode->i_mapping->private_lock);
0973 link_dev_buffers(page, bh);
0974 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
0975 size);
0976 spin_unlock(&inode->i_mapping->private_lock);
0977 done:
0978 ret = (block < end_block) ? 1 : -ENXIO;
0979 failed:
0980 unlock_page(page);
0981 put_page(page);
0982 return ret;
0983 }
0984
/*
 * Create buffers for the specified block device block's page.  If that
 * page was dirty, the buffers are set dirty also.
 */
0989 static int
0990 grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
0991 {
0992 pgoff_t index;
0993 int sizebits;
0994
0995 sizebits = PAGE_SHIFT - __ffs(size);
0996 index = block >> sizebits;
0997
0998
0999
1000
1001
1002 if (unlikely(index != block >> sizebits)) {
1003 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1004 "device %pg\n",
1005 __func__, (unsigned long long)block,
1006 bdev);
1007 return -EIO;
1008 }
1009
1010
1011 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1012 }
1013
1014 static struct buffer_head *
1015 __getblk_slow(struct block_device *bdev, sector_t block,
1016 unsigned size, gfp_t gfp)
1017 {
1018
1019 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1020 (size < 512 || size > PAGE_SIZE))) {
1021 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1022 size);
1023 printk(KERN_ERR "logical block size: %d\n",
1024 bdev_logical_block_size(bdev));
1025
1026 dump_stack();
1027 return NULL;
1028 }
1029
1030 for (;;) {
1031 struct buffer_head *bh;
1032 int ret;
1033
1034 bh = __find_get_block(bdev, block, size);
1035 if (bh)
1036 return bh;
1037
1038 ret = grow_buffers(bdev, block, size, gfp);
1039 if (ret < 0)
1040 return NULL;
1041 }
1042 }
1043
/*
 * The relationship between dirty buffers and dirty pages:
 *
 * Whenever a page has any dirty buffers, the page's dirty bit is set and
 * the page is tagged dirty in the page cache.  The dirtiness of the buffers
 * describes which subsections of the page need writeout; if the page has
 * buffers, the page-level dirty bit is merely a hint.
 *
 * When a page is set dirty in its entirety, all of its buffers are marked
 * dirty (block_dirty_folio).  When a single buffer is marked dirty, only
 * its page is dirtied - the page's other buffers are left alone.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in the page cache
 * and then attach the address_space's inode to its superblock's dirty
 * inode list.
 */
1079 void mark_buffer_dirty(struct buffer_head *bh)
1080 {
1081 WARN_ON_ONCE(!buffer_uptodate(bh));
1082
1083 trace_block_dirty_buffer(bh);
1084
1085
1086
1087
1088
1089
1090
1091 if (buffer_dirty(bh)) {
1092 smp_mb();
1093 if (buffer_dirty(bh))
1094 return;
1095 }
1096
1097 if (!test_set_buffer_dirty(bh)) {
1098 struct page *page = bh->b_page;
1099 struct address_space *mapping = NULL;
1100
1101 lock_page_memcg(page);
1102 if (!TestSetPageDirty(page)) {
1103 mapping = page_mapping(page);
1104 if (mapping)
1105 __set_page_dirty(page, mapping, 0);
1106 }
1107 unlock_page_memcg(page);
1108 if (mapping)
1109 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1110 }
1111 }
1112 EXPORT_SYMBOL(mark_buffer_dirty);
1113
1114 void mark_buffer_write_io_error(struct buffer_head *bh)
1115 {
1116 struct super_block *sb;
1117
1118 set_buffer_write_io_error(bh);
1119
1120 if (bh->b_page && bh->b_page->mapping)
1121 mapping_set_error(bh->b_page->mapping, -EIO);
1122 if (bh->b_assoc_map)
1123 mapping_set_error(bh->b_assoc_map, -EIO);
1124 rcu_read_lock();
1125 sb = READ_ONCE(bh->b_bdev->bd_super);
1126 if (sb)
1127 errseq_set(&sb->s_wb_err, -EIO);
1128 rcu_read_unlock();
1129 }
1130 EXPORT_SYMBOL(mark_buffer_write_io_error);
1131
/*
 * Decrement a buffer_head's reference count.  If all buffers against a page
 * have zero reference count, are clean and unlocked, and if the page is
 * unlocked and not under writeback, then try_to_free_buffers() may strip
 * the buffers from the page in preparation for freeing it.
 */
1139 void __brelse(struct buffer_head * buf)
1140 {
1141 if (atomic_read(&buf->b_count)) {
1142 put_bh(buf);
1143 return;
1144 }
1145 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1146 }
1147 EXPORT_SYMBOL(__brelse);
1148
/*
 * bforget() is like brelse(), except it discards any potentially dirty
 * data.
 */
1153 void __bforget(struct buffer_head *bh)
1154 {
1155 clear_buffer_dirty(bh);
1156 if (bh->b_assoc_map) {
1157 struct address_space *buffer_mapping = bh->b_page->mapping;
1158
1159 spin_lock(&buffer_mapping->private_lock);
1160 list_del_init(&bh->b_assoc_buffers);
1161 bh->b_assoc_map = NULL;
1162 spin_unlock(&buffer_mapping->private_lock);
1163 }
1164 __brelse(bh);
1165 }
1166 EXPORT_SYMBOL(__bforget);
1167
1168 static struct buffer_head *__bread_slow(struct buffer_head *bh)
1169 {
1170 lock_buffer(bh);
1171 if (buffer_uptodate(bh)) {
1172 unlock_buffer(bh);
1173 return bh;
1174 } else {
1175 get_bh(bh);
1176 bh->b_end_io = end_buffer_read_sync;
1177 submit_bh(REQ_OP_READ, bh);
1178 wait_on_buffer(bh);
1179 if (buffer_uptodate(bh))
1180 return bh;
1181 }
1182 brelse(bh);
1183 return NULL;
1184 }
1185
/*
 * Per-cpu buffer LRU implementation, to reduce the cost of
 * __find_get_block().
 *
 * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have
 * their refcount elevated by one while they sit in an LRU.  A buffer can
 * appear only once in a particular CPU's LRU, but may be present in
 * several CPUs' LRUs at the same time.
 *
 * The LRUs themselves only need locking against invalidate_bh_lrus().  We
 * use a local interrupt disable (or preempt disable on !SMP) for that.
 */
1200 #define BH_LRU_SIZE 16
1201
1202 struct bh_lru {
1203 struct buffer_head *bhs[BH_LRU_SIZE];
1204 };
1205
1206 static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1207
1208 #ifdef CONFIG_SMP
1209 #define bh_lru_lock() local_irq_disable()
1210 #define bh_lru_unlock() local_irq_enable()
1211 #else
1212 #define bh_lru_lock() preempt_disable()
1213 #define bh_lru_unlock() preempt_enable()
1214 #endif
1215
1216 static inline void check_irqs_on(void)
1217 {
1218 #ifdef irqs_disabled
1219 BUG_ON(irqs_disabled());
1220 #endif
1221 }
1222
/*
 * Install a buffer_head into this cpu's LRU.  If it is not already in the
 * LRU it is inserted at the front and the buffer_head at the back, if any,
 * is evicted and released.
 */
1228 static void bh_lru_install(struct buffer_head *bh)
1229 {
1230 struct buffer_head *evictee = bh;
1231 struct bh_lru *b;
1232 int i;
1233
1234 check_irqs_on();
1235 bh_lru_lock();
1236
1237
1238
1239
1240
1241
1242
1243 if (lru_cache_disabled()) {
1244 bh_lru_unlock();
1245 return;
1246 }
1247
1248 b = this_cpu_ptr(&bh_lrus);
1249 for (i = 0; i < BH_LRU_SIZE; i++) {
1250 swap(evictee, b->bhs[i]);
1251 if (evictee == bh) {
1252 bh_lru_unlock();
1253 return;
1254 }
1255 }
1256
1257 get_bh(bh);
1258 bh_lru_unlock();
1259 brelse(evictee);
1260 }
1261
/*
 * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
 */
1265 static struct buffer_head *
1266 lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1267 {
1268 struct buffer_head *ret = NULL;
1269 unsigned int i;
1270
1271 check_irqs_on();
1272 bh_lru_lock();
1273 for (i = 0; i < BH_LRU_SIZE; i++) {
1274 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1275
1276 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1277 bh->b_size == size) {
1278 if (i) {
1279 while (i) {
1280 __this_cpu_write(bh_lrus.bhs[i],
1281 __this_cpu_read(bh_lrus.bhs[i - 1]));
1282 i--;
1283 }
1284 __this_cpu_write(bh_lrus.bhs[0], bh);
1285 }
1286 get_bh(bh);
1287 ret = bh;
1288 break;
1289 }
1290 }
1291 bh_lru_unlock();
1292 return ret;
1293 }
1294
/*
 * Perform a pagecache lookup for the matching buffer.  If it's there,
 * refresh it in the LRU and mark it as accessed.  If it is not present
 * then return NULL.
 */
1300 struct buffer_head *
1301 __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1302 {
1303 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1304
1305 if (bh == NULL) {
1306
1307 bh = __find_get_block_slow(bdev, block);
1308 if (bh)
1309 bh_lru_install(bh);
1310 } else
1311 touch_buffer(bh);
1312
1313 return bh;
1314 }
1315 EXPORT_SYMBOL(__find_get_block);
1316
/*
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size.  The
 * returned buffer has its reference count incremented.  May sleep.
 */
1325 struct buffer_head *
1326 __getblk_gfp(struct block_device *bdev, sector_t block,
1327 unsigned size, gfp_t gfp)
1328 {
1329 struct buffer_head *bh = __find_get_block(bdev, block, size);
1330
1331 might_sleep();
1332 if (bh == NULL)
1333 bh = __getblk_slow(bdev, block, size, gfp);
1334 return bh;
1335 }
1336 EXPORT_SYMBOL(__getblk_gfp);
1337
/*
 * Do async read-ahead on a buffer.
 */
1341 void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1342 {
1343 struct buffer_head *bh = __getblk(bdev, block, size);
1344 if (likely(bh)) {
1345 ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
1346 brelse(bh);
1347 }
1348 }
1349 EXPORT_SYMBOL(__breadahead);
1350
1351 void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
1352 gfp_t gfp)
1353 {
1354 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1355 if (likely(bh)) {
1356 ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
1357 brelse(bh);
1358 }
1359 }
1360 EXPORT_SYMBOL(__breadahead_gfp);
1361
/**
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns the buffer head that contains it.
 *  @gfp is added to the mapping's GFP constraints; passing 0 keeps the
 *  backing page out of the movable area, which avoids page migration for
 *  long-lived metadata.  Returns NULL if the block was unreadable.
 */
1374 struct buffer_head *
1375 __bread_gfp(struct block_device *bdev, sector_t block,
1376 unsigned size, gfp_t gfp)
1377 {
1378 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1379
1380 if (likely(bh) && !buffer_uptodate(bh))
1381 bh = __bread_slow(bh);
1382 return bh;
1383 }
1384 EXPORT_SYMBOL(__bread_gfp);
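/*
 * Illustrative sketch (not part of the original source): typical use of the
 * sb_bread()/__bread() wrappers around __bread_gfp().  The variables sb,
 * block and out are hypothetical.
 *
 *	struct buffer_head *bh = sb_bread(sb, block);
 *
 *	if (!bh)
 *		return -EIO;
 *	memcpy(out, bh->b_data, sb->s_blocksize);
 *	brelse(bh);
 */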
1385
1386 static void __invalidate_bh_lrus(struct bh_lru *b)
1387 {
1388 int i;
1389
1390 for (i = 0; i < BH_LRU_SIZE; i++) {
1391 brelse(b->bhs[i]);
1392 b->bhs[i] = NULL;
1393 }
1394 }
1395
1396
1397
1398
1399
1400 static void invalidate_bh_lru(void *arg)
1401 {
1402 struct bh_lru *b = &get_cpu_var(bh_lrus);
1403
1404 __invalidate_bh_lrus(b);
1405 put_cpu_var(bh_lrus);
1406 }
1407
1408 bool has_bh_in_lru(int cpu, void *dummy)
1409 {
1410 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1411 int i;
1412
1413 for (i = 0; i < BH_LRU_SIZE; i++) {
1414 if (b->bhs[i])
1415 return true;
1416 }
1417
1418 return false;
1419 }
1420
1421 void invalidate_bh_lrus(void)
1422 {
1423 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
1424 }
1425 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1426
1427
1428
1429
1430
1431 void invalidate_bh_lrus_cpu(void)
1432 {
1433 struct bh_lru *b;
1434
1435 bh_lru_lock();
1436 b = this_cpu_ptr(&bh_lrus);
1437 __invalidate_bh_lrus(b);
1438 bh_lru_unlock();
1439 }
1440
1441 void set_bh_page(struct buffer_head *bh,
1442 struct page *page, unsigned long offset)
1443 {
1444 bh->b_page = page;
1445 BUG_ON(offset >= PAGE_SIZE);
1446 if (PageHighMem(page))
1447
1448
1449
1450 bh->b_data = (char *)(0 + offset);
1451 else
1452 bh->b_data = page_address(page) + offset;
1453 }
1454 EXPORT_SYMBOL(set_bh_page);
1455
/*
 * Called when truncating a buffer on a page completely.
 */

/* Bits that are cleared during an invalidate */
1461 #define BUFFER_FLAGS_DISCARD \
1462 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1463 1 << BH_Delay | 1 << BH_Unwritten)
1464
1465 static void discard_buffer(struct buffer_head * bh)
1466 {
1467 unsigned long b_state, b_state_old;
1468
1469 lock_buffer(bh);
1470 clear_buffer_dirty(bh);
1471 bh->b_bdev = NULL;
1472 b_state = bh->b_state;
1473 for (;;) {
1474 b_state_old = cmpxchg(&bh->b_state, b_state,
1475 (b_state & ~BUFFER_FLAGS_DISCARD));
1476 if (b_state_old == b_state)
1477 break;
1478 b_state = b_state_old;
1479 }
1480 unlock_buffer(bh);
1481 }
1482
/**
 * block_invalidate_folio - Invalidate part or all of a buffer-backed folio.
 * @folio: The folio which is affected.
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * block_invalidate_folio() is called when all or part of the folio has been
 * invalidated by a truncate operation.
 *
 * block_invalidate_folio() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O is
 * underway against any of the blocks which are outside the truncation point,
 * because the caller is about to free (and possibly reuse) those blocks
 * on-disk.
 */
1498 void block_invalidate_folio(struct folio *folio, size_t offset, size_t length)
1499 {
1500 struct buffer_head *head, *bh, *next;
1501 size_t curr_off = 0;
1502 size_t stop = length + offset;
1503
1504 BUG_ON(!folio_test_locked(folio));
1505
1506
1507
1508
1509 BUG_ON(stop > folio_size(folio) || stop < length);
1510
1511 head = folio_buffers(folio);
1512 if (!head)
1513 return;
1514
1515 bh = head;
1516 do {
1517 size_t next_off = curr_off + bh->b_size;
1518 next = bh->b_this_page;
1519
1520
1521
1522
1523 if (next_off > stop)
1524 goto out;
1525
1526
1527
1528
1529 if (offset <= curr_off)
1530 discard_buffer(bh);
1531 curr_off = next_off;
1532 bh = next;
1533 } while (bh != head);
1534
1535
1536
1537
1538
1539
1540 if (length == folio_size(folio))
1541 filemap_release_folio(folio, 0);
1542 out:
1543 return;
1544 }
1545 EXPORT_SYMBOL(block_invalidate_folio);
1546
/*
 * We attach and possibly dirty the buffers atomically wrt
 * block_dirty_folio() via private_lock.  try_to_free_buffers is already
 * excluded via the page lock.
 */
1553 void create_empty_buffers(struct page *page,
1554 unsigned long blocksize, unsigned long b_state)
1555 {
1556 struct buffer_head *bh, *head, *tail;
1557
1558 head = alloc_page_buffers(page, blocksize, true);
1559 bh = head;
1560 do {
1561 bh->b_state |= b_state;
1562 tail = bh;
1563 bh = bh->b_this_page;
1564 } while (bh);
1565 tail->b_this_page = head;
1566
1567 spin_lock(&page->mapping->private_lock);
1568 if (PageUptodate(page) || PageDirty(page)) {
1569 bh = head;
1570 do {
1571 if (PageDirty(page))
1572 set_buffer_dirty(bh);
1573 if (PageUptodate(page))
1574 set_buffer_uptodate(bh);
1575 bh = bh->b_this_page;
1576 } while (bh != head);
1577 }
1578 attach_page_private(page, head);
1579 spin_unlock(&page->mapping->private_lock);
1580 }
1581 EXPORT_SYMBOL(create_empty_buffers);
1582
/**
 * clean_bdev_aliases: clean a range of buffers in block device
 * @bdev: Block device to clean buffers in
 * @block: Start of a range of blocks to clean
 * @len: Number of blocks to clean
 *
 * We are taking a range of blocks for data and we don't want writeback of
 * any buffer-cache aliases starting from return from this function until
 * the moment when something will explicitly mark the buffer dirty.  We
 * don't even need to mark them not-uptodate - nobody can expect anything
 * from a newly allocated buffer anyway.
 *
 * Also note that bforget() doesn't lock the buffer, so there can be
 * writeout I/O going on against recently-freed buffers; if necessary we
 * wait for that I/O here and clear BH_Req as we go.
 */
1603 void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
1604 {
1605 struct inode *bd_inode = bdev->bd_inode;
1606 struct address_space *bd_mapping = bd_inode->i_mapping;
1607 struct folio_batch fbatch;
1608 pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
1609 pgoff_t end;
1610 int i, count;
1611 struct buffer_head *bh;
1612 struct buffer_head *head;
1613
1614 end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
1615 folio_batch_init(&fbatch);
1616 while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
1617 count = folio_batch_count(&fbatch);
1618 for (i = 0; i < count; i++) {
1619 struct folio *folio = fbatch.folios[i];
1620
1621 if (!folio_buffers(folio))
1622 continue;
1623
1624
1625
1626
1627
1628 folio_lock(folio);
1629
1630 head = folio_buffers(folio);
1631 if (!head)
1632 goto unlock_page;
1633 bh = head;
1634 do {
1635 if (!buffer_mapped(bh) || (bh->b_blocknr < block))
1636 goto next;
1637 if (bh->b_blocknr >= block + len)
1638 break;
1639 clear_buffer_dirty(bh);
1640 wait_on_buffer(bh);
1641 clear_buffer_req(bh);
1642 next:
1643 bh = bh->b_this_page;
1644 } while (bh != head);
1645 unlock_page:
1646 folio_unlock(folio);
1647 }
1648 folio_batch_release(&fbatch);
1649 cond_resched();
1650
1651 if (index > end || !index)
1652 break;
1653 }
1654 }
1655 EXPORT_SYMBOL(clean_bdev_aliases);
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665 static inline int block_size_bits(unsigned int blocksize)
1666 {
1667 return ilog2(blocksize);
1668 }
1669
1670 static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1671 {
1672 BUG_ON(!PageLocked(page));
1673
1674 if (!page_has_buffers(page))
1675 create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
1676 b_state);
1677 return page_buffers(page);
1678 }
1679
/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * While block_write_full_page() is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  Only buffers found dirty under lock_buffer()
 * (test_clear_buffer_dirty()) are marked async_write and submitted; a
 * buffer that cannot be locked in WB_SYNC_NONE mode simply causes the
 * page to be redirtied for a later pass.
 */
1709 int __block_write_full_page(struct inode *inode, struct page *page,
1710 get_block_t *get_block, struct writeback_control *wbc,
1711 bh_end_io_t *handler)
1712 {
1713 int err;
1714 sector_t block;
1715 sector_t last_block;
1716 struct buffer_head *bh, *head;
1717 unsigned int blocksize, bbits;
1718 int nr_underway = 0;
1719 blk_opf_t write_flags = wbc_to_write_flags(wbc);
1720
1721 head = create_page_buffers(page, inode,
1722 (1 << BH_Dirty)|(1 << BH_Uptodate));
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734 bh = head;
1735 blocksize = bh->b_size;
1736 bbits = block_size_bits(blocksize);
1737
1738 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1739 last_block = (i_size_read(inode) - 1) >> bbits;
1740
1741
1742
1743
1744
1745 do {
1746 if (block > last_block) {
1747
1748
1749
1750
1751
1752
1753
1754
1755 clear_buffer_dirty(bh);
1756 set_buffer_uptodate(bh);
1757 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1758 buffer_dirty(bh)) {
1759 WARN_ON(bh->b_size != blocksize);
1760 err = get_block(inode, block, bh, 1);
1761 if (err)
1762 goto recover;
1763 clear_buffer_delay(bh);
1764 if (buffer_new(bh)) {
1765
1766 clear_buffer_new(bh);
1767 clean_bdev_bh_alias(bh);
1768 }
1769 }
1770 bh = bh->b_this_page;
1771 block++;
1772 } while (bh != head);
1773
1774 do {
1775 if (!buffer_mapped(bh))
1776 continue;
1777
1778
1779
1780
1781
1782
1783
1784 if (wbc->sync_mode != WB_SYNC_NONE) {
1785 lock_buffer(bh);
1786 } else if (!trylock_buffer(bh)) {
1787 redirty_page_for_writepage(wbc, page);
1788 continue;
1789 }
1790 if (test_clear_buffer_dirty(bh)) {
1791 mark_buffer_async_write_endio(bh, handler);
1792 } else {
1793 unlock_buffer(bh);
1794 }
1795 } while ((bh = bh->b_this_page) != head);
1796
1797
1798
1799
1800
1801 BUG_ON(PageWriteback(page));
1802 set_page_writeback(page);
1803
1804 do {
1805 struct buffer_head *next = bh->b_this_page;
1806 if (buffer_async_write(bh)) {
1807 submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
1808 nr_underway++;
1809 }
1810 bh = next;
1811 } while (bh != head);
1812 unlock_page(page);
1813
1814 err = 0;
1815 done:
1816 if (nr_underway == 0) {
1817
1818
1819
1820
1821
1822 end_page_writeback(page);
1823
1824
1825
1826
1827
1828 }
1829 return err;
1830
1831 recover:
1832
1833
1834
1835
1836
1837
1838 bh = head;
1839
1840 do {
1841 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1842 !buffer_delay(bh)) {
1843 lock_buffer(bh);
1844 mark_buffer_async_write_endio(bh, handler);
1845 } else {
1846
1847
1848
1849
1850 clear_buffer_dirty(bh);
1851 }
1852 } while ((bh = bh->b_this_page) != head);
1853 SetPageError(page);
1854 BUG_ON(PageWriteback(page));
1855 mapping_set_error(page->mapping, err);
1856 set_page_writeback(page);
1857 do {
1858 struct buffer_head *next = bh->b_this_page;
1859 if (buffer_async_write(bh)) {
1860 clear_buffer_dirty(bh);
1861 submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
1862 nr_underway++;
1863 }
1864 bh = next;
1865 } while (bh != head);
1866 unlock_page(page);
1867 goto done;
1868 }
1869 EXPORT_SYMBOL(__block_write_full_page);
1870
/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking), and clear the new bit.
 */
1876 void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1877 {
1878 unsigned int block_start, block_end;
1879 struct buffer_head *head, *bh;
1880
1881 BUG_ON(!PageLocked(page));
1882 if (!page_has_buffers(page))
1883 return;
1884
1885 bh = head = page_buffers(page);
1886 block_start = 0;
1887 do {
1888 block_end = block_start + bh->b_size;
1889
1890 if (buffer_new(bh)) {
1891 if (block_end > from && block_start < to) {
1892 if (!PageUptodate(page)) {
1893 unsigned start, size;
1894
1895 start = max(from, block_start);
1896 size = min(to, block_end) - start;
1897
1898 zero_user(page, start, size);
1899 set_buffer_uptodate(bh);
1900 }
1901
1902 clear_buffer_new(bh);
1903 mark_buffer_dirty(bh);
1904 }
1905 }
1906
1907 block_start = block_end;
1908 bh = bh->b_this_page;
1909 } while (bh != head);
1910 }
1911 EXPORT_SYMBOL(page_zero_new_buffers);
1912
1913 static void
1914 iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
1915 const struct iomap *iomap)
1916 {
1917 loff_t offset = block << inode->i_blkbits;
1918
1919 bh->b_bdev = iomap->bdev;
1920
1921
1922
1923
1924
1925
1926
1927 BUG_ON(offset >= iomap->offset + iomap->length);
1928
1929 switch (iomap->type) {
1930 case IOMAP_HOLE:
1931
1932
1933
1934
1935
1936 if (!buffer_uptodate(bh) ||
1937 (offset >= i_size_read(inode)))
1938 set_buffer_new(bh);
1939 break;
1940 case IOMAP_DELALLOC:
1941 if (!buffer_uptodate(bh) ||
1942 (offset >= i_size_read(inode)))
1943 set_buffer_new(bh);
1944 set_buffer_uptodate(bh);
1945 set_buffer_mapped(bh);
1946 set_buffer_delay(bh);
1947 break;
1948 case IOMAP_UNWRITTEN:
1949
1950
1951
1952
1953
1954 set_buffer_new(bh);
1955 set_buffer_unwritten(bh);
1956 fallthrough;
1957 case IOMAP_MAPPED:
1958 if ((iomap->flags & IOMAP_F_NEW) ||
1959 offset >= i_size_read(inode))
1960 set_buffer_new(bh);
1961 bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
1962 inode->i_blkbits;
1963 set_buffer_mapped(bh);
1964 break;
1965 }
1966 }
1967
1968 int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
1969 get_block_t *get_block, const struct iomap *iomap)
1970 {
1971 unsigned from = pos & (PAGE_SIZE - 1);
1972 unsigned to = from + len;
1973 struct inode *inode = folio->mapping->host;
1974 unsigned block_start, block_end;
1975 sector_t block;
1976 int err = 0;
1977 unsigned blocksize, bbits;
1978 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1979
1980 BUG_ON(!folio_test_locked(folio));
1981 BUG_ON(from > PAGE_SIZE);
1982 BUG_ON(to > PAGE_SIZE);
1983 BUG_ON(from > to);
1984
1985 head = create_page_buffers(&folio->page, inode, 0);
1986 blocksize = head->b_size;
1987 bbits = block_size_bits(blocksize);
1988
1989 block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
1990
1991 for(bh = head, block_start = 0; bh != head || !block_start;
1992 block++, block_start=block_end, bh = bh->b_this_page) {
1993 block_end = block_start + blocksize;
1994 if (block_end <= from || block_start >= to) {
1995 if (folio_test_uptodate(folio)) {
1996 if (!buffer_uptodate(bh))
1997 set_buffer_uptodate(bh);
1998 }
1999 continue;
2000 }
2001 if (buffer_new(bh))
2002 clear_buffer_new(bh);
2003 if (!buffer_mapped(bh)) {
2004 WARN_ON(bh->b_size != blocksize);
2005 if (get_block) {
2006 err = get_block(inode, block, bh, 1);
2007 if (err)
2008 break;
2009 } else {
2010 iomap_to_bh(inode, block, bh, iomap);
2011 }
2012
2013 if (buffer_new(bh)) {
2014 clean_bdev_bh_alias(bh);
2015 if (folio_test_uptodate(folio)) {
2016 clear_buffer_new(bh);
2017 set_buffer_uptodate(bh);
2018 mark_buffer_dirty(bh);
2019 continue;
2020 }
2021 if (block_end > to || block_start < from)
2022 folio_zero_segments(folio,
2023 to, block_end,
2024 block_start, from);
2025 continue;
2026 }
2027 }
2028 if (folio_test_uptodate(folio)) {
2029 if (!buffer_uptodate(bh))
2030 set_buffer_uptodate(bh);
2031 continue;
2032 }
2033 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
2034 !buffer_unwritten(bh) &&
2035 (block_start < from || block_end > to)) {
2036 ll_rw_block(REQ_OP_READ, 1, &bh);
2037 *wait_bh++=bh;
2038 }
2039 }
2040
2041
2042
2043 while(wait_bh > wait) {
2044 wait_on_buffer(*--wait_bh);
2045 if (!buffer_uptodate(*wait_bh))
2046 err = -EIO;
2047 }
2048 if (unlikely(err))
2049 page_zero_new_buffers(&folio->page, from, to);
2050 return err;
2051 }
2052
2053 int __block_write_begin(struct page *page, loff_t pos, unsigned len,
2054 get_block_t *get_block)
2055 {
2056 return __block_write_begin_int(page_folio(page), pos, len, get_block,
2057 NULL);
2058 }
2059 EXPORT_SYMBOL(__block_write_begin);
2060
2061 static int __block_commit_write(struct inode *inode, struct page *page,
2062 unsigned from, unsigned to)
2063 {
2064 unsigned block_start, block_end;
2065 int partial = 0;
2066 unsigned blocksize;
2067 struct buffer_head *bh, *head;
2068
2069 bh = head = page_buffers(page);
2070 blocksize = bh->b_size;
2071
2072 block_start = 0;
2073 do {
2074 block_end = block_start + blocksize;
2075 if (block_end <= from || block_start >= to) {
2076 if (!buffer_uptodate(bh))
2077 partial = 1;
2078 } else {
2079 set_buffer_uptodate(bh);
2080 mark_buffer_dirty(bh);
2081 }
2082 if (buffer_new(bh))
2083 clear_buffer_new(bh);
2084
2085 block_start = block_end;
2086 bh = bh->b_this_page;
2087 } while (bh != head);
2088
2089
2090
2091
2092
2093
2094
2095 if (!partial)
2096 SetPageUptodate(page);
2097 return 0;
2098 }
2099
/*
 * block_write_begin takes care of the basic task of block allocation and
 * bringing partial write blocks uptodate first.
 *
 * The filesystem needs to handle block truncation upon failure.
 */
2106 int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2107 struct page **pagep, get_block_t *get_block)
2108 {
2109 pgoff_t index = pos >> PAGE_SHIFT;
2110 struct page *page;
2111 int status;
2112
2113 page = grab_cache_page_write_begin(mapping, index);
2114 if (!page)
2115 return -ENOMEM;
2116
2117 status = __block_write_begin(page, pos, len, get_block);
2118 if (unlikely(status)) {
2119 unlock_page(page);
2120 put_page(page);
2121 page = NULL;
2122 }
2123
2124 *pagep = page;
2125 return status;
2126 }
2127 EXPORT_SYMBOL(block_write_begin);
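/*
 * Illustrative sketch (not part of the original source): a simple
 * filesystem's ->write_begin can be a thin wrapper around
 * block_write_begin(), passing its own block-mapping routine.
 * myfs_write_begin and myfs_get_block are hypothetical names.
 *
 *	static int myfs_write_begin(struct file *file,
 *				    struct address_space *mapping,
 *				    loff_t pos, unsigned len,
 *				    struct page **pagep, void **fsdata)
 *	{
 *		return block_write_begin(mapping, pos, len, pagep,
 *					 myfs_get_block);
 *	}
 */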
2128
2129 int block_write_end(struct file *file, struct address_space *mapping,
2130 loff_t pos, unsigned len, unsigned copied,
2131 struct page *page, void *fsdata)
2132 {
2133 struct inode *inode = mapping->host;
2134 unsigned start;
2135
2136 start = pos & (PAGE_SIZE - 1);
2137
2138 if (unlikely(copied < len)) {
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151 if (!PageUptodate(page))
2152 copied = 0;
2153
2154 page_zero_new_buffers(page, start+copied, start+len);
2155 }
2156 flush_dcache_page(page);
2157
2158
2159 __block_commit_write(inode, page, start, start+copied);
2160
2161 return copied;
2162 }
2163 EXPORT_SYMBOL(block_write_end);
2164
2165 int generic_write_end(struct file *file, struct address_space *mapping,
2166 loff_t pos, unsigned len, unsigned copied,
2167 struct page *page, void *fsdata)
2168 {
2169 struct inode *inode = mapping->host;
2170 loff_t old_size = inode->i_size;
2171 bool i_size_changed = false;
2172
2173 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2174
2175
2176
2177
2178
2179
2180
2181
2182 if (pos + copied > inode->i_size) {
2183 i_size_write(inode, pos + copied);
2184 i_size_changed = true;
2185 }
2186
2187 unlock_page(page);
2188 put_page(page);
2189
2190 if (old_size < pos)
2191 pagecache_isize_extended(inode, old_size, pos);
2192
2193
2194
2195
2196
2197
2198 if (i_size_changed)
2199 mark_inode_dirty(inode);
2200 return copied;
2201 }
2202 EXPORT_SYMBOL(generic_write_end);
2203
/*
 * block_is_partially_uptodate checks whether buffers within a folio are
 * uptodate or not.
 *
 * Returns true if all buffers which correspond to the specified part of
 * the folio are uptodate.
 */
2211 bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
2212 {
2213 unsigned block_start, block_end, blocksize;
2214 unsigned to;
2215 struct buffer_head *bh, *head;
2216 bool ret = true;
2217
2218 head = folio_buffers(folio);
2219 if (!head)
2220 return false;
2221 blocksize = head->b_size;
2222 to = min_t(unsigned, folio_size(folio) - from, count);
2223 to = from + to;
2224 if (from < blocksize && to > folio_size(folio) - blocksize)
2225 return false;
2226
2227 bh = head;
2228 block_start = 0;
2229 do {
2230 block_end = block_start + blocksize;
2231 if (block_end > from && block_start < to) {
2232 if (!buffer_uptodate(bh)) {
2233 ret = false;
2234 break;
2235 }
2236 if (block_end >= to)
2237 break;
2238 }
2239 block_start = block_end;
2240 bh = bh->b_this_page;
2241 } while (bh != head);
2242
2243 return ret;
2244 }
2245 EXPORT_SYMBOL(block_is_partially_uptodate);
2246
/*
 * Generic "read_folio" function for block devices that have the normal
 * get_block functionality.  This is most of the block device filesystems.
 * Reads the folio asynchronously - the unlock_buffer() and
 * set/clear_buffer_uptodate() functions propagate buffer state into the
 * folio once I/O has completed.
 */
2254 int block_read_full_folio(struct folio *folio, get_block_t *get_block)
2255 {
2256 struct inode *inode = folio->mapping->host;
2257 sector_t iblock, lblock;
2258 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2259 unsigned int blocksize, bbits;
2260 int nr, i;
2261 int fully_mapped = 1;
2262 bool page_error = false;
2263
2264 VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
2265
2266 head = create_page_buffers(&folio->page, inode, 0);
2267 blocksize = head->b_size;
2268 bbits = block_size_bits(blocksize);
2269
2270 iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits);
2271 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2272 bh = head;
2273 nr = 0;
2274 i = 0;
2275
2276 do {
2277 if (buffer_uptodate(bh))
2278 continue;
2279
2280 if (!buffer_mapped(bh)) {
2281 int err = 0;
2282
2283 fully_mapped = 0;
2284 if (iblock < lblock) {
2285 WARN_ON(bh->b_size != blocksize);
2286 err = get_block(inode, iblock, bh, 0);
2287 if (err) {
2288 folio_set_error(folio);
2289 page_error = true;
2290 }
2291 }
2292 if (!buffer_mapped(bh)) {
2293 folio_zero_range(folio, i * blocksize,
2294 blocksize);
2295 if (!err)
2296 set_buffer_uptodate(bh);
2297 continue;
2298 }
2299
2300
2301
2302
2303 if (buffer_uptodate(bh))
2304 continue;
2305 }
2306 arr[nr++] = bh;
2307 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2308
2309 if (fully_mapped)
2310 folio_set_mappedtodisk(folio);
2311
2312 if (!nr) {
2313
2314
2315
2316
2317 if (!page_error)
2318 folio_mark_uptodate(folio);
2319 folio_unlock(folio);
2320 return 0;
2321 }
2322
2323
2324 for (i = 0; i < nr; i++) {
2325 bh = arr[i];
2326 lock_buffer(bh);
2327 mark_buffer_async_read(bh);
2328 }
2329
2330
2331
2332
2333
2334
2335 for (i = 0; i < nr; i++) {
2336 bh = arr[i];
2337 if (buffer_uptodate(bh))
2338 end_buffer_async_read(bh, 1);
2339 else
2340 submit_bh(REQ_OP_READ, bh);
2341 }
2342 return 0;
2343 }
2344 EXPORT_SYMBOL(block_read_full_folio);
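/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * ->read_folio typically just forwards to block_read_full_folio() with its
 * own get_block routine.  myfs_read_folio and myfs_get_block are
 * hypothetical.
 *
 *	static int myfs_read_folio(struct file *file, struct folio *folio)
 *	{
 *		return block_read_full_folio(folio, myfs_get_block);
 *	}
 */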
2345
/*
 * Utility function for filesystems that need a simple "expanding truncate":
 * a zero-length write_begin/write_end at the new size instantiates (and
 * zeroes) the final block as needed and updates i_size.
 */
2350 int generic_cont_expand_simple(struct inode *inode, loff_t size)
2351 {
2352 struct address_space *mapping = inode->i_mapping;
2353 const struct address_space_operations *aops = mapping->a_ops;
2354 struct page *page;
2355 void *fsdata;
2356 int err;
2357
2358 err = inode_newsize_ok(inode, size);
2359 if (err)
2360 goto out;
2361
2362 err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata);
2363 if (err)
2364 goto out;
2365
2366 err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
2367 BUG_ON(err > 0);
2368
2369 out:
2370 return err;
2371 }
2372 EXPORT_SYMBOL(generic_cont_expand_simple);
2373
2374 static int cont_expand_zero(struct file *file, struct address_space *mapping,
2375 loff_t pos, loff_t *bytes)
2376 {
2377 struct inode *inode = mapping->host;
2378 const struct address_space_operations *aops = mapping->a_ops;
2379 unsigned int blocksize = i_blocksize(inode);
2380 struct page *page;
2381 void *fsdata;
2382 pgoff_t index, curidx;
2383 loff_t curpos;
2384 unsigned zerofrom, offset, len;
2385 int err = 0;
2386
2387 index = pos >> PAGE_SHIFT;
2388 offset = pos & ~PAGE_MASK;
2389
2390 while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
2391 zerofrom = curpos & ~PAGE_MASK;
2392 if (zerofrom & (blocksize-1)) {
2393 *bytes |= (blocksize-1);
2394 (*bytes)++;
2395 }
2396 len = PAGE_SIZE - zerofrom;
2397
2398 err = aops->write_begin(file, mapping, curpos, len,
2399 &page, &fsdata);
2400 if (err)
2401 goto out;
2402 zero_user(page, zerofrom, len);
2403 err = aops->write_end(file, mapping, curpos, len, len,
2404 page, fsdata);
2405 if (err < 0)
2406 goto out;
2407 BUG_ON(err != len);
2408 err = 0;
2409
2410 balance_dirty_pages_ratelimited(mapping);
2411
2412 if (fatal_signal_pending(current)) {
2413 err = -EINTR;
2414 goto out;
2415 }
2416 }
2417
2418
2419 if (index == curidx) {
2420 zerofrom = curpos & ~PAGE_MASK;
2421
2422 if (offset <= zerofrom) {
2423 goto out;
2424 }
2425 if (zerofrom & (blocksize-1)) {
2426 *bytes |= (blocksize-1);
2427 (*bytes)++;
2428 }
2429 len = offset - zerofrom;
2430
2431 err = aops->write_begin(file, mapping, curpos, len,
2432 &page, &fsdata);
2433 if (err)
2434 goto out;
2435 zero_user(page, zerofrom, len);
2436 err = aops->write_end(file, mapping, curpos, len, len,
2437 page, fsdata);
2438 if (err < 0)
2439 goto out;
2440 BUG_ON(err != len);
2441 err = 0;
2442 }
2443 out:
2444 return err;
2445 }
2446
/*
 * For filesystems that do not allow holes in a file: we may have to extend
 * the file with zeroes up to the write position first.
 */
2451 int cont_write_begin(struct file *file, struct address_space *mapping,
2452 loff_t pos, unsigned len,
2453 struct page **pagep, void **fsdata,
2454 get_block_t *get_block, loff_t *bytes)
2455 {
2456 struct inode *inode = mapping->host;
2457 unsigned int blocksize = i_blocksize(inode);
2458 unsigned int zerofrom;
2459 int err;
2460
2461 err = cont_expand_zero(file, mapping, pos, bytes);
2462 if (err)
2463 return err;
2464
2465 zerofrom = *bytes & ~PAGE_MASK;
2466 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2467 *bytes |= (blocksize-1);
2468 (*bytes)++;
2469 }
2470
2471 return block_write_begin(mapping, pos, len, pagep, get_block);
2472 }
2473 EXPORT_SYMBOL(cont_write_begin);
2474
2475 int block_commit_write(struct page *page, unsigned from, unsigned to)
2476 {
2477 struct inode *inode = page->mapping->host;
2478 __block_commit_write(inode,page,from,to);
2479 return 0;
2480 }
2481 EXPORT_SYMBOL(block_commit_write);
2482
/*
 * block_page_mkwrite() is not allowed to change the file size as it gets
 * called from a page fault handler when a page is first dirtied.  Hence we
 * must be careful to check for EOF conditions here.  We set the page up
 * correctly for a written page, which means we get ENOSPC checking when
 * writing into holes and correct delalloc and unwritten extent mapping on
 * filesystems that support these features.
 *
 * We cannot take the inode lock here, so we have to play games to protect
 * against truncate races as the page could now be beyond EOF.  Because
 * truncate writes the inode size before removing pages, once we have the
 * page lock we can determine safely whether the page is beyond EOF; if it
 * is not, the page is guaranteed safe against truncation until we unlock it.
 *
 * Direct callers of this function should protect against filesystem
 * freezing using sb_start_pagefault() / sb_end_pagefault().
 */
2501 int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2502 get_block_t get_block)
2503 {
2504 struct page *page = vmf->page;
2505 struct inode *inode = file_inode(vma->vm_file);
2506 unsigned long end;
2507 loff_t size;
2508 int ret;
2509
2510 lock_page(page);
2511 size = i_size_read(inode);
2512 if ((page->mapping != inode->i_mapping) ||
2513 (page_offset(page) > size)) {
2514
2515 ret = -EFAULT;
2516 goto out_unlock;
2517 }
2518
2519
2520 if (((page->index + 1) << PAGE_SHIFT) > size)
2521 end = size & ~PAGE_MASK;
2522 else
2523 end = PAGE_SIZE;
2524
2525 ret = __block_write_begin(page, 0, end, get_block);
2526 if (!ret)
2527 ret = block_commit_write(page, 0, end);
2528
2529 if (unlikely(ret < 0))
2530 goto out_unlock;
2531 set_page_dirty(page);
2532 wait_for_stable_page(page);
2533 return 0;
2534 out_unlock:
2535 unlock_page(page);
2536 return ret;
2537 }
2538 EXPORT_SYMBOL(block_page_mkwrite);
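/*
 * Illustrative sketch (not part of the original source): a ->page_mkwrite
 * handler built on block_page_mkwrite(), with the freeze protection the
 * comment above asks for.  myfs_page_mkwrite and myfs_get_block are
 * hypothetical; block_page_mkwrite_return() converts the error code into a
 * vm_fault_t.
 *
 *	static vm_fault_t myfs_page_mkwrite(struct vm_fault *vmf)
 *	{
 *		struct inode *inode = file_inode(vmf->vma->vm_file);
 *		int err;
 *
 *		sb_start_pagefault(inode->i_sb);
 *		file_update_time(vmf->vma->vm_file);
 *		err = block_page_mkwrite(vmf->vma, vmf, myfs_get_block);
 *		sb_end_pagefault(inode->i_sb);
 *		return block_page_mkwrite_return(err);
 *	}
 */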
2539
2540 int block_truncate_page(struct address_space *mapping,
2541 loff_t from, get_block_t *get_block)
2542 {
2543 pgoff_t index = from >> PAGE_SHIFT;
2544 unsigned offset = from & (PAGE_SIZE-1);
2545 unsigned blocksize;
2546 sector_t iblock;
2547 unsigned length, pos;
2548 struct inode *inode = mapping->host;
2549 struct page *page;
2550 struct buffer_head *bh;
2551 int err;
2552
2553 blocksize = i_blocksize(inode);
2554 length = offset & (blocksize - 1);
2555
2556
2557 if (!length)
2558 return 0;
2559
2560 length = blocksize - length;
2561 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2562
2563 page = grab_cache_page(mapping, index);
2564 err = -ENOMEM;
2565 if (!page)
2566 goto out;
2567
2568 if (!page_has_buffers(page))
2569 create_empty_buffers(page, blocksize, 0);
2570
2571
2572 bh = page_buffers(page);
2573 pos = blocksize;
2574 while (offset >= pos) {
2575 bh = bh->b_this_page;
2576 iblock++;
2577 pos += blocksize;
2578 }
2579
2580 err = 0;
2581 if (!buffer_mapped(bh)) {
2582 WARN_ON(bh->b_size != blocksize);
2583 err = get_block(inode, iblock, bh, 0);
2584 if (err)
2585 goto unlock;
2586
2587 if (!buffer_mapped(bh))
2588 goto unlock;
2589 }
2590
2591
2592 if (PageUptodate(page))
2593 set_buffer_uptodate(bh);
2594
2595 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2596 err = -EIO;
2597 ll_rw_block(REQ_OP_READ, 1, &bh);
2598 wait_on_buffer(bh);
2599
2600 if (!buffer_uptodate(bh))
2601 goto unlock;
2602 }
2603
2604 zero_user(page, offset, length);
2605 mark_buffer_dirty(bh);
2606 err = 0;
2607
2608 unlock:
2609 unlock_page(page);
2610 put_page(page);
2611 out:
2612 return err;
2613 }
2614 EXPORT_SYMBOL(block_truncate_page);
2615
/*
 * The generic ->writepage function for buffer-backed address_spaces.
 */
2619 int block_write_full_page(struct page *page, get_block_t *get_block,
2620 struct writeback_control *wbc)
2621 {
2622 struct inode * const inode = page->mapping->host;
2623 loff_t i_size = i_size_read(inode);
2624 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2625 unsigned offset;
2626
2627
2628 if (page->index < end_index)
2629 return __block_write_full_page(inode, page, get_block, wbc,
2630 end_buffer_async_write);
2631
2632
2633 offset = i_size & (PAGE_SIZE-1);
2634 if (page->index >= end_index+1 || !offset) {
2635 unlock_page(page);
2636 return 0;
2637 }
2638
2639
2640
2641
2642
2643
2644
2645
2646 zero_user_segment(page, offset, PAGE_SIZE);
2647 return __block_write_full_page(inode, page, get_block, wbc,
2648 end_buffer_async_write);
2649 }
2650 EXPORT_SYMBOL(block_write_full_page);
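/*
 * Illustrative sketch (not part of the original source): a buffer-backed
 * filesystem's ->writepage usually forwards straight to
 * block_write_full_page().  myfs_writepage and myfs_get_block are
 * hypothetical.
 *
 *	static int myfs_writepage(struct page *page,
 *				  struct writeback_control *wbc)
 *	{
 *		return block_write_full_page(page, myfs_get_block, wbc);
 *	}
 */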
2651
2652 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2653 get_block_t *get_block)
2654 {
2655 struct inode *inode = mapping->host;
2656 struct buffer_head tmp = {
2657 .b_size = i_blocksize(inode),
2658 };
2659
2660 get_block(inode, block, &tmp, 0);
2661 return tmp.b_blocknr;
2662 }
2663 EXPORT_SYMBOL(generic_block_bmap);
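/*
 * Illustrative sketch (not part of the original source): ->bmap is usually
 * a one-liner on top of generic_block_bmap().  myfs_bmap and myfs_get_block
 * are hypothetical.
 *
 *	static sector_t myfs_bmap(struct address_space *mapping,
 *				  sector_t block)
 *	{
 *		return generic_block_bmap(mapping, block, myfs_get_block);
 *	}
 */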
2664
2665 static void end_bio_bh_io_sync(struct bio *bio)
2666 {
2667 struct buffer_head *bh = bio->bi_private;
2668
2669 if (unlikely(bio_flagged(bio, BIO_QUIET)))
2670 set_bit(BH_Quiet, &bh->b_state);
2671
2672 bh->b_end_io(bh, !bio->bi_status);
2673 bio_put(bio);
2674 }
2675
2676 static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
2677 struct writeback_control *wbc)
2678 {
2679 const enum req_op op = opf & REQ_OP_MASK;
2680 struct bio *bio;
2681
2682 BUG_ON(!buffer_locked(bh));
2683 BUG_ON(!buffer_mapped(bh));
2684 BUG_ON(!bh->b_end_io);
2685 BUG_ON(buffer_delay(bh));
2686 BUG_ON(buffer_unwritten(bh));
2687
2688
2689
2690
2691 if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
2692 clear_buffer_write_io_error(bh);
2693
2694 if (buffer_meta(bh))
2695 opf |= REQ_META;
2696 if (buffer_prio(bh))
2697 opf |= REQ_PRIO;
2698
2699 bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO);
2700
2701 fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
2702
2703 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2704
2705 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
2706 BUG_ON(bio->bi_iter.bi_size != bh->b_size);
2707
2708 bio->bi_end_io = end_bio_bh_io_sync;
2709 bio->bi_private = bh;
2710
2711
2712 guard_bio_eod(bio);
2713
2714 if (wbc) {
2715 wbc_init_bio(wbc, bio);
2716 wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
2717 }
2718
2719 submit_bio(bio);
2720 return 0;
2721 }
2722
2723 int submit_bh(blk_opf_t opf, struct buffer_head *bh)
2724 {
2725 return submit_bh_wbc(opf, bh, NULL);
2726 }
2727 EXPORT_SYMBOL(submit_bh);
2728
/**
 * ll_rw_block: low-level access to block devices
 * @opf: block layer request operation and flags.
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads and
 * requests an I/O operation on them, either a %REQ_OP_READ or a
 * %REQ_OP_WRITE.
 *
 * This function drops any buffer that it cannot get a lock on (with the
 * BH_Lock state bit), any buffer that appears to be clean when doing a
 * write request, and any buffer that appears to be up-to-date when doing a
 * read request.  Further it marks as clean buffers that are processed for
 * writing (the buffer cache won't assume that they are actually clean
 * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to a simple completion handler that marks the
 * buffer up-to-date (if appropriate), unlocks the buffer and wakes any
 * waiters.
 */
2754 void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[])
2755 {
2756 const enum req_op op = opf & REQ_OP_MASK;
2757 int i;
2758
2759 for (i = 0; i < nr; i++) {
2760 struct buffer_head *bh = bhs[i];
2761
2762 if (!trylock_buffer(bh))
2763 continue;
2764 if (op == REQ_OP_WRITE) {
2765 if (test_clear_buffer_dirty(bh)) {
2766 bh->b_end_io = end_buffer_write_sync;
2767 get_bh(bh);
2768 submit_bh(opf, bh);
2769 continue;
2770 }
2771 } else {
2772 if (!buffer_uptodate(bh)) {
2773 bh->b_end_io = end_buffer_read_sync;
2774 get_bh(bh);
2775 submit_bh(opf, bh);
2776 continue;
2777 }
2778 }
2779 unlock_buffer(bh);
2780 }
2781 }
2782 EXPORT_SYMBOL(ll_rw_block);
2783
2784 void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
2785 {
2786 lock_buffer(bh);
2787 if (!test_clear_buffer_dirty(bh)) {
2788 unlock_buffer(bh);
2789 return;
2790 }
2791 bh->b_end_io = end_buffer_write_sync;
2792 get_bh(bh);
2793 submit_bh(REQ_OP_WRITE | op_flags, bh);
2794 }
2795 EXPORT_SYMBOL(write_dirty_buffer);
2796
/*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it.  The caller must hold a
 * reference on the buffer_head.
 */
2802 int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
2803 {
2804 int ret = 0;
2805
2806 WARN_ON(atomic_read(&bh->b_count) < 1);
2807 lock_buffer(bh);
2808 if (test_clear_buffer_dirty(bh)) {
2809
2810
2811
2812
2813 if (!buffer_mapped(bh)) {
2814 unlock_buffer(bh);
2815 return -EIO;
2816 }
2817
2818 get_bh(bh);
2819 bh->b_end_io = end_buffer_write_sync;
2820 ret = submit_bh(REQ_OP_WRITE | op_flags, bh);
2821 wait_on_buffer(bh);
2822 if (!ret && !buffer_uptodate(bh))
2823 ret = -EIO;
2824 } else {
2825 unlock_buffer(bh);
2826 }
2827 return ret;
2828 }
2829 EXPORT_SYMBOL(__sync_dirty_buffer);
2830
2831 int sync_dirty_buffer(struct buffer_head *bh)
2832 {
2833 return __sync_dirty_buffer(bh, REQ_SYNC);
2834 }
2835 EXPORT_SYMBOL(sync_dirty_buffer);
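/*
 * Illustrative sketch (not part of the original source): the common pattern
 * for updating a metadata block synchronously - get the buffer, modify it,
 * mark it dirty and then write it out and wait with sync_dirty_buffer().
 * sb, block and src are hypothetical.
 *
 *	struct buffer_head *bh = sb_getblk(sb, block);
 *	int err;
 *
 *	if (!bh)
 *		return -ENOMEM;
 *	lock_buffer(bh);
 *	memcpy(bh->b_data, src, bh->b_size);
 *	set_buffer_uptodate(bh);
 *	unlock_buffer(bh);
 *	mark_buffer_dirty(bh);
 *	err = sync_dirty_buffer(bh);
 *	brelse(bh);
 *	return err;
 */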
2836
/*
 * try_to_free_buffers() checks whether all the buffers on this particular
 * folio are unused, and releases them if so.
 *
 * Exclusion against try_to_free_buffers may be obtained by either locking
 * the folio or by holding its mapping's private_lock.
 *
 * If the folio is dirty but all the buffers are clean then we need to be
 * sure to mark the folio clean as well.  This is because the folio may be
 * against a block device, and a later reattachment of buffers to a dirty
 * folio will set *all* buffers dirty - which would corrupt filesystem data
 * on the same device.  Doing that requires total exclusion from
 * block_dirty_folio(), which is what private_lock provides.
 *
 * try_to_free_buffers() is non-blocking.
 */
2857 static inline int buffer_busy(struct buffer_head *bh)
2858 {
2859 return atomic_read(&bh->b_count) |
2860 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
2861 }
2862
2863 static bool
2864 drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free)
2865 {
2866 struct buffer_head *head = folio_buffers(folio);
2867 struct buffer_head *bh;
2868
2869 bh = head;
2870 do {
2871 if (buffer_busy(bh))
2872 goto failed;
2873 bh = bh->b_this_page;
2874 } while (bh != head);
2875
2876 do {
2877 struct buffer_head *next = bh->b_this_page;
2878
2879 if (bh->b_assoc_map)
2880 __remove_assoc_queue(bh);
2881 bh = next;
2882 } while (bh != head);
2883 *buffers_to_free = head;
2884 folio_detach_private(folio);
2885 return true;
2886 failed:
2887 return false;
2888 }
2889
2890 bool try_to_free_buffers(struct folio *folio)
2891 {
2892 struct address_space * const mapping = folio->mapping;
2893 struct buffer_head *buffers_to_free = NULL;
	bool ret = false;
2895
2896 BUG_ON(!folio_test_locked(folio));
2897 if (folio_test_writeback(folio))
2898 return false;
2899
2900 if (mapping == NULL) {
2901 ret = drop_buffers(folio, &buffers_to_free);
2902 goto out;
2903 }
2904
2905 spin_lock(&mapping->private_lock);
2906 ret = drop_buffers(folio, &buffers_to_free);
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922 if (ret)
2923 folio_cancel_dirty(folio);
2924 spin_unlock(&mapping->private_lock);
2925 out:
2926 if (buffers_to_free) {
2927 struct buffer_head *bh = buffers_to_free;
2928
2929 do {
2930 struct buffer_head *next = bh->b_this_page;
2931 free_buffer_head(bh);
2932 bh = next;
2933 } while (bh != buffers_to_free);
2934 }
2935 return ret;
2936 }
2937 EXPORT_SYMBOL(try_to_free_buffers);
2938
/*
 * Buffer-head allocation
 */
2942 static struct kmem_cache *bh_cachep __read_mostly;
2943
/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
2948 static unsigned long max_buffer_heads;
2949
2950 int buffer_heads_over_limit;
2951
2952 struct bh_accounting {
2953 int nr;
2954 int ratelimit;
2955 };
2956
2957 static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
2958
2959 static void recalc_bh_state(void)
2960 {
2961 int i;
2962 int tot = 0;
2963
2964 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
2965 return;
2966 __this_cpu_write(bh_accounting.ratelimit, 0);
2967 for_each_online_cpu(i)
2968 tot += per_cpu(bh_accounting, i).nr;
2969 buffer_heads_over_limit = (tot > max_buffer_heads);
2970 }
2971
2972 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
2973 {
2974 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
2975 if (ret) {
2976 INIT_LIST_HEAD(&ret->b_assoc_buffers);
2977 spin_lock_init(&ret->b_uptodate_lock);
2978 preempt_disable();
2979 __this_cpu_inc(bh_accounting.nr);
2980 recalc_bh_state();
2981 preempt_enable();
2982 }
2983 return ret;
2984 }
2985 EXPORT_SYMBOL(alloc_buffer_head);
2986
2987 void free_buffer_head(struct buffer_head *bh)
2988 {
2989 BUG_ON(!list_empty(&bh->b_assoc_buffers));
2990 kmem_cache_free(bh_cachep, bh);
2991 preempt_disable();
2992 __this_cpu_dec(bh_accounting.nr);
2993 recalc_bh_state();
2994 preempt_enable();
2995 }
2996 EXPORT_SYMBOL(free_buffer_head);
2997
2998 static int buffer_exit_cpu_dead(unsigned int cpu)
2999 {
3000 int i;
3001 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3002
3003 for (i = 0; i < BH_LRU_SIZE; i++) {
3004 brelse(b->bhs[i]);
3005 b->bhs[i] = NULL;
3006 }
3007 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3008 per_cpu(bh_accounting, cpu).nr = 0;
3009 return 0;
3010 }
3011
/**
 * bh_uptodate_or_lock - Test whether the buffer is uptodate
 * @bh: struct buffer_head
 *
 * Return true if the buffer is up-to-date and false, with the buffer
 * locked, if not.
 */
3019 int bh_uptodate_or_lock(struct buffer_head *bh)
3020 {
3021 if (!buffer_uptodate(bh)) {
3022 lock_buffer(bh);
3023 if (!buffer_uptodate(bh))
3024 return 0;
3025 unlock_buffer(bh);
3026 }
3027 return 1;
3028 }
3029 EXPORT_SYMBOL(bh_uptodate_or_lock);
3030
/**
 * bh_submit_read - Submit a locked buffer for reading
 * @bh: struct buffer_head
 *
 * Returns zero on success and -EIO on error.
 */
3037 int bh_submit_read(struct buffer_head *bh)
3038 {
3039 BUG_ON(!buffer_locked(bh));
3040
3041 if (buffer_uptodate(bh)) {
3042 unlock_buffer(bh);
3043 return 0;
3044 }
3045
3046 get_bh(bh);
3047 bh->b_end_io = end_buffer_read_sync;
3048 submit_bh(REQ_OP_READ, bh);
3049 wait_on_buffer(bh);
3050 if (buffer_uptodate(bh))
3051 return 0;
3052 return -EIO;
3053 }
3054 EXPORT_SYMBOL(bh_submit_read);
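/*
 * Illustrative sketch (not part of the original source): the intended
 * calling pattern for the two helpers above - only issue a read if the
 * buffer is not already uptodate.  bh is assumed to be a valid, referenced
 * buffer_head.
 *
 *	if (!bh_uptodate_or_lock(bh)) {
 *		int err = bh_submit_read(bh);
 *
 *		if (err)
 *			return err;
 *	}
 *	(bh->b_data now contains valid data)
 */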
3055
3056 void __init buffer_init(void)
3057 {
3058 unsigned long nrpages;
3059 int ret;
3060
3061 bh_cachep = kmem_cache_create("buffer_head",
3062 sizeof(struct buffer_head), 0,
3063 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3064 SLAB_MEM_SPREAD),
3065 NULL);
3066
3067
3068
3069
3070 nrpages = (nr_free_buffer_pages() * 10) / 100;
3071 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3072 ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
3073 NULL, buffer_exit_cpu_dead);
3074 WARN_ON(ret < 0);
3075 }