// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext4/page-io.c
 *
 * This contains the new page_io functions for ext4
 *
 * Written by Theodore Ts'o, 2010.
 */

#include <linux/fs.h>
#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>

#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

static struct kmem_cache *io_end_cachep;
static struct kmem_cache *io_end_vec_cachep;

int __init ext4_init_pageio(void)
{
	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
	if (io_end_cachep == NULL)
		return -ENOMEM;

	io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
	if (io_end_vec_cachep == NULL) {
		kmem_cache_destroy(io_end_cachep);
		return -ENOMEM;
	}
	return 0;
}

void ext4_exit_pageio(void)
{
	kmem_cache_destroy(io_end_cachep);
	kmem_cache_destroy(io_end_vec_cachep);
}

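/*
 * Allocate a new io_end_vec and link it at the tail of the io_end's
 * list_vec list. The vector describes one contiguous range that will
 * need unwritten extent conversion once the write completes.
 */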
struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
{
	struct ext4_io_end_vec *io_end_vec;

	io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
	if (!io_end_vec)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&io_end_vec->list);
	list_add_tail(&io_end_vec->list, &io_end->list_vec);
	return io_end_vec;
}

static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
{
	struct ext4_io_end_vec *io_end_vec, *tmp;

	if (list_empty(&io_end->list_vec))
		return;
	list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
		list_del(&io_end_vec->list);
		kmem_cache_free(io_end_vec_cachep, io_end_vec);
	}
}

struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
{
	BUG_ON(list_empty(&io_end->list_vec));
	return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
}

/*
 * Print a buffer I/O error in the same format as fs/buffer.c. This keeps
 * the message compatible with dmesg scrapers that look for the standard
 * "Buffer I/O error" string.
 */
static void buffer_io_error(struct buffer_head *bh)
{
	printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n",
			   bh->b_bdev,
			   (unsigned long long)bh->b_blocknr);
}

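/*
 * Clear the async_write flag on all buffers covered by this bio and end
 * page writeback once no buffer on the page is still under IO. On error,
 * the page and its buffers are marked with the write error.
 */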
static void ext4_finish_bio(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		struct page *bounce_page = NULL;
		struct buffer_head *bh, *head;
		unsigned bio_start = bvec->bv_offset;
		unsigned bio_end = bio_start + bvec->bv_len;
		unsigned under_io = 0;
		unsigned long flags;

		if (fscrypt_is_bounce_page(page)) {
			bounce_page = page;
			page = fscrypt_pagecache_page(bounce_page);
		}

		if (bio->bi_status) {
			SetPageError(page);
			mapping_set_error(page->mapping, -EIO);
		}
		bh = head = page_buffers(page);
		/*
		 * We check all buffers in the page under b_uptodate_lock
		 * to avoid races with other end io clearing async_write flags
		 */
		spin_lock_irqsave(&head->b_uptodate_lock, flags);
		do {
			if (bh_offset(bh) < bio_start ||
			    bh_offset(bh) + bh->b_size > bio_end) {
				if (buffer_async_write(bh))
					under_io++;
				continue;
			}
			clear_buffer_async_write(bh);
			if (bio->bi_status) {
				set_buffer_write_io_error(bh);
				buffer_io_error(bh);
			}
		} while ((bh = bh->b_this_page) != head);
		spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
		if (!under_io) {
			fscrypt_free_bounce_page(bounce_page);
			end_page_writeback(page);
		}
	}
}

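/*
 * Free the io_end and everything hanging off it: finish and release any
 * bios still linked from io_end->bio and free the attached io_end_vec
 * ranges. Called once the last reference is dropped and no unwritten
 * extent conversion is pending.
 */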
static void ext4_release_io_end(ext4_io_end_t *io_end)
{
	struct bio *bio, *next_bio;

	BUG_ON(!list_empty(&io_end->list));
	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
	WARN_ON(io_end->handle);

	for (bio = io_end->bio; bio; bio = next_bio) {
		next_bio = bio->bi_private;
		ext4_finish_bio(bio);
		bio_put(bio);
	}
	ext4_free_io_end_vec(io_end);
	kmem_cache_free(io_end_cachep, io_end);
}

/*
 * Check a range of space and convert unwritten extents to written. Note that
 * we are protected from truncate touching the same part of the extent tree
 * by the fact that truncate waits for all outstanding DIO to finish and also
 * waits for the PageWriteback bits, so truncate cannot modify the extent
 * tree until all IOs overlapping the range have completed.
 */
static int ext4_end_io_end(ext4_io_end_t *io_end)
{
	struct inode *inode = io_end->inode;
	handle_t *handle = io_end->handle;
	int ret = 0;

	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
		   "list->prev 0x%p\n",
		   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);

	io_end->handle = NULL;	/* Following call will use up the handle */
	ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
	if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
		ext4_msg(inode->i_sb, KERN_EMERG,
			 "failed to convert unwritten extents to written "
			 "extents -- potential data loss! "
			 "(inode %lu, error %d)", inode->i_ino, ret);
	}
	ext4_clear_io_unwritten_flag(io_end);
	ext4_release_io_end(io_end);
	return ret;
}

static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
#ifdef	EXT4FS_DEBUG
	struct list_head *cur, *before, *after;
	ext4_io_end_t *io_end, *io_end0, *io_end1;

	if (list_empty(head))
		return;

	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
	list_for_each_entry(io_end, head, list) {
		cur = &io_end->list;
		before = cur->prev;
		io_end0 = container_of(before, ext4_io_end_t, list);
		after = cur->next;
		io_end1 = container_of(after, ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   io_end, inode->i_ino, io_end0, io_end1);
	}
#endif
}

/* Add the io_end to per-inode completed end_io list. */
static void ext4_add_complete_io(ext4_io_end_t *io_end)
{
	struct ext4_inode_info *ei = EXT4_I(io_end->inode);
	struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
	struct workqueue_struct *wq;
	unsigned long flags;

	/* Only reserved conversions from writeback should enter here */
	WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
	WARN_ON(!io_end->handle && sbi->s_journal);
	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	wq = sbi->rsv_conversion_wq;
	if (list_empty(&ei->i_rsv_conversion_list))
		queue_work(wq, &ei->i_rsv_conversion_work);
	list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
}

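/*
 * Pull all completed io_ends off the inode's conversion list (under
 * i_completed_io_lock) and process them one by one, converting their
 * unwritten extents. Returns the first error encountered, if any.
 */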
static int ext4_do_flush_completed_IO(struct inode *inode,
				      struct list_head *head)
{
	ext4_io_end_t *io_end;
	struct list_head unwritten;
	unsigned long flags;
	struct ext4_inode_info *ei = EXT4_I(inode);
	int err, ret = 0;

	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	dump_completed_IO(inode, head);
	list_replace_init(head, &unwritten);
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

	while (!list_empty(&unwritten)) {
		io_end = list_entry(unwritten.next, ext4_io_end_t, list);
		BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
		list_del_init(&io_end->list);

		err = ext4_end_io_end(io_end);
		if (unlikely(!ret && err))
			ret = err;
	}
	return ret;
}

/*
 * Work on completed IO, to convert unwritten extents to written extents.
 */
void ext4_end_io_rsv_work(struct work_struct *work)
{
	struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
						  i_rsv_conversion_work);
	ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
}

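/*
 * Allocate and initialize an io_end with a single reference. The caller
 * owns that reference and drops it with ext4_put_io_end() or
 * ext4_put_io_end_defer().
 */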
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
	ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);

	if (io_end) {
		io_end->inode = inode;
		INIT_LIST_HEAD(&io_end->list);
		INIT_LIST_HEAD(&io_end->list_vec);
		refcount_set(&io_end->count, 1);
	}
	return io_end;
}

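/*
 * Drop a reference to the io_end. If this was the last reference and the
 * io_end still needs unwritten extent conversion (and has ranges to
 * convert), queue it on the inode's completed-IO list so the conversion
 * runs later in workqueue context; otherwise release it immediately.
 */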
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
	if (refcount_dec_and_test(&io_end->count)) {
		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
				list_empty(&io_end->list_vec)) {
			ext4_release_io_end(io_end);
			return;
		}
		ext4_add_complete_io(io_end);
	}
}

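/*
 * Drop a reference to the io_end, performing any pending unwritten extent
 * conversion synchronously using the handle stashed in the io_end.
 */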
int ext4_put_io_end(ext4_io_end_t *io_end)
{
	int err = 0;

	if (refcount_dec_and_test(&io_end->count)) {
		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
			err = ext4_convert_unwritten_io_end_vec(io_end->handle,
								io_end);
			io_end->handle = NULL;
			ext4_clear_io_unwritten_flag(io_end);
		}
		ext4_release_io_end(io_end);
	}
	return err;
}

ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
	refcount_inc(&io_end->count);
	return io_end;
}

/* BIO completion function for page writeback */
static void ext4_end_bio(struct bio *bio)
{
	ext4_io_end_t *io_end = bio->bi_private;
	sector_t bi_sector = bio->bi_iter.bi_sector;

	if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n",
		      bio->bi_bdev,
		      (long long) bio->bi_iter.bi_sector,
		      (unsigned) bio_sectors(bio),
		      bio->bi_status)) {
		ext4_finish_bio(bio);
		bio_put(bio);
		return;
	}
	bio->bi_end_io = NULL;

	if (bio->bi_status) {
		struct inode *inode = io_end->inode;

		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
			     "starting block %llu)",
			     bio->bi_status, inode->i_ino,
			     (unsigned long long)
			     bi_sector >> (inode->i_blkbits - 9));
		mapping_set_error(inode->i_mapping,
				  blk_status_to_errno(bio->bi_status));
	}

	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
		/*
		 * Link bio into list hanging from io_end. We have to do it
		 * atomically as bio completions can be racing against each
		 * other.
		 */
		bio->bi_private = xchg(&io_end->bio, bio);
		ext4_put_io_end_defer(io_end);
	} else {
		/*
		 * Drop io_end reference early. Inode can get freed once
		 * we finish the bio.
		 */
		ext4_put_io_end_defer(io_end);
		ext4_finish_bio(bio);
		bio_put(bio);
	}
}

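/*
 * Submit the bio accumulated in the ext4_io_submit structure, marking it
 * REQ_SYNC for data-integrity writeback, and forget it so that a new bio
 * is allocated for the next buffer.
 */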
void ext4_io_submit(struct ext4_io_submit *io)
{
	struct bio *bio = io->io_bio;

	if (bio) {
		if (io->io_wbc->sync_mode == WB_SYNC_ALL)
			io->io_bio->bi_opf |= REQ_SYNC;
		submit_bio(io->io_bio);
	}
	io->io_bio = NULL;
}

void ext4_io_submit_init(struct ext4_io_submit *io,
			 struct writeback_control *wbc)
{
	io->io_wbc = wbc;
	io->io_bio = NULL;
	io->io_end = NULL;
}

static void io_submit_init_bio(struct ext4_io_submit *io,
			       struct buffer_head *bh)
{
	struct bio *bio;

	/*
	 * bio_alloc will _always_ be able to allocate a bio if
	 * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
	 */
	bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO);
	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_end_io = ext4_end_bio;
	bio->bi_private = ext4_get_io_end(io->io_end);
	io->io_bio = bio;
	io->io_next_block = bh->b_blocknr;
	wbc_init_bio(io->io_wbc, bio);
}

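/*
 * Add a buffer to the current bio, submitting the old bio and starting a
 * new one when the buffer is not physically contiguous with the previous
 * block, when the encryption contexts cannot be merged, or when the bio
 * is already full.
 */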
static void io_submit_add_bh(struct ext4_io_submit *io,
			     struct inode *inode,
			     struct page *page,
			     struct buffer_head *bh)
{
	int ret;

	if (io->io_bio && (bh->b_blocknr != io->io_next_block ||
			   !fscrypt_mergeable_bio_bh(io->io_bio, bh))) {
submit_and_retry:
		ext4_io_submit(io);
	}
	if (io->io_bio == NULL)
		io_submit_init_bio(io, bh);
	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
	if (ret != bh->b_size)
		goto submit_and_retry;
	wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
	io->io_next_block++;
}

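/*
 * Write out the dirty, mapped buffers of a locked page. The page is put
 * under writeback here; writeback is ended either when all submitted bios
 * complete (ext4_finish_bio) or immediately below if nothing was
 * submitted. Returns 0 or a negative error code.
 */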
int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			bool keep_towrite)
{
	struct page *bounce_page = NULL;
	struct inode *inode = page->mapping->host;
	unsigned block_start;
	struct buffer_head *bh, *head;
	int ret = 0;
	int nr_submitted = 0;
	int nr_to_submit = 0;
	struct writeback_control *wbc = io->io_wbc;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	if (keep_towrite)
		set_page_writeback_keepwrite(page);
	else
		set_page_writeback(page);
	ClearPageError(page);

	/*
	 * The page may straddle i_size. It must be zeroed out on each and
	 * every writepage invocation because it may be mmapped: a file is
	 * mapped in multiples of the page size, and for a file that is not
	 * a multiple of the page size the remaining memory is zeroed when
	 * mapped, and writes to that region are not written out to the file.
	 */
	if (len < PAGE_SIZE)
		zero_user_segment(page, len, PAGE_SIZE);
	/*
	 * In the first loop we prepare and mark buffers to submit. We have to
	 * mark all buffers in the page before submitting so that
	 * end_page_writeback() cannot be called from ext4_end_bio() when IO
	 * on the first buffer finishes and we are still working on submitting
	 * the second buffer.
	 */
	bh = head = page_buffers(page);
	do {
		block_start = bh_offset(bh);
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		if (!buffer_dirty(bh) || buffer_delay(bh) ||
		    !buffer_mapped(bh) || buffer_unwritten(bh)) {
			/* A hole? We can safely clear the dirty bit */
			if (!buffer_mapped(bh))
				clear_buffer_dirty(bh);
			if (io->io_bio)
				ext4_io_submit(io);
			continue;
		}
		if (buffer_new(bh))
			clear_buffer_new(bh);
		set_buffer_async_write(bh);
		nr_to_submit++;
	} while ((bh = bh->b_this_page) != head);

	bh = head = page_buffers(page);

	/*
	 * If any blocks are being written to an encrypted file, encrypt them
	 * into a bounce page. For simplicity, just encrypt until the last
	 * block which might be needed. This may cause some unneeded blocks
	 * (e.g. holes) to be unnecessarily encrypted, but this is rare and
	 * can't happen in the common case of blocksize == PAGE_SIZE.
	 */
	if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
		gfp_t gfp_flags = GFP_NOFS;
		unsigned int enc_bytes = round_up(len, i_blocksize(inode));

		/*
		 * Since bounce page allocation uses a mempool, we can only use
		 * a waiting mask (i.e. request guaranteed allocation) on the
		 * first page of the bio. Otherwise it can deadlock.
		 */
		if (io->io_bio)
			gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
	retry_encrypt:
		bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
							       0, gfp_flags);
		if (IS_ERR(bounce_page)) {
			ret = PTR_ERR(bounce_page);
			if (ret == -ENOMEM &&
			    (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
				gfp_t new_gfp_flags = GFP_NOFS;

				if (io->io_bio)
					ext4_io_submit(io);
				else
					new_gfp_flags |= __GFP_NOFAIL;
				memalloc_retry_wait(gfp_flags);
				gfp_flags = new_gfp_flags;
				goto retry_encrypt;
			}

			printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
			redirty_page_for_writepage(wbc, page);
			do {
				clear_buffer_async_write(bh);
				bh = bh->b_this_page;
			} while (bh != head);
			goto unlock;
		}
	}

	/* Now submit buffers to write */
	do {
		if (!buffer_async_write(bh))
			continue;
		io_submit_add_bh(io, inode,
				 bounce_page ? bounce_page : page, bh);
		nr_submitted++;
		clear_buffer_dirty(bh);
	} while ((bh = bh->b_this_page) != head);

unlock:
	unlock_page(page);
	/* Nothing submitted -- we have to end page writeback */
	if (!nr_submitted)
		end_page_writeback(page);
	return ret;
}