0001
0002
0003
0004
0005
0006 #include <crypto/hash.h>
0007 #include <linux/kernel.h>
0008 #include <linux/bio.h>
0009 #include <linux/blk-cgroup.h>
0010 #include <linux/file.h>
0011 #include <linux/fs.h>
0012 #include <linux/pagemap.h>
0013 #include <linux/highmem.h>
0014 #include <linux/time.h>
0015 #include <linux/init.h>
0016 #include <linux/string.h>
0017 #include <linux/backing-dev.h>
0018 #include <linux/writeback.h>
0019 #include <linux/compat.h>
0020 #include <linux/xattr.h>
0021 #include <linux/posix_acl.h>
0022 #include <linux/falloc.h>
0023 #include <linux/slab.h>
0024 #include <linux/ratelimit.h>
0025 #include <linux/btrfs.h>
0026 #include <linux/blkdev.h>
0027 #include <linux/posix_acl_xattr.h>
0028 #include <linux/uio.h>
0029 #include <linux/magic.h>
0030 #include <linux/iversion.h>
0031 #include <linux/swap.h>
0032 #include <linux/migrate.h>
0033 #include <linux/sched/mm.h>
0034 #include <linux/iomap.h>
0035 #include <asm/unaligned.h>
0036 #include <linux/fsverity.h>
0037 #include "misc.h"
0038 #include "ctree.h"
0039 #include "disk-io.h"
0040 #include "transaction.h"
0041 #include "btrfs_inode.h"
0042 #include "print-tree.h"
0043 #include "ordered-data.h"
0044 #include "xattr.h"
0045 #include "tree-log.h"
0046 #include "volumes.h"
0047 #include "compression.h"
0048 #include "locking.h"
0049 #include "free-space-cache.h"
0050 #include "props.h"
0051 #include "qgroup.h"
0052 #include "delalloc-space.h"
0053 #include "block-group.h"
0054 #include "space-info.h"
0055 #include "zoned.h"
0056 #include "subpage.h"
0057 #include "inode-item.h"
0058
/*
 * Pairs an inode number with a btrfs_root pointer.
 * NOTE(review): presumably the lookup key handed to the iget callbacks
 * defined elsewhere in this file - confirm against the users.
 */
0059 struct btrfs_iget_args {
0060 u64 ino; /* inode number (going by the field name) */
0061 struct btrfs_root *root; /* root that @ino is looked up in - TODO confirm */
0062 };
0063
/*
 * Per-request bookkeeping for direct I/O.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field notes below are inferred from names and types only and
 * must be confirmed against the direct I/O code.
 */
0064 struct btrfs_dio_data {
0065 ssize_t submitted; /* presumably bytes submitted so far - TODO confirm */
0066 struct extent_changeset *data_reserved; /* presumably the data reservation changeset - TODO confirm */
0067 bool data_space_reserved; /* presumably set once data space was reserved - TODO confirm */
0068 bool nocow_done; /* presumably set once a NOCOW write was set up - TODO confirm */
0069 };
0070
/*
 * Private state attached to a direct I/O bio.
 *
 * The bio is embedded by value as the last member rather than referenced
 * through a pointer.
 * NOTE(review): presumably these objects are carved out of btrfs_dio_bioset
 * (declared right after this structure), which would require @bio to stay
 * last - confirm before reordering any field.
 */
0071 struct btrfs_dio_private {
0072 struct inode *inode; /* inode the I/O belongs to (going by the name) */
0073
0074
0075
0076
0077
0078 u64 file_offset; /* presumably the file offset of the I/O - TODO confirm */
0079
0080 u32 bytes; /* presumably the length of the I/O in bytes - TODO confirm */
0081
0082
0083
0084
0085
0086 refcount_t refs; /* reference count; get/put sites are not visible here */
0087
0088
0089 u8 *csums; /* presumably a checksum buffer - TODO confirm ownership */
0090
0091
0092 struct bio bio; /* embedded bio, kept as the final member */
0093 };
0094
/*
 * File-local bio_set.
 * NOTE(review): presumably the allocator backing struct btrfs_dio_private;
 * its initialisation is not visible in this part of the file - confirm.
 */
0095 static struct bio_set btrfs_dio_bioset;
0096
/*
 * Context object carrying a single 64-bit index.
 * NOTE(review): presumably filled in by the rename helpers further down the
 * file; they are not visible here - confirm what the index refers to.
 */
0097 struct btrfs_rename_ctx {
0098
0099 u64 index; /* meaning of the index is not established here - TODO confirm */
0100 };
0101
/*
 * Forward declarations of the file-local operation tables so that code
 * earlier in the file can reference them; the definitions are not part of
 * this excerpt.
 */
0102 static const struct inode_operations btrfs_dir_inode_operations;
0103 static const struct inode_operations btrfs_symlink_inode_operations;
0104 static const struct inode_operations btrfs_special_inode_operations;
0105 static const struct inode_operations btrfs_file_inode_operations;
0106 static const struct address_space_operations btrfs_aops;
0107 static const struct file_operations btrfs_dir_file_operations;
0108
/*
 * Slab cache pointers.  Only btrfs_inode_cachep is file-local (static); the
 * others have external linkage so other translation units can use them.
 * NOTE(review): cache creation/destruction is not visible in this excerpt.
 */
0109 static struct kmem_cache *btrfs_inode_cachep;
0110 struct kmem_cache *btrfs_trans_handle_cachep;
0111 struct kmem_cache *btrfs_path_cachep;
0112 struct kmem_cache *btrfs_free_space_cachep;
0113 struct kmem_cache *btrfs_free_space_bitmap_cachep;
0114
/*
 * Prototypes for static functions that are used before their definition.
 * cow_file_range() is defined further down in this excerpt; the other three
 * are defined outside of it.
 */
0115 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
0116 static int btrfs_truncate(struct inode *inode, bool skip_writeback);
0117 static noinline int cow_file_range(struct btrfs_inode *inode,
0118 struct page *locked_page,
0119 u64 start, u64 end, int *page_started,
0120 unsigned long *nr_written, int unlock,
0121 u64 *done_offset);
0122 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
0123 u64 len, u64 orig_start, u64 block_start,
0124 u64 block_len, u64 orig_block_len,
0125 u64 ram_bytes, int compress_type,
0126 int type);
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138 int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
0139 {
0140 if (ilock_flags & BTRFS_ILOCK_SHARED) {
0141 if (ilock_flags & BTRFS_ILOCK_TRY) {
0142 if (!inode_trylock_shared(inode))
0143 return -EAGAIN;
0144 else
0145 return 0;
0146 }
0147 inode_lock_shared(inode);
0148 } else {
0149 if (ilock_flags & BTRFS_ILOCK_TRY) {
0150 if (!inode_trylock(inode))
0151 return -EAGAIN;
0152 else
0153 return 0;
0154 }
0155 inode_lock(inode);
0156 }
0157 if (ilock_flags & BTRFS_ILOCK_MMAP)
0158 down_write(&BTRFS_I(inode)->i_mmap_lock);
0159 return 0;
0160 }
0161
0162
0163
0164
0165
0166
0167
0168 void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
0169 {
0170 if (ilock_flags & BTRFS_ILOCK_MMAP)
0171 up_write(&BTRFS_I(inode)->i_mmap_lock);
0172 if (ilock_flags & BTRFS_ILOCK_SHARED)
0173 inode_unlock_shared(inode);
0174 else
0175 inode_unlock(inode);
0176 }
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188 static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
0189 struct page *locked_page,
0190 u64 offset, u64 bytes)
0191 {
0192 unsigned long index = offset >> PAGE_SHIFT;
0193 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
0194 u64 page_start, page_end;
0195 struct page *page;
0196
0197 if (locked_page) {
0198 page_start = page_offset(locked_page);
0199 page_end = page_start + PAGE_SIZE - 1;
0200 }
0201
0202 while (index <= end_index) {
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214 if (locked_page && index == (page_start >> PAGE_SHIFT)) {
0215 index++;
0216 continue;
0217 }
0218 page = find_get_page(inode->vfs_inode.i_mapping, index);
0219 index++;
0220 if (!page)
0221 continue;
0222
0223
0224
0225
0226
0227
0228 btrfs_page_clamp_clear_ordered(inode->root->fs_info, page,
0229 offset, bytes);
0230 put_page(page);
0231 }
0232
0233 if (locked_page) {
0234
0235 if (bytes + offset <= page_start + PAGE_SIZE)
0236 return;
0237
0238
0239
0240
0241
0242
0243 if (page_start >= offset && page_end <= (offset + bytes - 1)) {
0244 bytes = offset + bytes - page_offset(locked_page) - PAGE_SIZE;
0245 offset = page_offset(locked_page) + PAGE_SIZE;
0246 }
0247 }
0248
0249 return btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes, false);
0250 }
0251
/* Forward declaration; the definition is outside this excerpt. */
0252 static int btrfs_dirty_inode(struct inode *inode);
0253
0254 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
0255 struct btrfs_new_inode_args *args)
0256 {
0257 int err;
0258
0259 if (args->default_acl) {
0260 err = __btrfs_set_acl(trans, args->inode, args->default_acl,
0261 ACL_TYPE_DEFAULT);
0262 if (err)
0263 return err;
0264 }
0265 if (args->acl) {
0266 err = __btrfs_set_acl(trans, args->inode, args->acl, ACL_TYPE_ACCESS);
0267 if (err)
0268 return err;
0269 }
0270 if (!args->default_acl && !args->acl)
0271 cache_no_acl(args->inode);
0272 return btrfs_xattr_security_init(trans, args->inode, args->dir,
0273 &args->dentry->d_name);
0274 }
0275
0276
0277
0278
0279
0280
0281 static int insert_inline_extent(struct btrfs_trans_handle *trans,
0282 struct btrfs_path *path,
0283 struct btrfs_inode *inode, bool extent_inserted,
0284 size_t size, size_t compressed_size,
0285 int compress_type,
0286 struct page **compressed_pages,
0287 bool update_i_size)
0288 {
0289 struct btrfs_root *root = inode->root;
0290 struct extent_buffer *leaf;
0291 struct page *page = NULL;
0292 char *kaddr;
0293 unsigned long ptr;
0294 struct btrfs_file_extent_item *ei;
0295 int ret;
0296 size_t cur_size = size;
0297 u64 i_size;
0298
0299 ASSERT((compressed_size > 0 && compressed_pages) ||
0300 (compressed_size == 0 && !compressed_pages));
0301
0302 if (compressed_size && compressed_pages)
0303 cur_size = compressed_size;
0304
0305 if (!extent_inserted) {
0306 struct btrfs_key key;
0307 size_t datasize;
0308
0309 key.objectid = btrfs_ino(inode);
0310 key.offset = 0;
0311 key.type = BTRFS_EXTENT_DATA_KEY;
0312
0313 datasize = btrfs_file_extent_calc_inline_size(cur_size);
0314 ret = btrfs_insert_empty_item(trans, root, path, &key,
0315 datasize);
0316 if (ret)
0317 goto fail;
0318 }
0319 leaf = path->nodes[0];
0320 ei = btrfs_item_ptr(leaf, path->slots[0],
0321 struct btrfs_file_extent_item);
0322 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
0323 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
0324 btrfs_set_file_extent_encryption(leaf, ei, 0);
0325 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
0326 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
0327 ptr = btrfs_file_extent_inline_start(ei);
0328
0329 if (compress_type != BTRFS_COMPRESS_NONE) {
0330 struct page *cpage;
0331 int i = 0;
0332 while (compressed_size > 0) {
0333 cpage = compressed_pages[i];
0334 cur_size = min_t(unsigned long, compressed_size,
0335 PAGE_SIZE);
0336
0337 kaddr = kmap_local_page(cpage);
0338 write_extent_buffer(leaf, kaddr, ptr, cur_size);
0339 kunmap_local(kaddr);
0340
0341 i++;
0342 ptr += cur_size;
0343 compressed_size -= cur_size;
0344 }
0345 btrfs_set_file_extent_compression(leaf, ei,
0346 compress_type);
0347 } else {
0348 page = find_get_page(inode->vfs_inode.i_mapping, 0);
0349 btrfs_set_file_extent_compression(leaf, ei, 0);
0350 kaddr = kmap_local_page(page);
0351 write_extent_buffer(leaf, kaddr, ptr, size);
0352 kunmap_local(kaddr);
0353 put_page(page);
0354 }
0355 btrfs_mark_buffer_dirty(leaf);
0356 btrfs_release_path(path);
0357
0358
0359
0360
0361
0362 ret = btrfs_inode_set_file_extent_range(inode, 0,
0363 ALIGN(size, root->fs_info->sectorsize));
0364 if (ret)
0365 goto fail;
0366
0367
0368
0369
0370
0371
0372
0373
0374 i_size = i_size_read(&inode->vfs_inode);
0375 if (update_i_size && size > i_size) {
0376 i_size_write(&inode->vfs_inode, size);
0377 i_size = size;
0378 }
0379 inode->disk_i_size = i_size;
0380
0381 fail:
0382 return ret;
0383 }
0384
0385
0386
0387
0388
0389
0390
0391 static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
0392 size_t compressed_size,
0393 int compress_type,
0394 struct page **compressed_pages,
0395 bool update_i_size)
0396 {
0397 struct btrfs_drop_extents_args drop_args = { 0 };
0398 struct btrfs_root *root = inode->root;
0399 struct btrfs_fs_info *fs_info = root->fs_info;
0400 struct btrfs_trans_handle *trans;
0401 u64 data_len = (compressed_size ?: size);
0402 int ret;
0403 struct btrfs_path *path;
0404
0405
0406
0407
0408
0409
0410
0411 if (size < i_size_read(&inode->vfs_inode) ||
0412 size > fs_info->sectorsize ||
0413 data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
0414 data_len > fs_info->max_inline)
0415 return 1;
0416
0417 path = btrfs_alloc_path();
0418 if (!path)
0419 return -ENOMEM;
0420
0421 trans = btrfs_join_transaction(root);
0422 if (IS_ERR(trans)) {
0423 btrfs_free_path(path);
0424 return PTR_ERR(trans);
0425 }
0426 trans->block_rsv = &inode->block_rsv;
0427
0428 drop_args.path = path;
0429 drop_args.start = 0;
0430 drop_args.end = fs_info->sectorsize;
0431 drop_args.drop_cache = true;
0432 drop_args.replace_extent = true;
0433 drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(data_len);
0434 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
0435 if (ret) {
0436 btrfs_abort_transaction(trans, ret);
0437 goto out;
0438 }
0439
0440 ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted,
0441 size, compressed_size, compress_type,
0442 compressed_pages, update_i_size);
0443 if (ret && ret != -ENOSPC) {
0444 btrfs_abort_transaction(trans, ret);
0445 goto out;
0446 } else if (ret == -ENOSPC) {
0447 ret = 1;
0448 goto out;
0449 }
0450
0451 btrfs_update_inode_bytes(inode, size, drop_args.bytes_found);
0452 ret = btrfs_update_inode(trans, root, inode);
0453 if (ret && ret != -ENOSPC) {
0454 btrfs_abort_transaction(trans, ret);
0455 goto out;
0456 } else if (ret == -ENOSPC) {
0457 ret = 1;
0458 goto out;
0459 }
0460
0461 btrfs_set_inode_full_sync(inode);
0462 out:
0463
0464
0465
0466
0467
0468
0469 btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
0470 btrfs_free_path(path);
0471 btrfs_end_transaction(trans);
0472 return ret;
0473 }
0474
/*
 * One piece of an async chunk, queued by add_async_extent() and consumed by
 * submit_one_async_extent().  A NULL @pages marks a piece that is written
 * uncompressed (see submit_one_async_extent()).
 */
0475 struct async_extent {
0476 u64 start; /* file offset of the piece */
0477 u64 ram_size; /* uncompressed length in bytes */
0478 u64 compressed_size; /* compressed length in bytes, 0 when not compressed */
0479 struct page **pages; /* kmalloc'ed array of compressed pages, or NULL */
0480 unsigned long nr_pages; /* number of entries in @pages */
0481 int compress_type; /* BTRFS_COMPRESS_* value recorded for the piece */
0482 struct list_head list; /* link in async_chunk::extents */
0483 };
0484
/*
 * Unit of work for the async compression workers (async_cow_start(),
 * async_cow_submit(), async_cow_free()).
 */
0485 struct async_chunk {
0486 struct inode *inode; /* set to NULL by async_cow_start() when nothing was queued */
0487 struct page *locked_page; /* page the chunk was started with; may be NULL */
0488 u64 start; /* first byte of the chunk's file range */
0489 u64 end; /* last byte of the chunk's file range (inclusive) */
0490 blk_opf_t write_flags; /* passed to btrfs_submit_compressed_write() */
0491 struct list_head extents; /* list of struct async_extent */
0492 struct cgroup_subsys_state *blkcg_css; /* released with css_put() in async_cow_free() if set */
0493 struct btrfs_work work; /* work item; container_of() maps it back to the chunk */
0494 struct async_cow *async_cow; /* owning container, freed once all chunks are done */
0495 };
0496
/*
 * Container for all chunks of one async COW request.  async_cow_free()
 * decrements @num_chunks per chunk and kvfree()s the container when it
 * reaches zero.
 */
0497 struct async_cow {
0498 atomic_t num_chunks; /* chunks still outstanding */
0499 struct async_chunk chunks[]; /* flexible array of chunks */
0500 };
0501
0502 static noinline int add_async_extent(struct async_chunk *cow,
0503 u64 start, u64 ram_size,
0504 u64 compressed_size,
0505 struct page **pages,
0506 unsigned long nr_pages,
0507 int compress_type)
0508 {
0509 struct async_extent *async_extent;
0510
0511 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
0512 BUG_ON(!async_extent);
0513 async_extent->start = start;
0514 async_extent->ram_size = ram_size;
0515 async_extent->compressed_size = compressed_size;
0516 async_extent->pages = pages;
0517 async_extent->nr_pages = nr_pages;
0518 async_extent->compress_type = compress_type;
0519 list_add_tail(&async_extent->list, &cow->extents);
0520 return 0;
0521 }
0522
0523
0524
0525
0526
0527 static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
0528 u64 end)
0529 {
0530 struct btrfs_fs_info *fs_info = inode->root->fs_info;
0531
0532 if (!btrfs_inode_can_compress(inode)) {
0533 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
0534 KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
0535 btrfs_ino(inode));
0536 return 0;
0537 }
0538
0539
0540
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558
0559
0560
0561
0562
0563
0564 if (fs_info->sectorsize < PAGE_SIZE) {
0565 if (!PAGE_ALIGNED(start) ||
0566 !PAGE_ALIGNED(end + 1))
0567 return 0;
0568 }
0569
0570
0571 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
0572 return 1;
0573
0574 if (inode->defrag_compress)
0575 return 1;
0576
0577 if (inode->flags & BTRFS_INODE_NOCOMPRESS)
0578 return 0;
0579 if (btrfs_test_opt(fs_info, COMPRESS) ||
0580 inode->flags & BTRFS_INODE_COMPRESS ||
0581 inode->prop_compress)
0582 return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
0583 return 0;
0584 }
0585
0586 static inline void inode_should_defrag(struct btrfs_inode *inode,
0587 u64 start, u64 end, u64 num_bytes, u32 small_write)
0588 {
0589
0590 if (num_bytes < small_write &&
0591 (start > 0 || end + 1 < inode->disk_i_size))
0592 btrfs_add_inode_defrag(NULL, inode, small_write);
0593 }
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612 static noinline int compress_file_range(struct async_chunk *async_chunk)
0613 {
0614 struct inode *inode = async_chunk->inode;
0615 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
0616 u64 blocksize = fs_info->sectorsize;
0617 u64 start = async_chunk->start;
0618 u64 end = async_chunk->end;
0619 u64 actual_end;
0620 u64 i_size;
0621 int ret = 0;
0622 struct page **pages = NULL;
0623 unsigned long nr_pages;
0624 unsigned long total_compressed = 0;
0625 unsigned long total_in = 0;
0626 int i;
0627 int will_compress;
0628 int compress_type = fs_info->compress_type;
0629 int compressed_extents = 0;
0630 int redirty = 0;
0631
0632 inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
0633 SZ_16K);
0634
0635
0636
0637
0638
0639
0640
0641
0642
0643
0644 barrier();
0645 i_size = i_size_read(inode);
0646 barrier();
0647 actual_end = min_t(u64, i_size, end + 1);
0648 again:
0649 will_compress = 0;
0650 nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
0651 nr_pages = min_t(unsigned long, nr_pages,
0652 BTRFS_MAX_COMPRESSED / PAGE_SIZE);
0653
0654
0655
0656
0657
0658
0659
0660
0661
0662
0663
0664 if (actual_end <= start)
0665 goto cleanup_and_bail_uncompressed;
0666
0667 total_compressed = actual_end - start;
0668
0669
0670
0671
0672
0673 if (total_compressed <= blocksize &&
0674 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
0675 goto cleanup_and_bail_uncompressed;
0676
0677
0678
0679
0680
0681
0682 if (blocksize < PAGE_SIZE) {
0683 if (!PAGE_ALIGNED(start) ||
0684 !PAGE_ALIGNED(round_up(actual_end, blocksize)))
0685 goto cleanup_and_bail_uncompressed;
0686 }
0687
0688 total_compressed = min_t(unsigned long, total_compressed,
0689 BTRFS_MAX_UNCOMPRESSED);
0690 total_in = 0;
0691 ret = 0;
0692
0693
0694
0695
0696
0697
0698 if (inode_need_compress(BTRFS_I(inode), start, end)) {
0699 WARN_ON(pages);
0700 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
0701 if (!pages) {
0702
0703 nr_pages = 0;
0704 goto cont;
0705 }
0706
0707 if (BTRFS_I(inode)->defrag_compress)
0708 compress_type = BTRFS_I(inode)->defrag_compress;
0709 else if (BTRFS_I(inode)->prop_compress)
0710 compress_type = BTRFS_I(inode)->prop_compress;
0711
0712
0713
0714
0715
0716
0717
0718
0719
0720
0721
0722
0723
0724 if (!redirty) {
0725 extent_range_clear_dirty_for_io(inode, start, end);
0726 redirty = 1;
0727 }
0728
0729
0730 ret = btrfs_compress_pages(
0731 compress_type | (fs_info->compress_level << 4),
0732 inode->i_mapping, start,
0733 pages,
0734 &nr_pages,
0735 &total_in,
0736 &total_compressed);
0737
0738 if (!ret) {
0739 unsigned long offset = offset_in_page(total_compressed);
0740 struct page *page = pages[nr_pages - 1];
0741
0742
0743
0744
0745 if (offset)
0746 memzero_page(page, offset, PAGE_SIZE - offset);
0747 will_compress = 1;
0748 }
0749 }
0750 cont:
0751
0752
0753
0754
0755 if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
0756
0757 if (ret || total_in < actual_end) {
0758
0759
0760
0761 ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
0762 0, BTRFS_COMPRESS_NONE,
0763 NULL, false);
0764 } else {
0765
0766 ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
0767 total_compressed,
0768 compress_type, pages,
0769 false);
0770 }
0771 if (ret <= 0) {
0772 unsigned long clear_flags = EXTENT_DELALLOC |
0773 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
0774 EXTENT_DO_ACCOUNTING;
0775 unsigned long page_error_op;
0776
0777 page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
0778
0779
0780
0781
0782
0783
0784
0785
0786
0787
0788
0789 extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
0790 NULL,
0791 clear_flags,
0792 PAGE_UNLOCK |
0793 PAGE_START_WRITEBACK |
0794 page_error_op |
0795 PAGE_END_WRITEBACK);
0796
0797
0798
0799
0800
0801
0802 if (pages) {
0803 for (i = 0; i < nr_pages; i++) {
0804 WARN_ON(pages[i]->mapping);
0805 put_page(pages[i]);
0806 }
0807 kfree(pages);
0808 }
0809 return 0;
0810 }
0811 }
0812
0813 if (will_compress) {
0814
0815
0816
0817
0818
0819 total_compressed = ALIGN(total_compressed, blocksize);
0820
0821
0822
0823
0824
0825
0826 total_in = round_up(total_in, fs_info->sectorsize);
0827 if (total_compressed + blocksize <= total_in) {
0828 compressed_extents++;
0829
0830
0831
0832
0833
0834
0835 add_async_extent(async_chunk, start, total_in,
0836 total_compressed, pages, nr_pages,
0837 compress_type);
0838
0839 if (start + total_in < end) {
0840 start += total_in;
0841 pages = NULL;
0842 cond_resched();
0843 goto again;
0844 }
0845 return compressed_extents;
0846 }
0847 }
0848 if (pages) {
0849
0850
0851
0852
0853 for (i = 0; i < nr_pages; i++) {
0854 WARN_ON(pages[i]->mapping);
0855 put_page(pages[i]);
0856 }
0857 kfree(pages);
0858 pages = NULL;
0859 total_compressed = 0;
0860 nr_pages = 0;
0861
0862
0863 if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
0864 !(BTRFS_I(inode)->prop_compress)) {
0865 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
0866 }
0867 }
0868 cleanup_and_bail_uncompressed:
0869
0870
0871
0872
0873
0874
0875 if (async_chunk->locked_page &&
0876 (page_offset(async_chunk->locked_page) >= start &&
0877 page_offset(async_chunk->locked_page)) <= end) {
0878 __set_page_dirty_nobuffers(async_chunk->locked_page);
0879
0880 }
0881
0882 if (redirty)
0883 extent_range_redirty_for_io(inode, start, end);
0884 add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
0885 BTRFS_COMPRESS_NONE);
0886 compressed_extents++;
0887
0888 return compressed_extents;
0889 }
0890
0891 static void free_async_extent_pages(struct async_extent *async_extent)
0892 {
0893 int i;
0894
0895 if (!async_extent->pages)
0896 return;
0897
0898 for (i = 0; i < async_extent->nr_pages; i++) {
0899 WARN_ON(async_extent->pages[i]->mapping);
0900 put_page(async_extent->pages[i]);
0901 }
0902 kfree(async_extent->pages);
0903 async_extent->nr_pages = 0;
0904 async_extent->pages = NULL;
0905 }
0906
0907 static int submit_uncompressed_range(struct btrfs_inode *inode,
0908 struct async_extent *async_extent,
0909 struct page *locked_page)
0910 {
0911 u64 start = async_extent->start;
0912 u64 end = async_extent->start + async_extent->ram_size - 1;
0913 unsigned long nr_written = 0;
0914 int page_started = 0;
0915 int ret;
0916
0917
0918
0919
0920
0921
0922
0923
0924 ret = cow_file_range(inode, locked_page, start, end, &page_started,
0925 &nr_written, 0, NULL);
0926
0927 if (page_started) {
0928 ret = 0;
0929 goto out;
0930 }
0931 if (ret < 0) {
0932 btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
0933 if (locked_page) {
0934 const u64 page_start = page_offset(locked_page);
0935 const u64 page_end = page_start + PAGE_SIZE - 1;
0936
0937 btrfs_page_set_error(inode->root->fs_info, locked_page,
0938 page_start, PAGE_SIZE);
0939 set_page_writeback(locked_page);
0940 end_page_writeback(locked_page);
0941 end_extent_writepage(locked_page, ret, page_start, page_end);
0942 unlock_page(locked_page);
0943 }
0944 goto out;
0945 }
0946
0947 ret = extent_write_locked_range(&inode->vfs_inode, start, end);
0948
0949 out:
0950 kfree(async_extent);
0951 return ret;
0952 }
0953
0954 static int submit_one_async_extent(struct btrfs_inode *inode,
0955 struct async_chunk *async_chunk,
0956 struct async_extent *async_extent,
0957 u64 *alloc_hint)
0958 {
0959 struct extent_io_tree *io_tree = &inode->io_tree;
0960 struct btrfs_root *root = inode->root;
0961 struct btrfs_fs_info *fs_info = root->fs_info;
0962 struct btrfs_key ins;
0963 struct page *locked_page = NULL;
0964 struct extent_map *em;
0965 int ret = 0;
0966 u64 start = async_extent->start;
0967 u64 end = async_extent->start + async_extent->ram_size - 1;
0968
0969
0970
0971
0972
0973 if (async_chunk->locked_page) {
0974 u64 locked_page_start = page_offset(async_chunk->locked_page);
0975 u64 locked_page_end = locked_page_start + PAGE_SIZE - 1;
0976
0977 if (!(start >= locked_page_end || end <= locked_page_start))
0978 locked_page = async_chunk->locked_page;
0979 }
0980 lock_extent(io_tree, start, end);
0981
0982
0983 if (!async_extent->pages)
0984 return submit_uncompressed_range(inode, async_extent, locked_page);
0985
0986 ret = btrfs_reserve_extent(root, async_extent->ram_size,
0987 async_extent->compressed_size,
0988 async_extent->compressed_size,
0989 0, *alloc_hint, &ins, 1, 1);
0990 if (ret) {
0991 free_async_extent_pages(async_extent);
0992
0993
0994
0995
0996
0997
0998
0999 goto out_free;
1000 }
1001
1002
1003 em = create_io_em(inode, start,
1004 async_extent->ram_size,
1005 start,
1006 ins.objectid,
1007 ins.offset,
1008 ins.offset,
1009 async_extent->ram_size,
1010 async_extent->compress_type,
1011 BTRFS_ORDERED_COMPRESSED);
1012 if (IS_ERR(em)) {
1013 ret = PTR_ERR(em);
1014 goto out_free_reserve;
1015 }
1016 free_extent_map(em);
1017
1018 ret = btrfs_add_ordered_extent(inode, start,
1019 async_extent->ram_size,
1020 async_extent->ram_size,
1021 ins.objectid,
1022 ins.offset,
1023 0,
1024 1 << BTRFS_ORDERED_COMPRESSED,
1025 async_extent->compress_type);
1026 if (ret) {
1027 btrfs_drop_extent_cache(inode, start, end, 0);
1028 goto out_free_reserve;
1029 }
1030 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1031
1032
1033 extent_clear_unlock_delalloc(inode, start, end,
1034 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
1035 PAGE_UNLOCK | PAGE_START_WRITEBACK);
1036 if (btrfs_submit_compressed_write(inode, start,
1037 async_extent->ram_size,
1038 ins.objectid,
1039 ins.offset,
1040 async_extent->pages,
1041 async_extent->nr_pages,
1042 async_chunk->write_flags,
1043 async_chunk->blkcg_css, true)) {
1044 const u64 start = async_extent->start;
1045 const u64 end = start + async_extent->ram_size - 1;
1046
1047 btrfs_writepage_endio_finish_ordered(inode, NULL, start, end, 0);
1048
1049 extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
1050 PAGE_END_WRITEBACK | PAGE_SET_ERROR);
1051 free_async_extent_pages(async_extent);
1052 }
1053 *alloc_hint = ins.objectid + ins.offset;
1054 kfree(async_extent);
1055 return ret;
1056
1057 out_free_reserve:
1058 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1059 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1060 out_free:
1061 extent_clear_unlock_delalloc(inode, start, end,
1062 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
1063 EXTENT_DELALLOC_NEW |
1064 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
1065 PAGE_UNLOCK | PAGE_START_WRITEBACK |
1066 PAGE_END_WRITEBACK | PAGE_SET_ERROR);
1067 free_async_extent_pages(async_extent);
1068 kfree(async_extent);
1069 return ret;
1070 }
1071
1072
1073
1074
1075
1076
1077 static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
1078 {
1079 struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
1080 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1081 struct async_extent *async_extent;
1082 u64 alloc_hint = 0;
1083 int ret = 0;
1084
1085 while (!list_empty(&async_chunk->extents)) {
1086 u64 extent_start;
1087 u64 ram_size;
1088
1089 async_extent = list_entry(async_chunk->extents.next,
1090 struct async_extent, list);
1091 list_del(&async_extent->list);
1092 extent_start = async_extent->start;
1093 ram_size = async_extent->ram_size;
1094
1095 ret = submit_one_async_extent(inode, async_chunk, async_extent,
1096 &alloc_hint);
1097 btrfs_debug(fs_info,
1098 "async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
1099 inode->root->root_key.objectid,
1100 btrfs_ino(inode), extent_start, ram_size, ret);
1101 }
1102 }
1103
1104 static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
1105 u64 num_bytes)
1106 {
1107 struct extent_map_tree *em_tree = &inode->extent_tree;
1108 struct extent_map *em;
1109 u64 alloc_hint = 0;
1110
1111 read_lock(&em_tree->lock);
1112 em = search_extent_mapping(em_tree, start, num_bytes);
1113 if (em) {
1114
1115
1116
1117
1118
1119 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1120 free_extent_map(em);
1121 em = search_extent_mapping(em_tree, 0, 0);
1122 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
1123 alloc_hint = em->block_start;
1124 if (em)
1125 free_extent_map(em);
1126 } else {
1127 alloc_hint = em->block_start;
1128 free_extent_map(em);
1129 }
1130 }
1131 read_unlock(&em_tree->lock);
1132
1133 return alloc_hint;
1134 }
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
/*
 * Allocate new extents for the delalloc range [@start, @end] of @inode and
 * set up the extent maps and ordered extents needed to write it out
 * uncompressed.
 *
 * @locked_page:  forwarded to extent_clear_unlock_delalloc(); it is
 *                dereferenced unconditionally by unlock_page() on the inline
 *                extent path
 * @page_started: set to 1 when the range was stored as an inline extent
 * @nr_written:   increased by the number of pages in the range on the
 *                inline extent path
 * @unlock:       non-zero to also unlock the pages of each allocated piece
 * @done_offset:  optional; on -EAGAIN it receives how far the allocation
 *                got and the function returns without unwinding the range
 *
 * Return: 0 on success or a negative errno.  -EAGAIN is only returned when
 * @done_offset is non-NULL, otherwise it is converted to -ENOSPC.
 */
1171 static noinline int cow_file_range(struct btrfs_inode *inode,
1172 struct page *locked_page,
1173 u64 start, u64 end, int *page_started,
1174 unsigned long *nr_written, int unlock,
1175 u64 *done_offset)
1176 {
1177 struct btrfs_root *root = inode->root;
1178 struct btrfs_fs_info *fs_info = root->fs_info;
1179 u64 alloc_hint = 0;
1180 u64 orig_start = start;
1181 u64 num_bytes;
1182 unsigned long ram_size;
1183 u64 cur_alloc_size = 0;
1184 u64 min_alloc_size;
1185 u64 blocksize = fs_info->sectorsize;
1186 struct btrfs_key ins;
1187 struct extent_map *em;
1188 unsigned clear_bits;
1189 unsigned long page_ops;
1190 bool extent_reserved = false;
1191 int ret = 0;
1192
/* Free space inodes are rejected with -EINVAL and the range is unwound. */
1193 if (btrfs_is_free_space_inode(inode)) {
1194 ret = -EINVAL;
1195 goto out_unlock;
1196 }
1197
/* Work on whole blocks, and on at least one of them. */
1198 num_bytes = ALIGN(end - start + 1, blocksize);
1199 num_bytes = max(blocksize, num_bytes);
1200 ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
1201
1202 inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
/*
 * An inline extent is only attempted for ranges starting at file offset 0
 * and only when the sector size equals the page size.
 */
1214 if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
1215 u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
1216 end + 1);
1217
1218
1219 ret = cow_file_range_inline(inode, actual_end, 0,
1220 BTRFS_COMPRESS_NONE, NULL, false);
1221 if (ret == 0) {
1222
1223
1224
1225
1226
1227
/* Inline extent created: the whole range is finished right here. */
1228 extent_clear_unlock_delalloc(inode, start, end,
1229 locked_page,
1230 EXTENT_LOCKED | EXTENT_DELALLOC |
1231 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
1232 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1233 PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
1234 *nr_written = *nr_written +
1235 (end - start + PAGE_SIZE) / PAGE_SIZE;
1236 *page_started = 1;
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
/*
 * locked_page is unlocked explicitly here.
 * NOTE(review): presumably extent_clear_unlock_delalloc() leaves the locked
 * page alone - confirm against its implementation.
 */
1249 unlock_page(locked_page);
1250 goto out;
1251 } else if (ret < 0) {
1252 goto out_unlock;
1253 }
/* ret > 0: the range could not be inlined, continue with regular extents. */
1254 }
1255
1256 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
1257 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
/*
 * For the data relocation root the whole range must be allocated as one
 * extent (minimum == requested size); otherwise one sector is the minimum.
 */
1270 if (btrfs_is_data_reloc_root(root))
1271 min_alloc_size = num_bytes;
1272 else
1273 min_alloc_size = fs_info->sectorsize;
1274
/* Allocate the range piece by piece; each pass covers ins.offset bytes. */
1275 while (num_bytes > 0) {
1276 cur_alloc_size = num_bytes;
1277 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
1278 min_alloc_size, 0, alloc_hint,
1279 &ins, 1, 1);
1280 if (ret < 0)
1281 goto out_unlock;
1282 cur_alloc_size = ins.offset;
/* From here until the piece is set up, the error path must release it. */
1283 extent_reserved = true;
1284
1285 ram_size = ins.offset;
1286 em = create_io_em(inode, start, ins.offset,
1287 start,
1288 ins.objectid,
1289 ins.offset,
1290 ins.offset,
1291 ram_size,
1292 BTRFS_COMPRESS_NONE,
1293 BTRFS_ORDERED_REGULAR );
1294 if (IS_ERR(em)) {
1295 ret = PTR_ERR(em);
1296 goto out_reserve;
1297 }
1298 free_extent_map(em);
1299
1300 ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
1301 ins.objectid, cur_alloc_size, 0,
1302 1 << BTRFS_ORDERED_REGULAR,
1303 BTRFS_COMPRESS_NONE);
1304 if (ret)
1305 goto out_drop_extent_cache;
1306
1307 if (btrfs_is_data_reloc_root(root)) {
1308 ret = btrfs_reloc_clone_csums(inode, start,
1309 cur_alloc_size);
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
/*
 * A csum cloning failure does not jump to the error labels right away: the
 * ordered extent already exists, so only the extent cache is dropped here
 * and the error is acted upon at the end of this pass (see "if (ret)"
 * below), after the piece has been accounted as processed.
 */
1321 if (ret)
1322 btrfs_drop_extent_cache(inode, start,
1323 start + ram_size - 1, 0);
1324 }
1325
1326 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1327
1328
1329
1330
1331
1332
1333
1334
1335
/* Pages are only unlocked here when the caller asked for it. */
1336 page_ops = unlock ? PAGE_UNLOCK : 0;
1337 page_ops |= PAGE_SET_ORDERED;
1338
1339 extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
1340 locked_page,
1341 EXTENT_LOCKED | EXTENT_DELALLOC,
1342 page_ops);
/* Guard against underflow when the allocation exceeds what is left. */
1343 if (num_bytes < cur_alloc_size)
1344 num_bytes = 0;
1345 else
1346 num_bytes -= cur_alloc_size;
1347 alloc_hint = ins.objectid + ins.offset;
1348 start += cur_alloc_size;
1349 extent_reserved = false;
1350
1351
1352
1353
1354
1355
/* Deferred btrfs_reloc_clone_csums() failure from above. */
1356 if (ret)
1357 goto out_unlock;
1358 }
1359 out:
1360 return ret;
1361
/* Error unwinding; the labels fall through from most to least specific. */
1362 out_drop_extent_cache:
1363 btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
1364 out_reserve:
1365 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1366 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1367 out_unlock:
1368
1369
1370
1371
/*
 * -EAGAIN with a @done_offset: report the progress and return without
 * touching the range.  *done_offset is start - 1 if at least one piece was
 * processed, otherwise start.
 */
1372 if (done_offset && ret == -EAGAIN) {
1373 if (orig_start < start)
1374 *done_offset = start - 1;
1375 else
1376 *done_offset = start;
1377 return ret;
1378 } else if (ret == -EAGAIN) {
1379
/* Without @done_offset, -EAGAIN is reported as -ENOSPC. */
1380 ret = -ENOSPC;
1381 }
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392 clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1393 EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
1394 page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
/*
 * Pieces in [orig_start, start) were fully set up.  If the caller asked not
 * to unlock, their pages are still locked: unlock them and cycle them
 * through writeback without clearing any extent bits.  Without a
 * locked_page the error is recorded on the mapping.
 */
1408 if (!unlock && orig_start < start) {
1409 if (!locked_page)
1410 mapping_set_error(inode->vfs_inode.i_mapping, ret);
1411 extent_clear_unlock_delalloc(inode, orig_start, start - 1,
1412 locked_page, 0, page_ops);
1413 }
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
/*
 * A piece was reserved but not fully set up: clean it up with clear_bits
 * only, i.e. without EXTENT_CLEAR_DATA_RESV.
 */
1425 if (extent_reserved) {
1426 extent_clear_unlock_delalloc(inode, start,
1427 start + cur_alloc_size - 1,
1428 locked_page,
1429 clear_bits,
1430 page_ops);
1431 start += cur_alloc_size;
1432 if (start >= end)
1433 return ret;
1434 }
1435
1436
1437
1438
1439
1440
1441
/* The remainder was never reserved: also clear its data reservation. */
1442 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1443 clear_bits | EXTENT_CLEAR_DATA_RESV,
1444 page_ops);
1445 return ret;
1446 }
1447
1448
1449
1450
1451 static noinline void async_cow_start(struct btrfs_work *work)
1452 {
1453 struct async_chunk *async_chunk;
1454 int compressed_extents;
1455
1456 async_chunk = container_of(work, struct async_chunk, work);
1457
1458 compressed_extents = compress_file_range(async_chunk);
1459 if (compressed_extents == 0) {
1460 btrfs_add_delayed_iput(async_chunk->inode);
1461 async_chunk->inode = NULL;
1462 }
1463 }
1464
1465
1466
1467
1468 static noinline void async_cow_submit(struct btrfs_work *work)
1469 {
1470 struct async_chunk *async_chunk = container_of(work, struct async_chunk,
1471 work);
1472 struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
1473 unsigned long nr_pages;
1474
1475 nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
1476 PAGE_SHIFT;
1477
1478
1479
1480
1481
1482
1483
1484 if (async_chunk->inode)
1485 submit_compressed_extents(async_chunk);
1486
1487
1488 if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
1489 5 * SZ_1M)
1490 cond_wake_up_nomb(&fs_info->async_submit_wait);
1491 }
1492
1493 static noinline void async_cow_free(struct btrfs_work *work)
1494 {
1495 struct async_chunk *async_chunk;
1496 struct async_cow *async_cow;
1497
1498 async_chunk = container_of(work, struct async_chunk, work);
1499 if (async_chunk->inode)
1500 btrfs_add_delayed_iput(async_chunk->inode);
1501 if (async_chunk->blkcg_css)
1502 css_put(async_chunk->blkcg_css);
1503
1504 async_cow = async_chunk->async_cow;
1505 if (atomic_dec_and_test(&async_cow->num_chunks))
1506 kvfree(async_cow);
1507 }
1508
1509 static int cow_file_range_async(struct btrfs_inode *inode,
1510 struct writeback_control *wbc,
1511 struct page *locked_page,
1512 u64 start, u64 end, int *page_started,
1513 unsigned long *nr_written)
1514 {
1515 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1516 struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
1517 struct async_cow *ctx;
1518 struct async_chunk *async_chunk;
1519 unsigned long nr_pages;
1520 u64 cur_end;
1521 u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
1522 int i;
1523 bool should_compress;
1524 unsigned nofs_flag;
1525 const blk_opf_t write_flags = wbc_to_write_flags(wbc);
1526
1527 unlock_extent(&inode->io_tree, start, end);
1528
1529 if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
1530 !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
1531 num_chunks = 1;
1532 should_compress = false;
1533 } else {
1534 should_compress = true;
1535 }
1536
1537 nofs_flag = memalloc_nofs_save();
1538 ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
1539 memalloc_nofs_restore(nofs_flag);
1540
1541 if (!ctx) {
1542 unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
1543 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
1544 EXTENT_DO_ACCOUNTING;
1545 unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
1546 PAGE_END_WRITEBACK | PAGE_SET_ERROR;
1547
1548 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1549 clear_bits, page_ops);
1550 return -ENOMEM;
1551 }
1552
1553 async_chunk = ctx->chunks;
1554 atomic_set(&ctx->num_chunks, num_chunks);
1555
1556 for (i = 0; i < num_chunks; i++) {
1557 if (should_compress)
1558 cur_end = min(end, start + SZ_512K - 1);
1559 else
1560 cur_end = end;
1561
1562
1563
1564
1565
1566 ihold(&inode->vfs_inode);
1567 async_chunk[i].async_cow = ctx;
1568 async_chunk[i].inode = &inode->vfs_inode;
1569 async_chunk[i].start = start;
1570 async_chunk[i].end = cur_end;
1571 async_chunk[i].write_flags = write_flags;
1572 INIT_LIST_HEAD(&async_chunk[i].extents);
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583 if (locked_page) {
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593 wbc_account_cgroup_owner(wbc, locked_page,
1594 cur_end - start);
1595 async_chunk[i].locked_page = locked_page;
1596 locked_page = NULL;
1597 } else {
1598 async_chunk[i].locked_page = NULL;
1599 }
1600
1601 if (blkcg_css != blkcg_root_css) {
1602 css_get(blkcg_css);
1603 async_chunk[i].blkcg_css = blkcg_css;
1604 } else {
1605 async_chunk[i].blkcg_css = NULL;
1606 }
1607
1608 btrfs_init_work(&async_chunk[i].work, async_cow_start,
1609 async_cow_submit, async_cow_free);
1610
1611 nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
1612 atomic_add(nr_pages, &fs_info->async_delalloc_pages);
1613
1614 btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
1615
1616 *nr_written += nr_pages;
1617 start = cur_end + 1;
1618 }
1619 *page_started = 1;
1620 return 0;
1621 }
1622
1623 static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
1624 struct page *locked_page, u64 start,
1625 u64 end, int *page_started,
1626 unsigned long *nr_written)
1627 {
1628 u64 done_offset = end;
1629 int ret;
1630 bool locked_page_done = false;
1631
1632 while (start <= end) {
1633 ret = cow_file_range(inode, locked_page, start, end, page_started,
1634 nr_written, 0, &done_offset);
1635 if (ret && ret != -EAGAIN)
1636 return ret;
1637
1638 if (*page_started) {
1639 ASSERT(ret == 0);
1640 return 0;
1641 }
1642
1643 if (ret == 0)
1644 done_offset = end;
1645
1646 if (done_offset == start) {
1647 wait_on_bit_io(&inode->root->fs_info->flags,
1648 BTRFS_FS_NEED_ZONE_FINISH,
1649 TASK_UNINTERRUPTIBLE);
1650 continue;
1651 }
1652
1653 if (!locked_page_done) {
1654 __set_page_dirty_nobuffers(locked_page);
1655 account_page_redirty(locked_page);
1656 }
1657 locked_page_done = true;
1658 extent_write_locked_range(&inode->vfs_inode, start, done_offset);
1659
1660 start = done_offset + 1;
1661 }
1662
1663 *page_started = 1;
1664
1665 return 0;
1666 }
1667
1668 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1669 u64 bytenr, u64 num_bytes)
1670 {
1671 struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bytenr);
1672 struct btrfs_ordered_sum *sums;
1673 int ret;
1674 LIST_HEAD(list);
1675
1676 ret = btrfs_lookup_csums_range(csum_root, bytenr,
1677 bytenr + num_bytes - 1, &list, 0);
1678 if (ret == 0 && list_empty(&list))
1679 return 0;
1680
1681 while (!list_empty(&list)) {
1682 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1683 list_del(&sums->list);
1684 kfree(sums);
1685 }
1686 if (ret < 0)
1687 return ret;
1688 return 1;
1689 }
1690
1691 static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
1692 const u64 start, const u64 end,
1693 int *page_started, unsigned long *nr_written)
1694 {
1695 const bool is_space_ino = btrfs_is_free_space_inode(inode);
1696 const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
1697 const u64 range_bytes = end + 1 - start;
1698 struct extent_io_tree *io_tree = &inode->io_tree;
1699 u64 range_start = start;
1700 u64 count;
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734 count = count_range_bits(io_tree, &range_start, end, range_bytes,
1735 EXTENT_NORESERVE, 0);
1736 if (count > 0 || is_space_ino || is_reloc_ino) {
1737 u64 bytes = count;
1738 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1739 struct btrfs_space_info *sinfo = fs_info->data_sinfo;
1740
1741 if (is_space_ino || is_reloc_ino)
1742 bytes = range_bytes;
1743
1744 spin_lock(&sinfo->lock);
1745 btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
1746 spin_unlock(&sinfo->lock);
1747
1748 if (count > 0)
1749 clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
1750 0, 0, NULL);
1751 }
1752
1753 return cow_file_range(inode, locked_page, start, end, page_started,
1754 nr_written, 1, NULL);
1755 }
1756
/*
 * Arguments and results of can_nocow_file_extent().
 *
 * writeback_path: when true (and the inode is not a free space inode), a
 *                 non-zero root->snapshot_force_cow makes the check answer
 *                 "must COW".
 * strict:         when false, an extent whose generation is not newer than
 *                 the root's last snapshot is answered with "must COW".
 */
1757 struct can_nocow_file_extent_args {
1758 /* Input fields, set by the caller before calling can_nocow_file_extent(). */
1759
1760 /* File offset where the range to check starts. */
1761 u64 start;
1762 /* File offset of the last byte of the range (the check uses end + 1 as the limit). */
1763 u64 end;
1764 bool writeback_path;
1765 bool strict;
1766
1767 /* When true, can_nocow_file_extent() frees the path it was given with */
1768 /* btrfs_free_path() before it returns. */
1769
1770 bool free_path;
1771
1772 /* Output fields, filled in by can_nocow_file_extent(). */
1773 /* On a positive answer disk_bytenr is the disk address matching @start. */
1774 u64 disk_bytenr;
1775 u64 disk_num_bytes;
1776 u64 extent_offset;
1777 /* Number of bytes, starting at @start, covered by the checked extent (capped at end + 1). */
1778 u64 num_bytes;
1779 };
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790 static int can_nocow_file_extent(struct btrfs_path *path,
1791 struct btrfs_key *key,
1792 struct btrfs_inode *inode,
1793 struct can_nocow_file_extent_args *args)
1794 {
1795 const bool is_freespace_inode = btrfs_is_free_space_inode(inode);
1796 struct extent_buffer *leaf = path->nodes[0];
1797 struct btrfs_root *root = inode->root;
1798 struct btrfs_file_extent_item *fi;
1799 u64 extent_end;
1800 u8 extent_type;
1801 int can_nocow = 0;
1802 int ret = 0;
1803
1804 fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
1805 extent_type = btrfs_file_extent_type(leaf, fi);
1806
1807 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
1808 goto out;
1809
1810
1811 args->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1812 args->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1813 args->extent_offset = btrfs_file_extent_offset(leaf, fi);
1814
1815 if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
1816 extent_type == BTRFS_FILE_EXTENT_REG)
1817 goto out;
1818
1819
1820
1821
1822
1823
1824 if (!args->strict &&
1825 btrfs_file_extent_generation(leaf, fi) <=
1826 btrfs_root_last_snapshot(&root->root_item))
1827 goto out;
1828
1829
1830 if (args->disk_bytenr == 0)
1831 goto out;
1832
1833
1834 if (btrfs_file_extent_compression(leaf, fi) ||
1835 btrfs_file_extent_encryption(leaf, fi) ||
1836 btrfs_file_extent_other_encoding(leaf, fi))
1837 goto out;
1838
1839 extent_end = btrfs_file_extent_end(path);
1840
1841
1842
1843
1844
1845
1846 btrfs_release_path(path);
1847
1848 ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
1849 key->offset - args->extent_offset,
1850 args->disk_bytenr, false, path);
1851 WARN_ON_ONCE(ret > 0 && is_freespace_inode);
1852 if (ret != 0)
1853 goto out;
1854
1855 if (args->free_path) {
1856
1857
1858
1859
1860
1861
1862 btrfs_free_path(path);
1863 path = NULL;
1864 }
1865
1866
1867 if (args->writeback_path && !is_freespace_inode &&
1868 atomic_read(&root->snapshot_force_cow))
1869 goto out;
1870
1871 args->disk_bytenr += args->extent_offset;
1872 args->disk_bytenr += args->start - key->offset;
1873 args->num_bytes = min(args->end + 1, extent_end) - args->start;
1874
1875
1876
1877
1878
1879 ret = csum_exist_in_range(root->fs_info, args->disk_bytenr, args->num_bytes);
1880 WARN_ON_ONCE(ret > 0 && is_freespace_inode);
1881 if (ret != 0)
1882 goto out;
1883
1884 can_nocow = 1;
1885 out:
1886 if (args->free_path && path)
1887 btrfs_free_path(path);
1888
1889 return ret < 0 ? ret : can_nocow;
1890 }
1891
1892
1893
1894
1895
1896
1897
1898
/*
 * Run delalloc for [start, end] on an inode that may be written in place.
 *
 * The function walks the inode's file extent items starting at cur_offset.
 * A piece of the range for which can_nocow_file_extent() answers yes, and for
 * which btrfs_inc_nocow_writers() returns a block group, gets an ordered
 * extent pointing at the existing disk location.  Everything else is
 * collected into a pending COW region (cow_start) and handed to
 * fallback_to_cow(), either right before the next NOCOW piece or at the end.
 *
 * On failure the part of the range from cur_offset to end is cleared and
 * unlocked.  The function returns the last status code it recorded: 0 on the
 * success paths visible here, a negative errno when a step failed.
 */
1899 static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
1900 struct page *locked_page,
1901 const u64 start, const u64 end,
1902 int *page_started,
1903 unsigned long *nr_written)
1904 {
1905 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1906 struct btrfs_root *root = inode->root;
1907 struct btrfs_path *path;
1908 u64 cow_start = (u64)-1;
1909 u64 cur_offset = start;
1910 int ret;
1911 bool check_prev = true;
1912 u64 ino = btrfs_ino(inode);
1913 struct btrfs_block_group *bg;
1914 bool nocow = false;
1915 struct can_nocow_file_extent_args nocow_args = { 0 };
1916
1917 path = btrfs_alloc_path();
1918 if (!path) {
1919 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1920 EXTENT_LOCKED | EXTENT_DELALLOC |
1921 EXTENT_DO_ACCOUNTING |
1922 EXTENT_DEFRAG, PAGE_UNLOCK |
1923 PAGE_START_WRITEBACK |
1924 PAGE_END_WRITEBACK);
1925 return -ENOMEM;
1926 }
1927
1928 nocow_args.end = end;
1929 nocow_args.writeback_path = true;
1930
1931 while (1) {
1932 struct btrfs_key found_key;
1933 struct btrfs_file_extent_item *fi;
1934 struct extent_buffer *leaf;
1935 u64 extent_end;
1936 u64 ram_bytes;
1937 u64 nocow_end;
1938 int extent_type;
1939
1940 nocow = false;
1941
1942 ret = btrfs_lookup_file_extent(NULL, root, path, ino,
1943 cur_offset, 0);
1944 if (ret < 0)
1945 goto error;
1946
1947 /* No exact match for cur_offset.  On the first pass only (check_prev), */
1948 /* look at the previous slot: if it holds an EXTENT_DATA item of this */
1949 /* inode, step back to it so that the extent possibly covering */
1950 /* cur_offset is examined first. */
1951
1952 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1953 leaf = path->nodes[0];
1954 btrfs_item_key_to_cpu(leaf, &found_key,
1955 path->slots[0] - 1);
1956 if (found_key.objectid == ino &&
1957 found_key.type == BTRFS_EXTENT_DATA_KEY)
1958 path->slots[0]--;
1959 }
1960 check_prev = false;
1961 next_slot:
1962 /* Move on to the next leaf once the current one is exhausted. */
1963 leaf = path->nodes[0];
1964 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1965 ret = btrfs_next_leaf(root, path);
1966 if (ret < 0) {
1967 if (cow_start != (u64)-1)
1968 cur_offset = cow_start;
1969 goto error;
1970 }
1971 if (ret > 0)
1972 break;
1973 leaf = path->nodes[0];
1974 }
1975
1976 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1977
1978 /* Items of a higher inode number: nothing more for this inode. */
1979 if (found_key.objectid > ino)
1980 break;
1981
1982 /* Skip items that sort before this inode's EXTENT_DATA items.  An item */
1983 /* of a lower inode number is unexpected here, hence the warning. */
1984
1985 if (WARN_ON_ONCE(found_key.objectid < ino) ||
1986 found_key.type < BTRFS_EXTENT_DATA_KEY) {
1987 path->slots[0]++;
1988 goto next_slot;
1989 }
1990
1991 /* Past the EXTENT_DATA items, or the extent starts beyond the range. */
1992 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
1993 found_key.offset > end)
1994 break;
1995
1996 /* The item starts after cur_offset: the gap up to its key offset is */
1997 /* treated as a piece that must be COWed (nocow is still false at */
1998 /* out_check). */
1999
2000 if (found_key.offset > cur_offset) {
2001 extent_end = found_key.offset;
2002 extent_type = 0;
2003 goto out_check;
2004 }
2005
2006 /* The item starts at or before cur_offset: read its type and end to */
2007 /* see whether it overlaps the range. */
2008
2009
2010 fi = btrfs_item_ptr(leaf, path->slots[0],
2011 struct btrfs_file_extent_item);
2012 extent_type = btrfs_file_extent_type(leaf, fi);
2013 /* An out-of-range extent type is rejected with -EUCLEAN. */
2014 ASSERT(extent_type < BTRFS_NR_FILE_EXTENT_TYPES);
2015 if (WARN_ON(extent_type >= BTRFS_NR_FILE_EXTENT_TYPES)) {
2016 ret = -EUCLEAN;
2017 goto error;
2018 }
2019 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
2020 extent_end = btrfs_file_extent_end(path);
2021
2022 /* The extent ends at or before cur_offset, so it does not overlap the */
2023 /* range: try the next item. */
2024
2025
2026 if (extent_end <= cur_offset) {
2027 path->slots[0]++;
2028 goto next_slot;
2029 }
2030
2031 nocow_args.start = cur_offset;
2032 ret = can_nocow_file_extent(path, &found_key, inode, &nocow_args);
2033 if (ret < 0) {
2034 if (cow_start != (u64)-1)
2035 cur_offset = cow_start;
2036 goto error;
2037 } else if (ret == 0) {
2038 goto out_check;
2039 }
2040
2041 ret = 0;
2042 bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
2043 if (bg)
2044 nocow = true;
2045 out_check:
2046 /* Not NOCOW-able: remember where the pending COW region starts (if not */
2047 /* already recorded) and advance past this piece.  A path with no leaf */
2048 /* (path->nodes[0] == NULL) restarts the lookup at the new cur_offset. */
2049
2050 if (!nocow) {
2051 if (cow_start == (u64)-1)
2052 cow_start = cur_offset;
2053 cur_offset = extent_end;
2054 if (cur_offset > end)
2055 break;
2056 if (!path->nodes[0])
2057 continue;
2058 path->slots[0]++;
2059 goto next_slot;
2060 }
2061
2062 /* A NOCOW piece was found.  First flush the pending COW region, which */
2063 /* runs from cow_start up to the byte before this extent's key offset. */
2064
2065
2066
2067 if (cow_start != (u64)-1) {
2068 ret = fallback_to_cow(inode, locked_page,
2069 cow_start, found_key.offset - 1,
2070 page_started, nr_written);
2071 if (ret)
2072 goto error;
2073 cow_start = (u64)-1;
2074 }
2075
2076 nocow_end = cur_offset + nocow_args.num_bytes - 1;
2077
2078 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
2079 u64 orig_start = found_key.offset - nocow_args.extent_offset;
2080 struct extent_map *em;
2081
2082 em = create_io_em(inode, cur_offset, nocow_args.num_bytes,
2083 orig_start,
2084 nocow_args.disk_bytenr,
2085 nocow_args.num_bytes,
2086 nocow_args.disk_num_bytes,
2087 ram_bytes, BTRFS_COMPRESS_NONE,
2088 BTRFS_ORDERED_PREALLOC);
2089 if (IS_ERR(em)) {
2090 ret = PTR_ERR(em);
2091 goto error;
2092 }
2093 free_extent_map(em);
2094 ret = btrfs_add_ordered_extent(inode,
2095 cur_offset, nocow_args.num_bytes,
2096 nocow_args.num_bytes,
2097 nocow_args.disk_bytenr,
2098 nocow_args.num_bytes, 0,
2099 1 << BTRFS_ORDERED_PREALLOC,
2100 BTRFS_COMPRESS_NONE);
2101 if (ret) {
2102 btrfs_drop_extent_cache(inode, cur_offset,
2103 nocow_end, 0);
2104 goto error;
2105 }
2106 } else {
2107 ret = btrfs_add_ordered_extent(inode, cur_offset,
2108 nocow_args.num_bytes,
2109 nocow_args.num_bytes,
2110 nocow_args.disk_bytenr,
2111 nocow_args.num_bytes,
2112 0,
2113 1 << BTRFS_ORDERED_NOCOW,
2114 BTRFS_COMPRESS_NONE);
2115 if (ret)
2116 goto error;
2117 }
2118
2119 if (nocow) {
2120 btrfs_dec_nocow_writers(bg);
2121 nocow = false;
2122 }
2123
2124 if (btrfs_is_data_reloc_root(root))
2125 /* The result is deliberately checked only after the range below has */
2126 /* been cleared and unlocked, i.e. after the ordered extent exists. */
2127 /* NOTE(review): presumably so the error path does not undo state */
2128 /* owned by that ordered extent - confirm. */
2129
2130 ret = btrfs_reloc_clone_csums(inode, cur_offset,
2131 nocow_args.num_bytes);
2132
2133 extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
2134 locked_page, EXTENT_LOCKED |
2135 EXTENT_DELALLOC |
2136 EXTENT_CLEAR_DATA_RESV,
2137 PAGE_UNLOCK | PAGE_SET_ORDERED);
2138
2139 cur_offset = extent_end;
2140
2141 /* Deferred check of the btrfs_reloc_clone_csums() result: cur_offset */
2142 /* has already been advanced past this piece, so the error path only */
2143 /* touches what comes after it. */
2144
2145
2146 if (ret)
2147 goto error;
2148 if (cur_offset > end)
2149 break;
2150 }
2151 btrfs_release_path(path);
2152
2153 if (cur_offset <= end && cow_start == (u64)-1)
2154 cow_start = cur_offset;
2155
2156 if (cow_start != (u64)-1) {
2157 cur_offset = end;
2158 ret = fallback_to_cow(inode, locked_page, cow_start, end,
2159 page_started, nr_written);
2160 if (ret)
2161 goto error;
2162 }
2163
2164 error:
2165 if (nocow)
2166 btrfs_dec_nocow_writers(bg);
2167
2168 if (ret && cur_offset < end)
2169 extent_clear_unlock_delalloc(inode, cur_offset, end,
2170 locked_page, EXTENT_LOCKED |
2171 EXTENT_DELALLOC | EXTENT_DEFRAG |
2172 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
2173 PAGE_START_WRITEBACK |
2174 PAGE_END_WRITEBACK);
2175 btrfs_free_path(path);
2176 return ret;
2177 }
2178
2179 static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
2180 {
2181 if (inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) {
2182 if (inode->defrag_bytes &&
2183 test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG,
2184 0, NULL))
2185 return false;
2186 return true;
2187 }
2188 return false;
2189 }
2190
2191
2192
2193
2194
2195 int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
2196 u64 start, u64 end, int *page_started, unsigned long *nr_written,
2197 struct writeback_control *wbc)
2198 {
2199 int ret;
2200 const bool zoned = btrfs_is_zoned(inode->root->fs_info);
2201
2202
2203
2204
2205
2206 ASSERT(!(end <= page_offset(locked_page) ||
2207 start >= page_offset(locked_page) + PAGE_SIZE));
2208
2209 if (should_nocow(inode, start, end)) {
2210
2211
2212
2213
2214
2215
2216
2217 ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
2218 ret = run_delalloc_nocow(inode, locked_page, start, end,
2219 page_started, nr_written);
2220 } else if (!btrfs_inode_can_compress(inode) ||
2221 !inode_need_compress(inode, start, end)) {
2222 if (zoned)
2223 ret = run_delalloc_zoned(inode, locked_page, start, end,
2224 page_started, nr_written);
2225 else
2226 ret = cow_file_range(inode, locked_page, start, end,
2227 page_started, nr_written, 1, NULL);
2228 } else {
2229 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
2230 ret = cow_file_range_async(inode, wbc, locked_page, start, end,
2231 page_started, nr_written);
2232 }
2233 ASSERT(ret <= 0);
2234 if (ret)
2235 btrfs_cleanup_ordered_extents(inode, locked_page, start,
2236 end - start + 1);
2237 return ret;
2238 }
2239
2240 void btrfs_split_delalloc_extent(struct inode *inode,
2241 struct extent_state *orig, u64 split)
2242 {
2243 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2244 u64 size;
2245
2246
2247 if (!(orig->state & EXTENT_DELALLOC))
2248 return;
2249
2250 size = orig->end - orig->start + 1;
2251 if (size > fs_info->max_extent_size) {
2252 u32 num_extents;
2253 u64 new_size;
2254
2255
2256
2257
2258
2259 new_size = orig->end - split + 1;
2260 num_extents = count_max_extents(fs_info, new_size);
2261 new_size = split - orig->start;
2262 num_extents += count_max_extents(fs_info, new_size);
2263 if (count_max_extents(fs_info, size) >= num_extents)
2264 return;
2265 }
2266
2267 spin_lock(&BTRFS_I(inode)->lock);
2268 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
2269 spin_unlock(&BTRFS_I(inode)->lock);
2270 }
2271
2272
2273
2274
2275
2276
2277 void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
2278 struct extent_state *other)
2279 {
2280 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2281 u64 new_size, old_size;
2282 u32 num_extents;
2283
2284
2285 if (!(other->state & EXTENT_DELALLOC))
2286 return;
2287
2288 if (new->start > other->start)
2289 new_size = new->end - other->start + 1;
2290 else
2291 new_size = other->end - new->start + 1;
2292
2293
2294 if (new_size <= fs_info->max_extent_size) {
2295 spin_lock(&BTRFS_I(inode)->lock);
2296 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
2297 spin_unlock(&BTRFS_I(inode)->lock);
2298 return;
2299 }
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319 old_size = other->end - other->start + 1;
2320 num_extents = count_max_extents(fs_info, old_size);
2321 old_size = new->end - new->start + 1;
2322 num_extents += count_max_extents(fs_info, old_size);
2323 if (count_max_extents(fs_info, new_size) >= num_extents)
2324 return;
2325
2326 spin_lock(&BTRFS_I(inode)->lock);
2327 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
2328 spin_unlock(&BTRFS_I(inode)->lock);
2329 }
2330
2331 static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
2332 struct inode *inode)
2333 {
2334 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2335
2336 spin_lock(&root->delalloc_lock);
2337 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
2338 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
2339 &root->delalloc_inodes);
2340 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2341 &BTRFS_I(inode)->runtime_flags);
2342 root->nr_delalloc_inodes++;
2343 if (root->nr_delalloc_inodes == 1) {
2344 spin_lock(&fs_info->delalloc_root_lock);
2345 BUG_ON(!list_empty(&root->delalloc_root));
2346 list_add_tail(&root->delalloc_root,
2347 &fs_info->delalloc_roots);
2348 spin_unlock(&fs_info->delalloc_root_lock);
2349 }
2350 }
2351 spin_unlock(&root->delalloc_lock);
2352 }
2353
2354
2355 void __btrfs_del_delalloc_inode(struct btrfs_root *root,
2356 struct btrfs_inode *inode)
2357 {
2358 struct btrfs_fs_info *fs_info = root->fs_info;
2359
2360 if (!list_empty(&inode->delalloc_inodes)) {
2361 list_del_init(&inode->delalloc_inodes);
2362 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2363 &inode->runtime_flags);
2364 root->nr_delalloc_inodes--;
2365 if (!root->nr_delalloc_inodes) {
2366 ASSERT(list_empty(&root->delalloc_inodes));
2367 spin_lock(&fs_info->delalloc_root_lock);
2368 BUG_ON(list_empty(&root->delalloc_root));
2369 list_del_init(&root->delalloc_root);
2370 spin_unlock(&fs_info->delalloc_root_lock);
2371 }
2372 }
2373 }
2374
2375 static void btrfs_del_delalloc_inode(struct btrfs_root *root,
2376 struct btrfs_inode *inode)
2377 {
2378 spin_lock(&root->delalloc_lock);
2379 __btrfs_del_delalloc_inode(root, inode);
2380 spin_unlock(&root->delalloc_lock);
2381 }
2382
2383
2384
2385
2386
2387 void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
2388 u32 bits)
2389 {
2390 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2391
2392 if ((bits & EXTENT_DEFRAG) && !(bits & EXTENT_DELALLOC))
2393 WARN_ON(1);
2394
2395
2396
2397
2398
2399 if (!(state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
2400 struct btrfs_root *root = BTRFS_I(inode)->root;
2401 u64 len = state->end + 1 - state->start;
2402 u32 num_extents = count_max_extents(fs_info, len);
2403 bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
2404
2405 spin_lock(&BTRFS_I(inode)->lock);
2406 btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
2407 spin_unlock(&BTRFS_I(inode)->lock);
2408
2409
2410 if (btrfs_is_testing(fs_info))
2411 return;
2412
2413 percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
2414 fs_info->delalloc_batch);
2415 spin_lock(&BTRFS_I(inode)->lock);
2416 BTRFS_I(inode)->delalloc_bytes += len;
2417 if (bits & EXTENT_DEFRAG)
2418 BTRFS_I(inode)->defrag_bytes += len;
2419 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2420 &BTRFS_I(inode)->runtime_flags))
2421 btrfs_add_delalloc_inodes(root, inode);
2422 spin_unlock(&BTRFS_I(inode)->lock);
2423 }
2424
2425 if (!(state->state & EXTENT_DELALLOC_NEW) &&
2426 (bits & EXTENT_DELALLOC_NEW)) {
2427 spin_lock(&BTRFS_I(inode)->lock);
2428 BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
2429 state->start;
2430 spin_unlock(&BTRFS_I(inode)->lock);
2431 }
2432 }
2433
2434
2435
2436
2437
2438 void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
2439 struct extent_state *state, u32 bits)
2440 {
2441 struct btrfs_inode *inode = BTRFS_I(vfs_inode);
2442 struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
2443 u64 len = state->end + 1 - state->start;
2444 u32 num_extents = count_max_extents(fs_info, len);
2445
2446 if ((state->state & EXTENT_DEFRAG) && (bits & EXTENT_DEFRAG)) {
2447 spin_lock(&inode->lock);
2448 inode->defrag_bytes -= len;
2449 spin_unlock(&inode->lock);
2450 }
2451
2452
2453
2454
2455
2456
2457 if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
2458 struct btrfs_root *root = inode->root;
2459 bool do_list = !btrfs_is_free_space_inode(inode);
2460
2461 spin_lock(&inode->lock);
2462 btrfs_mod_outstanding_extents(inode, -num_extents);
2463 spin_unlock(&inode->lock);
2464
2465
2466
2467
2468
2469
2470 if (bits & EXTENT_CLEAR_META_RESV &&
2471 root != fs_info->tree_root)
2472 btrfs_delalloc_release_metadata(inode, len, false);
2473
2474
2475 if (btrfs_is_testing(fs_info))
2476 return;
2477
2478 if (!btrfs_is_data_reloc_root(root) &&
2479 do_list && !(state->state & EXTENT_NORESERVE) &&
2480 (bits & EXTENT_CLEAR_DATA_RESV))
2481 btrfs_free_reserved_data_space_noquota(fs_info, len);
2482
2483 percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
2484 fs_info->delalloc_batch);
2485 spin_lock(&inode->lock);
2486 inode->delalloc_bytes -= len;
2487 if (do_list && inode->delalloc_bytes == 0 &&
2488 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2489 &inode->runtime_flags))
2490 btrfs_del_delalloc_inode(root, inode);
2491 spin_unlock(&inode->lock);
2492 }
2493
2494 if ((state->state & EXTENT_DELALLOC_NEW) &&
2495 (bits & EXTENT_DELALLOC_NEW)) {
2496 spin_lock(&inode->lock);
2497 ASSERT(inode->new_delalloc_bytes >= len);
2498 inode->new_delalloc_bytes -= len;
2499 if (bits & EXTENT_ADD_INODE_BYTES)
2500 inode_add_bytes(&inode->vfs_inode, len);
2501 spin_unlock(&inode->lock);
2502 }
2503 }
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513 static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
2514 u64 dio_file_offset)
2515 {
2516 return btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
2517 }
2518
2519
2520
2521
2522
2523
2524 static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
2525 u64 pre, u64 post)
2526 {
2527 struct extent_map_tree *em_tree = &inode->extent_tree;
2528 struct extent_map *em;
2529 struct extent_map *split_pre = NULL;
2530 struct extent_map *split_mid = NULL;
2531 struct extent_map *split_post = NULL;
2532 int ret = 0;
2533 unsigned long flags;
2534
2535
2536 if (pre == 0 && post == 0)
2537 return 0;
2538
2539 split_pre = alloc_extent_map();
2540 if (pre)
2541 split_mid = alloc_extent_map();
2542 if (post)
2543 split_post = alloc_extent_map();
2544 if (!split_pre || (pre && !split_mid) || (post && !split_post)) {
2545 ret = -ENOMEM;
2546 goto out;
2547 }
2548
2549 ASSERT(pre + post < len);
2550
2551 lock_extent(&inode->io_tree, start, start + len - 1);
2552 write_lock(&em_tree->lock);
2553 em = lookup_extent_mapping(em_tree, start, len);
2554 if (!em) {
2555 ret = -EIO;
2556 goto out_unlock;
2557 }
2558
2559 ASSERT(em->len == len);
2560 ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
2561 ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
2562 ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
2563 ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
2564 ASSERT(!list_empty(&em->list));
2565
2566 flags = em->flags;
2567 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
2568
2569
2570 split_pre->start = em->start;
2571 split_pre->len = (pre ? pre : em->len - post);
2572 split_pre->orig_start = split_pre->start;
2573 split_pre->block_start = em->block_start;
2574 split_pre->block_len = split_pre->len;
2575 split_pre->orig_block_len = split_pre->block_len;
2576 split_pre->ram_bytes = split_pre->len;
2577 split_pre->flags = flags;
2578 split_pre->compress_type = em->compress_type;
2579 split_pre->generation = em->generation;
2580
2581 replace_extent_mapping(em_tree, em, split_pre, 1);
2582
2583
2584
2585
2586
2587
2588
2589 if (pre) {
2590
2591 split_mid->start = em->start + pre;
2592 split_mid->len = em->len - pre - post;
2593 split_mid->orig_start = split_mid->start;
2594 split_mid->block_start = em->block_start + pre;
2595 split_mid->block_len = split_mid->len;
2596 split_mid->orig_block_len = split_mid->block_len;
2597 split_mid->ram_bytes = split_mid->len;
2598 split_mid->flags = flags;
2599 split_mid->compress_type = em->compress_type;
2600 split_mid->generation = em->generation;
2601 add_extent_mapping(em_tree, split_mid, 1);
2602 }
2603
2604 if (post) {
2605 split_post->start = em->start + em->len - post;
2606 split_post->len = post;
2607 split_post->orig_start = split_post->start;
2608 split_post->block_start = em->block_start + em->len - post;
2609 split_post->block_len = split_post->len;
2610 split_post->orig_block_len = split_post->block_len;
2611 split_post->ram_bytes = split_post->len;
2612 split_post->flags = flags;
2613 split_post->compress_type = em->compress_type;
2614 split_post->generation = em->generation;
2615 add_extent_mapping(em_tree, split_post, 1);
2616 }
2617
2618
2619 free_extent_map(em);
2620
2621 free_extent_map(em);
2622
2623 out_unlock:
2624 write_unlock(&em_tree->lock);
2625 unlock_extent(&inode->io_tree, start, start + len - 1);
2626 out:
2627 free_extent_map(split_pre);
2628 free_extent_map(split_mid);
2629 free_extent_map(split_post);
2630
2631 return ret;
2632 }
2633
2634 static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
2635 struct bio *bio, loff_t file_offset)
2636 {
2637 struct btrfs_ordered_extent *ordered;
2638 u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
2639 u64 file_len;
2640 u64 len = bio->bi_iter.bi_size;
2641 u64 end = start + len;
2642 u64 ordered_end;
2643 u64 pre, post;
2644 int ret = 0;
2645
2646 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
2647 if (WARN_ON_ONCE(!ordered))
2648 return BLK_STS_IOERR;
2649
2650
2651 if (ordered->disk_num_bytes == len)
2652 goto out;
2653
2654
2655 if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) {
2656 ret = -EINVAL;
2657 goto out;
2658 }
2659
2660
2661 if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) {
2662 ret = -EINVAL;
2663 goto out;
2664 }
2665
2666 ordered_end = ordered->disk_bytenr + ordered->disk_num_bytes;
2667
2668 if (WARN_ON_ONCE(start < ordered->disk_bytenr || end > ordered_end)) {
2669 ret = -EINVAL;
2670 goto out;
2671 }
2672
2673
2674 if (WARN_ON_ONCE(!list_empty(&ordered->list))) {
2675 ret = -EINVAL;
2676 goto out;
2677 }
2678
2679 file_len = ordered->num_bytes;
2680 pre = start - ordered->disk_bytenr;
2681 post = ordered_end - end;
2682
2683 ret = btrfs_split_ordered_extent(ordered, pre, post);
2684 if (ret)
2685 goto out;
2686 ret = split_zoned_em(inode, file_offset, file_len, pre, post);
2687
2688 out:
2689 btrfs_put_ordered_extent(ordered);
2690
2691 return errno_to_blk_status(ret);
2692 }
2693
2694 void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num)
2695 {
2696 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2697 struct btrfs_inode *bi = BTRFS_I(inode);
2698 blk_status_t ret;
2699
2700 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
2701 ret = extract_ordered_extent(bi, bio,
2702 page_offset(bio_first_bvec_all(bio)->bv_page));
2703 if (ret)
2704 goto out;
2705 }
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715 if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
2716 !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
2717 !btrfs_is_data_reloc_root(bi->root)) {
2718 if (!atomic_read(&bi->sync_writers) &&
2719 btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
2720 btrfs_submit_bio_start))
2721 return;
2722
2723 ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
2724 if (ret)
2725 goto out;
2726 }
2727 btrfs_submit_bio(fs_info, bio, mirror_num);
2728 return;
2729 out:
2730 if (ret) {
2731 bio->bi_status = ret;
2732 bio_endio(bio);
2733 }
2734 }
2735
2736 void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
2737 int mirror_num, enum btrfs_compression_type compress_type)
2738 {
2739 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2740 blk_status_t ret;
2741
2742 if (compress_type != BTRFS_COMPRESS_NONE) {
2743
2744
2745
2746
2747 btrfs_submit_compressed_read(inode, bio, mirror_num);
2748 return;
2749 }
2750
2751
2752 btrfs_bio(bio)->iter = bio->bi_iter;
2753
2754
2755
2756
2757
2758 ret = btrfs_lookup_bio_sums(inode, bio, NULL);
2759 if (ret) {
2760 bio->bi_status = ret;
2761 bio_endio(bio);
2762 return;
2763 }
2764
2765 btrfs_submit_bio(fs_info, bio, mirror_num);
2766 }
2767
2768
2769
2770
2771
2772 static int add_pending_csums(struct btrfs_trans_handle *trans,
2773 struct list_head *list)
2774 {
2775 struct btrfs_ordered_sum *sum;
2776 struct btrfs_root *csum_root = NULL;
2777 int ret;
2778
2779 list_for_each_entry(sum, list, list) {
2780 trans->adding_csums = true;
2781 if (!csum_root)
2782 csum_root = btrfs_csum_root(trans->fs_info,
2783 sum->bytenr);
2784 ret = btrfs_csum_file_blocks(trans, csum_root, sum);
2785 trans->adding_csums = false;
2786 if (ret)
2787 return ret;
2788 }
2789 return 0;
2790 }
2791
2792 static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
2793 const u64 start,
2794 const u64 len,
2795 struct extent_state **cached_state)
2796 {
2797 u64 search_start = start;
2798 const u64 end = start + len - 1;
2799
2800 while (search_start < end) {
2801 const u64 search_len = end - search_start + 1;
2802 struct extent_map *em;
2803 u64 em_len;
2804 int ret = 0;
2805
2806 em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
2807 if (IS_ERR(em))
2808 return PTR_ERR(em);
2809
2810 if (em->block_start != EXTENT_MAP_HOLE)
2811 goto next;
2812
2813 em_len = em->len;
2814 if (em->start < search_start)
2815 em_len -= search_start - em->start;
2816 if (em_len > search_len)
2817 em_len = search_len;
2818
2819 ret = set_extent_bit(&inode->io_tree, search_start,
2820 search_start + em_len - 1,
2821 EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
2822 GFP_NOFS, NULL);
2823 next:
2824 search_start = extent_map_end(em);
2825 free_extent_map(em);
2826 if (ret)
2827 return ret;
2828 }
2829 return 0;
2830 }
2831
2832 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
2833 unsigned int extra_bits,
2834 struct extent_state **cached_state)
2835 {
2836 WARN_ON(PAGE_ALIGNED(end));
2837
2838 if (start >= i_size_read(&inode->vfs_inode) &&
2839 !(inode->flags & BTRFS_INODE_PREALLOC)) {
2840
2841
2842
2843
2844 extra_bits |= EXTENT_DELALLOC_NEW;
2845 } else {
2846 int ret;
2847
2848 ret = btrfs_find_new_delalloc_bytes(inode, start,
2849 end + 1 - start,
2850 cached_state);
2851 if (ret)
2852 return ret;
2853 }
2854
2855 return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
2856 cached_state);
2857 }
2858
2859
/*
 * One deferred fixup request for a page.  It is allocated and queued by
 * btrfs_writepage_cow_fixup() and consumed and freed by
 * btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* Page to fix up; pinned with get_page() until the worker is done. */
	struct page *page;
	/* Inode owning the page; held with ihold() until the worker is done. */
	struct inode *inode;
	/* Work item queued on fs_info->fixup_workers. */
	struct btrfs_work work;
};
2865
/*
 * Work callback for a btrfs_writepage_fixup request queued by
 * btrfs_writepage_cow_fixup().
 *
 * Reserves delalloc space for the page, waits out any ordered extent that
 * overlaps it and then marks the page range as delalloc.  Whatever the
 * outcome, the Checked state of the page is cleared and the page reference,
 * the fixup request and the inode reference are released before returning.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct page *page;
	struct btrfs_inode *inode;
	u64 page_start;
	u64 page_end;
	int ret = 0;
	bool free_delalloc_space = true;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
	inode = BTRFS_I(fixup->inode);
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_SIZE - 1;

	/*
	 * The reservation is made before the page lock is taken.  Its result
	 * stays in ret and is only acted upon once the page has been locked
	 * and re-checked below.
	 */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   PAGE_SIZE);
again:
	lock_page(page);

	/*
	 * The page may have lost its mapping, may no longer be dirty, or may
	 * no longer be Checked by the time we get the lock.  In all of those
	 * cases there is nothing left to fix up.
	 */
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		/*
		 * This is not treated as an error even when the reservation
		 * failed: a successful reservation is handed back, and ret is
		 * reset to 0 so that the error handling at out_page is
		 * skipped.
		 */
		if (!ret) {
			btrfs_delalloc_release_extents(inode, PAGE_SIZE);
			btrfs_delalloc_release_space(inode, data_reserved,
						     page_start, PAGE_SIZE,
						     true);
		}
		ret = 0;
		goto out_page;
	}

	/*
	 * The page does need fixing up but the reservation failed; the error
	 * is reported on the page and its mapping at out_page.
	 */
	if (ret)
		goto out_page;

	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);

	/* The page already has the Ordered flag: only release the reservation. */
	if (PageOrdered(page))
		goto out_reserved;

	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
	if (ordered) {
		/*
		 * An ordered extent overlaps the page.  Drop both locks, start
		 * the ordered extent and retry from the page lock.
		 * NOTE(review): the second argument (1) presumably asks
		 * btrfs_start_ordered_extent() to wait for completion - confirm.
		 */
		unlock_extent_cached(&inode->io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
					&cached_state);
	if (ret)
		goto out_reserved;

	/*
	 * The page is still locked and was found dirty above, so it must
	 * still be dirty here.  The range is now delalloc, so the data space
	 * reservation is kept (only the extents reservation is released
	 * below).
	 */
	BUG_ON(!PageDirty(page));
	free_delalloc_space = false;
out_reserved:
	btrfs_delalloc_release_extents(inode, PAGE_SIZE);
	if (free_delalloc_space)
		btrfs_delalloc_release_space(inode, data_reserved, page_start,
					     PAGE_SIZE, true);
	unlock_extent_cached(&inode->io_tree, page_start, page_end,
			     &cached_state);
out_page:
	if (ret) {
		/*
		 * The fixup failed: record the error on the mapping, end
		 * writeback handling for the page, clear its dirty state and
		 * flag the page itself with an error.
		 */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		clear_page_dirty_for_io(page);
		SetPageError(page);
	}
	btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
	unlock_page(page);
	put_page(page);
	kfree(fixup);
	extent_changeset_free(data_reserved);
	/*
	 * Drop the inode reference taken with ihold() when the fixup was
	 * queued.  NOTE(review): presumably a delayed iput is used so that a
	 * final iput never runs from this worker - confirm.
	 */
	btrfs_add_delayed_iput(&inode->vfs_inode);
}
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006 int btrfs_writepage_cow_fixup(struct page *page)
3007 {
3008 struct inode *inode = page->mapping->host;
3009 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3010 struct btrfs_writepage_fixup *fixup;
3011
3012
3013 if (PageOrdered(page))
3014 return 0;
3015
3016
3017
3018
3019
3020
3021
3022
3023 if (PageChecked(page))
3024 return -EAGAIN;
3025
3026 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
3027 if (!fixup)
3028 return -EAGAIN;
3029
3030
3031
3032
3033
3034
3035
3036 ihold(inode);
3037 btrfs_page_set_checked(fs_info, page, page_offset(page), PAGE_SIZE);
3038 get_page(page);
3039 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
3040 fixup->page = page;
3041 fixup->inode = inode;
3042 btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
3043
3044 return -EAGAIN;
3045 }
3046
3047 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
3048 struct btrfs_inode *inode, u64 file_pos,
3049 struct btrfs_file_extent_item *stack_fi,
3050 const bool update_inode_bytes,
3051 u64 qgroup_reserved)
3052 {
3053 struct btrfs_root *root = inode->root;
3054 const u64 sectorsize = root->fs_info->sectorsize;
3055 struct btrfs_path *path;
3056 struct extent_buffer *leaf;
3057 struct btrfs_key ins;
3058 u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
3059 u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
3060 u64 offset = btrfs_stack_file_extent_offset(stack_fi);
3061 u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
3062 u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
3063 struct btrfs_drop_extents_args drop_args = { 0 };
3064 int ret;
3065
3066 path = btrfs_alloc_path();
3067 if (!path)
3068 return -ENOMEM;
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079 drop_args.path = path;
3080 drop_args.start = file_pos;
3081 drop_args.end = file_pos + num_bytes;
3082 drop_args.replace_extent = true;
3083 drop_args.extent_item_size = sizeof(*stack_fi);
3084 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
3085 if (ret)
3086 goto out;
3087
3088 if (!drop_args.extent_inserted) {
3089 ins.objectid = btrfs_ino(inode);
3090 ins.offset = file_pos;
3091 ins.type = BTRFS_EXTENT_DATA_KEY;
3092
3093 ret = btrfs_insert_empty_item(trans, root, path, &ins,
3094 sizeof(*stack_fi));
3095 if (ret)
3096 goto out;
3097 }
3098 leaf = path->nodes[0];
3099 btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
3100 write_extent_buffer(leaf, stack_fi,
3101 btrfs_item_ptr_offset(leaf, path->slots[0]),
3102 sizeof(struct btrfs_file_extent_item));
3103
3104 btrfs_mark_buffer_dirty(leaf);
3105 btrfs_release_path(path);
3106
3107
3108
3109
3110
3111
3112
3113
3114 if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
3115 u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
3116
3117 inline_size = drop_args.bytes_found - inline_size;
3118 btrfs_update_inode_bytes(inode, sectorsize, inline_size);
3119 drop_args.bytes_found -= inline_size;
3120 num_bytes -= sectorsize;
3121 }
3122
3123 if (update_inode_bytes)
3124 btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
3125
3126 ins.objectid = disk_bytenr;
3127 ins.offset = disk_num_bytes;
3128 ins.type = BTRFS_EXTENT_ITEM_KEY;
3129
3130 ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
3131 if (ret)
3132 goto out;
3133
3134 ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
3135 file_pos - offset,
3136 qgroup_reserved, &ins);
3137 out:
3138 btrfs_free_path(path);
3139
3140 return ret;
3141 }
3142
3143 static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
3144 u64 start, u64 len)
3145 {
3146 struct btrfs_block_group *cache;
3147
3148 cache = btrfs_lookup_block_group(fs_info, start);
3149 ASSERT(cache);
3150
3151 spin_lock(&cache->lock);
3152 cache->delalloc_bytes -= len;
3153 spin_unlock(&cache->lock);
3154
3155 btrfs_put_block_group(cache);
3156 }
3157
3158 static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
3159 struct btrfs_ordered_extent *oe)
3160 {
3161 struct btrfs_file_extent_item stack_fi;
3162 bool update_inode_bytes;
3163 u64 num_bytes = oe->num_bytes;
3164 u64 ram_bytes = oe->ram_bytes;
3165
3166 memset(&stack_fi, 0, sizeof(stack_fi));
3167 btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
3168 btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
3169 btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
3170 oe->disk_num_bytes);
3171 btrfs_set_stack_file_extent_offset(&stack_fi, oe->offset);
3172 if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags)) {
3173 num_bytes = oe->truncated_len;
3174 ram_bytes = num_bytes;
3175 }
3176 btrfs_set_stack_file_extent_num_bytes(&stack_fi, num_bytes);
3177 btrfs_set_stack_file_extent_ram_bytes(&stack_fi, ram_bytes);
3178 btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
3179
3180
3181
3182
3183
3184
3185
3186
3187 update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
3188 test_bit(BTRFS_ORDERED_ENCODED, &oe->flags) ||
3189 test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
3190
3191 return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
3192 oe->file_offset, &stack_fi,
3193 update_inode_bytes, oe->qgroup_rsv);
3194 }
3195
3196
3197
3198
3199
3200
/*
 * Finish an ordered extent: record its file extent (or mark a preallocated
 * one as written), add its pending checksums, update the inode item, clear
 * the extent state bits of the range and finally remove the ordered extent
 * and drop two references on it.
 *
 * When something fails, or the ordered extent was truncated, the unwritten
 * part of the range loses its uptodate bit and its cached extent maps.  If
 * additionally no file extent item was inserted for an ordered extent that
 * is neither NOCOW nor PREALLOC, its reserved extent is freed.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_state *cached_state = NULL;
	u64 start, end;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->num_bytes;
	bool freespace_inode;
	bool truncated = false;
	bool clear_reserved_extent = true;
	unsigned int clear_bits = EXTENT_DEFRAG;

	start = ordered_extent->file_offset;
	end = start + ordered_extent->num_bytes - 1;

	/*
	 * EXTENT_DELALLOC_NEW is cleared here only for ordered extents that
	 * are none of NOCOW, PREALLOC, DIRECT or ENCODED.
	 */
	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_ENCODED, &ordered_extent->flags))
		clear_bits |= EXTENT_DELALLOC_NEW;

	freespace_inode = btrfs_is_free_space_inode(inode);

	/* The I/O already failed: go straight to the cleanup. */
	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	/*
	 * NOTE(review): a set bdev presumably identifies a write to a zoned
	 * device whose logical address must be rewritten - confirm.
	 */
	if (ordered_extent->bdev) {
		btrfs_rewrite_logical_zoned(ordered_extent);
		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
					ordered_extent->disk_num_bytes);
	}

	btrfs_free_io_failure_record(inode, start, end);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* Nothing of the extent is left, so there is nothing to add. */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		/* A NOCOW ordered extent must not carry any checksums. */
		BUG_ON(!list_empty(&ordered_extent->list));

		/* Only the inode item is updated for NOCOW. */
		btrfs_inode_safe_disk_i_size_write(inode, 0);
		if (freespace_inode)
			trans = btrfs_join_transaction_spacecache(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &inode->block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret)
			btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/*
	 * From here on the range is locked; adding EXTENT_LOCKED to the bits
	 * cleared at 'out' is what unlocks it again.
	 */
	clear_bits |= EXTENT_LOCKED;
	lock_extent_bits(io_tree, start, end, &cached_state);

	if (freespace_inode)
		trans = btrfs_join_transaction_spacecache(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	trans->block_rsv = &inode->block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* A preallocated extent is never expected to be compressed. */
		BUG_ON(compress_type);
		ret = btrfs_mark_extent_written(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
		btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
						  ordered_extent->disk_num_bytes);
	} else {
		BUG_ON(root == fs_info->tree_root);
		ret = insert_ordered_extent_file_extent(trans, ordered_extent);
		if (!ret) {
			/*
			 * The file extent item is in place, so the reserved
			 * extent must not be freed by the cleanup below.
			 */
			clear_reserved_extent = false;
			btrfs_release_delalloc_bytes(fs_info,
						     ordered_extent->disk_bytenr,
						     ordered_extent->disk_num_bytes);
		}
	}
	/* Done regardless of ret; the error is handled right after. */
	unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
			   ordered_extent->num_bytes, trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = add_pending_csums(trans, &ordered_extent->list);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/*
	 * For a non-truncated range carrying EXTENT_DELALLOC_NEW, clear that
	 * bit together with EXTENT_ADD_INODE_BYTES before the inode item is
	 * updated.  NOTE(review): presumably this clear is what accounts the
	 * new bytes to the inode, hence the ordering - confirm.
	 */
	if ((clear_bits & EXTENT_DELALLOC_NEW) &&
	    !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
		clear_extent_bit(&inode->io_tree, start, end,
				 EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
				 0, 0, &cached_state);

	btrfs_inode_safe_disk_i_size_write(inode, 0);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ret = 0;
out:
	/*
	 * Clear the collected bits; the fifth argument is 1 only when the
	 * range was locked above.
	 */
	clear_extent_bit(&inode->io_tree, start, end, clear_bits,
			 (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
			 &cached_state);

	if (trans)
		btrfs_end_transaction(trans);

	if (ret || truncated) {
		u64 unwritten_start = start;

		/*
		 * On failure make sure the ordered extent is flagged IOERR.
		 * If this call is the one that set the flag, also record -EIO
		 * on the inode's mapping.
		 */
		if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
					     &ordered_extent->flags))
			mapping_set_error(ordered_extent->inode->i_mapping, -EIO);

		/* For a truncated extent only the tail past logical_len is unwritten. */
		if (truncated)
			unwritten_start += logical_len;
		clear_extent_uptodate(io_tree, unwritten_start, end, NULL);

		/* Drop the cached extent maps for the part that was not written. */
		btrfs_drop_extent_cache(inode, unwritten_start, end, 0);

		/*
		 * Free the reserved extent when nothing of it ended up
		 * referenced: an error occurred or the extent was truncated to
		 * zero, no file extent item was inserted, and the ordered
		 * extent is neither NOCOW nor PREALLOC.
		 */
		if ((ret || !logical_len) &&
		    clear_reserved_extent &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
			/*
			 * With the DISCARD_SYNC option, discard the range on
			 * error before handing it back.
			 */
			if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC))
				btrfs_discard_extent(fs_info,
						ordered_extent->disk_bytenr,
						ordered_extent->disk_num_bytes,
						NULL);
			btrfs_free_reserved_extent(fs_info,
					ordered_extent->disk_bytenr,
					ordered_extent->disk_num_bytes, 1);
		}
	}

	/*
	 * Removal happens only after all of the updates above.
	 * NOTE(review): presumably so that waiters on the ordered extent see
	 * the final state - confirm.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/*
	 * Two references are dropped.  NOTE(review): presumably one is the
	 * caller's and one belonged to the ordered extent tree - confirm.
	 */
	btrfs_put_ordered_extent(ordered_extent);

	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
3409
3410 void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
3411 struct page *page, u64 start,
3412 u64 end, bool uptodate)
3413 {
3414 trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
3415
3416 btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start, uptodate);
3417 }
3418
3419
3420
3421
3422
3423 int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
3424 u32 pgoff, u8 *csum, const u8 * const csum_expected)
3425 {
3426 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
3427 char *kaddr;
3428
3429 ASSERT(pgoff + fs_info->sectorsize <= PAGE_SIZE);
3430
3431 shash->tfm = fs_info->csum_shash;
3432
3433 kaddr = kmap_local_page(page) + pgoff;
3434 crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
3435 kunmap_local(kaddr);
3436
3437 if (memcmp(csum, csum_expected, fs_info->csum_size))
3438 return -EIO;
3439 return 0;
3440 }
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455 int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
3456 u32 bio_offset, struct page *page, u32 pgoff)
3457 {
3458 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3459 u32 len = fs_info->sectorsize;
3460 u8 *csum_expected;
3461 u8 csum[BTRFS_CSUM_SIZE];
3462
3463 ASSERT(pgoff + len <= PAGE_SIZE);
3464
3465 csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset);
3466
3467 if (btrfs_check_sector_csum(fs_info, page, pgoff, csum, csum_expected))
3468 goto zeroit;
3469 return 0;
3470
3471 zeroit:
3472 btrfs_print_data_csum_error(BTRFS_I(inode),
3473 bbio->file_offset + bio_offset,
3474 csum, csum_expected, bbio->mirror_num);
3475 if (bbio->device)
3476 btrfs_dev_stat_inc_and_print(bbio->device,
3477 BTRFS_DEV_STAT_CORRUPTION_ERRS);
3478 memzero_page(page, pgoff, len);
3479 return -EIO;
3480 }
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
3495 u32 bio_offset, struct page *page,
3496 u64 start, u64 end)
3497 {
3498 struct inode *inode = page->mapping->host;
3499 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3500 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3501 struct btrfs_root *root = BTRFS_I(inode)->root;
3502 const u32 sectorsize = root->fs_info->sectorsize;
3503 u32 pg_off;
3504 unsigned int result = 0;
3505
3506
3507
3508
3509
3510
3511 if (bbio->csum == NULL)
3512 return 0;
3513
3514 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
3515 return 0;
3516
3517 if (unlikely(test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)))
3518 return 0;
3519
3520 ASSERT(page_offset(page) <= start &&
3521 end <= page_offset(page) + PAGE_SIZE - 1);
3522 for (pg_off = offset_in_page(start);
3523 pg_off < offset_in_page(end);
3524 pg_off += sectorsize, bio_offset += sectorsize) {
3525 u64 file_offset = pg_off + page_offset(page);
3526 int ret;
3527
3528 if (btrfs_is_data_reloc_root(root) &&
3529 test_range_bit(io_tree, file_offset,
3530 file_offset + sectorsize - 1,
3531 EXTENT_NODATASUM, 1, NULL)) {
3532
3533 clear_extent_bits(io_tree, file_offset,
3534 file_offset + sectorsize - 1,
3535 EXTENT_NODATASUM);
3536 continue;
3537 }
3538 ret = btrfs_check_data_csum(inode, bbio, bio_offset, page, pg_off);
3539 if (ret < 0) {
3540 const int nr_bit = (pg_off - offset_in_page(start)) >>
3541 root->fs_info->sectorsize_bits;
3542
3543 result |= (1U << nr_bit);
3544 }
3545 }
3546 return result;
3547 }
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559 void btrfs_add_delayed_iput(struct inode *inode)
3560 {
3561 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3562 struct btrfs_inode *binode = BTRFS_I(inode);
3563
3564 if (atomic_add_unless(&inode->i_count, -1, 1))
3565 return;
3566
3567 atomic_inc(&fs_info->nr_delayed_iputs);
3568 spin_lock(&fs_info->delayed_iput_lock);
3569 ASSERT(list_empty(&binode->delayed_iput));
3570 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3571 spin_unlock(&fs_info->delayed_iput_lock);
3572 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
3573 wake_up_process(fs_info->cleaner_kthread);
3574 }
3575
3576 static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
3577 struct btrfs_inode *inode)
3578 {
3579 list_del_init(&inode->delayed_iput);
3580 spin_unlock(&fs_info->delayed_iput_lock);
3581 iput(&inode->vfs_inode);
3582 if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
3583 wake_up(&fs_info->delayed_iputs_wait);
3584 spin_lock(&fs_info->delayed_iput_lock);
3585 }
3586
3587 static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
3588 struct btrfs_inode *inode)
3589 {
3590 if (!list_empty(&inode->delayed_iput)) {
3591 spin_lock(&fs_info->delayed_iput_lock);
3592 if (!list_empty(&inode->delayed_iput))
3593 run_delayed_iput_locked(fs_info, inode);
3594 spin_unlock(&fs_info->delayed_iput_lock);
3595 }
3596 }
3597
3598 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
3599 {
3600
3601 spin_lock(&fs_info->delayed_iput_lock);
3602 while (!list_empty(&fs_info->delayed_iputs)) {
3603 struct btrfs_inode *inode;
3604
3605 inode = list_first_entry(&fs_info->delayed_iputs,
3606 struct btrfs_inode, delayed_iput);
3607 run_delayed_iput_locked(fs_info, inode);
3608 cond_resched_lock(&fs_info->delayed_iput_lock);
3609 }
3610 spin_unlock(&fs_info->delayed_iput_lock);
3611 }
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625 int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
3626 {
3627 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
3628 atomic_read(&fs_info->nr_delayed_iputs) == 0);
3629 if (ret)
3630 return -EINTR;
3631 return 0;
3632 }
3633
3634
3635
3636
3637
3638 int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3639 struct btrfs_inode *inode)
3640 {
3641 int ret;
3642
3643 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
3644 if (ret && ret != -EEXIST) {
3645 btrfs_abort_transaction(trans, ret);
3646 return ret;
3647 }
3648
3649 return 0;
3650 }
3651
3652
3653
3654
3655
3656 static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3657 struct btrfs_inode *inode)
3658 {
3659 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
3660 }
3661
3662
3663
3664
3665
/*
 * Walk the orphan items of @root (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY) from the highest offset downwards and process the
 * object each one refers to.
 *
 * Only the first caller for a given root does any work; the
 * BTRFS_ROOT_ORPHAN_CLEANUP state bit makes later calls return 0
 * immediately.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0;

	if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = READA_BACK;

	/* Start from the largest possible orphan key and work backwards. */
	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * ret > 0 means the exact key was not found.  Step back one
		 * slot to look at the item just before the search position;
		 * if there is none in this leaf (slot 0) we are done.
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* Read the key of the candidate item. */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Stop as soon as the item is not an orphan item. */
		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* The key has been copied out, the path is no longer needed. */
		btrfs_release_path(path);

		/*
		 * The offset of an orphan item is the id of the orphaned
		 * object.  Finding the same id twice in a row means the
		 * previous iteration did not get rid of the item, so bail out
		 * instead of looping forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(fs_info,
				  "Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(fs_info->sb, last_objectid, root);
		ret = PTR_ERR_OR_ZERO(inode);
		if (ret && ret != -ENOENT)
			goto out;

		if (ret == -ENOENT && root == fs_info->tree_root) {
			struct btrfs_root *dead_root;
			int is_dead_root = 0;

			/*
			 * In the tree root the id may name a root rather than
			 * an inode.  If a root with that id is registered in
			 * fs_roots_radix and has zero references, the orphan
			 * item is left alone.  NOTE(review): presumably such a
			 * root is a subvolume/snapshot pending deletion that
			 * is cleaned up elsewhere - confirm.
			 */
			spin_lock(&fs_info->fs_roots_radix_lock);
			dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
							 (unsigned long)found_key.objectid);
			if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
				is_dead_root = 1;
			spin_unlock(&fs_info->fs_roots_radix_lock);

			if (is_dead_root) {
				/* Continue the search below this orphan item. */
				key.offset = found_key.objectid - 1;
				continue;
			}

		}

		/*
		 * The inode either does not exist or still has a non-zero
		 * link count.  In both cases only the orphan item is deleted;
		 * for an existing inode its verity items are dropped and the
		 * reference from btrfs_iget() is released first.
		 */
		if (ret == -ENOENT || inode->i_nlink) {
			if (!ret) {
				ret = btrfs_drop_verity_items(BTRFS_I(inode));
				iput(inode);
				if (ret)
					goto out;
			}
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(fs_info, "auto deleting %Lu",
				    found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans);
			if (ret)
				goto out;
			continue;
		}

		nr_unlink++;

		/*
		 * The inode exists and has no links left.  NOTE(review):
		 * dropping our reference is presumably what triggers its
		 * deletion, orphan item included - confirm.
		 */
		iput(inode);
	}

	/* The loop can exit with the path still holding a leaf. */
	btrfs_release_path(path);

	/*
	 * NOTE(review): the reason for joining and immediately ending a
	 * transaction when ORPHAN_ITEM_INSERTED is set is not visible from
	 * this function - confirm.
	 */
	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans);
	}

	if (nr_unlink)
		btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);

out:
	if (ret)
		btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3850
3851
3852
3853
3854
3855
3856
3857 static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3858 int slot, u64 objectid,
3859 int *first_xattr_slot)
3860 {
3861 u32 nritems = btrfs_header_nritems(leaf);
3862 struct btrfs_key found_key;
3863 static u64 xattr_access = 0;
3864 static u64 xattr_default = 0;
3865 int scanned = 0;
3866
3867 if (!xattr_access) {
3868 xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
3869 strlen(XATTR_NAME_POSIX_ACL_ACCESS));
3870 xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
3871 strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
3872 }
3873
3874 slot++;
3875 *first_xattr_slot = -1;
3876 while (slot < nritems) {
3877 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3878
3879
3880 if (found_key.objectid != objectid)
3881 return 0;
3882
3883
3884 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3885 if (*first_xattr_slot == -1)
3886 *first_xattr_slot = slot;
3887 if (found_key.offset == xattr_access ||
3888 found_key.offset == xattr_default)
3889 return 1;
3890 }
3891
3892
3893
3894
3895
3896 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3897 return 0;
3898
3899 slot++;
3900 scanned++;
3901
3902
3903
3904
3905
3906
3907
3908 if (scanned >= 8)
3909 break;
3910 }
3911
3912
3913
3914
3915 if (*first_xattr_slot == -1)
3916 *first_xattr_slot = slot;
3917 return 1;
3918 }
3919
3920
3921
3922
3923 static int btrfs_read_locked_inode(struct inode *inode,
3924 struct btrfs_path *in_path)
3925 {
3926 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3927 struct btrfs_path *path = in_path;
3928 struct extent_buffer *leaf;
3929 struct btrfs_inode_item *inode_item;
3930 struct btrfs_root *root = BTRFS_I(inode)->root;
3931 struct btrfs_key location;
3932 unsigned long ptr;
3933 int maybe_acls;
3934 u32 rdev;
3935 int ret;
3936 bool filled = false;
3937 int first_xattr_slot;
3938
3939 ret = btrfs_fill_inode(inode, &rdev);
3940 if (!ret)
3941 filled = true;
3942
3943 if (!path) {
3944 path = btrfs_alloc_path();
3945 if (!path)
3946 return -ENOMEM;
3947 }
3948
3949 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3950
3951 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3952 if (ret) {
3953 if (path != in_path)
3954 btrfs_free_path(path);
3955 return ret;
3956 }
3957
3958 leaf = path->nodes[0];
3959
3960 if (filled)
3961 goto cache_index;
3962
3963 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3964 struct btrfs_inode_item);
3965 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
3966 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
3967 i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
3968 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
3969 btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
3970 btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
3971 round_up(i_size_read(inode), fs_info->sectorsize));
3972
3973 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
3974 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
3975
3976 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
3977 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
3978
3979 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
3980 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
3981
3982 BTRFS_I(inode)->i_otime.tv_sec =
3983 btrfs_timespec_sec(leaf, &inode_item->otime);
3984 BTRFS_I(inode)->i_otime.tv_nsec =
3985 btrfs_timespec_nsec(leaf, &inode_item->otime);
3986
3987 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
3988 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
3989 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
3990
3991 inode_set_iversion_queried(inode,
3992 btrfs_inode_sequence(leaf, inode_item));
3993 inode->i_generation = BTRFS_I(inode)->generation;
3994 inode->i_rdev = 0;
3995 rdev = btrfs_inode_rdev(leaf, inode_item);
3996
3997 BTRFS_I(inode)->index_cnt = (u64)-1;
3998 btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
3999 &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
4000
4001 cache_index:
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011 if (BTRFS_I(inode)->last_trans == fs_info->generation)
4012 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4013 &BTRFS_I(inode)->runtime_flags);
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042 BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
4043
4044
4045
4046
4047
4048
4049
4050 BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
4051
4052 path->slots[0]++;
4053 if (inode->i_nlink != 1 ||
4054 path->slots[0] >= btrfs_header_nritems(leaf))
4055 goto cache_acl;
4056
4057 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
4058 if (location.objectid != btrfs_ino(BTRFS_I(inode)))
4059 goto cache_acl;
4060
4061 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4062 if (location.type == BTRFS_INODE_REF_KEY) {
4063 struct btrfs_inode_ref *ref;
4064
4065 ref = (struct btrfs_inode_ref *)ptr;
4066 BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
4067 } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
4068 struct btrfs_inode_extref *extref;
4069
4070 extref = (struct btrfs_inode_extref *)ptr;
4071 BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
4072 extref);
4073 }
4074 cache_acl:
4075
4076
4077
4078
4079 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
4080 btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
4081 if (first_xattr_slot != -1) {
4082 path->slots[0] = first_xattr_slot;
4083 ret = btrfs_load_inode_props(inode, path);
4084 if (ret)
4085 btrfs_err(fs_info,
4086 "error loading props for ino %llu (root %llu): %d",
4087 btrfs_ino(BTRFS_I(inode)),
4088 root->root_key.objectid, ret);
4089 }
4090 if (path != in_path)
4091 btrfs_free_path(path);
4092
4093 if (!maybe_acls)
4094 cache_no_acl(inode);
4095
4096 switch (inode->i_mode & S_IFMT) {
4097 case S_IFREG:
4098 inode->i_mapping->a_ops = &btrfs_aops;
4099 inode->i_fop = &btrfs_file_operations;
4100 inode->i_op = &btrfs_file_inode_operations;
4101 break;
4102 case S_IFDIR:
4103 inode->i_fop = &btrfs_dir_file_operations;
4104 inode->i_op = &btrfs_dir_inode_operations;
4105 break;
4106 case S_IFLNK:
4107 inode->i_op = &btrfs_symlink_inode_operations;
4108 inode_nohighmem(inode);
4109 inode->i_mapping->a_ops = &btrfs_aops;
4110 break;
4111 default:
4112 inode->i_op = &btrfs_special_inode_operations;
4113 init_special_inode(inode, inode->i_mode, rdev);
4114 break;
4115 }
4116
4117 btrfs_sync_inode_flags_to_i_flags(inode);
4118 return 0;
4119 }
4120
4121
4122
4123
4124 static void fill_inode_item(struct btrfs_trans_handle *trans,
4125 struct extent_buffer *leaf,
4126 struct btrfs_inode_item *item,
4127 struct inode *inode)
4128 {
4129 struct btrfs_map_token token;
4130 u64 flags;
4131
4132 btrfs_init_map_token(&token, leaf);
4133
4134 btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
4135 btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
4136 btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
4137 btrfs_set_token_inode_mode(&token, item, inode->i_mode);
4138 btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
4139
4140 btrfs_set_token_timespec_sec(&token, &item->atime,
4141 inode->i_atime.tv_sec);
4142 btrfs_set_token_timespec_nsec(&token, &item->atime,
4143 inode->i_atime.tv_nsec);
4144
4145 btrfs_set_token_timespec_sec(&token, &item->mtime,
4146 inode->i_mtime.tv_sec);
4147 btrfs_set_token_timespec_nsec(&token, &item->mtime,
4148 inode->i_mtime.tv_nsec);
4149
4150 btrfs_set_token_timespec_sec(&token, &item->ctime,
4151 inode->i_ctime.tv_sec);
4152 btrfs_set_token_timespec_nsec(&token, &item->ctime,
4153 inode->i_ctime.tv_nsec);
4154
4155 btrfs_set_token_timespec_sec(&token, &item->otime,
4156 BTRFS_I(inode)->i_otime.tv_sec);
4157 btrfs_set_token_timespec_nsec(&token, &item->otime,
4158 BTRFS_I(inode)->i_otime.tv_nsec);
4159
4160 btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
4161 btrfs_set_token_inode_generation(&token, item,
4162 BTRFS_I(inode)->generation);
4163 btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
4164 btrfs_set_token_inode_transid(&token, item, trans->transid);
4165 btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
4166 flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
4167 BTRFS_I(inode)->ro_flags);
4168 btrfs_set_token_inode_flags(&token, item, flags);
4169 btrfs_set_token_inode_block_group(&token, item, 0);
4170 }
4171
4172
4173
4174
4175 static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
4176 struct btrfs_root *root,
4177 struct btrfs_inode *inode)
4178 {
4179 struct btrfs_inode_item *inode_item;
4180 struct btrfs_path *path;
4181 struct extent_buffer *leaf;
4182 int ret;
4183
4184 path = btrfs_alloc_path();
4185 if (!path)
4186 return -ENOMEM;
4187
4188 ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
4189 if (ret) {
4190 if (ret > 0)
4191 ret = -ENOENT;
4192 goto failed;
4193 }
4194
4195 leaf = path->nodes[0];
4196 inode_item = btrfs_item_ptr(leaf, path->slots[0],
4197 struct btrfs_inode_item);
4198
4199 fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
4200 btrfs_mark_buffer_dirty(leaf);
4201 btrfs_set_inode_last_trans(trans, inode);
4202 ret = 0;
4203 failed:
4204 btrfs_free_path(path);
4205 return ret;
4206 }
4207
4208
4209
4210
4211 noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
4212 struct btrfs_root *root,
4213 struct btrfs_inode *inode)
4214 {
4215 struct btrfs_fs_info *fs_info = root->fs_info;
4216 int ret;
4217
4218
4219
4220
4221
4222
4223
4224
4225 if (!btrfs_is_free_space_inode(inode)
4226 && !btrfs_is_data_reloc_root(root)
4227 && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
4228 btrfs_update_root_times(trans, root);
4229
4230 ret = btrfs_delayed_update_inode(trans, root, inode);
4231 if (!ret)
4232 btrfs_set_inode_last_trans(trans, inode);
4233 return ret;
4234 }
4235
4236 return btrfs_update_inode_item(trans, root, inode);
4237 }
4238
4239 int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
4240 struct btrfs_root *root, struct btrfs_inode *inode)
4241 {
4242 int ret;
4243
4244 ret = btrfs_update_inode(trans, root, inode);
4245 if (ret == -ENOSPC)
4246 return btrfs_update_inode_item(trans, root, inode);
4247 return ret;
4248 }
4249
4250
4251
4252
4253
4254
/*
 * Remove the directory entry @name (of length @name_len) in @dir that refers
 * to @inode.
 *
 * This deletes the dir item, the inode back reference (either through the
 * delayed inode ref deletion or directly), and the delayed dir index, then
 * shrinks the size of @dir and updates the timestamps/iversion of both inodes.
 * The link count of @inode is not touched here.
 *
 * When @rename_ctx is not NULL the dir index of the removed entry is stored in
 * rename_ctx->index and the log tree deletions are skipped.
 *
 * Returns 0 on success or a negative errno.
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_inode *dir,
				struct btrfs_inode *inode,
				const char *name, int name_len,
				struct btrfs_rename_ctx *rename_ctx)
{
	struct btrfs_root *root = dir->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_dir_item *di;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Find and delete the dir item for this name. */
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		/* NULL means the name was not found. */
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto err;
	}
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If the dir index is already known from the inode, try to queue a
	 * delayed deletion of the inode ref.  On success the index is taken
	 * from the inode and the direct ref deletion below is skipped; on
	 * failure we fall through to the direct deletion.
	 */
	if (inode->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = inode->dir_index;
			goto skip_backref;
		}
	}

	/* Delete the inode ref directly; this also hands back the dir index. */
	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, ret);
		goto err;
	}
skip_backref:
	if (rename_ctx)
		rename_ctx->index = index;

	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	/*
	 * Outside of a rename, also remove the inode ref and the dir entries
	 * from the log.  NOTE(review): presumably the rename path updates the
	 * log on its own -- confirm against the rename code.
	 */
	if (!rename_ctx) {
		btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
					   dir_ino);
		btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
					     index);
	}

	/*
	 * NOTE(review): presumably runs a pending delayed iput for this inode
	 * now rather than leaving it for later -- confirm against
	 * btrfs_run_delayed_iput().
	 */
	btrfs_run_delayed_iput(fs_info, inode);
err:
	/* Reached on success as well; the path is no longer needed. */
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* The directory size shrinks by twice the length of the removed name. */
	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
	inode_inc_iversion(&inode->vfs_inode);
	inode_inc_iversion(&dir->vfs_inode);
	/* Use one timestamp for the inode's ctime and the dir's mtime/ctime. */
	inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
	dir->vfs_inode.i_mtime = inode->vfs_inode.i_ctime;
	dir->vfs_inode.i_ctime = inode->vfs_inode.i_ctime;
	ret = btrfs_update_inode(trans, root, dir);
out:
	return ret;
}
4362
4363 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4364 struct btrfs_inode *dir, struct btrfs_inode *inode,
4365 const char *name, int name_len)
4366 {
4367 int ret;
4368 ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL);
4369 if (!ret) {
4370 drop_nlink(&inode->vfs_inode);
4371 ret = btrfs_update_inode(trans, inode->root, inode);
4372 }
4373 return ret;
4374 }
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384 static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4385 {
4386 struct btrfs_root *root = BTRFS_I(dir)->root;
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396 return btrfs_start_transaction_fallback_global_rsv(root, 6);
4397 }
4398
4399 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
4400 {
4401 struct btrfs_trans_handle *trans;
4402 struct inode *inode = d_inode(dentry);
4403 int ret;
4404
4405 trans = __unlink_start_trans(dir);
4406 if (IS_ERR(trans))
4407 return PTR_ERR(trans);
4408
4409 btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
4410 0);
4411
4412 ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
4413 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4414 dentry->d_name.len);
4415 if (ret)
4416 goto out;
4417
4418 if (inode->i_nlink == 0) {
4419 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
4420 if (ret)
4421 goto out;
4422 }
4423
4424 out:
4425 btrfs_end_transaction(trans);
4426 btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
4427 return ret;
4428 }
4429
/*
 * Remove the directory entry @dentry in @dir that refers to a subvolume.
 *
 * The inode behind @dentry must be either a subvolume root directory
 * (BTRFS_FIRST_FREE_OBJECTID) or an empty subvolume placeholder directory
 * (BTRFS_EMPTY_SUBVOL_DIR_OBJECTID); anything else triggers a warning and
 * -EINVAL.
 *
 * Returns 0 on success or a negative errno.  Apart from a failed dir item
 * lookup, every failure after the path allocation aborts the transaction.
 */
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, struct dentry *dentry)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	const char *name = dentry->d_name.name;
	int name_len = dentry->d_name.len;
	u64 index;
	int ret;
	u64 objectid;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	/* Work out the objectid of the subvolume the entry points to. */
	if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
		objectid = inode->root->root_key.objectid;
	} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		objectid = inode->location.objectid;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		/* NULL means the name was not found. */
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	/* The dir item is expected to point at the root item of that subvolume. */
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_release_path(path);

	/*
	 * Obtain the dir index of the entry.  For the placeholder directory it
	 * is read from the key of the dir index item found by name; otherwise
	 * it is returned by the root ref deletion.
	 * NOTE(review): presumably the placeholder case has no root ref to
	 * delete -- confirm.
	 */
	if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		index = key.offset;
		btrfs_release_path(path);
	} else {
		ret = btrfs_del_root_ref(trans, objectid,
					 root->root_key.objectid, dir_ino,
					 &index, name, name_len);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* The directory size shrinks by twice the length of the removed name. */
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = current_time(dir);
	dir->i_ctime = dir->i_mtime;
	ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4528
4529
4530
4531
4532
4533 static noinline int may_destroy_subvol(struct btrfs_root *root)
4534 {
4535 struct btrfs_fs_info *fs_info = root->fs_info;
4536 struct btrfs_path *path;
4537 struct btrfs_dir_item *di;
4538 struct btrfs_key key;
4539 u64 dir_id;
4540 int ret;
4541
4542 path = btrfs_alloc_path();
4543 if (!path)
4544 return -ENOMEM;
4545
4546
4547 dir_id = btrfs_super_root_dir(fs_info->super_copy);
4548 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
4549 dir_id, "default", 7, 0);
4550 if (di && !IS_ERR(di)) {
4551 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
4552 if (key.objectid == root->root_key.objectid) {
4553 ret = -EPERM;
4554 btrfs_err(fs_info,
4555 "deleting default subvolume %llu is not allowed",
4556 key.objectid);
4557 goto out;
4558 }
4559 btrfs_release_path(path);
4560 }
4561
4562 key.objectid = root->root_key.objectid;
4563 key.type = BTRFS_ROOT_REF_KEY;
4564 key.offset = (u64)-1;
4565
4566 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4567 if (ret < 0)
4568 goto out;
4569 BUG_ON(ret == 0);
4570
4571 ret = 0;
4572 if (path->slots[0] > 0) {
4573 path->slots[0]--;
4574 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4575 if (key.objectid == root->root_key.objectid &&
4576 key.type == BTRFS_ROOT_REF_KEY)
4577 ret = -ENOTEMPTY;
4578 }
4579 out:
4580 btrfs_free_path(path);
4581 return ret;
4582 }
4583
4584
/*
 * Walk the in-memory inode tree of @root in ascending inode number order and
 * prune the dentry aliases of every inode that can be grabbed and is
 * referenced by someone other than us.
 *
 * Unless the filesystem is in an error state, a warning is emitted if the
 * root item still has a non-zero reference count.
 */
static void btrfs_prune_dentries(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	/* Smallest inode number that still has to be visited. */
	u64 objectid = 0;

	if (!BTRFS_FS_ERROR(fs_info))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/* Binary search for an inode with number == objectid. */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	/*
	 * No exact match: starting from the last node visited, move forward
	 * to the first inode whose number is >= objectid.
	 */
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Remember where to resume if the walk has to restart. */
		objectid = btrfs_ino(entry) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			/*
			 * The lock is dropped while working on the inode, so
			 * the search is restarted from objectid afterwards.
			 */
			spin_unlock(&root->inode_lock);
			/* More than our own reference: drop its dentry aliases. */
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			goto again;
		}

		/* Restart the search if the lock was dropped to reschedule. */
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
4647
4648 int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
4649 {
4650 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
4651 struct btrfs_root *root = BTRFS_I(dir)->root;
4652 struct inode *inode = d_inode(dentry);
4653 struct btrfs_root *dest = BTRFS_I(inode)->root;
4654 struct btrfs_trans_handle *trans;
4655 struct btrfs_block_rsv block_rsv;
4656 u64 root_flags;
4657 int ret;
4658
4659
4660
4661
4662
4663
4664 spin_lock(&dest->root_item_lock);
4665 if (dest->send_in_progress) {
4666 spin_unlock(&dest->root_item_lock);
4667 btrfs_warn(fs_info,
4668 "attempt to delete subvolume %llu during send",
4669 dest->root_key.objectid);
4670 return -EPERM;
4671 }
4672 if (atomic_read(&dest->nr_swapfiles)) {
4673 spin_unlock(&dest->root_item_lock);
4674 btrfs_warn(fs_info,
4675 "attempt to delete subvolume %llu with active swapfile",
4676 root->root_key.objectid);
4677 return -EPERM;
4678 }
4679 root_flags = btrfs_root_flags(&dest->root_item);
4680 btrfs_set_root_flags(&dest->root_item,
4681 root_flags | BTRFS_ROOT_SUBVOL_DEAD);
4682 spin_unlock(&dest->root_item_lock);
4683
4684 down_write(&fs_info->subvol_sem);
4685
4686 ret = may_destroy_subvol(dest);
4687 if (ret)
4688 goto out_up_write;
4689
4690 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
4691
4692
4693
4694
4695
4696 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
4697 if (ret)
4698 goto out_up_write;
4699
4700 trans = btrfs_start_transaction(root, 0);
4701 if (IS_ERR(trans)) {
4702 ret = PTR_ERR(trans);
4703 goto out_release;
4704 }
4705 trans->block_rsv = &block_rsv;
4706 trans->bytes_reserved = block_rsv.size;
4707
4708 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
4709
4710 ret = btrfs_unlink_subvol(trans, dir, dentry);
4711 if (ret) {
4712 btrfs_abort_transaction(trans, ret);
4713 goto out_end_trans;
4714 }
4715
4716 ret = btrfs_record_root_in_trans(trans, dest);
4717 if (ret) {
4718 btrfs_abort_transaction(trans, ret);
4719 goto out_end_trans;
4720 }
4721
4722 memset(&dest->root_item.drop_progress, 0,
4723 sizeof(dest->root_item.drop_progress));
4724 btrfs_set_root_drop_level(&dest->root_item, 0);
4725 btrfs_set_root_refs(&dest->root_item, 0);
4726
4727 if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
4728 ret = btrfs_insert_orphan_item(trans,
4729 fs_info->tree_root,
4730 dest->root_key.objectid);
4731 if (ret) {
4732 btrfs_abort_transaction(trans, ret);
4733 goto out_end_trans;
4734 }
4735 }
4736
4737 ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
4738 BTRFS_UUID_KEY_SUBVOL,
4739 dest->root_key.objectid);
4740 if (ret && ret != -ENOENT) {
4741 btrfs_abort_transaction(trans, ret);
4742 goto out_end_trans;
4743 }
4744 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
4745 ret = btrfs_uuid_tree_remove(trans,
4746 dest->root_item.received_uuid,
4747 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4748 dest->root_key.objectid);
4749 if (ret && ret != -ENOENT) {
4750 btrfs_abort_transaction(trans, ret);
4751 goto out_end_trans;
4752 }
4753 }
4754
4755 free_anon_bdev(dest->anon_dev);
4756 dest->anon_dev = 0;
4757 out_end_trans:
4758 trans->block_rsv = NULL;
4759 trans->bytes_reserved = 0;
4760 ret = btrfs_end_transaction(trans);
4761 inode->i_flags |= S_DEAD;
4762 out_release:
4763 btrfs_subvolume_release_metadata(root, &block_rsv);
4764 out_up_write:
4765 up_write(&fs_info->subvol_sem);
4766 if (ret) {
4767 spin_lock(&dest->root_item_lock);
4768 root_flags = btrfs_root_flags(&dest->root_item);
4769 btrfs_set_root_flags(&dest->root_item,
4770 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
4771 spin_unlock(&dest->root_item_lock);
4772 } else {
4773 d_invalidate(dentry);
4774 btrfs_prune_dentries(dest);
4775 ASSERT(dest->send_in_progress == 0);
4776 }
4777
4778 return ret;
4779 }
4780
4781 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4782 {
4783 struct inode *inode = d_inode(dentry);
4784 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
4785 int err = 0;
4786 struct btrfs_trans_handle *trans;
4787 u64 last_unlink_trans;
4788
4789 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4790 return -ENOTEMPTY;
4791 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) {
4792 if (unlikely(btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))) {
4793 btrfs_err(fs_info,
4794 "extent tree v2 doesn't support snapshot deletion yet");
4795 return -EOPNOTSUPP;
4796 }
4797 return btrfs_delete_subvolume(dir, dentry);
4798 }
4799
4800 trans = __unlink_start_trans(dir);
4801 if (IS_ERR(trans))
4802 return PTR_ERR(trans);
4803
4804 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4805 err = btrfs_unlink_subvol(trans, dir, dentry);
4806 goto out;
4807 }
4808
4809 err = btrfs_orphan_add(trans, BTRFS_I(inode));
4810 if (err)
4811 goto out;
4812
4813 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4814
4815
4816 err = btrfs_unlink_inode(trans, BTRFS_I(dir),
4817 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4818 dentry->d_name.len);
4819 if (!err) {
4820 btrfs_i_size_write(BTRFS_I(inode), 0);
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832 if (last_unlink_trans >= trans->transid)
4833 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4834 }
4835 out:
4836 btrfs_end_transaction(trans);
4837 btrfs_btree_balance_dirty(fs_info);
4838
4839 return err;
4840 }
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853 int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
4854 int front)
4855 {
4856 struct btrfs_fs_info *fs_info = inode->root->fs_info;
4857 struct address_space *mapping = inode->vfs_inode.i_mapping;
4858 struct extent_io_tree *io_tree = &inode->io_tree;
4859 struct btrfs_ordered_extent *ordered;
4860 struct extent_state *cached_state = NULL;
4861 struct extent_changeset *data_reserved = NULL;
4862 bool only_release_metadata = false;
4863 u32 blocksize = fs_info->sectorsize;
4864 pgoff_t index = from >> PAGE_SHIFT;
4865 unsigned offset = from & (blocksize - 1);
4866 struct page *page;
4867 gfp_t mask = btrfs_alloc_write_mask(mapping);
4868 size_t write_bytes = blocksize;
4869 int ret = 0;
4870 u64 block_start;
4871 u64 block_end;
4872
4873 if (IS_ALIGNED(offset, blocksize) &&
4874 (!len || IS_ALIGNED(len, blocksize)))
4875 goto out;
4876
4877 block_start = round_down(from, blocksize);
4878 block_end = block_start + blocksize - 1;
4879
4880 ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
4881 blocksize);
4882 if (ret < 0) {
4883 if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
4884
4885 only_release_metadata = true;
4886 } else {
4887 goto out;
4888 }
4889 }
4890 ret = btrfs_delalloc_reserve_metadata(inode, blocksize, blocksize, false);
4891 if (ret < 0) {
4892 if (!only_release_metadata)
4893 btrfs_free_reserved_data_space(inode, data_reserved,
4894 block_start, blocksize);
4895 goto out;
4896 }
4897 again:
4898 page = find_or_create_page(mapping, index, mask);
4899 if (!page) {
4900 btrfs_delalloc_release_space(inode, data_reserved, block_start,
4901 blocksize, true);
4902 btrfs_delalloc_release_extents(inode, blocksize);
4903 ret = -ENOMEM;
4904 goto out;
4905 }
4906 ret = set_page_extent_mapped(page);
4907 if (ret < 0)
4908 goto out_unlock;
4909
4910 if (!PageUptodate(page)) {
4911 ret = btrfs_read_folio(NULL, page_folio(page));
4912 lock_page(page);
4913 if (page->mapping != mapping) {
4914 unlock_page(page);
4915 put_page(page);
4916 goto again;
4917 }
4918 if (!PageUptodate(page)) {
4919 ret = -EIO;
4920 goto out_unlock;
4921 }
4922 }
4923 wait_on_page_writeback(page);
4924
4925 lock_extent_bits(io_tree, block_start, block_end, &cached_state);
4926
4927 ordered = btrfs_lookup_ordered_extent(inode, block_start);
4928 if (ordered) {
4929 unlock_extent_cached(io_tree, block_start, block_end,
4930 &cached_state);
4931 unlock_page(page);
4932 put_page(page);
4933 btrfs_start_ordered_extent(ordered, 1);
4934 btrfs_put_ordered_extent(ordered);
4935 goto again;
4936 }
4937
4938 clear_extent_bit(&inode->io_tree, block_start, block_end,
4939 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
4940 0, 0, &cached_state);
4941
4942 ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
4943 &cached_state);
4944 if (ret) {
4945 unlock_extent_cached(io_tree, block_start, block_end,
4946 &cached_state);
4947 goto out_unlock;
4948 }
4949
4950 if (offset != blocksize) {
4951 if (!len)
4952 len = blocksize - offset;
4953 if (front)
4954 memzero_page(page, (block_start - page_offset(page)),
4955 offset);
4956 else
4957 memzero_page(page, (block_start - page_offset(page)) + offset,
4958 len);
4959 }
4960 btrfs_page_clear_checked(fs_info, page, block_start,
4961 block_end + 1 - block_start);
4962 btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start);
4963 unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
4964
4965 if (only_release_metadata)
4966 set_extent_bit(&inode->io_tree, block_start, block_end,
4967 EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
4968
4969 out_unlock:
4970 if (ret) {
4971 if (only_release_metadata)
4972 btrfs_delalloc_release_metadata(inode, blocksize, true);
4973 else
4974 btrfs_delalloc_release_space(inode, data_reserved,
4975 block_start, blocksize, true);
4976 }
4977 btrfs_delalloc_release_extents(inode, blocksize);
4978 unlock_page(page);
4979 put_page(page);
4980 out:
4981 if (only_release_metadata)
4982 btrfs_check_nocow_unlock(inode);
4983 extent_changeset_free(data_reserved);
4984 return ret;
4985 }
4986
4987 static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
4988 u64 offset, u64 len)
4989 {
4990 struct btrfs_fs_info *fs_info = root->fs_info;
4991 struct btrfs_trans_handle *trans;
4992 struct btrfs_drop_extents_args drop_args = { 0 };
4993 int ret;
4994
4995
4996
4997
4998
4999
5000
5001 if (btrfs_fs_incompat(fs_info, NO_HOLES))
5002 return 0;
5003
5004
5005
5006
5007
5008
5009 trans = btrfs_start_transaction(root, 3);
5010 if (IS_ERR(trans))
5011 return PTR_ERR(trans);
5012
5013 drop_args.start = offset;
5014 drop_args.end = offset + len;
5015 drop_args.drop_cache = true;
5016
5017 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
5018 if (ret) {
5019 btrfs_abort_transaction(trans, ret);
5020 btrfs_end_transaction(trans);
5021 return ret;
5022 }
5023
5024 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
5025 offset, 0, 0, len, 0, len, 0, 0, 0);
5026 if (ret) {
5027 btrfs_abort_transaction(trans, ret);
5028 } else {
5029 btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found);
5030 btrfs_update_inode(trans, root, inode);
5031 }
5032 btrfs_end_transaction(trans);
5033 return ret;
5034 }
5035
5036
5037
5038
5039
5040
5041
/*
 * Prepare @inode for growing from @oldsize to @size.
 *
 * The tail of the block containing @oldsize is zeroed, then the sector
 * aligned range [hole_start, block_end) is walked extent map by extent map.
 * For every extent map that is not preallocated, a hole extent is inserted
 * (see maybe_insert_hole()) and a matching hole extent map is added to the
 * extent map tree; for all ranges the file extent range is recorded.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
	u64 block_end = ALIGN(size, fs_info->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * Zero from @oldsize to the end of its block (len == 0, front == 0);
	 * this is a no-op when @oldsize is block aligned.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	/* The new size ends within the last block of the old size. */
	if (size <= hole_start)
		return 0;

	btrfs_lock_and_flush_ordered_range(inode, hole_start, block_end - 1,
					   &cached_state);
	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				      block_end - cur_offset);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			/* So that the free_extent_map() after the loop is safe. */
			em = NULL;
			break;
		}
		/* Handle the part of this extent map that lies before block_end. */
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte, fs_info->sectorsize);
		hole_size = last_byte - cur_offset;

		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;

			err = btrfs_inode_set_file_extent_range(inode,
							cur_offset, hole_size);
			if (err)
				break;

			btrfs_drop_extent_cache(inode, cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * No memory for the hole extent map: flag the
				 * inode as needing a full sync and move on.
				 */
				btrfs_set_inode_full_sync(inode);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = fs_info->generation;

			/*
			 * Retry the insertion for as long as it collides with
			 * an existing mapping, dropping the cached range
			 * before each retry.
			 */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		} else {
			/* Preallocated extent: only record the range. */
			err = btrfs_inode_set_file_extent_range(inode,
							cur_offset, hole_size);
			if (err)
				break;
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	/* em is non-NULL here only if the loop was left through an error break. */
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
	return err;
}
5143
/*
 * Change the size of @inode to attr->ia_size.
 *
 * Growing the file goes through btrfs_cont_expand() and then updates i_size
 * and the inode item in a transaction.  Shrinking (or keeping the size) goes
 * through truncate_setsize() and btrfs_truncate().
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * When the size really changes and the caller asked for neither a
	 * ctime nor an mtime update, set both timestamps to the current time
	 * here.
	 */
	if (newsize != oldsize) {
		inode_inc_iversion(inode);
		if (!(mask & (ATTR_CTIME | ATTR_MTIME))) {
			inode->i_mtime = current_time(inode);
			inode->i_ctime = inode->i_mtime;
		}
	}

	if (newsize > oldsize) {
		/*
		 * The expansion and the i_size update run with the root's
		 * snapshot_lock write-locked.
		 * NOTE(review): presumably to keep them from racing with
		 * snapshot creation -- confirm.
		 */
		btrfs_drew_write_lock(&root->snapshot_lock);
		ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize);
		if (ret) {
			btrfs_drew_write_unlock(&root->snapshot_lock);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_drew_write_unlock(&root->snapshot_lock);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
		pagecache_isize_extended(inode, oldsize, newsize);
		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
		btrfs_drew_write_unlock(&root->snapshot_lock);
		btrfs_end_transaction(trans);
	} else {
		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

		/*
		 * On zoned filesystems, first wait for ordered extents from
		 * the sector aligned new size onwards.
		 */
		if (btrfs_is_zoned(fs_info)) {
			ret = btrfs_wait_ordered_range(inode,
					ALIGN(newsize, fs_info->sectorsize),
					(u64)-1);
			if (ret)
				return ret;
		}

		/* Truncation to zero marks the inode to be flushed on close. */
		if (newsize == 0)
			set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		truncate_setsize(inode, newsize);

		inode_dio_wait(inode);

		/* The second argument tells whether the size is unchanged. */
		ret = btrfs_truncate(inode, newsize == oldsize);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * The truncate failed on a still linked inode: wait for
			 * all ordered extents and reset i_size to the on-disk
			 * size.
			 */
			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
			if (err)
				return err;
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
		}
	}

	return ret;
}
5237
5238 static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
5239 struct iattr *attr)
5240 {
5241 struct inode *inode = d_inode(dentry);
5242 struct btrfs_root *root = BTRFS_I(inode)->root;
5243 int err;
5244
5245 if (btrfs_root_readonly(root))
5246 return -EROFS;
5247
5248 err = setattr_prepare(mnt_userns, dentry, attr);
5249 if (err)
5250 return err;
5251
5252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5253 err = btrfs_setsize(inode, attr);
5254 if (err)
5255 return err;
5256 }
5257
5258 if (attr->ia_valid) {
5259 setattr_copy(mnt_userns, inode, attr);
5260 inode_inc_iversion(inode);
5261 err = btrfs_dirty_inode(inode);
5262
5263 if (!err && attr->ia_valid & ATTR_MODE)
5264 err = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
5265 }
5266
5267 return err;
5268 }
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
/*
 * Tear down the page cache and the per-inode extent state of an inode that is
 * being freed (I_FREEING must be set).
 *
 * After truncating all pages, every extent map is removed from the inode's
 * extent map tree and every extent state record is cleared from its io tree.
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages_final(&inode->i_data);

	/* Drop all extent maps, lowest first. */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
		struct extent_map *em;

		node = rb_first_cached(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		/*
		 * Clear PINNED and LOGGING before the removal.
		 * NOTE(review): presumably remove_extent_mapping() complains
		 * about these flags otherwise -- confirm.
		 */
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		/* Do not hog the CPU with the tree lock held. */
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Clear whatever extent state is left in the io tree.  The tree's
	 * spinlock is dropped before locking each extent range, so the first
	 * record is looked up again on every iteration.
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;
		unsigned state_flags;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		/* Copy what is needed before the spinlock is released. */
		start = state->start;
		end = state->end;
		state_flags = state->state;
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		/* Ranges that were still delalloc get their qgroup data freed. */
		if (state_flags & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
					       end - start + 1);

		/* This clears EXTENT_LOCKED as well, i.e. it unlocks the range. */
		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5365
5366 static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
5367 struct btrfs_block_rsv *rsv)
5368 {
5369 struct btrfs_fs_info *fs_info = root->fs_info;
5370 struct btrfs_trans_handle *trans;
5371 u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
5372 int ret;
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388 ret = btrfs_block_rsv_refill(fs_info, rsv, rsv->size + delayed_refs_extra,
5389 BTRFS_RESERVE_FLUSH_EVICT);
5390 if (ret) {
5391 ret = btrfs_block_rsv_refill(fs_info, rsv, rsv->size,
5392 BTRFS_RESERVE_FLUSH_EVICT);
5393 if (ret) {
5394 btrfs_warn(fs_info,
5395 "could not allocate space for delete; will truncate on mount");
5396 return ERR_PTR(-ENOSPC);
5397 }
5398 delayed_refs_extra = 0;
5399 }
5400
5401 trans = btrfs_join_transaction(root);
5402 if (IS_ERR(trans))
5403 return trans;
5404
5405 if (delayed_refs_extra) {
5406 trans->block_rsv = &fs_info->trans_block_rsv;
5407 trans->bytes_reserved = delayed_refs_extra;
5408 btrfs_block_rsv_migrate(rsv, trans->block_rsv,
5409 delayed_refs_extra, 1);
5410 }
5411 return trans;
5412 }
5413
/*
 * Tear down an in-memory inode that is being evicted.
 *
 * Cached pages, extent maps and io tree ranges are always dropped.  If the
 * inode has no links left (and none of the early-exit conditions below apply)
 * its items are also truncated away and its orphan item is deleted.  In every
 * case the function ends by removing the delayed node, cleaning up fsverity
 * state and calling clear_inode().
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;

	trace_btrfs_inode_evict(inode);

	/* No root attached: only the generic cleanup is possible. */
	if (!root) {
		fsverity_cleanup_inode(inode);
		clear_inode(inode);
		return;
	}

	evict_inode_truncate_pages(inode);

	/*
	 * Still linked, and either it lives in a root that is still referenced
	 * (other than the tree root) or it is a free space inode: keep the
	 * on-disk items.
	 */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(BTRFS_I(inode))))
		goto no_delete;

	if (is_bad_inode(inode))
		goto no_delete;

	btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);

	/* Nothing is deleted from here while log recovery is running. */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		goto no_delete;

	/*
	 * Linked inodes that get this far must belong to an unreferenced root
	 * or to the tree root (anything else was filtered out above).
	 */
	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	/*
	 * Flush the pending delayed inode update first.
	 * NOTE(review): presumably so the inode item in the tree is up to date
	 * before it is truncated - confirm.
	 */
	ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
	if (ret)
		goto no_delete;

	/*
	 * Drop the remaining delayed items of this inode; the inode is about
	 * to be removed completely by the truncate below.
	 */
	btrfs_kill_delayed_inode_items(BTRFS_I(inode));

	/* Temporary reservation sized for one metadata item, fail-fast. */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		goto no_delete;
	rsv->size = btrfs_calc_metadata_size(fs_info, 1);
	rsv->failfast = true;

	btrfs_i_size_write(BTRFS_I(inode), 0);

	/*
	 * Truncate the inode's items down to size 0.  Every pass refills the
	 * reservation and joins a transaction; -ENOSPC and -EAGAIN from the
	 * truncate trigger another pass, any other error gives up on the
	 * delete, and 0 means the truncate is complete.
	 */
	while (1) {
		struct btrfs_truncate_control control = {
			.inode = BTRFS_I(inode),
			.ino = btrfs_ino(BTRFS_I(inode)),
			.new_size = 0,
			.min_type = 0,
		};

		trans = evict_refill_and_join(root, rsv);
		if (IS_ERR(trans))
			goto free_rsv;

		/* Charge the truncate to the temporary reservation. */
		trans->block_rsv = rsv;

		ret = btrfs_truncate_inode_items(trans, root, &control);
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);
		if (ret && ret != -ENOSPC && ret != -EAGAIN)
			goto free_rsv;
		else if (!ret)
			break;
	}

	/*
	 * Best effort removal of the orphan item: the return value of
	 * btrfs_orphan_del() is not checked, and failing to get a transaction
	 * simply skips the deletion.
	 * NOTE(review): a leftover orphan item is presumably dealt with by
	 * orphan cleanup at a later time - confirm.
	 */
	trans = evict_refill_and_join(root, rsv);
	if (!IS_ERR(trans)) {
		trans->block_rsv = rsv;
		btrfs_orphan_del(trans, BTRFS_I(inode));
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
	}

free_rsv:
	btrfs_free_block_rsv(fs_info, rsv);
no_delete:
	/*
	 * Common exit, reached whether or not the on-disk items were deleted:
	 * release the delayed node and finish the generic inode teardown.
	 */
	btrfs_remove_delayed_node(BTRFS_I(inode));
	fsverity_cleanup_inode(inode);
	clear_inode(inode);
}
5529
5530
5531
5532
5533
5534
5535
5536
5537 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5538 struct btrfs_key *location, u8 *type)
5539 {
5540 const char *name = dentry->d_name.name;
5541 int namelen = dentry->d_name.len;
5542 struct btrfs_dir_item *di;
5543 struct btrfs_path *path;
5544 struct btrfs_root *root = BTRFS_I(dir)->root;
5545 int ret = 0;
5546
5547 path = btrfs_alloc_path();
5548 if (!path)
5549 return -ENOMEM;
5550
5551 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
5552 name, namelen, 0);
5553 if (IS_ERR_OR_NULL(di)) {
5554 ret = di ? PTR_ERR(di) : -ENOENT;
5555 goto out;
5556 }
5557
5558 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5559 if (location->type != BTRFS_INODE_ITEM_KEY &&
5560 location->type != BTRFS_ROOT_ITEM_KEY) {
5561 ret = -EUCLEAN;
5562 btrfs_warn(root->fs_info,
5563 "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
5564 __func__, name, btrfs_ino(BTRFS_I(dir)),
5565 location->objectid, location->type, location->offset);
5566 }
5567 if (!ret)
5568 *type = btrfs_dir_type(path->nodes[0], di);
5569 out:
5570 btrfs_free_path(path);
5571 return ret;
5572 }
5573
5574
5575
5576
5577
5578
5579 static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
5580 struct inode *dir,
5581 struct dentry *dentry,
5582 struct btrfs_key *location,
5583 struct btrfs_root **sub_root)
5584 {
5585 struct btrfs_path *path;
5586 struct btrfs_root *new_root;
5587 struct btrfs_root_ref *ref;
5588 struct extent_buffer *leaf;
5589 struct btrfs_key key;
5590 int ret;
5591 int err = 0;
5592
5593 path = btrfs_alloc_path();
5594 if (!path) {
5595 err = -ENOMEM;
5596 goto out;
5597 }
5598
5599 err = -ENOENT;
5600 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5601 key.type = BTRFS_ROOT_REF_KEY;
5602 key.offset = location->objectid;
5603
5604 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
5605 if (ret) {
5606 if (ret < 0)
5607 err = ret;
5608 goto out;
5609 }
5610
5611 leaf = path->nodes[0];
5612 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
5613 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
5614 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
5615 goto out;
5616
5617 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
5618 (unsigned long)(ref + 1),
5619 dentry->d_name.len);
5620 if (ret)
5621 goto out;
5622
5623 btrfs_release_path(path);
5624
5625 new_root = btrfs_get_fs_root(fs_info, location->objectid, true);
5626 if (IS_ERR(new_root)) {
5627 err = PTR_ERR(new_root);
5628 goto out;
5629 }
5630
5631 *sub_root = new_root;
5632 location->objectid = btrfs_root_dirid(&new_root->root_item);
5633 location->type = BTRFS_INODE_ITEM_KEY;
5634 location->offset = 0;
5635 err = 0;
5636 out:
5637 btrfs_free_path(path);
5638 return err;
5639 }
5640
5641 static void inode_tree_add(struct inode *inode)
5642 {
5643 struct btrfs_root *root = BTRFS_I(inode)->root;
5644 struct btrfs_inode *entry;
5645 struct rb_node **p;
5646 struct rb_node *parent;
5647 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5648 u64 ino = btrfs_ino(BTRFS_I(inode));
5649
5650 if (inode_unhashed(inode))
5651 return;
5652 parent = NULL;
5653 spin_lock(&root->inode_lock);
5654 p = &root->inode_tree.rb_node;
5655 while (*p) {
5656 parent = *p;
5657 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5658
5659 if (ino < btrfs_ino(entry))
5660 p = &parent->rb_left;
5661 else if (ino > btrfs_ino(entry))
5662 p = &parent->rb_right;
5663 else {
5664 WARN_ON(!(entry->vfs_inode.i_state &
5665 (I_WILL_FREE | I_FREEING)));
5666 rb_replace_node(parent, new, &root->inode_tree);
5667 RB_CLEAR_NODE(parent);
5668 spin_unlock(&root->inode_lock);
5669 return;
5670 }
5671 }
5672 rb_link_node(new, parent, p);
5673 rb_insert_color(new, &root->inode_tree);
5674 spin_unlock(&root->inode_lock);
5675 }
5676
5677 static void inode_tree_del(struct btrfs_inode *inode)
5678 {
5679 struct btrfs_root *root = inode->root;
5680 int empty = 0;
5681
5682 spin_lock(&root->inode_lock);
5683 if (!RB_EMPTY_NODE(&inode->rb_node)) {
5684 rb_erase(&inode->rb_node, &root->inode_tree);
5685 RB_CLEAR_NODE(&inode->rb_node);
5686 empty = RB_EMPTY_ROOT(&root->inode_tree);
5687 }
5688 spin_unlock(&root->inode_lock);
5689
5690 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5691 spin_lock(&root->inode_lock);
5692 empty = RB_EMPTY_ROOT(&root->inode_tree);
5693 spin_unlock(&root->inode_lock);
5694 if (empty)
5695 btrfs_add_dead_root(root);
5696 }
5697 }
5698
5699
5700 static int btrfs_init_locked_inode(struct inode *inode, void *p)
5701 {
5702 struct btrfs_iget_args *args = p;
5703
5704 inode->i_ino = args->ino;
5705 BTRFS_I(inode)->location.objectid = args->ino;
5706 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5707 BTRFS_I(inode)->location.offset = 0;
5708 BTRFS_I(inode)->root = btrfs_grab_root(args->root);
5709 BUG_ON(args->root && !BTRFS_I(inode)->root);
5710 return 0;
5711 }
5712
5713 static int btrfs_find_actor(struct inode *inode, void *opaque)
5714 {
5715 struct btrfs_iget_args *args = opaque;
5716
5717 return args->ino == BTRFS_I(inode)->location.objectid &&
5718 args->root == BTRFS_I(inode)->root;
5719 }
5720
5721 static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
5722 struct btrfs_root *root)
5723 {
5724 struct inode *inode;
5725 struct btrfs_iget_args args;
5726 unsigned long hashval = btrfs_inode_hash(ino, root);
5727
5728 args.ino = ino;
5729 args.root = root;
5730
5731 inode = iget5_locked(s, hashval, btrfs_find_actor,
5732 btrfs_init_locked_inode,
5733 (void *)&args);
5734 return inode;
5735 }
5736
5737
5738
5739
5740
5741
5742
5743 struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
5744 struct btrfs_root *root, struct btrfs_path *path)
5745 {
5746 struct inode *inode;
5747
5748 inode = btrfs_iget_locked(s, ino, root);
5749 if (!inode)
5750 return ERR_PTR(-ENOMEM);
5751
5752 if (inode->i_state & I_NEW) {
5753 int ret;
5754
5755 ret = btrfs_read_locked_inode(inode, path);
5756 if (!ret) {
5757 inode_tree_add(inode);
5758 unlock_new_inode(inode);
5759 } else {
5760 iget_failed(inode);
5761
5762
5763
5764
5765
5766 if (ret > 0)
5767 ret = -ENOENT;
5768 inode = ERR_PTR(ret);
5769 }
5770 }
5771
5772 return inode;
5773 }
5774
/*
 * Convenience wrapper around btrfs_iget_path() for callers that have no path
 * to pass in (a NULL path is used).  Same return values as btrfs_iget_path().
 */
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root)
{
	return btrfs_iget_path(s, ino, root, NULL);
}
5779
5780 static struct inode *new_simple_dir(struct super_block *s,
5781 struct btrfs_key *key,
5782 struct btrfs_root *root)
5783 {
5784 struct inode *inode = new_inode(s);
5785
5786 if (!inode)
5787 return ERR_PTR(-ENOMEM);
5788
5789 BTRFS_I(inode)->root = btrfs_grab_root(root);
5790 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5791 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5792
5793 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5794
5795
5796
5797
5798 inode->i_op = &simple_dir_inode_operations;
5799 inode->i_opflags &= ~IOP_XATTR;
5800 inode->i_fop = &simple_dir_operations;
5801 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5802 inode->i_mtime = current_time(inode);
5803 inode->i_atime = inode->i_mtime;
5804 inode->i_ctime = inode->i_mtime;
5805 BTRFS_I(inode)->i_otime = inode->i_mtime;
5806
5807 return inode;
5808 }
5809
/*
 * Compile-time checks that the btrfs file type constants have the same values
 * as the generic FT_* constants, so the two sets can be compared and passed
 * around without translation (btrfs_lookup_dentry() compares the result of
 * fs_umode_to_ftype() directly against the type stored in a dir item).
 */
static_assert(BTRFS_FT_UNKNOWN == FT_UNKNOWN);
static_assert(BTRFS_FT_REG_FILE == FT_REG_FILE);
static_assert(BTRFS_FT_DIR == FT_DIR);
static_assert(BTRFS_FT_CHRDEV == FT_CHRDEV);
static_assert(BTRFS_FT_BLKDEV == FT_BLKDEV);
static_assert(BTRFS_FT_FIFO == FT_FIFO);
static_assert(BTRFS_FT_SOCK == FT_SOCK);
static_assert(BTRFS_FT_SYMLINK == FT_SYMLINK);
5818
/*
 * Return the file type value derived from the inode's mode via
 * fs_umode_to_ftype().  Callers in this file store it in dir items and compare
 * it against the type read back from them.
 */
static inline u8 btrfs_inode_type(struct inode *inode)
{
	return fs_umode_to_ftype(inode->i_mode);
}
5823
5824 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5825 {
5826 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
5827 struct inode *inode;
5828 struct btrfs_root *root = BTRFS_I(dir)->root;
5829 struct btrfs_root *sub_root = root;
5830 struct btrfs_key location;
5831 u8 di_type = 0;
5832 int ret = 0;
5833
5834 if (dentry->d_name.len > BTRFS_NAME_LEN)
5835 return ERR_PTR(-ENAMETOOLONG);
5836
5837 ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
5838 if (ret < 0)
5839 return ERR_PTR(ret);
5840
5841 if (location.type == BTRFS_INODE_ITEM_KEY) {
5842 inode = btrfs_iget(dir->i_sb, location.objectid, root);
5843 if (IS_ERR(inode))
5844 return inode;
5845
5846
5847 if (btrfs_inode_type(inode) != di_type) {
5848 btrfs_crit(fs_info,
5849 "inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
5850 inode->i_mode, btrfs_inode_type(inode),
5851 di_type);
5852 iput(inode);
5853 return ERR_PTR(-EUCLEAN);
5854 }
5855 return inode;
5856 }
5857
5858 ret = fixup_tree_root_location(fs_info, dir, dentry,
5859 &location, &sub_root);
5860 if (ret < 0) {
5861 if (ret != -ENOENT)
5862 inode = ERR_PTR(ret);
5863 else
5864 inode = new_simple_dir(dir->i_sb, &location, root);
5865 } else {
5866 inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
5867 btrfs_put_root(sub_root);
5868
5869 if (IS_ERR(inode))
5870 return inode;
5871
5872 down_read(&fs_info->cleanup_work_sem);
5873 if (!sb_rdonly(inode->i_sb))
5874 ret = btrfs_orphan_cleanup(sub_root);
5875 up_read(&fs_info->cleanup_work_sem);
5876 if (ret) {
5877 iput(inode);
5878 inode = ERR_PTR(ret);
5879 }
5880 }
5881
5882 return inode;
5883 }
5884
5885 static int btrfs_dentry_delete(const struct dentry *dentry)
5886 {
5887 struct btrfs_root *root;
5888 struct inode *inode = d_inode(dentry);
5889
5890 if (!inode && !IS_ROOT(dentry))
5891 inode = d_inode(dentry->d_parent);
5892
5893 if (inode) {
5894 root = BTRFS_I(inode)->root;
5895 if (btrfs_root_refs(&root->root_item) == 0)
5896 return 1;
5897
5898 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5899 return 1;
5900 }
5901 return 0;
5902 }
5903
5904 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5905 unsigned int flags)
5906 {
5907 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
5908
5909 if (inode == ERR_PTR(-ENOENT))
5910 inode = NULL;
5911 return d_splice_alias(inode, dentry);
5912 }
5913
5914
5915
5916
5917
5918
5919
5920
5921
5922
5923 static int btrfs_opendir(struct inode *inode, struct file *file)
5924 {
5925 struct btrfs_file_private *private;
5926
5927 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5928 if (!private)
5929 return -ENOMEM;
5930 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5931 if (!private->filldir_buf) {
5932 kfree(private);
5933 return -ENOMEM;
5934 }
5935 file->private_data = private;
5936 return 0;
5937 }
5938
/*
 * Header of one staged readdir entry in the filldir buffer.  The name bytes
 * follow the header directly, and entries are packed back to back, so the
 * fields are read and written with get_unaligned()/put_unaligned().
 */
struct dir_entry {
	/* Inode number reported for the entry (objectid of the dir item key). */
	u64 ino;
	/* Offset of the dir index key; becomes ctx->pos when emitted. */
	u64 offset;
	/* Entry type as produced by fs_ftype_to_dtype(). */
	unsigned type;
	/* Number of name bytes stored after this header. */
	int name_len;
};
5945
5946 static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5947 {
5948 while (entries--) {
5949 struct dir_entry *entry = addr;
5950 char *name = (char *)(entry + 1);
5951
5952 ctx->pos = get_unaligned(&entry->offset);
5953 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5954 get_unaligned(&entry->ino),
5955 get_unaligned(&entry->type)))
5956 return 1;
5957 addr += sizeof(struct dir_entry) +
5958 get_unaligned(&entry->name_len);
5959 ctx->pos++;
5960 }
5961 return 0;
5962 }
5963
/*
 * Readdir implementation.
 *
 * After "." and "..", the directory's DIR_INDEX items are walked starting at
 * ctx->pos.  Entries are first staged in the per-file buffer allocated by
 * btrfs_opendir() and emitted in batches by btrfs_filldir(); the path is
 * released before each batch is emitted.  Delayed deletions are filtered out
 * during the walk and delayed insertions are emitted at the end.
 *
 * Returns 0, including when the consumer stopped accepting entries, or a
 * negative errno from path allocation or from the tree walk.
 */
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_private *private = file->private_data;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	void *addr;
	struct list_head ins_list;
	struct list_head del_list;
	int ret;
	char *name_ptr;
	int name_len;
	int entries = 0;
	int total_len = 0;
	bool put = false;
	struct btrfs_key location;

	if (!dir_emit_dots(file, ctx))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	addr = private->filldir_buf;
	path->reada = READA_FORWARD;

	/* Collect the pending (delayed) insertions and deletions. */
	INIT_LIST_HEAD(&ins_list);
	INIT_LIST_HEAD(&del_list);
	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);

again:
	/* (Re)start the walk at the current position. */
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = ctx->pos;
	key.objectid = btrfs_ino(BTRFS_I(inode));

	btrfs_for_each_slot(root, &key, &found_key, path, ret) {
		struct dir_entry *entry;
		struct extent_buffer *leaf = path->nodes[0];

		/* Stop once the items no longer are dir indexes of this dir. */
		if (found_key.objectid != key.objectid)
			break;
		if (found_key.type != BTRFS_DIR_INDEX_KEY)
			break;
		if (found_key.offset < ctx->pos)
			continue;
		/* Skip entries that have a deletion pending. */
		if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
			continue;
		di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
		name_len = btrfs_dir_name_len(leaf, di);
		/*
		 * The staging buffer cannot take this entry: release the path,
		 * emit what has been staged so far and restart the walk from
		 * the updated ctx->pos with an empty buffer.
		 */
		if ((total_len + sizeof(struct dir_entry) + name_len) >=
		    PAGE_SIZE) {
			btrfs_release_path(path);
			ret = btrfs_filldir(private->filldir_buf, entries, ctx);
			if (ret)
				goto nopos;
			addr = private->filldir_buf;
			entries = 0;
			total_len = 0;
			goto again;
		}

		/* Stage the entry: header first, name bytes right behind it. */
		entry = addr;
		put_unaligned(name_len, &entry->name_len);
		name_ptr = (char *)(entry + 1);
		read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
				   name_len);
		put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
				&entry->type);
		btrfs_dir_item_key_to_cpu(leaf, di, &location);
		put_unaligned(location.objectid, &entry->ino);
		put_unaligned(found_key.offset, &entry->offset);
		entries++;
		addr += sizeof(struct dir_entry) + name_len;
		total_len += sizeof(struct dir_entry) + name_len;
	}
	/* A negative value here is an error from the iteration itself. */
	if (ret < 0)
		goto err;

	btrfs_release_path(path);

	/* Emit whatever is still staged. */
	ret = btrfs_filldir(private->filldir_buf, entries, ctx);
	if (ret)
		goto nopos;

	/* Then the entries that only exist as delayed insertions. */
	ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
	if (ret)
		goto nopos;

	/*
	 * Everything has been returned: park the position at a sentinel,
	 * INT_MAX normally, or LLONG_MAX if the position already reached
	 * INT_MAX.
	 * NOTE(review): presumably this keeps entries created afterwards from
	 * being returned by later calls on this file, while staying within
	 * 32 bits whenever possible - confirm.
	 */
	if (ctx->pos >= INT_MAX)
		ctx->pos = LLONG_MAX;
	else
		ctx->pos = INT_MAX;
nopos:
	/* The consumer stopping early is not an error. */
	ret = 0;
err:
	if (put)
		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
6086
6087
6088
6089
6090
6091
6092
6093 static int btrfs_dirty_inode(struct inode *inode)
6094 {
6095 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6096 struct btrfs_root *root = BTRFS_I(inode)->root;
6097 struct btrfs_trans_handle *trans;
6098 int ret;
6099
6100 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6101 return 0;
6102
6103 trans = btrfs_join_transaction(root);
6104 if (IS_ERR(trans))
6105 return PTR_ERR(trans);
6106
6107 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
6108 if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
6109
6110 btrfs_end_transaction(trans);
6111 trans = btrfs_start_transaction(root, 1);
6112 if (IS_ERR(trans))
6113 return PTR_ERR(trans);
6114
6115 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
6116 }
6117 btrfs_end_transaction(trans);
6118 if (BTRFS_I(inode)->delayed_node)
6119 btrfs_balance_delayed_items(fs_info);
6120
6121 return ret;
6122 }
6123
6124
6125
6126
6127
6128 static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
6129 int flags)
6130 {
6131 struct btrfs_root *root = BTRFS_I(inode)->root;
6132 bool dirty = flags & ~S_VERSION;
6133
6134 if (btrfs_root_readonly(root))
6135 return -EROFS;
6136
6137 if (flags & S_VERSION)
6138 dirty |= inode_maybe_inc_iversion(inode, dirty);
6139 if (flags & S_CTIME)
6140 inode->i_ctime = *now;
6141 if (flags & S_MTIME)
6142 inode->i_mtime = *now;
6143 if (flags & S_ATIME)
6144 inode->i_atime = *now;
6145 return dirty ? btrfs_dirty_inode(inode) : 0;
6146 }
6147
6148
6149
6150
6151
6152
6153 static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
6154 {
6155 struct btrfs_root *root = inode->root;
6156 struct btrfs_key key, found_key;
6157 struct btrfs_path *path;
6158 struct extent_buffer *leaf;
6159 int ret;
6160
6161 key.objectid = btrfs_ino(inode);
6162 key.type = BTRFS_DIR_INDEX_KEY;
6163 key.offset = (u64)-1;
6164
6165 path = btrfs_alloc_path();
6166 if (!path)
6167 return -ENOMEM;
6168
6169 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6170 if (ret < 0)
6171 goto out;
6172
6173 if (ret == 0)
6174 goto out;
6175 ret = 0;
6176
6177 if (path->slots[0] == 0) {
6178 inode->index_cnt = BTRFS_DIR_START_INDEX;
6179 goto out;
6180 }
6181
6182 path->slots[0]--;
6183
6184 leaf = path->nodes[0];
6185 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6186
6187 if (found_key.objectid != btrfs_ino(inode) ||
6188 found_key.type != BTRFS_DIR_INDEX_KEY) {
6189 inode->index_cnt = BTRFS_DIR_START_INDEX;
6190 goto out;
6191 }
6192
6193 inode->index_cnt = found_key.offset + 1;
6194 out:
6195 btrfs_free_path(path);
6196 return ret;
6197 }
6198
6199
6200
6201
6202
6203 int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
6204 {
6205 int ret = 0;
6206
6207 if (dir->index_cnt == (u64)-1) {
6208 ret = btrfs_inode_delayed_dir_index_count(dir);
6209 if (ret) {
6210 ret = btrfs_set_inode_index_count(dir);
6211 if (ret)
6212 return ret;
6213 }
6214 }
6215
6216 *index = dir->index_cnt;
6217 dir->index_cnt++;
6218
6219 return ret;
6220 }
6221
6222 static int btrfs_insert_inode_locked(struct inode *inode)
6223 {
6224 struct btrfs_iget_args args;
6225
6226 args.ino = BTRFS_I(inode)->location.objectid;
6227 args.root = BTRFS_I(inode)->root;
6228
6229 return insert_inode_locked4(inode,
6230 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6231 btrfs_find_actor, &args);
6232 }
6233
6234 int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
6235 unsigned int *trans_num_items)
6236 {
6237 struct inode *dir = args->dir;
6238 struct inode *inode = args->inode;
6239 int ret;
6240
6241 ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl);
6242 if (ret)
6243 return ret;
6244
6245
6246 *trans_num_items = 1;
6247
6248 if (BTRFS_I(dir)->prop_compress)
6249 (*trans_num_items)++;
6250
6251 if (args->default_acl)
6252 (*trans_num_items)++;
6253
6254 if (args->acl)
6255 (*trans_num_items)++;
6256 #ifdef CONFIG_SECURITY
6257
6258 if (dir->i_security)
6259 (*trans_num_items)++;
6260 #endif
6261 if (args->orphan) {
6262
6263 (*trans_num_items)++;
6264 } else {
6265
6266
6267
6268
6269
6270
6271
6272
6273
6274 *trans_num_items += 3;
6275 }
6276 return 0;
6277 }
6278
/*
 * Release the ACL references held in @args, i.e. the ones that
 * btrfs_new_inode_prepare() obtained from posix_acl_create().
 */
void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args)
{
	posix_acl_release(args->acl);
	posix_acl_release(args->default_acl);
}
6284
6285
6286
6287
6288
6289
6290 static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
6291 {
6292 unsigned int flags;
6293
6294 flags = BTRFS_I(dir)->flags;
6295
6296 if (flags & BTRFS_INODE_NOCOMPRESS) {
6297 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
6298 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
6299 } else if (flags & BTRFS_INODE_COMPRESS) {
6300 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
6301 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
6302 }
6303
6304 if (flags & BTRFS_INODE_NODATACOW) {
6305 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
6306 if (S_ISREG(inode->i_mode))
6307 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6308 }
6309
6310 btrfs_sync_inode_flags_to_i_flags(inode);
6311 }
6312
/*
 * Create the on-disk items for a new inode and make it visible.
 *
 * @trans: transaction handle with space reserved for the new items
 * @args:  description of the inode to create (inode, parent dir, dentry,
 *         orphan/subvol markers, ACLs), as prepared by
 *         btrfs_new_inode_prepare()
 *
 * The function allocates an inode number, inserts the inode into the inode
 * hash, inserts the inode item (together with the inode ref unless the inode
 * is an orphan), inherits properties, initialises security data (not for
 * subvolumes), and finally either adds an orphan item or links the inode into
 * @args->dir.
 *
 * Returns 0 on success or a negative errno.  Failures after the inode was
 * hashed abort the transaction and discard the new inode; the caller's
 * reference to the inode stays valid in every case.
 */
int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
			   struct btrfs_new_inode_args *args)
{
	struct inode *dir = args->dir;
	struct inode *inode = args->inode;
	const char *name = args->orphan ? NULL : args->dentry->d_name.name;
	int name_len = args->orphan ? 0 : args->dentry->d_name.len;
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_root *root;
	struct btrfs_inode_item *inode_item;
	struct btrfs_key *location;
	struct btrfs_path *path;
	u64 objectid;
	struct btrfs_inode_ref *ref;
	struct btrfs_key key[2];
	u32 sizes[2];
	struct btrfs_item_batch batch;
	unsigned long ptr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Regular inodes live in their parent directory's root.  For a
	 * subvolume the root is not set here; it is expected to be set on the
	 * inode already.
	 */
	if (!args->subvol)
		BTRFS_I(inode)->root = btrfs_grab_root(BTRFS_I(dir)->root);
	root = BTRFS_I(inode)->root;

	ret = btrfs_get_free_objectid(root, &objectid);
	if (ret)
		goto out;
	inode->i_ino = objectid;

	if (args->orphan) {
		/*
		 * An orphan has no directory entry: set the link count to 0
		 * before the inode item is filled in below.
		 */
		set_nlink(inode, 0);
	} else {
		trace_btrfs_inode_request(dir);

		/* Reserve the index of the new entry in the parent dir. */
		ret = btrfs_set_inode_index(BTRFS_I(dir), &BTRFS_I(inode)->dir_index);
		if (ret)
			goto out;
	}
	/* Index counter of the new inode itself (relevant if it is a dir). */
	BTRFS_I(inode)->index_cnt = BTRFS_DIR_START_INDEX;
	BTRFS_I(inode)->generation = trans->transid;
	inode->i_generation = BTRFS_I(inode)->generation;

	/* Subvolumes do not inherit inode flags from the parent directory. */
	if (!args->subvol)
		btrfs_inherit_iflags(inode, dir);

	/* Apply the nodatasum/nodatacow mount options to regular files. */
	if (S_ISREG(inode->i_mode)) {
		if (btrfs_test_opt(fs_info, NODATASUM))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
		if (btrfs_test_opt(fs_info, NODATACOW))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
				BTRFS_INODE_NODATASUM;
	}

	location = &BTRFS_I(inode)->location;
	location->objectid = objectid;
	location->offset = 0;
	location->type = BTRFS_INODE_ITEM_KEY;

	ret = btrfs_insert_inode_locked(inode);
	if (ret < 0) {
		/* Give back the directory index reserved above. */
		if (!args->orphan)
			BTRFS_I(dir)->index_cnt--;
		goto out;
	}

	/*
	 * NOTE(review): the new inode is marked as needing a full sync,
	 * presumably so that stale log state of a previous user of the same
	 * inode number cannot be reused - confirm.
	 */
	btrfs_set_inode_full_sync(BTRFS_I(inode));

	key[0].objectid = objectid;
	key[0].type = BTRFS_INODE_ITEM_KEY;
	key[0].offset = 0;

	sizes[0] = sizeof(struct btrfs_inode_item);

	if (!args->orphan) {
		/*
		 * The inode ref is inserted in the same batch as the inode
		 * item.  A subvolume's top directory references itself under
		 * the two-byte name "..", everything else references the
		 * parent directory under the dentry's name.
		 */
		key[1].objectid = objectid;
		key[1].type = BTRFS_INODE_REF_KEY;
		if (args->subvol) {
			key[1].offset = objectid;
			sizes[1] = 2 + sizeof(*ref);
		} else {
			key[1].offset = btrfs_ino(BTRFS_I(dir));
			sizes[1] = name_len + sizeof(*ref);
		}
	}

	batch.keys = &key[0];
	batch.data_sizes = &sizes[0];
	batch.total_data_size = sizes[0] + (args->orphan ? 0 : sizes[1]);
	batch.nr = args->orphan ? 1 : 2;
	ret = btrfs_insert_empty_items(trans, root, path, &batch);
	if (ret != 0) {
		btrfs_abort_transaction(trans, ret);
		goto discard;
	}

	inode->i_mtime = current_time(inode);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	/*
	 * Zero the freshly inserted inode item before filling it in, so that
	 * bytes not written by fill_inode_item() are zero.
	 */
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_inode_item);
	memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
			     sizeof(*inode_item));
	fill_inode_item(trans, path->nodes[0], inode_item, inode);

	if (!args->orphan) {
		/* The ref sits in the next slot; its name follows the header. */
		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
				     struct btrfs_inode_ref);
		ptr = (unsigned long)(ref + 1);
		if (args->subvol) {
			btrfs_set_inode_ref_name_len(path->nodes[0], ref, 2);
			btrfs_set_inode_ref_index(path->nodes[0], ref, 0);
			write_extent_buffer(path->nodes[0], "..", ptr, 2);
		} else {
			btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
			btrfs_set_inode_ref_index(path->nodes[0], ref,
						  BTRFS_I(inode)->dir_index);
			write_extent_buffer(path->nodes[0], name, ptr, name_len);
		}
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	/*
	 * The path is not used past this point; free it now and reset the
	 * pointer so the btrfs_free_path() at 'out' gets NULL.
	 */
	btrfs_free_path(path);
	path = NULL;

	if (args->subvol) {
		struct inode *parent;

		/*
		 * A subvolume inherits its properties from the top directory
		 * (BTRFS_FIRST_FREE_OBJECTID) of the root that contains @dir,
		 * not from @dir itself.
		 */
		parent = btrfs_iget(fs_info->sb, BTRFS_FIRST_FREE_OBJECTID,
				    BTRFS_I(dir)->root);
		if (IS_ERR(parent)) {
			ret = PTR_ERR(parent);
		} else {
			ret = btrfs_inode_inherit_props(trans, inode, parent);
			iput(parent);
		}
	} else {
		ret = btrfs_inode_inherit_props(trans, inode, dir);
	}
	/* Failing to inherit properties is only logged, not fatal. */
	if (ret) {
		btrfs_err(fs_info,
			  "error inheriting props for ino %llu (root %llu): %d",
			  btrfs_ino(BTRFS_I(inode)), root->root_key.objectid,
			  ret);
	}

	/* Security initialisation is skipped for subvolumes. */
	if (!args->subvol) {
		ret = btrfs_init_inode_security(trans, args);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto discard;
		}
	}

	inode_tree_add(inode);

	trace_btrfs_inode_new(inode);
	btrfs_set_inode_last_trans(trans, BTRFS_I(inode));

	btrfs_update_root_times(trans, root);

	/* Either record the inode as an orphan or link it into the dir. */
	if (args->orphan) {
		ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	} else {
		ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
				     name_len, 0, BTRFS_I(inode)->dir_index);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto discard;
	}

	return 0;

discard:
	/*
	 * Take an extra reference before discarding the inode.
	 * NOTE(review): presumably discard_new_inode() drops a reference while
	 * the caller keeps owning its own (btrfs_create_common() does an
	 * iput() on error) - confirm.
	 */
	ihold(inode);
	discard_new_inode(inode);
out:
	btrfs_free_path(path);
	return ret;
}
6543
6544
6545
6546
6547
6548
6549
6550 int btrfs_add_link(struct btrfs_trans_handle *trans,
6551 struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
6552 const char *name, int name_len, int add_backref, u64 index)
6553 {
6554 int ret = 0;
6555 struct btrfs_key key;
6556 struct btrfs_root *root = parent_inode->root;
6557 u64 ino = btrfs_ino(inode);
6558 u64 parent_ino = btrfs_ino(parent_inode);
6559
6560 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6561 memcpy(&key, &inode->root->root_key, sizeof(key));
6562 } else {
6563 key.objectid = ino;
6564 key.type = BTRFS_INODE_ITEM_KEY;
6565 key.offset = 0;
6566 }
6567
6568 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6569 ret = btrfs_add_root_ref(trans, key.objectid,
6570 root->root_key.objectid, parent_ino,
6571 index, name, name_len);
6572 } else if (add_backref) {
6573 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
6574 parent_ino, index);
6575 }
6576
6577
6578 if (ret)
6579 return ret;
6580
6581 ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
6582 btrfs_inode_type(&inode->vfs_inode), index);
6583 if (ret == -EEXIST || ret == -EOVERFLOW)
6584 goto fail_dir_item;
6585 else if (ret) {
6586 btrfs_abort_transaction(trans, ret);
6587 return ret;
6588 }
6589
6590 btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
6591 name_len * 2);
6592 inode_inc_iversion(&parent_inode->vfs_inode);
6593
6594
6595
6596
6597
6598
6599 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
6600 struct timespec64 now = current_time(&parent_inode->vfs_inode);
6601
6602 parent_inode->vfs_inode.i_mtime = now;
6603 parent_inode->vfs_inode.i_ctime = now;
6604 }
6605 ret = btrfs_update_inode(trans, root, parent_inode);
6606 if (ret)
6607 btrfs_abort_transaction(trans, ret);
6608 return ret;
6609
6610 fail_dir_item:
6611 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6612 u64 local_index;
6613 int err;
6614 err = btrfs_del_root_ref(trans, key.objectid,
6615 root->root_key.objectid, parent_ino,
6616 &local_index, name, name_len);
6617 if (err)
6618 btrfs_abort_transaction(trans, err);
6619 } else if (add_backref) {
6620 u64 local_index;
6621 int err;
6622
6623 err = btrfs_del_inode_ref(trans, root, name, name_len,
6624 ino, parent_ino, &local_index);
6625 if (err)
6626 btrfs_abort_transaction(trans, err);
6627 }
6628
6629
6630 return ret;
6631 }
6632
6633 static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
6634 struct inode *inode)
6635 {
6636 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6637 struct btrfs_root *root = BTRFS_I(dir)->root;
6638 struct btrfs_new_inode_args new_inode_args = {
6639 .dir = dir,
6640 .dentry = dentry,
6641 .inode = inode,
6642 };
6643 unsigned int trans_num_items;
6644 struct btrfs_trans_handle *trans;
6645 int err;
6646
6647 err = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
6648 if (err)
6649 goto out_inode;
6650
6651 trans = btrfs_start_transaction(root, trans_num_items);
6652 if (IS_ERR(trans)) {
6653 err = PTR_ERR(trans);
6654 goto out_new_inode_args;
6655 }
6656
6657 err = btrfs_create_new_inode(trans, &new_inode_args);
6658 if (!err)
6659 d_instantiate_new(dentry, inode);
6660
6661 btrfs_end_transaction(trans);
6662 btrfs_btree_balance_dirty(fs_info);
6663 out_new_inode_args:
6664 btrfs_new_inode_args_destroy(&new_inode_args);
6665 out_inode:
6666 if (err)
6667 iput(inode);
6668 return err;
6669 }
6670
6671 static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
6672 struct dentry *dentry, umode_t mode, dev_t rdev)
6673 {
6674 struct inode *inode;
6675
6676 inode = new_inode(dir->i_sb);
6677 if (!inode)
6678 return -ENOMEM;
6679 inode_init_owner(mnt_userns, inode, dir, mode);
6680 inode->i_op = &btrfs_special_inode_operations;
6681 init_special_inode(inode, inode->i_mode, rdev);
6682 return btrfs_create_common(dir, dentry, inode);
6683 }
6684
6685 static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
6686 struct dentry *dentry, umode_t mode, bool excl)
6687 {
6688 struct inode *inode;
6689
6690 inode = new_inode(dir->i_sb);
6691 if (!inode)
6692 return -ENOMEM;
6693 inode_init_owner(mnt_userns, inode, dir, mode);
6694 inode->i_fop = &btrfs_file_operations;
6695 inode->i_op = &btrfs_file_inode_operations;
6696 inode->i_mapping->a_ops = &btrfs_aops;
6697 return btrfs_create_common(dir, dentry, inode);
6698 }
6699
/*
 * Create a new hard link @dentry in directory @dir that refers to the inode
 * behind @old_dentry.
 *
 * Returns 0 on success or a negative errno: -EXDEV when the inode and @dir
 * live in different roots, -EMLINK when the link count already reached
 * BTRFS_LINK_MAX, otherwise whatever the index, transaction, link or inode
 * update helpers returned.
 */
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(old_dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 index;
	int err;
	int drop_inode = 0;

	/* Do not allow links between inodes that belong to different roots. */
	if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
		return -EXDEV;

	if (inode->i_nlink >= BTRFS_LINK_MAX)
		return -EMLINK;

	/* Pick the directory index the new entry will use. */
	err = btrfs_set_inode_index(BTRFS_I(dir), &index);
	if (err)
		goto fail;

	/*
	 * Reserve 5 units, or 6 when the inode currently has no links at all.
	 * The extra unit presumably covers the orphan item deletion done
	 * further below for that case - TODO confirm the exact breakdown.
	 */
	trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		/* Keep the fail path from ending a transaction we never got. */
		trans = NULL;
		goto fail;
	}

	/* Reset the cached dir index; the inode gains another directory entry. */
	BTRFS_I(inode)->dir_index = 0ULL;
	inc_nlink(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	/* Extra reference for the new dentry (dropped again if linking fails). */
	ihold(inode);
	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);

	err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
			     dentry->d_name.name, dentry->d_name.len, 1, index);

	if (err) {
		/* Undo the link count bump and the reference taken above. */
		drop_inode = 1;
	} else {
		struct dentry *parent = dentry->d_parent;

		/*
		 * NOTE(review): the two "goto fail" exits below leave
		 * drop_inode at 0, so the inc_nlink()/ihold() above are not
		 * undone on these paths - confirm this is intended.
		 */
		err = btrfs_update_inode(trans, root, BTRFS_I(inode));
		if (err)
			goto fail;
		if (inode->i_nlink == 1) {
			/*
			 * The link count was 0 before we bumped it, so remove
			 * the orphan item (presumably an O_TMPFILE inode being
			 * linked into the namespace - TODO confirm).
			 */
			err = btrfs_orphan_del(trans, BTRFS_I(inode));
			if (err)
				goto fail;
		}
		d_instantiate(dentry, inode);
		btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
	}

fail:
	if (trans)
		btrfs_end_transaction(trans);
	if (drop_inode) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6777
6778 static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
6779 struct dentry *dentry, umode_t mode)
6780 {
6781 struct inode *inode;
6782
6783 inode = new_inode(dir->i_sb);
6784 if (!inode)
6785 return -ENOMEM;
6786 inode_init_owner(mnt_userns, inode, dir, S_IFDIR | mode);
6787 inode->i_op = &btrfs_dir_inode_operations;
6788 inode->i_fop = &btrfs_dir_file_operations;
6789 return btrfs_create_common(dir, dentry, inode);
6790 }
6791
6792 static noinline int uncompress_inline(struct btrfs_path *path,
6793 struct page *page,
6794 size_t pg_offset, u64 extent_offset,
6795 struct btrfs_file_extent_item *item)
6796 {
6797 int ret;
6798 struct extent_buffer *leaf = path->nodes[0];
6799 char *tmp;
6800 size_t max_size;
6801 unsigned long inline_size;
6802 unsigned long ptr;
6803 int compress_type;
6804
6805 WARN_ON(pg_offset != 0);
6806 compress_type = btrfs_file_extent_compression(leaf, item);
6807 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6808 inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);
6809 tmp = kmalloc(inline_size, GFP_NOFS);
6810 if (!tmp)
6811 return -ENOMEM;
6812 ptr = btrfs_file_extent_inline_start(item);
6813
6814 read_extent_buffer(leaf, tmp, ptr, inline_size);
6815
6816 max_size = min_t(unsigned long, PAGE_SIZE, max_size);
6817 ret = btrfs_decompress(compress_type, tmp, page,
6818 extent_offset, inline_size, max_size);
6819
6820
6821
6822
6823
6824
6825
6826
6827
6828 if (max_size + pg_offset < PAGE_SIZE)
6829 memzero_page(page, pg_offset + max_size,
6830 PAGE_SIZE - max_size - pg_offset);
6831 kfree(tmp);
6832 return ret;
6833 }
6834
6835
6836
6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
/*
 * Look up the first extent map covering @start within the range
 * [@start, @start + @len) of @inode.
 *
 * @inode:	inode to search
 * @page:	optional page; when non-NULL and the extent is inline, the
 *		extent data is copied (or decompressed) into it
 * @pg_offset:	offset inside @page at which inline data is placed
 * @start:	file offset where the search starts
 * @len:	length of the range of interest
 *
 * The in-memory extent map tree is consulted first.  On a miss (or when a
 * cached inline map cannot be used because a page has to be filled) the file
 * extent items are read from the b-tree, a new extent map is built and added
 * to the tree.  Ranges without a file extent item are reported as holes
 * (block_start == EXTENT_MAP_HOLE).
 *
 * Returns the extent map on success or an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
				    struct page *page, size_t pg_offset,
				    u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	int ret = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	int extent_type = -1;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = inode->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_io_tree *io_tree = &inode->io_tree;

	/* Fast path: try the cached extent maps first. */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	if (em) {
		/*
		 * Drop the cached map if it does not cover @start, or if it is
		 * an inline extent and the caller handed us a page to fill
		 * (the data has to be read from the leaf in that case).
		 * Otherwise the cached map is the answer.
		 */
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		ret = -ENOMEM;
		goto out;
	}
	/* Placeholder values; the real ones are filled in further below. */
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Ask for forward readahead on this path. */
	path->reada = READA_FORWARD;

	/*
	 * Free space inodes are searched in the commit root and without
	 * taking tree locks.  NOTE(review): the justification lives outside
	 * this function - confirm before changing.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		/* No exact match: step back to the previous item, if any. */
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
		ret = 0;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	if (found_key.objectid != objectid ||
	    found_key.type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * The item we backed up to is not a file extent of this inode.
		 * Setting extent_end to @start makes the code at "next"
		 * advance to the following item instead of declaring the
		 * whole range a hole right away.
		 */
		extent_end = start;
		goto next;
	}

	extent_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	extent_end = btrfs_file_extent_end(path);
	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		/* Regular/prealloc extents are only accepted on regular files. */
		if (!S_ISREG(inode->vfs_inode.i_mode)) {
			ret = -EUCLEAN;
			btrfs_crit(fs_info,
		"regular/prealloc extent found for non-regular inode %llu",
				   btrfs_ino(inode));
			goto out;
		}
		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
						       extent_start);
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
						      path->slots[0],
						      extent_start);
	}
next:
	if (start >= extent_end) {
		/*
		 * The item found ends at or before @start.  Look at the next
		 * item to determine where the hole starting at @start ends.
		 */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				goto not_found;

			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		/* The next extent starts beyond our range: all of it is a hole. */
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;

		/* The next extent starts inside our range: hole up to its start. */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		em->block_start = EXTENT_MAP_HOLE;
		goto insert;
	}

	/* Translate the on-disk file extent item into the extent map. */
	btrfs_extent_item_to_extent_map(inode, path, item, !page, em);

	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		/*
		 * No page to fill: return the map as filled in above (ret is
		 * 0 here) without adding it to the extent map tree.
		 */
		if (!page)
			goto out;

		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
				  size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, fs_info->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;

		if (!PageUptodate(page)) {
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, page, pg_offset,
							extent_offset, item);
				if (ret)
					goto out;
			} else {
				/* Copy the raw inline data and zero the tail. */
				map = kmap_local_page(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				if (pg_offset + copy_size < PAGE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap_local(map);
			}
			flush_dcache_page(page);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* Nothing on disk for this range: report the whole range as a hole. */
	em->start = start;
	em->orig_start = start;
	em->len = len;
	em->block_start = EXTENT_MAP_HOLE;
insert:
	ret = 0;
	btrfs_release_path(path);
	/* Sanity check: the map we built must contain @start. */
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(fs_info,
			  "bad extent! em: [%llu %llu] passed [%llu %llu]",
			  em->start, em->len, start, len);
		ret = -EIO;
		goto out;
	}

	/* May replace em with a different map, hence the double pointer. */
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
	write_unlock(&em_tree->lock);
out:
	/* path may still be NULL here (cache hit or early allocation failure). */
	btrfs_free_path(path);

	trace_btrfs_get_extent(root, inode, em);

	if (ret) {
		free_extent_map(em);
		return ERR_PTR(ret);
	}
	return em;
}
7067
7068 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
7069 u64 start, u64 len)
7070 {
7071 struct extent_map *em;
7072 struct extent_map *hole_em = NULL;
7073 u64 delalloc_start = start;
7074 u64 end;
7075 u64 delalloc_len;
7076 u64 delalloc_end;
7077 int err = 0;
7078
7079 em = btrfs_get_extent(inode, NULL, 0, start, len);
7080 if (IS_ERR(em))
7081 return em;
7082
7083
7084
7085
7086
7087
7088 if (em->block_start != EXTENT_MAP_HOLE &&
7089 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7090 return em;
7091 else
7092 hole_em = em;
7093
7094
7095 end = start + len;
7096 if (end < start)
7097 end = (u64)-1;
7098 else
7099 end -= 1;
7100
7101 em = NULL;
7102
7103
7104 delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
7105 end, len, EXTENT_DELALLOC, 1);
7106 delalloc_end = delalloc_start + delalloc_len;
7107 if (delalloc_end < delalloc_start)
7108 delalloc_end = (u64)-1;
7109
7110
7111
7112
7113
7114 if (delalloc_start > end || delalloc_end <= start) {
7115 em = hole_em;
7116 hole_em = NULL;
7117 goto out;
7118 }
7119
7120
7121
7122
7123
7124 delalloc_start = max(start, delalloc_start);
7125 delalloc_len = delalloc_end - delalloc_start;
7126
7127 if (delalloc_len > 0) {
7128 u64 hole_start;
7129 u64 hole_len;
7130 const u64 hole_end = extent_map_end(hole_em);
7131
7132 em = alloc_extent_map();
7133 if (!em) {
7134 err = -ENOMEM;
7135 goto out;
7136 }
7137
7138 ASSERT(hole_em);
7139
7140
7141
7142
7143
7144
7145
7146 if (hole_end <= start || hole_em->start > end) {
7147 free_extent_map(hole_em);
7148 hole_em = NULL;
7149 } else {
7150 hole_start = max(hole_em->start, start);
7151 hole_len = hole_end - hole_start;
7152 }
7153
7154 if (hole_em && delalloc_start > hole_start) {
7155
7156
7157
7158
7159
7160 em->len = min(hole_len, delalloc_start - hole_start);
7161 em->start = hole_start;
7162 em->orig_start = hole_start;
7163
7164
7165
7166
7167 em->block_start = hole_em->block_start;
7168 em->block_len = hole_len;
7169 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
7170 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
7171 } else {
7172
7173
7174
7175
7176 em->start = delalloc_start;
7177 em->len = delalloc_len;
7178 em->orig_start = delalloc_start;
7179 em->block_start = EXTENT_MAP_DELALLOC;
7180 em->block_len = delalloc_len;
7181 }
7182 } else {
7183 return hole_em;
7184 }
7185 out:
7186
7187 free_extent_map(hole_em);
7188 if (err) {
7189 free_extent_map(em);
7190 return ERR_PTR(err);
7191 }
7192 return em;
7193 }
7194
7195 static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
7196 const u64 start,
7197 const u64 len,
7198 const u64 orig_start,
7199 const u64 block_start,
7200 const u64 block_len,
7201 const u64 orig_block_len,
7202 const u64 ram_bytes,
7203 const int type)
7204 {
7205 struct extent_map *em = NULL;
7206 int ret;
7207
7208 if (type != BTRFS_ORDERED_NOCOW) {
7209 em = create_io_em(inode, start, len, orig_start, block_start,
7210 block_len, orig_block_len, ram_bytes,
7211 BTRFS_COMPRESS_NONE,
7212 type);
7213 if (IS_ERR(em))
7214 goto out;
7215 }
7216 ret = btrfs_add_ordered_extent(inode, start, len, len, block_start,
7217 block_len, 0,
7218 (1 << type) |
7219 (1 << BTRFS_ORDERED_DIRECT),
7220 BTRFS_COMPRESS_NONE);
7221 if (ret) {
7222 if (em) {
7223 free_extent_map(em);
7224 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
7225 }
7226 em = ERR_PTR(ret);
7227 }
7228 out:
7229
7230 return em;
7231 }
7232
7233 static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
7234 u64 start, u64 len)
7235 {
7236 struct btrfs_root *root = inode->root;
7237 struct btrfs_fs_info *fs_info = root->fs_info;
7238 struct extent_map *em;
7239 struct btrfs_key ins;
7240 u64 alloc_hint;
7241 int ret;
7242
7243 alloc_hint = get_extent_allocation_hint(inode, start, len);
7244 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
7245 0, alloc_hint, &ins, 1, 1);
7246 if (ret)
7247 return ERR_PTR(ret);
7248
7249 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
7250 ins.objectid, ins.offset, ins.offset,
7251 ins.offset, BTRFS_ORDERED_REGULAR);
7252 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
7253 if (IS_ERR(em))
7254 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
7255 1);
7256
7257 return em;
7258 }
7259
7260 static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
7261 {
7262 struct btrfs_block_group *block_group;
7263 bool readonly = false;
7264
7265 block_group = btrfs_lookup_block_group(fs_info, bytenr);
7266 if (!block_group || block_group->ro)
7267 readonly = true;
7268 if (block_group)
7269 btrfs_put_block_group(block_group);
7270 return readonly;
7271 }
7272
7273
7274
7275
7276
7277
7278
7279
7280
7281
7282
7283
7284
7285
7286
7287
7288
7289
7290
7291
7292
/*
 * Check whether a write to [@offset, @offset + *@len) can be done without
 * COW, i.e. directly into the existing file extent.
 *
 * @inode:		inode being written
 * @offset:		file offset of the write
 * @len:		in: length of the write; out (only when 1 is returned):
 *			number of bytes that can be written NOCOW
 * @orig_start:		optional; set to key.offset minus the extent offset
 *			reported by can_nocow_file_extent()
 * @orig_block_len:	optional; set to the extent's disk_num_bytes
 * @ram_bytes:		optional; set to the ram_bytes of the file extent item
 *			(written as soon as the item is found, even if 0 is
 *			returned afterwards)
 * @strict:		forwarded to can_nocow_file_extent()
 *
 * Return:
 *   1        the range can be written NOCOW, *@len was updated
 *   0        NOCOW is not possible (errors from can_nocow_file_extent() are
 *            folded into this as well)
 *   -EAGAIN  a prealloc extent has delalloc bits set in the range
 *   <0       other errors (path allocation, b-tree lookup)
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes, bool strict)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct can_nocow_file_extent_args nocow_args = { 0 };
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	int found_type;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(inode)), offset, 0);
	if (ret < 0)
		goto out;

	if (ret == 1) {
		if (path->slots[0] == 0) {
			/* No previous item to look at: must COW. */
			ret = 0;
			goto out;
		}
		path->slots[0]--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* Not a file extent item of this inode: must COW. */
		goto out;
	}

	if (key.offset > offset) {
		/* The extent starts after @offset: must COW. */
		goto out;
	}

	/* The extent ends at or before @offset: must COW. */
	if (btrfs_file_extent_end(path) <= offset)
		goto out;

	fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (ram_bytes)
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);

	nocow_args.start = offset;
	nocow_args.end = offset + *len - 1;
	nocow_args.strict = strict;
	nocow_args.free_path = true;

	ret = can_nocow_file_extent(path, &key, BTRFS_I(inode), &nocow_args);
	/*
	 * free_path was set above, so the path is presumably released by
	 * can_nocow_file_extent(); clear our pointer so the exit path does
	 * not free it a second time.
	 */
	path = NULL;

	if (ret != 1) {
		/* Anything but 1, errors included, means we cannot NOCOW. */
		ret = 0;
		goto out;
	}

	ret = 0;
	if (btrfs_extent_readonly(fs_info, nocow_args.disk_bytenr))
		goto out;

	/*
	 * For prealloc extents on inodes without the NODATACOW flag, refuse
	 * with -EAGAIN if any part of the range has delalloc bits set.
	 */
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		range_end = round_up(offset + nocow_args.num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	if (orig_start)
		*orig_start = key.offset - nocow_args.extent_offset;
	if (orig_block_len)
		*orig_block_len = nocow_args.disk_num_bytes;

	*len = nocow_args.num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7391
/*
 * Lock the file range [@lockstart, @lockend] in the inode's io tree for
 * direct IO, making sure no ordered extent (and, for writes, no page cache
 * page) exists in the range while the lock is held.
 *
 * @iomap_flags: IOMAP_WRITE selects the write rules, IOMAP_NOWAIT forbids
 *		 blocking.
 *
 * Returns 0 with the range locked.  On any non-zero return the range is not
 * locked: -EAGAIN for NOWAIT requests that would have to wait, -ENOTBLK when
 * the caller should fall back (presumably to buffered IO - confirm against
 * the caller).
 */
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			      struct extent_state **cached_state,
			      unsigned int iomap_flags)
{
	const bool writing = (iomap_flags & IOMAP_WRITE);
	const bool nowait = (iomap_flags & IOMAP_NOWAIT);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	int ret = 0;

	while (1) {
		if (nowait) {
			/* The trylock variant does not use cached_state. */
			if (!try_lock_extent(io_tree, lockstart, lockend))
				return -EAGAIN;
		} else {
			lock_extent_bits(io_tree, lockstart, lockend, cached_state);
		}

		/*
		 * The whole range is going to be used for direct IO, so there
		 * must be no ordered extent anywhere in it.
		 */
		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
						     lockend - lockstart + 1);

		/*
		 * Done if there is no ordered extent and, for writes, no page
		 * cache page in the range either.  Reads do not check the
		 * page cache here.
		 */
		if (!ordered &&
		    (!writing || !filemap_range_has_page(inode->i_mapping,
							 lockstart, lockend)))
			break;

		unlock_extent_cached(io_tree, lockstart, lockend, cached_state);

		if (ordered) {
			if (nowait) {
				btrfs_put_ordered_extent(ordered);
				ret = -EAGAIN;
				break;
			}

			/*
			 * Writes always wait for the ordered extent and retry.
			 * Reads only wait if the ordered extent stems from
			 * direct IO; for any other ordered extent the read
			 * gives up with -ENOTBLK instead of waiting.
			 * NOTE(review): presumably waiting could deadlock
			 * against buffered writers - confirm.  nowait is
			 * always false at this point, so the ternary below
			 * always yields -ENOTBLK.
			 */
			if (writing ||
			    test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
				btrfs_start_ordered_extent(ordered, 1);
			else
				ret = nowait ? -EAGAIN : -ENOTBLK;
			btrfs_put_ordered_extent(ordered);
		} else {
			/*
			 * A write found pages in the page cache for this
			 * range.  Do not try to flush/invalidate them here;
			 * report -EAGAIN (NOWAIT) or -ENOTBLK and let the
			 * caller deal with it.
			 */
			ret = nowait ? -EAGAIN : -ENOTBLK;
		}

		if (ret)
			break;

		cond_resched();
	}

	return ret;
}
7483
7484
7485 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
7486 u64 len, u64 orig_start, u64 block_start,
7487 u64 block_len, u64 orig_block_len,
7488 u64 ram_bytes, int compress_type,
7489 int type)
7490 {
7491 struct extent_map_tree *em_tree;
7492 struct extent_map *em;
7493 int ret;
7494
7495 ASSERT(type == BTRFS_ORDERED_PREALLOC ||
7496 type == BTRFS_ORDERED_COMPRESSED ||
7497 type == BTRFS_ORDERED_NOCOW ||
7498 type == BTRFS_ORDERED_REGULAR);
7499
7500 em_tree = &inode->extent_tree;
7501 em = alloc_extent_map();
7502 if (!em)
7503 return ERR_PTR(-ENOMEM);
7504
7505 em->start = start;
7506 em->orig_start = orig_start;
7507 em->len = len;
7508 em->block_len = block_len;
7509 em->block_start = block_start;
7510 em->orig_block_len = orig_block_len;
7511 em->ram_bytes = ram_bytes;
7512 em->generation = -1;
7513 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7514 if (type == BTRFS_ORDERED_PREALLOC) {
7515 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7516 } else if (type == BTRFS_ORDERED_COMPRESSED) {
7517 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
7518 em->compress_type = compress_type;
7519 }
7520
7521 do {
7522 btrfs_drop_extent_cache(inode, em->start,
7523 em->start + em->len - 1, 0);
7524 write_lock(&em_tree->lock);
7525 ret = add_extent_mapping(em_tree, em, 1);
7526 write_unlock(&em_tree->lock);
7527
7528
7529
7530
7531 } while (ret == -EEXIST);
7532
7533 if (ret) {
7534 free_extent_map(em);
7535 return ERR_PTR(ret);
7536 }
7537
7538
7539 return em;
7540 }
7541
7542
/*
 * Write-side helper of the direct IO iomap_begin path: decide between a
 * NOCOW write into the existing extent and allocating a new extent, and set
 * up the ordered extent for [@start, @start + @len).
 *
 * @map:	in: extent map found for @start; out: the extent map to use
 *		for the IO.  The input map is released here whenever it is not
 *		the one handed back (on several error paths *@map is set to
 *		NULL).
 * @dio_data:	per-iteration direct IO state; nocow_done is set when the
 *		NOCOW path was taken, data_space_reserved is consulted for the
 *		COW path.
 *
 * Returns 0 on success or a negative errno (-EAGAIN for NOWAIT requests that
 * cannot proceed without blocking, -ENOSPC when COW is required but no data
 * space was reserved up front).
 */
static int btrfs_get_blocks_direct_write(struct extent_map **map,
					 struct inode *inode,
					 struct btrfs_dio_data *dio_data,
					 u64 start, u64 len,
					 unsigned int iomap_flags)
{
	const bool nowait = (iomap_flags & IOMAP_NOWAIT);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = *map;
	int type;
	u64 block_start, orig_start, orig_block_len, ram_bytes;
	struct btrfs_block_group *bg;
	bool can_nocow = false;
	bool space_reserved = false;
	u64 prev_len;
	int ret = 0;

	/*
	 * No new extent is allocated in two cases:
	 *
	 * 1) The extent map is flagged PREALLOC.
	 * 2) The inode is NODATACOW and the extent map is not a hole.
	 *
	 * In both cases the existing extent is used, provided
	 * can_nocow_extent() agrees and a nocow writer reference on the block
	 * group can be taken.
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		if (can_nocow_extent(inode, start, &len, &orig_start,
				     &orig_block_len, &ram_bytes, false) == 1) {
			/* bg is only valid (and only used) when can_nocow is true. */
			bg = btrfs_inc_nocow_writers(fs_info, block_start);
			if (bg)
				can_nocow = true;
		}
	}

	/* Remember the length the metadata reservation below is made for. */
	prev_len = len;
	if (can_nocow) {
		struct extent_map *em2;

		/* NOCOW: only metadata space needs to be reserved. */
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len,
						      nowait);
		if (ret < 0) {
			/* The input extent map is released on this error path too. */
			free_extent_map(em);
			*map = NULL;
			btrfs_dec_nocow_writers(bg);
			if (nowait && (ret == -ENOSPC || ret == -EDQUOT))
				ret = -EAGAIN;
			goto out;
		}
		space_reserved = true;

		em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
					      orig_start, block_start,
					      len, orig_block_len,
					      ram_bytes, type);
		btrfs_dec_nocow_writers(bg);
		/*
		 * For prealloc extents the newly created map replaces the
		 * input one (em2 may be an ERR_PTR here, checked right below).
		 */
		if (type == BTRFS_ORDERED_PREALLOC) {
			free_extent_map(em);
			*map = em2;
			em = em2;
		}

		if (IS_ERR(em2)) {
			ret = PTR_ERR(em2);
			goto out;
		}

		dio_data->nocow_done = true;
	} else {
		/* COW path: the input extent map is not used any further. */
		free_extent_map(em);
		*map = NULL;

		/* NOWAIT requests are not served on the COW path. */
		if (nowait)
			return -EAGAIN;

		/*
		 * A COW write needs data space; if none could be reserved
		 * before the range was locked, fail.
		 */
		if (!dio_data->data_space_reserved)
			return -ENOSPC;

		/*
		 * Data space is already reserved (see the check above), so
		 * only the metadata reservation is left to do here.
		 */
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len,
						      false);
		if (ret < 0)
			goto out;
		space_reserved = true;

		em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}
		*map = em;
		/* The new extent may be shorter; give back the excess metadata. */
		len = min(len, em->len - (start - em->start));
		if (len < prev_len)
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
							prev_len - len, true);
	}

	/*
	 * The ordered extent exists now, so the outstanding extent
	 * reservation taken for prev_len can be released.
	 */
	btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len);

	/*
	 * Extend i_size if the write goes past it.  NOTE(review): presumably
	 * done here because the extent range is still locked by the caller -
	 * confirm.
	 */
	if (start + len > i_size_read(inode))
		i_size_write(inode, start + len);
out:
	/* On failure undo the metadata reservation made in this function. */
	if (ret && space_reserved) {
		btrfs_delalloc_release_extents(BTRFS_I(inode), len);
		btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
	}
	return ret;
}
7678
/*
 * iomap_begin callback for btrfs direct IO.
 *
 * Locks the file range, looks up (and for writes creates) the extent
 * backing @start and fills in @iomap with the resulting mapping.  The
 * per-iteration state is kept in the struct btrfs_dio_data found through the
 * enclosing iomap_iter's ->private.  @srcmap is not used here.
 *
 * Returns 0 on success.  -EAGAIN is returned for NOWAIT requests that would
 * have to block; -ENOTBLK is returned for compressed or inline extents (and
 * may be passed up from lock_extent_direct()), presumably so the caller
 * falls back to buffered IO - confirm against the caller.
 */
static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
		loff_t length, unsigned int flags, struct iomap *iomap,
		struct iomap *srcmap)
{
	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = iter->private;
	u64 lockstart, lockend;
	const bool write = !!(flags & IOMAP_WRITE);
	int ret = 0;
	u64 len = length;
	const u64 data_alloc_len = length;
	bool unlock_extents = false;

	/*
	 * NOWAIT reads larger than one page are refused up front.
	 * NOTE(review): presumably because such a read could be submitted
	 * only partially and end up as an unexpected short read - confirm.
	 */
	if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE)
		return -EAGAIN;

	/*
	 * Reads are capped at BTRFS_MAX_BIO_SECTORS sectors per mapping
	 * (presumably to bound the checksum array allocated per bio).
	 */
	if (!write)
		len = min_t(u64, len, fs_info->sectorsize * BTRFS_MAX_BIO_SECTORS);

	lockstart = start;
	lockend = start + len - 1;

	/*
	 * When the inode has async extents, the dirty pages of the range are
	 * written out once more here (NOTE(review): presumably because the
	 * flush done by the generic direct IO code is not sufficient for
	 * compressed writeback - confirm).  NOWAIT requests cannot start
	 * writeback, so they bail out if the range needs it.
	 */
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags)) {
		if (flags & IOMAP_NOWAIT) {
			if (filemap_range_needs_writeback(inode->i_mapping,
							  lockstart, lockend))
				return -EAGAIN;
		} else {
			ret = filemap_fdatawrite_range(inode->i_mapping, start,
						       start + length - 1);
			if (ret)
				return ret;
		}
	}

	memset(dio_data, 0, sizeof(*dio_data));

	/*
	 * For blocking writes, data space for the whole requested length is
	 * reserved before the file range is locked.  A failure is tolerated
	 * for inodes flagged NODATACOW or PREALLOC, since those may still be
	 * able to write without allocating; the decision is then made in
	 * btrfs_get_blocks_direct_write().
	 */
	if (write && !(flags & IOMAP_NOWAIT)) {
		ret = btrfs_check_data_free_space(BTRFS_I(inode),
						  &dio_data->data_reserved,
						  start, data_alloc_len);
		if (!ret)
			dio_data->data_space_reserved = true;
		else if (ret && !(BTRFS_I(inode)->flags &
				  (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
			goto err;
	}

	/*
	 * A failure here means the range could not be locked for direct IO:
	 * either a fallback is required (-ENOTBLK) or a NOWAIT request would
	 * have to block (-EAGAIN).
	 */
	ret = lock_extent_direct(inode, lockstart, lockend, &cached_state, flags);
	if (ret < 0)
		goto err;

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/*
	 * Compressed and inline extents are not handled by direct IO:
	 * release the map and return -ENOTBLK (or -EAGAIN for NOWAIT).
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		/*
		 * NOWAIT requests get -EAGAIN instead of -ENOTBLK
		 * (NOTE(review): presumably so the request is retried from a
		 * context that is allowed to block - confirm).
		 */
		ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
		goto unlock_err;
	}

	/* Never map beyond the end of the extent map found. */
	len = min(len, em->len - (start - em->start));

	/*
	 * A NOWAIT request whose range is not covered by a single extent map
	 * is refused with -EAGAIN rather than being mapped partially.
	 * NOTE(review): presumably a partial mapping would need further
	 * iterations that might block - confirm.
	 */
	if ((flags & IOMAP_NOWAIT) && len < length) {
		free_extent_map(em);
		ret = -EAGAIN;
		goto unlock_err;
	}

	if (write) {
		ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
						    start, len, flags);
		if (ret < 0)
			goto unlock_err;
		unlock_extents = true;
		/* Recompute len in case the new extent map is smaller. */
		len = min(len, em->len - (start - em->start));
		if (dio_data->data_space_reserved) {
			u64 release_offset;
			u64 release_len = 0;

			/*
			 * A NOCOW write needs none of the data space reserved
			 * above; a COW write that got a shorter extent gives
			 * back the unused tail.
			 */
			if (dio_data->nocow_done) {
				release_offset = start;
				release_len = data_alloc_len;
			} else if (len < data_alloc_len) {
				release_offset = start + len;
				release_len = data_alloc_len - len;
			}

			if (release_len > 0)
				btrfs_free_reserved_data_space(BTRFS_I(inode),
							       dio_data->data_reserved,
							       release_offset,
							       release_len);
		}
	} else {
		/*
		 * For reads only the tail of the locked range that will not
		 * be used is unlocked here; the part being read stays locked
		 * (it is unlocked when the IO finishes or in iomap_end).
		 */
		lockstart = start + len;
		if (lockstart < lockend)
			unlock_extents = true;
	}

	if (unlock_extents)
		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
				     lockstart, lockend, &cached_state);
	else
		free_extent_state(cached_state);

	/*
	 * Translate the extent map into the iomap.  Holes, and prealloc
	 * extents when reading, are reported as IOMAP_HOLE; everything else
	 * is IOMAP_MAPPED with the address adjusted to @start.
	 */
	if ((em->block_start == EXTENT_MAP_HOLE) ||
	    (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else {
		iomap->addr = em->block_start + (start - em->start);
		iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = start;
	iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
	iomap->length = len;

	if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
		iomap->flags |= IOMAP_F_ZONE_APPEND;

	free_extent_map(em);

	return 0;

unlock_err:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state);
err:
	/* Give back the data space reserved before the range was locked. */
	if (dio_data->data_space_reserved) {
		btrfs_free_reserved_data_space(BTRFS_I(inode),
					       dio_data->data_reserved,
					       start, data_alloc_len);
		extent_changeset_free(dio_data->data_reserved);
	}

	return ret;
}
7929
7930 static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
7931 ssize_t written, unsigned int flags, struct iomap *iomap)
7932 {
7933 struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
7934 struct btrfs_dio_data *dio_data = iter->private;
7935 size_t submitted = dio_data->submitted;
7936 const bool write = !!(flags & IOMAP_WRITE);
7937 int ret = 0;
7938
7939 if (!write && (iomap->type == IOMAP_HOLE)) {
7940
7941 unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
7942 return 0;
7943 }
7944
7945 if (submitted < length) {
7946 pos += submitted;
7947 length -= submitted;
7948 if (write)
7949 btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL,
7950 pos, length, false);
7951 else
7952 unlock_extent(&BTRFS_I(inode)->io_tree, pos,
7953 pos + length - 1);
7954 ret = -ENOTBLK;
7955 }
7956
7957 if (write)
7958 extent_changeset_free(dio_data->data_reserved);
7959 return ret;
7960 }
7961
7962 static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
7963 {
7964
7965
7966
7967
7968 if (!refcount_dec_and_test(&dip->refs))
7969 return;
7970
7971 if (btrfs_op(&dip->bio) == BTRFS_MAP_WRITE) {
7972 btrfs_mark_ordered_io_finished(BTRFS_I(dip->inode), NULL,
7973 dip->file_offset, dip->bytes,
7974 !dip->bio.bi_status);
7975 } else {
7976 unlock_extent(&BTRFS_I(dip->inode)->io_tree,
7977 dip->file_offset,
7978 dip->file_offset + dip->bytes - 1);
7979 }
7980
7981 kfree(dip->csums);
7982 bio_endio(&dip->bio);
7983 }
7984
7985 static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7986 int mirror_num,
7987 enum btrfs_compression_type compress_type)
7988 {
7989 struct btrfs_dio_private *dip = bio->bi_private;
7990 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7991
7992 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
7993
7994 refcount_inc(&dip->refs);
7995 btrfs_submit_bio(fs_info, bio, mirror_num);
7996 }
7997
7998 static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
7999 struct btrfs_bio *bbio,
8000 const bool uptodate)
8001 {
8002 struct inode *inode = dip->inode;
8003 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
8004 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
8005 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
8006 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
8007 blk_status_t err = BLK_STS_OK;
8008 struct bvec_iter iter;
8009 struct bio_vec bv;
8010 u32 offset;
8011
8012 btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
8013 u64 start = bbio->file_offset + offset;
8014
8015 if (uptodate &&
8016 (!csum || !btrfs_check_data_csum(inode, bbio, offset, bv.bv_page,
8017 bv.bv_offset))) {
8018 clean_io_failure(fs_info, failure_tree, io_tree, start,
8019 bv.bv_page, btrfs_ino(BTRFS_I(inode)),
8020 bv.bv_offset);
8021 } else {
8022 int ret;
8023
8024 ret = btrfs_repair_one_sector(inode, bbio, offset,
8025 bv.bv_page, bv.bv_offset,
8026 submit_dio_repair_bio);
8027 if (ret)
8028 err = errno_to_blk_status(ret);
8029 }
8030 }
8031
8032 return err;
8033 }
8034
/*
 * Start hook passed to btrfs_wq_submit_bio() for direct I/O writes: computes
 * the data checksums for @bio, which covers the file range starting at
 * @dio_file_offset.  Returns the status of btrfs_csum_one_bio().
 */
static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
						     struct bio *bio,
						     u64 dio_file_offset)
{
	return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, false);
}
8041
8042 static void btrfs_end_dio_bio(struct bio *bio)
8043 {
8044 struct btrfs_dio_private *dip = bio->bi_private;
8045 struct btrfs_bio *bbio = btrfs_bio(bio);
8046 blk_status_t err = bio->bi_status;
8047
8048 if (err)
8049 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
8050 "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
8051 btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
8052 bio->bi_opf, bio->bi_iter.bi_sector,
8053 bio->bi_iter.bi_size, err);
8054
8055 if (bio_op(bio) == REQ_OP_READ)
8056 err = btrfs_check_read_dio_bio(dip, bbio, !err);
8057
8058 if (err)
8059 dip->bio.bi_status = err;
8060
8061 btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
8062
8063 bio_put(bio);
8064 btrfs_dio_private_put(dip);
8065 }
8066
/*
 * Submit one cloned direct I/O bio.
 *
 * @bio:          clone whose ->bi_private is the owning btrfs_dio_private
 * @inode:        inode the I/O targets
 * @file_offset:  file offset of the start of @bio
 * @async_submit: non-zero to allow write checksumming to be handed to
 *                btrfs_wq_submit_bio()
 *
 * Inodes flagged BTRFS_INODE_NODATASUM skip all checksum handling.  Writes
 * are checksummed either through the workqueue helper or inline; reads get
 * their csum pointer set into the dio private's checksum buffer.
 */
static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
				 u64 file_offset, int async_submit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_private *dip = bio->bi_private;
	blk_status_t ret;

	/* Remember the untouched iterator of a read bio before submission. */
	if (btrfs_op(bio) == BTRFS_MAP_READ)
		btrfs_bio(bio)->iter = bio->bi_iter;

	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		goto map;

	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
		/*
		 * Try the workqueue path when async submission was requested
		 * and the inode has no sync writers; a true return means the
		 * helper took over the bio.
		 */
		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers) &&
		    btrfs_wq_submit_bio(inode, bio, 0, file_offset,
					btrfs_submit_bio_start_direct_io))
			return;

		/*
		 * Not submitted asynchronously: checksum the bio right here
		 * and fail it through its end_io handler on error.
		 */
		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
		if (ret) {
			bio->bi_status = ret;
			bio_endio(bio);
			return;
		}
	} else {
		/* Point at this bio's slice of the dio private's csum buffer. */
		btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums,
						      file_offset - dip->file_offset);
	}
map:
	btrfs_submit_bio(fs_info, bio, 0);
}
8105
8106 static void btrfs_submit_direct(const struct iomap_iter *iter,
8107 struct bio *dio_bio, loff_t file_offset)
8108 {
8109 struct btrfs_dio_private *dip =
8110 container_of(dio_bio, struct btrfs_dio_private, bio);
8111 struct inode *inode = iter->inode;
8112 const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
8113 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8114 const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
8115 BTRFS_BLOCK_GROUP_RAID56_MASK);
8116 struct bio *bio;
8117 u64 start_sector;
8118 int async_submit = 0;
8119 u64 submit_len;
8120 u64 clone_offset = 0;
8121 u64 clone_len;
8122 u64 logical;
8123 int ret;
8124 blk_status_t status;
8125 struct btrfs_io_geometry geom;
8126 struct btrfs_dio_data *dio_data = iter->private;
8127 struct extent_map *em = NULL;
8128
8129 dip->inode = inode;
8130 dip->file_offset = file_offset;
8131 dip->bytes = dio_bio->bi_iter.bi_size;
8132 refcount_set(&dip->refs, 1);
8133 dip->csums = NULL;
8134
8135 if (!write && !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
8136 unsigned int nr_sectors =
8137 (dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits);
8138
8139
8140
8141
8142
8143 status = BLK_STS_RESOURCE;
8144 dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS);
8145 if (!dip)
8146 goto out_err;
8147
8148 status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
8149 if (status != BLK_STS_OK)
8150 goto out_err;
8151 }
8152
8153 start_sector = dio_bio->bi_iter.bi_sector;
8154 submit_len = dio_bio->bi_iter.bi_size;
8155
8156 do {
8157 logical = start_sector << 9;
8158 em = btrfs_get_chunk_map(fs_info, logical, submit_len);
8159 if (IS_ERR(em)) {
8160 status = errno_to_blk_status(PTR_ERR(em));
8161 em = NULL;
8162 goto out_err_em;
8163 }
8164 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
8165 logical, &geom);
8166 if (ret) {
8167 status = errno_to_blk_status(ret);
8168 goto out_err_em;
8169 }
8170
8171 clone_len = min(submit_len, geom.len);
8172 ASSERT(clone_len <= UINT_MAX);
8173
8174
8175
8176
8177
8178 bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
8179 bio->bi_private = dip;
8180 bio->bi_end_io = btrfs_end_dio_bio;
8181 btrfs_bio(bio)->file_offset = file_offset;
8182
8183 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
8184 status = extract_ordered_extent(BTRFS_I(inode), bio,
8185 file_offset);
8186 if (status) {
8187 bio_put(bio);
8188 goto out_err;
8189 }
8190 }
8191
8192 ASSERT(submit_len >= clone_len);
8193 submit_len -= clone_len;
8194
8195
8196
8197
8198
8199
8200
8201
8202
8203
8204 if (submit_len > 0) {
8205 refcount_inc(&dip->refs);
8206
8207
8208
8209
8210
8211
8212 if (!raid56)
8213 async_submit = 1;
8214 }
8215
8216 btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
8217
8218 dio_data->submitted += clone_len;
8219 clone_offset += clone_len;
8220 start_sector += clone_len >> 9;
8221 file_offset += clone_len;
8222
8223 free_extent_map(em);
8224 } while (submit_len > 0);
8225 return;
8226
8227 out_err_em:
8228 free_extent_map(em);
8229 out_err:
8230 dio_bio->bi_status = status;
8231 btrfs_dio_private_put(dip);
8232 }
8233
/* iomap begin/end callbacks btrfs passes to iomap_dio_rw() for direct I/O. */
static const struct iomap_ops btrfs_dio_iomap_ops = {
	.iomap_begin            = btrfs_dio_iomap_begin,
	.iomap_end              = btrfs_dio_iomap_end,
};
8238
/*
 * Direct I/O hooks: bios are submitted through btrfs_submit_direct() and
 * come from btrfs_dio_bioset, whose front padding is sized so that each bio
 * is embedded in a struct btrfs_dio_private (see bioset_init() in
 * btrfs_init_cachep()).
 */
static const struct iomap_dio_ops btrfs_dio_ops = {
	.submit_io		= btrfs_submit_direct,
	.bio_set		= &btrfs_dio_bioset,
};
8243
/*
 * Perform direct I/O for @iocb over @iter through iomap, using the btrfs
 * iomap and dio ops with IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC.
 * @done_before is forwarded to iomap_dio_rw() unchanged.
 *
 * Returns whatever iomap_dio_rw() returns.
 */
ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
{
	/*
	 * Per-call state handed to iomap as its private pointer.
	 * NOTE(review): not initialised here - presumably the ->iomap_begin
	 * callback initialises it before first use; confirm.
	 */
	struct btrfs_dio_data data;

	return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
			    IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC,
			    &data, done_before);
}
8252
8253 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8254 u64 start, u64 len)
8255 {
8256 int ret;
8257
8258 ret = fiemap_prep(inode, fieinfo, start, &len, 0);
8259 if (ret)
8260 return ret;
8261
8262 return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
8263 }
8264
/* Write back dirty pages of @mapping; thin wrapper around extent_writepages(). */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	return extent_writepages(mapping, wbc);
}
8270
/* Readahead entry point; thin wrapper around extent_readahead(). */
static void btrfs_readahead(struct readahead_control *rac)
{
	extent_readahead(rac);
}
8275
8276
8277
8278
8279
8280
8281
8282
/*
 * For a subpage-managed @page, wait until nobody holds the page's
 * btrfs_subpage spinlock any more.  Does nothing for pages that
 * btrfs_is_subpage() reports as not subpage.
 *
 * The page must have its private data attached (asserted).
 */
static void wait_subpage_spinlock(struct page *page)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->private);
	subpage = (struct btrfs_subpage *)page->private;

	/*
	 * Taking and immediately dropping the lock only returns once any
	 * current holder has released it, so afterwards no earlier critical
	 * section on this subpage structure is still running.  Presumably
	 * this guards against the structure being freed while such a section
	 * is in progress (callers go on to detach the page private) - confirm
	 * against the subpage code.
	 */
	spin_lock_irq(&subpage->lock);
	spin_unlock_irq(&subpage->lock);
}
8308
8309 static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
8310 {
8311 int ret = try_release_extent_mapping(&folio->page, gfp_flags);
8312
8313 if (ret == 1) {
8314 wait_subpage_spinlock(&folio->page);
8315 clear_page_extent_mapped(&folio->page);
8316 }
8317 return ret;
8318 }
8319
8320 static bool btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
8321 {
8322 if (folio_test_writeback(folio) || folio_test_dirty(folio))
8323 return false;
8324 return __btrfs_release_folio(folio, gfp_flags);
8325 }
8326
#ifdef CONFIG_MIGRATION
/*
 * Migrate @src to @dst with the generic filemap helper and, on success, move
 * the btrfs "ordered" folio flag over as well.  Any non-success result of
 * filemap_migrate_folio() is returned unchanged.
 */
static int btrfs_migrate_folio(struct address_space *mapping,
			     struct folio *dst, struct folio *src,
			     enum migrate_mode mode)
{
	int ret = filemap_migrate_folio(mapping, dst, src, mode);

	if (ret == MIGRATEPAGE_SUCCESS && folio_test_ordered(src)) {
		folio_clear_ordered(src);
		folio_set_ordered(dst);
	}

	return ret;
}
#else
#define btrfs_migrate_folio NULL
#endif
8347
/*
 * Invalidate (part of) a folio.
 *
 * @folio:  folio being invalidated
 * @offset: start of the invalidated range within the folio
 * @length: length of the invalidated range
 *
 * After waiting for writeback, a partial invalidation only attempts
 * btrfs_release_folio().  A full-folio invalidation walks the folio's file
 * range piece by piece, split at ordered extent boundaries: pieces covered by
 * an ordered extent that still have the folio's ordered bit set get that bit
 * cleared and the ordered extent marked truncated; every piece has its qgroup
 * data reservation freed and its extent io tree bits cleared.  Extent range
 * locking and bit clearing are skipped while the inode is being evicted
 * (I_FREEING).
 */
static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
				 size_t length)
{
	struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct extent_io_tree *tree = &inode->io_tree;
	struct extent_state *cached_state = NULL;
	u64 page_start = folio_pos(folio);
	u64 page_end = page_start + folio_size(folio) - 1;
	u64 cur;
	int inode_evicting = inode->vfs_inode.i_state & I_FREEING;

	/*
	 * Wait for writeback on the folio to finish, then for any holder of
	 * the subpage spinlock, before any state is torn down.
	 */
	folio_wait_writeback(folio);
	wait_subpage_spinlock(&folio->page);

	/*
	 * Partial invalidation: only try to release the folio (which itself
	 * refuses dirty or writeback folios) and leave ordered extents and
	 * extent bits alone.
	 */
	if (!(offset == 0 && length == folio_size(folio))) {
		btrfs_release_folio(folio, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);

	cur = page_start;
	while (cur < page_end) {
		struct btrfs_ordered_extent *ordered;
		bool delete_states;
		u64 range_end;
		u32 range_len;

		ordered = btrfs_lookup_first_ordered_range(inode, cur,
							   page_end + 1 - cur);
		if (!ordered) {
			range_end = page_end;
			/*
			 * No ordered extent in the rest of the folio: the
			 * extent states of the whole remainder can go.
			 */
			delete_states = true;
			goto next;
		}
		if (ordered->file_offset > cur) {
			/*
			 * There is a gap before the first ordered extent;
			 * handle just the gap [cur, ordered->file_offset - 1]
			 * now, the ordered extent itself is picked up by the
			 * next iteration.
			 */
			range_end = ordered->file_offset - 1;
			delete_states = true;
			goto next;
		}

		range_end = min(ordered->file_offset + ordered->num_bytes - 1,
				page_end);
		ASSERT(range_end + 1 - cur < U32_MAX);
		range_len = range_end + 1 - cur;
		if (!btrfs_page_test_ordered(fs_info, &folio->page, cur, range_len)) {
			/*
			 * The folio's ordered bit for this range is already
			 * clear, so the ordered extent accounting is not
			 * touched here.  The extent states are kept
			 * (presumably because the ordered extent may still
			 * need them - confirm).
			 */
			delete_states = false;
			goto next;
		}
		btrfs_page_clear_ordered(fs_info, &folio->page, cur, range_len);

		/*
		 * Clear the delalloc/accounting related bits (and the lock
		 * bit) for this range before the ordered extent is updated
		 * below; the final clear at "next" still runs for the same
		 * range afterwards.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, cur, range_end,
					 EXTENT_DELALLOC |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);

		/* Mark the ordered extent truncated at (or before) @cur. */
		spin_lock_irq(&inode->ordered_tree.lock);
		set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
		ordered->truncated_len = min(ordered->truncated_len,
					     cur - ordered->file_offset);
		spin_unlock_irq(&inode->ordered_tree.lock);

		if (btrfs_dec_test_ordered_pending(inode, &ordered,
						   cur, range_end + 1 - cur)) {
			btrfs_finish_ordered_io(ordered);
			/*
			 * This range completed the ordered extent and it has
			 * been finished here, so its extent states can be
			 * deleted.
			 */
			delete_states = true;
		} else {
			/*
			 * The ordered extent still has pending bytes elsewhere
			 * and will be finished later; keep the extent states
			 * for it.
			 */
			delete_states = false;
		}
next:
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		/*
		 * Free the qgroup data reservation for this range.  This is
		 * done for every piece, whether or not an ordered extent
		 * covered it.
		 * NOTE(review): presumably a no-op for ranges whose
		 * reservation was already released or converted - confirm
		 * against the qgroup code.
		 */
		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
		if (!inode_evicting) {
			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
				 EXTENT_DELALLOC | EXTENT_UPTODATE |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
				 delete_states, &cached_state);
		}
		cur = range_end + 1;
	}

	/*
	 * Every ordered range of the folio has been processed above, so the
	 * folio must not carry the ordered flag any more.
	 */
	ASSERT(!folio_test_ordered(folio));
	btrfs_page_clear_checked(fs_info, &folio->page, folio_pos(folio), folio_size(folio));
	if (!inode_evicting)
		__btrfs_release_folio(folio, GFP_NOFS);
	clear_page_extent_mapped(&folio->page);
}
8516
8517
8518
8519
8520
8521
8522
8523
8524
8525
8526
8527
8528
8529
8530
8531
/*
 * Prepare a mapped page for being written to (name suggests this is the
 * ->page_mkwrite handler; confirm where it is installed).
 *
 * Reserves delalloc space for the page, updates the file times, then - with
 * i_mmap_lock held for reading, the page locked and the extent range locked -
 * waits out any ordered extent overlapping the page, marks the range
 * delalloc, zeroes the part of the page beyond i_size and sets the page
 * dirty and uptodate.
 *
 * Returns VM_FAULT_LOCKED with the page still locked on success,
 * VM_FAULT_NOPAGE when the page was truncated away meanwhile, or an error
 * fault code.  On the failure paths the reserved space is released again.
 */
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	unsigned long zero_start;
	loff_t size;
	vm_fault_t ret;
	int ret2;
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	u64 end;

	reserved_space = PAGE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;
	end = page_end;

	/*
	 * Reserve space for the whole page first; the reservation is trimmed
	 * further down when the page turns out to straddle i_size.  The file
	 * times are only updated once the reservation succeeded.
	 */
	ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
					    page_start, reserved_space);
	if (!ret2) {
		ret2 = file_update_time(vmf->vma->vm_file);
		reserved = 1;
	}
	if (ret2) {
		ret = vmf_error(ret2);
		/* Only undo the reservation if it was actually made. */
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	/* Default result for the "page went away" exit below. */
	ret = VM_FAULT_NOPAGE;
again:
	down_read(&BTRFS_I(inode)->i_mmap_lock);
	lock_page(page);
	size = i_size_read(inode);

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* The page no longer belongs to this file range. */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	ret2 = set_page_extent_mapped(page);
	if (ret2 < 0) {
		ret = vmf_error(ret2);
		unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
		goto out_unlock;
	}

	/*
	 * An ordered extent still covers this page: drop every lock, wait for
	 * the ordered extent and start over.
	 */
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
			PAGE_SIZE);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		up_read(&BTRFS_I(inode)->i_mmap_lock);
		btrfs_start_ordered_extent(ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/*
	 * Last page of the file: only the sectors up to i_size (rounded up to
	 * the sector size) need a reservation; give the rest back.
	 */
	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
		reserved_space = round_up(size - page_start,
					  fs_info->sectorsize);
		if (reserved_space < PAGE_SIZE) {
			end = page_start + reserved_space - 1;
			btrfs_delalloc_release_space(BTRFS_I(inode),
					data_reserved, page_start,
					PAGE_SIZE - reserved_space, true);
		}
	}

	/*
	 * Clear any existing delalloc state on the range before setting it
	 * again below (presumably so that the range is not accounted twice -
	 * confirm).
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
			  EXTENT_DEFRAG, 0, 0, &cached_state);

	ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
					&cached_state);
	if (ret2) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	/* Zero the part of the page that lies beyond i_size, if any. */
	if (page_start + PAGE_SIZE > size)
		zero_start = offset_in_page(size);
	else
		zero_start = PAGE_SIZE;

	if (zero_start != PAGE_SIZE)
		memzero_page(page, zero_start, PAGE_SIZE - zero_start);

	btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
	btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start);
	btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start);

	btrfs_set_inode_last_sub_trans(BTRFS_I(inode));

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
	up_read(&BTRFS_I(inode)->i_mmap_lock);

	/* Success: the page is intentionally left locked for the caller. */
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return VM_FAULT_LOCKED;

out_unlock:
	unlock_page(page);
	up_read(&BTRFS_I(inode)->i_mmap_lock);
out:
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
				     reserved_space, (ret != 0));
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return ret;
}
8682
/*
 * Remove the file extent items of @inode that lie beyond its current i_size.
 *
 * @inode:          inode being truncated (inode->i_size is the new size)
 * @skip_writeback: when false, first wait for ordered extents from the
 *                  sector containing i_size to the end of the file
 *
 * Item removal runs in a loop because btrfs_truncate_inode_items() may stop
 * early with -ENOSPC or -EAGAIN; each round then updates the inode, ends the
 * transaction and starts a fresh one with a refilled temporary block
 * reserve.
 *
 * Returns 0 or a negative errno.
 */
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
	struct btrfs_truncate_control control = {
		.inode = BTRFS_I(inode),
		.ino = btrfs_ino(BTRFS_I(inode)),
		.min_type = BTRFS_EXTENT_DATA_KEY,
		.clear_extent_range = true,
	};
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;
	struct btrfs_trans_handle *trans;
	u64 mask = fs_info->sectorsize - 1;
	u64 min_size = btrfs_calc_metadata_size(fs_info, 1);

	if (!skip_writeback) {
		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
					       (u64)-1);
		if (ret)
			return ret;
	}

	/*
	 * Use a private temporary block reserve of min_size for the item
	 * removal.  It is marked failfast; presumably that makes the
	 * truncation return -ENOSPC instead of falling back to other
	 * reserves, which the loop below handles by restarting the
	 * transaction - confirm against the block-rsv code.
	 */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = true;

	/*
	 * Two units are reserved with the transaction; one min_size worth is
	 * migrated into @rsv right below (presumably the other one is for the
	 * inode update - confirm).
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* Move min_size from the transaction reserve into our own reserve. */
	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, false);
	BUG_ON(ret);

	trans->block_rsv = rsv;

	while (1) {
		struct extent_state *cached_state = NULL;
		const u64 new_size = inode->i_size;
		const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);

		control.new_size = new_size;
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
				 &cached_state);
		/*
		 * Drop cached extent maps from the sector-aligned new size to
		 * the end of the file before removing the on-disk items.
		 */
		btrfs_drop_extent_cache(BTRFS_I(inode),
					ALIGN(new_size, fs_info->sectorsize),
					(u64)-1, 0);

		ret = btrfs_truncate_inode_items(trans, root, &control);

		/* Account what this round removed, whatever its result. */
		inode_sub_bytes(inode, control.sub_bytes);
		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), control.last_size);

		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
				     (u64)-1, &cached_state);

		trans->block_rsv = &fs_info->trans_block_rsv;
		/* Only -ENOSPC and -EAGAIN mean "restart and continue". */
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
		if (ret)
			break;

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* No handle left for the common exit code to end. */
			trans = NULL;
			break;
		}

		/* Empty our reserve, then refill it from the new transaction. */
		btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, false);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/*
	 * BTRFS_NEED_TRUNCATE_BLOCK: the remaining work is done by
	 * btrfs_truncate_block(), which is called with no transaction handle
	 * held (presumably required to avoid a deadlock - confirm).  A new
	 * transaction is started afterwards for the final inode update.
	 */
	if (ret == BTRFS_NEED_TRUNCATE_BLOCK) {
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
	}

	if (trans) {
		int ret2;

		/* Keep the first error; later failures do not overwrite it. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		ret2 = btrfs_update_inode(trans, root, BTRFS_I(inode));
		if (ret2 && !ret)
			ret = ret2;

		ret2 = btrfs_end_transaction(trans);
		if (ret2 && !ret)
			ret = ret2;
		btrfs_btree_balance_dirty(fs_info);
	}
out:
	btrfs_free_block_rsv(fs_info, rsv);
	/*
	 * If any extent items were found, flag the inode as needing a full
	 * sync (presumably so that a later fsync logs the extent removal -
	 * confirm against the tree-log code).
	 */
	if (control.extents_found > 0)
		btrfs_set_inode_full_sync(BTRFS_I(inode));

	return ret;
}
8862
8863 struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
8864 struct inode *dir)
8865 {
8866 struct inode *inode;
8867
8868 inode = new_inode(dir->i_sb);
8869 if (inode) {
8870
8871
8872
8873
8874 inode_init_owner(mnt_userns, inode, NULL,
8875 S_IFDIR | (~current_umask() & S_IRWXUGO));
8876 inode->i_op = &btrfs_dir_inode_operations;
8877 inode->i_fop = &btrfs_dir_file_operations;
8878 }
8879 return inode;
8880 }
8881
/*
 * Allocate a btrfs_inode from btrfs_inode_cachep and put every btrfs
 * specific field into its initial state.
 *
 * Returns the embedded VFS inode, or NULL when the allocation fails.
 */
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_inode *ei;
	struct inode *inode;

	ei = alloc_inode_sb(sb, btrfs_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;

	/* Plain counters, flags and transaction ids. */
	ei->root = NULL;
	ei->generation = 0;
	ei->last_trans = 0;
	ei->last_sub_trans = 0;
	ei->logged_trans = 0;
	ei->delalloc_bytes = 0;
	ei->new_delalloc_bytes = 0;
	ei->defrag_bytes = 0;
	ei->disk_i_size = 0;
	ei->flags = 0;
	ei->ro_flags = 0;
	ei->csum_bytes = 0;
	ei->index_cnt = (u64)-1;
	ei->dir_index = 0;
	ei->last_unlink_trans = 0;
	ei->last_reflink_trans = 0;
	ei->last_log_commit = 0;

	spin_lock_init(&ei->lock);
	ei->outstanding_extents = 0;
	/* The metadata block reserve is not set up for test superblocks. */
	if (sb->s_magic != BTRFS_TEST_MAGIC)
		btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
					      BTRFS_BLOCK_RSV_DELALLOC);
	ei->runtime_flags = 0;
	ei->prop_compress = BTRFS_COMPRESS_NONE;
	ei->defrag_compress = BTRFS_COMPRESS_NONE;

	ei->delayed_node = NULL;

	ei->i_otime.tv_sec = 0;
	ei->i_otime.tv_nsec = 0;

	/* Extent map tree and the three per-inode extent io trees. */
	inode = &ei->vfs_inode;
	extent_map_tree_init(&ei->extent_tree);
	extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
	extent_io_tree_init(fs_info, &ei->io_failure_tree,
			    IO_TREE_INODE_IO_FAILURE, inode);
	extent_io_tree_init(fs_info, &ei->file_extent_tree,
			    IO_TREE_INODE_FILE_EXTENT, inode);
	ei->io_tree.track_uptodate = true;
	ei->io_failure_tree.track_uptodate = true;
	/* Synchronisation primitives and list/tree linkage. */
	atomic_set(&ei->sync_writers, 0);
	mutex_init(&ei->log_mutex);
	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
	INIT_LIST_HEAD(&ei->delalloc_inodes);
	INIT_LIST_HEAD(&ei->delayed_iput);
	RB_CLEAR_NODE(&ei->rb_node);
	init_rwsem(&ei->i_mmap_lock);

	return inode;
}
8943
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test-only teardown: drop all cached extent maps of @inode and return
 * the btrfs_inode straight to its slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
#endif
8951
/* Return the btrfs_inode containing @inode to btrfs_inode_cachep. */
void btrfs_free_inode(struct inode *inode)
{
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
8956
/*
 * Tear down the btrfs specific state of an inode that is going away.
 *
 * Warns about any accounting that should have reached zero by now, removes
 * (and reports) ordered extents that are still attached, then detaches the
 * inode from its root and drops the root reference.  The memory itself is
 * not freed here.
 */
void btrfs_destroy_inode(struct inode *vfs_inode)
{
	struct btrfs_ordered_extent *ordered;
	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
	struct btrfs_root *root = inode->root;

	WARN_ON(!hlist_empty(&vfs_inode->i_dentry));
	WARN_ON(vfs_inode->i_data.nrpages);
	WARN_ON(inode->block_rsv.reserved);
	WARN_ON(inode->block_rsv.size);
	WARN_ON(inode->outstanding_extents);
	/* The delalloc counters are not checked for directories. */
	if (!S_ISDIR(vfs_inode->i_mode)) {
		WARN_ON(inode->delalloc_bytes);
		WARN_ON(inode->new_delalloc_bytes);
	}
	WARN_ON(inode->csum_bytes);
	WARN_ON(inode->defrag_bytes);

	/*
	 * btrfs_alloc_inode() leaves ->root NULL; an inode that never got a
	 * root has nothing further to clean up.
	 */
	if (!root)
		return;

	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
		if (!ordered)
			break;
		else {
			btrfs_err(root->fs_info,
				  "found ordered extent %llu %llu on inode cleanup",
				  ordered->file_offset, ordered->num_bytes);
			btrfs_remove_ordered_extent(inode, ordered);
			/*
			 * Two puts: presumably one for the reference taken by
			 * the lookup above and one for the reference the tree
			 * held - confirm against ordered-data.c.
			 */
			btrfs_put_ordered_extent(ordered);
			btrfs_put_ordered_extent(ordered);
		}
	}
	btrfs_qgroup_check_reserved_leak(inode);
	inode_tree_del(inode);
	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
	btrfs_inode_clear_file_extent_range(inode, 0, (u64)-1);
	btrfs_put_root(inode->root);
}
9002
9003 int btrfs_drop_inode(struct inode *inode)
9004 {
9005 struct btrfs_root *root = BTRFS_I(inode)->root;
9006
9007 if (root == NULL)
9008 return 1;
9009
9010
9011 if (btrfs_root_refs(&root->root_item) == 0)
9012 return 1;
9013 else
9014 return generic_drop_inode(inode);
9015 }
9016
/*
 * Slab constructor for btrfs_inode_cachep (registered in
 * btrfs_init_cachep()): runs the VFS one-time initialisation on the embedded
 * inode.
 */
static void init_once(void *foo)
{
	struct btrfs_inode *ei = foo;

	inode_init_once(&ei->vfs_inode);
}
9023
/*
 * Destroy the direct I/O bioset and the slab caches created by
 * btrfs_init_cachep().  Also used as that function's failure cleanup, so it
 * can run with only some of them set up.
 */
void __cold btrfs_destroy_cachep(void)
{
	/*
	 * Wait for outstanding RCU callbacks first - presumably so that
	 * RCU-deferred inode frees have finished before the caches are
	 * destroyed; confirm.
	 */
	rcu_barrier();
	bioset_exit(&btrfs_dio_bioset);
	kmem_cache_destroy(btrfs_inode_cachep);
	kmem_cache_destroy(btrfs_trans_handle_cachep);
	kmem_cache_destroy(btrfs_path_cachep);
	kmem_cache_destroy(btrfs_free_space_cachep);
	kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
}
9038
/*
 * Create the slab caches used by this file (inodes, transaction handles,
 * paths, free space entries and free space bitmaps) and the bioset for
 * direct I/O.
 *
 * Returns 0 on success.  On any failure everything created so far is torn
 * down through btrfs_destroy_cachep() and -ENOMEM is returned.
 */
int __init btrfs_init_cachep(void)
{
	btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
			sizeof(struct btrfs_inode), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
			init_once);
	if (!btrfs_inode_cachep)
		goto fail;

	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
			sizeof(struct btrfs_trans_handle), 0,
			SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_trans_handle_cachep)
		goto fail;

	btrfs_path_cachep = kmem_cache_create("btrfs_path",
			sizeof(struct btrfs_path), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_path_cachep)
		goto fail;

	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		goto fail;

	/* Bitmap objects are exactly one page in size, page aligned. */
	btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
							PAGE_SIZE, PAGE_SIZE,
							SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_bitmap_cachep)
		goto fail;

	/*
	 * The front padding places each bio from this set at the ->bio member
	 * of a struct btrfs_dio_private, which is what lets
	 * btrfs_submit_direct() use container_of() on the bio.
	 */
	if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_dio_private, bio),
			BIOSET_NEED_BVECS))
		goto fail;

	return 0;
fail:
	btrfs_destroy_cachep();
	return -ENOMEM;
}
9082
/*
 * Fill @stat for the inode behind @path.
 *
 * On top of generic_fillattr() this reports the creation time (btime), maps
 * btrfs inode flags to STATX_ATTR_* bits, reports the root's anonymous device
 * number as st_dev and includes not yet written delalloc bytes in the block
 * count.  Always returns 0.
 */
static int btrfs_getattr(struct user_namespace *mnt_userns,
			 const struct path *path, struct kstat *stat,
			 u32 request_mask, unsigned int flags)
{
	u64 delalloc_bytes;
	u64 inode_bytes;
	struct inode *inode = d_inode(path->dentry);
	u32 blocksize = inode->i_sb->s_blocksize;
	u32 bi_flags = BTRFS_I(inode)->flags;
	u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;

	stat->result_mask |= STATX_BTIME;
	stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
	stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
	if (bi_flags & BTRFS_INODE_APPEND)
		stat->attributes |= STATX_ATTR_APPEND;
	if (bi_flags & BTRFS_INODE_COMPRESS)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (bi_flags & BTRFS_INODE_IMMUTABLE)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (bi_flags & BTRFS_INODE_NODUMP)
		stat->attributes |= STATX_ATTR_NODUMP;
	if (bi_ro_flags & BTRFS_INODE_RO_VERITY)
		stat->attributes |= STATX_ATTR_VERITY;

	/*
	 * NOTE(review): STATX_ATTR_VERITY can be set above but is not
	 * advertised in attributes_mask - confirm whether that is intended.
	 */
	stat->attributes_mask |= (STATX_ATTR_APPEND |
				  STATX_ATTR_COMPRESSED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP);

	generic_fillattr(mnt_userns, inode, stat);
	/* Overrides the device number generic_fillattr() filled in. */
	stat->dev = BTRFS_I(inode)->root->anon_dev;

	/* Sample both byte counters under the inode lock. */
	spin_lock(&BTRFS_I(inode)->lock);
	delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
	inode_bytes = inode_get_bytes(inode);
	spin_unlock(&BTRFS_I(inode)->lock);
	/* Each count is rounded up to the block size; result in 512-byte units. */
	stat->blocks = (ALIGN(inode_bytes, blocksize) +
			ALIGN(delalloc_bytes, blocksize)) >> 9;
	return 0;
}
9124
/*
 * Exchange two directory entries: afterwards @new_dentry's name in @new_dir
 * refers to the old inode and @old_dentry's name in @old_dir refers to the
 * new inode.
 *
 * Either side may be a subvolume root (inode number
 * BTRFS_FIRST_FREE_OBJECTID); such a side is unlinked with
 * btrfs_unlink_subvol() and forces a full log commit instead of inode ref
 * and log updates.
 *
 * Returns 0 or a negative errno; -EXDEV when the directories live in
 * different roots and at least one of the two inodes is not a subvolume
 * root.
 */
static int btrfs_rename_exchange(struct inode *old_dir,
			      struct dentry *old_dentry,
			      struct inode *new_dir,
			      struct dentry *new_dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	unsigned int trans_num_items;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec64 ctime = current_time(old_inode);
	struct btrfs_rename_ctx old_rename_ctx;
	struct btrfs_rename_ctx new_rename_ctx;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	u64 old_idx = 0;
	u64 new_idx = 0;
	int ret;
	int ret2;
	bool need_abort = false;

	/*
	 * Across two different roots an exchange is only allowed when both
	 * inodes are subvolume roots; anything else is a cross-device rename.
	 */
	if (root != dest &&
	    (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
	     new_ino != BTRFS_FIRST_FREE_OBJECTID))
		return -EXDEV;

	/*
	 * A subvolume is involved: hold subvol_sem for reading for the whole
	 * operation (presumably to exclude concurrent snapshot/subvolume
	 * create and delete - confirm).
	 */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Number of items to reserve for the transaction: a base that is one
	 * larger when two different directories are involved, plus 4 for
	 * each side that is a subvolume and 3 for each side that is a regular
	 * inode.  NOTE(review): the per-item breakdown is not visible from
	 * this code - confirm before changing these numbers.
	 */
	trans_num_items = (old_dir == new_dir ? 9 : 10);
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* Source is a subvolume. */
		trans_num_items += 4;
	} else {
		/* Source is a regular inode. */
		trans_num_items += 3;
	}
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		trans_num_items += 4;
	else
		trans_num_items += 3;
	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root) {
		ret = btrfs_record_root_in_trans(trans, dest);
		if (ret)
			goto out_fail;
	}

	/*
	 * Allocate a new directory index in each directory: old_idx is the
	 * old inode's index in new_dir, new_idx the new inode's in old_dir.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Inode ref for the source under its new name and parent. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* A subvolume is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)),
					     old_idx);
		if (ret)
			goto out_fail;
		/* From here on a failure leaves a ref behind: must abort. */
		need_abort = true;
	}

	/* Inode ref for the destination under its new name and parent. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* A subvolume is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(BTRFS_I(old_dir)),
					     new_idx);
		if (ret) {
			if (need_abort)
				btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	/* Bump the versions and timestamps of all four inodes. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_mtime = ctime;
	old_dir->i_ctime = ctime;
	new_dir->i_mtime = ctime;
	new_dir->i_ctime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);
		btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
				BTRFS_I(new_inode), 1);
	}

	/* Remove the source's old directory entry. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
					   old_dentry->d_name.name,
					   old_dentry->d_name.len,
					   &old_rename_ctx);
		if (!ret)
			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Remove the destination's old directory entry. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
					   new_dentry->d_name.name,
					   new_dentry->d_name.len,
					   &new_rename_ctx);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Link each inode under the other one's name. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Cache the new directory index only for inodes with a single link. */
	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	/*
	 * Pin the log of each root with a regular inode while the new names
	 * are logged below (presumably so that no log sync can run in the
	 * middle of the update - confirm against tree-log.c).
	 */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_pin_log_trans(root);
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_pin_log_trans(dest);

	/* Log the new names, using the indexes recorded by the unlinks. */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
				   old_rename_ctx.index, new_dentry->d_parent);
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
				   new_rename_ctx.index, old_dentry->d_parent);

	/* Unpin the logs again. */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_end_log_trans(root);
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_end_log_trans(dest);
out_fail:
	/* An earlier error takes precedence over the end-transaction result. */
	ret2 = btrfs_end_transaction(trans);
	ret = ret ? ret : ret2;
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9362
9363 static struct inode *new_whiteout_inode(struct user_namespace *mnt_userns,
9364 struct inode *dir)
9365 {
9366 struct inode *inode;
9367
9368 inode = new_inode(dir->i_sb);
9369 if (inode) {
9370 inode_init_owner(mnt_userns, inode, dir,
9371 S_IFCHR | WHITEOUT_MODE);
9372 inode->i_op = &btrfs_special_inode_operations;
9373 init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
9374 }
9375 return inode;
9376 }
9377
/*
 * Rename @old_dentry (in @old_dir) to @new_dentry (in @new_dir).  This is the
 * non-exchange path, called from btrfs_rename2().
 *
 * If @new_dentry already has an inode, that inode is unlinked from @new_dir
 * and added to the orphan list once its link count reaches zero.  When
 * RENAME_WHITEOUT is set in @flags, a whiteout inode described by
 * whiteout_args (dir = @old_dir, dentry = @old_dentry) is created in the same
 * transaction.
 *
 * Returns 0 on success or a negative errno.  If no earlier step failed, an
 * error from btrfs_end_transaction() is returned instead.
 */
static int btrfs_rename(struct user_namespace *mnt_userns,
			struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry,
			unsigned int flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_new_inode_args whiteout_args = {
		.dir = old_dir,
		.dentry = old_dentry,
	};
	struct btrfs_trans_handle *trans;
	unsigned int trans_num_items;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = d_inode(new_dentry);
	struct inode *old_inode = d_inode(old_dentry);
	struct btrfs_rename_ctx rename_ctx;
	u64 index = 0;
	int ret;
	int ret2;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));

	/* Nothing may be renamed into an empty-subvolume placeholder dir. */
	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;

	/*
	 * Only an inode numbered BTRFS_FIRST_FREE_OBJECTID (presumably a
	 * subvolume root directory - confirm) may move between two roots.
	 */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
	    (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
		return -ENOTEMPTY;

	/* A directory may only replace a directory that is still empty. */
	if (S_ISDIR(old_inode->i_mode) && new_inode &&
	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;

	/* Check for a dir item collision on the new name in the target dir. */
	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
			     new_dentry->d_name.name,
			     new_dentry->d_name.len);

	if (ret) {
		if (ret == -EEXIST) {
			/*
			 * -EEXIST is tolerated only when there is an existing
			 * target inode to replace; without one it is unexpected.
			 */
			if (WARN_ON(!new_inode)) {
				return ret;
			}
		} else {
			/* Any other error is returned unchanged. */
			return ret;
		}
	}
	ret = 0;

	/*
	 * A non-empty target is being replaced by a regular file: kick off
	 * writeback of the source's dirty pages before starting the
	 * transaction.
	 */
	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
		filemap_flush(old_inode->i_mapping);

	if (flags & RENAME_WHITEOUT) {
		whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir);
		if (!whiteout_args.inode)
			return -ENOMEM;
		/* Also yields the initial trans_num_items for the whiteout. */
		ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items);
		if (ret)
			goto out_whiteout_inode;
	} else {
		/* Base reservation (presumably the old parent inode update). */
		trans_num_items = 1;
	}

	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* Held until out_notrans; released there under the same test. */
		down_read(&fs_info->subvol_sem);
		/*
		 * NOTE(review): presumably remove old root ref + backref and
		 * add new root ref + backref - confirm the breakdown.
		 */
		trans_num_items += 4;
	} else {
		/*
		 * NOTE(review): presumably inode update, old inode ref
		 * removal and new inode ref insertion - confirm.
		 */
		trans_num_items += 3;
	}

	/*
	 * NOTE(review): presumably old dir item + dir index removal and new
	 * dir item + dir index insertion - confirm.
	 */
	trans_num_items += 4;

	/* One more unit when the new parent differs from the old one. */
	if (new_dir != old_dir)
		trans_num_items++;
	if (new_inode) {
		/*
		 * Extra units for unlinking the replaced inode.
		 * NOTE(review): presumably inode update, inode ref, dir item,
		 * dir index and a possible orphan item - confirm.
		 */
		trans_num_items += 5;
	}
	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root) {
		ret = btrfs_record_root_in_trans(trans, dest);
		if (ret)
			goto out_fail;
	}

	/* Obtain the directory index the new name will use in new_dir. */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
	if (ret)
		goto out_fail;

	/* Reset the cached index; it is set again below if i_nlink == 1. */
	BTRFS_I(old_inode)->dir_index = 0ULL;
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		/* No inode ref is inserted here; force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)), index);
		if (ret)
			goto out_fail;
	}

	/* Bump versions and stamp all timestamps with one current_time(). */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	old_dir->i_mtime = current_time(old_dir);
	old_dir->i_ctime = old_dir->i_mtime;
	new_dir->i_mtime = old_dir->i_mtime;
	new_dir->i_ctime = old_dir->i_mtime;
	old_inode->i_ctime = old_dir->i_mtime;

	if (old_dentry->d_parent != new_dentry->d_parent)
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);

	/* Remove the old name.  From here on, failures abort the transaction. */
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
					BTRFS_I(d_inode(old_dentry)),
					old_dentry->d_name.name,
					old_dentry->d_name.len,
					&rename_ctx);
		if (!ret)
			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Drop the inode currently occupying the target name, if any. */
	if (new_inode) {
		inode_inc_iversion(new_inode);
		new_inode->i_ctime = current_time(new_inode);
		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
						 BTRFS_I(d_inode(new_dentry)),
						 new_dentry->d_name.name,
						 new_dentry->d_name.len);
		}
		/* Last link gone: add the replaced inode to the orphan list. */
		if (!ret && new_inode->i_nlink == 0)
			ret = btrfs_orphan_add(trans,
					BTRFS_I(d_inode(new_dentry)));
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	/* Link old_inode under its new name at the index reserved above. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = index;

	/* Log the new name, except in the FIRST_FREE_OBJECTID case. */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
				   rename_ctx.index, new_dentry->d_parent);

	if (flags & RENAME_WHITEOUT) {
		ret = btrfs_create_new_inode(trans, &whiteout_args);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		} else {
			/*
			 * Drop our reference now and clear the pointer so the
			 * iput() at out_whiteout_inode becomes a no-op.
			 */
			unlock_new_inode(whiteout_args.inode);
			iput(whiteout_args.inode);
			whiteout_args.inode = NULL;
		}
	}
out_fail:
	ret2 = btrfs_end_transaction(trans);
	/* Keep the first error; otherwise report the end-transaction result. */
	ret = ret ? ret : ret2;
out_notrans:
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);
	if (flags & RENAME_WHITEOUT)
		btrfs_new_inode_args_destroy(&whiteout_args);
out_whiteout_inode:
	if (flags & RENAME_WHITEOUT)
		iput(whiteout_args.inode);
	return ret;
}
9612
9613 static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
9614 struct dentry *old_dentry, struct inode *new_dir,
9615 struct dentry *new_dentry, unsigned int flags)
9616 {
9617 int ret;
9618
9619 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
9620 return -EINVAL;
9621
9622 if (flags & RENAME_EXCHANGE)
9623 ret = btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9624 new_dentry);
9625 else
9626 ret = btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
9627 new_dentry, flags);
9628
9629 btrfs_btree_balance_dirty(BTRFS_I(new_dir)->root->fs_info);
9630
9631 return ret;
9632 }
9633
/*
 * One queued "flush this inode" request, created by
 * btrfs_alloc_delalloc_work() and executed by btrfs_run_delalloc_work().
 */
struct btrfs_delalloc_work {
	/* Inode to flush; the worker calls iput() on it when done. */
	struct inode *inode;
	/* Completed by the worker after the flush has been issued. */
	struct completion completion;
	/* Links the request into the submitter's local list of works. */
	struct list_head list;
	/* Work item initialised with btrfs_run_delalloc_work as its function. */
	struct btrfs_work work;
};
9640
9641 static void btrfs_run_delalloc_work(struct btrfs_work *work)
9642 {
9643 struct btrfs_delalloc_work *delalloc_work;
9644 struct inode *inode;
9645
9646 delalloc_work = container_of(work, struct btrfs_delalloc_work,
9647 work);
9648 inode = delalloc_work->inode;
9649 filemap_flush(inode->i_mapping);
9650 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
9651 &BTRFS_I(inode)->runtime_flags))
9652 filemap_flush(inode->i_mapping);
9653
9654 iput(inode);
9655 complete(&delalloc_work->completion);
9656 }
9657
9658 static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
9659 {
9660 struct btrfs_delalloc_work *work;
9661
9662 work = kmalloc(sizeof(*work), GFP_NOFS);
9663 if (!work)
9664 return NULL;
9665
9666 init_completion(&work->completion);
9667 INIT_LIST_HEAD(&work->list);
9668 work->inode = inode;
9669 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
9670
9671 return work;
9672 }
9673
9674
9675
9676
9677
/*
 * Walk @root's list of inodes with pending delalloc and start writeback on
 * them.
 *
 * @wbc:                writeback control; nr_to_write == LONG_MAX selects the
 *                      "full flush" mode described below
 * @snapshot:           when true, BTRFS_INODE_SNAPSHOT_FLUSH is set on every
 *                      inode that gets flushed
 * @in_reclaim_context: when true, inodes flagged
 *                      BTRFS_INODE_NO_DELALLOC_FLUSH are skipped
 *
 * Full flush: every inode gets a btrfs_delalloc_work queued on the
 * flush_workers queue, and all of them are waited for before returning.
 * Otherwise each inode is written with filemap_fdatawrite_wbc() in this
 * context and the walk stops on error or once wbc->nr_to_write is used up.
 *
 * Returns 0, -ENOMEM, or the error from filemap_fdatawrite_wbc().
 */
static int start_delalloc_inodes(struct btrfs_root *root,
				 struct writeback_control *wbc, bool snapshot,
				 bool in_reclaim_context)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;
	bool full_flush = wbc->nr_to_write == LONG_MAX;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* Take a private copy of the list; entries move back as visited. */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* Return the entry to the root's list before the lock drops. */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);

		if (in_reclaim_context &&
		    test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
			continue;

		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* No reference obtained; skip, yielding if needed. */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		/* The inode is pinned; the lock is not held while flushing. */
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		if (full_flush) {
			work = btrfs_alloc_delalloc_work(inode);
			if (!work) {
				iput(inode);
				ret = -ENOMEM;
				goto out;
			}
			/* The worker drops the inode reference taken above. */
			list_add_tail(&work->list, &works);
			btrfs_queue_work(root->fs_info->flush_workers,
					 &work->work);
		} else {
			ret = filemap_fdatawrite_wbc(inode->i_mapping, wbc);
			btrfs_add_delayed_iput(inode);
			if (ret || wbc->nr_to_write <= 0)
				goto out;
		}
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Reached with delalloc_lock released on every path. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* Early exit: give unvisited entries back to the root's list. */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
9753
9754 int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
9755 {
9756 struct writeback_control wbc = {
9757 .nr_to_write = LONG_MAX,
9758 .sync_mode = WB_SYNC_NONE,
9759 .range_start = 0,
9760 .range_end = LLONG_MAX,
9761 };
9762 struct btrfs_fs_info *fs_info = root->fs_info;
9763
9764 if (BTRFS_FS_ERROR(fs_info))
9765 return -EROFS;
9766
9767 return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
9768 }
9769
/*
 * Start delalloc writeback on the roots listed in fs_info->delalloc_roots.
 *
 * @nr:                 writeback budget handed to each root through
 *                      wbc.nr_to_write; LONG_MAX requests a full flush of
 *                      every root
 * @in_reclaim_context: forwarded to start_delalloc_inodes()
 *
 * Returns 0 on success, -EROFS if the filesystem is in an error state, or
 * the negative error from start_delalloc_inodes().
 */
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
			       bool in_reclaim_context)
{
	struct writeback_control wbc = {
		.nr_to_write = nr,
		.sync_mode = WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};
	struct btrfs_root *root;
	struct list_head splice;
	int ret;

	if (BTRFS_FS_ERROR(fs_info))
		return -EROFS;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&fs_info->delalloc_root_mutex);
	spin_lock(&fs_info->delalloc_root_lock);
	/* Work on a private copy; each root goes back onto the list below. */
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice)) {
		/*
		 * Restore the budget for a full flush: the previous root's
		 * writeback may have decremented nr_to_write, and
		 * start_delalloc_inodes() only does a full flush when it
		 * equals LONG_MAX.
		 */
		if (nr == LONG_MAX)
			wbc.nr_to_write = LONG_MAX;

		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		root = btrfs_grab_root(root);
		BUG_ON(!root);
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		/* The lock is not held across the flush of this root. */
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
		btrfs_put_root(root);
		/* Stop on error or once the budget is exhausted. */
		if (ret < 0 || wbc.nr_to_write <= 0)
			goto out;
		spin_lock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&fs_info->delalloc_root_lock);

	ret = 0;
out:
	/* Early exit: return the roots not yet visited to the global list. */
	if (!list_empty(&splice)) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_splice_tail(&splice, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	mutex_unlock(&fs_info->delalloc_root_mutex);
	return ret;
}
9825
/*
 * Create the symbolic link @dentry in @dir pointing at @symname.
 *
 * The target string is stored in an inline file extent item keyed at offset
 * 0 of the new inode, so it must not exceed
 * BTRFS_MAX_INLINE_DATA_SIZE(fs_info).
 *
 * Returns 0 on success, -ENAMETOOLONG if @symname is too long, -ENOMEM on
 * allocation failure, or the error from a failing helper.
 */
static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
			 struct dentry *dentry, const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode;
	struct btrfs_new_inode_args new_inode_args = {
		.dir = dir,
		.dentry = dentry,
	};
	unsigned int trans_num_items;
	int err;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	inode = new_inode(dir->i_sb);
	if (!inode)
		return -ENOMEM;
	inode_init_owner(mnt_userns, inode, dir, S_IFLNK | S_IRWXUGO);
	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode->i_mapping->a_ops = &btrfs_aops;
	/* Size and byte count both equal the length of the target string. */
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	inode_set_bytes(inode, name_len);

	new_inode_args.inode = inode;
	err = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
	if (err)
		goto out_inode;
	/* One extra unit for the inline extent item inserted below. */
	trans_num_items++;

	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_new_inode_args;
	}

	err = btrfs_create_new_inode(trans, &new_inode_args);
	if (err)
		goto out;

	/*
	 * From here on, failures abort the transaction and release the inode
	 * with discard_new_inode(); inode is set to NULL so that the iput()
	 * at out_inode is a no-op.
	 */
	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		btrfs_abort_transaction(trans, err);
		discard_new_inode(inode);
		inode = NULL;
		goto out;
	}
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_abort_transaction(trans, err);
		btrfs_free_path(path);
		discard_new_inode(inode);
		inode = NULL;
		goto out;
	}
	/* Fill in the header of the inline extent: no encoding of any kind. */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	/* Copy the target string (name_len bytes, no NUL) into the item. */
	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	d_instantiate_new(dentry, inode);
	err = 0;
out:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
out_new_inode_args:
	btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
	/* On success the inode now belongs to the dentry. */
	if (err)
		iput(inode);
	return err;
}
9927
/*
 * Insert a BTRFS_FILE_EXTENT_PREALLOC file extent item for @inode at
 * @file_offset, describing the disk extent in @ins (objectid = disk bytenr,
 * offset = length).
 *
 * @trans_in: transaction to use, or NULL.  With a handle, the item is
 *            inserted through insert_reserved_file_extent().  Without one,
 *            btrfs_replace_file_extents() is called and may hand back a
 *            transaction handle through its last argument.
 *
 * Returns the transaction handle to continue with (@trans_in itself, or
 * whatever btrfs_replace_file_extents() stored), or an ERR_PTR() on failure.
 */
static struct btrfs_trans_handle *insert_prealloc_file_extent(
				       struct btrfs_trans_handle *trans_in,
				       struct btrfs_inode *inode,
				       struct btrfs_key *ins,
				       u64 file_offset)
{
	struct btrfs_file_extent_item stack_fi;
	struct btrfs_replace_extent_info extent_info;
	struct btrfs_trans_handle *trans = trans_in;
	struct btrfs_path *path;
	u64 start = ins->objectid;
	u64 len = ins->offset;
	int qgroup_released;
	int ret;

	/* Build the extent item on the stack; unset fields stay zero. */
	memset(&stack_fi, 0, sizeof(stack_fi));

	btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
	btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
	btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
	btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
	btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
	btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);

	/*
	 * Release the qgroup data reservation for the range; a non-negative
	 * result is the amount released and is passed on to the insertion.
	 */
	qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
	if (qgroup_released < 0)
		return ERR_PTR(qgroup_released);

	if (trans) {
		ret = insert_reserved_file_extent(trans, inode,
						  file_offset, &stack_fi,
						  true, qgroup_released);
		if (ret)
			goto free_qgroup;
		return trans;
	}

	/* No transaction supplied: describe the extent for the replace helper. */
	extent_info.disk_offset = start;
	extent_info.disk_len = len;
	extent_info.data_offset = 0;
	extent_info.data_len = len;
	extent_info.file_offset = file_offset;
	extent_info.extent_buf = (char *)&stack_fi;
	extent_info.is_new_extent = true;
	extent_info.update_times = true;
	extent_info.qgroup_reserved = qgroup_released;
	extent_info.insertions = 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto free_qgroup;
	}

	ret = btrfs_replace_file_extents(inode, path, file_offset,
				     file_offset + len - 1, &extent_info,
				     &trans);
	btrfs_free_path(path);
	if (ret)
		goto free_qgroup;
	return trans;

free_qgroup:
	/*
	 * The qgroup range was released at the top of this function.  Since
	 * the insertion failed, free the released amount here.
	 * NOTE(review): presumably it would otherwise stay accounted until
	 * transaction commit and leak on this error path - confirm.
	 */
	btrfs_qgroup_free_refroot(inode->root->fs_info,
			inode->root->root_key.objectid, qgroup_released,
			BTRFS_QGROUP_RSV_DATA);
	return ERR_PTR(ret);
}
10004
/*
 * Preallocate disk space for [@start, @start + @num_bytes) of @inode.
 *
 * The range is covered by a sequence of extents, each at most SZ_256M (but
 * never below @min_size).  Each extent is reserved, recorded as a prealloc
 * file extent item, mirrored in the in-memory extent map tree, and followed
 * by an inode update.
 *
 * @mode:       fallocate mode; FALLOC_FL_KEEP_SIZE suppresses i_size updates
 * @actual_len: upper bound for i_size when it is extended
 * @alloc_hint: in/out allocation hint, advanced past each allocated extent
 * @trans:      caller's transaction, or NULL to use one transaction per
 *              extent (obtained via insert_prealloc_file_extent() and ended
 *              here)
 *
 * Returns 0 on success or a negative errno.  The data space reservation for
 * the part of the range never reached is released before returning.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 clear_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);
		/*
		 * Never ask for more than the allocator returned last time.
		 * NOTE(review): presumably this helps on fragmented
		 * filesystems where only small extents are available.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret)
			break;

		/*
		 * This extent is now reserved.  Advance clear_offset so the
		 * final btrfs_free_reserved_data_space() call below does not
		 * cover it again; on failure it is released separately via
		 * btrfs_free_reserved_extent().
		 */
		clear_offset += ins.offset;

		last_alloc = ins.offset;
		trans = insert_prealloc_file_extent(trans, BTRFS_I(inode),
						    &ins, cur_offset);
		/*
		 * Drop the block group reservation count only after the
		 * insertion attempt, whether or not it succeeded.
		 * NOTE(review): presumably ordered this way to avoid racing
		 * with relocation - confirm.
		 */
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			break;
		}

		/* Replace any cached mappings of the range with the new one. */
		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		em = alloc_extent_map();
		if (!em) {
			/* No cached mapping; mark the inode for a full sync. */
			btrfs_set_inode_full_sync(BTRFS_I(inode));
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/* Retry insertion, dropping the range again while -EEXIST. */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		/*
		 * Grow i_size unless KEEP_SIZE was requested, to the smaller
		 * of the end of what is allocated so far and @actual_len.
		 */
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
		}

		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		/* Per-extent transaction: end it so the next pass starts anew. */
		if (own_trans) {
			btrfs_end_transaction(trans);
			trans = NULL;
		}
	}
	/* Release the data space reserved for the part never allocated. */
	if (clear_offset < end)
		btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
			end - clear_offset + 1);
	return ret;
}
10135
10136 int btrfs_prealloc_file_range(struct inode *inode, int mode,
10137 u64 start, u64 num_bytes, u64 min_size,
10138 loff_t actual_len, u64 *alloc_hint)
10139 {
10140 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10141 min_size, actual_len, alloc_hint,
10142 NULL);
10143 }
10144
10145 int btrfs_prealloc_file_range_trans(struct inode *inode,
10146 struct btrfs_trans_handle *trans, int mode,
10147 u64 start, u64 num_bytes, u64 min_size,
10148 loff_t actual_len, u64 *alloc_hint)
10149 {
10150 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10151 min_size, actual_len, alloc_hint, trans);
10152 }
10153
10154 static int btrfs_permission(struct user_namespace *mnt_userns,
10155 struct inode *inode, int mask)
10156 {
10157 struct btrfs_root *root = BTRFS_I(inode)->root;
10158 umode_t mode = inode->i_mode;
10159
10160 if (mask & MAY_WRITE &&
10161 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10162 if (btrfs_root_readonly(root))
10163 return -EROFS;
10164 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10165 return -EACCES;
10166 }
10167 return generic_permission(mnt_userns, inode, mask);
10168 }
10169
10170 static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
10171 struct dentry *dentry, umode_t mode)
10172 {
10173 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
10174 struct btrfs_trans_handle *trans;
10175 struct btrfs_root *root = BTRFS_I(dir)->root;
10176 struct inode *inode;
10177 struct btrfs_new_inode_args new_inode_args = {
10178 .dir = dir,
10179 .dentry = dentry,
10180 .orphan = true,
10181 };
10182 unsigned int trans_num_items;
10183 int ret;
10184
10185 inode = new_inode(dir->i_sb);
10186 if (!inode)
10187 return -ENOMEM;
10188 inode_init_owner(mnt_userns, inode, dir, mode);
10189 inode->i_fop = &btrfs_file_operations;
10190 inode->i_op = &btrfs_file_inode_operations;
10191 inode->i_mapping->a_ops = &btrfs_aops;
10192
10193 new_inode_args.inode = inode;
10194 ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
10195 if (ret)
10196 goto out_inode;
10197
10198 trans = btrfs_start_transaction(root, trans_num_items);
10199 if (IS_ERR(trans)) {
10200 ret = PTR_ERR(trans);
10201 goto out_new_inode_args;
10202 }
10203
10204 ret = btrfs_create_new_inode(trans, &new_inode_args);
10205
10206
10207
10208
10209
10210
10211
10212
10213 set_nlink(inode, 1);
10214
10215 if (!ret) {
10216 d_tmpfile(dentry, inode);
10217 unlock_new_inode(inode);
10218 mark_inode_dirty(inode);
10219 }
10220
10221 btrfs_end_transaction(trans);
10222 btrfs_btree_balance_dirty(fs_info);
10223 out_new_inode_args:
10224 btrfs_new_inode_args_destroy(&new_inode_args);
10225 out_inode:
10226 if (ret)
10227 iput(inode);
10228 return ret;
10229 }
10230
10231 void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
10232 {
10233 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10234 unsigned long index = start >> PAGE_SHIFT;
10235 unsigned long end_index = end >> PAGE_SHIFT;
10236 struct page *page;
10237 u32 len;
10238
10239 ASSERT(end + 1 - start <= U32_MAX);
10240 len = end + 1 - start;
10241 while (index <= end_index) {
10242 page = find_get_page(inode->vfs_inode.i_mapping, index);
10243 ASSERT(page);
10244
10245 btrfs_page_set_writeback(fs_info, page, start, len);
10246 put_page(page);
10247 index++;
10248 }
10249 }
10250
10251 int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
10252 int compress_type)
10253 {
10254 switch (compress_type) {
10255 case BTRFS_COMPRESS_NONE:
10256 return BTRFS_ENCODED_IO_COMPRESSION_NONE;
10257 case BTRFS_COMPRESS_ZLIB:
10258 return BTRFS_ENCODED_IO_COMPRESSION_ZLIB;
10259 case BTRFS_COMPRESS_LZO:
10260
10261
10262
10263
10264 if (fs_info->sectorsize < SZ_4K || fs_info->sectorsize > SZ_64K)
10265 return -EINVAL;
10266 return BTRFS_ENCODED_IO_COMPRESSION_LZO_4K +
10267 (fs_info->sectorsize_bits - 12);
10268 case BTRFS_COMPRESS_ZSTD:
10269 return BTRFS_ENCODED_IO_COMPRESSION_ZSTD;
10270 default:
10271 return -EUCLEAN;
10272 }
10273 }
10274
/*
 * Encoded read of an inline extent.
 *
 * Looks up the file extent item at @extent_start, fills in @encoded (len,
 * compression, unencoded_len and, for compressed extents, unencoded_offset)
 * and copies the inline data to @iter through a temporary buffer.
 *
 * For a compressed extent the whole inline item is returned and must fit in
 * @count (-ENOBUFS otherwise).  For an uncompressed extent at most @count
 * bytes starting at iocb->ki_pos are returned.
 *
 * Once the data has been read from the leaf, the extent range
 * [@start, @lockend] and the shared inode lock are released and *@unlocked
 * is set to true, so the caller must not unlock again.  On earlier failures
 * *@unlocked is left untouched.
 *
 * Returns the number of bytes copied, or a negative errno.
 */
static ssize_t btrfs_encoded_read_inline(
				struct kiocb *iocb,
				struct iov_iter *iter, u64 start,
				u64 lockend,
				struct extent_state **cached_state,
				u64 extent_start, size_t count,
				struct btrfs_ioctl_encoded_io_args *encoded,
				bool *unlocked)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *item;
	u64 ram_bytes;
	unsigned long ptr;
	void *tmp;
	ssize_t ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
				       extent_start, 0);
	if (ret) {
		if (ret > 0) {
			/* A positive result (item not found) becomes -EIO. */
			ret = -EIO;
		}
		goto out;
	}
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);

	ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
	ptr = btrfs_file_extent_inline_start(item);

	/* Bytes from ki_pos up to the extent end or i_size, whichever is less. */
	encoded->len = min_t(u64, extent_start + ram_bytes,
			     inode->vfs_inode.i_size) - iocb->ki_pos;
	ret = btrfs_encoded_io_compression_from_extent(fs_info,
				 btrfs_file_extent_compression(leaf, item));
	if (ret < 0)
		goto out;
	encoded->compression = ret;
	if (encoded->compression) {
		size_t inline_size;

		/* Compressed: return the whole inline item or nothing. */
		inline_size = btrfs_file_extent_inline_item_len(leaf,
								path->slots[0]);
		if (inline_size > count) {
			ret = -ENOBUFS;
			goto out;
		}
		count = inline_size;
		encoded->unencoded_len = ram_bytes;
		encoded->unencoded_offset = iocb->ki_pos - extent_start;
	} else {
		/* Uncompressed: clamp to the request, start at ki_pos. */
		count = min_t(u64, count, encoded->len);
		encoded->len = count;
		encoded->unencoded_len = count;
		ptr += iocb->ki_pos - extent_start;
	}

	tmp = kmalloc(count, GFP_NOFS);
	if (!tmp) {
		ret = -ENOMEM;
		goto out;
	}
	/*
	 * Copy out of the leaf first, then release the path and both locks
	 * before touching the user-supplied iterator.
	 */
	read_extent_buffer(leaf, tmp, ptr, count);
	btrfs_release_path(path);
	unlock_extent_cached(io_tree, start, lockend, cached_state);
	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
	*unlocked = true;

	/* A short copy is reported as -EFAULT. */
	ret = copy_to_iter(tmp, count, iter);
	if (ret != count)
		ret = -EFAULT;
	kfree(tmp);
out:
	btrfs_free_path(path);
	return ret;
}
10361
10362 struct btrfs_encoded_read_private {
10363 struct btrfs_inode *inode;
10364 u64 file_offset;
10365 wait_queue_head_t wait;
10366 atomic_t pending;
10367 blk_status_t status;
10368 bool skip_csum;
10369 };
10370
10371 static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
10372 struct bio *bio, int mirror_num)
10373 {
10374 struct btrfs_encoded_read_private *priv = bio->bi_private;
10375 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10376 blk_status_t ret;
10377
10378 if (!priv->skip_csum) {
10379 ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
10380 if (ret)
10381 return ret;
10382 }
10383
10384 atomic_inc(&priv->pending);
10385 btrfs_submit_bio(fs_info, bio, mirror_num);
10386 return BLK_STS_OK;
10387 }
10388
10389 static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
10390 {
10391 const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
10392 struct btrfs_encoded_read_private *priv = bbio->bio.bi_private;
10393 struct btrfs_inode *inode = priv->inode;
10394 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10395 u32 sectorsize = fs_info->sectorsize;
10396 struct bio_vec *bvec;
10397 struct bvec_iter_all iter_all;
10398 u32 bio_offset = 0;
10399
10400 if (priv->skip_csum || !uptodate)
10401 return bbio->bio.bi_status;
10402
10403 bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
10404 unsigned int i, nr_sectors, pgoff;
10405
10406 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
10407 pgoff = bvec->bv_offset;
10408 for (i = 0; i < nr_sectors; i++) {
10409 ASSERT(pgoff < PAGE_SIZE);
10410 if (btrfs_check_data_csum(&inode->vfs_inode, bbio, bio_offset,
10411 bvec->bv_page, pgoff))
10412 return BLK_STS_IOERR;
10413 bio_offset += sectorsize;
10414 pgoff += sectorsize;
10415 }
10416 }
10417 return BLK_STS_OK;
10418 }
10419
10420 static void btrfs_encoded_read_endio(struct bio *bio)
10421 {
10422 struct btrfs_encoded_read_private *priv = bio->bi_private;
10423 struct btrfs_bio *bbio = btrfs_bio(bio);
10424 blk_status_t status;
10425
10426 status = btrfs_encoded_read_verify_csum(bbio);
10427 if (status) {
10428
10429
10430
10431
10432
10433
10434
10435
10436 WRITE_ONCE(priv->status, status);
10437 }
10438 if (!atomic_dec_return(&priv->pending))
10439 wake_up(&priv->wait);
10440 btrfs_bio_free_csum(bbio);
10441 bio_put(bio);
10442 }
10443
10444 int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
10445 u64 file_offset, u64 disk_bytenr,
10446 u64 disk_io_size, struct page **pages)
10447 {
10448 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10449 struct btrfs_encoded_read_private priv = {
10450 .inode = inode,
10451 .file_offset = file_offset,
10452 .pending = ATOMIC_INIT(1),
10453 .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
10454 };
10455 unsigned long i = 0;
10456 u64 cur = 0;
10457 int ret;
10458
10459 init_waitqueue_head(&priv.wait);
10460
10461
10462
10463
10464 while (cur < disk_io_size) {
10465 struct extent_map *em;
10466 struct btrfs_io_geometry geom;
10467 struct bio *bio = NULL;
10468 u64 remaining;
10469
10470 em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
10471 disk_io_size - cur);
10472 if (IS_ERR(em)) {
10473 ret = PTR_ERR(em);
10474 } else {
10475 ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
10476 disk_bytenr + cur, &geom);
10477 free_extent_map(em);
10478 }
10479 if (ret) {
10480 WRITE_ONCE(priv.status, errno_to_blk_status(ret));
10481 break;
10482 }
10483 remaining = min(geom.len, disk_io_size - cur);
10484 while (bio || remaining) {
10485 size_t bytes = min_t(u64, remaining, PAGE_SIZE);
10486
10487 if (!bio) {
10488 bio = btrfs_bio_alloc(BIO_MAX_VECS);
10489 bio->bi_iter.bi_sector =
10490 (disk_bytenr + cur) >> SECTOR_SHIFT;
10491 bio->bi_end_io = btrfs_encoded_read_endio;
10492 bio->bi_private = &priv;
10493 bio->bi_opf = REQ_OP_READ;
10494 }
10495
10496 if (!bytes ||
10497 bio_add_page(bio, pages[i], bytes, 0) < bytes) {
10498 blk_status_t status;
10499
10500 status = submit_encoded_read_bio(inode, bio, 0);
10501 if (status) {
10502 WRITE_ONCE(priv.status, status);
10503 bio_put(bio);
10504 goto out;
10505 }
10506 bio = NULL;
10507 continue;
10508 }
10509
10510 i++;
10511 cur += bytes;
10512 remaining -= bytes;
10513 }
10514 }
10515
10516 out:
10517 if (atomic_dec_return(&priv.pending))
10518 io_wait_event(priv.wait, !atomic_read(&priv.pending));
10519
10520 return blk_status_to_errno(READ_ONCE(priv.status));
10521 }
10522
10523 static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
10524 struct iov_iter *iter,
10525 u64 start, u64 lockend,
10526 struct extent_state **cached_state,
10527 u64 disk_bytenr, u64 disk_io_size,
10528 size_t count, bool compressed,
10529 bool *unlocked)
10530 {
10531 struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
10532 struct extent_io_tree *io_tree = &inode->io_tree;
10533 struct page **pages;
10534 unsigned long nr_pages, i;
10535 u64 cur;
10536 size_t page_offset;
10537 ssize_t ret;
10538
10539 nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
10540 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
10541 if (!pages)
10542 return -ENOMEM;
10543 ret = btrfs_alloc_page_array(nr_pages, pages);
10544 if (ret) {
10545 ret = -ENOMEM;
10546 goto out;
10547 }
10548
10549 ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
10550 disk_io_size, pages);
10551 if (ret)
10552 goto out;
10553
10554 unlock_extent_cached(io_tree, start, lockend, cached_state);
10555 btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
10556 *unlocked = true;
10557
10558 if (compressed) {
10559 i = 0;
10560 page_offset = 0;
10561 } else {
10562 i = (iocb->ki_pos - start) >> PAGE_SHIFT;
10563 page_offset = (iocb->ki_pos - start) & (PAGE_SIZE - 1);
10564 }
10565 cur = 0;
10566 while (cur < count) {
10567 size_t bytes = min_t(size_t, count - cur,
10568 PAGE_SIZE - page_offset);
10569
10570 if (copy_page_to_iter(pages[i], page_offset, bytes,
10571 iter) != bytes) {
10572 ret = -EFAULT;
10573 goto out;
10574 }
10575 i++;
10576 cur += bytes;
10577 page_offset = 0;
10578 }
10579 ret = count;
10580 out:
10581 for (i = 0; i < nr_pages; i++) {
10582 if (pages[i])
10583 __free_page(pages[i]);
10584 }
10585 kfree(pages);
10586 return ret;
10587 }
10588
/*
 * Read the extent containing iocb->ki_pos in its on-disk encoding and describe
 * it in @encoded.
 *
 * @iocb:    file and position to read from; ki_pos is advanced by
 *           encoded->len when the return value is >= 0
 * @iter:    destination buffer
 * @encoded: filled in with len/unencoded_len (and, for compressed extents,
 *           unencoded_offset and compression) for the data returned
 *
 * Four kinds of extent are handled:
 *  - inline extents are delegated to btrfs_encoded_read_inline();
 *  - holes and preallocated extents are returned as zeroes;
 *  - compressed extents are returned whole, still compressed, and fail with
 *    -ENOBUFS when @iter is smaller than the compressed extent;
 *  - other extents are read uncompressed, limited to the locked range.
 *
 * Returns 0 when ki_pos is at or beyond i_size, otherwise the number of bytes
 * stored in @iter or a negative errno.
 */
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
			   struct btrfs_ioctl_encoded_io_args *encoded)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	ssize_t ret;
	size_t count = iov_iter_count(iter);
	u64 start, lockend, disk_bytenr, disk_io_size;
	struct extent_state *cached_state = NULL;
	struct extent_map *em;
	/*
	 * Set once the extent range and inode lock have been released (here or
	 * by a helper), so that the exit path does not release them again.
	 */
	bool unlocked = false;

	file_accessed(iocb->ki_filp);

	btrfs_inode_lock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);

	/* Nothing to read at or past EOF. */
	if (iocb->ki_pos >= inode->vfs_inode.i_size) {
		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
		return 0;
	}
	start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
	/*
	 * The length of the extent is not known yet, so lock
	 * BTRFS_MAX_UNCOMPRESSED bytes from the sector-aligned start.
	 */
	lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;

	/*
	 * Wait for ordered extents, lock the range, then re-check: if a new
	 * ordered extent appeared in between, unlock and try again.
	 */
	for (;;) {
		struct btrfs_ordered_extent *ordered;

		ret = btrfs_wait_ordered_range(&inode->vfs_inode, start,
					       lockend - start + 1);
		if (ret)
			goto out_unlock_inode;
		lock_extent_bits(io_tree, start, lockend, &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start,
						     lockend - start + 1);
		if (!ordered)
			break;
		btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(io_tree, start, lockend, &cached_state);
		cond_resched();
	}

	em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out_unlock_extent;
	}

	if (em->block_start == EXTENT_MAP_INLINE) {
		u64 extent_start = em->start;

		/*
		 * Only the start offset is needed from the extent map; it is
		 * released before the inline helper runs.
		 */
		free_extent_map(em);
		em = NULL;
		ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
						&cached_state, extent_start,
						count, encoded, &unlocked);
		goto out;
	}

	/*
	 * Bytes from ki_pos to the end of the extent or to i_size, whichever
	 * comes first. The branches below may shrink this further.
	 */
	encoded->len = min_t(u64, extent_map_end(em),
			     inode->vfs_inode.i_size) - iocb->ki_pos;
	if (em->block_start == EXTENT_MAP_HOLE ||
	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
		/* Holes and preallocated extents read back as zeroes. */
		disk_bytenr = EXTENT_MAP_HOLE;
		count = min_t(u64, count, encoded->len);
		encoded->len = count;
		encoded->unencoded_len = count;
	} else if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		disk_bytenr = em->block_start;
		/*
		 * A compressed extent is only ever returned whole; give up if
		 * the buffer cannot hold all of it.
		 */
		if (em->block_len > count) {
			ret = -ENOBUFS;
			goto out_em;
		}
		disk_io_size = em->block_len;
		count = em->block_len;
		encoded->unencoded_len = em->ram_bytes;
		encoded->unencoded_offset = iocb->ki_pos - em->orig_start;
		ret = btrfs_encoded_io_compression_from_extent(fs_info,
							     em->compress_type);
		if (ret < 0)
			goto out_em;
		encoded->compression = ret;
	} else {
		disk_bytenr = em->block_start + (start - em->start);
		if (encoded->len > count)
			encoded->len = count;
		/*
		 * Never read past the locked range. disk_io_size is measured
		 * from the aligned start, count from ki_pos.
		 */
		disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
		count = start + disk_io_size - iocb->ki_pos;
		encoded->len = count;
		encoded->unencoded_len = count;
		disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
	}
	free_extent_map(em);
	em = NULL;

	if (disk_bytenr == EXTENT_MAP_HOLE) {
		/* No disk I/O needed: drop the locks and zero-fill. */
		unlock_extent_cached(io_tree, start, lockend, &cached_state);
		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
		unlocked = true;
		ret = iov_iter_zero(count, iter);
		if (ret != count)
			ret = -EFAULT;
	} else {
		/*
		 * encoded->compression doubles as the "compressed" flag. This
		 * function only writes it in the compressed branch above.
		 * NOTE(review): presumably the caller passes it in zeroed -
		 * confirm at the call site.
		 */
		ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
						 &cached_state, disk_bytenr,
						 disk_io_size, count,
						 encoded->compression,
						 &unlocked);
	}

out:
	if (ret >= 0)
		iocb->ki_pos += encoded->len;
out_em:
	/* em is NULL on every path except the direct jumps to out_em. */
	free_extent_map(em);
out_unlock_extent:
	if (!unlocked)
		unlock_extent_cached(io_tree, start, lockend, &cached_state);
out_unlock_inode:
	if (!unlocked)
		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
	return ret;
}
10731
/*
 * Write already-compressed data from @from as a single extent at
 * iocb->ki_pos.
 *
 * @iocb:    file and position; ki_pos is advanced by encoded->len when the
 *           return value is >= 0
 * @from:    the compressed bytes to store
 * @encoded: description of the data: compression type, encryption (must be
 *           none), logical length (len), decompressed length (unencoded_len)
 *           and the offset of the logical data within the decompressed data
 *           (unencoded_offset)
 *
 * Returns the number of compressed bytes consumed (the initial size of @from)
 * on success, -EINVAL when @encoded or the alignment is not acceptable, or
 * another negative errno.
 */
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
			       const struct btrfs_ioctl_encoded_io_args *encoded)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_changeset *data_reserved = NULL;
	struct extent_state *cached_state = NULL;
	int compression;
	size_t orig_count;
	u64 start, end;
	u64 num_bytes, ram_bytes, disk_num_bytes;
	unsigned long nr_pages, i;
	struct page **pages;
	struct btrfs_key ins;
	/* Decides whether the data space reservation still has to be undone. */
	bool extent_reserved = false;
	struct extent_map *em;
	ssize_t ret;

	/* Translate the ioctl compression constant to the in-kernel one. */
	switch (encoded->compression) {
	case BTRFS_ENCODED_IO_COMPRESSION_ZLIB:
		compression = BTRFS_COMPRESS_ZLIB;
		break;
	case BTRFS_ENCODED_IO_COMPRESSION_ZSTD:
		compression = BTRFS_COMPRESS_ZSTD;
		break;
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_4K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_8K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_16K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_32K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_64K:
		/*
		 * The LZO variant must match the filesystem sector size: the
		 * distance from LZO_4K plus 12 (4K == 1 << 12) has to equal
		 * sectorsize_bits.
		 */
		if (encoded->compression -
		    BTRFS_ENCODED_IO_COMPRESSION_LZO_4K + 12 !=
		    fs_info->sectorsize_bits)
			return -EINVAL;
		compression = BTRFS_COMPRESS_LZO;
		break;
	default:
		return -EINVAL;
	}
	if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
		return -EINVAL;

	orig_count = iov_iter_count(from);

	/* Reject empty writes and sizes beyond the per-extent limits. */
	if (encoded->unencoded_len > BTRFS_MAX_UNCOMPRESSED ||
	    orig_count > BTRFS_MAX_COMPRESSED || orig_count == 0)
		return -EINVAL;

	/* The compressed data must be strictly smaller than what it expands to. */
	if (orig_count >= encoded->unencoded_len)
		return -EINVAL;

	/* The extent has to start on a sector boundary. */
	start = iocb->ki_pos;
	if (!IS_ALIGNED(start, fs_info->sectorsize))
		return -EINVAL;

	/*
	 * The end has to be sector-aligned too, unless the write ends at or
	 * beyond the current i_size.
	 */
	if (start + encoded->len < inode->vfs_inode.i_size &&
	    !IS_ALIGNED(start + encoded->len, fs_info->sectorsize))
		return -EINVAL;

	/* The offset into the decompressed data must be sector-aligned. */
	if (!IS_ALIGNED(encoded->unencoded_offset, fs_info->sectorsize))
		return -EINVAL;

	num_bytes = ALIGN(encoded->len, fs_info->sectorsize);
	ram_bytes = ALIGN(encoded->unencoded_len, fs_info->sectorsize);
	end = start + num_bytes - 1;

	/*
	 * The on-disk size is the compressed size rounded up to a sector; the
	 * copy loop below zero-fills the tail of the last page.
	 */
	disk_num_bytes = ALIGN(orig_count, fs_info->sectorsize);
	nr_pages = DIV_ROUND_UP(disk_num_bytes, PAGE_SIZE);
	pages = kvcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
	if (!pages)
		return -ENOMEM;
	/* Copy the caller's compressed data into freshly allocated pages. */
	for (i = 0; i < nr_pages; i++) {
		size_t bytes = min_t(size_t, PAGE_SIZE, iov_iter_count(from));
		char *kaddr;

		pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);
		if (!pages[i]) {
			ret = -ENOMEM;
			goto out_pages;
		}
		kaddr = kmap_local_page(pages[i]);
		if (copy_from_iter(kaddr, bytes, from) != bytes) {
			kunmap_local(kaddr);
			ret = -EFAULT;
			goto out_pages;
		}
		if (bytes < PAGE_SIZE)
			memset(kaddr + bytes, 0, PAGE_SIZE - bytes);
		kunmap_local(kaddr);
	}

	/*
	 * Wait for ordered extents, drop cached pages, lock the range and
	 * re-check; retry if an ordered extent or a page showed up meanwhile.
	 */
	for (;;) {
		struct btrfs_ordered_extent *ordered;

		ret = btrfs_wait_ordered_range(&inode->vfs_inode, start, num_bytes);
		if (ret)
			goto out_pages;
		ret = invalidate_inode_pages2_range(inode->vfs_inode.i_mapping,
						    start >> PAGE_SHIFT,
						    end >> PAGE_SHIFT);
		if (ret)
			goto out_pages;
		lock_extent_bits(io_tree, start, end, &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start, num_bytes);
		if (!ordered &&
		    !filemap_range_has_page(inode->vfs_inode.i_mapping, start, end))
			break;
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(io_tree, start, end, &cached_state);
		cond_resched();
	}

	/*
	 * Space is reserved piecewise because the logical size (num_bytes)
	 * and the on-disk size (disk_num_bytes) differ: data space and
	 * metadata use disk_num_bytes, the qgroup reservation uses num_bytes.
	 */
	ret = btrfs_alloc_data_chunk_ondemand(inode, disk_num_bytes);
	if (ret)
		goto out_unlock;
	ret = btrfs_qgroup_reserve_data(inode, &data_reserved, start, num_bytes);
	if (ret)
		goto out_free_data_space;
	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes, disk_num_bytes,
					      false);
	if (ret)
		goto out_qgroup_free_data;

	/*
	 * Try an inline extent first. A result <= 0 ends the write here
	 * (0 is reported as orig_count bytes written); a positive result falls
	 * through to a regular extent.
	 */
	if (start == 0 && encoded->unencoded_len == encoded->len &&
	    encoded->unencoded_offset == 0) {
		ret = cow_file_range_inline(inode, encoded->len, orig_count,
					    compression, pages, true);
		if (ret <= 0) {
			if (ret == 0)
				ret = orig_count;
			goto out_delalloc_release;
		}
	}

	ret = btrfs_reserve_extent(root, disk_num_bytes, disk_num_bytes,
				   disk_num_bytes, 0, 0, &ins, 1, 1);
	if (ret)
		goto out_delalloc_release;
	extent_reserved = true;

	em = create_io_em(inode, start, num_bytes,
			  start - encoded->unencoded_offset, ins.objectid,
			  ins.offset, ins.offset, ram_bytes, compression,
			  BTRFS_ORDERED_COMPRESSED);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out_free_reserved;
	}
	/* Only the creation matters here; our reference is not needed. */
	free_extent_map(em);

	ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
				       ins.objectid, ins.offset,
				       encoded->unencoded_offset,
				       (1 << BTRFS_ORDERED_ENCODED) |
				       (1 << BTRFS_ORDERED_COMPRESSED),
				       compression);
	if (ret) {
		btrfs_drop_extent_cache(inode, start, end, 0);
		goto out_free_reserved;
	}
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);

	/* An unaligned end is allowed when extending the file. */
	if (start + encoded->len > inode->vfs_inode.i_size)
		i_size_write(&inode->vfs_inode, start + encoded->len);

	unlock_extent_cached(io_tree, start, end, &cached_state);

	btrfs_delalloc_release_extents(inode, num_bytes);

	if (btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
					  ins.offset, pages, nr_pages, 0, NULL,
					  false)) {
		/* Finish the ordered extent as failed, then free the pages. */
		btrfs_writepage_endio_finish_ordered(inode, pages[0], start, end, 0);
		ret = -EIO;
		goto out_pages;
	}
	/* Success: the pages are not freed on this path. */
	ret = orig_count;
	goto out;

	/* Unwind in reverse order of acquisition. */
out_free_reserved:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_delalloc_release:
	btrfs_delalloc_release_extents(inode, num_bytes);
	btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
out_qgroup_free_data:
	/* Also reached by the successful inline write, hence the ret check. */
	if (ret < 0)
		btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
out_free_data_space:
	/*
	 * The data space reservation is only given back here when
	 * btrfs_reserve_extent() did not succeed.
	 */
	if (!extent_reserved)
		btrfs_free_reserved_data_space_noquota(fs_info, disk_num_bytes);
out_unlock:
	unlock_extent_cached(io_tree, start, end, &cached_state);
out_pages:
	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			__free_page(pages[i]);
	}
	kvfree(pages);
out:
	if (ret >= 0)
		iocb->ki_pos += encoded->len;
	return ret;
}
10975
10976 #ifdef CONFIG_SWAP
10977
10978
10979
10980
10981
10982 static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
10983 bool is_block_group)
10984 {
10985 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10986 struct btrfs_swapfile_pin *sp, *entry;
10987 struct rb_node **p;
10988 struct rb_node *parent = NULL;
10989
10990 sp = kmalloc(sizeof(*sp), GFP_NOFS);
10991 if (!sp)
10992 return -ENOMEM;
10993 sp->ptr = ptr;
10994 sp->inode = inode;
10995 sp->is_block_group = is_block_group;
10996 sp->bg_extent_count = 1;
10997
10998 spin_lock(&fs_info->swapfile_pins_lock);
10999 p = &fs_info->swapfile_pins.rb_node;
11000 while (*p) {
11001 parent = *p;
11002 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
11003 if (sp->ptr < entry->ptr ||
11004 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
11005 p = &(*p)->rb_left;
11006 } else if (sp->ptr > entry->ptr ||
11007 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
11008 p = &(*p)->rb_right;
11009 } else {
11010 if (is_block_group)
11011 entry->bg_extent_count++;
11012 spin_unlock(&fs_info->swapfile_pins_lock);
11013 kfree(sp);
11014 return 1;
11015 }
11016 }
11017 rb_link_node(&sp->node, parent, p);
11018 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
11019 spin_unlock(&fs_info->swapfile_pins_lock);
11020 return 0;
11021 }
11022
11023
11024 static void btrfs_free_swapfile_pins(struct inode *inode)
11025 {
11026 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
11027 struct btrfs_swapfile_pin *sp;
11028 struct rb_node *node, *next;
11029
11030 spin_lock(&fs_info->swapfile_pins_lock);
11031 node = rb_first(&fs_info->swapfile_pins);
11032 while (node) {
11033 next = rb_next(node);
11034 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
11035 if (sp->inode == inode) {
11036 rb_erase(&sp->node, &fs_info->swapfile_pins);
11037 if (sp->is_block_group) {
11038 btrfs_dec_block_group_swap_extents(sp->ptr,
11039 sp->bg_extent_count);
11040 btrfs_put_block_group(sp->ptr);
11041 }
11042 kfree(sp);
11043 }
11044 node = next;
11045 }
11046 spin_unlock(&fs_info->swapfile_pins_lock);
11047 }
11048
/*
 * State accumulated while a swapfile's extents are walked during swap
 * activation. Physically contiguous extents are merged into one run
 * (start/block_start/block_len) before being handed to
 * btrfs_add_swap_extent().
 */
struct btrfs_swap_info {
	/* File offset at which the current run begins. */
	u64 start;
	/* Physical byte address at which the current run begins. */
	u64 block_start;
	/* Length of the current run in bytes; 0 when no run is pending. */
	u64 block_len;
	/* Lowest physical page number recorded so far. */
	u64 lowest_ppage;
	/* Highest physical page number recorded so far. */
	u64 highest_ppage;
	/* Total number of pages added to the swap extent list so far. */
	unsigned long nr_pages;
	/* Sum of the values returned by add_swap_extent(). */
	int nr_extents;
};
11058
11059 static int btrfs_add_swap_extent(struct swap_info_struct *sis,
11060 struct btrfs_swap_info *bsi)
11061 {
11062 unsigned long nr_pages;
11063 unsigned long max_pages;
11064 u64 first_ppage, first_ppage_reported, next_ppage;
11065 int ret;
11066
11067
11068
11069
11070
11071
11072 if (bsi->nr_pages >= sis->max)
11073 return 0;
11074
11075 max_pages = sis->max - bsi->nr_pages;
11076 first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
11077 next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
11078 PAGE_SIZE) >> PAGE_SHIFT;
11079
11080 if (first_ppage >= next_ppage)
11081 return 0;
11082 nr_pages = next_ppage - first_ppage;
11083 nr_pages = min(nr_pages, max_pages);
11084
11085 first_ppage_reported = first_ppage;
11086 if (bsi->start == 0)
11087 first_ppage_reported++;
11088 if (bsi->lowest_ppage > first_ppage_reported)
11089 bsi->lowest_ppage = first_ppage_reported;
11090 if (bsi->highest_ppage < (next_ppage - 1))
11091 bsi->highest_ppage = next_ppage - 1;
11092
11093 ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
11094 if (ret < 0)
11095 return ret;
11096 bsi->nr_extents += ret;
11097 bsi->nr_pages += nr_pages;
11098 return 0;
11099 }
11100
11101 static void btrfs_swap_deactivate(struct file *file)
11102 {
11103 struct inode *inode = file_inode(file);
11104
11105 btrfs_free_swapfile_pins(inode);
11106 atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
11107 }
11108
11109 static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
11110 sector_t *span)
11111 {
11112 struct inode *inode = file_inode(file);
11113 struct btrfs_root *root = BTRFS_I(inode)->root;
11114 struct btrfs_fs_info *fs_info = root->fs_info;
11115 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
11116 struct extent_state *cached_state = NULL;
11117 struct extent_map *em = NULL;
11118 struct btrfs_device *device = NULL;
11119 struct btrfs_swap_info bsi = {
11120 .lowest_ppage = (sector_t)-1ULL,
11121 };
11122 int ret = 0;
11123 u64 isize;
11124 u64 start;
11125
11126
11127
11128
11129
11130
11131 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
11132 if (ret)
11133 return ret;
11134
11135
11136
11137
11138 if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
11139 btrfs_warn(fs_info, "swapfile must not be compressed");
11140 return -EINVAL;
11141 }
11142 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
11143 btrfs_warn(fs_info, "swapfile must not be copy-on-write");
11144 return -EINVAL;
11145 }
11146 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
11147 btrfs_warn(fs_info, "swapfile must not be checksummed");
11148 return -EINVAL;
11149 }
11150
11151
11152
11153
11154
11155
11156
11157
11158
11159
11160 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
11161 btrfs_warn(fs_info,
11162 "cannot activate swapfile while exclusive operation is running");
11163 return -EBUSY;
11164 }
11165
11166
11167
11168
11169
11170
11171
11172
11173 if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
11174 btrfs_exclop_finish(fs_info);
11175 btrfs_warn(fs_info,
11176 "cannot activate swapfile because snapshot creation is in progress");
11177 return -EINVAL;
11178 }
11179
11180
11181
11182
11183
11184
11185
11186
11187
11188
11189 spin_lock(&root->root_item_lock);
11190 if (btrfs_root_dead(root)) {
11191 spin_unlock(&root->root_item_lock);
11192
11193 btrfs_exclop_finish(fs_info);
11194 btrfs_warn(fs_info,
11195 "cannot activate swapfile because subvolume %llu is being deleted",
11196 root->root_key.objectid);
11197 return -EPERM;
11198 }
11199 atomic_inc(&root->nr_swapfiles);
11200 spin_unlock(&root->root_item_lock);
11201
11202 isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
11203
11204 lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
11205 start = 0;
11206 while (start < isize) {
11207 u64 logical_block_start, physical_block_start;
11208 struct btrfs_block_group *bg;
11209 u64 len = isize - start;
11210
11211 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
11212 if (IS_ERR(em)) {
11213 ret = PTR_ERR(em);
11214 goto out;
11215 }
11216
11217 if (em->block_start == EXTENT_MAP_HOLE) {
11218 btrfs_warn(fs_info, "swapfile must not have holes");
11219 ret = -EINVAL;
11220 goto out;
11221 }
11222 if (em->block_start == EXTENT_MAP_INLINE) {
11223
11224
11225
11226
11227
11228
11229
11230 btrfs_warn(fs_info, "swapfile must not be inline");
11231 ret = -EINVAL;
11232 goto out;
11233 }
11234 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
11235 btrfs_warn(fs_info, "swapfile must not be compressed");
11236 ret = -EINVAL;
11237 goto out;
11238 }
11239
11240 logical_block_start = em->block_start + (start - em->start);
11241 len = min(len, em->len - (start - em->start));
11242 free_extent_map(em);
11243 em = NULL;
11244
11245 ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
11246 if (ret < 0) {
11247 goto out;
11248 } else if (ret) {
11249 ret = 0;
11250 } else {
11251 btrfs_warn(fs_info,
11252 "swapfile must not be copy-on-write");
11253 ret = -EINVAL;
11254 goto out;
11255 }
11256
11257 em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
11258 if (IS_ERR(em)) {
11259 ret = PTR_ERR(em);
11260 goto out;
11261 }
11262
11263 if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
11264 btrfs_warn(fs_info,
11265 "swapfile must have single data profile");
11266 ret = -EINVAL;
11267 goto out;
11268 }
11269
11270 if (device == NULL) {
11271 device = em->map_lookup->stripes[0].dev;
11272 ret = btrfs_add_swapfile_pin(inode, device, false);
11273 if (ret == 1)
11274 ret = 0;
11275 else if (ret)
11276 goto out;
11277 } else if (device != em->map_lookup->stripes[0].dev) {
11278 btrfs_warn(fs_info, "swapfile must be on one device");
11279 ret = -EINVAL;
11280 goto out;
11281 }
11282
11283 physical_block_start = (em->map_lookup->stripes[0].physical +
11284 (logical_block_start - em->start));
11285 len = min(len, em->len - (logical_block_start - em->start));
11286 free_extent_map(em);
11287 em = NULL;
11288
11289 bg = btrfs_lookup_block_group(fs_info, logical_block_start);
11290 if (!bg) {
11291 btrfs_warn(fs_info,
11292 "could not find block group containing swapfile");
11293 ret = -EINVAL;
11294 goto out;
11295 }
11296
11297 if (!btrfs_inc_block_group_swap_extents(bg)) {
11298 btrfs_warn(fs_info,
11299 "block group for swapfile at %llu is read-only%s",
11300 bg->start,
11301 atomic_read(&fs_info->scrubs_running) ?
11302 " (scrub running)" : "");
11303 btrfs_put_block_group(bg);
11304 ret = -EINVAL;
11305 goto out;
11306 }
11307
11308 ret = btrfs_add_swapfile_pin(inode, bg, true);
11309 if (ret) {
11310 btrfs_put_block_group(bg);
11311 if (ret == 1)
11312 ret = 0;
11313 else
11314 goto out;
11315 }
11316
11317 if (bsi.block_len &&
11318 bsi.block_start + bsi.block_len == physical_block_start) {
11319 bsi.block_len += len;
11320 } else {
11321 if (bsi.block_len) {
11322 ret = btrfs_add_swap_extent(sis, &bsi);
11323 if (ret)
11324 goto out;
11325 }
11326 bsi.start = start;
11327 bsi.block_start = physical_block_start;
11328 bsi.block_len = len;
11329 }
11330
11331 start += len;
11332 }
11333
11334 if (bsi.block_len)
11335 ret = btrfs_add_swap_extent(sis, &bsi);
11336
11337 out:
11338 if (!IS_ERR_OR_NULL(em))
11339 free_extent_map(em);
11340
11341 unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
11342
11343 if (ret)
11344 btrfs_swap_deactivate(file);
11345
11346 btrfs_drew_write_unlock(&root->snapshot_lock);
11347
11348 btrfs_exclop_finish(fs_info);
11349
11350 if (ret)
11351 return ret;
11352
11353 if (device)
11354 sis->bdev = device->bdev;
11355 *span = bsi.highest_ppage - bsi.lowest_ppage + 1;
11356 sis->max = bsi.nr_pages;
11357 sis->pages = bsi.nr_pages - 1;
11358 sis->highest_bit = bsi.nr_pages - 1;
11359 return bsi.nr_extents;
11360 }
11361 #else
/* Stub used when CONFIG_SWAP is not set: there is nothing to undo. */
static void btrfs_swap_deactivate(struct file *file)
{
}
11365
/* Stub used when CONFIG_SWAP is not set: always fails with -EOPNOTSUPP. */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
11371 #endif
11372
11373
11374
11375
11376
11377
11378
11379 void btrfs_update_inode_bytes(struct btrfs_inode *inode,
11380 const u64 add_bytes,
11381 const u64 del_bytes)
11382 {
11383 if (add_bytes == del_bytes)
11384 return;
11385
11386 spin_lock(&inode->lock);
11387 if (del_bytes > 0)
11388 inode_sub_bytes(&inode->vfs_inode, del_bytes);
11389 if (add_bytes > 0)
11390 inode_add_bytes(&inode->vfs_inode, add_bytes);
11391 spin_unlock(&inode->lock);
11392 }
11393
11394
11395
11396
11397
11398
11399
11400
11401
11402
11403
11404
11405
11406
11407
11408 void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end)
11409 {
11410 struct btrfs_root *root = inode->root;
11411 struct btrfs_ordered_extent *ordered;
11412
11413 if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
11414 return;
11415
11416 ordered = btrfs_lookup_first_ordered_range(inode, start, end + 1 - start);
11417 if (ordered) {
11418 btrfs_err(root->fs_info,
11419 "found unexpected ordered extent in file range [%llu, %llu] for inode %llu root %llu (ordered range [%llu, %llu])",
11420 start, end, btrfs_ino(inode), root->root_key.objectid,
11421 ordered->file_offset,
11422 ordered->file_offset + ordered->num_bytes - 1);
11423 btrfs_put_ordered_extent(ordered);
11424 }
11425
11426 ASSERT(ordered == NULL);
11427 }
11428
/*
 * Inode operations table with the namespace operations (lookup, create, link,
 * unlink, mkdir, rmdir, rename, symlink, mknod, tmpfile) in addition to the
 * attribute, xattr and ACL hooks. Judging by the name, it is meant for
 * directory inodes.
 */
static const struct inode_operations btrfs_dir_inode_operations = {
	.getattr	= btrfs_getattr,
	.lookup		= btrfs_lookup,
	.create		= btrfs_create,
	.unlink		= btrfs_unlink,
	.link		= btrfs_link,
	.mkdir		= btrfs_mkdir,
	.rmdir		= btrfs_rmdir,
	.rename		= btrfs_rename2,
	.symlink	= btrfs_symlink,
	.setattr	= btrfs_setattr,
	.mknod		= btrfs_mknod,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
	.tmpfile        = btrfs_tmpfile,
	.fileattr_get	= btrfs_fileattr_get,
	.fileattr_set	= btrfs_fileattr_set,
};
11450
/*
 * File operations table for open directories: directory iteration through
 * btrfs_real_readdir(), the btrfs ioctls and fsync. read() is routed to
 * generic_read_dir.
 */
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= btrfs_real_readdir,
	.open		= btrfs_opendir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	/* Compat ioctl entry point, only built with CONFIG_COMPAT. */
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.release        = btrfs_release_file,
	.fsync		= btrfs_sync_file,
};
11463
11464
11465
11466
11467
11468
11469
11470
11471
11472
11473
11474
11475
/*
 * Address space operations table: page cache read, readahead and writeback
 * hooks, folio invalidation/release/migration, and the swapfile
 * activate/deactivate entry points defined above.
 */
static const struct address_space_operations btrfs_aops = {
	.read_folio	= btrfs_read_folio,
	.writepages	= btrfs_writepages,
	.readahead	= btrfs_readahead,
	/* NOTE(review): placeholder - presumably direct I/O is done elsewhere. */
	.direct_IO	= noop_direct_IO,
	.invalidate_folio = btrfs_invalidate_folio,
	.release_folio	= btrfs_release_folio,
	.migrate_folio	= btrfs_migrate_folio,
	.dirty_folio	= filemap_dirty_folio,
	.error_remove_page = generic_error_remove_page,
	.swap_activate	= btrfs_swap_activate,
	.swap_deactivate = btrfs_swap_deactivate,
};
11489
/*
 * Inode operations table with attribute, xattr, ACL, fiemap and fileattr
 * hooks and no namespace operations. Judging by the name, it is meant for
 * regular files.
 */
static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.listxattr      = btrfs_listxattr,
	.permission	= btrfs_permission,
	.fiemap		= btrfs_fiemap,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
	.fileattr_get	= btrfs_fileattr_get,
	.fileattr_set	= btrfs_fileattr_set,
};
/*
 * Inode operations table limited to attribute, xattr listing, ACL and
 * timestamp hooks. Judging by the name, it is meant for special files.
 */
static const struct inode_operations btrfs_special_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/*
 * Inode operations table for symbolic links: the target is resolved with
 * page_get_link. No ACL hooks are set in this table.
 */
static const struct inode_operations btrfs_symlink_inode_operations = {
	.get_link	= page_get_link,
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.update_time	= btrfs_update_time,
};
11519
/* Dentry operations table; only the d_delete hook is overridden. */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete	= btrfs_dentry_delete,
};