Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 #include <linux/init.h>
0004 #include <linux/fs.h>
0005 #include <linux/slab.h>
0006 #include <linux/rwsem.h>
0007 #include <linux/xattr.h>
0008 #include <linux/security.h>
0009 #include <linux/posix_acl_xattr.h>
0010 #include <linux/iversion.h>
0011 #include <linux/fsverity.h>
0012 #include <linux/sched/mm.h>
0013 #include "ctree.h"
0014 #include "btrfs_inode.h"
0015 #include "transaction.h"
0016 #include "disk-io.h"
0017 #include "locking.h"
0018 
0019 /*
0020  * Implementation of the interface defined in struct fsverity_operations.
0021  *
0022  * The main question is how and where to store the verity descriptor and the
0023  * Merkle tree. We store both in dedicated btree items in the filesystem tree,
0024  * together with the rest of the inode metadata. This means we'll need to do
0025  * extra work to encrypt them once encryption is supported in btrfs, but btrfs
0026  * has a lot of careful code around i_size and it seems better to make a new key
0027  * type than try and adjust all of our expectations for i_size.
0028  *
0029  * Note that this differs from the implementation in ext4 and f2fs, where
0030  * this data is stored as if it were in the file, but past EOF. However, btrfs
0031  * does not have a widespread mechanism for caching opaque metadata pages, so we
0032  * do pretend that the Merkle tree pages themselves are past EOF for the
0033  * purposes of caching them (as opposed to creating a virtual inode).
0034  *
0035  * fs verity items are stored under two different key types on disk.
0036  * The descriptor items:
0037  * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
0038  *
0039  * At offset 0, we store a btrfs_verity_descriptor_item which tracks the
0040  * size of the descriptor item and some extra data for encryption.
0041  * Starting at offset 1, these hold the generic fs verity descriptor.
0042  * The latter are opaque to btrfs, we just read and write them as a blob for
0043  * the higher level verity code.  The most common descriptor size is 256 bytes.
0044  *
0045  * The merkle tree items:
0046  * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
0047  *
0048  * These also start at offset 0, and correspond to the merkle tree bytes.
0049  * So when fsverity asks for page 0 of the merkle tree, we pull up one page
0050  * starting at offset 0 for this key type.  These are also opaque to btrfs,
0051  * we're blindly storing whatever fsverity sends down.
0052  *
0053  * Another important consideration is the fact that the Merkle tree data scales
0054  * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
0055  * ~1/127th the size) so for large files, writing the tree can be a lengthy
0056  * operation. For that reason, we guard the whole enable verity operation
0057  * (between begin_enable_verity and end_enable_verity) with an orphan item.
0058  * Again, because the data can be pretty large, it's quite possible that we
0059  * could run out of space writing it, so we try our best to handle errors by
0060  * stopping and rolling back rather than aborting the victim transaction.
0061  */
0062 
0063 #define MERKLE_START_ALIGN          65536
0064 
0065 /*
0066  * Compute the logical file offset where we cache the Merkle tree.
0067  *
0068  * @inode:  inode of the verity file
0069  *
0070  * For the purposes of caching the Merkle tree pages, as required by
0071  * fs-verity, it is convenient to do size computations in terms of a file
0072  * offset, rather than in terms of page indices.
0073  *
0074  * Use 64K to be sure it's past the last page in the file, even with 64K pages.
0075  * That rounding operation itself can overflow loff_t, so we do it in u64 and
0076  * check.
0077  *
0078  * Returns the file offset on success, negative error code on failure.
0079  */
0080 static loff_t merkle_file_pos(const struct inode *inode)
0081 {
0082     u64 sz = inode->i_size;
0083     u64 rounded = round_up(sz, MERKLE_START_ALIGN);
0084 
0085     if (rounded > inode->i_sb->s_maxbytes)
0086         return -EFBIG;
0087 
0088     return rounded;
0089 }
0090 
0091 /*
0092  * Drop all the items for this inode with this key_type.
0093  *
0094  * @inode:     inode to drop items for
0095  * @key_type:  type of items to drop (BTRFS_VERITY_DESC_ITEM or
0096  *             BTRFS_VERITY_MERKLE_ITEM)
0097  *
0098  * Before doing a verity enable we cleanup any existing verity items.
0099  * This is also used to clean up if a verity enable failed half way through.
0100  *
0101  * Returns number of dropped items on success, negative error code on failure.
0102  */
0103 static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
0104 {
0105     struct btrfs_trans_handle *trans;
0106     struct btrfs_root *root = inode->root;
0107     struct btrfs_path *path;
0108     struct btrfs_key key;
0109     int count = 0;
0110     int ret;
0111 
0112     path = btrfs_alloc_path();
0113     if (!path)
0114         return -ENOMEM;
0115 
0116     while (1) {
0117         /* 1 for the item being dropped */
0118         trans = btrfs_start_transaction(root, 1);
0119         if (IS_ERR(trans)) {
0120             ret = PTR_ERR(trans);
0121             goto out;
0122         }
0123 
0124         /*
0125          * Walk backwards through all the items until we find one that
0126          * isn't from our key type or objectid
0127          */
0128         key.objectid = btrfs_ino(inode);
0129         key.type = key_type;
0130         key.offset = (u64)-1;
0131 
0132         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
0133         if (ret > 0) {
0134             ret = 0;
0135             /* No more keys of this type, we're done */
0136             if (path->slots[0] == 0)
0137                 break;
0138             path->slots[0]--;
0139         } else if (ret < 0) {
0140             btrfs_end_transaction(trans);
0141             goto out;
0142         }
0143 
0144         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
0145 
0146         /* No more keys of this type, we're done */
0147         if (key.objectid != btrfs_ino(inode) || key.type != key_type)
0148             break;
0149 
0150         /*
0151          * This shouldn't be a performance sensitive function because
0152          * it's not used as part of truncate.  If it ever becomes
0153          * perf sensitive, change this to walk forward and bulk delete
0154          * items
0155          */
0156         ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
0157         if (ret) {
0158             btrfs_end_transaction(trans);
0159             goto out;
0160         }
0161         count++;
0162         btrfs_release_path(path);
0163         btrfs_end_transaction(trans);
0164     }
0165     ret = count;
0166     btrfs_end_transaction(trans);
0167 out:
0168     btrfs_free_path(path);
0169     return ret;
0170 }
0171 
0172 /*
0173  * Drop all verity items
0174  *
0175  * @inode:  inode to drop verity items for
0176  *
0177  * In most contexts where we are dropping verity items, we want to do it for all
0178  * the types of verity items, not a particular one.
0179  *
0180  * Returns: 0 on success, negative error code on failure.
0181  */
0182 int btrfs_drop_verity_items(struct btrfs_inode *inode)
0183 {
0184     int ret;
0185 
0186     ret = drop_verity_items(inode, BTRFS_VERITY_DESC_ITEM_KEY);
0187     if (ret < 0)
0188         return ret;
0189     ret = drop_verity_items(inode, BTRFS_VERITY_MERKLE_ITEM_KEY);
0190     if (ret < 0)
0191         return ret;
0192 
0193     return 0;
0194 }
0195 
0196 /*
0197  * Insert and write inode items with a given key type and offset.
0198  *
0199  * @inode:     inode to insert for
0200  * @key_type:  key type to insert
0201  * @offset:    item offset to insert at
0202  * @src:       source data to write
0203  * @len:       length of source data to write
0204  *
0205  * Write len bytes from src into items of up to 2K length.
0206  * The inserted items will have key (ino, key_type, offset + off) where off is
0207  * consecutively increasing from 0 up to the last item ending at offset + len.
0208  *
0209  * Returns 0 on success and a negative error code on failure.
0210  */
0211 static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
0212                const char *src, u64 len)
0213 {
0214     struct btrfs_trans_handle *trans;
0215     struct btrfs_path *path;
0216     struct btrfs_root *root = inode->root;
0217     struct extent_buffer *leaf;
0218     struct btrfs_key key;
0219     unsigned long copy_bytes;
0220     unsigned long src_offset = 0;
0221     void *data;
0222     int ret = 0;
0223 
0224     path = btrfs_alloc_path();
0225     if (!path)
0226         return -ENOMEM;
0227 
0228     while (len > 0) {
0229         /* 1 for the new item being inserted */
0230         trans = btrfs_start_transaction(root, 1);
0231         if (IS_ERR(trans)) {
0232             ret = PTR_ERR(trans);
0233             break;
0234         }
0235 
0236         key.objectid = btrfs_ino(inode);
0237         key.type = key_type;
0238         key.offset = offset;
0239 
0240         /*
0241          * Insert 2K at a time mostly to be friendly for smaller leaf
0242          * size filesystems
0243          */
0244         copy_bytes = min_t(u64, len, 2048);
0245 
0246         ret = btrfs_insert_empty_item(trans, root, path, &key, copy_bytes);
0247         if (ret) {
0248             btrfs_end_transaction(trans);
0249             break;
0250         }
0251 
0252         leaf = path->nodes[0];
0253 
0254         data = btrfs_item_ptr(leaf, path->slots[0], void);
0255         write_extent_buffer(leaf, src + src_offset,
0256                     (unsigned long)data, copy_bytes);
0257         offset += copy_bytes;
0258         src_offset += copy_bytes;
0259         len -= copy_bytes;
0260 
0261         btrfs_release_path(path);
0262         btrfs_end_transaction(trans);
0263     }
0264 
0265     btrfs_free_path(path);
0266     return ret;
0267 }
0268 
0269 /*
0270  * Read inode items of the given key type and offset from the btree.
0271  *
0272  * @inode:      inode to read items of
0273  * @key_type:   key type to read
0274  * @offset:     item offset to read from
0275  * @dest:       Buffer to read into. This parameter has slightly tricky
0276  *              semantics.  If it is NULL, the function will not do any copying
0277  *              and will just return the size of all the items up to len bytes.
0278  *              If dest_page is passed, then the function will kmap_local the
0279  *              page and ignore dest, but it must still be non-NULL to avoid the
0280  *              counting-only behavior.
0281  * @len:        length in bytes to read
0282  * @dest_page:  copy into this page instead of the dest buffer
0283  *
0284  * Helper function to read items from the btree.  This returns the number of
0285  * bytes read or < 0 for errors.  We can return short reads if the items don't
0286  * exist on disk or aren't big enough to fill the desired length.  Supports
0287  * reading into a provided buffer (dest) or into the page cache
0288  *
0289  * Returns number of bytes read or a negative error code on failure.
0290  */
0291 static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
0292               char *dest, u64 len, struct page *dest_page)
0293 {
0294     struct btrfs_path *path;
0295     struct btrfs_root *root = inode->root;
0296     struct extent_buffer *leaf;
0297     struct btrfs_key key;
0298     u64 item_end;
0299     u64 copy_end;
0300     int copied = 0;
0301     u32 copy_offset;
0302     unsigned long copy_bytes;
0303     unsigned long dest_offset = 0;
0304     void *data;
0305     char *kaddr = dest;
0306     int ret;
0307 
0308     path = btrfs_alloc_path();
0309     if (!path)
0310         return -ENOMEM;
0311 
0312     if (dest_page)
0313         path->reada = READA_FORWARD;
0314 
0315     key.objectid = btrfs_ino(inode);
0316     key.type = key_type;
0317     key.offset = offset;
0318 
0319     ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
0320     if (ret < 0) {
0321         goto out;
0322     } else if (ret > 0) {
0323         ret = 0;
0324         if (path->slots[0] == 0)
0325             goto out;
0326         path->slots[0]--;
0327     }
0328 
0329     while (len > 0) {
0330         leaf = path->nodes[0];
0331         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
0332 
0333         if (key.objectid != btrfs_ino(inode) || key.type != key_type)
0334             break;
0335 
0336         item_end = btrfs_item_size(leaf, path->slots[0]) + key.offset;
0337 
0338         if (copied > 0) {
0339             /*
0340              * Once we've copied something, we want all of the items
0341              * to be sequential
0342              */
0343             if (key.offset != offset)
0344                 break;
0345         } else {
0346             /*
0347              * Our initial offset might be in the middle of an
0348              * item.  Make sure it all makes sense.
0349              */
0350             if (key.offset > offset)
0351                 break;
0352             if (item_end <= offset)
0353                 break;
0354         }
0355 
0356         /* desc = NULL to just sum all the item lengths */
0357         if (!dest)
0358             copy_end = item_end;
0359         else
0360             copy_end = min(offset + len, item_end);
0361 
0362         /* Number of bytes in this item we want to copy */
0363         copy_bytes = copy_end - offset;
0364 
0365         /* Offset from the start of item for copying */
0366         copy_offset = offset - key.offset;
0367 
0368         if (dest) {
0369             if (dest_page)
0370                 kaddr = kmap_local_page(dest_page);
0371 
0372             data = btrfs_item_ptr(leaf, path->slots[0], void);
0373             read_extent_buffer(leaf, kaddr + dest_offset,
0374                        (unsigned long)data + copy_offset,
0375                        copy_bytes);
0376 
0377             if (dest_page)
0378                 kunmap_local(kaddr);
0379         }
0380 
0381         offset += copy_bytes;
0382         dest_offset += copy_bytes;
0383         len -= copy_bytes;
0384         copied += copy_bytes;
0385 
0386         path->slots[0]++;
0387         if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
0388             /*
0389              * We've reached the last slot in this leaf and we need
0390              * to go to the next leaf.
0391              */
0392             ret = btrfs_next_leaf(root, path);
0393             if (ret < 0) {
0394                 break;
0395             } else if (ret > 0) {
0396                 ret = 0;
0397                 break;
0398             }
0399         }
0400     }
0401 out:
0402     btrfs_free_path(path);
0403     if (!ret)
0404         ret = copied;
0405     return ret;
0406 }
0407 
0408 /*
0409  * Delete an fsverity orphan
0410  *
0411  * @trans:  transaction to do the delete in
0412  * @inode:  inode to orphan
0413  *
0414  * Capture verity orphan specific logic that is repeated in the couple places
0415  * we delete verity orphans. Specifically, handling ENOENT and ignoring inodes
0416  * with 0 links.
0417  *
0418  * Returns zero on success or a negative error code on failure.
0419  */
0420 static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inode)
0421 {
0422     struct btrfs_root *root = inode->root;
0423     int ret;
0424 
0425     /*
0426      * If the inode has no links, it is either already unlinked, or was
0427      * created with O_TMPFILE. In either case, it should have an orphan from
0428      * that other operation. Rather than reference count the orphans, we
0429      * simply ignore them here, because we only invoke the verity path in
0430      * the orphan logic when i_nlink is 1.
0431      */
0432     if (!inode->vfs_inode.i_nlink)
0433         return 0;
0434 
0435     ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
0436     if (ret == -ENOENT)
0437         ret = 0;
0438     return ret;
0439 }
0440 
0441 /*
0442  * Rollback in-progress verity if we encounter an error.
0443  *
0444  * @inode:  inode verity had an error for
0445  *
0446  * We try to handle recoverable errors while enabling verity by rolling it back
0447  * and just failing the operation, rather than having an fs level error no
0448  * matter what. However, any error in rollback is unrecoverable.
0449  *
0450  * Returns 0 on success, negative error code on failure.
0451  */
0452 static int rollback_verity(struct btrfs_inode *inode)
0453 {
0454     struct btrfs_trans_handle *trans = NULL;
0455     struct btrfs_root *root = inode->root;
0456     int ret;
0457 
0458     ASSERT(inode_is_locked(&inode->vfs_inode));
0459     truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
0460     clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
0461     ret = btrfs_drop_verity_items(inode);
0462     if (ret) {
0463         btrfs_handle_fs_error(root->fs_info, ret,
0464                 "failed to drop verity items in rollback %llu",
0465                 (u64)inode->vfs_inode.i_ino);
0466         goto out;
0467     }
0468 
0469     /*
0470      * 1 for updating the inode flag
0471      * 1 for deleting the orphan
0472      */
0473     trans = btrfs_start_transaction(root, 2);
0474     if (IS_ERR(trans)) {
0475         ret = PTR_ERR(trans);
0476         trans = NULL;
0477         btrfs_handle_fs_error(root->fs_info, ret,
0478             "failed to start transaction in verity rollback %llu",
0479             (u64)inode->vfs_inode.i_ino);
0480         goto out;
0481     }
0482     inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
0483     btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
0484     ret = btrfs_update_inode(trans, root, inode);
0485     if (ret) {
0486         btrfs_abort_transaction(trans, ret);
0487         goto out;
0488     }
0489     ret = del_orphan(trans, inode);
0490     if (ret) {
0491         btrfs_abort_transaction(trans, ret);
0492         goto out;
0493     }
0494 out:
0495     if (trans)
0496         btrfs_end_transaction(trans);
0497     return ret;
0498 }
0499 
0500 /*
0501  * Finalize making the file a valid verity file
0502  *
0503  * @inode:      inode to be marked as verity
0504  * @desc:       contents of the verity descriptor to write (not NULL)
0505  * @desc_size:  size of the verity descriptor
0506  *
0507  * Do the actual work of finalizing verity after successfully writing the Merkle
0508  * tree:
0509  *
0510  * - write out the descriptor items
0511  * - mark the inode with the verity flag
0512  * - delete the orphan item
0513  * - mark the ro compat bit
0514  * - clear the in progress bit
0515  *
0516  * Returns 0 on success, negative error code on failure.
0517  */
0518 static int finish_verity(struct btrfs_inode *inode, const void *desc,
0519              size_t desc_size)
0520 {
0521     struct btrfs_trans_handle *trans = NULL;
0522     struct btrfs_root *root = inode->root;
0523     struct btrfs_verity_descriptor_item item;
0524     int ret;
0525 
0526     /* Write out the descriptor item */
0527     memset(&item, 0, sizeof(item));
0528     btrfs_set_stack_verity_descriptor_size(&item, desc_size);
0529     ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 0,
0530                   (const char *)&item, sizeof(item));
0531     if (ret)
0532         goto out;
0533 
0534     /* Write out the descriptor itself */
0535     ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1,
0536                   desc, desc_size);
0537     if (ret)
0538         goto out;
0539 
0540     /*
0541      * 1 for updating the inode flag
0542      * 1 for deleting the orphan
0543      */
0544     trans = btrfs_start_transaction(root, 2);
0545     if (IS_ERR(trans)) {
0546         ret = PTR_ERR(trans);
0547         goto out;
0548     }
0549     inode->ro_flags |= BTRFS_INODE_RO_VERITY;
0550     btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
0551     ret = btrfs_update_inode(trans, root, inode);
0552     if (ret)
0553         goto end_trans;
0554     ret = del_orphan(trans, inode);
0555     if (ret)
0556         goto end_trans;
0557     clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
0558     btrfs_set_fs_compat_ro(root->fs_info, VERITY);
0559 end_trans:
0560     btrfs_end_transaction(trans);
0561 out:
0562     return ret;
0563 
0564 }
0565 
0566 /*
0567  * fsverity op that begins enabling verity.
0568  *
0569  * @filp:  file to enable verity on
0570  *
0571  * Begin enabling fsverity for the file. We drop any existing verity items, add
0572  * an orphan and set the in progress bit.
0573  *
0574  * Returns 0 on success, negative error code on failure.
0575  */
0576 static int btrfs_begin_enable_verity(struct file *filp)
0577 {
0578     struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
0579     struct btrfs_root *root = inode->root;
0580     struct btrfs_trans_handle *trans;
0581     int ret;
0582 
0583     ASSERT(inode_is_locked(file_inode(filp)));
0584 
0585     if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
0586         return -EBUSY;
0587 
0588     /*
0589      * This should almost never do anything, but theoretically, it's
0590      * possible that we failed to enable verity on a file, then were
0591      * interrupted or failed while rolling back, failed to cleanup the
0592      * orphan, and finally attempt to enable verity again.
0593      */
0594     ret = btrfs_drop_verity_items(inode);
0595     if (ret)
0596         return ret;
0597 
0598     /* 1 for the orphan item */
0599     trans = btrfs_start_transaction(root, 1);
0600     if (IS_ERR(trans))
0601         return PTR_ERR(trans);
0602 
0603     ret = btrfs_orphan_add(trans, inode);
0604     if (!ret)
0605         set_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
0606     btrfs_end_transaction(trans);
0607 
0608     return 0;
0609 }
0610 
0611 /*
0612  * fsverity op that ends enabling verity.
0613  *
0614  * @filp:              file we are finishing enabling verity on
0615  * @desc:              verity descriptor to write out (NULL in error conditions)
0616  * @desc_size:         size of the verity descriptor (variable with signatures)
0617  * @merkle_tree_size:  size of the merkle tree in bytes
0618  *
0619  * If desc is null, then VFS is signaling an error occurred during verity
0620  * enable, and we should try to rollback. Otherwise, attempt to finish verity.
0621  *
0622  * Returns 0 on success, negative error code on error.
0623  */
0624 static int btrfs_end_enable_verity(struct file *filp, const void *desc,
0625                    size_t desc_size, u64 merkle_tree_size)
0626 {
0627     struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
0628     int ret = 0;
0629     int rollback_ret;
0630 
0631     ASSERT(inode_is_locked(file_inode(filp)));
0632 
0633     if (desc == NULL)
0634         goto rollback;
0635 
0636     ret = finish_verity(inode, desc, desc_size);
0637     if (ret)
0638         goto rollback;
0639     return ret;
0640 
0641 rollback:
0642     rollback_ret = rollback_verity(inode);
0643     if (rollback_ret)
0644         btrfs_err(inode->root->fs_info,
0645               "failed to rollback verity items: %d", rollback_ret);
0646     return ret;
0647 }
0648 
0649 /*
0650  * fsverity op that gets the struct fsverity_descriptor.
0651  *
0652  * @inode:     inode to get the descriptor of
0653  * @buf:       output buffer for the descriptor contents
0654  * @buf_size:  size of the output buffer. 0 to query the size
0655  *
0656  * fsverity does a two pass setup for reading the descriptor, in the first pass
0657  * it calls with buf_size = 0 to query the size of the descriptor, and then in
0658  * the second pass it actually reads the descriptor off disk.
0659  *
0660  * Returns the size on success or a negative error code on failure.
0661  */
0662 static int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
0663                        size_t buf_size)
0664 {
0665     u64 true_size;
0666     int ret = 0;
0667     struct btrfs_verity_descriptor_item item;
0668 
0669     memset(&item, 0, sizeof(item));
0670     ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 0,
0671                  (char *)&item, sizeof(item), NULL);
0672     if (ret < 0)
0673         return ret;
0674 
0675     if (item.reserved[0] != 0 || item.reserved[1] != 0)
0676         return -EUCLEAN;
0677 
0678     true_size = btrfs_stack_verity_descriptor_size(&item);
0679     if (true_size > INT_MAX)
0680         return -EUCLEAN;
0681 
0682     if (buf_size == 0)
0683         return true_size;
0684     if (buf_size < true_size)
0685         return -ERANGE;
0686 
0687     ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 1,
0688                  buf, buf_size, NULL);
0689     if (ret < 0)
0690         return ret;
0691     if (ret != true_size)
0692         return -EIO;
0693 
0694     return true_size;
0695 }
0696 
0697 /*
0698  * fsverity op that reads and caches a merkle tree page.
0699  *
0700  * @inode:         inode to read a merkle tree page for
0701  * @index:         page index relative to the start of the merkle tree
0702  * @num_ra_pages:  number of pages to readahead. Optional, we ignore it
0703  *
0704  * The Merkle tree is stored in the filesystem btree, but its pages are cached
0705  * with a logical position past EOF in the inode's mapping.
0706  *
0707  * Returns the page we read, or an ERR_PTR on error.
0708  */
0709 static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
0710                         pgoff_t index,
0711                         unsigned long num_ra_pages)
0712 {
0713     struct page *page;
0714     u64 off = (u64)index << PAGE_SHIFT;
0715     loff_t merkle_pos = merkle_file_pos(inode);
0716     int ret;
0717 
0718     if (merkle_pos < 0)
0719         return ERR_PTR(merkle_pos);
0720     if (merkle_pos > inode->i_sb->s_maxbytes - off - PAGE_SIZE)
0721         return ERR_PTR(-EFBIG);
0722     index += merkle_pos >> PAGE_SHIFT;
0723 again:
0724     page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
0725     if (page) {
0726         if (PageUptodate(page))
0727             return page;
0728 
0729         lock_page(page);
0730         /*
0731          * We only insert uptodate pages, so !Uptodate has to be
0732          * an error
0733          */
0734         if (!PageUptodate(page)) {
0735             unlock_page(page);
0736             put_page(page);
0737             return ERR_PTR(-EIO);
0738         }
0739         unlock_page(page);
0740         return page;
0741     }
0742 
0743     page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
0744     if (!page)
0745         return ERR_PTR(-ENOMEM);
0746 
0747     /*
0748      * Merkle item keys are indexed from byte 0 in the merkle tree.
0749      * They have the form:
0750      *
0751      * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
0752      */
0753     ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
0754                  page_address(page), PAGE_SIZE, page);
0755     if (ret < 0) {
0756         put_page(page);
0757         return ERR_PTR(ret);
0758     }
0759     if (ret < PAGE_SIZE)
0760         memzero_page(page, ret, PAGE_SIZE - ret);
0761 
0762     SetPageUptodate(page);
0763     ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS);
0764 
0765     if (!ret) {
0766         /* Inserted and ready for fsverity */
0767         unlock_page(page);
0768     } else {
0769         put_page(page);
0770         /* Did someone race us into inserting this page? */
0771         if (ret == -EEXIST)
0772             goto again;
0773         page = ERR_PTR(ret);
0774     }
0775     return page;
0776 }
0777 
0778 /*
0779  * fsverity op that writes a Merkle tree block into the btree.
0780  *
0781  * @inode:          inode to write a Merkle tree block for
0782  * @buf:            Merkle tree data block to write
0783  * @index:          index of the block in the Merkle tree
0784  * @log_blocksize:  log base 2 of the Merkle tree block size
0785  *
0786  * Note that the block size could be different from the page size, so it is not
0787  * safe to assume that index is a page index.
0788  *
0789  * Returns 0 on success or negative error code on failure
0790  */
0791 static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
0792                     u64 index, int log_blocksize)
0793 {
0794     u64 off = index << log_blocksize;
0795     u64 len = 1ULL << log_blocksize;
0796     loff_t merkle_pos = merkle_file_pos(inode);
0797 
0798     if (merkle_pos < 0)
0799         return merkle_pos;
0800     if (merkle_pos > inode->i_sb->s_maxbytes - off - len)
0801         return -EFBIG;
0802 
0803     return write_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
0804                    off, buf, len);
0805 }
0806 
0807 const struct fsverity_operations btrfs_verityops = {
0808     .begin_enable_verity     = btrfs_begin_enable_verity,
0809     .end_enable_verity       = btrfs_end_enable_verity,
0810     .get_verity_descriptor   = btrfs_get_verity_descriptor,
0811     .read_merkle_tree_page   = btrfs_read_merkle_tree_page,
0812     .write_merkle_tree_block = btrfs_write_merkle_tree_block,
0813 };