Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * NILFS B-tree.
0004  *
0005  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
0006  *
0007  * Written by Koji Sato.
0008  */
0009 
0010 #include <linux/slab.h>
0011 #include <linux/string.h>
0012 #include <linux/errno.h>
0013 #include <linux/pagevec.h>
0014 #include "nilfs.h"
0015 #include "page.h"
0016 #include "btnode.h"
0017 #include "btree.h"
0018 #include "alloc.h"
0019 #include "dat.h"
0020 
0021 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
0022 
0023 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
0024 {
0025     struct nilfs_btree_path *path;
0026     int level = NILFS_BTREE_LEVEL_DATA;
0027 
0028     path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
0029     if (path == NULL)
0030         goto out;
0031 
0032     for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
0033         path[level].bp_bh = NULL;
0034         path[level].bp_sib_bh = NULL;
0035         path[level].bp_index = 0;
0036         path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
0037         path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
0038         path[level].bp_op = NULL;
0039     }
0040 
0041 out:
0042     return path;
0043 }
0044 
0045 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
0046 {
0047     int level = NILFS_BTREE_LEVEL_DATA;
0048 
0049     for (; level < NILFS_BTREE_LEVEL_MAX; level++)
0050         brelse(path[level].bp_bh);
0051 
0052     kmem_cache_free(nilfs_btree_path_cache, path);
0053 }
0054 
0055 /*
0056  * B-tree node operations
0057  */
0058 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
0059                      __u64 ptr, struct buffer_head **bhp)
0060 {
0061     struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
0062     struct address_space *btnc = btnc_inode->i_mapping;
0063     struct buffer_head *bh;
0064 
0065     bh = nilfs_btnode_create_block(btnc, ptr);
0066     if (!bh)
0067         return -ENOMEM;
0068 
0069     set_buffer_nilfs_volatile(bh);
0070     *bhp = bh;
0071     return 0;
0072 }
0073 
0074 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
0075 {
0076     return node->bn_flags;
0077 }
0078 
0079 static void
0080 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
0081 {
0082     node->bn_flags = flags;
0083 }
0084 
0085 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
0086 {
0087     return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
0088 }
0089 
0090 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
0091 {
0092     return node->bn_level;
0093 }
0094 
0095 static void
0096 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
0097 {
0098     node->bn_level = level;
0099 }
0100 
0101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
0102 {
0103     return le16_to_cpu(node->bn_nchildren);
0104 }
0105 
0106 static void
0107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
0108 {
0109     node->bn_nchildren = cpu_to_le16(nchildren);
0110 }
0111 
0112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
0113 {
0114     return i_blocksize(btree->b_inode);
0115 }
0116 
0117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
0118 {
0119     return btree->b_nchildren_per_block;
0120 }
0121 
0122 static __le64 *
0123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
0124 {
0125     return (__le64 *)((char *)(node + 1) +
0126               (nilfs_btree_node_root(node) ?
0127                0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
0128 }
0129 
0130 static __le64 *
0131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
0132 {
0133     return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
0134 }
0135 
0136 static __u64
0137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
0138 {
0139     return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
0140 }
0141 
0142 static void
0143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
0144 {
0145     *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
0146 }
0147 
0148 static __u64
0149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
0150              int ncmax)
0151 {
0152     return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
0153 }
0154 
0155 static void
0156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
0157              int ncmax)
0158 {
0159     *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
0160 }
0161 
0162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
0163                   int level, int nchildren, int ncmax,
0164                   const __u64 *keys, const __u64 *ptrs)
0165 {
0166     __le64 *dkeys;
0167     __le64 *dptrs;
0168     int i;
0169 
0170     nilfs_btree_node_set_flags(node, flags);
0171     nilfs_btree_node_set_level(node, level);
0172     nilfs_btree_node_set_nchildren(node, nchildren);
0173 
0174     dkeys = nilfs_btree_node_dkeys(node);
0175     dptrs = nilfs_btree_node_dptrs(node, ncmax);
0176     for (i = 0; i < nchildren; i++) {
0177         dkeys[i] = cpu_to_le64(keys[i]);
0178         dptrs[i] = cpu_to_le64(ptrs[i]);
0179     }
0180 }
0181 
0182 /* Assume the buffer heads corresponding to left and right are locked. */
0183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
0184                        struct nilfs_btree_node *right,
0185                        int n, int lncmax, int rncmax)
0186 {
0187     __le64 *ldkeys, *rdkeys;
0188     __le64 *ldptrs, *rdptrs;
0189     int lnchildren, rnchildren;
0190 
0191     ldkeys = nilfs_btree_node_dkeys(left);
0192     ldptrs = nilfs_btree_node_dptrs(left, lncmax);
0193     lnchildren = nilfs_btree_node_get_nchildren(left);
0194 
0195     rdkeys = nilfs_btree_node_dkeys(right);
0196     rdptrs = nilfs_btree_node_dptrs(right, rncmax);
0197     rnchildren = nilfs_btree_node_get_nchildren(right);
0198 
0199     memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
0200     memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
0201     memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
0202     memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
0203 
0204     lnchildren += n;
0205     rnchildren -= n;
0206     nilfs_btree_node_set_nchildren(left, lnchildren);
0207     nilfs_btree_node_set_nchildren(right, rnchildren);
0208 }
0209 
0210 /* Assume that the buffer heads corresponding to left and right are locked. */
0211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
0212                     struct nilfs_btree_node *right,
0213                     int n, int lncmax, int rncmax)
0214 {
0215     __le64 *ldkeys, *rdkeys;
0216     __le64 *ldptrs, *rdptrs;
0217     int lnchildren, rnchildren;
0218 
0219     ldkeys = nilfs_btree_node_dkeys(left);
0220     ldptrs = nilfs_btree_node_dptrs(left, lncmax);
0221     lnchildren = nilfs_btree_node_get_nchildren(left);
0222 
0223     rdkeys = nilfs_btree_node_dkeys(right);
0224     rdptrs = nilfs_btree_node_dptrs(right, rncmax);
0225     rnchildren = nilfs_btree_node_get_nchildren(right);
0226 
0227     memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
0228     memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
0229     memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
0230     memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
0231 
0232     lnchildren -= n;
0233     rnchildren += n;
0234     nilfs_btree_node_set_nchildren(left, lnchildren);
0235     nilfs_btree_node_set_nchildren(right, rnchildren);
0236 }
0237 
0238 /* Assume that the buffer head corresponding to node is locked. */
0239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
0240                     __u64 key, __u64 ptr, int ncmax)
0241 {
0242     __le64 *dkeys;
0243     __le64 *dptrs;
0244     int nchildren;
0245 
0246     dkeys = nilfs_btree_node_dkeys(node);
0247     dptrs = nilfs_btree_node_dptrs(node, ncmax);
0248     nchildren = nilfs_btree_node_get_nchildren(node);
0249     if (index < nchildren) {
0250         memmove(dkeys + index + 1, dkeys + index,
0251             (nchildren - index) * sizeof(*dkeys));
0252         memmove(dptrs + index + 1, dptrs + index,
0253             (nchildren - index) * sizeof(*dptrs));
0254     }
0255     dkeys[index] = cpu_to_le64(key);
0256     dptrs[index] = cpu_to_le64(ptr);
0257     nchildren++;
0258     nilfs_btree_node_set_nchildren(node, nchildren);
0259 }
0260 
0261 /* Assume that the buffer head corresponding to node is locked. */
0262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
0263                     __u64 *keyp, __u64 *ptrp, int ncmax)
0264 {
0265     __u64 key;
0266     __u64 ptr;
0267     __le64 *dkeys;
0268     __le64 *dptrs;
0269     int nchildren;
0270 
0271     dkeys = nilfs_btree_node_dkeys(node);
0272     dptrs = nilfs_btree_node_dptrs(node, ncmax);
0273     key = le64_to_cpu(dkeys[index]);
0274     ptr = le64_to_cpu(dptrs[index]);
0275     nchildren = nilfs_btree_node_get_nchildren(node);
0276     if (keyp != NULL)
0277         *keyp = key;
0278     if (ptrp != NULL)
0279         *ptrp = ptr;
0280 
0281     if (index < nchildren - 1) {
0282         memmove(dkeys + index, dkeys + index + 1,
0283             (nchildren - index - 1) * sizeof(*dkeys));
0284         memmove(dptrs + index, dptrs + index + 1,
0285             (nchildren - index - 1) * sizeof(*dptrs));
0286     }
0287     nchildren--;
0288     nilfs_btree_node_set_nchildren(node, nchildren);
0289 }
0290 
0291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
0292                    __u64 key, int *indexp)
0293 {
0294     __u64 nkey;
0295     int index, low, high, s;
0296 
0297     /* binary search */
0298     low = 0;
0299     high = nilfs_btree_node_get_nchildren(node) - 1;
0300     index = 0;
0301     s = 0;
0302     while (low <= high) {
0303         index = (low + high) / 2;
0304         nkey = nilfs_btree_node_get_key(node, index);
0305         if (nkey == key) {
0306             s = 0;
0307             goto out;
0308         } else if (nkey < key) {
0309             low = index + 1;
0310             s = -1;
0311         } else {
0312             high = index - 1;
0313             s = 1;
0314         }
0315     }
0316 
0317     /* adjust index */
0318     if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
0319         if (s > 0 && index > 0)
0320             index--;
0321     } else if (s < 0)
0322         index++;
0323 
0324  out:
0325     *indexp = index;
0326 
0327     return s == 0;
0328 }
0329 
0330 /**
0331  * nilfs_btree_node_broken - verify consistency of btree node
0332  * @node: btree node block to be examined
0333  * @size: node size (in bytes)
0334  * @inode: host inode of btree
0335  * @blocknr: block number
0336  *
0337  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
0338  */
0339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
0340                    size_t size, struct inode *inode,
0341                    sector_t blocknr)
0342 {
0343     int level, flags, nchildren;
0344     int ret = 0;
0345 
0346     level = nilfs_btree_node_get_level(node);
0347     flags = nilfs_btree_node_get_flags(node);
0348     nchildren = nilfs_btree_node_get_nchildren(node);
0349 
0350     if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
0351              level >= NILFS_BTREE_LEVEL_MAX ||
0352              (flags & NILFS_BTREE_NODE_ROOT) ||
0353              nchildren < 0 ||
0354              nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
0355         nilfs_crit(inode->i_sb,
0356                "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
0357                inode->i_ino, (unsigned long long)blocknr, level,
0358                flags, nchildren);
0359         ret = 1;
0360     }
0361     return ret;
0362 }
0363 
0364 /**
0365  * nilfs_btree_root_broken - verify consistency of btree root node
0366  * @node: btree root node to be examined
0367  * @inode: host inode of btree
0368  *
0369  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
0370  */
0371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
0372                    struct inode *inode)
0373 {
0374     int level, flags, nchildren;
0375     int ret = 0;
0376 
0377     level = nilfs_btree_node_get_level(node);
0378     flags = nilfs_btree_node_get_flags(node);
0379     nchildren = nilfs_btree_node_get_nchildren(node);
0380 
0381     if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
0382              level >= NILFS_BTREE_LEVEL_MAX ||
0383              nchildren < 0 ||
0384              nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
0385         nilfs_crit(inode->i_sb,
0386                "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
0387                inode->i_ino, level, flags, nchildren);
0388         ret = 1;
0389     }
0390     return ret;
0391 }
0392 
0393 int nilfs_btree_broken_node_block(struct buffer_head *bh)
0394 {
0395     struct inode *inode;
0396     int ret;
0397 
0398     if (buffer_nilfs_checked(bh))
0399         return 0;
0400 
0401     inode = bh->b_page->mapping->host;
0402     ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
0403                       bh->b_size, inode, bh->b_blocknr);
0404     if (likely(!ret))
0405         set_buffer_nilfs_checked(bh);
0406     return ret;
0407 }
0408 
0409 static struct nilfs_btree_node *
0410 nilfs_btree_get_root(const struct nilfs_bmap *btree)
0411 {
0412     return (struct nilfs_btree_node *)btree->b_u.u_data;
0413 }
0414 
0415 static struct nilfs_btree_node *
0416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
0417 {
0418     return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
0419 }
0420 
0421 static struct nilfs_btree_node *
0422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
0423 {
0424     return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
0425 }
0426 
0427 static int nilfs_btree_height(const struct nilfs_bmap *btree)
0428 {
0429     return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
0430 }
0431 
0432 static struct nilfs_btree_node *
0433 nilfs_btree_get_node(const struct nilfs_bmap *btree,
0434              const struct nilfs_btree_path *path,
0435              int level, int *ncmaxp)
0436 {
0437     struct nilfs_btree_node *node;
0438 
0439     if (level == nilfs_btree_height(btree) - 1) {
0440         node = nilfs_btree_get_root(btree);
0441         *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
0442     } else {
0443         node = nilfs_btree_get_nonroot_node(path, level);
0444         *ncmaxp = nilfs_btree_nchildren_per_block(btree);
0445     }
0446     return node;
0447 }
0448 
0449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
0450                 struct nilfs_btree_node *node, int level)
0451 {
0452     if (unlikely(nilfs_btree_node_get_level(node) != level)) {
0453         dump_stack();
0454         nilfs_crit(btree->b_inode->i_sb,
0455                "btree level mismatch (ino=%lu): %d != %d",
0456                btree->b_inode->i_ino,
0457                nilfs_btree_node_get_level(node), level);
0458         return 1;
0459     }
0460     return 0;
0461 }
0462 
0463 struct nilfs_btree_readahead_info {
0464     struct nilfs_btree_node *node;  /* parent node */
0465     int max_ra_blocks;      /* max nof blocks to read ahead */
0466     int index;          /* current index on the parent node */
0467     int ncmax;          /* nof children in the parent node */
0468 };
0469 
0470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
0471                    struct buffer_head **bhp,
0472                    const struct nilfs_btree_readahead_info *ra)
0473 {
0474     struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
0475     struct address_space *btnc = btnc_inode->i_mapping;
0476     struct buffer_head *bh, *ra_bh;
0477     sector_t submit_ptr = 0;
0478     int ret;
0479 
0480     ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
0481                     &submit_ptr);
0482     if (ret) {
0483         if (ret != -EEXIST)
0484             return ret;
0485         goto out_check;
0486     }
0487 
0488     if (ra) {
0489         int i, n;
0490         __u64 ptr2;
0491 
0492         /* read ahead sibling nodes */
0493         for (n = ra->max_ra_blocks, i = ra->index + 1;
0494              n > 0 && i < ra->ncmax; n--, i++) {
0495             ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
0496 
0497             ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
0498                         REQ_OP_READ | REQ_RAHEAD,
0499                         &ra_bh, &submit_ptr);
0500             if (likely(!ret || ret == -EEXIST))
0501                 brelse(ra_bh);
0502             else if (ret != -EBUSY)
0503                 break;
0504             if (!buffer_locked(bh))
0505                 goto out_no_wait;
0506         }
0507     }
0508 
0509     wait_on_buffer(bh);
0510 
0511  out_no_wait:
0512     if (!buffer_uptodate(bh)) {
0513         nilfs_err(btree->b_inode->i_sb,
0514               "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
0515               btree->b_inode->i_ino, (unsigned long long)ptr);
0516         brelse(bh);
0517         return -EIO;
0518     }
0519 
0520  out_check:
0521     if (nilfs_btree_broken_node_block(bh)) {
0522         clear_buffer_uptodate(bh);
0523         brelse(bh);
0524         return -EINVAL;
0525     }
0526 
0527     *bhp = bh;
0528     return 0;
0529 }
0530 
0531 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
0532                    struct buffer_head **bhp)
0533 {
0534     return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
0535 }
0536 
0537 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
0538                  struct nilfs_btree_path *path,
0539                  __u64 key, __u64 *ptrp, int minlevel,
0540                  int readahead)
0541 {
0542     struct nilfs_btree_node *node;
0543     struct nilfs_btree_readahead_info p, *ra;
0544     __u64 ptr;
0545     int level, index, found, ncmax, ret;
0546 
0547     node = nilfs_btree_get_root(btree);
0548     level = nilfs_btree_node_get_level(node);
0549     if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
0550         return -ENOENT;
0551 
0552     found = nilfs_btree_node_lookup(node, key, &index);
0553     ptr = nilfs_btree_node_get_ptr(node, index,
0554                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
0555     path[level].bp_bh = NULL;
0556     path[level].bp_index = index;
0557 
0558     ncmax = nilfs_btree_nchildren_per_block(btree);
0559 
0560     while (--level >= minlevel) {
0561         ra = NULL;
0562         if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
0563             p.node = nilfs_btree_get_node(btree, path, level + 1,
0564                               &p.ncmax);
0565             p.index = index;
0566             p.max_ra_blocks = 7;
0567             ra = &p;
0568         }
0569         ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
0570                           ra);
0571         if (ret < 0)
0572             return ret;
0573 
0574         node = nilfs_btree_get_nonroot_node(path, level);
0575         if (nilfs_btree_bad_node(btree, node, level))
0576             return -EINVAL;
0577         if (!found)
0578             found = nilfs_btree_node_lookup(node, key, &index);
0579         else
0580             index = 0;
0581         if (index < ncmax) {
0582             ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
0583         } else {
0584             WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
0585             /* insert */
0586             ptr = NILFS_BMAP_INVALID_PTR;
0587         }
0588         path[level].bp_index = index;
0589     }
0590     if (!found)
0591         return -ENOENT;
0592 
0593     if (ptrp != NULL)
0594         *ptrp = ptr;
0595 
0596     return 0;
0597 }
0598 
0599 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
0600                       struct nilfs_btree_path *path,
0601                       __u64 *keyp, __u64 *ptrp)
0602 {
0603     struct nilfs_btree_node *node;
0604     __u64 ptr;
0605     int index, level, ncmax, ret;
0606 
0607     node = nilfs_btree_get_root(btree);
0608     index = nilfs_btree_node_get_nchildren(node) - 1;
0609     if (index < 0)
0610         return -ENOENT;
0611     level = nilfs_btree_node_get_level(node);
0612     ptr = nilfs_btree_node_get_ptr(node, index,
0613                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
0614     path[level].bp_bh = NULL;
0615     path[level].bp_index = index;
0616     ncmax = nilfs_btree_nchildren_per_block(btree);
0617 
0618     for (level--; level > 0; level--) {
0619         ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
0620         if (ret < 0)
0621             return ret;
0622         node = nilfs_btree_get_nonroot_node(path, level);
0623         if (nilfs_btree_bad_node(btree, node, level))
0624             return -EINVAL;
0625         index = nilfs_btree_node_get_nchildren(node) - 1;
0626         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
0627         path[level].bp_index = index;
0628     }
0629 
0630     if (keyp != NULL)
0631         *keyp = nilfs_btree_node_get_key(node, index);
0632     if (ptrp != NULL)
0633         *ptrp = ptr;
0634 
0635     return 0;
0636 }
0637 
0638 /**
0639  * nilfs_btree_get_next_key - get next valid key from btree path array
0640  * @btree: bmap struct of btree
0641  * @path: array of nilfs_btree_path struct
0642  * @minlevel: start level
0643  * @nextkey: place to store the next valid key
0644  *
0645  * Return Value: If a next key was found, 0 is returned. Otherwise,
0646  * -ENOENT is returned.
0647  */
0648 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
0649                     const struct nilfs_btree_path *path,
0650                     int minlevel, __u64 *nextkey)
0651 {
0652     struct nilfs_btree_node *node;
0653     int maxlevel = nilfs_btree_height(btree) - 1;
0654     int index, next_adj, level;
0655 
0656     /* Next index is already set to bp_index for leaf nodes. */
0657     next_adj = 0;
0658     for (level = minlevel; level <= maxlevel; level++) {
0659         if (level == maxlevel)
0660             node = nilfs_btree_get_root(btree);
0661         else
0662             node = nilfs_btree_get_nonroot_node(path, level);
0663 
0664         index = path[level].bp_index + next_adj;
0665         if (index < nilfs_btree_node_get_nchildren(node)) {
0666             /* Next key is in this node */
0667             *nextkey = nilfs_btree_node_get_key(node, index);
0668             return 0;
0669         }
0670         /* For non-leaf nodes, next index is stored at bp_index + 1. */
0671         next_adj = 1;
0672     }
0673     return -ENOENT;
0674 }
0675 
0676 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
0677                   __u64 key, int level, __u64 *ptrp)
0678 {
0679     struct nilfs_btree_path *path;
0680     int ret;
0681 
0682     path = nilfs_btree_alloc_path();
0683     if (path == NULL)
0684         return -ENOMEM;
0685 
0686     ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
0687 
0688     nilfs_btree_free_path(path);
0689 
0690     return ret;
0691 }
0692 
0693 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
0694                      __u64 key, __u64 *ptrp,
0695                      unsigned int maxblocks)
0696 {
0697     struct nilfs_btree_path *path;
0698     struct nilfs_btree_node *node;
0699     struct inode *dat = NULL;
0700     __u64 ptr, ptr2;
0701     sector_t blocknr;
0702     int level = NILFS_BTREE_LEVEL_NODE_MIN;
0703     int ret, cnt, index, maxlevel, ncmax;
0704     struct nilfs_btree_readahead_info p;
0705 
0706     path = nilfs_btree_alloc_path();
0707     if (path == NULL)
0708         return -ENOMEM;
0709 
0710     ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
0711     if (ret < 0)
0712         goto out;
0713 
0714     if (NILFS_BMAP_USE_VBN(btree)) {
0715         dat = nilfs_bmap_get_dat(btree);
0716         ret = nilfs_dat_translate(dat, ptr, &blocknr);
0717         if (ret < 0)
0718             goto out;
0719         ptr = blocknr;
0720     }
0721     cnt = 1;
0722     if (cnt == maxblocks)
0723         goto end;
0724 
0725     maxlevel = nilfs_btree_height(btree) - 1;
0726     node = nilfs_btree_get_node(btree, path, level, &ncmax);
0727     index = path[level].bp_index + 1;
0728     for (;;) {
0729         while (index < nilfs_btree_node_get_nchildren(node)) {
0730             if (nilfs_btree_node_get_key(node, index) !=
0731                 key + cnt)
0732                 goto end;
0733             ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
0734             if (dat) {
0735                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
0736                 if (ret < 0)
0737                     goto out;
0738                 ptr2 = blocknr;
0739             }
0740             if (ptr2 != ptr + cnt || ++cnt == maxblocks)
0741                 goto end;
0742             index++;
0743         }
0744         if (level == maxlevel)
0745             break;
0746 
0747         /* look-up right sibling node */
0748         p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
0749         p.index = path[level + 1].bp_index + 1;
0750         p.max_ra_blocks = 7;
0751         if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
0752             nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
0753             break;
0754         ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
0755         path[level + 1].bp_index = p.index;
0756 
0757         brelse(path[level].bp_bh);
0758         path[level].bp_bh = NULL;
0759 
0760         ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
0761                           &p);
0762         if (ret < 0)
0763             goto out;
0764         node = nilfs_btree_get_nonroot_node(path, level);
0765         ncmax = nilfs_btree_nchildren_per_block(btree);
0766         index = 0;
0767         path[level].bp_index = index;
0768     }
0769  end:
0770     *ptrp = ptr;
0771     ret = cnt;
0772  out:
0773     nilfs_btree_free_path(path);
0774     return ret;
0775 }
0776 
0777 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
0778                     struct nilfs_btree_path *path,
0779                     int level, __u64 key)
0780 {
0781     if (level < nilfs_btree_height(btree) - 1) {
0782         do {
0783             nilfs_btree_node_set_key(
0784                 nilfs_btree_get_nonroot_node(path, level),
0785                 path[level].bp_index, key);
0786             if (!buffer_dirty(path[level].bp_bh))
0787                 mark_buffer_dirty(path[level].bp_bh);
0788         } while ((path[level].bp_index == 0) &&
0789              (++level < nilfs_btree_height(btree) - 1));
0790     }
0791 
0792     /* root */
0793     if (level == nilfs_btree_height(btree) - 1) {
0794         nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
0795                      path[level].bp_index, key);
0796     }
0797 }
0798 
0799 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
0800                   struct nilfs_btree_path *path,
0801                   int level, __u64 *keyp, __u64 *ptrp)
0802 {
0803     struct nilfs_btree_node *node;
0804     int ncblk;
0805 
0806     if (level < nilfs_btree_height(btree) - 1) {
0807         node = nilfs_btree_get_nonroot_node(path, level);
0808         ncblk = nilfs_btree_nchildren_per_block(btree);
0809         nilfs_btree_node_insert(node, path[level].bp_index,
0810                     *keyp, *ptrp, ncblk);
0811         if (!buffer_dirty(path[level].bp_bh))
0812             mark_buffer_dirty(path[level].bp_bh);
0813 
0814         if (path[level].bp_index == 0)
0815             nilfs_btree_promote_key(btree, path, level + 1,
0816                         nilfs_btree_node_get_key(node,
0817                                      0));
0818     } else {
0819         node = nilfs_btree_get_root(btree);
0820         nilfs_btree_node_insert(node, path[level].bp_index,
0821                     *keyp, *ptrp,
0822                     NILFS_BTREE_ROOT_NCHILDREN_MAX);
0823     }
0824 }
0825 
0826 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
0827                    struct nilfs_btree_path *path,
0828                    int level, __u64 *keyp, __u64 *ptrp)
0829 {
0830     struct nilfs_btree_node *node, *left;
0831     int nchildren, lnchildren, n, move, ncblk;
0832 
0833     node = nilfs_btree_get_nonroot_node(path, level);
0834     left = nilfs_btree_get_sib_node(path, level);
0835     nchildren = nilfs_btree_node_get_nchildren(node);
0836     lnchildren = nilfs_btree_node_get_nchildren(left);
0837     ncblk = nilfs_btree_nchildren_per_block(btree);
0838     move = 0;
0839 
0840     n = (nchildren + lnchildren + 1) / 2 - lnchildren;
0841     if (n > path[level].bp_index) {
0842         /* move insert point */
0843         n--;
0844         move = 1;
0845     }
0846 
0847     nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
0848 
0849     if (!buffer_dirty(path[level].bp_bh))
0850         mark_buffer_dirty(path[level].bp_bh);
0851     if (!buffer_dirty(path[level].bp_sib_bh))
0852         mark_buffer_dirty(path[level].bp_sib_bh);
0853 
0854     nilfs_btree_promote_key(btree, path, level + 1,
0855                 nilfs_btree_node_get_key(node, 0));
0856 
0857     if (move) {
0858         brelse(path[level].bp_bh);
0859         path[level].bp_bh = path[level].bp_sib_bh;
0860         path[level].bp_sib_bh = NULL;
0861         path[level].bp_index += lnchildren;
0862         path[level + 1].bp_index--;
0863     } else {
0864         brelse(path[level].bp_sib_bh);
0865         path[level].bp_sib_bh = NULL;
0866         path[level].bp_index -= n;
0867     }
0868 
0869     nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
0870 }
0871 
0872 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
0873                     struct nilfs_btree_path *path,
0874                     int level, __u64 *keyp, __u64 *ptrp)
0875 {
0876     struct nilfs_btree_node *node, *right;
0877     int nchildren, rnchildren, n, move, ncblk;
0878 
0879     node = nilfs_btree_get_nonroot_node(path, level);
0880     right = nilfs_btree_get_sib_node(path, level);
0881     nchildren = nilfs_btree_node_get_nchildren(node);
0882     rnchildren = nilfs_btree_node_get_nchildren(right);
0883     ncblk = nilfs_btree_nchildren_per_block(btree);
0884     move = 0;
0885 
0886     n = (nchildren + rnchildren + 1) / 2 - rnchildren;
0887     if (n > nchildren - path[level].bp_index) {
0888         /* move insert point */
0889         n--;
0890         move = 1;
0891     }
0892 
0893     nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
0894 
0895     if (!buffer_dirty(path[level].bp_bh))
0896         mark_buffer_dirty(path[level].bp_bh);
0897     if (!buffer_dirty(path[level].bp_sib_bh))
0898         mark_buffer_dirty(path[level].bp_sib_bh);
0899 
0900     path[level + 1].bp_index++;
0901     nilfs_btree_promote_key(btree, path, level + 1,
0902                 nilfs_btree_node_get_key(right, 0));
0903     path[level + 1].bp_index--;
0904 
0905     if (move) {
0906         brelse(path[level].bp_bh);
0907         path[level].bp_bh = path[level].bp_sib_bh;
0908         path[level].bp_sib_bh = NULL;
0909         path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
0910         path[level + 1].bp_index++;
0911     } else {
0912         brelse(path[level].bp_sib_bh);
0913         path[level].bp_sib_bh = NULL;
0914     }
0915 
0916     nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
0917 }
0918 
0919 static void nilfs_btree_split(struct nilfs_bmap *btree,
0920                   struct nilfs_btree_path *path,
0921                   int level, __u64 *keyp, __u64 *ptrp)
0922 {
0923     struct nilfs_btree_node *node, *right;
0924     int nchildren, n, move, ncblk;
0925 
0926     node = nilfs_btree_get_nonroot_node(path, level);
0927     right = nilfs_btree_get_sib_node(path, level);
0928     nchildren = nilfs_btree_node_get_nchildren(node);
0929     ncblk = nilfs_btree_nchildren_per_block(btree);
0930     move = 0;
0931 
0932     n = (nchildren + 1) / 2;
0933     if (n > nchildren - path[level].bp_index) {
0934         n--;
0935         move = 1;
0936     }
0937 
0938     nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
0939 
0940     if (!buffer_dirty(path[level].bp_bh))
0941         mark_buffer_dirty(path[level].bp_bh);
0942     if (!buffer_dirty(path[level].bp_sib_bh))
0943         mark_buffer_dirty(path[level].bp_sib_bh);
0944 
0945     if (move) {
0946         path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
0947         nilfs_btree_node_insert(right, path[level].bp_index,
0948                     *keyp, *ptrp, ncblk);
0949 
0950         *keyp = nilfs_btree_node_get_key(right, 0);
0951         *ptrp = path[level].bp_newreq.bpr_ptr;
0952 
0953         brelse(path[level].bp_bh);
0954         path[level].bp_bh = path[level].bp_sib_bh;
0955         path[level].bp_sib_bh = NULL;
0956     } else {
0957         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
0958 
0959         *keyp = nilfs_btree_node_get_key(right, 0);
0960         *ptrp = path[level].bp_newreq.bpr_ptr;
0961 
0962         brelse(path[level].bp_sib_bh);
0963         path[level].bp_sib_bh = NULL;
0964     }
0965 
0966     path[level + 1].bp_index++;
0967 }
0968 
0969 static void nilfs_btree_grow(struct nilfs_bmap *btree,
0970                  struct nilfs_btree_path *path,
0971                  int level, __u64 *keyp, __u64 *ptrp)
0972 {
0973     struct nilfs_btree_node *root, *child;
0974     int n, ncblk;
0975 
0976     root = nilfs_btree_get_root(btree);
0977     child = nilfs_btree_get_sib_node(path, level);
0978     ncblk = nilfs_btree_nchildren_per_block(btree);
0979 
0980     n = nilfs_btree_node_get_nchildren(root);
0981 
0982     nilfs_btree_node_move_right(root, child, n,
0983                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
0984     nilfs_btree_node_set_level(root, level + 1);
0985 
0986     if (!buffer_dirty(path[level].bp_sib_bh))
0987         mark_buffer_dirty(path[level].bp_sib_bh);
0988 
0989     path[level].bp_bh = path[level].bp_sib_bh;
0990     path[level].bp_sib_bh = NULL;
0991 
0992     nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
0993 
0994     *keyp = nilfs_btree_node_get_key(child, 0);
0995     *ptrp = path[level].bp_newreq.bpr_ptr;
0996 }
0997 
0998 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
0999                    const struct nilfs_btree_path *path)
1000 {
1001     struct nilfs_btree_node *node;
1002     int level, ncmax;
1003 
1004     if (path == NULL)
1005         return NILFS_BMAP_INVALID_PTR;
1006 
1007     /* left sibling */
1008     level = NILFS_BTREE_LEVEL_NODE_MIN;
1009     if (path[level].bp_index > 0) {
1010         node = nilfs_btree_get_node(btree, path, level, &ncmax);
1011         return nilfs_btree_node_get_ptr(node,
1012                         path[level].bp_index - 1,
1013                         ncmax);
1014     }
1015 
1016     /* parent */
1017     level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1018     if (level <= nilfs_btree_height(btree) - 1) {
1019         node = nilfs_btree_get_node(btree, path, level, &ncmax);
1020         return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1021                         ncmax);
1022     }
1023 
1024     return NILFS_BMAP_INVALID_PTR;
1025 }
1026 
1027 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1028                        const struct nilfs_btree_path *path,
1029                        __u64 key)
1030 {
1031     __u64 ptr;
1032 
1033     ptr = nilfs_bmap_find_target_seq(btree, key);
1034     if (ptr != NILFS_BMAP_INVALID_PTR)
1035         /* sequential access */
1036         return ptr;
1037 
1038     ptr = nilfs_btree_find_near(btree, path);
1039     if (ptr != NILFS_BMAP_INVALID_PTR)
1040         /* near */
1041         return ptr;
1042 
1043     /* block group */
1044     return nilfs_bmap_find_target_in_group(btree);
1045 }
1046 
1047 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1048                       struct nilfs_btree_path *path,
1049                       int *levelp, __u64 key, __u64 ptr,
1050                       struct nilfs_bmap_stats *stats)
1051 {
1052     struct buffer_head *bh;
1053     struct nilfs_btree_node *node, *parent, *sib;
1054     __u64 sibptr;
1055     int pindex, level, ncmax, ncblk, ret;
1056     struct inode *dat = NULL;
1057 
1058     stats->bs_nblocks = 0;
1059     level = NILFS_BTREE_LEVEL_DATA;
1060 
1061     /* allocate a new ptr for data block */
1062     if (NILFS_BMAP_USE_VBN(btree)) {
1063         path[level].bp_newreq.bpr_ptr =
1064             nilfs_btree_find_target_v(btree, path, key);
1065         dat = nilfs_bmap_get_dat(btree);
1066     }
1067 
1068     ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1069     if (ret < 0)
1070         goto err_out_data;
1071 
1072     ncblk = nilfs_btree_nchildren_per_block(btree);
1073 
1074     for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1075          level < nilfs_btree_height(btree) - 1;
1076          level++) {
1077         node = nilfs_btree_get_nonroot_node(path, level);
1078         if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1079             path[level].bp_op = nilfs_btree_do_insert;
1080             stats->bs_nblocks++;
1081             goto out;
1082         }
1083 
1084         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1085         pindex = path[level + 1].bp_index;
1086 
1087         /* left sibling */
1088         if (pindex > 0) {
1089             sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1090                               ncmax);
1091             ret = nilfs_btree_get_block(btree, sibptr, &bh);
1092             if (ret < 0)
1093                 goto err_out_child_node;
1094             sib = (struct nilfs_btree_node *)bh->b_data;
1095             if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1096                 path[level].bp_sib_bh = bh;
1097                 path[level].bp_op = nilfs_btree_carry_left;
1098                 stats->bs_nblocks++;
1099                 goto out;
1100             } else {
1101                 brelse(bh);
1102             }
1103         }
1104 
1105         /* right sibling */
1106         if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1107             sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1108                               ncmax);
1109             ret = nilfs_btree_get_block(btree, sibptr, &bh);
1110             if (ret < 0)
1111                 goto err_out_child_node;
1112             sib = (struct nilfs_btree_node *)bh->b_data;
1113             if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1114                 path[level].bp_sib_bh = bh;
1115                 path[level].bp_op = nilfs_btree_carry_right;
1116                 stats->bs_nblocks++;
1117                 goto out;
1118             } else {
1119                 brelse(bh);
1120             }
1121         }
1122 
1123         /* split */
1124         path[level].bp_newreq.bpr_ptr =
1125             path[level - 1].bp_newreq.bpr_ptr + 1;
1126         ret = nilfs_bmap_prepare_alloc_ptr(btree,
1127                            &path[level].bp_newreq, dat);
1128         if (ret < 0)
1129             goto err_out_child_node;
1130         ret = nilfs_btree_get_new_block(btree,
1131                         path[level].bp_newreq.bpr_ptr,
1132                         &bh);
1133         if (ret < 0)
1134             goto err_out_curr_node;
1135 
1136         stats->bs_nblocks++;
1137 
1138         sib = (struct nilfs_btree_node *)bh->b_data;
1139         nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1140         path[level].bp_sib_bh = bh;
1141         path[level].bp_op = nilfs_btree_split;
1142     }
1143 
1144     /* root */
1145     node = nilfs_btree_get_root(btree);
1146     if (nilfs_btree_node_get_nchildren(node) <
1147         NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1148         path[level].bp_op = nilfs_btree_do_insert;
1149         stats->bs_nblocks++;
1150         goto out;
1151     }
1152 
1153     /* grow */
1154     path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1155     ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1156     if (ret < 0)
1157         goto err_out_child_node;
1158     ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1159                     &bh);
1160     if (ret < 0)
1161         goto err_out_curr_node;
1162 
1163     nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1164                   0, level, 0, ncblk, NULL, NULL);
1165     path[level].bp_sib_bh = bh;
1166     path[level].bp_op = nilfs_btree_grow;
1167 
1168     level++;
1169     path[level].bp_op = nilfs_btree_do_insert;
1170 
1171     /* a newly-created node block and a data block are added */
1172     stats->bs_nblocks += 2;
1173 
1174     /* success */
1175  out:
1176     *levelp = level;
1177     return ret;
1178 
1179     /* error */
1180  err_out_curr_node:
1181     nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1182  err_out_child_node:
1183     for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1184         nilfs_btnode_delete(path[level].bp_sib_bh);
1185         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1186 
1187     }
1188 
1189     nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1190  err_out_data:
1191     *levelp = level;
1192     stats->bs_nblocks = 0;
1193     return ret;
1194 }
1195 
1196 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1197                       struct nilfs_btree_path *path,
1198                       int maxlevel, __u64 key, __u64 ptr)
1199 {
1200     struct inode *dat = NULL;
1201     int level;
1202 
1203     set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1204     ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1205     if (NILFS_BMAP_USE_VBN(btree)) {
1206         nilfs_bmap_set_target_v(btree, key, ptr);
1207         dat = nilfs_bmap_get_dat(btree);
1208     }
1209 
1210     for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1211         nilfs_bmap_commit_alloc_ptr(btree,
1212                         &path[level - 1].bp_newreq, dat);
1213         path[level].bp_op(btree, path, level, &key, &ptr);
1214     }
1215 
1216     if (!nilfs_bmap_dirty(btree))
1217         nilfs_bmap_set_dirty(btree);
1218 }
1219 
1220 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1221 {
1222     struct nilfs_btree_path *path;
1223     struct nilfs_bmap_stats stats;
1224     int level, ret;
1225 
1226     path = nilfs_btree_alloc_path();
1227     if (path == NULL)
1228         return -ENOMEM;
1229 
1230     ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1231                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1232     if (ret != -ENOENT) {
1233         if (ret == 0)
1234             ret = -EEXIST;
1235         goto out;
1236     }
1237 
1238     ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1239     if (ret < 0)
1240         goto out;
1241     nilfs_btree_commit_insert(btree, path, level, key, ptr);
1242     nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1243 
1244  out:
1245     nilfs_btree_free_path(path);
1246     return ret;
1247 }
1248 
1249 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1250                   struct nilfs_btree_path *path,
1251                   int level, __u64 *keyp, __u64 *ptrp)
1252 {
1253     struct nilfs_btree_node *node;
1254     int ncblk;
1255 
1256     if (level < nilfs_btree_height(btree) - 1) {
1257         node = nilfs_btree_get_nonroot_node(path, level);
1258         ncblk = nilfs_btree_nchildren_per_block(btree);
1259         nilfs_btree_node_delete(node, path[level].bp_index,
1260                     keyp, ptrp, ncblk);
1261         if (!buffer_dirty(path[level].bp_bh))
1262             mark_buffer_dirty(path[level].bp_bh);
1263         if (path[level].bp_index == 0)
1264             nilfs_btree_promote_key(btree, path, level + 1,
1265                 nilfs_btree_node_get_key(node, 0));
1266     } else {
1267         node = nilfs_btree_get_root(btree);
1268         nilfs_btree_node_delete(node, path[level].bp_index,
1269                     keyp, ptrp,
1270                     NILFS_BTREE_ROOT_NCHILDREN_MAX);
1271     }
1272 }
1273 
1274 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1275                     struct nilfs_btree_path *path,
1276                     int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278     struct nilfs_btree_node *node, *left;
1279     int nchildren, lnchildren, n, ncblk;
1280 
1281     nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282 
1283     node = nilfs_btree_get_nonroot_node(path, level);
1284     left = nilfs_btree_get_sib_node(path, level);
1285     nchildren = nilfs_btree_node_get_nchildren(node);
1286     lnchildren = nilfs_btree_node_get_nchildren(left);
1287     ncblk = nilfs_btree_nchildren_per_block(btree);
1288 
1289     n = (nchildren + lnchildren) / 2 - nchildren;
1290 
1291     nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1292 
1293     if (!buffer_dirty(path[level].bp_bh))
1294         mark_buffer_dirty(path[level].bp_bh);
1295     if (!buffer_dirty(path[level].bp_sib_bh))
1296         mark_buffer_dirty(path[level].bp_sib_bh);
1297 
1298     nilfs_btree_promote_key(btree, path, level + 1,
1299                 nilfs_btree_node_get_key(node, 0));
1300 
1301     brelse(path[level].bp_sib_bh);
1302     path[level].bp_sib_bh = NULL;
1303     path[level].bp_index += n;
1304 }
1305 
1306 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1307                      struct nilfs_btree_path *path,
1308                      int level, __u64 *keyp, __u64 *ptrp)
1309 {
1310     struct nilfs_btree_node *node, *right;
1311     int nchildren, rnchildren, n, ncblk;
1312 
1313     nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1314 
1315     node = nilfs_btree_get_nonroot_node(path, level);
1316     right = nilfs_btree_get_sib_node(path, level);
1317     nchildren = nilfs_btree_node_get_nchildren(node);
1318     rnchildren = nilfs_btree_node_get_nchildren(right);
1319     ncblk = nilfs_btree_nchildren_per_block(btree);
1320 
1321     n = (nchildren + rnchildren) / 2 - nchildren;
1322 
1323     nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1324 
1325     if (!buffer_dirty(path[level].bp_bh))
1326         mark_buffer_dirty(path[level].bp_bh);
1327     if (!buffer_dirty(path[level].bp_sib_bh))
1328         mark_buffer_dirty(path[level].bp_sib_bh);
1329 
1330     path[level + 1].bp_index++;
1331     nilfs_btree_promote_key(btree, path, level + 1,
1332                 nilfs_btree_node_get_key(right, 0));
1333     path[level + 1].bp_index--;
1334 
1335     brelse(path[level].bp_sib_bh);
1336     path[level].bp_sib_bh = NULL;
1337 }
1338 
1339 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1340                     struct nilfs_btree_path *path,
1341                     int level, __u64 *keyp, __u64 *ptrp)
1342 {
1343     struct nilfs_btree_node *node, *left;
1344     int n, ncblk;
1345 
1346     nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1347 
1348     node = nilfs_btree_get_nonroot_node(path, level);
1349     left = nilfs_btree_get_sib_node(path, level);
1350     ncblk = nilfs_btree_nchildren_per_block(btree);
1351 
1352     n = nilfs_btree_node_get_nchildren(node);
1353 
1354     nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1355 
1356     if (!buffer_dirty(path[level].bp_sib_bh))
1357         mark_buffer_dirty(path[level].bp_sib_bh);
1358 
1359     nilfs_btnode_delete(path[level].bp_bh);
1360     path[level].bp_bh = path[level].bp_sib_bh;
1361     path[level].bp_sib_bh = NULL;
1362     path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1363 }
1364 
1365 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1366                      struct nilfs_btree_path *path,
1367                      int level, __u64 *keyp, __u64 *ptrp)
1368 {
1369     struct nilfs_btree_node *node, *right;
1370     int n, ncblk;
1371 
1372     nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1373 
1374     node = nilfs_btree_get_nonroot_node(path, level);
1375     right = nilfs_btree_get_sib_node(path, level);
1376     ncblk = nilfs_btree_nchildren_per_block(btree);
1377 
1378     n = nilfs_btree_node_get_nchildren(right);
1379 
1380     nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1381 
1382     if (!buffer_dirty(path[level].bp_bh))
1383         mark_buffer_dirty(path[level].bp_bh);
1384 
1385     nilfs_btnode_delete(path[level].bp_sib_bh);
1386     path[level].bp_sib_bh = NULL;
1387     path[level + 1].bp_index++;
1388 }
1389 
1390 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1391                    struct nilfs_btree_path *path,
1392                    int level, __u64 *keyp, __u64 *ptrp)
1393 {
1394     struct nilfs_btree_node *root, *child;
1395     int n, ncblk;
1396 
1397     nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1398 
1399     root = nilfs_btree_get_root(btree);
1400     child = nilfs_btree_get_nonroot_node(path, level);
1401     ncblk = nilfs_btree_nchildren_per_block(btree);
1402 
1403     nilfs_btree_node_delete(root, 0, NULL, NULL,
1404                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1405     nilfs_btree_node_set_level(root, level);
1406     n = nilfs_btree_node_get_nchildren(child);
1407     nilfs_btree_node_move_left(root, child, n,
1408                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1409 
1410     nilfs_btnode_delete(path[level].bp_bh);
1411     path[level].bp_bh = NULL;
1412 }
1413 
1414 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1415                 struct nilfs_btree_path *path,
1416                 int level, __u64 *keyp, __u64 *ptrp)
1417 {
1418 }
1419 
1420 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1421                       struct nilfs_btree_path *path,
1422                       int *levelp,
1423                       struct nilfs_bmap_stats *stats,
1424                       struct inode *dat)
1425 {
1426     struct buffer_head *bh;
1427     struct nilfs_btree_node *node, *parent, *sib;
1428     __u64 sibptr;
1429     int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1430 
1431     ret = 0;
1432     stats->bs_nblocks = 0;
1433     ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1434     ncblk = nilfs_btree_nchildren_per_block(btree);
1435 
1436     for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1437          level < nilfs_btree_height(btree) - 1;
1438          level++) {
1439         node = nilfs_btree_get_nonroot_node(path, level);
1440         path[level].bp_oldreq.bpr_ptr =
1441             nilfs_btree_node_get_ptr(node, dindex, ncblk);
1442         ret = nilfs_bmap_prepare_end_ptr(btree,
1443                          &path[level].bp_oldreq, dat);
1444         if (ret < 0)
1445             goto err_out_child_node;
1446 
1447         if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1448             path[level].bp_op = nilfs_btree_do_delete;
1449             stats->bs_nblocks++;
1450             goto out;
1451         }
1452 
1453         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1454         pindex = path[level + 1].bp_index;
1455         dindex = pindex;
1456 
1457         if (pindex > 0) {
1458             /* left sibling */
1459             sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1460                               ncmax);
1461             ret = nilfs_btree_get_block(btree, sibptr, &bh);
1462             if (ret < 0)
1463                 goto err_out_curr_node;
1464             sib = (struct nilfs_btree_node *)bh->b_data;
1465             if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1466                 path[level].bp_sib_bh = bh;
1467                 path[level].bp_op = nilfs_btree_borrow_left;
1468                 stats->bs_nblocks++;
1469                 goto out;
1470             } else {
1471                 path[level].bp_sib_bh = bh;
1472                 path[level].bp_op = nilfs_btree_concat_left;
1473                 stats->bs_nblocks++;
1474                 /* continue; */
1475             }
1476         } else if (pindex <
1477                nilfs_btree_node_get_nchildren(parent) - 1) {
1478             /* right sibling */
1479             sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1480                               ncmax);
1481             ret = nilfs_btree_get_block(btree, sibptr, &bh);
1482             if (ret < 0)
1483                 goto err_out_curr_node;
1484             sib = (struct nilfs_btree_node *)bh->b_data;
1485             if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1486                 path[level].bp_sib_bh = bh;
1487                 path[level].bp_op = nilfs_btree_borrow_right;
1488                 stats->bs_nblocks++;
1489                 goto out;
1490             } else {
1491                 path[level].bp_sib_bh = bh;
1492                 path[level].bp_op = nilfs_btree_concat_right;
1493                 stats->bs_nblocks++;
1494                 /*
1495                  * When merging right sibling node
1496                  * into the current node, pointer to
1497                  * the right sibling node must be
1498                  * terminated instead.  The adjustment
1499                  * below is required for that.
1500                  */
1501                 dindex = pindex + 1;
1502                 /* continue; */
1503             }
1504         } else {
1505             /* no siblings */
1506             /* the only child of the root node */
1507             WARN_ON(level != nilfs_btree_height(btree) - 2);
1508             if (nilfs_btree_node_get_nchildren(node) - 1 <=
1509                 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1510                 path[level].bp_op = nilfs_btree_shrink;
1511                 stats->bs_nblocks += 2;
1512                 level++;
1513                 path[level].bp_op = nilfs_btree_nop;
1514                 goto shrink_root_child;
1515             } else {
1516                 path[level].bp_op = nilfs_btree_do_delete;
1517                 stats->bs_nblocks++;
1518                 goto out;
1519             }
1520         }
1521     }
1522 
1523     /* child of the root node is deleted */
1524     path[level].bp_op = nilfs_btree_do_delete;
1525     stats->bs_nblocks++;
1526 
1527 shrink_root_child:
1528     node = nilfs_btree_get_root(btree);
1529     path[level].bp_oldreq.bpr_ptr =
1530         nilfs_btree_node_get_ptr(node, dindex,
1531                      NILFS_BTREE_ROOT_NCHILDREN_MAX);
1532 
1533     ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1534     if (ret < 0)
1535         goto err_out_child_node;
1536 
1537     /* success */
1538  out:
1539     *levelp = level;
1540     return ret;
1541 
1542     /* error */
1543  err_out_curr_node:
1544     nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1545  err_out_child_node:
1546     for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1547         brelse(path[level].bp_sib_bh);
1548         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1549     }
1550     *levelp = level;
1551     stats->bs_nblocks = 0;
1552     return ret;
1553 }
1554 
1555 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1556                       struct nilfs_btree_path *path,
1557                       int maxlevel, struct inode *dat)
1558 {
1559     int level;
1560 
1561     for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1562         nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1563         path[level].bp_op(btree, path, level, NULL, NULL);
1564     }
1565 
1566     if (!nilfs_bmap_dirty(btree))
1567         nilfs_bmap_set_dirty(btree);
1568 }
1569 
1570 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1571 
1572 {
1573     struct nilfs_btree_path *path;
1574     struct nilfs_bmap_stats stats;
1575     struct inode *dat;
1576     int level, ret;
1577 
1578     path = nilfs_btree_alloc_path();
1579     if (path == NULL)
1580         return -ENOMEM;
1581 
1582     ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1583                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1584     if (ret < 0)
1585         goto out;
1586 
1587 
1588     dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1589 
1590     ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1591     if (ret < 0)
1592         goto out;
1593     nilfs_btree_commit_delete(btree, path, level, dat);
1594     nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1595 
1596 out:
1597     nilfs_btree_free_path(path);
1598     return ret;
1599 }
1600 
1601 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1602                 __u64 *keyp)
1603 {
1604     struct nilfs_btree_path *path;
1605     const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1606     int ret;
1607 
1608     path = nilfs_btree_alloc_path();
1609     if (!path)
1610         return -ENOMEM;
1611 
1612     ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1613     if (!ret)
1614         *keyp = start;
1615     else if (ret == -ENOENT)
1616         ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1617 
1618     nilfs_btree_free_path(path);
1619     return ret;
1620 }
1621 
1622 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1623 {
1624     struct nilfs_btree_path *path;
1625     int ret;
1626 
1627     path = nilfs_btree_alloc_path();
1628     if (path == NULL)
1629         return -ENOMEM;
1630 
1631     ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1632 
1633     nilfs_btree_free_path(path);
1634 
1635     return ret;
1636 }
1637 
1638 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1639 {
1640     struct buffer_head *bh;
1641     struct nilfs_btree_node *root, *node;
1642     __u64 maxkey, nextmaxkey;
1643     __u64 ptr;
1644     int nchildren, ret;
1645 
1646     root = nilfs_btree_get_root(btree);
1647     switch (nilfs_btree_height(btree)) {
1648     case 2:
1649         bh = NULL;
1650         node = root;
1651         break;
1652     case 3:
1653         nchildren = nilfs_btree_node_get_nchildren(root);
1654         if (nchildren > 1)
1655             return 0;
1656         ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1657                            NILFS_BTREE_ROOT_NCHILDREN_MAX);
1658         ret = nilfs_btree_get_block(btree, ptr, &bh);
1659         if (ret < 0)
1660             return ret;
1661         node = (struct nilfs_btree_node *)bh->b_data;
1662         break;
1663     default:
1664         return 0;
1665     }
1666 
1667     nchildren = nilfs_btree_node_get_nchildren(node);
1668     maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1669     nextmaxkey = (nchildren > 1) ?
1670         nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1671     if (bh != NULL)
1672         brelse(bh);
1673 
1674     return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1675 }
1676 
1677 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1678                    __u64 *keys, __u64 *ptrs, int nitems)
1679 {
1680     struct buffer_head *bh;
1681     struct nilfs_btree_node *node, *root;
1682     __le64 *dkeys;
1683     __le64 *dptrs;
1684     __u64 ptr;
1685     int nchildren, ncmax, i, ret;
1686 
1687     root = nilfs_btree_get_root(btree);
1688     switch (nilfs_btree_height(btree)) {
1689     case 2:
1690         bh = NULL;
1691         node = root;
1692         ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1693         break;
1694     case 3:
1695         nchildren = nilfs_btree_node_get_nchildren(root);
1696         WARN_ON(nchildren > 1);
1697         ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1698                            NILFS_BTREE_ROOT_NCHILDREN_MAX);
1699         ret = nilfs_btree_get_block(btree, ptr, &bh);
1700         if (ret < 0)
1701             return ret;
1702         node = (struct nilfs_btree_node *)bh->b_data;
1703         ncmax = nilfs_btree_nchildren_per_block(btree);
1704         break;
1705     default:
1706         node = NULL;
1707         return -EINVAL;
1708     }
1709 
1710     nchildren = nilfs_btree_node_get_nchildren(node);
1711     if (nchildren < nitems)
1712         nitems = nchildren;
1713     dkeys = nilfs_btree_node_dkeys(node);
1714     dptrs = nilfs_btree_node_dptrs(node, ncmax);
1715     for (i = 0; i < nitems; i++) {
1716         keys[i] = le64_to_cpu(dkeys[i]);
1717         ptrs[i] = le64_to_cpu(dptrs[i]);
1718     }
1719 
1720     if (bh != NULL)
1721         brelse(bh);
1722 
1723     return nitems;
1724 }
1725 
1726 static int
1727 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1728                        union nilfs_bmap_ptr_req *dreq,
1729                        union nilfs_bmap_ptr_req *nreq,
1730                        struct buffer_head **bhp,
1731                        struct nilfs_bmap_stats *stats)
1732 {
1733     struct buffer_head *bh;
1734     struct inode *dat = NULL;
1735     int ret;
1736 
1737     stats->bs_nblocks = 0;
1738 
1739     /* for data */
1740     /* cannot find near ptr */
1741     if (NILFS_BMAP_USE_VBN(btree)) {
1742         dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1743         dat = nilfs_bmap_get_dat(btree);
1744     }
1745 
1746     ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1747     if (ret < 0)
1748         return ret;
1749 
1750     ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1751     if (ret < 0)
1752         return ret;
1753 
1754     *bhp = NULL;
1755     stats->bs_nblocks++;
1756     if (nreq != NULL) {
1757         nreq->bpr_ptr = dreq->bpr_ptr + 1;
1758         ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1759         if (ret < 0)
1760             goto err_out_dreq;
1761 
1762         ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1763         if (ret < 0)
1764             goto err_out_nreq;
1765 
1766         *bhp = bh;
1767         stats->bs_nblocks++;
1768     }
1769 
1770     /* success */
1771     return 0;
1772 
1773     /* error */
1774  err_out_nreq:
1775     nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1776  err_out_dreq:
1777     nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1778     stats->bs_nblocks = 0;
1779     return ret;
1780 
1781 }
1782 
1783 static void
1784 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1785                       __u64 key, __u64 ptr,
1786                       const __u64 *keys, const __u64 *ptrs,
1787                       int n,
1788                       union nilfs_bmap_ptr_req *dreq,
1789                       union nilfs_bmap_ptr_req *nreq,
1790                       struct buffer_head *bh)
1791 {
1792     struct nilfs_btree_node *node;
1793     struct inode *dat;
1794     __u64 tmpptr;
1795     int ncblk;
1796 
1797     /* free resources */
1798     if (btree->b_ops->bop_clear != NULL)
1799         btree->b_ops->bop_clear(btree);
1800 
1801     /* ptr must be a pointer to a buffer head. */
1802     set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1803 
1804     /* convert and insert */
1805     dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1806     __nilfs_btree_init(btree);
1807     if (nreq != NULL) {
1808         nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1809         nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1810 
1811         /* create child node at level 1 */
1812         node = (struct nilfs_btree_node *)bh->b_data;
1813         ncblk = nilfs_btree_nchildren_per_block(btree);
1814         nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1815         nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1816         if (!buffer_dirty(bh))
1817             mark_buffer_dirty(bh);
1818         if (!nilfs_bmap_dirty(btree))
1819             nilfs_bmap_set_dirty(btree);
1820 
1821         brelse(bh);
1822 
1823         /* create root node at level 2 */
1824         node = nilfs_btree_get_root(btree);
1825         tmpptr = nreq->bpr_ptr;
1826         nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1827                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1828                       &keys[0], &tmpptr);
1829     } else {
1830         nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1831 
1832         /* create root node at level 1 */
1833         node = nilfs_btree_get_root(btree);
1834         nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1835                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1836                       keys, ptrs);
1837         nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1838                     NILFS_BTREE_ROOT_NCHILDREN_MAX);
1839         if (!nilfs_bmap_dirty(btree))
1840             nilfs_bmap_set_dirty(btree);
1841     }
1842 
1843     if (NILFS_BMAP_USE_VBN(btree))
1844         nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1845 }
1846 
1847 /**
1848  * nilfs_btree_convert_and_insert -
1849  * @bmap:
1850  * @key:
1851  * @ptr:
1852  * @keys:
1853  * @ptrs:
1854  * @n:
1855  */
1856 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1857                    __u64 key, __u64 ptr,
1858                    const __u64 *keys, const __u64 *ptrs, int n)
1859 {
1860     struct buffer_head *bh = NULL;
1861     union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1862     struct nilfs_bmap_stats stats;
1863     int ret;
1864 
1865     if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1866         di = &dreq;
1867         ni = NULL;
1868     } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1869                nilfs_btree_node_size(btree))) {
1870         di = &dreq;
1871         ni = &nreq;
1872     } else {
1873         di = NULL;
1874         ni = NULL;
1875         BUG();
1876     }
1877 
1878     ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1879                              &stats);
1880     if (ret < 0)
1881         return ret;
1882     nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1883                           di, ni, bh);
1884     nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1885     return 0;
1886 }
1887 
1888 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1889                    struct nilfs_btree_path *path,
1890                    int level,
1891                    struct buffer_head *bh)
1892 {
1893     while ((++level < nilfs_btree_height(btree) - 1) &&
1894            !buffer_dirty(path[level].bp_bh))
1895         mark_buffer_dirty(path[level].bp_bh);
1896 
1897     return 0;
1898 }
1899 
1900 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1901                     struct nilfs_btree_path *path,
1902                     int level, struct inode *dat)
1903 {
1904     struct nilfs_btree_node *parent;
1905     int ncmax, ret;
1906 
1907     parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1908     path[level].bp_oldreq.bpr_ptr =
1909         nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1910                      ncmax);
1911     path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1912     ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1913                        &path[level].bp_newreq.bpr_req);
1914     if (ret < 0)
1915         return ret;
1916 
1917     if (buffer_nilfs_node(path[level].bp_bh)) {
1918         path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1919         path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1920         path[level].bp_ctxt.bh = path[level].bp_bh;
1921         ret = nilfs_btnode_prepare_change_key(
1922             NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1923             &path[level].bp_ctxt);
1924         if (ret < 0) {
1925             nilfs_dat_abort_update(dat,
1926                            &path[level].bp_oldreq.bpr_req,
1927                            &path[level].bp_newreq.bpr_req);
1928             return ret;
1929         }
1930     }
1931 
1932     return 0;
1933 }
1934 
1935 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1936                     struct nilfs_btree_path *path,
1937                     int level, struct inode *dat)
1938 {
1939     struct nilfs_btree_node *parent;
1940     int ncmax;
1941 
1942     nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1943                 &path[level].bp_newreq.bpr_req,
1944                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1945 
1946     if (buffer_nilfs_node(path[level].bp_bh)) {
1947         nilfs_btnode_commit_change_key(
1948             NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1949             &path[level].bp_ctxt);
1950         path[level].bp_bh = path[level].bp_ctxt.bh;
1951     }
1952     set_buffer_nilfs_volatile(path[level].bp_bh);
1953 
1954     parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1955     nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1956                  path[level].bp_newreq.bpr_ptr, ncmax);
1957 }
1958 
1959 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1960                        struct nilfs_btree_path *path,
1961                        int level, struct inode *dat)
1962 {
1963     nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1964                    &path[level].bp_newreq.bpr_req);
1965     if (buffer_nilfs_node(path[level].bp_bh))
1966         nilfs_btnode_abort_change_key(
1967             NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1968             &path[level].bp_ctxt);
1969 }
1970 
1971 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1972                        struct nilfs_btree_path *path,
1973                        int minlevel, int *maxlevelp,
1974                        struct inode *dat)
1975 {
1976     int level, ret;
1977 
1978     level = minlevel;
1979     if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1980         ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1981         if (ret < 0)
1982             return ret;
1983     }
1984     while ((++level < nilfs_btree_height(btree) - 1) &&
1985            !buffer_dirty(path[level].bp_bh)) {
1986 
1987         WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1988         ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1989         if (ret < 0)
1990             goto out;
1991     }
1992 
1993     /* success */
1994     *maxlevelp = level - 1;
1995     return 0;
1996 
1997     /* error */
1998  out:
1999     while (--level > minlevel)
2000         nilfs_btree_abort_update_v(btree, path, level, dat);
2001     if (!buffer_nilfs_volatile(path[level].bp_bh))
2002         nilfs_btree_abort_update_v(btree, path, level, dat);
2003     return ret;
2004 }
2005 
2006 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2007                        struct nilfs_btree_path *path,
2008                        int minlevel, int maxlevel,
2009                        struct buffer_head *bh,
2010                        struct inode *dat)
2011 {
2012     int level;
2013 
2014     if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2015         nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2016 
2017     for (level = minlevel + 1; level <= maxlevel; level++)
2018         nilfs_btree_commit_update_v(btree, path, level, dat);
2019 }
2020 
2021 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2022                    struct nilfs_btree_path *path,
2023                    int level, struct buffer_head *bh)
2024 {
2025     int maxlevel = 0, ret;
2026     struct nilfs_btree_node *parent;
2027     struct inode *dat = nilfs_bmap_get_dat(btree);
2028     __u64 ptr;
2029     int ncmax;
2030 
2031     get_bh(bh);
2032     path[level].bp_bh = bh;
2033     ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2034                           dat);
2035     if (ret < 0)
2036         goto out;
2037 
2038     if (buffer_nilfs_volatile(path[level].bp_bh)) {
2039         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2040         ptr = nilfs_btree_node_get_ptr(parent,
2041                            path[level + 1].bp_index,
2042                            ncmax);
2043         ret = nilfs_dat_mark_dirty(dat, ptr);
2044         if (ret < 0)
2045             goto out;
2046     }
2047 
2048     nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2049 
2050  out:
2051     brelse(path[level].bp_bh);
2052     path[level].bp_bh = NULL;
2053     return ret;
2054 }
2055 
2056 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2057                  struct buffer_head *bh)
2058 {
2059     struct nilfs_btree_path *path;
2060     struct nilfs_btree_node *node;
2061     __u64 key;
2062     int level, ret;
2063 
2064     WARN_ON(!buffer_dirty(bh));
2065 
2066     path = nilfs_btree_alloc_path();
2067     if (path == NULL)
2068         return -ENOMEM;
2069 
2070     if (buffer_nilfs_node(bh)) {
2071         node = (struct nilfs_btree_node *)bh->b_data;
2072         key = nilfs_btree_node_get_key(node, 0);
2073         level = nilfs_btree_node_get_level(node);
2074     } else {
2075         key = nilfs_bmap_data_get_key(btree, bh);
2076         level = NILFS_BTREE_LEVEL_DATA;
2077     }
2078 
2079     ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2080     if (ret < 0) {
2081         if (unlikely(ret == -ENOENT))
2082             nilfs_crit(btree->b_inode->i_sb,
2083                    "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2084                    btree->b_inode->i_ino,
2085                    (unsigned long long)key, level);
2086         goto out;
2087     }
2088 
2089     ret = NILFS_BMAP_USE_VBN(btree) ?
2090         nilfs_btree_propagate_v(btree, path, level, bh) :
2091         nilfs_btree_propagate_p(btree, path, level, bh);
2092 
2093  out:
2094     nilfs_btree_free_path(path);
2095 
2096     return ret;
2097 }
2098 
2099 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2100                     struct buffer_head *bh)
2101 {
2102     return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2103 }
2104 
2105 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2106                      struct list_head *lists,
2107                      struct buffer_head *bh)
2108 {
2109     struct list_head *head;
2110     struct buffer_head *cbh;
2111     struct nilfs_btree_node *node, *cnode;
2112     __u64 key, ckey;
2113     int level;
2114 
2115     get_bh(bh);
2116     node = (struct nilfs_btree_node *)bh->b_data;
2117     key = nilfs_btree_node_get_key(node, 0);
2118     level = nilfs_btree_node_get_level(node);
2119     if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2120         level >= NILFS_BTREE_LEVEL_MAX) {
2121         dump_stack();
2122         nilfs_warn(btree->b_inode->i_sb,
2123                "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2124                level, (unsigned long long)key,
2125                btree->b_inode->i_ino,
2126                (unsigned long long)bh->b_blocknr);
2127         return;
2128     }
2129 
2130     list_for_each(head, &lists[level]) {
2131         cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2132         cnode = (struct nilfs_btree_node *)cbh->b_data;
2133         ckey = nilfs_btree_node_get_key(cnode, 0);
2134         if (key < ckey)
2135             break;
2136     }
2137     list_add_tail(&bh->b_assoc_buffers, head);
2138 }
2139 
2140 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2141                          struct list_head *listp)
2142 {
2143     struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2144     struct address_space *btcache = btnc_inode->i_mapping;
2145     struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2146     struct pagevec pvec;
2147     struct buffer_head *bh, *head;
2148     pgoff_t index = 0;
2149     int level, i;
2150 
2151     for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2152          level < NILFS_BTREE_LEVEL_MAX;
2153          level++)
2154         INIT_LIST_HEAD(&lists[level]);
2155 
2156     pagevec_init(&pvec);
2157 
2158     while (pagevec_lookup_tag(&pvec, btcache, &index,
2159                     PAGECACHE_TAG_DIRTY)) {
2160         for (i = 0; i < pagevec_count(&pvec); i++) {
2161             bh = head = page_buffers(pvec.pages[i]);
2162             do {
2163                 if (buffer_dirty(bh))
2164                     nilfs_btree_add_dirty_buffer(btree,
2165                                      lists, bh);
2166             } while ((bh = bh->b_this_page) != head);
2167         }
2168         pagevec_release(&pvec);
2169         cond_resched();
2170     }
2171 
2172     for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2173          level < NILFS_BTREE_LEVEL_MAX;
2174          level++)
2175         list_splice_tail(&lists[level], listp);
2176 }
2177 
2178 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2179                 struct nilfs_btree_path *path,
2180                 int level,
2181                 struct buffer_head **bh,
2182                 sector_t blocknr,
2183                 union nilfs_binfo *binfo)
2184 {
2185     struct nilfs_btree_node *parent;
2186     __u64 key;
2187     __u64 ptr;
2188     int ncmax, ret;
2189 
2190     parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2191     ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2192                        ncmax);
2193     if (buffer_nilfs_node(*bh)) {
2194         path[level].bp_ctxt.oldkey = ptr;
2195         path[level].bp_ctxt.newkey = blocknr;
2196         path[level].bp_ctxt.bh = *bh;
2197         ret = nilfs_btnode_prepare_change_key(
2198             NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2199             &path[level].bp_ctxt);
2200         if (ret < 0)
2201             return ret;
2202         nilfs_btnode_commit_change_key(
2203             NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2204             &path[level].bp_ctxt);
2205         *bh = path[level].bp_ctxt.bh;
2206     }
2207 
2208     nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2209                  ncmax);
2210 
2211     key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2212     /* on-disk format */
2213     binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2214     binfo->bi_dat.bi_level = level;
2215 
2216     return 0;
2217 }
2218 
2219 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2220                 struct nilfs_btree_path *path,
2221                 int level,
2222                 struct buffer_head **bh,
2223                 sector_t blocknr,
2224                 union nilfs_binfo *binfo)
2225 {
2226     struct nilfs_btree_node *parent;
2227     struct inode *dat = nilfs_bmap_get_dat(btree);
2228     __u64 key;
2229     __u64 ptr;
2230     union nilfs_bmap_ptr_req req;
2231     int ncmax, ret;
2232 
2233     parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2234     ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2235                        ncmax);
2236     req.bpr_ptr = ptr;
2237     ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2238     if (ret < 0)
2239         return ret;
2240     nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2241 
2242     key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2243     /* on-disk format */
2244     binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2245     binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2246 
2247     return 0;
2248 }
2249 
2250 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2251                   struct buffer_head **bh,
2252                   sector_t blocknr,
2253                   union nilfs_binfo *binfo)
2254 {
2255     struct nilfs_btree_path *path;
2256     struct nilfs_btree_node *node;
2257     __u64 key;
2258     int level, ret;
2259 
2260     path = nilfs_btree_alloc_path();
2261     if (path == NULL)
2262         return -ENOMEM;
2263 
2264     if (buffer_nilfs_node(*bh)) {
2265         node = (struct nilfs_btree_node *)(*bh)->b_data;
2266         key = nilfs_btree_node_get_key(node, 0);
2267         level = nilfs_btree_node_get_level(node);
2268     } else {
2269         key = nilfs_bmap_data_get_key(btree, *bh);
2270         level = NILFS_BTREE_LEVEL_DATA;
2271     }
2272 
2273     ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2274     if (ret < 0) {
2275         WARN_ON(ret == -ENOENT);
2276         goto out;
2277     }
2278 
2279     ret = NILFS_BMAP_USE_VBN(btree) ?
2280         nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2281         nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2282 
2283  out:
2284     nilfs_btree_free_path(path);
2285 
2286     return ret;
2287 }
2288 
2289 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2290                  struct buffer_head **bh,
2291                  sector_t blocknr,
2292                  union nilfs_binfo *binfo)
2293 {
2294     struct nilfs_btree_node *node;
2295     __u64 key;
2296     int ret;
2297 
2298     ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2299                  blocknr);
2300     if (ret < 0)
2301         return ret;
2302 
2303     if (buffer_nilfs_node(*bh)) {
2304         node = (struct nilfs_btree_node *)(*bh)->b_data;
2305         key = nilfs_btree_node_get_key(node, 0);
2306     } else
2307         key = nilfs_bmap_data_get_key(btree, *bh);
2308 
2309     /* on-disk format */
2310     binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2311     binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2312 
2313     return 0;
2314 }
2315 
2316 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2317 {
2318     struct buffer_head *bh;
2319     struct nilfs_btree_path *path;
2320     __u64 ptr;
2321     int ret;
2322 
2323     path = nilfs_btree_alloc_path();
2324     if (path == NULL)
2325         return -ENOMEM;
2326 
2327     ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2328     if (ret < 0) {
2329         WARN_ON(ret == -ENOENT);
2330         goto out;
2331     }
2332     ret = nilfs_btree_get_block(btree, ptr, &bh);
2333     if (ret < 0) {
2334         WARN_ON(ret == -ENOENT);
2335         goto out;
2336     }
2337 
2338     if (!buffer_dirty(bh))
2339         mark_buffer_dirty(bh);
2340     brelse(bh);
2341     if (!nilfs_bmap_dirty(btree))
2342         nilfs_bmap_set_dirty(btree);
2343 
2344  out:
2345     nilfs_btree_free_path(path);
2346     return ret;
2347 }
2348 
2349 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2350     .bop_lookup     =   nilfs_btree_lookup,
2351     .bop_lookup_contig  =   nilfs_btree_lookup_contig,
2352     .bop_insert     =   nilfs_btree_insert,
2353     .bop_delete     =   nilfs_btree_delete,
2354     .bop_clear      =   NULL,
2355 
2356     .bop_propagate      =   nilfs_btree_propagate,
2357 
2358     .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,
2359 
2360     .bop_assign     =   nilfs_btree_assign,
2361     .bop_mark       =   nilfs_btree_mark,
2362 
2363     .bop_seek_key       =   nilfs_btree_seek_key,
2364     .bop_last_key       =   nilfs_btree_last_key,
2365 
2366     .bop_check_insert   =   NULL,
2367     .bop_check_delete   =   nilfs_btree_check_delete,
2368     .bop_gather_data    =   nilfs_btree_gather_data,
2369 };
2370 
2371 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2372     .bop_lookup     =   NULL,
2373     .bop_lookup_contig  =   NULL,
2374     .bop_insert     =   NULL,
2375     .bop_delete     =   NULL,
2376     .bop_clear      =   NULL,
2377 
2378     .bop_propagate      =   nilfs_btree_propagate_gc,
2379 
2380     .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,
2381 
2382     .bop_assign     =   nilfs_btree_assign_gc,
2383     .bop_mark       =   NULL,
2384 
2385     .bop_seek_key       =   NULL,
2386     .bop_last_key       =   NULL,
2387 
2388     .bop_check_insert   =   NULL,
2389     .bop_check_delete   =   NULL,
2390     .bop_gather_data    =   NULL,
2391 };
2392 
2393 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2394 {
2395     bmap->b_ops = &nilfs_btree_ops;
2396     bmap->b_nchildren_per_block =
2397         NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2398 }
2399 
2400 int nilfs_btree_init(struct nilfs_bmap *bmap)
2401 {
2402     int ret = 0;
2403 
2404     __nilfs_btree_init(bmap);
2405 
2406     if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2407         ret = -EIO;
2408     else
2409         ret = nilfs_attach_btree_node_cache(
2410             &NILFS_BMAP_I(bmap)->vfs_inode);
2411 
2412     return ret;
2413 }
2414 
2415 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2416 {
2417     bmap->b_ops = &nilfs_btree_ops_gc;
2418     bmap->b_nchildren_per_block =
2419         NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2420 }