Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (c) 2017 Christoph Hellwig.
0004  */
0005 
0006 #include "xfs.h"
0007 #include "xfs_shared.h"
0008 #include "xfs_format.h"
0009 #include "xfs_bit.h"
0010 #include "xfs_log_format.h"
0011 #include "xfs_trans_resv.h"
0012 #include "xfs_mount.h"
0013 #include "xfs_inode.h"
0014 #include "xfs_trace.h"
0015 
0016 /*
0017  * In-core extent record layout:
0018  *
0019  * +-------+----------------------------+
0020  * | 00:53 | all 54 bits of startoff    |
0021  * | 54:63 | low 10 bits of startblock  |
0022  * +-------+----------------------------+
0023  * | 00:20 | all 21 bits of length      |
0024  * |    21 | unwritten extent bit       |
0025  * | 22:63 | high 42 bits of startblock |
0026  * +-------+----------------------------+
0027  */
0028 #define XFS_IEXT_STARTOFF_MASK      xfs_mask64lo(BMBT_STARTOFF_BITLEN)
0029 #define XFS_IEXT_LENGTH_MASK        xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
0030 #define XFS_IEXT_STARTBLOCK_MASK    xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
0031 
0032 struct xfs_iext_rec {
0033     uint64_t            lo;
0034     uint64_t            hi;
0035 };
0036 
0037 /*
0038  * Given that the length can't be a zero, only an empty hi value indicates an
0039  * unused record.
0040  */
0041 static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
0042 {
0043     return rec->hi == 0;
0044 }
0045 
0046 static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
0047 {
0048     rec->lo = 0;
0049     rec->hi = 0;
0050 }
0051 
0052 static void
0053 xfs_iext_set(
0054     struct xfs_iext_rec *rec,
0055     struct xfs_bmbt_irec    *irec)
0056 {
0057     ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
0058     ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
0059     ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
0060 
0061     rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
0062     rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
0063 
0064     rec->lo |= (irec->br_startblock << 54);
0065     rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
0066 
0067     if (irec->br_state == XFS_EXT_UNWRITTEN)
0068         rec->hi |= (1 << 21);
0069 }
0070 
0071 static void
0072 xfs_iext_get(
0073     struct xfs_bmbt_irec    *irec,
0074     struct xfs_iext_rec *rec)
0075 {
0076     irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
0077     irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
0078 
0079     irec->br_startblock = rec->lo >> 54;
0080     irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
0081 
0082     if (rec->hi & (1 << 21))
0083         irec->br_state = XFS_EXT_UNWRITTEN;
0084     else
0085         irec->br_state = XFS_EXT_NORM;
0086 }
0087 
0088 enum {
0089     NODE_SIZE   = 256,
0090     KEYS_PER_NODE   = NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
0091     RECS_PER_LEAF   = (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
0092                 sizeof(struct xfs_iext_rec),
0093 };
0094 
0095 /*
0096  * In-core extent btree block layout:
0097  *
0098  * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
0099  *
0100  * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
0101  * contain the startoffset, blockcount, startblock and unwritten extent flag.
0102  * See above for the exact format, followed by pointers to the previous and next
0103  * leaf blocks (if there are any).
0104  *
0105  * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
0106  * by an equal number of pointers to the btree blocks at the next lower level.
0107  *
0108  *      +-------+-------+-------+-------+-------+----------+----------+
0109  * Leaf:    | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
0110  *      +-------+-------+-------+-------+-------+----------+----------+
0111  *
0112  *      +-------+-------+-------+-------+-------+-------+------+-------+
0113  * Inner:   | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
0114  *      +-------+-------+-------+-------+-------+-------+------+-------+
0115  */
0116 struct xfs_iext_node {
0117     uint64_t        keys[KEYS_PER_NODE];
0118 #define XFS_IEXT_KEY_INVALID    (1ULL << 63)
0119     void            *ptrs[KEYS_PER_NODE];
0120 };
0121 
0122 struct xfs_iext_leaf {
0123     struct xfs_iext_rec recs[RECS_PER_LEAF];
0124     struct xfs_iext_leaf    *prev;
0125     struct xfs_iext_leaf    *next;
0126 };
0127 
0128 inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
0129 {
0130     return ifp->if_bytes / sizeof(struct xfs_iext_rec);
0131 }
0132 
0133 static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
0134 {
0135     if (ifp->if_height == 1)
0136         return xfs_iext_count(ifp);
0137     return RECS_PER_LEAF;
0138 }
0139 
0140 static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
0141 {
0142     return &cur->leaf->recs[cur->pos];
0143 }
0144 
0145 static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
0146         struct xfs_iext_cursor *cur)
0147 {
0148     if (!cur->leaf)
0149         return false;
0150     if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
0151         return false;
0152     if (xfs_iext_rec_is_empty(cur_rec(cur)))
0153         return false;
0154     return true;
0155 }
0156 
0157 static void *
0158 xfs_iext_find_first_leaf(
0159     struct xfs_ifork    *ifp)
0160 {
0161     struct xfs_iext_node    *node = ifp->if_u1.if_root;
0162     int         height;
0163 
0164     if (!ifp->if_height)
0165         return NULL;
0166 
0167     for (height = ifp->if_height; height > 1; height--) {
0168         node = node->ptrs[0];
0169         ASSERT(node);
0170     }
0171 
0172     return node;
0173 }
0174 
0175 static void *
0176 xfs_iext_find_last_leaf(
0177     struct xfs_ifork    *ifp)
0178 {
0179     struct xfs_iext_node    *node = ifp->if_u1.if_root;
0180     int         height, i;
0181 
0182     if (!ifp->if_height)
0183         return NULL;
0184 
0185     for (height = ifp->if_height; height > 1; height--) {
0186         for (i = 1; i < KEYS_PER_NODE; i++)
0187             if (!node->ptrs[i])
0188                 break;
0189         node = node->ptrs[i - 1];
0190         ASSERT(node);
0191     }
0192 
0193     return node;
0194 }
0195 
0196 void
0197 xfs_iext_first(
0198     struct xfs_ifork    *ifp,
0199     struct xfs_iext_cursor  *cur)
0200 {
0201     cur->pos = 0;
0202     cur->leaf = xfs_iext_find_first_leaf(ifp);
0203 }
0204 
0205 void
0206 xfs_iext_last(
0207     struct xfs_ifork    *ifp,
0208     struct xfs_iext_cursor  *cur)
0209 {
0210     int         i;
0211 
0212     cur->leaf = xfs_iext_find_last_leaf(ifp);
0213     if (!cur->leaf) {
0214         cur->pos = 0;
0215         return;
0216     }
0217 
0218     for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
0219         if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
0220             break;
0221     }
0222     cur->pos = i - 1;
0223 }
0224 
0225 void
0226 xfs_iext_next(
0227     struct xfs_ifork    *ifp,
0228     struct xfs_iext_cursor  *cur)
0229 {
0230     if (!cur->leaf) {
0231         ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
0232         xfs_iext_first(ifp, cur);
0233         return;
0234     }
0235 
0236     ASSERT(cur->pos >= 0);
0237     ASSERT(cur->pos < xfs_iext_max_recs(ifp));
0238 
0239     cur->pos++;
0240     if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
0241         cur->leaf->next) {
0242         cur->leaf = cur->leaf->next;
0243         cur->pos = 0;
0244     }
0245 }
0246 
0247 void
0248 xfs_iext_prev(
0249     struct xfs_ifork    *ifp,
0250     struct xfs_iext_cursor  *cur)
0251 {
0252     if (!cur->leaf) {
0253         ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
0254         xfs_iext_last(ifp, cur);
0255         return;
0256     }
0257 
0258     ASSERT(cur->pos >= 0);
0259     ASSERT(cur->pos <= RECS_PER_LEAF);
0260 
0261 recurse:
0262     do {
0263         cur->pos--;
0264         if (xfs_iext_valid(ifp, cur))
0265             return;
0266     } while (cur->pos > 0);
0267 
0268     if (ifp->if_height > 1 && cur->leaf->prev) {
0269         cur->leaf = cur->leaf->prev;
0270         cur->pos = RECS_PER_LEAF;
0271         goto recurse;
0272     }
0273 }
0274 
0275 static inline int
0276 xfs_iext_key_cmp(
0277     struct xfs_iext_node    *node,
0278     int         n,
0279     xfs_fileoff_t       offset)
0280 {
0281     if (node->keys[n] > offset)
0282         return 1;
0283     if (node->keys[n] < offset)
0284         return -1;
0285     return 0;
0286 }
0287 
0288 static inline int
0289 xfs_iext_rec_cmp(
0290     struct xfs_iext_rec *rec,
0291     xfs_fileoff_t       offset)
0292 {
0293     uint64_t        rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
0294     uint32_t        rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
0295 
0296     if (rec_offset > offset)
0297         return 1;
0298     if (rec_offset + rec_len <= offset)
0299         return -1;
0300     return 0;
0301 }
0302 
0303 static void *
0304 xfs_iext_find_level(
0305     struct xfs_ifork    *ifp,
0306     xfs_fileoff_t       offset,
0307     int         level)
0308 {
0309     struct xfs_iext_node    *node = ifp->if_u1.if_root;
0310     int         height, i;
0311 
0312     if (!ifp->if_height)
0313         return NULL;
0314 
0315     for (height = ifp->if_height; height > level; height--) {
0316         for (i = 1; i < KEYS_PER_NODE; i++)
0317             if (xfs_iext_key_cmp(node, i, offset) > 0)
0318                 break;
0319 
0320         node = node->ptrs[i - 1];
0321         if (!node)
0322             break;
0323     }
0324 
0325     return node;
0326 }
0327 
0328 static int
0329 xfs_iext_node_pos(
0330     struct xfs_iext_node    *node,
0331     xfs_fileoff_t       offset)
0332 {
0333     int         i;
0334 
0335     for (i = 1; i < KEYS_PER_NODE; i++) {
0336         if (xfs_iext_key_cmp(node, i, offset) > 0)
0337             break;
0338     }
0339 
0340     return i - 1;
0341 }
0342 
0343 static int
0344 xfs_iext_node_insert_pos(
0345     struct xfs_iext_node    *node,
0346     xfs_fileoff_t       offset)
0347 {
0348     int         i;
0349 
0350     for (i = 0; i < KEYS_PER_NODE; i++) {
0351         if (xfs_iext_key_cmp(node, i, offset) > 0)
0352             return i;
0353     }
0354 
0355     return KEYS_PER_NODE;
0356 }
0357 
0358 static int
0359 xfs_iext_node_nr_entries(
0360     struct xfs_iext_node    *node,
0361     int         start)
0362 {
0363     int         i;
0364 
0365     for (i = start; i < KEYS_PER_NODE; i++) {
0366         if (node->keys[i] == XFS_IEXT_KEY_INVALID)
0367             break;
0368     }
0369 
0370     return i;
0371 }
0372 
0373 static int
0374 xfs_iext_leaf_nr_entries(
0375     struct xfs_ifork    *ifp,
0376     struct xfs_iext_leaf    *leaf,
0377     int         start)
0378 {
0379     int         i;
0380 
0381     for (i = start; i < xfs_iext_max_recs(ifp); i++) {
0382         if (xfs_iext_rec_is_empty(&leaf->recs[i]))
0383             break;
0384     }
0385 
0386     return i;
0387 }
0388 
0389 static inline uint64_t
0390 xfs_iext_leaf_key(
0391     struct xfs_iext_leaf    *leaf,
0392     int         n)
0393 {
0394     return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
0395 }
0396 
0397 static void
0398 xfs_iext_grow(
0399     struct xfs_ifork    *ifp)
0400 {
0401     struct xfs_iext_node    *node = kmem_zalloc(NODE_SIZE, KM_NOFS);
0402     int         i;
0403 
0404     if (ifp->if_height == 1) {
0405         struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
0406 
0407         node->keys[0] = xfs_iext_leaf_key(prev, 0);
0408         node->ptrs[0] = prev;
0409     } else  {
0410         struct xfs_iext_node *prev = ifp->if_u1.if_root;
0411 
0412         ASSERT(ifp->if_height > 1);
0413 
0414         node->keys[0] = prev->keys[0];
0415         node->ptrs[0] = prev;
0416     }
0417 
0418     for (i = 1; i < KEYS_PER_NODE; i++)
0419         node->keys[i] = XFS_IEXT_KEY_INVALID;
0420 
0421     ifp->if_u1.if_root = node;
0422     ifp->if_height++;
0423 }
0424 
0425 static void
0426 xfs_iext_update_node(
0427     struct xfs_ifork    *ifp,
0428     xfs_fileoff_t       old_offset,
0429     xfs_fileoff_t       new_offset,
0430     int         level,
0431     void            *ptr)
0432 {
0433     struct xfs_iext_node    *node = ifp->if_u1.if_root;
0434     int         height, i;
0435 
0436     for (height = ifp->if_height; height > level; height--) {
0437         for (i = 0; i < KEYS_PER_NODE; i++) {
0438             if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
0439                 break;
0440             if (node->keys[i] == old_offset)
0441                 node->keys[i] = new_offset;
0442         }
0443         node = node->ptrs[i - 1];
0444         ASSERT(node);
0445     }
0446 
0447     ASSERT(node == ptr);
0448 }
0449 
0450 static struct xfs_iext_node *
0451 xfs_iext_split_node(
0452     struct xfs_iext_node    **nodep,
0453     int         *pos,
0454     int         *nr_entries)
0455 {
0456     struct xfs_iext_node    *node = *nodep;
0457     struct xfs_iext_node    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
0458     const int       nr_move = KEYS_PER_NODE / 2;
0459     int         nr_keep = nr_move + (KEYS_PER_NODE & 1);
0460     int         i = 0;
0461 
0462     /* for sequential append operations just spill over into the new node */
0463     if (*pos == KEYS_PER_NODE) {
0464         *nodep = new;
0465         *pos = 0;
0466         *nr_entries = 0;
0467         goto done;
0468     }
0469 
0470 
0471     for (i = 0; i < nr_move; i++) {
0472         new->keys[i] = node->keys[nr_keep + i];
0473         new->ptrs[i] = node->ptrs[nr_keep + i];
0474 
0475         node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
0476         node->ptrs[nr_keep + i] = NULL;
0477     }
0478 
0479     if (*pos >= nr_keep) {
0480         *nodep = new;
0481         *pos -= nr_keep;
0482         *nr_entries = nr_move;
0483     } else {
0484         *nr_entries = nr_keep;
0485     }
0486 done:
0487     for (; i < KEYS_PER_NODE; i++)
0488         new->keys[i] = XFS_IEXT_KEY_INVALID;
0489     return new;
0490 }
0491 
0492 static void
0493 xfs_iext_insert_node(
0494     struct xfs_ifork    *ifp,
0495     uint64_t        offset,
0496     void            *ptr,
0497     int         level)
0498 {
0499     struct xfs_iext_node    *node, *new;
0500     int         i, pos, nr_entries;
0501 
0502 again:
0503     if (ifp->if_height < level)
0504         xfs_iext_grow(ifp);
0505 
0506     new = NULL;
0507     node = xfs_iext_find_level(ifp, offset, level);
0508     pos = xfs_iext_node_insert_pos(node, offset);
0509     nr_entries = xfs_iext_node_nr_entries(node, pos);
0510 
0511     ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
0512     ASSERT(nr_entries <= KEYS_PER_NODE);
0513 
0514     if (nr_entries == KEYS_PER_NODE)
0515         new = xfs_iext_split_node(&node, &pos, &nr_entries);
0516 
0517     /*
0518      * Update the pointers in higher levels if the first entry changes
0519      * in an existing node.
0520      */
0521     if (node != new && pos == 0 && nr_entries > 0)
0522         xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
0523 
0524     for (i = nr_entries; i > pos; i--) {
0525         node->keys[i] = node->keys[i - 1];
0526         node->ptrs[i] = node->ptrs[i - 1];
0527     }
0528     node->keys[pos] = offset;
0529     node->ptrs[pos] = ptr;
0530 
0531     if (new) {
0532         offset = new->keys[0];
0533         ptr = new;
0534         level++;
0535         goto again;
0536     }
0537 }
0538 
0539 static struct xfs_iext_leaf *
0540 xfs_iext_split_leaf(
0541     struct xfs_iext_cursor  *cur,
0542     int         *nr_entries)
0543 {
0544     struct xfs_iext_leaf    *leaf = cur->leaf;
0545     struct xfs_iext_leaf    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
0546     const int       nr_move = RECS_PER_LEAF / 2;
0547     int         nr_keep = nr_move + (RECS_PER_LEAF & 1);
0548     int         i;
0549 
0550     /* for sequential append operations just spill over into the new node */
0551     if (cur->pos == RECS_PER_LEAF) {
0552         cur->leaf = new;
0553         cur->pos = 0;
0554         *nr_entries = 0;
0555         goto done;
0556     }
0557 
0558     for (i = 0; i < nr_move; i++) {
0559         new->recs[i] = leaf->recs[nr_keep + i];
0560         xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
0561     }
0562 
0563     if (cur->pos >= nr_keep) {
0564         cur->leaf = new;
0565         cur->pos -= nr_keep;
0566         *nr_entries = nr_move;
0567     } else {
0568         *nr_entries = nr_keep;
0569     }
0570 done:
0571     if (leaf->next)
0572         leaf->next->prev = new;
0573     new->next = leaf->next;
0574     new->prev = leaf;
0575     leaf->next = new;
0576     return new;
0577 }
0578 
0579 static void
0580 xfs_iext_alloc_root(
0581     struct xfs_ifork    *ifp,
0582     struct xfs_iext_cursor  *cur)
0583 {
0584     ASSERT(ifp->if_bytes == 0);
0585 
0586     ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
0587     ifp->if_height = 1;
0588 
0589     /* now that we have a node step into it */
0590     cur->leaf = ifp->if_u1.if_root;
0591     cur->pos = 0;
0592 }
0593 
0594 static void
0595 xfs_iext_realloc_root(
0596     struct xfs_ifork    *ifp,
0597     struct xfs_iext_cursor  *cur)
0598 {
0599     int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
0600     void *new;
0601 
0602     /* account for the prev/next pointers */
0603     if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
0604         new_size = NODE_SIZE;
0605 
0606     new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL);
0607     memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
0608     ifp->if_u1.if_root = new;
0609     cur->leaf = new;
0610 }
0611 
0612 /*
0613  * Increment the sequence counter on extent tree changes. If we are on a COW
0614  * fork, this allows the writeback code to skip looking for a COW extent if the
0615  * COW fork hasn't changed. We use WRITE_ONCE here to ensure the update to the
0616  * sequence counter is seen before the modifications to the extent tree itself
0617  * take effect.
0618  */
0619 static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
0620 {
0621     WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
0622 }
0623 
0624 void
0625 xfs_iext_insert(
0626     struct xfs_inode    *ip,
0627     struct xfs_iext_cursor  *cur,
0628     struct xfs_bmbt_irec    *irec,
0629     int         state)
0630 {
0631     struct xfs_ifork    *ifp = xfs_iext_state_to_fork(ip, state);
0632     xfs_fileoff_t       offset = irec->br_startoff;
0633     struct xfs_iext_leaf    *new = NULL;
0634     int         nr_entries, i;
0635 
0636     xfs_iext_inc_seq(ifp);
0637 
0638     if (ifp->if_height == 0)
0639         xfs_iext_alloc_root(ifp, cur);
0640     else if (ifp->if_height == 1)
0641         xfs_iext_realloc_root(ifp, cur);
0642 
0643     nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
0644     ASSERT(nr_entries <= RECS_PER_LEAF);
0645     ASSERT(cur->pos >= nr_entries ||
0646            xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
0647 
0648     if (nr_entries == RECS_PER_LEAF)
0649         new = xfs_iext_split_leaf(cur, &nr_entries);
0650 
0651     /*
0652      * Update the pointers in higher levels if the first entry changes
0653      * in an existing node.
0654      */
0655     if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
0656         xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
0657                 offset, 1, cur->leaf);
0658     }
0659 
0660     for (i = nr_entries; i > cur->pos; i--)
0661         cur->leaf->recs[i] = cur->leaf->recs[i - 1];
0662     xfs_iext_set(cur_rec(cur), irec);
0663     ifp->if_bytes += sizeof(struct xfs_iext_rec);
0664 
0665     trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
0666 
0667     if (new)
0668         xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
0669 }
0670 
0671 static struct xfs_iext_node *
0672 xfs_iext_rebalance_node(
0673     struct xfs_iext_node    *parent,
0674     int         *pos,
0675     struct xfs_iext_node    *node,
0676     int         nr_entries)
0677 {
0678     /*
0679      * If the neighbouring nodes are completely full, or have different
0680      * parents, we might never be able to merge our node, and will only
0681      * delete it once the number of entries hits zero.
0682      */
0683     if (nr_entries == 0)
0684         return node;
0685 
0686     if (*pos > 0) {
0687         struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
0688         int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
0689 
0690         if (nr_prev + nr_entries <= KEYS_PER_NODE) {
0691             for (i = 0; i < nr_entries; i++) {
0692                 prev->keys[nr_prev + i] = node->keys[i];
0693                 prev->ptrs[nr_prev + i] = node->ptrs[i];
0694             }
0695             return node;
0696         }
0697     }
0698 
0699     if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
0700         struct xfs_iext_node *next = parent->ptrs[*pos + 1];
0701         int nr_next = xfs_iext_node_nr_entries(next, 0), i;
0702 
0703         if (nr_entries + nr_next <= KEYS_PER_NODE) {
0704             /*
0705              * Merge the next node into this node so that we don't
0706              * have to do an additional update of the keys in the
0707              * higher levels.
0708              */
0709             for (i = 0; i < nr_next; i++) {
0710                 node->keys[nr_entries + i] = next->keys[i];
0711                 node->ptrs[nr_entries + i] = next->ptrs[i];
0712             }
0713 
0714             ++*pos;
0715             return next;
0716         }
0717     }
0718 
0719     return NULL;
0720 }
0721 
0722 static void
0723 xfs_iext_remove_node(
0724     struct xfs_ifork    *ifp,
0725     xfs_fileoff_t       offset,
0726     void            *victim)
0727 {
0728     struct xfs_iext_node    *node, *parent;
0729     int         level = 2, pos, nr_entries, i;
0730 
0731     ASSERT(level <= ifp->if_height);
0732     node = xfs_iext_find_level(ifp, offset, level);
0733     pos = xfs_iext_node_pos(node, offset);
0734 again:
0735     ASSERT(node->ptrs[pos]);
0736     ASSERT(node->ptrs[pos] == victim);
0737     kmem_free(victim);
0738 
0739     nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
0740     offset = node->keys[0];
0741     for (i = pos; i < nr_entries; i++) {
0742         node->keys[i] = node->keys[i + 1];
0743         node->ptrs[i] = node->ptrs[i + 1];
0744     }
0745     node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
0746     node->ptrs[nr_entries] = NULL;
0747 
0748     if (pos == 0 && nr_entries > 0) {
0749         xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
0750         offset = node->keys[0];
0751     }
0752 
0753     if (nr_entries >= KEYS_PER_NODE / 2)
0754         return;
0755 
0756     if (level < ifp->if_height) {
0757         /*
0758          * If we aren't at the root yet try to find a neighbour node to
0759          * merge with (or delete the node if it is empty), and then
0760          * recurse up to the next level.
0761          */
0762         level++;
0763         parent = xfs_iext_find_level(ifp, offset, level);
0764         pos = xfs_iext_node_pos(parent, offset);
0765 
0766         ASSERT(pos != KEYS_PER_NODE);
0767         ASSERT(parent->ptrs[pos] == node);
0768 
0769         node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
0770         if (node) {
0771             victim = node;
0772             node = parent;
0773             goto again;
0774         }
0775     } else if (nr_entries == 1) {
0776         /*
0777          * If we are at the root and only one entry is left we can just
0778          * free this node and update the root pointer.
0779          */
0780         ASSERT(node == ifp->if_u1.if_root);
0781         ifp->if_u1.if_root = node->ptrs[0];
0782         ifp->if_height--;
0783         kmem_free(node);
0784     }
0785 }
0786 
0787 static void
0788 xfs_iext_rebalance_leaf(
0789     struct xfs_ifork    *ifp,
0790     struct xfs_iext_cursor  *cur,
0791     struct xfs_iext_leaf    *leaf,
0792     xfs_fileoff_t       offset,
0793     int         nr_entries)
0794 {
0795     /*
0796      * If the neighbouring nodes are completely full we might never be able
0797      * to merge our node, and will only delete it once the number of
0798      * entries hits zero.
0799      */
0800     if (nr_entries == 0)
0801         goto remove_node;
0802 
0803     if (leaf->prev) {
0804         int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
0805 
0806         if (nr_prev + nr_entries <= RECS_PER_LEAF) {
0807             for (i = 0; i < nr_entries; i++)
0808                 leaf->prev->recs[nr_prev + i] = leaf->recs[i];
0809 
0810             if (cur->leaf == leaf) {
0811                 cur->leaf = leaf->prev;
0812                 cur->pos += nr_prev;
0813             }
0814             goto remove_node;
0815         }
0816     }
0817 
0818     if (leaf->next) {
0819         int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
0820 
0821         if (nr_entries + nr_next <= RECS_PER_LEAF) {
0822             /*
0823              * Merge the next node into this node so that we don't
0824              * have to do an additional update of the keys in the
0825              * higher levels.
0826              */
0827             for (i = 0; i < nr_next; i++) {
0828                 leaf->recs[nr_entries + i] =
0829                     leaf->next->recs[i];
0830             }
0831 
0832             if (cur->leaf == leaf->next) {
0833                 cur->leaf = leaf;
0834                 cur->pos += nr_entries;
0835             }
0836 
0837             offset = xfs_iext_leaf_key(leaf->next, 0);
0838             leaf = leaf->next;
0839             goto remove_node;
0840         }
0841     }
0842 
0843     return;
0844 remove_node:
0845     if (leaf->prev)
0846         leaf->prev->next = leaf->next;
0847     if (leaf->next)
0848         leaf->next->prev = leaf->prev;
0849     xfs_iext_remove_node(ifp, offset, leaf);
0850 }
0851 
0852 static void
0853 xfs_iext_free_last_leaf(
0854     struct xfs_ifork    *ifp)
0855 {
0856     ifp->if_height--;
0857     kmem_free(ifp->if_u1.if_root);
0858     ifp->if_u1.if_root = NULL;
0859 }
0860 
0861 void
0862 xfs_iext_remove(
0863     struct xfs_inode    *ip,
0864     struct xfs_iext_cursor  *cur,
0865     int         state)
0866 {
0867     struct xfs_ifork    *ifp = xfs_iext_state_to_fork(ip, state);
0868     struct xfs_iext_leaf    *leaf = cur->leaf;
0869     xfs_fileoff_t       offset = xfs_iext_leaf_key(leaf, 0);
0870     int         i, nr_entries;
0871 
0872     trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
0873 
0874     ASSERT(ifp->if_height > 0);
0875     ASSERT(ifp->if_u1.if_root != NULL);
0876     ASSERT(xfs_iext_valid(ifp, cur));
0877 
0878     xfs_iext_inc_seq(ifp);
0879 
0880     nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
0881     for (i = cur->pos; i < nr_entries; i++)
0882         leaf->recs[i] = leaf->recs[i + 1];
0883     xfs_iext_rec_clear(&leaf->recs[nr_entries]);
0884     ifp->if_bytes -= sizeof(struct xfs_iext_rec);
0885 
0886     if (cur->pos == 0 && nr_entries > 0) {
0887         xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
0888                 leaf);
0889         offset = xfs_iext_leaf_key(leaf, 0);
0890     } else if (cur->pos == nr_entries) {
0891         if (ifp->if_height > 1 && leaf->next)
0892             cur->leaf = leaf->next;
0893         else
0894             cur->leaf = NULL;
0895         cur->pos = 0;
0896     }
0897 
0898     if (nr_entries >= RECS_PER_LEAF / 2)
0899         return;
0900 
0901     if (ifp->if_height > 1)
0902         xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
0903     else if (nr_entries == 0)
0904         xfs_iext_free_last_leaf(ifp);
0905 }
0906 
0907 /*
0908  * Lookup the extent covering bno.
0909  *
0910  * If there is an extent covering bno return the extent index, and store the
0911  * expanded extent structure in *gotp, and the extent cursor in *cur.
0912  * If there is no extent covering bno, but there is an extent after it (e.g.
0913  * it lies in a hole) return that extent in *gotp and its cursor in *cur
0914  * instead.
0915  * If bno is beyond the last extent return false, and return an invalid
0916  * cursor value.
0917  */
0918 bool
0919 xfs_iext_lookup_extent(
0920     struct xfs_inode    *ip,
0921     struct xfs_ifork    *ifp,
0922     xfs_fileoff_t       offset,
0923     struct xfs_iext_cursor  *cur,
0924     struct xfs_bmbt_irec    *gotp)
0925 {
0926     XFS_STATS_INC(ip->i_mount, xs_look_exlist);
0927 
0928     cur->leaf = xfs_iext_find_level(ifp, offset, 1);
0929     if (!cur->leaf) {
0930         cur->pos = 0;
0931         return false;
0932     }
0933 
0934     for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
0935         struct xfs_iext_rec *rec = cur_rec(cur);
0936 
0937         if (xfs_iext_rec_is_empty(rec))
0938             break;
0939         if (xfs_iext_rec_cmp(rec, offset) >= 0)
0940             goto found;
0941     }
0942 
0943     /* Try looking in the next node for an entry > offset */
0944     if (ifp->if_height == 1 || !cur->leaf->next)
0945         return false;
0946     cur->leaf = cur->leaf->next;
0947     cur->pos = 0;
0948     if (!xfs_iext_valid(ifp, cur))
0949         return false;
0950 found:
0951     xfs_iext_get(gotp, cur_rec(cur));
0952     return true;
0953 }
0954 
0955 /*
0956  * Returns the last extent before end, and if this extent doesn't cover
0957  * end, update end to the end of the extent.
0958  */
0959 bool
0960 xfs_iext_lookup_extent_before(
0961     struct xfs_inode    *ip,
0962     struct xfs_ifork    *ifp,
0963     xfs_fileoff_t       *end,
0964     struct xfs_iext_cursor  *cur,
0965     struct xfs_bmbt_irec    *gotp)
0966 {
0967     /* could be optimized to not even look up the next on a match.. */
0968     if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
0969         gotp->br_startoff <= *end - 1)
0970         return true;
0971     if (!xfs_iext_prev_extent(ifp, cur, gotp))
0972         return false;
0973     *end = gotp->br_startoff + gotp->br_blockcount;
0974     return true;
0975 }
0976 
0977 void
0978 xfs_iext_update_extent(
0979     struct xfs_inode    *ip,
0980     int         state,
0981     struct xfs_iext_cursor  *cur,
0982     struct xfs_bmbt_irec    *new)
0983 {
0984     struct xfs_ifork    *ifp = xfs_iext_state_to_fork(ip, state);
0985 
0986     xfs_iext_inc_seq(ifp);
0987 
0988     if (cur->pos == 0) {
0989         struct xfs_bmbt_irec    old;
0990 
0991         xfs_iext_get(&old, cur_rec(cur));
0992         if (new->br_startoff != old.br_startoff) {
0993             xfs_iext_update_node(ifp, old.br_startoff,
0994                     new->br_startoff, 1, cur->leaf);
0995         }
0996     }
0997 
0998     trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
0999     xfs_iext_set(cur_rec(cur), new);
1000     trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
1001 }
1002 
1003 /*
1004  * Return true if the cursor points at an extent and return the extent structure
1005  * in gotp.  Else return false.
1006  */
1007 bool
1008 xfs_iext_get_extent(
1009     struct xfs_ifork    *ifp,
1010     struct xfs_iext_cursor  *cur,
1011     struct xfs_bmbt_irec    *gotp)
1012 {
1013     if (!xfs_iext_valid(ifp, cur))
1014         return false;
1015     xfs_iext_get(gotp, cur_rec(cur));
1016     return true;
1017 }
1018 
1019 /*
1020  * This is a recursive function, because of that we need to be extremely
1021  * careful with stack usage.
1022  */
1023 static void
1024 xfs_iext_destroy_node(
1025     struct xfs_iext_node    *node,
1026     int         level)
1027 {
1028     int         i;
1029 
1030     if (level > 1) {
1031         for (i = 0; i < KEYS_PER_NODE; i++) {
1032             if (node->keys[i] == XFS_IEXT_KEY_INVALID)
1033                 break;
1034             xfs_iext_destroy_node(node->ptrs[i], level - 1);
1035         }
1036     }
1037 
1038     kmem_free(node);
1039 }
1040 
1041 void
1042 xfs_iext_destroy(
1043     struct xfs_ifork    *ifp)
1044 {
1045     xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
1046 
1047     ifp->if_bytes = 0;
1048     ifp->if_height = 0;
1049     ifp->if_u1.if_root = NULL;
1050 }