0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *   Copyright (C) International Business Machines Corp., 2000-2005
0004  */
0005 /*
0006  *  jfs_xtree.c: extent allocation descriptor B+-tree manager
0007  */
0008 
0009 #include <linux/fs.h>
0010 #include <linux/module.h>
0011 #include <linux/quotaops.h>
0012 #include <linux/seq_file.h>
0013 #include "jfs_incore.h"
0014 #include "jfs_filsys.h"
0015 #include "jfs_metapage.h"
0016 #include "jfs_dmap.h"
0017 #include "jfs_dinode.h"
0018 #include "jfs_superblock.h"
0019 #include "jfs_debug.h"
0020 
0021 /*
0022  * xtree local flag
0023  */
0024 #define XT_INSERT   0x00000001
0025 
0026 /*
0027  *  xtree key/entry comparison: extent offset
0028  *
0029  * return:
0030  *  -1: k < start of extent
0031  *   0: start_of_extent <= k <= end_of_extent
0032  *   1: k > end_of_extent
0033  */
0034 #define XT_CMP(CMP, K, X, OFFSET64)\
0035 {\
0036     OFFSET64 = offsetXAD(X);\
0037     (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
0038         ((K) < OFFSET64) ? -1 : 0;\
0039 }
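/*
 * e.g., for an xad X covering offsets [8, 12) (offsetXAD(X) == 8,
 * lengthXAD(X) == 4), XT_CMP yields CMP == -1 for K == 7, CMP == 0
 * for K in 8..11, and CMP == 1 for K >= 12.
 */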
0040 
0041 /* write a xad entry */
0042 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
0043 {\
0044     (XAD)->flag = (FLAG);\
0045     XADoffset((XAD), (OFF));\
0046     XADlength((XAD), (LEN));\
0047     XADaddress((XAD), (ADDR));\
0048 }
0049 
0050 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
0051 
0052 /* get page buffer for specified block address */
0053 /* ToDo: Replace this ugly macro with a function */
0054 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)             \
0055 do {                                    \
0056     BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);    \
0057     if (!(RC)) {                            \
0058         if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
0059             (le16_to_cpu((P)->header.nextindex) >       \
0060              le16_to_cpu((P)->header.maxentry)) ||      \
0061             (le16_to_cpu((P)->header.maxentry) >        \
0062              (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
0063             jfs_error((IP)->i_sb,               \
0064                   "XT_GETPAGE: xtree page corrupt\n");  \
0065             BT_PUTPAGE(MP);                 \
0066             MP = NULL;                  \
0067             RC = -EIO;                  \
0068         }                           \
0069     }                               \
0070 } while (0)
0071 
0072 /* for consistency */
0073 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
0074 
0075 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
0076     BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
0077 /* xtree entry parameter descriptor */
0078 struct xtsplit {
0079     struct metapage *mp;
0080     s16 index;
0081     u8 flag;
0082     s64 off;
0083     s64 addr;
0084     int len;
0085     struct pxdlist *pxdlist;
0086 };
0087 
0088 
0089 /*
0090  *  statistics
0091  */
0092 #ifdef CONFIG_JFS_STATISTICS
0093 static struct {
0094     uint search;
0095     uint fastSearch;
0096     uint split;
0097 } xtStat;
0098 #endif
0099 
0100 
0101 /*
0102  * forward references
0103  */
0104 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
0105             struct btstack * btstack, int flag);
0106 
0107 static int xtSplitUp(tid_t tid,
0108              struct inode *ip,
0109              struct xtsplit * split, struct btstack * btstack);
0110 
0111 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
0112                struct metapage ** rmpp, s64 * rbnp);
0113 
0114 static int xtSplitRoot(tid_t tid, struct inode *ip,
0115                struct xtsplit * split, struct metapage ** rmpp);
0116 
0117 /*
0118  *  xtLookup()
0119  *
0120  * function: map a single page into a physical extent;
0121  */
0122 int xtLookup(struct inode *ip, s64 lstart,
0123          s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
0124 {
0125     int rc = 0;
0126     struct btstack btstack;
0127     int cmp;
0128     s64 bn;
0129     struct metapage *mp;
0130     xtpage_t *p;
0131     int index;
0132     xad_t *xad;
0133     s64 next, size, xoff, xend;
0134     int xlen;
0135     s64 xaddr;
0136 
0137     *paddr = 0;
0138     *plen = llen;
0139 
0140     if (!no_check) {
0141         /* is lookup offset beyond eof ? */
0142         size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
0143             JFS_SBI(ip->i_sb)->l2bsize;
0144         if (lstart >= size)
0145             return 0;
0146     }
0147 
0148     /*
0149      * search for the xad entry covering the logical extent
0150      */
0151 //search:
0152     if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
0153         jfs_err("xtLookup: xtSearch returned %d", rc);
0154         return rc;
0155     }
0156 
0157     /*
0158      *  compute the physical extent covering logical extent
0159      *
0160      * N.B. search may have failed (e.g., hole in sparse file),
0161      * and returned the index of the next entry.
0162      */
0163     /* retrieve search result */
0164     XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
0165 
0166     /* is xad found covering start of logical extent ?
0167      * lstart is a page start address,
0168      * i.e., lstart cannot start in a hole;
0169      */
0170     if (cmp) {
0171         if (next)
0172             *plen = min(next - lstart, llen);
0173         goto out;
0174     }
0175 
0176     /*
0177      * lxd covered by xad
0178      */
0179     xad = &p->xad[index];
0180     xoff = offsetXAD(xad);
0181     xlen = lengthXAD(xad);
0182     xend = xoff + xlen;
0183     xaddr = addressXAD(xad);
0184 
0185     /* initialize new pxd */
0186     *pflag = xad->flag;
0187     *paddr = xaddr + (lstart - xoff);
0188     /* a page must be fully covered by an xad */
0189     *plen = min(xend - lstart, llen);
0190 
0191       out:
0192     XT_PUTPAGE(mp);
0193 
0194     return rc;
0195 }
0196 
0197 /*
0198  *  xtSearch()
0199  *
0200  * function:    search for the xad entry covering specified offset.
0201  *
0202  * parameters:
0203  *  ip  - file object;
0204  *  xoff    - extent offset;
0205  *  nextp   - address of next extent (if any) for search miss
0206  *  cmpp    - comparison result:
0207  *  btstack - traverse stack;
0208  *  flag    - search process flag (XT_INSERT);
0209  *
0210  * returns:
0211  *  btstack contains (bn, index) of search path traversed to the entry.
0212  *  *cmpp is set to result of comparison with the entry returned.
0213  *  the page containing the entry is pinned at exit.
0214  */
0215 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
0216             int *cmpp, struct btstack * btstack, int flag)
0217 {
0218     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
0219     int rc = 0;
0220     int cmp = 1;        /* init for empty page */
0221     s64 bn;         /* block number */
0222     struct metapage *mp;    /* page buffer */
0223     xtpage_t *p;        /* page */
0224     xad_t *xad;
0225     int base, index, lim, btindex;
0226     struct btframe *btsp;
0227     int nsplit = 0;     /* number of pages to split */
0228     s64 t64;
0229     s64 next = 0;
0230 
0231     INCREMENT(xtStat.search);
0232 
0233     BT_CLR(btstack);
0234 
0235     btstack->nsplit = 0;
0236 
0237     /*
0238      *  search down tree from root:
0239      *
0240      * between two consecutive entries <Ki, Pi> and <Kj, Pj> of an
0241      * internal page, child page Pi contains entries with key K, Ki <= K < Kj.
0242      *
0243      * if an entry with search key K is not found,
0244      * internal page search finds the entry with the largest key Ki
0245      * less than K, which points to the child page to search;
0246      * leaf page search finds the entry with the smallest key Kj
0247      * greater than K, so that the returned index is the position of
0248      * the entry to be shifted right for insertion of the new entry.
0249      * for empty tree, search key is greater than any key of the tree.
0250      *
0251      * by convention, root bn = 0.
0252      */
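    /*
     * illustrative walk-through: searching xoff == 100 in a leaf whose
     * entries cover offsets [0, 50) and [200, 300) misses; base ends up
     * at the [200, 300) entry, *cmpp is set nonzero, *nextp is set to
     * 200, and btstack records (bn, base) as the insertion position.
     */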
0253     for (bn = 0;;) {
0254         /* get/pin the page to search */
0255         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
0256         if (rc)
0257             return rc;
0258 
0259         /* try sequential access heuristics with the previous
0260          * access entry in target leaf page:
0261          * once search narrowed down into the target leaf,
0262          * key must either match an entry in the leaf or
0263          * key entry does not exist in the tree;
0264          */
0265 //fastSearch:
0266         if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
0267             (p->header.flag & BT_LEAF) &&
0268             (index = jfs_ip->btindex) <
0269             le16_to_cpu(p->header.nextindex)) {
0270             xad = &p->xad[index];
0271             t64 = offsetXAD(xad);
0272             if (xoff < t64 + lengthXAD(xad)) {
0273                 if (xoff >= t64) {
0274                     *cmpp = 0;
0275                     goto out;
0276                 }
0277 
0278                 /* stop sequential access heuristics */
0279                 goto binarySearch;
0280             } else {    /* (t64 + lengthXAD(xad)) <= xoff */
0281 
0282                 /* try next sequential entry */
0283                 index++;
0284                 if (index <
0285                     le16_to_cpu(p->header.nextindex)) {
0286                     xad++;
0287                     t64 = offsetXAD(xad);
0288                     if (xoff < t64 + lengthXAD(xad)) {
0289                         if (xoff >= t64) {
0290                             *cmpp = 0;
0291                             goto out;
0292                         }
0293 
0294                         /* miss: key falls between
0295                          * previous and this entry
0296                          */
0297                         *cmpp = 1;
0298                         next = t64;
0299                         goto out;
0300                     }
0301 
0302                     /* (xoff >= t64 + lengthXAD(xad));
0303                      * matching entry may be further out:
0304                      * stop heuristic search
0305                      */
0306                     /* stop sequential access heuristics */
0307                     goto binarySearch;
0308                 }
0309 
0310                 /* (index == p->header.nextindex);
0311                  * miss: key entry does not exist in
0312                  * the target leaf/tree
0313                  */
0314                 *cmpp = 1;
0315                 goto out;
0316             }
0317 
0318             /*
0319              * if hit, return index of the entry found, and
0320              * if miss, where new entry with search key is
0321              * to be inserted;
0322              */
0323               out:
0324             /* compute number of pages to split */
0325             if (flag & XT_INSERT) {
0326                 if (p->header.nextindex ==  /* little-endian */
0327                     p->header.maxentry)
0328                     nsplit++;
0329                 else
0330                     nsplit = 0;
0331                 btstack->nsplit = nsplit;
0332             }
0333 
0334             /* save search result */
0335             btsp = btstack->top;
0336             btsp->bn = bn;
0337             btsp->index = index;
0338             btsp->mp = mp;
0339 
0340             /* update sequential access heuristics */
0341             jfs_ip->btindex = index;
0342 
0343             if (nextp)
0344                 *nextp = next;
0345 
0346             INCREMENT(xtStat.fastSearch);
0347             return 0;
0348         }
0349 
0350         /* well, ... full search now */
0351           binarySearch:
0352         lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
0353 
0354         /*
0355          * binary search with search key K on the current page
0356          */
0357         for (base = XTENTRYSTART; lim; lim >>= 1) {
0358             index = base + (lim >> 1);
0359 
0360             XT_CMP(cmp, xoff, &p->xad[index], t64);
0361             if (cmp == 0) {
0362                 /*
0363                  *  search hit
0364                  */
0365                 /* search hit - leaf page:
0366                  * return the entry found
0367                  */
0368                 if (p->header.flag & BT_LEAF) {
0369                     *cmpp = cmp;
0370 
0371                     /* compute number of pages to split */
0372                     if (flag & XT_INSERT) {
0373                         if (p->header.nextindex ==
0374                             p->header.maxentry)
0375                             nsplit++;
0376                         else
0377                             nsplit = 0;
0378                         btstack->nsplit = nsplit;
0379                     }
0380 
0381                     /* save search result */
0382                     btsp = btstack->top;
0383                     btsp->bn = bn;
0384                     btsp->index = index;
0385                     btsp->mp = mp;
0386 
0387                     /* init sequential access heuristics */
0388                     btindex = jfs_ip->btindex;
0389                     if (index == btindex ||
0390                         index == btindex + 1)
0391                         jfs_ip->btorder = BT_SEQUENTIAL;
0392                     else
0393                         jfs_ip->btorder = BT_RANDOM;
0394                     jfs_ip->btindex = index;
0395 
0396                     return 0;
0397                 }
0398                 /* search hit - internal page:
0399                  * descend/search its child page
0400                  */
0401                 if (index < le16_to_cpu(p->header.nextindex)-1)
0402                     next = offsetXAD(&p->xad[index + 1]);
0403                 goto next;
0404             }
0405 
0406             if (cmp > 0) {
0407                 base = index + 1;
0408                 --lim;
0409             }
0410         }
0411 
0412         /*
0413          *  search miss
0414          *
0415          * base is the smallest index with key (Kj) greater than
0416          * search key (K) and may be zero or maxentry index.
0417          */
0418         if (base < le16_to_cpu(p->header.nextindex))
0419             next = offsetXAD(&p->xad[base]);
0420         /*
0421          * search miss - leaf page:
0422          *
0423          * return location of entry (base) where new entry with
0424          * search key K is to be inserted.
0425          */
0426         if (p->header.flag & BT_LEAF) {
0427             *cmpp = cmp;
0428 
0429             /* compute number of pages to split */
0430             if (flag & XT_INSERT) {
0431                 if (p->header.nextindex ==
0432                     p->header.maxentry)
0433                     nsplit++;
0434                 else
0435                     nsplit = 0;
0436                 btstack->nsplit = nsplit;
0437             }
0438 
0439             /* save search result */
0440             btsp = btstack->top;
0441             btsp->bn = bn;
0442             btsp->index = base;
0443             btsp->mp = mp;
0444 
0445             /* init sequential access heuristics */
0446             btindex = jfs_ip->btindex;
0447             if (base == btindex || base == btindex + 1)
0448                 jfs_ip->btorder = BT_SEQUENTIAL;
0449             else
0450                 jfs_ip->btorder = BT_RANDOM;
0451             jfs_ip->btindex = base;
0452 
0453             if (nextp)
0454                 *nextp = next;
0455 
0456             return 0;
0457         }
0458 
0459         /*
0460          * search miss - non-leaf page:
0461          *
0462          * if base is non-zero, decrement base by one to get the parent
0463          * entry of the child page to search.
0464          */
0465         index = base ? base - 1 : base;
0466 
0467         /*
0468          * go down to child page
0469          */
0470           next:
0471         /* update number of pages to split */
0472         if (p->header.nextindex == p->header.maxentry)
0473             nsplit++;
0474         else
0475             nsplit = 0;
0476 
0477         /* push (bn, index) of the parent page/entry */
0478         if (BT_STACK_FULL(btstack)) {
0479             jfs_error(ip->i_sb, "stack overrun!\n");
0480             XT_PUTPAGE(mp);
0481             return -EIO;
0482         }
0483         BT_PUSH(btstack, bn, index);
0484 
0485         /* get the child page block number */
0486         bn = addressXAD(&p->xad[index]);
0487 
0488         /* unpin the parent page */
0489         XT_PUTPAGE(mp);
0490     }
0491 }
0492 
0493 /*
0494  *  xtInsert()
0495  *
0496  * function: insert an entry for a new extent into the xtree;
0497  *
0498  * parameter:
0499  *  tid - transaction id;
0500  *  ip  - file object;
0501  *  xflag   - extent flag (XAD_NOTRECORDED):
0502  *  xoff    - extent offset;
0503  *  xlen    - extent length;
0504  *  xaddrp  - extent address pointer (in/out):
0505  *      if (*xaddrp)
0506  *          caller allocated data extent at *xaddrp;
0507  *      else
0508  *          allocate data extent and return its xaddr;
0509  *  flag    -
0510  *
0511  * return: 0 for success or error code;
0512  */
0513 int xtInsert(tid_t tid,     /* transaction id */
0514          struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
0515          int flag)
0516 {
0517     int rc = 0;
0518     s64 xaddr, hint;
0519     struct metapage *mp;    /* meta-page buffer */
0520     xtpage_t *p;        /* base B+-tree index page */
0521     s64 bn;
0522     int index, nextindex;
0523     struct btstack btstack; /* traverse stack */
0524     struct xtsplit split;   /* split information */
0525     xad_t *xad;
0526     int cmp;
0527     s64 next;
0528     struct tlock *tlck;
0529     struct xtlock *xtlck;
0530 
0531     jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
0532 
0533     /*
0534      *  search for the entry location at which to insert:
0535      *
0536      * xtFastSearch() and xtSearch() both return (leaf page
0537      * pinned, index at which to insert).
0538      * n.b. xtSearch() may return index of maxentry of
0539      * the full page.
0540      */
0541     if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
0542         return rc;
0543 
0544     /* retrieve search result */
0545     XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
0546 
0547     /* This test must follow XT_GETSEARCH since mp must be valid if
0548      * we branch to out: */
0549     if ((cmp == 0) || (next && (xlen > next - xoff))) {
0550         rc = -EEXIST;
0551         goto out;
0552     }
0553 
0554     /*
0555      * allocate data extent requested
0556      *
0557      * allocation hint: last xad
0558      */
0559     if ((xaddr = *xaddrp) == 0) {
0560         if (index > XTENTRYSTART) {
0561             xad = &p->xad[index - 1];
0562             hint = addressXAD(xad) + lengthXAD(xad) - 1;
0563         } else
0564             hint = 0;
0565         if ((rc = dquot_alloc_block(ip, xlen)))
0566             goto out;
0567         if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
0568             dquot_free_block(ip, xlen);
0569             goto out;
0570         }
0571     }
0572 
0573     /*
0574      *  insert entry for new extent
0575      */
0576     xflag |= XAD_NEW;
0577 
0578     /*
0579      *  if the leaf page is full, split the page and
0580      *  propagate up the router entry for the new page from split
0581      *
0582      * The xtSplitUp() will insert the entry and unpin the leaf page.
0583      */
0584     nextindex = le16_to_cpu(p->header.nextindex);
0585     if (nextindex == le16_to_cpu(p->header.maxentry)) {
0586         split.mp = mp;
0587         split.index = index;
0588         split.flag = xflag;
0589         split.off = xoff;
0590         split.len = xlen;
0591         split.addr = xaddr;
0592         split.pxdlist = NULL;
0593         if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
0594             /* undo data extent allocation */
0595             if (*xaddrp == 0) {
0596                 dbFree(ip, xaddr, (s64) xlen);
0597                 dquot_free_block(ip, xlen);
0598             }
0599             return rc;
0600         }
0601 
0602         *xaddrp = xaddr;
0603         return 0;
0604     }
0605 
0606     /*
0607      *  insert the new entry into the leaf page
0608      */
0609     /*
0610      * acquire a transaction lock on the leaf page;
0611      *
0612      * action: xad insertion/extension;
0613      */
0614     BT_MARK_DIRTY(mp, ip);
0615 
0616     /* if insert into middle, shift right remaining entries. */
0617     if (index < nextindex)
0618         memmove(&p->xad[index + 1], &p->xad[index],
0619             (nextindex - index) * sizeof(xad_t));
0620 
0621     /* insert the new entry: mark the entry NEW */
0622     xad = &p->xad[index];
0623     XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
0624 
0625     /* advance next available entry index */
0626     le16_add_cpu(&p->header.nextindex, 1);
0627 
0628     /* Don't log it if there are no links to the file */
0629     if (!test_cflag(COMMIT_Nolink, ip)) {
0630         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
0631         xtlck = (struct xtlock *) & tlck->lock;
0632         xtlck->lwm.offset =
0633             (xtlck->lwm.offset) ? min(index,
0634                           (int)xtlck->lwm.offset) : index;
0635         xtlck->lwm.length =
0636             le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
0637     }
0638 
0639     *xaddrp = xaddr;
0640 
0641       out:
0642     /* unpin the leaf page */
0643     XT_PUTPAGE(mp);
0644 
0645     return rc;
0646 }
0647 
0648 
0649 /*
0650  *  xtSplitUp()
0651  *
0652  * function:
0653  *  split full pages as propagating insertion up the tree
0654  *
0655  * parameter:
0656  *  tid - transaction id;
0657  *  ip  - file object;
0658  *  split   - entry parameter descriptor;
0659  *  btstack - traverse stack from xtSearch()
0660  *
0661  * return: 0 for success or error code;
0662  */
0663 static int
0664 xtSplitUp(tid_t tid,
0665       struct inode *ip, struct xtsplit * split, struct btstack * btstack)
0666 {
0667     int rc = 0;
0668     struct metapage *smp;
0669     xtpage_t *sp;       /* split page */
0670     struct metapage *rmp;
0671     s64 rbn;        /* new right page block number */
0672     struct metapage *rcmp;
0673     xtpage_t *rcp;      /* right child page */
0674     s64 rcbn;       /* right child page block number */
0675     int skip;       /* index of entry of insertion */
0676     int nextindex;      /* next available entry index of p */
0677     struct btframe *parent; /* parent page entry on traverse stack */
0678     xad_t *xad;
0679     s64 xaddr;
0680     int xlen;
0681     int nsplit;     /* number of pages split */
0682     struct pxdlist pxdlist;
0683     pxd_t *pxd;
0684     struct tlock *tlck;
0685     struct xtlock *xtlck;
0686 
0687     smp = split->mp;
0688     sp = XT_PAGE(ip, smp);
0689 
0690     /* is inode xtree root extension/inline EA area free ? */
0691     if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
0692         (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
0693         (JFS_IP(ip)->mode2 & INLINEEA)) {
0694         sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
0695         JFS_IP(ip)->mode2 &= ~INLINEEA;
0696 
0697         BT_MARK_DIRTY(smp, ip);
0698         /*
0699          * acquire a transaction lock on the leaf page;
0700          *
0701          * action: xad insertion/extension;
0702          */
0703 
0704         /* if insert into middle, shift right remaining entries. */
0705         skip = split->index;
0706         nextindex = le16_to_cpu(sp->header.nextindex);
0707         if (skip < nextindex)
0708             memmove(&sp->xad[skip + 1], &sp->xad[skip],
0709                 (nextindex - skip) * sizeof(xad_t));
0710 
0711         /* insert the new entry: mark the entry NEW */
0712         xad = &sp->xad[skip];
0713         XT_PUTENTRY(xad, split->flag, split->off, split->len,
0714                 split->addr);
0715 
0716         /* advance next available entry index */
0717         le16_add_cpu(&sp->header.nextindex, 1);
0718 
0719         /* Don't log it if there are no links to the file */
0720         if (!test_cflag(COMMIT_Nolink, ip)) {
0721             tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
0722             xtlck = (struct xtlock *) & tlck->lock;
0723             xtlck->lwm.offset = (xtlck->lwm.offset) ?
0724                 min(skip, (int)xtlck->lwm.offset) : skip;
0725             xtlck->lwm.length =
0726                 le16_to_cpu(sp->header.nextindex) -
0727                 xtlck->lwm.offset;
0728         }
0729 
0730         return 0;
0731     }
0732 
0733     /*
0734      * allocate new index blocks to cover index page split(s)
0735      *
0736      * allocation hint: ?
0737      */
0738     if (split->pxdlist == NULL) {
0739         nsplit = btstack->nsplit;
0740         split->pxdlist = &pxdlist;
0741         pxdlist.maxnpxd = pxdlist.npxd = 0;
0742         pxd = &pxdlist.pxd[0];
0743         xlen = JFS_SBI(ip->i_sb)->nbperpage;
0744         for (; nsplit > 0; nsplit--, pxd++) {
0745             if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
0746                 == 0) {
0747                 PXDaddress(pxd, xaddr);
0748                 PXDlength(pxd, xlen);
0749 
0750                 pxdlist.maxnpxd++;
0751 
0752                 continue;
0753             }
0754 
0755             /* undo allocation */
0756 
0757             XT_PUTPAGE(smp);
0758             return rc;
0759         }
0760     }
0761 
0762     /*
0763      * Split leaf page <sp> into <sp> and a new right page <rp>.
0764      *
0765      * The split routines insert the new entry into the leaf page,
0766      * and acquire txLock as appropriate.
0767      * return <rp> pinned and its block number <rpbn>.
0768      */
0769     rc = (sp->header.flag & BT_ROOT) ?
0770         xtSplitRoot(tid, ip, split, &rmp) :
0771         xtSplitPage(tid, ip, split, &rmp, &rbn);
0772 
0773     XT_PUTPAGE(smp);
0774 
0775     if (rc)
0776         return -EIO;
0777     /*
0778      * propagate up the router entry for the leaf page just split
0779      *
0780      * insert a router entry for the new page into the parent page,
0781      * propagate the insert/split up the tree by walking back the stack
0782      * of (bn of parent page, index of child page entry in parent page)
0783      * that were traversed during the search for the page that split.
0784      *
0785      * the propagation of insert/split up the tree stops if the root
0786      * splits or the page inserted into doesn't have to split to hold
0787      * the new entry.
0788      *
0789      * the parent entry for the split page remains the same, and
0790      * a new entry is inserted at its right with the first key and
0791      * block number of the new right page.
0792      *
0793      * There are a maximum of 3 pages pinned at any time:
0794      * right child, left parent and right parent (when the parent splits)
0795      * to keep the child page pinned while working on the parent.
0796      * make sure that all pins are released at exit.
0797      */
0798     while ((parent = BT_POP(btstack)) != NULL) {
0799         /* parent page specified by stack frame <parent> */
0800 
0801         /* keep current child pages <rcp> pinned */
0802         rcmp = rmp;
0803         rcbn = rbn;
0804         rcp = XT_PAGE(ip, rcmp);
0805 
0806         /*
0807          * insert router entry in parent for new right child page <rp>
0808          */
0809         /* get/pin the parent page <sp> */
0810         XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
0811         if (rc) {
0812             XT_PUTPAGE(rcmp);
0813             return rc;
0814         }
0815 
0816         /*
0817          * The new key entry goes ONE AFTER the index of parent entry,
0818          * because the split was to the right.
0819          */
0820         skip = parent->index + 1;
0821 
0822         /*
0823          * split or shift right remaining entries of the parent page
0824          */
0825         nextindex = le16_to_cpu(sp->header.nextindex);
0826         /*
0827          * parent page is full - split the parent page
0828          */
0829         if (nextindex == le16_to_cpu(sp->header.maxentry)) {
0830             /* init for parent page split */
0831             split->mp = smp;
0832             split->index = skip;    /* index at insert */
0833             split->flag = XAD_NEW;
0834             split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
0835             split->len = JFS_SBI(ip->i_sb)->nbperpage;
0836             split->addr = rcbn;
0837 
0838             /* unpin previous right child page */
0839             XT_PUTPAGE(rcmp);
0840 
0841             /* The split routines insert the new entry,
0842              * and acquire txLock as appropriate.
0843              * return <rp> pinned and its block number <rpbn>.
0844              */
0845             rc = (sp->header.flag & BT_ROOT) ?
0846                 xtSplitRoot(tid, ip, split, &rmp) :
0847                 xtSplitPage(tid, ip, split, &rmp, &rbn);
0848             if (rc) {
0849                 XT_PUTPAGE(smp);
0850                 return rc;
0851             }
0852 
0853             XT_PUTPAGE(smp);
0854             /* keep new child page <rp> pinned */
0855         }
0856         /*
0857          * parent page is not full - insert in parent page
0858          */
0859         else {
0860             /*
0861              * insert router entry in parent for the right child
0862              * page from the first entry of the right child page:
0863              */
0864             /*
0865              * acquire a transaction lock on the parent page;
0866              *
0867              * action: router xad insertion;
0868              */
0869             BT_MARK_DIRTY(smp, ip);
0870 
0871             /*
0872              * if insert into middle, shift right remaining entries
0873              */
0874             if (skip < nextindex)
0875                 memmove(&sp->xad[skip + 1], &sp->xad[skip],
0876                     (nextindex -
0877                      skip) << L2XTSLOTSIZE);
0878 
0879             /* insert the router entry */
0880             xad = &sp->xad[skip];
0881             XT_PUTENTRY(xad, XAD_NEW,
0882                     offsetXAD(&rcp->xad[XTENTRYSTART]),
0883                     JFS_SBI(ip->i_sb)->nbperpage, rcbn);
0884 
0885             /* advance next available entry index. */
0886             le16_add_cpu(&sp->header.nextindex, 1);
0887 
0888             /* Don't log it if there are no links to the file */
0889             if (!test_cflag(COMMIT_Nolink, ip)) {
0890                 tlck = txLock(tid, ip, smp,
0891                           tlckXTREE | tlckGROW);
0892                 xtlck = (struct xtlock *) & tlck->lock;
0893                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
0894                     min(skip, (int)xtlck->lwm.offset) : skip;
0895                 xtlck->lwm.length =
0896                     le16_to_cpu(sp->header.nextindex) -
0897                     xtlck->lwm.offset;
0898             }
0899 
0900             /* unpin parent page */
0901             XT_PUTPAGE(smp);
0902 
0903             /* exit propagate up */
0904             break;
0905         }
0906     }
0907 
0908     /* unpin current right page */
0909     XT_PUTPAGE(rmp);
0910 
0911     return 0;
0912 }
0913 
0914 
0915 /*
0916  *  xtSplitPage()
0917  *
0918  * function:
0919  *  split a full non-root page into
0920  *  original/split/left page and new right page
0921  *  i.e., the original/split page remains as left page.
0922  *
0923  * parameter:
0924  *  int     tid,
0925  *  struct inode    *ip,
0926  *  struct xtsplit  *split,
0927  *  struct metapage **rmpp,
0928  *  u64     *rbnp,
0929  *
0930  * return:
0931  *  0 for success or error code; the new right page is returned pinned via *rmpp and its block number via *rbnp.
0932  */
0933 static int
0934 xtSplitPage(tid_t tid, struct inode *ip,
0935         struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
0936 {
0937     int rc = 0;
0938     struct metapage *smp;
0939     xtpage_t *sp;
0940     struct metapage *rmp;
0941     xtpage_t *rp;       /* new right page allocated */
0942     s64 rbn;        /* new right page block number */
0943     struct metapage *mp;
0944     xtpage_t *p;
0945     s64 nextbn;
0946     int skip, maxentry, middle, righthalf, n;
0947     xad_t *xad;
0948     struct pxdlist *pxdlist;
0949     pxd_t *pxd;
0950     struct tlock *tlck;
0951     struct xtlock *sxtlck = NULL, *rxtlck = NULL;
0952     int quota_allocation = 0;
0953 
0954     smp = split->mp;
0955     sp = XT_PAGE(ip, smp);
0956 
0957     INCREMENT(xtStat.split);
0958 
0959     pxdlist = split->pxdlist;
0960     pxd = &pxdlist->pxd[pxdlist->npxd];
0961     pxdlist->npxd++;
0962     rbn = addressPXD(pxd);
0963 
0964     /* Allocate blocks to quota. */
0965     rc = dquot_alloc_block(ip, lengthPXD(pxd));
0966     if (rc)
0967         goto clean_up;
0968 
0969     quota_allocation += lengthPXD(pxd);
0970 
0971     /*
0972      * allocate the new right page for the split
0973      */
0974     rmp = get_metapage(ip, rbn, PSIZE, 1);
0975     if (rmp == NULL) {
0976         rc = -EIO;
0977         goto clean_up;
0978     }
0979 
0980     jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
0981 
0982     BT_MARK_DIRTY(rmp, ip);
0983     /*
0984      * action: new page;
0985      */
0986 
0987     rp = (xtpage_t *) rmp->data;
0988     rp->header.self = *pxd;
0989     rp->header.flag = sp->header.flag & BT_TYPE;
0990     rp->header.maxentry = sp->header.maxentry;  /* little-endian */
0991     rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
0992 
0993     BT_MARK_DIRTY(smp, ip);
0994     /* Don't log it if there are no links to the file */
0995     if (!test_cflag(COMMIT_Nolink, ip)) {
0996         /*
0997          * acquire a transaction lock on the new right page;
0998          */
0999         tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1000         rxtlck = (struct xtlock *) & tlck->lock;
1001         rxtlck->lwm.offset = XTENTRYSTART;
1002         /*
1003          * acquire a transaction lock on the split page
1004          */
1005         tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1006         sxtlck = (struct xtlock *) & tlck->lock;
1007     }
1008 
1009     /*
1010      * initialize/update sibling pointers of <sp> and <rp>
1011      */
1012     nextbn = le64_to_cpu(sp->header.next);
1013     rp->header.next = cpu_to_le64(nextbn);
1014     rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1015     sp->header.next = cpu_to_le64(rbn);
1016 
1017     skip = split->index;
1018 
1019     /*
1020      *  sequential append at tail (after last entry of last page)
1021      *
1022      * if splitting the last page on a level because of appending
1023          * an entry to it (skip is maxentry), it's likely that the access is
1024      * sequential. adding an empty page on the side of the level is less
1025      * work and can push the fill factor much higher than normal.
1026      * if we're wrong it's no big deal -  we will do the split the right
1027      * way next time.
1028      * (it may look like it's equally easy to do a similar hack for
1029      * reverse sorted data, that is, split the tree left, but it's not.
1030      * Be my guest.)
1031      */
1032     if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1033         /*
1034          * acquire a transaction lock on the new/right page;
1035          *
1036          * action: xad insertion;
1037          */
1038         /* insert entry at the first entry of the new right page */
1039         xad = &rp->xad[XTENTRYSTART];
1040         XT_PUTENTRY(xad, split->flag, split->off, split->len,
1041                 split->addr);
1042 
1043         rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1044 
1045         if (!test_cflag(COMMIT_Nolink, ip)) {
1046             /* rxtlck->lwm.offset = XTENTRYSTART; */
1047             rxtlck->lwm.length = 1;
1048         }
1049 
1050         *rmpp = rmp;
1051         *rbnp = rbn;
1052 
1053         jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1054         return 0;
1055     }
1056 
1057     /*
1058      *  non-sequential insert (at possibly middle page)
1059      */
1060 
1061     /*
1062      * update previous pointer of old next/right page of <sp>
1063      */
1064     if (nextbn != 0) {
1065         XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1066         if (rc) {
1067             XT_PUTPAGE(rmp);
1068             goto clean_up;
1069         }
1070 
1071         BT_MARK_DIRTY(mp, ip);
1072         /*
1073          * acquire a transaction lock on the next page;
1074          *
1075          * action:sibling pointer update;
1076          */
1077         if (!test_cflag(COMMIT_Nolink, ip))
1078             tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1079 
1080         p->header.prev = cpu_to_le64(rbn);
1081 
1082         /* sibling page may have been updated previously, or
1083          * it may be updated later;
1084          */
1085 
1086         XT_PUTPAGE(mp);
1087     }
1088 
1089     /*
1090      * split the data between the split and new/right pages
1091      */
1092     maxentry = le16_to_cpu(sp->header.maxentry);
1093     middle = maxentry >> 1;
1094     righthalf = maxentry - middle;
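    /*
     * e.g., with maxentry == 256: middle == 128 and righthalf == 128,
     * so an insertion with skip <= 128 stays in the old/left page and
     * one with skip > 128 goes into the new right page.
     */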
1095 
1096     /*
1097      * skip index in old split/left page - insert into left page:
1098      */
1099     if (skip <= middle) {
1100         /* move right half of split page to the new right page */
1101         memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1102             righthalf << L2XTSLOTSIZE);
1103 
1104         /* shift right tail of left half to make room for new entry */
1105         if (skip < middle)
1106             memmove(&sp->xad[skip + 1], &sp->xad[skip],
1107                 (middle - skip) << L2XTSLOTSIZE);
1108 
1109         /* insert new entry */
1110         xad = &sp->xad[skip];
1111         XT_PUTENTRY(xad, split->flag, split->off, split->len,
1112                 split->addr);
1113 
1114         /* update page header */
1115         sp->header.nextindex = cpu_to_le16(middle + 1);
1116         if (!test_cflag(COMMIT_Nolink, ip)) {
1117             sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1118                 min(skip, (int)sxtlck->lwm.offset) : skip;
1119         }
1120 
1121         rp->header.nextindex =
1122             cpu_to_le16(XTENTRYSTART + righthalf);
1123     }
1124     /*
1125      * skip index in new right page - insert into right page:
1126      */
1127     else {
1128         /* move left head of right half to right page */
1129         n = skip - middle;
1130         memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1131             n << L2XTSLOTSIZE);
1132 
1133         /* insert new entry */
1134         n += XTENTRYSTART;
1135         xad = &rp->xad[n];
1136         XT_PUTENTRY(xad, split->flag, split->off, split->len,
1137                 split->addr);
1138 
1139         /* move right tail of right half to right page */
1140         if (skip < maxentry)
1141             memmove(&rp->xad[n + 1], &sp->xad[skip],
1142                 (maxentry - skip) << L2XTSLOTSIZE);
1143 
1144         /* update page header */
1145         sp->header.nextindex = cpu_to_le16(middle);
1146         if (!test_cflag(COMMIT_Nolink, ip)) {
1147             sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1148                 min(middle, (int)sxtlck->lwm.offset) : middle;
1149         }
1150 
1151         rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1152                            righthalf + 1);
1153     }
1154 
1155     if (!test_cflag(COMMIT_Nolink, ip)) {
1156         sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1157             sxtlck->lwm.offset;
1158 
1159         /* rxtlck->lwm.offset = XTENTRYSTART; */
1160         rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1161             XTENTRYSTART;
1162     }
1163 
1164     *rmpp = rmp;
1165     *rbnp = rbn;
1166 
1167     jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1168     return rc;
1169 
1170       clean_up:
1171 
1172     /* Rollback quota allocation. */
1173     if (quota_allocation)
1174         dquot_free_block(ip, quota_allocation);
1175 
1176     return (rc);
1177 }
1178 
1179 
1180 /*
1181  *  xtSplitRoot()
1182  *
1183  * function:
1184  *  split the full root page into original/root/split page and new
1185  *  right page
1186  *  i.e., root remains fixed in tree anchor (inode) and the root is
1187  *  copied to a single new right child page since root page <<
1188  *  non-root page, and the split root page contains a single entry
1189  *  for the new right child page.
1190  *
1191  * parameter:
1192  *  int     tid,
1193  *  struct inode    *ip,
1194  *  struct xtsplit  *split,
1195  *  struct metapage **rmpp)
1196  *
1197  * return:
1198  *  0 for success or error code; the new right child page is returned pinned via *rmpp.
1199  */
1200 static int
1201 xtSplitRoot(tid_t tid,
1202         struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1203 {
1204     xtpage_t *sp;
1205     struct metapage *rmp;
1206     xtpage_t *rp;
1207     s64 rbn;
1208     int skip, nextindex;
1209     xad_t *xad;
1210     pxd_t *pxd;
1211     struct pxdlist *pxdlist;
1212     struct tlock *tlck;
1213     struct xtlock *xtlck;
1214     int rc;
1215 
1216     sp = &JFS_IP(ip)->i_xtroot;
1217 
1218     INCREMENT(xtStat.split);
1219 
1220     /*
1221      *  allocate a single (right) child page
1222      */
1223     pxdlist = split->pxdlist;
1224     pxd = &pxdlist->pxd[pxdlist->npxd];
1225     pxdlist->npxd++;
1226     rbn = addressPXD(pxd);
1227     rmp = get_metapage(ip, rbn, PSIZE, 1);
1228     if (rmp == NULL)
1229         return -EIO;
1230 
1231     /* Allocate blocks to quota. */
1232     rc = dquot_alloc_block(ip, lengthPXD(pxd));
1233     if (rc) {
1234         release_metapage(rmp);
1235         return rc;
1236     }
1237 
1238     jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1239 
1240     /*
1241      * acquire a transaction lock on the new right page;
1242      *
1243      * action: new page;
1244      */
1245     BT_MARK_DIRTY(rmp, ip);
1246 
1247     rp = (xtpage_t *) rmp->data;
1248     rp->header.flag =
1249         (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1250     rp->header.self = *pxd;
1251     rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1252     rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1253 
1254     /* initialize sibling pointers */
1255     rp->header.next = 0;
1256     rp->header.prev = 0;
1257 
1258     /*
1259      * copy the in-line root page into new right page extent
1260      */
1261     nextindex = le16_to_cpu(sp->header.maxentry);
1262     memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1263         (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1264 
1265     /*
1266      * insert the new entry into the new right/child page
1267      * (skip index in the new right page will not change)
1268      */
1269     skip = split->index;
1270     /* if insert into middle, shift right remaining entries */
1271     if (skip != nextindex)
1272         memmove(&rp->xad[skip + 1], &rp->xad[skip],
1273             (nextindex - skip) * sizeof(xad_t));
1274 
1275     xad = &rp->xad[skip];
1276     XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1277 
1278     /* update page header */
1279     rp->header.nextindex = cpu_to_le16(nextindex + 1);
1280 
1281     if (!test_cflag(COMMIT_Nolink, ip)) {
1282         tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1283         xtlck = (struct xtlock *) & tlck->lock;
1284         xtlck->lwm.offset = XTENTRYSTART;
1285         xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1286             XTENTRYSTART;
1287     }
1288 
1289     /*
1290      *  reset the root
1291      *
1292      * init root with the single entry for the new right page
1293  * set the 1st entry offset to 0, which forces the left-most key
1294      * at any level of the tree to be less than any search key.
1295      */
1296     /*
1297      * acquire a transaction lock on the root page (in-memory inode);
1298      *
1299      * action: root split;
1300      */
1301     BT_MARK_DIRTY(split->mp, ip);
1302 
1303     xad = &sp->xad[XTENTRYSTART];
1304     XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1305 
1306     /* update page header of root */
1307     sp->header.flag &= ~BT_LEAF;
1308     sp->header.flag |= BT_INTERNAL;
1309 
1310     sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1311 
1312     if (!test_cflag(COMMIT_Nolink, ip)) {
1313         tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1314         xtlck = (struct xtlock *) & tlck->lock;
1315         xtlck->lwm.offset = XTENTRYSTART;
1316         xtlck->lwm.length = 1;
1317     }
1318 
1319     *rmpp = rmp;
1320 
1321     jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1322     return 0;
1323 }
1324 
1325 
1326 /*
1327  *  xtExtend()
1328  *
1329  * function: extend in-place;
1330  *
1331  * note: existing extent may or may not have been committed.
1332  * caller is responsible for pager buffer cache update, and
1333  * working block allocation map update;
1334  * update pmap: alloc whole extended extent;
1335  */
1336 int xtExtend(tid_t tid,     /* transaction id */
1337          struct inode *ip, s64 xoff,    /* delta extent offset */
1338          s32 xlen,      /* delta extent length */
1339          int flag)
1340 {
1341     int rc = 0;
1342     int cmp;
1343     struct metapage *mp;    /* meta-page buffer */
1344     xtpage_t *p;        /* base B+-tree index page */
1345     s64 bn;
1346     int index, nextindex, len;
1347     struct btstack btstack; /* traverse stack */
1348     struct xtsplit split;   /* split information */
1349     xad_t *xad;
1350     s64 xaddr;
1351     struct tlock *tlck;
1352     struct xtlock *xtlck = NULL;
1353 
1354     jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1355 
1356     /* there must exist extent to be extended */
1357     if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1358         return rc;
1359 
1360     /* retrieve search result */
1361     XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1362 
1363     if (cmp != 0) {
1364         XT_PUTPAGE(mp);
1365         jfs_error(ip->i_sb, "xtSearch did not find extent\n");
1366         return -EIO;
1367     }
1368 
1369     /* extension must be contiguous */
1370     xad = &p->xad[index];
1371     if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1372         XT_PUTPAGE(mp);
1373         jfs_error(ip->i_sb, "extension is not contiguous\n");
1374         return -EIO;
1375     }
1376 
1377     /*
1378      * acquire a transaction lock on the leaf page;
1379      *
1380      * action: xad insertion/extension;
1381      */
1382     BT_MARK_DIRTY(mp, ip);
1383     if (!test_cflag(COMMIT_Nolink, ip)) {
1384         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1385         xtlck = (struct xtlock *) & tlck->lock;
1386     }
1387 
1388     /* extend will overflow extent ? */
1389     xlen = lengthXAD(xad) + xlen;
1390     if ((len = xlen - MAXXLEN) <= 0)
1391         goto extendOld;
1392 
1393     /*
1394      *  extent overflow: insert entry for new extent
1395      */
1396 //insertNew:
1397     xoff = offsetXAD(xad) + MAXXLEN;
1398     xaddr = addressXAD(xad) + MAXXLEN;
1399     nextindex = le16_to_cpu(p->header.nextindex);
1400 
1401     /*
1402      *  if the leaf page is full, insert the new entry and
1403      *  propagate up the router entry for the new page from split
1404      *
1405      * The xtSplitUp() will insert the entry and unpin the leaf page.
1406      */
1407     if (nextindex == le16_to_cpu(p->header.maxentry)) {
1408         /* xtSplitUp() unpins leaf pages */
1409         split.mp = mp;
1410         split.index = index + 1;
1411         split.flag = XAD_NEW;
1412         split.off = xoff;   /* split offset */
1413         split.len = len;
1414         split.addr = xaddr;
1415         split.pxdlist = NULL;
1416         if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1417             return rc;
1418 
1419         /* get back old page */
1420         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1421         if (rc)
1422             return rc;
1423         /*
1424          * if leaf root has been split, original root has been
1425          * copied to new child page, i.e., original entry now
1426          * resides on the new child page;
1427          */
1428         if (p->header.flag & BT_INTERNAL) {
1429             ASSERT(p->header.nextindex ==
1430                    cpu_to_le16(XTENTRYSTART + 1));
1431             xad = &p->xad[XTENTRYSTART];
1432             bn = addressXAD(xad);
1433             XT_PUTPAGE(mp);
1434 
1435             /* get new child page */
1436             XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1437             if (rc)
1438                 return rc;
1439 
1440             BT_MARK_DIRTY(mp, ip);
1441             if (!test_cflag(COMMIT_Nolink, ip)) {
1442                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1443                 xtlck = (struct xtlock *) & tlck->lock;
1444             }
1445         }
1446     }
1447     /*
1448      *  insert the new entry into the leaf page
1449      */
1450     else {
1451         /* insert the new entry: mark the entry NEW */
1452         xad = &p->xad[index + 1];
1453         XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1454 
1455         /* advance next available entry index */
1456         le16_add_cpu(&p->header.nextindex, 1);
1457     }
1458 
1459     /* get back old entry */
1460     xad = &p->xad[index];
1461     xlen = MAXXLEN;
1462 
1463     /*
1464      * extend old extent
1465      */
1466       extendOld:
1467     XADlength(xad, xlen);
1468     if (!(xad->flag & XAD_NEW))
1469         xad->flag |= XAD_EXTENDED;
1470 
1471     if (!test_cflag(COMMIT_Nolink, ip)) {
1472         xtlck->lwm.offset =
1473             (xtlck->lwm.offset) ? min(index,
1474                           (int)xtlck->lwm.offset) : index;
1475         xtlck->lwm.length =
1476             le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1477     }
1478 
1479     /* unpin the leaf page */
1480     XT_PUTPAGE(mp);
1481 
1482     return rc;
1483 }
1484 
1485 /*
1486  *  xtUpdate()
1487  *
1488  * function: update XAD;
1489  *
1490  *  update extent for allocated_but_not_recorded or
1491  *  compressed extent;
1492  *
1493  * parameter:
1494  *  nxad    - new XAD;
1495  *      logical extent of the specified XAD must be completely
1496  *      contained by an existing XAD;
1497  */
1498 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1499 {               /* new XAD */
1500     int rc = 0;
1501     int cmp;
1502     struct metapage *mp;    /* meta-page buffer */
1503     xtpage_t *p;        /* base B+-tree index page */
1504     s64 bn;
1505     int index0, index, newindex, nextindex;
1506     struct btstack btstack; /* traverse stack */
1507     struct xtsplit split;   /* split information */
1508     xad_t *xad, *lxad, *rxad;
1509     int xflag;
1510     s64 nxoff, xoff;
1511     int nxlen, xlen, lxlen, rxlen;
1512     s64 nxaddr, xaddr;
1513     struct tlock *tlck;
1514     struct xtlock *xtlck = NULL;
1515     int newpage = 0;
1516 
1517     /* there must exist extent to be tailgated */
1518     nxoff = offsetXAD(nxad);
1519     nxlen = lengthXAD(nxad);
1520     nxaddr = addressXAD(nxad);
1521 
1522     if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1523         return rc;
1524 
1525     /* retrieve search result */
1526     XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1527 
1528     if (cmp != 0) {
1529         XT_PUTPAGE(mp);
1530         jfs_error(ip->i_sb, "Could not find extent\n");
1531         return -EIO;
1532     }
1533 
1534     BT_MARK_DIRTY(mp, ip);
1535     /*
1536      * acquire tlock of the leaf page containing original entry
1537      */
1538     if (!test_cflag(COMMIT_Nolink, ip)) {
1539         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1540         xtlck = (struct xtlock *) & tlck->lock;
1541     }
1542 
1543     xad = &p->xad[index0];
1544     xflag = xad->flag;
1545     xoff = offsetXAD(xad);
1546     xlen = lengthXAD(xad);
1547     xaddr = addressXAD(xad);
1548 
1549     /* nXAD must be completely contained within XAD */
1550     if ((xoff > nxoff) ||
1551         (nxoff + nxlen > xoff + xlen)) {
1552         XT_PUTPAGE(mp);
1553         jfs_error(ip->i_sb,
1554               "nXAD in not completely contained within XAD\n");
1555         return -EIO;
1556     }
1557 
1558     index = index0;
1559     newindex = index + 1;
1560     nextindex = le16_to_cpu(p->header.nextindex);
1561 
1562     if (xoff < nxoff)
1563         goto coalesceRight;
1564 
1565     /*
1566      * coalesce with left XAD
1567      */
1568     /* is XAD first entry of page ? */
1569     if (index == XTENTRYSTART)
1570         goto replace;
1571 
1572     /* is nXAD logically and physically contiguous with lXAD ? */
1573     lxad = &p->xad[index - 1];
1574     lxlen = lengthXAD(lxad);
1575     if (!(lxad->flag & XAD_NOTRECORDED) &&
1576         (nxoff == offsetXAD(lxad) + lxlen) &&
1577         (nxaddr == addressXAD(lxad) + lxlen) &&
1578         (lxlen + nxlen < MAXXLEN)) {
1579         /* extend right lXAD */
1580         index0 = index - 1;
1581         XADlength(lxad, lxlen + nxlen);
1582 
1583         /* If we just merged two extents together, need to make sure the
1584          * right extent gets logged.  If the left one is marked XAD_NEW,
1585          * then we know it will be logged.  Otherwise, mark as
1586          * XAD_EXTENDED
1587          */
1588         if (!(lxad->flag & XAD_NEW))
1589             lxad->flag |= XAD_EXTENDED;
1590 
1591         if (xlen > nxlen) {
1592             /* truncate XAD */
1593             XADoffset(xad, xoff + nxlen);
1594             XADlength(xad, xlen - nxlen);
1595             XADaddress(xad, xaddr + nxlen);
1596             goto out;
1597         } else {    /* (xlen == nxlen) */
1598 
1599             /* remove XAD */
1600             if (index < nextindex - 1)
1601                 memmove(&p->xad[index], &p->xad[index + 1],
1602                     (nextindex - index -
1603                      1) << L2XTSLOTSIZE);
1604 
1605             p->header.nextindex =
1606                 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1607                     1);
1608 
1609             index = index0;
1610             newindex = index + 1;
1611             nextindex = le16_to_cpu(p->header.nextindex);
1612             xoff = nxoff = offsetXAD(lxad);
1613             xlen = nxlen = lxlen + nxlen;
1614             xaddr = nxaddr = addressXAD(lxad);
1615             goto coalesceRight;
1616         }
1617     }
1618 
1619     /*
1620      * replace XAD with nXAD
1621      */
1622       replace:          /* (nxoff == xoff) */
1623     if (nxlen == xlen) {
1624         /* replace XAD with nXAD:recorded */
1625         *xad = *nxad;
1626         xad->flag = xflag & ~XAD_NOTRECORDED;
1627 
1628         goto coalesceRight;
1629     } else          /* (nxlen < xlen) */
1630         goto updateLeft;
1631 
1632     /*
1633      * coalesce with right XAD
1634      */
1635       coalesceRight:        /* (xoff <= nxoff) */
1636     /* is XAD last entry of page ? */
1637     if (newindex == nextindex) {
1638         if (xoff == nxoff)
1639             goto out;
1640         goto updateRight;
1641     }
1642 
1643     /* is nXAD logically and physically contiguous with rXAD ? */
1644     rxad = &p->xad[index + 1];
1645     rxlen = lengthXAD(rxad);
1646     if (!(rxad->flag & XAD_NOTRECORDED) &&
1647         (nxoff + nxlen == offsetXAD(rxad)) &&
1648         (nxaddr + nxlen == addressXAD(rxad)) &&
1649         (rxlen + nxlen < MAXXLEN)) {
1650         /* extend left rXAD */
1651         XADoffset(rxad, nxoff);
1652         XADlength(rxad, rxlen + nxlen);
1653         XADaddress(rxad, nxaddr);
1654 
1655         /* If we just merged two extents together, need to make sure
1656          * the left extent gets logged.  If the right one is marked
1657          * XAD_NEW, then we know it will be logged.  Otherwise, mark as
1658          * XAD_EXTENDED
1659          */
1660         if (!(rxad->flag & XAD_NEW))
1661             rxad->flag |= XAD_EXTENDED;
1662 
1663         if (xlen > nxlen)
1664             /* truncate XAD */
1665             XADlength(xad, xlen - nxlen);
1666         else {      /* (xlen == nxlen) */
1667 
1668             /* remove XAD */
1669             memmove(&p->xad[index], &p->xad[index + 1],
1670                 (nextindex - index - 1) << L2XTSLOTSIZE);
1671 
1672             p->header.nextindex =
1673                 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1674                     1);
1675         }
1676 
1677         goto out;
1678     } else if (xoff == nxoff)
1679         goto out;
1680 
1681     if (xoff >= nxoff) {
1682         XT_PUTPAGE(mp);
1683         jfs_error(ip->i_sb, "xoff >= nxoff\n");
1684         return -EIO;
1685     }
1686 
1687     /*
1688      * split XAD into (lXAD, nXAD):
1689      *
1690      *          |---nXAD--->
1691      * --|----------XAD----------|--
1692      *   |-lXAD-|
1693      */
1694       updateRight:      /* (xoff < nxoff) */
1695     /* truncate old XAD as lXAD:not_recorded */
1696     xad = &p->xad[index];
1697     XADlength(xad, nxoff - xoff);
1698 
1699     /* insert nXAD:recorded */
1700     if (nextindex == le16_to_cpu(p->header.maxentry)) {
1701 
1702         /* xtSplitUp() unpins leaf pages */
1703         split.mp = mp;
1704         split.index = newindex;
1705         split.flag = xflag & ~XAD_NOTRECORDED;
1706         split.off = nxoff;
1707         split.len = nxlen;
1708         split.addr = nxaddr;
1709         split.pxdlist = NULL;
1710         if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1711             return rc;
1712 
1713         /* get back old page */
1714         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1715         if (rc)
1716             return rc;
1717         /*
1718          * if leaf root has been split, original root has been
1719          * copied to new child page, i.e., original entry now
1720          * resides on the new child page;
1721          */
1722         if (p->header.flag & BT_INTERNAL) {
1723             ASSERT(p->header.nextindex ==
1724                    cpu_to_le16(XTENTRYSTART + 1));
1725             xad = &p->xad[XTENTRYSTART];
1726             bn = addressXAD(xad);
1727             XT_PUTPAGE(mp);
1728 
1729             /* get new child page */
1730             XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1731             if (rc)
1732                 return rc;
1733 
1734             BT_MARK_DIRTY(mp, ip);
1735             if (!test_cflag(COMMIT_Nolink, ip)) {
1736                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1737                 xtlck = (struct xtlock *) & tlck->lock;
1738             }
1739         } else {
1740             /* is nXAD on new page ? */
1741             if (newindex >
1742                 (le16_to_cpu(p->header.maxentry) >> 1)) {
1743                 newindex =
1744                     newindex -
1745                     le16_to_cpu(p->header.nextindex) +
1746                     XTENTRYSTART;
1747                 newpage = 1;
1748             }
1749         }
1750     } else {
1751         /* if insert into middle, shift right remaining entries */
1752         if (newindex < nextindex)
1753             memmove(&p->xad[newindex + 1], &p->xad[newindex],
1754                 (nextindex - newindex) << L2XTSLOTSIZE);
1755 
1756         /* insert the entry */
1757         xad = &p->xad[newindex];
1758         *xad = *nxad;
1759         xad->flag = xflag & ~XAD_NOTRECORDED;
1760 
1761         /* advance next available entry index. */
1762         p->header.nextindex =
1763             cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1764     }
1765 
1766     /*
1767      * does nXAD force 3-way split ?
1768      *
1769      *          |---nXAD--->|
1770      * --|----------XAD-------------|--
1771      *   |-lXAD-|           |-rXAD -|
1772      */
1773     if (nxoff + nxlen == xoff + xlen)
1774         goto out;
1775 
1776     /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1777     if (newpage) {
1778         /* close out old page */
1779         if (!test_cflag(COMMIT_Nolink, ip)) {
1780             xtlck->lwm.offset = (xtlck->lwm.offset) ?
1781                 min(index0, (int)xtlck->lwm.offset) : index0;
1782             xtlck->lwm.length =
1783                 le16_to_cpu(p->header.nextindex) -
1784                 xtlck->lwm.offset;
1785         }
1786 
1787         bn = le64_to_cpu(p->header.next);
1788         XT_PUTPAGE(mp);
1789 
1790         /* get new right page */
1791         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1792         if (rc)
1793             return rc;
1794 
1795         BT_MARK_DIRTY(mp, ip);
1796         if (!test_cflag(COMMIT_Nolink, ip)) {
1797             tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1798             xtlck = (struct xtlock *) & tlck->lock;
1799         }
1800 
1801         index0 = index = newindex;
1802     } else
1803         index++;
1804 
1805     newindex = index + 1;
1806     nextindex = le16_to_cpu(p->header.nextindex);
1807     xlen = xlen - (nxoff - xoff);
1808     xoff = nxoff;
1809     xaddr = nxaddr;
1810 
1811     /* recompute split pages */
1812     if (nextindex == le16_to_cpu(p->header.maxentry)) {
1813         XT_PUTPAGE(mp);
1814 
1815         if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1816             return rc;
1817 
1818         /* retrieve search result */
1819         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1820 
1821         if (cmp != 0) {
1822             XT_PUTPAGE(mp);
1823             jfs_error(ip->i_sb, "xtSearch failed\n");
1824             return -EIO;
1825         }
1826 
1827         if (index0 != index) {
1828             XT_PUTPAGE(mp);
1829             jfs_error(ip->i_sb, "unexpected value of index\n");
1830             return -EIO;
1831         }
1832     }
1833 
1834     /*
1835      * split XAD into (nXAD, rXAD)
1836      *
1837      *          ---nXAD---|
1838      * --|----------XAD----------|--
1839      *                    |-rXAD-|
1840      */
1841       updateLeft:       /* (nxoff == xoff) && (nxlen < xlen) */
1842     /* update old XAD with nXAD:recorded */
1843     xad = &p->xad[index];
1844     *xad = *nxad;
1845     xad->flag = xflag & ~XAD_NOTRECORDED;
1846 
1847     /* insert rXAD:not_recorded */
1848     xoff = xoff + nxlen;
1849     xlen = xlen - nxlen;
1850     xaddr = xaddr + nxlen;
1851     if (nextindex == le16_to_cpu(p->header.maxentry)) {
1852 /*
1853 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
1854 */
1855         /* xtSplitUp() unpins leaf pages */
1856         split.mp = mp;
1857         split.index = newindex;
1858         split.flag = xflag;
1859         split.off = xoff;
1860         split.len = xlen;
1861         split.addr = xaddr;
1862         split.pxdlist = NULL;
1863         if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1864             return rc;
1865 
1866         /* get back old page */
1867         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1868         if (rc)
1869             return rc;
1870 
1871         /*
1872          * if leaf root has been split, original root has been
1873          * copied to new child page, i.e., original entry now
1874          * resides on the new child page;
1875          */
1876         if (p->header.flag & BT_INTERNAL) {
1877             ASSERT(p->header.nextindex ==
1878                    cpu_to_le16(XTENTRYSTART + 1));
1879             xad = &p->xad[XTENTRYSTART];
1880             bn = addressXAD(xad);
1881             XT_PUTPAGE(mp);
1882 
1883             /* get new child page */
1884             XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1885             if (rc)
1886                 return rc;
1887 
1888             BT_MARK_DIRTY(mp, ip);
1889             if (!test_cflag(COMMIT_Nolink, ip)) {
1890                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1891                 xtlck = (struct xtlock *) & tlck->lock;
1892             }
1893         }
1894     } else {
1895         /* if insert into middle, shift right remaining entries */
1896         if (newindex < nextindex)
1897             memmove(&p->xad[newindex + 1], &p->xad[newindex],
1898                 (nextindex - newindex) << L2XTSLOTSIZE);
1899 
1900         /* insert the entry */
1901         xad = &p->xad[newindex];
1902         XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
1903 
1904         /* advance next available entry index. */
1905         p->header.nextindex =
1906             cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1907     }
1908 
1909       out:
1910     if (!test_cflag(COMMIT_Nolink, ip)) {
1911         xtlck->lwm.offset = (xtlck->lwm.offset) ?
1912             min(index0, (int)xtlck->lwm.offset) : index0;
1913         xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1914             xtlck->lwm.offset;
1915     }
1916 
1917     /* unpin the leaf page */
1918     XT_PUTPAGE(mp);
1919 
1920     return rc;
1921 }
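/*
 * Illustrative sketch: the code above is the tail of xtUpdate(), which
 * splices the supplied nXAD into the existing XAD covering its range,
 * coalescing with physically contiguous left/right neighbours and
 * splitting the original entry when only part of it is replaced.  A
 * hypothetical caller that wants to record a sub-range of an
 * allocated-but-not-recorded extent might look like the fragment below;
 * tid, ip, xflag, xoff, xlen, xaddr and rc are placeholders supplied by
 * that caller, not values taken from this file.
 *
 *	xad_t nxad;
 *
 *	XT_PUTENTRY(&nxad, xflag, xoff, xlen, xaddr);
 *	rc = xtUpdate(tid, ip, &nxad);
 */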
1922 
1923 
1924 /*
1925  *  xtAppend()
1926  *
1927  * function: grow the file in append mode from the specified contiguous region;
1928  *
1929  * parameter:
1930  *  tid     - transaction id;
1931  *  ip      - file object;
1932  *  xflag       - extent flag;
1933  *  xoff        - extent offset;
1934  *  maxblocks   - max extent length;
1935  *  xlen        - extent length (in/out);
1936  *  xaddrp      - extent address pointer (in/out);
1937  *  flag        -
1938  *
1939  * return:
1940  */
1941 int xtAppend(tid_t tid,     /* transaction id */
1942          struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
1943          s32 * xlenp,   /* (in/out) */
1944          s64 * xaddrp,  /* (in/out) */
1945          int flag)
1946 {
1947     int rc = 0;
1948     struct metapage *mp;    /* meta-page buffer */
1949     xtpage_t *p;        /* base B+-tree index page */
1950     s64 bn, xaddr;
1951     int index, nextindex;
1952     struct btstack btstack; /* traverse stack */
1953     struct xtsplit split;   /* split information */
1954     xad_t *xad;
1955     int cmp;
1956     struct tlock *tlck;
1957     struct xtlock *xtlck;
1958     int nsplit, nblocks, xlen;
1959     struct pxdlist pxdlist;
1960     pxd_t *pxd;
1961     s64 next;
1962 
1963     xaddr = *xaddrp;
1964     xlen = *xlenp;
1965     jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
1966          (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
1967 
1968     /*
1969      *  search for the entry location at which to insert:
1970      *
1971      * xtFastSearch() and xtSearch() both return (leaf page
1972      * pinned, index at which to insert).
1973      * n.b. xtSearch() may return index of maxentry of
1974      * the full page.
1975      */
1976     if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
1977         return rc;
1978 
1979     /* retrieve search result */
1980     XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1981 
1982     if (cmp == 0) {
1983         rc = -EEXIST;
1984         goto out;
1985     }
1986 
1987     if (next)
1988         xlen = min(xlen, (int)(next - xoff));
1989 //insert:
1990     /*
1991      *  insert entry for new extent
1992      */
1993     xflag |= XAD_NEW;
1994 
1995     /*
1996      *  if the leaf page is full, split the page and
1997      *  propagate up the router entry for the new page from split
1998      *
1999      * The xtSplitUp() will insert the entry and unpin the leaf page.
2000      */
2001     nextindex = le16_to_cpu(p->header.nextindex);
2002     if (nextindex < le16_to_cpu(p->header.maxentry))
2003         goto insertLeaf;
2004 
2005     /*
2006      * allocate new index blocks to cover index page split(s)
2007      */
2008     nsplit = btstack.nsplit;
2009     split.pxdlist = &pxdlist;
2010     pxdlist.maxnpxd = pxdlist.npxd = 0;
2011     pxd = &pxdlist.pxd[0];
2012     nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2013     for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2014         if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2015             PXDaddress(pxd, xaddr);
2016             PXDlength(pxd, nblocks);
2017 
2018             pxdlist.maxnpxd++;
2019 
2020             continue;
2021         }
2022 
2023         /* undo allocation */
2024 
2025         goto out;
2026     }
2027 
2028     xlen = min(xlen, maxblocks);
2029 
2030     /*
2031      * allocate data extent requested
2032      */
2033     if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2034         goto out;
2035 
2036     split.mp = mp;
2037     split.index = index;
2038     split.flag = xflag;
2039     split.off = xoff;
2040     split.len = xlen;
2041     split.addr = xaddr;
2042     if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2043         /* undo data extent allocation */
2044         dbFree(ip, *xaddrp, (s64) * xlenp);
2045 
2046         return rc;
2047     }
2048 
2049     *xaddrp = xaddr;
2050     *xlenp = xlen;
2051     return 0;
2052 
2053     /*
2054      *  insert the new entry into the leaf page
2055      */
2056       insertLeaf:
2057     /*
2058      * allocate data extent requested
2059      */
2060     if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2061         goto out;
2062 
2063     BT_MARK_DIRTY(mp, ip);
2064     /*
2065      * acquire a transaction lock on the leaf page;
2066      *
2067      * action: xad insertion/extension;
2068      */
2069     tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2070     xtlck = (struct xtlock *) & tlck->lock;
2071 
2072     /* insert the new entry: mark the entry NEW */
2073     xad = &p->xad[index];
2074     XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2075 
2076     /* advance next available entry index */
2077     le16_add_cpu(&p->header.nextindex, 1);
2078 
2079     xtlck->lwm.offset =
2080         (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2081     xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2082         xtlck->lwm.offset;
2083 
2084     *xaddrp = xaddr;
2085     *xlenp = xlen;
2086 
2087       out:
2088     /* unpin the leaf page */
2089     XT_PUTPAGE(mp);
2090 
2091     return rc;
2092 }
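/*
 * Illustrative sketch: a hypothetical caller of xtAppend() supplies a
 * contiguous free region it has already located (starting block in
 * *xaddrp, at most maxblocks blocks); xtAppend() allocates from that
 * region bottom-up, possibly consuming its leading blocks for B+-tree
 * page splits, and returns the extent actually inserted through *xlenp
 * and *xaddrp.  The variables below are placeholders, not values taken
 * from this file; xlen is in/out (requested vs. inserted length) and
 * xaddr is in/out (start of the free region vs. address of the extent):
 *
 *	s32 xlen = wanted;
 *	s64 xaddr = free_start;
 *
 *	rc = xtAppend(tid, ip, 0, xoff, maxblocks, &xlen, &xaddr, 0);
 */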
2093 
2094 /*
2095  *  xtInitRoot()
2096  *
2097  * initialize file root (inline in inode)
2098  */
2099 void xtInitRoot(tid_t tid, struct inode *ip)
2100 {
2101     xtpage_t *p;
2102 
2103     /*
2104      * acquire a transaction lock on the root
2105      *
2106      * action:
2107      */
2108     txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
2109               tlckXTREE | tlckNEW);
2110     p = &JFS_IP(ip)->i_xtroot;
2111 
2112     p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2113     p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2114 
2115     if (S_ISDIR(ip->i_mode))
2116         p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
2117     else {
2118         p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
2119         ip->i_size = 0;
2120     }
2121 
2122 
2123     return;
2124 }
2125 
2126 
2127 /*
2128  * We can run into a deadlock truncating a file with a large number of
2129  * xtree pages (large fragmented file).  A robust fix would entail a
2130  * reservation system where we would reserve a number of metadata pages
2131  * and tlocks which we would be guaranteed without a deadlock.  Without
2132  * this, a partial fix is to limit the number of metadata pages we will lock
2133  * in a single transaction.  Currently we truncate the file so that
2134  * no more than 50 leaf pages will be locked.  The caller of xtTruncate
2135  * is responsible for ensuring that the current transaction gets
2136  * committed, and that subsequent transactions are created to truncate
2137  * the file further if needed (see the caller sketch below).
2138  */
2139 #define MAX_TRUNCATE_LEAVES 50
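/*
 * Illustrative caller sketch (hypothetical; the real callers live outside
 * this file): because xtTruncate() stops early once MAX_TRUNCATE_LEAVES
 * leaf pages are locked and returns the size it actually reached, a caller
 * is expected to loop, committing one transaction per pass, until the
 * returned size drops to the requested length.  txBegin()/txCommit()/
 * txEnd() are the JFS transaction primitives; the flags shown are an
 * assumption about a typical truncate path, not code taken from this file.
 *
 *	s64 newsize;
 *
 *	do {
 *		tid_t tid = txBegin(ip->i_sb, 0);
 *
 *		newsize = xtTruncate(tid, ip, length,
 *				     COMMIT_TRUNCATE | COMMIT_PWMAP);
 *		if (newsize < 0) {
 *			txEnd(tid);
 *			break;
 *		}
 *		txCommit(tid, 1, &ip, 0);
 *		txEnd(tid);
 *	} while (newsize > length);
 */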
2140 
2141 /*
2142  *  xtTruncate()
2143  *
2144  * function:
2145  *  traverse the tree backward, bottom up, logging for truncation;
2146  *  terminate at the last extent entry of the current subtree
2147  *  root page that covers the new, reduced size.
2148  *  truncation may occur within the last extent entry.
2149  *
2150  * parameter:
2151  *  tid_t   tid,
2152  *  struct inode    *ip,
2153  *  s64     newsize,
2154  *  int     flag)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
2155  *
2156  * return:
2157  *
2158  * note:
2159  *  PWMAP:
2160  *   1. truncate (non-COMMIT_NOLINK file)
2161  *      by jfs_truncate() or jfs_open(O_TRUNC):
2162  *      xtree is updated;
2163  *   2. truncate index table of directory when last entry removed
2164  *  map update via tlock at commit time;
2165  *  PMAP:
2166  *   Call xtTruncate_pmap instead
2167  *  WMAP:
2168  *   1. remove (free zero link count) on last reference release
2169  *      (pmap has been freed at commit zero link count);
2170  *   2. truncate (COMMIT_NOLINK file, i.e., tmp file):
2171  *      xtree is updated;
2172  *   map update directly at truncation time;
2173  *
2174  *  if (DELETE)
2175  *      no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
2176  *  else if (TRUNCATE)
2177  *      must write LOG_NOREDOPAGE for deleted index page;
2178  *
2179  * pages may already have been tlocked by anonymous transactions
2180  * during file growth (i.e., write) before truncation;
2181  *
2182  * except for the last truncated entry, deleted entries remain as is
2183  * in the page (nextindex is updated) for other uses
2184  * (e.g., logging/updating the allocation map): this avoids copying the
2185  * page info but delays freeing the pages;
2186  *
2187  */
2188 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
2189 {
2190     int rc = 0;
2191     s64 teof;
2192     struct metapage *mp;
2193     xtpage_t *p;
2194     s64 bn;
2195     int index, nextindex;
2196     xad_t *xad;
2197     s64 xoff, xaddr;
2198     int xlen, len, freexlen;
2199     struct btstack btstack;
2200     struct btframe *parent;
2201     struct tblock *tblk = NULL;
2202     struct tlock *tlck = NULL;
2203     struct xtlock *xtlck = NULL;
2204     struct xdlistlock xadlock;  /* maplock for COMMIT_WMAP */
2205     struct pxd_lock *pxdlock;       /* maplock for COMMIT_WMAP */
2206     s64 nfreed;
2207     int freed, log;
2208     int locked_leaves = 0;
2209 
2210     /* save object truncation type */
2211     if (tid) {
2212         tblk = tid_to_tblock(tid);
2213         tblk->xflag |= flag;
2214     }
2215 
2216     nfreed = 0;
2217 
2218     flag &= COMMIT_MAP;
2219     assert(flag != COMMIT_PMAP);
2220 
2221     if (flag == COMMIT_PWMAP)
2222         log = 1;
2223     else {
2224         log = 0;
2225         xadlock.flag = mlckFREEXADLIST;
2226         xadlock.index = 1;
2227     }
2228 
2229     /*
2230      * if the newsize is not an integral number of pages,
2231      * the file between newsize and next page boundary will
2232      * be cleared.
2233      * if truncating into a file hole, it will cause
2234      * a full block to be allocated for the logical block.
2235      */
2236 
2237     /*
2238      * release page blocks of truncated region <teof, eof>
2239      *
2240      * free the data blocks from the leaf index blocks.
2241      * delete the parent index entries corresponding to
2242      * the freed child data/index blocks.
2243      * free the index blocks themselves which aren't needed
2244      * in new sized file.
2245      *
2246      * index blocks are updated only if the blocks are to be
2247      * retained in the new sized file.
2248      * if type is PMAP, the data and index pages are NOT
2249      * freed, and the data and index blocks are NOT freed
2250      * from working map.
2251      * (this will allow continued access of data/index of
2252      * temporary file (zerolink count file truncated to zero-length)).
2253      */
2254     teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
2255         JFS_SBI(ip->i_sb)->l2bsize;
2256 
2257     /* clear stack */
2258     BT_CLR(&btstack);
2259 
2260     /*
2261      * start with root
2262      *
2263      * root resides in the inode
2264      */
2265     bn = 0;
2266 
2267     /*
2268      * first access of each page:
2269      */
2270       getPage:
2271     XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2272     if (rc)
2273         return rc;
2274 
2275     /* process entries backward from last index */
2276     index = le16_to_cpu(p->header.nextindex) - 1;
2277 
2278 
2279     /* Since this is the rightmost page at this level, and we may have
2280      * already freed a page that was formerly to the right, let's make
2281      * sure that the next pointer is zero.
2282      */
2283     if (p->header.next) {
2284         if (log)
2285             /*
2286              * Make sure this change to the header is logged.
2287              * If we really truncate this leaf, the flag
2288              * will be changed to tlckTRUNCATE
2289              */
2290             tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2291         BT_MARK_DIRTY(mp, ip);
2292         p->header.next = 0;
2293     }
2294 
2295     if (p->header.flag & BT_INTERNAL)
2296         goto getChild;
2297 
2298     /*
2299      *  leaf page
2300      */
2301     freed = 0;
2302 
2303     /* does region covered by leaf page precede Teof ? */
2304     xad = &p->xad[index];
2305     xoff = offsetXAD(xad);
2306     xlen = lengthXAD(xad);
2307     if (teof >= xoff + xlen) {
2308         XT_PUTPAGE(mp);
2309         goto getParent;
2310     }
2311 
2312     /* (re)acquire tlock of the leaf page */
2313     if (log) {
2314         if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2315             /*
2316              * We need to limit the size of the transaction
2317              * to avoid exhausting pagecache & tlocks
2318              */
2319             XT_PUTPAGE(mp);
2320             newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2321             goto getParent;
2322         }
2323         tlck = txLock(tid, ip, mp, tlckXTREE);
2324         tlck->type = tlckXTREE | tlckTRUNCATE;
2325         xtlck = (struct xtlock *) & tlck->lock;
2326         xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2327     }
2328     BT_MARK_DIRTY(mp, ip);
2329 
2330     /*
2331      * scan backward leaf page entries
2332      */
2333     for (; index >= XTENTRYSTART; index--) {
2334         xad = &p->xad[index];
2335         xoff = offsetXAD(xad);
2336         xlen = lengthXAD(xad);
2337         xaddr = addressXAD(xad);
2338 
2339         /*
2340          * The "data" for a directory is indexed by the block
2341          * device's address space.  This metadata must be invalidated
2342          * here
2343          */
2344         if (S_ISDIR(ip->i_mode) && (teof == 0))
2345             invalidate_xad_metapages(ip, *xad);
2346         /*
2347          * entry beyond eof: continue scan of current page
2348          *          xad
2349          * ---|---=======------->
2350          *   eof
2351          */
2352         if (teof < xoff) {
2353             nfreed += xlen;
2354             continue;
2355         }
2356 
2357         /*
2358          * (xoff <= teof): last entry to be deleted from page;
2359          * If other entries remain in page: keep and update the page.
2360          */
2361 
2362         /*
2363          * eof == entry_start: delete the entry
2364          *           xad
2365          * -------|=======------->
2366          *       eof
2367          *
2368          */
2369         if (teof == xoff) {
2370             nfreed += xlen;
2371 
2372             if (index == XTENTRYSTART)
2373                 break;
2374 
2375             nextindex = index;
2376         }
2377         /*
2378          * eof within the entry: truncate the entry.
2379          *          xad
2380          * -------===|===------->
2381          *          eof
2382          */
2383         else if (teof < xoff + xlen) {
2384             /* update truncated entry */
2385             len = teof - xoff;
2386             freexlen = xlen - len;
2387             XADlength(xad, len);
2388 
2389             /* save pxd of truncated extent in tlck */
2390             xaddr += len;
2391             if (log) {  /* COMMIT_PWMAP */
2392                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2393                     min(index, (int)xtlck->lwm.offset) : index;
2394                 xtlck->lwm.length = index + 1 -
2395                     xtlck->lwm.offset;
2396                 xtlck->twm.offset = index;
2397                 pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
2398                 pxdlock->flag = mlckFREEPXD;
2399                 PXDaddress(&pxdlock->pxd, xaddr);
2400                 PXDlength(&pxdlock->pxd, freexlen);
2401             }
2402             /* free truncated extent */
2403             else {  /* COMMIT_WMAP */
2404 
2405                 pxdlock = (struct pxd_lock *) & xadlock;
2406                 pxdlock->flag = mlckFREEPXD;
2407                 PXDaddress(&pxdlock->pxd, xaddr);
2408                 PXDlength(&pxdlock->pxd, freexlen);
2409                 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
2410 
2411                 /* reset map lock */
2412                 xadlock.flag = mlckFREEXADLIST;
2413             }
2414 
2415             /* current entry is new last entry; */
2416             nextindex = index + 1;
2417 
2418             nfreed += freexlen;
2419         }
2420         /*
2421          * eof beyond the entry:
2422          *          xad
2423          * -------=======---|--->
2424          *                 eof
2425          */
2426         else {      /* (xoff + xlen < teof) */
2427 
2428             nextindex = index + 1;
2429         }
2430 
2431         if (nextindex < le16_to_cpu(p->header.nextindex)) {
2432             if (!log) { /* COMMIT_WMAP */
2433                 xadlock.xdlist = &p->xad[nextindex];
2434                 xadlock.count =
2435                     le16_to_cpu(p->header.nextindex) -
2436                     nextindex;
2437                 txFreeMap(ip, (struct maplock *) & xadlock,
2438                       NULL, COMMIT_WMAP);
2439             }
2440             p->header.nextindex = cpu_to_le16(nextindex);
2441         }
2442 
2443         XT_PUTPAGE(mp);
2444 
2445         /* assert(freed == 0); */
2446         goto getParent;
2447     }           /* end scan of leaf page entries */
2448 
2449     freed = 1;
2450 
2451     /*
2452      * leaf page becomes empty: free the page if type != PMAP
2453      */
2454     if (log) {      /* COMMIT_PWMAP */
2455         /* txCommit() with tlckFREE:
2456          * free data extents covered by leaf [XTENTRYSTART:hwm);
2457          * invalidate leaf if COMMIT_PWMAP;
2458          * if (TRUNCATE), will write LOG_NOREDOPAGE;
2459          */
2460         tlck->type = tlckXTREE | tlckFREE;
2461     } else {        /* COMMIT_WMAP */
2462 
2463         /* free data extents covered by leaf */
2464         xadlock.xdlist = &p->xad[XTENTRYSTART];
2465         xadlock.count =
2466             le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
2467         txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
2468     }
2469 
2470     if (p->header.flag & BT_ROOT) {
2471         p->header.flag &= ~BT_INTERNAL;
2472         p->header.flag |= BT_LEAF;
2473         p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2474 
2475         XT_PUTPAGE(mp); /* debug */
2476         goto out;
2477     } else {
2478         if (log) {  /* COMMIT_PWMAP */
2479             /* page will be invalidated at tx completion
2480              */
2481             XT_PUTPAGE(mp);
2482         } else {    /* COMMIT_WMAP */
2483 
2484             if (mp->lid)
2485                 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
2486 
2487             /* invalidate empty leaf page */
2488             discard_metapage(mp);
2489         }
2490     }
2491 
2492     /*
2493      * the leaf page has become empty: delete the parent entry
2494      * for the leaf page if the parent page is to be kept
2495      * in the new sized file.
2496      */
2497 
2498     /*
2499      * go back up to the parent page
2500      */
2501       getParent:
2502     /* pop/restore parent entry for the current child page */
2503     if ((parent = BT_POP(&btstack)) == NULL)
2504         /* current page must have been root */
2505         goto out;
2506 
2507     /* get back the parent page */
2508     bn = parent->bn;
2509     XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2510     if (rc)
2511         return rc;
2512 
2513     index = parent->index;
2514 
2515     /*
2516      * child page was not empty:
2517      */
2518     if (freed == 0) {
2519         /* has any entry deleted from parent ? */
2520         if (index < le16_to_cpu(p->header.nextindex) - 1) {
2521             /* (re)acquire tlock on the parent page */
2522             if (log) {  /* COMMIT_PWMAP */
2523                 /* txCommit() with tlckTRUNCATE:
2524                  * free child extents covered by parent [);
2525                  */
2526                 tlck = txLock(tid, ip, mp, tlckXTREE);
2527                 xtlck = (struct xtlock *) & tlck->lock;
2528                 if (!(tlck->type & tlckTRUNCATE)) {
2529                     xtlck->hwm.offset =
2530                         le16_to_cpu(p->header.
2531                             nextindex) - 1;
2532                     tlck->type =
2533                         tlckXTREE | tlckTRUNCATE;
2534                 }
2535             } else {    /* COMMIT_WMAP */
2536 
2537                 /* free child extents covered by parent */
2538                 xadlock.xdlist = &p->xad[index + 1];
2539                 xadlock.count =
2540                     le16_to_cpu(p->header.nextindex) -
2541                     index - 1;
2542                 txFreeMap(ip, (struct maplock *) & xadlock,
2543                       NULL, COMMIT_WMAP);
2544             }
2545             BT_MARK_DIRTY(mp, ip);
2546 
2547             p->header.nextindex = cpu_to_le16(index + 1);
2548         }
2549         XT_PUTPAGE(mp);
2550         goto getParent;
2551     }
2552 
2553     /*
2554      * child page was empty:
2555      */
2556     nfreed += lengthXAD(&p->xad[index]);
2557 
2558     /*
2559      * During working map update, child page's tlock must be handled
2560      * before parent's.  This is because the parent's tlock will cause
2561      * the child's disk space to be marked available in the wmap, so
2562      * it's important that the child page be released by that time.
2563      *
2564      * ToDo:  tlocks should be on doubly-linked list, so we can
2565      * quickly remove it and add it to the end.
2566      */
2567 
2568     /*
2569      * Move parent page's tlock to the end of the tid's tlock list
2570      */
2571     if (log && mp->lid && (tblk->last != mp->lid) &&
2572         lid_to_tlock(mp->lid)->tid) {
2573         lid_t lid = mp->lid;
2574         struct tlock *prev;
2575 
2576         tlck = lid_to_tlock(lid);
2577 
2578         if (tblk->next == lid)
2579             tblk->next = tlck->next;
2580         else {
2581             for (prev = lid_to_tlock(tblk->next);
2582                  prev->next != lid;
2583                  prev = lid_to_tlock(prev->next)) {
2584                 assert(prev->next);
2585             }
2586             prev->next = tlck->next;
2587         }
2588         lid_to_tlock(tblk->last)->next = lid;
2589         tlck->next = 0;
2590         tblk->last = lid;
2591     }
2592 
2593     /*
2594      * parent page becomes empty: free the page
2595      */
2596     if (index == XTENTRYSTART) {
2597         if (log) {  /* COMMIT_PWMAP */
2598             /* txCommit() with tlckFREE:
2599              * free child extents covered by parent;
2600              * invalidate parent if COMMIT_PWMAP;
2601              */
2602             tlck = txLock(tid, ip, mp, tlckXTREE);
2603             xtlck = (struct xtlock *) & tlck->lock;
2604             xtlck->hwm.offset =
2605                 le16_to_cpu(p->header.nextindex) - 1;
2606             tlck->type = tlckXTREE | tlckFREE;
2607         } else {    /* COMMIT_WMAP */
2608 
2609             /* free child extents covered by parent */
2610             xadlock.xdlist = &p->xad[XTENTRYSTART];
2611             xadlock.count =
2612                 le16_to_cpu(p->header.nextindex) -
2613                 XTENTRYSTART;
2614             txFreeMap(ip, (struct maplock *) & xadlock, NULL,
2615                   COMMIT_WMAP);
2616         }
2617         BT_MARK_DIRTY(mp, ip);
2618 
2619         if (p->header.flag & BT_ROOT) {
2620             p->header.flag &= ~BT_INTERNAL;
2621             p->header.flag |= BT_LEAF;
2622             p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2623             if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
2624                 /*
2625                  * Shrink root down to allow inline
2626                  * EA (otherwise fsck complains)
2627                  */
2628                 p->header.maxentry =
2629                     cpu_to_le16(XTROOTINITSLOT);
2630                 JFS_IP(ip)->mode2 |= INLINEEA;
2631             }
2632 
2633             XT_PUTPAGE(mp); /* debug */
2634             goto out;
2635         } else {
2636             if (log) {  /* COMMIT_PWMAP */
2637                 /* page will be invalidated at tx completion
2638                  */
2639                 XT_PUTPAGE(mp);
2640             } else {    /* COMMIT_WMAP */
2641 
2642                 if (mp->lid)
2643                     lid_to_tlock(mp->lid)->flag |=
2644                         tlckFREELOCK;
2645 
2646                 /* invalidate parent page */
2647                 discard_metapage(mp);
2648             }
2649 
2650             /* parent has become empty and freed:
2651              * go back up to its parent page
2652              */
2653             /* freed = 1; */
2654             goto getParent;
2655         }
2656     }
2657     /*
2658      * parent page still has entries for front region;
2659      */
2660     else {
2661         /* try truncate region covered by preceding entry
2662          * (process backward)
2663          */
2664         index--;
2665 
2666         /* go back down to the child page corresponding
2667          * to the entry
2668          */
2669         goto getChild;
2670     }
2671 
2672     /*
2673      *  internal page: go down to child page of current entry
2674      */
2675       getChild:
2676     /* save current parent entry for the child page */
2677     if (BT_STACK_FULL(&btstack)) {
2678         jfs_error(ip->i_sb, "stack overrun!\n");
2679         XT_PUTPAGE(mp);
2680         return -EIO;
2681     }
2682     BT_PUSH(&btstack, bn, index);
2683 
2684     /* get child page */
2685     xad = &p->xad[index];
2686     bn = addressXAD(xad);
2687 
2688     /*
2689      * first access of each internal entry:
2690      */
2691     /* release parent page */
2692     XT_PUTPAGE(mp);
2693 
2694     /* process the child page */
2695     goto getPage;
2696 
2697       out:
2698     /*
2699      * update file resource stat
2700      */
2701     /* set size
2702      */
2703     if (S_ISDIR(ip->i_mode) && !newsize)
2704         ip->i_size = 1; /* fsck hates zero-length directories */
2705     else
2706         ip->i_size = newsize;
2707 
2708     /* update quota allocation to reflect freed blocks */
2709     dquot_free_block(ip, nfreed);
2710 
2711     /*
2712      * free tlock of invalidated pages
2713      */
2714     if (flag == COMMIT_WMAP)
2715         txFreelock(ip);
2716 
2717     return newsize;
2718 }
2719 
2720 
2721 /*
2722  *  xtTruncate_pmap()
2723  *
2724  * function:
2725  *  Perform a truncate to zero length for a deleted file, leaving the
2726  *  xtree and working map untouched.  This allows the file to
2727  *  be accessed via open file handles, while the deletion of the file
2728  *  is committed to disk.
2729  *
2730  * parameter:
2731  *  tid_t       tid,
2732  *  struct inode    *ip,
2733  *  s64     committed_size)
2734  *
2735  * return: new committed size
2736  *
2737  * note:
2738  *
2739  *  To avoid deadlock from holding too many transaction locks, the
2740  *  truncation may be broken up into multiple transactions.
2741  *  The committed_size keeps track of how much of the file has been
2742  *  freed from the pmaps (see the caller sketch after the function body).
2743  */
2744 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
2745 {
2746     s64 bn;
2747     struct btstack btstack;
2748     int cmp;
2749     int index;
2750     int locked_leaves = 0;
2751     struct metapage *mp;
2752     xtpage_t *p;
2753     struct btframe *parent;
2754     int rc;
2755     struct tblock *tblk;
2756     struct tlock *tlck = NULL;
2757     xad_t *xad;
2758     int xlen;
2759     s64 xoff;
2760     struct xtlock *xtlck = NULL;
2761 
2762     /* save object truncation type */
2763     tblk = tid_to_tblock(tid);
2764     tblk->xflag |= COMMIT_PMAP;
2765 
2766     /* clear stack */
2767     BT_CLR(&btstack);
2768 
2769     if (committed_size) {
2770         xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
2771         rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2772         if (rc)
2773             return rc;
2774 
2775         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2776 
2777         if (cmp != 0) {
2778             XT_PUTPAGE(mp);
2779             jfs_error(ip->i_sb, "did not find extent\n");
2780             return -EIO;
2781         }
2782     } else {
2783         /*
2784          * start with root
2785          *
2786          * root resides in the inode
2787          */
2788         bn = 0;
2789 
2790         /*
2791          * first access of each page:
2792          */
2793       getPage:
2794         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2795         if (rc)
2796             return rc;
2797 
2798         /* process entries backward from last index */
2799         index = le16_to_cpu(p->header.nextindex) - 1;
2800 
2801         if (p->header.flag & BT_INTERNAL)
2802             goto getChild;
2803     }
2804 
2805     /*
2806      *  leaf page
2807      */
2808 
2809     if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2810         /*
2811          * We need to limit the size of the transaction
2812          * to avoid exhausting pagecache & tlocks
2813          */
2814         xad = &p->xad[index];
2815         xoff = offsetXAD(xad);
2816         xlen = lengthXAD(xad);
2817         XT_PUTPAGE(mp);
2818         return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2819     }
2820     tlck = txLock(tid, ip, mp, tlckXTREE);
2821     tlck->type = tlckXTREE | tlckFREE;
2822     xtlck = (struct xtlock *) & tlck->lock;
2823     xtlck->hwm.offset = index;
2824 
2825 
2826     XT_PUTPAGE(mp);
2827 
2828     /*
2829      * go back up to the parent page
2830      */
2831       getParent:
2832     /* pop/restore parent entry for the current child page */
2833     if ((parent = BT_POP(&btstack)) == NULL)
2834         /* current page must have been root */
2835         goto out;
2836 
2837     /* get back the parent page */
2838     bn = parent->bn;
2839     XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2840     if (rc)
2841         return rc;
2842 
2843     index = parent->index;
2844 
2845     /*
2846      * parent page becomes empty: free the page
2847      */
2848     if (index == XTENTRYSTART) {
2849         /* txCommit() with tlckFREE:
2850          * free child extents covered by parent;
2851          * invalidate parent if COMMIT_PWMAP;
2852          */
2853         tlck = txLock(tid, ip, mp, tlckXTREE);
2854         xtlck = (struct xtlock *) & tlck->lock;
2855         xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2856         tlck->type = tlckXTREE | tlckFREE;
2857 
2858         XT_PUTPAGE(mp);
2859 
2860         if (p->header.flag & BT_ROOT) {
2861 
2862             goto out;
2863         } else {
2864             goto getParent;
2865         }
2866     }
2867     /*
2868      * parent page still has entries for front region;
2869      */
2870     else
2871         index--;
2872     /*
2873      *  internal page: go down to child page of current entry
2874      */
2875       getChild:
2876     /* save current parent entry for the child page */
2877     if (BT_STACK_FULL(&btstack)) {
2878         jfs_error(ip->i_sb, "stack overrun!\n");
2879         XT_PUTPAGE(mp);
2880         return -EIO;
2881     }
2882     BT_PUSH(&btstack, bn, index);
2883 
2884     /* get child page */
2885     xad = &p->xad[index];
2886     bn = addressXAD(xad);
2887 
2888     /*
2889      * first access of each internal entry:
2890      */
2891     /* release parent page */
2892     XT_PUTPAGE(mp);
2893 
2894     /* process the child page */
2895     goto getPage;
2896 
2897       out:
2898 
2899     return 0;
2900 }
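/*
 * Illustrative caller sketch (hypothetical; the real caller lives outside
 * this file): xtTruncate_pmap() returns a new committed_size when it bails
 * out after MAX_TRUNCATE_LEAVES leaf pages, 0 when the whole file has been
 * processed, or a negative errno.  Feeding the return value back in, one
 * transaction per pass, drives the truncation to completion; the
 * transaction primitives shown are an assumption about the caller:
 *
 *	s64 committed = 0;
 *
 *	do {
 *		tid_t tid = txBegin(ip->i_sb, 0);
 *
 *		committed = xtTruncate_pmap(tid, ip, committed);
 *		if (committed < 0) {
 *			txEnd(tid);
 *			break;
 *		}
 *		txCommit(tid, 1, &ip, 0);
 *		txEnd(tid);
 *	} while (committed);
 */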
2901 
2902 #ifdef CONFIG_JFS_STATISTICS
2903 int jfs_xtstat_proc_show(struct seq_file *m, void *v)
2904 {
2905     seq_printf(m,
2906                "JFS Xtree statistics\n"
2907                "====================\n"
2908                "searches = %d\n"
2909                "fast searches = %d\n"
2910                "splits = %d\n",
2911                xtStat.search,
2912                xtStat.fastSearch,
2913                xtStat.split);
2914     return 0;
2915 }
2916 #endif
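/*
 * Illustrative sketch: jfs_xtstat_proc_show() is a seq_file show routine;
 * its registration lives elsewhere in JFS (jfs_debug.c).  Assuming the
 * standard proc_create_single() helper and a /proc/fs/jfs directory are
 * used there, the hookup would look roughly like this:
 *
 *	struct proc_dir_entry *base = proc_mkdir("fs/jfs", NULL);
 *
 *	if (base)
 *		proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show);
 */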