Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *   Copyright (C) International Business Machines Corp., 2000-2004
0004  */
0005 
0006 /*
0007  *  jfs_dtree.c: directory B+-tree manager
0008  *
0009  * B+-tree with variable length key directory:
0010  *
0011  * each directory page is structured as an array of 32-byte
0012  * directory entry slots initialized as a freelist
0013  * to avoid search/compaction of free space at insertion.
0014  * when an entry is inserted, a number of slots are allocated
0015  * from the freelist as required to store variable length data
0016  * of the entry; when the entry is deleted, slots of the entry
0017  * are returned to freelist.
0018  *
0019  * leaf entry stores full name as key and file serial number
0020  * (aka inode number) as data.
0021  * internal/router entry stores sufffix compressed name
0022  * as key and simple extent descriptor as data.
0023  *
0024  * each directory page maintains a sorted entry index table
0025  * which stores the start slot index of sorted entries
0026  * to allow binary search on the table.
0027  *
0028  * directory starts as a root/leaf page in on-disk inode
0029  * inline data area.
0030  * when it becomes full, it starts a leaf of a external extent
0031  * of length of 1 block. each time the first leaf becomes full,
0032  * it is extended rather than split (its size is doubled),
0033  * until its length becoms 4 KBytes, from then the extent is split
0034  * with new 4 Kbyte extent when it becomes full
0035  * to reduce external fragmentation of small directories.
0036  *
0037  * blah, blah, blah, for linear scan of directory in pieces by
0038  * readdir().
0039  *
0040  *
0041  *  case-insensitive directory file system
0042  *
0043  * names are stored in case-sensitive way in leaf entry.
0044  * but stored, searched and compared in case-insensitive (uppercase) order
0045  * (i.e., both search key and entry key are folded for search/compare):
0046  * (note that case-sensitive order is BROKEN in storage, e.g.,
0047  *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
0048  *
0049  *  entries which folds to the same key makes up a equivalent class
0050  *  whose members are stored as contiguous cluster (may cross page boundary)
0051  *  but whose order is arbitrary and acts as duplicate, e.g.,
0052  *  abc, Abc, aBc, abC)
0053  *
0054  * once match is found at leaf, requires scan forward/backward
0055  * either for, in case-insensitive search, duplicate
0056  * or for, in case-sensitive search, for exact match
0057  *
0058  * router entry must be created/stored in case-insensitive way
0059  * in internal entry:
0060  * (right most key of left page and left most key of right page
0061  * are folded, and its suffix compression is propagated as router
0062  * key in parent)
0063  * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
0064  * should be made the router key for the split)
0065  *
0066  * case-insensitive search:
0067  *
0068  *  fold search key;
0069  *
0070  *  case-insensitive search of B-tree:
0071  *  for internal entry, router key is already folded;
0072  *  for leaf entry, fold the entry key before comparison.
0073  *
0074  *  if (leaf entry case-insensitive match found)
0075  *      if (next entry satisfies case-insensitive match)
0076  *          return EDUPLICATE;
0077  *      if (prev entry satisfies case-insensitive match)
0078  *          return EDUPLICATE;
0079  *      return match;
0080  *  else
0081  *      return no match;
0082  *
0083  *  serialization:
0084  * target directory inode lock is being held on entry/exit
0085  * of all main directory service routines.
0086  *
0087  *  log based recovery:
0088  */
0089 
0090 #include <linux/fs.h>
0091 #include <linux/quotaops.h>
0092 #include <linux/slab.h>
0093 #include "jfs_incore.h"
0094 #include "jfs_superblock.h"
0095 #include "jfs_filsys.h"
0096 #include "jfs_metapage.h"
0097 #include "jfs_dmap.h"
0098 #include "jfs_unicode.h"
0099 #include "jfs_debug.h"
0100 
0101 /* dtree split parameter */
0102 struct dtsplit {
0103     struct metapage *mp;
0104     s16 index;
0105     s16 nslot;
0106     struct component_name *key;
0107     ddata_t *data;
0108     struct pxdlist *pxdlist;
0109 };
0110 
0111 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
0112 
0113 /* get page buffer for specified block address */
0114 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)             \
0115 do {                                    \
0116     BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot);    \
0117     if (!(RC)) {                            \
0118         if (((P)->header.nextindex >                \
0119              (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
0120             ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) {  \
0121             BT_PUTPAGE(MP);                 \
0122             jfs_error((IP)->i_sb,               \
0123                   "DT_GETPAGE: dtree page corrupt\n");  \
0124             MP = NULL;                  \
0125             RC = -EIO;                  \
0126         }                           \
0127     }                               \
0128 } while (0)
0129 
0130 /* for consistency */
0131 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
0132 
0133 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
0134     BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
0135 
0136 /*
0137  * forward references
0138  */
0139 static int dtSplitUp(tid_t tid, struct inode *ip,
0140              struct dtsplit * split, struct btstack * btstack);
0141 
0142 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
0143                struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
0144 
0145 static int dtExtendPage(tid_t tid, struct inode *ip,
0146             struct dtsplit * split, struct btstack * btstack);
0147 
0148 static int dtSplitRoot(tid_t tid, struct inode *ip,
0149                struct dtsplit * split, struct metapage ** rmpp);
0150 
0151 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
0152               dtpage_t * fp, struct btstack * btstack);
0153 
0154 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
0155 
0156 static int dtReadFirst(struct inode *ip, struct btstack * btstack);
0157 
0158 static int dtReadNext(struct inode *ip,
0159               loff_t * offset, struct btstack * btstack);
0160 
0161 static int dtCompare(struct component_name * key, dtpage_t * p, int si);
0162 
0163 static int ciCompare(struct component_name * key, dtpage_t * p, int si,
0164              int flag);
0165 
0166 static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
0167              int flag);
0168 
0169 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
0170                   int ri, struct component_name * key, int flag);
0171 
0172 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
0173               ddata_t * data, struct dt_lock **);
0174 
0175 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
0176             struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
0177             int do_index);
0178 
0179 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
0180 
0181 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
0182 
0183 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
0184 
0185 #define ciToUpper(c)    UniStrupr((c)->name)
0186 
0187 /*
0188  *  read_index_page()
0189  *
0190  *  Reads a page of a directory's index table.
0191  *  Having metadata mapped into the directory inode's address space
0192  *  presents a multitude of problems.  We avoid this by mapping to
0193  *  the absolute address space outside of the *_metapage routines
0194  */
0195 static struct metapage *read_index_page(struct inode *inode, s64 blkno)
0196 {
0197     int rc;
0198     s64 xaddr;
0199     int xflag;
0200     s32 xlen;
0201 
0202     rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
0203     if (rc || (xaddr == 0))
0204         return NULL;
0205 
0206     return read_metapage(inode, xaddr, PSIZE, 1);
0207 }
0208 
0209 /*
0210  *  get_index_page()
0211  *
0212  *  Same as get_index_page(), but get's a new page without reading
0213  */
0214 static struct metapage *get_index_page(struct inode *inode, s64 blkno)
0215 {
0216     int rc;
0217     s64 xaddr;
0218     int xflag;
0219     s32 xlen;
0220 
0221     rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
0222     if (rc || (xaddr == 0))
0223         return NULL;
0224 
0225     return get_metapage(inode, xaddr, PSIZE, 1);
0226 }
0227 
0228 /*
0229  *  find_index()
0230  *
0231  *  Returns dtree page containing directory table entry for specified
0232  *  index and pointer to its entry.
0233  *
0234  *  mp must be released by caller.
0235  */
0236 static struct dir_table_slot *find_index(struct inode *ip, u32 index,
0237                      struct metapage ** mp, s64 *lblock)
0238 {
0239     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
0240     s64 blkno;
0241     s64 offset;
0242     int page_offset;
0243     struct dir_table_slot *slot;
0244     static int maxWarnings = 10;
0245 
0246     if (index < 2) {
0247         if (maxWarnings) {
0248             jfs_warn("find_entry called with index = %d", index);
0249             maxWarnings--;
0250         }
0251         return NULL;
0252     }
0253 
0254     if (index >= jfs_ip->next_index) {
0255         jfs_warn("find_entry called with index >= next_index");
0256         return NULL;
0257     }
0258 
0259     if (jfs_dirtable_inline(ip)) {
0260         /*
0261          * Inline directory table
0262          */
0263         *mp = NULL;
0264         slot = &jfs_ip->i_dirtable[index - 2];
0265     } else {
0266         offset = (index - 2) * sizeof(struct dir_table_slot);
0267         page_offset = offset & (PSIZE - 1);
0268         blkno = ((offset + 1) >> L2PSIZE) <<
0269             JFS_SBI(ip->i_sb)->l2nbperpage;
0270 
0271         if (*mp && (*lblock != blkno)) {
0272             release_metapage(*mp);
0273             *mp = NULL;
0274         }
0275         if (!(*mp)) {
0276             *lblock = blkno;
0277             *mp = read_index_page(ip, blkno);
0278         }
0279         if (!(*mp)) {
0280             jfs_err("free_index: error reading directory table");
0281             return NULL;
0282         }
0283 
0284         slot =
0285             (struct dir_table_slot *) ((char *) (*mp)->data +
0286                            page_offset);
0287     }
0288     return slot;
0289 }
0290 
0291 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
0292                   u32 index)
0293 {
0294     struct tlock *tlck;
0295     struct linelock *llck;
0296     struct lv *lv;
0297 
0298     tlck = txLock(tid, ip, mp, tlckDATA);
0299     llck = (struct linelock *) tlck->lock;
0300 
0301     if (llck->index >= llck->maxcnt)
0302         llck = txLinelock(llck);
0303     lv = &llck->lv[llck->index];
0304 
0305     /*
0306      *  Linelock slot size is twice the size of directory table
0307      *  slot size.  512 entries per page.
0308      */
0309     lv->offset = ((index - 2) & 511) >> 1;
0310     lv->length = 1;
0311     llck->index++;
0312 }
0313 
0314 /*
0315  *  add_index()
0316  *
0317  *  Adds an entry to the directory index table.  This is used to provide
0318  *  each directory entry with a persistent index in which to resume
0319  *  directory traversals
0320  */
0321 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
0322 {
0323     struct super_block *sb = ip->i_sb;
0324     struct jfs_sb_info *sbi = JFS_SBI(sb);
0325     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
0326     u64 blkno;
0327     struct dir_table_slot *dirtab_slot;
0328     u32 index;
0329     struct linelock *llck;
0330     struct lv *lv;
0331     struct metapage *mp;
0332     s64 offset;
0333     uint page_offset;
0334     struct tlock *tlck;
0335     s64 xaddr;
0336 
0337     ASSERT(DO_INDEX(ip));
0338 
0339     if (jfs_ip->next_index < 2) {
0340         jfs_warn("add_index: next_index = %d.  Resetting!",
0341                jfs_ip->next_index);
0342         jfs_ip->next_index = 2;
0343     }
0344 
0345     index = jfs_ip->next_index++;
0346 
0347     if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
0348         /*
0349          * i_size reflects size of index table, or 8 bytes per entry.
0350          */
0351         ip->i_size = (loff_t) (index - 1) << 3;
0352 
0353         /*
0354          * dir table fits inline within inode
0355          */
0356         dirtab_slot = &jfs_ip->i_dirtable[index-2];
0357         dirtab_slot->flag = DIR_INDEX_VALID;
0358         dirtab_slot->slot = slot;
0359         DTSaddress(dirtab_slot, bn);
0360 
0361         set_cflag(COMMIT_Dirtable, ip);
0362 
0363         return index;
0364     }
0365     if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
0366         struct dir_table_slot temp_table[12];
0367 
0368         /*
0369          * It's time to move the inline table to an external
0370          * page and begin to build the xtree
0371          */
0372         if (dquot_alloc_block(ip, sbi->nbperpage))
0373             goto clean_up;
0374         if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
0375             dquot_free_block(ip, sbi->nbperpage);
0376             goto clean_up;
0377         }
0378 
0379         /*
0380          * Save the table, we're going to overwrite it with the
0381          * xtree root
0382          */
0383         memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
0384 
0385         /*
0386          * Initialize empty x-tree
0387          */
0388         xtInitRoot(tid, ip);
0389 
0390         /*
0391          * Add the first block to the xtree
0392          */
0393         if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
0394             /* This really shouldn't fail */
0395             jfs_warn("add_index: xtInsert failed!");
0396             memcpy(&jfs_ip->i_dirtable, temp_table,
0397                    sizeof (temp_table));
0398             dbFree(ip, xaddr, sbi->nbperpage);
0399             dquot_free_block(ip, sbi->nbperpage);
0400             goto clean_up;
0401         }
0402         ip->i_size = PSIZE;
0403 
0404         mp = get_index_page(ip, 0);
0405         if (!mp) {
0406             jfs_err("add_index: get_metapage failed!");
0407             xtTruncate(tid, ip, 0, COMMIT_PWMAP);
0408             memcpy(&jfs_ip->i_dirtable, temp_table,
0409                    sizeof (temp_table));
0410             goto clean_up;
0411         }
0412         tlck = txLock(tid, ip, mp, tlckDATA);
0413         llck = (struct linelock *) & tlck->lock;
0414         ASSERT(llck->index == 0);
0415         lv = &llck->lv[0];
0416 
0417         lv->offset = 0;
0418         lv->length = 6; /* tlckDATA slot size is 16 bytes */
0419         llck->index++;
0420 
0421         memcpy(mp->data, temp_table, sizeof(temp_table));
0422 
0423         mark_metapage_dirty(mp);
0424         release_metapage(mp);
0425 
0426         /*
0427          * Logging is now directed by xtree tlocks
0428          */
0429         clear_cflag(COMMIT_Dirtable, ip);
0430     }
0431 
0432     offset = (index - 2) * sizeof(struct dir_table_slot);
0433     page_offset = offset & (PSIZE - 1);
0434     blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
0435     if (page_offset == 0) {
0436         /*
0437          * This will be the beginning of a new page
0438          */
0439         xaddr = 0;
0440         if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
0441             jfs_warn("add_index: xtInsert failed!");
0442             goto clean_up;
0443         }
0444         ip->i_size += PSIZE;
0445 
0446         if ((mp = get_index_page(ip, blkno)))
0447             memset(mp->data, 0, PSIZE); /* Just looks better */
0448         else
0449             xtTruncate(tid, ip, offset, COMMIT_PWMAP);
0450     } else
0451         mp = read_index_page(ip, blkno);
0452 
0453     if (!mp) {
0454         jfs_err("add_index: get/read_metapage failed!");
0455         goto clean_up;
0456     }
0457 
0458     lock_index(tid, ip, mp, index);
0459 
0460     dirtab_slot =
0461         (struct dir_table_slot *) ((char *) mp->data + page_offset);
0462     dirtab_slot->flag = DIR_INDEX_VALID;
0463     dirtab_slot->slot = slot;
0464     DTSaddress(dirtab_slot, bn);
0465 
0466     mark_metapage_dirty(mp);
0467     release_metapage(mp);
0468 
0469     return index;
0470 
0471       clean_up:
0472 
0473     jfs_ip->next_index--;
0474 
0475     return 0;
0476 }
0477 
0478 /*
0479  *  free_index()
0480  *
0481  *  Marks an entry to the directory index table as free.
0482  */
0483 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
0484 {
0485     struct dir_table_slot *dirtab_slot;
0486     s64 lblock;
0487     struct metapage *mp = NULL;
0488 
0489     dirtab_slot = find_index(ip, index, &mp, &lblock);
0490 
0491     if (!dirtab_slot)
0492         return;
0493 
0494     dirtab_slot->flag = DIR_INDEX_FREE;
0495     dirtab_slot->slot = dirtab_slot->addr1 = 0;
0496     dirtab_slot->addr2 = cpu_to_le32(next);
0497 
0498     if (mp) {
0499         lock_index(tid, ip, mp, index);
0500         mark_metapage_dirty(mp);
0501         release_metapage(mp);
0502     } else
0503         set_cflag(COMMIT_Dirtable, ip);
0504 }
0505 
0506 /*
0507  *  modify_index()
0508  *
0509  *  Changes an entry in the directory index table
0510  */
0511 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
0512              int slot, struct metapage ** mp, s64 *lblock)
0513 {
0514     struct dir_table_slot *dirtab_slot;
0515 
0516     dirtab_slot = find_index(ip, index, mp, lblock);
0517 
0518     if (!dirtab_slot)
0519         return;
0520 
0521     DTSaddress(dirtab_slot, bn);
0522     dirtab_slot->slot = slot;
0523 
0524     if (*mp) {
0525         lock_index(tid, ip, *mp, index);
0526         mark_metapage_dirty(*mp);
0527     } else
0528         set_cflag(COMMIT_Dirtable, ip);
0529 }
0530 
0531 /*
0532  *  read_index()
0533  *
0534  *  reads a directory table slot
0535  */
0536 static int read_index(struct inode *ip, u32 index,
0537              struct dir_table_slot * dirtab_slot)
0538 {
0539     s64 lblock;
0540     struct metapage *mp = NULL;
0541     struct dir_table_slot *slot;
0542 
0543     slot = find_index(ip, index, &mp, &lblock);
0544     if (!slot) {
0545         return -EIO;
0546     }
0547 
0548     memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
0549 
0550     if (mp)
0551         release_metapage(mp);
0552 
0553     return 0;
0554 }
0555 
0556 /*
0557  *  dtSearch()
0558  *
0559  * function:
0560  *  Search for the entry with specified key
0561  *
0562  * parameter:
0563  *
0564  * return: 0 - search result on stack, leaf page pinned;
0565  *     errno - I/O error
0566  */
0567 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
0568          struct btstack * btstack, int flag)
0569 {
0570     int rc = 0;
0571     int cmp = 1;        /* init for empty page */
0572     s64 bn;
0573     struct metapage *mp;
0574     dtpage_t *p;
0575     s8 *stbl;
0576     int base, index, lim;
0577     struct btframe *btsp;
0578     pxd_t *pxd;
0579     int psize = 288;    /* initial in-line directory */
0580     ino_t inumber;
0581     struct component_name ciKey;
0582     struct super_block *sb = ip->i_sb;
0583 
0584     ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
0585                    GFP_NOFS);
0586     if (!ciKey.name) {
0587         rc = -ENOMEM;
0588         goto dtSearch_Exit2;
0589     }
0590 
0591 
0592     /* uppercase search key for c-i directory */
0593     UniStrcpy(ciKey.name, key->name);
0594     ciKey.namlen = key->namlen;
0595 
0596     /* only uppercase if case-insensitive support is on */
0597     if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
0598         ciToUpper(&ciKey);
0599     }
0600     BT_CLR(btstack);    /* reset stack */
0601 
0602     /* init level count for max pages to split */
0603     btstack->nsplit = 1;
0604 
0605     /*
0606      *  search down tree from root:
0607      *
0608      * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
0609      * internal page, child page Pi contains entry with k, Ki <= K < Kj.
0610      *
0611      * if entry with search key K is not found
0612      * internal page search find the entry with largest key Ki
0613      * less than K which point to the child page to search;
0614      * leaf page search find the entry with smallest key Kj
0615      * greater than K so that the returned index is the position of
0616      * the entry to be shifted right for insertion of new entry.
0617      * for empty tree, search key is greater than any key of the tree.
0618      *
0619      * by convention, root bn = 0.
0620      */
0621     for (bn = 0;;) {
0622         /* get/pin the page to search */
0623         DT_GETPAGE(ip, bn, mp, psize, p, rc);
0624         if (rc)
0625             goto dtSearch_Exit1;
0626 
0627         /* get sorted entry table of the page */
0628         stbl = DT_GETSTBL(p);
0629 
0630         /*
0631          * binary search with search key K on the current page.
0632          */
0633         for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
0634             index = base + (lim >> 1);
0635 
0636             if (p->header.flag & BT_LEAF) {
0637                 /* uppercase leaf name to compare */
0638                 cmp =
0639                     ciCompare(&ciKey, p, stbl[index],
0640                           JFS_SBI(sb)->mntflag);
0641             } else {
0642                 /* router key is in uppercase */
0643 
0644                 cmp = dtCompare(&ciKey, p, stbl[index]);
0645 
0646 
0647             }
0648             if (cmp == 0) {
0649                 /*
0650                  *  search hit
0651                  */
0652                 /* search hit - leaf page:
0653                  * return the entry found
0654                  */
0655                 if (p->header.flag & BT_LEAF) {
0656                     inumber = le32_to_cpu(
0657             ((struct ldtentry *) & p->slot[stbl[index]])->inumber);
0658 
0659                     /*
0660                      * search for JFS_LOOKUP
0661                      */
0662                     if (flag == JFS_LOOKUP) {
0663                         *data = inumber;
0664                         rc = 0;
0665                         goto out;
0666                     }
0667 
0668                     /*
0669                      * search for JFS_CREATE
0670                      */
0671                     if (flag == JFS_CREATE) {
0672                         *data = inumber;
0673                         rc = -EEXIST;
0674                         goto out;
0675                     }
0676 
0677                     /*
0678                      * search for JFS_REMOVE or JFS_RENAME
0679                      */
0680                     if ((flag == JFS_REMOVE ||
0681                          flag == JFS_RENAME) &&
0682                         *data != inumber) {
0683                         rc = -ESTALE;
0684                         goto out;
0685                     }
0686 
0687                     /*
0688                      * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
0689                      */
0690                     /* save search result */
0691                     *data = inumber;
0692                     btsp = btstack->top;
0693                     btsp->bn = bn;
0694                     btsp->index = index;
0695                     btsp->mp = mp;
0696 
0697                     rc = 0;
0698                     goto dtSearch_Exit1;
0699                 }
0700 
0701                 /* search hit - internal page:
0702                  * descend/search its child page
0703                  */
0704                 goto getChild;
0705             }
0706 
0707             if (cmp > 0) {
0708                 base = index + 1;
0709                 --lim;
0710             }
0711         }
0712 
0713         /*
0714          *  search miss
0715          *
0716          * base is the smallest index with key (Kj) greater than
0717          * search key (K) and may be zero or (maxindex + 1) index.
0718          */
0719         /*
0720          * search miss - leaf page
0721          *
0722          * return location of entry (base) where new entry with
0723          * search key K is to be inserted.
0724          */
0725         if (p->header.flag & BT_LEAF) {
0726             /*
0727              * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
0728              */
0729             if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
0730                 flag == JFS_RENAME) {
0731                 rc = -ENOENT;
0732                 goto out;
0733             }
0734 
0735             /*
0736              * search for JFS_CREATE|JFS_FINDDIR:
0737              *
0738              * save search result
0739              */
0740             *data = 0;
0741             btsp = btstack->top;
0742             btsp->bn = bn;
0743             btsp->index = base;
0744             btsp->mp = mp;
0745 
0746             rc = 0;
0747             goto dtSearch_Exit1;
0748         }
0749 
0750         /*
0751          * search miss - internal page
0752          *
0753          * if base is non-zero, decrement base by one to get the parent
0754          * entry of the child page to search.
0755          */
0756         index = base ? base - 1 : base;
0757 
0758         /*
0759          * go down to child page
0760          */
0761           getChild:
0762         /* update max. number of pages to split */
0763         if (BT_STACK_FULL(btstack)) {
0764             /* Something's corrupted, mark filesystem dirty so
0765              * chkdsk will fix it.
0766              */
0767             jfs_error(sb, "stack overrun!\n");
0768             BT_STACK_DUMP(btstack);
0769             rc = -EIO;
0770             goto out;
0771         }
0772         btstack->nsplit++;
0773 
0774         /* push (bn, index) of the parent page/entry */
0775         BT_PUSH(btstack, bn, index);
0776 
0777         /* get the child page block number */
0778         pxd = (pxd_t *) & p->slot[stbl[index]];
0779         bn = addressPXD(pxd);
0780         psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
0781 
0782         /* unpin the parent page */
0783         DT_PUTPAGE(mp);
0784     }
0785 
0786       out:
0787     DT_PUTPAGE(mp);
0788 
0789       dtSearch_Exit1:
0790 
0791     kfree(ciKey.name);
0792 
0793       dtSearch_Exit2:
0794 
0795     return rc;
0796 }
0797 
0798 
0799 /*
0800  *  dtInsert()
0801  *
0802  * function: insert an entry to directory tree
0803  *
0804  * parameter:
0805  *
0806  * return: 0 - success;
0807  *     errno - failure;
0808  */
0809 int dtInsert(tid_t tid, struct inode *ip,
0810      struct component_name * name, ino_t * fsn, struct btstack * btstack)
0811 {
0812     int rc = 0;
0813     struct metapage *mp;    /* meta-page buffer */
0814     dtpage_t *p;        /* base B+-tree index page */
0815     s64 bn;
0816     int index;
0817     struct dtsplit split;   /* split information */
0818     ddata_t data;
0819     struct dt_lock *dtlck;
0820     int n;
0821     struct tlock *tlck;
0822     struct lv *lv;
0823 
0824     /*
0825      *  retrieve search result
0826      *
0827      * dtSearch() returns (leaf page pinned, index at which to insert).
0828      * n.b. dtSearch() may return index of (maxindex + 1) of
0829      * the full page.
0830      */
0831     DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
0832 
0833     /*
0834      *  insert entry for new key
0835      */
0836     if (DO_INDEX(ip)) {
0837         if (JFS_IP(ip)->next_index == DIREND) {
0838             DT_PUTPAGE(mp);
0839             return -EMLINK;
0840         }
0841         n = NDTLEAF(name->namlen);
0842         data.leaf.tid = tid;
0843         data.leaf.ip = ip;
0844     } else {
0845         n = NDTLEAF_LEGACY(name->namlen);
0846         data.leaf.ip = NULL;    /* signifies legacy directory format */
0847     }
0848     data.leaf.ino = *fsn;
0849 
0850     /*
0851      *  leaf page does not have enough room for new entry:
0852      *
0853      *  extend/split the leaf page;
0854      *
0855      * dtSplitUp() will insert the entry and unpin the leaf page.
0856      */
0857     if (n > p->header.freecnt) {
0858         split.mp = mp;
0859         split.index = index;
0860         split.nslot = n;
0861         split.key = name;
0862         split.data = &data;
0863         rc = dtSplitUp(tid, ip, &split, btstack);
0864         return rc;
0865     }
0866 
0867     /*
0868      *  leaf page does have enough room for new entry:
0869      *
0870      *  insert the new data entry into the leaf page;
0871      */
0872     BT_MARK_DIRTY(mp, ip);
0873     /*
0874      * acquire a transaction lock on the leaf page
0875      */
0876     tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
0877     dtlck = (struct dt_lock *) & tlck->lock;
0878     ASSERT(dtlck->index == 0);
0879     lv = & dtlck->lv[0];
0880 
0881     /* linelock header */
0882     lv->offset = 0;
0883     lv->length = 1;
0884     dtlck->index++;
0885 
0886     dtInsertEntry(p, index, name, &data, &dtlck);
0887 
0888     /* linelock stbl of non-root leaf page */
0889     if (!(p->header.flag & BT_ROOT)) {
0890         if (dtlck->index >= dtlck->maxcnt)
0891             dtlck = (struct dt_lock *) txLinelock(dtlck);
0892         lv = & dtlck->lv[dtlck->index];
0893         n = index >> L2DTSLOTSIZE;
0894         lv->offset = p->header.stblindex + n;
0895         lv->length =
0896             ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
0897         dtlck->index++;
0898     }
0899 
0900     /* unpin the leaf page */
0901     DT_PUTPAGE(mp);
0902 
0903     return 0;
0904 }
0905 
0906 
0907 /*
0908  *  dtSplitUp()
0909  *
0910  * function: propagate insertion bottom up;
0911  *
0912  * parameter:
0913  *
0914  * return: 0 - success;
0915  *     errno - failure;
0916  *  leaf page unpinned;
0917  */
0918 static int dtSplitUp(tid_t tid,
0919       struct inode *ip, struct dtsplit * split, struct btstack * btstack)
0920 {
0921     struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
0922     int rc = 0;
0923     struct metapage *smp;
0924     dtpage_t *sp;       /* split page */
0925     struct metapage *rmp;
0926     dtpage_t *rp;       /* new right page split from sp */
0927     pxd_t rpxd;     /* new right page extent descriptor */
0928     struct metapage *lmp;
0929     dtpage_t *lp;       /* left child page */
0930     int skip;       /* index of entry of insertion */
0931     struct btframe *parent; /* parent page entry on traverse stack */
0932     s64 xaddr, nxaddr;
0933     int xlen, xsize;
0934     struct pxdlist pxdlist;
0935     pxd_t *pxd;
0936     struct component_name key = { 0, NULL };
0937     ddata_t *data = split->data;
0938     int n;
0939     struct dt_lock *dtlck;
0940     struct tlock *tlck;
0941     struct lv *lv;
0942     int quota_allocation = 0;
0943 
0944     /* get split page */
0945     smp = split->mp;
0946     sp = DT_PAGE(ip, smp);
0947 
0948     key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS);
0949     if (!key.name) {
0950         DT_PUTPAGE(smp);
0951         rc = -ENOMEM;
0952         goto dtSplitUp_Exit;
0953     }
0954 
0955     /*
0956      *  split leaf page
0957      *
0958      * The split routines insert the new entry, and
0959      * acquire txLock as appropriate.
0960      */
0961     /*
0962      *  split root leaf page:
0963      */
0964     if (sp->header.flag & BT_ROOT) {
0965         /*
0966          * allocate a single extent child page
0967          */
0968         xlen = 1;
0969         n = sbi->bsize >> L2DTSLOTSIZE;
0970         n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
0971         n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
0972         if (n <= split->nslot)
0973             xlen++;
0974         if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
0975             DT_PUTPAGE(smp);
0976             goto freeKeyName;
0977         }
0978 
0979         pxdlist.maxnpxd = 1;
0980         pxdlist.npxd = 0;
0981         pxd = &pxdlist.pxd[0];
0982         PXDaddress(pxd, xaddr);
0983         PXDlength(pxd, xlen);
0984         split->pxdlist = &pxdlist;
0985         rc = dtSplitRoot(tid, ip, split, &rmp);
0986 
0987         if (rc)
0988             dbFree(ip, xaddr, xlen);
0989         else
0990             DT_PUTPAGE(rmp);
0991 
0992         DT_PUTPAGE(smp);
0993 
0994         if (!DO_INDEX(ip))
0995             ip->i_size = xlen << sbi->l2bsize;
0996 
0997         goto freeKeyName;
0998     }
0999 
1000     /*
1001      *  extend first leaf page
1002      *
1003      * extend the 1st extent if less than buffer page size
1004      * (dtExtendPage() reurns leaf page unpinned)
1005      */
1006     pxd = &sp->header.self;
1007     xlen = lengthPXD(pxd);
1008     xsize = xlen << sbi->l2bsize;
1009     if (xsize < PSIZE) {
1010         xaddr = addressPXD(pxd);
1011         n = xsize >> L2DTSLOTSIZE;
1012         n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
1013         if ((n + sp->header.freecnt) <= split->nslot)
1014             n = xlen + (xlen << 1);
1015         else
1016             n = xlen;
1017 
1018         /* Allocate blocks to quota. */
1019         rc = dquot_alloc_block(ip, n);
1020         if (rc)
1021             goto extendOut;
1022         quota_allocation += n;
1023 
1024         if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1025                     (s64) n, &nxaddr)))
1026             goto extendOut;
1027 
1028         pxdlist.maxnpxd = 1;
1029         pxdlist.npxd = 0;
1030         pxd = &pxdlist.pxd[0];
1031         PXDaddress(pxd, nxaddr);
1032         PXDlength(pxd, xlen + n);
1033         split->pxdlist = &pxdlist;
1034         if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1035             nxaddr = addressPXD(pxd);
1036             if (xaddr != nxaddr) {
1037                 /* free relocated extent */
1038                 xlen = lengthPXD(pxd);
1039                 dbFree(ip, nxaddr, (s64) xlen);
1040             } else {
1041                 /* free extended delta */
1042                 xlen = lengthPXD(pxd) - n;
1043                 xaddr = addressPXD(pxd) + xlen;
1044                 dbFree(ip, xaddr, (s64) n);
1045             }
1046         } else if (!DO_INDEX(ip))
1047             ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1048 
1049 
1050           extendOut:
1051         DT_PUTPAGE(smp);
1052         goto freeKeyName;
1053     }
1054 
1055     /*
1056      *  split leaf page <sp> into <sp> and a new right page <rp>.
1057      *
1058      * return <rp> pinned and its extent descriptor <rpxd>
1059      */
1060     /*
1061      * allocate new directory page extent and
1062      * new index page(s) to cover page split(s)
1063      *
1064      * allocation hint: ?
1065      */
1066     n = btstack->nsplit;
1067     pxdlist.maxnpxd = pxdlist.npxd = 0;
1068     xlen = sbi->nbperpage;
1069     for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1070         if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1071             PXDaddress(pxd, xaddr);
1072             PXDlength(pxd, xlen);
1073             pxdlist.maxnpxd++;
1074             continue;
1075         }
1076 
1077         DT_PUTPAGE(smp);
1078 
1079         /* undo allocation */
1080         goto splitOut;
1081     }
1082 
1083     split->pxdlist = &pxdlist;
1084     if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1085         DT_PUTPAGE(smp);
1086 
1087         /* undo allocation */
1088         goto splitOut;
1089     }
1090 
1091     if (!DO_INDEX(ip))
1092         ip->i_size += PSIZE;
1093 
1094     /*
1095      * propagate up the router entry for the leaf page just split
1096      *
1097      * insert a router entry for the new page into the parent page,
1098      * propagate the insert/split up the tree by walking back the stack
1099      * of (bn of parent page, index of child page entry in parent page)
1100      * that were traversed during the search for the page that split.
1101      *
1102      * the propagation of insert/split up the tree stops if the root
1103      * splits or the page inserted into doesn't have to split to hold
1104      * the new entry.
1105      *
1106      * the parent entry for the split page remains the same, and
1107      * a new entry is inserted at its right with the first key and
1108      * block number of the new right page.
1109      *
1110      * There are a maximum of 4 pages pinned at any time:
1111      * two children, left parent and right parent (when the parent splits).
1112      * keep the child pages pinned while working on the parent.
1113      * make sure that all pins are released at exit.
1114      */
1115     while ((parent = BT_POP(btstack)) != NULL) {
1116         /* parent page specified by stack frame <parent> */
1117 
1118         /* keep current child pages (<lp>, <rp>) pinned */
1119         lmp = smp;
1120         lp = sp;
1121 
1122         /*
1123          * insert router entry in parent for new right child page <rp>
1124          */
1125         /* get the parent page <sp> */
1126         DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1127         if (rc) {
1128             DT_PUTPAGE(lmp);
1129             DT_PUTPAGE(rmp);
1130             goto splitOut;
1131         }
1132 
1133         /*
1134          * The new key entry goes ONE AFTER the index of parent entry,
1135          * because the split was to the right.
1136          */
1137         skip = parent->index + 1;
1138 
1139         /*
1140          * compute the key for the router entry
1141          *
1142          * key suffix compression:
1143          * for internal pages that have leaf pages as children,
1144          * retain only what's needed to distinguish between
1145          * the new entry and the entry on the page to its left.
1146          * If the keys compare equal, retain the entire key.
1147          *
1148          * note that compression is performed only at computing
1149          * router key at the lowest internal level.
1150          * further compression of the key between pairs of higher
1151          * level internal pages loses too much information and
1152          * the search may fail.
1153          * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1154          * results in two adjacent parent entries (a)(xx).
1155          * if split occurs between these two entries, and
1156          * if compression is applied, the router key of parent entry
1157          * of right page (x) will divert search for x into right
1158          * subtree and miss x in the left subtree.)
1159          *
1160          * the entire key must be retained for the next-to-leftmost
1161          * internal key at any level of the tree, or search may fail
1162          * (e.g., ?)
1163          */
1164         switch (rp->header.flag & BT_TYPE) {
1165         case BT_LEAF:
1166             /*
1167              * compute the length of prefix for suffix compression
1168              * between last entry of left page and first entry
1169              * of right page
1170              */
1171             if ((sp->header.flag & BT_ROOT && skip > 1) ||
1172                 sp->header.prev != 0 || skip > 1) {
1173                 /* compute uppercase router prefix key */
1174                 rc = ciGetLeafPrefixKey(lp,
1175                             lp->header.nextindex-1,
1176                             rp, 0, &key,
1177                             sbi->mntflag);
1178                 if (rc) {
1179                     DT_PUTPAGE(lmp);
1180                     DT_PUTPAGE(rmp);
1181                     DT_PUTPAGE(smp);
1182                     goto splitOut;
1183                 }
1184             } else {
1185                 /* next to leftmost entry of
1186                    lowest internal level */
1187 
1188                 /* compute uppercase router key */
1189                 dtGetKey(rp, 0, &key, sbi->mntflag);
1190                 key.name[key.namlen] = 0;
1191 
1192                 if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1193                     ciToUpper(&key);
1194             }
1195 
1196             n = NDTINTERNAL(key.namlen);
1197             break;
1198 
1199         case BT_INTERNAL:
1200             dtGetKey(rp, 0, &key, sbi->mntflag);
1201             n = NDTINTERNAL(key.namlen);
1202             break;
1203 
1204         default:
1205             jfs_err("dtSplitUp(): UFO!");
1206             break;
1207         }
1208 
1209         /* unpin left child page */
1210         DT_PUTPAGE(lmp);
1211 
1212         /*
1213          * compute the data for the router entry
1214          */
1215         data->xd = rpxd;    /* child page xd */
1216 
1217         /*
1218          * parent page is full - split the parent page
1219          */
1220         if (n > sp->header.freecnt) {
1221             /* init for parent page split */
1222             split->mp = smp;
1223             split->index = skip;    /* index at insert */
1224             split->nslot = n;
1225             split->key = &key;
1226             /* split->data = data; */
1227 
1228             /* unpin right child page */
1229             DT_PUTPAGE(rmp);
1230 
1231             /* The split routines insert the new entry,
1232              * acquire txLock as appropriate.
1233              * return <rp> pinned and its block number <rbn>.
1234              */
1235             rc = (sp->header.flag & BT_ROOT) ?
1236                 dtSplitRoot(tid, ip, split, &rmp) :
1237                 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1238             if (rc) {
1239                 DT_PUTPAGE(smp);
1240                 goto splitOut;
1241             }
1242 
1243             /* smp and rmp are pinned */
1244         }
1245         /*
1246          * parent page is not full - insert router entry in parent page
1247          */
1248         else {
1249             BT_MARK_DIRTY(smp, ip);
1250             /*
1251              * acquire a transaction lock on the parent page
1252              */
1253             tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1254             dtlck = (struct dt_lock *) & tlck->lock;
1255             ASSERT(dtlck->index == 0);
1256             lv = & dtlck->lv[0];
1257 
1258             /* linelock header */
1259             lv->offset = 0;
1260             lv->length = 1;
1261             dtlck->index++;
1262 
1263             /* linelock stbl of non-root parent page */
1264             if (!(sp->header.flag & BT_ROOT)) {
1265                 lv++;
1266                 n = skip >> L2DTSLOTSIZE;
1267                 lv->offset = sp->header.stblindex + n;
1268                 lv->length =
1269                     ((sp->header.nextindex -
1270                       1) >> L2DTSLOTSIZE) - n + 1;
1271                 dtlck->index++;
1272             }
1273 
1274             dtInsertEntry(sp, skip, &key, data, &dtlck);
1275 
1276             /* exit propagate up */
1277             break;
1278         }
1279     }
1280 
1281     /* unpin current split and its right page */
1282     DT_PUTPAGE(smp);
1283     DT_PUTPAGE(rmp);
1284 
1285     /*
1286      * free remaining extents allocated for split
1287      */
1288       splitOut:
1289     n = pxdlist.npxd;
1290     pxd = &pxdlist.pxd[n];
1291     for (; n < pxdlist.maxnpxd; n++, pxd++)
1292         dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1293 
1294       freeKeyName:
1295     kfree(key.name);
1296 
1297     /* Rollback quota allocation */
1298     if (rc && quota_allocation)
1299         dquot_free_block(ip, quota_allocation);
1300 
1301       dtSplitUp_Exit:
1302 
1303     return rc;
1304 }
1305 
1306 
1307 /*
1308  *  dtSplitPage()
1309  *
1310  * function: Split a non-root page of a btree.
1311  *
1312  * parameter:
1313  *
1314  * return: 0 - success;
1315  *     errno - failure;
1316  *  return split and new page pinned;
1317  */
1318 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1319         struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1320 {
1321     int rc = 0;
1322     struct metapage *smp;
1323     dtpage_t *sp;
1324     struct metapage *rmp;
1325     dtpage_t *rp;       /* new right page allocated */
1326     s64 rbn;        /* new right page block number */
1327     struct metapage *mp;
1328     dtpage_t *p;
1329     s64 nextbn;
1330     struct pxdlist *pxdlist;
1331     pxd_t *pxd;
1332     int skip, nextindex, half, left, nxt, off, si;
1333     struct ldtentry *ldtentry;
1334     struct idtentry *idtentry;
1335     u8 *stbl;
1336     struct dtslot *f;
1337     int fsi, stblsize;
1338     int n;
1339     struct dt_lock *sdtlck, *rdtlck;
1340     struct tlock *tlck;
1341     struct dt_lock *dtlck;
1342     struct lv *slv, *rlv, *lv;
1343 
1344     /* get split page */
1345     smp = split->mp;
1346     sp = DT_PAGE(ip, smp);
1347 
1348     /*
1349      * allocate the new right page for the split
1350      */
1351     pxdlist = split->pxdlist;
1352     pxd = &pxdlist->pxd[pxdlist->npxd];
1353     pxdlist->npxd++;
1354     rbn = addressPXD(pxd);
1355     rmp = get_metapage(ip, rbn, PSIZE, 1);
1356     if (rmp == NULL)
1357         return -EIO;
1358 
1359     /* Allocate blocks to quota. */
1360     rc = dquot_alloc_block(ip, lengthPXD(pxd));
1361     if (rc) {
1362         release_metapage(rmp);
1363         return rc;
1364     }
1365 
1366     jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1367 
1368     BT_MARK_DIRTY(rmp, ip);
1369     /*
1370      * acquire a transaction lock on the new right page
1371      */
1372     tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1373     rdtlck = (struct dt_lock *) & tlck->lock;
1374 
1375     rp = (dtpage_t *) rmp->data;
1376     *rpp = rp;
1377     rp->header.self = *pxd;
1378 
1379     BT_MARK_DIRTY(smp, ip);
1380     /*
1381      * acquire a transaction lock on the split page
1382      *
1383      * action:
1384      */
1385     tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1386     sdtlck = (struct dt_lock *) & tlck->lock;
1387 
1388     /* linelock header of split page */
1389     ASSERT(sdtlck->index == 0);
1390     slv = & sdtlck->lv[0];
1391     slv->offset = 0;
1392     slv->length = 1;
1393     sdtlck->index++;
1394 
1395     /*
1396      * initialize/update sibling pointers between sp and rp
1397      */
1398     nextbn = le64_to_cpu(sp->header.next);
1399     rp->header.next = cpu_to_le64(nextbn);
1400     rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1401     sp->header.next = cpu_to_le64(rbn);
1402 
1403     /*
1404      * initialize new right page
1405      */
1406     rp->header.flag = sp->header.flag;
1407 
1408     /* compute sorted entry table at start of extent data area */
1409     rp->header.nextindex = 0;
1410     rp->header.stblindex = 1;
1411 
1412     n = PSIZE >> L2DTSLOTSIZE;
1413     rp->header.maxslot = n;
1414     stblsize = (n + 31) >> L2DTSLOTSIZE;    /* in unit of slot */
1415 
1416     /* init freelist */
1417     fsi = rp->header.stblindex + stblsize;
1418     rp->header.freelist = fsi;
1419     rp->header.freecnt = rp->header.maxslot - fsi;
1420 
1421     /*
1422      *  sequential append at tail: append without split
1423      *
1424      * If splitting the last page on a level because of appending
1425      * a entry to it (skip is maxentry), it's likely that the access is
1426      * sequential. Adding an empty page on the side of the level is less
1427      * work and can push the fill factor much higher than normal.
1428      * If we're wrong it's no big deal, we'll just do the split the right
1429      * way next time.
1430      * (It may look like it's equally easy to do a similar hack for
1431      * reverse sorted data, that is, split the tree left,
1432      * but it's not. Be my guest.)
1433      */
1434     if (nextbn == 0 && split->index == sp->header.nextindex) {
1435         /* linelock header + stbl (first slot) of new page */
1436         rlv = & rdtlck->lv[rdtlck->index];
1437         rlv->offset = 0;
1438         rlv->length = 2;
1439         rdtlck->index++;
1440 
1441         /*
1442          * initialize freelist of new right page
1443          */
1444         f = &rp->slot[fsi];
1445         for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1446             f->next = fsi;
1447         f->next = -1;
1448 
1449         /* insert entry at the first entry of the new right page */
1450         dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1451 
1452         goto out;
1453     }
1454 
1455     /*
1456      *  non-sequential insert (at possibly middle page)
1457      */
1458 
1459     /*
1460      * update prev pointer of previous right sibling page;
1461      */
1462     if (nextbn != 0) {
1463         DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1464         if (rc) {
1465             discard_metapage(rmp);
1466             return rc;
1467         }
1468 
1469         BT_MARK_DIRTY(mp, ip);
1470         /*
1471          * acquire a transaction lock on the next page
1472          */
1473         tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1474         jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1475             tlck, ip, mp);
1476         dtlck = (struct dt_lock *) & tlck->lock;
1477 
1478         /* linelock header of previous right sibling page */
1479         lv = & dtlck->lv[dtlck->index];
1480         lv->offset = 0;
1481         lv->length = 1;
1482         dtlck->index++;
1483 
1484         p->header.prev = cpu_to_le64(rbn);
1485 
1486         DT_PUTPAGE(mp);
1487     }
1488 
1489     /*
1490      * split the data between the split and right pages.
1491      */
1492     skip = split->index;
1493     half = (PSIZE >> L2DTSLOTSIZE) >> 1;    /* swag */
1494     left = 0;
1495 
1496     /*
1497      *  compute fill factor for split pages
1498      *
1499      * <nxt> traces the next entry to move to rp
1500      * <off> traces the next entry to stay in sp
1501      */
1502     stbl = (u8 *) & sp->slot[sp->header.stblindex];
1503     nextindex = sp->header.nextindex;
1504     for (nxt = off = 0; nxt < nextindex; ++off) {
1505         if (off == skip)
1506             /* check for fill factor with new entry size */
1507             n = split->nslot;
1508         else {
1509             si = stbl[nxt];
1510             switch (sp->header.flag & BT_TYPE) {
1511             case BT_LEAF:
1512                 ldtentry = (struct ldtentry *) & sp->slot[si];
1513                 if (DO_INDEX(ip))
1514                     n = NDTLEAF(ldtentry->namlen);
1515                 else
1516                     n = NDTLEAF_LEGACY(ldtentry->
1517                                namlen);
1518                 break;
1519 
1520             case BT_INTERNAL:
1521                 idtentry = (struct idtentry *) & sp->slot[si];
1522                 n = NDTINTERNAL(idtentry->namlen);
1523                 break;
1524 
1525             default:
1526                 break;
1527             }
1528 
1529             ++nxt;  /* advance to next entry to move in sp */
1530         }
1531 
1532         left += n;
1533         if (left >= half)
1534             break;
1535     }
1536 
1537     /* <nxt> poins to the 1st entry to move */
1538 
1539     /*
1540      *  move entries to right page
1541      *
1542      * dtMoveEntry() initializes rp and reserves entry for insertion
1543      *
1544      * split page moved out entries are linelocked;
1545      * new/right page moved in entries are linelocked;
1546      */
1547     /* linelock header + stbl of new right page */
1548     rlv = & rdtlck->lv[rdtlck->index];
1549     rlv->offset = 0;
1550     rlv->length = 5;
1551     rdtlck->index++;
1552 
1553     dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1554 
1555     sp->header.nextindex = nxt;
1556 
1557     /*
1558      * finalize freelist of new right page
1559      */
1560     fsi = rp->header.freelist;
1561     f = &rp->slot[fsi];
1562     for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1563         f->next = fsi;
1564     f->next = -1;
1565 
1566     /*
1567      * Update directory index table for entries now in right page
1568      */
1569     if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1570         s64 lblock;
1571 
1572         mp = NULL;
1573         stbl = DT_GETSTBL(rp);
1574         for (n = 0; n < rp->header.nextindex; n++) {
1575             ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1576             modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1577                      rbn, n, &mp, &lblock);
1578         }
1579         if (mp)
1580             release_metapage(mp);
1581     }
1582 
1583     /*
1584      * the skipped index was on the left page,
1585      */
1586     if (skip <= off) {
1587         /* insert the new entry in the split page */
1588         dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1589 
1590         /* linelock stbl of split page */
1591         if (sdtlck->index >= sdtlck->maxcnt)
1592             sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1593         slv = & sdtlck->lv[sdtlck->index];
1594         n = skip >> L2DTSLOTSIZE;
1595         slv->offset = sp->header.stblindex + n;
1596         slv->length =
1597             ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1598         sdtlck->index++;
1599     }
1600     /*
1601      * the skipped index was on the right page,
1602      */
1603     else {
1604         /* adjust the skip index to reflect the new position */
1605         skip -= nxt;
1606 
1607         /* insert the new entry in the right page */
1608         dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1609     }
1610 
1611       out:
1612     *rmpp = rmp;
1613     *rpxdp = *pxd;
1614 
1615     return rc;
1616 }
1617 
1618 
1619 /*
1620  *  dtExtendPage()
1621  *
1622  * function: extend 1st/only directory leaf page
1623  *
1624  * parameter:
1625  *
1626  * return: 0 - success;
1627  *     errno - failure;
1628  *  return extended page pinned;
1629  */
1630 static int dtExtendPage(tid_t tid,
1631          struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1632 {
1633     struct super_block *sb = ip->i_sb;
1634     int rc;
1635     struct metapage *smp, *pmp, *mp;
1636     dtpage_t *sp, *pp;
1637     struct pxdlist *pxdlist;
1638     pxd_t *pxd, *tpxd;
1639     int xlen, xsize;
1640     int newstblindex, newstblsize;
1641     int oldstblindex, oldstblsize;
1642     int fsi, last;
1643     struct dtslot *f;
1644     struct btframe *parent;
1645     int n;
1646     struct dt_lock *dtlck;
1647     s64 xaddr, txaddr;
1648     struct tlock *tlck;
1649     struct pxd_lock *pxdlock;
1650     struct lv *lv;
1651     uint type;
1652     struct ldtentry *ldtentry;
1653     u8 *stbl;
1654 
1655     /* get page to extend */
1656     smp = split->mp;
1657     sp = DT_PAGE(ip, smp);
1658 
1659     /* get parent/root page */
1660     parent = BT_POP(btstack);
1661     DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1662     if (rc)
1663         return (rc);
1664 
1665     /*
1666      *  extend the extent
1667      */
1668     pxdlist = split->pxdlist;
1669     pxd = &pxdlist->pxd[pxdlist->npxd];
1670     pxdlist->npxd++;
1671 
1672     xaddr = addressPXD(pxd);
1673     tpxd = &sp->header.self;
1674     txaddr = addressPXD(tpxd);
1675     /* in-place extension */
1676     if (xaddr == txaddr) {
1677         type = tlckEXTEND;
1678     }
1679     /* relocation */
1680     else {
1681         type = tlckNEW;
1682 
1683         /* save moved extent descriptor for later free */
1684         tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1685         pxdlock = (struct pxd_lock *) & tlck->lock;
1686         pxdlock->flag = mlckFREEPXD;
1687         pxdlock->pxd = sp->header.self;
1688         pxdlock->index = 1;
1689 
1690         /*
1691          * Update directory index table to reflect new page address
1692          */
1693         if (DO_INDEX(ip)) {
1694             s64 lblock;
1695 
1696             mp = NULL;
1697             stbl = DT_GETSTBL(sp);
1698             for (n = 0; n < sp->header.nextindex; n++) {
1699                 ldtentry =
1700                     (struct ldtentry *) & sp->slot[stbl[n]];
1701                 modify_index(tid, ip,
1702                          le32_to_cpu(ldtentry->index),
1703                          xaddr, n, &mp, &lblock);
1704             }
1705             if (mp)
1706                 release_metapage(mp);
1707         }
1708     }
1709 
1710     /*
1711      *  extend the page
1712      */
1713     sp->header.self = *pxd;
1714 
1715     jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1716 
1717     BT_MARK_DIRTY(smp, ip);
1718     /*
1719      * acquire a transaction lock on the extended/leaf page
1720      */
1721     tlck = txLock(tid, ip, smp, tlckDTREE | type);
1722     dtlck = (struct dt_lock *) & tlck->lock;
1723     lv = & dtlck->lv[0];
1724 
1725     /* update buffer extent descriptor of extended page */
1726     xlen = lengthPXD(pxd);
1727     xsize = xlen << JFS_SBI(sb)->l2bsize;
1728 
1729     /*
1730      * copy old stbl to new stbl at start of extended area
1731      */
1732     oldstblindex = sp->header.stblindex;
1733     oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1734     newstblindex = sp->header.maxslot;
1735     n = xsize >> L2DTSLOTSIZE;
1736     newstblsize = (n + 31) >> L2DTSLOTSIZE;
1737     memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1738            sp->header.nextindex);
1739 
1740     /*
1741      * in-line extension: linelock old area of extended page
1742      */
1743     if (type == tlckEXTEND) {
1744         /* linelock header */
1745         lv->offset = 0;
1746         lv->length = 1;
1747         dtlck->index++;
1748         lv++;
1749 
1750         /* linelock new stbl of extended page */
1751         lv->offset = newstblindex;
1752         lv->length = newstblsize;
1753     }
1754     /*
1755      * relocation: linelock whole relocated area
1756      */
1757     else {
1758         lv->offset = 0;
1759         lv->length = sp->header.maxslot + newstblsize;
1760     }
1761 
1762     dtlck->index++;
1763 
1764     sp->header.maxslot = n;
1765     sp->header.stblindex = newstblindex;
1766     /* sp->header.nextindex remains the same */
1767 
1768     /*
1769      * add old stbl region at head of freelist
1770      */
1771     fsi = oldstblindex;
1772     f = &sp->slot[fsi];
1773     last = sp->header.freelist;
1774     for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1775         f->next = last;
1776         last = fsi;
1777     }
1778     sp->header.freelist = last;
1779     sp->header.freecnt += oldstblsize;
1780 
1781     /*
1782      * append free region of newly extended area at tail of freelist
1783      */
1784     /* init free region of newly extended area */
1785     fsi = n = newstblindex + newstblsize;
1786     f = &sp->slot[fsi];
1787     for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1788         f->next = fsi;
1789     f->next = -1;
1790 
1791     /* append new free region at tail of old freelist */
1792     fsi = sp->header.freelist;
1793     if (fsi == -1)
1794         sp->header.freelist = n;
1795     else {
1796         do {
1797             f = &sp->slot[fsi];
1798             fsi = f->next;
1799         } while (fsi != -1);
1800 
1801         f->next = n;
1802     }
1803 
1804     sp->header.freecnt += sp->header.maxslot - n;
1805 
1806     /*
1807      * insert the new entry
1808      */
1809     dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1810 
1811     BT_MARK_DIRTY(pmp, ip);
1812     /*
1813      * linelock any freeslots residing in old extent
1814      */
1815     if (type == tlckEXTEND) {
1816         n = sp->header.maxslot >> 2;
1817         if (sp->header.freelist < n)
1818             dtLinelockFreelist(sp, n, &dtlck);
1819     }
1820 
1821     /*
1822      *  update parent entry on the parent/root page
1823      */
1824     /*
1825      * acquire a transaction lock on the parent/root page
1826      */
1827     tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1828     dtlck = (struct dt_lock *) & tlck->lock;
1829     lv = & dtlck->lv[dtlck->index];
1830 
1831     /* linelock parent entry - 1st slot */
1832     lv->offset = 1;
1833     lv->length = 1;
1834     dtlck->index++;
1835 
1836     /* update the parent pxd for page extension */
1837     tpxd = (pxd_t *) & pp->slot[1];
1838     *tpxd = *pxd;
1839 
1840     DT_PUTPAGE(pmp);
1841     return 0;
1842 }
1843 
1844 
1845 /*
1846  *  dtSplitRoot()
1847  *
1848  * function:
1849  *  split the full root page into
1850  *  original/root/split page and new right page
1851  *  i.e., root remains fixed in tree anchor (inode) and
1852  *  the root is copied to a single new right child page
1853  *  since root page << non-root page, and
1854  *  the split root page contains a single entry for the
1855  *  new right child page.
1856  *
1857  * parameter:
1858  *
1859  * return: 0 - success;
1860  *     errno - failure;
1861  *  return new page pinned;
1862  */
1863 static int dtSplitRoot(tid_t tid,
1864         struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1865 {
1866     struct super_block *sb = ip->i_sb;
1867     struct metapage *smp;
1868     dtroot_t *sp;
1869     struct metapage *rmp;
1870     dtpage_t *rp;
1871     s64 rbn;
1872     int xlen;
1873     int xsize;
1874     struct dtslot *f;
1875     s8 *stbl;
1876     int fsi, stblsize, n;
1877     struct idtentry *s;
1878     pxd_t *ppxd;
1879     struct pxdlist *pxdlist;
1880     pxd_t *pxd;
1881     struct dt_lock *dtlck;
1882     struct tlock *tlck;
1883     struct lv *lv;
1884     int rc;
1885 
1886     /* get split root page */
1887     smp = split->mp;
1888     sp = &JFS_IP(ip)->i_dtroot;
1889 
1890     /*
1891      *  allocate/initialize a single (right) child page
1892      *
1893      * N.B. at first split, a one (or two) block to fit new entry
1894      * is allocated; at subsequent split, a full page is allocated;
1895      */
1896     pxdlist = split->pxdlist;
1897     pxd = &pxdlist->pxd[pxdlist->npxd];
1898     pxdlist->npxd++;
1899     rbn = addressPXD(pxd);
1900     xlen = lengthPXD(pxd);
1901     xsize = xlen << JFS_SBI(sb)->l2bsize;
1902     rmp = get_metapage(ip, rbn, xsize, 1);
1903     if (!rmp)
1904         return -EIO;
1905 
1906     rp = rmp->data;
1907 
1908     /* Allocate blocks to quota. */
1909     rc = dquot_alloc_block(ip, lengthPXD(pxd));
1910     if (rc) {
1911         release_metapage(rmp);
1912         return rc;
1913     }
1914 
1915     BT_MARK_DIRTY(rmp, ip);
1916     /*
1917      * acquire a transaction lock on the new right page
1918      */
1919     tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1920     dtlck = (struct dt_lock *) & tlck->lock;
1921 
1922     rp->header.flag =
1923         (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1924     rp->header.self = *pxd;
1925 
1926     /* initialize sibling pointers */
1927     rp->header.next = 0;
1928     rp->header.prev = 0;
1929 
1930     /*
1931      *  move in-line root page into new right page extent
1932      */
1933     /* linelock header + copied entries + new stbl (1st slot) in new page */
1934     ASSERT(dtlck->index == 0);
1935     lv = & dtlck->lv[0];
1936     lv->offset = 0;
1937     lv->length = 10;    /* 1 + 8 + 1 */
1938     dtlck->index++;
1939 
1940     n = xsize >> L2DTSLOTSIZE;
1941     rp->header.maxslot = n;
1942     stblsize = (n + 31) >> L2DTSLOTSIZE;
1943 
1944     /* copy old stbl to new stbl at start of extended area */
1945     rp->header.stblindex = DTROOTMAXSLOT;
1946     stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1947     memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1948     rp->header.nextindex = sp->header.nextindex;
1949 
1950     /* copy old data area to start of new data area */
1951     memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1952 
1953     /*
1954      * append free region of newly extended area at tail of freelist
1955      */
1956     /* init free region of newly extended area */
1957     fsi = n = DTROOTMAXSLOT + stblsize;
1958     f = &rp->slot[fsi];
1959     for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1960         f->next = fsi;
1961     f->next = -1;
1962 
1963     /* append new free region at tail of old freelist */
1964     fsi = sp->header.freelist;
1965     if (fsi == -1)
1966         rp->header.freelist = n;
1967     else {
1968         rp->header.freelist = fsi;
1969 
1970         do {
1971             f = &rp->slot[fsi];
1972             fsi = f->next;
1973         } while (fsi != -1);
1974 
1975         f->next = n;
1976     }
1977 
1978     rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1979 
1980     /*
1981      * Update directory index table for entries now in right page
1982      */
1983     if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1984         s64 lblock;
1985         struct metapage *mp = NULL;
1986         struct ldtentry *ldtentry;
1987 
1988         stbl = DT_GETSTBL(rp);
1989         for (n = 0; n < rp->header.nextindex; n++) {
1990             ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1991             modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1992                      rbn, n, &mp, &lblock);
1993         }
1994         if (mp)
1995             release_metapage(mp);
1996     }
1997     /*
1998      * insert the new entry into the new right/child page
1999      * (skip index in the new right page will not change)
2000      */
2001     dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2002 
2003     /*
2004      *  reset parent/root page
2005      *
2006      * set the 1st entry offset to 0, which force the left-most key
2007      * at any level of the tree to be less than any search key.
2008      *
2009      * The btree comparison code guarantees that the left-most key on any
2010      * level of the tree is never used, so it doesn't need to be filled in.
2011      */
2012     BT_MARK_DIRTY(smp, ip);
2013     /*
2014      * acquire a transaction lock on the root page (in-memory inode)
2015      */
2016     tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2017     dtlck = (struct dt_lock *) & tlck->lock;
2018 
2019     /* linelock root */
2020     ASSERT(dtlck->index == 0);
2021     lv = & dtlck->lv[0];
2022     lv->offset = 0;
2023     lv->length = DTROOTMAXSLOT;
2024     dtlck->index++;
2025 
2026     /* update page header of root */
2027     if (sp->header.flag & BT_LEAF) {
2028         sp->header.flag &= ~BT_LEAF;
2029         sp->header.flag |= BT_INTERNAL;
2030     }
2031 
2032     /* init the first entry */
2033     s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2034     ppxd = (pxd_t *) s;
2035     *ppxd = *pxd;
2036     s->next = -1;
2037     s->namlen = 0;
2038 
2039     stbl = sp->header.stbl;
2040     stbl[0] = DTENTRYSTART;
2041     sp->header.nextindex = 1;
2042 
2043     /* init freelist */
2044     fsi = DTENTRYSTART + 1;
2045     f = &sp->slot[fsi];
2046 
2047     /* init free region of remaining area */
2048     for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2049         f->next = fsi;
2050     f->next = -1;
2051 
2052     sp->header.freelist = DTENTRYSTART + 1;
2053     sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2054 
2055     *rmpp = rmp;
2056 
2057     return 0;
2058 }
2059 
2060 
2061 /*
2062  *  dtDelete()
2063  *
2064  * function: delete the entry(s) referenced by a key.
2065  *
2066  * parameter:
2067  *
2068  * return:
2069  */
2070 int dtDelete(tid_t tid,
2071      struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2072 {
2073     int rc = 0;
2074     s64 bn;
2075     struct metapage *mp, *imp;
2076     dtpage_t *p;
2077     int index;
2078     struct btstack btstack;
2079     struct dt_lock *dtlck;
2080     struct tlock *tlck;
2081     struct lv *lv;
2082     int i;
2083     struct ldtentry *ldtentry;
2084     u8 *stbl;
2085     u32 table_index, next_index;
2086     struct metapage *nmp;
2087     dtpage_t *np;
2088 
2089     /*
2090      *  search for the entry to delete:
2091      *
2092      * dtSearch() returns (leaf page pinned, index at which to delete).
2093      */
2094     if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2095         return rc;
2096 
2097     /* retrieve search result */
2098     DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2099 
2100     /*
2101      * We need to find put the index of the next entry into the
2102      * directory index table in order to resume a readdir from this
2103      * entry.
2104      */
2105     if (DO_INDEX(ip)) {
2106         stbl = DT_GETSTBL(p);
2107         ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2108         table_index = le32_to_cpu(ldtentry->index);
2109         if (index == (p->header.nextindex - 1)) {
2110             /*
2111              * Last entry in this leaf page
2112              */
2113             if ((p->header.flag & BT_ROOT)
2114                 || (p->header.next == 0))
2115                 next_index = -1;
2116             else {
2117                 /* Read next leaf page */
2118                 DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2119                        nmp, PSIZE, np, rc);
2120                 if (rc)
2121                     next_index = -1;
2122                 else {
2123                     stbl = DT_GETSTBL(np);
2124                     ldtentry =
2125                         (struct ldtentry *) & np->
2126                         slot[stbl[0]];
2127                     next_index =
2128                         le32_to_cpu(ldtentry->index);
2129                     DT_PUTPAGE(nmp);
2130                 }
2131             }
2132         } else {
2133             ldtentry =
2134                 (struct ldtentry *) & p->slot[stbl[index + 1]];
2135             next_index = le32_to_cpu(ldtentry->index);
2136         }
2137         free_index(tid, ip, table_index, next_index);
2138     }
2139     /*
2140      * the leaf page becomes empty, delete the page
2141      */
2142     if (p->header.nextindex == 1) {
2143         /* delete empty page */
2144         rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2145     }
2146     /*
2147      * the leaf page has other entries remaining:
2148      *
2149      * delete the entry from the leaf page.
2150      */
2151     else {
2152         BT_MARK_DIRTY(mp, ip);
2153         /*
2154          * acquire a transaction lock on the leaf page
2155          */
2156         tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2157         dtlck = (struct dt_lock *) & tlck->lock;
2158 
2159         /*
2160          * Do not assume that dtlck->index will be zero.  During a
2161          * rename within a directory, this transaction may have
2162          * modified this page already when adding the new entry.
2163          */
2164 
2165         /* linelock header */
2166         if (dtlck->index >= dtlck->maxcnt)
2167             dtlck = (struct dt_lock *) txLinelock(dtlck);
2168         lv = & dtlck->lv[dtlck->index];
2169         lv->offset = 0;
2170         lv->length = 1;
2171         dtlck->index++;
2172 
2173         /* linelock stbl of non-root leaf page */
2174         if (!(p->header.flag & BT_ROOT)) {
2175             if (dtlck->index >= dtlck->maxcnt)
2176                 dtlck = (struct dt_lock *) txLinelock(dtlck);
2177             lv = & dtlck->lv[dtlck->index];
2178             i = index >> L2DTSLOTSIZE;
2179             lv->offset = p->header.stblindex + i;
2180             lv->length =
2181                 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2182                 i + 1;
2183             dtlck->index++;
2184         }
2185 
2186         /* free the leaf entry */
2187         dtDeleteEntry(p, index, &dtlck);
2188 
2189         /*
2190          * Update directory index table for entries moved in stbl
2191          */
2192         if (DO_INDEX(ip) && index < p->header.nextindex) {
2193             s64 lblock;
2194 
2195             imp = NULL;
2196             stbl = DT_GETSTBL(p);
2197             for (i = index; i < p->header.nextindex; i++) {
2198                 ldtentry =
2199                     (struct ldtentry *) & p->slot[stbl[i]];
2200                 modify_index(tid, ip,
2201                          le32_to_cpu(ldtentry->index),
2202                          bn, i, &imp, &lblock);
2203             }
2204             if (imp)
2205                 release_metapage(imp);
2206         }
2207 
2208         DT_PUTPAGE(mp);
2209     }
2210 
2211     return rc;
2212 }
2213 
2214 
2215 /*
2216  *  dtDeleteUp()
2217  *
2218  * function:
2219  *  free empty pages as propagating deletion up the tree
2220  *
2221  * parameter:
2222  *
2223  * return:
2224  */
2225 static int dtDeleteUp(tid_t tid, struct inode *ip,
2226        struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2227 {
2228     int rc = 0;
2229     struct metapage *mp;
2230     dtpage_t *p;
2231     int index, nextindex;
2232     int xlen;
2233     struct btframe *parent;
2234     struct dt_lock *dtlck;
2235     struct tlock *tlck;
2236     struct lv *lv;
2237     struct pxd_lock *pxdlock;
2238     int i;
2239 
2240     /*
2241      *  keep the root leaf page which has become empty
2242      */
2243     if (BT_IS_ROOT(fmp)) {
2244         /*
2245          * reset the root
2246          *
2247          * dtInitRoot() acquires txlock on the root
2248          */
2249         dtInitRoot(tid, ip, PARENT(ip));
2250 
2251         DT_PUTPAGE(fmp);
2252 
2253         return 0;
2254     }
2255 
2256     /*
2257      *  free the non-root leaf page
2258      */
2259     /*
2260      * acquire a transaction lock on the page
2261      *
2262      * write FREEXTENT|NOREDOPAGE log record
2263      * N.B. linelock is overlaid as freed extent descriptor, and
2264      * the buffer page is freed;
2265      */
2266     tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2267     pxdlock = (struct pxd_lock *) & tlck->lock;
2268     pxdlock->flag = mlckFREEPXD;
2269     pxdlock->pxd = fp->header.self;
2270     pxdlock->index = 1;
2271 
2272     /* update sibling pointers */
2273     if ((rc = dtRelink(tid, ip, fp))) {
2274         BT_PUTPAGE(fmp);
2275         return rc;
2276     }
2277 
2278     xlen = lengthPXD(&fp->header.self);
2279 
2280     /* Free quota allocation. */
2281     dquot_free_block(ip, xlen);
2282 
2283     /* free/invalidate its buffer page */
2284     discard_metapage(fmp);
2285 
2286     /*
2287      *  propagate page deletion up the directory tree
2288      *
2289      * If the delete from the parent page makes it empty,
2290      * continue all the way up the tree.
2291      * stop if the root page is reached (which is never deleted) or
2292      * if the entry deletion does not empty the page.
2293      */
2294     while ((parent = BT_POP(btstack)) != NULL) {
2295         /* pin the parent page <sp> */
2296         DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2297         if (rc)
2298             return rc;
2299 
2300         /*
2301          * free the extent of the child page deleted
2302          */
2303         index = parent->index;
2304 
2305         /*
2306          * delete the entry for the child page from parent
2307          */
2308         nextindex = p->header.nextindex;
2309 
2310         /*
2311          * the parent has the single entry being deleted:
2312          *
2313          * free the parent page which has become empty.
2314          */
2315         if (nextindex == 1) {
2316             /*
2317              * keep the root internal page which has become empty
2318              */
2319             if (p->header.flag & BT_ROOT) {
2320                 /*
2321                  * reset the root
2322                  *
2323                  * dtInitRoot() acquires txlock on the root
2324                  */
2325                 dtInitRoot(tid, ip, PARENT(ip));
2326 
2327                 DT_PUTPAGE(mp);
2328 
2329                 return 0;
2330             }
2331             /*
2332              * free the parent page
2333              */
2334             else {
2335                 /*
2336                  * acquire a transaction lock on the page
2337                  *
2338                  * write FREEXTENT|NOREDOPAGE log record
2339                  */
2340                 tlck =
2341                     txMaplock(tid, ip,
2342                           tlckDTREE | tlckFREE);
2343                 pxdlock = (struct pxd_lock *) & tlck->lock;
2344                 pxdlock->flag = mlckFREEPXD;
2345                 pxdlock->pxd = p->header.self;
2346                 pxdlock->index = 1;
2347 
2348                 /* update sibling pointers */
2349                 if ((rc = dtRelink(tid, ip, p))) {
2350                     DT_PUTPAGE(mp);
2351                     return rc;
2352                 }
2353 
2354                 xlen = lengthPXD(&p->header.self);
2355 
2356                 /* Free quota allocation */
2357                 dquot_free_block(ip, xlen);
2358 
2359                 /* free/invalidate its buffer page */
2360                 discard_metapage(mp);
2361 
2362                 /* propagate up */
2363                 continue;
2364             }
2365         }
2366 
2367         /*
2368          * the parent has other entries remaining:
2369          *
2370          * delete the router entry from the parent page.
2371          */
2372         BT_MARK_DIRTY(mp, ip);
2373         /*
2374          * acquire a transaction lock on the page
2375          *
2376          * action: router entry deletion
2377          */
2378         tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2379         dtlck = (struct dt_lock *) & tlck->lock;
2380 
2381         /* linelock header */
2382         if (dtlck->index >= dtlck->maxcnt)
2383             dtlck = (struct dt_lock *) txLinelock(dtlck);
2384         lv = & dtlck->lv[dtlck->index];
2385         lv->offset = 0;
2386         lv->length = 1;
2387         dtlck->index++;
2388 
2389         /* linelock stbl of non-root leaf page */
2390         if (!(p->header.flag & BT_ROOT)) {
2391             if (dtlck->index < dtlck->maxcnt)
2392                 lv++;
2393             else {
2394                 dtlck = (struct dt_lock *) txLinelock(dtlck);
2395                 lv = & dtlck->lv[0];
2396             }
2397             i = index >> L2DTSLOTSIZE;
2398             lv->offset = p->header.stblindex + i;
2399             lv->length =
2400                 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2401                 i + 1;
2402             dtlck->index++;
2403         }
2404 
2405         /* free the router entry */
2406         dtDeleteEntry(p, index, &dtlck);
2407 
2408         /* reset key of new leftmost entry of level (for consistency) */
2409         if (index == 0 &&
2410             ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2411             dtTruncateEntry(p, 0, &dtlck);
2412 
2413         /* unpin the parent page */
2414         DT_PUTPAGE(mp);
2415 
2416         /* exit propagation up */
2417         break;
2418     }
2419 
2420     if (!DO_INDEX(ip))
2421         ip->i_size -= PSIZE;
2422 
2423     return 0;
2424 }
2425 
2426 /*
2427  *  dtRelink()
2428  *
2429  * function:
2430  *  link around a freed page.
2431  *
2432  * parameter:
2433  *  fp: page to be freed
2434  *
2435  * return:
2436  */
2437 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2438 {
2439     int rc;
2440     struct metapage *mp;
2441     s64 nextbn, prevbn;
2442     struct tlock *tlck;
2443     struct dt_lock *dtlck;
2444     struct lv *lv;
2445 
2446     nextbn = le64_to_cpu(p->header.next);
2447     prevbn = le64_to_cpu(p->header.prev);
2448 
2449     /* update prev pointer of the next page */
2450     if (nextbn != 0) {
2451         DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2452         if (rc)
2453             return rc;
2454 
2455         BT_MARK_DIRTY(mp, ip);
2456         /*
2457          * acquire a transaction lock on the next page
2458          *
2459          * action: update prev pointer;
2460          */
2461         tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2462         jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2463             tlck, ip, mp);
2464         dtlck = (struct dt_lock *) & tlck->lock;
2465 
2466         /* linelock header */
2467         if (dtlck->index >= dtlck->maxcnt)
2468             dtlck = (struct dt_lock *) txLinelock(dtlck);
2469         lv = & dtlck->lv[dtlck->index];
2470         lv->offset = 0;
2471         lv->length = 1;
2472         dtlck->index++;
2473 
2474         p->header.prev = cpu_to_le64(prevbn);
2475         DT_PUTPAGE(mp);
2476     }
2477 
2478     /* update next pointer of the previous page */
2479     if (prevbn != 0) {
2480         DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2481         if (rc)
2482             return rc;
2483 
2484         BT_MARK_DIRTY(mp, ip);
2485         /*
2486          * acquire a transaction lock on the prev page
2487          *
2488          * action: update next pointer;
2489          */
2490         tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2491         jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2492             tlck, ip, mp);
2493         dtlck = (struct dt_lock *) & tlck->lock;
2494 
2495         /* linelock header */
2496         if (dtlck->index >= dtlck->maxcnt)
2497             dtlck = (struct dt_lock *) txLinelock(dtlck);
2498         lv = & dtlck->lv[dtlck->index];
2499         lv->offset = 0;
2500         lv->length = 1;
2501         dtlck->index++;
2502 
2503         p->header.next = cpu_to_le64(nextbn);
2504         DT_PUTPAGE(mp);
2505     }
2506 
2507     return 0;
2508 }
2509 
2510 
2511 /*
2512  *  dtInitRoot()
2513  *
2514  * initialize directory root (inline in inode)
2515  */
2516 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2517 {
2518     struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2519     dtroot_t *p;
2520     int fsi;
2521     struct dtslot *f;
2522     struct tlock *tlck;
2523     struct dt_lock *dtlck;
2524     struct lv *lv;
2525     u16 xflag_save;
2526 
2527     /*
2528      * If this was previously an non-empty directory, we need to remove
2529      * the old directory table.
2530      */
2531     if (DO_INDEX(ip)) {
2532         if (!jfs_dirtable_inline(ip)) {
2533             struct tblock *tblk = tid_to_tblock(tid);
2534             /*
2535              * We're playing games with the tid's xflag.  If
2536              * we're removing a regular file, the file's xtree
2537              * is committed with COMMIT_PMAP, but we always
2538              * commit the directories xtree with COMMIT_PWMAP.
2539              */
2540             xflag_save = tblk->xflag;
2541             tblk->xflag = 0;
2542             /*
2543              * xtTruncate isn't guaranteed to fully truncate
2544              * the xtree.  The caller needs to check i_size
2545              * after committing the transaction to see if
2546              * additional truncation is needed.  The
2547              * COMMIT_Stale flag tells caller that we
2548              * initiated the truncation.
2549              */
2550             xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2551             set_cflag(COMMIT_Stale, ip);
2552 
2553             tblk->xflag = xflag_save;
2554         } else
2555             ip->i_size = 1;
2556 
2557         jfs_ip->next_index = 2;
2558     } else
2559         ip->i_size = IDATASIZE;
2560 
2561     /*
2562      * acquire a transaction lock on the root
2563      *
2564      * action: directory initialization;
2565      */
2566     tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2567               tlckDTREE | tlckENTRY | tlckBTROOT);
2568     dtlck = (struct dt_lock *) & tlck->lock;
2569 
2570     /* linelock root */
2571     ASSERT(dtlck->index == 0);
2572     lv = & dtlck->lv[0];
2573     lv->offset = 0;
2574     lv->length = DTROOTMAXSLOT;
2575     dtlck->index++;
2576 
2577     p = &jfs_ip->i_dtroot;
2578 
2579     p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2580 
2581     p->header.nextindex = 0;
2582 
2583     /* init freelist */
2584     fsi = 1;
2585     f = &p->slot[fsi];
2586 
2587     /* init data area of root */
2588     for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2589         f->next = fsi;
2590     f->next = -1;
2591 
2592     p->header.freelist = 1;
2593     p->header.freecnt = 8;
2594 
2595     /* init '..' entry */
2596     p->header.idotdot = cpu_to_le32(idotdot);
2597 
2598     return;
2599 }
2600 
2601 /*
2602  *  add_missing_indices()
2603  *
2604  * function: Fix dtree page in which one or more entries has an invalid index.
2605  *       fsck.jfs should really fix this, but it currently does not.
2606  *       Called from jfs_readdir when bad index is detected.
2607  */
2608 static void add_missing_indices(struct inode *inode, s64 bn)
2609 {
2610     struct ldtentry *d;
2611     struct dt_lock *dtlck;
2612     int i;
2613     uint index;
2614     struct lv *lv;
2615     struct metapage *mp;
2616     dtpage_t *p;
2617     int rc;
2618     s8 *stbl;
2619     tid_t tid;
2620     struct tlock *tlck;
2621 
2622     tid = txBegin(inode->i_sb, 0);
2623 
2624     DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2625 
2626     if (rc) {
2627         printk(KERN_ERR "DT_GETPAGE failed!\n");
2628         goto end;
2629     }
2630     BT_MARK_DIRTY(mp, inode);
2631 
2632     ASSERT(p->header.flag & BT_LEAF);
2633 
2634     tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2635     if (BT_IS_ROOT(mp))
2636         tlck->type |= tlckBTROOT;
2637 
2638     dtlck = (struct dt_lock *) &tlck->lock;
2639 
2640     stbl = DT_GETSTBL(p);
2641     for (i = 0; i < p->header.nextindex; i++) {
2642         d = (struct ldtentry *) &p->slot[stbl[i]];
2643         index = le32_to_cpu(d->index);
2644         if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2645             d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2646             if (dtlck->index >= dtlck->maxcnt)
2647                 dtlck = (struct dt_lock *) txLinelock(dtlck);
2648             lv = &dtlck->lv[dtlck->index];
2649             lv->offset = stbl[i];
2650             lv->length = 1;
2651             dtlck->index++;
2652         }
2653     }
2654 
2655     DT_PUTPAGE(mp);
2656     (void) txCommit(tid, 1, &inode, 0);
2657 end:
2658     txEnd(tid);
2659 }
2660 
2661 /*
2662  * Buffer to hold directory entry info while traversing a dtree page
2663  * before being fed to the filldir function
2664  */
2665 struct jfs_dirent {
2666     loff_t position;
2667     int ino;
2668     u16 name_len;
2669     char name[];
2670 };
2671 
2672 /*
2673  * function to determine next variable-sized jfs_dirent in buffer
2674  */
2675 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2676 {
2677     return (struct jfs_dirent *)
2678         ((char *)dirent +
2679          ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2680            sizeof (loff_t) - 1) &
2681           ~(sizeof (loff_t) - 1)));
2682 }
2683 
2684 /*
2685  *  jfs_readdir()
2686  *
2687  * function: read directory entries sequentially
2688  *  from the specified entry offset
2689  *
2690  * parameter:
2691  *
2692  * return: offset = (pn, index) of start entry
2693  *  of next jfs_readdir()/dtRead()
2694  */
2695 int jfs_readdir(struct file *file, struct dir_context *ctx)
2696 {
2697     struct inode *ip = file_inode(file);
2698     struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
2699     int rc = 0;
2700     loff_t dtpos;   /* legacy OS/2 style position */
2701     struct dtoffset {
2702         s16 pn;
2703         s16 index;
2704         s32 unused;
2705     } *dtoffset = (struct dtoffset *) &dtpos;
2706     s64 bn;
2707     struct metapage *mp;
2708     dtpage_t *p;
2709     int index;
2710     s8 *stbl;
2711     struct btstack btstack;
2712     int i, next;
2713     struct ldtentry *d;
2714     struct dtslot *t;
2715     int d_namleft, len, outlen;
2716     unsigned long dirent_buf;
2717     char *name_ptr;
2718     u32 dir_index;
2719     int do_index = 0;
2720     uint loop_count = 0;
2721     struct jfs_dirent *jfs_dirent;
2722     int jfs_dirents;
2723     int overflow, fix_page, page_fixed = 0;
2724     static int unique_pos = 2;  /* If we can't fix broken index */
2725 
2726     if (ctx->pos == DIREND)
2727         return 0;
2728 
2729     if (DO_INDEX(ip)) {
2730         /*
2731          * persistent index is stored in directory entries.
2732          * Special cases:    0 = .
2733          *           1 = ..
2734          *          -1 = End of directory
2735          */
2736         do_index = 1;
2737 
2738         dir_index = (u32) ctx->pos;
2739 
2740         /*
2741          * NFSv4 reserves cookies 1 and 2 for . and .. so the value
2742          * we return to the vfs is one greater than the one we use
2743          * internally.
2744          */
2745         if (dir_index)
2746             dir_index--;
2747 
2748         if (dir_index > 1) {
2749             struct dir_table_slot dirtab_slot;
2750 
2751             if (dtEmpty(ip) ||
2752                 (dir_index >= JFS_IP(ip)->next_index)) {
2753                 /* Stale position.  Directory has shrunk */
2754                 ctx->pos = DIREND;
2755                 return 0;
2756             }
2757               repeat:
2758             rc = read_index(ip, dir_index, &dirtab_slot);
2759             if (rc) {
2760                 ctx->pos = DIREND;
2761                 return rc;
2762             }
2763             if (dirtab_slot.flag == DIR_INDEX_FREE) {
2764                 if (loop_count++ > JFS_IP(ip)->next_index) {
2765                     jfs_err("jfs_readdir detected infinite loop!");
2766                     ctx->pos = DIREND;
2767                     return 0;
2768                 }
2769                 dir_index = le32_to_cpu(dirtab_slot.addr2);
2770                 if (dir_index == -1) {
2771                     ctx->pos = DIREND;
2772                     return 0;
2773                 }
2774                 goto repeat;
2775             }
2776             bn = addressDTS(&dirtab_slot);
2777             index = dirtab_slot.slot;
2778             DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2779             if (rc) {
2780                 ctx->pos = DIREND;
2781                 return 0;
2782             }
2783             if (p->header.flag & BT_INTERNAL) {
2784                 jfs_err("jfs_readdir: bad index table");
2785                 DT_PUTPAGE(mp);
2786                 ctx->pos = DIREND;
2787                 return 0;
2788             }
2789         } else {
2790             if (dir_index == 0) {
2791                 /*
2792                  * self "."
2793                  */
2794                 ctx->pos = 1;
2795                 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
2796                     return 0;
2797             }
2798             /*
2799              * parent ".."
2800              */
2801             ctx->pos = 2;
2802             if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
2803                 return 0;
2804 
2805             /*
2806              * Find first entry of left-most leaf
2807              */
2808             if (dtEmpty(ip)) {
2809                 ctx->pos = DIREND;
2810                 return 0;
2811             }
2812 
2813             if ((rc = dtReadFirst(ip, &btstack)))
2814                 return rc;
2815 
2816             DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2817         }
2818     } else {
2819         /*
2820          * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
2821          *
2822          * pn = 0; index = 1:   First entry "."
2823          * pn = 0; index = 2:   Second entry ".."
2824          * pn > 0:      Real entries, pn=1 -> leftmost page
2825          * pn = index = -1: No more entries
2826          */
2827         dtpos = ctx->pos;
2828         if (dtpos < 2) {
2829             /* build "." entry */
2830             ctx->pos = 1;
2831             if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
2832                 return 0;
2833             dtoffset->index = 2;
2834             ctx->pos = dtpos;
2835         }
2836 
2837         if (dtoffset->pn == 0) {
2838             if (dtoffset->index == 2) {
2839                 /* build ".." entry */
2840                 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
2841                     return 0;
2842             } else {
2843                 jfs_err("jfs_readdir called with invalid offset!");
2844             }
2845             dtoffset->pn = 1;
2846             dtoffset->index = 0;
2847             ctx->pos = dtpos;
2848         }
2849 
2850         if (dtEmpty(ip)) {
2851             ctx->pos = DIREND;
2852             return 0;
2853         }
2854 
2855         if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) {
2856             jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext",
2857                 rc);
2858             ctx->pos = DIREND;
2859             return 0;
2860         }
2861         /* get start leaf page and index */
2862         DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2863 
2864         /* offset beyond directory eof ? */
2865         if (bn < 0) {
2866             ctx->pos = DIREND;
2867             return 0;
2868         }
2869     }
2870 
2871     dirent_buf = __get_free_page(GFP_KERNEL);
2872     if (dirent_buf == 0) {
2873         DT_PUTPAGE(mp);
2874         jfs_warn("jfs_readdir: __get_free_page failed!");
2875         ctx->pos = DIREND;
2876         return -ENOMEM;
2877     }
2878 
2879     while (1) {
2880         jfs_dirent = (struct jfs_dirent *) dirent_buf;
2881         jfs_dirents = 0;
2882         overflow = fix_page = 0;
2883 
2884         stbl = DT_GETSTBL(p);
2885 
2886         for (i = index; i < p->header.nextindex; i++) {
2887             d = (struct ldtentry *) & p->slot[stbl[i]];
2888 
2889             if (((long) jfs_dirent + d->namlen + 1) >
2890                 (dirent_buf + PAGE_SIZE)) {
2891                 /* DBCS codepages could overrun dirent_buf */
2892                 index = i;
2893                 overflow = 1;
2894                 break;
2895             }
2896 
2897             d_namleft = d->namlen;
2898             name_ptr = jfs_dirent->name;
2899             jfs_dirent->ino = le32_to_cpu(d->inumber);
2900 
2901             if (do_index) {
2902                 len = min(d_namleft, DTLHDRDATALEN);
2903                 jfs_dirent->position = le32_to_cpu(d->index);
2904                 /*
2905                  * d->index should always be valid, but it
2906                  * isn't.  fsck.jfs doesn't create the
2907                  * directory index for the lost+found
2908                  * directory.  Rather than let it go,
2909                  * we can try to fix it.
2910                  */
2911                 if ((jfs_dirent->position < 2) ||
2912                     (jfs_dirent->position >=
2913                      JFS_IP(ip)->next_index)) {
2914                     if (!page_fixed && !isReadOnly(ip)) {
2915                         fix_page = 1;
2916                         /*
2917                          * setting overflow and setting
2918                          * index to i will cause the
2919                          * same page to be processed
2920                          * again starting here
2921                          */
2922                         overflow = 1;
2923                         index = i;
2924                         break;
2925                     }
2926                     jfs_dirent->position = unique_pos++;
2927                 }
2928                 /*
2929                  * We add 1 to the index because we may
2930                  * use a value of 2 internally, and NFSv4
2931                  * doesn't like that.
2932                  */
2933                 jfs_dirent->position++;
2934             } else {
2935                 jfs_dirent->position = dtpos;
2936                 len = min(d_namleft, DTLHDRDATALEN_LEGACY);
2937             }
2938 
2939             /* copy the name of head/only segment */
2940             outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
2941                            codepage);
2942             jfs_dirent->name_len = outlen;
2943 
2944             /* copy name in the additional segment(s) */
2945             next = d->next;
2946             while (next >= 0) {
2947                 t = (struct dtslot *) & p->slot[next];
2948                 name_ptr += outlen;
2949                 d_namleft -= len;
2950                 /* Sanity Check */
2951                 if (d_namleft == 0) {
2952                     jfs_error(ip->i_sb,
2953                           "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n",
2954                           (long)ip->i_ino,
2955                           (long long)bn,
2956                           i);
2957                     goto skip_one;
2958                 }
2959                 len = min(d_namleft, DTSLOTDATALEN);
2960                 outlen = jfs_strfromUCS_le(name_ptr, t->name,
2961                                len, codepage);
2962                 jfs_dirent->name_len += outlen;
2963 
2964                 next = t->next;
2965             }
2966 
2967             jfs_dirents++;
2968             jfs_dirent = next_jfs_dirent(jfs_dirent);
2969 skip_one:
2970             if (!do_index)
2971                 dtoffset->index++;
2972         }
2973 
2974         if (!overflow) {
2975             /* Point to next leaf page */
2976             if (p->header.flag & BT_ROOT)
2977                 bn = 0;
2978             else {
2979                 bn = le64_to_cpu(p->header.next);
2980                 index = 0;
2981                 /* update offset (pn:index) for new page */
2982                 if (!do_index) {
2983                     dtoffset->pn++;
2984                     dtoffset->index = 0;
2985                 }
2986             }
2987             page_fixed = 0;
2988         }
2989 
2990         /* unpin previous leaf page */
2991         DT_PUTPAGE(mp);
2992 
2993         jfs_dirent = (struct jfs_dirent *) dirent_buf;
2994         while (jfs_dirents--) {
2995             ctx->pos = jfs_dirent->position;
2996             if (!dir_emit(ctx, jfs_dirent->name,
2997                     jfs_dirent->name_len,
2998                     jfs_dirent->ino, DT_UNKNOWN))
2999                 goto out;
3000             jfs_dirent = next_jfs_dirent(jfs_dirent);
3001         }
3002 
3003         if (fix_page) {
3004             add_missing_indices(ip, bn);
3005             page_fixed = 1;
3006         }
3007 
3008         if (!overflow && (bn == 0)) {
3009             ctx->pos = DIREND;
3010             break;
3011         }
3012 
3013         DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3014         if (rc) {
3015             free_page(dirent_buf);
3016             return rc;
3017         }
3018     }
3019 
3020       out:
3021     free_page(dirent_buf);
3022 
3023     return rc;
3024 }
3025 
3026 
3027 /*
3028  *  dtReadFirst()
3029  *
3030  * function: get the leftmost page of the directory
3031  */
3032 static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3033 {
3034     int rc = 0;
3035     s64 bn;
3036     int psize = 288;    /* initial in-line directory */
3037     struct metapage *mp;
3038     dtpage_t *p;
3039     s8 *stbl;
3040     struct btframe *btsp;
3041     pxd_t *xd;
3042 
3043     BT_CLR(btstack);    /* reset stack */
3044 
3045     /*
3046      *  descend leftmost path of the tree
3047      *
3048      * by convention, root bn = 0.
3049      */
3050     for (bn = 0;;) {
3051         DT_GETPAGE(ip, bn, mp, psize, p, rc);
3052         if (rc)
3053             return rc;
3054 
3055         /*
3056          * leftmost leaf page
3057          */
3058         if (p->header.flag & BT_LEAF) {
3059             /* return leftmost entry */
3060             btsp = btstack->top;
3061             btsp->bn = bn;
3062             btsp->index = 0;
3063             btsp->mp = mp;
3064 
3065             return 0;
3066         }
3067 
3068         /*
3069          * descend down to leftmost child page
3070          */
3071         if (BT_STACK_FULL(btstack)) {
3072             DT_PUTPAGE(mp);
3073             jfs_error(ip->i_sb, "btstack overrun\n");
3074             BT_STACK_DUMP(btstack);
3075             return -EIO;
3076         }
3077         /* push (bn, index) of the parent page/entry */
3078         BT_PUSH(btstack, bn, 0);
3079 
3080         /* get the leftmost entry */
3081         stbl = DT_GETSTBL(p);
3082         xd = (pxd_t *) & p->slot[stbl[0]];
3083 
3084         /* get the child page block address */
3085         bn = addressPXD(xd);
3086         psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3087 
3088         /* unpin the parent page */
3089         DT_PUTPAGE(mp);
3090     }
3091 }
3092 
3093 
3094 /*
3095  *  dtReadNext()
3096  *
3097  * function: get the page of the specified offset (pn:index)
3098  *
3099  * return: if (offset > eof), bn = -1;
3100  *
3101  * note: if index > nextindex of the target leaf page,
3102  * start with 1st entry of next leaf page;
3103  */
3104 static int dtReadNext(struct inode *ip, loff_t * offset,
3105               struct btstack * btstack)
3106 {
3107     int rc = 0;
3108     struct dtoffset {
3109         s16 pn;
3110         s16 index;
3111         s32 unused;
3112     } *dtoffset = (struct dtoffset *) offset;
3113     s64 bn;
3114     struct metapage *mp;
3115     dtpage_t *p;
3116     int index;
3117     int pn;
3118     s8 *stbl;
3119     struct btframe *btsp, *parent;
3120     pxd_t *xd;
3121 
3122     /*
3123      * get leftmost leaf page pinned
3124      */
3125     if ((rc = dtReadFirst(ip, btstack)))
3126         return rc;
3127 
3128     /* get leaf page */
3129     DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3130 
3131     /* get the start offset (pn:index) */
3132     pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
3133     index = dtoffset->index;
3134 
3135     /* start at leftmost page ? */
3136     if (pn == 0) {
3137         /* offset beyond eof ? */
3138         if (index < p->header.nextindex)
3139             goto out;
3140 
3141         if (p->header.flag & BT_ROOT) {
3142             bn = -1;
3143             goto out;
3144         }
3145 
3146         /* start with 1st entry of next leaf page */
3147         dtoffset->pn++;
3148         dtoffset->index = index = 0;
3149         goto a;
3150     }
3151 
3152     /* start at non-leftmost page: scan parent pages for large pn */
3153     if (p->header.flag & BT_ROOT) {
3154         bn = -1;
3155         goto out;
3156     }
3157 
3158     /* start after next leaf page ? */
3159     if (pn > 1)
3160         goto b;
3161 
3162     /* get leaf page pn = 1 */
3163       a:
3164     bn = le64_to_cpu(p->header.next);
3165 
3166     /* unpin leaf page */
3167     DT_PUTPAGE(mp);
3168 
3169     /* offset beyond eof ? */
3170     if (bn == 0) {
3171         bn = -1;
3172         goto out;
3173     }
3174 
3175     goto c;
3176 
3177     /*
3178      * scan last internal page level to get target leaf page
3179      */
3180       b:
3181     /* unpin leftmost leaf page */
3182     DT_PUTPAGE(mp);
3183 
3184     /* get left most parent page */
3185     btsp = btstack->top;
3186     parent = btsp - 1;
3187     bn = parent->bn;
3188     DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3189     if (rc)
3190         return rc;
3191 
3192     /* scan parent pages at last internal page level */
3193     while (pn >= p->header.nextindex) {
3194         pn -= p->header.nextindex;
3195 
3196         /* get next parent page address */
3197         bn = le64_to_cpu(p->header.next);
3198 
3199         /* unpin current parent page */
3200         DT_PUTPAGE(mp);
3201 
3202         /* offset beyond eof ? */
3203         if (bn == 0) {
3204             bn = -1;
3205             goto out;
3206         }
3207 
3208         /* get next parent page */
3209         DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3210         if (rc)
3211             return rc;
3212 
3213         /* update parent page stack frame */
3214         parent->bn = bn;
3215     }
3216 
3217     /* get leaf page address */
3218     stbl = DT_GETSTBL(p);
3219     xd = (pxd_t *) & p->slot[stbl[pn]];
3220     bn = addressPXD(xd);
3221 
3222     /* unpin parent page */
3223     DT_PUTPAGE(mp);
3224 
3225     /*
3226      * get target leaf page
3227      */
3228       c:
3229     DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3230     if (rc)
3231         return rc;
3232 
3233     /*
3234      * leaf page has been completed:
3235      * start with 1st entry of next leaf page
3236      */
3237     if (index >= p->header.nextindex) {
3238         bn = le64_to_cpu(p->header.next);
3239 
3240         /* unpin leaf page */
3241         DT_PUTPAGE(mp);
3242 
3243         /* offset beyond eof ? */
3244         if (bn == 0) {
3245             bn = -1;
3246             goto out;
3247         }
3248 
3249         /* get next leaf page */
3250         DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3251         if (rc)
3252             return rc;
3253 
3254         /* start with 1st entry of next leaf page */
3255         dtoffset->pn++;
3256         dtoffset->index = 0;
3257     }
3258 
3259       out:
3260     /* return target leaf page pinned */
3261     btsp = btstack->top;
3262     btsp->bn = bn;
3263     btsp->index = dtoffset->index;
3264     btsp->mp = mp;
3265 
3266     return 0;
3267 }
3268 
3269 
3270 /*
3271  *  dtCompare()
3272  *
3273  * function: compare search key with an internal entry
3274  *
3275  * return:
3276  *  < 0 if k is < record
3277  *  = 0 if k is = record
3278  *  > 0 if k is > record
3279  */
3280 static int dtCompare(struct component_name * key,   /* search key */
3281              dtpage_t * p,  /* directory page */
3282              int si)
3283 {               /* entry slot index */
3284     wchar_t *kname;
3285     __le16 *name;
3286     int klen, namlen, len, rc;
3287     struct idtentry *ih;
3288     struct dtslot *t;
3289 
3290     /*
3291      * force the left-most key on internal pages, at any level of
3292      * the tree, to be less than any search key.
3293      * this obviates having to update the leftmost key on an internal
3294      * page when the user inserts a new key in the tree smaller than
3295      * anything that has been stored.
3296      *
3297      * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3298      * at any internal page at any level of the tree,
3299      * it descends to child of the entry anyway -
3300      * ? make the entry as min size dummy entry)
3301      *
3302      * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3303      * return (1);
3304      */
3305 
3306     kname = key->name;
3307     klen = key->namlen;
3308 
3309     ih = (struct idtentry *) & p->slot[si];
3310     si = ih->next;
3311     name = ih->name;
3312     namlen = ih->namlen;
3313     len = min(namlen, DTIHDRDATALEN);
3314 
3315     /* compare with head/only segment */
3316     len = min(klen, len);
3317     if ((rc = UniStrncmp_le(kname, name, len)))
3318         return rc;
3319 
3320     klen -= len;
3321     namlen -= len;
3322 
3323     /* compare with additional segment(s) */
3324     kname += len;
3325     while (klen > 0 && namlen > 0) {
3326         /* compare with next name segment */
3327         t = (struct dtslot *) & p->slot[si];
3328         len = min(namlen, DTSLOTDATALEN);
3329         len = min(klen, len);
3330         name = t->name;
3331         if ((rc = UniStrncmp_le(kname, name, len)))
3332             return rc;
3333 
3334         klen -= len;
3335         namlen -= len;
3336         kname += len;
3337         si = t->next;
3338     }
3339 
3340     return (klen - namlen);
3341 }
3342 
3343 
3344 
3345 
3346 /*
3347  *  ciCompare()
3348  *
3349  * function: compare search key with an (leaf/internal) entry
3350  *
3351  * return:
3352  *  < 0 if k is < record
3353  *  = 0 if k is = record
3354  *  > 0 if k is > record
3355  */
3356 static int ciCompare(struct component_name * key,   /* search key */
3357              dtpage_t * p,  /* directory page */
3358              int si,    /* entry slot index */
3359              int flag)
3360 {
3361     wchar_t *kname, x;
3362     __le16 *name;
3363     int klen, namlen, len, rc;
3364     struct ldtentry *lh;
3365     struct idtentry *ih;
3366     struct dtslot *t;
3367     int i;
3368 
3369     /*
3370      * force the left-most key on internal pages, at any level of
3371      * the tree, to be less than any search key.
3372      * this obviates having to update the leftmost key on an internal
3373      * page when the user inserts a new key in the tree smaller than
3374      * anything that has been stored.
3375      *
3376      * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3377      * at any internal page at any level of the tree,
3378      * it descends to child of the entry anyway -
3379      * ? make the entry as min size dummy entry)
3380      *
3381      * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3382      * return (1);
3383      */
3384 
3385     kname = key->name;
3386     klen = key->namlen;
3387 
3388     /*
3389      * leaf page entry
3390      */
3391     if (p->header.flag & BT_LEAF) {
3392         lh = (struct ldtentry *) & p->slot[si];
3393         si = lh->next;
3394         name = lh->name;
3395         namlen = lh->namlen;
3396         if (flag & JFS_DIR_INDEX)
3397             len = min(namlen, DTLHDRDATALEN);
3398         else
3399             len = min(namlen, DTLHDRDATALEN_LEGACY);
3400     }
3401     /*
3402      * internal page entry
3403      */
3404     else {
3405         ih = (struct idtentry *) & p->slot[si];
3406         si = ih->next;
3407         name = ih->name;
3408         namlen = ih->namlen;
3409         len = min(namlen, DTIHDRDATALEN);
3410     }
3411 
3412     /* compare with head/only segment */
3413     len = min(klen, len);
3414     for (i = 0; i < len; i++, kname++, name++) {
3415         /* only uppercase if case-insensitive support is on */
3416         if ((flag & JFS_OS2) == JFS_OS2)
3417             x = UniToupper(le16_to_cpu(*name));
3418         else
3419             x = le16_to_cpu(*name);
3420         if ((rc = *kname - x))
3421             return rc;
3422     }
3423 
3424     klen -= len;
3425     namlen -= len;
3426 
3427     /* compare with additional segment(s) */
3428     while (klen > 0 && namlen > 0) {
3429         /* compare with next name segment */
3430         t = (struct dtslot *) & p->slot[si];
3431         len = min(namlen, DTSLOTDATALEN);
3432         len = min(klen, len);
3433         name = t->name;
3434         for (i = 0; i < len; i++, kname++, name++) {
3435             /* only uppercase if case-insensitive support is on */
3436             if ((flag & JFS_OS2) == JFS_OS2)
3437                 x = UniToupper(le16_to_cpu(*name));
3438             else
3439                 x = le16_to_cpu(*name);
3440 
3441             if ((rc = *kname - x))
3442                 return rc;
3443         }
3444 
3445         klen -= len;
3446         namlen -= len;
3447         si = t->next;
3448     }
3449 
3450     return (klen - namlen);
3451 }
3452 
3453 
3454 /*
3455  *  ciGetLeafPrefixKey()
3456  *
3457  * function: compute prefix of suffix compression
3458  *       from two adjacent leaf entries
3459  *       across page boundary
3460  *
3461  * return: non-zero on error
3462  *
3463  */
3464 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3465                    int ri, struct component_name * key, int flag)
3466 {
3467     int klen, namlen;
3468     wchar_t *pl, *pr, *kname;
3469     struct component_name lkey;
3470     struct component_name rkey;
3471 
3472     lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
3473                     GFP_KERNEL);
3474     if (lkey.name == NULL)
3475         return -ENOMEM;
3476 
3477     rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
3478                     GFP_KERNEL);
3479     if (rkey.name == NULL) {
3480         kfree(lkey.name);
3481         return -ENOMEM;
3482     }
3483 
3484     /* get left and right key */
3485     dtGetKey(lp, li, &lkey, flag);
3486     lkey.name[lkey.namlen] = 0;
3487 
3488     if ((flag & JFS_OS2) == JFS_OS2)
3489         ciToUpper(&lkey);
3490 
3491     dtGetKey(rp, ri, &rkey, flag);
3492     rkey.name[rkey.namlen] = 0;
3493 
3494 
3495     if ((flag & JFS_OS2) == JFS_OS2)
3496         ciToUpper(&rkey);
3497 
3498     /* compute prefix */
3499     klen = 0;
3500     kname = key->name;
3501     namlen = min(lkey.namlen, rkey.namlen);
3502     for (pl = lkey.name, pr = rkey.name;
3503          namlen; pl++, pr++, namlen--, klen++, kname++) {
3504         *kname = *pr;
3505         if (*pl != *pr) {
3506             key->namlen = klen + 1;
3507             goto free_names;
3508         }
3509     }
3510 
3511     /* l->namlen <= r->namlen since l <= r */
3512     if (lkey.namlen < rkey.namlen) {
3513         *kname = *pr;
3514         key->namlen = klen + 1;
3515     } else          /* l->namelen == r->namelen */
3516         key->namlen = klen;
3517 
3518 free_names:
3519     kfree(lkey.name);
3520     kfree(rkey.name);
3521     return 0;
3522 }
3523 
3524 
3525 
3526 /*
3527  *  dtGetKey()
3528  *
3529  * function: get key of the entry
3530  */
3531 static void dtGetKey(dtpage_t * p, int i,   /* entry index */
3532              struct component_name * key, int flag)
3533 {
3534     int si;
3535     s8 *stbl;
3536     struct ldtentry *lh;
3537     struct idtentry *ih;
3538     struct dtslot *t;
3539     int namlen, len;
3540     wchar_t *kname;
3541     __le16 *name;
3542 
3543     /* get entry */
3544     stbl = DT_GETSTBL(p);
3545     si = stbl[i];
3546     if (p->header.flag & BT_LEAF) {
3547         lh = (struct ldtentry *) & p->slot[si];
3548         si = lh->next;
3549         namlen = lh->namlen;
3550         name = lh->name;
3551         if (flag & JFS_DIR_INDEX)
3552             len = min(namlen, DTLHDRDATALEN);
3553         else
3554             len = min(namlen, DTLHDRDATALEN_LEGACY);
3555     } else {
3556         ih = (struct idtentry *) & p->slot[si];
3557         si = ih->next;
3558         namlen = ih->namlen;
3559         name = ih->name;
3560         len = min(namlen, DTIHDRDATALEN);
3561     }
3562 
3563     key->namlen = namlen;
3564     kname = key->name;
3565 
3566     /*
3567      * move head/only segment
3568      */
3569     UniStrncpy_from_le(kname, name, len);
3570 
3571     /*
3572      * move additional segment(s)
3573      */
3574     while (si >= 0) {
3575         /* get next segment */
3576         t = &p->slot[si];
3577         kname += len;
3578         namlen -= len;
3579         len = min(namlen, DTSLOTDATALEN);
3580         UniStrncpy_from_le(kname, t->name, len);
3581 
3582         si = t->next;
3583     }
3584 }
3585 
3586 
3587 /*
3588  *  dtInsertEntry()
3589  *
3590  * function: allocate free slot(s) and
3591  *       write a leaf/internal entry
3592  *
3593  * return: entry slot index
3594  */
3595 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3596               ddata_t * data, struct dt_lock ** dtlock)
3597 {
3598     struct dtslot *h, *t;
3599     struct ldtentry *lh = NULL;
3600     struct idtentry *ih = NULL;
3601     int hsi, fsi, klen, len, nextindex;
3602     wchar_t *kname;
3603     __le16 *name;
3604     s8 *stbl;
3605     pxd_t *xd;
3606     struct dt_lock *dtlck = *dtlock;
3607     struct lv *lv;
3608     int xsi, n;
3609     s64 bn = 0;
3610     struct metapage *mp = NULL;
3611 
3612     klen = key->namlen;
3613     kname = key->name;
3614 
3615     /* allocate a free slot */
3616     hsi = fsi = p->header.freelist;
3617     h = &p->slot[fsi];
3618     p->header.freelist = h->next;
3619     --p->header.freecnt;
3620 
3621     /* open new linelock */
3622     if (dtlck->index >= dtlck->maxcnt)
3623         dtlck = (struct dt_lock *) txLinelock(dtlck);
3624 
3625     lv = & dtlck->lv[dtlck->index];
3626     lv->offset = hsi;
3627 
3628     /* write head/only segment */
3629     if (p->header.flag & BT_LEAF) {
3630         lh = (struct ldtentry *) h;
3631         lh->next = h->next;
3632         lh->inumber = cpu_to_le32(data->leaf.ino);
3633         lh->namlen = klen;
3634         name = lh->name;
3635         if (data->leaf.ip) {
3636             len = min(klen, DTLHDRDATALEN);
3637             if (!(p->header.flag & BT_ROOT))
3638                 bn = addressPXD(&p->header.self);
3639             lh->index = cpu_to_le32(add_index(data->leaf.tid,
3640                               data->leaf.ip,
3641                               bn, index));
3642         } else
3643             len = min(klen, DTLHDRDATALEN_LEGACY);
3644     } else {
3645         ih = (struct idtentry *) h;
3646         ih->next = h->next;
3647         xd = (pxd_t *) ih;
3648         *xd = data->xd;
3649         ih->namlen = klen;
3650         name = ih->name;
3651         len = min(klen, DTIHDRDATALEN);
3652     }
3653 
3654     UniStrncpy_to_le(name, kname, len);
3655 
3656     n = 1;
3657     xsi = hsi;
3658 
3659     /* write additional segment(s) */
3660     t = h;
3661     klen -= len;
3662     while (klen) {
3663         /* get free slot */
3664         fsi = p->header.freelist;
3665         t = &p->slot[fsi];
3666         p->header.freelist = t->next;
3667         --p->header.freecnt;
3668 
3669         /* is next slot contiguous ? */
3670         if (fsi != xsi + 1) {
3671             /* close current linelock */
3672             lv->length = n;
3673             dtlck->index++;
3674 
3675             /* open new linelock */
3676             if (dtlck->index < dtlck->maxcnt)
3677                 lv++;
3678             else {
3679                 dtlck = (struct dt_lock *) txLinelock(dtlck);
3680                 lv = & dtlck->lv[0];
3681             }
3682 
3683             lv->offset = fsi;
3684             n = 0;
3685         }
3686 
3687         kname += len;
3688         len = min(klen, DTSLOTDATALEN);
3689         UniStrncpy_to_le(t->name, kname, len);
3690 
3691         n++;
3692         xsi = fsi;
3693         klen -= len;
3694     }
3695 
3696     /* close current linelock */
3697     lv->length = n;
3698     dtlck->index++;
3699 
3700     *dtlock = dtlck;
3701 
3702     /* terminate last/only segment */
3703     if (h == t) {
3704         /* single segment entry */
3705         if (p->header.flag & BT_LEAF)
3706             lh->next = -1;
3707         else
3708             ih->next = -1;
3709     } else
3710         /* multi-segment entry */
3711         t->next = -1;
3712 
3713     /* if insert into middle, shift right succeeding entries in stbl */
3714     stbl = DT_GETSTBL(p);
3715     nextindex = p->header.nextindex;
3716     if (index < nextindex) {
3717         memmove(stbl + index + 1, stbl + index, nextindex - index);
3718 
3719         if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
3720             s64 lblock;
3721 
3722             /*
3723              * Need to update slot number for entries that moved
3724              * in the stbl
3725              */
3726             mp = NULL;
3727             for (n = index + 1; n <= nextindex; n++) {
3728                 lh = (struct ldtentry *) & (p->slot[stbl[n]]);
3729                 modify_index(data->leaf.tid, data->leaf.ip,
3730                          le32_to_cpu(lh->index), bn, n,
3731                          &mp, &lblock);
3732             }
3733             if (mp)
3734                 release_metapage(mp);
3735         }
3736     }
3737 
3738     stbl[index] = hsi;
3739 
3740     /* advance next available entry index of stbl */
3741     ++p->header.nextindex;
3742 }
3743 
3744 
3745 /*
3746  *  dtMoveEntry()
3747  *
3748  * function: move entries from split/left page to new/right page
3749  *
3750  *  nextindex of dst page and freelist/freecnt of both pages
3751  *  are updated.
3752  */
3753 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
3754             struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
3755             int do_index)
3756 {
3757     int ssi, next;      /* src slot index */
3758     int di;         /* dst entry index */
3759     int dsi;        /* dst slot index */
3760     s8 *sstbl, *dstbl;  /* sorted entry table */
3761     int snamlen, len;
3762     struct ldtentry *slh, *dlh = NULL;
3763     struct idtentry *sih, *dih = NULL;
3764     struct dtslot *h, *s, *d;
3765     struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
3766     struct lv *slv, *dlv;
3767     int xssi, ns, nd;
3768     int sfsi;
3769 
3770     sstbl = (s8 *) & sp->slot[sp->header.stblindex];
3771     dstbl = (s8 *) & dp->slot[dp->header.stblindex];
3772 
3773     dsi = dp->header.freelist;  /* first (whole page) free slot */
3774     sfsi = sp->header.freelist;
3775 
3776     /* linelock destination entry slot */
3777     dlv = & ddtlck->lv[ddtlck->index];
3778     dlv->offset = dsi;
3779 
3780     /* linelock source entry slot */
3781     slv = & sdtlck->lv[sdtlck->index];
3782     slv->offset = sstbl[si];
3783     xssi = slv->offset - 1;
3784 
3785     /*
3786      * move entries
3787      */
3788     ns = nd = 0;
3789     for (di = 0; si < sp->header.nextindex; si++, di++) {
3790         ssi = sstbl[si];
3791         dstbl[di] = dsi;
3792 
3793         /* is next slot contiguous ? */
3794         if (ssi != xssi + 1) {
3795             /* close current linelock */
3796             slv->length = ns;
3797             sdtlck->index++;
3798 
3799             /* open new linelock */
3800             if (sdtlck->index < sdtlck->maxcnt)
3801                 slv++;
3802             else {
3803                 sdtlck = (struct dt_lock *) txLinelock(sdtlck);
3804                 slv = & sdtlck->lv[0];
3805             }
3806 
3807             slv->offset = ssi;
3808             ns = 0;
3809         }
3810 
3811         /*
3812          * move head/only segment of an entry
3813          */
3814         /* get dst slot */
3815         h = d = &dp->slot[dsi];
3816 
3817         /* get src slot and move */
3818         s = &sp->slot[ssi];
3819         if (sp->header.flag & BT_LEAF) {
3820             /* get source entry */
3821             slh = (struct ldtentry *) s;
3822             dlh = (struct ldtentry *) h;
3823             snamlen = slh->namlen;
3824 
3825             if (do_index) {
3826                 len = min(snamlen, DTLHDRDATALEN);
3827                 dlh->index = slh->index; /* little-endian */
3828             } else
3829                 len = min(snamlen, DTLHDRDATALEN_LEGACY);
3830 
3831             memcpy(dlh, slh, 6 + len * 2);
3832 
3833             next = slh->next;
3834 
3835             /* update dst head/only segment next field */
3836             dsi++;
3837             dlh->next = dsi;
3838         } else {
3839             sih = (struct idtentry *) s;
3840             snamlen = sih->namlen;
3841 
3842             len = min(snamlen, DTIHDRDATALEN);
3843             dih = (struct idtentry *) h;
3844             memcpy(dih, sih, 10 + len * 2);
3845             next = sih->next;
3846 
3847             dsi++;
3848             dih->next = dsi;
3849         }
3850 
3851         /* free src head/only segment */
3852         s->next = sfsi;
3853         s->cnt = 1;
3854         sfsi = ssi;
3855 
3856         ns++;
3857         nd++;
3858         xssi = ssi;
3859 
3860         /*
3861          * move additional segment(s) of the entry
3862          */
3863         snamlen -= len;
3864         while ((ssi = next) >= 0) {
3865             /* is next slot contiguous ? */
3866             if (ssi != xssi + 1) {
3867                 /* close current linelock */
3868                 slv->length = ns;
3869                 sdtlck->index++;
3870 
3871                 /* open new linelock */
3872                 if (sdtlck->index < sdtlck->maxcnt)
3873                     slv++;
3874                 else {
3875                     sdtlck =
3876                         (struct dt_lock *)
3877                         txLinelock(sdtlck);
3878                     slv = & sdtlck->lv[0];
3879                 }
3880 
3881                 slv->offset = ssi;
3882                 ns = 0;
3883             }
3884 
3885             /* get next source segment */
3886             s = &sp->slot[ssi];
3887 
3888             /* get next destination free slot */
3889             d++;
3890 
3891             len = min(snamlen, DTSLOTDATALEN);
3892             UniStrncpy_le(d->name, s->name, len);
3893 
3894             ns++;
3895             nd++;
3896             xssi = ssi;
3897 
3898             dsi++;
3899             d->next = dsi;
3900 
3901             /* free source segment */
3902             next = s->next;
3903             s->next = sfsi;
3904             s->cnt = 1;
3905             sfsi = ssi;
3906 
3907             snamlen -= len;
3908         }       /* end while */
3909 
3910         /* terminate dst last/only segment */
3911         if (h == d) {
3912             /* single segment entry */
3913             if (dp->header.flag & BT_LEAF)
3914                 dlh->next = -1;
3915             else
3916                 dih->next = -1;
3917         } else
3918             /* multi-segment entry */
3919             d->next = -1;
3920     }           /* end for */
3921 
3922     /* close current linelock */
3923     slv->length = ns;
3924     sdtlck->index++;
3925     *sdtlock = sdtlck;
3926 
3927     dlv->length = nd;
3928     ddtlck->index++;
3929     *ddtlock = ddtlck;
3930 
3931     /* update source header */
3932     sp->header.freelist = sfsi;
3933     sp->header.freecnt += nd;
3934 
3935     /* update destination header */
3936     dp->header.nextindex = di;
3937 
3938     dp->header.freelist = dsi;
3939     dp->header.freecnt -= nd;
3940 }
3941 
3942 
3943 /*
3944  *  dtDeleteEntry()
3945  *
3946  * function: free a (leaf/internal) entry
3947  *
3948  * log freelist header, stbl, and each segment slot of entry
3949  * (even though last/only segment next field is modified,
3950  * physical image logging requires all segment slots of
3951  * the entry logged to avoid applying previous updates
3952  * to the same slots)
3953  */
3954 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
3955 {
3956     int fsi;        /* free entry slot index */
3957     s8 *stbl;
3958     struct dtslot *t;
3959     int si, freecnt;
3960     struct dt_lock *dtlck = *dtlock;
3961     struct lv *lv;
3962     int xsi, n;
3963 
3964     /* get free entry slot index */
3965     stbl = DT_GETSTBL(p);
3966     fsi = stbl[fi];
3967 
3968     /* open new linelock */
3969     if (dtlck->index >= dtlck->maxcnt)
3970         dtlck = (struct dt_lock *) txLinelock(dtlck);
3971     lv = & dtlck->lv[dtlck->index];
3972 
3973     lv->offset = fsi;
3974 
3975     /* get the head/only segment */
3976     t = &p->slot[fsi];
3977     if (p->header.flag & BT_LEAF)
3978         si = ((struct ldtentry *) t)->next;
3979     else
3980         si = ((struct idtentry *) t)->next;
3981     t->next = si;
3982     t->cnt = 1;
3983 
3984     n = freecnt = 1;
3985     xsi = fsi;
3986 
3987     /* find the last/only segment */
3988     while (si >= 0) {
3989         /* is next slot contiguous ? */
3990         if (si != xsi + 1) {
3991             /* close current linelock */
3992             lv->length = n;
3993             dtlck->index++;
3994 
3995             /* open new linelock */
3996             if (dtlck->index < dtlck->maxcnt)
3997                 lv++;
3998             else {
3999                 dtlck = (struct dt_lock *) txLinelock(dtlck);
4000                 lv = & dtlck->lv[0];
4001             }
4002 
4003             lv->offset = si;
4004             n = 0;
4005         }
4006 
4007         n++;
4008         xsi = si;
4009         freecnt++;
4010 
4011         t = &p->slot[si];
4012         t->cnt = 1;
4013         si = t->next;
4014     }
4015 
4016     /* close current linelock */
4017     lv->length = n;
4018     dtlck->index++;
4019 
4020     *dtlock = dtlck;
4021 
4022     /* update freelist */
4023     t->next = p->header.freelist;
4024     p->header.freelist = fsi;
4025     p->header.freecnt += freecnt;
4026 
4027     /* if delete from middle,
4028      * shift left the succedding entries in the stbl
4029      */
4030     si = p->header.nextindex;
4031     if (fi < si - 1)
4032         memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4033 
4034     p->header.nextindex--;
4035 }
4036 
4037 
4038 /*
4039  *  dtTruncateEntry()
4040  *
4041  * function: truncate a (leaf/internal) entry
4042  *
4043  * log freelist header, stbl, and each segment slot of entry
4044  * (even though last/only segment next field is modified,
4045  * physical image logging requires all segment slots of
4046  * the entry logged to avoid applying previous updates
4047  * to the same slots)
4048  */
4049 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4050 {
4051     int tsi;        /* truncate entry slot index */
4052     s8 *stbl;
4053     struct dtslot *t;
4054     int si, freecnt;
4055     struct dt_lock *dtlck = *dtlock;
4056     struct lv *lv;
4057     int fsi, xsi, n;
4058 
4059     /* get free entry slot index */
4060     stbl = DT_GETSTBL(p);
4061     tsi = stbl[ti];
4062 
4063     /* open new linelock */
4064     if (dtlck->index >= dtlck->maxcnt)
4065         dtlck = (struct dt_lock *) txLinelock(dtlck);
4066     lv = & dtlck->lv[dtlck->index];
4067 
4068     lv->offset = tsi;
4069 
4070     /* get the head/only segment */
4071     t = &p->slot[tsi];
4072     ASSERT(p->header.flag & BT_INTERNAL);
4073     ((struct idtentry *) t)->namlen = 0;
4074     si = ((struct idtentry *) t)->next;
4075     ((struct idtentry *) t)->next = -1;
4076 
4077     n = 1;
4078     freecnt = 0;
4079     fsi = si;
4080     xsi = tsi;
4081 
4082     /* find the last/only segment */
4083     while (si >= 0) {
4084         /* is next slot contiguous ? */
4085         if (si != xsi + 1) {
4086             /* close current linelock */
4087             lv->length = n;
4088             dtlck->index++;
4089 
4090             /* open new linelock */
4091             if (dtlck->index < dtlck->maxcnt)
4092                 lv++;
4093             else {
4094                 dtlck = (struct dt_lock *) txLinelock(dtlck);
4095                 lv = & dtlck->lv[0];
4096             }
4097 
4098             lv->offset = si;
4099             n = 0;
4100         }
4101 
4102         n++;
4103         xsi = si;
4104         freecnt++;
4105 
4106         t = &p->slot[si];
4107         t->cnt = 1;
4108         si = t->next;
4109     }
4110 
4111     /* close current linelock */
4112     lv->length = n;
4113     dtlck->index++;
4114 
4115     *dtlock = dtlck;
4116 
4117     /* update freelist */
4118     if (freecnt == 0)
4119         return;
4120     t->next = p->header.freelist;
4121     p->header.freelist = fsi;
4122     p->header.freecnt += freecnt;
4123 }
4124 
4125 
4126 /*
4127  *  dtLinelockFreelist()
4128  */
4129 static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
4130                    int m,   /* max slot index */
4131                    struct dt_lock ** dtlock)
4132 {
4133     int fsi;        /* free entry slot index */
4134     struct dtslot *t;
4135     int si;
4136     struct dt_lock *dtlck = *dtlock;
4137     struct lv *lv;
4138     int xsi, n;
4139 
4140     /* get free entry slot index */
4141     fsi = p->header.freelist;
4142 
4143     /* open new linelock */
4144     if (dtlck->index >= dtlck->maxcnt)
4145         dtlck = (struct dt_lock *) txLinelock(dtlck);
4146     lv = & dtlck->lv[dtlck->index];
4147 
4148     lv->offset = fsi;
4149 
4150     n = 1;
4151     xsi = fsi;
4152 
4153     t = &p->slot[fsi];
4154     si = t->next;
4155 
4156     /* find the last/only segment */
4157     while (si < m && si >= 0) {
4158         /* is next slot contiguous ? */
4159         if (si != xsi + 1) {
4160             /* close current linelock */
4161             lv->length = n;
4162             dtlck->index++;
4163 
4164             /* open new linelock */
4165             if (dtlck->index < dtlck->maxcnt)
4166                 lv++;
4167             else {
4168                 dtlck = (struct dt_lock *) txLinelock(dtlck);
4169                 lv = & dtlck->lv[0];
4170             }
4171 
4172             lv->offset = si;
4173             n = 0;
4174         }
4175 
4176         n++;
4177         xsi = si;
4178 
4179         t = &p->slot[si];
4180         si = t->next;
4181     }
4182 
4183     /* close current linelock */
4184     lv->length = n;
4185     dtlck->index++;
4186 
4187     *dtlock = dtlck;
4188 }
4189 
4190 
4191 /*
4192  * NAME: dtModify
4193  *
4194  * FUNCTION: Modify the inode number part of a directory entry
4195  *
4196  * PARAMETERS:
4197  *  tid - Transaction id
4198  *  ip  - Inode of parent directory
4199  *  key - Name of entry to be modified
4200  *  orig_ino    - Original inode number expected in entry
4201  *  new_ino - New inode number to put into entry
4202  *  flag    - JFS_RENAME
4203  *
4204  * RETURNS:
4205  *  -ESTALE - If entry found does not match orig_ino passed in
4206  *  -ENOENT - If no entry can be found to match key
4207  *  0   - If successfully modified entry
4208  */
4209 int dtModify(tid_t tid, struct inode *ip,
4210      struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4211 {
4212     int rc;
4213     s64 bn;
4214     struct metapage *mp;
4215     dtpage_t *p;
4216     int index;
4217     struct btstack btstack;
4218     struct tlock *tlck;
4219     struct dt_lock *dtlck;
4220     struct lv *lv;
4221     s8 *stbl;
4222     int entry_si;       /* entry slot index */
4223     struct ldtentry *entry;
4224 
4225     /*
4226      *  search for the entry to modify:
4227      *
4228      * dtSearch() returns (leaf page pinned, index at which to modify).
4229      */
4230     if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4231         return rc;
4232 
4233     /* retrieve search result */
4234     DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4235 
4236     BT_MARK_DIRTY(mp, ip);
4237     /*
4238      * acquire a transaction lock on the leaf page of named entry
4239      */
4240     tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4241     dtlck = (struct dt_lock *) & tlck->lock;
4242 
4243     /* get slot index of the entry */
4244     stbl = DT_GETSTBL(p);
4245     entry_si = stbl[index];
4246 
4247     /* linelock entry */
4248     ASSERT(dtlck->index == 0);
4249     lv = & dtlck->lv[0];
4250     lv->offset = entry_si;
4251     lv->length = 1;
4252     dtlck->index++;
4253 
4254     /* get the head/only segment */
4255     entry = (struct ldtentry *) & p->slot[entry_si];
4256 
4257     /* substitute the inode number of the entry */
4258     entry->inumber = cpu_to_le32(new_ino);
4259 
4260     /* unpin the leaf page */
4261     DT_PUTPAGE(mp);
4262 
4263     return 0;
4264 }